author	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-04-27 12:26:46 -0400
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-04-27 12:26:46 -0400
commit	15c54033964a943de7b0763efd3bd0ede7326395 (patch)
tree	840b292612d1b5396d5bab5bde537a9013db3ceb
parent	ad5da3cf39a5b11a198929be1f2644e17ecd767e (diff)
parent	912a41a4ab935ce8c4308428ec13fc7f8b1f18f4 (diff)
Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
* master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6: (448 commits)
  [IPV4] nl_fib_lookup: Initialise res.r before fib_res_put(&res)
  [IPV6]: Fix thinko in ipv6_rthdr_rcv() changes.
  [IPV4]: Add multipath cached to feature-removal-schedule.txt
  [WIRELESS] cfg80211: Clarify locking comment.
  [WIRELESS] cfg80211: Fix locking in wiphy_new.
  [WEXT] net_device: Don't include wext bits if not required.
  [WEXT]: Misc code cleanups.
  [WEXT]: Reduce inline abuse.
  [WEXT]: Move EXPORT_SYMBOL statements where they belong.
  [WEXT]: Cleanup early ioctl call path.
  [WEXT]: Remove options.
  [WEXT]: Remove dead debug code.
  [WEXT]: Clean up how wext is called.
  [WEXT]: Move to net/wireless
  [AFS]: Eliminate cmpxchg() usage in vlocation code.
  [RXRPC]: Fix pointers passed to bitops.
  [RXRPC]: Remove bogus atomic_* overrides.
  [AFS]: Fix u64 printing in debug logging.
  [AFS]: Add "directory write" support.
  [AFS]: Implement the CB.InitCallBackState3 operation.
  ...
-rw-r--r--CREDITS14
-rw-r--r--Documentation/feature-removal-schedule.txt40
-rw-r--r--Documentation/filesystems/afs.txt214
-rw-r--r--Documentation/filesystems/proc.txt9
-rw-r--r--Documentation/keys.txt12
-rw-r--r--Documentation/networking/bonding.txt35
-rw-r--r--Documentation/networking/dccp.txt10
-rw-r--r--Documentation/networking/ip-sysctl.txt31
-rw-r--r--Documentation/networking/rxrpc.txt859
-rw-r--r--Documentation/networking/wan-router.txt1
-rw-r--r--MAINTAINERS38
-rw-r--r--arch/ia64/hp/sim/simeth.c3
-rw-r--r--arch/ia64/sn/kernel/xpnet.c18
-rw-r--r--arch/ppc/8260_io/enet.c1
-rw-r--r--arch/ppc/8260_io/fcc_enet.c1
-rw-r--r--arch/ppc/8xx_io/enet.c1
-rw-r--r--arch/ppc/8xx_io/fec.c1
-rw-r--r--arch/s390/appldata/appldata_net_sum.c4
-rw-r--r--arch/s390/lib/Makefile2
-rw-r--r--arch/s390/lib/div64.c2
-rw-r--r--arch/um/drivers/daemon_kern.c2
-rw-r--r--arch/um/drivers/mcast_kern.c2
-rw-r--r--arch/um/drivers/net_kern.c2
-rw-r--r--arch/um/drivers/pcap_kern.c2
-rw-r--r--arch/um/drivers/slip_kern.c2
-rw-r--r--arch/um/drivers/slirp_kern.c2
-rw-r--r--arch/um/os-Linux/drivers/ethertap_kern.c2
-rw-r--r--arch/um/os-Linux/drivers/tuntap_kern.c2
-rw-r--r--arch/xtensa/platform-iss/network.c2
-rw-r--r--drivers/atm/ambassador.c2
-rw-r--r--drivers/atm/atmtcp.c6
-rw-r--r--drivers/atm/eni.c4
-rw-r--r--drivers/atm/eni.h2
-rw-r--r--drivers/atm/fore200e.c20
-rw-r--r--drivers/atm/fore200e.h2
-rw-r--r--drivers/atm/he.c4
-rw-r--r--drivers/atm/idt77252.c28
-rw-r--r--drivers/atm/nicstar.c14
-rw-r--r--drivers/block/aoe/aoe.h9
-rw-r--r--drivers/block/aoe/aoecmd.c17
-rw-r--r--drivers/block/aoe/aoenet.c2
-rw-r--r--drivers/bluetooth/bfusb.c2
-rw-r--r--drivers/bluetooth/bluecard_cs.c6
-rw-r--r--drivers/bluetooth/bpa10x.c4
-rw-r--r--drivers/bluetooth/bt3c_cs.c6
-rw-r--r--drivers/bluetooth/btuart_cs.c6
-rw-r--r--drivers/bluetooth/dtl1_cs.c2
-rw-r--r--drivers/bluetooth/hci_h4.c6
-rw-r--r--drivers/char/pcmcia/synclink_cs.c2
-rw-r--r--drivers/char/random.c38
-rw-r--r--drivers/connector/connector.c4
-rw-r--r--drivers/ieee1394/eth1394.c4
-rw-r--r--drivers/ieee1394/eth1394.h2
-rw-r--r--drivers/infiniband/hw/amso1100/c2.c6
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c17
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c2
-rw-r--r--drivers/isdn/act2000/module.c2
-rw-r--r--drivers/isdn/gigaset/usb-gigaset.c2
-rw-r--r--drivers/isdn/hardware/avm/b1dma.c3
-rw-r--r--drivers/isdn/hardware/avm/c4.c3
-rw-r--r--drivers/isdn/hisax/elsa_ser.c6
-rw-r--r--drivers/isdn/hisax/isdnl2.c3
-rw-r--r--drivers/isdn/hysdn/hycapi.c5
-rw-r--r--drivers/isdn/hysdn/hysdn_net.c2
-rw-r--r--drivers/isdn/hysdn/hysdn_sched.c5
-rw-r--r--drivers/isdn/i4l/isdn_common.c2
-rw-r--r--drivers/isdn/i4l/isdn_net.c11
-rw-r--r--drivers/isdn/i4l/isdn_ppp.c9
-rw-r--r--drivers/isdn/isdnloop/isdnloop.c3
-rw-r--r--drivers/isdn/pcbit/capi.c12
-rw-r--r--drivers/media/dvb/dvb-core/dvb_net.c16
-rw-r--r--drivers/message/fusion/mptlan.c36
-rw-r--r--drivers/net/3c501.c1
-rw-r--r--drivers/net/3c505.c3
-rw-r--r--drivers/net/3c507.c1
-rw-r--r--drivers/net/3c509.c1
-rw-r--r--drivers/net/3c515.c2
-rw-r--r--drivers/net/3c523.c3
-rw-r--r--drivers/net/3c527.c1
-rw-r--r--drivers/net/3c59x.c2
-rw-r--r--drivers/net/7990.c3
-rw-r--r--drivers/net/8139cp.c6
-rw-r--r--drivers/net/8139too.c7
-rw-r--r--drivers/net/82596.c1
-rw-r--r--drivers/net/Makefile2
-rw-r--r--drivers/net/a2065.c3
-rw-r--r--drivers/net/acenic.c1
-rw-r--r--drivers/net/amd8111e.c4
-rw-r--r--drivers/net/appletalk/cops.c4
-rw-r--r--drivers/net/appletalk/ltpc.c15
-rw-r--r--drivers/net/arcnet/arc-rawmode.c2
-rw-r--r--drivers/net/arcnet/arcnet.c17
-rw-r--r--drivers/net/arcnet/capmode.c14
-rw-r--r--drivers/net/arcnet/rfc1051.c2
-rw-r--r--drivers/net/arcnet/rfc1201.c2
-rw-r--r--drivers/net/ariadne.c1
-rw-r--r--drivers/net/arm/am79c961a.c1
-rw-r--r--drivers/net/arm/at91_ether.c1
-rw-r--r--drivers/net/arm/ep93xx_eth.c1
-rw-r--r--drivers/net/arm/ether1.c1
-rw-r--r--drivers/net/arm/ether3.c1
-rw-r--r--drivers/net/at1700.c1
-rw-r--r--drivers/net/atari_bionet.c7
-rw-r--r--drivers/net/atari_pamsnet.c6
-rw-r--r--drivers/net/atarilance.c1
-rw-r--r--drivers/net/atl1/atl1_main.c33
-rw-r--r--drivers/net/atp.c1
-rw-r--r--drivers/net/au1000_eth.c3
-rw-r--r--drivers/net/b44.c8
-rw-r--r--drivers/net/bmac.c1
-rw-r--r--drivers/net/bnx2.c37
-rw-r--r--drivers/net/bonding/bond_3ad.c8
-rw-r--r--drivers/net/bonding/bond_alb.c36
-rw-r--r--drivers/net/bonding/bond_main.c9
-rw-r--r--drivers/net/cassini.c11
-rw-r--r--drivers/net/chelsio/sge.c39
-rw-r--r--drivers/net/cris/eth_v10.c4
-rw-r--r--drivers/net/cs89x0.c2
-rw-r--r--drivers/net/cxgb3/cxgb3_offload.c2
-rw-r--r--drivers/net/cxgb3/sge.c39
-rw-r--r--drivers/net/de600.c1
-rw-r--r--drivers/net/de620.c1
-rw-r--r--drivers/net/declance.c1
-rw-r--r--drivers/net/defxx.c6
-rw-r--r--drivers/net/depca.c1
-rw-r--r--drivers/net/dgrs.c3
-rw-r--r--drivers/net/dl2k.c4
-rw-r--r--drivers/net/dm9000.c1
-rw-r--r--drivers/net/e100.c2
-rw-r--r--drivers/net/e1000/e1000_main.c59
-rw-r--r--drivers/net/eepro.c1
-rw-r--r--drivers/net/eepro100.c6
-rw-r--r--drivers/net/eexpress.c1
-rw-r--r--drivers/net/ehea/ehea_main.c39
-rw-r--r--drivers/net/epic100.c3
-rw-r--r--drivers/net/eth16i.c1
-rw-r--r--drivers/net/ewrk3.c1
-rw-r--r--drivers/net/fealnx.c1
-rw-r--r--drivers/net/fec.c1
-rw-r--r--drivers/net/fec_8xx/fec_main.c5
-rw-r--r--drivers/net/forcedeth.c30
-rw-r--r--drivers/net/fs_enet/fs_enet-main.c9
-rw-r--r--drivers/net/gianfar.c12
-rw-r--r--drivers/net/hamachi.c1
-rw-r--r--drivers/net/hamradio/bpqether.c2
-rw-r--r--drivers/net/hamradio/dmascc.c2
-rw-r--r--drivers/net/hamradio/hdlcdrv.c4
-rw-r--r--drivers/net/hamradio/yam.c4
-rw-r--r--drivers/net/hp100.c1
-rw-r--r--drivers/net/ibm_emac/ibm_emac_core.c3
-rw-r--r--drivers/net/ibmlana.c1
-rw-r--r--drivers/net/ibmveth.c1
-rw-r--r--drivers/net/ioc3-eth.c13
-rw-r--r--drivers/net/irda/ali-ircc.c9
-rw-r--r--drivers/net/irda/au1k_ir.c6
-rw-r--r--drivers/net/irda/donauboe.c8
-rw-r--r--drivers/net/irda/irda-usb.c6
-rw-r--r--drivers/net/irda/mcs7780.c38
-rw-r--r--drivers/net/irda/nsc-ircc.c15
-rw-r--r--drivers/net/irda/pxaficp_ir.c6
-rw-r--r--drivers/net/irda/sa1100_ir.c2
-rw-r--r--drivers/net/irda/smsc-ircc2.c5
-rw-r--r--drivers/net/irda/stir4200.c5
-rw-r--r--drivers/net/irda/via-ircc.c18
-rw-r--r--drivers/net/irda/vlsi_ir.c4
-rw-r--r--drivers/net/irda/w83977af_ir.c12
-rw-r--r--drivers/net/iseries_veth.c1
-rw-r--r--drivers/net/ixgb/ixgb_main.c36
-rw-r--r--drivers/net/ixp2000/ixpdev.c3
-rw-r--r--drivers/net/lance.c3
-rw-r--r--drivers/net/lasi_82596.c1
-rw-r--r--drivers/net/lib8390.c1
-rw-r--r--drivers/net/loopback.c24
-rw-r--r--drivers/net/lp486e.c1
-rw-r--r--drivers/net/mac89x0.c1
-rw-r--r--drivers/net/macb.c11
-rw-r--r--drivers/net/mace.c1
-rw-r--r--drivers/net/macmace.c4
-rw-r--r--drivers/net/meth.c11
-rw-r--r--drivers/net/mipsnet.c1
-rw-r--r--drivers/net/mv643xx_eth.c9
-rw-r--r--drivers/net/myri10ge/myri10ge.c7
-rw-r--r--drivers/net/myri_sbus.c4
-rw-r--r--drivers/net/natsemi.c1
-rw-r--r--drivers/net/netx-eth.c1
-rw-r--r--drivers/net/netxen/netxen_nic_hw.c15
-rw-r--r--drivers/net/netxen/netxen_nic_init.c1
-rw-r--r--drivers/net/netxen/netxen_nic_main.c12
-rw-r--r--drivers/net/ni5010.c1
-rw-r--r--drivers/net/ni52.c3
-rw-r--r--drivers/net/ni65.c7
-rw-r--r--drivers/net/ns83820.c5
-rw-r--r--drivers/net/pasemi_mac.c14
-rw-r--r--drivers/net/pci-skeleton.c3
-rw-r--r--drivers/net/pcmcia/3c574_cs.c1
-rw-r--r--drivers/net/pcmcia/3c589_cs.c1
-rw-r--r--drivers/net/pcmcia/axnet_cs.c3
-rw-r--r--drivers/net/pcmcia/fmvj18x_cs.c1
-rw-r--r--drivers/net/pcmcia/nmclan_cs.c4
-rw-r--r--drivers/net/pcmcia/smc91c92_cs.c1
-rw-r--r--drivers/net/pcmcia/xirc2ps_cs.c1
-rw-r--r--drivers/net/pcnet32.c1
-rw-r--r--drivers/net/plip.c2
-rw-r--r--drivers/net/ppp_generic.c6
-rw-r--r--drivers/net/ppp_synctty.c3
-rw-r--r--drivers/net/pppoe.c156
-rw-r--r--drivers/net/pppox.c2
-rwxr-xr-xdrivers/net/qla3xxx.c5
-rw-r--r--drivers/net/r8169.c3
-rw-r--r--drivers/net/rionet.c1
-rw-r--r--drivers/net/rrunner.c3
-rw-r--r--drivers/net/s2io.c4
-rw-r--r--drivers/net/saa9730.c1
-rw-r--r--drivers/net/sb1000.c2
-rw-r--r--drivers/net/sb1250-mac.c3
-rw-r--r--drivers/net/sc92031.c1
-rw-r--r--drivers/net/seeq8005.c1
-rw-r--r--drivers/net/sgiseeq.c3
-rw-r--r--drivers/net/sis190.c1
-rw-r--r--drivers/net/sis900.c3
-rw-r--r--drivers/net/sk98lin/skge.c12
-rw-r--r--drivers/net/skfp/skfddi.c3
-rw-r--r--drivers/net/skge.c6
-rw-r--r--drivers/net/sky2.c11
-rw-r--r--drivers/net/slip.c2
-rw-r--r--drivers/net/smc911x.c2
-rw-r--r--drivers/net/smc9194.c1
-rw-r--r--drivers/net/smc91x.c1
-rw-r--r--drivers/net/sonic.c2
-rw-r--r--drivers/net/spider_net.c3
-rw-r--r--drivers/net/starfire.c1
-rw-r--r--drivers/net/sun3_82586.c3
-rw-r--r--drivers/net/sun3lance.c5
-rw-r--r--drivers/net/sunbmac.c1
-rw-r--r--drivers/net/sundance.c1
-rw-r--r--drivers/net/sungem.c9
-rw-r--r--drivers/net/sunhme.c9
-rw-r--r--drivers/net/sunlance.c4
-rw-r--r--drivers/net/sunqe.c3
-rw-r--r--drivers/net/tc35815.c1
-rw-r--r--drivers/net/tg3.c51
-rw-r--r--drivers/net/tlan.c4
-rw-r--r--drivers/net/tokenring/3c359.c11
-rw-r--r--drivers/net/tokenring/ibmtr.c1
-rw-r--r--drivers/net/tokenring/lanstreamer.c7
-rw-r--r--drivers/net/tokenring/olympic.c18
-rw-r--r--drivers/net/tokenring/smctr.c6
-rw-r--r--drivers/net/tokenring/tms380tr.c6
-rw-r--r--drivers/net/tsi108_eth.c1
-rw-r--r--drivers/net/tulip/de2104x.c5
-rw-r--r--drivers/net/tulip/de4x5.c2
-rw-r--r--drivers/net/tulip/dmfe.c12
-rw-r--r--drivers/net/tulip/interrupt.c2
-rw-r--r--drivers/net/tulip/uli526x.c22
-rw-r--r--drivers/net/tulip/winbond-840.c3
-rw-r--r--drivers/net/tulip/xircom_cb.c7
-rw-r--r--drivers/net/tulip/xircom_tulip_cb.c5
-rw-r--r--drivers/net/tun.c46
-rw-r--r--drivers/net/typhoon.c1
-rw-r--r--drivers/net/via-rhine.c1
-rw-r--r--drivers/net/via-velocity.c12
-rw-r--r--drivers/net/wan/cosa.c2
-rw-r--r--drivers/net/wan/cycx_x25.c2
-rw-r--r--drivers/net/wan/dlci.c2
-rw-r--r--drivers/net/wan/dscc4.c3
-rw-r--r--drivers/net/wan/farsync.c2
-rw-r--r--drivers/net/wan/hdlc_cisco.c2
-rw-r--r--drivers/net/wan/hdlc_fr.c5
-rw-r--r--drivers/net/wan/hostess_sv11.c2
-rw-r--r--drivers/net/wan/lmc/lmc_main.c16
-rw-r--r--drivers/net/wan/pc300_drv.c6
-rw-r--r--drivers/net/wan/pc300_tty.c6
-rw-r--r--drivers/net/wan/sbni.c5
-rw-r--r--drivers/net/wan/sealevel.c2
-rw-r--r--drivers/net/wan/syncppp.c2
-rw-r--r--drivers/net/wan/z85230.c4
-rw-r--r--drivers/net/wireless/Kconfig120
-rw-r--r--drivers/net/wireless/airo.c11
-rw-r--r--drivers/net/wireless/arlan-main.c1
-rw-r--r--drivers/net/wireless/atmel.c6
-rw-r--r--drivers/net/wireless/bcm43xx/Kconfig3
-rw-r--r--drivers/net/wireless/bcm43xx/bcm43xx_dma.c3
-rw-r--r--drivers/net/wireless/hostap/Kconfig3
-rw-r--r--drivers/net/wireless/hostap/hostap_80211_rx.c23
-rw-r--r--drivers/net/wireless/hostap/hostap_80211_tx.c25
-rw-r--r--drivers/net/wireless/hostap/hostap_ap.c7
-rw-r--r--drivers/net/wireless/hostap/hostap_hw.c7
-rw-r--r--drivers/net/wireless/hostap/hostap_main.c17
-rw-r--r--drivers/net/wireless/ipw2100.c5
-rw-r--r--drivers/net/wireless/ipw2200.c4
-rw-r--r--drivers/net/wireless/netwave_cs.c1
-rw-r--r--drivers/net/wireless/orinoco.c5
-rw-r--r--drivers/net/wireless/prism54/islpci_eth.c23
-rw-r--r--drivers/net/wireless/ray_cs.c4
-rw-r--r--drivers/net/wireless/strip.c2
-rw-r--r--drivers/net/wireless/wavelan.c9
-rw-r--r--drivers/net/wireless/wavelan_cs.c6
-rw-r--r--drivers/net/wireless/zd1201.c6
-rw-r--r--drivers/net/wireless/zd1211rw/Kconfig3
-rw-r--r--drivers/net/yellowfin.c1
-rw-r--r--drivers/net/znet.c1
-rw-r--r--drivers/parisc/led.c4
-rw-r--r--drivers/s390/net/claw.c2
-rw-r--r--drivers/s390/net/ctcmain.c28
-rw-r--r--drivers/s390/net/lcs.c3
-rw-r--r--drivers/s390/net/netiucv.c21
-rw-r--r--drivers/s390/net/qeth_eddp.c30
-rw-r--r--drivers/s390/net/qeth_main.c45
-rw-r--r--drivers/s390/net/qeth_tso.h14
-rw-r--r--drivers/scsi/scsi_netlink.c5
-rw-r--r--drivers/scsi/scsi_transport_iscsi.c4
-rw-r--r--drivers/usb/atm/usbatm.c10
-rw-r--r--drivers/usb/gadget/ether.c1
-rw-r--r--drivers/usb/net/asix.c8
-rw-r--r--drivers/usb/net/catc.c3
-rw-r--r--drivers/usb/net/gl620a.c2
-rw-r--r--drivers/usb/net/kaweth.c2
-rw-r--r--drivers/usb/net/net1080.c2
-rw-r--r--drivers/usb/net/pegasus.c7
-rw-r--r--drivers/usb/net/rndis_host.c2
-rw-r--r--drivers/usb/net/rtl8150.c1
-rw-r--r--drivers/usb/net/usbnet.c1
-rw-r--r--fs/Kconfig13
-rw-r--r--fs/afs/Makefile7
-rw-r--r--fs/afs/afs.h146
-rw-r--r--fs/afs/afs_cm.h32
-rw-r--r--fs/afs/afs_fs.h48
-rw-r--r--fs/afs/afs_vl.h (renamed from fs/afs/vlclient.h)49
-rw-r--r--fs/afs/cache.c256
-rw-r--r--fs/afs/cache.h12
-rw-r--r--fs/afs/callback.c509
-rw-r--r--fs/afs/cell.c471
-rw-r--r--fs/afs/cell.h78
-rw-r--r--fs/afs/cmservice.c926
-rw-r--r--fs/afs/cmservice.h29
-rw-r--r--fs/afs/dir.c852
-rw-r--r--fs/afs/errors.h34
-rw-r--r--fs/afs/file.c124
-rw-r--r--fs/afs/fsclient.c1528
-rw-r--r--fs/afs/fsclient.h54
-rw-r--r--fs/afs/inode.c248
-rw-r--r--fs/afs/internal.h755
-rw-r--r--fs/afs/kafsasyncd.c255
-rw-r--r--fs/afs/kafsasyncd.h52
-rw-r--r--fs/afs/kafstimod.c205
-rw-r--r--fs/afs/kafstimod.h49
-rw-r--r--fs/afs/main.c262
-rw-r--r--fs/afs/misc.c38
-rw-r--r--fs/afs/mntpt.c141
-rw-r--r--fs/afs/mount.h23
-rw-r--r--fs/afs/proc.c230
-rw-r--r--fs/afs/rxrpc.c782
-rw-r--r--fs/afs/security.c356
-rw-r--r--fs/afs/server.c647
-rw-r--r--fs/afs/server.h102
-rw-r--r--fs/afs/super.c326
-rw-r--r--fs/afs/super.h45
-rw-r--r--fs/afs/transport.h21
-rw-r--r--fs/afs/types.h125
-rw-r--r--fs/afs/use-rtnetlink.c473
-rw-r--r--fs/afs/vlclient.c737
-rw-r--r--fs/afs/vlocation.c1225
-rw-r--r--fs/afs/vnode.c731
-rw-r--r--fs/afs/vnode.h94
-rw-r--r--fs/afs/volume.c290
-rw-r--r--fs/afs/volume.h140
-rw-r--r--fs/compat_ioctl.c18
-rw-r--r--fs/ecryptfs/netlink.c6
-rw-r--r--include/asm-alpha/socket.h2
-rw-r--r--include/asm-alpha/sockios.h3
-rw-r--r--include/asm-arm/div64.h3
-rw-r--r--include/asm-arm/socket.h2
-rw-r--r--include/asm-arm/sockios.h3
-rw-r--r--include/asm-arm26/socket.h2
-rw-r--r--include/asm-arm26/sockios.h3
-rw-r--r--include/asm-avr32/socket.h2
-rw-r--r--include/asm-avr32/sockios.h3
-rw-r--r--include/asm-cris/socket.h2
-rw-r--r--include/asm-cris/sockios.h3
-rw-r--r--include/asm-frv/socket.h2
-rw-r--r--include/asm-frv/sockios.h3
-rw-r--r--include/asm-generic/div64.h7
-rw-r--r--include/asm-h8300/socket.h2
-rw-r--r--include/asm-h8300/sockios.h3
-rw-r--r--include/asm-i386/div64.h4
-rw-r--r--include/asm-i386/socket.h2
-rw-r--r--include/asm-i386/sockios.h3
-rw-r--r--include/asm-ia64/socket.h2
-rw-r--r--include/asm-ia64/sockios.h3
-rw-r--r--include/asm-m32r/socket.h2
-rw-r--r--include/asm-m32r/sockios.h3
-rw-r--r--include/asm-m68k/div64.h3
-rw-r--r--include/asm-m68k/socket.h2
-rw-r--r--include/asm-m68k/sockios.h3
-rw-r--r--include/asm-mips/div64.h11
-rw-r--r--include/asm-mips/socket.h2
-rw-r--r--include/asm-mips/sockios.h3
-rw-r--r--include/asm-parisc/socket.h2
-rw-r--r--include/asm-parisc/sockios.h3
-rw-r--r--include/asm-powerpc/socket.h2
-rw-r--r--include/asm-powerpc/sockios.h3
-rw-r--r--include/asm-s390/socket.h2
-rw-r--r--include/asm-s390/sockios.h3
-rw-r--r--include/asm-sh/socket.h2
-rw-r--r--include/asm-sh/sockios.h3
-rw-r--r--include/asm-sh64/sockios.h3
-rw-r--r--include/asm-sparc/socket.h2
-rw-r--r--include/asm-sparc/sockios.h3
-rw-r--r--include/asm-sparc64/socket.h2
-rw-r--r--include/asm-sparc64/sockios.h3
-rw-r--r--include/asm-um/div64.h1
-rw-r--r--include/asm-v850/socket.h2
-rw-r--r--include/asm-v850/sockios.h3
-rw-r--r--include/asm-x86_64/socket.h2
-rw-r--r--include/asm-x86_64/sockios.h3
-rw-r--r--include/asm-xtensa/div64.h6
-rw-r--r--include/asm-xtensa/socket.h2
-rw-r--r--include/asm-xtensa/sockios.h3
-rw-r--r--include/keys/rxrpc-type.h22
-rw-r--r--include/linux/Kbuild7
-rw-r--r--include/linux/atalk.h4
-rw-r--r--include/linux/dccp.h46
-rw-r--r--include/linux/fib_rules.h15
-rw-r--r--include/linux/hdlc.h4
-rw-r--r--include/linux/icmp.h9
-rw-r--r--include/linux/icmpv6.h9
-rw-r--r--include/linux/if_addr.h1
-rw-r--r--include/linux/if_arp.h9
-rw-r--r--include/linux/if_bridge.h3
-rw-r--r--include/linux/if_ether.h3
-rw-r--r--include/linux/if_link.h1
-rw-r--r--include/linux/if_packet.h1
-rw-r--r--include/linux/if_pppox.h10
-rw-r--r--include/linux/if_tr.h2
-rw-r--r--include/linux/if_vlan.h6
-rw-r--r--include/linux/if_wanpipe_common.h58
-rw-r--r--include/linux/igmp.h21
-rw-r--r--include/linux/in.h1
-rw-r--r--include/linux/in6.h3
-rw-r--r--include/linux/ip.h14
-rw-r--r--include/linux/ipv6.h14
-rw-r--r--include/linux/jhash.h2
-rw-r--r--include/linux/key.h2
-rw-r--r--include/linux/ktime.h6
-rw-r--r--include/linux/net.h2
-rw-r--r--include/linux/netdevice.h9
-rw-r--r--include/linux/netfilter.h12
-rw-r--r--include/linux/netfilter/nf_conntrack_tcp.h5
-rw-r--r--include/linux/netfilter/nfnetlink.h19
-rw-r--r--include/linux/netfilter/nfnetlink_conntrack.h4
-rw-r--r--include/linux/netfilter_bridge.h11
-rw-r--r--include/linux/netfilter_bridge/ebt_802_3.h2
-rw-r--r--include/linux/netfilter_bridge/ebt_arp.h4
-rw-r--r--include/linux/netfilter_ipv4/Kbuild14
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack.h402
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_amanda.h11
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_core.h61
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_ftp.h44
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_h323.h89
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_helper.h46
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_icmp.h6
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_irc.h32
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_pptp.h326
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h114
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_protocol.h98
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_sctp.h6
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_sip.h40
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_tcp.h6
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_tftp.h20
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_tuple.h146
-rw-r--r--include/linux/netfilter_ipv4/ip_nat.h79
-rw-r--r--include/linux/netfilter_ipv4/ip_nat_core.h18
-rw-r--r--include/linux/netfilter_ipv4/ip_nat_helper.h33
-rw-r--r--include/linux/netfilter_ipv4/ip_nat_pptp.h11
-rw-r--r--include/linux/netfilter_ipv4/ip_nat_protocol.h74
-rw-r--r--include/linux/netfilter_ipv4/ip_nat_rule.h28
-rw-r--r--include/linux/netfilter_ipv4/ipt_SAME.h2
-rw-r--r--include/linux/netlink.h33
-rw-r--r--include/linux/nl80211.h38
-rw-r--r--include/linux/rtnetlink.h13
-rw-r--r--include/linux/rxrpc.h62
-rw-r--r--include/linux/sctp.h9
-rw-r--r--include/linux/sdla_fr.h638
-rw-r--r--include/linux/skbuff.h392
-rw-r--r--include/linux/socket.h5
-rw-r--r--include/linux/sysctl.h4
-rw-r--r--include/linux/tcp.h21
-rw-r--r--include/linux/udp.h9
-rw-r--r--include/linux/workqueue.h7
-rw-r--r--include/linux/xfrm.h25
-rw-r--r--include/net/addrconf.h4
-rw-r--r--include/net/af_rxrpc.h57
-rw-r--r--include/net/ax25.h2
-rw-r--r--include/net/bluetooth/hci.h18
-rw-r--r--include/net/cfg80211.h40
-rw-r--r--include/net/cipso_ipv4.h2
-rw-r--r--include/net/compat.h1
-rw-r--r--include/net/dn_fib.h9
-rw-r--r--include/net/dn_route.h1
-rw-r--r--include/net/esp.h2
-rw-r--r--include/net/fib_rules.h20
-rw-r--r--include/net/inet6_hashtables.h12
-rw-r--r--include/net/inet_ecn.h8
-rw-r--r--include/net/inet_sock.h11
-rw-r--r--include/net/ip.h11
-rw-r--r--include/net/ip6_fib.h2
-rw-r--r--include/net/ip6_route.h5
-rw-r--r--include/net/ip_fib.h6
-rw-r--r--include/net/ipv6.h19
-rw-r--r--include/net/ipx.h2
-rw-r--r--include/net/iw_handler.h21
-rw-r--r--include/net/llc_pdu.h15
-rw-r--r--include/net/neighbour.h10
-rw-r--r--include/net/netfilter/nf_conntrack.h5
-rw-r--r--include/net/netfilter/nf_conntrack_compat.h145
-rw-r--r--include/net/netfilter/nf_conntrack_core.h3
-rw-r--r--include/net/netfilter/nf_conntrack_ecache.h30
-rw-r--r--include/net/netfilter/nf_conntrack_l3proto.h5
-rw-r--r--include/net/netfilter/nf_conntrack_l4proto.h1
-rw-r--r--include/net/netfilter/nf_nat_rule.h10
-rw-r--r--include/net/netlink.h18
-rw-r--r--include/net/pkt_cls.h10
-rw-r--r--include/net/pkt_sched.h182
-rw-r--r--include/net/red.h10
-rw-r--r--include/net/rtnetlink.h25
-rw-r--r--include/net/sch_generic.h12
-rw-r--r--include/net/sctp/constants.h2
-rw-r--r--include/net/sctp/structs.h5
-rw-r--r--include/net/sctp/ulpevent.h1
-rw-r--r--include/net/sctp/ulpqueue.h2
-rw-r--r--include/net/sctp/user.h25
-rw-r--r--include/net/sock.h89
-rw-r--r--include/net/tcp.h190
-rw-r--r--include/net/tcp_ecn.h17
-rw-r--r--include/net/udp.h11
-rw-r--r--include/net/udplite.h45
-rw-r--r--include/net/wext.h24
-rw-r--r--include/net/wireless.h139
-rw-r--r--include/net/x25device.h2
-rw-r--r--include/net/xfrm.h10
-rw-r--r--include/rxrpc/call.h212
-rw-r--r--include/rxrpc/connection.h83
-rw-r--r--include/rxrpc/krxiod.h27
-rw-r--r--include/rxrpc/krxsecd.h22
-rw-r--r--include/rxrpc/krxtimod.h45
-rw-r--r--include/rxrpc/message.h71
-rw-r--r--include/rxrpc/packet.h119
-rw-r--r--include/rxrpc/peer.h82
-rw-r--r--include/rxrpc/rxrpc.h36
-rw-r--r--include/rxrpc/transport.h106
-rw-r--r--kernel/audit.c16
-rw-r--r--kernel/hrtimer.c1
-rw-r--r--kernel/taskstats.c4
-rw-r--r--kernel/time.c2
-rw-r--r--kernel/timer.c2
-rw-r--r--lib/Makefile5
-rw-r--r--lib/div64.c22
-rw-r--r--lib/kobject_uevent.c2
-rw-r--r--net/802/fddi.c7
-rw-r--r--net/802/hippi.c12
-rw-r--r--net/802/psnap.c4
-rw-r--r--net/802/tr.c9
-rw-r--r--net/8021q/vlan.c6
-rw-r--r--net/8021q/vlan_dev.c14
-rw-r--r--net/Kconfig19
-rw-r--r--net/Makefile3
-rw-r--r--net/appletalk/aarp.c14
-rw-r--r--net/appletalk/ddp.c46
-rw-r--r--net/atm/br2684.c8
-rw-r--r--net/atm/clip.c4
-rw-r--r--net/atm/ioctl.c3
-rw-r--r--net/atm/lec.c15
-rw-r--r--net/atm/mpc.c17
-rw-r--r--net/atm/signaling.c2
-rw-r--r--net/ax25/af_ax25.c113
-rw-r--r--net/ax25/ax25_ds_subr.c2
-rw-r--r--net/ax25/ax25_in.c24
-rw-r--r--net/ax25/ax25_ip.c4
-rw-r--r--net/ax25/ax25_out.c12
-rw-r--r--net/ax25/ax25_subr.c4
-rw-r--r--net/bluetooth/af_bluetooth.c2
-rw-r--r--net/bluetooth/bnep/core.c16
-rw-r--r--net/bluetooth/cmtp/core.c4
-rw-r--r--net/bluetooth/hci_conn.c36
-rw-r--r--net/bluetooth/hci_core.c35
-rw-r--r--net/bluetooth/hci_event.c8
-rw-r--r--net/bluetooth/hci_sock.c2
-rw-r--r--net/bluetooth/l2cap.c76
-rw-r--r--net/bluetooth/rfcomm/core.c6
-rw-r--r--net/bluetooth/sco.c2
-rw-r--r--net/bridge/br.c12
-rw-r--r--net/bridge/br_device.c22
-rw-r--r--net/bridge/br_fdb.c42
-rw-r--r--net/bridge/br_forward.c2
-rw-r--r--net/bridge/br_if.c6
-rw-r--r--net/bridge/br_input.c51
-rw-r--r--net/bridge/br_ioctl.c5
-rw-r--r--net/bridge/br_netfilter.c142
-rw-r--r--net/bridge/br_netlink.c24
-rw-r--r--net/bridge/br_notify.c13
-rw-r--r--net/bridge/br_private.h23
-rw-r--r--net/bridge/br_stp.c10
-rw-r--r--net/bridge/br_stp_bpdu.c19
-rw-r--r--net/bridge/br_stp_if.c59
-rw-r--r--net/bridge/br_sysfs_br.c20
-rw-r--r--net/bridge/br_sysfs_if.c8
-rw-r--r--net/bridge/netfilter/ebt_arp.c48
-rw-r--r--net/bridge/netfilter/ebt_log.c12
-rw-r--r--net/bridge/netfilter/ebt_ulog.c12
-rw-r--r--net/compat.c79
-rw-r--r--net/core/Makefile1
-rw-r--r--net/core/datagram.c60
-rw-r--r--net/core/dev.c328
-rw-r--r--net/core/dev_mcast.c2
-rw-r--r--net/core/ethtool.c4
-rw-r--r--net/core/fib_rules.c161
-rw-r--r--net/core/filter.c6
-rw-r--r--net/core/gen_stats.c4
-rw-r--r--net/core/link_watch.c2
-rw-r--r--net/core/neighbour.c34
-rw-r--r--net/core/net-sysfs.c4
-rw-r--r--net/core/netpoll.c23
-rw-r--r--net/core/pktgen.c299
-rw-r--r--net/core/rtnetlink.c298
-rw-r--r--net/core/skbuff.c467
-rw-r--r--net/core/sock.c775
-rw-r--r--net/core/sysctl_net_core.c8
-rw-r--r--net/core/user_dma.c25
-rw-r--r--net/core/utils.c6
-rw-r--r--net/dccp/ackvec.c2
-rw-r--r--net/dccp/ccids/ccid3.c322
-rw-r--r--net/dccp/ccids/ccid3.h10
-rw-r--r--net/dccp/ccids/lib/loss_interval.c2
-rw-r--r--net/dccp/dccp.h75
-rw-r--r--net/dccp/input.c54
-rw-r--r--net/dccp/ipv4.c43
-rw-r--r--net/dccp/ipv6.c40
-rw-r--r--net/dccp/minisocks.c2
-rw-r--r--net/dccp/options.c18
-rw-r--r--net/dccp/output.c3
-rw-r--r--net/dccp/probe.c17
-rw-r--r--net/decnet/af_decnet.c1
-rw-r--r--net/decnet/dn_dev.c31
-rw-r--r--net/decnet/dn_fib.c8
-rw-r--r--net/decnet/dn_neigh.c6
-rw-r--r--net/decnet/dn_nsp_in.c7
-rw-r--r--net/decnet/dn_nsp_out.c8
-rw-r--r--net/decnet/dn_route.c28
-rw-r--r--net/decnet/dn_rules.c6
-rw-r--r--net/decnet/dn_table.c11
-rw-r--r--net/decnet/netfilter/dn_rtmsg.c8
-rw-r--r--net/econet/af_econet.c15
-rw-r--r--net/ethernet/eth.c5
-rw-r--r--net/ieee80211/Kconfig3
-rw-r--r--net/ieee80211/ieee80211_crypt_wep.c2
-rw-r--r--net/ieee80211/ieee80211_rx.c21
-rw-r--r--net/ieee80211/ieee80211_tx.c12
-rw-r--r--net/ipv4/Kconfig27
-rw-r--r--net/ipv4/Makefile2
-rw-r--r--net/ipv4/af_inet.c148
-rw-r--r--net/ipv4/ah4.c14
-rw-r--r--net/ipv4/arp.c16
-rw-r--r--net/ipv4/cipso_ipv4.c4
-rw-r--r--net/ipv4/devinet.c37
-rw-r--r--net/ipv4/esp4.c59
-rw-r--r--net/ipv4/fib_frontend.c21
-rw-r--r--net/ipv4/fib_hash.c2
-rw-r--r--net/ipv4/fib_rules.c11
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/fib_trie.c51
-rw-r--r--net/ipv4/icmp.c31
-rw-r--r--net/ipv4/igmp.c43
-rw-r--r--net/ipv4/inet_diag.c90
-rw-r--r--net/ipv4/inetpeer.c38
-rw-r--r--net/ipv4/ip_forward.c14
-rw-r--r--net/ipv4/ip_fragment.c47
-rw-r--r--net/ipv4/ip_gre.c63
-rw-r--r--net/ipv4/ip_input.c24
-rw-r--r--net/ipv4/ip_options.c26
-rw-r--r--net/ipv4/ip_output.c123
-rw-r--r--net/ipv4/ip_sockglue.c1169
-rw-r--r--net/ipv4/ipcomp.c58
-rw-r--r--net/ipv4/ipconfig.c19
-rw-r--r--net/ipv4/ipip.c60
-rw-r--r--net/ipv4/ipmr.c418
-rw-r--r--net/ipv4/ipvs/ip_vs_app.c14
-rw-r--r--net/ipv4/ipvs/ip_vs_core.c56
-rw-r--r--net/ipv4/ipvs/ip_vs_dh.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_ftp.c8
-rw-r--r--net/ipv4/ipvs/ip_vs_lblc.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_lblcr.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_ah.c16
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_tcp.c24
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_udp.c26
-rw-r--r--net/ipv4/ipvs/ip_vs_sh.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_xmit.c44
-rw-r--r--net/ipv4/multipath_drr.c2
-rw-r--r--net/ipv4/netfilter.c8
-rw-r--r--net/ipv4/netfilter/Kconfig267
-rw-r--r--net/ipv4/netfilter/Makefile45
-rw-r--r--net/ipv4/netfilter/arp_tables.c4
-rw-r--r--net/ipv4/netfilter/arpt_mangle.c12
-rw-r--r--net/ipv4/netfilter/ip_conntrack_amanda.c229
-rw-r--r--net/ipv4/netfilter/ip_conntrack_core.c1550
-rw-r--r--net/ipv4/netfilter/ip_conntrack_ftp.c520
-rw-r--r--net/ipv4/netfilter/ip_conntrack_helper_h323.c1841
-rw-r--r--net/ipv4/netfilter/ip_conntrack_helper_pptp.c684
-rw-r--r--net/ipv4/netfilter/ip_conntrack_irc.c314
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netbios_ns.c143
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netlink.c1577
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_generic.c74
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_gre.c328
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_icmp.c315
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_sctp.c659
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_tcp.c1164
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_udp.c148
-rw-r--r--net/ipv4/netfilter/ip_conntrack_sip.c520
-rw-r--r--net/ipv4/netfilter/ip_conntrack_standalone.c962
-rw-r--r--net/ipv4/netfilter/ip_conntrack_tftp.c161
-rw-r--r--net/ipv4/netfilter/ip_nat_amanda.c85
-rw-r--r--net/ipv4/netfilter/ip_nat_core.c634
-rw-r--r--net/ipv4/netfilter/ip_nat_ftp.c180
-rw-r--r--net/ipv4/netfilter/ip_nat_helper.c436
-rw-r--r--net/ipv4/netfilter/ip_nat_helper_h323.c611
-rw-r--r--net/ipv4/netfilter/ip_nat_helper_pptp.c350
-rw-r--r--net/ipv4/netfilter/ip_nat_irc.c122
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_gre.c174
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_icmp.c87
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_tcp.c154
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_udp.c144
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_unknown.c55
-rw-r--r--net/ipv4/netfilter/ip_nat_rule.c314
-rw-r--r--net/ipv4/netfilter/ip_nat_sip.c282
-rw-r--r--net/ipv4/netfilter/ip_nat_snmp_basic.c1333
-rw-r--r--net/ipv4/netfilter/ip_nat_standalone.c388
-rw-r--r--net/ipv4/netfilter/ip_nat_tftp.c70
-rw-r--r--net/ipv4/netfilter/ip_queue.c28
-rw-r--r--net/ipv4/netfilter/ip_tables.c12
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c24
-rw-r--r--net/ipv4/netfilter/ipt_ECN.c15
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c16
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c57
-rw-r--r--net/ipv4/netfilter/ipt_NETMAP.c26
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c24
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c45
-rw-r--r--net/ipv4/netfilter/ipt_SAME.c40
-rw-r--r--net/ipv4/netfilter/ipt_TOS.c4
-rw-r--r--net/ipv4/netfilter/ipt_TTL.c2
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c77
-rw-r--r--net/ipv4/netfilter/ipt_addrtype.c2
-rw-r--r--net/ipv4/netfilter/ipt_ecn.c10
-rw-r--r--net/ipv4/netfilter/ipt_iprange.c2
-rw-r--r--net/ipv4/netfilter/ipt_recent.c6
-rw-r--r--net/ipv4/netfilter/ipt_tos.c2
-rw-r--r--net/ipv4/netfilter/ipt_ttl.c11
-rw-r--r--net/ipv4/netfilter/iptable_filter.c3
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c30
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c27
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c11
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c14
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c14
-rw-r--r--net/ipv4/netfilter/nf_nat_helper.c76
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_rule.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c11
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c8
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c18
-rw-r--r--net/ipv4/proc.c41
-rw-r--r--net/ipv4/protocol.c2
-rw-r--r--net/ipv4/raw.c18
-rw-r--r--net/ipv4/route.c29
-rw-r--r--net/ipv4/syncookies.c40
-rw-r--r--net/ipv4/sysctl_net_ipv4.c16
-rw-r--r--net/ipv4/tcp.c131
-rw-r--r--net/ipv4/tcp_bic.c2
-rw-r--r--net/ipv4/tcp_cong.c45
-rw-r--r--net/ipv4/tcp_cubic.c81
-rw-r--r--net/ipv4/tcp_htcp.c2
-rw-r--r--net/ipv4/tcp_hybla.c2
-rw-r--r--net/ipv4/tcp_illinois.c356
-rw-r--r--net/ipv4/tcp_input.c642
-rw-r--r--net/ipv4/tcp_ipv4.c143
-rw-r--r--net/ipv4/tcp_lp.c8
-rw-r--r--net/ipv4/tcp_minisocks.c29
-rw-r--r--net/ipv4/tcp_output.c198
-rw-r--r--net/ipv4/tcp_probe.c68
-rw-r--r--net/ipv4/tcp_timer.c10
-rw-r--r--net/ipv4/tcp_vegas.c57
-rw-r--r--net/ipv4/tcp_vegas.h24
-rw-r--r--net/ipv4/tcp_veno.c10
-rw-r--r--net/ipv4/tcp_westwood.c21
-rw-r--r--net/ipv4/tcp_yeah.c268
-rw-r--r--net/ipv4/tcp_yeah.h7
-rw-r--r--net/ipv4/udp.c238
-rw-r--r--net/ipv4/udplite.c2
-rw-r--r--net/ipv4/xfrm4_input.c23
-rw-r--r--net/ipv4/xfrm4_mode_beet.c37
-rw-r--r--net/ipv4/xfrm4_mode_transport.c28
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c31
-rw-r--r--net/ipv4/xfrm4_output.c3
-rw-r--r--net/ipv4/xfrm4_policy.c8
-rw-r--r--net/ipv4/xfrm4_tunnel.c3
-rw-r--r--net/ipv6/Kconfig10
-rw-r--r--net/ipv6/Makefile5
-rw-r--r--net/ipv6/addrconf.c240
-rw-r--r--net/ipv6/af_inet6.c88
-rw-r--r--net/ipv6/ah6.c34
-rw-r--r--net/ipv6/datagram.c63
-rw-r--r--net/ipv6/esp6.c52
-rw-r--r--net/ipv6/exthdrs.c118
-rw-r--r--net/ipv6/fib6_rules.c39
-rw-r--r--net/ipv6/icmp.c48
-rw-r--r--net/ipv6/ip6_fib.c4
-rw-r--r--net/ipv6/ip6_input.c18
-rw-r--r--net/ipv6/ip6_output.c187
-rw-r--r--net/ipv6/ip6_tunnel.c643
-rw-r--r--net/ipv6/ipcomp6.c16
-rw-r--r--net/ipv6/ipv6_sockglue.c46
-rw-r--r--net/ipv6/ipv6_syms.c36
-rw-r--r--net/ipv6/mcast.c46
-rw-r--r--net/ipv6/mip6.c62
-rw-r--r--net/ipv6/ndisc.c435
-rw-r--r--net/ipv6/netfilter.c8
-rw-r--r--net/ipv6/netfilter/ip6_queue.c28
-rw-r--r--net/ipv6/netfilter/ip6_tables.c17
-rw-r--r--net/ipv6/netfilter/ip6t_HL.c2
-rw-r--r--net/ipv6/netfilter/ip6t_LOG.c21
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c11
-rw-r--r--net/ipv6/netfilter/ip6t_eui64.c8
-rw-r--r--net/ipv6/netfilter/ip6t_hl.c2
-rw-r--r--net/ipv6/netfilter/ip6t_ipv6header.c2
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c2
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c18
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c30
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c7
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c59
-rw-r--r--net/ipv6/proc.c61
-rw-r--r--net/ipv6/protocol.c4
-rw-r--r--net/ipv6/raw.c52
-rw-r--r--net/ipv6/reassembly.c62
-rw-r--r--net/ipv6/route.c24
-rw-r--r--net/ipv6/sit.c58
-rw-r--r--net/ipv6/tcp_ipv6.c120
-rw-r--r--net/ipv6/udp.c123
-rw-r--r--net/ipv6/udplite.c2
-rw-r--r--net/ipv6/xfrm6_input.c18
-rw-r--r--net/ipv6/xfrm6_mode_beet.c27
-rw-r--r--net/ipv6/xfrm6_mode_ro.c7
-rw-r--r--net/ipv6/xfrm6_mode_transport.c20
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c36
-rw-r--r--net/ipv6/xfrm6_output.c6
-rw-r--r--net/ipv6/xfrm6_policy.c25
-rw-r--r--net/ipv6/xfrm6_tunnel.c2
-rw-r--r--net/ipx/af_ipx.c8
-rw-r--r--net/ipx/ipx_route.c4
-rw-r--r--net/irda/af_irda.c136
-rw-r--r--net/irda/ircomm/ircomm_param.c4
-rw-r--r--net/irda/irda_device.c21
-rw-r--r--net/irda/irlan/irlan_common.c2
-rw-r--r--net/irda/irlan/irlan_eth.c3
-rw-r--r--net/irda/irlap_event.c2
-rw-r--r--net/irda/irlap_frame.c18
-rw-r--r--net/irda/irqueue.c9
-rw-r--r--net/irda/irttp.c10
-rw-r--r--net/irda/parameters.c8
-rw-r--r--net/irda/qos.c14
-rw-r--r--net/irda/wrapper.c5
-rw-r--r--net/iucv/af_iucv.c6
-rw-r--r--net/iucv/iucv.c2
-rw-r--r--net/key/af_key.c4
-rw-r--r--net/llc/llc_input.c2
-rw-r--r--net/llc/llc_output.c8
-rw-r--r--net/llc/llc_sap.c5
-rw-r--r--net/netfilter/Kconfig63
-rw-r--r--net/netfilter/core.c21
-rw-r--r--net/netfilter/nf_conntrack_core.c58
-rw-r--r--net/netfilter/nf_conntrack_ecache.c23
-rw-r--r--net/netfilter/nf_conntrack_expect.c4
-rw-r--r--net/netfilter/nf_conntrack_ftp.c6
-rw-r--r--net/netfilter/nf_conntrack_netbios_ns.c2
-rw-r--r--net/netfilter/nf_conntrack_netlink.c66
-rw-r--r--net/netfilter/nf_conntrack_proto.c144
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c5
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c9
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c88
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c5
-rw-r--r--net/netfilter/nf_conntrack_standalone.c11
-rw-r--r--net/netfilter/nfnetlink.c197
-rw-r--r--net/netfilter/nfnetlink_log.c108
-rw-r--r--net/netfilter/nfnetlink_queue.c20
-rw-r--r--net/netfilter/x_tables.c26
-rw-r--r--net/netfilter/xt_CONNMARK.c32
-rw-r--r--net/netfilter/xt_CONNSECMARK.c18
-rw-r--r--net/netfilter/xt_DSCP.c10
-rw-r--r--net/netfilter/xt_NOTRACK.c4
-rw-r--r--net/netfilter/xt_TCPMSS.c12
-rw-r--r--net/netfilter/xt_connbytes.c35
-rw-r--r--net/netfilter/xt_connmark.c17
-rw-r--r--net/netfilter/xt_conntrack.c110
-rw-r--r--net/netfilter/xt_dscp.c6
-rw-r--r--net/netfilter/xt_hashlimit.c14
-rw-r--r--net/netfilter/xt_helper.c60
-rw-r--r--net/netfilter/xt_length.c5
-rw-r--r--net/netfilter/xt_limit.c7
-rw-r--r--net/netfilter/xt_mac.c4
-rw-r--r--net/netfilter/xt_pkttype.c2
-rw-r--r--net/netfilter/xt_realm.c2
-rw-r--r--net/netfilter/xt_state.c4
-rw-r--r--net/netlink/af_netlink.c98
-rw-r--r--net/netlink/attr.c5
-rw-r--r--net/netlink/genetlink.c66
-rw-r--r--net/netrom/af_netrom.c115
-rw-r--r--net/netrom/nr_dev.c4
-rw-r--r--net/netrom/nr_in.c6
-rw-r--r--net/netrom/nr_loopback.c4
-rw-r--r--net/netrom/nr_out.c8
-rw-r--r--net/netrom/nr_subr.c4
-rw-r--r--net/packet/af_packet.c94
-rw-r--r--net/rose/af_rose.c70
-rw-r--r--net/rose/rose_loopback.c2
-rw-r--r--net/rose/rose_route.c2
-rw-r--r--net/rxrpc/Kconfig37
-rw-r--r--net/rxrpc/Makefile40
-rw-r--r--net/rxrpc/af_rxrpc.c879
-rw-r--r--net/rxrpc/ar-accept.c504
-rw-r--r--net/rxrpc/ar-ack.c1250
-rw-r--r--net/rxrpc/ar-call.c804
-rw-r--r--net/rxrpc/ar-connection.c911
-rw-r--r--net/rxrpc/ar-connevent.c403
-rw-r--r--net/rxrpc/ar-error.c253
-rw-r--r--net/rxrpc/ar-input.c797
-rw-r--r--net/rxrpc/ar-internal.h808
-rw-r--r--net/rxrpc/ar-key.c334
-rw-r--r--net/rxrpc/ar-local.c309
-rw-r--r--net/rxrpc/ar-output.c734
-rw-r--r--net/rxrpc/ar-peer.c273
-rw-r--r--net/rxrpc/ar-proc.c247
-rw-r--r--net/rxrpc/ar-recvmsg.c437
-rw-r--r--net/rxrpc/ar-security.c258
-rw-r--r--net/rxrpc/ar-skbuff.c132
-rw-r--r--net/rxrpc/ar-transport.c276
-rw-r--r--net/rxrpc/call.c2277
-rw-r--r--net/rxrpc/connection.c777
-rw-r--r--net/rxrpc/internal.h106
-rw-r--r--net/rxrpc/krxiod.c262
-rw-r--r--net/rxrpc/krxsecd.c270
-rw-r--r--net/rxrpc/krxtimod.c204
-rw-r--r--net/rxrpc/main.c180
-rw-r--r--net/rxrpc/peer.c398
-rw-r--r--net/rxrpc/proc.c617
-rw-r--r--net/rxrpc/rxkad.c1153
-rw-r--r--net/rxrpc/rxrpc_syms.c34
-rw-r--r--net/rxrpc/sysctl.c121
-rw-r--r--net/rxrpc/transport.c846
-rw-r--r--net/sched/Kconfig56
-rw-r--r--net/sched/act_api.c81
-rw-r--r--net/sched/act_gact.c5
-rw-r--r--net/sched/act_ipt.c5
-rw-r--r--net/sched/act_mirred.c5
-rw-r--r--net/sched/act_pedit.c7
-rw-r--r--net/sched/act_police.c34
-rw-r--r--net/sched/act_simple.c5
-rw-r--r--net/sched/cls_api.c36
-rw-r--r--net/sched/cls_basic.c7
-rw-r--r--net/sched/cls_fw.c7
-rw-r--r--net/sched/cls_route.c11
-rw-r--r--net/sched/cls_rsvp.c1
-rw-r--r--net/sched/cls_rsvp.h12
-rw-r--r--net/sched/cls_rsvp6.c1
-rw-r--r--net/sched/cls_tcindex.c9
-rw-r--r--net/sched/cls_u32.c13
-rw-r--r--net/sched/em_u32.c2
-rw-r--r--net/sched/ematch.c17
-rw-r--r--net/sched/sch_api.c227
-rw-r--r--net/sched/sch_atm.c28
-rw-r--r--net/sched/sch_cbq.c207
-rw-r--r--net/sched/sch_dsmark.c22
-rw-r--r--net/sched/sch_generic.c35
-rw-r--r--net/sched/sch_hfsc.c109
-rw-r--r--net/sched/sch_htb.c130
-rw-r--r--net/sched/sch_ingress.c27
-rw-r--r--net/sched/sch_netem.c108
-rw-r--r--net/sched/sch_prio.c14
-rw-r--r--net/sched/sch_sfq.c9
-rw-r--r--net/sched/sch_tbf.c47
-rw-r--r--net/sched/sch_teql.c2
-rw-r--r--net/sctp/associola.c14
-rw-r--r--net/sctp/debug.c5
-rw-r--r--net/sctp/input.c51
-rw-r--r--net/sctp/inqueue.c8
-rw-r--r--net/sctp/ipv6.c36
-rw-r--r--net/sctp/output.c2
-rw-r--r--net/sctp/outqueue.c12
-rw-r--r--net/sctp/protocol.c20
-rw-r--r--net/sctp/sm_make_chunk.c12
-rw-r--r--net/sctp/sm_sideeffect.c16
-rw-r--r--net/sctp/sm_statefuns.c30
-rw-r--r--net/sctp/sm_statetable.c2
-rw-r--r--net/sctp/socket.c267
-rw-r--r--net/sctp/transport.c2
-rw-r--r--net/sctp/ulpevent.c49
-rw-r--r--net/sctp/ulpqueue.c173
-rw-r--r--net/socket.c33
-rw-r--r--net/sunrpc/cache.c10
-rw-r--r--net/sunrpc/socklib.c2
-rw-r--r--net/sunrpc/svcsock.c10
-rw-r--r--net/tipc/config.c2
-rw-r--r--net/tipc/eth_media.c8
-rw-r--r--net/tipc/link.c48
-rw-r--r--net/tipc/msg.h18
-rw-r--r--net/tipc/netlink.c2
-rw-r--r--net/tipc/port.c8
-rw-r--r--net/tipc/socket.c2
-rw-r--r--net/unix/af_unix.c2
-rw-r--r--net/wanrouter/wanmain.c6
-rw-r--r--net/wireless/Kconfig16
-rw-r--r--net/wireless/Makefile4
-rw-r--r--net/wireless/core.c224
-rw-r--r--net/wireless/core.h49
-rw-r--r--net/wireless/sysfs.c80
-rw-r--r--net/wireless/sysfs.h9
-rw-r--r--net/wireless/wext.c (renamed from net/core/wireless.c)1122
-rw-r--r--net/x25/af_x25.c22
-rw-r--r--net/x25/x25_dev.c4
-rw-r--r--net/x25/x25_in.c14
-rw-r--r--net/x25/x25_out.c6
-rw-r--r--net/xfrm/xfrm_algo.c191
-rw-r--r--net/xfrm/xfrm_input.c6
-rw-r--r--net/xfrm/xfrm_policy.c14
-rw-r--r--net/xfrm/xfrm_state.c60
-rw-r--r--net/xfrm/xfrm_user.c179
-rw-r--r--security/keys/keyring.c2
-rw-r--r--security/selinux/hooks.c6
-rw-r--r--security/selinux/netlink.c4
1035 files changed, 32695 insertions, 43534 deletions
diff --git a/CREDITS b/CREDITS
index 6bd8ab86b5bd..dede114d046e 100644
--- a/CREDITS
+++ b/CREDITS
@@ -317,6 +317,12 @@ S: 2322 37th Ave SW
 S: Seattle, Washington 98126-2010
 S: USA
 
+N: Johannes Berg
+E: johannes@sipsolutions.net
+W: http://johannes.sipsolutions.net/
+P: 1024D/9AB78CA5 AD02 0176 4E29 C137 1DF6 08D2 FC44 CF86 9AB7 8CA5
+D: powerpc & 802.11 hacker
+
 N: Stephen R. van den Berg (AKA BuGless)
 E: berg@pool.informatik.rwth-aachen.de
 D: General kernel, gcc, and libc hacker
@@ -2286,14 +2292,14 @@ S: D-90453 Nuernberg
 S: Germany
 
 N: Arnaldo Carvalho de Melo
-E: acme@mandriva.com
 E: acme@ghostprotocols.net
+E: arnaldo.melo@gmail.com
+E: acme@redhat.com
 W: http://oops.ghostprotocols.net:81/blog/
 P: 1024D/9224DF01 D5DF E3BB E3C8 BCBB F8AD 841A B6AB 4681 9224 DF01
 D: IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks
-S: Mandriva
-S: R. Tocantins, 89 - Cristo Rei
-S: 80050-430 - Curitiba - Paraná
+S: R. Brasílio Itiberê, 4270/1010 - Água Verde
+S: 80240-060 - Curitiba - Paraná
 S: Brazil
 
 N: Karsten Merker
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 19b4c96b2a49..6da663607f7b 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -211,15 +211,6 @@ Who: Adrian Bunk <bunk@stusta.de>
 
 ---------------------------
 
-What:	IPv4 only connection tracking/NAT/helpers
-When:	2.6.22
-Why:	The new layer 3 independant connection tracking replaces the old
-	IPv4 only version. After some stabilization of the new code the
-	old one will be removed.
-Who:	Patrick McHardy <kaber@trash.net>
-
----------------------------
-
 What:	ACPI hooks (X86_SPEEDSTEP_CENTRINO_ACPI) in speedstep-centrino driver
 When:	December 2006
 Why:	Speedstep-centrino driver with ACPI hooks and acpi-cpufreq driver are
@@ -294,18 +285,6 @@ Who: Richard Purdie <rpurdie@rpsys.net>
 
 ---------------------------
 
-What:	Wireless extensions over netlink (CONFIG_NET_WIRELESS_RTNETLINK)
-When:	with the merge of wireless-dev, 2.6.22 or later
-Why:	The option/code is
-	* not enabled on most kernels
-	* not required by any userspace tools (except an experimental one,
-	  and even there only for some parts, others use ioctl)
-	* pointless since wext is no longer evolving and the ioctl
-	  interface needs to be kept
-Who:	Johannes Berg <johannes@sipsolutions.net>
-
----------------------------
-
 What:	i8xx_tco watchdog driver
 When:	in 2.6.22
 Why:	the i8xx_tco watchdog driver has been replaced by the iTCO_wdt
@@ -313,3 +292,22 @@ Why: the i8xx_tco watchdog driver has been replaced by the iTCO_wdt
 Who:	Wim Van Sebroeck <wim@iguana.be>
 
 ---------------------------
+
+What:	Multipath cached routing support in ipv4
+When:	in 2.6.23
+Why:	Code was merged, then submitter immediately disappeared leaving
+	us with no maintainer and lots of bugs. The code should not have
+	been merged in the first place, and many aspects of it's
+	implementation are blocking more critical core networking
+	development. It's marked EXPERIMENTAL and no distribution
+	enables it because it cause obscure crashes due to unfixable bugs
+	(interfaces don't return errors so memory allocation can't be
+	handled, calling contexts of these interfaces make handling
+	errors impossible too because they get called after we've
+	totally commited to creating a route object, for example).
+	This problem has existed for years and no forward progress
+	has ever been made, and nobody steps up to try and salvage
+	this code, so we're going to finally just get rid of it.
+Who:	David S. Miller <davem@davemloft.net>
+
+---------------------------
diff --git a/Documentation/filesystems/afs.txt b/Documentation/filesystems/afs.txt
index 2f4237dfb8c7..12ad6c7f4e50 100644
--- a/Documentation/filesystems/afs.txt
+++ b/Documentation/filesystems/afs.txt
@@ -1,31 +1,82 @@
+====================
 kAFS: AFS FILESYSTEM
 ====================
 
-ABOUT
-=====
+Contents:
+
+ - Overview.
+ - Usage.
+ - Mountpoints.
+ - Proc filesystem.
+ - The cell database.
+ - Security.
+ - Examples.
+
+
+========
+OVERVIEW
+========
 
-This filesystem provides a fairly simple AFS filesystem driver. It is under
-development and only provides very basic facilities. It does not yet support
-the following AFS features:
+This filesystem provides a fairly simple secure AFS filesystem driver. It is
+under development and does not yet provide the full feature set. The features
+it does support include:
 
- (*) Write support.
- (*) Communications security.
- (*) Local caching.
- (*) pioctl() system call.
- (*) Automatic mounting of embedded mountpoints.
+ (*) Security (currently only AFS kaserver and KerberosIV tickets).
 
+ (*) File reading.
 
+ (*) Automounting.
+
+It does not yet support the following AFS features:
+
+ (*) Write support.
+
+ (*) Local caching.
+
+ (*) pioctl() system call.
+
+
+===========
+COMPILATION
+===========
+
+The filesystem should be enabled by turning on the kernel configuration
+options:
+
+	CONFIG_AF_RXRPC		- The RxRPC protocol transport
+	CONFIG_RXKAD		- The RxRPC Kerberos security handler
+	CONFIG_AFS		- The AFS filesystem
+
+Additionally, the following can be turned on to aid debugging:
+
+	CONFIG_AF_RXRPC_DEBUG	- Permit AF_RXRPC debugging to be enabled
+	CONFIG_AFS_DEBUG	- Permit AFS debugging to be enabled
+
+They permit the debugging messages to be turned on dynamically by manipulating
+the masks in the following files:
+
+	/sys/module/af_rxrpc/parameters/debug
+	/sys/module/afs/parameters/debug
+
+
+=====
 USAGE
 =====
 
 When inserting the driver modules the root cell must be specified along with a
 list of volume location server IP addresses:
 
-	insmod rxrpc.o
+	insmod af_rxrpc.o
+	insmod rxkad.o
 	insmod kafs.o rootcell=cambridge.redhat.com:172.16.18.73:172.16.18.91
 
-The first module is a driver for the RxRPC remote operation protocol, and the
-second is the actual filesystem driver for the AFS filesystem.
+The first module is the AF_RXRPC network protocol driver. This provides the
+RxRPC remote operation protocol and may also be accessed from userspace. See:
+
+	Documentation/networking/rxrpc.txt
+
+The second module is the kerberos RxRPC security driver, and the third module
+is the actual filesystem driver for the AFS filesystem.
 
 Once the module has been loaded, more modules can be added by the following
 procedure:
@@ -33,7 +84,7 @@ procedure:
 	echo add grand.central.org 18.7.14.88:128.2.191.224 >/proc/fs/afs/cells
 
 Where the parameters to the "add" command are the name of a cell and a list of
-volume location servers within that cell.
+volume location servers within that cell, with the latter separated by colons.
 
 Filesystems can be mounted anywhere by commands similar to the following:
 
@@ -42,11 +93,6 @@ Filesystems can be mounted anywhere by commands similar to the following:
 	mount -t afs "#root.afs." /afs
 	mount -t afs "#root.cell." /afs/cambridge
 
-  NB: When using this on Linux 2.4, the mount command has to be different,
-      since the filesystem doesn't have access to the device name argument:
-
-	mount -t afs none /afs -ovol="#root.afs."
-
 Where the initial character is either a hash or a percent symbol depending on
 whether you definitely want a R/W volume (hash) or whether you'd prefer a R/O
 volume, but are willing to use a R/W volume instead (percent).
@@ -60,55 +106,66 @@ named volume will be looked up in the cell specified during insmod.
 Additional cells can be added through /proc (see later section).
 
 
+===========
 MOUNTPOINTS
 ===========
 
-AFS has a concept of mountpoints. These are specially formatted symbolic links
-(of the same form as the "device name" passed to mount). kAFS presents these
-to the user as directories that have special properties:
+AFS has a concept of mountpoints. In AFS terms, these are specially formatted
+symbolic links (of the same form as the "device name" passed to mount). kAFS
+presents these to the user as directories that have a follow-link capability
+(ie: symbolic link semantics). If anyone attempts to access them, they will
+automatically cause the target volume to be mounted (if possible) on that site.
 
- (*) They cannot be listed. Running a program like "ls" on them will incur an
-     EREMOTE error (Object is remote).
+Automatically mounted filesystems will be automatically unmounted approximately
+twenty minutes after they were last used. Alternatively they can be unmounted
+directly with the umount() system call.
 
- (*) Other objects can't be looked up inside of them. This also incurs an
-     EREMOTE error.
+Manually unmounting an AFS volume will cause any idle submounts upon it to be
+culled first. If all are culled, then the requested volume will also be
+unmounted, otherwise error EBUSY will be returned.
 
- (*) They can be queried with the readlink() system call, which will return
-     the name of the mountpoint to which they point. The "readlink" program
-     will also work.
+This can be used by the administrator to attempt to unmount the whole AFS tree
+mounted on /afs in one go by doing:
 
- (*) They can be mounted on (which symbolic links can't).
+	umount /afs
 
 
+===============
 PROC FILESYSTEM
 ===============
 
-The rxrpc module creates a number of files in various places in the /proc
-filesystem:
-
- (*) Firstly, some information files are made available in a directory called
-     "/proc/net/rxrpc/". These list the extant transport endpoint, peer,
-     connection and call records.
-
- (*) Secondly, some control files are made available in a directory called
-     "/proc/sys/rxrpc/". Currently, all these files can be used for is to
-     turn on various levels of tracing.
-
 The AFS modules creates a "/proc/fs/afs/" directory and populates it:
 
- (*) A "cells" file that lists cells currently known to the afs module.
+ (*) A "cells" file that lists cells currently known to the afs module and
+     their usage counts:
+
+	[root@andromeda ~]# cat /proc/fs/afs/cells
+	USE NAME
+	  3 cambridge.redhat.com
 
  (*) A directory per cell that contains files that list volume location
      servers, volumes, and active servers known within that cell.
 
+	[root@andromeda ~]# cat /proc/fs/afs/cambridge.redhat.com/servers
+	USE ADDR STATE
+	  4 172.16.18.91 0
+	[root@andromeda ~]# cat /proc/fs/afs/cambridge.redhat.com/vlservers
+	ADDRESS
+	172.16.18.91
+	[root@andromeda ~]# cat /proc/fs/afs/cambridge.redhat.com/volumes
+	USE STT VLID[0] VLID[1] VLID[2] NAME
+	  1 Val 20000000 20000001 20000002 root.afs
 
+
+=================
 THE CELL DATABASE
 =================
 
-The filesystem maintains an internal database of all the cells it knows and
-the IP addresses of the volume location servers for those cells. The cell to
-which the computer belongs is added to the database when insmod is performed
-by the "rootcell=" argument.
+The filesystem maintains an internal database of all the cells it knows and the
+IP addresses of the volume location servers for those cells. The cell to which
+the system belongs is added to the database when insmod is performed by the
+"rootcell=" argument or, if compiled in, using a "kafs.rootcell=" argument on
+the kernel command line.
 
 Further cells can be added by commands similar to the following:
 
@@ -118,20 +175,65 @@ Further cells can be added by commands similar to the following:
 No other cell database operations are available at this time.
 
 
+========
+SECURITY
+========
+
+Secure operations are initiated by acquiring a key using the klog program. A
+very primitive klog program is available at:
+
+	http://people.redhat.com/~dhowells/rxrpc/klog.c
+
+This should be compiled by:
+
+	make klog LDLIBS="-lcrypto -lcrypt -lkrb4 -lkeyutils"
+
+And then run as:
+
+	./klog
+
+Assuming it's successful, this adds a key of type RxRPC, named for the service
+and cell, eg: "afs@<cellname>". This can be viewed with the keyctl program or
+by cat'ing /proc/keys:
+
+	[root@andromeda ~]# keyctl show
+	Session Keyring
+	       -3 --alswrv      0     0  keyring: _ses.3268
+	        2 --alswrv      0     0   \_ keyring: _uid.0
+	111416553 --als--v      0     0       \_ rxrpc: afs@CAMBRIDGE.REDHAT.COM
+
+Currently the username, realm, password and proposed ticket lifetime are
+compiled in to the program.
+
+It is not required to acquire a key before using AFS facilities, but if one is
+not acquired then all operations will be governed by the anonymous user parts
+of the ACLs.
+
+If a key is acquired, then all AFS operations, including mounts and automounts,
+made by a possessor of that key will be secured with that key.
+
+If a file is opened with a particular key and then the file descriptor is
+passed to a process that doesn't have that key (perhaps over an AF_UNIX
+socket), then the operations on the file will be made with key that was used to
+open the file.
+
+
+========
 EXAMPLES
 ========
 
 Here's what I use to test this. Some of the names and IP addresses are local
 to my internal DNS. My "root.afs" partition has a mount point within it for
 some public volumes volumes.
 
-insmod -S /tmp/rxrpc.o
-insmod -S /tmp/kafs.o rootcell=cambridge.redhat.com:172.16.18.73:172.16.18.91
+insmod /tmp/rxrpc.o
+insmod /tmp/rxkad.o
+insmod /tmp/kafs.o rootcell=cambridge.redhat.com:172.16.18.91
 
 mount -t afs \%root.afs. /afs
 mount -t afs \%cambridge.redhat.com:root.cell. /afs/cambridge.redhat.com/
 
 echo add grand.central.org 18.7.14.88:128.2.191.224 > /proc/fs/afs/cells
 mount -t afs "#grand.central.org:root.cell." /afs/grand.central.org/
 mount -t afs "#grand.central.org:root.archive." /afs/grand.central.org/archive
 mount -t afs "#grand.central.org:root.contrib." /afs/grand.central.org/contrib
@@ -141,15 +243,7 @@ mount -t afs "#grand.central.org:root.service." /afs/grand.central.org/service
 mount -t afs "#grand.central.org:root.software." /afs/grand.central.org/software
 mount -t afs "#grand.central.org:root.user." /afs/grand.central.org/user
 
-umount /afs/grand.central.org/user
-umount /afs/grand.central.org/software
-umount /afs/grand.central.org/service
-umount /afs/grand.central.org/project
-umount /afs/grand.central.org/doc
-umount /afs/grand.central.org/contrib
-umount /afs/grand.central.org/archive
-umount /afs/grand.central.org
-umount /afs/cambridge.redhat.com
 umount /afs
 rmmod kafs
+rmmod rxkad
 rmmod rxrpc
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 5484ab5efd4f..7aaf09b86a55 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1421,6 +1421,15 @@ fewer messages that will be written. Message_burst controls when messages will
 be dropped. The default settings limit warning messages to one every five
 seconds.
 
+warnings
+--------
+
+This controls console messages from the networking stack that can occur
+because of problems on the network, such as duplicate addresses or bad
+checksums. Normally, this should be enabled, but if the problem persists the
+messages can be disabled.
+
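+A sketch of how this might be toggled at runtime (assuming the entry appears
+as /proc/sys/net/core/warnings, alongside the other entries documented in
+this section):
+
+	# echo 0 > /proc/sys/net/core/warnings
+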
+
 netdev_max_backlog
 ------------------
 
diff --git a/Documentation/keys.txt b/Documentation/keys.txt
index 60c665d9cfaa..81d9aa097298 100644
--- a/Documentation/keys.txt
+++ b/Documentation/keys.txt
@@ -859,6 +859,18 @@ payload contents" for more information.
 	void unregister_key_type(struct key_type *type);
 
 
+Under some circumstances, it may be desirable to deal with a bundle of keys.
+The facility provides access to the keyring type for managing such a bundle:
+
+	struct key_type key_type_keyring;
+
+This can be used with a function such as request_key() to find a specific
+keyring in a process's keyrings. A keyring thus found can then be searched
+with keyring_search(). Note that it is not possible to use request_key() to
+search a specific keyring, so using keyrings in this way is of limited utility.
+
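+A minimal in-kernel sketch of this pattern (the keyring description "mykeys"
+and the searched-for key are invented for the example; error handling is
+condensed):
+
+	struct key *keyring;
+	key_ref_t kref;
+
+	/* find a keyring named "mykeys" in the process's keyrings */
+	keyring = request_key(&key_type_keyring, "mykeys", NULL);
+	if (!IS_ERR(keyring)) {
+		/* search that keyring for a key of another type */
+		kref = keyring_search(make_key_ref(keyring, 1),
+				      &key_type_user, "mydesc");
+		if (!IS_ERR(kref))
+			key_ref_put(kref);
+		key_put(keyring);
+	}
+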
+
 ===================================
 NOTES ON ACCESSING PAYLOAD CONTENTS
 ===================================
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index de809e58092f..1da566630831 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -920,40 +920,9 @@ options, you may wish to use the "max_bonds" module parameter,
 documented above.
 
 	To create multiple bonding devices with differing options, it
-is necessary to load the bonding driver multiple times. Note that
-current versions of the sysconfig network initialization scripts
-handle this automatically; if your distro uses these scripts, no
-special action is needed. See the section Configuring Bonding
-Devices, above, if you're not sure about your network initialization
-scripts.
-
-	To load multiple instances of the module, it is necessary to
-specify a different name for each instance (the module loading system
-requires that every loaded module, even multiple instances of the same
-module, have a unique name). This is accomplished by supplying
-multiple sets of bonding options in /etc/modprobe.conf, for example:
-
-alias bond0 bonding
-options bond0 -o bond0 mode=balance-rr miimon=100
-
-alias bond1 bonding
-options bond1 -o bond1 mode=balance-alb miimon=50
-
-	will load the bonding module two times. The first instance is
-named "bond0" and creates the bond0 device in balance-rr mode with an
-miimon of 100. The second instance is named "bond1" and creates the
-bond1 device in balance-alb mode with an miimon of 50.
-
-	In some circumstances (typically with older distributions),
-the above does not work, and the second bonding instance never sees
-its options. In that case, the second options line can be substituted
-as follows:
-
-install bond1 /sbin/modprobe --ignore-install bonding -o bond1 \
-	mode=balance-alb miimon=50
+is necessary to use bonding parameters exported by sysfs, documented
+in the section below.
 
-	This may be repeated any number of times, specifying a new and
-unique name in place of bond1 for each subsequent instance.
 
 3.4 Configuring Bonding Manually via Sysfs
 ------------------------------------------
diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index 387482e46c47..4504cc59e405 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -57,6 +57,16 @@ DCCP_SOCKOPT_SEND_CSCOV is for the receiver and has a different meaning: it
 	coverage value are also acceptable. The higher the number, the more
 	restrictive this setting (see [RFC 4340, sec. 9.2.1]).
 
+The following two options apply to CCID 3 exclusively and are getsockopt()-only.
+In either case, a TFRC info struct (defined in <linux/tfrc.h>) is returned.
+DCCP_SOCKOPT_CCID_RX_INFO
+	Returns a `struct tfrc_rx_info' in optval; the buffer for optval and
+	optlen must be set to at least sizeof(struct tfrc_rx_info).
+DCCP_SOCKOPT_CCID_TX_INFO
+	Returns a `struct tfrc_tx_info' in optval; the buffer for optval and
+	optlen must be set to at least sizeof(struct tfrc_tx_info).
+
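+A rough userspace sketch of reading the receiver statistics ("fd" is assumed
+to be a connected DCCP socket using CCID 3; error handling elided):
+
+	struct tfrc_rx_info rinfo;
+	socklen_t len = sizeof(rinfo);
+
+	if (getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CCID_RX_INFO,
+		       &rinfo, &len) == 0)
+		/* rinfo now holds the current TFRC receiver state */;
+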
+
 Sysctl variables
 ================
 Several DCCP default parameters can be managed by the following sysctls
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 702d1d8dd04a..af6a63ab9026 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -179,11 +179,31 @@ tcp_fin_timeout - INTEGER
 	because they eat maximum 1.5K of memory, but they tend
 	to live longer. Cf. tcp_max_orphans.
 
-tcp_frto - BOOLEAN
+tcp_frto - INTEGER
 	Enables F-RTO, an enhanced recovery algorithm for TCP retransmission
 	timeouts. It is particularly beneficial in wireless environments
 	where packet loss is typically due to random radio interference
-	rather than intermediate router congestion.
+	rather than intermediate router congestion. If set to 1, the basic
+	version is enabled. 2 enables SACK-enhanced F-RTO, which is
+	EXPERIMENTAL. The basic version can also be used when SACK is
+	enabled for a flow through the tcp_sack sysctl.
+
+tcp_frto_response - INTEGER
+	When F-RTO has detected that a TCP retransmission timeout was
+	spurious (i.e., the timeout would have been avoided had TCP set a
+	longer retransmission timeout), TCP has several options for what to
+	do next. Possible values are:
+		0 Rate halving based; a smooth and conservative response,
+		  results in halved cwnd and ssthresh after one RTT
+		1 Very conservative response; not recommended because even
+		  though it is valid, it interacts poorly with the rest of
+		  Linux TCP; halves cwnd and ssthresh immediately
+		2 Aggressive response; undoes congestion-control measures
+		  that are now known to be unnecessary (ignoring the
+		  possibility of a lost retransmission that would require
+		  TCP to be more cautious); cwnd and ssthresh are restored
+		  to the values prior to the timeout
+	Default: 0 (rate halving based)
 
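+For example (these entries live under /proc/sys/net/ipv4/, following this
+file's naming convention):
+
+	# echo 1 > /proc/sys/net/ipv4/tcp_frto
+	# echo 0 > /proc/sys/net/ipv4/tcp_frto_response
+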
 tcp_keepalive_time - INTEGER
 	How often TCP sends out keepalive messages when keepalive is enabled.
@@ -995,7 +1015,12 @@ bridge-nf-call-ip6tables - BOOLEAN
 	Default: 1
 
 bridge-nf-filter-vlan-tagged - BOOLEAN
-	1 : pass bridged vlan-tagged ARP/IP traffic to arptables/iptables.
+	1 : pass bridged vlan-tagged ARP/IP/IPv6 traffic to {arp,ip,ip6}tables.
+	0 : disable this.
+	Default: 1
+
+bridge-nf-filter-pppoe-tagged - BOOLEAN
+	1 : pass bridged pppoe-tagged IP/IPv6 traffic to {ip,ip6}tables.
 	0 : disable this.
 	Default: 1
 
diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
new file mode 100644
index 000000000000..cae231b1c134
--- /dev/null
+++ b/Documentation/networking/rxrpc.txt
@@ -0,0 +1,859 @@
+======================
+RxRPC NETWORK PROTOCOL
+======================
+
+The RxRPC protocol driver provides a reliable two-phase transport on top of UDP
+that can be used to perform RxRPC remote operations. This is done over sockets
+of AF_RXRPC family, using sendmsg() and recvmsg() with control data to send and
+receive data, aborts and errors.
+
+Contents of this document:
+
+ (*) Overview.
+
+ (*) RxRPC protocol summary.
+
+ (*) AF_RXRPC driver model.
+
+ (*) Control messages.
+
+ (*) Socket options.
+
+ (*) Security.
+
+ (*) Example client usage.
+
+ (*) Example server usage.
+
+ (*) AF_RXRPC kernel interface.
+
+
+========
+OVERVIEW
+========
+
+RxRPC is a two-layer protocol. There is a session layer which provides
+reliable virtual connections using UDP over IPv4 (or IPv6) as the transport
+layer, but implements a real network protocol; and there's the presentation
+layer which renders structured data to binary blobs and back again using XDR
+(as does SunRPC):
+
+		+-------------+
+		| Application |
+		+-------------+
+		|     XDR     |		Presentation
+		+-------------+
+		|    RxRPC    |		Session
+		+-------------+
+		|     UDP     |		Transport
+		+-------------+
+
+
+AF_RXRPC provides:
+
+ (1) Part of an RxRPC facility for both kernel and userspace applications by
+     making the session part of it a Linux network protocol (AF_RXRPC).
+
+ (2) A two-phase protocol. The client transmits a blob (the request) and then
+     receives a blob (the reply), and the server receives the request and then
+     transmits the reply.
+
+ (3) Retention of the reusable bits of the transport system set up for one call
+     to speed up subsequent calls.
+
+ (4) A secure protocol, using the Linux kernel's key retention facility to
+     manage security on the client end. The server end must of necessity be
+     more active in security negotiations.
+
+AF_RXRPC does not provide XDR marshalling/presentation facilities. That is
+left to the application. AF_RXRPC only deals in blobs. Even the operation ID
+is just the first four bytes of the request blob, and as such is beyond the
+kernel's interest.
+
+
+Sockets of AF_RXRPC family are:
+
+ (1) created as type SOCK_DGRAM;
+
+ (2) provided with a protocol of the type of underlying transport they're going
+     to use - currently only PF_INET is supported.
+
+
+The Andrew File System (AFS) is an example of an application that uses this and
+that has both kernel (filesystem) and userspace (utility) components.
+
+
+======================
+RXRPC PROTOCOL SUMMARY
+======================
+
+An overview of the RxRPC protocol:
+
+ (*) RxRPC sits on top of another networking protocol (UDP is the only option
+     currently), and uses this to provide network transport. UDP ports, for
+     example, provide transport endpoints.
+
+ (*) RxRPC supports multiple virtual "connections" from any given transport
+     endpoint, thus allowing the endpoints to be shared, even to the same
+     remote endpoint.
+
+ (*) Each connection goes to a particular "service". A connection may not go
+     to multiple services. A service may be considered the RxRPC equivalent of
+     a port number. AF_RXRPC permits multiple services to share an endpoint.
+
+ (*) Client-originating packets are marked, thus a transport endpoint can be
+     shared between client and server connections (connections have a
+     direction).
+
+ (*) Up to a billion connections may be supported concurrently between one
+     local transport endpoint and one service on one remote endpoint. An RxRPC
+     connection is described by seven numbers:
+
+	Local address	}
+	Local port	} Transport (UDP) address
+	Remote address	}
+	Remote port	}
+	Direction
+	Connection ID
+	Service ID
+
+ (*) Each RxRPC operation is a "call". A connection may make up to four
+     billion calls, but only up to four calls may be in progress on a
+     connection at any one time.
+
+ (*) Calls are two-phase and asymmetric: the client sends its request data,
+     which the service receives; then the service transmits the reply data
+     which the client receives.
+
+ (*) The data blobs are of indefinite size; the end of a phase is marked with a
+     flag in the packet. The number of packets of data making up one blob may
+     not exceed 4 billion, however, as this would cause the sequence number to
+     wrap.
+
+ (*) The first four bytes of the request data are the service operation ID.
+
+ (*) Security is negotiated on a per-connection basis. The connection is
+     initiated by the first data packet on it arriving. If security is
+     requested, the server then issues a "challenge" and then the client
+     replies with a "response". If the response is successful, the security is
+     set for the lifetime of that connection, and all subsequent calls made
+     upon it use that same security. In the event that the server lets a
+     connection lapse before the client, the security will be renegotiated if
+     the client uses the connection again.
+
+ (*) Calls use ACK packets to handle reliability. Data packets are also
+     explicitly sequenced per call.
+
+ (*) There are two types of positive acknowledgement: hard-ACKs and soft-ACKs.
+     A hard-ACK indicates to the far side that all the data received to a point
+     has been received and processed; a soft-ACK indicates that the data has
+     been received but may yet be discarded and re-requested. The sender may
+     not discard any transmittable packets until they've been hard-ACK'd.
+
+ (*) Reception of a reply data packet implicitly hard-ACK's all the data
+     packets that make up the request.
+
+ (*) A call is complete when the request has been sent, the reply has been
+     received and the final hard-ACK on the last packet of the reply has
+     reached the server.
+
+ (*) A call may be aborted by either end at any time up to its completion.
+
+
+=====================
+AF_RXRPC DRIVER MODEL
+=====================
+
+About the AF_RXRPC driver:
+
+ (*) The AF_RXRPC protocol transparently uses internal sockets of the transport
+     protocol to represent transport endpoints.
+
+ (*) AF_RXRPC sockets map onto RxRPC connection bundles. Actual RxRPC
+     connections are handled transparently. One client socket may be used to
+     make multiple simultaneous calls to the same service. One server socket
+     may handle calls from many clients.
+
+ (*) Additional parallel client connections will be initiated to support extra
+     concurrent calls, up to a tunable limit.
+
+ (*) Each connection is retained for a certain amount of time [tunable] after
+     the last call currently using it has completed in case a new call is made
+     that could reuse it.
+
+ (*) Each internal UDP socket is retained for a certain amount of time
+     [tunable] after the last connection using it was discarded, in case a new
+     connection is made that could use it.
+
+ (*) A client-side connection is only shared between calls if they have the
+     same key struct describing their security (and assuming the calls would
+     otherwise share the connection). Non-secured calls would also be able to
+     share connections with each other.
+
+ (*) A server-side connection is shared if the client says it is.
+
+ (*) ACK'ing is handled by the protocol driver automatically, including ping
+     replying.
+
+ (*) SO_KEEPALIVE automatically pings the other side to keep the connection
+     alive [TODO].
+
+ (*) If an ICMP error is received, all calls affected by that error will be
+     aborted with an appropriate network error passed through recvmsg().
+
+
+Interaction with the user of the RxRPC socket:
+
+ (*) A socket is made into a server socket by binding an address with a
+     non-zero service ID.
+
+ (*) In the client, sending a request is achieved with one or more sendmsgs,
+     followed by the reply being received with one or more recvmsgs.
+
+ (*) The first sendmsg for a request to be sent from a client contains a tag to
+     be used in all other sendmsgs or recvmsgs associated with that call. The
+     tag is carried in the control data.
+
+ (*) connect() is used to supply a default destination address for a client
+     socket. This may be overridden by supplying an alternate address to the
+     first sendmsg() of a call (struct msghdr::msg_name).
+
+ (*) If connect() is called on an unbound client, a random local port will be
+     bound before the operation takes place.
+
+ (*) A server socket may also be used to make client calls. To do this, the
+     first sendmsg() of the call must specify the target address. The server's
+     transport endpoint is used to send the packets.
+
+ (*) Once the application has received the last message associated with a call,
+     the tag is guaranteed not to be seen again, and so it can be used to pin
+     client resources. A new call can then be initiated with the same tag
+     without fear of interference.
+
+ (*) In the server, a request is received with one or more recvmsgs, then the
+     reply is transmitted with one or more sendmsgs, and then the final ACK is
+     received with a last recvmsg.
+
+ (*) When sending data for a call, sendmsg is given MSG_MORE if there's more
+     data to come on that call.
+
+ (*) When receiving data for a call, recvmsg flags MSG_MORE if there's more
+     data to come for that call.
+
+ (*) When receiving data or messages for a call, MSG_EOR is flagged by recvmsg
+     to indicate the terminal message for that call.
+
+ (*) A call may be aborted by adding an abort control message to the control
+     data. Issuing an abort terminates the kernel's use of that call's tag.
+     Any messages waiting in the receive queue for that call will be discarded.
+
+ (*) Aborts, busy notifications and challenge packets are delivered by recvmsg,
+     and control data messages will be set to indicate the context. Receiving
+     an abort or a busy message terminates the kernel's use of that call's tag.
+
+ (*) The control data part of the msghdr struct is used for a number of things:
+
+     (*) The tag of the intended or affected call.
+
+     (*) Sending or receiving errors, aborts and busy notifications.
+
+     (*) Notifications of incoming calls.
+
+     (*) Sending debug requests and receiving debug replies [TODO].
+
+ (*) When the kernel has received and set up an incoming call, it sends a
+     message to the server application to let it know there's a new call
+     awaiting its acceptance [recvmsg reports a special control message]. The
+     server application then uses sendmsg to assign a tag to the new call.
+     Once that is done, the first part of the request data will be delivered by
+     recvmsg.
+
+ (*) The server application has to provide the server socket with a keyring of
+     secret keys corresponding to the security types it permits. When a secure
+     connection is being set up, the kernel looks up the appropriate secret key
+     in the keyring and then sends a challenge packet to the client and
+     receives a response packet. The kernel then checks the authorisation of
+     the packet and either aborts the connection or sets up the security.
+
+ (*) The name of the key a client will use to secure its communications is
+     nominated by a socket option.
+
+
+Notes on recvmsg:
+
+ (1) If there's a sequence of data messages belonging to a particular call on
+     the receive queue, then recvmsg will keep working through them until:
+
+     (a) it meets the end of that call's received data,
+
+     (b) it meets a non-data message,
+
+     (c) it meets a message belonging to a different call, or
+
+     (d) it fills the user buffer.
+
+     If recvmsg is called in blocking mode, it will keep sleeping, awaiting the
+     reception of further data, until one of the above four conditions is met.
+
+ (2) MSG_PEEK operates similarly, but will return immediately if it has put any
+     data in the buffer rather than sleeping until it can fill the buffer.
+
+ (3) If a data message is only partially consumed in filling a user buffer,
+     then the remainder of that message will be left on the front of the queue
+     for the next taker. MSG_TRUNC will never be flagged.
+
+ (4) If there is more data to be had on a call (it hasn't copied the last byte
+     of the last data message in that phase yet), then MSG_MORE will be
+     flagged.
+
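+As an illustration of the above, a client receive loop can be driven entirely
+by the returned flags (a condensed sketch; error handling and control-message
+parsing are elided):
+
+	char buf[4096];
+	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
+	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1 };
+	ssize_t n;
+
+	for (;;) {
+		n = recvmsg(client, &msg, 0);
+		if (n < 0)
+			break;			/* error */
+		/* ...consume n bytes of data... */
+		if (msg.msg_flags & MSG_EOR)
+			break;			/* terminal message for a call */
+		/* MSG_MORE in msg_flags => more data in this phase */
+	}
+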
+
+================
+CONTROL MESSAGES
+================
+
+AF_RXRPC makes use of control messages in sendmsg() and recvmsg() to multiplex
+calls, to invoke certain actions and to report certain conditions. These are:
+
+	MESSAGE ID		SRT	DATA		MEANING
+	=======================	===	===========	===============================
+	RXRPC_USER_CALL_ID	sr-	User ID		App's call specifier
+	RXRPC_ABORT		srt	Abort code	Abort code to issue/received
+	RXRPC_ACK		-rt	n/a		Final ACK received
+	RXRPC_NET_ERROR		-rt	error num	Network error on call
+	RXRPC_BUSY		-rt	n/a		Call rejected (server busy)
+	RXRPC_LOCAL_ERROR	-rt	error num	Local error encountered
+	RXRPC_NEW_CALL		-r-	n/a		New call received
+	RXRPC_ACCEPT		s--	n/a		Accept new call
+
+	(SRT = usable in Sendmsg / delivered by Recvmsg / Terminal message)
+
+ (*) RXRPC_USER_CALL_ID
+
+     This is used to indicate the application's call ID. It's an unsigned long
+     that the app specifies in the client by attaching it to the first data
+     message or in the server by passing it in association with an RXRPC_ACCEPT
+     message. recvmsg() passes it in conjunction with all messages except
+     the RXRPC_NEW_CALL message.
+
+ (*) RXRPC_ABORT
+
+     This can be used by an application to abort a call by passing it to
+     sendmsg, or it can be delivered by recvmsg to indicate a remote abort was
+     received. Either way, it must be associated with an RXRPC_USER_CALL_ID to
+     specify the call affected. If an abort is being sent, then error EBADSLT
+     will be returned if there is no call with that user ID.
+
+ (*) RXRPC_ACK
+
+     This is delivered to a server application to indicate that the final ACK
+     of a call was received from the client. It will be associated with an
+     RXRPC_USER_CALL_ID to indicate the call that's now complete.
+
+ (*) RXRPC_NET_ERROR
+
+     This is delivered to an application to indicate that an ICMP error message
+     was encountered in the process of trying to talk to the peer. An
+     errno-class integer value will be included in the control message data
+     indicating the problem, and an RXRPC_USER_CALL_ID will indicate the call
+     affected.
+
+ (*) RXRPC_BUSY
+
+     This is delivered to a client application to indicate that a call was
+     rejected by the server due to the server being busy. It will be
+     associated with an RXRPC_USER_CALL_ID to indicate the rejected call.
+
+ (*) RXRPC_LOCAL_ERROR
+
+     This is delivered to an application to indicate that a local error was
+     encountered and that a call has been aborted because of it. An
+     errno-class integer value will be included in the control message data
+     indicating the problem, and an RXRPC_USER_CALL_ID will indicate the call
+     affected.
+
+ (*) RXRPC_NEW_CALL
+
+     This is delivered to indicate to a server application that a new call has
+     arrived and is awaiting acceptance. No user ID is associated with this,
+     as a user ID must subsequently be assigned by doing an RXRPC_ACCEPT.
+
+ (*) RXRPC_ACCEPT
+
+     This is used by a server application to attempt to accept a call and
+     assign it a user ID. It should be associated with an RXRPC_USER_CALL_ID
+     to indicate the user ID to be assigned. If there is no call to be
+     accepted (it may have timed out, been aborted, etc.), then sendmsg will
+     return error ENODATA. If the user ID is already in use by another call,
+     then error EBADSLT will be returned.
+
389==============
390SOCKET OPTIONS
391==============
392
393AF_RXRPC sockets support a few socket options at the SOL_RXRPC level:
394
395 (*) RXRPC_SECURITY_KEY
396
397 This is used to specify the description of the key to be used. The key is
398 extracted from the calling process's keyrings with request_key() and
399 should be of "rxrpc" type.
400
401 The optval pointer points to the description string, and optlen indicates
402 how long the string is, without the NUL terminator.
403
404 (*) RXRPC_SECURITY_KEYRING
405
406 Similar to above but specifies a keyring of server secret keys to use (key
407 type "keyring"). See the "Security" section.
408
409 (*) RXRPC_EXCLUSIVE_CONNECTION
410
411 This is used to request that new connections should be used for each call
412 made subsequently on this socket. optval should be NULL and optlen 0.
413
414 (*) RXRPC_MIN_SECURITY_LEVEL
415
416 This is used to specify the minimum security level required for calls on
417 this socket. optval must point to an int containing one of the following
418 values:
419
420 (a) RXRPC_SECURITY_PLAIN
421
422 Encrypted checksum only.
423
424 (b) RXRPC_SECURITY_AUTH
425
426 Encrypted checksum plus packet padded and first eight bytes of packet
427 encrypted - which includes the actual packet length.
428
429 (c) RXRPC_SECURITY_ENCRYPTED
430
431 Encrypted checksum plus entire packet padded and encrypted, including
432 actual packet length.
433
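+For example, requesting a fresh connection for each call is simply (a
+sketch, per the description above):
+
+	setsockopt(client, SOL_RXRPC, RXRPC_EXCLUSIVE_CONNECTION, NULL, 0);
+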
+
+========
+SECURITY
+========
+
+Currently, only the Kerberos 4 equivalent protocol has been implemented
+(security index 2 - rxkad). This requires the rxkad module to be loaded and,
+on the client, tickets of the appropriate type to be obtained from the AFS
+kaserver or the Kerberos server and installed as "rxrpc" type keys. This is
+normally done using the klog program. An example simple klog program can be
+found at:
+
+	http://people.redhat.com/~dhowells/rxrpc/klog.c
+
+The payload provided to add_key() on the client should be of the following
+form:
+
+	struct rxrpc_key_sec2_v1 {
+		uint16_t	security_index;	/* 2 */
+		uint16_t	ticket_length;	/* length of ticket[] */
+		uint32_t	expiry;		/* time at which the ticket expires */
+		uint8_t		kvno;		/* key version number */
+		uint8_t		__pad[3];
+		uint8_t		session_key[8];	/* DES session key */
+		uint8_t		ticket[0];	/* the encrypted ticket */
+	};
+
+The ticket blob is simply appended to the above structure.
+
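+A userspace sketch of installing such a key with add_key(2), assuming the
+ticket, its length, the session key, the kvno and the expiry time have
+already been obtained (the key description "afs@EXAMPLE.COM" is invented for
+the example; error handling elided):
+
+	struct rxrpc_key_sec2_v1 *payload;
+	size_t plen = sizeof(*payload) + ticket_len;
+
+	payload = calloc(1, plen);
+	payload->security_index = 2;
+	payload->ticket_length = ticket_len;
+	payload->expiry = expiry_time;
+	payload->kvno = kvno;
+	memcpy(payload->session_key, session_key, 8);
+	memcpy(payload->ticket, ticket, ticket_len);
+
+	add_key("rxrpc", "afs@EXAMPLE.COM", payload, plen,
+		KEY_SPEC_SESSION_KEYRING);
+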
+
+For the server, keys of type "rxrpc_s" must be made available to the server.
+They have a description of "<serviceID>:<securityIndex>" (eg: "52:2" for an
+rxkad key for the AFS VL service). When such a key is created, it should be
+given the server's secret key as the instantiation data (see the example
+below).
+
+	add_key("rxrpc_s", "52:2", secret_key, 8, keyring);
+
+A keyring is passed to the server socket by naming it in a sockopt. The server
+socket then looks the server secret keys up in this keyring when secure
+incoming connections are made. This can be seen in an example program that can
+be found at:
+
+	http://people.redhat.com/~dhowells/rxrpc/listen.c
+
+
+====================
+EXAMPLE CLIENT USAGE
+====================
+
+A client would issue an operation by:
+
+ (1) An RxRPC socket is set up by:
+
+	client = socket(AF_RXRPC, SOCK_DGRAM, PF_INET);
+
+     Where the third parameter indicates the protocol family of the transport
+     socket used - usually IPv4 but it can also be IPv6 [TODO].
+
+ (2) A local address can optionally be bound:
+
+	struct sockaddr_rxrpc srx = {
+		.srx_family	= AF_RXRPC,
+		.srx_service	= 0,		/* we're a client */
+		.transport_type	= SOCK_DGRAM,	/* type of transport socket */
+		.transport.sin_family	= AF_INET,
+		.transport.sin_port	= htons(7000), /* AFS callback */
+		.transport.sin_address	= 0,	/* all local interfaces */
+	};
+	bind(client, &srx, sizeof(srx));
+
+     This specifies the local UDP port to be used. If not given, a random
+     non-privileged port will be used. A UDP port may be shared between
+     several unrelated RxRPC sockets. Security is handled on a basis of
+     per-RxRPC virtual connection.
+
+ (3) The security is set:
+
+	const char *key = "AFS:cambridge.redhat.com";
+	setsockopt(client, SOL_RXRPC, RXRPC_SECURITY_KEY, key, strlen(key));
+
+     This issues a request_key() to get the key representing the security
+     context. The minimum security level can be set:
+
+	unsigned int sec = RXRPC_SECURITY_ENCRYPTED;
+	setsockopt(client, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL,
+		   &sec, sizeof(sec));
+
+ (4) The server to be contacted can then be specified (alternatively this can
+     be done through sendmsg):
+
+	struct sockaddr_rxrpc srx = {
+		.srx_family	= AF_RXRPC,
+		.srx_service	= VL_SERVICE_ID,
+		.transport_type	= SOCK_DGRAM,	/* type of transport socket */
+		.transport.sin_family	= AF_INET,
+		.transport.sin_port	= htons(7005), /* AFS volume manager */
+		.transport.sin_address	= ...,
+	};
+	connect(client, &srx, sizeof(srx));
+
+ (5) The request data should then be posted to the server socket using a series
+     of sendmsg() calls, each with the following control message attached:
+
+	RXRPC_USER_CALL_ID	- specifies the user ID for this call
+
+     MSG_MORE should be set in msghdr::msg_flags on all but the last part of
+     the request. Multiple requests may be made simultaneously.
+
+     If a call is intended to go to a destination other than the default
+     specified through connect(), then msghdr::msg_name should be set on the
+     first request message of that call.
+
+ (6) The reply data will then be posted to the client socket for recvmsg() to
+     pick up. MSG_MORE will be flagged by recvmsg() if there's more reply data
+     for a particular call to be read. MSG_EOR will be set on the terminal
+     read for a call.
+
+     All data will be delivered with the following control message attached:
+
+	RXRPC_USER_CALL_ID	- specifies the user ID for this call
+
+     If an abort or error occurred, this will be returned in the control data
+     buffer instead, and MSG_EOR will be flagged to indicate the end of that
+     call.
+
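+Putting steps (5) and (6) together, posting a one-part request might look
+like this (a sketch; the RXRPC_USER_CALL_ID control message is built exactly
+as shown in the section on control messages above, and error handling is
+elided):
+
+	struct iovec iov = { .iov_base = request, .iov_len = request_len };
+	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1 };
+
+	/* attach an RXRPC_USER_CALL_ID control message to msg here */
+
+	sendmsg(client, &msg, 0);	/* no MSG_MORE - request complete */
+
+The reply is then collected as described in the notes on recvmsg above.
+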
+
+====================
+EXAMPLE SERVER USAGE
+====================
+
+A server would be set up to accept operations in the following manner:
+
+ (1) An RxRPC socket is created by:
+
+	server = socket(AF_RXRPC, SOCK_DGRAM, PF_INET);
+
+     Where the third parameter indicates the address type of the transport
+     socket used - usually IPv4.
+
+ (2) Security is set up if desired by giving the socket a keyring with server
+     secret keys in it:
+
+	keyring = add_key("keyring", "AFSkeys", NULL, 0,
+			  KEY_SPEC_PROCESS_KEYRING);
+
+	const char secret_key[8] = {
+		0xa7, 0x83, 0x8a, 0xcb, 0xc7, 0x83, 0xec, 0x94 };
+	add_key("rxrpc_s", "52:2", secret_key, 8, keyring);
+
+	setsockopt(server, SOL_RXRPC, RXRPC_SECURITY_KEYRING, "AFSkeys", 7);
+
+     The keyring can be manipulated after it has been given to the socket. This
+     permits the server to add more keys, replace keys, etc. whilst it is live.
+
+ (3) A local address must then be bound:
+
+	struct sockaddr_rxrpc srx = {
+		.srx_family	= AF_RXRPC,
+		.srx_service	= VL_SERVICE_ID, /* RxRPC service ID */
+		.transport_type	= SOCK_DGRAM,	/* type of transport socket */
+		.transport.sin_family	= AF_INET,
+		.transport.sin_port	= htons(7000), /* AFS callback */
+		.transport.sin_address	= 0,	/* all local interfaces */
+	};
+	bind(server, &srx, sizeof(srx));
+
+ (4) The server is then set to listen out for incoming calls:
+
+	listen(server, 100);
+
+ (5) The kernel notifies the server of pending incoming connections by sending
+     it a message for each. This is received with recvmsg() on the server
+     socket. It has no data, and has a single dataless control message
+     attached:
+
+	RXRPC_NEW_CALL
+
+     The address that can be passed back by recvmsg() at this point should be
+     ignored since the call for which the message was posted may have gone by
+     the time it is accepted - in which case the first call still on the queue
+     will be accepted.
+
+ (6) The server then accepts the new call by issuing a sendmsg() with two
+     pieces of control data and no actual data (see the sketch after this
+     list):
+
+	RXRPC_ACCEPT		- indicate connection acceptance
+	RXRPC_USER_CALL_ID	- specify user ID for this call
+
+ (7) The first request data packet will then be posted to the server socket for
+     recvmsg() to pick up. At that point, the RxRPC address for the call can
+     be read from the address fields in the msghdr struct.
+
+     Subsequent request data will be posted to the server socket for recvmsg()
+     to collect as it arrives. All but the last piece of the request data will
+     be delivered with MSG_MORE flagged.
+
+     All data will be delivered with the following control message attached:
+
+	RXRPC_USER_CALL_ID	- specifies the user ID for this call
+
+ (8) The reply data should then be posted to the server socket using a series
+     of sendmsg() calls, each with the following control messages attached:
+
+	RXRPC_USER_CALL_ID	- specifies the user ID for this call
+
+     MSG_MORE should be set in msghdr::msg_flags on all but the last message
+     for a particular call.
+
+ (9) The final ACK from the client will be posted for retrieval by recvmsg()
+     when it is received. It will take the form of a dataless message with two
+     control messages attached:
+
+	RXRPC_USER_CALL_ID	- specifies the user ID for this call
+	RXRPC_ACK		- indicates final ACK (no data)
+
+     MSG_EOR will be flagged to indicate that this is the final message for
+     this call.
+
+(10) Up to the point the final packet of reply data is sent, the call can be
+     aborted by calling sendmsg() with a dataless message with the following
+     control messages attached:
+
+	RXRPC_USER_CALL_ID	- specifies the user ID for this call
+	RXRPC_ABORT		- indicates abort code (4 byte data)
+
+     Any packets waiting in the socket's receive queue will be discarded if
+     this is issued.
+
+Note that all the communications for a particular service take place through
+the one server socket, using control messages on sendmsg() and recvmsg() to
+determine the call affected.
+
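+As an illustration, the acceptance in step (6) is a dataless sendmsg() whose
+control buffer carries RXRPC_ACCEPT and RXRPC_USER_CALL_ID, built in the same
+way as the abort example in the section on control messages (a sketch):
+
+	struct msghdr msg;
+
+	memset(&msg, 0, sizeof(msg));
+	/* attach a dataless RXRPC_ACCEPT control message and an
+	 * RXRPC_USER_CALL_ID control message carrying the new call's tag */
+
+	sendmsg(server, &msg, 0);
+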
+
+=========================
+AF_RXRPC KERNEL INTERFACE
+=========================
+
+The AF_RXRPC module also provides an interface for use by in-kernel utilities
+such as the AFS filesystem. This permits such a utility to:
+
+ (1) Use different keys directly on individual client calls on one socket
+     rather than having to open a whole slew of sockets, one for each key it
+     might want to use.
+
+ (2) Avoid having RxRPC call request_key() at the point of issue of a call or
+     opening of a socket. Instead the utility is responsible for requesting a
+     key at the appropriate point. AFS, for instance, would do this during VFS
+     operations such as open() or unlink(). The key is then handed through
+     when the call is initiated.
+
+ (3) Request the use of something other than GFP_KERNEL to allocate memory.
+
+ (4) Avoid the overhead of using the recvmsg() call. RxRPC messages can be
+     intercepted before they get put into the socket Rx queue and the socket
+     buffers manipulated directly.
+
+To use the RxRPC facility, a kernel utility must still open an AF_RXRPC socket,
+bind an address as appropriate and listen if it's to be a server socket, but
+then it passes this to the kernel interface functions.
+
+The kernel interface functions are as follows (a condensed usage sketch
+appears at the end of this document):
+
+ (*) Begin a new client call.
+
+	struct rxrpc_call *
+	rxrpc_kernel_begin_call(struct socket *sock,
+				struct sockaddr_rxrpc *srx,
+				struct key *key,
+				unsigned long user_call_ID,
+				gfp_t gfp);
+
+     This allocates the infrastructure to make a new RxRPC call and assigns
+     call and connection numbers. The call will be made on the UDP port that
+     the socket is bound to. The call will go to the destination address of a
+     connected client socket unless an alternative is supplied (srx is
+     non-NULL).
+
+     If a key is supplied then this will be used to secure the call instead of
+     the key bound to the socket with the RXRPC_SECURITY_KEY sockopt. Calls
+     secured in this way will still share connections if at all possible.
+
+     The user_call_ID is equivalent to that supplied to sendmsg() in the
+     control data buffer. It is entirely feasible to use this to point to a
+     kernel data structure.
+
+     If this function is successful, an opaque reference to the RxRPC call is
+     returned. The caller now holds a reference on this and it must be
+     properly ended.
+
+ (*) End a client call.
+
+	void rxrpc_kernel_end_call(struct rxrpc_call *call);
+
+     This is used to end a previously begun call. The user_call_ID is expunged
+     from AF_RXRPC's knowledge and will not be seen again in association with
+     the specified call.
+
+ (*) Send data through a call.
+
+	int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg,
+				   size_t len);
+
+     This is used to supply either the request part of a client call or the
+     reply part of a server call. msg.msg_iovlen and msg.msg_iov specify the
+     data buffers to be used. msg_iov may not be NULL and must point
+     exclusively to in-kernel virtual addresses. msg.msg_flags may be given
+     MSG_MORE if there will be subsequent data sends for this call.
+
+     The msg must not specify a destination address, control data or any flags
+     other than MSG_MORE. len is the total amount of data to transmit.
+
+ (*) Abort a call.
+
+	void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code);
+
+     This is used to abort a call if it's still in an abortable state. The
+     abort code specified will be placed in the ABORT message sent.
+
+ (*) Intercept received RxRPC messages.
+
+	typedef void (*rxrpc_interceptor_t)(struct sock *sk,
+					    unsigned long user_call_ID,
+					    struct sk_buff *skb);
+
+	void
+	rxrpc_kernel_intercept_rx_messages(struct socket *sock,
+					   rxrpc_interceptor_t interceptor);
+
+     This installs an interceptor function on the specified AF_RXRPC socket.
+     All messages that would otherwise wind up in the socket's Rx queue are
+     then diverted to this function. Note that care must be taken to process
+     the messages in the right order to maintain DATA message sequentiality.
+
+     The interceptor function itself is provided with the address of the socket
+     that's handling the incoming message, the ID assigned by the kernel
+     utility to the call and the socket buffer containing the message.
+
+     The skb->mark field indicates the type of message:
+
+	MARK				MEANING
+	===============================	=======================================
+	RXRPC_SKB_MARK_DATA		Data message
+	RXRPC_SKB_MARK_FINAL_ACK	Final ACK received for an incoming call
+	RXRPC_SKB_MARK_BUSY		Client call rejected as server busy
+	RXRPC_SKB_MARK_REMOTE_ABORT	Call aborted by peer
+	RXRPC_SKB_MARK_NET_ERROR	Network error detected
+	RXRPC_SKB_MARK_LOCAL_ERROR	Local error encountered
+	RXRPC_SKB_MARK_NEW_CALL		New incoming call awaiting acceptance
+
+     The remote abort message can be probed with rxrpc_kernel_get_abort_code().
+     The two error messages can be probed with rxrpc_kernel_get_error_number().
+     A new call can be accepted with rxrpc_kernel_accept_call().
+
+     Data messages can have their contents extracted with the usual bunch of
+     socket buffer manipulation functions. A data message can be determined to
+     be the last one in a sequence with rxrpc_kernel_is_data_last(). When a
+     data message has been used up, rxrpc_kernel_data_delivered() should be
+     called on it.
+
+     Non-data messages should be handed to rxrpc_kernel_free_skb() for
+     disposal. It is possible to get extra refs on all types of message for
+     later freeing, but this may pin the state of a call until the message is
+     finally freed.
+
+ (*) Accept an incoming call.
+
+	struct rxrpc_call *
+	rxrpc_kernel_accept_call(struct socket *sock,
+				 unsigned long user_call_ID);
+
+     This is used to accept an incoming call and to assign it a call ID. This
+     function is similar to rxrpc_kernel_begin_call() and calls accepted must
+     be ended in the same way.
+
+     If this function is successful, an opaque reference to the RxRPC call is
+     returned. The caller now holds a reference on this and it must be
+     properly ended.
+
+ (*) Reject an incoming call.
+
+	int rxrpc_kernel_reject_call(struct socket *sock);
+
+     This is used to reject the first incoming call on the socket's queue with
+     a BUSY message. -ENODATA is returned if there were no incoming calls.
+     Other errors may be returned if the call had been aborted (-ECONNABORTED)
+     or had timed out (-ETIME).
+
+ (*) Record the delivery of a data message and free it.
+
+	void rxrpc_kernel_data_delivered(struct sk_buff *skb);
+
+     This is used to record a data message as having been delivered and to
+     update the ACK state for the call. The socket buffer will be freed.
+
+ (*) Free a message.
+
+	void rxrpc_kernel_free_skb(struct sk_buff *skb);
+
+     This is used to free a non-DATA socket buffer intercepted from an AF_RXRPC
+     socket.
+
+ (*) Determine if a data message is the last one on a call.
+
+	bool rxrpc_kernel_is_data_last(struct sk_buff *skb);
+
+     This is used to determine if a socket buffer holds the last data message
+     to be received for a call (true will be returned if it does, false
+     if not).
+
+     The data message will be part of the reply on a client call and the
+     request on an incoming call. In the latter case there will be more
+     messages, but in the former case there will not.
+
+ (*) Get the abort code from an abort message.
+
+	u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb);
+
+     This is used to extract the abort code from a remote abort message.
+
+ (*) Get the error number from a local or network error message.
+
+	int rxrpc_kernel_get_error_number(struct sk_buff *skb);
+
+     This is used to extract the error number from a message indicating either
+     a local error occurred or a network error occurred.
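+
+As a final illustration, the client-side kernel calls chain together roughly
+as follows (a condensed sketch, not a complete utility; the socket, address,
+key and data buffer are assumed to have been set up already, and ERR_PTR-style
+returns are assumed):
+
+	struct iovec iov = { .iov_base = buf, .iov_len = len };
+	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1 };
+	struct rxrpc_call *call;
+
+	call = rxrpc_kernel_begin_call(sock, &srx, key, user_call_ID,
+				       GFP_KERNEL);
+	if (!IS_ERR(call)) {
+		rxrpc_kernel_send_data(call, &msg, len); /* the request */
+		/* reply skbs arrive through the installed interceptor */
+		rxrpc_kernel_end_call(call);
+	}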
diff --git a/Documentation/networking/wan-router.txt b/Documentation/networking/wan-router.txt
index 653978dcea7f..07dd6d9930a1 100644
--- a/Documentation/networking/wan-router.txt
+++ b/Documentation/networking/wan-router.txt
@@ -250,7 +250,6 @@ PRODUCT COMPONENTS AND RELATED FILES
 	sdladrv.h	SDLA support module API definitions
 	sdlasfm.h	SDLA firmware module definitions
 	if_wanpipe.h	WANPIPE Socket definitions
-	if_wanpipe_common.h	WANPIPE Socket/Driver common definitions.
 	sdlapci.h	WANPIPE PCI definitions
 
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 277877a34ef6..f56c7e172cee 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -384,7 +384,7 @@ S: Supported
 
 APPLETALK NETWORK LAYER
 P:	Arnaldo Carvalho de Melo
-M:	acme@conectiva.com.br
+M:	acme@ghostprotocols.net
 S:	Maintained
 
 ARC FRAMEBUFFER DRIVER
@@ -656,6 +656,7 @@ S: Supported
 ATMEL WIRELESS DRIVER
 P:	Simon Kelley
 M:	simon@thekelleys.org.uk
+L:	linux-wireless@vger.kernel.org
 W:	http://www.thekelleys.org.uk/atmel
 W:	http://atmelwlandriver.sourceforge.net/
 S:	Maintained
@@ -711,6 +712,7 @@ P: Larry Finger
 M:	Larry.Finger@lwfinger.net
 P:	Stefano Brivio
 M:	st3@riseup.net
+L:	linux-wireless@vger.kernel.org
 W:	http://bcm43xx.berlios.de/
 S:	Maintained
 
@@ -892,6 +894,12 @@ M: maxextreme@gmail.com
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 
+CFG80211 and NL80211
+P:	Johannes Berg
+M:	johannes@sipsolutions.net
+L:	linux-wireless@vger.kernel.org
+S:	Maintained
+
 COMMON INTERNET FILE SYSTEM (CIFS)
 P:	Steve French
 M:	sfrench@samba.org
@@ -1034,9 +1042,8 @@ S: Maintained
 
 CYCLADES 2X SYNC CARD DRIVER
 P:	Arnaldo Carvalho de Melo
-M:	acme@conectiva.com.br
-W:	http://advogato.org/person/acme
-L:	cycsyn-devel@bazar.conectiva.com.br
+M:	acme@ghostprotocols.net
+W:	http://oops.ghostprotocols.net:81/blog
 S:	Maintained
 
 CYCLADES ASYNC MUX DRIVER
@@ -1077,7 +1084,7 @@ S: Maintained
 
 DCCP PROTOCOL
 P:	Arnaldo Carvalho de Melo
-M:	acme@mandriva.com
+M:	acme@ghostprotocols.net
 L:	dccp@vger.kernel.org
 W:	http://linux-net.osdl.org/index.php/DCCP
 S:	Maintained
@@ -1558,6 +1565,7 @@ S: Supported
 HOST AP DRIVER
 P:	Jouni Malinen
 M:	jkmaline@cc.hut.fi
+L:	linux-wireless@vger.kernel.org
 L:	hostap@shmoo.com
 W:	http://hostap.epitest.fi/
 S:	Maintained
@@ -1830,6 +1838,7 @@ P: Yi Zhu
 M:	yi.zhu@intel.com
 P:	James Ketrenos
 M:	jketreno@linux.intel.com
+L:	linux-wireless@vger.kernel.org
 L:	ipw2100-devel@lists.sourceforge.net
 L:	http://lists.sourceforge.net/mailman/listinfo/ipw2100-devel
 W:	http://ipw2100.sourceforge.net
@@ -1840,6 +1849,7 @@ P: Yi Zhu
 M:	yi.zhu@intel.com
 P:	James Ketrenos
 M:	jketreno@linux.intel.com
+L:	linux-wireless@vger.kernel.org
 L:	ipw2100-devel@lists.sourceforge.net
 L:	http://lists.sourceforge.net/mailman/listinfo/ipw2100-devel
 W:	http://ipw2200.sourceforge.net
@@ -1871,7 +1881,7 @@ S: Supported
 
 IPX NETWORK LAYER
 P:	Arnaldo Carvalho de Melo
-M:	acme@conectiva.com.br
+M:	acme@ghostprotocols.net
 L:	netdev@vger.kernel.org
 S:	Maintained
 
@@ -2108,7 +2118,7 @@ S: Supported
 
 LLC (802.2)
 P:	Arnaldo Carvalho de Melo
-M:	acme@conectiva.com.br
+M:	acme@ghostprotocols.net
 S:	Maintained
 
 LINUX FOR 64BIT POWERPC
@@ -2532,6 +2542,7 @@ P: Pavel Roskin
 M:	proski@gnu.org
 P:	David Gibson
 M:	hermes@gibson.dropbear.id.au
+L:	linux-wireless@vger.kernel.org
 L:	orinoco-users@lists.sourceforge.net
 L:	orinoco-devel@lists.sourceforge.net
 W:	http://www.nongnu.org/orinoco/
@@ -2711,7 +2722,7 @@ S: Supported
 PRISM54 WIRELESS DRIVER
 P:	Prism54 Development Team
 M:	developers@islsm.org
-L:	netdev@vger.kernel.org
+L:	linux-wireless@vger.kernel.org
 W:	http://prism54.org
 S:	Maintained
 
@@ -2782,7 +2793,7 @@ S: Maintained
 RAYLINK/WEBGEAR 802.11 WIRELESS LAN DRIVER
 P:	Corey Thomas
 M:	corey@world.std.com
-L:	linux-kernel@vger.kernel.org
+L:	linux-wireless@vger.kernel.org
 S:	Maintained
 
 RANDOM NUMBER DRIVER
@@ -3045,7 +3056,7 @@ M: josejx@gentoo.org
 P:	Daniel Drake
 M:	dsd@gentoo.org
 W:	http://softmac.sipsolutions.net/
-L:	netdev@vger.kernel.org
+L:	linux-wireless@vger.kernel.org
 S:	Maintained
 
 SOFTWARE RAID (Multiple Disks) SUPPORT
@@ -3750,6 +3761,7 @@ S: Maintained
 WAVELAN NETWORK DRIVER & WIRELESS EXTENSIONS
 P:	Jean Tourrilhes
 M:	jt@hpl.hp.com
+L:	linux-wireless@vger.kernel.org
 W:	http://www.hpl.hp.com/personal/Jean_Tourrilhes/Linux/
 S:	Maintained
 
@@ -3766,8 +3778,9 @@ S: Maintained
 
 WL3501 WIRELESS PCMCIA CARD DRIVER
 P:	Arnaldo Carvalho de Melo
-M:	acme@conectiva.com.br
-W:	http://advogato.org/person/acme
+M:	acme@ghostprotocols.net
+L:	linux-wireless@vger.kernel.org
+W:	http://oops.ghostprotocols.net:81/blog
 S:	Maintained
 
 X.25 NETWORK LAYER
@@ -3830,6 +3843,7 @@ M: dsd@gentoo.org
 P:	Ulrich Kunitz
 M:	kune@deine-taler.de
 W:	http://zd1211.ath.cx/wiki/DriverRewrite
+L:	linux-wireless@vger.kernel.org
 L:	zd1211-devs@lists.sourceforge.net (subscribers-only)
 S:	Maintained
 
diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c
index 424e9257c9a0..f26077a773d5 100644
--- a/arch/ia64/hp/sim/simeth.c
+++ b/arch/ia64/hp/sim/simeth.c
@@ -427,7 +427,6 @@ make_new_skb(struct net_device *dev)
 		printk(KERN_NOTICE "%s: memory squeeze. dropping packet.\n", dev->name);
 		return NULL;
 	}
-	nskb->dev = dev;
 
 	skb_reserve(nskb, 2);	/* Align IP on 16 byte boundaries */
 
@@ -474,7 +473,7 @@ simeth_rx(struct net_device *dev)
 	 * XXX Fix me
 	 * Should really do a csum+copy here
 	 */
-	memcpy(skb->data, frame, len);
+	skb_copy_to_linear_data(skb, frame, len);
 #endif
 	skb->protocol = eth_type_trans(skb, dev);
 
diff --git a/arch/ia64/sn/kernel/xpnet.c b/arch/ia64/sn/kernel/xpnet.c
index c8173db0d84f..5419acb89a8c 100644
--- a/arch/ia64/sn/kernel/xpnet.c
+++ b/arch/ia64/sn/kernel/xpnet.c
@@ -233,7 +233,7 @@ xpnet_receive(partid_t partid, int channel, struct xpnet_message *msg)
233 "%lu)\n", skb->data, &msg->data, 233 "%lu)\n", skb->data, &msg->data,
234 (size_t) msg->embedded_bytes); 234 (size_t) msg->embedded_bytes);
235 235
236 memcpy(skb->data, &msg->data, (size_t) msg->embedded_bytes); 236 skb_copy_to_linear_data(skb, &msg->data, (size_t)msg->embedded_bytes);
237 } else { 237 } else {
238 dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t" 238 dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t"
239 "bte_copy(0x%p, 0x%p, %hu)\n", (void *)msg->buf_pa, 239 "bte_copy(0x%p, 0x%p, %hu)\n", (void *)msg->buf_pa,
@@ -264,17 +264,16 @@ xpnet_receive(partid_t partid, int channel, struct xpnet_message *msg)
 
 	dev_dbg(xpnet, "<skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
 		"skb->end=0x%p skb->len=%d\n", (void *) skb->head,
-		(void *) skb->data, (void *) skb->tail, (void *) skb->end,
+		(void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb),
 		skb->len);
 
-	skb->dev = xpnet_device;
 	skb->protocol = eth_type_trans(skb, xpnet_device);
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 
 	dev_dbg(xpnet, "passing skb to network layer; \n\tskb->head=0x%p "
 		"skb->data=0x%p skb->tail=0x%p skb->end=0x%p skb->len=%d\n",
-		(void *) skb->head, (void *) skb->data, (void *) skb->tail,
-		(void *) skb->end, skb->len);
+		(void *)skb->head, (void *)skb->data, skb_tail_pointer(skb),
+		skb_end_pointer(skb), skb->len);
 
 
 	xpnet_device->last_rx = jiffies;
@@ -476,7 +475,7 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
 		"skb->end=0x%p skb->len=%d\n", (void *) skb->head,
-		(void *) skb->data, (void *) skb->tail, (void *) skb->end,
+		(void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb),
 		skb->len);
 
 
@@ -498,7 +497,7 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* get the beginning of the first cacheline and end of last */
 	start_addr = ((u64) skb->data & ~(L1_CACHE_BYTES - 1));
-	end_addr = L1_CACHE_ALIGN((u64) skb->tail);
+	end_addr = L1_CACHE_ALIGN((u64)skb_tail_pointer(skb));
 
 	/* calculate how many bytes to embed in the XPC message */
 	embedded_bytes = 0;
@@ -567,14 +566,15 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		msg->version = XPNET_VERSION_EMBED;
 		dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n",
 			&msg->data, skb->data, (size_t) embedded_bytes);
-		memcpy(&msg->data, skb->data, (size_t) embedded_bytes);
+		skb_copy_from_linear_data(skb, &msg->data,
+					  (size_t)embedded_bytes);
 	} else {
 		msg->version = XPNET_VERSION;
 	}
 	msg->magic = XPNET_MAGIC;
 	msg->size = end_addr - start_addr;
 	msg->leadin_ignore = (u64) skb->data - start_addr;
-	msg->tailout_ignore = end_addr - (u64) skb->tail;
+	msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb);
 	msg->buf_pa = __pa(start_addr);
 
 	dev_dbg(xpnet, "sending XPC message to %d:%d\nmsg->buf_pa="
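
The xpnet hunks above are representative of a tree-wide conversion: raw memcpy() to and from skb->data becomes skb_copy_to_linear_data() and skb_copy_from_linear_data(). A minimal sketch of the idiom, assuming the helpers are thin memcpy() wrappers over the skb's linear area (illustrative, not the in-tree definitions):

	#include <linux/skbuff.h>
	#include <linux/string.h>

	/* Behaves like skb_copy_to_linear_data(skb, from, len). */
	static inline void example_copy_to(struct sk_buff *skb,
					   const void *from, unsigned int len)
	{
		memcpy(skb->data, from, len);
	}

	/* Behaves like skb_copy_from_linear_data(skb, to, len). */
	static inline void example_copy_from(const struct sk_buff *skb,
					     void *to, unsigned int len)
	{
		memcpy(to, skb->data, len);
	}

The wrappers keep callers from open-coding pointer arithmetic on sk_buff internals, leaving one grep-able choke point for when the buffer layout changes.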
diff --git a/arch/ppc/8260_io/enet.c b/arch/ppc/8260_io/enet.c
index a6056c29cf00..48ce84f5be93 100644
--- a/arch/ppc/8260_io/enet.c
+++ b/arch/ppc/8260_io/enet.c
@@ -477,7 +477,6 @@ for (;;) {
 			cep->stats.rx_dropped++;
 		}
 		else {
-			skb->dev = dev;
 			skb_put(skb,pkt_len-4);	/* Make room */
 			eth_copy_and_sum(skb,
 				(unsigned char *)__va(bdp->cbd_bufaddr),
diff --git a/arch/ppc/8260_io/fcc_enet.c b/arch/ppc/8260_io/fcc_enet.c
index 06b84c372e58..9db825fe37f0 100644
--- a/arch/ppc/8260_io/fcc_enet.c
+++ b/arch/ppc/8260_io/fcc_enet.c
@@ -734,7 +734,6 @@ for (;;) {
 			cep->stats.rx_dropped++;
 		}
 		else {
-			skb->dev = dev;
 			skb_put(skb,pkt_len);	/* Make room */
 			eth_copy_and_sum(skb,
 				(unsigned char *)__va(bdp->cbd_bufaddr),
diff --git a/arch/ppc/8xx_io/enet.c b/arch/ppc/8xx_io/enet.c
index b23c45bc151a..bfa3f52996d1 100644
--- a/arch/ppc/8xx_io/enet.c
+++ b/arch/ppc/8xx_io/enet.c
@@ -506,7 +506,6 @@ for (;;) {
 			cep->stats.rx_dropped++;
 		}
 		else {
-			skb->dev = dev;
 			skb_put(skb,pkt_len-4);	/* Make room */
 			eth_copy_and_sum(skb,
 				cep->rx_vaddr[bdp - cep->rx_bd_base],
diff --git a/arch/ppc/8xx_io/fec.c b/arch/ppc/8xx_io/fec.c
index e6c28fb423b2..57a9a61e54b5 100644
--- a/arch/ppc/8xx_io/fec.c
+++ b/arch/ppc/8xx_io/fec.c
@@ -724,7 +724,6 @@ while (!(bdp->cbd_sc & BD_ENET_RX_EMPTY)) {
 			printk("%s: Memory squeeze, dropping packet.\n", dev->name);
 			fep->stats.rx_dropped++;
 		} else {
-			skb->dev = dev;
 			skb_put(skb,pkt_len-4);	/* Make room */
 			eth_copy_and_sum(skb, data, pkt_len-4, 0);
 			skb->protocol=eth_type_trans(skb,dev);
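
The four ppc hunks above all drop the same line, skb->dev = dev;, just before the packet is handed up. The assignment is redundant because eth_type_trans() already sets skb->dev from its second argument. A sketch of the resulting receive idiom (my_rx_one and its arguments are placeholders):

	/* Hypothetical receive helper: eth_type_trans() sets skb->dev
	 * internally, so the driver no longer assigns it by hand. */
	static void my_rx_one(struct net_device *dev, struct sk_buff *skb,
			      unsigned int pkt_len)
	{
		skb_put(skb, pkt_len);			  /* make room */
		skb->protocol = eth_type_trans(skb, dev); /* also sets skb->dev */
		netif_rx(skb);
	}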
diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c
index f64b8c867ae2..516b3ac9a9b5 100644
--- a/arch/s390/appldata/appldata_net_sum.c
+++ b/arch/s390/appldata/appldata_net_sum.c
@@ -108,10 +108,10 @@ static void appldata_get_net_sum_data(void *data)
 	collisions = 0;
 	read_lock(&dev_base_lock);
 	for (dev = dev_base; dev != NULL; dev = dev->next) {
-		if (dev->get_stats == NULL) {
+		stats = dev->get_stats(dev);
+		if (stats == NULL) {
 			continue;
 		}
-		stats = dev->get_stats(dev);
 		rx_packets += stats->rx_packets;
 		tx_packets += stats->tx_packets;
 		rx_bytes   += stats->rx_bytes;
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 7a44fed21b35..59aea65ce99f 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -5,6 +5,6 @@
 EXTRA_AFLAGS := -traditional
 
 lib-y += delay.o string.o uaccess_std.o uaccess_pt.o qrnnd.o
-lib-$(CONFIG_32BIT) += div64.o
+obj-$(CONFIG_32BIT) += div64.o
 lib-$(CONFIG_64BIT) += uaccess_mvcos.o
 lib-$(CONFIG_SMP) += spinlock.o
diff --git a/arch/s390/lib/div64.c b/arch/s390/lib/div64.c
index 0481f3424a13..a5f8300bf3ee 100644
--- a/arch/s390/lib/div64.c
+++ b/arch/s390/lib/div64.c
@@ -147,5 +147,3 @@ uint32_t __div64_32(uint64_t *n, uint32_t base)
 }
 
 #endif /* MARCH_G5 */
-
-EXPORT_SYMBOL(__div64_32);
diff --git a/arch/um/drivers/daemon_kern.c b/arch/um/drivers/daemon_kern.c
index 9c2e7a758f21..adeece11e596 100644
--- a/arch/um/drivers/daemon_kern.c
+++ b/arch/um/drivers/daemon_kern.c
@@ -46,7 +46,7 @@ static int daemon_read(int fd, struct sk_buff **skb,
 {
 	*skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER);
 	if(*skb == NULL) return(-ENOMEM);
-	return(net_recvfrom(fd, (*skb)->mac.raw,
+	return(net_recvfrom(fd, skb_mac_header(*skb),
 			    (*skb)->dev->mtu + ETH_HEADER_OTHER));
 }
 
diff --git a/arch/um/drivers/mcast_kern.c b/arch/um/drivers/mcast_kern.c
index 52ccb7b53cd2..e6b8e0dd72a8 100644
--- a/arch/um/drivers/mcast_kern.c
+++ b/arch/um/drivers/mcast_kern.c
@@ -50,7 +50,7 @@ static int mcast_read(int fd, struct sk_buff **skb, struct uml_net_private *lp)
 {
 	*skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER);
 	if(*skb == NULL) return(-ENOMEM);
-	return(net_recvfrom(fd, (*skb)->mac.raw,
+	return(net_recvfrom(fd, skb_mac_header(*skb),
 			    (*skb)->dev->mtu + ETH_HEADER_OTHER));
 }
 
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 04e31f86c10a..859303730b2f 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -55,7 +55,7 @@ static int uml_net_rx(struct net_device *dev)
 
 	skb->dev = dev;
 	skb_put(skb, dev->mtu);
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	pkt_len = (*lp->read)(lp->fd, &skb, lp);
 
 	if (pkt_len > 0) {
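
The UML transports stop touching skb->mac.raw directly: skb_reset_mac_header() records "the MAC header starts at the current skb->data", and skb_mac_header() reads that position back. A sketch of the pattern, with read_fn standing in for net_recvfrom(), pcap_user_read() and friends, and max_len for the transport-specific length bound:

	/* Sketch: mark the MAC header at the current data pointer, then
	 * hand that address to the transport's raw read routine. */
	static int example_transport_read(int fd, struct sk_buff *skb,
					  int (*read_fn)(int, void *, int),
					  int max_len)
	{
		skb_reset_mac_header(skb);	/* mac header := skb->data */
		return read_fn(fd, skb_mac_header(skb), max_len);
	}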
diff --git a/arch/um/drivers/pcap_kern.c b/arch/um/drivers/pcap_kern.c
index e67362acf0e7..948849343ca4 100644
--- a/arch/um/drivers/pcap_kern.c
+++ b/arch/um/drivers/pcap_kern.c
@@ -36,7 +36,7 @@ static int pcap_read(int fd, struct sk_buff **skb,
 {
 	*skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER);
 	if(*skb == NULL) return(-ENOMEM);
-	return(pcap_user_read(fd, (*skb)->mac.raw,
+	return(pcap_user_read(fd, skb_mac_header(*skb),
 			      (*skb)->dev->mtu + ETH_HEADER_OTHER,
 			      (struct pcap_data *) &lp->user));
 }
diff --git a/arch/um/drivers/slip_kern.c b/arch/um/drivers/slip_kern.c
index 25634bd1f585..125c44f77638 100644
--- a/arch/um/drivers/slip_kern.c
+++ b/arch/um/drivers/slip_kern.c
@@ -49,7 +49,7 @@ static unsigned short slip_protocol(struct sk_buff *skbuff)
 static int slip_read(int fd, struct sk_buff **skb,
 		     struct uml_net_private *lp)
 {
-	return(slip_user_read(fd, (*skb)->mac.raw, (*skb)->dev->mtu,
+	return(slip_user_read(fd, skb_mac_header(*skb), (*skb)->dev->mtu,
 			      (struct slip_data *) &lp->user));
 }
 
diff --git a/arch/um/drivers/slirp_kern.c b/arch/um/drivers/slirp_kern.c
index b3ed8fb874ab..0a0324a6d290 100644
--- a/arch/um/drivers/slirp_kern.c
+++ b/arch/um/drivers/slirp_kern.c
@@ -53,7 +53,7 @@ static unsigned short slirp_protocol(struct sk_buff *skbuff)
 static int slirp_read(int fd, struct sk_buff **skb,
 		      struct uml_net_private *lp)
 {
-	return(slirp_user_read(fd, (*skb)->mac.raw, (*skb)->dev->mtu,
+	return(slirp_user_read(fd, skb_mac_header(*skb), (*skb)->dev->mtu,
 			       (struct slirp_data *) &lp->user));
 }
 
diff --git a/arch/um/os-Linux/drivers/ethertap_kern.c b/arch/um/os-Linux/drivers/ethertap_kern.c
index 70541821775f..12689141414d 100644
--- a/arch/um/os-Linux/drivers/ethertap_kern.c
+++ b/arch/um/os-Linux/drivers/ethertap_kern.c
@@ -43,7 +43,7 @@ static int etap_read(int fd, struct sk_buff **skb, struct uml_net_private *lp)
 
 	*skb = ether_adjust_skb(*skb, ETH_HEADER_ETHERTAP);
 	if(*skb == NULL) return(-ENOMEM);
-	len = net_recvfrom(fd, (*skb)->mac.raw,
+	len = net_recvfrom(fd, skb_mac_header(*skb),
 			   (*skb)->dev->mtu + 2 * ETH_HEADER_ETHERTAP);
 	if(len <= 0) return(len);
 	skb_pull(*skb, 2);
diff --git a/arch/um/os-Linux/drivers/tuntap_kern.c b/arch/um/os-Linux/drivers/tuntap_kern.c
index 76570a2c25c3..f1714e7fb1d0 100644
--- a/arch/um/os-Linux/drivers/tuntap_kern.c
+++ b/arch/um/os-Linux/drivers/tuntap_kern.c
@@ -43,7 +43,7 @@ static int tuntap_read(int fd, struct sk_buff **skb,
 {
 	*skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER);
 	if(*skb == NULL) return(-ENOMEM);
-	return(net_read(fd, (*skb)->mac.raw,
+	return(net_read(fd, skb_mac_header(*skb),
 			(*skb)->dev->mtu + ETH_HEADER_OTHER));
 }
 
diff --git a/arch/xtensa/platform-iss/network.c b/arch/xtensa/platform-iss/network.c
index 8ebfc8761229..ab05bff40104 100644
--- a/arch/xtensa/platform-iss/network.c
+++ b/arch/xtensa/platform-iss/network.c
@@ -386,7 +386,7 @@ static int iss_net_rx(struct net_device *dev)
 	/* Setup skb */
 
 	skb->dev = dev;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	pkt_len = lp->tp.read(lp, &skb);
 	skb_put(skb, pkt_len);
 
diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c
index 3c372e08f77d..59651abfa4f8 100644
--- a/drivers/atm/ambassador.c
+++ b/drivers/atm/ambassador.c
@@ -821,7 +821,7 @@ static inline void fill_rx_pool (amb_dev * dev, unsigned char pool,
 	}
 	// cast needed as there is no %? for pointer differences
 	PRINTD (DBG_SKB, "allocated skb at %p, head %p, area %li",
-		skb, skb->head, (long) (skb->end - skb->head));
+		skb, skb->head, (long) (skb_end_pointer(skb) - skb->head));
 	rx.handle = virt_to_bus (skb);
 	rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));
 	if (rx_give (dev, &rx, pool))
diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c
index fc518d85543d..02ad83d6b562 100644
--- a/drivers/atm/atmtcp.c
+++ b/drivers/atm/atmtcp.c
@@ -221,7 +221,7 @@ static int atmtcp_v_send(struct atm_vcc *vcc,struct sk_buff *skb)
 	hdr->vpi = htons(vcc->vpi);
 	hdr->vci = htons(vcc->vci);
 	hdr->length = htonl(skb->len);
-	memcpy(skb_put(new_skb,skb->len),skb->data,skb->len);
+	skb_copy_from_linear_data(skb, skb_put(new_skb, skb->len), skb->len);
 	if (vcc->pop) vcc->pop(vcc,skb);
 	else dev_kfree_skb(skb);
 	out_vcc->push(out_vcc,new_skb);
@@ -310,7 +310,7 @@ static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb)
 		goto done;
 	}
 	__net_timestamp(new_skb);
-	memcpy(skb_put(new_skb,skb->len),skb->data,skb->len);
+	skb_copy_from_linear_data(skb, skb_put(new_skb, skb->len), skb->len);
 	out_vcc->push(out_vcc,new_skb);
 	atomic_inc(&vcc->stats->tx);
 	atomic_inc(&out_vcc->stats->rx);
@@ -352,7 +352,7 @@ static struct atm_dev atmtcp_control_dev = {
 	.ops		= &atmtcp_c_dev_ops,
 	.type		= "atmtcp",
 	.number		= 999,
-	.lock		= SPIN_LOCK_UNLOCKED
+	.lock		= __SPIN_LOCK_UNLOCKED(atmtcp_control_dev.lock)
 };
 
 
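
The last atmtcp hunk replaces the deprecated SPIN_LOCK_UNLOCKED static initializer with __SPIN_LOCK_UNLOCKED(name). The named form gives each statically initialized lock its own lockdep class instead of lumping them all together. A short sketch:

	#include <linux/spinlock.h>

	/* Old style, deprecated: every lock initialized this way shared
	 * one lockdep key, muddying lock-dependency reports:
	 *	spinlock_t lock = SPIN_LOCK_UNLOCKED;
	 */

	/* New style: name the lock so lockdep can key it individually. */
	static spinlock_t example_lock = __SPIN_LOCK_UNLOCKED(example_lock);

	/* For a plain static lock, DEFINE_SPINLOCK() is the usual shorthand. */
	static DEFINE_SPINLOCK(other_lock);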
diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index 8fccf018f165..0d3a38b1cb0b 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -536,7 +536,7 @@ static int rx_aal0(struct atm_vcc *vcc)
 		return 0;
 	}
 	skb_put(skb,length);
-	skb_set_timestamp(skb, &eni_vcc->timestamp);
+	skb->tstamp = eni_vcc->timestamp;
 	DPRINTK("got len %ld\n",length);
 	if (do_rx_dma(vcc,skb,1,length >> 2,length >> 2)) return 1;
 	eni_vcc->rxing++;
@@ -701,7 +701,7 @@ static void get_service(struct atm_dev *dev)
 			DPRINTK("Grr, servicing VCC %ld twice\n",vci);
 			continue;
 		}
-		do_gettimeofday(&ENI_VCC(vcc)->timestamp);
+		ENI_VCC(vcc)->timestamp = ktime_get_real();
 		ENI_VCC(vcc)->next = NULL;
 		if (vcc->qos.rxtp.traffic_class == ATM_CBR) {
 			if (eni_dev->fast)
diff --git a/drivers/atm/eni.h b/drivers/atm/eni.h
index 385090c2a580..d04fefb0841f 100644
--- a/drivers/atm/eni.h
+++ b/drivers/atm/eni.h
@@ -59,7 +59,7 @@ struct eni_vcc {
 	int rxing;			/* number of pending PDUs */
 	int servicing;			/* number of waiting VCs (0 or 1) */
 	int txing;			/* number of pending TX bytes */
-	struct timeval timestamp;	/* for RX timing */
+	ktime_t timestamp;		/* for RX timing */
 	struct atm_vcc *next;		/* next pending RX */
 	struct sk_buff *last;		/* last PDU being DMAed (used to carry
 					   discard information) */
diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index a7c0ed3107e3..405ee5e09221 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -1,6 +1,4 @@
 /*
-  $Id: fore200e.c,v 1.5 2000/04/14 10:10:34 davem Exp $
-
   A FORE Systems 200E-series driver for ATM on Linux.
   Christophe Lizzi (lizzi@cnam.fr), October 1999-March 2003.
 
@@ -1502,9 +1500,9 @@ fore200e_open(struct atm_vcc *vcc)
     /* pseudo-CBR bandwidth requested? */
     if ((vcc->qos.txtp.traffic_class == ATM_CBR) && (vcc->qos.txtp.max_pcr > 0)) {
 
-	down(&fore200e->rate_sf);
+	mutex_lock(&fore200e->rate_mtx);
 	if (fore200e->available_cell_rate < vcc->qos.txtp.max_pcr) {
-	    up(&fore200e->rate_sf);
+	    mutex_unlock(&fore200e->rate_mtx);
 
 	    kfree(fore200e_vcc);
 	    vc_map->vcc = NULL;
@@ -1513,7 +1511,7 @@ fore200e_open(struct atm_vcc *vcc)
 
 	/* reserve bandwidth */
 	fore200e->available_cell_rate -= vcc->qos.txtp.max_pcr;
-	up(&fore200e->rate_sf);
+	mutex_unlock(&fore200e->rate_mtx);
     }
 
     vcc->itf = vcc->dev->number;
@@ -1599,9 +1597,9 @@ fore200e_close(struct atm_vcc* vcc)
     /* release reserved bandwidth, if any */
     if ((vcc->qos.txtp.traffic_class == ATM_CBR) && (vcc->qos.txtp.max_pcr > 0)) {
 
-	down(&fore200e->rate_sf);
+	mutex_lock(&fore200e->rate_mtx);
 	fore200e->available_cell_rate += vcc->qos.txtp.max_pcr;
-	up(&fore200e->rate_sf);
+	mutex_unlock(&fore200e->rate_mtx);
 
 	clear_bit(ATM_VF_HASQOS, &vcc->flags);
     }
@@ -2064,16 +2062,16 @@ fore200e_change_qos(struct atm_vcc* vcc,struct atm_qos* qos, int flags)
 
     if ((qos->txtp.traffic_class == ATM_CBR) && (qos->txtp.max_pcr > 0)) {
 
-	down(&fore200e->rate_sf);
+	mutex_lock(&fore200e->rate_mtx);
 	if (fore200e->available_cell_rate + vcc->qos.txtp.max_pcr < qos->txtp.max_pcr) {
-	    up(&fore200e->rate_sf);
+	    mutex_unlock(&fore200e->rate_mtx);
 	    return -EAGAIN;
 	}
 
 	fore200e->available_cell_rate += vcc->qos.txtp.max_pcr;
 	fore200e->available_cell_rate -= qos->txtp.max_pcr;
 
-	up(&fore200e->rate_sf);
+	mutex_unlock(&fore200e->rate_mtx);
 
 	memcpy(&vcc->qos, qos, sizeof(struct atm_qos));
 
@@ -2459,7 +2457,7 @@ fore200e_initialize(struct fore200e* fore200e)
 
     DPRINTK(2, "device %s being initialized\n", fore200e->name);
 
-    init_MUTEX(&fore200e->rate_sf);
+    mutex_init(&fore200e->rate_mtx);
     spin_lock_init(&fore200e->q_lock);
 
     cpq = fore200e->cp_queues = fore200e->virt_base + FORE200E_CP_QUEUES_OFFSET;
diff --git a/drivers/atm/fore200e.h b/drivers/atm/fore200e.h
index f9abfdac33e4..b85a54613dea 100644
--- a/drivers/atm/fore200e.h
+++ b/drivers/atm/fore200e.h
@@ -869,7 +869,7 @@ typedef struct fore200e {
 
     struct stats*              stats;          /* last snapshot of the stats */
 
-    struct semaphore           rate_sf;        /* protects rate reservation ops */
+    struct mutex               rate_mtx;       /* protects rate reservation ops */
     spinlock_t                 q_lock;         /* protects queue ops */
 #ifdef FORE200E_USE_TASKLET
     struct tasklet_struct      tx_tasklet;     /* performs tx interrupt work */
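
The fore200e changes are a mechanical semaphore-to-mutex conversion: rate_sf was only ever used as a binary sleeping lock, so it becomes a struct mutex. A sketch of the one-to-one mapping (example_dev is hypothetical):

	#include <linux/mutex.h>

	struct example_dev {
		struct mutex rate_mtx;	/* was: struct semaphore rate_sf */
		int          available_cell_rate;
	};

	static void example_init(struct example_dev *dev)
	{
		mutex_init(&dev->rate_mtx);	/* was: init_MUTEX() */
	}

	static void example_reserve(struct example_dev *dev, int pcr)
	{
		mutex_lock(&dev->rate_mtx);	/* was: down() */
		dev->available_cell_rate -= pcr;
		mutex_unlock(&dev->rate_mtx);	/* was: up() */
	}

Beyond clearer intent, the mutex gains the strict debugging and lockdep coverage that semaphores never had.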
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index 8510026b690a..d33aba6864c2 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -1901,13 +1901,13 @@ he_service_rbrq(struct he_dev *he_dev, int group)
 	case ATM_AAL0:
 		/* 2.10.1.5 raw cell receive */
 		skb->len = ATM_AAL0_SDU;
-		skb->tail = skb->data + skb->len;
+		skb_set_tail_pointer(skb, skb->len);
 		break;
 	case ATM_AAL5:
 		/* 2.10.1.2 aal5 receive */
 
 		skb->len = AAL5_LEN(skb->data, he_vcc->pdu_len);
-		skb->tail = skb->data + skb->len;
+		skb_set_tail_pointer(skb, skb->len);
 #ifdef USE_CHECKSUM_HW
 		if (vcc->vpi == 0 && vcc->vci >= ATM_NOT_RSV_VCI) {
 			skb->ip_summed = CHECKSUM_COMPLETE;
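
he.c stops writing skb->tail by hand. The tail and end fields are being hidden behind skb_set_tail_pointer(), skb_tail_pointer() and skb_end_pointer() so that the underlying storage can become an offset from skb->head rather than a raw pointer on some builds; the accessors work either way. A sketch of the receive-length idiom from the hunk above:

	/* Sketch: record a received PDU's length without touching
	 * skb->tail directly.  skb_set_tail_pointer(skb, off) means
	 * "tail = skb->data + off" under either representation. */
	static void example_set_rx_len(struct sk_buff *skb, unsigned int len)
	{
		skb->len = len;
		skb_set_tail_pointer(skb, len);	/* was: skb->tail = skb->data + len */
	}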
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index b4b80140c398..057efbc55d38 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -1065,7 +1065,8 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe)
 	vcc = vc->rx_vcc;
 
 	pci_dma_sync_single_for_cpu(card->pcidev, IDT77252_PRV_PADDR(skb),
-				    skb->end - skb->data, PCI_DMA_FROMDEVICE);
+				    skb_end_pointer(skb) - skb->data,
+				    PCI_DMA_FROMDEVICE);
 
 	if ((vcc->qos.aal == ATM_AAL0) ||
 	    (vcc->qos.aal == ATM_AAL34)) {
@@ -1194,7 +1195,8 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe)
 	}
 
 	pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
-			 skb->end - skb->data, PCI_DMA_FROMDEVICE);
+			 skb_end_pointer(skb) - skb->data,
+			 PCI_DMA_FROMDEVICE);
 	sb_pool_remove(card, skb);
 
 	skb_trim(skb, len);
@@ -1267,7 +1269,7 @@ idt77252_rx_raw(struct idt77252_dev *card)
 	tail = readl(SAR_REG_RAWCT);
 
 	pci_dma_sync_single_for_cpu(card->pcidev, IDT77252_PRV_PADDR(queue),
-				    queue->end - queue->head - 16,
+				    skb_end_pointer(queue) - queue->head - 16,
 				    PCI_DMA_FROMDEVICE);
 
 	while (head != tail) {
@@ -1363,7 +1365,8 @@ drop:
 		queue = card->raw_cell_head;
 		pci_dma_sync_single_for_cpu(card->pcidev,
 					    IDT77252_PRV_PADDR(queue),
-					    queue->end - queue->data,
+					    (skb_end_pointer(queue) -
+					     queue->data),
 					    PCI_DMA_FROMDEVICE);
 	} else {
 		card->raw_cell_head = NULL;
@@ -1816,7 +1819,8 @@ push_rx_skb(struct idt77252_dev *card, struct sk_buff *skb, int queue)
 	u32 handle;
 	u32 addr;
 
-	skb->data = skb->tail = skb->head;
+	skb->data = skb->head;
+	skb_reset_tail_pointer(skb);
 	skb->len = 0;
 
 	skb_reserve(skb, 16);
@@ -1835,7 +1839,6 @@ push_rx_skb(struct idt77252_dev *card, struct sk_buff *skb, int queue)
 		skb_put(skb, SAR_FB_SIZE_3);
 		break;
 	default:
-		dev_kfree_skb(skb);
 		return -1;
 	}
 
@@ -1874,7 +1877,7 @@ add_rx_skb(struct idt77252_dev *card, int queue,
 	}
 
 	paddr = pci_map_single(card->pcidev, skb->data,
-			       skb->end - skb->data,
+			       skb_end_pointer(skb) - skb->data,
 			       PCI_DMA_FROMDEVICE);
 	IDT77252_PRV_PADDR(skb) = paddr;
 
@@ -1888,7 +1891,7 @@ add_rx_skb(struct idt77252_dev *card, int queue,
 
 outunmap:
 	pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
-			 skb->end - skb->data, PCI_DMA_FROMDEVICE);
+			 skb_end_pointer(skb) - skb->data, PCI_DMA_FROMDEVICE);
 
 	handle = IDT77252_PRV_POOL(skb);
 	card->sbpool[POOL_QUEUE(handle)].skb[POOL_INDEX(handle)] = NULL;
@@ -1905,12 +1908,14 @@ recycle_rx_skb(struct idt77252_dev *card, struct sk_buff *skb)
 	int err;
 
 	pci_dma_sync_single_for_device(card->pcidev, IDT77252_PRV_PADDR(skb),
-				       skb->end - skb->data, PCI_DMA_FROMDEVICE);
+				       skb_end_pointer(skb) - skb->data,
+				       PCI_DMA_FROMDEVICE);
 
 	err = push_rx_skb(card, skb, POOL_QUEUE(handle));
 	if (err) {
 		pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
-				 skb->end - skb->data, PCI_DMA_FROMDEVICE);
+				 skb_end_pointer(skb) - skb->data,
+				 PCI_DMA_FROMDEVICE);
 		sb_pool_remove(card, skb);
 		dev_kfree_skb(skb);
 	}
@@ -3122,7 +3127,8 @@ deinit_card(struct idt77252_dev *card)
 			if (skb) {
 				pci_unmap_single(card->pcidev,
 						 IDT77252_PRV_PADDR(skb),
-						 skb->end - skb->data,
+						 (skb_end_pointer(skb) -
 						  skb->data),
 						 PCI_DMA_FROMDEVICE);
 				card->sbpool[i].skb[j] = NULL;
 				dev_kfree_skb(skb);
diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c
index aab9b3733d52..14ced85b3f54 100644
--- a/drivers/atm/nicstar.c
+++ b/drivers/atm/nicstar.c
@@ -2208,7 +2208,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
 	if (i == 1 && ns_rsqe_eopdu(rsqe))
 		*((u32 *) sb->data) |= 0x00000002;
 	skb_put(sb, NS_AAL0_HEADER);
-	memcpy(sb->tail, cell, ATM_CELL_PAYLOAD);
+	memcpy(skb_tail_pointer(sb), cell, ATM_CELL_PAYLOAD);
 	skb_put(sb, ATM_CELL_PAYLOAD);
 	ATM_SKB(sb)->vcc = vcc;
 	__net_timestamp(sb);
@@ -2252,7 +2252,8 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
 	vc->rx_iov = iovb;
 	NS_SKB(iovb)->iovcnt = 0;
 	iovb->len = 0;
-	iovb->tail = iovb->data = iovb->head;
+	iovb->data = iovb->head;
+	skb_reset_tail_pointer(iovb);
 	NS_SKB(iovb)->vcc = vcc;
 	/* IMPORTANT: a pointer to the sk_buff containing the small or large
 		      buffer is stored as iovec base, NOT a pointer to the
@@ -2265,7 +2266,8 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
 		recycle_iovec_rx_bufs(card, (struct iovec *) iovb->data, NS_MAX_IOVECS);
 		NS_SKB(iovb)->iovcnt = 0;
 		iovb->len = 0;
-		iovb->tail = iovb->data = iovb->head;
+		iovb->data = iovb->head;
+		skb_reset_tail_pointer(iovb);
 		NS_SKB(iovb)->vcc = vcc;
 	}
 	iov = &((struct iovec *) iovb->data)[NS_SKB(iovb)->iovcnt++];
@@ -2393,7 +2395,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
 			skb->destructor = ns_lb_destructor;
 #endif /* NS_USE_DESTRUCTORS */
 			skb_push(skb, NS_SMBUFSIZE);
-			memcpy(skb->data, sb->data, NS_SMBUFSIZE);
+			skb_copy_from_linear_data(sb, skb->data, NS_SMBUFSIZE);
 			skb_put(skb, len - NS_SMBUFSIZE);
 			ATM_SKB(skb)->vcc = vcc;
 			__net_timestamp(skb);
@@ -2477,7 +2479,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
 		{
 			/* Copy the small buffer to the huge buffer */
 			sb = (struct sk_buff *) iov->iov_base;
-			memcpy(hb->data, sb->data, iov->iov_len);
+			skb_copy_from_linear_data(sb, hb->data, iov->iov_len);
 			skb_put(hb, iov->iov_len);
 			remaining = len - iov->iov_len;
 			iov++;
@@ -2489,7 +2491,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
 		{
 			lb = (struct sk_buff *) iov->iov_base;
 			tocopy = min_t(int, remaining, iov->iov_len);
-			memcpy(hb->tail, lb->data, tocopy);
+			skb_copy_from_linear_data(lb, skb_tail_pointer(hb), tocopy);
 			skb_put(hb, tocopy);
 			iov++;
 			remaining -= tocopy;
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 2308e83e5f33..1d8466817943 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -48,6 +48,15 @@ struct aoe_hdr {
 	__be32		tag;
 };
 
+#ifdef __KERNEL__
+#include <linux/skbuff.h>
+
+static inline struct aoe_hdr *aoe_hdr(const struct sk_buff *skb)
+{
+	return (struct aoe_hdr *)skb_mac_header(skb);
+}
+#endif
+
 struct aoe_atahdr {
 	unsigned char	aflags;
 	unsigned char	errfeat;
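
The new aoe_hdr() inline is the typed-accessor pattern used throughout this series: one place casts skb_mac_header() to the protocol header type, and every call site below loses its open-coded (struct aoe_hdr *) skb->mac.raw cast. A usage sketch (the predicate is hypothetical, modelled on the aoenet.c check below):

	/* Does this frame carry an AoE response? */
	static bool example_is_response(const struct sk_buff *skb)
	{
		struct aoe_hdr *h = aoe_hdr(skb);

		return (h->verfl & AOEFL_RSP) != 0;
	}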
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 8d17d8df3662..1a6aeac5a1c3 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -27,7 +27,8 @@ new_skb(ulong len)
 
 	skb = alloc_skb(len, GFP_ATOMIC);
 	if (skb) {
-		skb->nh.raw = skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
+		skb_reset_network_header(skb);
 		skb->protocol = __constant_htons(ETH_P_AOE);
 		skb->priority = 0;
 		skb->next = skb->prev = NULL;
@@ -118,7 +119,7 @@ aoecmd_ata_rw(struct aoedev *d, struct frame *f)
 
 	/* initialize the headers & frame */
 	skb = f->skb;
-	h = (struct aoe_hdr *) skb->mac.raw;
+	h = aoe_hdr(skb);
 	ah = (struct aoe_atahdr *) (h+1);
 	skb_put(skb, sizeof *h + sizeof *ah);
 	memset(h, 0, skb->len);
@@ -207,7 +208,7 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail)
 		skb->dev = ifp;
 		if (sl_tail == NULL)
 			sl_tail = skb;
-		h = (struct aoe_hdr *) skb->mac.raw;
+		h = aoe_hdr(skb);
 		memset(h, 0, sizeof *h + sizeof *ch);
 
 		memset(h->dst, 0xff, sizeof h->dst);
@@ -300,7 +301,7 @@ rexmit(struct aoedev *d, struct frame *f)
 	aoechr_error(buf);
 
 	skb = f->skb;
-	h = (struct aoe_hdr *) skb->mac.raw;
+	h = aoe_hdr(skb);
 	ah = (struct aoe_atahdr *) (h+1);
 	f->tag = n;
 	h->tag = cpu_to_be32(n);
@@ -529,7 +530,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 	char ebuf[128];
 	u16 aoemajor;
 
-	hin = (struct aoe_hdr *) skb->mac.raw;
+	hin = aoe_hdr(skb);
 	aoemajor = be16_to_cpu(get_unaligned(&hin->major));
 	d = aoedev_by_aoeaddr(aoemajor, hin->minor);
 	if (d == NULL) {
@@ -561,7 +562,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 	calc_rttavg(d, tsince(f->tag));
 
 	ahin = (struct aoe_atahdr *) (hin+1);
-	hout = (struct aoe_hdr *) f->skb->mac.raw;
+	hout = aoe_hdr(f->skb);
 	ahout = (struct aoe_atahdr *) (hout+1);
 	buf = f->buf;
 
@@ -695,7 +696,7 @@ aoecmd_ata_id(struct aoedev *d)
 
 	/* initialize the headers & frame */
 	skb = f->skb;
-	h = (struct aoe_hdr *) skb->mac.raw;
+	h = aoe_hdr(skb);
 	ah = (struct aoe_atahdr *) (h+1);
 	skb_put(skb, sizeof *h + sizeof *ah);
 	memset(h, 0, skb->len);
@@ -726,7 +727,7 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
 	enum { MAXFRAMES = 16 };
 	u16 n;
 
-	h = (struct aoe_hdr *) skb->mac.raw;
+	h = aoe_hdr(skb);
 	ch = (struct aoe_cfghdr *) (h+1);
 
 	/*
diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c
index aab6d91a2c22..f9ddfda4d9cb 100644
--- a/drivers/block/aoe/aoenet.c
+++ b/drivers/block/aoe/aoenet.c
@@ -123,7 +123,7 @@ aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt,
 		goto exit;
 	skb_push(skb, ETH_HLEN);	/* (1) */
 
-	h = (struct aoe_hdr *) skb->mac.raw;
+	h = aoe_hdr(skb);
 	n = be32_to_cpu(get_unaligned(&h->tag));
 	if ((h->verfl & AOEFL_RSP) == 0 || (n & 1<<31))
 		goto exit;
diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c
index 4c766f36d884..b990805806af 100644
--- a/drivers/bluetooth/bfusb.c
+++ b/drivers/bluetooth/bfusb.c
@@ -527,7 +527,7 @@ static int bfusb_send_frame(struct sk_buff *skb)
 		buf[2] = (size == BFUSB_MAX_BLOCK_SIZE) ? 0 : size;
 
 		memcpy(skb_put(nskb, 3), buf, 3);
-		memcpy(skb_put(nskb, size), skb->data + sent, size);
+		skb_copy_from_linear_data_offset(skb, sent, skb_put(nskb, size), size);
 
 		sent  += size;
 		count -= size;
diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c
index acfb6a430dcc..851de4d5b7de 100644
--- a/drivers/bluetooth/bluecard_cs.c
+++ b/drivers/bluetooth/bluecard_cs.c
@@ -461,20 +461,20 @@ static void bluecard_receive(bluecard_info_t *info, unsigned int offset)
 		switch (info->rx_state) {
 
 		case RECV_WAIT_EVENT_HEADER:
-			eh = (struct hci_event_hdr *)(info->rx_skb->data);
+			eh = hci_event_hdr(info->rx_skb);
 			info->rx_state = RECV_WAIT_DATA;
 			info->rx_count = eh->plen;
 			break;
 
 		case RECV_WAIT_ACL_HEADER:
-			ah = (struct hci_acl_hdr *)(info->rx_skb->data);
+			ah = hci_acl_hdr(info->rx_skb);
 			dlen = __le16_to_cpu(ah->dlen);
 			info->rx_state = RECV_WAIT_DATA;
 			info->rx_count = dlen;
 			break;
 
 		case RECV_WAIT_SCO_HEADER:
-			sh = (struct hci_sco_hdr *)(info->rx_skb->data);
+			sh = hci_sco_hdr(info->rx_skb);
 			info->rx_state = RECV_WAIT_DATA;
 			info->rx_count = sh->dlen;
 			break;
diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c
index 9fca6513562d..e8ebd5d3de86 100644
--- a/drivers/bluetooth/bpa10x.c
+++ b/drivers/bluetooth/bpa10x.c
@@ -231,7 +231,7 @@ static void bpa10x_wakeup(struct bpa10x_data *data)
 		cr = (struct usb_ctrlrequest *) urb->setup_packet;
 		cr->wLength = __cpu_to_le16(skb->len);
 
-		memcpy(urb->transfer_buffer, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, urb->transfer_buffer, skb->len);
 		urb->transfer_buffer_length = skb->len;
 
 		err = usb_submit_urb(urb, GFP_ATOMIC);
@@ -250,7 +250,7 @@ static void bpa10x_wakeup(struct bpa10x_data *data)
 	skb = skb_dequeue(&data->tx_queue);
 
 	if (skb) {
-		memcpy(urb->transfer_buffer, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, urb->transfer_buffer, skb->len);
 		urb->transfer_buffer_length = skb->len;
 
 		err = usb_submit_urb(urb, GFP_ATOMIC);
diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c
index 18b0f3992c5b..39516074636b 100644
--- a/drivers/bluetooth/bt3c_cs.c
+++ b/drivers/bluetooth/bt3c_cs.c
@@ -303,20 +303,20 @@ static void bt3c_receive(bt3c_info_t *info)
 	switch (info->rx_state) {
 
 	case RECV_WAIT_EVENT_HEADER:
-		eh = (struct hci_event_hdr *)(info->rx_skb->data);
+		eh = hci_event_hdr(info->rx_skb);
 		info->rx_state = RECV_WAIT_DATA;
 		info->rx_count = eh->plen;
 		break;
 
 	case RECV_WAIT_ACL_HEADER:
-		ah = (struct hci_acl_hdr *)(info->rx_skb->data);
+		ah = hci_acl_hdr(info->rx_skb);
 		dlen = __le16_to_cpu(ah->dlen);
 		info->rx_state = RECV_WAIT_DATA;
 		info->rx_count = dlen;
 		break;
 
 	case RECV_WAIT_SCO_HEADER:
-		sh = (struct hci_sco_hdr *)(info->rx_skb->data);
+		sh = hci_sco_hdr(info->rx_skb);
 		info->rx_state = RECV_WAIT_DATA;
 		info->rx_count = sh->dlen;
 		break;
diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c
index c1bce75148fe..d7d2ea0d86a1 100644
--- a/drivers/bluetooth/btuart_cs.c
+++ b/drivers/bluetooth/btuart_cs.c
@@ -250,20 +250,20 @@ static void btuart_receive(btuart_info_t *info)
 	switch (info->rx_state) {
 
 	case RECV_WAIT_EVENT_HEADER:
-		eh = (struct hci_event_hdr *)(info->rx_skb->data);
+		eh = hci_event_hdr(info->rx_skb);
 		info->rx_state = RECV_WAIT_DATA;
 		info->rx_count = eh->plen;
 		break;
 
 	case RECV_WAIT_ACL_HEADER:
-		ah = (struct hci_acl_hdr *)(info->rx_skb->data);
+		ah = hci_acl_hdr(info->rx_skb);
 		dlen = __le16_to_cpu(ah->dlen);
 		info->rx_state = RECV_WAIT_DATA;
 		info->rx_count = dlen;
 		break;
 
 	case RECV_WAIT_SCO_HEADER:
-		sh = (struct hci_sco_hdr *)(info->rx_skb->data);
+		sh = hci_sco_hdr(info->rx_skb);
 		info->rx_state = RECV_WAIT_DATA;
 		info->rx_count = sh->dlen;
 		break;
diff --git a/drivers/bluetooth/dtl1_cs.c b/drivers/bluetooth/dtl1_cs.c
index 459aa97937ab..7f9c54b9964a 100644
--- a/drivers/bluetooth/dtl1_cs.c
+++ b/drivers/bluetooth/dtl1_cs.c
@@ -425,7 +425,7 @@ static int dtl1_hci_send_frame(struct sk_buff *skb)
 		return -ENOMEM;
 
 	skb_reserve(s, NSHL);
-	memcpy(skb_put(s, skb->len), skb->data, skb->len);
+	skb_copy_from_linear_data(skb, skb_put(s, skb->len), skb->len);
 	if (skb->len & 0x0001)
 		*skb_put(s, 1) = 0;	/* PAD */
 
diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c
index 34f0afc42407..bfbae14cf93d 100644
--- a/drivers/bluetooth/hci_h4.c
+++ b/drivers/bluetooth/hci_h4.c
@@ -188,7 +188,7 @@ static int h4_recv(struct hci_uart *hu, void *data, int count)
 			continue;
 
 		case H4_W4_EVENT_HDR:
-			eh = (struct hci_event_hdr *) h4->rx_skb->data;
+			eh = hci_event_hdr(h4->rx_skb);
 
 			BT_DBG("Event header: evt 0x%2.2x plen %d", eh->evt, eh->plen);
 
@@ -196,7 +196,7 @@ static int h4_recv(struct hci_uart *hu, void *data, int count)
 			continue;
 
 		case H4_W4_ACL_HDR:
-			ah = (struct hci_acl_hdr *) h4->rx_skb->data;
+			ah = hci_acl_hdr(h4->rx_skb);
 			dlen = __le16_to_cpu(ah->dlen);
 
 			BT_DBG("ACL header: dlen %d", dlen);
@@ -205,7 +205,7 @@ static int h4_recv(struct hci_uart *hu, void *data, int count)
 			continue;
 
 		case H4_W4_SCO_HDR:
-			sh = (struct hci_sco_hdr *) h4->rx_skb->data;
+			sh = hci_sco_hdr(h4->rx_skb);
 
 			BT_DBG("SCO header: dlen %d", sh->dlen);
 
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 8d025e9b5bce..157b1d09ab55 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -4169,7 +4169,7 @@ static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev)
 	netif_stop_queue(dev);
 
 	/* copy data to device buffers */
-	memcpy(info->tx_buf, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, info->tx_buf, skb->len);
 	info->tx_get = 0;
 	info->tx_put = info->tx_count = skb->len;
 
diff --git a/drivers/char/random.c b/drivers/char/random.c
index b9dc7aa1dfb3..46c1b97748b6 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -881,15 +881,15 @@ EXPORT_SYMBOL(get_random_bytes);
  */
 static void init_std_data(struct entropy_store *r)
 {
-	struct timeval tv;
+	ktime_t now;
 	unsigned long flags;
 
 	spin_lock_irqsave(&r->lock, flags);
 	r->entropy_count = 0;
 	spin_unlock_irqrestore(&r->lock, flags);
 
-	do_gettimeofday(&tv);
-	add_entropy_words(r, (__u32 *)&tv, sizeof(tv)/4);
+	now = ktime_get_real();
+	add_entropy_words(r, (__u32 *)&now, sizeof(now)/4);
 	add_entropy_words(r, (__u32 *)utsname(),
 			  sizeof(*(utsname()))/4);
 }
@@ -911,14 +911,12 @@ void rand_initialize_irq(int irq)
 		return;
 
 	/*
-	 * If kmalloc returns null, we just won't use that entropy
+	 * If kzalloc returns null, we just won't use that entropy
 	 * source.
 	 */
-	state = kmalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
-	if (state) {
-		memset(state, 0, sizeof(struct timer_rand_state));
+	state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
+	if (state)
 		irq_timer_state[irq] = state;
-	}
 }
 
 #ifdef CONFIG_BLOCK
@@ -927,14 +925,12 @@ void rand_initialize_disk(struct gendisk *disk)
 	struct timer_rand_state *state;
 
 	/*
-	 * If kmalloc returns null, we just won't use that entropy
+	 * If kzalloc returns null, we just won't use that entropy
 	 * source.
 	 */
-	state = kmalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
-	if (state) {
-		memset(state, 0, sizeof(struct timer_rand_state));
+	state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
+	if (state)
 		disk->random = state;
-	}
 }
 #endif
 
@@ -1469,7 +1465,6 @@ late_initcall(seqgen_init);
 __u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr,
 				   __be16 sport, __be16 dport)
 {
-	struct timeval tv;
 	__u32 seq;
 	__u32 hash[12];
 	struct keydata *keyptr = get_keyptr();
@@ -1485,8 +1480,7 @@ __u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr,
 	seq = twothirdsMD4Transform((const __u32 *)daddr, hash) & HASH_MASK;
 	seq += keyptr->count;
 
-	do_gettimeofday(&tv);
-	seq += tv.tv_usec + tv.tv_sec * 1000000;
+	seq += ktime_get_real().tv64;
 
 	return seq;
 }
@@ -1521,7 +1515,6 @@ __u32 secure_ip_id(__be32 daddr)
 __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 				 __be16 sport, __be16 dport)
 {
-	struct timeval tv;
 	__u32 seq;
 	__u32 hash[4];
 	struct keydata *keyptr = get_keyptr();
@@ -1543,12 +1536,11 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 	 *	As close as possible to RFC 793, which
 	 *	suggests using a 250 kHz clock.
 	 *	Further reading shows this assumes 2 Mb/s networks.
-	 *	For 10 Mb/s Ethernet, a 1 MHz clock is appropriate.
+	 *	For 10 Gb/s Ethernet, a 1 GHz clock is appropriate.
 	 *	That's funny, Linux has one built in! Use it!
 	 *	(Networks are faster now - should this be increased?)
 	 */
-	do_gettimeofday(&tv);
-	seq += tv.tv_usec + tv.tv_sec * 1000000;
+	seq += ktime_get_real().tv64;
 #if 0
 	printk("init_seq(%lx, %lx, %d, %d) = %d\n",
 	       saddr, daddr, sport, dport, seq);
@@ -1556,8 +1548,6 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 	return seq;
 }
 
-EXPORT_SYMBOL(secure_tcp_sequence_number);
-
 /* Generate secure starting point for ephemeral IPV4 transport port search */
 u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
 {
@@ -1598,7 +1588,6 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16
 u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
 				__be16 sport, __be16 dport)
 {
-	struct timeval tv;
 	u64 seq;
 	__u32 hash[4];
 	struct keydata *keyptr = get_keyptr();
@@ -1611,8 +1600,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
 	seq = half_md4_transform(hash, keyptr->secret);
 	seq |= ((u64)keyptr->count) << (32 - HASH_BITS);
 
-	do_gettimeofday(&tv);
-	seq += tv.tv_usec + tv.tv_sec * 1000000;
+	seq += ktime_get_real().tv64;
 	seq &= (1ull << 48) - 1;
 #if 0
 	printk("dccp init_seq(%lx, %lx, %d, %d) = %d\n",
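
Two idioms recur in the random.c hunks: kmalloc() followed by memset() collapses into kzalloc(), and struct timeval plus do_gettimeofday() gives way to ktime_get_real(). A sketch of both, under the assumption (true for this kernel generation) that ktime_t exposes its raw nanosecond count as .tv64:

	#include <linux/ktime.h>
	#include <linux/slab.h>

	static void *example_zalloc(size_t size)
	{
		/* was: p = kmalloc(size, GFP_KERNEL);
		 *      if (p) memset(p, 0, size); */
		return kzalloc(size, GFP_KERNEL);
	}

	static u64 example_stamp(u64 seq)
	{
		/* was: do_gettimeofday(&tv);
		 *      seq += tv.tv_usec + tv.tv_sec * 1000000; */
		return seq + ktime_get_real().tv64;
	}

The ktime form is one call instead of three lines, and a nanosecond counter also walks the sequence space faster than the old microsecond sum.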
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index a905f7820331..a7b9e9bb3e8d 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -212,7 +212,7 @@ static void cn_rx_skb(struct sk_buff *__skb)
 	skb = skb_get(__skb);
 
 	if (skb->len >= NLMSG_SPACE(0)) {
-		nlh = (struct nlmsghdr *)skb->data;
+		nlh = nlmsg_hdr(skb);
 
 		if (nlh->nlmsg_len < sizeof(struct cn_msg) ||
 		    skb->len < nlh->nlmsg_len ||
@@ -448,7 +448,7 @@ static int __devinit cn_init(void)
 
 	dev->nls = netlink_kernel_create(NETLINK_CONNECTOR,
 					 CN_NETLINK_USERS + 0xf,
-					 dev->input, THIS_MODULE);
+					 dev->input, NULL, THIS_MODULE);
 	if (!dev->nls)
 		return -EIO;
 
diff --git a/drivers/ieee1394/eth1394.c b/drivers/ieee1394/eth1394.c
index 03e44b337eb0..a364003ba47f 100644
--- a/drivers/ieee1394/eth1394.c
+++ b/drivers/ieee1394/eth1394.c
@@ -834,7 +834,7 @@ static inline u16 ether1394_type_trans(struct sk_buff *skb,
 	struct eth1394hdr *eth;
 	unsigned char *rawp;
 
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull (skb, ETH1394_HLEN);
 	eth = eth1394_hdr(skb);
 
@@ -1668,7 +1668,7 @@ static int ether1394_tx (struct sk_buff *skb, struct net_device *dev)
 	if (memcmp(eth->h_dest, dev->broadcast, ETH1394_ALEN) == 0 ||
 	    proto == htons(ETH_P_ARP) ||
 	    (proto == htons(ETH_P_IP) &&
-	     IN_MULTICAST(ntohl(skb->nh.iph->daddr)))) {
+	     IN_MULTICAST(ntohl(ip_hdr(skb)->daddr)))) {
 		tx_type = ETH1394_GASP;
 		dest_node = LOCAL_BUS | ALL_NODES;
 		max_payload = priv->bc_maxpayload - ETHER1394_GASP_OVERHEAD;
diff --git a/drivers/ieee1394/eth1394.h b/drivers/ieee1394/eth1394.h
index c45cbff9138d..1e8356535149 100644
--- a/drivers/ieee1394/eth1394.h
+++ b/drivers/ieee1394/eth1394.h
@@ -90,7 +90,7 @@ struct eth1394hdr {
 
 static inline struct eth1394hdr *eth1394_hdr(const struct sk_buff *skb)
 {
-	return (struct eth1394hdr *)skb->mac.raw;
+	return (struct eth1394hdr *)skb_mac_header(skb);
 }
 #endif
 
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
index 59243d9aedd6..58bc272bd407 100644
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -439,7 +439,8 @@ static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem)
 	}
 
 	/* Setup the skb for reuse since we're dropping this pkt */
-	elem->skb->tail = elem->skb->data = elem->skb->head;
+	elem->skb->data = elem->skb->head;
+	skb_reset_tail_pointer(elem->skb);
 
 	/* Zero out the rxp hdr in the sk_buff */
 	memset(elem->skb->data, 0, sizeof(*rxp_hdr));
@@ -521,9 +522,8 @@ static void c2_rx_interrupt(struct net_device *netdev)
 		 *    "sizeof(struct c2_rxp_hdr)".
 		 */
 		skb->data += sizeof(*rxp_hdr);
-		skb->tail = skb->data + buflen;
+		skb_set_tail_pointer(skb, buflen);
 		skb->len = buflen;
-		skb->dev = netdev;
 		skb->protocol = eth_type_trans(skb, netdev);
 
 		netif_rx(skb);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 2d2de9b8b729..3b4b0acd707f 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -477,7 +477,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
477 BUG_ON(skb_cloned(skb)); 477 BUG_ON(skb_cloned(skb));
478 478
479 mpalen = sizeof(*mpa) + ep->plen; 479 mpalen = sizeof(*mpa) + ep->plen;
480 if (skb->data + mpalen + sizeof(*req) > skb->end) { 480 if (skb->data + mpalen + sizeof(*req) > skb_end_pointer(skb)) {
481 kfree_skb(skb); 481 kfree_skb(skb);
482 skb=alloc_skb(mpalen + sizeof(*req), GFP_KERNEL); 482 skb=alloc_skb(mpalen + sizeof(*req), GFP_KERNEL);
483 if (!skb) { 483 if (!skb) {
@@ -507,7 +507,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
507 */ 507 */
508 skb_get(skb); 508 skb_get(skb);
509 set_arp_failure_handler(skb, arp_failure_discard); 509 set_arp_failure_handler(skb, arp_failure_discard);
510 skb->h.raw = skb->data; 510 skb_reset_transport_header(skb);
511 len = skb->len; 511 len = skb->len;
512 req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); 512 req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
513 req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); 513 req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
@@ -559,7 +559,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
559 skb_get(skb); 559 skb_get(skb);
560 skb->priority = CPL_PRIORITY_DATA; 560 skb->priority = CPL_PRIORITY_DATA;
561 set_arp_failure_handler(skb, arp_failure_discard); 561 set_arp_failure_handler(skb, arp_failure_discard);
562 skb->h.raw = skb->data; 562 skb_reset_transport_header(skb);
563 req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); 563 req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
564 req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); 564 req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
565 req->wr_lo = htonl(V_WR_TID(ep->hwtid)); 565 req->wr_lo = htonl(V_WR_TID(ep->hwtid));
@@ -610,7 +610,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
610 */ 610 */
611 skb_get(skb); 611 skb_get(skb);
612 set_arp_failure_handler(skb, arp_failure_discard); 612 set_arp_failure_handler(skb, arp_failure_discard);
613 skb->h.raw = skb->data; 613 skb_reset_transport_header(skb);
614 len = skb->len; 614 len = skb->len;
615 req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); 615 req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
616 req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); 616 req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
@@ -821,7 +821,8 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
821 /* 821 /*
822 * copy the new data into our accumulation buffer. 822 * copy the new data into our accumulation buffer.
823 */ 823 */
824 memcpy(&(ep->mpa_pkt[ep->mpa_pkt_len]), skb->data, skb->len); 824 skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
825 skb->len);
825 ep->mpa_pkt_len += skb->len; 826 ep->mpa_pkt_len += skb->len;
826 827
827 /* 828 /*
@@ -940,7 +941,8 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
940 /* 941 /*
941 * Copy the new data into our accumulation buffer. 942 * Copy the new data into our accumulation buffer.
942 */ 943 */
943 memcpy(&(ep->mpa_pkt[ep->mpa_pkt_len]), skb->data, skb->len); 944 skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
945 skb->len);
944 ep->mpa_pkt_len += skb->len; 946 ep->mpa_pkt_len += skb->len;
945 947
946 /* 948 /*
@@ -1619,7 +1621,8 @@ static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1619 PDBG("%s ep %p\n", __FUNCTION__, ep); 1621 PDBG("%s ep %p\n", __FUNCTION__, ep);
1620 skb_pull(skb, sizeof(struct cpl_rdma_terminate)); 1622 skb_pull(skb, sizeof(struct cpl_rdma_terminate));
1621 PDBG("%s saving %d bytes of term msg\n", __FUNCTION__, skb->len); 1623 PDBG("%s saving %d bytes of term msg\n", __FUNCTION__, skb->len);
1622 memcpy(ep->com.qp->attr.terminate_buffer, skb->data, skb->len); 1624 skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer,
1625 skb->len);
1623 ep->com.qp->attr.terminate_msg_len = skb->len; 1626 ep->com.qp->attr.terminate_msg_len = skb->len;
1624 ep->com.qp->attr.is_terminate_local = 0; 1627 ep->com.qp->attr.is_terminate_local = 0;
1625 return CPL_RET_BUF_DONE; 1628 return CPL_RET_BUF_DONE;
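The iwch_cm.c hunks bundle three of the new accessors: skb_end_pointer() replaces reads of skb->end in the room check, skb_reset_transport_header() replaces skb->h.raw = skb->data, and skb_copy_from_linear_data() replaces memcpy() out of skb->data. A hedged sketch of the equivalences, with made-up buffer and length names:

    #include <linux/skbuff.h>

    static void accessor_equivalents_sketch(struct sk_buff *skb, void *dst,
                                            unsigned int need, unsigned int len)
    {
            /* was: if (skb->data + need > skb->end) */
            if (skb->data + need > skb_end_pointer(skb))
                    return;

            /* was: skb->h.raw = skb->data; */
            skb_reset_transport_header(skb);

            /* was: memcpy(dst, skb->data, len); */
            skb_copy_from_linear_data(skb, dst, len);
    }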
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 2b242a4823f8..c722e5c141b3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -408,7 +408,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
408 skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb); 408 skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
409 409
410 skb->protocol = ((struct ipoib_header *) skb->data)->proto; 410 skb->protocol = ((struct ipoib_header *) skb->data)->proto;
411 skb->mac.raw = skb->data; 411 skb_reset_mac_header(skb);
412 skb_pull(skb, IPOIB_ENCAP_LEN); 412 skb_pull(skb, IPOIB_ENCAP_LEN);
413 413
414 dev->last_rx = jiffies; 414 dev->last_rx = jiffies;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index ba0ee5cf2ad7..93f74567897e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -216,7 +216,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
216 if (wc->slid != priv->local_lid || 216 if (wc->slid != priv->local_lid ||
217 wc->src_qp != priv->qp->qp_num) { 217 wc->src_qp != priv->qp->qp_num) {
218 skb->protocol = ((struct ipoib_header *) skb->data)->proto; 218 skb->protocol = ((struct ipoib_header *) skb->data)->proto;
219 skb->mac.raw = skb->data; 219 skb_reset_mac_header(skb);
220 skb_pull(skb, IPOIB_ENCAP_LEN); 220 skb_pull(skb, IPOIB_ENCAP_LEN);
221 221
222 dev->last_rx = jiffies; 222 dev->last_rx = jiffies;
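Both IPoIB receive paths swap the open-coded skb->mac.raw = skb->data for skb_reset_mac_header(). The reset helpers record the header mark at the current skb->data, so the same source works whether sk_buff keeps the mark as a pointer or as an offset. A two-step sketch, with a hypothetical encap_len parameter standing in for IPOIB_ENCAP_LEN:

    #include <linux/skbuff.h>

    static void mark_mac_sketch(struct sk_buff *skb, unsigned int encap_len)
    {
            /* was: skb->mac.raw = skb->data; */
            skb_reset_mac_header(skb);
            skb_pull(skb, encap_len);   /* data now points past the encap */
    }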
diff --git a/drivers/isdn/act2000/module.c b/drivers/isdn/act2000/module.c
index e3e5c1399076..ee2b0b9f8f46 100644
--- a/drivers/isdn/act2000/module.c
+++ b/drivers/isdn/act2000/module.c
@@ -442,7 +442,7 @@ act2000_sendbuf(act2000_card *card, int channel, int ack, struct sk_buff *skb)
442 return 0; 442 return 0;
443 } 443 }
444 skb_reserve(xmit_skb, 19); 444 skb_reserve(xmit_skb, 19);
445 memcpy(skb_put(xmit_skb, len), skb->data, len); 445 skb_copy_from_linear_data(skb, skb_put(xmit_skb, len), len);
446 } else { 446 } else {
447 xmit_skb = skb_clone(skb, GFP_ATOMIC); 447 xmit_skb = skb_clone(skb, GFP_ATOMIC);
448 if (!xmit_skb) { 448 if (!xmit_skb) {
diff --git a/drivers/isdn/gigaset/usb-gigaset.c b/drivers/isdn/gigaset/usb-gigaset.c
index 2baef349c12d..c8e1c357cec8 100644
--- a/drivers/isdn/gigaset/usb-gigaset.c
+++ b/drivers/isdn/gigaset/usb-gigaset.c
@@ -652,7 +652,7 @@ static int write_modem(struct cardstate *cs)
652 * transmit data 652 * transmit data
653 */ 653 */
654 count = min(bcs->tx_skb->len, (unsigned) ucs->bulk_out_size); 654 count = min(bcs->tx_skb->len, (unsigned) ucs->bulk_out_size);
655 memcpy(ucs->bulk_out_buffer, bcs->tx_skb->data, count); 655 skb_copy_from_linear_data(bcs->tx_skb, ucs->bulk_out_buffer, count);
656 skb_pull(bcs->tx_skb, count); 656 skb_pull(bcs->tx_skb, count);
657 atomic_set(&ucs->busy, 1); 657 atomic_set(&ucs->busy, 1);
658 gig_dbg(DEBUG_OUTPUT, "write_modem: send %d bytes", count); 658 gig_dbg(DEBUG_OUTPUT, "write_modem: send %d bytes", count);
diff --git a/drivers/isdn/hardware/avm/b1dma.c b/drivers/isdn/hardware/avm/b1dma.c
index 1e2d38e3d68c..428872b653e9 100644
--- a/drivers/isdn/hardware/avm/b1dma.c
+++ b/drivers/isdn/hardware/avm/b1dma.c
@@ -404,7 +404,8 @@ static void b1dma_dispatch_tx(avmcard *card)
404 printk(KERN_DEBUG "tx: put 0x%x len=%d\n", 404 printk(KERN_DEBUG "tx: put 0x%x len=%d\n",
405 skb->data[2], txlen); 405 skb->data[2], txlen);
406#endif 406#endif
407 memcpy(dma->sendbuf.dmabuf, skb->data+2, skb->len-2); 407 skb_copy_from_linear_data_offset(skb, 2, dma->sendbuf.dmabuf,
408 skb->len - 2);
408 } 409 }
409 txlen = (txlen + 3) & ~3; 410 txlen = (txlen + 3) & ~3;
410 411
diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c
index 6f5efa8d78cb..d58f927e766a 100644
--- a/drivers/isdn/hardware/avm/c4.c
+++ b/drivers/isdn/hardware/avm/c4.c
@@ -457,7 +457,8 @@ static void c4_dispatch_tx(avmcard *card)
457 printk(KERN_DEBUG "%s: tx put 0x%x len=%d\n", 457 printk(KERN_DEBUG "%s: tx put 0x%x len=%d\n",
458 card->name, skb->data[2], txlen); 458 card->name, skb->data[2], txlen);
459#endif 459#endif
460 memcpy(dma->sendbuf.dmabuf, skb->data+2, skb->len-2); 460 skb_copy_from_linear_data_offset(skb, 2, dma->sendbuf.dmabuf,
461 skb->len - 2);
461 } 462 }
462 txlen = (txlen + 3) & ~3; 463 txlen = (txlen + 3) & ~3;
463 464
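When the copy starts past skb->data, the _offset variant of the copy helper carries the displacement, so the AVM B1/C4 DMA staging above collapses into one call. A sketch, with dmabuf standing in for the card's bounce buffer:

    #include <linux/skbuff.h>

    static void stage_tx_sketch(const struct sk_buff *skb, void *dmabuf)
    {
            /* was: memcpy(dmabuf, skb->data + 2, skb->len - 2); */
            skb_copy_from_linear_data_offset(skb, 2, dmabuf, skb->len - 2);
    }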
diff --git a/drivers/isdn/hisax/elsa_ser.c b/drivers/isdn/hisax/elsa_ser.c
index ae377e812775..1642dca988a1 100644
--- a/drivers/isdn/hisax/elsa_ser.c
+++ b/drivers/isdn/hisax/elsa_ser.c
@@ -254,14 +254,16 @@ write_modem(struct BCState *bcs) {
254 count = len; 254 count = len;
255 if (count > MAX_MODEM_BUF - fp) { 255 if (count > MAX_MODEM_BUF - fp) {
256 count = MAX_MODEM_BUF - fp; 256 count = MAX_MODEM_BUF - fp;
257 memcpy(cs->hw.elsa.transbuf + fp, bcs->tx_skb->data, count); 257 skb_copy_from_linear_data(bcs->tx_skb,
258 cs->hw.elsa.transbuf + fp, count);
258 skb_pull(bcs->tx_skb, count); 259 skb_pull(bcs->tx_skb, count);
259 cs->hw.elsa.transcnt += count; 260 cs->hw.elsa.transcnt += count;
260 ret = count; 261 ret = count;
261 count = len - count; 262 count = len - count;
262 fp = 0; 263 fp = 0;
263 } 264 }
264 memcpy((cs->hw.elsa.transbuf + fp), bcs->tx_skb->data, count); 265 skb_copy_from_linear_data(bcs->tx_skb,
266 cs->hw.elsa.transbuf + fp, count);
265 skb_pull(bcs->tx_skb, count); 267 skb_pull(bcs->tx_skb, count);
266 cs->hw.elsa.transcnt += count; 268 cs->hw.elsa.transcnt += count;
267 ret += count; 269 ret += count;
diff --git a/drivers/isdn/hisax/isdnl2.c b/drivers/isdn/hisax/isdnl2.c
index cd3b5ad53491..3446f249d675 100644
--- a/drivers/isdn/hisax/isdnl2.c
+++ b/drivers/isdn/hisax/isdnl2.c
@@ -1293,7 +1293,8 @@ l2_pull_iqueue(struct FsmInst *fi, int event, void *arg)
1293 oskb = skb; 1293 oskb = skb;
1294 skb = alloc_skb(oskb->len + i, GFP_ATOMIC); 1294 skb = alloc_skb(oskb->len + i, GFP_ATOMIC);
1295 memcpy(skb_put(skb, i), header, i); 1295 memcpy(skb_put(skb, i), header, i);
1296 memcpy(skb_put(skb, oskb->len), oskb->data, oskb->len); 1296 skb_copy_from_linear_data(oskb,
1297 skb_put(skb, oskb->len), oskb->len);
1297 dev_kfree_skb(oskb); 1298 dev_kfree_skb(oskb);
1298 } 1299 }
1299 st->l2.l2l1(st, PH_PULL | INDICATION, skb); 1300 st->l2.l2l1(st, PH_PULL | INDICATION, skb);
diff --git a/drivers/isdn/hysdn/hycapi.c b/drivers/isdn/hysdn/hycapi.c
index b2ae4ec1e49e..f85450146bdc 100644
--- a/drivers/isdn/hysdn/hycapi.c
+++ b/drivers/isdn/hysdn/hycapi.c
@@ -398,8 +398,9 @@ static u16 hycapi_send_message(struct capi_ctr *ctrl, struct sk_buff *skb)
398 _len = CAPIMSG_LEN(skb->data); 398 _len = CAPIMSG_LEN(skb->data);
399 if (_len > 22) { 399 if (_len > 22) {
400 _len2 = _len - 22; 400 _len2 = _len - 22;
401 memcpy(msghead, skb->data, 22); 401 skb_copy_from_linear_data(skb, msghead, 22);
402 memcpy(skb->data + _len2, msghead, 22); 402 skb_copy_to_linear_data_offset(skb, _len2,
403 msghead, 22);
403 skb_pull(skb, _len2); 404 skb_pull(skb, _len2);
404 CAPIMSG_SETLEN(skb->data, 22); 405 CAPIMSG_SETLEN(skb->data, 22);
405 retval = capilib_data_b3_req(&cinfo->ncci_head, 406 retval = capilib_data_b3_req(&cinfo->ncci_head,
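The hycapi hunk shows the direction convention in the new helper family: _from_linear_data copies out of the skb's linear area, _to_linear_data copies into it, and each has an _offset form. A sketch of the 22-byte CAPI header shuffle, with msghead and tail_off standing in for the driver's locals:

    #include <linux/skbuff.h>

    static void capi_shuffle_sketch(struct sk_buff *skb, unsigned char *msghead,
                                    unsigned int tail_off)
    {
            /* out of the skb: was memcpy(msghead, skb->data, 22) */
            skb_copy_from_linear_data(skb, msghead, 22);
            /* back in at an offset:
             * was memcpy(skb->data + tail_off, msghead, 22) */
            skb_copy_to_linear_data_offset(skb, tail_off, msghead, 22);
    }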
diff --git a/drivers/isdn/hysdn/hysdn_net.c b/drivers/isdn/hysdn/hysdn_net.c
index 557d96c78a62..cfa8fa5e44ab 100644
--- a/drivers/isdn/hysdn/hysdn_net.c
+++ b/drivers/isdn/hysdn/hysdn_net.c
@@ -214,8 +214,6 @@ hysdn_rx_netpkt(hysdn_card * card, unsigned char *buf, unsigned short len)
214 lp->stats.rx_dropped++; 214 lp->stats.rx_dropped++;
215 return; 215 return;
216 } 216 }
217 skb->dev = &lp->netdev;
218
219 /* copy the data */ 217 /* copy the data */
220 memcpy(skb_put(skb, len), buf, len); 218 memcpy(skb_put(skb, len), buf, len);
221 219
diff --git a/drivers/isdn/hysdn/hysdn_sched.c b/drivers/isdn/hysdn/hysdn_sched.c
index b7b5aa4748a0..81db4a190d41 100644
--- a/drivers/isdn/hysdn/hysdn_sched.c
+++ b/drivers/isdn/hysdn/hysdn_sched.c
@@ -113,7 +113,8 @@ hysdn_sched_tx(hysdn_card *card, unsigned char *buf,
113 (skb = hysdn_tx_netget(card)) != NULL) 113 (skb = hysdn_tx_netget(card)) != NULL)
114 { 114 {
115 if (skb->len <= maxlen) { 115 if (skb->len <= maxlen) {
116 memcpy(buf, skb->data, skb->len); /* copy the packet to the buffer */ 116 /* copy the packet to the buffer */
117 skb_copy_from_linear_data(skb, buf, skb->len);
117 *len = skb->len; 118 *len = skb->len;
118 *chan = CHAN_NDIS_DATA; 119 *chan = CHAN_NDIS_DATA;
119 card->net_tx_busy = 1; /* we are busy sending network data */ 120 card->net_tx_busy = 1; /* we are busy sending network data */
@@ -126,7 +127,7 @@ hysdn_sched_tx(hysdn_card *card, unsigned char *buf,
126 ((skb = hycapi_tx_capiget(card)) != NULL) ) 127 ((skb = hycapi_tx_capiget(card)) != NULL) )
127 { 128 {
128 if (skb->len <= maxlen) { 129 if (skb->len <= maxlen) {
129 memcpy(buf, skb->data, skb->len); 130 skb_copy_from_linear_data(skb, buf, skb->len);
130 *len = skb->len; 131 *len = skb->len;
131 *chan = CHAN_CAPI; 132 *chan = CHAN_CAPI;
132 hycapi_tx_capiack(card); 133 hycapi_tx_capiack(card);
diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c
index 9c926e41b114..c97330b19877 100644
--- a/drivers/isdn/i4l/isdn_common.c
+++ b/drivers/isdn/i4l/isdn_common.c
@@ -829,7 +829,7 @@ isdn_readbchan(int di, int channel, u_char * buf, u_char * fp, int len, wait_que
829 dflag = 0; 829 dflag = 0;
830 } 830 }
831 count_put = count_pull; 831 count_put = count_pull;
832 memcpy(cp, skb->data, count_put); 832 skb_copy_from_linear_data(skb, cp, count_put);
833 cp += count_put; 833 cp += count_put;
834 len -= count_put; 834 len -= count_put;
835#ifdef CONFIG_ISDN_AUDIO 835#ifdef CONFIG_ISDN_AUDIO
diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c
index 838b3734e2b6..aa83277aba74 100644
--- a/drivers/isdn/i4l/isdn_net.c
+++ b/drivers/isdn/i4l/isdn_net.c
@@ -872,7 +872,8 @@ typedef struct {
872static void 872static void
873isdn_net_log_skb(struct sk_buff * skb, isdn_net_local * lp) 873isdn_net_log_skb(struct sk_buff * skb, isdn_net_local * lp)
874{ 874{
875 u_char *p = skb->nh.raw; /* hopefully, this was set correctly */ 875 /* hopefully, this was set correctly */
876 const u_char *p = skb_network_header(skb);
876 unsigned short proto = ntohs(skb->protocol); 877 unsigned short proto = ntohs(skb->protocol);
877 int data_ofs; 878 int data_ofs;
878 ip_ports *ipp; 879 ip_ports *ipp;
@@ -880,7 +881,7 @@ isdn_net_log_skb(struct sk_buff * skb, isdn_net_local * lp)
880 881
881 addinfo[0] = '\0'; 882 addinfo[0] = '\0';
882 /* This check stolen from 2.1.72 dev_queue_xmit_nit() */ 883 /* This check stolen from 2.1.72 dev_queue_xmit_nit() */
883 if (skb->nh.raw < skb->data || skb->nh.raw >= skb->tail) { 884 if (p < skb->data || skb->network_header >= skb->tail) {
884 /* fall back to old isdn_net_log_packet method() */ 885 /* fall back to old isdn_net_log_packet method() */
885 char * buf = skb->data; 886 char * buf = skb->data;
886 887
@@ -1121,7 +1122,7 @@ isdn_net_adjust_hdr(struct sk_buff *skb, struct net_device *dev)
1121 if (!skb) 1122 if (!skb)
1122 return; 1123 return;
1123 if (lp->p_encap == ISDN_NET_ENCAP_ETHER) { 1124 if (lp->p_encap == ISDN_NET_ENCAP_ETHER) {
1124 int pullsize = (ulong)skb->nh.raw - (ulong)skb->data - ETH_HLEN; 1125 const int pullsize = skb_network_offset(skb) - ETH_HLEN;
1125 if (pullsize > 0) { 1126 if (pullsize > 0) {
1126 printk(KERN_DEBUG "isdn_net: Pull junk %d\n", pullsize); 1127 printk(KERN_DEBUG "isdn_net: Pull junk %d\n", pullsize);
1127 skb_pull(skb, pullsize); 1128 skb_pull(skb, pullsize);
@@ -1366,7 +1367,7 @@ isdn_net_type_trans(struct sk_buff *skb, struct net_device *dev)
1366 struct ethhdr *eth; 1367 struct ethhdr *eth;
1367 unsigned char *rawp; 1368 unsigned char *rawp;
1368 1369
1369 skb->mac.raw = skb->data; 1370 skb_reset_mac_header(skb);
1370 skb_pull(skb, ETH_HLEN); 1371 skb_pull(skb, ETH_HLEN);
1371 eth = eth_hdr(skb); 1372 eth = eth_hdr(skb);
1372 1373
@@ -1786,7 +1787,7 @@ isdn_net_receive(struct net_device *ndev, struct sk_buff *skb)
1786 } 1787 }
1787 skb->dev = ndev; 1788 skb->dev = ndev;
1788 skb->pkt_type = PACKET_HOST; 1789 skb->pkt_type = PACKET_HOST;
1789 skb->mac.raw = skb->data; 1790 skb_reset_mac_header(skb);
1790#ifdef ISDN_DEBUG_NET_DUMP 1791#ifdef ISDN_DEBUG_NET_DUMP
1791 isdn_dumppkt("R:", skb->data, skb->len, 40); 1792 isdn_dumppkt("R:", skb->data, skb->len, 40);
1792#endif 1793#endif
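isdn_net.c reads the network-header mark rather than setting it: skb_network_header() replaces skb->nh.raw, and skb_network_offset() replaces the (ulong) pointer arithmetic against skb->data. The converted bounds check above still mixes one raw field (skb->network_header >= skb->tail) with the helpers; a fully helper-based version of the same test would look like this sketch:

    #include <linux/skbuff.h>

    static int net_hdr_sane_sketch(const struct sk_buff *skb)
    {
            const unsigned char *p = skb_network_header(skb); /* was skb->nh.raw */

            if (p < skb->data || p >= skb_tail_pointer(skb))
                    return -1;                /* mark not inside the data */
            return skb_network_offset(skb);   /* was nh.raw - data */
    }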
diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index 1b2df80c3bce..387392cb3d68 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -1100,7 +1100,8 @@ isdn_ppp_push_higher(isdn_net_dev * net_dev, isdn_net_local * lp, struct sk_buff
1100 goto drop_packet; 1100 goto drop_packet;
1101 } 1101 }
1102 skb_put(skb, skb_old->len + 128); 1102 skb_put(skb, skb_old->len + 128);
1103 memcpy(skb->data, skb_old->data, skb_old->len); 1103 skb_copy_from_linear_data(skb_old, skb->data,
1104 skb_old->len);
1104 if (net_dev->local->ppp_slot < 0) { 1105 if (net_dev->local->ppp_slot < 0) {
1105 printk(KERN_ERR "%s: net_dev->local->ppp_slot(%d) out of range\n", 1106 printk(KERN_ERR "%s: net_dev->local->ppp_slot(%d) out of range\n",
1106 __FUNCTION__, net_dev->local->ppp_slot); 1107 __FUNCTION__, net_dev->local->ppp_slot);
@@ -1167,7 +1168,7 @@ isdn_ppp_push_higher(isdn_net_dev * net_dev, isdn_net_local * lp, struct sk_buff
1167 mlp->huptimer = 0; 1168 mlp->huptimer = 0;
1168#endif /* CONFIG_IPPP_FILTER */ 1169#endif /* CONFIG_IPPP_FILTER */
1169 skb->dev = dev; 1170 skb->dev = dev;
1170 skb->mac.raw = skb->data; 1171 skb_reset_mac_header(skb);
1171 netif_rx(skb); 1172 netif_rx(skb);
1172 /* net_dev->local->stats.rx_packets++; done in isdn_net.c */ 1173 /* net_dev->local->stats.rx_packets++; done in isdn_net.c */
1173 return; 1174 return;
@@ -1902,7 +1903,9 @@ void isdn_ppp_mp_reassembly( isdn_net_dev * net_dev, isdn_net_local * lp,
1902 while( from != to ) { 1903 while( from != to ) {
1903 unsigned int len = from->len - MP_HEADER_LEN; 1904 unsigned int len = from->len - MP_HEADER_LEN;
1904 1905
1905 memcpy(skb_put(skb,len), from->data+MP_HEADER_LEN, len); 1906 skb_copy_from_linear_data_offset(from, MP_HEADER_LEN,
1907 skb_put(skb,len),
1908 len);
1906 frag = from->next; 1909 frag = from->next;
1907 isdn_ppp_mp_free_skb(mp, from); 1910 isdn_ppp_mp_free_skb(mp, from);
1908 from = frag; 1911 from = frag;
diff --git a/drivers/isdn/isdnloop/isdnloop.c b/drivers/isdn/isdnloop/isdnloop.c
index e3add27dd0e1..e93ad59f60bf 100644
--- a/drivers/isdn/isdnloop/isdnloop.c
+++ b/drivers/isdn/isdnloop/isdnloop.c
@@ -415,7 +415,8 @@ isdnloop_sendbuf(int channel, struct sk_buff *skb, isdnloop_card * card)
415 spin_lock_irqsave(&card->isdnloop_lock, flags); 415 spin_lock_irqsave(&card->isdnloop_lock, flags);
416 nskb = dev_alloc_skb(skb->len); 416 nskb = dev_alloc_skb(skb->len);
417 if (nskb) { 417 if (nskb) {
418 memcpy(skb_put(nskb, len), skb->data, len); 418 skb_copy_from_linear_data(skb,
419 skb_put(nskb, len), len);
419 skb_queue_tail(&card->bqueue[channel], nskb); 420 skb_queue_tail(&card->bqueue[channel], nskb);
420 dev_kfree_skb(skb); 421 dev_kfree_skb(skb);
421 } else 422 } else
diff --git a/drivers/isdn/pcbit/capi.c b/drivers/isdn/pcbit/capi.c
index 47c59e95898d..7b55e151f1b0 100644
--- a/drivers/isdn/pcbit/capi.c
+++ b/drivers/isdn/pcbit/capi.c
@@ -429,8 +429,9 @@ int capi_decode_conn_ind(struct pcbit_chan * chan,
429 if (!(info->data.setup.CallingPN = kmalloc(len - count + 1, GFP_ATOMIC))) 429 if (!(info->data.setup.CallingPN = kmalloc(len - count + 1, GFP_ATOMIC)))
430 return -1; 430 return -1;
431 431
432 memcpy(info->data.setup.CallingPN, skb->data + count + 1, 432 skb_copy_from_linear_data_offset(skb, count + 1,
433 len - count); 433 info->data.setup.CallingPN,
434 len - count);
434 info->data.setup.CallingPN[len - count] = 0; 435 info->data.setup.CallingPN[len - count] = 0;
435 436
436 } 437 }
@@ -457,8 +458,9 @@ int capi_decode_conn_ind(struct pcbit_chan * chan,
457 if (!(info->data.setup.CalledPN = kmalloc(len - count + 1, GFP_ATOMIC))) 458 if (!(info->data.setup.CalledPN = kmalloc(len - count + 1, GFP_ATOMIC)))
458 return -1; 459 return -1;
459 460
460 memcpy(info->data.setup.CalledPN, skb->data + count + 1, 461 skb_copy_from_linear_data_offset(skb, count + 1,
461 len - count); 462 info->data.setup.CalledPN,
463 len - count);
462 info->data.setup.CalledPN[len - count] = 0; 464 info->data.setup.CalledPN[len - count] = 0;
463 465
464 } 466 }
@@ -539,7 +541,7 @@ int capi_decode_conn_actv_ind(struct pcbit_chan * chan, struct sk_buff *skb)
539 541
540#ifdef DEBUG 542#ifdef DEBUG
541 if (len > 1 && len < 31) { 543 if (len > 1 && len < 31) {
542 memcpy(str, skb->data + 2, len - 1); 544 skb_copy_from_linear_data_offset(skb, 2, str, len - 1);
543 str[len] = 0; 545 str[len] = 0;
544 printk(KERN_DEBUG "Connected Party Number: %s\n", str); 546 printk(KERN_DEBUG "Connected Party Number: %s\n", str);
545 } 547 }
diff --git a/drivers/media/dvb/dvb-core/dvb_net.c b/drivers/media/dvb/dvb-core/dvb_net.c
index 76e9c36597eb..6a5ab409c4e7 100644
--- a/drivers/media/dvb/dvb-core/dvb_net.c
+++ b/drivers/media/dvb/dvb-core/dvb_net.c
@@ -174,7 +174,7 @@ static unsigned short dvb_net_eth_type_trans(struct sk_buff *skb,
174 struct ethhdr *eth; 174 struct ethhdr *eth;
175 unsigned char *rawp; 175 unsigned char *rawp;
176 176
177 skb->mac.raw=skb->data; 177 skb_reset_mac_header(skb);
178 skb_pull(skb,dev->hard_header_len); 178 skb_pull(skb,dev->hard_header_len);
179 eth = eth_hdr(skb); 179 eth = eth_hdr(skb);
180 180
@@ -600,6 +600,7 @@ static void dvb_net_ule( struct net_device *dev, const u8 *buf, size_t buf_len )
600 /* Check CRC32, we've got it in our skb already. */ 600 /* Check CRC32, we've got it in our skb already. */
601 unsigned short ulen = htons(priv->ule_sndu_len); 601 unsigned short ulen = htons(priv->ule_sndu_len);
602 unsigned short utype = htons(priv->ule_sndu_type); 602 unsigned short utype = htons(priv->ule_sndu_type);
603 const u8 *tail;
603 struct kvec iov[3] = { 604 struct kvec iov[3] = {
604 { &ulen, sizeof ulen }, 605 { &ulen, sizeof ulen },
605 { &utype, sizeof utype }, 606 { &utype, sizeof utype },
@@ -613,10 +614,11 @@ static void dvb_net_ule( struct net_device *dev, const u8 *buf, size_t buf_len )
613 } 614 }
614 615
615 ule_crc = iov_crc32(ule_crc, iov, 3); 616 ule_crc = iov_crc32(ule_crc, iov, 3);
616 expected_crc = *((u8 *)priv->ule_skb->tail - 4) << 24 | 617 tail = skb_tail_pointer(priv->ule_skb);
617 *((u8 *)priv->ule_skb->tail - 3) << 16 | 618 expected_crc = *(tail - 4) << 24 |
618 *((u8 *)priv->ule_skb->tail - 2) << 8 | 619 *(tail - 3) << 16 |
619 *((u8 *)priv->ule_skb->tail - 1); 620 *(tail - 2) << 8 |
621 *(tail - 1);
620 if (ule_crc != expected_crc) { 622 if (ule_crc != expected_crc) {
621 printk(KERN_WARNING "%lu: CRC32 check FAILED: %08x / %08x, SNDU len %d type %#x, ts_remain %d, next 2: %x.\n", 623 printk(KERN_WARNING "%lu: CRC32 check FAILED: %08x / %08x, SNDU len %d type %#x, ts_remain %d, next 2: %x.\n",
622 priv->ts_count, ule_crc, expected_crc, priv->ule_sndu_len, priv->ule_sndu_type, ts_remain, ts_remain > 2 ? *(unsigned short *)from_where : 0); 624 priv->ts_count, ule_crc, expected_crc, priv->ule_sndu_len, priv->ule_sndu_type, ts_remain, ts_remain > 2 ? *(unsigned short *)from_where : 0);
@@ -695,7 +697,9 @@ static void dvb_net_ule( struct net_device *dev, const u8 *buf, size_t buf_len )
695 } 697 }
696 else 698 else
697 { 699 {
698 memcpy(dest_addr, priv->ule_skb->data, ETH_ALEN); 700 skb_copy_from_linear_data(priv->ule_skb,
701 dest_addr,
702 ETH_ALEN);
699 skb_pull(priv->ule_skb, ETH_ALEN); 703 skb_pull(priv->ule_skb, ETH_ALEN);
700 } 704 }
701 } 705 }
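dvb_net_ule() reads the received CRC32 out of the last four bytes of the SNDU, so the repeated skb->tail dereferences become a single skb_tail_pointer() cached in a local, exactly what the new tail variable above does. A sketch of the big-endian trailer read, assuming at least four bytes of linear data:

    #include <linux/skbuff.h>
    #include <linux/types.h>

    static u32 trailer_crc_sketch(const struct sk_buff *skb)
    {
            const u8 *tail = skb_tail_pointer(skb);  /* was (u8 *)skb->tail */

            /* last four bytes of the frame, big-endian */
            return (u32)tail[-4] << 24 | tail[-3] << 16 |
                   tail[-2] << 8 | tail[-1];
    }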
diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c
index b691292ff599..7dd34bd28efc 100644
--- a/drivers/message/fusion/mptlan.c
+++ b/drivers/message/fusion/mptlan.c
@@ -714,6 +714,7 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev)
714 LANSendRequest_t *pSendReq; 714 LANSendRequest_t *pSendReq;
715 SGETransaction32_t *pTrans; 715 SGETransaction32_t *pTrans;
716 SGESimple64_t *pSimple; 716 SGESimple64_t *pSimple;
717 const unsigned char *mac;
717 dma_addr_t dma; 718 dma_addr_t dma;
718 unsigned long flags; 719 unsigned long flags;
719 int ctx; 720 int ctx;
@@ -753,7 +754,7 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev)
753 /* Set the mac.raw pointer, since this apparently isn't getting 754 /* Set the mac.raw pointer, since this apparently isn't getting
754 * done before we get the skb. Pull the data pointer past the mac data. 755 * done before we get the skb. Pull the data pointer past the mac data.
755 */ 756 */
756 skb->mac.raw = skb->data; 757 skb_reset_mac_header(skb);
757 skb_pull(skb, 12); 758 skb_pull(skb, 12);
758 759
759 dma = pci_map_single(mpt_dev->pcidev, skb->data, skb->len, 760 dma = pci_map_single(mpt_dev->pcidev, skb->data, skb->len,
@@ -784,6 +785,7 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev)
784// IOC_AND_NETDEV_NAMES_s_s(dev), 785// IOC_AND_NETDEV_NAMES_s_s(dev),
785// ctx, skb, skb->data)); 786// ctx, skb, skb->data));
786 787
788 mac = skb_mac_header(skb);
787#ifdef QLOGIC_NAA_WORKAROUND 789#ifdef QLOGIC_NAA_WORKAROUND
788{ 790{
789 struct NAA_Hosed *nh; 791 struct NAA_Hosed *nh;
@@ -793,12 +795,12 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev)
793 drops. */ 795 drops. */
794 read_lock_irq(&bad_naa_lock); 796 read_lock_irq(&bad_naa_lock);
795 for (nh = mpt_bad_naa; nh != NULL; nh=nh->next) { 797 for (nh = mpt_bad_naa; nh != NULL; nh=nh->next) {
796 if ((nh->ieee[0] == skb->mac.raw[0]) && 798 if ((nh->ieee[0] == mac[0]) &&
797 (nh->ieee[1] == skb->mac.raw[1]) && 799 (nh->ieee[1] == mac[1]) &&
798 (nh->ieee[2] == skb->mac.raw[2]) && 800 (nh->ieee[2] == mac[2]) &&
799 (nh->ieee[3] == skb->mac.raw[3]) && 801 (nh->ieee[3] == mac[3]) &&
800 (nh->ieee[4] == skb->mac.raw[4]) && 802 (nh->ieee[4] == mac[4]) &&
801 (nh->ieee[5] == skb->mac.raw[5])) { 803 (nh->ieee[5] == mac[5])) {
802 cur_naa = nh->NAA; 804 cur_naa = nh->NAA;
803 dlprintk ((KERN_INFO "mptlan/sdu_send: using NAA value " 805 dlprintk ((KERN_INFO "mptlan/sdu_send: using NAA value "
804 "= %04x.\n", cur_naa)); 806 "= %04x.\n", cur_naa));
@@ -810,12 +812,12 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev)
810#endif 812#endif
811 813
812 pTrans->TransactionDetails[0] = cpu_to_le32((cur_naa << 16) | 814 pTrans->TransactionDetails[0] = cpu_to_le32((cur_naa << 16) |
813 (skb->mac.raw[0] << 8) | 815 (mac[0] << 8) |
814 (skb->mac.raw[1] << 0)); 816 (mac[1] << 0));
815 pTrans->TransactionDetails[1] = cpu_to_le32((skb->mac.raw[2] << 24) | 817 pTrans->TransactionDetails[1] = cpu_to_le32((mac[2] << 24) |
816 (skb->mac.raw[3] << 16) | 818 (mac[3] << 16) |
817 (skb->mac.raw[4] << 8) | 819 (mac[4] << 8) |
818 (skb->mac.raw[5] << 0)); 820 (mac[5] << 0));
819 821
820 pSimple = (SGESimple64_t *) &pTrans->TransactionDetails[2]; 822 pSimple = (SGESimple64_t *) &pTrans->TransactionDetails[2];
821 823
@@ -930,7 +932,7 @@ mpt_lan_receive_post_turbo(struct net_device *dev, u32 tmsg)
930 pci_dma_sync_single_for_cpu(mpt_dev->pcidev, priv->RcvCtl[ctx].dma, 932 pci_dma_sync_single_for_cpu(mpt_dev->pcidev, priv->RcvCtl[ctx].dma,
931 priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE); 933 priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE);
932 934
933 memcpy(skb_put(skb, len), old_skb->data, len); 935 skb_copy_from_linear_data(old_skb, skb_put(skb, len), len);
934 936
935 pci_dma_sync_single_for_device(mpt_dev->pcidev, priv->RcvCtl[ctx].dma, 937 pci_dma_sync_single_for_device(mpt_dev->pcidev, priv->RcvCtl[ctx].dma,
936 priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE); 938 priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE);
@@ -1091,7 +1093,7 @@ mpt_lan_receive_post_reply(struct net_device *dev,
1091 priv->RcvCtl[ctx].dma, 1093 priv->RcvCtl[ctx].dma,
1092 priv->RcvCtl[ctx].len, 1094 priv->RcvCtl[ctx].len,
1093 PCI_DMA_FROMDEVICE); 1095 PCI_DMA_FROMDEVICE);
1094 memcpy(skb_put(skb, l), old_skb->data, l); 1096 skb_copy_from_linear_data(old_skb, skb_put(skb, l), l);
1095 1097
1096 pci_dma_sync_single_for_device(mpt_dev->pcidev, 1098 pci_dma_sync_single_for_device(mpt_dev->pcidev,
1097 priv->RcvCtl[ctx].dma, 1099 priv->RcvCtl[ctx].dma,
@@ -1120,7 +1122,7 @@ mpt_lan_receive_post_reply(struct net_device *dev,
1120 priv->RcvCtl[ctx].len, 1122 priv->RcvCtl[ctx].len,
1121 PCI_DMA_FROMDEVICE); 1123 PCI_DMA_FROMDEVICE);
1122 1124
1123 memcpy(skb_put(skb, len), old_skb->data, len); 1125 skb_copy_from_linear_data(old_skb, skb_put(skb, len), len);
1124 1126
1125 pci_dma_sync_single_for_device(mpt_dev->pcidev, 1127 pci_dma_sync_single_for_device(mpt_dev->pcidev,
1126 priv->RcvCtl[ctx].dma, 1128 priv->RcvCtl[ctx].dma,
@@ -1549,7 +1551,7 @@ mpt_lan_type_trans(struct sk_buff *skb, struct net_device *dev)
1549 struct mpt_lan_ohdr *fch = (struct mpt_lan_ohdr *)skb->data; 1551 struct mpt_lan_ohdr *fch = (struct mpt_lan_ohdr *)skb->data;
1550 struct fcllc *fcllc; 1552 struct fcllc *fcllc;
1551 1553
1552 skb->mac.raw = skb->data; 1554 skb_reset_mac_header(skb);
1553 skb_pull(skb, sizeof(struct mpt_lan_ohdr)); 1555 skb_pull(skb, sizeof(struct mpt_lan_ohdr));
1554 1556
1555 if (fch->dtype == htons(0xffff)) { 1557 if (fch->dtype == htons(0xffff)) {
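mptlan reads the MAC header six times, so the patch caches skb_mac_header() once in a const local instead of respelling skb->mac.raw per byte; that also survives the pointer-to-offset change in sk_buff. A compressed sketch of packing the address into the transaction words, following the layout of the hunk above but with invented names:

    #include <asm/byteorder.h>
    #include <linux/skbuff.h>
    #include <linux/types.h>

    static void pack_mac_sketch(const struct sk_buff *skb, __le32 out[2], u32 naa)
    {
            const unsigned char *mac = skb_mac_header(skb); /* was skb->mac.raw */

            out[0] = cpu_to_le32((naa << 16) | (mac[0] << 8) | mac[1]);
            out[1] = cpu_to_le32(((u32)mac[2] << 24) | (mac[3] << 16) |
                                 (mac[4] << 8) | mac[5]);
    }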
diff --git a/drivers/net/3c501.c b/drivers/net/3c501.c
index 06e33786078d..4bee99ba7dbb 100644
--- a/drivers/net/3c501.c
+++ b/drivers/net/3c501.c
@@ -735,7 +735,6 @@ static void el_receive(struct net_device *dev)
735 else 735 else
736 { 736 {
737 skb_reserve(skb,2); /* Force 16 byte alignment */ 737 skb_reserve(skb,2); /* Force 16 byte alignment */
738 skb->dev = dev;
739 /* 738 /*
740 * The read increments through the bytes. The interrupt 739 * The read increments through the bytes. The interrupt
741 * handler will fix the pointer when it returns to 740 * handler will fix the pointer when it returns to
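Many of the remaining hunks are bare deletions of skb->dev = dev in ethernet receive paths. They are safe for one reason: eth_type_trans() now performs that assignment itself, so a driver that calls it right after filling the skb was setting the field twice. The surviving idiom, sketched as a hypothetical copy-based receive (drivers that never call a *_type_trans helper must keep the assignment, which is why the arcnet capmode code further down retains its skb->dev = dev):

    #include <linux/etherdevice.h>
    #include <linux/netdevice.h>
    #include <linux/skbuff.h>
    #include <linux/string.h>

    static void eth_rx_sketch(struct net_device *dev, const void *buf,
                              unsigned int len)
    {
            struct sk_buff *skb = dev_alloc_skb(len + 2);

            if (!skb)
                    return;                    /* caller counts the drop */
            skb_reserve(skb, 2);               /* align the IP header */
            memcpy(skb_put(skb, len), buf, len);
            skb->protocol = eth_type_trans(skb, dev); /* sets skb->dev too */
            netif_rx(skb);
    }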
diff --git a/drivers/net/3c505.c b/drivers/net/3c505.c
index 702bfb2a5e99..e985a85a5623 100644
--- a/drivers/net/3c505.c
+++ b/drivers/net/3c505.c
@@ -615,7 +615,6 @@ static void receive_packet(struct net_device *dev, int len)
615 if (test_and_set_bit(0, (void *) &adapter->dmaing)) 615 if (test_and_set_bit(0, (void *) &adapter->dmaing))
616 printk(KERN_ERR "%s: rx blocked, DMA in progress, dir %d\n", dev->name, adapter->current_dma.direction); 616 printk(KERN_ERR "%s: rx blocked, DMA in progress, dir %d\n", dev->name, adapter->current_dma.direction);
617 617
618 skb->dev = dev;
619 adapter->current_dma.direction = 0; 618 adapter->current_dma.direction = 0;
620 adapter->current_dma.length = rlen; 619 adapter->current_dma.length = rlen;
621 adapter->current_dma.skb = skb; 620 adapter->current_dma.skb = skb;
@@ -1026,7 +1025,7 @@ static int send_packet(struct net_device *dev, struct sk_buff *skb)
1026 adapter->current_dma.start_time = jiffies; 1025 adapter->current_dma.start_time = jiffies;
1027 1026
1028 if ((unsigned long)(skb->data + nlen) >= MAX_DMA_ADDRESS || nlen != skb->len) { 1027 if ((unsigned long)(skb->data + nlen) >= MAX_DMA_ADDRESS || nlen != skb->len) {
1029 memcpy(adapter->dma_buffer, skb->data, nlen); 1028 skb_copy_from_linear_data(skb, adapter->dma_buffer, nlen);
1030 memset(adapter->dma_buffer+skb->len, 0, nlen-skb->len); 1029 memset(adapter->dma_buffer+skb->len, 0, nlen-skb->len);
1031 target = isa_virt_to_bus(adapter->dma_buffer); 1030 target = isa_virt_to_bus(adapter->dma_buffer);
1032 } 1031 }
diff --git a/drivers/net/3c507.c b/drivers/net/3c507.c
index 54e1d5aebed3..eed4299dc426 100644
--- a/drivers/net/3c507.c
+++ b/drivers/net/3c507.c
@@ -873,7 +873,6 @@ static void el16_rx(struct net_device *dev)
873 } 873 }
874 874
875 skb_reserve(skb,2); 875 skb_reserve(skb,2);
876 skb->dev = dev;
877 876
878 /* 'skb->data' points to the start of sk_buff data area. */ 877 /* 'skb->data' points to the start of sk_buff data area. */
879 memcpy_fromio(skb_put(skb,pkt_len), data_frame + 10, pkt_len); 878 memcpy_fromio(skb_put(skb,pkt_len), data_frame + 10, pkt_len);
diff --git a/drivers/net/3c509.c b/drivers/net/3c509.c
index f791bf026e51..c7511c4d3b68 100644
--- a/drivers/net/3c509.c
+++ b/drivers/net/3c509.c
@@ -1091,7 +1091,6 @@ el3_rx(struct net_device *dev)
1091 printk("Receiving packet size %d status %4.4x.\n", 1091 printk("Receiving packet size %d status %4.4x.\n",
1092 pkt_len, rx_status); 1092 pkt_len, rx_status);
1093 if (skb != NULL) { 1093 if (skb != NULL) {
1094 skb->dev = dev;
1095 skb_reserve(skb, 2); /* Align IP on 16 byte */ 1094 skb_reserve(skb, 2); /* Align IP on 16 byte */
1096 1095
1097 /* 'skb->data' points to the start of sk_buff data area. */ 1096 /* 'skb->data' points to the start of sk_buff data area. */
diff --git a/drivers/net/3c515.c b/drivers/net/3c515.c
index c307ce66145c..290166d5e7d1 100644
--- a/drivers/net/3c515.c
+++ b/drivers/net/3c515.c
@@ -1292,7 +1292,6 @@ static int corkscrew_rx(struct net_device *dev)
1292 printk("Receiving packet size %d status %4.4x.\n", 1292 printk("Receiving packet size %d status %4.4x.\n",
1293 pkt_len, rx_status); 1293 pkt_len, rx_status);
1294 if (skb != NULL) { 1294 if (skb != NULL) {
1295 skb->dev = dev;
1296 skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ 1295 skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */
1297 /* 'skb_put()' points to the start of sk_buff data area. */ 1296 /* 'skb_put()' points to the start of sk_buff data area. */
1298 insl(ioaddr + RX_FIFO, 1297 insl(ioaddr + RX_FIFO,
@@ -1363,7 +1362,6 @@ static int boomerang_rx(struct net_device *dev)
1363 copying to a properly sized skbuff. */ 1362 copying to a properly sized skbuff. */
1364 if (pkt_len < rx_copybreak 1363 if (pkt_len < rx_copybreak
1365 && (skb = dev_alloc_skb(pkt_len + 4)) != 0) { 1364 && (skb = dev_alloc_skb(pkt_len + 4)) != 0) {
1366 skb->dev = dev;
1367 skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ 1365 skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */
1368 /* 'skb_put()' points to the start of sk_buff data area. */ 1366 /* 'skb_put()' points to the start of sk_buff data area. */
1369 memcpy(skb_put(skb, pkt_len), 1367 memcpy(skb_put(skb, pkt_len),
diff --git a/drivers/net/3c523.c b/drivers/net/3c523.c
index 17d61eb0a7e5..da1a22c13865 100644
--- a/drivers/net/3c523.c
+++ b/drivers/net/3c523.c
@@ -988,7 +988,6 @@ static void elmc_rcv_int(struct net_device *dev)
988 rbd->status = 0; 988 rbd->status = 0;
989 skb = (struct sk_buff *) dev_alloc_skb(totlen + 2); 989 skb = (struct sk_buff *) dev_alloc_skb(totlen + 2);
990 if (skb != NULL) { 990 if (skb != NULL) {
991 skb->dev = dev;
992 skb_reserve(skb, 2); /* 16 byte alignment */ 991 skb_reserve(skb, 2); /* 16 byte alignment */
993 skb_put(skb,totlen); 992 skb_put(skb,totlen);
994 eth_copy_and_sum(skb, (char *) p->base+(unsigned long) rbd->buffer,totlen,0); 993 eth_copy_and_sum(skb, (char *) p->base+(unsigned long) rbd->buffer,totlen,0);
@@ -1146,7 +1145,7 @@ static int elmc_send_packet(struct sk_buff *skb, struct net_device *dev)
1146 1145
1147 if (len != skb->len) 1146 if (len != skb->len)
1148 memset((char *) p->xmit_cbuffs[p->xmit_count], 0, ETH_ZLEN); 1147 memset((char *) p->xmit_cbuffs[p->xmit_count], 0, ETH_ZLEN);
1149 memcpy((char *) p->xmit_cbuffs[p->xmit_count], (char *) (skb->data), skb->len); 1148 skb_copy_from_linear_data(skb, (char *) p->xmit_cbuffs[p->xmit_count], skb->len);
1150 1149
1151#if (NUM_XMIT_BUFFS == 1) 1150#if (NUM_XMIT_BUFFS == 1)
1152#ifdef NO_NOPCOMMANDS 1151#ifdef NO_NOPCOMMANDS
diff --git a/drivers/net/3c527.c b/drivers/net/3c527.c
index 6c7437e60bd2..c7b571be20e0 100644
--- a/drivers/net/3c527.c
+++ b/drivers/net/3c527.c
@@ -1189,7 +1189,6 @@ static void mc32_rx_ring(struct net_device *dev)
1189 } 1189 }
1190 1190
1191 skb->protocol=eth_type_trans(skb,dev); 1191 skb->protocol=eth_type_trans(skb,dev);
1192 skb->dev=dev;
1193 dev->last_rx = jiffies; 1192 dev->last_rx = jiffies;
1194 lp->net_stats.rx_packets++; 1193 lp->net_stats.rx_packets++;
1195 lp->net_stats.rx_bytes += length; 1194 lp->net_stats.rx_bytes += length;
diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
index b406ecfa7268..80924f76dee8 100644
--- a/drivers/net/3c59x.c
+++ b/drivers/net/3c59x.c
@@ -2414,7 +2414,6 @@ static int vortex_rx(struct net_device *dev)
2414 printk(KERN_DEBUG "Receiving packet size %d status %4.4x.\n", 2414 printk(KERN_DEBUG "Receiving packet size %d status %4.4x.\n",
2415 pkt_len, rx_status); 2415 pkt_len, rx_status);
2416 if (skb != NULL) { 2416 if (skb != NULL) {
2417 skb->dev = dev;
2418 skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ 2417 skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */
2419 /* 'skb_put()' points to the start of sk_buff data area. */ 2418 /* 'skb_put()' points to the start of sk_buff data area. */
2420 if (vp->bus_master && 2419 if (vp->bus_master &&
@@ -2491,7 +2490,6 @@ boomerang_rx(struct net_device *dev)
2491 /* Check if the packet is long enough to just accept without 2490 /* Check if the packet is long enough to just accept without
2492 copying to a properly sized skbuff. */ 2491 copying to a properly sized skbuff. */
2493 if (pkt_len < rx_copybreak && (skb = dev_alloc_skb(pkt_len + 2)) != 0) { 2492 if (pkt_len < rx_copybreak && (skb = dev_alloc_skb(pkt_len + 2)) != 0) {
2494 skb->dev = dev;
2495 skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ 2493 skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */
2496 pci_dma_sync_single_for_cpu(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); 2494 pci_dma_sync_single_for_cpu(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
2497 /* 'skb_put()' points to the start of sk_buff data area. */ 2495 /* 'skb_put()' points to the start of sk_buff data area. */
diff --git a/drivers/net/7990.c b/drivers/net/7990.c
index 1b3d11ed6cff..d396f996af57 100644
--- a/drivers/net/7990.c
+++ b/drivers/net/7990.c
@@ -331,7 +331,6 @@ static int lance_rx (struct net_device *dev)
331 return 0; 331 return 0;
332 } 332 }
333 333
334 skb->dev = dev;
335 skb_reserve (skb, 2); /* 16 byte align */ 334 skb_reserve (skb, 2); /* 16 byte align */
336 skb_put (skb, len); /* make room */ 335 skb_put (skb, len); /* make room */
337 eth_copy_and_sum(skb, 336 eth_copy_and_sum(skb,
@@ -568,7 +567,7 @@ int lance_start_xmit (struct sk_buff *skb, struct net_device *dev)
568 567
569 if (skb->len < ETH_ZLEN) 568 if (skb->len < ETH_ZLEN)
570 memset((char *)&ib->tx_buf[entry][0], 0, ETH_ZLEN); 569 memset((char *)&ib->tx_buf[entry][0], 0, ETH_ZLEN);
571 memcpy ((char *)&ib->tx_buf [entry][0], skb->data, skblen); 570 skb_copy_from_linear_data(skb, &ib->tx_buf[entry][0], skblen);
572 571
573 /* Now, give the packet to the lance */ 572 /* Now, give the packet to the lance */
574 ib->btx_ring [entry].tmd1_bits = (LE_T1_POK|LE_T1_OWN); 573 ib->btx_ring [entry].tmd1_bits = (LE_T1_POK|LE_T1_OWN);
diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c
index 12c8453f44bc..e8c9f27817b0 100644
--- a/drivers/net/8139cp.c
+++ b/drivers/net/8139cp.c
@@ -573,7 +573,6 @@ rx_status_loop:
573 } 573 }
574 574
575 skb_reserve(new_skb, RX_OFFSET); 575 skb_reserve(new_skb, RX_OFFSET);
576 new_skb->dev = dev;
577 576
578 pci_unmap_single(cp->pdev, mapping, 577 pci_unmap_single(cp->pdev, mapping,
579 buflen, PCI_DMA_FROMDEVICE); 578 buflen, PCI_DMA_FROMDEVICE);
@@ -807,7 +806,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev)
807 if (mss) 806 if (mss)
808 flags |= LargeSend | ((mss & MSSMask) << MSSShift); 807 flags |= LargeSend | ((mss & MSSMask) << MSSShift);
809 else if (skb->ip_summed == CHECKSUM_PARTIAL) { 808 else if (skb->ip_summed == CHECKSUM_PARTIAL) {
810 const struct iphdr *ip = skb->nh.iph; 809 const struct iphdr *ip = ip_hdr(skb);
811 if (ip->protocol == IPPROTO_TCP) 810 if (ip->protocol == IPPROTO_TCP)
812 flags |= IPCS | TCPCS; 811 flags |= IPCS | TCPCS;
813 else if (ip->protocol == IPPROTO_UDP) 812 else if (ip->protocol == IPPROTO_UDP)
@@ -826,7 +825,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev)
826 u32 first_len, first_eor; 825 u32 first_len, first_eor;
827 dma_addr_t first_mapping; 826 dma_addr_t first_mapping;
828 int frag, first_entry = entry; 827 int frag, first_entry = entry;
829 const struct iphdr *ip = skb->nh.iph; 828 const struct iphdr *ip = ip_hdr(skb);
830 829
831 /* We must give this initial chunk to the device last. 830 /* We must give this initial chunk to the device last.
832 * Otherwise we could race with the device. 831 * Otherwise we could race with the device.
@@ -1082,7 +1081,6 @@ static int cp_refill_rx (struct cp_private *cp)
1082 if (!skb) 1081 if (!skb)
1083 goto err_out; 1082 goto err_out;
1084 1083
1085 skb->dev = cp->dev;
1086 skb_reserve(skb, RX_OFFSET); 1084 skb_reserve(skb, RX_OFFSET);
1087 1085
1088 mapping = pci_map_single(cp->pdev, skb->data, cp->rx_buf_sz, 1086 mapping = pci_map_single(cp->pdev, skb->data, cp->rx_buf_sz,
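8139cp converts a read-side accessor as well: skb->nh.iph becomes ip_hdr(skb), the typed view of the network header. A minimal sketch of the checksum-offload branch it feeds, with the hardware flag handling elided:

    #include <linux/ip.h>
    #include <linux/skbuff.h>

    static int l4_proto_sketch(const struct sk_buff *skb)
    {
            const struct iphdr *ip = ip_hdr(skb);  /* was skb->nh.iph */

            return ip->protocol;   /* IPPROTO_TCP, IPPROTO_UDP, ... */
    }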
diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c
index 99304b2aa86e..a844b1fe2dc4 100644
--- a/drivers/net/8139too.c
+++ b/drivers/net/8139too.c
@@ -1904,10 +1904,10 @@ static __inline__ void wrap_copy(struct sk_buff *skb, const unsigned char *ring,
1904 u32 left = RX_BUF_LEN - offset; 1904 u32 left = RX_BUF_LEN - offset;
1905 1905
1906 if (size > left) { 1906 if (size > left) {
1907 memcpy(skb->data, ring + offset, left); 1907 skb_copy_to_linear_data(skb, ring + offset, left);
1908 memcpy(skb->data+left, ring, size - left); 1908 skb_copy_to_linear_data_offset(skb, left, ring, size - left);
1909 } else 1909 } else
1910 memcpy(skb->data, ring + offset, size); 1910 skb_copy_to_linear_data(skb, ring + offset, size);
1911} 1911}
1912#endif 1912#endif
1913 1913
@@ -2013,7 +2013,6 @@ no_early_rx:
2013 2013
2014 skb = dev_alloc_skb (pkt_size + 2); 2014 skb = dev_alloc_skb (pkt_size + 2);
2015 if (likely(skb)) { 2015 if (likely(skb)) {
2016 skb->dev = dev;
2017 skb_reserve (skb, 2); /* 16 byte align the IP fields. */ 2016 skb_reserve (skb, 2); /* 16 byte align the IP fields. */
2018#if RX_BUF_IDX == 3 2017#if RX_BUF_IDX == 3
2019 wrap_copy(skb, rx_ring, ring_offset+4, pkt_size); 2018 wrap_copy(skb, rx_ring, ring_offset+4, pkt_size);
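wrap_copy() in 8139too writes into a fresh skb whose data pointer still sits at the head of the buffer, so the memcpy()s targeting skb->data become skb_copy_to_linear_data() and its _offset variant. A sketch of the ring-wrap copy under that assumption:

    #include <linux/skbuff.h>
    #include <linux/types.h>

    static void wrap_copy_sketch(struct sk_buff *skb, const unsigned char *ring,
                                 u32 offset, unsigned int size, u32 ring_len)
    {
            u32 left = ring_len - offset;

            if (size > left) {
                    /* was: memcpy(skb->data, ring + offset, left); */
                    skb_copy_to_linear_data(skb, ring + offset, left);
                    /* was: memcpy(skb->data + left, ring, size - left); */
                    skb_copy_to_linear_data_offset(skb, left, ring, size - left);
            } else {
                    skb_copy_to_linear_data(skb, ring + offset, size);
            }
    }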
diff --git a/drivers/net/82596.c b/drivers/net/82596.c
index 640d7ca2ebcf..3ff1155459a3 100644
--- a/drivers/net/82596.c
+++ b/drivers/net/82596.c
@@ -830,7 +830,6 @@ memory_squeeze:
830 lp->stats.rx_dropped++; 830 lp->stats.rx_dropped++;
831 } 831 }
832 else { 832 else {
833 skb->dev = dev;
834 if (!rx_in_place) { 833 if (!rx_in_place) {
835 /* 16 byte align the data fields */ 834 /* 16 byte align the data fields */
836 skb_reserve(skb, 2); 835 skb_reserve(skb, 2);
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 33af833667da..58527322a39d 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -206,7 +206,7 @@ obj-$(CONFIG_TR) += tokenring/
206obj-$(CONFIG_WAN) += wan/ 206obj-$(CONFIG_WAN) += wan/
207obj-$(CONFIG_ARCNET) += arcnet/ 207obj-$(CONFIG_ARCNET) += arcnet/
208obj-$(CONFIG_NET_PCMCIA) += pcmcia/ 208obj-$(CONFIG_NET_PCMCIA) += pcmcia/
209obj-$(CONFIG_NET_RADIO) += wireless/ 209obj-y += wireless/
210obj-$(CONFIG_NET_TULIP) += tulip/ 210obj-$(CONFIG_NET_TULIP) += tulip/
211obj-$(CONFIG_HAMRADIO) += hamradio/ 211obj-$(CONFIG_HAMRADIO) += hamradio/
212obj-$(CONFIG_IRDA) += irda/ 212obj-$(CONFIG_IRDA) += irda/
diff --git a/drivers/net/a2065.c b/drivers/net/a2065.c
index d76548e75350..1226cbba0450 100644
--- a/drivers/net/a2065.c
+++ b/drivers/net/a2065.c
@@ -320,7 +320,6 @@ static int lance_rx (struct net_device *dev)
320 return 0; 320 return 0;
321 } 321 }
322 322
323 skb->dev = dev;
324 skb_reserve (skb, 2); /* 16 byte align */ 323 skb_reserve (skb, 2); /* 16 byte align */
325 skb_put (skb, len); /* make room */ 324 skb_put (skb, len); /* make room */
326 eth_copy_and_sum(skb, 325 eth_copy_and_sum(skb,
@@ -599,7 +598,7 @@ static int lance_start_xmit (struct sk_buff *skb, struct net_device *dev)
599 ib->btx_ring [entry].length = (-len) | 0xf000; 598 ib->btx_ring [entry].length = (-len) | 0xf000;
600 ib->btx_ring [entry].misc = 0; 599 ib->btx_ring [entry].misc = 0;
601 600
602 memcpy ((char *)&ib->tx_buf [entry][0], skb->data, skblen); 601 skb_copy_from_linear_data(skb, &ib->tx_buf [entry][0], skblen);
603 602
604 /* Clear the slack of the packet, do I need this? */ 603 /* Clear the slack of the packet, do I need this? */
605 if (len != skblen) 604 if (len != skblen)
diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c
index 7138e0e025bc..7122b7ba8d61 100644
--- a/drivers/net/acenic.c
+++ b/drivers/net/acenic.c
@@ -2027,7 +2027,6 @@ static void ace_rx_int(struct net_device *dev, u32 rxretprd, u32 rxretcsm)
2027 */ 2027 */
2028 csum = retdesc->tcp_udp_csum; 2028 csum = retdesc->tcp_udp_csum;
2029 2029
2030 skb->dev = dev;
2031 skb->protocol = eth_type_trans(skb, dev); 2030 skb->protocol = eth_type_trans(skb, dev);
2032 2031
2033 /* 2032 /*
diff --git a/drivers/net/amd8111e.c b/drivers/net/amd8111e.c
index 962c954c2d56..675fe918421b 100644
--- a/drivers/net/amd8111e.c
+++ b/drivers/net/amd8111e.c
@@ -798,9 +798,7 @@ static int amd8111e_rx_poll(struct net_device *dev, int * budget)
798 pci_unmap_single(lp->pci_dev,lp->rx_dma_addr[rx_index], 798 pci_unmap_single(lp->pci_dev,lp->rx_dma_addr[rx_index],
799 lp->rx_buff_len-2, PCI_DMA_FROMDEVICE); 799 lp->rx_buff_len-2, PCI_DMA_FROMDEVICE);
800 skb_put(skb, pkt_len); 800 skb_put(skb, pkt_len);
801 skb->dev = dev;
802 lp->rx_skbuff[rx_index] = new_skb; 801 lp->rx_skbuff[rx_index] = new_skb;
803 new_skb->dev = dev;
804 lp->rx_dma_addr[rx_index] = pci_map_single(lp->pci_dev, 802 lp->rx_dma_addr[rx_index] = pci_map_single(lp->pci_dev,
805 new_skb->data, 803 new_skb->data,
806 lp->rx_buff_len-2, 804 lp->rx_buff_len-2,
@@ -926,9 +924,7 @@ static int amd8111e_rx(struct net_device *dev)
926 pci_unmap_single(lp->pci_dev,lp->rx_dma_addr[rx_index], 924 pci_unmap_single(lp->pci_dev,lp->rx_dma_addr[rx_index],
927 lp->rx_buff_len-2, PCI_DMA_FROMDEVICE); 925 lp->rx_buff_len-2, PCI_DMA_FROMDEVICE);
928 skb_put(skb, pkt_len); 926 skb_put(skb, pkt_len);
929 skb->dev = dev;
930 lp->rx_skbuff[rx_index] = new_skb; 927 lp->rx_skbuff[rx_index] = new_skb;
931 new_skb->dev = dev;
932 lp->rx_dma_addr[rx_index] = pci_map_single(lp->pci_dev, 928 lp->rx_dma_addr[rx_index] = pci_map_single(lp->pci_dev,
933 new_skb->data, lp->rx_buff_len-2,PCI_DMA_FROMDEVICE); 929 new_skb->data, lp->rx_buff_len-2,PCI_DMA_FROMDEVICE);
934 930
diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c
index dba5e5165452..da6ffa8cd81e 100644
--- a/drivers/net/appletalk/cops.c
+++ b/drivers/net/appletalk/cops.c
@@ -853,9 +853,9 @@ static void cops_rx(struct net_device *dev)
853 return; 853 return;
854 } 854 }
855 855
856 skb->mac.raw = skb->data; /* Point to entire packet. */ 856 skb_reset_mac_header(skb); /* Point to entire packet. */
857 skb_pull(skb,3); 857 skb_pull(skb,3);
858 skb->h.raw = skb->data; /* Point to data (Skip header). */ 858 skb_reset_transport_header(skb); /* Point to data (Skip header). */
859 859
860 /* Update the counters. */ 860 /* Update the counters. */
861 lp->stats.rx_packets++; 861 lp->stats.rx_packets++;
diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c
index 2ea44ce49810..6a6cbd331a16 100644
--- a/drivers/net/appletalk/ltpc.c
+++ b/drivers/net/appletalk/ltpc.c
@@ -770,13 +770,13 @@ static int sendup_buffer (struct net_device *dev)
770 skb->data[0] = dnode; 770 skb->data[0] = dnode;
771 skb->data[1] = snode; 771 skb->data[1] = snode;
772 skb->data[2] = llaptype; 772 skb->data[2] = llaptype;
773 skb->mac.raw = skb->data; /* save pointer to llap header */ 773 skb_reset_mac_header(skb); /* save pointer to llap header */
774 skb_pull(skb,3); 774 skb_pull(skb,3);
775 775
776 /* copy ddp(s,e)hdr + contents */ 776 /* copy ddp(s,e)hdr + contents */
777 memcpy(skb->data,(void*)ltdmabuf,len); 777 skb_copy_to_linear_data(skb, ltdmabuf, len);
778 778
779 skb->h.raw = skb->data; 779 skb_reset_transport_header(skb);
780 780
781 stats->rx_packets++; 781 stats->rx_packets++;
782 stats->rx_bytes+=skb->len; 782 stats->rx_bytes+=skb->len;
@@ -917,13 +917,14 @@ static int ltpc_xmit(struct sk_buff *skb, struct net_device *dev)
917 917
918 int i; 918 int i;
919 struct lt_sendlap cbuf; 919 struct lt_sendlap cbuf;
920 unsigned char *hdr;
920 921
921 cbuf.command = LT_SENDLAP; 922 cbuf.command = LT_SENDLAP;
922 cbuf.dnode = skb->data[0]; 923 cbuf.dnode = skb->data[0];
923 cbuf.laptype = skb->data[2]; 924 cbuf.laptype = skb->data[2];
924 skb_pull(skb,3); /* skip past LLAP header */ 925 skb_pull(skb,3); /* skip past LLAP header */
925 cbuf.length = skb->len; /* this is host order */ 926 cbuf.length = skb->len; /* this is host order */
926 skb->h.raw=skb->data; 927 skb_reset_transport_header(skb);
927 928
928 if(debug & DEBUG_UPPER) { 929 if(debug & DEBUG_UPPER) {
929 printk("command "); 930 printk("command ");
@@ -932,11 +933,13 @@ static int ltpc_xmit(struct sk_buff *skb, struct net_device *dev)
932 printk("\n"); 933 printk("\n");
933 } 934 }
934 935
935 do_write(dev,&cbuf,sizeof(cbuf),skb->h.raw,skb->len); 936 hdr = skb_transport_header(skb);
937 do_write(dev, &cbuf, sizeof(cbuf), hdr, skb->len);
936 938
937 if(debug & DEBUG_UPPER) { 939 if(debug & DEBUG_UPPER) {
938 printk("sent %d ddp bytes\n",skb->len); 940 printk("sent %d ddp bytes\n",skb->len);
939 for(i=0;i<skb->len;i++) printk("%02x ",skb->h.raw[i]); 941 for (i = 0; i < skb->len; i++)
942 printk("%02x ", hdr[i]);
940 printk("\n"); 943 printk("\n");
941 } 944 }
942 945
diff --git a/drivers/net/arcnet/arc-rawmode.c b/drivers/net/arcnet/arc-rawmode.c
index 6318814a11a8..e0a18e7c73cb 100644
--- a/drivers/net/arcnet/arc-rawmode.c
+++ b/drivers/net/arcnet/arc-rawmode.c
@@ -110,7 +110,7 @@ static void rx(struct net_device *dev, int bufnum,
110 110
111 pkt = (struct archdr *) skb->data; 111 pkt = (struct archdr *) skb->data;
112 112
113 skb->mac.raw = skb->data; 113 skb_reset_mac_header(skb);
114 skb_pull(skb, ARC_HDR_SIZE); 114 skb_pull(skb, ARC_HDR_SIZE);
115 115
116 /* up to sizeof(pkt->soft) has already been copied from the card */ 116 /* up to sizeof(pkt->soft) has already been copied from the card */
diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c
index 83004fdab0a4..681e20b8466f 100644
--- a/drivers/net/arcnet/arcnet.c
+++ b/drivers/net/arcnet/arcnet.c
@@ -519,9 +519,12 @@ static int arcnet_header(struct sk_buff *skb, struct net_device *dev,
519 * real header when we do rebuild_header. 519 * real header when we do rebuild_header.
520 */ 520 */
521 *(uint16_t *) skb_push(skb, 2) = type; 521 *(uint16_t *) skb_push(skb, 2) = type;
522 if (skb->nh.raw - skb->mac.raw != 2) 522 /*
523 * XXX: Why not use skb->mac_len?
524 */
525 if (skb->network_header - skb->mac_header != 2)
523 BUGMSG(D_NORMAL, "arcnet_header: Yikes! diff (%d) is not 2!\n", 526 BUGMSG(D_NORMAL, "arcnet_header: Yikes! diff (%d) is not 2!\n",
524 (int)(skb->nh.raw - skb->mac.raw)); 527 (int)(skb->network_header - skb->mac_header));
525 return -2; /* return error -- can't transmit yet! */ 528 return -2; /* return error -- can't transmit yet! */
526 } 529 }
527 else { 530 else {
@@ -554,11 +557,13 @@ static int arcnet_rebuild_header(struct sk_buff *skb)
554 unsigned short type; 557 unsigned short type;
555 uint8_t daddr=0; 558 uint8_t daddr=0;
556 struct ArcProto *proto; 559 struct ArcProto *proto;
557 560 /*
558 if (skb->nh.raw - skb->mac.raw != 2) { 561 * XXX: Why not use skb->mac_len?
562 */
563 if (skb->network_header - skb->mac_header != 2) {
559 BUGMSG(D_NORMAL, 564 BUGMSG(D_NORMAL,
560 "rebuild_header: shouldn't be here! (hdrsize=%d)\n", 565 "rebuild_header: shouldn't be here! (hdrsize=%d)\n",
561 (int)(skb->nh.raw - skb->mac.raw)); 566 (int)(skb->network_header - skb->mac_header));
562 return 0; 567 return 0;
563 } 568 }
564 type = *(uint16_t *) skb_pull(skb, 2); 569 type = *(uint16_t *) skb_pull(skb, 2);
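arcnet compares the two header marks directly: skb->network_header - skb->mac_header is the header gap, and the new XXX comments hint that skb->mac_len could express the same thing. The subtraction compiles under either sk_buff representation because both members share the sk_buff_data_t type. A sketch of the check, where the expected gap of 2 is arcnet's provisional protocol stub:

    #include <linux/skbuff.h>

    static int hdr_gap_ok_sketch(const struct sk_buff *skb)
    {
            /* was: skb->nh.raw - skb->mac.raw != 2 */
            return skb->network_header - skb->mac_header == 2;
    }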
diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c
index 66485585ab39..cc4610db6395 100644
--- a/drivers/net/arcnet/capmode.c
+++ b/drivers/net/arcnet/capmode.c
@@ -122,10 +122,8 @@ static void rx(struct net_device *dev, int bufnum,
122 } 122 }
123 skb_put(skb, length + ARC_HDR_SIZE + sizeof(int)); 123 skb_put(skb, length + ARC_HDR_SIZE + sizeof(int));
124 skb->dev = dev; 124 skb->dev = dev;
125 125 skb_reset_mac_header(skb);
126 pkt = (struct archdr *) skb->data; 126 pkt = (struct archdr *)skb_mac_header(skb);
127
128 skb->mac.raw = skb->data;
129 skb_pull(skb, ARC_HDR_SIZE); 127 skb_pull(skb, ARC_HDR_SIZE);
130 128
131 /* up to sizeof(pkt->soft) has already been copied from the card */ 129 /* up to sizeof(pkt->soft) has already been copied from the card */
@@ -270,13 +268,13 @@ static int ack_tx(struct net_device *dev, int acked)
270 skb_put(ackskb, length + ARC_HDR_SIZE ); 268 skb_put(ackskb, length + ARC_HDR_SIZE );
271 ackskb->dev = dev; 269 ackskb->dev = dev;
272 270
273 ackpkt = (struct archdr *) ackskb->data; 271 skb_reset_mac_header(ackskb);
274 272 ackpkt = (struct archdr *)skb_mac_header(ackskb);
275 ackskb->mac.raw = ackskb->data;
276 /* skb_pull(ackskb, ARC_HDR_SIZE); */ 273 /* skb_pull(ackskb, ARC_HDR_SIZE); */
277 274
278 275
279 memcpy(ackpkt, lp->outgoing.skb->data, ARC_HDR_SIZE+sizeof(struct arc_cap)); 276 skb_copy_from_linear_data(lp->outgoing.skb, ackpkt,
277 ARC_HDR_SIZE + sizeof(struct arc_cap));
280 ackpkt->soft.cap.proto=0; /* using protocol 0 for acknowledge */ 278 ackpkt->soft.cap.proto=0; /* using protocol 0 for acknowledge */
281 ackpkt->soft.cap.mes.ack=acked; 279 ackpkt->soft.cap.mes.ack=acked;
282 280
diff --git a/drivers/net/arcnet/rfc1051.c b/drivers/net/arcnet/rfc1051.c
index 6d6c69f036ef..2de8877ece29 100644
--- a/drivers/net/arcnet/rfc1051.c
+++ b/drivers/net/arcnet/rfc1051.c
@@ -94,7 +94,7 @@ static unsigned short type_trans(struct sk_buff *skb, struct net_device *dev)
94 int hdr_size = ARC_HDR_SIZE + RFC1051_HDR_SIZE; 94 int hdr_size = ARC_HDR_SIZE + RFC1051_HDR_SIZE;
95 95
96 /* Pull off the arcnet header. */ 96 /* Pull off the arcnet header. */
97 skb->mac.raw = skb->data; 97 skb_reset_mac_header(skb);
98 skb_pull(skb, hdr_size); 98 skb_pull(skb, hdr_size);
99 99
100 if (pkt->hard.dest == 0) 100 if (pkt->hard.dest == 0)
diff --git a/drivers/net/arcnet/rfc1201.c b/drivers/net/arcnet/rfc1201.c
index bee34226abfa..460a095000c2 100644
--- a/drivers/net/arcnet/rfc1201.c
+++ b/drivers/net/arcnet/rfc1201.c
@@ -96,7 +96,7 @@ static unsigned short type_trans(struct sk_buff *skb, struct net_device *dev)
96 int hdr_size = ARC_HDR_SIZE + RFC1201_HDR_SIZE; 96 int hdr_size = ARC_HDR_SIZE + RFC1201_HDR_SIZE;
97 97
98 /* Pull off the arcnet header. */ 98 /* Pull off the arcnet header. */
99 skb->mac.raw = skb->data; 99 skb_reset_mac_header(skb);
100 skb_pull(skb, hdr_size); 100 skb_pull(skb, hdr_size);
101 101
102 if (pkt->hard.dest == 0) 102 if (pkt->hard.dest == 0)
diff --git a/drivers/net/ariadne.c b/drivers/net/ariadne.c
index 9dfc09b181c1..a0e68e718531 100644
--- a/drivers/net/ariadne.c
+++ b/drivers/net/ariadne.c
@@ -743,7 +743,6 @@ static int ariadne_rx(struct net_device *dev)
743 } 743 }
744 744
745 745
746 skb->dev = dev;
747 skb_reserve(skb,2); /* 16 byte align */ 746 skb_reserve(skb,2); /* 16 byte align */
748 skb_put(skb,pkt_len); /* Make room */ 747 skb_put(skb,pkt_len); /* Make room */
749 eth_copy_and_sum(skb, (char *)priv->rx_buff[entry], pkt_len,0); 748 eth_copy_and_sum(skb, (char *)priv->rx_buff[entry], pkt_len,0);
diff --git a/drivers/net/arm/am79c961a.c b/drivers/net/arm/am79c961a.c
index ddd12d44ff22..8f0d7ce503c9 100644
--- a/drivers/net/arm/am79c961a.c
+++ b/drivers/net/arm/am79c961a.c
@@ -526,7 +526,6 @@ am79c961_rx(struct net_device *dev, struct dev_priv *priv)
526 skb = dev_alloc_skb(len + 2); 526 skb = dev_alloc_skb(len + 2);
527 527
528 if (skb) { 528 if (skb) {
529 skb->dev = dev;
530 skb_reserve(skb, 2); 529 skb_reserve(skb, 2);
531 530
532 am_readbuffer(dev, pktaddr, skb_put(skb, len), len); 531 am_readbuffer(dev, pktaddr, skb_put(skb, len), len);
diff --git a/drivers/net/arm/at91_ether.c b/drivers/net/arm/at91_ether.c
index 1621b8fe35cf..152fa7a042b8 100644
--- a/drivers/net/arm/at91_ether.c
+++ b/drivers/net/arm/at91_ether.c
@@ -858,7 +858,6 @@ static void at91ether_rx(struct net_device *dev)
858 skb_reserve(skb, 2); 858 skb_reserve(skb, 2);
859 memcpy(skb_put(skb, pktlen), p_recv, pktlen); 859 memcpy(skb_put(skb, pktlen), p_recv, pktlen);
860 860
861 skb->dev = dev;
862 skb->protocol = eth_type_trans(skb, dev); 861 skb->protocol = eth_type_trans(skb, dev);
863 dev->last_rx = jiffies; 862 dev->last_rx = jiffies;
864 lp->stats.rx_bytes += pktlen; 863 lp->stats.rx_bytes += pktlen;
diff --git a/drivers/net/arm/ep93xx_eth.c b/drivers/net/arm/ep93xx_eth.c
index dd698b033a62..2438c5bff237 100644
--- a/drivers/net/arm/ep93xx_eth.c
+++ b/drivers/net/arm/ep93xx_eth.c
@@ -255,7 +255,6 @@ static int ep93xx_rx(struct net_device *dev, int *budget)
255 255
256 skb = dev_alloc_skb(length + 2); 256 skb = dev_alloc_skb(length + 2);
257 if (likely(skb != NULL)) { 257 if (likely(skb != NULL)) {
258 skb->dev = dev;
259 skb_reserve(skb, 2); 258 skb_reserve(skb, 2);
260 dma_sync_single(NULL, ep->descs->rdesc[entry].buf_addr, 259 dma_sync_single(NULL, ep->descs->rdesc[entry].buf_addr,
261 length, DMA_FROM_DEVICE); 260 length, DMA_FROM_DEVICE);
diff --git a/drivers/net/arm/ether1.c b/drivers/net/arm/ether1.c
index a2921882eba8..f075cebe84ad 100644
--- a/drivers/net/arm/ether1.c
+++ b/drivers/net/arm/ether1.c
@@ -875,7 +875,6 @@ ether1_recv_done (struct net_device *dev)
875 skb = dev_alloc_skb (length + 2); 875 skb = dev_alloc_skb (length + 2);
876 876
877 if (skb) { 877 if (skb) {
878 skb->dev = dev;
879 skb_reserve (skb, 2); 878 skb_reserve (skb, 2);
880 879
881 ether1_readbuffer (dev, skb_put (skb, length), rbd.rbd_bufl, length); 880 ether1_readbuffer (dev, skb_put (skb, length), rbd.rbd_bufl, length);
diff --git a/drivers/net/arm/ether3.c b/drivers/net/arm/ether3.c
index 841178343a07..32da2eb9bcee 100644
--- a/drivers/net/arm/ether3.c
+++ b/drivers/net/arm/ether3.c
@@ -661,7 +661,6 @@ if (next_ptr < RX_START || next_ptr >= RX_END) {
661 if (skb) { 661 if (skb) {
662 unsigned char *buf; 662 unsigned char *buf;
663 663
664 skb->dev = dev;
665 skb_reserve(skb, 2); 664 skb_reserve(skb, 2);
666 buf = skb_put(skb, length); 665 buf = skb_put(skb, length);
667 ether3_readbuffer(dev, buf + 12, length - 12); 666 ether3_readbuffer(dev, buf + 12, length - 12);
diff --git a/drivers/net/at1700.c b/drivers/net/at1700.c
index 56ae8babd919..bed8e0ebaf19 100644
--- a/drivers/net/at1700.c
+++ b/drivers/net/at1700.c
@@ -768,7 +768,6 @@ net_rx(struct net_device *dev)
768 lp->stats.rx_dropped++; 768 lp->stats.rx_dropped++;
769 break; 769 break;
770 } 770 }
771 skb->dev = dev;
772 skb_reserve(skb,2); 771 skb_reserve(skb,2);
773 772
774 insw(ioaddr + DATAPORT, skb_put(skb,pkt_len), (pkt_len + 1) >> 1); 773 insw(ioaddr + DATAPORT, skb_put(skb,pkt_len), (pkt_len + 1) >> 1);
diff --git a/drivers/net/atari_bionet.c b/drivers/net/atari_bionet.c
index 4e3bf6a1f22c..3d87bd2b4194 100644
--- a/drivers/net/atari_bionet.c
+++ b/drivers/net/atari_bionet.c
@@ -453,7 +453,8 @@ bionet_send_packet(struct sk_buff *skb, struct net_device *dev) {
 		stdma_lock(bionet_intr, NULL);
 		local_irq_restore(flags);
 		if( !STRAM_ADDR(buf+length-1) ) {
-			memcpy(nic_packet->buffer, skb->data, length);
+			skb_copy_from_linear_data(skb, nic_packet->buffer,
+						  length);
 			buf = (unsigned long)&((struct nic_pkt_s *)phys_nic_packet)->buffer;
 		}
 
@@ -544,13 +545,13 @@ bionet_poll_rx(struct net_device *dev) {
 			break;
 		}
 
-		skb->dev = dev;
 		skb_reserve( skb, 2 );		/* 16 Byte align */
 		skb_put( skb, pkt_len );	/* make room */
 
 		/* 'skb->data' points to the start of sk_buff data area.
 		 */
-		memcpy(skb->data, nic_packet->buffer, pkt_len);
+		skb_copy_to_linear_data(skb, nic_packet->buffer,
+					pkt_len);
 		skb->protocol = eth_type_trans( skb, dev );
 		netif_rx(skb);
 		dev->last_rx = jiffies;
diff --git a/drivers/net/atari_pamsnet.c b/drivers/net/atari_pamsnet.c
index 3b5436149286..54714409a09b 100644
--- a/drivers/net/atari_pamsnet.c
+++ b/drivers/net/atari_pamsnet.c
@@ -717,7 +717,8 @@ pamsnet_send_packet(struct sk_buff *skb, struct net_device *dev) {
 
 	local_irq_restore(flags);
 	if( !STRAM_ADDR(buf+length-1) ) {
-		memcpy(nic_packet->buffer, skb->data, length);
+		skb_copy_from_linear_data(skb, nic_packet->buffer,
+					  length);
 		buf = (unsigned long)phys_nic_packet;
 	}
 
@@ -792,7 +793,8 @@ pamsnet_poll_rx(struct net_device *dev) {
 
 		/* 'skb->data' points to the start of sk_buff data area.
 		 */
-		memcpy(skb->data, nic_packet->buffer, pkt_len);
+		skb_copy_to_linear_data(skb, nic_packet->buffer,
+					pkt_len);
 		netif_rx(skb);
 		dev->last_rx = jiffies;
 		lp->stats.rx_packets++;
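The memcpy() conversions in the two Atari drivers above introduce the skb_copy_{from,to}_linear_data*() helpers used throughout the rest of this patch. They are straight wrappers over memcpy() on the skb's linear buffer; a sketch of the equivalences (illustrative function, not in the tree):

#include <linux/skbuff.h>

/* Sketch: each helper is memcpy() against skb->data, with the skb
 * argument first so linear-buffer accesses are typed and greppable. */
static void copy_helper_examples(struct sk_buff *skb, void *to,
				 const void *from, int off, unsigned int len)
{
	skb_copy_from_linear_data(skb, to, len);		/* memcpy(to, skb->data, len) */
	skb_copy_from_linear_data_offset(skb, off, to, len);	/* memcpy(to, skb->data + off, len) */
	skb_copy_to_linear_data(skb, from, len);		/* memcpy(skb->data, from, len) */
	skb_copy_to_linear_data_offset(skb, off, from, len);	/* memcpy(skb->data + off, from, len) */
}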
diff --git a/drivers/net/atarilance.c b/drivers/net/atarilance.c
index 7e37ac86a69a..dfa8b9ba4c80 100644
--- a/drivers/net/atarilance.c
+++ b/drivers/net/atarilance.c
@@ -1047,7 +1047,6 @@ static int lance_rx( struct net_device *dev )
 						   pkt_len );
 			}
 
-			skb->dev = dev;
 			skb_reserve( skb, 2 );	/* 16 byte align */
 			skb_put( skb, pkt_len );	/* Make room */
 			lp->memcpy_f( skb->data, PKTBUF_ADDR(head), pkt_len );
diff --git a/drivers/net/atl1/atl1_main.c b/drivers/net/atl1/atl1_main.c
index 8606eac5bec8..4b1d4d153ecf 100644
--- a/drivers/net/atl1/atl1_main.c
+++ b/drivers/net/atl1/atl1_main.c
@@ -408,7 +408,6 @@ static void atl1_rx_checksum(struct atl1_adapter *adapter,
 static u16 atl1_alloc_rx_buffers(struct atl1_adapter *adapter)
 {
 	struct atl1_rfd_ring *rfd_ring = &adapter->rfd_ring;
-	struct net_device *netdev = adapter->netdev;
 	struct pci_dev *pdev = adapter->pdev;
 	struct page *page;
 	unsigned long offset;
@@ -444,7 +443,6 @@ static u16 atl1_alloc_rx_buffers(struct atl1_adapter *adapter)
 		 * the 14 byte MAC header is removed
 		 */
 		skb_reserve(skb, NET_IP_ALIGN);
-		skb->dev = netdev;
 
 		buffer_info->alloced = 1;
 		buffer_info->skb = skb;
@@ -1296,19 +1294,21 @@ static int atl1_tso(struct atl1_adapter *adapter, struct sk_buff *skb,
 	}
 
 	if (skb->protocol == ntohs(ETH_P_IP)) {
-		skb->nh.iph->tot_len = 0;
-		skb->nh.iph->check = 0;
-		skb->h.th->check =
-			~csum_tcpudp_magic(skb->nh.iph->saddr,
-			skb->nh.iph->daddr, 0,
-			IPPROTO_TCP, 0);
-		ipofst = skb->nh.raw - skb->data;
+		struct iphdr *iph = ip_hdr(skb);
+
+		iph->tot_len = 0;
+		iph->check = 0;
+		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
+							 iph->daddr, 0,
+							 IPPROTO_TCP,
+							 0);
+		ipofst = skb_network_offset(skb);
 		if (ipofst != ENET_HEADER_SIZE) /* 802.3 frame */
 			tso->tsopl |= 1 << TSO_PARAM_ETHTYPE_SHIFT;
 
-		tso->tsopl |= (skb->nh.iph->ihl &
+		tso->tsopl |= (iph->ihl &
 			CSUM_PARAM_IPHL_MASK) << CSUM_PARAM_IPHL_SHIFT;
-		tso->tsopl |= ((skb->h.th->doff << 2) &
+		tso->tsopl |= (tcp_hdrlen(skb) &
 			TSO_PARAM_TCPHDRLEN_MASK) << TSO_PARAM_TCPHDRLEN_SHIFT;
 		tso->tsopl |= (skb_shinfo(skb)->gso_size &
 			TSO_PARAM_MSS_MASK) << TSO_PARAM_MSS_SHIFT;
@@ -1327,8 +1327,8 @@ static int atl1_tx_csum(struct atl1_adapter *adapter, struct sk_buff *skb,
 	u8 css, cso;
 
 	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
-		cso = skb->h.raw - skb->data;
-		css = (skb->h.raw + skb->csum_offset) - skb->data;
+		cso = skb_transport_offset(skb);
+		css = cso + skb->csum_offset;
 		if (unlikely(cso & 0x1)) {
 			printk(KERN_DEBUG "%s: payload offset != even number\n",
 				atl1_driver_name);
@@ -1370,8 +1370,7 @@ static void atl1_tx_map(struct atl1_adapter *adapter,
 
 	if (tcp_seg) {
 		/* TSO/GSO */
-		proto_hdr_len =
-			((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
+		proto_hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
 		buffer_info->length = proto_hdr_len;
 		page = virt_to_page(skb->data);
 		offset = (unsigned long)skb->data & ~PAGE_MASK;
@@ -1563,8 +1562,8 @@ static int atl1_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 	mss = skb_shinfo(skb)->gso_size;
 	if (mss) {
 		if (skb->protocol == htons(ETH_P_IP)) {
-			proto_hdr_len = ((skb->h.raw - skb->data) +
-					 (skb->h.th->doff << 2));
+			proto_hdr_len = (skb_transport_offset(skb) +
+					 tcp_hdrlen(skb));
 			if (unlikely(proto_hdr_len > len)) {
 				dev_kfree_skb_any(skb);
 				return NETDEV_TX_OK;
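The atl1 hunks swap open-coded header pointer arithmetic for the new accessors. The identities being relied on, collected in one sketch (both helper functions here are hypothetical, not part of the patch):

#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/skbuff.h>

/* Sketch: was ((skb->h.raw - skb->data) + (skb->h.th->doff << 2)) */
static unsigned int example_proto_hdr_len(const struct sk_buff *skb)
{
	return skb_transport_offset(skb) + tcp_hdrlen(skb);
}

/* Sketch: was ((skb->nh.raw - skb->data) + (skb->nh.iph->ihl << 2)) */
static unsigned int example_ip_span(const struct sk_buff *skb)
{
	return skb_network_offset(skb) + ip_hdrlen(skb);
}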
diff --git a/drivers/net/atp.c b/drivers/net/atp.c
index 2d306fcb7f36..18aba838c1ff 100644
--- a/drivers/net/atp.c
+++ b/drivers/net/atp.c
@@ -793,7 +793,6 @@ static void net_rx(struct net_device *dev)
 		lp->stats.rx_dropped++;
 		goto done;
 	}
-	skb->dev = dev;
 
 	skb_reserve(skb, 2);	/* Align IP on 16 byte boundaries */
 	read_block(ioaddr, pkt_len, skb_put(skb,pkt_len), dev->if_port);
diff --git a/drivers/net/au1000_eth.c b/drivers/net/au1000_eth.c
index 69ae229b680e..d10fb80e9a63 100644
--- a/drivers/net/au1000_eth.c
+++ b/drivers/net/au1000_eth.c
@@ -1125,7 +1125,7 @@ static int au1000_tx(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	pDB = aup->tx_db_inuse[aup->tx_head];
-	memcpy((void *)pDB->vaddr, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, pDB->vaddr, skb->len);
 	if (skb->len < ETH_ZLEN) {
 		for (i=skb->len; i<ETH_ZLEN; i++) {
 			((char *)pDB->vaddr)[i] = 0;
@@ -1205,7 +1205,6 @@ static int au1000_rx(struct net_device *dev)
 			aup->stats.rx_dropped++;
 			continue;
 		}
-		skb->dev = dev;
 		skb_reserve(skb, 2);	/* 16 byte IP header align */
 		eth_copy_and_sum(skb,
 			(unsigned char *)pDB->vaddr, frmlen, 0);
diff --git a/drivers/net/b44.c b/drivers/net/b44.c
index d742bfe24471..879a2fff474e 100644
--- a/drivers/net/b44.c
+++ b/drivers/net/b44.c
@@ -825,12 +825,11 @@ static int b44_rx(struct b44 *bp, int budget)
 			if (copy_skb == NULL)
 				goto drop_it_no_recycle;
 
-			copy_skb->dev = bp->dev;
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			/* DMA sync done above, copy just the actual packet */
-			memcpy(copy_skb->data, skb->data+bp->rx_offset, len);
-
+			skb_copy_from_linear_data_offset(skb, bp->rx_offset,
+							 copy_skb->data, len);
 			skb = copy_skb;
 		}
 		skb->ip_summed = CHECKSUM_NONE;
@@ -1007,7 +1006,8 @@ static int b44_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			goto err_out;
 		}
 
-		memcpy(skb_put(bounce_skb, len), skb->data, skb->len);
+		skb_copy_from_linear_data(skb, skb_put(bounce_skb, len),
+					  skb->len);
 		dev_kfree_skb_any(skb);
 		skb = bounce_skb;
 	}
diff --git a/drivers/net/bmac.c b/drivers/net/bmac.c
index c143304dcff5..4612725965df 100644
--- a/drivers/net/bmac.c
+++ b/drivers/net/bmac.c
@@ -715,7 +715,6 @@ static irqreturn_t bmac_rxdma_intr(int irq, void *dev_id)
 		if (skb != NULL) {
 			nb -= ETHERCRC;
 			skb_put(skb, nb);
-			skb->dev = dev;
 			skb->protocol = eth_type_trans(skb, dev);
 			netif_rx(skb);
 			dev->last_rx = jiffies;
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index e85f5ec48f96..f98a2205a090 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -1884,10 +1884,8 @@ bnx2_rx_int(struct bnx2 *bp, int budget)
 				goto reuse_rx;
 
 			/* aligned copy */
-			memcpy(new_skb->data,
-				skb->data + bp->rx_offset - 2,
-				len + 2);
-
+			skb_copy_from_linear_data_offset(skb, bp->rx_offset - 2,
+							 new_skb->data, len + 2);
 			skb_reserve(new_skb, 2);
 			skb_put(new_skb, len);
 
@@ -4513,6 +4511,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	if ((mss = skb_shinfo(skb)->gso_size) &&
 		(skb->len > (bp->dev->mtu + ETH_HLEN))) {
 		u32 tcp_opt_len, ip_tcp_len;
+		struct iphdr *iph;
 
 		if (skb_header_cloned(skb) &&
 			pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
@@ -4520,25 +4519,23 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			return NETDEV_TX_OK;
 		}
 
-		tcp_opt_len = ((skb->h.th->doff - 5) * 4);
 		vlan_tag_flags |= TX_BD_FLAGS_SW_LSO;
 
 		tcp_opt_len = 0;
-		if (skb->h.th->doff > 5) {
-			tcp_opt_len = (skb->h.th->doff - 5) << 2;
-		}
-		ip_tcp_len = (skb->nh.iph->ihl << 2) + sizeof(struct tcphdr);
+		if (tcp_hdr(skb)->doff > 5)
+			tcp_opt_len = tcp_optlen(skb);
+
+		ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr);
 
-		skb->nh.iph->check = 0;
-		skb->nh.iph->tot_len = htons(mss + ip_tcp_len + tcp_opt_len);
-		skb->h.th->check =
-			~csum_tcpudp_magic(skb->nh.iph->saddr,
-					   skb->nh.iph->daddr,
-					   0, IPPROTO_TCP, 0);
-
-		if (tcp_opt_len || (skb->nh.iph->ihl > 5)) {
-			vlan_tag_flags |= ((skb->nh.iph->ihl - 5) +
-				(tcp_opt_len >> 2)) << 8;
+		iph = ip_hdr(skb);
+		iph->check = 0;
+		iph->tot_len = htons(mss + ip_tcp_len + tcp_opt_len);
+		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
+							 iph->daddr, 0,
+							 IPPROTO_TCP, 0);
+		if (tcp_opt_len || (iph->ihl > 5)) {
+			vlan_tag_flags |= ((iph->ihl - 5) +
+					   (tcp_opt_len >> 2)) << 8;
 		}
 	}
 	else
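bnx2 (like atl1 above and e1000 below) primes the TCP checksum for hardware TSO. The recipe, expressed with the new accessors, in one sketch (hypothetical helper, not from the patch):

#include <linux/in.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/skbuff.h>
#include <net/checksum.h>

/* Sketch: zero the length-dependent IP fields and seed tcp->check with
 * the pseudo-header sum (length 0, no payload folded in), so the NIC
 * can finish the checksum for each segment it emits. */
static void example_tso_prime(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);

	iph->tot_len = 0;
	iph->check = 0;
	tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
						 0, IPPROTO_TCP, 0);
}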
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 3fb354d9c515..7e03f41ae2c2 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -884,8 +884,8 @@ static int ad_lacpdu_send(struct port *port)
 	}
 
 	skb->dev = slave->dev;
-	skb->mac.raw = skb->data;
-	skb->nh.raw = skb->data + ETH_HLEN;
+	skb_reset_mac_header(skb);
+	skb->network_header = skb->mac_header + ETH_HLEN;
 	skb->protocol = PKT_TYPE_LACPDU;
 	skb->priority = TC_PRIO_CONTROL;
 
@@ -928,8 +928,8 @@ static int ad_marker_send(struct port *port, struct marker *marker)
 	skb_reserve(skb, 16);
 
 	skb->dev = slave->dev;
-	skb->mac.raw = skb->data;
-	skb->nh.raw = skb->data + ETH_HLEN;
+	skb_reset_mac_header(skb);
+	skb->network_header = skb->mac_header + ETH_HLEN;
 	skb->protocol = PKT_TYPE_LACPDU;
 
 	marker_header = (struct marker_header *)skb_put(skb, length);
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 217a2eedee0a..92c3b6f6a8e7 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -104,10 +104,15 @@ struct arp_pkt {
 };
 #pragma pack()
 
+static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
+{
+	return (struct arp_pkt *)skb_network_header(skb);
+}
+
 /* Forward declaration */
 static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
 
-static inline u8 _simple_hash(u8 *hash_start, int hash_size)
+static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
 {
 	int i;
 	u8 hash = 0;
@@ -613,7 +618,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, u32 src_ip)
 static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond)
 {
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
-	struct arp_pkt *arp = (struct arp_pkt *)skb->nh.raw;
+	struct arp_pkt *arp = arp_pkt(skb);
 	struct slave *assigned_slave;
 	struct rlb_client_info *client_info;
 	u32 hash_index = 0;
@@ -701,7 +706,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
  */
 static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
 {
-	struct arp_pkt *arp = (struct arp_pkt *)skb->nh.raw;
+	struct arp_pkt *arp = arp_pkt(skb);
 	struct slave *tx_slave = NULL;
 
 	if (arp->op_code == __constant_htons(ARPOP_REPLY)) {
@@ -890,8 +895,8 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[])
 	data = skb_put(skb, size);
 	memcpy(data, &pkt, size);
 
-	skb->mac.raw = data;
-	skb->nh.raw = data + ETH_HLEN;
+	skb_reset_mac_header(skb);
+	skb->network_header = skb->mac_header + ETH_HLEN;
 	skb->protocol = pkt.type;
 	skb->priority = TC_PRIO_CONTROL;
 	skb->dev = slave->dev;
@@ -1263,10 +1268,10 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 	int hash_size = 0;
 	int do_tx_balance = 1;
 	u32 hash_index = 0;
-	u8 *hash_start = NULL;
+	const u8 *hash_start = NULL;
 	int res = 1;
 
-	skb->mac.raw = (unsigned char *)skb->data;
+	skb_reset_mac_header(skb);
 	eth_data = eth_hdr(skb);
 
 	/* make sure that the curr_active_slave and the slaves list do
@@ -1280,15 +1285,18 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 	}
 
 	switch (ntohs(skb->protocol)) {
-	case ETH_P_IP:
+	case ETH_P_IP: {
+		const struct iphdr *iph = ip_hdr(skb);
+
 		if ((memcmp(eth_data->h_dest, mac_bcast, ETH_ALEN) == 0) ||
-		    (skb->nh.iph->daddr == ip_bcast) ||
-		    (skb->nh.iph->protocol == IPPROTO_IGMP)) {
+		    (iph->daddr == ip_bcast) ||
+		    (iph->protocol == IPPROTO_IGMP)) {
 			do_tx_balance = 0;
 			break;
 		}
-		hash_start = (char*)&(skb->nh.iph->daddr);
-		hash_size = sizeof(skb->nh.iph->daddr);
+		hash_start = (char *)&(iph->daddr);
+		hash_size = sizeof(iph->daddr);
+	}
 		break;
 	case ETH_P_IPV6:
 		if (memcmp(eth_data->h_dest, mac_bcast, ETH_ALEN) == 0) {
@@ -1296,8 +1304,8 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 			break;
 		}
 
-		hash_start = (char*)&(skb->nh.ipv6h->daddr);
-		hash_size = sizeof(skb->nh.ipv6h->daddr);
+		hash_start = (char *)&(ipv6_hdr(skb)->daddr);
+		hash_size = sizeof(ipv6_hdr(skb)->daddr);
 		break;
 	case ETH_P_IPX:
 		if (ipx_hdr(skb)->ipx_checksum !=
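The bonding conversions replace raw skb->mac.raw/skb->nh.raw pokes with the header-marking helpers. A sketch of the pattern for a control frame built from scratch — it mirrors the lines in ad_lacpdu_send() and alb_send_learning_packets() above; the function name is illustrative:

#include <linux/if_ether.h>
#include <linux/skbuff.h>

/* Sketch: mark the MAC header at skb->data, then place the network
 * header ETH_HLEN past it, exactly as the bonding hunks do. The
 * helpers work whether the sk_buff stores headers as pointers or as
 * offsets into the buffer. */
static void example_mark_headers(struct sk_buff *skb)
{
	skb_reset_mac_header(skb);
	skb->network_header = skb->mac_header + ETH_HLEN;
}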
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index e4724d874e7c..cea3783c92c5 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2524,7 +2524,7 @@ static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct pack
 	    (2 * sizeof(u32)))))
 		goto out_unlock;
 
-	arp = skb->nh.arph;
+	arp = arp_hdr(skb);
 	if (arp->ar_hln != dev->addr_len ||
 	    skb->pkt_type == PACKET_OTHERHOST ||
 	    skb->pkt_type == PACKET_LOOPBACK ||
@@ -3476,7 +3476,7 @@ static int bond_xmit_hash_policy_l34(struct sk_buff *skb,
 				    struct net_device *bond_dev, int count)
 {
 	struct ethhdr *data = (struct ethhdr *)skb->data;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 	u16 *layer4hdr = (u16 *)((u32 *)iph + iph->ihl);
 	int layer4_xor = 0;
 
@@ -3640,9 +3640,8 @@ static struct net_device_stats *bond_get_stats(struct net_device *bond_dev)
 	read_lock_bh(&bond->lock);
 
 	bond_for_each_slave(bond, slave, i) {
-		if (slave->dev->get_stats) {
-			sstats = slave->dev->get_stats(slave->dev);
-
+		sstats = slave->dev->get_stats(slave->dev);
+		if (sstats) {
 			stats->rx_packets += sstats->rx_packets;
 			stats->rx_bytes += sstats->rx_bytes;
 			stats->rx_errors += sstats->rx_errors;
diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c
index c8126484c2be..4aec747d9e43 100644
--- a/drivers/net/cassini.c
+++ b/drivers/net/cassini.c
@@ -1995,7 +1995,6 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
 		return -1;
 
 	*skbref = skb;
-	skb->dev = cp->dev;
 	skb_reserve(skb, swivel);
 
 	p = skb->data;
@@ -2822,10 +2821,8 @@ static inline int cas_xmit_tx_ringN(struct cas *cp, int ring,
 
 	ctrl = 0;
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		u64 csum_start_off, csum_stuff_off;
-
-		csum_start_off = (u64) (skb->h.raw - skb->data);
-		csum_stuff_off = csum_start_off + skb->csum_offset;
+		const u64 csum_start_off = skb_transport_offset(skb);
+		const u64 csum_stuff_off = csum_start_off + skb->csum_offset;
 
 		ctrl = TX_DESC_CSUM_EN |
 			CAS_BASE(TX_DESC_CSUM_START, csum_start_off) |
@@ -2849,8 +2846,8 @@ static inline int cas_xmit_tx_ringN(struct cas *cp, int ring,
 			ctrl | TX_DESC_SOF, 0);
 		entry = TX_DESC_NEXT(ring, entry);
 
-		memcpy(tx_tiny_buf(cp, ring, entry), skb->data +
-		       len - tabort, tabort);
+		skb_copy_from_linear_data_offset(skb, len - tabort,
+						 tx_tiny_buf(cp, ring, entry), tabort);
 		mapping = tx_tiny_map(cp, ring, entry, tentry);
 		cas_write_txd(cp, ring, entry, mapping, tabort, ctrl,
 			      (nr_frags == 0));
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index 326d4a665123..e4f874a70fe5 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -1062,7 +1062,7 @@ static inline struct sk_buff *get_packet(struct pci_dev *pdev,
 				    pci_unmap_addr(ce, dma_addr),
 				    pci_unmap_len(ce, dma_len),
 				    PCI_DMA_FROMDEVICE);
-		memcpy(skb->data, ce->skb->data, len);
+		skb_copy_from_linear_data(ce->skb, skb->data, len);
 		pci_dma_sync_single_for_device(pdev,
 				    pci_unmap_addr(ce, dma_addr),
 				    pci_unmap_len(ce, dma_len),
@@ -1379,12 +1379,11 @@ static void sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len)
 	}
 	__skb_pull(skb, sizeof(*p));
 
-	skb->dev = adapter->port[p->iff].dev;
 	skb->dev->last_rx = jiffies;
 	st = per_cpu_ptr(sge->port_stats[p->iff], smp_processor_id());
 	st->rx_packets++;
 
-	skb->protocol = eth_type_trans(skb, skb->dev);
+	skb->protocol = eth_type_trans(skb, adapter->port[p->iff].dev);
 	if ((adapter->flags & RX_CSUM_ENABLED) && p->csum == 0xffff &&
 	    skb->protocol == htons(ETH_P_IP) &&
 	    (skb->data[9] == IPPROTO_TCP || skb->data[9] == IPPROTO_UDP)) {
@@ -1866,14 +1865,14 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		++st->tx_tso;
 
-		eth_type = skb->nh.raw - skb->data == ETH_HLEN ?
+		eth_type = skb_network_offset(skb) == ETH_HLEN ?
 			CPL_ETH_II : CPL_ETH_II_VLAN;
 
 		hdr = (struct cpl_tx_pkt_lso *)skb_push(skb, sizeof(*hdr));
 		hdr->opcode = CPL_TX_PKT_LSO;
 		hdr->ip_csum_dis = hdr->l4_csum_dis = 0;
-		hdr->ip_hdr_words = skb->nh.iph->ihl;
-		hdr->tcp_hdr_words = skb->h.th->doff;
+		hdr->ip_hdr_words = ip_hdr(skb)->ihl;
+		hdr->tcp_hdr_words = tcp_hdr(skb)->doff;
 		hdr->eth_type_mss = htons(MK_ETH_TYPE_MSS(eth_type,
 						skb_shinfo(skb)->gso_size));
 		hdr->len = htonl(skb->len - sizeof(*hdr));
@@ -1913,7 +1912,7 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		if (!(adapter->flags & UDP_CSUM_CAPABLE) &&
 		    skb->ip_summed == CHECKSUM_PARTIAL &&
-		    skb->nh.iph->protocol == IPPROTO_UDP) {
+		    ip_hdr(skb)->protocol == IPPROTO_UDP) {
 			if (unlikely(skb_checksum_help(skb))) {
 				pr_debug("%s: unable to do udp checksum\n", dev->name);
 				dev_kfree_skb_any(skb);
@@ -1926,7 +1925,7 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		 */
 		if ((unlikely(!adapter->sge->espibug_skb[dev->if_port]))) {
 			if (skb->protocol == htons(ETH_P_ARP) &&
-			    skb->nh.arph->ar_op == htons(ARPOP_REQUEST)) {
+			    arp_hdr(skb)->ar_op == htons(ARPOP_REQUEST)) {
 				adapter->sge->espibug_skb[dev->if_port] = skb;
 				/* We want to re-use this skb later. We
 				 * simply bump the reference count and it
@@ -2096,10 +2095,14 @@ static void espibug_workaround_t204(unsigned long data)
 				0x0, 0x7, 0x43, 0x0, 0x0, 0x0
 			};
 
-			memcpy(skb->data + sizeof(struct cpl_tx_pkt),
-				ch_mac_addr, ETH_ALEN);
-			memcpy(skb->data + skb->len - 10,
-				ch_mac_addr, ETH_ALEN);
+			skb_copy_to_linear_data_offset(skb,
+						       sizeof(struct cpl_tx_pkt),
+						       ch_mac_addr,
+						       ETH_ALEN);
+			skb_copy_to_linear_data_offset(skb,
+						       skb->len - 10,
+						       ch_mac_addr,
+						       ETH_ALEN);
 			skb->cb[0] = 0xff;
 		}
 
@@ -2126,10 +2129,14 @@ static void espibug_workaround(unsigned long data)
 		if (!skb->cb[0]) {
 			u8 ch_mac_addr[ETH_ALEN] =
 			    {0x0, 0x7, 0x43, 0x0, 0x0, 0x0};
-			memcpy(skb->data + sizeof(struct cpl_tx_pkt),
-			       ch_mac_addr, ETH_ALEN);
-			memcpy(skb->data + skb->len - 10, ch_mac_addr,
-			       ETH_ALEN);
+			skb_copy_to_linear_data_offset(skb,
+						       sizeof(struct cpl_tx_pkt),
+						       ch_mac_addr,
+						       ETH_ALEN);
+			skb_copy_to_linear_data_offset(skb,
+						       skb->len - 10,
+						       ch_mac_addr,
+						       ETH_ALEN);
 			skb->cb[0] = 0xff;
 		}
 
diff --git a/drivers/net/cris/eth_v10.c b/drivers/net/cris/eth_v10.c
index 8eb571276000..5bdf5ca85a65 100644
--- a/drivers/net/cris/eth_v10.c
+++ b/drivers/net/cris/eth_v10.c
@@ -1348,7 +1348,8 @@ e100_rx(struct net_device *dev)
 
 #ifdef ETHDEBUG
 	printk("head = 0x%x, data = 0x%x, tail = 0x%x, end = 0x%x\n",
-	       skb->head, skb->data, skb->tail, skb->end);
+	       skb->head, skb->data, skb_tail_pointer(skb),
+	       skb_end_pointer(skb));
 	printk("copying packet to 0x%x.\n", skb_data_ptr);
 #endif
 
@@ -1375,7 +1376,6 @@ e100_rx(struct net_device *dev)
 		myNextRxDesc->descr.buf = L1_CACHE_ALIGN(virt_to_phys(myNextRxDesc->skb->data));
 	}
 
-	skb->dev = dev;
 	skb->protocol = eth_type_trans(skb, dev);
 
 	/* Send the packet to the upper layers */
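The ETHDEBUG printk in the cris driver now reads tail/end through accessors because, with this series, skb->tail and skb->end may hold offsets rather than pointers (the NET_SKBUFF_DATA_USES_OFFSET configuration). A sketch of the accessor use (hypothetical helper, not in the driver):

#include <linux/skbuff.h>

/* Sketch: ask for real addresses instead of touching the raw fields,
 * which works for both the pointer and the offset representation. */
static unsigned int example_tailroom(const struct sk_buff *skb)
{
	return skb_end_pointer(skb) - skb_tail_pointer(skb);
}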
diff --git a/drivers/net/cs89x0.c b/drivers/net/cs89x0.c
index 4612f71a7106..9774bb1b3e80 100644
--- a/drivers/net/cs89x0.c
+++ b/drivers/net/cs89x0.c
@@ -1004,7 +1004,6 @@ skip_this_frame:
 		return;
 	}
 	skb_reserve(skb, 2);	/* longword align L3 header */
-	skb->dev = dev;
 
 	if (bp + length > lp->end_dma_buff) {
 		int semi_cnt = lp->end_dma_buff - bp;
@@ -1702,7 +1701,6 @@ net_rx(struct net_device *dev)
 		return;
 	}
 	skb_reserve(skb, 2);	/* longword align L3 header */
-	skb->dev = dev;
 
 	readwords(ioaddr, RX_FRAME_PORT, skb_put(skb, length), length >> 1);
 	if (length & 1)
diff --git a/drivers/net/cxgb3/cxgb3_offload.c b/drivers/net/cxgb3/cxgb3_offload.c
index 199e5066acf3..ebcf35e4cf5b 100644
--- a/drivers/net/cxgb3/cxgb3_offload.c
+++ b/drivers/net/cxgb3/cxgb3_offload.c
@@ -783,7 +783,7 @@ static int do_trace(struct t3cdev *dev, struct sk_buff *skb)
 		skb->protocol = htons(0xffff);
 		skb->dev = dev->lldev;
 		skb_pull(skb, sizeof(*p));
-		skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
 		netif_receive_skb(skb);
 		return 0;
 	}
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 027ab2c3825c..3666586a4831 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -661,7 +661,7 @@ static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
 
 	if (skb) {
 		__skb_put(skb, IMMED_PKT_SIZE);
-		memcpy(skb->data, resp->imm_data, IMMED_PKT_SIZE);
+		skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE);
 	}
 	return skb;
 }
@@ -897,11 +897,11 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
 		d->flit[2] = 0;
 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
 		hdr->cntrl = htonl(cntrl);
-		eth_type = skb->nh.raw - skb->data == ETH_HLEN ?
+		eth_type = skb_network_offset(skb) == ETH_HLEN ?
 			CPL_ETH_II : CPL_ETH_II_VLAN;
 		tso_info |= V_LSO_ETH_TYPE(eth_type) |
-			V_LSO_IPHDR_WORDS(skb->nh.iph->ihl) |
-			V_LSO_TCPHDR_WORDS(skb->h.th->doff);
+			V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
+			V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
 		hdr->lso_info = htonl(tso_info);
 		flits = 3;
 	} else {
@@ -913,7 +913,8 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
 		if (skb->len <= WR_LEN - sizeof(*cpl)) {
 			q->sdesc[pidx].skb = NULL;
 			if (!skb->data_len)
-				memcpy(&d->flit[2], skb->data, skb->len);
+				skb_copy_from_linear_data(skb, &d->flit[2],
+							  skb->len);
 			else
 				skb_copy_bits(skb, 0, &d->flit[2], skb->len);
 
@@ -1319,16 +1320,19 @@ static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
 	/* Only TX_DATA builds SGLs */
 
 	from = (struct work_request_hdr *)skb->data;
-	memcpy(&d->flit[1], &from[1], skb->h.raw - skb->data - sizeof(*from));
+	memcpy(&d->flit[1], &from[1],
+	       skb_transport_offset(skb) - sizeof(*from));
 
-	flits = (skb->h.raw - skb->data) / 8;
+	flits = skb_transport_offset(skb) / 8;
 	sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
-	sgl_flits = make_sgl(skb, sgp, skb->h.raw, skb->tail - skb->h.raw,
+	sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
+			     skb->tail - skb->transport_header,
 			     adap->pdev);
 	if (need_skb_unmap()) {
 		setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
 		skb->destructor = deferred_unmap_destructor;
-		((struct unmap_info *)skb->cb)->len = skb->tail - skb->h.raw;
+		((struct unmap_info *)skb->cb)->len = (skb->tail -
+						       skb->transport_header);
 	}
 
 	write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
@@ -1349,8 +1353,8 @@ static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
 	if (skb->len <= WR_LEN && cnt == 0)
 		return 1;	/* packet fits as immediate data */
 
-	flits = (skb->h.raw - skb->data) / 8;	/* headers */
-	if (skb->tail != skb->h.raw)
+	flits = skb_transport_offset(skb) / 8;	/* headers */
+	if (skb->tail != skb->transport_header)
 		cnt++;
 	return flits_to_desc(flits + sgl_len(cnt));
 }
@@ -1620,7 +1624,9 @@ static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
 			     unsigned int gather_idx)
 {
 	rq->offload_pkts++;
-	skb->mac.raw = skb->nh.raw = skb->h.raw = skb->data;
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
 
 	if (rq->polling) {
 		rx_gather[gather_idx++] = skb;
@@ -1684,9 +1690,8 @@ static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
 	struct port_info *pi;
 
 	skb_pull(skb, sizeof(*p) + pad);
-	skb->dev = adap->port[p->iff];
 	skb->dev->last_rx = jiffies;
-	skb->protocol = eth_type_trans(skb, skb->dev);
+	skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
 	pi = netdev_priv(skb->dev);
 	if (pi->rx_csum_offload && p->csum_valid && p->csum == 0xffff &&
 	    !p->fragment) {
@@ -1717,11 +1722,11 @@ static void skb_data_init(struct sk_buff *skb, struct sge_fl_page *p,
 {
 	skb->len = len;
 	if (len <= SKB_DATA_SIZE) {
-		memcpy(skb->data, p->va, len);
+		skb_copy_to_linear_data(skb, p->va, len);
 		skb->tail += len;
 		put_page(p->frag.page);
 	} else {
-		memcpy(skb->data, p->va, SKB_DATA_SIZE);
+		skb_copy_to_linear_data(skb, p->va, SKB_DATA_SIZE);
 		skb_shinfo(skb)->frags[0].page = p->frag.page;
 		skb_shinfo(skb)->frags[0].page_offset =
 		    p->frag.page_offset + SKB_DATA_SIZE;
@@ -1767,7 +1772,7 @@ static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
 			__skb_put(skb, len);
 			pci_dma_sync_single_for_cpu(adap->pdev, mapping, len,
 						    PCI_DMA_FROMDEVICE);
-			memcpy(skb->data, sd->t.skb->data, len);
+			skb_copy_from_linear_data(sd->t.skb, skb->data, len);
 			pci_dma_sync_single_for_device(adap->pdev, mapping, len,
 						       PCI_DMA_FROMDEVICE);
 		} else if (!drop_thres)
diff --git a/drivers/net/de600.c b/drivers/net/de600.c
index e547ce14eefe..dae97b860daa 100644
--- a/drivers/net/de600.c
+++ b/drivers/net/de600.c
@@ -359,7 +359,6 @@ static void de600_rx_intr(struct net_device *dev)
 	}
 	/* else */
 
-	skb->dev = dev;
 	skb_reserve(skb,2);	/* Align */
 
 	/* 'skb->data' points to the start of sk_buff data area. */
diff --git a/drivers/net/de620.c b/drivers/net/de620.c
index b6ad0cb50552..dc4892426174 100644
--- a/drivers/net/de620.c
+++ b/drivers/net/de620.c
@@ -697,7 +697,6 @@ static int de620_rx_intr(struct net_device *dev)
 	}
 	else { /* Yep! Go get it! */
 		skb_reserve(skb,2);	/* Align */
-		skb->dev = dev;
 		/* skb->data points to the start of sk_buff data area */
 		buffer = skb_put(skb,size);
 		/* copy the packet into the buffer */
diff --git a/drivers/net/declance.c b/drivers/net/declance.c
index 9f7e1db8ce62..95d854e2295c 100644
--- a/drivers/net/declance.c
+++ b/drivers/net/declance.c
@@ -616,7 +616,6 @@ static int lance_rx(struct net_device *dev)
 			}
 			lp->stats.rx_bytes += len;
 
-			skb->dev = dev;
 			skb_reserve(skb, 2);	/* 16 byte align */
 			skb_put(skb, len);	/* make room */
 
diff --git a/drivers/net/defxx.c b/drivers/net/defxx.c
index 07d2731c1aa8..571d82f8008c 100644
--- a/drivers/net/defxx.c
+++ b/drivers/net/defxx.c
@@ -3091,13 +3091,13 @@ static void dfx_rcv_queue_process(
 				{
 					/* Receive buffer allocated, pass receive packet up */
 
-					memcpy(skb->data, p_buff + RCV_BUFF_K_PADDING, pkt_len+3);
+					skb_copy_to_linear_data(skb,
+								p_buff + RCV_BUFF_K_PADDING,
+								pkt_len + 3);
 				}
 
 				skb_reserve(skb,3);		/* adjust data field so that it points to FC byte */
 				skb_put(skb, pkt_len);	/* pass up packet length, NOT including CRC */
-				skb->dev = bp->dev;		/* pass up device pointer */
-
 				skb->protocol = fddi_type_trans(skb, bp->dev);
 				bp->rcv_total_bytes += skb->len;
 				netif_rx(skb);
diff --git a/drivers/net/depca.c b/drivers/net/depca.c
index f3807aaf10aa..183497020bfc 100644
--- a/drivers/net/depca.c
+++ b/drivers/net/depca.c
@@ -1044,7 +1044,6 @@ static int depca_rx(struct net_device *dev)
 			unsigned char *buf;
 			skb_reserve(skb, 2);	/* 16 byte align the IP header */
 			buf = skb_put(skb, pkt_len);
-			skb->dev = dev;
 			if (entry < lp->rx_old) {	/* Wrapped buffer */
 				len = (lp->rxRingMask - lp->rx_old + 1) * RX_BUFF_SZ;
 				memcpy_fromio(buf, lp->rx_buff[lp->rx_old], len);
diff --git a/drivers/net/dgrs.c b/drivers/net/dgrs.c
index a79520295fd0..df62c0232f36 100644
--- a/drivers/net/dgrs.c
+++ b/drivers/net/dgrs.c
@@ -503,7 +503,6 @@ dgrs_rcv_frame(
 		/* discarding the frame */
 		goto out;
 	}
-	skb->dev = devN;
 	skb_reserve(skb, 2);	/* Align IP header */
 
 again:
@@ -742,7 +741,7 @@ static int dgrs_start_xmit(struct sk_buff *skb, struct net_device *devN)
 		}
 
 		amt = min_t(unsigned int, len, rbdp->size - count);
-		memcpy( (char *) S2H(rbdp->buf) + count, skb->data + i, amt);
+		skb_copy_from_linear_data_offset(skb, i, S2H(rbdp->buf) + count, amt);
 		i += amt;
 		count += amt;
 		len -= amt;
diff --git a/drivers/net/dl2k.c b/drivers/net/dl2k.c
index 9d446a0fe0bf..74ec64a1625d 100644
--- a/drivers/net/dl2k.c
+++ b/drivers/net/dl2k.c
@@ -504,7 +504,6 @@ rio_timer (unsigned long data)
 				break;
 			}
 			np->rx_skbuff[entry] = skb;
-			skb->dev = dev;
 			/* 16 byte align the IP header */
 			skb_reserve (skb, 2);
 			np->rx_ring[entry].fraginfo =
@@ -575,7 +574,6 @@ alloc_list (struct net_device *dev)
 				dev->name);
 			break;
 		}
-		skb->dev = dev;	/* Mark as being used by this device. */
 		skb_reserve (skb, 2);	/* 16 byte align the IP header. */
 		/* Rubicon now supports 40 bits of addressing space. */
 		np->rx_ring[i].fraginfo =
@@ -866,7 +864,6 @@ receive_packet (struct net_device *dev)
 						  DMA_48BIT_MASK,
 						  np->rx_buf_sz,
 						  PCI_DMA_FROMDEVICE);
-			skb->dev = dev;
 			/* 16 byte align the IP header */
 			skb_reserve (skb, 2);
 			eth_copy_and_sum (skb,
@@ -910,7 +907,6 @@ receive_packet (struct net_device *dev)
 				break;
 			}
 			np->rx_skbuff[entry] = skb;
-			skb->dev = dev;
 			/* 16 byte align the IP header */
 			skb_reserve (skb, 2);
 			np->rx_ring[entry].fraginfo =
diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c
index 615d2b14efa7..8cc1174e7f64 100644
--- a/drivers/net/dm9000.c
+++ b/drivers/net/dm9000.c
@@ -954,7 +954,6 @@ dm9000_rx(struct net_device *dev)
 		/* Move data from DM9000 */
 		if (GoodPacket
 		    && ((skb = dev_alloc_skb(RxLen + 4)) != NULL)) {
-			skb->dev = dev;
 			skb_reserve(skb, 2);
 			rdptr = (u8 *) skb_put(skb, RxLen - 4);
 
diff --git a/drivers/net/e100.c b/drivers/net/e100.c
index 0cefef5e3f06..4d0e0aea72bf 100644
--- a/drivers/net/e100.c
+++ b/drivers/net/e100.c
@@ -1769,7 +1769,7 @@ static int e100_rx_alloc_skb(struct nic *nic, struct rx *rx)
 
 	/* Align, init, and map the RFD. */
 	skb_reserve(rx->skb, NET_IP_ALIGN);
-	memcpy(rx->skb->data, &nic->blank_rfd, sizeof(struct rfd));
+	skb_copy_to_linear_data(rx->skb, &nic->blank_rfd, sizeof(struct rfd));
 	rx->dma_addr = pci_map_single(nic->pdev, rx->skb->data,
 		RFD_BUF_LEN, PCI_DMA_BIDIRECTIONAL);
 
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index b28a915bd980..48e2ade704d3 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2887,33 +2887,30 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 			return err;
 	}
 
-	hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
+	hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
 	mss = skb_shinfo(skb)->gso_size;
 	if (skb->protocol == htons(ETH_P_IP)) {
-		skb->nh.iph->tot_len = 0;
-		skb->nh.iph->check = 0;
-		skb->h.th->check =
-			~csum_tcpudp_magic(skb->nh.iph->saddr,
-					   skb->nh.iph->daddr,
-					   0,
-					   IPPROTO_TCP,
-					   0);
+		struct iphdr *iph = ip_hdr(skb);
+		iph->tot_len = 0;
+		iph->check = 0;
+		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
+							 iph->daddr, 0,
+							 IPPROTO_TCP,
+							 0);
 		cmd_length = E1000_TXD_CMD_IP;
-		ipcse = skb->h.raw - skb->data - 1;
+		ipcse = skb_transport_offset(skb) - 1;
 	} else if (skb->protocol == htons(ETH_P_IPV6)) {
-		skb->nh.ipv6h->payload_len = 0;
-		skb->h.th->check =
-			~csum_ipv6_magic(&skb->nh.ipv6h->saddr,
-					 &skb->nh.ipv6h->daddr,
-					 0,
-					 IPPROTO_TCP,
-					 0);
+		ipv6_hdr(skb)->payload_len = 0;
+		tcp_hdr(skb)->check =
+			~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+					 &ipv6_hdr(skb)->daddr,
+					 0, IPPROTO_TCP, 0);
 		ipcse = 0;
 	}
-	ipcss = skb->nh.raw - skb->data;
-	ipcso = (void *)&(skb->nh.iph->check) - (void *)skb->data;
-	tucss = skb->h.raw - skb->data;
-	tucso = (void *)&(skb->h.th->check) - (void *)skb->data;
+	ipcss = skb_network_offset(skb);
+	ipcso = (void *)&(ip_hdr(skb)->check) - (void *)skb->data;
+	tucss = skb_transport_offset(skb);
+	tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data;
 	tucse = 0;
 
 	cmd_length |= (E1000_TXD_CMD_DEXT | E1000_TXD_CMD_TSE |
@@ -2954,7 +2951,7 @@ e1000_tx_csum(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 	uint8_t css;
 
 	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
-		css = skb->h.raw - skb->data;
+		css = skb_transport_offset(skb);
 
 		i = tx_ring->next_to_use;
 		buffer_info = &tx_ring->buffer_info[i];
@@ -2962,7 +2959,8 @@ e1000_tx_csum(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 
 		context_desc->lower_setup.ip_config = 0;
 		context_desc->upper_setup.tcp_fields.tucss = css;
-		context_desc->upper_setup.tcp_fields.tucso = css + skb->csum;
+		context_desc->upper_setup.tcp_fields.tucso =
+			css + skb->csum_offset;
 		context_desc->upper_setup.tcp_fields.tucse = 0;
 		context_desc->tcp_seg_setup.data = 0;
 		context_desc->cmd_and_length = cpu_to_le32(E1000_TXD_CMD_DEXT);
@@ -3296,7 +3294,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 	/* TSO Workaround for 82571/2/3 Controllers -- if skb->data
 	 * points to just header, pull a few bytes of payload from
 	 * frags into skb->data */
-	hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
+	hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
 	if (skb->data_len && (hdr_len == (skb->len - skb->data_len))) {
 		switch (adapter->hw.mac_type) {
 			unsigned int pull_size;
@@ -3307,7 +3305,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 			 * NOTE: this is a TSO only workaround
 			 * if end byte alignment not correct move us
 			 * into the next dword */
-			if ((unsigned long)(skb->tail - 1) & 4)
+			if ((unsigned long)(skb_tail_pointer(skb) - 1) & 4)
 				break;
 			/* fall through */
 		case e1000_82571:
@@ -4227,9 +4225,12 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter,
 				netdev_alloc_skb(netdev, length + NET_IP_ALIGN);
 			if (new_skb) {
 				skb_reserve(new_skb, NET_IP_ALIGN);
-				memcpy(new_skb->data - NET_IP_ALIGN,
-				       skb->data - NET_IP_ALIGN,
-				       length + NET_IP_ALIGN);
+				skb_copy_to_linear_data_offset(new_skb,
+							       -NET_IP_ALIGN,
+							       (skb->data -
+							        NET_IP_ALIGN),
+							       (length +
+							        NET_IP_ALIGN));
 				/* save the skb in buffer_info as good */
 				buffer_info->skb = skb;
 				skb = new_skb;
@@ -4391,7 +4392,7 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter,
 						   PCI_DMA_FROMDEVICE);
 			vaddr = kmap_atomic(ps_page->ps_page[0],
 					    KM_SKB_DATA_SOFTIRQ);
-			memcpy(skb->tail, vaddr, l1);
+			memcpy(skb_tail_pointer(skb), vaddr, l1);
 			kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
 			pci_dma_sync_single_for_device(pdev,
 						       ps_page_dma->ps_page_dma[0],
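The e1000 checksum-offload hunks encode the CHECKSUM_PARTIAL contract: hardware checksumming starts at the transport header, and the result is stored skb->csum_offset bytes into it (the tucso line also replaces an ambiguous `skb->csum` use, which worked only through the csum/csum_offset union, with the explicit member). A sketch of the two offsets a NIC needs (hypothetical helper, not in the driver):

#include <linux/skbuff.h>

/* Sketch: the CHECKSUM_PARTIAL offsets as the e1000 context
 * descriptor consumes them. */
static void example_csum_fields(const struct sk_buff *skb, u8 *css, u8 *cso)
{
	*css = skb_transport_offset(skb);	/* where the hw starts summing */
	*cso = *css + skb->csum_offset;		/* where the hw writes the sum */
}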
diff --git a/drivers/net/eepro.c b/drivers/net/eepro.c
index b4463094c93a..39654e1e2bed 100644
--- a/drivers/net/eepro.c
+++ b/drivers/net/eepro.c
@@ -1591,7 +1591,6 @@ eepro_rx(struct net_device *dev)
 
 			break;
 		}
-		skb->dev = dev;
 		skb_reserve(skb,2);
 
 		if (lp->version == LAN595)
diff --git a/drivers/net/eepro100.c b/drivers/net/eepro100.c
index e28bb1e38f8d..6c267c38df97 100644
--- a/drivers/net/eepro100.c
+++ b/drivers/net/eepro100.c
@@ -1793,7 +1793,6 @@ speedo_rx(struct net_device *dev)
 			   copying to a properly sized skbuff. */
 			if (pkt_len < rx_copybreak
 				&& (skb = dev_alloc_skb(pkt_len + 2)) != 0) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* Align IP on 16 byte boundaries */
 				/* 'skb_put()' points to the start of sk_buff data area. */
 				pci_dma_sync_single_for_cpu(sp->pdev, sp->rx_ring_dma[entry],
@@ -1805,8 +1804,9 @@ speedo_rx(struct net_device *dev)
 				eth_copy_and_sum(skb, sp->rx_skbuff[entry]->data, pkt_len, 0);
 				skb_put(skb, pkt_len);
 #else
-				memcpy(skb_put(skb, pkt_len), sp->rx_skbuff[entry]->data,
-					   pkt_len);
+				skb_copy_from_linear_data(sp->rx_skbuff[entry],
+							  skb_put(skb, pkt_len),
+							  pkt_len);
 #endif
 				pci_dma_sync_single_for_device(sp->pdev, sp->rx_ring_dma[entry],
 							       sizeof(struct RxFD) + pkt_len,
diff --git a/drivers/net/eexpress.c b/drivers/net/eexpress.c
index 3868b8031266..8aaf5ec0c360 100644
--- a/drivers/net/eexpress.c
+++ b/drivers/net/eexpress.c
@@ -976,7 +976,6 @@ static void eexp_hw_rx_pio(struct net_device *dev)
 				lp->stats.rx_dropped++;
 				break;
 			}
-			skb->dev = dev;
 			skb_reserve(skb, 2);
 			outw(pbuf+10, ioaddr+READ_PTR);
 			insw(ioaddr+DATAPORT, skb_put(skb,pkt_len),(pkt_len+1)>>1);
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index 0e4042bc0a48..58364a0ff378 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -391,8 +391,8 @@ static int ehea_poll(struct net_device *dev, int *budget)
 				if (!skb)
 					break;
 			}
-			memcpy(skb->data, ((char*)cqe) + 64,
-			       cqe->num_bytes_transfered - 4);
+			skb_copy_to_linear_data(skb, ((char*)cqe) + 64,
+						cqe->num_bytes_transfered - 4);
 			ehea_fill_skb(dev, skb, cqe);
 		} else if (rq == 2) {  /* RQ2 */
 			skb = get_skb_by_index(skb_arr_rq2,
@@ -1262,8 +1262,8 @@ static int ehea_clean_portres(struct ehea_port *port, struct ehea_port_res *pr)
 static inline void write_ip_start_end(struct ehea_swqe *swqe,
 				      const struct sk_buff *skb)
 {
-	swqe->ip_start = (u8)(((u64)skb->nh.iph) - ((u64)skb->data));
-	swqe->ip_end = (u8)(swqe->ip_start + skb->nh.iph->ihl * 4 - 1);
+	swqe->ip_start = skb_network_offset(skb);
+	swqe->ip_end = (u8)(swqe->ip_start + ip_hdrlen(skb) - 1);
 }
 
 static inline void write_tcp_offset_end(struct ehea_swqe *swqe,
@@ -1300,13 +1300,13 @@ static void write_swqe2_TSO(struct sk_buff *skb,
 	/* copy only eth/ip/tcp headers to immediate data and
 	 * the rest of skb->data to sg1entry
 	 */
-	headersize = ETH_HLEN + (skb->nh.iph->ihl * 4) + (skb->h.th->doff * 4);
+	headersize = ETH_HLEN + ip_hdrlen(skb) + tcp_hdrlen(skb);
 
 	skb_data_size = skb->len - skb->data_len;
 
 	if (skb_data_size >= headersize) {
 		/* copy immediate data */
-		memcpy(imm_data, skb->data, headersize);
+		skb_copy_from_linear_data(skb, imm_data, headersize);
 		swqe->immediate_data_length = headersize;
 
 		if (skb_data_size > headersize) {
@@ -1337,7 +1337,7 @@ static void write_swqe2_nonTSO(struct sk_buff *skb,
 	 */
 	if (skb_data_size >= SWQE2_MAX_IMM) {
 		/* copy immediate data */
-		memcpy(imm_data, skb->data, SWQE2_MAX_IMM);
+		skb_copy_from_linear_data(skb, imm_data, SWQE2_MAX_IMM);
 
 		swqe->immediate_data_length = SWQE2_MAX_IMM;
 
@@ -1350,7 +1350,7 @@ static void write_swqe2_nonTSO(struct sk_buff *skb,
 			swqe->descriptors++;
 		}
 	} else {
-		memcpy(imm_data, skb->data, skb_data_size);
+		skb_copy_from_linear_data(skb, imm_data, skb_data_size);
 		swqe->immediate_data_length = skb_data_size;
 	}
 }
@@ -1688,6 +1688,7 @@ static void ehea_xmit2(struct sk_buff *skb, struct net_device *dev,
 		       struct ehea_swqe *swqe, u32 lkey)
 {
 	if (skb->protocol == htons(ETH_P_IP)) {
+		const struct iphdr *iph = ip_hdr(skb);
 		/* IPv4 */
 		swqe->tx_control |= EHEA_SWQE_CRC
 				 | EHEA_SWQE_IP_CHECKSUM
@@ -1697,15 +1698,15 @@ static void ehea_xmit2(struct sk_buff *skb, struct net_device *dev,
 
 		write_ip_start_end(swqe, skb);
 
-		if (skb->nh.iph->protocol == IPPROTO_UDP) {
-			if ((skb->nh.iph->frag_off & IP_MF) ||
-			    (skb->nh.iph->frag_off & IP_OFFSET))
+		if (iph->protocol == IPPROTO_UDP) {
+			if ((iph->frag_off & IP_MF) ||
+			    (iph->frag_off & IP_OFFSET))
 				/* IP fragment, so don't change cs */
 				swqe->tx_control &= ~EHEA_SWQE_TCP_CHECKSUM;
 			else
 				write_udp_offset_end(swqe, skb);
 
-		} else if (skb->nh.iph->protocol == IPPROTO_TCP) {
+		} else if (iph->protocol == IPPROTO_TCP) {
 			write_tcp_offset_end(swqe, skb);
 		}
 
@@ -1731,10 +1732,11 @@ static void ehea_xmit3(struct sk_buff *skb, struct net_device *dev,
 	int i;
 
 	if (skb->protocol == htons(ETH_P_IP)) {
+		const struct iphdr *iph = ip_hdr(skb);
 		/* IPv4 */
 		write_ip_start_end(swqe, skb);
 
-		if (skb->nh.iph->protocol == IPPROTO_TCP) {
+		if (iph->protocol == IPPROTO_TCP) {
 			swqe->tx_control |= EHEA_SWQE_CRC
 					 | EHEA_SWQE_IP_CHECKSUM
 					 | EHEA_SWQE_TCP_CHECKSUM
@@ -1742,9 +1744,9 @@ static void ehea_xmit3(struct sk_buff *skb, struct net_device *dev,
 
 			write_tcp_offset_end(swqe, skb);
 
-		} else if (skb->nh.iph->protocol == IPPROTO_UDP) {
-			if ((skb->nh.iph->frag_off & IP_MF) ||
-			    (skb->nh.iph->frag_off & IP_OFFSET))
+		} else if (iph->protocol == IPPROTO_UDP) {
+			if ((iph->frag_off & IP_MF) ||
+			    (iph->frag_off & IP_OFFSET))
 				/* IP fragment, so don't change cs */
 				swqe->tx_control |= EHEA_SWQE_CRC
 						 | EHEA_SWQE_IMM_DATA_PRESENT;
@@ -1770,10 +1772,11 @@ static void ehea_xmit3(struct sk_buff *skb, struct net_device *dev,
 	/* copy (immediate) data */
 	if (nfrags == 0) {
 		/* data is in a single piece */
-		memcpy(imm_data, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, imm_data, skb->len);
 	} else {
 		/* first copy data from the skb->data buffer ... */
-		memcpy(imm_data, skb->data, skb->len - skb->data_len);
+		skb_copy_from_linear_data(skb, imm_data,
+					  skb->len - skb->data_len);
 		imm_data += skb->len - skb->data_len;
1778 1781
1779 /* ... then copy data from the fragments */ 1782 /* ... then copy data from the fragments */
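The memcpy() to skb_copy_from_linear_data()/skb_copy_to_linear_data() conversions in this file, and in the fec, fs_enet, hamradio, and irda drivers below, are behavior-preserving: in this tree the helpers are inline wrappers around memcpy() over the skb linear area, introduced so the skb argument and copy direction are explicit and future layout changes stay in one place. A compilable userspace sketch of the three variants this merge uses, with a simplified sk_buff that is an assumption of the sketch, not the kernel's struct:

    #include <string.h>

    struct sk_buff { unsigned char *data; unsigned int len; };

    static void skb_copy_from_linear_data(const struct sk_buff *skb,
                                          void *to, unsigned int len)
    {
        memcpy(to, skb->data, len);          /* linear area -> caller buffer */
    }

    static void skb_copy_from_linear_data_offset(const struct sk_buff *skb,
                                                 int offset, void *to,
                                                 unsigned int len)
    {
        memcpy(to, skb->data + offset, len); /* e.g. skip a leading KISS byte */
    }

    static void skb_copy_to_linear_data(struct sk_buff *skb, const void *from,
                                        unsigned int len)
    {
        memcpy(skb->data, from, len);        /* caller buffer -> linear area */
    }

    int main(void)
    {
        unsigned char payload[4] = "abc", out[4];
        struct sk_buff skb = { payload, 4 };

        skb_copy_from_linear_data(&skb, out, 4);
        skb_copy_from_linear_data_offset(&skb, 1, out, 3);
        skb_copy_to_linear_data(&skb, out, 4);
        return 0;
    }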
diff --git a/drivers/net/epic100.c b/drivers/net/epic100.c
index 3a6a83d3ee1c..4e3f14c9c717 100644
--- a/drivers/net/epic100.c
+++ b/drivers/net/epic100.c
@@ -934,7 +934,6 @@ static void epic_init_ring(struct net_device *dev)
934 ep->rx_skbuff[i] = skb; 934 ep->rx_skbuff[i] = skb;
935 if (skb == NULL) 935 if (skb == NULL)
936 break; 936 break;
937 skb->dev = dev; /* Mark as being used by this device. */
938 skb_reserve(skb, 2); /* 16 byte align the IP header. */ 937 skb_reserve(skb, 2); /* 16 byte align the IP header. */
939 ep->rx_ring[i].bufaddr = pci_map_single(ep->pci_dev, 938 ep->rx_ring[i].bufaddr = pci_map_single(ep->pci_dev,
940 skb->data, ep->rx_buf_sz, PCI_DMA_FROMDEVICE); 939 skb->data, ep->rx_buf_sz, PCI_DMA_FROMDEVICE);
@@ -1199,7 +1198,6 @@ static int epic_rx(struct net_device *dev, int budget)
1199 to a minimally-sized skbuff. */ 1198 to a minimally-sized skbuff. */
1200 if (pkt_len < rx_copybreak 1199 if (pkt_len < rx_copybreak
1201 && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) { 1200 && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
1202 skb->dev = dev;
1203 skb_reserve(skb, 2); /* 16 byte align the IP header */ 1201 skb_reserve(skb, 2); /* 16 byte align the IP header */
1204 pci_dma_sync_single_for_cpu(ep->pci_dev, 1202 pci_dma_sync_single_for_cpu(ep->pci_dev,
1205 ep->rx_ring[entry].bufaddr, 1203 ep->rx_ring[entry].bufaddr,
@@ -1236,7 +1234,6 @@ static int epic_rx(struct net_device *dev, int budget)
1236 skb = ep->rx_skbuff[entry] = dev_alloc_skb(ep->rx_buf_sz); 1234 skb = ep->rx_skbuff[entry] = dev_alloc_skb(ep->rx_buf_sz);
1237 if (skb == NULL) 1235 if (skb == NULL)
1238 break; 1236 break;
1239 skb->dev = dev; /* Mark as being used by this device. */
1240 skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ 1237 skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */
1241 ep->rx_ring[entry].bufaddr = pci_map_single(ep->pci_dev, 1238 ep->rx_ring[entry].bufaddr = pci_map_single(ep->pci_dev,
1242 skb->data, ep->rx_buf_sz, PCI_DMA_FROMDEVICE); 1239 skb->data, ep->rx_buf_sz, PCI_DMA_FROMDEVICE);
diff --git a/drivers/net/eth16i.c b/drivers/net/eth16i.c
index 93283e386f3a..04abf59e5007 100644
--- a/drivers/net/eth16i.c
+++ b/drivers/net/eth16i.c
@@ -1175,7 +1175,6 @@ static void eth16i_rx(struct net_device *dev)
1175 break; 1175 break;
1176 } 1176 }
1177 1177
1178 skb->dev = dev;
1179 skb_reserve(skb,2); 1178 skb_reserve(skb,2);
1180 1179
1181 /* 1180 /*
diff --git a/drivers/net/ewrk3.c b/drivers/net/ewrk3.c
index 714ea1176ec7..cb0792c187ba 100644
--- a/drivers/net/ewrk3.c
+++ b/drivers/net/ewrk3.c
@@ -993,7 +993,6 @@ static int ewrk3_rx(struct net_device *dev)
993 993
994 if ((skb = dev_alloc_skb(pkt_len + 2)) != NULL) { 994 if ((skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
995 unsigned char *p; 995 unsigned char *p;
996 skb->dev = dev;
997 skb_reserve(skb, 2); /* Align to 16 bytes */ 996 skb_reserve(skb, 2); /* Align to 16 bytes */
998 p = skb_put(skb, pkt_len); 997 p = skb_put(skb, pkt_len);
999 998
diff --git a/drivers/net/fealnx.c b/drivers/net/fealnx.c
index 38a13f440530..abe9b089c610 100644
--- a/drivers/net/fealnx.c
+++ b/drivers/net/fealnx.c
@@ -1719,7 +1719,6 @@ static int netdev_rx(struct net_device *dev)
1719 to a minimally-sized skbuff. */ 1719 to a minimally-sized skbuff. */
1720 if (pkt_len < rx_copybreak && 1720 if (pkt_len < rx_copybreak &&
1721 (skb = dev_alloc_skb(pkt_len + 2)) != NULL) { 1721 (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
1722 skb->dev = dev;
1723 skb_reserve(skb, 2); /* 16 byte align the IP header */ 1722 skb_reserve(skb, 2); /* 16 byte align the IP header */
1724 pci_dma_sync_single_for_cpu(np->pci_dev, 1723 pci_dma_sync_single_for_cpu(np->pci_dev,
1725 np->cur_rx->buffer, 1724 np->cur_rx->buffer,
diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index 6764281b4531..255b09124e11 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -647,7 +647,6 @@ while (!((status = bdp->cbd_sc) & BD_ENET_RX_EMPTY)) {
647 printk("%s: Memory squeeze, dropping packet.\n", dev->name); 647 printk("%s: Memory squeeze, dropping packet.\n", dev->name);
648 fep->stats.rx_dropped++; 648 fep->stats.rx_dropped++;
649 } else { 649 } else {
650 skb->dev = dev;
651 skb_put(skb,pkt_len-4); /* Make room */ 650 skb_put(skb,pkt_len-4); /* Make room */
652 eth_copy_and_sum(skb, data, pkt_len-4, 0); 651 eth_copy_and_sum(skb, data, pkt_len-4, 0);
653 skb->protocol=eth_type_trans(skb,dev); 652 skb->protocol=eth_type_trans(skb,dev);
diff --git a/drivers/net/fec_8xx/fec_main.c b/drivers/net/fec_8xx/fec_main.c
index 77f747a5afa7..e824d5d231af 100644
--- a/drivers/net/fec_8xx/fec_main.c
+++ b/drivers/net/fec_8xx/fec_main.c
@@ -551,7 +551,9 @@ static int fec_enet_rx_common(struct net_device *dev, int *budget)
551 skbn = dev_alloc_skb(pkt_len + 2); 551 skbn = dev_alloc_skb(pkt_len + 2);
552 if (skbn != NULL) { 552 if (skbn != NULL) {
553 skb_reserve(skbn, 2); /* align IP header */ 553 skb_reserve(skbn, 2); /* align IP header */
 554 memcpy(skbn->data, skb->data, pkt_len); 554 skb_copy_from_linear_data(skb,
555 skbn->data,
556 pkt_len);
555 /* swap */ 557 /* swap */
556 skbt = skb; 558 skbt = skb;
557 skb = skbn; 559 skb = skbn;
@@ -561,7 +563,6 @@ static int fec_enet_rx_common(struct net_device *dev, int *budget)
561 skbn = dev_alloc_skb(ENET_RX_FRSIZE); 563 skbn = dev_alloc_skb(ENET_RX_FRSIZE);
562 564
563 if (skbn != NULL) { 565 if (skbn != NULL) {
564 skb->dev = dev;
565 skb_put(skb, pkt_len); /* Make room */ 566 skb_put(skb, pkt_len); /* Make room */
566 skb->protocol = eth_type_trans(skb, dev); 567 skb->protocol = eth_type_trans(skb, dev);
567 received++; 568 received++;
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index d04214e4e581..7a018027fcc0 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -1385,11 +1385,12 @@ static int nv_alloc_rx(struct net_device *dev)
1385 while (np->put_rx.orig != less_rx) { 1385 while (np->put_rx.orig != less_rx) {
1386 struct sk_buff *skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD); 1386 struct sk_buff *skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD);
1387 if (skb) { 1387 if (skb) {
1388 skb->dev = dev;
1389 np->put_rx_ctx->skb = skb; 1388 np->put_rx_ctx->skb = skb;
1390 np->put_rx_ctx->dma = pci_map_single(np->pci_dev, skb->data, 1389 np->put_rx_ctx->dma = pci_map_single(np->pci_dev,
1391 skb->end-skb->data, PCI_DMA_FROMDEVICE); 1390 skb->data,
1392 np->put_rx_ctx->dma_len = skb->end-skb->data; 1391 skb_tailroom(skb),
1392 PCI_DMA_FROMDEVICE);
1393 np->put_rx_ctx->dma_len = skb_tailroom(skb);
1393 np->put_rx.orig->buf = cpu_to_le32(np->put_rx_ctx->dma); 1394 np->put_rx.orig->buf = cpu_to_le32(np->put_rx_ctx->dma);
1394 wmb(); 1395 wmb();
1395 np->put_rx.orig->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX_AVAIL); 1396 np->put_rx.orig->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX_AVAIL);
@@ -1416,11 +1417,12 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
1416 while (np->put_rx.ex != less_rx) { 1417 while (np->put_rx.ex != less_rx) {
1417 struct sk_buff *skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD); 1418 struct sk_buff *skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD);
1418 if (skb) { 1419 if (skb) {
1419 skb->dev = dev;
1420 np->put_rx_ctx->skb = skb; 1420 np->put_rx_ctx->skb = skb;
1421 np->put_rx_ctx->dma = pci_map_single(np->pci_dev, skb->data, 1421 np->put_rx_ctx->dma = pci_map_single(np->pci_dev,
1422 skb->end-skb->data, PCI_DMA_FROMDEVICE); 1422 skb->data,
1423 np->put_rx_ctx->dma_len = skb->end-skb->data; 1423 skb_tailroom(skb),
1424 PCI_DMA_FROMDEVICE);
1425 np->put_rx_ctx->dma_len = skb_tailroom(skb);
1424 np->put_rx.ex->bufhigh = cpu_to_le64(np->put_rx_ctx->dma) >> 32; 1426 np->put_rx.ex->bufhigh = cpu_to_le64(np->put_rx_ctx->dma) >> 32;
1425 np->put_rx.ex->buflow = cpu_to_le64(np->put_rx_ctx->dma) & 0x0FFFFFFFF; 1427 np->put_rx.ex->buflow = cpu_to_le64(np->put_rx_ctx->dma) & 0x0FFFFFFFF;
1426 wmb(); 1428 wmb();
@@ -1604,8 +1606,9 @@ static void nv_drain_rx(struct net_device *dev)
1604 wmb(); 1606 wmb();
1605 if (np->rx_skb[i].skb) { 1607 if (np->rx_skb[i].skb) {
1606 pci_unmap_single(np->pci_dev, np->rx_skb[i].dma, 1608 pci_unmap_single(np->pci_dev, np->rx_skb[i].dma,
1607 np->rx_skb[i].skb->end-np->rx_skb[i].skb->data, 1609 (skb_end_pointer(np->rx_skb[i].skb) -
1608 PCI_DMA_FROMDEVICE); 1610 np->rx_skb[i].skb->data),
1611 PCI_DMA_FROMDEVICE);
1609 dev_kfree_skb(np->rx_skb[i].skb); 1612 dev_kfree_skb(np->rx_skb[i].skb);
1610 np->rx_skb[i].skb = NULL; 1613 np->rx_skb[i].skb = NULL;
1611 } 1614 }
@@ -4376,11 +4379,12 @@ static int nv_loopback_test(struct net_device *dev)
4376 ret = 0; 4379 ret = 0;
4377 goto out; 4380 goto out;
4378 } 4381 }
4382 test_dma_addr = pci_map_single(np->pci_dev, tx_skb->data,
4383 skb_tailroom(tx_skb),
4384 PCI_DMA_FROMDEVICE);
4379 pkt_data = skb_put(tx_skb, pkt_len); 4385 pkt_data = skb_put(tx_skb, pkt_len);
4380 for (i = 0; i < pkt_len; i++) 4386 for (i = 0; i < pkt_len; i++)
4381 pkt_data[i] = (u8)(i & 0xff); 4387 pkt_data[i] = (u8)(i & 0xff);
4382 test_dma_addr = pci_map_single(np->pci_dev, tx_skb->data,
4383 tx_skb->end-tx_skb->data, PCI_DMA_FROMDEVICE);
4384 4388
4385 if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { 4389 if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
4386 np->tx_ring.orig[0].buf = cpu_to_le32(test_dma_addr); 4390 np->tx_ring.orig[0].buf = cpu_to_le32(test_dma_addr);
@@ -4437,7 +4441,7 @@ static int nv_loopback_test(struct net_device *dev)
4437 } 4441 }
4438 4442
4439 pci_unmap_page(np->pci_dev, test_dma_addr, 4443 pci_unmap_page(np->pci_dev, test_dma_addr,
4440 tx_skb->end-tx_skb->data, 4444 (skb_end_pointer(tx_skb) - tx_skb->data),
4441 PCI_DMA_TODEVICE); 4445 PCI_DMA_TODEVICE);
4442 dev_kfree_skb_any(tx_skb); 4446 dev_kfree_skb_any(tx_skb);
4443 out: 4447 out:
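forcedeth stops open-coding skb->end - skb->data: skb_tailroom() names the space left after tail, and skb_end_pointer() hides whether end is stored as a pointer or an offset. Since tailroom is measured from tail rather than data, the two expressions agree only while the skb is empty, which is presumably why the loopback-test hunk also moves the pci_map_single() ahead of skb_put(). A compilable userspace model of the arithmetic follows; the four-pointer layout is a simplification of struct sk_buff, not the real definition.

    #include <assert.h>

    struct sk_buff { unsigned char *head, *data, *tail, *end; }; /* simplified */

    static unsigned int skb_tailroom(const struct sk_buff *skb)
    {
        return skb->end - skb->tail;            /* measured from tail, not data */
    }

    static unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
    {
        unsigned char *tmp = skb->tail;
        skb->tail += len;
        assert(skb->tail <= skb->end);          /* the kernel would panic here */
        return tmp;
    }

    int main(void)
    {
        unsigned char buf[2048];
        struct sk_buff skb = { buf, buf, buf, buf + sizeof(buf) };

        assert(skb_tailroom(&skb) == 2048);       /* empty: same as end - data */
        skb_put(&skb, 64);
        assert(skb_tailroom(&skb) == 2048 - 64);  /* diverges once data queued */
        return 0;
    }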
diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c
index 4a05c14bf7ec..e2ddd617493a 100644
--- a/drivers/net/fs_enet/fs_enet-main.c
+++ b/drivers/net/fs_enet/fs_enet-main.c
@@ -160,7 +160,8 @@ static int fs_enet_rx_napi(struct net_device *dev, int *budget)
160 skbn = dev_alloc_skb(pkt_len + 2); 160 skbn = dev_alloc_skb(pkt_len + 2);
161 if (skbn != NULL) { 161 if (skbn != NULL) {
162 skb_reserve(skbn, 2); /* align IP header */ 162 skb_reserve(skbn, 2); /* align IP header */
163 memcpy(skbn->data, skb->data, pkt_len); 163 skb_copy_from_linear_data(skb,
164 skbn->data, pkt_len);
164 /* swap */ 165 /* swap */
165 skbt = skb; 166 skbt = skb;
166 skb = skbn; 167 skb = skbn;
@@ -170,7 +171,6 @@ static int fs_enet_rx_napi(struct net_device *dev, int *budget)
170 skbn = dev_alloc_skb(ENET_RX_FRSIZE); 171 skbn = dev_alloc_skb(ENET_RX_FRSIZE);
171 172
172 if (skbn != NULL) { 173 if (skbn != NULL) {
173 skb->dev = dev;
174 skb_put(skb, pkt_len); /* Make room */ 174 skb_put(skb, pkt_len); /* Make room */
175 skb->protocol = eth_type_trans(skb, dev); 175 skb->protocol = eth_type_trans(skb, dev);
176 received++; 176 received++;
@@ -294,7 +294,8 @@ static int fs_enet_rx_non_napi(struct net_device *dev)
294 skbn = dev_alloc_skb(pkt_len + 2); 294 skbn = dev_alloc_skb(pkt_len + 2);
295 if (skbn != NULL) { 295 if (skbn != NULL) {
296 skb_reserve(skbn, 2); /* align IP header */ 296 skb_reserve(skbn, 2); /* align IP header */
297 memcpy(skbn->data, skb->data, pkt_len); 297 skb_copy_from_linear_data(skb,
298 skbn->data, pkt_len);
298 /* swap */ 299 /* swap */
299 skbt = skb; 300 skbt = skb;
300 skb = skbn; 301 skb = skbn;
@@ -304,7 +305,6 @@ static int fs_enet_rx_non_napi(struct net_device *dev)
304 skbn = dev_alloc_skb(ENET_RX_FRSIZE); 305 skbn = dev_alloc_skb(ENET_RX_FRSIZE);
305 306
306 if (skbn != NULL) { 307 if (skbn != NULL) {
307 skb->dev = dev;
308 skb_put(skb, pkt_len); /* Make room */ 308 skb_put(skb, pkt_len); /* Make room */
309 skb->protocol = eth_type_trans(skb, dev); 309 skb->protocol = eth_type_trans(skb, dev);
310 received++; 310 received++;
@@ -516,7 +516,6 @@ void fs_init_bds(struct net_device *dev)
516 break; 516 break;
517 } 517 }
518 fep->rx_skbuff[i] = skb; 518 fep->rx_skbuff[i] = skb;
519 skb->dev = dev;
520 CBDW_BUFADDR(bdp, 519 CBDW_BUFADDR(bdp,
521 dma_map_single(fep->dev, skb->data, 520 dma_map_single(fep->dev, skb->data,
522 L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), 521 L1_CACHE_ALIGN(PKT_MAXBUF_SIZE),
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index d981d4c41dd3..b666a0cc0642 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -942,18 +942,18 @@ static inline void gfar_tx_checksum(struct sk_buff *skb, struct txfcb *fcb)
942 942
943 /* Tell the controller what the protocol is */ 943 /* Tell the controller what the protocol is */
944 /* And provide the already calculated phcs */ 944 /* And provide the already calculated phcs */
945 if (skb->nh.iph->protocol == IPPROTO_UDP) { 945 if (ip_hdr(skb)->protocol == IPPROTO_UDP) {
946 flags |= TXFCB_UDP; 946 flags |= TXFCB_UDP;
947 fcb->phcs = skb->h.uh->check; 947 fcb->phcs = udp_hdr(skb)->check;
948 } else 948 } else
 949 fcb->phcs = skb->h.th->check; 949 fcb->phcs = tcp_hdr(skb)->check;
950 950
951 /* l3os is the distance between the start of the 951 /* l3os is the distance between the start of the
952 * frame (skb->data) and the start of the IP hdr. 952 * frame (skb->data) and the start of the IP hdr.
953 * l4os is the distance between the start of the 953 * l4os is the distance between the start of the
954 * l3 hdr and the l4 hdr */ 954 * l3 hdr and the l4 hdr */
955 fcb->l3os = (u16)(skb->nh.raw - skb->data - GMAC_FCB_LEN); 955 fcb->l3os = (u16)(skb_network_offset(skb) - GMAC_FCB_LEN);
956 fcb->l4os = (u16)(skb->h.raw - skb->nh.raw); 956 fcb->l4os = skb_network_header_len(skb);
957 957
958 fcb->flags = flags; 958 fcb->flags = flags;
959} 959}
@@ -1295,8 +1295,6 @@ struct sk_buff * gfar_new_skb(struct net_device *dev, struct rxbd8 *bdp)
1295 */ 1295 */
1296 skb_reserve(skb, alignamount); 1296 skb_reserve(skb, alignamount);
1297 1297
1298 skb->dev = dev;
1299
1300 bdp->bufPtr = dma_map_single(NULL, skb->data, 1298 bdp->bufPtr = dma_map_single(NULL, skb->data,
1301 priv->rx_buffer_size, DMA_FROM_DEVICE); 1299 priv->rx_buffer_size, DMA_FROM_DEVICE);
1302 1300
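gianfar's l3os/l4os arithmetic on the raw header pointers becomes skb_network_offset() and skb_network_header_len(); the same offset helpers reappear in the ixgb TSO hunk further down. A sketch of what they compute, using a simplified sk_buff whose header members are plain pointers (an assumption matching this 2.6.21-era tree, where skb->nh.raw and skb->h.raw were pointers):

    #include <assert.h>

    struct sk_buff {
        unsigned char *data;
        unsigned char *network_header;    /* was skb->nh.raw */
        unsigned char *transport_header;  /* was skb->h.raw */
    };

    static int skb_network_offset(const struct sk_buff *skb)
    {
        return skb->network_header - skb->data;
    }

    static unsigned int skb_network_header_len(const struct sk_buff *skb)
    {
        return skb->transport_header - skb->network_header;
    }

    int main(void)
    {
        unsigned char frame[64];
        /* hypothetical plain Ethernet + 20-byte IPv4 layout */
        struct sk_buff skb = { frame, frame + 14, frame + 34 };

        assert(skb_network_offset(&skb) == 14);      /* ETH_HLEN */
        assert(skb_network_header_len(&skb) == 20);  /* iph->ihl * 4 */
        return 0;
    }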
diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c
index c3c0d67fc383..2521b111b3a5 100644
--- a/drivers/net/hamachi.c
+++ b/drivers/net/hamachi.c
@@ -1568,7 +1568,6 @@ static int hamachi_rx(struct net_device *dev)
1568 printk(KERN_ERR "%s: rx_copybreak non-zero " 1568 printk(KERN_ERR "%s: rx_copybreak non-zero "
1569 "not good with RX_CHECKSUM\n", dev->name); 1569 "not good with RX_CHECKSUM\n", dev->name);
1570#endif 1570#endif
1571 skb->dev = dev;
1572 skb_reserve(skb, 2); /* 16 byte align the IP header */ 1571 skb_reserve(skb, 2); /* 16 byte align the IP header */
1573 pci_dma_sync_single_for_cpu(hmp->pci_dev, 1572 pci_dma_sync_single_for_cpu(hmp->pci_dev,
1574 hmp->rx_ring[entry].addr, 1573 hmp->rx_ring[entry].addr,
diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
index d2542697e298..656f2789c9ba 100644
--- a/drivers/net/hamradio/bpqether.c
+++ b/drivers/net/hamradio/bpqether.c
@@ -282,7 +282,7 @@ static int bpq_xmit(struct sk_buff *skb, struct net_device *dev)
282 } 282 }
283 283
284 skb->protocol = ax25_type_trans(skb, dev); 284 skb->protocol = ax25_type_trans(skb, dev);
285 skb->nh.raw = skb->data; 285 skb_reset_network_header(skb);
286 dev->hard_header(skb, dev, ETH_P_BPQ, bpq->dest_addr, NULL, 0); 286 dev->hard_header(skb, dev, ETH_P_BPQ, bpq->dest_addr, NULL, 0);
287 bpq->stats.tx_packets++; 287 bpq->stats.tx_packets++;
288 bpq->stats.tx_bytes+=skb->len; 288 bpq->stats.tx_bytes+=skb->len;
diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c
index 0fbb414b5a4d..3be8c5047599 100644
--- a/drivers/net/hamradio/dmascc.c
+++ b/drivers/net/hamradio/dmascc.c
@@ -930,7 +930,7 @@ static int scc_send_packet(struct sk_buff *skb, struct net_device *dev)
930 930
931 /* Transfer data to DMA buffer */ 931 /* Transfer data to DMA buffer */
932 i = priv->tx_head; 932 i = priv->tx_head;
933 memcpy(priv->tx_buf[i], skb->data + 1, skb->len - 1); 933 skb_copy_from_linear_data_offset(skb, 1, priv->tx_buf[i], skb->len - 1);
934 priv->tx_len[i] = skb->len - 1; 934 priv->tx_len[i] = skb->len - 1;
935 935
936 /* Clear interrupts while we touch our circular buffers */ 936 /* Clear interrupts while we touch our circular buffers */
diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c
index f5a17ad9d3d6..b33adc6a340b 100644
--- a/drivers/net/hamradio/hdlcdrv.c
+++ b/drivers/net/hamradio/hdlcdrv.c
@@ -317,7 +317,9 @@ void hdlcdrv_transmitter(struct net_device *dev, struct hdlcdrv_state *s)
317 dev_kfree_skb_irq(skb); 317 dev_kfree_skb_irq(skb);
318 break; 318 break;
319 } 319 }
320 memcpy(s->hdlctx.buffer, skb->data+1, pkt_len); 320 skb_copy_from_linear_data_offset(skb, 1,
321 s->hdlctx.buffer,
322 pkt_len);
321 dev_kfree_skb_irq(skb); 323 dev_kfree_skb_irq(skb);
322 s->hdlctx.bp = s->hdlctx.buffer; 324 s->hdlctx.bp = s->hdlctx.buffer;
323 append_crc_ccitt(s->hdlctx.buffer, pkt_len); 325 append_crc_ccitt(s->hdlctx.buffer, pkt_len);
diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
index ee3ea4fa729f..467559debfd6 100644
--- a/drivers/net/hamradio/yam.c
+++ b/drivers/net/hamradio/yam.c
@@ -638,7 +638,9 @@ static void yam_tx_byte(struct net_device *dev, struct yam_port *yp)
638 dev_kfree_skb_any(skb); 638 dev_kfree_skb_any(skb);
639 break; 639 break;
640 } 640 }
641 memcpy(yp->tx_buf, skb->data + 1, yp->tx_len); 641 skb_copy_from_linear_data_offset(skb, 1,
642 yp->tx_buf,
643 yp->tx_len);
642 dev_kfree_skb_any(skb); 644 dev_kfree_skb_any(skb);
643 yp->tx_count = 0; 645 yp->tx_count = 0;
644 yp->tx_crcl = 0x21; 646 yp->tx_crcl = 0x21;
diff --git a/drivers/net/hp100.c b/drivers/net/hp100.c
index 7dc5185aa2c0..8118a6750b61 100644
--- a/drivers/net/hp100.c
+++ b/drivers/net/hp100.c
@@ -1816,7 +1816,6 @@ static void hp100_rx(struct net_device *dev)
1816 u_char *ptr; 1816 u_char *ptr;
1817 1817
1818 skb_reserve(skb,2); 1818 skb_reserve(skb,2);
1819 skb->dev = dev;
1820 1819
1821 /* ptr to start of the sk_buff data area */ 1820 /* ptr to start of the sk_buff data area */
1822 skb_put(skb, pkt_len); 1821 skb_put(skb, pkt_len);
diff --git a/drivers/net/ibm_emac/ibm_emac_core.c b/drivers/net/ibm_emac/ibm_emac_core.c
index dd8ad8746825..3d82d46f4998 100644
--- a/drivers/net/ibm_emac/ibm_emac_core.c
+++ b/drivers/net/ibm_emac/ibm_emac_core.c
@@ -1338,7 +1338,7 @@ static inline int emac_rx_sg_append(struct ocp_enet_private *dev, int slot)
1338 dev_kfree_skb(dev->rx_sg_skb); 1338 dev_kfree_skb(dev->rx_sg_skb);
1339 dev->rx_sg_skb = NULL; 1339 dev->rx_sg_skb = NULL;
1340 } else { 1340 } else {
1341 cacheable_memcpy(dev->rx_sg_skb->tail, 1341 cacheable_memcpy(skb_tail_pointer(dev->rx_sg_skb),
1342 dev->rx_skb[slot]->data, len); 1342 dev->rx_skb[slot]->data, len);
1343 skb_put(dev->rx_sg_skb, len); 1343 skb_put(dev->rx_sg_skb, len);
1344 emac_recycle_rx_skb(dev, slot, len); 1344 emac_recycle_rx_skb(dev, slot, len);
@@ -1398,7 +1398,6 @@ static int emac_poll_rx(void *param, int budget)
1398 1398
1399 skb_put(skb, len); 1399 skb_put(skb, len);
1400 push_packet: 1400 push_packet:
1401 skb->dev = dev->ndev;
1402 skb->protocol = eth_type_trans(skb, dev->ndev); 1401 skb->protocol = eth_type_trans(skb, dev->ndev);
1403 emac_rx_csum(dev, skb, ctrl); 1402 emac_rx_csum(dev, skb, ctrl);
1404 1403
diff --git a/drivers/net/ibmlana.c b/drivers/net/ibmlana.c
index 3f946c811511..fe85d6fcba33 100644
--- a/drivers/net/ibmlana.c
+++ b/drivers/net/ibmlana.c
@@ -601,7 +601,6 @@ static void irqrx_handler(struct net_device *dev)
601 601
602 /* set up skb fields */ 602 /* set up skb fields */
603 603
604 skb->dev = dev;
605 skb->protocol = eth_type_trans(skb, dev); 604 skb->protocol = eth_type_trans(skb, dev);
606 skb->ip_summed = CHECKSUM_NONE; 605 skb->ip_summed = CHECKSUM_NONE;
607 606
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c
index 458db0538a9a..0573fcfcb2c4 100644
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -798,7 +798,6 @@ static int ibmveth_poll(struct net_device *netdev, int *budget)
798 798
799 skb_reserve(skb, offset); 799 skb_reserve(skb, offset);
800 skb_put(skb, length); 800 skb_put(skb, length);
801 skb->dev = netdev;
802 skb->protocol = eth_type_trans(skb, netdev); 801 skb->protocol = eth_type_trans(skb, netdev);
803 802
804 netif_receive_skb(skb); /* send it up */ 803 netif_receive_skb(skb); /* send it up */
diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c
index 4ad780719a84..f749e07c6425 100644
--- a/drivers/net/ioc3-eth.c
+++ b/drivers/net/ioc3-eth.c
@@ -633,8 +633,6 @@ static inline void ioc3_rx(struct ioc3_private *ip)
633 633
634 ip->rx_skbs[rx_entry] = NULL; /* Poison */ 634 ip->rx_skbs[rx_entry] = NULL; /* Poison */
635 635
636 new_skb->dev = priv_netdev(ip);
637
638 /* Because we reserve afterwards. */ 636 /* Because we reserve afterwards. */
639 skb_put(new_skb, (1664 + RX_OFFSET)); 637 skb_put(new_skb, (1664 + RX_OFFSET));
640 rxb = (struct ioc3_erxbuf *) new_skb->data; 638 rxb = (struct ioc3_erxbuf *) new_skb->data;
@@ -940,7 +938,6 @@ static void ioc3_alloc_rings(struct net_device *dev)
940 } 938 }
941 939
942 ip->rx_skbs[i] = skb; 940 ip->rx_skbs[i] = skb;
943 skb->dev = dev;
944 941
945 /* Because we reserve afterwards. */ 942 /* Because we reserve afterwards. */
946 skb_put(skb, (1664 + RX_OFFSET)); 943 skb_put(skb, (1664 + RX_OFFSET));
@@ -1396,9 +1393,9 @@ static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev)
1396 * manually. 1393 * manually.
1397 */ 1394 */
1398 if (skb->ip_summed == CHECKSUM_PARTIAL) { 1395 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1399 int proto = ntohs(skb->nh.iph->protocol); 1396 const struct iphdr *ih = ip_hdr(skb);
1397 const int proto = ntohs(ih->protocol);
1400 unsigned int csoff; 1398 unsigned int csoff;
1401 struct iphdr *ih = skb->nh.iph;
1402 uint32_t csum, ehsum; 1399 uint32_t csum, ehsum;
1403 uint16_t *eh; 1400 uint16_t *eh;
1404 1401
@@ -1425,11 +1422,11 @@ static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev)
1425 csoff = ETH_HLEN + (ih->ihl << 2); 1422 csoff = ETH_HLEN + (ih->ihl << 2);
1426 if (proto == IPPROTO_UDP) { 1423 if (proto == IPPROTO_UDP) {
1427 csoff += offsetof(struct udphdr, check); 1424 csoff += offsetof(struct udphdr, check);
1428 skb->h.uh->check = csum; 1425 udp_hdr(skb)->check = csum;
1429 } 1426 }
1430 if (proto == IPPROTO_TCP) { 1427 if (proto == IPPROTO_TCP) {
1431 csoff += offsetof(struct tcphdr, check); 1428 csoff += offsetof(struct tcphdr, check);
1432 skb->h.th->check = csum; 1429 tcp_hdr(skb)->check = csum;
1433 } 1430 }
1434 1431
1435 w0 = ETXD_DOCHECKSUM | (csoff << ETXD_CHKOFF_SHIFT); 1432 w0 = ETXD_DOCHECKSUM | (csoff << ETXD_CHKOFF_SHIFT);
@@ -1446,7 +1443,7 @@ static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev)
1446 1443
1447 if (len <= 104) { 1444 if (len <= 104) {
1448 /* Short packet, let's copy it directly into the ring. */ 1445 /* Short packet, let's copy it directly into the ring. */
1449 memcpy(desc->data, skb->data, skb->len); 1446 skb_copy_from_linear_data(skb, desc->data, skb->len);
1450 if (len < ETH_ZLEN) { 1447 if (len < ETH_ZLEN) {
1451 /* Very short packet, pad with zeros at the end. */ 1448 /* Very short packet, pad with zeros at the end. */
1452 memset(desc->data + len, 0, ETH_ZLEN - len); 1449 memset(desc->data + len, 0, ETH_ZLEN - len);
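Both ehea above and ioc3 here hoist the header lookup into a local const struct iphdr * instead of re-reading skb->nh.iph in every comparison; ip_hdr() is simply the network-header pointer cast to struct iphdr *. A minimal sketch of the accessor with deliberately truncated types, shown only to make the hoisting pattern concrete:

    struct iphdr { unsigned char protocol; };      /* truncated for the sketch */
    struct sk_buff { unsigned char *network_header; };

    static struct iphdr *ip_hdr(const struct sk_buff *skb)
    {
        return (struct iphdr *)skb->network_header;
    }

    int main(void)
    {
        struct iphdr ih = { 6 /* IPPROTO_TCP */ };
        struct sk_buff skb = { (unsigned char *)&ih };
        const struct iphdr *iph = ip_hdr(&skb);    /* hoisted once, as in the hunks */

        return iph->protocol == 6 ? 0 : 1;
    }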
diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c
index cebf8c374bc5..f9c889c0dd07 100644
--- a/drivers/net/irda/ali-ircc.c
+++ b/drivers/net/irda/ali-ircc.c
@@ -1472,9 +1472,8 @@ static int ali_ircc_fir_hard_xmit(struct sk_buff *skb, struct net_device *dev)
1472 1472
1473 self->stats.tx_bytes += skb->len; 1473 self->stats.tx_bytes += skb->len;
1474 1474
1475 memcpy(self->tx_fifo.queue[self->tx_fifo.free].start, skb->data, 1475 skb_copy_from_linear_data(skb, self->tx_fifo.queue[self->tx_fifo.free].start,
1476 skb->len); 1476 skb->len);
1477
1478 self->tx_fifo.len++; 1477 self->tx_fifo.len++;
1479 self->tx_fifo.free++; 1478 self->tx_fifo.free++;
1480 1479
@@ -1924,7 +1923,7 @@ static int ali_ircc_dma_receive_complete(struct ali_ircc_cb *self)
1924 1923
1925 /* Copy frame without CRC, CRC is removed by hardware*/ 1924 /* Copy frame without CRC, CRC is removed by hardware*/
1926 skb_put(skb, len); 1925 skb_put(skb, len);
1927 memcpy(skb->data, self->rx_buff.data, len); 1926 skb_copy_to_linear_data(skb, self->rx_buff.data, len);
1928 1927
1929 /* Move to next frame */ 1928 /* Move to next frame */
1930 self->rx_buff.data += len; 1929 self->rx_buff.data += len;
@@ -1932,7 +1931,7 @@ static int ali_ircc_dma_receive_complete(struct ali_ircc_cb *self)
1932 self->stats.rx_packets++; 1931 self->stats.rx_packets++;
1933 1932
1934 skb->dev = self->netdev; 1933 skb->dev = self->netdev;
1935 skb->mac.raw = skb->data; 1934 skb_reset_mac_header(skb);
1936 skb->protocol = htons(ETH_P_IRDA); 1935 skb->protocol = htons(ETH_P_IRDA);
1937 netif_rx(skb); 1936 netif_rx(skb);
1938 self->netdev->last_rx = jiffies; 1937 self->netdev->last_rx = jiffies;
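Every irda driver from here on replaces skb->mac.raw = skb->data with skb_reset_mac_header(skb), which records that the MAC header starts at the current data pointer. A sketch with the same simplified pointer-based layout as above; the real helper also copes with the offset-based header representation on 64-bit:

    struct sk_buff { unsigned char *data; unsigned char *mac_header; };

    static void skb_reset_mac_header(struct sk_buff *skb)
    {
        skb->mac_header = skb->data;   /* replaces skb->mac.raw = skb->data */
    }

    int main(void)
    {
        unsigned char buf[16];
        struct sk_buff skb = { buf, 0 };

        skb_reset_mac_header(&skb);
        return skb.mac_header == skb.data ? 0 : 1;
    }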
diff --git a/drivers/net/irda/au1k_ir.c b/drivers/net/irda/au1k_ir.c
index 37914dc5b90e..4dbdfaaf37bf 100644
--- a/drivers/net/irda/au1k_ir.c
+++ b/drivers/net/irda/au1k_ir.c
@@ -526,7 +526,7 @@ static int au1k_irda_hard_xmit(struct sk_buff *skb, struct net_device *dev)
526 526
527 if (aup->speed == 4000000) { 527 if (aup->speed == 4000000) {
528 /* FIR */ 528 /* FIR */
529 memcpy((void *)pDB->vaddr, skb->data, skb->len); 529 skb_copy_from_linear_data(skb, pDB->vaddr, skb->len);
530 ptxd->count_0 = skb->len & 0xff; 530 ptxd->count_0 = skb->len & 0xff;
531 ptxd->count_1 = (skb->len >> 8) & 0xff; 531 ptxd->count_1 = (skb->len >> 8) & 0xff;
532 532
@@ -604,9 +604,9 @@ static int au1k_irda_rx(struct net_device *dev)
604 skb_put(skb, count); 604 skb_put(skb, count);
605 else 605 else
606 skb_put(skb, count-2); 606 skb_put(skb, count-2);
607 memcpy(skb->data, (void *)pDB->vaddr, count-2); 607 skb_copy_to_linear_data(skb, pDB->vaddr, count - 2);
608 skb->dev = dev; 608 skb->dev = dev;
609 skb->mac.raw = skb->data; 609 skb_reset_mac_header(skb);
610 skb->protocol = htons(ETH_P_IRDA); 610 skb->protocol = htons(ETH_P_IRDA);
611 netif_rx(skb); 611 netif_rx(skb);
612 prxd->count_0 = 0; 612 prxd->count_0 = 0;
diff --git a/drivers/net/irda/donauboe.c b/drivers/net/irda/donauboe.c
index 11af0ae7510e..3ca47bf6dfec 100644
--- a/drivers/net/irda/donauboe.c
+++ b/drivers/net/irda/donauboe.c
@@ -1119,7 +1119,7 @@ dumpbufs(skb->data,skb->len,'>');
1119 else 1119 else
1120 { 1120 {
1121 len = skb->len; 1121 len = skb->len;
1122 memcpy (self->tx_bufs[self->txs], skb->data, len); 1122 skb_copy_from_linear_data(skb, self->tx_bufs[self->txs], len);
1123 } 1123 }
1124 self->ring->tx[self->txs].len = len & 0x0fff; 1124 self->ring->tx[self->txs].len = len & 0x0fff;
1125 1125
@@ -1282,11 +1282,11 @@ dumpbufs(self->rx_bufs[self->rxs],len,'<');
1282 skb_reserve (skb, 1); 1282 skb_reserve (skb, 1);
1283 1283
1284 skb_put (skb, len); 1284 skb_put (skb, len);
1285 memcpy (skb->data, self->rx_bufs[self->rxs], len); 1285 skb_copy_to_linear_data(skb, self->rx_bufs[self->rxs],
1286 1286 len);
1287 self->stats.rx_packets++; 1287 self->stats.rx_packets++;
1288 skb->dev = self->netdev; 1288 skb->dev = self->netdev;
1289 skb->mac.raw = skb->data; 1289 skb_reset_mac_header(skb);
1290 skb->protocol = htons (ETH_P_IRDA); 1290 skb->protocol = htons (ETH_P_IRDA);
1291 } 1291 }
1292 else 1292 else
diff --git a/drivers/net/irda/irda-usb.c b/drivers/net/irda/irda-usb.c
index 1d510bdc9b84..0ac240ca905b 100644
--- a/drivers/net/irda/irda-usb.c
+++ b/drivers/net/irda/irda-usb.c
@@ -441,7 +441,7 @@ static int irda_usb_hard_xmit(struct sk_buff *skb, struct net_device *netdev)
441 goto drop; 441 goto drop;
442 } 442 }
443 443
444 memcpy(self->tx_buff + self->header_length, skb->data, skb->len); 444 skb_copy_from_linear_data(skb, self->tx_buff + self->header_length, skb->len);
445 445
446 /* Change setting for next frame */ 446 /* Change setting for next frame */
447 if (self->capability & IUC_STIR421X) { 447 if (self->capability & IUC_STIR421X) {
@@ -902,7 +902,7 @@ static void irda_usb_receive(struct urb *urb)
902 902
903 if(docopy) { 903 if(docopy) {
904 /* Copy packet, so we can recycle the original */ 904 /* Copy packet, so we can recycle the original */
905 memcpy(newskb->data, skb->data, urb->actual_length); 905 skb_copy_from_linear_data(skb, newskb->data, urb->actual_length);
906 /* Deliver this new skb */ 906 /* Deliver this new skb */
907 dataskb = newskb; 907 dataskb = newskb;
908 /* And hook the old skb to the URB 908 /* And hook the old skb to the URB
@@ -921,7 +921,7 @@ static void irda_usb_receive(struct urb *urb)
921 921
922 /* Ask the networking layer to queue the packet for the IrDA stack */ 922 /* Ask the networking layer to queue the packet for the IrDA stack */
923 dataskb->dev = self->netdev; 923 dataskb->dev = self->netdev;
924 dataskb->mac.raw = dataskb->data; 924 skb_reset_mac_header(dataskb);
925 dataskb->protocol = htons(ETH_P_IRDA); 925 dataskb->protocol = htons(ETH_P_IRDA);
926 len = dataskb->len; 926 len = dataskb->len;
927 netif_rx(dataskb); 927 netif_rx(dataskb);
diff --git a/drivers/net/irda/mcs7780.c b/drivers/net/irda/mcs7780.c
index f0c61f3b2a82..0de867288a47 100644
--- a/drivers/net/irda/mcs7780.c
+++ b/drivers/net/irda/mcs7780.c
@@ -200,14 +200,14 @@ static inline int mcs_setup_transceiver_vishay(struct mcs_cb *mcs)
200/* Setup a communication between mcs7780 and agilent chip. */ 200/* Setup a communication between mcs7780 and agilent chip. */
201static inline int mcs_setup_transceiver_agilent(struct mcs_cb *mcs) 201static inline int mcs_setup_transceiver_agilent(struct mcs_cb *mcs)
202{ 202{
203 IRDA_WARNING("This transceiver type is not supported yet."); 203 IRDA_WARNING("This transceiver type is not supported yet.\n");
204 return 1; 204 return 1;
205} 205}
206 206
207/* Setup a communication between mcs7780 and sharp chip. */ 207/* Setup a communication between mcs7780 and sharp chip. */
208static inline int mcs_setup_transceiver_sharp(struct mcs_cb *mcs) 208static inline int mcs_setup_transceiver_sharp(struct mcs_cb *mcs)
209{ 209{
210 IRDA_WARNING("This transceiver type is not supported yet."); 210 IRDA_WARNING("This transceiver type is not supported yet.\n");
211 return 1; 211 return 1;
212} 212}
213 213
@@ -279,7 +279,7 @@ static inline int mcs_setup_transceiver(struct mcs_cb *mcs)
279 break; 279 break;
280 280
281 default: 281 default:
282 IRDA_WARNING("Unknown transceiver type: %d", 282 IRDA_WARNING("Unknown transceiver type: %d\n",
283 mcs->transceiver_type); 283 mcs->transceiver_type);
284 ret = 1; 284 ret = 1;
285 } 285 }
@@ -318,7 +318,7 @@ static inline int mcs_setup_transceiver(struct mcs_cb *mcs)
318 return ret; 318 return ret;
319 319
320error: 320error:
321 IRDA_ERROR("%s", msg); 321 IRDA_ERROR("%s\n", msg);
322 return ret; 322 return ret;
323} 323}
324 324
@@ -353,7 +353,7 @@ static unsigned mcs_wrap_fir_skb(const struct sk_buff *skb, __u8 *buf)
353 buf[0] = len & 0xff; 353 buf[0] = len & 0xff;
354 buf[1] = (len >> 8) & 0xff; 354 buf[1] = (len >> 8) & 0xff;
355 /* copy the data into the tx buffer. */ 355 /* copy the data into the tx buffer. */
356 memcpy(buf+2, skb->data, skb->len); 356 skb_copy_from_linear_data(skb, buf + 2, skb->len);
357 /* put the fcs in the last four bytes in little endian order. */ 357 /* put the fcs in the last four bytes in little endian order. */
358 buf[len - 4] = fcs & 0xff; 358 buf[len - 4] = fcs & 0xff;
359 buf[len - 3] = (fcs >> 8) & 0xff; 359 buf[len - 3] = (fcs >> 8) & 0xff;
@@ -377,7 +377,7 @@ static unsigned mcs_wrap_mir_skb(const struct sk_buff *skb, __u8 *buf)
377 buf[0] = len & 0xff; 377 buf[0] = len & 0xff;
378 buf[1] = (len >> 8) & 0xff; 378 buf[1] = (len >> 8) & 0xff;
379 /* copy the data */ 379 /* copy the data */
380 memcpy(buf+2, skb->data, skb->len); 380 skb_copy_from_linear_data(skb, buf + 2, skb->len);
381 /* put the fcs in last two bytes in little endian order. */ 381 /* put the fcs in last two bytes in little endian order. */
382 buf[len - 2] = fcs & 0xff; 382 buf[len - 2] = fcs & 0xff;
383 buf[len - 1] = (fcs >> 8) & 0xff; 383 buf[len - 1] = (fcs >> 8) & 0xff;
@@ -426,9 +426,9 @@ static void mcs_unwrap_mir(struct mcs_cb *mcs, __u8 *buf, int len)
426 } 426 }
427 427
428 skb_reserve(skb, 1); 428 skb_reserve(skb, 1);
429 memcpy(skb->data, buf, new_len); 429 skb_copy_to_linear_data(skb, buf, new_len);
430 skb_put(skb, new_len); 430 skb_put(skb, new_len);
431 skb->mac.raw = skb->data; 431 skb_reset_mac_header(skb);
432 skb->protocol = htons(ETH_P_IRDA); 432 skb->protocol = htons(ETH_P_IRDA);
433 skb->dev = mcs->netdev; 433 skb->dev = mcs->netdev;
434 434
@@ -479,9 +479,9 @@ static void mcs_unwrap_fir(struct mcs_cb *mcs, __u8 *buf, int len)
479 } 479 }
480 480
481 skb_reserve(skb, 1); 481 skb_reserve(skb, 1);
482 memcpy(skb->data, buf, new_len); 482 skb_copy_to_linear_data(skb, buf, new_len);
483 skb_put(skb, new_len); 483 skb_put(skb, new_len);
484 skb->mac.raw = skb->data; 484 skb_reset_mac_header(skb);
485 skb->protocol = htons(ETH_P_IRDA); 485 skb->protocol = htons(ETH_P_IRDA);
486 skb->dev = mcs->netdev; 486 skb->dev = mcs->netdev;
487 487
@@ -587,7 +587,7 @@ static int mcs_speed_change(struct mcs_cb *mcs)
587 } while(cnt++ < 100 && (rval & MCS_IRINTX)); 587 } while(cnt++ < 100 && (rval & MCS_IRINTX));
588 588
589 if(cnt >= 100) { 589 if(cnt >= 100) {
590 IRDA_ERROR("unable to change speed"); 590 IRDA_ERROR("unable to change speed\n");
591 ret = -EIO; 591 ret = -EIO;
592 goto error; 592 goto error;
593 } 593 }
@@ -638,7 +638,7 @@ static int mcs_speed_change(struct mcs_cb *mcs)
638 638
639 default: 639 default:
640 ret = 1; 640 ret = 1;
641 IRDA_WARNING("Unknown transceiver type: %d", 641 IRDA_WARNING("Unknown transceiver type: %d\n",
642 mcs->transceiver_type); 642 mcs->transceiver_type);
643 } 643 }
644 if (unlikely(ret)) 644 if (unlikely(ret))
@@ -733,7 +733,7 @@ static int mcs_net_open(struct net_device *netdev)
733 sprintf(hwname, "usb#%d", mcs->usbdev->devnum); 733 sprintf(hwname, "usb#%d", mcs->usbdev->devnum);
734 mcs->irlap = irlap_open(netdev, &mcs->qos, hwname); 734 mcs->irlap = irlap_open(netdev, &mcs->qos, hwname);
735 if (!mcs->irlap) { 735 if (!mcs->irlap) {
736 IRDA_ERROR("mcs7780: irlap_open failed"); 736 IRDA_ERROR("mcs7780: irlap_open failed\n");
737 goto error2; 737 goto error2;
738 } 738 }
739 739
@@ -862,7 +862,7 @@ static int mcs_hard_xmit(struct sk_buff *skb, struct net_device *ndev)
862 mcs->out_buf, wraplen, mcs_send_irq, mcs); 862 mcs->out_buf, wraplen, mcs_send_irq, mcs);
863 863
864 if ((ret = usb_submit_urb(mcs->tx_urb, GFP_ATOMIC))) { 864 if ((ret = usb_submit_urb(mcs->tx_urb, GFP_ATOMIC))) {
865 IRDA_ERROR("failed tx_urb: %d", ret); 865 IRDA_ERROR("failed tx_urb: %d\n", ret);
866 switch (ret) { 866 switch (ret) {
867 case -ENODEV: 867 case -ENODEV:
868 case -EPIPE: 868 case -EPIPE:
@@ -897,7 +897,7 @@ static int mcs_probe(struct usb_interface *intf,
897 if (!ndev) 897 if (!ndev)
898 goto error1; 898 goto error1;
899 899
900 IRDA_DEBUG(1, "MCS7780 USB-IrDA bridge found at %d.", udev->devnum); 900 IRDA_DEBUG(1, "MCS7780 USB-IrDA bridge found at %d.\n", udev->devnum);
901 901
902 /* what is it realy for? */ 902 /* what is it realy for? */
903 SET_MODULE_OWNER(ndev); 903 SET_MODULE_OWNER(ndev);
@@ -905,7 +905,7 @@ static int mcs_probe(struct usb_interface *intf,
905 905
906 ret = usb_reset_configuration(udev); 906 ret = usb_reset_configuration(udev);
907 if (ret != 0) { 907 if (ret != 0) {
908 IRDA_ERROR("mcs7780: usb reset configuration failed"); 908 IRDA_ERROR("mcs7780: usb reset configuration failed\n");
909 goto error2; 909 goto error2;
910 } 910 }
911 911
@@ -950,7 +950,7 @@ static int mcs_probe(struct usb_interface *intf,
950 if (ret != 0) 950 if (ret != 0)
951 goto error2; 951 goto error2;
952 952
953 IRDA_DEBUG(1, "IrDA: Registered MosChip MCS7780 device as %s", 953 IRDA_DEBUG(1, "IrDA: Registered MosChip MCS7780 device as %s\n",
954 ndev->name); 954 ndev->name);
955 955
956 mcs->transceiver_type = transceiver_type; 956 mcs->transceiver_type = transceiver_type;
@@ -981,7 +981,7 @@ static void mcs_disconnect(struct usb_interface *intf)
981 free_netdev(mcs->netdev); 981 free_netdev(mcs->netdev);
982 982
983 usb_set_intfdata(intf, NULL); 983 usb_set_intfdata(intf, NULL);
984 IRDA_DEBUG(0, "MCS7780 now disconnected."); 984 IRDA_DEBUG(0, "MCS7780 now disconnected.\n");
985} 985}
986 986
987/* Module insertion */ 987/* Module insertion */
@@ -992,7 +992,7 @@ static int __init mcs_init(void)
992 /* register this driver with the USB subsystem */ 992 /* register this driver with the USB subsystem */
993 result = usb_register(&mcs_driver); 993 result = usb_register(&mcs_driver);
994 if (result) 994 if (result)
995 IRDA_ERROR("usb_register failed. Error number %d", result); 995 IRDA_ERROR("usb_register failed. Error number %d\n", result);
996 996
997 return result; 997 return result;
998} 998}
diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c
index 29b5ccd29d0b..d96c89751a71 100644
--- a/drivers/net/irda/nsc-ircc.c
+++ b/drivers/net/irda/nsc-ircc.c
@@ -1466,9 +1466,8 @@ static int nsc_ircc_hard_xmit_fir(struct sk_buff *skb, struct net_device *dev)
1466 1466
1467 self->stats.tx_bytes += skb->len; 1467 self->stats.tx_bytes += skb->len;
1468 1468
1469 memcpy(self->tx_fifo.queue[self->tx_fifo.free].start, skb->data, 1469 skb_copy_from_linear_data(skb, self->tx_fifo.queue[self->tx_fifo.free].start,
1470 skb->len); 1470 skb->len);
1471
1472 self->tx_fifo.len++; 1471 self->tx_fifo.len++;
1473 self->tx_fifo.free++; 1472 self->tx_fifo.free++;
1474 1473
@@ -1869,10 +1868,14 @@ static int nsc_ircc_dma_receive_complete(struct nsc_ircc_cb *self, int iobase)
1869 /* Copy frame without CRC */ 1868 /* Copy frame without CRC */
1870 if (self->io.speed < 4000000) { 1869 if (self->io.speed < 4000000) {
1871 skb_put(skb, len-2); 1870 skb_put(skb, len-2);
1872 memcpy(skb->data, self->rx_buff.data, len-2); 1871 skb_copy_to_linear_data(skb,
1872 self->rx_buff.data,
1873 len - 2);
1873 } else { 1874 } else {
1874 skb_put(skb, len-4); 1875 skb_put(skb, len-4);
1875 memcpy(skb->data, self->rx_buff.data, len-4); 1876 skb_copy_to_linear_data(skb,
1877 self->rx_buff.data,
1878 len - 4);
1876 } 1879 }
1877 1880
1878 /* Move to next frame */ 1881 /* Move to next frame */
@@ -1881,7 +1884,7 @@ static int nsc_ircc_dma_receive_complete(struct nsc_ircc_cb *self, int iobase)
1881 self->stats.rx_packets++; 1884 self->stats.rx_packets++;
1882 1885
1883 skb->dev = self->netdev; 1886 skb->dev = self->netdev;
1884 skb->mac.raw = skb->data; 1887 skb_reset_mac_header(skb);
1885 skb->protocol = htons(ETH_P_IRDA); 1888 skb->protocol = htons(ETH_P_IRDA);
1886 netif_rx(skb); 1889 netif_rx(skb);
1887 self->netdev->last_rx = jiffies; 1890 self->netdev->last_rx = jiffies;
diff --git a/drivers/net/irda/pxaficp_ir.c b/drivers/net/irda/pxaficp_ir.c
index 2272156af31e..fb196fd91855 100644
--- a/drivers/net/irda/pxaficp_ir.c
+++ b/drivers/net/irda/pxaficp_ir.c
@@ -386,12 +386,12 @@ static void pxa_irda_fir_irq_eif(struct pxa_irda *si, struct net_device *dev, in
386 386
387 /* Align IP header to 20 bytes */ 387 /* Align IP header to 20 bytes */
388 skb_reserve(skb, 1); 388 skb_reserve(skb, 1);
389 memcpy(skb->data, si->dma_rx_buff, len); 389 skb_copy_to_linear_data(skb, si->dma_rx_buff, len);
390 skb_put(skb, len); 390 skb_put(skb, len);
391 391
392 /* Feed it to IrLAP */ 392 /* Feed it to IrLAP */
393 skb->dev = dev; 393 skb->dev = dev;
394 skb->mac.raw = skb->data; 394 skb_reset_mac_header(skb);
395 skb->protocol = htons(ETH_P_IRDA); 395 skb->protocol = htons(ETH_P_IRDA);
396 netif_rx(skb); 396 netif_rx(skb);
397 397
@@ -484,7 +484,7 @@ static int pxa_irda_hard_xmit(struct sk_buff *skb, struct net_device *dev)
484 unsigned long mtt = irda_get_mtt(skb); 484 unsigned long mtt = irda_get_mtt(skb);
485 485
486 si->dma_tx_buff_len = skb->len; 486 si->dma_tx_buff_len = skb->len;
487 memcpy(si->dma_tx_buff, skb->data, skb->len); 487 skb_copy_from_linear_data(skb, si->dma_tx_buff, skb->len);
488 488
489 if (mtt) 489 if (mtt)
490 while ((unsigned)(OSCR - si->last_oscr)/4 < mtt) 490 while ((unsigned)(OSCR - si->last_oscr)/4 < mtt)
diff --git a/drivers/net/irda/sa1100_ir.c b/drivers/net/irda/sa1100_ir.c
index 937372d00398..056639f72bec 100644
--- a/drivers/net/irda/sa1100_ir.c
+++ b/drivers/net/irda/sa1100_ir.c
@@ -504,7 +504,7 @@ static void sa1100_irda_fir_error(struct sa1100_irda *si, struct net_device *dev
504 504
505 skb_put(skb, len); 505 skb_put(skb, len);
506 skb->dev = dev; 506 skb->dev = dev;
507 skb->mac.raw = skb->data; 507 skb_reset_mac_header(skb);
508 skb->protocol = htons(ETH_P_IRDA); 508 skb->protocol = htons(ETH_P_IRDA);
509 si->stats.rx_packets++; 509 si->stats.rx_packets++;
510 si->stats.rx_bytes += len; 510 si->stats.rx_bytes += len;
diff --git a/drivers/net/irda/smsc-ircc2.c b/drivers/net/irda/smsc-ircc2.c
index 31c623381ea8..198bf3bfa70f 100644
--- a/drivers/net/irda/smsc-ircc2.c
+++ b/drivers/net/irda/smsc-ircc2.c
@@ -315,6 +315,7 @@ static struct smsc_chip __initdata lpc_chips_flat[] =
315{ 315{
316 /* Base address 0x2E or 0x4E */ 316 /* Base address 0x2E or 0x4E */
317 { "47N227", KEY55_1|FIR|SERx4, 0x5a, 0x00 }, 317 { "47N227", KEY55_1|FIR|SERx4, 0x5a, 0x00 },
318 { "47N227", KEY55_1|FIR|SERx4, 0x7a, 0x00 },
318 { "47N267", KEY55_1|FIR|SERx4, 0x5e, 0x00 }, 319 { "47N267", KEY55_1|FIR|SERx4, 0x5e, 0x00 },
319 { NULL } 320 { NULL }
320}; 321};
@@ -1161,7 +1162,7 @@ static int smsc_ircc_hard_xmit_fir(struct sk_buff *skb, struct net_device *dev)
1161 self->new_speed = speed; 1162 self->new_speed = speed;
1162 } 1163 }
1163 1164
1164 memcpy(self->tx_buff.head, skb->data, skb->len); 1165 skb_copy_from_linear_data(skb, self->tx_buff.head, skb->len);
1165 1166
1166 self->tx_buff.len = skb->len; 1167 self->tx_buff.len = skb->len;
1167 self->tx_buff.data = self->tx_buff.head; 1168 self->tx_buff.data = self->tx_buff.head;
@@ -1412,7 +1413,7 @@ static void smsc_ircc_dma_receive_complete(struct smsc_ircc_cb *self)
1412 self->stats.rx_bytes += len; 1413 self->stats.rx_bytes += len;
1413 1414
1414 skb->dev = self->netdev; 1415 skb->dev = self->netdev;
1415 skb->mac.raw = skb->data; 1416 skb_reset_mac_header(skb);
1416 skb->protocol = htons(ETH_P_IRDA); 1417 skb->protocol = htons(ETH_P_IRDA);
1417 netif_rx(skb); 1418 netif_rx(skb);
1418} 1419}
diff --git a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c
index 20d306fea4cb..755aa444a4dd 100644
--- a/drivers/net/irda/stir4200.c
+++ b/drivers/net/irda/stir4200.c
@@ -52,7 +52,6 @@
52#include <linux/kthread.h> 52#include <linux/kthread.h>
53#include <linux/freezer.h> 53#include <linux/freezer.h>
54#include <net/irda/irda.h> 54#include <net/irda/irda.h>
55#include <net/irda/irlap.h>
56#include <net/irda/irda_device.h> 55#include <net/irda/irda_device.h>
57#include <net/irda/wrapper.h> 56#include <net/irda/wrapper.h>
58#include <net/irda/crc.h> 57#include <net/irda/crc.h>
@@ -349,7 +348,7 @@ static void fir_eof(struct stir_cb *stir)
349 } 348 }
350 skb_reserve(nskb, 1); 349 skb_reserve(nskb, 1);
351 skb = nskb; 350 skb = nskb;
352 memcpy(nskb->data, rx_buff->data, len); 351 skb_copy_to_linear_data(nskb, rx_buff->data, len);
353 } else { 352 } else {
354 nskb = dev_alloc_skb(rx_buff->truesize); 353 nskb = dev_alloc_skb(rx_buff->truesize);
355 if (unlikely(!nskb)) { 354 if (unlikely(!nskb)) {
@@ -364,7 +363,7 @@ static void fir_eof(struct stir_cb *stir)
364 363
365 skb_put(skb, len); 364 skb_put(skb, len);
366 365
367 skb->mac.raw = skb->data; 366 skb_reset_mac_header(skb);
368 skb->protocol = htons(ETH_P_IRDA); 367 skb->protocol = htons(ETH_P_IRDA);
369 skb->dev = stir->netdev; 368 skb->dev = stir->netdev;
370 369
diff --git a/drivers/net/irda/via-ircc.c b/drivers/net/irda/via-ircc.c
index c3ed9b3067e5..ff5358574d0a 100644
--- a/drivers/net/irda/via-ircc.c
+++ b/drivers/net/irda/via-ircc.c
@@ -925,8 +925,8 @@ static int via_ircc_hard_xmit_fir(struct sk_buff *skb,
925 925
926 self->tx_fifo.tail += skb->len; 926 self->tx_fifo.tail += skb->len;
927 self->stats.tx_bytes += skb->len; 927 self->stats.tx_bytes += skb->len;
928 memcpy(self->tx_fifo.queue[self->tx_fifo.free].start, skb->data, 928 skb_copy_from_linear_data(skb,
929 skb->len); 929 self->tx_fifo.queue[self->tx_fifo.free].start, skb->len);
930 self->tx_fifo.len++; 930 self->tx_fifo.len++;
931 self->tx_fifo.free++; 931 self->tx_fifo.free++;
932//F01 if (self->tx_fifo.len == 1) { 932//F01 if (self->tx_fifo.len == 1) {
@@ -1125,7 +1125,7 @@ static int via_ircc_dma_receive_complete(struct via_ircc_cb *self,
1125 self->stats.rx_bytes += len; 1125 self->stats.rx_bytes += len;
1126 self->stats.rx_packets++; 1126 self->stats.rx_packets++;
1127 skb->dev = self->netdev; 1127 skb->dev = self->netdev;
1128 skb->mac.raw = skb->data; 1128 skb_reset_mac_header(skb);
1129 skb->protocol = htons(ETH_P_IRDA); 1129 skb->protocol = htons(ETH_P_IRDA);
1130 netif_rx(skb); 1130 netif_rx(skb);
1131 return TRUE; 1131 return TRUE;
@@ -1189,7 +1189,7 @@ F01_E */
1189 skb_reserve(skb, 1); 1189 skb_reserve(skb, 1);
1190 skb_put(skb, len - 4); 1190 skb_put(skb, len - 4);
1191 1191
1192 memcpy(skb->data, self->rx_buff.data, len - 4); 1192 skb_copy_to_linear_data(skb, self->rx_buff.data, len - 4);
1193 IRDA_DEBUG(2, "%s(): len=%x.rx_buff=%p\n", __FUNCTION__, 1193 IRDA_DEBUG(2, "%s(): len=%x.rx_buff=%p\n", __FUNCTION__,
1194 len - 4, self->rx_buff.data); 1194 len - 4, self->rx_buff.data);
1195 1195
@@ -1198,7 +1198,7 @@ F01_E */
1198 self->stats.rx_bytes += len; 1198 self->stats.rx_bytes += len;
1199 self->stats.rx_packets++; 1199 self->stats.rx_packets++;
1200 skb->dev = self->netdev; 1200 skb->dev = self->netdev;
1201 skb->mac.raw = skb->data; 1201 skb_reset_mac_header(skb);
1202 skb->protocol = htons(ETH_P_IRDA); 1202 skb->protocol = htons(ETH_P_IRDA);
1203 netif_rx(skb); 1203 netif_rx(skb);
1204 1204
@@ -1234,7 +1234,7 @@ static int upload_rxdata(struct via_ircc_cb *self, int iobase)
1234 } 1234 }
1235 skb_reserve(skb, 1); 1235 skb_reserve(skb, 1);
1236 skb_put(skb, len - 4 + 1); 1236 skb_put(skb, len - 4 + 1);
1237 memcpy(skb->data, self->rx_buff.data, len - 4 + 1); 1237 skb_copy_to_linear_data(skb, self->rx_buff.data, len - 4 + 1);
1238 st_fifo->tail++; 1238 st_fifo->tail++;
1239 st_fifo->len++; 1239 st_fifo->len++;
1240 if (st_fifo->tail > MAX_RX_WINDOW) 1240 if (st_fifo->tail > MAX_RX_WINDOW)
@@ -1244,7 +1244,7 @@ static int upload_rxdata(struct via_ircc_cb *self, int iobase)
1244 self->stats.rx_bytes += len; 1244 self->stats.rx_bytes += len;
1245 self->stats.rx_packets++; 1245 self->stats.rx_packets++;
1246 skb->dev = self->netdev; 1246 skb->dev = self->netdev;
1247 skb->mac.raw = skb->data; 1247 skb_reset_mac_header(skb);
1248 skb->protocol = htons(ETH_P_IRDA); 1248 skb->protocol = htons(ETH_P_IRDA);
1249 netif_rx(skb); 1249 netif_rx(skb);
1250 if (st_fifo->len < (MAX_RX_WINDOW + 2)) { 1250 if (st_fifo->len < (MAX_RX_WINDOW + 2)) {
@@ -1303,7 +1303,7 @@ static int RxTimerHandler(struct via_ircc_cb *self, int iobase)
1303 } 1303 }
1304 skb_reserve(skb, 1); 1304 skb_reserve(skb, 1);
1305 skb_put(skb, len - 4); 1305 skb_put(skb, len - 4);
1306 memcpy(skb->data, self->rx_buff.data, len - 4); 1306 skb_copy_to_linear_data(skb, self->rx_buff.data, len - 4);
1307 1307
1308 IRDA_DEBUG(2, "%s(): len=%x.head=%x\n", __FUNCTION__, 1308 IRDA_DEBUG(2, "%s(): len=%x.head=%x\n", __FUNCTION__,
1309 len - 4, st_fifo->head); 1309 len - 4, st_fifo->head);
@@ -1313,7 +1313,7 @@ static int RxTimerHandler(struct via_ircc_cb *self, int iobase)
1313 self->stats.rx_bytes += len; 1313 self->stats.rx_bytes += len;
1314 self->stats.rx_packets++; 1314 self->stats.rx_packets++;
1315 skb->dev = self->netdev; 1315 skb->dev = self->netdev;
1316 skb->mac.raw = skb->data; 1316 skb_reset_mac_header(skb);
1317 skb->protocol = htons(ETH_P_IRDA); 1317 skb->protocol = htons(ETH_P_IRDA);
1318 netif_rx(skb); 1318 netif_rx(skb);
1319 } //while 1319 } //while
diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c
index 3457e9d8b667..c4be973867a6 100644
--- a/drivers/net/irda/vlsi_ir.c
+++ b/drivers/net/irda/vlsi_ir.c
@@ -595,7 +595,7 @@ static int vlsi_process_rx(struct vlsi_ring *r, struct ring_descr *rd)
595 rd->skb = NULL; 595 rd->skb = NULL;
596 skb->dev = ndev; 596 skb->dev = ndev;
597 memcpy(skb_put(skb,len), rd->buf, len); 597 memcpy(skb_put(skb,len), rd->buf, len);
598 skb->mac.raw = skb->data; 598 skb_reset_mac_header(skb);
599 if (in_interrupt()) 599 if (in_interrupt())
600 netif_rx(skb); 600 netif_rx(skb);
601 else 601 else
@@ -993,7 +993,7 @@ static int vlsi_hard_start_xmit(struct sk_buff *skb, struct net_device *ndev)
993 goto drop; 993 goto drop;
994 } 994 }
995 else 995 else
996 memcpy(rd->buf, skb->data, len); 996 skb_copy_from_linear_data(skb, rd->buf, len);
997 } 997 }
998 998
999 rd->skb = skb; /* remember skb for tx-complete stats */ 999 rd->skb = skb; /* remember skb for tx-complete stats */
diff --git a/drivers/net/irda/w83977af_ir.c b/drivers/net/irda/w83977af_ir.c
index 4212657fa4f9..5182e800cc18 100644
--- a/drivers/net/irda/w83977af_ir.c
+++ b/drivers/net/irda/w83977af_ir.c
@@ -529,7 +529,7 @@ int w83977af_hard_xmit(struct sk_buff *skb, struct net_device *dev)
529 /* Decide if we should use PIO or DMA transfer */ 529 /* Decide if we should use PIO or DMA transfer */
530 if (self->io.speed > PIO_MAX_SPEED) { 530 if (self->io.speed > PIO_MAX_SPEED) {
531 self->tx_buff.data = self->tx_buff.head; 531 self->tx_buff.data = self->tx_buff.head;
532 memcpy(self->tx_buff.data, skb->data, skb->len); 532 skb_copy_from_linear_data(skb, self->tx_buff.data, skb->len);
533 self->tx_buff.len = skb->len; 533 self->tx_buff.len = skb->len;
534 534
535 mtt = irda_get_mtt(skb); 535 mtt = irda_get_mtt(skb);
@@ -908,10 +908,14 @@ int w83977af_dma_receive_complete(struct w83977af_ir *self)
908 /* Copy frame without CRC */ 908 /* Copy frame without CRC */
909 if (self->io.speed < 4000000) { 909 if (self->io.speed < 4000000) {
910 skb_put(skb, len-2); 910 skb_put(skb, len-2);
911 memcpy(skb->data, self->rx_buff.data, len-2); 911 skb_copy_to_linear_data(skb,
912 self->rx_buff.data,
913 len - 2);
912 } else { 914 } else {
913 skb_put(skb, len-4); 915 skb_put(skb, len-4);
914 memcpy(skb->data, self->rx_buff.data, len-4); 916 skb_copy_to_linear_data(skb,
917 self->rx_buff.data,
918 len - 4);
915 } 919 }
916 920
917 /* Move to next frame */ 921 /* Move to next frame */
@@ -919,7 +923,7 @@ int w83977af_dma_receive_complete(struct w83977af_ir *self)
919 self->stats.rx_packets++; 923 self->stats.rx_packets++;
920 924
921 skb->dev = self->netdev; 925 skb->dev = self->netdev;
922 skb->mac.raw = skb->data; 926 skb_reset_mac_header(skb);
923 skb->protocol = htons(ETH_P_IRDA); 927 skb->protocol = htons(ETH_P_IRDA);
924 netif_rx(skb); 928 netif_rx(skb);
925 self->netdev->last_rx = jiffies; 929 self->netdev->last_rx = jiffies;
diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c
index 0e9ba3c3faf7..347d50cd77d4 100644
--- a/drivers/net/iseries_veth.c
+++ b/drivers/net/iseries_veth.c
@@ -1540,7 +1540,6 @@ static void veth_receive(struct veth_lpar_connection *cnx,
1540 } 1540 }
1541 1541
1542 skb_put(skb, length); 1542 skb_put(skb, length);
1543 skb->dev = dev;
1544 skb->protocol = eth_type_trans(skb, dev); 1543 skb->protocol = eth_type_trans(skb, dev);
1545 skb->ip_summed = CHECKSUM_NONE; 1544 skb->ip_summed = CHECKSUM_NONE;
1546 netif_rx(skb); /* send it up */ 1545 netif_rx(skb); /* send it up */
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index afc2ec72529e..dfde80e54aef 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -1182,24 +1182,27 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb)
1182 1182
1183 if (likely(skb_is_gso(skb))) { 1183 if (likely(skb_is_gso(skb))) {
1184 struct ixgb_buffer *buffer_info; 1184 struct ixgb_buffer *buffer_info;
1185 struct iphdr *iph;
1186
1185 if (skb_header_cloned(skb)) { 1187 if (skb_header_cloned(skb)) {
1186 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); 1188 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1187 if (err) 1189 if (err)
1188 return err; 1190 return err;
1189 } 1191 }
1190 1192
1191 hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); 1193 hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
1192 mss = skb_shinfo(skb)->gso_size; 1194 mss = skb_shinfo(skb)->gso_size;
1193 skb->nh.iph->tot_len = 0; 1195 iph = ip_hdr(skb);
1194 skb->nh.iph->check = 0; 1196 iph->tot_len = 0;
1195 skb->h.th->check = ~csum_tcpudp_magic(skb->nh.iph->saddr, 1197 iph->check = 0;
1196 skb->nh.iph->daddr, 1198 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
1197 0, IPPROTO_TCP, 0); 1199 iph->daddr, 0,
1198 ipcss = skb->nh.raw - skb->data; 1200 IPPROTO_TCP, 0);
1199 ipcso = (void *)&(skb->nh.iph->check) - (void *)skb->data; 1201 ipcss = skb_network_offset(skb);
1200 ipcse = skb->h.raw - skb->data - 1; 1202 ipcso = (void *)&(iph->check) - (void *)skb->data;
1201 tucss = skb->h.raw - skb->data; 1203 ipcse = skb_transport_offset(skb) - 1;
1202 tucso = (void *)&(skb->h.th->check) - (void *)skb->data; 1204 tucss = skb_transport_offset(skb);
1205 tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data;
1203 tucse = 0; 1206 tucse = 0;
1204 1207
1205 i = adapter->tx_ring.next_to_use; 1208 i = adapter->tx_ring.next_to_use;
@@ -1243,7 +1246,7 @@ ixgb_tx_csum(struct ixgb_adapter *adapter, struct sk_buff *skb)
1243 1246
1244 if(likely(skb->ip_summed == CHECKSUM_PARTIAL)) { 1247 if(likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
1245 struct ixgb_buffer *buffer_info; 1248 struct ixgb_buffer *buffer_info;
1246 css = skb->h.raw - skb->data; 1249 css = skb_transport_offset(skb);
1247 cso = css + skb->csum_offset; 1250 cso = css + skb->csum_offset;
1248 1251
1249 i = adapter->tx_ring.next_to_use; 1252 i = adapter->tx_ring.next_to_use;
@@ -2014,9 +2017,12 @@ ixgb_clean_rx_irq(struct ixgb_adapter *adapter)
2014 netdev_alloc_skb(netdev, length + NET_IP_ALIGN); 2017 netdev_alloc_skb(netdev, length + NET_IP_ALIGN);
2015 if (new_skb) { 2018 if (new_skb) {
2016 skb_reserve(new_skb, NET_IP_ALIGN); 2019 skb_reserve(new_skb, NET_IP_ALIGN);
2017 memcpy(new_skb->data - NET_IP_ALIGN, 2020 skb_copy_to_linear_data_offset(new_skb,
2018 skb->data - NET_IP_ALIGN, 2021 -NET_IP_ALIGN,
2019 length + NET_IP_ALIGN); 2022 (skb->data -
2023 NET_IP_ALIGN),
2024 (length +
2025 NET_IP_ALIGN));
2020 /* save the skb in buffer_info as good */ 2026 /* save the skb in buffer_info as good */
2021 buffer_info->skb = skb; 2027 buffer_info->skb = skb;
2022 skb = new_skb; 2028 skb = new_skb;
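
The ixgb TSO rewrite leans on offset helpers whose values match the old unioned-pointer arithmetic. A sketch of the assumed equivalences; example_tso_header_len is illustrative:

#include <linux/skbuff.h>
#include <linux/tcp.h>

static unsigned int example_tso_header_len(const struct sk_buff *skb)
{
        /* skb_transport_offset(skb) == old skb->h.raw  - skb->data
         * skb_network_offset(skb)   == old skb->nh.raw - skb->data
         * tcp_hdrlen(skb)           == tcp_hdr(skb)->doff << 2      */
        return skb_transport_offset(skb) + tcp_hdrlen(skb);
}
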
diff --git a/drivers/net/ixp2000/ixpdev.c b/drivers/net/ixp2000/ixpdev.c
index a4eccb11d677..6683afc02aaa 100644
--- a/drivers/net/ixp2000/ixpdev.c
+++ b/drivers/net/ixp2000/ixpdev.c
@@ -110,11 +110,10 @@ static int ixpdev_rx(struct net_device *dev, int *budget)
110 110
111 skb = dev_alloc_skb(desc->pkt_length + 2); 111 skb = dev_alloc_skb(desc->pkt_length + 2);
112 if (likely(skb != NULL)) { 112 if (likely(skb != NULL)) {
113 skb->dev = nds[desc->channel];
114 skb_reserve(skb, 2); 113 skb_reserve(skb, 2);
115 eth_copy_and_sum(skb, buf, desc->pkt_length, 0); 114 eth_copy_and_sum(skb, buf, desc->pkt_length, 0);
116 skb_put(skb, desc->pkt_length); 115 skb_put(skb, desc->pkt_length);
117 skb->protocol = eth_type_trans(skb, skb->dev); 116 skb->protocol = eth_type_trans(skb, nds[desc->channel]);
118 117
119 skb->dev->last_rx = jiffies; 118 skb->dev->last_rx = jiffies;
120 119
diff --git a/drivers/net/lance.c b/drivers/net/lance.c
index a3843320dbe1..0fe96c85828b 100644
--- a/drivers/net/lance.c
+++ b/drivers/net/lance.c
@@ -988,7 +988,7 @@ static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev)
988 if (lance_debug > 5) 988 if (lance_debug > 5)
989 printk("%s: bouncing a high-memory packet (%#x).\n", 989 printk("%s: bouncing a high-memory packet (%#x).\n",
990 dev->name, (u32)isa_virt_to_bus(skb->data)); 990 dev->name, (u32)isa_virt_to_bus(skb->data));
991 memcpy(&lp->tx_bounce_buffs[entry], skb->data, skb->len); 991 skb_copy_from_linear_data(skb, &lp->tx_bounce_buffs[entry], skb->len);
992 lp->tx_ring[entry].base = 992 lp->tx_ring[entry].base =
993 ((u32)isa_virt_to_bus((lp->tx_bounce_buffs + entry)) & 0xffffff) | 0x83000000; 993 ((u32)isa_virt_to_bus((lp->tx_bounce_buffs + entry)) & 0xffffff) | 0x83000000;
994 dev_kfree_skb(skb); 994 dev_kfree_skb(skb);
@@ -1184,7 +1184,6 @@ lance_rx(struct net_device *dev)
1184 } 1184 }
1185 break; 1185 break;
1186 } 1186 }
1187 skb->dev = dev;
1188 skb_reserve(skb,2); /* 16 byte align */ 1187 skb_reserve(skb,2); /* 16 byte align */
1189 skb_put(skb,pkt_len); /* Make room */ 1188 skb_put(skb,pkt_len); /* Make room */
1190 eth_copy_and_sum(skb, 1189 eth_copy_and_sum(skb,
diff --git a/drivers/net/lasi_82596.c b/drivers/net/lasi_82596.c
index 452863d5d498..0edcd125fd61 100644
--- a/drivers/net/lasi_82596.c
+++ b/drivers/net/lasi_82596.c
@@ -801,7 +801,6 @@ memory_squeeze:
801 lp->stats.rx_dropped++; 801 lp->stats.rx_dropped++;
802 } 802 }
803 else { 803 else {
804 skb->dev = dev;
805 if (!rx_in_place) { 804 if (!rx_in_place) {
806 /* 16 byte align the data fields */ 805 /* 16 byte align the data fields */
807 dma_sync_single_for_cpu(lp->dev, (dma_addr_t)WSWAPchar(rbd->b_data), PKT_BUF_SZ, DMA_FROM_DEVICE); 806 dma_sync_single_for_cpu(lp->dev, (dma_addr_t)WSWAPchar(rbd->b_data), PKT_BUF_SZ, DMA_FROM_DEVICE);
diff --git a/drivers/net/lib8390.c b/drivers/net/lib8390.c
index e726c06b8dc6..5c86e737f954 100644
--- a/drivers/net/lib8390.c
+++ b/drivers/net/lib8390.c
@@ -722,7 +722,6 @@ static void ei_receive(struct net_device *dev)
722 else 722 else
723 { 723 {
724 skb_reserve(skb,2); /* IP headers on 16 byte boundaries */ 724 skb_reserve(skb,2); /* IP headers on 16 byte boundaries */
725 skb->dev = dev;
726 skb_put(skb, pkt_len); /* Make room */ 725 skb_put(skb, pkt_len); /* Make room */
727 ei_block_input(dev, pkt_len, skb, current_offset + sizeof(rx_frame)); 726 ei_block_input(dev, pkt_len, skb, current_offset + sizeof(rx_frame));
728 skb->protocol=eth_type_trans(skb,dev); 727 skb->protocol=eth_type_trans(skb,dev);
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 2b739fd584f1..6ba6ed2b480a 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -75,8 +75,9 @@ static DEFINE_PER_CPU(struct pcpu_lstats, pcpu_lstats);
75#ifdef LOOPBACK_TSO 75#ifdef LOOPBACK_TSO
76static void emulate_large_send_offload(struct sk_buff *skb) 76static void emulate_large_send_offload(struct sk_buff *skb)
77{ 77{
78 struct iphdr *iph = skb->nh.iph; 78 struct iphdr *iph = ip_hdr(skb);
79 struct tcphdr *th = (struct tcphdr*)(skb->nh.raw + (iph->ihl * 4)); 79 struct tcphdr *th = (struct tcphdr *)(skb_network_header(skb) +
80 (iph->ihl * 4));
80 unsigned int doffset = (iph->ihl + th->doff) * 4; 81 unsigned int doffset = (iph->ihl + th->doff) * 4;
81 unsigned int mtu = skb_shinfo(skb)->gso_size + doffset; 82 unsigned int mtu = skb_shinfo(skb)->gso_size + doffset;
82 unsigned int offset = 0; 83 unsigned int offset = 0;
@@ -90,10 +91,11 @@ static void emulate_large_send_offload(struct sk_buff *skb)
90 if (!nskb) 91 if (!nskb)
91 break; 92 break;
92 skb_reserve(nskb, 32); 93 skb_reserve(nskb, 32);
93 nskb->mac.raw = nskb->data - 14; 94 skb_set_mac_header(nskb, -ETH_HLEN);
94 nskb->nh.raw = nskb->data; 95 skb_reset_network_header(nskb);
95 iph = nskb->nh.iph; 96 iph = ip_hdr(nskb);
96 memcpy(nskb->data, skb->nh.raw, doffset); 97 skb_copy_to_linear_data(nskb, skb_network_header(skb),
98 doffset);
97 if (skb_copy_bits(skb, 99 if (skb_copy_bits(skb,
98 doffset + offset, 100 doffset + offset,
99 nskb->data + doffset, 101 nskb->data + doffset,
@@ -108,7 +110,7 @@ static void emulate_large_send_offload(struct sk_buff *skb)
108 memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); 110 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
109 nskb->pkt_type = skb->pkt_type; 111 nskb->pkt_type = skb->pkt_type;
110 112
111 th = (struct tcphdr*)(nskb->nh.raw + iph->ihl*4); 113 th = (struct tcphdr *)(skb_network_header(nskb) + iph->ihl * 4);
112 iph->tot_len = htons(frag_size + doffset); 114 iph->tot_len = htons(frag_size + doffset);
113 iph->id = htons(id); 115 iph->id = htons(id);
114 iph->check = 0; 116 iph->check = 0;
@@ -137,7 +139,6 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
137 skb_orphan(skb); 139 skb_orphan(skb);
138 140
139 skb->protocol = eth_type_trans(skb,dev); 141 skb->protocol = eth_type_trans(skb,dev);
140 skb->dev = dev;
141#ifndef LOOPBACK_MUST_CHECKSUM 142#ifndef LOOPBACK_MUST_CHECKSUM
142 skb->ip_summed = CHECKSUM_UNNECESSARY; 143 skb->ip_summed = CHECKSUM_UNNECESSARY;
143#endif 144#endif
@@ -145,7 +146,7 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
145#ifdef LOOPBACK_TSO 146#ifdef LOOPBACK_TSO
146 if (skb_is_gso(skb)) { 147 if (skb_is_gso(skb)) {
147 BUG_ON(skb->protocol != htons(ETH_P_IP)); 148 BUG_ON(skb->protocol != htons(ETH_P_IP));
148 BUG_ON(skb->nh.iph->protocol != IPPROTO_TCP); 149 BUG_ON(ip_hdr(skb)->protocol != IPPROTO_TCP);
149 150
150 emulate_large_send_offload(skb); 151 emulate_large_send_offload(skb);
151 return 0; 152 return 0;
@@ -163,11 +164,9 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
163 return 0; 164 return 0;
164} 165}
165 166
166static struct net_device_stats loopback_stats;
167
168static struct net_device_stats *get_stats(struct net_device *dev) 167static struct net_device_stats *get_stats(struct net_device *dev)
169{ 168{
170 struct net_device_stats *stats = &loopback_stats; 169 struct net_device_stats *stats = &dev->stats;
171 unsigned long bytes = 0; 170 unsigned long bytes = 0;
172 unsigned long packets = 0; 171 unsigned long packets = 0;
173 int i; 172 int i;
@@ -207,7 +206,6 @@ static const struct ethtool_ops loopback_ethtool_ops = {
207struct net_device loopback_dev = { 206struct net_device loopback_dev = {
208 .name = "lo", 207 .name = "lo",
209 .get_stats = &get_stats, 208 .get_stats = &get_stats,
210 .priv = &loopback_stats,
211 .mtu = (16 * 1024) + 20 + 20 + 12, 209 .mtu = (16 * 1024) + 20 + 20 + 12,
212 .hard_start_xmit = loopback_xmit, 210 .hard_start_xmit = loopback_xmit,
213 .hard_header = eth_header, 211 .hard_header = eth_header,
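
With the file-scope loopback_stats removed, the counters live inside struct net_device itself, so get_stats() needs no private object and the .priv hookup can go. A reduced sketch of the resulting shape; the per-CPU summation the real function performs is elided:

#include <linux/netdevice.h>

static struct net_device_stats *example_get_stats(struct net_device *dev)
{
        struct net_device_stats *stats = &dev->stats;  /* embedded, not static */

        /* the real function sums the per-CPU pcpu_lstats byte and
         * packet counts into *stats before returning it */
        return stats;
}
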
diff --git a/drivers/net/lp486e.c b/drivers/net/lp486e.c
index 177c502f7385..5fc18da1873d 100644
--- a/drivers/net/lp486e.c
+++ b/drivers/net/lp486e.c
@@ -676,7 +676,6 @@ i596_rx_one(struct net_device *dev, struct i596_private *lp,
676 return 1; 676 return 1;
677 } 677 }
678 678
679 skb->dev = dev;
680 memcpy(skb_put(skb,pkt_len), rfd->data, pkt_len); 679 memcpy(skb_put(skb,pkt_len), rfd->data, pkt_len);
681 680
682 skb->protocol = eth_type_trans(skb,dev); 681 skb->protocol = eth_type_trans(skb,dev);
diff --git a/drivers/net/mac89x0.c b/drivers/net/mac89x0.c
index e960138011c0..90e695d53266 100644
--- a/drivers/net/mac89x0.c
+++ b/drivers/net/mac89x0.c
@@ -530,7 +530,6 @@ net_rx(struct net_device *dev)
530 return; 530 return;
531 } 531 }
532 skb_put(skb, length); 532 skb_put(skb, length);
533 skb->dev = dev;
534 533
535 memcpy_fromio(skb->data, dev->mem_start + PP_RxFrame, length); 534 memcpy_fromio(skb->data, dev->mem_start + PP_RxFrame, length);
536 535
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index 2e9571bf0736..0e04f7ac3f2e 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -357,7 +357,6 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
357 } 357 }
358 358
359 skb_reserve(skb, RX_OFFSET); 359 skb_reserve(skb, RX_OFFSET);
360 skb->dev = bp->dev;
361 skb->ip_summed = CHECKSUM_NONE; 360 skb->ip_summed = CHECKSUM_NONE;
362 skb_put(skb, len); 361 skb_put(skb, len);
363 362
@@ -368,9 +367,10 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
368 BUG_ON(frag != last_frag); 367 BUG_ON(frag != last_frag);
369 frag_len = len - offset; 368 frag_len = len - offset;
370 } 369 }
371 memcpy(skb->data + offset, 370 skb_copy_to_linear_data_offset(skb, offset,
372 bp->rx_buffers + (RX_BUFFER_SIZE * frag), 371 (bp->rx_buffers +
373 frag_len); 372 (RX_BUFFER_SIZE * frag)),
373 frag_len);
374 offset += RX_BUFFER_SIZE; 374 offset += RX_BUFFER_SIZE;
375 bp->rx_ring[frag].addr &= ~MACB_BIT(RX_USED); 375 bp->rx_ring[frag].addr &= ~MACB_BIT(RX_USED);
376 wmb(); 376 wmb();
@@ -576,7 +576,8 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
576 int i; 576 int i;
577 dev_dbg(&bp->pdev->dev, 577 dev_dbg(&bp->pdev->dev,
578 "start_xmit: len %u head %p data %p tail %p end %p\n", 578 "start_xmit: len %u head %p data %p tail %p end %p\n",
579 skb->len, skb->head, skb->data, skb->tail, skb->end); 579 skb->len, skb->head, skb->data,
580 skb_tail_pointer(skb), skb_end_pointer(skb));
580 dev_dbg(&bp->pdev->dev, 581 dev_dbg(&bp->pdev->dev,
581 "data:"); 582 "data:");
582 for (i = 0; i < 16; i++) 583 for (i = 0; i < 16; i++)
diff --git a/drivers/net/mace.c b/drivers/net/mace.c
index 9ec24f0d5d68..b3bd62394958 100644
--- a/drivers/net/mace.c
+++ b/drivers/net/mace.c
@@ -939,7 +939,6 @@ static irqreturn_t mace_rxdma_intr(int irq, void *dev_id)
939 else /* Ethernet header; mace includes FCS */ 939 else /* Ethernet header; mace includes FCS */
940 nb -= 8; 940 nb -= 8;
941 skb_put(skb, nb); 941 skb_put(skb, nb);
942 skb->dev = dev;
943 skb->protocol = eth_type_trans(skb, dev); 942 skb->protocol = eth_type_trans(skb, dev);
944 mp->stats.rx_bytes += skb->len; 943 mp->stats.rx_bytes += skb->len;
945 netif_rx(skb); 944 netif_rx(skb);
diff --git a/drivers/net/macmace.c b/drivers/net/macmace.c
index 5d541e873041..27911c07558d 100644
--- a/drivers/net/macmace.c
+++ b/drivers/net/macmace.c
@@ -420,8 +420,7 @@ static int mace_xmit_start(struct sk_buff *skb, struct net_device *dev)
420 mp->stats.tx_bytes += skb->len; 420 mp->stats.tx_bytes += skb->len;
421 421
422 /* We need to copy into our xmit buffer to take care of alignment and caching issues */ 422 /* We need to copy into our xmit buffer to take care of alignment and caching issues */
423 423 skb_copy_from_linear_data(skb, mp->tx_ring, skb->len);
424 memcpy((void *) mp->tx_ring, skb->data, skb->len);
425 424
426 /* load the Tx DMA and fire it off */ 425 /* load the Tx DMA and fire it off */
427 426
@@ -621,7 +620,6 @@ static void mace_dma_rx_frame(struct net_device *dev, struct mace_frame *mf)
621 skb_reserve(skb,2); 620 skb_reserve(skb,2);
622 memcpy(skb_put(skb, mf->len), mf->data, mf->len); 621 memcpy(skb_put(skb, mf->len), mf->data, mf->len);
623 622
624 skb->dev = dev;
625 skb->protocol = eth_type_trans(skb, dev); 623 skb->protocol = eth_type_trans(skb, dev);
626 netif_rx(skb); 624 netif_rx(skb);
627 dev->last_rx = jiffies; 625 dev->last_rx = jiffies;
diff --git a/drivers/net/meth.c b/drivers/net/meth.c
index 7e69ca6edd91..0343ea12b299 100644
--- a/drivers/net/meth.c
+++ b/drivers/net/meth.c
@@ -421,7 +421,6 @@ static void meth_rx(struct net_device* dev, unsigned long int_status)
421 /* Write metadata, and then pass to the receive level */ 421 /* Write metadata, and then pass to the receive level */
422 skb_put(skb_c, len); 422 skb_put(skb_c, len);
423 priv->rx_skbs[priv->rx_write] = skb; 423 priv->rx_skbs[priv->rx_write] = skb;
424 skb_c->dev = dev;
425 skb_c->protocol = eth_type_trans(skb_c, dev); 424 skb_c->protocol = eth_type_trans(skb_c, dev);
426 dev->last_rx = jiffies; 425 dev->last_rx = jiffies;
427 priv->stats.rx_packets++; 426 priv->stats.rx_packets++;
@@ -609,7 +608,7 @@ static void meth_tx_short_prepare(struct meth_private *priv,
609 608
610 desc->header.raw = METH_TX_CMD_INT_EN | (len-1) | ((128-len) << 16); 609 desc->header.raw = METH_TX_CMD_INT_EN | (len-1) | ((128-len) << 16);
611 /* maybe I should set whole thing to 0 first... */ 610 /* maybe I should set whole thing to 0 first... */
612 memcpy(desc->data.dt + (120 - len), skb->data, skb->len); 611 skb_copy_from_linear_data(skb, desc->data.dt + (120 - len), skb->len);
613 if (skb->len < len) 612 if (skb->len < len)
614 memset(desc->data.dt + 120 - len + skb->len, 0, len-skb->len); 613 memset(desc->data.dt + 120 - len + skb->len, 0, len-skb->len);
615} 614}
@@ -627,8 +626,8 @@ static void meth_tx_1page_prepare(struct meth_private *priv,
627 626
628 /* unaligned part */ 627 /* unaligned part */
629 if (unaligned_len) { 628 if (unaligned_len) {
630 memcpy(desc->data.dt + (120 - unaligned_len), 629 skb_copy_from_linear_data(skb, desc->data.dt + (120 - unaligned_len),
631 skb->data, unaligned_len); 630 unaligned_len);
632 desc->header.raw |= (128 - unaligned_len) << 16; 631 desc->header.raw |= (128 - unaligned_len) << 16;
633 } 632 }
634 633
@@ -653,8 +652,8 @@ static void meth_tx_2page_prepare(struct meth_private *priv,
653 desc->header.raw = METH_TX_CMD_INT_EN | TX_CATBUF1 | TX_CATBUF2| (skb->len - 1); 652 desc->header.raw = METH_TX_CMD_INT_EN | TX_CATBUF1 | TX_CATBUF2| (skb->len - 1);
654 /* unaligned part */ 653 /* unaligned part */
655 if (unaligned_len){ 654 if (unaligned_len){
656 memcpy(desc->data.dt + (120 - unaligned_len), 655 skb_copy_from_linear_data(skb, desc->data.dt + (120 - unaligned_len),
657 skb->data, unaligned_len); 656 unaligned_len);
658 desc->header.raw |= (128 - unaligned_len) << 16; 657 desc->header.raw |= (128 - unaligned_len) << 16;
659 } 658 }
660 659
diff --git a/drivers/net/mipsnet.c b/drivers/net/mipsnet.c
index f42b9e201937..403f63afd201 100644
--- a/drivers/net/mipsnet.c
+++ b/drivers/net/mipsnet.c
@@ -101,7 +101,6 @@ static inline ssize_t mipsnet_get_fromdev(struct net_device *dev, size_t count)
101 if (ioiocpy_frommipsnet(dev, skb_put(skb, len), len)) 101 if (ioiocpy_frommipsnet(dev, skb_put(skb, len), len))
102 return -EFAULT; 102 return -EFAULT;
103 103
104 skb->dev = dev;
105 skb->protocol = eth_type_trans(skb, dev); 104 skb->protocol = eth_type_trans(skb, dev);
106 skb->ip_summed = CHECKSUM_UNNECESSARY; 105 skb->ip_summed = CHECKSUM_UNNECESSARY;
107 106
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index 8015a7c5b0c9..ab15ecd4b3d6 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -434,7 +434,6 @@ static int mv643xx_eth_receive_queue(struct net_device *dev, int budget)
434 * received packet 434 * received packet
435 */ 435 */
436 skb_put(skb, pkt_info.byte_cnt - 4); 436 skb_put(skb, pkt_info.byte_cnt - 4);
437 skb->dev = dev;
438 437
439 if (pkt_info.cmd_sts & ETH_LAYER_4_CHECKSUM_OK) { 438 if (pkt_info.cmd_sts & ETH_LAYER_4_CHECKSUM_OK) {
440 skb->ip_summed = CHECKSUM_UNNECESSARY; 439 skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -1162,15 +1161,15 @@ static void eth_tx_submit_descs_for_skb(struct mv643xx_private *mp,
1162 1161
1163 cmd_sts |= ETH_GEN_TCP_UDP_CHECKSUM | 1162 cmd_sts |= ETH_GEN_TCP_UDP_CHECKSUM |
1164 ETH_GEN_IP_V_4_CHECKSUM | 1163 ETH_GEN_IP_V_4_CHECKSUM |
1165 skb->nh.iph->ihl << ETH_TX_IHL_SHIFT; 1164 ip_hdr(skb)->ihl << ETH_TX_IHL_SHIFT;
1166 1165
1167 switch (skb->nh.iph->protocol) { 1166 switch (ip_hdr(skb)->protocol) {
1168 case IPPROTO_UDP: 1167 case IPPROTO_UDP:
1169 cmd_sts |= ETH_UDP_FRAME; 1168 cmd_sts |= ETH_UDP_FRAME;
1170 desc->l4i_chk = skb->h.uh->check; 1169 desc->l4i_chk = udp_hdr(skb)->check;
1171 break; 1170 break;
1172 case IPPROTO_TCP: 1171 case IPPROTO_TCP:
1173 desc->l4i_chk = skb->h.th->check; 1172 desc->l4i_chk = tcp_hdr(skb)->check;
1174 break; 1173 break;
1175 default: 1174 default:
1176 BUG(); 1175 BUG();
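
Both typed accessors above resolve to the transport-header mark, so the checksum-offload dispatch reads the L4 checksum field without casting through the old h union. A sketch with the assumed equivalences in comments; example_l4_checksum is illustrative:

#include <net/ip.h>
#include <linux/tcp.h>
#include <linux/udp.h>

static __sum16 example_l4_checksum(const struct sk_buff *skb)
{
        switch (ip_hdr(skb)->protocol) {
        case IPPROTO_UDP:
                /* udp_hdr(skb) == (struct udphdr *)skb_transport_header(skb) */
                return udp_hdr(skb)->check;
        case IPPROTO_TCP:
                /* tcp_hdr(skb) == (struct tcphdr *)skb_transport_header(skb) */
                return tcp_hdr(skb)->check;
        default:
                return 0;
        }
}
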
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index f8efe0e70a6b..16e3c4315e82 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -879,7 +879,7 @@ myri10ge_rx_skb_build(struct sk_buff *skb, u8 * va,
879 * skb_pull() (for ether_pad and eth_type_trans()) requires 879 * skb_pull() (for ether_pad and eth_type_trans()) requires
880 * the beginning of the packet in skb_headlen(), move it 880 * the beginning of the packet in skb_headlen(), move it
881 * manually */ 881 * manually */
882 memcpy(skb->data, va, hlen); 882 skb_copy_to_linear_data(skb, va, hlen);
883 skb_shinfo(skb)->frags[0].page_offset += hlen; 883 skb_shinfo(skb)->frags[0].page_offset += hlen;
884 skb_shinfo(skb)->frags[0].size -= hlen; 884 skb_shinfo(skb)->frags[0].size -= hlen;
885 skb->data_len -= hlen; 885 skb->data_len -= hlen;
@@ -1020,7 +1020,6 @@ myri10ge_rx_done(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
1020 skb_shinfo(skb)->nr_frags = 0; 1020 skb_shinfo(skb)->nr_frags = 0;
1021 } 1021 }
1022 skb->protocol = eth_type_trans(skb, dev); 1022 skb->protocol = eth_type_trans(skb, dev);
1023 skb->dev = dev;
1024 1023
1025 if (mgp->csum_flag) { 1024 if (mgp->csum_flag) {
1026 if ((skb->protocol == htons(ETH_P_IP)) || 1025 if ((skb->protocol == htons(ETH_P_IP)) ||
@@ -2030,7 +2029,7 @@ again:
2030 odd_flag = 0; 2029 odd_flag = 0;
2031 flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST); 2030 flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST);
2032 if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { 2031 if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
2033 cksum_offset = (skb->h.raw - skb->data); 2032 cksum_offset = skb_transport_offset(skb);
2034 pseudo_hdr_offset = cksum_offset + skb->csum_offset; 2033 pseudo_hdr_offset = cksum_offset + skb->csum_offset;
2035 /* If the headers are excessively large, then we must 2034 /* If the headers are excessively large, then we must
2036 * fall back to a software checksum */ 2035 * fall back to a software checksum */
@@ -2055,7 +2054,7 @@ again:
2055 * send loop that we are still in the 2054 * send loop that we are still in the
2056 * header portion of the TSO packet. 2055 * header portion of the TSO packet.
2057 * TSO header must be at most 134 bytes long */ 2056 * TSO header must be at most 134 bytes long */
2058 cum_len = -((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); 2057 cum_len = -(skb_transport_offset(skb) + tcp_hdrlen(skb));
2059 2058
2060 /* for TSO, pseudo_hdr_offset holds mss. 2059 /* for TSO, pseudo_hdr_offset holds mss.
2061 * The firmware figures out where to put 2060 * The firmware figures out where to put
diff --git a/drivers/net/myri_sbus.c b/drivers/net/myri_sbus.c
index ee26ef52289f..13444da93273 100644
--- a/drivers/net/myri_sbus.c
+++ b/drivers/net/myri_sbus.c
@@ -368,7 +368,7 @@ static __be16 myri_type_trans(struct sk_buff *skb, struct net_device *dev)
368 struct ethhdr *eth; 368 struct ethhdr *eth;
369 unsigned char *rawp; 369 unsigned char *rawp;
370 370
371 skb->mac.raw = (((unsigned char *)skb->data) + MYRI_PAD_LEN); 371 skb_set_mac_header(skb, MYRI_PAD_LEN);
372 skb_pull(skb, dev->hard_header_len); 372 skb_pull(skb, dev->hard_header_len);
373 eth = eth_hdr(skb); 373 eth = eth_hdr(skb);
374 374
@@ -502,7 +502,7 @@ static void myri_rx(struct myri_eth *mp, struct net_device *dev)
502 copy_skb->dev = dev; 502 copy_skb->dev = dev;
503 DRX(("resv_and_put ")); 503 DRX(("resv_and_put "));
504 skb_put(copy_skb, len); 504 skb_put(copy_skb, len);
505 memcpy(copy_skb->data, skb->data, len); 505 skb_copy_from_linear_data(skb, copy_skb->data, len);
506 506
507 /* Reuse original ring buffer. */ 507 /* Reuse original ring buffer. */
508 DRX(("reuse ")); 508 DRX(("reuse "));
diff --git a/drivers/net/natsemi.c b/drivers/net/natsemi.c
index 349b96a3ec4c..a8d7ff2c96ac 100644
--- a/drivers/net/natsemi.c
+++ b/drivers/net/natsemi.c
@@ -2289,7 +2289,6 @@ static void netdev_rx(struct net_device *dev, int *work_done, int work_to_do)
2289 * without copying to a minimally-sized skbuff. */ 2289 * without copying to a minimally-sized skbuff. */
2290 if (pkt_len < rx_copybreak 2290 if (pkt_len < rx_copybreak
2291 && (skb = dev_alloc_skb(pkt_len + RX_OFFSET)) != NULL) { 2291 && (skb = dev_alloc_skb(pkt_len + RX_OFFSET)) != NULL) {
2292 skb->dev = dev;
2293 /* 16 byte align the IP header */ 2292 /* 16 byte align the IP header */
2294 skb_reserve(skb, RX_OFFSET); 2293 skb_reserve(skb, RX_OFFSET);
2295 pci_dma_sync_single_for_cpu(np->pci_dev, 2294 pci_dma_sync_single_for_cpu(np->pci_dev,
diff --git a/drivers/net/netx-eth.c b/drivers/net/netx-eth.c
index a53644f6a29b..2b8da0a54998 100644
--- a/drivers/net/netx-eth.c
+++ b/drivers/net/netx-eth.c
@@ -168,7 +168,6 @@ static void netx_eth_receive(struct net_device *ndev)
168 FIFO_PTR_SEGMENT(seg) | FIFO_PTR_FRAMENO(frameno)); 168 FIFO_PTR_SEGMENT(seg) | FIFO_PTR_FRAMENO(frameno));
169 169
170 ndev->last_rx = jiffies; 170 ndev->last_rx = jiffies;
171 skb->dev = ndev;
172 skb->protocol = eth_type_trans(skb, ndev); 171 skb->protocol = eth_type_trans(skb, ndev);
173 netif_rx(skb); 172 netif_rx(skb);
174 priv->stats.rx_packets++; 173 priv->stats.rx_packets++;
diff --git a/drivers/net/netxen/netxen_nic_hw.c b/drivers/net/netxen/netxen_nic_hw.c
index 6537574a9cda..0fba8f190762 100644
--- a/drivers/net/netxen/netxen_nic_hw.c
+++ b/drivers/net/netxen/netxen_nic_hw.c
@@ -35,6 +35,8 @@
35#include "netxen_nic_hw.h" 35#include "netxen_nic_hw.h"
36#include "netxen_nic_phan_reg.h" 36#include "netxen_nic_phan_reg.h"
37 37
38#include <net/ip.h>
39
38/* PCI Windowing for DDR regions. */ 40/* PCI Windowing for DDR regions. */
39 41
40#define ADDR_IN_RANGE(addr, low, high) \ 42#define ADDR_IN_RANGE(addr, low, high) \
@@ -371,22 +373,21 @@ void netxen_tso_check(struct netxen_adapter *adapter,
371 struct cmd_desc_type0 *desc, struct sk_buff *skb) 373 struct cmd_desc_type0 *desc, struct sk_buff *skb)
372{ 374{
373 if (desc->mss) { 375 if (desc->mss) {
374 desc->total_hdr_length = sizeof(struct ethhdr) + 376 desc->total_hdr_length = (sizeof(struct ethhdr) +
375 ((skb->nh.iph)->ihl * sizeof(u32)) + 377 ip_hdrlen(skb) + tcp_hdrlen(skb));
376 ((skb->h.th)->doff * sizeof(u32));
377 netxen_set_cmd_desc_opcode(desc, TX_TCP_LSO); 378 netxen_set_cmd_desc_opcode(desc, TX_TCP_LSO);
378 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { 379 } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
379 if (skb->nh.iph->protocol == IPPROTO_TCP) { 380 if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
380 netxen_set_cmd_desc_opcode(desc, TX_TCP_PKT); 381 netxen_set_cmd_desc_opcode(desc, TX_TCP_PKT);
381 } else if (skb->nh.iph->protocol == IPPROTO_UDP) { 382 } else if (ip_hdr(skb)->protocol == IPPROTO_UDP) {
382 netxen_set_cmd_desc_opcode(desc, TX_UDP_PKT); 383 netxen_set_cmd_desc_opcode(desc, TX_UDP_PKT);
383 } else { 384 } else {
384 return; 385 return;
385 } 386 }
386 } 387 }
387 adapter->stats.xmitcsummed++; 388 adapter->stats.xmitcsummed++;
388 desc->tcp_hdr_offset = skb->h.raw - skb->data; 389 desc->tcp_hdr_offset = skb_transport_offset(skb);
389 desc->ip_hdr_offset = skb->nh.raw - skb->data; 390 desc->ip_hdr_offset = skb_network_offset(skb);
390} 391}
391 392
392int netxen_is_flash_supported(struct netxen_adapter *adapter) 393int netxen_is_flash_supported(struct netxen_adapter *adapter)
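
ip_hdrlen() and tcp_hdrlen() hide the four-byte scaling of the ihl and doff fields, which is why <net/ip.h> is now pulled into the netxen files. A sketch of the total-header computation; example_total_hdr_len is illustrative:

#include <net/ip.h>
#include <linux/tcp.h>
#include <linux/if_ether.h>

static unsigned int example_total_hdr_len(const struct sk_buff *skb)
{
        return sizeof(struct ethhdr) +
               ip_hdrlen(skb) +     /* == ip_hdr(skb)->ihl  * 4 */
               tcp_hdrlen(skb);     /* == tcp_hdr(skb)->doff * 4 */
}
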
diff --git a/drivers/net/netxen/netxen_nic_init.c b/drivers/net/netxen/netxen_nic_init.c
index eff965dc5fff..5cd40562da7c 100644
--- a/drivers/net/netxen/netxen_nic_init.c
+++ b/drivers/net/netxen/netxen_nic_init.c
@@ -1129,7 +1129,6 @@ netxen_process_rcv(struct netxen_adapter *adapter, int ctxid,
1129 port->stats.csummed++; 1129 port->stats.csummed++;
1130 skb->ip_summed = CHECKSUM_UNNECESSARY; 1130 skb->ip_summed = CHECKSUM_UNNECESSARY;
1131 } 1131 }
1132 skb->dev = netdev;
1133 if (desc_ctx == RCV_DESC_LRO_CTXID) { 1132 if (desc_ctx == RCV_DESC_LRO_CTXID) {
1134 /* True length was only available on the last pkt */ 1133 /* True length was only available on the last pkt */
1135 skb_put(skb, buffer->lro_length); 1134 skb_put(skb, buffer->lro_length);
diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c
index 7d2525e76abb..ab25c225a07e 100644
--- a/drivers/net/netxen/netxen_nic_main.c
+++ b/drivers/net/netxen/netxen_nic_main.c
@@ -41,6 +41,7 @@
41 41
42#include <linux/dma-mapping.h> 42#include <linux/dma-mapping.h>
43#include <linux/vmalloc.h> 43#include <linux/vmalloc.h>
44#include <net/ip.h>
44 45
45MODULE_DESCRIPTION("NetXen Multi port (1/10) Gigabit Network Driver"); 46MODULE_DESCRIPTION("NetXen Multi port (1/10) Gigabit Network Driver");
46MODULE_LICENSE("GPL"); 47MODULE_LICENSE("GPL");
@@ -778,9 +779,8 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
778 if (skb_shinfo(skb)->gso_size > 0) { 779 if (skb_shinfo(skb)->gso_size > 0) {
779 780
780 no_of_desc++; 781 no_of_desc++;
781 if (((skb->nh.iph)->ihl * sizeof(u32)) + 782 if ((ip_hdrlen(skb) + tcp_hdrlen(skb) +
782 ((skb->h.th)->doff * sizeof(u32)) + 783 sizeof(struct ethhdr)) >
783 sizeof(struct ethhdr) >
784 (sizeof(struct cmd_desc_type0) - 2)) { 784 (sizeof(struct cmd_desc_type0) - 2)) {
785 no_of_desc++; 785 no_of_desc++;
786 } 786 }
@@ -920,8 +920,10 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
920 /* copy the next 64 bytes - should be enough except 920 /* copy the next 64 bytes - should be enough except
921 * for pathological case 921 * for pathological case
922 */ 922 */
923 memcpy((void *)hwdesc, (void *)(skb->data) + 923 skb_copy_from_linear_data_offset(skb, first_hdr_len,
924 first_hdr_len, hdr_len - first_hdr_len); 924 hwdesc,
925 (hdr_len -
926 first_hdr_len));
925 producer = get_next_index(producer, max_tx_desc_count); 927 producer = get_next_index(producer, max_tx_desc_count);
926 } 928 }
927 } 929 }
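
The _offset variant of the copy helper reads from a byte offset into the linear area. A sketch of the assumed equivalence for the descriptor copy above; example_copy_tail and its parameters are illustrative:

#include <linux/skbuff.h>

static void example_copy_tail(const struct sk_buff *skb, void *hwdesc,
                              unsigned int first_hdr_len, unsigned int hdr_len)
{
        /* behaves like memcpy(hwdesc, skb->data + first_hdr_len,
         *                     hdr_len - first_hdr_len)            */
        skb_copy_from_linear_data_offset(skb, first_hdr_len, hwdesc,
                                         hdr_len - first_hdr_len);
}
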
diff --git a/drivers/net/ni5010.c b/drivers/net/ni5010.c
index 8be0d030d6f4..3d5b4232f65f 100644
--- a/drivers/net/ni5010.c
+++ b/drivers/net/ni5010.c
@@ -562,7 +562,6 @@ static void ni5010_rx(struct net_device *dev)
562 return; 562 return;
563 } 563 }
564 564
565 skb->dev = dev;
566 skb_reserve(skb, 2); 565 skb_reserve(skb, 2);
567 566
568 /* Read packet into buffer */ 567 /* Read packet into buffer */
diff --git a/drivers/net/ni52.c b/drivers/net/ni52.c
index a6f4b24b0176..8dbd6d1900b5 100644
--- a/drivers/net/ni52.c
+++ b/drivers/net/ni52.c
@@ -934,7 +934,6 @@ static void ni52_rcv_int(struct net_device *dev)
934 skb = (struct sk_buff *) dev_alloc_skb(totlen+2); 934 skb = (struct sk_buff *) dev_alloc_skb(totlen+2);
935 if(skb != NULL) 935 if(skb != NULL)
936 { 936 {
937 skb->dev = dev;
938 skb_reserve(skb,2); 937 skb_reserve(skb,2);
939 skb_put(skb,totlen); 938 skb_put(skb,totlen);
940 eth_copy_and_sum(skb,(char *) p->base+(unsigned long) rbd->buffer,totlen,0); 939 eth_copy_and_sum(skb,(char *) p->base+(unsigned long) rbd->buffer,totlen,0);
@@ -1183,7 +1182,7 @@ static int ni52_send_packet(struct sk_buff *skb, struct net_device *dev)
1183 else 1182 else
1184#endif 1183#endif
1185 { 1184 {
1186 memcpy((char *)p->xmit_cbuffs[p->xmit_count],(char *)(skb->data),skb->len); 1185 skb_copy_from_linear_data(skb, (char *) p->xmit_cbuffs[p->xmit_count], skb->len);
1187 len = skb->len; 1186 len = skb->len;
1188 if (len < ETH_ZLEN) { 1187 if (len < ETH_ZLEN) {
1189 len = ETH_ZLEN; 1188 len = ETH_ZLEN;
diff --git a/drivers/net/ni65.c b/drivers/net/ni65.c
index 1578f4d98498..3818edf0ac18 100644
--- a/drivers/net/ni65.c
+++ b/drivers/net/ni65.c
@@ -610,7 +610,6 @@ static void *ni65_alloc_mem(struct net_device *dev,char *what,int size,int type)
610 printk(KERN_WARNING "%s: unable to allocate %s memory.\n",dev->name,what); 610 printk(KERN_WARNING "%s: unable to allocate %s memory.\n",dev->name,what);
611 return NULL; 611 return NULL;
612 } 612 }
613 skb->dev = dev;
614 skb_reserve(skb,2+16); 613 skb_reserve(skb,2+16);
615 skb_put(skb,R_BUF_SIZE); /* grab the whole space .. (not necessary) */ 614 skb_put(skb,R_BUF_SIZE); /* grab the whole space .. (not necessary) */
616 ptr = skb->data; 615 ptr = skb->data;
@@ -1094,7 +1093,6 @@ static void ni65_recv_intr(struct net_device *dev,int csr0)
1094 if(skb) 1093 if(skb)
1095 { 1094 {
1096 skb_reserve(skb,2); 1095 skb_reserve(skb,2);
1097 skb->dev = dev;
1098#ifdef RCV_VIA_SKB 1096#ifdef RCV_VIA_SKB
1099 if( (unsigned long) (skb->data + R_BUF_SIZE) > 0x1000000) { 1097 if( (unsigned long) (skb->data + R_BUF_SIZE) > 0x1000000) {
1100 skb_put(skb,len); 1098 skb_put(skb,len);
@@ -1178,8 +1176,9 @@ static int ni65_send_packet(struct sk_buff *skb, struct net_device *dev)
1178 if( (unsigned long) (skb->data + skb->len) > 0x1000000) { 1176 if( (unsigned long) (skb->data + skb->len) > 0x1000000) {
1179#endif 1177#endif
1180 1178
1181 memcpy((char *) p->tmdbounce[p->tmdbouncenum] ,(char *)skb->data, 1179 skb_copy_from_linear_data(skb, p->tmdbounce[p->tmdbouncenum],
1182 (skb->len > T_BUF_SIZE) ? T_BUF_SIZE : skb->len); 1180 skb->len > T_BUF_SIZE ? T_BUF_SIZE :
1181 skb->len);
1183 if (len > skb->len) 1182 if (len > skb->len)
1184 memset((char *)p->tmdbounce[p->tmdbouncenum]+skb->len, 0, len-skb->len); 1183 memset((char *)p->tmdbounce[p->tmdbouncenum]+skb->len, 0, len-skb->len);
1185 dev_kfree_skb (skb); 1184 dev_kfree_skb (skb);
diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c
index 9ec6e9e54f47..6a32338623f1 100644
--- a/drivers/net/ns83820.c
+++ b/drivers/net/ns83820.c
@@ -607,7 +607,6 @@ static inline int rx_refill(struct net_device *ndev, gfp_t gfp)
607 res &= 0xf; 607 res &= 0xf;
608 skb_reserve(skb, res); 608 skb_reserve(skb, res);
609 609
610 skb->dev = ndev;
611 if (gfp != GFP_ATOMIC) 610 if (gfp != GFP_ATOMIC)
612 spin_lock_irqsave(&dev->rx_info.lock, flags); 611 spin_lock_irqsave(&dev->rx_info.lock, flags);
613 res = ns83820_add_rx_skb(dev, skb); 612 res = ns83820_add_rx_skb(dev, skb);
@@ -1157,9 +1156,9 @@ again:
1157 extsts = 0; 1156 extsts = 0;
1158 if (skb->ip_summed == CHECKSUM_PARTIAL) { 1157 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1159 extsts |= EXTSTS_IPPKT; 1158 extsts |= EXTSTS_IPPKT;
1160 if (IPPROTO_TCP == skb->nh.iph->protocol) 1159 if (IPPROTO_TCP == ip_hdr(skb)->protocol)
1161 extsts |= EXTSTS_TCPPKT; 1160 extsts |= EXTSTS_TCPPKT;
1162 else if (IPPROTO_UDP == skb->nh.iph->protocol) 1161 else if (IPPROTO_UDP == ip_hdr(skb)->protocol)
1163 extsts |= EXTSTS_UDPPKT; 1162 extsts |= EXTSTS_UDPPKT;
1164 } 1163 }
1165 1164
diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c
index d670ac74824f..76fe9dd8e841 100644
--- a/drivers/net/pasemi_mac.c
+++ b/drivers/net/pasemi_mac.c
@@ -334,8 +334,6 @@ static void pasemi_mac_replenish_rx_ring(struct net_device *dev)
334 break; 334 break;
335 } 335 }
336 336
337 skb->dev = dev;
338
339 dma = pci_map_single(mac->dma_pdev, skb->data, skb->len, 337 dma = pci_map_single(mac->dma_pdev, skb->data, skb->len,
340 PCI_DMA_FROMDEVICE); 338 PCI_DMA_FROMDEVICE);
341 339
@@ -731,16 +729,18 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
731 dflags = XCT_MACTX_O | XCT_MACTX_ST | XCT_MACTX_SS | XCT_MACTX_CRC_PAD; 729 dflags = XCT_MACTX_O | XCT_MACTX_ST | XCT_MACTX_SS | XCT_MACTX_CRC_PAD;
732 730
733 if (skb->ip_summed == CHECKSUM_PARTIAL) { 731 if (skb->ip_summed == CHECKSUM_PARTIAL) {
734 switch (skb->nh.iph->protocol) { 732 const unsigned char *nh = skb_network_header(skb);
733
734 switch (ip_hdr(skb)->protocol) {
735 case IPPROTO_TCP: 735 case IPPROTO_TCP:
736 dflags |= XCT_MACTX_CSUM_TCP; 736 dflags |= XCT_MACTX_CSUM_TCP;
737 dflags |= XCT_MACTX_IPH((skb->h.raw - skb->nh.raw) >> 2); 737 dflags |= XCT_MACTX_IPH(skb_network_header_len(skb) >> 2);
738 dflags |= XCT_MACTX_IPO(skb->nh.raw - skb->data); 738 dflags |= XCT_MACTX_IPO(nh - skb->data);
739 break; 739 break;
740 case IPPROTO_UDP: 740 case IPPROTO_UDP:
741 dflags |= XCT_MACTX_CSUM_UDP; 741 dflags |= XCT_MACTX_CSUM_UDP;
742 dflags |= XCT_MACTX_IPH((skb->h.raw - skb->nh.raw) >> 2); 742 dflags |= XCT_MACTX_IPH(skb_network_header_len(skb) >> 2);
743 dflags |= XCT_MACTX_IPO(skb->nh.raw - skb->data); 743 dflags |= XCT_MACTX_IPO(nh - skb->data);
744 break; 744 break;
745 } 745 }
746 } 746 }
diff --git a/drivers/net/pci-skeleton.c b/drivers/net/pci-skeleton.c
index 6ca4e4fa6b88..df8998b4f37e 100644
--- a/drivers/net/pci-skeleton.c
+++ b/drivers/net/pci-skeleton.c
@@ -1344,7 +1344,7 @@ static int netdrv_start_xmit (struct sk_buff *skb, struct net_device *dev)
1344 1344
1345 tp->tx_info[entry].skb = skb; 1345 tp->tx_info[entry].skb = skb;
1346 /* tp->tx_info[entry].mapping = 0; */ 1346 /* tp->tx_info[entry].mapping = 0; */
1347 memcpy (tp->tx_buf[entry], skb->data, skb->len); 1347 skb_copy_from_linear_data(skb, tp->tx_buf[entry], skb->len);
1348 1348
1349 /* Note: the chip doesn't have auto-pad! */ 1349 /* Note: the chip doesn't have auto-pad! */
1350 NETDRV_W32 (TxStatus0 + (entry * sizeof(u32)), 1350 NETDRV_W32 (TxStatus0 + (entry * sizeof(u32)),
@@ -1565,7 +1565,6 @@ static void netdrv_rx_interrupt (struct net_device *dev,
1565 1565
1566 skb = dev_alloc_skb (pkt_size + 2); 1566 skb = dev_alloc_skb (pkt_size + 2);
1567 if (skb) { 1567 if (skb) {
1568 skb->dev = dev;
1569 skb_reserve (skb, 2); /* 16 byte align the IP fields. */ 1568 skb_reserve (skb, 2); /* 16 byte align the IP fields. */
1570 1569
1571 eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0); 1570 eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0);
diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c
index c7bd9c1c7f31..2b395ee21f75 100644
--- a/drivers/net/pcmcia/3c574_cs.c
+++ b/drivers/net/pcmcia/3c574_cs.c
@@ -1056,7 +1056,6 @@ static int el3_rx(struct net_device *dev, int worklimit)
1056 DEBUG(3, " Receiving packet size %d status %4.4x.\n", 1056 DEBUG(3, " Receiving packet size %d status %4.4x.\n",
1057 pkt_len, rx_status); 1057 pkt_len, rx_status);
1058 if (skb != NULL) { 1058 if (skb != NULL) {
1059 skb->dev = dev;
1060 skb_reserve(skb, 2); 1059 skb_reserve(skb, 2);
1061 insl(ioaddr+RX_FIFO, skb_put(skb, pkt_len), 1060 insl(ioaddr+RX_FIFO, skb_put(skb, pkt_len),
1062 ((pkt_len+3)>>2)); 1061 ((pkt_len+3)>>2));
diff --git a/drivers/net/pcmcia/3c589_cs.c b/drivers/net/pcmcia/3c589_cs.c
index 461e8274ef69..143ae2ff309e 100644
--- a/drivers/net/pcmcia/3c589_cs.c
+++ b/drivers/net/pcmcia/3c589_cs.c
@@ -883,7 +883,6 @@ static int el3_rx(struct net_device *dev)
883 DEBUG(3, " Receiving packet size %d status %4.4x.\n", 883 DEBUG(3, " Receiving packet size %d status %4.4x.\n",
884 pkt_len, rx_status); 884 pkt_len, rx_status);
885 if (skb != NULL) { 885 if (skb != NULL) {
886 skb->dev = dev;
887 skb_reserve(skb, 2); 886 skb_reserve(skb, 2);
888 insl(ioaddr+RX_FIFO, skb_put(skb, pkt_len), 887 insl(ioaddr+RX_FIFO, skb_put(skb, pkt_len),
889 (pkt_len+3)>>2); 888 (pkt_len+3)>>2);
diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c
index 6139048f8117..808fae1577e0 100644
--- a/drivers/net/pcmcia/axnet_cs.c
+++ b/drivers/net/pcmcia/axnet_cs.c
@@ -1136,7 +1136,7 @@ static int ei_start_xmit(struct sk_buff *skb, struct net_device *dev)
1136 ei_block_output(dev, length, skb->data, output_page); 1136 ei_block_output(dev, length, skb->data, output_page);
1137 else { 1137 else {
1138 memset(packet, 0, ETH_ZLEN); 1138 memset(packet, 0, ETH_ZLEN);
1139 memcpy(packet, skb->data, skb->len); 1139 skb_copy_from_linear_data(skb, packet, skb->len);
1140 ei_block_output(dev, length, packet, output_page); 1140 ei_block_output(dev, length, packet, output_page);
1141 } 1141 }
1142 1142
@@ -1496,7 +1496,6 @@ static void ei_receive(struct net_device *dev)
1496 else 1496 else
1497 { 1497 {
1498 skb_reserve(skb,2); /* IP headers on 16 byte boundaries */ 1498 skb_reserve(skb,2); /* IP headers on 16 byte boundaries */
1499 skb->dev = dev;
1500 skb_put(skb, pkt_len); /* Make room */ 1499 skb_put(skb, pkt_len); /* Make room */
1501 ei_block_input(dev, pkt_len, skb, current_offset + sizeof(rx_frame)); 1500 ei_block_input(dev, pkt_len, skb, current_offset + sizeof(rx_frame));
1502 skb->protocol=eth_type_trans(skb,dev); 1501 skb->protocol=eth_type_trans(skb,dev);
diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c
index 0d7de617e535..3f93d4933235 100644
--- a/drivers/net/pcmcia/fmvj18x_cs.c
+++ b/drivers/net/pcmcia/fmvj18x_cs.c
@@ -999,7 +999,6 @@ static void fjn_rx(struct net_device *dev)
999 lp->stats.rx_dropped++; 999 lp->stats.rx_dropped++;
1000 break; 1000 break;
1001 } 1001 }
1002 skb->dev = dev;
1003 1002
1004 skb_reserve(skb, 2); 1003 skb_reserve(skb, 2);
1005 insw(ioaddr + DATAPORT, skb_put(skb, pkt_len), 1004 insw(ioaddr + DATAPORT, skb_put(skb, pkt_len),
diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c
index 3b707747a811..73da611fd536 100644
--- a/drivers/net/pcmcia/nmclan_cs.c
+++ b/drivers/net/pcmcia/nmclan_cs.c
@@ -1182,12 +1182,10 @@ static int mace_rx(struct net_device *dev, unsigned char RxCnt)
1182 skb = dev_alloc_skb(pkt_len+2); 1182 skb = dev_alloc_skb(pkt_len+2);
1183 1183
1184 if (skb != NULL) { 1184 if (skb != NULL) {
1185 skb->dev = dev;
1186
1187 skb_reserve(skb, 2); 1185 skb_reserve(skb, 2);
1188 insw(ioaddr + AM2150_RCV, skb_put(skb, pkt_len), pkt_len>>1); 1186 insw(ioaddr + AM2150_RCV, skb_put(skb, pkt_len), pkt_len>>1);
1189 if (pkt_len & 1) 1187 if (pkt_len & 1)
1190 *(skb->tail-1) = inb(ioaddr + AM2150_RCV); 1188 *(skb_tail_pointer(skb) - 1) = inb(ioaddr + AM2150_RCV);
1191 skb->protocol = eth_type_trans(skb, dev); 1189 skb->protocol = eth_type_trans(skb, dev);
1192 1190
1193 netif_rx(skb); /* Send the packet to the upper (protocol) layers. */ 1191 netif_rx(skb); /* Send the packet to the upper (protocol) layers. */
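
skb_tail_pointer() appears here because this series prepares sk_buff to carry tail and end as offsets rather than raw pointers on 64-bit builds, after which direct arithmetic on skb->tail no longer compiles; that motivation is an inference, not stated in the diff. A sketch of the odd-byte fixup; example_fix_odd_byte and its port parameter are illustrative:

#include <linux/skbuff.h>
#include <asm/io.h>

static void example_fix_odd_byte(struct sk_buff *skb, unsigned int port,
                                 int pkt_len)
{
        if (pkt_len & 1)        /* insw() above moved only whole 16-bit words */
                *(skb_tail_pointer(skb) - 1) = inb(port);
}
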
diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c
index 2561f76033ea..7912dbd14251 100644
--- a/drivers/net/pcmcia/smc91c92_cs.c
+++ b/drivers/net/pcmcia/smc91c92_cs.c
@@ -1669,7 +1669,6 @@ static void smc_rx(struct net_device *dev)
1669 (packet_length+1)>>1); 1669 (packet_length+1)>>1);
1670 skb->protocol = eth_type_trans(skb, dev); 1670 skb->protocol = eth_type_trans(skb, dev);
1671 1671
1672 skb->dev = dev;
1673 netif_rx(skb); 1672 netif_rx(skb);
1674 dev->last_rx = jiffies; 1673 dev->last_rx = jiffies;
1675 smc->stats.rx_packets++; 1674 smc->stats.rx_packets++;
diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c
index 5879e7c36988..809ec440b8eb 100644
--- a/drivers/net/pcmcia/xirc2ps_cs.c
+++ b/drivers/net/pcmcia/xirc2ps_cs.c
@@ -1226,7 +1226,6 @@ xirc2ps_interrupt(int irq, void *dev_id)
1226 (pktlen+1)>>1); 1226 (pktlen+1)>>1);
1227 } 1227 }
1228 skb->protocol = eth_type_trans(skb, dev); 1228 skb->protocol = eth_type_trans(skb, dev);
1229 skb->dev = dev;
1230 netif_rx(skb); 1229 netif_rx(skb);
1231 dev->last_rx = jiffies; 1230 dev->last_rx = jiffies;
1232 lp->stats.rx_packets++; 1231 lp->stats.rx_packets++;
diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c
index 4d94ba7899bf..0791360a6a66 100644
--- a/drivers/net/pcnet32.c
+++ b/drivers/net/pcnet32.c
@@ -1206,7 +1206,6 @@ static void pcnet32_rx_entry(struct net_device *dev,
1206 PCI_DMA_FROMDEVICE); 1206 PCI_DMA_FROMDEVICE);
1207 skb_put(skb, pkt_len); 1207 skb_put(skb, pkt_len);
1208 lp->rx_skbuff[entry] = newskb; 1208 lp->rx_skbuff[entry] = newskb;
1209 newskb->dev = dev;
1210 lp->rx_dma_addr[entry] = 1209 lp->rx_dma_addr[entry] =
1211 pci_map_single(lp->pci_dev, 1210 pci_map_single(lp->pci_dev,
1212 newskb->data, 1211 newskb->data,
diff --git a/drivers/net/plip.c b/drivers/net/plip.c
index 6bb085f54437..8754cf3356b0 100644
--- a/drivers/net/plip.c
+++ b/drivers/net/plip.c
@@ -546,7 +546,7 @@ static __be16 plip_type_trans(struct sk_buff *skb, struct net_device *dev)
546 struct ethhdr *eth; 546 struct ethhdr *eth;
547 unsigned char *rawp; 547 unsigned char *rawp;
548 548
549 skb->mac.raw=skb->data; 549 skb_reset_mac_header(skb);
550 skb_pull(skb,dev->hard_header_len); 550 skb_pull(skb,dev->hard_header_len);
551 eth = eth_hdr(skb); 551 eth = eth_hdr(skb);
552 552
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index ef58e4128782..6d596ca50cfd 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -88,8 +88,6 @@ struct ppp_file {
88#define PF_TO_PPP(pf) PF_TO_X(pf, struct ppp) 88#define PF_TO_PPP(pf) PF_TO_X(pf, struct ppp)
89#define PF_TO_CHANNEL(pf) PF_TO_X(pf, struct channel) 89#define PF_TO_CHANNEL(pf) PF_TO_X(pf, struct channel)
90 90
91#define ROUNDUP(n, x) (((n) + (x) - 1) / (x))
92
93/* 91/*
94 * Data structure describing one ppp unit. 92 * Data structure describing one ppp unit.
95 * A ppp unit corresponds to a ppp network interface device 93 * A ppp unit corresponds to a ppp network interface device
@@ -1297,7 +1295,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
1297 */ 1295 */
1298 fragsize = len; 1296 fragsize = len;
1299 if (nfree > 1) 1297 if (nfree > 1)
1300 fragsize = ROUNDUP(fragsize, nfree); 1298 fragsize = DIV_ROUND_UP(fragsize, nfree);
1301 /* nbigger channels get fragsize bytes, the rest get fragsize-1, 1299 /* nbigger channels get fragsize bytes, the rest get fragsize-1,
1302 except if nbigger==0, then they all get fragsize. */ 1300 except if nbigger==0, then they all get fragsize. */
1303 nbigger = len % nfree; 1301 nbigger = len % nfree;
@@ -1685,7 +1683,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
1685 skb_pull_rcsum(skb, 2); 1683 skb_pull_rcsum(skb, 2);
1686 skb->dev = ppp->dev; 1684 skb->dev = ppp->dev;
1687 skb->protocol = htons(npindex_to_ethertype[npi]); 1685 skb->protocol = htons(npindex_to_ethertype[npi]);
1688 skb->mac.raw = skb->data; 1686 skb_reset_mac_header(skb);
1689 netif_rx(skb); 1687 netif_rx(skb);
1690 ppp->dev->last_rx = jiffies; 1688 ppp->dev->last_rx = jiffies;
1691 } 1689 }
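
The private ROUNDUP macro duplicated the ceiling division that <linux/kernel.h> already provides as DIV_ROUND_UP. A worked example of the fragment sizing; example_fragsize is illustrative:

#include <linux/kernel.h>

static int example_fragsize(int len, int nfree)
{
        /* DIV_ROUND_UP(n, d) expands to ((n) + (d) - 1) / (d).
         * With len = 10 and nfree = 3: fragsize = 4 and
         * nbigger = 10 % 3 = 1, so one channel carries 4 bytes
         * and the other two carry fragsize - 1 = 3 each.        */
        return nfree > 1 ? DIV_ROUND_UP(len, nfree) : len;
}
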
diff --git a/drivers/net/ppp_synctty.c b/drivers/net/ppp_synctty.c
index b6f0e9a25e26..5918fab38349 100644
--- a/drivers/net/ppp_synctty.c
+++ b/drivers/net/ppp_synctty.c
@@ -594,7 +594,8 @@ ppp_sync_txmunge(struct syncppp *ap, struct sk_buff *skb)
594 return NULL; 594 return NULL;
595 } 595 }
596 skb_reserve(npkt,2); 596 skb_reserve(npkt,2);
597 memcpy(skb_put(npkt,skb->len), skb->data, skb->len); 597 skb_copy_from_linear_data(skb,
598 skb_put(npkt, skb->len), skb->len);
598 kfree_skb(skb); 599 kfree_skb(skb);
599 skb = npkt; 600 skb = npkt;
600 } 601 }
diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
index ebfa2967cd68..6f98834e6ace 100644
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -207,7 +207,7 @@ static inline struct pppox_sock *get_item(unsigned long sid,
207 207
208static inline struct pppox_sock *get_item_by_addr(struct sockaddr_pppox *sp) 208static inline struct pppox_sock *get_item_by_addr(struct sockaddr_pppox *sp)
209{ 209{
210 struct net_device *dev = NULL; 210 struct net_device *dev;
211 int ifindex; 211 int ifindex;
212 212
213 dev = dev_get_by_name(sp->sa_addr.pppoe.dev); 213 dev = dev_get_by_name(sp->sa_addr.pppoe.dev);
@@ -218,20 +218,6 @@ static inline struct pppox_sock *get_item_by_addr(struct sockaddr_pppox *sp)
218 return get_item(sp->sa_addr.pppoe.sid, sp->sa_addr.pppoe.remote, ifindex); 218 return get_item(sp->sa_addr.pppoe.sid, sp->sa_addr.pppoe.remote, ifindex);
219} 219}
220 220
221static inline int set_item(struct pppox_sock *po)
222{
223 int i;
224
225 if (!po)
226 return -EINVAL;
227
228 write_lock_bh(&pppoe_hash_lock);
229 i = __set_item(po);
230 write_unlock_bh(&pppoe_hash_lock);
231
232 return i;
233}
234
235static inline struct pppox_sock *delete_item(unsigned long sid, char *addr, int ifindex) 221static inline struct pppox_sock *delete_item(unsigned long sid, char *addr, int ifindex)
236{ 222{
237 struct pppox_sock *ret; 223 struct pppox_sock *ret;
@@ -255,54 +241,53 @@ static inline struct pppox_sock *delete_item(unsigned long sid, char *addr, int
255static void pppoe_flush_dev(struct net_device *dev) 241static void pppoe_flush_dev(struct net_device *dev)
256{ 242{
257 int hash; 243 int hash;
258
259 BUG_ON(dev == NULL); 244 BUG_ON(dev == NULL);
260 245
261 read_lock_bh(&pppoe_hash_lock); 246 write_lock_bh(&pppoe_hash_lock);
262 for (hash = 0; hash < PPPOE_HASH_SIZE; hash++) { 247 for (hash = 0; hash < PPPOE_HASH_SIZE; hash++) {
263 struct pppox_sock *po = item_hash_table[hash]; 248 struct pppox_sock *po = item_hash_table[hash];
264 249
265 while (po != NULL) { 250 while (po != NULL) {
266 if (po->pppoe_dev == dev) { 251 struct sock *sk = sk_pppox(po);
267 struct sock *sk = sk_pppox(po); 252 if (po->pppoe_dev != dev) {
268 253 po = po->next;
269 sock_hold(sk); 254 continue;
270 po->pppoe_dev = NULL; 255 }
256 po->pppoe_dev = NULL;
257 dev_put(dev);
271 258
272 /* We hold a reference to SK, now drop the
273 * hash table lock so that we may attempt
274 * to lock the socket (which can sleep).
275 */
276 read_unlock_bh(&pppoe_hash_lock);
277 259
278 lock_sock(sk); 260 /* We always grab the socket lock, followed by the
261 * pppoe_hash_lock, in that order. Since we should
262 * hold the sock lock while doing any unbinding,
263 * we need to release the lock we're holding.
264 * Hold a reference to the sock so it doesn't disappear
265 * as we're jumping between locks.
266 */
279 267
280 if (sk->sk_state & 268 sock_hold(sk);
281 (PPPOX_CONNECTED | PPPOX_BOUND)) {
282 pppox_unbind_sock(sk);
283 dev_put(dev);
284 sk->sk_state = PPPOX_ZOMBIE;
285 sk->sk_state_change(sk);
286 }
287 269
288 release_sock(sk); 270 write_unlock_bh(&pppoe_hash_lock);
271 lock_sock(sk);
289 272
290 sock_put(sk); 273 if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
274 pppox_unbind_sock(sk);
275 sk->sk_state = PPPOX_ZOMBIE;
276 sk->sk_state_change(sk);
277 }
291 278
292 read_lock_bh(&pppoe_hash_lock); 279 release_sock(sk);
280 sock_put(sk);
293 281
294 /* Now restart from the beginning of this 282 /* Restart scan at the beginning of this hash chain.
295 * hash chain. We always NULL out pppoe_dev 283 * While the lock was dropped the chain contents may
296 * so we are guaranteed to make forward 284 * have changed.
297 * progress. 285 */
298 */ 286 write_lock_bh(&pppoe_hash_lock);
299 po = item_hash_table[hash]; 287 po = item_hash_table[hash];
300 continue;
301 }
302 po = po->next;
303 } 288 }
304 } 289 }
305 read_unlock_bh(&pppoe_hash_lock); 290 write_unlock_bh(&pppoe_hash_lock);
306} 291}
307 292
308static int pppoe_device_event(struct notifier_block *this, 293static int pppoe_device_event(struct notifier_block *this,
@@ -344,10 +329,10 @@ static struct notifier_block pppoe_notifier = {
344static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb) 329static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb)
345{ 330{
346 struct pppox_sock *po = pppox_sk(sk); 331 struct pppox_sock *po = pppox_sk(sk);
347 struct pppox_sock *relay_po = NULL; 332 struct pppox_sock *relay_po;
348 333
349 if (sk->sk_state & PPPOX_BOUND) { 334 if (sk->sk_state & PPPOX_BOUND) {
350 struct pppoe_hdr *ph = (struct pppoe_hdr *) skb->nh.raw; 335 struct pppoe_hdr *ph = pppoe_hdr(skb);
351 int len = ntohs(ph->length); 336 int len = ntohs(ph->length);
352 skb_pull_rcsum(skb, sizeof(struct pppoe_hdr)); 337 skb_pull_rcsum(skb, sizeof(struct pppoe_hdr));
353 if (pskb_trim_rcsum(skb, len)) 338 if (pskb_trim_rcsum(skb, len))
@@ -401,7 +386,7 @@ static int pppoe_rcv(struct sk_buff *skb,
401 if (!(skb = skb_share_check(skb, GFP_ATOMIC))) 386 if (!(skb = skb_share_check(skb, GFP_ATOMIC)))
402 goto out; 387 goto out;
403 388
404 ph = (struct pppoe_hdr *) skb->nh.raw; 389 ph = pppoe_hdr(skb);
405 390
406 po = get_item((unsigned long) ph->sid, eth_hdr(skb)->h_source, dev->ifindex); 391 po = get_item((unsigned long) ph->sid, eth_hdr(skb)->h_source, dev->ifindex);
407 if (po != NULL) 392 if (po != NULL)
@@ -433,7 +418,7 @@ static int pppoe_disc_rcv(struct sk_buff *skb,
433 if (!(skb = skb_share_check(skb, GFP_ATOMIC))) 418 if (!(skb = skb_share_check(skb, GFP_ATOMIC)))
434 goto out; 419 goto out;
435 420
436 ph = (struct pppoe_hdr *) skb->nh.raw; 421 ph = pppoe_hdr(skb);
437 if (ph->code != PADT_CODE) 422 if (ph->code != PADT_CODE)
438 goto abort; 423 goto abort;
439 424
@@ -514,36 +499,49 @@ static int pppoe_release(struct socket *sock)
514{ 499{
515 struct sock *sk = sock->sk; 500 struct sock *sk = sock->sk;
516 struct pppox_sock *po; 501 struct pppox_sock *po;
517 int error = 0;
518 502
519 if (!sk) 503 if (!sk)
520 return 0; 504 return 0;
521 505
522 if (sock_flag(sk, SOCK_DEAD)) 506 lock_sock(sk);
507 if (sock_flag(sk, SOCK_DEAD)){
508 release_sock(sk);
523 return -EBADF; 509 return -EBADF;
510 }
524 511
525 pppox_unbind_sock(sk); 512 pppox_unbind_sock(sk);
526 513
527 /* Signal the death of the socket. */ 514 /* Signal the death of the socket. */
528 sk->sk_state = PPPOX_DEAD; 515 sk->sk_state = PPPOX_DEAD;
529 516
517
518 /* Write lock on hash lock protects the entire "po" struct from
519 * concurrent updates via pppoe_flush_dev. The "po" struct should
520 * be considered part of the hash table contents, thus protected
521 * by the hash table lock */
522 write_lock_bh(&pppoe_hash_lock);
523
530 po = pppox_sk(sk); 524 po = pppox_sk(sk);
531 if (po->pppoe_pa.sid) { 525 if (po->pppoe_pa.sid) {
532 delete_item(po->pppoe_pa.sid, po->pppoe_pa.remote, po->pppoe_ifindex); 526 __delete_item(po->pppoe_pa.sid,
527 po->pppoe_pa.remote, po->pppoe_ifindex);
533 } 528 }
534 529
535 if (po->pppoe_dev) 530 if (po->pppoe_dev) {
536 dev_put(po->pppoe_dev); 531 dev_put(po->pppoe_dev);
532 po->pppoe_dev = NULL;
533 }
537 534
538 po->pppoe_dev = NULL; 535 write_unlock_bh(&pppoe_hash_lock);
539 536
540 sock_orphan(sk); 537 sock_orphan(sk);
541 sock->sk = NULL; 538 sock->sk = NULL;
542 539
543 skb_queue_purge(&sk->sk_receive_queue); 540 skb_queue_purge(&sk->sk_receive_queue);
541 release_sock(sk);
544 sock_put(sk); 542 sock_put(sk);
545 543
546 return error; 544 return 0;
547} 545}
548 546
549 547
@@ -599,14 +597,18 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
599 po->pppoe_dev = dev; 597 po->pppoe_dev = dev;
600 po->pppoe_ifindex = dev->ifindex; 598 po->pppoe_ifindex = dev->ifindex;
601 599
602 if (!(dev->flags & IFF_UP)) 600 write_lock_bh(&pppoe_hash_lock);
601 if (!(dev->flags & IFF_UP)){
602 write_unlock_bh(&pppoe_hash_lock);
603 goto err_put; 603 goto err_put;
604 }
604 605
605 memcpy(&po->pppoe_pa, 606 memcpy(&po->pppoe_pa,
606 &sp->sa_addr.pppoe, 607 &sp->sa_addr.pppoe,
607 sizeof(struct pppoe_addr)); 608 sizeof(struct pppoe_addr));
608 609
609 error = set_item(po); 610 error = __set_item(po);
611 write_unlock_bh(&pppoe_hash_lock);
610 if (error < 0) 612 if (error < 0)
611 goto err_put; 613 goto err_put;
612 614
@@ -762,10 +764,10 @@ static int pppoe_ioctl(struct socket *sock, unsigned int cmd,
762static int pppoe_sendmsg(struct kiocb *iocb, struct socket *sock, 764static int pppoe_sendmsg(struct kiocb *iocb, struct socket *sock,
763 struct msghdr *m, size_t total_len) 765 struct msghdr *m, size_t total_len)
764{ 766{
765 struct sk_buff *skb = NULL; 767 struct sk_buff *skb;
766 struct sock *sk = sock->sk; 768 struct sock *sk = sock->sk;
767 struct pppox_sock *po = pppox_sk(sk); 769 struct pppox_sock *po = pppox_sk(sk);
768 int error = 0; 770 int error;
769 struct pppoe_hdr hdr; 771 struct pppoe_hdr hdr;
770 struct pppoe_hdr *ph; 772 struct pppoe_hdr *ph;
771 struct net_device *dev; 773 struct net_device *dev;
@@ -799,7 +801,7 @@ static int pppoe_sendmsg(struct kiocb *iocb, struct socket *sock,
799 801
800 /* Reserve space for headers. */ 802 /* Reserve space for headers. */
801 skb_reserve(skb, dev->hard_header_len); 803 skb_reserve(skb, dev->hard_header_len);
802 skb->nh.raw = skb->data; 804 skb_reset_network_header(skb);
803 805
804 skb->dev = dev; 806 skb->dev = dev;
805 807
@@ -869,7 +871,8 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb)
869 goto abort; 871 goto abort;
870 872
871 skb_reserve(skb2, dev->hard_header_len + sizeof(struct pppoe_hdr)); 873 skb_reserve(skb2, dev->hard_header_len + sizeof(struct pppoe_hdr));
872 memcpy(skb_put(skb2, skb->len), skb->data, skb->len); 874 skb_copy_from_linear_data(skb, skb_put(skb2, skb->len),
875 skb->len);
873 } else { 876 } else {
874 /* Make a clone so as to not disturb the original skb, 877 /* Make a clone so as to not disturb the original skb,
875 * give dev_queue_xmit something it can free. 878 * give dev_queue_xmit something it can free.
@@ -884,7 +887,7 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb)
884 memcpy(ph, &hdr, sizeof(struct pppoe_hdr)); 887 memcpy(ph, &hdr, sizeof(struct pppoe_hdr));
885 skb2->protocol = __constant_htons(ETH_P_PPP_SES); 888 skb2->protocol = __constant_htons(ETH_P_PPP_SES);
886 889
887 skb2->nh.raw = skb2->data; 890 skb_reset_network_header(skb2);
888 891
889 skb2->dev = dev; 892 skb2->dev = dev;
890 893
@@ -929,10 +932,8 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock,
929 struct msghdr *m, size_t total_len, int flags) 932 struct msghdr *m, size_t total_len, int flags)
930{ 933{
931 struct sock *sk = sock->sk; 934 struct sock *sk = sock->sk;
932 struct sk_buff *skb = NULL; 935 struct sk_buff *skb;
933 int error = 0; 936 int error = 0;
934 int len;
935 struct pppoe_hdr *ph = NULL;
936 937
937 if (sk->sk_state & PPPOX_BOUND) { 938 if (sk->sk_state & PPPOX_BOUND) {
938 error = -EIO; 939 error = -EIO;
@@ -942,26 +943,21 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock,
942 skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, 943 skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
943 flags & MSG_DONTWAIT, &error); 944 flags & MSG_DONTWAIT, &error);
944 945
945 if (error < 0) { 946 if (error < 0)
946 goto end; 947 goto end;
947 }
948 948
949 m->msg_namelen = 0; 949 m->msg_namelen = 0;
950 950
951 if (skb) { 951 if (skb) {
952 error = 0; 952 struct pppoe_hdr *ph = pppoe_hdr(skb);
953 ph = (struct pppoe_hdr *) skb->nh.raw; 953 const int len = ntohs(ph->length);
954 len = ntohs(ph->length);
955 954
956 error = memcpy_toiovec(m->msg_iov, (unsigned char *) &ph->tag[0], len); 955 error = memcpy_toiovec(m->msg_iov, (unsigned char *) &ph->tag[0], len);
957 if (error < 0) 956 if (error == 0)
958 goto do_skb_free; 957 error = len;
959 error = len;
960 } 958 }
961 959
962do_skb_free: 960 kfree_skb(skb);
963 if (skb)
964 kfree_skb(skb);
965end: 961end:
966 return error; 962 return error;
967} 963}
@@ -991,7 +987,7 @@ out:
991 987
992static __inline__ struct pppox_sock *pppoe_get_idx(loff_t pos) 988static __inline__ struct pppox_sock *pppoe_get_idx(loff_t pos)
993{ 989{
994 struct pppox_sock *po = NULL; 990 struct pppox_sock *po;
995 int i = 0; 991 int i = 0;
996 992
997 for (; i < PPPOE_HASH_SIZE; i++) { 993 for (; i < PPPOE_HASH_SIZE; i++) {
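
The pppoe flush and release rewrites settle on one lock order: the socket lock is always taken before pppoe_hash_lock. A scan that holds only the hash lock and then needs the socket lock must pin the socket, back out, and reacquire. A condensed sketch of that sequence; the control flow is simplified and example_unbind_one is illustrative:

#include <net/sock.h>
#include <linux/if_pppox.h>

static DEFINE_RWLOCK(pppoe_hash_lock);  /* stands in for the driver's lock */

static void example_unbind_one(struct sock *sk)
{
        /* entered holding pppoe_hash_lock for write, sk on a hash chain */
        sock_hold(sk);                      /* pin sk across the unlock     */
        write_unlock_bh(&pppoe_hash_lock);  /* drop the inner lock          */
        lock_sock(sk);                      /* outer lock first; may sleep  */

        if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND))
                pppox_unbind_sock(sk);      /* unbind under the socket lock */

        release_sock(sk);
        sock_put(sk);
        write_lock_bh(&pppoe_hash_lock);    /* reacquire; the caller restarts
                                             * its scan of the hash chain   */
}
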
diff --git a/drivers/net/pppox.c b/drivers/net/pppox.c
index 9315046b3f55..3f8115db4d54 100644
--- a/drivers/net/pppox.c
+++ b/drivers/net/pppox.c
@@ -58,7 +58,7 @@ void pppox_unbind_sock(struct sock *sk)
 {
 	/* Clear connection to ppp device, if attached. */
 
-	if (sk->sk_state & (PPPOX_BOUND | PPPOX_ZOMBIE)) {
+	if (sk->sk_state & (PPPOX_BOUND | PPPOX_CONNECTED | PPPOX_ZOMBIE)) {
 		ppp_unregister_channel(&pppox_sk(sk)->chan);
 		sk->sk_state = PPPOX_DEAD;
 	}
diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c
index a8246eb2f8d9..7b80fb7a9d9b 100755
--- a/drivers/net/qla3xxx.c
+++ b/drivers/net/qla3xxx.c
@@ -1873,7 +1873,6 @@ static void ql_process_mac_rx_intr(struct ql3_adapter *qdev,
 		       pci_unmap_len(lrg_buf_cb2, maplen),
 		       PCI_DMA_FROMDEVICE);
 	prefetch(skb->data);
-	skb->dev = qdev->ndev;
 	skb->ip_summed = CHECKSUM_NONE;
 	skb->protocol = eth_type_trans(skb, qdev->ndev);
 
@@ -1928,7 +1927,8 @@ static void ql_process_macip_rx_intr(struct ql3_adapter *qdev,
 		 * Copy the ethhdr from first buffer to second. This
 		 * is necessary for 3022 IP completions.
 		 */
-		memcpy(skb_push(skb2, size), skb1->data + VLAN_ID_LEN, size);
+		skb_copy_from_linear_data_offset(skb1, VLAN_ID_LEN,
+						 skb_push(skb2, size), size);
 	} else {
 		u16 checksum = le16_to_cpu(ib_ip_rsp_ptr->checksum);
 		if (checksum &
@@ -1946,7 +1946,6 @@ static void ql_process_macip_rx_intr(struct ql3_adapter *qdev,
 			skb2->ip_summed = CHECKSUM_UNNECESSARY;
 		}
 	}
-	skb2->dev = qdev->ndev;
 	skb2->protocol = eth_type_trans(skb2, qdev->ndev);
 
 	netif_receive_skb(skb2);
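
The skb->dev assignments dropped throughout these drivers are redundant because eth_type_trans() claims the skb for the device itself. A sketch of the relevant opening of that helper (not the verbatim net/ethernet/eth.c source, which goes on to classify eth->h_proto):

	__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
	{
		struct ethhdr *eth;

		skb->dev = dev;			/* the line drivers used to duplicate */
		skb_reset_mac_header(skb);
		skb_pull(skb, ETH_HLEN);
		eth = eth_hdr(skb);
		/* ... protocol classification based on eth->h_proto ... */
		return eth->h_proto;
	}

The same reasoning applies to hippi_type_trans(), fddi_type_trans() and tr_type_trans() in the drivers further down.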
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 6a77b8a92245..45876a854f00 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -2284,7 +2284,7 @@ static inline u32 rtl8169_tso_csum(struct sk_buff *skb, struct net_device *dev)
 		return LargeSend | ((mss & MSSMask) << MSSShift);
 	}
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		const struct iphdr *ip = skb->nh.iph;
+		const struct iphdr *ip = ip_hdr(skb);
 
 		if (ip->protocol == IPPROTO_TCP)
 			return IPCS | TCPCS;
@@ -2586,7 +2586,6 @@ rtl8169_rx_interrupt(struct net_device *dev, struct rtl8169_private *tp,
 			pci_action(tp->pci_dev, le64_to_cpu(desc->addr),
 				   tp->rx_buf_sz, PCI_DMA_FROMDEVICE);
 
-			skb->dev = dev;
 			skb_put(skb, pkt_size);
 			skb->protocol = eth_type_trans(skb, dev);
 
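
ip_hdr() is the typed accessor that replaces open-coded skb->nh.iph throughout this series; assuming it follows the same pattern as the other header accessors:

	static inline struct iphdr *ip_hdr(const struct sk_buff *skb)
	{
		return (struct iphdr *)skb_network_header(skb);
	}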
diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index b7ff484af3e1..df6b73872fdb 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -115,7 +115,6 @@ static int rionet_rx_clean(struct net_device *ndev)
 
 		rnet->rx_skb[i]->data = data;
 		skb_put(rnet->rx_skb[i], RIO_MAX_MSG_SIZE);
-		rnet->rx_skb[i]->dev = ndev;
 		rnet->rx_skb[i]->protocol =
 		    eth_type_trans(rnet->rx_skb[i], ndev);
 		error = netif_rx(rnet->rx_skb[i]);
diff --git a/drivers/net/rrunner.c b/drivers/net/rrunner.c
index d81536f90df6..25c73d47daad 100644
--- a/drivers/net/rrunner.c
+++ b/drivers/net/rrunner.c
@@ -1029,7 +1029,6 @@ static void rx_int(struct net_device *dev, u32 rxlimit, u32 index)
 					goto defer;
 				}
 			}
-			skb->dev = dev;
 			skb->protocol = hippi_type_trans(skb, dev);
 
 			netif_rx(skb);		/* send it up */
@@ -1452,7 +1451,7 @@ static int rr_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 		skb_reserve(new_skb, 8);
 		skb_put(new_skb, len);
-		memcpy(new_skb->data, skb->data, len);
+		skb_copy_from_linear_data(skb, new_skb->data, len);
 		dev_kfree_skb(skb);
 		skb = new_skb;
 	}
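
skb_copy_from_linear_data() and its _offset variant, used in this and the following drivers, are thin self-documenting wrappers over memcpy() from the skb's linear area, presumably along these lines (include/linux/skbuff.h):

	static inline void skb_copy_from_linear_data(const struct sk_buff *skb,
						     void *to, const unsigned int len)
	{
		memcpy(to, skb->data, len);
	}

	static inline void skb_copy_from_linear_data_offset(const struct sk_buff *skb,
							    const int offset, void *to,
							    const unsigned int len)
	{
		memcpy(to, skb->data + offset, len);
	}

Naming the copy direction explicitly is what lets the skb data pointers become opaque later without another tree-wide sweep.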
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index 46ebf141ee5a..600d3ff347fc 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -2195,7 +2195,7 @@ static int fill_rxd_3buf(struct s2io_nic *nic, struct RxD_t *rxdp, struct \
 	frag_list->next = NULL;
 	tmp = (void *)ALIGN((long)frag_list->data, ALIGN_SIZE + 1);
 	frag_list->data = tmp;
-	frag_list->tail = tmp;
+	skb_reset_tail_pointer(frag_list);
 
 	/* Buffer-2 receives L4 data payload */
 	((struct RxD3*)rxdp)->Buffer2_ptr = pci_map_single(nic->pdev,
@@ -2349,7 +2349,7 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
 			tmp += ALIGN_SIZE;
 			tmp &= ~ALIGN_SIZE;
 			skb->data = (void *) (unsigned long)tmp;
-			skb->tail = (void *) (unsigned long)tmp;
+			skb_reset_tail_pointer(skb);
 
 			if (!(((struct RxD3*)rxdp)->Buffer0_ptr))
 				((struct RxD3*)rxdp)->Buffer0_ptr =
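
skb_reset_tail_pointer() replaces direct stores to skb->tail, which stops being a plain pointer once sk_buff_data_t can be an offset on 64-bit builds; in the pointer configuration it is presumably just:

	static inline void skb_reset_tail_pointer(struct sk_buff *skb)
	{
		skb->tail = skb->data;
	}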
diff --git a/drivers/net/saa9730.c b/drivers/net/saa9730.c
index 143958f1ef0a..ad94358ece89 100644
--- a/drivers/net/saa9730.c
+++ b/drivers/net/saa9730.c
@@ -688,7 +688,6 @@ static int lan_saa9730_rx(struct net_device *dev)
 			} else {
 				lp->stats.rx_bytes += len;
 				lp->stats.rx_packets++;
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align */
 				skb_put(skb, len);	/* make room */
 				eth_copy_and_sum(skb,
diff --git a/drivers/net/sb1000.c b/drivers/net/sb1000.c
index b9fa4fbb1398..1de3eec1a792 100644
--- a/drivers/net/sb1000.c
+++ b/drivers/net/sb1000.c
@@ -834,7 +834,7 @@ printk("cm0: IP identification: %02x%02x fragment offset: %02x%02x\n", buffer[3
 		goto dropped_frame;
 	}
 	skb->dev = dev;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb->protocol = (unsigned short) buffer[NewDatagramHeaderSkip + 16];
 	insw(ioaddr, skb_put(skb, NewDatagramDataSize),
 	     NewDatagramDataSize / 2);
diff --git a/drivers/net/sb1250-mac.c b/drivers/net/sb1250-mac.c
index 103c3174ab54..0a3a379b634c 100644
--- a/drivers/net/sb1250-mac.c
+++ b/drivers/net/sb1250-mac.c
@@ -933,9 +933,6 @@ static int sbdma_add_rcvbuffer(sbmacdma_t *d,struct sk_buff *sb)
 		}
 
 		sbdma_align_skb(sb_new, SMP_CACHE_BYTES, ETHER_ALIGN);
-
-		/* mark skbuff owned by our device */
-		sb_new->dev = d->sbdma_eth->sbm_dev;
 	}
 	else {
 		sb_new = sb;
diff --git a/drivers/net/sc92031.c b/drivers/net/sc92031.c
index c32c21af3fdd..5b7284c955dc 100644
--- a/drivers/net/sc92031.c
+++ b/drivers/net/sc92031.c
@@ -814,7 +814,6 @@ static void _sc92031_rx_tasklet(struct net_device *dev)
 			memcpy(skb_put(skb, pkt_size), rx_ring + rx_ring_offset, pkt_size);
 		}
 
-		skb->dev = dev;
 		skb->protocol = eth_type_trans(skb, dev);
 		dev->last_rx = jiffies;
 		netif_rx(skb);
diff --git a/drivers/net/seeq8005.c b/drivers/net/seeq8005.c
index 0d6c95c7aedf..4bce7c4f373c 100644
--- a/drivers/net/seeq8005.c
+++ b/drivers/net/seeq8005.c
@@ -550,7 +550,6 @@ static void seeq8005_rx(struct net_device *dev)
 			lp->stats.rx_dropped++;
 			break;
 		}
-		skb->dev = dev;
 		skb_reserve(skb, 2);	/* align data on 16 byte */
 		buf = skb_put(skb,pkt_len);
 
diff --git a/drivers/net/sgiseeq.c b/drivers/net/sgiseeq.c
index 52ed522a234c..d8c9c5d66d4f 100644
--- a/drivers/net/sgiseeq.c
+++ b/drivers/net/sgiseeq.c
@@ -318,7 +318,6 @@ static inline void sgiseeq_rx(struct net_device *dev, struct sgiseeq_private *sp
 			skb = dev_alloc_skb(len + 2);
 
 			if (skb) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);
 				skb_put(skb, len);
 
@@ -535,7 +534,7 @@ static int sgiseeq_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * entry and the HPC got to the end of the chain before we
 	 * added this new entry and restarted it.
 	 */
-	memcpy((char *)(long)td->buf_vaddr, skb->data, skblen);
+	skb_copy_from_linear_data(skb, (char *)(long)td->buf_vaddr, skblen);
 	if (len != skblen)
 		memset((char *)(long)td->buf_vaddr + skb->len, 0, len-skblen);
 	td->tdma.cntinfo = (len & HPCDMA_BCNT) |
diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c
index 34463ce6f132..bc8de48da313 100644
--- a/drivers/net/sis190.c
+++ b/drivers/net/sis190.c
@@ -632,7 +632,6 @@ static int sis190_rx_interrupt(struct net_device *dev,
 			pci_action(tp->pci_dev, le32_to_cpu(desc->addr),
 				   tp->rx_buf_sz, PCI_DMA_FROMDEVICE);
 
-			skb->dev = dev;
 			skb_put(skb, pkt_size);
 			skb->protocol = eth_type_trans(skb, dev);
 
diff --git a/drivers/net/sis900.c b/drivers/net/sis900.c
index b2a3b19d773a..dea0126723da 100644
--- a/drivers/net/sis900.c
+++ b/drivers/net/sis900.c
@@ -1160,7 +1160,6 @@ sis900_init_rx_ring(struct net_device *net_dev)
 			   buffer */
 			break;
 		}
-		skb->dev = net_dev;
 		sis_priv->rx_skbuff[i] = skb;
 		sis_priv->rx_ring[i].cmdsts = RX_BUF_SIZE;
 		sis_priv->rx_ring[i].bufptr = pci_map_single(sis_priv->pci_dev,
@@ -1800,7 +1799,6 @@ static int sis900_rx(struct net_device *net_dev)
 			sis_priv->stats.rx_packets++;
 			sis_priv->dirty_rx++;
refill_rx_ring:
-			skb->dev = net_dev;
 			sis_priv->rx_skbuff[entry] = skb;
 			sis_priv->rx_ring[entry].cmdsts = RX_BUF_SIZE;
 			sis_priv->rx_ring[entry].bufptr =
@@ -1832,7 +1830,6 @@ refill_rx_ring:
 				sis_priv->stats.rx_dropped++;
 				break;
 			}
-			skb->dev = net_dev;
 			sis_priv->rx_skbuff[entry] = skb;
 			sis_priv->rx_ring[entry].cmdsts = RX_BUF_SIZE;
 			sis_priv->rx_ring[entry].bufptr =
diff --git a/drivers/net/sk98lin/skge.c b/drivers/net/sk98lin/skge.c
index e94ab256b540..e0a93005e6dc 100644
--- a/drivers/net/sk98lin/skge.c
+++ b/drivers/net/sk98lin/skge.c
@@ -1562,10 +1562,10 @@ struct sk_buff *pMessage) /* pointer to send-message */
 	pTxd->pMBuf = pMessage;
 
 	if (pMessage->ip_summed == CHECKSUM_PARTIAL) {
-		u16 hdrlen = pMessage->h.raw - pMessage->data;
+		u16 hdrlen = skb_transport_offset(pMessage);
 		u16 offset = hdrlen + pMessage->csum_offset;
 
-		if ((pMessage->h.ipiph->protocol == IPPROTO_UDP ) &&
+		if ((ipip_hdr(pMessage)->protocol == IPPROTO_UDP) &&
 			(pAC->GIni.GIChipRev == 0) &&
 			(pAC->GIni.GIChipId == CHIP_ID_YUKON)) {
 			pTxd->TBControl = BMU_TCP_CHECK;
@@ -1681,7 +1681,7 @@ struct sk_buff *pMessage) /* pointer to send-message */
 	** Does the HW need to evaluate checksum for TCP or UDP packets?
 	*/
 	if (pMessage->ip_summed == CHECKSUM_PARTIAL) {
-		u16 hdrlen = pMessage->h.raw - pMessage->data;
+		u16 hdrlen = skb_transport_offset(pMessage);
 		u16 offset = hdrlen + pMessage->csum_offset;
 
 		Control = BMU_STFWD;
@@ -1691,7 +1691,7 @@ struct sk_buff *pMessage) /* pointer to send-message */
 		** opcode for udp is not working in the hardware yet
 		** (Revision 2.0)
 		*/
-		if ((pMessage->h.ipiph->protocol == IPPROTO_UDP ) &&
+		if ((ipip_hdr(pMessage)->protocol == IPPROTO_UDP) &&
 			(pAC->GIni.GIChipRev == 0) &&
 			(pAC->GIni.GIChipId == CHIP_ID_YUKON)) {
 			Control |= BMU_TCP_CHECK;
@@ -2127,7 +2127,7 @@ rx_start:
 					(dma_addr_t) PhysAddr,
 					FrameLength,
 					PCI_DMA_FROMDEVICE);
-				memcpy(pNewMsg->data, pMsg, FrameLength);
+				skb_copy_to_linear_data(pNewMsg, pMsg, FrameLength);
 
 				pci_dma_sync_single_for_device(pAC->PciDev,
 					(dma_addr_t) PhysAddr,
@@ -2193,7 +2193,6 @@ rx_start:
 			SK_PNMI_CNT_RX_OCTETS_DELIVERED(pAC,
 				FrameLength, pRxPort->PortIndex);
 
-			pMsg->dev = pAC->dev[pRxPort->PortIndex];
 			pMsg->protocol = eth_type_trans(pMsg,
 				pAC->dev[pRxPort->PortIndex]);
 			netif_rx(pMsg);
@@ -2246,7 +2245,6 @@ rx_start:
 				(IFF_PROMISC | IFF_ALLMULTI)) != 0 ||
 				(ForRlmt & SK_RLMT_RX_PROTOCOL) ==
 				SK_RLMT_RX_PROTOCOL) {
-				pMsg->dev = pAC->dev[pRxPort->PortIndex];
 				pMsg->protocol = eth_type_trans(pMsg,
 					pAC->dev[pRxPort->PortIndex]);
 				netif_rx(pMsg);
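
skb_transport_offset() captures the old `h.raw - data` arithmetic used for checksum start offsets, and ipip_hdr() the typed h.ipiph access; sketched under the same assumptions as the accessors above:

	static inline int skb_transport_offset(const struct sk_buff *skb)
	{
		return skb_transport_header(skb) - skb->data;
	}

	static inline struct iphdr *ipip_hdr(const struct sk_buff *skb)
	{
		return (struct iphdr *)skb_transport_header(skb);
	}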
diff --git a/drivers/net/skfp/skfddi.c b/drivers/net/skfp/skfddi.c
index 9733a11c6146..a7ef6c8b7721 100644
--- a/drivers/net/skfp/skfddi.c
+++ b/drivers/net/skfp/skfddi.c
@@ -1680,7 +1680,6 @@ void mac_drv_rx_complete(struct s_smc *smc, volatile struct s_smt_fp_rxd *rxd,
 		rxd->rxd_os.skb = NULL;
 		skb_trim(skb, len);
 		skb->protocol = fddi_type_trans(skb, bp->dev);
-		skb->dev = bp->dev;	/* pass up device pointer */
 
 		netif_rx(skb);
 		bp->dev->last_rx = jiffies;
@@ -1938,7 +1937,7 @@ int mac_drv_rx_init(struct s_smc *smc, int len, int fc,
 	}
 	skb_reserve(skb, 3);
 	skb_put(skb, len);
-	memcpy(skb->data, look_ahead, len);
+	skb_copy_to_linear_data(skb, look_ahead, len);
 
 	// deliver frame to system
 	skb->protocol = fddi_type_trans(skb, smc->os.dev);
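
skb_copy_to_linear_data() is the mirror-image helper for copies into the linear area, presumably:

	static inline void skb_copy_to_linear_data(struct sk_buff *skb,
						   const void *from,
						   const unsigned int len)
	{
		memcpy(skb->data, from, len);
	}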
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index d476a3cc2e94..f1a0e6c0fbdd 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -2654,12 +2654,12 @@ static int skge_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 	td->dma_hi = map >> 32;
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		int offset = skb->h.raw - skb->data;
+		const int offset = skb_transport_offset(skb);
 
 		/* This seems backwards, but it is what the sk98lin
 		 * does. Looks like hardware is wrong?
 		 */
-		if (skb->h.ipiph->protocol == IPPROTO_UDP
+		if (ipip_hdr(skb)->protocol == IPPROTO_UDP
 		    && hw->chip_rev == 0 && hw->chip_id == CHIP_ID_YUKON)
 			control = BMU_TCP_CHECK;
 		else
@@ -2950,7 +2950,7 @@ static struct sk_buff *skge_rx_get(struct net_device *dev,
 	pci_dma_sync_single_for_cpu(skge->hw->pdev,
 				    pci_unmap_addr(e, mapaddr),
 				    len, PCI_DMA_FROMDEVICE);
-	memcpy(skb->data, e->skb->data, len);
+	skb_copy_from_linear_data(e->skb, skb->data, len);
 	pci_dma_sync_single_for_device(skge->hw->pdev,
 				       pci_unmap_addr(e, mapaddr),
 				       len, PCI_DMA_FROMDEVICE);
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index ac36152c68bf..238c2ca34da6 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -32,6 +32,7 @@
 #include <linux/ethtool.h>
 #include <linux/pci.h>
 #include <linux/ip.h>
+#include <net/ip.h>
 #include <linux/tcp.h>
 #include <linux/in.h>
 #include <linux/delay.h>
@@ -1391,8 +1392,8 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 	/* Check for TCP Segmentation Offload */
 	mss = skb_shinfo(skb)->gso_size;
 	if (mss != 0) {
-		mss += ((skb->h.th->doff - 5) * 4);	/* TCP options */
-		mss += (skb->nh.iph->ihl * 4) + sizeof(struct tcphdr);
+		mss += tcp_optlen(skb);		/* TCP options */
+		mss += ip_hdrlen(skb) + sizeof(struct tcphdr);
 		mss += ETH_HLEN;
 
 		if (mss != sky2->tx_last_mss) {
@@ -1420,14 +1421,14 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 
 	/* Handle TCP checksum offload */
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		unsigned offset = skb->h.raw - skb->data;
+		const unsigned offset = skb_transport_offset(skb);
 		u32 tcpsum;
 
 		tcpsum = offset << 16;			/* sum start */
 		tcpsum |= offset + skb->csum_offset;	/* sum write */
 
 		ctrl = CALSUM | WR_SUM | INIT_SUM | LOCK_SUM;
-		if (skb->nh.iph->protocol == IPPROTO_UDP)
+		if (ip_hdr(skb)->protocol == IPPROTO_UDP)
 			ctrl |= UDPTCP;
 
 		if (tcpsum != sky2->tx_tcpsum) {
@@ -1970,7 +1971,7 @@ static struct sk_buff *receive_copy(struct sky2_port *sky2,
 		skb_reserve(skb, 2);
 		pci_dma_sync_single_for_cpu(sky2->hw->pdev, re->data_addr,
 					    length, PCI_DMA_FROMDEVICE);
-		memcpy(skb->data, re->skb->data, length);
+		skb_copy_from_linear_data(re->skb, skb->data, length);
 		skb->ip_summed = re->skb->ip_summed;
 		skb->csum = re->skb->csum;
 		pci_dma_sync_single_for_device(sky2->hw->pdev, re->data_addr,
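
tcp_optlen() and ip_hdrlen() name the two header-length idioms retired here, which is also why the <net/ip.h> include is added above; their presumed definitions (include/net/tcp.h, include/net/ip.h):

	static inline unsigned int tcp_optlen(const struct sk_buff *skb)
	{
		return (tcp_hdr(skb)->doff - 5) * 4;	/* options beyond the 20-byte base header */
	}

	static inline unsigned int ip_hdrlen(const struct sk_buff *skb)
	{
		return ip_hdr(skb)->ihl * 4;		/* IHL counts 32-bit words */
	}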
diff --git a/drivers/net/slip.c b/drivers/net/slip.c
index 2f4b1de7a2b4..65bd20fac820 100644
--- a/drivers/net/slip.c
+++ b/drivers/net/slip.c
@@ -363,7 +363,7 @@ sl_bump(struct slip *sl)
 	}
 	skb->dev = sl->dev;
 	memcpy(skb_put(skb,count), sl->rbuff, count);
-	skb->mac.raw=skb->data;
+	skb_reset_mac_header(skb);
 	skb->protocol=htons(ETH_P_IP);
 	netif_rx(skb);
 	sl->dev->last_rx = jiffies;
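
skb_reset_mac_header() pins the MAC header to the current data pointer, replacing the open-coded `skb->mac.raw = skb->data`; in the pointer (non-offset) configuration it is presumably:

	static inline void skb_reset_mac_header(struct sk_buff *skb)
	{
		skb->mac_header = skb->data;
	}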
diff --git a/drivers/net/smc911x.c b/drivers/net/smc911x.c
index c95614131980..8a2109a913b6 100644
--- a/drivers/net/smc911x.c
+++ b/drivers/net/smc911x.c
@@ -502,7 +502,6 @@ static inline void smc911x_rcv(struct net_device *dev)
 		DBG(SMC_DEBUG_PKTS, "%s: Received packet\n", dev->name,);
 		PRINT_PKT(data, ((pkt_len - 4) <= 64) ? pkt_len - 4 : 64);
 		dev->last_rx = jiffies;
-		skb->dev = dev;
 		skb->protocol = eth_type_trans(skb, dev);
 		netif_rx(skb);
 		lp->stats.rx_packets++;
@@ -1307,7 +1306,6 @@ smc911x_rx_dma_irq(int dma, void *data)
 	lp->current_rx_skb = NULL;
 	PRINT_PKT(skb->data, skb->len);
 	dev->last_rx = jiffies;
-	skb->dev = dev;
 	skb->protocol = eth_type_trans(skb, dev);
 	netif_rx(skb);
 	lp->stats.rx_packets++;
diff --git a/drivers/net/smc9194.c b/drivers/net/smc9194.c
index bd6e84506c29..36c1ebadbf20 100644
--- a/drivers/net/smc9194.c
+++ b/drivers/net/smc9194.c
@@ -1262,7 +1262,6 @@ static void smc_rcv(struct net_device *dev)
 
 		skb_reserve( skb, 2 );   /* 16 bit alignment */
 
-		skb->dev = dev;
 		data = skb_put( skb, packet_length);
 
 #ifdef USE_32_BIT
diff --git a/drivers/net/smc91x.c b/drivers/net/smc91x.c
index 49f4b7712ebf..01cc3c742c38 100644
--- a/drivers/net/smc91x.c
+++ b/drivers/net/smc91x.c
@@ -568,7 +568,6 @@ static inline void smc_rcv(struct net_device *dev)
 		PRINT_PKT(data, packet_len - 4);
 
 		dev->last_rx = jiffies;
-		skb->dev = dev;
 		skb->protocol = eth_type_trans(skb, dev);
 		netif_rx(skb);
 		lp->stats.rx_packets++;
diff --git a/drivers/net/sonic.c b/drivers/net/sonic.c
index ed7aa0a5acca..c6320c719931 100644
--- a/drivers/net/sonic.c
+++ b/drivers/net/sonic.c
@@ -85,7 +85,6 @@ static int sonic_open(struct net_device *dev)
 			       dev->name);
 			return -ENOMEM;
 		}
-		skb->dev = dev;
 		/* align IP header unless DMA requires otherwise */
 		if (SONIC_BUS_SCALE(lp->dma_bitmode) == 2)
 			skb_reserve(skb, 2);
@@ -451,7 +450,6 @@ static void sonic_rx(struct net_device *dev)
 				lp->stats.rx_dropped++;
 				break;
 			}
-			new_skb->dev = dev;
 			/* provide 16 byte IP header alignment unless DMA requires otherwise */
 			if(SONIC_BUS_SCALE(lp->dma_bitmode) == 2)
 				skb_reserve(new_skb, 2);
diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c
index e3019d52c30f..230da14b1b68 100644
--- a/drivers/net/spider_net.c
+++ b/drivers/net/spider_net.c
@@ -720,7 +720,7 @@ spider_net_prepare_tx_descr(struct spider_net_card *card,
 	spin_unlock_irqrestore(&chain->lock, flags);
 
 	if (skb->protocol == htons(ETH_P_IP) && skb->ip_summed == CHECKSUM_PARTIAL)
-		switch (skb->nh.iph->protocol) {
+		switch (ip_hdr(skb)->protocol) {
 		case IPPROTO_TCP:
 			hwdescr->dmac_cmd_status |= SPIDER_NET_DMAC_TCP;
 			break;
@@ -990,7 +990,6 @@ spider_net_pass_skb_up(struct spider_net_descr *descr,
 	netdev = card->netdev;
 
 	skb = descr->skb;
-	skb->dev = netdev;
 	skb_put(skb, hwdescr->valid_size);
 
 	/* the card seems to add 2 bytes of junk in front
diff --git a/drivers/net/starfire.c b/drivers/net/starfire.c
index 8bba2e3da7e1..9d6e454a8f98 100644
--- a/drivers/net/starfire.c
+++ b/drivers/net/starfire.c
@@ -1452,7 +1452,6 @@ static int __netdev_rx(struct net_device *dev, int *quota)
 		   to a minimally-sized skbuff. */
 		if (pkt_len < rx_copybreak
 		    && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-			skb->dev = dev;
 			skb_reserve(skb, 2);	/* 16 byte align the IP header */
 			pci_dma_sync_single_for_cpu(np->pci_dev,
 						    np->rx_info[entry].mapping,
diff --git a/drivers/net/sun3_82586.c b/drivers/net/sun3_82586.c
index 4757aa647c7a..396c3d961f88 100644
--- a/drivers/net/sun3_82586.c
+++ b/drivers/net/sun3_82586.c
@@ -775,7 +775,6 @@ static void sun3_82586_rcv_int(struct net_device *dev)
 				skb = (struct sk_buff *) dev_alloc_skb(totlen+2);
 				if(skb != NULL)
 				{
-					skb->dev = dev;
 					skb_reserve(skb,2);
 					skb_put(skb,totlen);
 					eth_copy_and_sum(skb,(char *) p->base+swab32((unsigned long) rbd->buffer),totlen,0);
@@ -1027,7 +1026,7 @@ static int sun3_82586_send_packet(struct sk_buff *skb, struct net_device *dev)
 		memset((char *)p->xmit_cbuffs[p->xmit_count], 0, ETH_ZLEN);
 		len = ETH_ZLEN;
 	}
-	memcpy((char *)p->xmit_cbuffs[p->xmit_count],(char *)(skb->data),skb->len);
+	skb_copy_from_linear_data(skb, p->xmit_cbuffs[p->xmit_count], skb->len);
 
 #if (NUM_XMIT_BUFFS == 1)
 #	ifdef NO_NOPCOMMANDS
diff --git a/drivers/net/sun3lance.c b/drivers/net/sun3lance.c
index 7bee45b42a2c..791e081fdc15 100644
--- a/drivers/net/sun3lance.c
+++ b/drivers/net/sun3lance.c
@@ -629,7 +629,7 @@ static int lance_start_xmit( struct sk_buff *skb, struct net_device *dev )
 	head->length = (-len) | 0xf000;
 	head->misc = 0;
 
-	memcpy( PKTBUF_ADDR(head), (void *)skb->data, skb->len );
+	skb_copy_from_linear_data(skb, PKTBUF_ADDR(head), skb->len);
 	if (len != skb->len)
 		memset(PKTBUF_ADDR(head) + skb->len, 0, len-skb->len);
 
@@ -851,10 +851,9 @@ static int lance_rx( struct net_device *dev )
 		}
 
 
-		skb->dev = dev;
 		skb_reserve( skb, 2 );		/* 16 byte align */
 		skb_put( skb, pkt_len );	/* Make room */
-//		memcpy( skb->data, PKTBUF_ADDR(head), pkt_len );
+//		skb_copy_to_linear_data(skb, PKTBUF_ADDR(head), pkt_len);
 		eth_copy_and_sum(skb,
 				 PKTBUF_ADDR(head),
 				 pkt_len, 0);
diff --git a/drivers/net/sunbmac.c b/drivers/net/sunbmac.c
index 18f88853e1e5..2ad8d58dee3b 100644
--- a/drivers/net/sunbmac.c
+++ b/drivers/net/sunbmac.c
@@ -855,7 +855,6 @@ static void bigmac_rx(struct bigmac *bp)
 				drops++;
 				goto drop_it;
 			}
-			copy_skb->dev = bp->dev;
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			sbus_dma_sync_single_for_cpu(bp->bigmac_sdev,
diff --git a/drivers/net/sundance.c b/drivers/net/sundance.c
index c06ecc8002b9..f51ba31970aa 100644
--- a/drivers/net/sundance.c
+++ b/drivers/net/sundance.c
@@ -1308,7 +1308,6 @@ static void rx_poll(unsigned long data)
 			   to a minimally-sized skbuff. */
 			if (pkt_len < rx_copybreak
 			    && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
 				pci_dma_sync_single_for_cpu(np->pci_dev,
 							    desc->frag[0].addr,
diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index 08ea61db46fe..9df1038ec6bb 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -845,11 +845,10 @@ static int gem_rx(struct gem *gp, int work_to_do)
 				goto drop_it;
 			}
 
-			copy_skb->dev = gp->dev;
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			pci_dma_sync_single_for_cpu(gp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
-			memcpy(copy_skb->data, skb->data, len);
+			skb_copy_from_linear_data(skb, copy_skb->data, len);
 			pci_dma_sync_single_for_device(gp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
 
 			/* We'll reuse the original ring buffer. */
@@ -1029,10 +1028,8 @@ static int gem_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	ctrl = 0;
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		u64 csum_start_off, csum_stuff_off;
-
-		csum_start_off = (u64) (skb->h.raw - skb->data);
-		csum_stuff_off = csum_start_off + skb->csum_offset;
+		const u64 csum_start_off = skb_transport_offset(skb);
+		const u64 csum_stuff_off = csum_start_off + skb->csum_offset;
 
 		ctrl = (TXDCTRL_CENAB |
 			(csum_start_off << 15) |
diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c
index 192bbc91c731..5304d7b94e5e 100644
--- a/drivers/net/sunhme.c
+++ b/drivers/net/sunhme.c
@@ -2058,11 +2058,10 @@ static void happy_meal_rx(struct happy_meal *hp, struct net_device *dev)
 				goto drop_it;
 			}
 
-			copy_skb->dev = dev;
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			hme_dma_sync_for_cpu(hp, dma_addr, len, DMA_FROMDEVICE);
-			memcpy(copy_skb->data, skb->data, len);
+			skb_copy_from_linear_data(skb, copy_skb->data, len);
 			hme_dma_sync_for_device(hp, dma_addr, len, DMA_FROMDEVICE);
 
 			/* Reuse original ring buffer. */
@@ -2270,10 +2269,8 @@ static int happy_meal_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	tx_flags = TXFLAG_OWN;
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		u32 csum_start_off, csum_stuff_off;
-
-		csum_start_off = (u32) (skb->h.raw - skb->data);
-		csum_stuff_off = csum_start_off + skb->csum_offset;
+		const u32 csum_start_off = skb_transport_offset(skb);
+		const u32 csum_stuff_off = csum_start_off + skb->csum_offset;
 
 		tx_flags = (TXFLAG_OWN | TXFLAG_CSENABLE |
 			    ((csum_start_off << 14) & TXFLAG_CSBUFBEGIN) |
diff --git a/drivers/net/sunlance.c b/drivers/net/sunlance.c
index b0929a457b60..42722530ab24 100644
--- a/drivers/net/sunlance.c
+++ b/drivers/net/sunlance.c
@@ -547,7 +547,6 @@ static void lance_rx_dvma(struct net_device *dev)
 
 		lp->stats.rx_bytes += len;
 
-		skb->dev = dev;
 		skb_reserve(skb, 2);		/* 16 byte align */
 		skb_put(skb, len);		/* make room */
 		eth_copy_and_sum(skb,
@@ -721,7 +720,6 @@ static void lance_rx_pio(struct net_device *dev)
 
 		lp->stats.rx_bytes += len;
 
-		skb->dev = dev;
 		skb_reserve (skb, 2);		/* 16 byte align */
 		skb_put(skb, len);		/* make room */
 		lance_piocopy_to_skb(skb, &(ib->rx_buf[entry][0]), len);
@@ -1145,7 +1143,7 @@ static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		struct lance_init_block *ib = lp->init_block_mem;
 		ib->btx_ring [entry].length = (-len) | 0xf000;
 		ib->btx_ring [entry].misc = 0;
-		memcpy((char *)&ib->tx_buf [entry][0], skb->data, skblen);
+		skb_copy_from_linear_data(skb, &ib->tx_buf [entry][0], skblen);
 		if (len != skblen)
 			memset((char *) &ib->tx_buf [entry][skblen], 0, len - skblen);
 		ib->btx_ring [entry].tmd1_bits = (LE_T1_POK | LE_T1_OWN);
diff --git a/drivers/net/sunqe.c b/drivers/net/sunqe.c
index f3bad56d476a..fa70e0b78af7 100644
--- a/drivers/net/sunqe.c
+++ b/drivers/net/sunqe.c
@@ -437,7 +437,6 @@ static void qe_rx(struct sunqe *qep)
 			drops++;
 			qep->net_stats.rx_dropped++;
 		} else {
-			skb->dev = qep->dev;
 			skb_reserve(skb, 2);
 			skb_put(skb, len);
 			eth_copy_and_sum(skb, (unsigned char *) this_qbuf,
@@ -593,7 +592,7 @@ static int qe_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Avoid a race... */
 	qep->qe_block->qe_txd[entry].tx_flags = TXD_UPDATE;
 
-	memcpy(txbuf, skb->data, len);
+	skb_copy_from_linear_data(skb, txbuf, len);
 
 	qep->qe_block->qe_txd[entry].tx_addr = txbuf_dvma;
 	qep->qe_block->qe_txd[entry].tx_flags =
diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c
index e3a7e3ceab77..d7741e23f8de 100644
--- a/drivers/net/tc35815.c
+++ b/drivers/net/tc35815.c
@@ -1145,7 +1145,6 @@ tc35815_rx(struct net_device *dev)
 				break;
 			}
 			skb_reserve(skb, 2);   /* 16 bit alignment */
-			skb->dev = dev;
 
 			data = skb_put(skb, pkt_len);
 
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 256969e1300c..38383e4e07a1 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -40,6 +40,7 @@
 #include <linux/dma-mapping.h>
 
 #include <net/checksum.h>
+#include <net/ip.h>
 
 #include <asm/system.h>
 #include <asm/io.h>
@@ -3349,7 +3350,7 @@ static int tg3_rx(struct tg3 *tp, int budget)
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			pci_dma_sync_single_for_cpu(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
-			memcpy(copy_skb->data, skb->data, len);
+			skb_copy_from_linear_data(skb, copy_skb->data, len);
 			pci_dma_sync_single_for_device(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
 
 			/* We'll reuse the original ring buffer. */
@@ -3908,20 +3909,20 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
 			mss |= (skb_headlen(skb) - ETH_HLEN) << 9;
 		else {
-			tcp_opt_len = ((skb->h.th->doff - 5) * 4);
-			ip_tcp_len = (skb->nh.iph->ihl * 4) +
-				     sizeof(struct tcphdr);
+			struct iphdr *iph = ip_hdr(skb);
 
-			skb->nh.iph->check = 0;
-			skb->nh.iph->tot_len = htons(mss + ip_tcp_len +
-						     tcp_opt_len);
+			tcp_opt_len = tcp_optlen(skb);
+			ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr);
+
+			iph->check = 0;
+			iph->tot_len = htons(mss + ip_tcp_len + tcp_opt_len);
 			mss |= (ip_tcp_len + tcp_opt_len) << 9;
 		}
 
 		base_flags |= (TXD_FLAG_CPU_PRE_DMA |
 			       TXD_FLAG_CPU_POST_DMA);
 
-		skb->h.th->check = 0;
+		tcp_hdr(skb)->check = 0;
 
 	}
 	else if (skb->ip_summed == CHECKSUM_PARTIAL)
@@ -4055,6 +4056,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
 	mss = 0;
 	if (skb->len > (tp->dev->mtu + ETH_HLEN) &&
 	    (mss = skb_shinfo(skb)->gso_size) != 0) {
+		struct iphdr *iph;
 		int tcp_opt_len, ip_tcp_len, hdr_len;
 
 		if (skb_header_cloned(skb) &&
@@ -4063,8 +4065,8 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
 			goto out_unlock;
 		}
 
-		tcp_opt_len = ((skb->h.th->doff - 5) * 4);
-		ip_tcp_len = (skb->nh.iph->ihl * 4) + sizeof(struct tcphdr);
+		tcp_opt_len = tcp_optlen(skb);
+		ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr);
 
 		hdr_len = ip_tcp_len + tcp_opt_len;
 		if (unlikely((ETH_HLEN + hdr_len) > 80) &&
@@ -4074,34 +4076,31 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
 		base_flags |= (TXD_FLAG_CPU_PRE_DMA |
 			       TXD_FLAG_CPU_POST_DMA);
 
-		skb->nh.iph->check = 0;
-		skb->nh.iph->tot_len = htons(mss + hdr_len);
+		iph = ip_hdr(skb);
+		iph->check = 0;
+		iph->tot_len = htons(mss + hdr_len);
 		if (tp->tg3_flags2 & TG3_FLG2_HW_TSO) {
-			skb->h.th->check = 0;
+			tcp_hdr(skb)->check = 0;
 			base_flags &= ~TXD_FLAG_TCPUDP_CSUM;
-		}
-		else {
-			skb->h.th->check =
-				~csum_tcpudp_magic(skb->nh.iph->saddr,
-						   skb->nh.iph->daddr,
-						   0, IPPROTO_TCP, 0);
-		}
+		} else
+			tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
+								 iph->daddr, 0,
+								 IPPROTO_TCP,
+								 0);
 
 		if ((tp->tg3_flags2 & TG3_FLG2_HW_TSO) ||
 		    (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705)) {
-			if (tcp_opt_len || skb->nh.iph->ihl > 5) {
+			if (tcp_opt_len || iph->ihl > 5) {
 				int tsflags;
 
-				tsflags = ((skb->nh.iph->ihl - 5) +
-					   (tcp_opt_len >> 2));
+				tsflags = (iph->ihl - 5) + (tcp_opt_len >> 2);
 				mss |= (tsflags << 11);
 			}
 		} else {
-			if (tcp_opt_len || skb->nh.iph->ihl > 5) {
+			if (tcp_opt_len || iph->ihl > 5) {
 				int tsflags;
 
-				tsflags = ((skb->nh.iph->ihl - 5) +
-					   (tcp_opt_len >> 2));
+				tsflags = (iph->ihl - 5) + (tcp_opt_len >> 2);
 				base_flags |= tsflags << 12;
 			}
 		}
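
In the non-HW-TSO branch above, the driver seeds the TCP checksum field with the complemented pseudo-header sum, computed with a zero length and payload, so the hardware only has to fold in the per-segment data on top of that seed. tcp_hdr() is the transport-header accessor, presumably:

	static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb)
	{
		return (struct tcphdr *)skb_transport_header(skb);
	}

csum_tcpudp_magic(saddr, daddr, len, proto, sum) folds the IPv4 pseudo-header into a 16-bit checksum, which is why passing len = 0 and sum = 0 yields exactly the seed the hardware expects.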
diff --git a/drivers/net/tlan.c b/drivers/net/tlan.c
index f85f00251123..106dc1ef0acb 100644
--- a/drivers/net/tlan.c
+++ b/drivers/net/tlan.c
@@ -1112,7 +1112,7 @@ static int TLan_StartTx( struct sk_buff *skb, struct net_device *dev )
 
 	if ( bbuf ) {
 		tail_buffer = priv->txBuffer + ( priv->txTail * TLAN_MAX_FRAME_SIZE );
-		memcpy( tail_buffer, skb->data, skb->len );
+		skb_copy_from_linear_data(skb, tail_buffer, skb->len);
 	} else {
 		tail_list->buffer[0].address = pci_map_single(priv->pciDev, skb->data, skb->len, PCI_DMA_TODEVICE);
 		TLan_StoreSKB(tail_list, skb);
@@ -1577,7 +1577,6 @@ u32 TLan_HandleRxEOF( struct net_device *dev, u16 host_int )
 		printk(KERN_INFO "TLAN: Couldn't allocate memory for received data.\n");
 	else {
 		head_buffer = priv->rxBuffer + (priv->rxHead * TLAN_MAX_FRAME_SIZE);
-		skb->dev = dev;
 		skb_reserve(skb, 2);
 		t = (void *) skb_put(skb, frameSize);
 
@@ -1608,7 +1607,6 @@ u32 TLan_HandleRxEOF( struct net_device *dev, u16 host_int )
 		skb->protocol = eth_type_trans( skb, dev );
 		netif_rx( skb );
 
-		new_skb->dev = dev;
 		skb_reserve( new_skb, 2 );
 		t = (void *) skb_put( new_skb, TLAN_MAX_FRAME_SIZE );
 		head_list->buffer[0].address = pci_map_single(priv->pciDev, new_skb->data, TLAN_MAX_FRAME_SIZE, PCI_DMA_FROMDEVICE);
diff --git a/drivers/net/tokenring/3c359.c b/drivers/net/tokenring/3c359.c
index 7580bdeacadc..e22a3f5333ef 100644
--- a/drivers/net/tokenring/3c359.c
+++ b/drivers/net/tokenring/3c359.c
@@ -933,20 +933,21 @@ static void xl_rx(struct net_device *dev)
 			return ;
 		}
 
-		skb->dev = dev ;
-
 		while (xl_priv->rx_ring_tail != temp_ring_loc) {
 			copy_len = xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfraglen & 0x7FFF ;
 			frame_length -= copy_len ;
 			pci_dma_sync_single_for_cpu(xl_priv->pdev,xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr,xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
-			memcpy(skb_put(skb,copy_len), xl_priv->rx_ring_skb[xl_priv->rx_ring_tail]->data, copy_len) ;
+			skb_copy_from_linear_data(xl_priv->rx_ring_skb[xl_priv->rx_ring_tail],
+						  skb_put(skb, copy_len),
+						  copy_len);
 			pci_dma_sync_single_for_device(xl_priv->pdev,xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr,xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
 			adv_rx_ring(dev) ;
 		}
 
 		/* Now we have found the last fragment */
 		pci_dma_sync_single_for_cpu(xl_priv->pdev,xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr,xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
-		memcpy(skb_put(skb,copy_len), xl_priv->rx_ring_skb[xl_priv->rx_ring_tail]->data, frame_length) ;
+		skb_copy_from_linear_data(xl_priv->rx_ring_skb[xl_priv->rx_ring_tail],
+					  skb_put(skb,copy_len), frame_length);
/*		memcpy(skb_put(skb,frame_length), bus_to_virt(xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr), frame_length) ; */
 		pci_dma_sync_single_for_device(xl_priv->pdev,xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr,xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
 		adv_rx_ring(dev) ;
@@ -967,8 +968,6 @@ static void xl_rx(struct net_device *dev)
 			return ;
 		}
 
-		skb->dev = dev ;
-
 		skb2 = xl_priv->rx_ring_skb[xl_priv->rx_ring_tail] ;
 		pci_unmap_single(xl_priv->pdev, xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr, xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
 		skb_put(skb2, frame_length) ;
diff --git a/drivers/net/tokenring/ibmtr.c b/drivers/net/tokenring/ibmtr.c
index 01d55315ee8c..1e8958ee2d0a 100644
--- a/drivers/net/tokenring/ibmtr.c
+++ b/drivers/net/tokenring/ibmtr.c
@@ -1771,7 +1771,6 @@ static void tr_rx(struct net_device *dev)
 	/*BMS again, if she comes in with few but leaves with many */
 	skb_reserve(skb, sizeof(struct trh_hdr) - lan_hdr_len);
 	skb_put(skb, length);
-	skb->dev = dev;
 	data = skb->data;
 	rbuffer_len = ntohs(readw(rbuf + offsetof(struct rec_buf, buf_len)));
 	rbufdata = rbuf + offsetof(struct rec_buf, data);
diff --git a/drivers/net/tokenring/lanstreamer.c b/drivers/net/tokenring/lanstreamer.c
index e999feb8c0bb..5d849c089a3b 100644
--- a/drivers/net/tokenring/lanstreamer.c
+++ b/drivers/net/tokenring/lanstreamer.c
@@ -944,8 +944,6 @@ static void streamer_rx(struct net_device *dev)
 			printk(KERN_WARNING "%s: Not enough memory to copy packet to upper layers. \n", dev->name);
 			streamer_priv->streamer_stats.rx_dropped++;
 		} else {	/* we allocated an skb OK */
-			skb->dev = dev;
-
 			if (buffer_cnt == 1) {
 				/* release the DMA mapping */
 				pci_unmap_single(streamer_priv->pci_dev,
@@ -1607,10 +1605,11 @@ static void streamer_arb_cmd(struct net_device *dev)
 					   frame_data, buffer_len);
 		} while (next_ptr && (buff_off = next_ptr));
 
+		mac_frame->protocol = tr_type_trans(mac_frame, dev);
 #if STREAMER_NETWORK_MONITOR
 		printk(KERN_WARNING "%s: Received MAC Frame, details: \n",
 		       dev->name);
-		mac_hdr = (struct trh_hdr *) mac_frame->data;
+		mac_hdr = tr_hdr(mac_frame);
 		printk(KERN_WARNING
 		       "%s: MAC Frame Dest. Addr: %02x:%02x:%02x:%02x:%02x:%02x \n",
 		       dev->name, mac_hdr->daddr[0], mac_hdr->daddr[1],
@@ -1622,8 +1621,6 @@ static void streamer_arb_cmd(struct net_device *dev)
 		       mac_hdr->saddr[2], mac_hdr->saddr[3],
 		       mac_hdr->saddr[4], mac_hdr->saddr[5]);
 #endif
-		mac_frame->dev = dev;
-		mac_frame->protocol = tr_type_trans(mac_frame, dev);
 		netif_rx(mac_frame);
 
 		/* Now tell the card we have dealt with the received frame */
diff --git a/drivers/net/tokenring/olympic.c b/drivers/net/tokenring/olympic.c
index 8f4ecc1109cb..09b3cfb8e809 100644
--- a/drivers/net/tokenring/olympic.c
+++ b/drivers/net/tokenring/olympic.c
@@ -814,8 +814,6 @@ static void olympic_rx(struct net_device *dev)
 				olympic_priv->rx_ring_last_received += i ;
 				olympic_priv->rx_ring_last_received &= (OLYMPIC_RX_RING_SIZE -1) ;
 			} else {
-				skb->dev = dev ;
-
 				/* Optimise based upon number of buffers used.
 				   If only one buffer is used we can simply swap the buffers around.
 				   If more than one then we must use the new buffer and copy the information
@@ -847,7 +845,9 @@ static void olympic_rx(struct net_device *dev)
 						pci_dma_sync_single_for_cpu(olympic_priv->pdev,
 							le32_to_cpu(olympic_priv->olympic_rx_ring[rx_ring_last_received].buffer),
 							olympic_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
-						memcpy(skb_put(skb,length-4),olympic_priv->rx_ring_skb[rx_ring_last_received]->data,length-4) ;
+						skb_copy_from_linear_data(olympic_priv->rx_ring_skb[rx_ring_last_received],
+									  skb_put(skb,length - 4),
+									  length - 4);
 						pci_dma_sync_single_for_device(olympic_priv->pdev,
 							le32_to_cpu(olympic_priv->olympic_rx_ring[rx_ring_last_received].buffer),
 							olympic_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
@@ -864,7 +864,9 @@ static void olympic_rx(struct net_device *dev)
 							olympic_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
 						rx_desc = &(olympic_priv->olympic_rx_ring[rx_ring_last_received]);
 						cpy_length = (i == 1 ? frag_len : le32_to_cpu(rx_desc->res_length));
-						memcpy(skb_put(skb, cpy_length), olympic_priv->rx_ring_skb[rx_ring_last_received]->data, cpy_length) ;
+						skb_copy_from_linear_data(olympic_priv->rx_ring_skb[rx_ring_last_received],
+									  skb_put(skb, cpy_length),
+									  cpy_length);
 						pci_dma_sync_single_for_device(olympic_priv->pdev,
 							le32_to_cpu(olympic_priv->olympic_rx_ring[rx_ring_last_received].buffer),
 							olympic_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
@@ -1440,16 +1442,16 @@ static void olympic_arb_cmd(struct net_device *dev)
 			next_ptr=readw(buf_ptr+offsetof(struct mac_receive_buffer,next));
 		} while (next_ptr && (buf_ptr=olympic_priv->olympic_lap + ntohs(next_ptr)));
 
+		mac_frame->protocol = tr_type_trans(mac_frame, dev);
+
 		if (olympic_priv->olympic_network_monitor) {
 			struct trh_hdr *mac_hdr ;
 			printk(KERN_WARNING "%s: Received MAC Frame, details: \n",dev->name) ;
-			mac_hdr = (struct trh_hdr *)mac_frame->data ;
+			mac_hdr = tr_hdr(mac_frame);
 			printk(KERN_WARNING "%s: MAC Frame Dest. Addr: %02x:%02x:%02x:%02x:%02x:%02x \n", dev->name , mac_hdr->daddr[0], mac_hdr->daddr[1], mac_hdr->daddr[2], mac_hdr->daddr[3], mac_hdr->daddr[4], mac_hdr->daddr[5]) ;
 			printk(KERN_WARNING "%s: MAC Frame Srce. Addr: %02x:%02x:%02x:%02x:%02x:%02x \n", dev->name , mac_hdr->saddr[0], mac_hdr->saddr[1], mac_hdr->saddr[2], mac_hdr->saddr[3], mac_hdr->saddr[4], mac_hdr->saddr[5]) ;
 		}
-		mac_frame->dev = dev ;
-		mac_frame->protocol = tr_type_trans(mac_frame,dev);
-		netif_rx(mac_frame) ;
+		netif_rx(mac_frame);
 		dev->last_rx = jiffies;
 
drop_frame:
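
Hoisting tr_type_trans() ahead of the network-monitor printout matters here: like eth_type_trans(), it sets skb->dev and the MAC-header pointer that the tr_hdr() accessor reads, so calling tr_hdr() before it would dereference an unset header. tr_hdr() is presumably just:

	static inline struct trh_hdr *tr_hdr(const struct sk_buff *skb)
	{
		return (struct trh_hdr *)skb_mac_header(skb);
	}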
diff --git a/drivers/net/tokenring/smctr.c b/drivers/net/tokenring/smctr.c
index cec282a6f62d..9bbea5c8acf4 100644
--- a/drivers/net/tokenring/smctr.c
+++ b/drivers/net/tokenring/smctr.c
@@ -3889,14 +3889,13 @@ static int smctr_process_rx_packet(MAC_HEADER *rmf, __u16 size,

     /* Slide data into a sleek skb. */
     skb_put(skb, skb->len);
-    memcpy(skb->data, rmf, skb->len);
+    skb_copy_to_linear_data(skb, rmf, skb->len);

     /* Update Counters */
     tp->MacStat.rx_packets++;
     tp->MacStat.rx_bytes += skb->len;

     /* Kick the packet on up. */
-    skb->dev = dev;
     skb->protocol = tr_type_trans(skb, dev);
     netif_rx(skb);
     dev->last_rx = jiffies;
@@ -4476,14 +4475,13 @@ static int smctr_rx_frame(struct net_device *dev)
     if (skb) {
         skb_put(skb, rx_size);

-        memcpy(skb->data, pbuff, rx_size);
+        skb_copy_to_linear_data(skb, pbuff, rx_size);

         /* Update Counters */
         tp->MacStat.rx_packets++;
         tp->MacStat.rx_bytes += skb->len;

         /* Kick the packet on up. */
-        skb->dev = dev;
         skb->protocol = tr_type_trans(skb, dev);
         netif_rx(skb);
         dev->last_rx = jiffies;
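
Note (editorial sketch, not part of the patch): the deleted `skb->dev = dev;` assignments in smctr — and in most drivers below — are redundant rather than wrong: eth_type_trans() and tr_type_trans() already record the receiving device in the skb. A hypothetical receive tail illustrating the pattern (the function name is illustrative, not from the patch):

    static void example_rx_finish(struct sk_buff *skb, struct net_device *dev)
    {
            /* tr_type_trans()/eth_type_trans() set skb->dev = dev internally,
             * so the hand-written assignment before them can be dropped. */
            skb->protocol = tr_type_trans(skb, dev);
            netif_rx(skb);
            dev->last_rx = jiffies;
    }
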
diff --git a/drivers/net/tokenring/tms380tr.c b/drivers/net/tokenring/tms380tr.c
index ea797ca2b988..12bd294045a7 100644
--- a/drivers/net/tokenring/tms380tr.c
+++ b/drivers/net/tokenring/tms380tr.c
@@ -644,7 +644,7 @@ static int tms380tr_hardware_send_packet(struct sk_buff *skb, struct net_device
         dmabuf = 0;
         i = tp->TplFree->TPLIndex;
         buf = tp->LocalTxBuffers[i];
-        memcpy(buf, skb->data, length);
+        skb_copy_from_linear_data(skb, buf, length);
         newbuf = ((char *)buf - (char *)tp) + tp->dmabuffer;
     }
     else {
@@ -2168,7 +2168,6 @@ static void tms380tr_rcv_status_irq(struct net_device *dev)
         }
         else
         {
-            skb->dev = dev;
             skb_put(skb, tp->MaxPacketSize);
             rpl->SkbStat = SKB_DATA_COPY;
             ReceiveDataPtr = rpl->MData;
@@ -2179,7 +2178,8 @@ static void tms380tr_rcv_status_irq(struct net_device *dev)
            || rpl->SkbStat == SKB_DMA_DIRECT))
         {
             if(rpl->SkbStat == SKB_DATA_COPY)
-                memcpy(skb->data, ReceiveDataPtr, Length);
+                skb_copy_to_linear_data(skb, ReceiveDataPtr,
+                                        Length);

             /* Deliver frame to system */
             rpl->Skb = NULL;
diff --git a/drivers/net/tsi108_eth.c b/drivers/net/tsi108_eth.c
index d92c5c597e16..0bfc2c9c1c08 100644
--- a/drivers/net/tsi108_eth.c
+++ b/drivers/net/tsi108_eth.c
@@ -788,7 +788,6 @@ static int tsi108_complete_rx(struct net_device *dev, int budget)
             printk(".\n");
         }

-        skb->dev = dev;
         skb_put(skb, data->rxring[rx].len);
         skb->protocol = eth_type_trans(skb, dev);
         netif_receive_skb(skb);
diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c
index c82befa209a2..d19f8568440f 100644
--- a/drivers/net/tulip/de2104x.c
+++ b/drivers/net/tulip/de2104x.c
@@ -435,7 +435,6 @@ static void de_rx (struct de_private *de)
             rx_work = 100;
             goto rx_next;
         }
-        copy_skb->dev = de->dev;

         if (!copying_skb) {
             pci_unmap_single(de->pdev, mapping,
@@ -450,8 +449,8 @@ static void de_rx (struct de_private *de)
         } else {
             pci_dma_sync_single_for_cpu(de->pdev, mapping, len, PCI_DMA_FROMDEVICE);
             skb_reserve(copy_skb, RX_OFFSET);
-            memcpy(skb_put(copy_skb, len), skb->data, len);
-
+            skb_copy_from_linear_data(skb, skb_put(copy_skb, len),
+                                      len);
             pci_dma_sync_single_for_device(de->pdev, mapping, len, PCI_DMA_FROMDEVICE);

             /* We'll reuse the original ring buffer. */
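
Note (editorial sketch, not part of the patch): de2104x follows the usual rx_copybreak convention — frames below a threshold are copied into a small freshly allocated skb so the large receive buffer can stay mapped and be handed straight back to the hardware. A simplified sketch of the idea; names and the threshold parameter are illustrative:

    static void example_copybreak_rx(struct sk_buff *ring_skb, int pkt_len,
                                     int copybreak)
    {
            struct sk_buff *copy_skb;

            if (pkt_len < copybreak &&
                (copy_skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
                    skb_reserve(copy_skb, 2);       /* 16 byte align the IP header */
                    skb_copy_from_linear_data(ring_skb,
                                              skb_put(copy_skb, pkt_len),
                                              pkt_len);
                    /* copy_skb goes up the stack; ring_skb is reused for DMA */
            }
    }
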
diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c
index 4b3cd3d8b62a..e40ddb869583 100644
--- a/drivers/net/tulip/de4x5.c
+++ b/drivers/net/tulip/de4x5.c
@@ -3634,7 +3634,6 @@ de4x5_alloc_rx_buff(struct net_device *dev, int index, int len)
     p = dev_alloc_skb(IEEE802_3_SZ + DE4X5_ALIGN + 2);
     if (!p) return NULL;

-    p->dev = dev;
     tmp = virt_to_bus(p->data);
     i = ((tmp + DE4X5_ALIGN) & ~DE4X5_ALIGN) - tmp;
     skb_reserve(p, i);
@@ -3655,7 +3654,6 @@ de4x5_alloc_rx_buff(struct net_device *dev, int index, int len)
     p = dev_alloc_skb(len + 2);
     if (!p) return NULL;

-    p->dev = dev;
     skb_reserve(p, 2);                               /* Align */
     if (index < lp->rx_old) {         /* Wrapped buffer */
         short tlen = (lp->rxRingSize - lp->rx_old) * RX_BUFF_SZ;
diff --git a/drivers/net/tulip/dmfe.c b/drivers/net/tulip/dmfe.c
index 9aeac76184f3..b3a64ca98634 100644
--- a/drivers/net/tulip/dmfe.c
+++ b/drivers/net/tulip/dmfe.c
@@ -682,7 +682,7 @@ static int dmfe_start_xmit(struct sk_buff *skb, struct DEVICE *dev)

     /* transmit this packet */
     txptr = db->tx_insert_ptr;
-    memcpy(txptr->tx_buf_ptr, skb->data, skb->len);
+    skb_copy_from_linear_data(skb, txptr->tx_buf_ptr, skb->len);
     txptr->tdes1 = cpu_to_le32(0xe1000000 | skb->len);

     /* Point to next transmit free descriptor */
@@ -988,14 +988,14 @@ static void dmfe_rx_packet(struct DEVICE *dev, struct dmfe_board_info * db)

                 skb = newskb;
                 /* size less than COPY_SIZE, allocate a rxlen SKB */
-                skb->dev = dev;
                 skb_reserve(skb, 2); /* 16byte align */
-                memcpy(skb_put(skb, rxlen), rxptr->rx_skb_ptr->data, rxlen);
+                skb_copy_from_linear_data(rxptr->rx_skb_ptr,
+                                          skb_put(skb, rxlen),
+                                          rxlen);
                 dmfe_reuse_skb(db, rxptr->rx_skb_ptr);
-            } else {
-                skb->dev = dev;
+            } else
                 skb_put(skb, rxlen);
-            }
+
             skb->protocol = eth_type_trans(skb, dev);
             netif_rx(skb);
             dev->last_rx = jiffies;
diff --git a/drivers/net/tulip/interrupt.c b/drivers/net/tulip/interrupt.c
index e3488d7b8ede..e86df07769a1 100644
--- a/drivers/net/tulip/interrupt.c
+++ b/drivers/net/tulip/interrupt.c
@@ -192,7 +192,6 @@ int tulip_poll(struct net_device *dev, int *budget)
                to a minimally-sized skbuff. */
             if (pkt_len < tulip_rx_copybreak
                 && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-                skb->dev = dev;
                 skb_reserve(skb, 2);    /* 16 byte align the IP header */
                 pci_dma_sync_single_for_cpu(tp->pdev,
                                             tp->rx_buffers[entry].mapping,
@@ -416,7 +415,6 @@ static int tulip_rx(struct net_device *dev)
            to a minimally-sized skbuff. */
         if (pkt_len < tulip_rx_copybreak
             && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-            skb->dev = dev;
             skb_reserve(skb, 2);    /* 16 byte align the IP header */
             pci_dma_sync_single_for_cpu(tp->pdev,
                                         tp->rx_buffers[entry].mapping,
diff --git a/drivers/net/tulip/uli526x.c b/drivers/net/tulip/uli526x.c
index 229158e8e4be..ca2548eb7d63 100644
--- a/drivers/net/tulip/uli526x.c
+++ b/drivers/net/tulip/uli526x.c
@@ -583,7 +583,7 @@ static int uli526x_start_xmit(struct sk_buff *skb, struct net_device *dev)

     /* transmit this packet */
     txptr = db->tx_insert_ptr;
-    memcpy(txptr->tx_buf_ptr, skb->data, skb->len);
+    skb_copy_from_linear_data(skb, txptr->tx_buf_ptr, skb->len);
     txptr->tdes1 = cpu_to_le32(0xe1000000 | skb->len);

     /* Point to next transmit free descriptor */
@@ -828,14 +828,14 @@ static void uli526x_rx_packet(struct net_device *dev, struct uli526x_board_info
                 ( (skb = dev_alloc_skb(rxlen + 2) )
                 != NULL) ) {
                 /* size less than COPY_SIZE, allocate a rxlen SKB */
-                skb->dev = dev;
                 skb_reserve(skb, 2); /* 16byte align */
-                memcpy(skb_put(skb, rxlen), rxptr->rx_skb_ptr->tail, rxlen);
+                memcpy(skb_put(skb, rxlen),
+                       skb_tail_pointer(rxptr->rx_skb_ptr),
+                       rxlen);
                 uli526x_reuse_skb(db, rxptr->rx_skb_ptr);
-            } else {
-                skb->dev = dev;
+            } else
                 skb_put(skb, rxlen);
-            }
+
             skb->protocol = eth_type_trans(skb, dev);
             netif_rx(skb);
             dev->last_rx = jiffies;
@@ -1177,7 +1177,10 @@ static void uli526x_reuse_skb(struct uli526x_board_info *db, struct sk_buff * sk

     if (!(rxptr->rdes0 & cpu_to_le32(0x80000000))) {
         rxptr->rx_skb_ptr = skb;
-        rxptr->rdes2 = cpu_to_le32( pci_map_single(db->pdev, skb->tail, RX_ALLOC_SIZE, PCI_DMA_FROMDEVICE) );
+        rxptr->rdes2 = cpu_to_le32(pci_map_single(db->pdev,
+                                                  skb_tail_pointer(skb),
+                                                  RX_ALLOC_SIZE,
+                                                  PCI_DMA_FROMDEVICE));
         wmb();
         rxptr->rdes0 = cpu_to_le32(0x80000000);
         db->rx_avail_cnt++;
@@ -1341,7 +1344,10 @@ static void allocate_rx_buffer(struct uli526x_board_info *db)
         if ( ( skb = dev_alloc_skb(RX_ALLOC_SIZE) ) == NULL )
             break;
         rxptr->rx_skb_ptr = skb; /* FIXME (?) */
-        rxptr->rdes2 = cpu_to_le32( pci_map_single(db->pdev, skb->tail, RX_ALLOC_SIZE, PCI_DMA_FROMDEVICE) );
+        rxptr->rdes2 = cpu_to_le32(pci_map_single(db->pdev,
+                                                  skb_tail_pointer(skb),
+                                                  RX_ALLOC_SIZE,
+                                                  PCI_DMA_FROMDEVICE));
         wmb();
         rxptr->rdes0 = cpu_to_le32(0x80000000);
         rxptr = rxptr->next_rx_desc;
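
Note (editorial sketch, not part of the patch): uli526x stops dereferencing skb->tail directly and goes through skb_tail_pointer() instead. At this point the helper is trivial; the indirection exists so that `tail` can later be stored as an offset from skb->head rather than a raw pointer. Paraphrasing the pointer-based variant from include/linux/skbuff.h:

    static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
    {
            return skb->tail;       /* later kernels: skb->head + skb->tail */
    }
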
diff --git a/drivers/net/tulip/winbond-840.c b/drivers/net/tulip/winbond-840.c
index 002a05e0722f..d74fa871de11 100644
--- a/drivers/net/tulip/winbond-840.c
+++ b/drivers/net/tulip/winbond-840.c
@@ -813,7 +813,6 @@ static void init_rxtx_rings(struct net_device *dev)
         np->rx_skbuff[i] = skb;
         if (skb == NULL)
             break;
-        skb->dev = dev;            /* Mark as being used by this device. */
         np->rx_addr[i] = pci_map_single(np->pci_dev,skb->data,
                     np->rx_buf_sz,PCI_DMA_FROMDEVICE);

@@ -1229,7 +1228,6 @@ static int netdev_rx(struct net_device *dev)
            to a minimally-sized skbuff. */
         if (pkt_len < rx_copybreak
             && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-            skb->dev = dev;
             skb_reserve(skb, 2);    /* 16 byte align the IP header */
             pci_dma_sync_single_for_cpu(np->pci_dev,np->rx_addr[entry],
                             np->rx_skbuff[entry]->len,
@@ -1278,7 +1276,6 @@ static int netdev_rx(struct net_device *dev)
             np->rx_skbuff[entry] = skb;
             if (skb == NULL)
                 break;            /* Better luck next round. */
-            skb->dev = dev;            /* Mark as being used by this device. */
             np->rx_addr[entry] = pci_map_single(np->pci_dev,
                             skb->data,
                             np->rx_buf_sz, PCI_DMA_FROMDEVICE);
diff --git a/drivers/net/tulip/xircom_cb.c b/drivers/net/tulip/xircom_cb.c
index 61d313049dd0..985a1810ca59 100644
--- a/drivers/net/tulip/xircom_cb.c
+++ b/drivers/net/tulip/xircom_cb.c
@@ -411,9 +411,9 @@ static int xircom_start_xmit(struct sk_buff *skb, struct net_device *dev)
        sometimes sends more than you ask it to. */

     memset(&card->tx_buffer[bufferoffsets[desc]/4],0,1536);
-    memcpy(&(card->tx_buffer[bufferoffsets[desc]/4]),skb->data,skb->len);
-
-
+    skb_copy_from_linear_data(skb,
+                              &(card->tx_buffer[bufferoffsets[desc] / 4]),
+                              skb->len);
     /* FIXME: The specification tells us that the length we send HAS to be a multiple of
        4 bytes. */

@@ -1207,7 +1207,6 @@ static void investigate_read_descriptor(struct net_device *dev,struct xircom_pri
             card->stats.rx_dropped++;
             goto out;
         }
-        skb->dev = dev;
         skb_reserve(skb, 2);
         eth_copy_and_sum(skb, (unsigned char*)&card->rx_buffer[bufferoffset / 4], pkt_len, 0);
         skb_put(skb, pkt_len);
diff --git a/drivers/net/tulip/xircom_tulip_cb.c b/drivers/net/tulip/xircom_tulip_cb.c
index a998c5d0ae9c..696b3b8aac8e 100644
--- a/drivers/net/tulip/xircom_tulip_cb.c
+++ b/drivers/net/tulip/xircom_tulip_cb.c
@@ -915,7 +915,9 @@ xircom_start_xmit(struct sk_buff *skb, struct net_device *dev)

     tp->tx_skbuff[entry] = skb;
     if (tp->chip_id == X3201_3) {
-        memcpy(tp->tx_aligned_skbuff[entry]->data,skb->data,skb->len);
+        skb_copy_from_linear_data(skb,
+                                  tp->tx_aligned_skbuff[entry]->data,
+                                  skb->len);
         tp->tx_ring[entry].buffer1 = virt_to_bus(tp->tx_aligned_skbuff[entry]->data);
     } else
         tp->tx_ring[entry].buffer1 = virt_to_bus(skb->data);
@@ -1238,7 +1240,6 @@ xircom_rx(struct net_device *dev)
            to a minimally-sized skbuff. */
         if (pkt_len < rx_copybreak
             && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-            skb->dev = dev;
             skb_reserve(skb, 2);    /* 16 byte align the IP header */
 #if ! defined(__alpha__)
             eth_copy_and_sum(skb, bus_to_virt(tp->rx_ring[entry].buffer1),
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 5643d1e84ed6..a2c6caaaae93 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -18,6 +18,10 @@
 /*
  *  Changes:
  *
+ *  Brian Braunstein <linuxkernel@bristyle.com> 2007/03/23
+ *    Fixed hw address handling.  Now net_device.dev_addr is kept consistent
+ *    with tun.dev_addr when the address is set by this module.
+ *
  *  Mike Kershaw <dragorn@kismetwireless.net> 2005/08/14
  *    Add TUNSETLINK ioctl to set the link encapsulation
  *
@@ -196,7 +200,10 @@ static void tun_net_init(struct net_device *dev)
         dev->set_multicast_list = tun_net_mclist;

         ether_setup(dev);
-        random_ether_addr(dev->dev_addr);
+
+        /* random address already created for us by tun_set_iff, use it */
+        memcpy(dev->dev_addr, tun->dev_addr, min(sizeof(tun->dev_addr), sizeof(dev->dev_addr)) );
+
         dev->tx_queue_len = TUN_READQ_SIZE;  /* We prefer our own queue length */
         break;
     }
@@ -254,11 +261,11 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv,
         return -EFAULT;
     }

-    skb->dev = tun->dev;
     switch (tun->flags & TUN_TYPE_MASK) {
     case TUN_TUN_DEV:
-        skb->mac.raw = skb->data;
+        skb_reset_mac_header(skb);
         skb->protocol = pi.proto;
+        skb->dev = tun->dev;
         break;
     case TUN_TAP_DEV:
         skb->protocol = eth_type_trans(skb, tun->dev);
@@ -386,8 +393,8 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
          *   - we are multicast promiscous.
          *   - we belong to the multicast group.
          */
-        memcpy(addr, skb->data,
-               min_t(size_t, sizeof addr, skb->len));
+        skb_copy_from_linear_data(skb, addr, min_t(size_t, sizeof addr,
+                                                   skb->len));
         bit_nr = ether_crc(sizeof addr, addr) >> 26;
         if ((tun->if_flags & IFF_PROMISC) ||
             memcmp(addr, tun->dev_addr, sizeof addr) == 0 ||
@@ -636,6 +643,7 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
         return 0;

     case SIOCGIFHWADDR:
+        /* Note: the actual net device's address may be different */
         memcpy(ifr.ifr_hwaddr.sa_data, tun->dev_addr,
                min(sizeof ifr.ifr_hwaddr.sa_data, sizeof tun->dev_addr));
         if (copy_to_user( argp, &ifr, sizeof ifr))
@@ -643,16 +651,24 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
         return 0;

     case SIOCSIFHWADDR:
-        /** Set the character device's hardware address. This is used when
-         * filtering packets being sent from the network device to the character
-         * device. */
-        memcpy(tun->dev_addr, ifr.ifr_hwaddr.sa_data,
-               min(sizeof ifr.ifr_hwaddr.sa_data, sizeof tun->dev_addr));
-        DBG(KERN_DEBUG "%s: set hardware address: %x:%x:%x:%x:%x:%x\n",
-            tun->dev->name,
-            tun->dev_addr[0], tun->dev_addr[1], tun->dev_addr[2],
-            tun->dev_addr[3], tun->dev_addr[4], tun->dev_addr[5]);
-        return 0;
+    {
+        /* try to set the actual net device's hw address */
+        int ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr);
+
+        if (ret == 0) {
+            /** Set the character device's hardware address. This is used when
+             * filtering packets being sent from the network device to the character
+             * device. */
+            memcpy(tun->dev_addr, ifr.ifr_hwaddr.sa_data,
+                   min(sizeof ifr.ifr_hwaddr.sa_data, sizeof tun->dev_addr));
+            DBG(KERN_DEBUG "%s: set hardware address: %x:%x:%x:%x:%x:%x\n",
+                tun->dev->name,
+                tun->dev_addr[0], tun->dev_addr[1], tun->dev_addr[2],
+                tun->dev_addr[3], tun->dev_addr[4], tun->dev_addr[5]);
+        }
+
+        return ret;
+    }

     case SIOCADDMULTI:
         /** Add the specified group to the character device's multicast filter
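
Note (editorial sketch, not part of the patch): the new SIOCSIFHWADDR path above first asks the network core to change the real device's address and only mirrors it into the character device's filter on success. A condensed sketch of the flow; dev_set_mac_address() is the net/core helper that validates the address and calls the driver's set_mac_address hook:

    static int example_set_hwaddr(struct tun_struct *tun, struct ifreq *ifr)
    {
            /* 1. update the underlying net_device; fails if the driver
             *    or the address family rejects the new address */
            int ret = dev_set_mac_address(tun->dev, &ifr->ifr_hwaddr);

            if (ret == 0)
                    /* 2. keep the character device's filter address in sync */
                    memcpy(tun->dev_addr, ifr->ifr_hwaddr.sa_data,
                           min(sizeof(ifr->ifr_hwaddr.sa_data),
                               sizeof(tun->dev_addr)));
            return ret;
    }
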
diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c
index 0d91d094edd9..f2dd7763cd0b 100644
--- a/drivers/net/typhoon.c
+++ b/drivers/net/typhoon.c
@@ -1708,7 +1708,6 @@ typhoon_rx(struct typhoon *tp, struct basic_ring *rxRing, volatile u32 * ready,

     if(pkt_len < rx_copybreak &&
        (new_skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-        new_skb->dev = tp->dev;
         skb_reserve(new_skb, 2);
         pci_dma_sync_single_for_cpu(tp->pdev, dma_addr,
                         PKT_BUF_SZ,
diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c
index f3a972e74e9a..adea290a9d5e 100644
--- a/drivers/net/via-rhine.c
+++ b/drivers/net/via-rhine.c
@@ -1486,7 +1486,6 @@ static int rhine_rx(struct net_device *dev, int limit)
            copying to a minimally-sized skbuff. */
         if (pkt_len < rx_copybreak &&
             (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-            skb->dev = dev;
             skb_reserve(skb, 2);    /* 16 byte align the IP header */
             pci_dma_sync_single_for_cpu(rp->pdev,
                             rp->rx_skbuff_dma[entry],
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 8e5d82051bd4..25b75b615188 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -1339,7 +1339,8 @@ static inline int velocity_rx_copy(struct sk_buff **rx_skb, int pkt_size,
         if (vptr->flags & VELOCITY_FLAGS_IP_ALIGN)
             skb_reserve(new_skb, 2);

-        memcpy(new_skb->data, rx_skb[0]->data, pkt_size);
+        skb_copy_from_linear_data(rx_skb[0], new_skb->data,
+                                  pkt_size);
         *rx_skb = new_skb;
         ret = 0;
     }
@@ -1398,7 +1399,6 @@ static int velocity_receive_frame(struct velocity_info *vptr, int idx)
         vptr->stats.multicast++;

     skb = rd_info->skb;
-    skb->dev = vptr->dev;

     pci_dma_sync_single_for_cpu(vptr->pdev, rd_info->skb_dma,
                     vptr->rx_buf_sz, PCI_DMA_FROMDEVICE);
@@ -1428,7 +1428,7 @@ static int velocity_receive_frame(struct velocity_info *vptr, int idx)
                     PCI_DMA_FROMDEVICE);

     skb_put(skb, pkt_len - 4);
-    skb->protocol = eth_type_trans(skb, skb->dev);
+    skb->protocol = eth_type_trans(skb, vptr->dev);

     stats->rx_bytes += pkt_len;
     netif_rx(skb);
@@ -1928,7 +1928,7 @@ static int velocity_xmit(struct sk_buff *skb, struct net_device *dev)
     if (pktlen < ETH_ZLEN) {
         /* Cannot occur until ZC support */
         pktlen = ETH_ZLEN;
-        memcpy(tdinfo->buf, skb->data, skb->len);
+        skb_copy_from_linear_data(skb, tdinfo->buf, skb->len);
         memset(tdinfo->buf + skb->len, 0, ETH_ZLEN - skb->len);
         tdinfo->skb = skb;
         tdinfo->skb_dma[0] = tdinfo->buf_dma;
@@ -1944,7 +1944,7 @@ static int velocity_xmit(struct sk_buff *skb, struct net_device *dev)
         int nfrags = skb_shinfo(skb)->nr_frags;
         tdinfo->skb = skb;
         if (nfrags > 6) {
-            memcpy(tdinfo->buf, skb->data, skb->len);
+            skb_copy_from_linear_data(skb, tdinfo->buf, skb->len);
             tdinfo->skb_dma[0] = tdinfo->buf_dma;
             td_ptr->tdesc0.pktsize =
             td_ptr->td_buf[0].pa_low = cpu_to_le32(tdinfo->skb_dma[0]);
@@ -2007,7 +2007,7 @@ static int velocity_xmit(struct sk_buff *skb, struct net_device *dev)
      */
     if ((vptr->flags & VELOCITY_FLAGS_TX_CSUM)
             && (skb->ip_summed == CHECKSUM_PARTIAL)) {
-        struct iphdr *ip = skb->nh.iph;
+        const struct iphdr *ip = ip_hdr(skb);
         if (ip->protocol == IPPROTO_TCP)
             td_ptr->tdesc1.TCR |= TCR0_TCPCK;
         else if (ip->protocol == IPPROTO_UDP)
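
Note (editorial sketch, not part of the patch): via-velocity also picks up the new header accessors — skb->nh.iph becomes ip_hdr(skb). Paraphrasing the accessor introduced alongside these conversions:

    static inline struct iphdr *ip_hdr(const struct sk_buff *skb)
    {
            /* skb_network_header() replaces the old skb->nh union */
            return (struct iphdr *)skb_network_header(skb);
    }
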
diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 5b82e4fd0d73..23464735fa88 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -773,7 +773,7 @@ static int sppp_rx_done(struct channel_data *chan)
     }
     chan->rx_skb->protocol = htons(ETH_P_WAN_PPP);
     chan->rx_skb->dev = chan->pppdev.dev;
-    chan->rx_skb->mac.raw = chan->rx_skb->data;
+    skb_reset_mac_header(chan->rx_skb);
     chan->stats.rx_packets++;
     chan->stats.rx_bytes += chan->cosa->rxsize;
     netif_rx(chan->rx_skb);
diff --git a/drivers/net/wan/cycx_x25.c b/drivers/net/wan/cycx_x25.c
index a631d1c2fa14..016b3ff3ea5e 100644
--- a/drivers/net/wan/cycx_x25.c
+++ b/drivers/net/wan/cycx_x25.c
@@ -834,7 +834,7 @@ static void cycx_x25_irq_rx(struct cycx_device *card, struct cycx_x25_cmd *cmd)
     ++chan->ifstats.rx_packets;
     chan->ifstats.rx_bytes += pktlen;

-    skb->mac.raw = skb->data;
+    skb_reset_mac_header(skb);
     netif_rx(skb);
     dev->last_rx = jiffies;        /* timestamp */
 }
diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c
index 736987559432..66be20c292b6 100644
--- a/drivers/net/wan/dlci.c
+++ b/drivers/net/wan/dlci.c
@@ -176,7 +176,7 @@ static void dlci_receive(struct sk_buff *skb, struct net_device *dev)
     if (process)
     {
         /* we've set up the protocol, so discard the header */
-        skb->mac.raw = skb->data;
+        skb_reset_mac_header(skb);
         skb_pull(skb, header);
         dlp->stats.rx_bytes += skb->len;
         netif_rx(skb);
diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c
index 25021a7992a9..dca024471455 100644
--- a/drivers/net/wan/dscc4.c
+++ b/drivers/net/wan/dscc4.c
@@ -1904,7 +1904,8 @@ static struct sk_buff *dscc4_init_dummy_skb(struct dscc4_dev_priv *dpriv)
     struct TxFD *tx_fd = dpriv->tx_fd + last;

     skb->len = DUMMY_SKB_SIZE;
-    memcpy(skb->data, version, strlen(version)%DUMMY_SKB_SIZE);
+    skb_copy_to_linear_data(skb, version,
+                            strlen(version) % DUMMY_SKB_SIZE);
     tx_fd->state = FrameEnd | TO_STATE_TX(DUMMY_SKB_SIZE);
     tx_fd->data = pci_map_single(dpriv->pci_priv->pdev, skb->data,
                      DUMMY_SKB_SIZE, PCI_DMA_TODEVICE);
diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index c45d6a83339d..58a53b6d9b42 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -864,7 +864,7 @@ fst_tx_dma_complete(struct fst_card_info *card, struct fst_port_info *port,
 static __be16 farsync_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
     skb->dev = dev;
-    skb->mac.raw = skb->data;
+    skb_reset_mac_header(skb);
     skb->pkt_type = PACKET_HOST;
     return htons(ETH_P_CUST);
 }
diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c
index c9664fd8a917..00e0aaadabcc 100644
--- a/drivers/net/wan/hdlc_cisco.c
+++ b/drivers/net/wan/hdlc_cisco.c
@@ -124,7 +124,7 @@ static void cisco_keepalive_send(struct net_device *dev, u32 type,
     skb_put(skb, sizeof(struct cisco_packet));
     skb->priority = TC_PRIO_CONTROL;
     skb->dev = dev;
-    skb->nh.raw = skb->data;
+    skb_reset_network_header(skb);

     dev_queue_xmit(skb);
 }
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index c6c3c757d6f1..aeb2789adf26 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -533,7 +533,7 @@ static void fr_lmi_send(struct net_device *dev, int fullrep)
         skb->protocol = __constant_htons(NLPID_CCITT_ANSI_LMI);
         fr_hard_header(&skb, LMI_CCITT_ANSI_DLCI);
     }
-    data = skb->tail;
+    data = skb_tail_pointer(skb);
     data[i++] = LMI_CALLREF;
     data[i++] = dce ? LMI_STATUS : LMI_STATUS_ENQUIRY;
     if (lmi == LMI_ANSI)
@@ -590,7 +590,7 @@ static void fr_lmi_send(struct net_device *dev, int fullrep)
     skb_put(skb, i);
     skb->priority = TC_PRIO_CONTROL;
     skb->dev = dev;
-    skb->nh.raw = skb->data;
+    skb_reset_network_header(skb);

     dev_queue_xmit(skb);
 }
@@ -1011,7 +1011,6 @@ static int fr_rx(struct sk_buff *skb)
             stats->rx_bytes += skb->len;
             if (pvc->state.becn)
                 stats->rx_compressed++;
-            skb->dev = dev;
             netif_rx(skb);
             return NET_RX_SUCCESS;
         } else {
diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c
index a02c5fb40567..9ba3e4ee6ec7 100644
--- a/drivers/net/wan/hostess_sv11.c
+++ b/drivers/net/wan/hostess_sv11.c
@@ -59,7 +59,7 @@ static void hostess_input(struct z8530_channel *c, struct sk_buff *skb)
     /* Drop the CRC - it's not a good idea to try and negotiate it ;) */
     skb_trim(skb, skb->len-2);
     skb->protocol=__constant_htons(ETH_P_WAN_PPP);
-    skb->mac.raw=skb->data;
+    skb_reset_mac_header(skb);
     skb->dev=c->netdevice;
     /*
      *    Send it to the PPP layer. We don't have time to process
diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index 2b54f1bc3a0d..ae132c1c5459 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -1636,7 +1636,7 @@ static int lmc_rx (struct net_device *dev) /*fold00*/
         if (nsb) {
             sc->lmc_rxq[i] = nsb;
             nsb->dev = dev;
-            sc->lmc_rxring[i].buffer1 = virt_to_bus (nsb->tail);
+            sc->lmc_rxring[i].buffer1 = virt_to_bus(skb_tail_pointer(nsb));
         }
         sc->failed_recv_alloc = 1;
         goto skip_packet;
@@ -1667,8 +1667,8 @@ static int lmc_rx (struct net_device *dev) /*fold00*/
         skb_put (skb, len);
         skb->protocol = lmc_proto_type(sc, skb);
         skb->protocol = htons(ETH_P_WAN_PPP);
-        skb->mac.raw = skb->data;
-//        skb->nh.raw = skb->data;
+        skb_reset_mac_header(skb);
+        /* skb_reset_network_header(skb); */
         skb->dev = dev;
         lmc_proto_netif(sc, skb);

@@ -1679,7 +1679,7 @@ static int lmc_rx (struct net_device *dev) /*fold00*/
         if (nsb) {
             sc->lmc_rxq[i] = nsb;
             nsb->dev = dev;
-            sc->lmc_rxring[i].buffer1 = virt_to_bus (nsb->tail);
+            sc->lmc_rxring[i].buffer1 = virt_to_bus(skb_tail_pointer(nsb));
             /* Transferred to 21140 below */
         }
         else {
@@ -1702,11 +1702,11 @@ static int lmc_rx (struct net_device *dev) /*fold00*/
         if(!nsb) {
             goto give_it_anyways;
         }
-        memcpy(skb_put(nsb, len), skb->data, len);
+        skb_copy_from_linear_data(skb, skb_put(nsb, len), len);

         nsb->protocol = lmc_proto_type(sc, skb);
-        nsb->mac.raw = nsb->data;
-//        nsb->nh.raw = nsb->data;
+        skb_reset_mac_header(nsb);
+        /* skb_reset_network_header(nsb); */
         nsb->dev = dev;
         lmc_proto_netif(sc, nsb);
     }
@@ -1932,7 +1932,7 @@ static void lmc_softreset (lmc_softc_t * const sc) /*fold00*/
         sc->lmc_rxring[i].status = 0x80000000;

         /* used to be PKT_BUF_SZ now uses skb since we lose some to head room */
-        sc->lmc_rxring[i].length = skb->end - skb->data;
+        sc->lmc_rxring[i].length = skb_tailroom(skb);

         /* use to be tail which is dumb since you're thinking why write
          * to the end of the packj,et but since there's nothing there tail == data
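
Note (editorial sketch, not part of the patch): the lmc change from `skb->end - skb->data` to skb_tailroom() is safe here because the skb is freshly allocated and nothing has been reserved or put yet, so data == tail and the two expressions agree. The helper itself is roughly (paraphrased; nonlinear skbs report no tailroom):

    static inline int skb_tailroom(const struct sk_buff *skb)
    {
            return skb_is_nonlinear(skb) ? 0 : skb->end - skb->tail;
    }
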
diff --git a/drivers/net/wan/pc300_drv.c b/drivers/net/wan/pc300_drv.c
index 62184dee377c..999bf71937ca 100644
--- a/drivers/net/wan/pc300_drv.c
+++ b/drivers/net/wan/pc300_drv.c
@@ -1755,17 +1755,17 @@ cpc_trace(struct net_device *dev, struct sk_buff *skb_main, char rx_tx)

     skb->dev = dev;
     skb->protocol = htons(ETH_P_CUST);
-    skb->mac.raw = skb->data;
+    skb_reset_mac_header(skb);
     skb->pkt_type = PACKET_HOST;
     skb->len = 10 + skb_main->len;

-    memcpy(skb->data, dev->name, 5);
+    skb_copy_to_linear_data(skb, dev->name, 5);
     skb->data[5] = '[';
     skb->data[6] = rx_tx;
     skb->data[7] = ']';
     skb->data[8] = ':';
     skb->data[9] = ' ';
-    memcpy(&skb->data[10], skb_main->data, skb_main->len);
+    skb_copy_from_linear_data(skb_main, &skb->data[10], skb_main->len);

     netif_rx(skb);
 }
diff --git a/drivers/net/wan/pc300_tty.c b/drivers/net/wan/pc300_tty.c
index 5873c346e7e9..07dbdfbfc15d 100644
--- a/drivers/net/wan/pc300_tty.c
+++ b/drivers/net/wan/pc300_tty.c
@@ -1003,17 +1003,17 @@ static void cpc_tty_trace(pc300dev_t *dev, char* buf, int len, char rxtx)
     skb_put (skb, 10 + len);
     skb->dev = dev->dev;
     skb->protocol = htons(ETH_P_CUST);
-    skb->mac.raw = skb->data;
+    skb_reset_mac_header(skb);
     skb->pkt_type = PACKET_HOST;
     skb->len = 10 + len;

-    memcpy(skb->data,dev->dev->name,5);
+    skb_copy_to_linear_data(skb, dev->dev->name, 5);
     skb->data[5] = '[';
     skb->data[6] = rxtx;
     skb->data[7] = ']';
     skb->data[8] = ':';
     skb->data[9] = ' ';
-    memcpy(&skb->data[10], buf, len);
+    skb_copy_to_linear_data_offset(skb, 10, buf, len);
     netif_rx(skb);
 }

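
Note (editorial sketch, not part of the patch): pc300_tty uses the offset variant for the copy at offset 10; as with the other helpers this is a thin memcpy() wrapper. Paraphrasing the include/linux/skbuff.h definition, assuming a linear skb:

    static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb,
                                                      const int offset,
                                                      const void *from,
                                                      const unsigned int len)
    {
            memcpy(skb->data + offset, from, len);
    }
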
diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c
index fc5c0c611ffd..35eded7ffb2d 100644
--- a/drivers/net/wan/sbni.c
+++ b/drivers/net/wan/sbni.c
@@ -999,11 +999,6 @@ get_rx_buf( struct net_device *dev )
     if( !skb )
         return NULL;

-#ifdef CONFIG_SBNI_MULTILINE
-    skb->dev = ((struct net_local *) dev->priv)->master;
-#else
-    skb->dev = dev;
-#endif
     skb_reserve( skb, 2 );        /* Align IP on longword boundaries */
     return skb;
 }
diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c
index 70fb1b98b1dd..131358108c5a 100644
--- a/drivers/net/wan/sealevel.c
+++ b/drivers/net/wan/sealevel.c
@@ -61,7 +61,7 @@ static void sealevel_input(struct z8530_channel *c, struct sk_buff *skb)
     /* Drop the CRC - it's not a good idea to try and negotiate it ;) */
     skb_trim(skb, skb->len-2);
     skb->protocol=htons(ETH_P_WAN_PPP);
-    skb->mac.raw=skb->data;
+    skb_reset_mac_header(skb);
     skb->dev=c->netdevice;
     /*
      *    Send it to the PPP layer. We don't have time to process
diff --git a/drivers/net/wan/syncppp.c b/drivers/net/wan/syncppp.c
index 218f7b574ab3..67fc67cfd452 100644
--- a/drivers/net/wan/syncppp.c
+++ b/drivers/net/wan/syncppp.c
@@ -227,7 +227,7 @@ static void sppp_input (struct net_device *dev, struct sk_buff *skb)
     unsigned long flags;

     skb->dev=dev;
-    skb->mac.raw=skb->data;
+    skb_reset_mac_header(skb);

     if (dev->flags & IFF_RUNNING)
     {
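
Note (editorial sketch, not part of the patch): the many `skb->mac.raw = skb->data;` lines replaced throughout these WAN drivers all become skb_reset_mac_header(). At the time of this merge the helper is equivalent to the open-coded form; the point of the abstraction is that later kernels store the header position as an offset instead of a pointer. A sketch of the pointer-based form:

    static inline void skb_reset_mac_header(struct sk_buff *skb)
    {
            skb->mac.raw = skb->data;   /* later: an offset, skb->mac_header */
    }
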
diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c
index 8b4540bfc1b0..98ef400908b8 100644
--- a/drivers/net/wan/z85230.c
+++ b/drivers/net/wan/z85230.c
@@ -1656,7 +1656,7 @@ static void z8530_rx_done(struct z8530_channel *c)
     else
     {
         skb_put(skb, ct);
-        memcpy(skb->data, rxb, ct);
+        skb_copy_to_linear_data(skb, rxb, ct);
         c->stats.rx_packets++;
         c->stats.rx_bytes+=ct;
     }
@@ -1782,7 +1782,7 @@ int z8530_queue_xmit(struct z8530_channel *c, struct sk_buff *skb)
          */
         c->tx_next_ptr=c->tx_dma_buf[c->tx_dma_used];
         c->tx_dma_used^=1;    /* Flip temp buffer */
-        memcpy(c->tx_next_ptr, skb->data, skb->len);
+        skb_copy_from_linear_data(skb, c->tx_next_ptr, skb->len);
     }
     else
         c->tx_next_ptr=skb->data;
diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig
index ece3d9c2dc61..4426841b2be6 100644
--- a/drivers/net/wireless/Kconfig
+++ b/drivers/net/wireless/Kconfig
@@ -2,47 +2,21 @@
 # Wireless LAN device configuration
 #

-menu "Wireless LAN (non-hamradio)"
-	depends on NETDEVICES
-
-config NET_RADIO
-	bool "Wireless LAN drivers (non-hamradio) & Wireless Extensions"
-	select WIRELESS_EXT
-	---help---
-	  Support for wireless LANs and everything having to do with radio,
-	  but not with amateur radio or FM broadcasting.
-
-	  Saying Y here also enables the Wireless Extensions (creates
-	  /proc/net/wireless and enables iwconfig access). The Wireless
-	  Extension is a generic API allowing a driver to expose to the user
-	  space configuration and statistics specific to common Wireless LANs.
-	  The beauty of it is that a single set of tool can support all the
-	  variations of Wireless LANs, regardless of their type (as long as
-	  the driver supports Wireless Extension). Another advantage is that
-	  these parameters may be changed on the fly without restarting the
-	  driver (or Linux). If you wish to use Wireless Extensions with
-	  wireless PCMCIA (PC-) cards, you need to say Y here; you can fetch
-	  the tools from
-	  <http://www.hpl.hp.com/personal/Jean_Tourrilhes/Linux/Tools.html>.
+menu "Wireless LAN"

-config NET_WIRELESS_RTNETLINK
-	bool "Wireless Extension API over RtNetlink"
-	depends on NET_RADIO
+config WLAN_PRE80211
+	bool "Wireless LAN (pre-802.11)"
+	depends on NETDEVICES
 	---help---
-	  Support the Wireless Extension API over the RtNetlink socket
-	  in addition to the traditional ioctl interface (selected above).
+	  Say Y if you have any pre-802.11 wireless LAN hardware.

-	  For now, few tools use this facility, but it might grow in the
-	  future. The only downside is that it adds 4.5 kB to your kernel.
-
-# Note : the cards are obsolete (can't buy them anymore), but the drivers
-# are not, as people are still using them...
-comment "Obsolete Wireless cards support (pre-802.11)"
-	depends on NET_RADIO && (INET || ISA || PCMCIA)
+	  This option does not affect the kernel build, it only
+	  lets you choose drivers.

 config STRIP
 	tristate "STRIP (Metricom starmode radio IP)"
-	depends on NET_RADIO && INET
+	depends on INET && WLAN_PRE80211
+	select WIRELESS_EXT
 	---help---
 	  Say Y if you have a Metricom radio and intend to use Starmode Radio
 	  IP. STRIP is a radio protocol developed for the MosquitoNet project
@@ -65,7 +39,8 @@ config STRIP

 config ARLAN
 	tristate "Aironet Arlan 655 & IC2200 DS support"
-	depends on NET_RADIO && ISA && !64BIT
+	depends on ISA && !64BIT && WLAN_PRE80211
+	select WIRELESS_EXT
 	---help---
 	  Aironet makes Arlan, a class of wireless LAN adapters. These use the
 	  www.Telxon.com chip, which is also used on several similar cards.
@@ -80,7 +55,8 @@ config ARLAN

 config WAVELAN
 	tristate "AT&T/Lucent old WaveLAN & DEC RoamAbout DS ISA support"
-	depends on NET_RADIO && ISA
+	depends on ISA && WLAN_PRE80211
+	select WIRELESS_EXT
 	---help---
 	  The Lucent WaveLAN (formerly NCR and AT&T; or DEC RoamAbout DS) is
 	  a Radio LAN (wireless Ethernet-like Local Area Network) using the
@@ -107,7 +83,8 @@ config WAVELAN

 config PCMCIA_WAVELAN
 	tristate "AT&T/Lucent old WaveLAN Pcmcia wireless support"
-	depends on NET_RADIO && PCMCIA
+	depends on PCMCIA && WLAN_PRE80211
+	select WIRELESS_EXT
 	help
 	  Say Y here if you intend to attach an AT&T/Lucent Wavelan PCMCIA
 	  (PC-card) wireless Ethernet networking card to your computer. This
@@ -118,7 +95,8 @@ config PCMCIA_WAVELAN

 config PCMCIA_NETWAVE
 	tristate "Xircom Netwave AirSurfer Pcmcia wireless support"
-	depends on NET_RADIO && PCMCIA
+	depends on PCMCIA && WLAN_PRE80211
+	select WIRELESS_EXT
 	help
 	  Say Y here if you intend to attach this type of PCMCIA (PC-card)
 	  wireless Ethernet networking card to your computer.
@@ -126,12 +104,20 @@ config PCMCIA_NETWAVE
 	  To compile this driver as a module, choose M here: the module will be
 	  called netwave_cs.  If unsure, say N.

-comment "Wireless 802.11 Frequency Hopping cards support"
-	depends on NET_RADIO && PCMCIA
+
+config WLAN_80211
+	bool "Wireless LAN (IEEE 802.11)"
+	depends on NETDEVICES
+	---help---
+	  Say Y if you have any 802.11 wireless LAN hardware.
+
+	  This option does not affect the kernel build, it only
+	  lets you choose drivers.

 config PCMCIA_RAYCS
 	tristate "Aviator/Raytheon 2.4MHz wireless support"
-	depends on NET_RADIO && PCMCIA
+	depends on PCMCIA && WLAN_80211
+	select WIRELESS_EXT
 	---help---
 	  Say Y here if you intend to attach an Aviator/Raytheon PCMCIA
 	  (PC-card) wireless Ethernet networking card to your computer.
@@ -141,12 +127,10 @@ config PCMCIA_RAYCS
 	  To compile this driver as a module, choose M here: the module will be
 	  called ray_cs.  If unsure, say N.

-comment "Wireless 802.11b ISA/PCI cards support"
-	depends on NET_RADIO && (ISA || PCI || PPC_PMAC || PCMCIA)
-
 config IPW2100
 	tristate "Intel PRO/Wireless 2100 Network Connection"
-	depends on NET_RADIO && PCI
+	depends on PCI && WLAN_80211
+	select WIRELESS_EXT
 	select FW_LOADER
 	select IEEE80211
 	---help---
@@ -200,7 +184,8 @@ config IPW2100_DEBUG

 config IPW2200
 	tristate "Intel PRO/Wireless 2200BG and 2915ABG Network Connection"
-	depends on NET_RADIO && PCI
+	depends on PCI && WLAN_80211
+	select WIRELESS_EXT
 	select FW_LOADER
 	select IEEE80211
 	---help---
@@ -282,7 +267,8 @@ config IPW2200_DEBUG

 config AIRO
 	tristate "Cisco/Aironet 34X/35X/4500/4800 ISA and PCI cards"
-	depends on NET_RADIO && ISA_DMA_API && (PCI || BROKEN)
+	depends on ISA_DMA_API && WLAN_80211 && (PCI || BROKEN)
+	select WIRELESS_EXT
 	select CRYPTO
 	---help---
 	  This is the standard Linux driver to support Cisco/Aironet ISA and
@@ -299,7 +285,8 @@ config AIRO

 config HERMES
 	tristate "Hermes chipset 802.11b support (Orinoco/Prism2/Symbol)"
-	depends on NET_RADIO && (PPC_PMAC || PCI || PCMCIA)
+	depends on (PPC_PMAC || PCI || PCMCIA) && WLAN_80211
+	select WIRELESS_EXT
 	---help---
 	  A driver for 802.11b wireless cards based on the "Hermes" or
 	  Intersil HFA384x (Prism 2) MAC controller. This includes the vast
@@ -373,7 +360,8 @@ config PCI_HERMES

 config ATMEL
 	tristate "Atmel at76c50x chipset  802.11b support"
-	depends on NET_RADIO && (PCI || PCMCIA)
+	depends on (PCI || PCMCIA) && WLAN_80211
+	select WIRELESS_EXT
 	select FW_LOADER
 	select CRC32
 	---help---
@@ -394,13 +382,9 @@ config PCI_ATMEL
 	  Enable support for PCI and mini-PCI cards containing the
 	  Atmel at76c506 chip.

-# If Pcmcia is compiled in, offer Pcmcia cards...
-comment "Wireless 802.11b Pcmcia/Cardbus cards support"
-	depends on NET_RADIO && PCMCIA
-
 config PCMCIA_HERMES
 	tristate "Hermes PCMCIA card support"
-	depends on NET_RADIO && PCMCIA && HERMES
+	depends on PCMCIA && HERMES
 	---help---
 	  A driver for "Hermes" chipset based PCMCIA wireless adaptors, such
 	  as the Lucent WavelanIEEE/Orinoco cards and their OEM (Cabletron/
@@ -420,7 +404,7 @@ config PCMCIA_HERMES

 config PCMCIA_SPECTRUM
 	tristate "Symbol Spectrum24 Trilogy PCMCIA card support"
-	depends on NET_RADIO && PCMCIA && HERMES
+	depends on PCMCIA && HERMES
 	select FW_LOADER
 	---help---

@@ -434,7 +418,8 @@ config PCMCIA_SPECTRUM

 config AIRO_CS
 	tristate "Cisco/Aironet 34X/35X/4500/4800 PCMCIA cards"
-	depends on NET_RADIO && PCMCIA && (BROKEN || !M32R)
+	depends on PCMCIA && (BROKEN || !M32R) && WLAN_80211
+	select WIRELESS_EXT
 	select CRYPTO
 	select CRYPTO_AES
 	---help---
@@ -458,7 +443,8 @@ config AIRO_CS

 config PCMCIA_ATMEL
 	tristate "Atmel at76c502/at76c504 PCMCIA cards"
-	depends on NET_RADIO && ATMEL && PCMCIA
+	depends on ATMEL && PCMCIA
+	select WIRELESS_EXT
 	select FW_LOADER
 	select CRC32
 	---help---
@@ -467,17 +453,17 @@ config PCMCIA_ATMEL

 config PCMCIA_WL3501
 	tristate "Planet WL3501 PCMCIA cards"
-	depends on NET_RADIO && EXPERIMENTAL && PCMCIA
+	depends on EXPERIMENTAL && PCMCIA && WLAN_80211
+	select WIRELESS_EXT
 	---help---
 	  A driver for WL3501 PCMCIA 802.11 wireless cards made by Planet.
 	  It has basic support for Linux wireless extensions and initial
 	  micro support for ethtool.

-comment "Prism GT/Duette 802.11(a/b/g) PCI/Cardbus support"
-	depends on NET_RADIO && PCI
 config PRISM54
 	tristate 'Intersil Prism GT/Duette/Indigo PCI/Cardbus'
-	depends on PCI && NET_RADIO && EXPERIMENTAL
+	depends on PCI && EXPERIMENTAL && WLAN_80211
+	select WIRELESS_EXT
 	select FW_LOADER
 	---help---
 	  Enable PCI and Cardbus support for the following chipset based cards:
@@ -523,7 +509,8 @@ config PRISM54

 config USB_ZD1201
 	tristate "USB ZD1201 based Wireless device support"
-	depends on USB && NET_RADIO
+	depends on USB && WLAN_80211
+	select WIRELESS_EXT
 	select FW_LOADER
 	---help---
 	  Say Y if you want to use wireless LAN adapters based on the ZyDAS
@@ -542,11 +529,4 @@ source "drivers/net/wireless/hostap/Kconfig"
 source "drivers/net/wireless/bcm43xx/Kconfig"
 source "drivers/net/wireless/zd1211rw/Kconfig"

-# yes, this works even when no drivers are selected
-config NET_WIRELESS
-	bool
-	depends on NET_RADIO && (ISA || PCI || PPC_PMAC || PCMCIA)
-	default y
-
 endmenu
-
diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c
index 2ada76a93cb6..7fe0a61091a6 100644
--- a/drivers/net/wireless/airo.c
+++ b/drivers/net/wireless/airo.c
@@ -2444,7 +2444,7 @@ static int add_airo_dev( struct net_device *dev );
2444 2444
2445static int wll_header_parse(struct sk_buff *skb, unsigned char *haddr) 2445static int wll_header_parse(struct sk_buff *skb, unsigned char *haddr)
2446{ 2446{
2447 memcpy(haddr, skb->mac.raw + 10, ETH_ALEN); 2447 memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN);
2448 return ETH_ALEN; 2448 return ETH_ALEN;
2449} 2449}
2450 2450
@@ -3411,14 +3411,12 @@ badrx:
3411 OUT4500( apriv, EVACK, EV_RX); 3411 OUT4500( apriv, EVACK, EV_RX);
3412 3412
3413 if (test_bit(FLAG_802_11, &apriv->flags)) { 3413 if (test_bit(FLAG_802_11, &apriv->flags)) {
3414 skb->mac.raw = skb->data; 3414 skb_reset_mac_header(skb);
3415 skb->pkt_type = PACKET_OTHERHOST; 3415 skb->pkt_type = PACKET_OTHERHOST;
3416 skb->dev = apriv->wifidev; 3416 skb->dev = apriv->wifidev;
3417 skb->protocol = htons(ETH_P_802_2); 3417 skb->protocol = htons(ETH_P_802_2);
3418 } else { 3418 } else
3419 skb->dev = dev;
3420 skb->protocol = eth_type_trans(skb,dev); 3419 skb->protocol = eth_type_trans(skb,dev);
3421 }
3422 skb->dev->last_rx = jiffies; 3420 skb->dev->last_rx = jiffies;
3423 skb->ip_summed = CHECKSUM_NONE; 3421 skb->ip_summed = CHECKSUM_NONE;
3424 3422
@@ -3641,7 +3639,6 @@ badmic:
3641 } 3639 }
3642#endif /* WIRELESS_SPY */ 3640#endif /* WIRELESS_SPY */
3643 3641
3644 skb->dev = ai->dev;
3645 skb->ip_summed = CHECKSUM_NONE; 3642 skb->ip_summed = CHECKSUM_NONE;
3646 skb->protocol = eth_type_trans(skb, ai->dev); 3643 skb->protocol = eth_type_trans(skb, ai->dev);
3647 skb->dev->last_rx = jiffies; 3644 skb->dev->last_rx = jiffies;
@@ -3749,7 +3746,7 @@ void mpi_receive_802_11 (struct airo_info *ai)
3749 wireless_spy_update(ai->dev, sa, &wstats); 3746 wireless_spy_update(ai->dev, sa, &wstats);
3750 } 3747 }
3751#endif /* IW_WIRELESS_SPY */ 3748#endif /* IW_WIRELESS_SPY */
3752 skb->mac.raw = skb->data; 3749 skb_reset_mac_header(skb);
3753 skb->pkt_type = PACKET_OTHERHOST; 3750 skb->pkt_type = PACKET_OTHERHOST;
3754 skb->dev = ai->wifidev; 3751 skb->dev = ai->wifidev;
3755 skb->protocol = htons(ETH_P_802_2); 3752 skb->protocol = htons(ETH_P_802_2);
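
The airo.c hunks are part of the tree-wide replacement of the removed skb->mac.raw pointer: the skb now carries a mac-header mark that is set with skb_reset_mac_header() and read back with skb_mac_header(). A minimal sketch of the assumed equivalence (simplified; the real helpers in <linux/skbuff.h> may store an offset rather than a raw pointer):

	#include <linux/skbuff.h>

	/* Sketch only: resetting points the mac-header mark at the current
	 * skb->data; the getter hands back a pointer to that position. */
	static unsigned char *example_mark_and_read_mac(struct sk_buff *skb)
	{
		skb_reset_mac_header(skb);	/* was: skb->mac.raw = skb->data; */
		return skb_mac_header(skb);	/* was: skb->mac.raw */
	}
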
diff --git a/drivers/net/wireless/arlan-main.c b/drivers/net/wireless/arlan-main.c
index 4688e56b69c7..498e8486d125 100644
--- a/drivers/net/wireless/arlan-main.c
+++ b/drivers/net/wireless/arlan-main.c
@@ -1500,7 +1500,6 @@ static void arlan_rx_interrupt(struct net_device *dev, u_char rxStatus, u_short
1500 break; 1500 break;
1501 } 1501 }
1502 skb_reserve(skb, 2); 1502 skb_reserve(skb, 2);
1503 skb->dev = dev;
1504 skbtmp = skb_put(skb, pkt_len); 1503 skbtmp = skb_put(skb, pkt_len);
1505 1504
1506 memcpy_fromio(skbtmp + ARLAN_FAKE_HDR_LEN, ((char __iomem *) arlan) + rxOffset, pkt_len - ARLAN_FAKE_HDR_LEN); 1505 memcpy_fromio(skbtmp + ARLAN_FAKE_HDR_LEN, ((char __iomem *) arlan) + rxOffset, pkt_len - ARLAN_FAKE_HDR_LEN);
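
The deleted "skb->dev = dev;" lines here and in many hunks below are redundant rather than behavioural: eth_type_trans() already stores the receiving device in skb->dev while classifying the frame. A hedged sketch of the receive-completion idiom after the cleanup:

	#include <linux/etherdevice.h>
	#include <linux/netdevice.h>
	#include <linux/skbuff.h>

	/* Sketch: eth_type_trans() sets skb->dev = dev internally, so the
	 * assignment these patches delete duplicated work. */
	static void example_rx_finish(struct sk_buff *skb, struct net_device *dev)
	{
		skb->protocol = eth_type_trans(skb, dev);
		skb->ip_summed = CHECKSUM_NONE;
		netif_rx(skb);
	}
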
diff --git a/drivers/net/wireless/atmel.c b/drivers/net/wireless/atmel.c
index 23eba698aec5..51a7db53afa5 100644
--- a/drivers/net/wireless/atmel.c
+++ b/drivers/net/wireless/atmel.c
@@ -827,14 +827,14 @@ static int start_tx(struct sk_buff *skb, struct net_device *dev)
827 if (priv->wep_is_on) 827 if (priv->wep_is_on)
828 frame_ctl |= IEEE80211_FCTL_PROTECTED; 828 frame_ctl |= IEEE80211_FCTL_PROTECTED;
829 if (priv->operating_mode == IW_MODE_ADHOC) { 829 if (priv->operating_mode == IW_MODE_ADHOC) {
830 memcpy(&header.addr1, skb->data, 6); 830 skb_copy_from_linear_data(skb, &header.addr1, 6);
831 memcpy(&header.addr2, dev->dev_addr, 6); 831 memcpy(&header.addr2, dev->dev_addr, 6);
832 memcpy(&header.addr3, priv->BSSID, 6); 832 memcpy(&header.addr3, priv->BSSID, 6);
833 } else { 833 } else {
834 frame_ctl |= IEEE80211_FCTL_TODS; 834 frame_ctl |= IEEE80211_FCTL_TODS;
835 memcpy(&header.addr1, priv->CurrentBSSID, 6); 835 memcpy(&header.addr1, priv->CurrentBSSID, 6);
836 memcpy(&header.addr2, dev->dev_addr, 6); 836 memcpy(&header.addr2, dev->dev_addr, 6);
837 memcpy(&header.addr3, skb->data, 6); 837 skb_copy_from_linear_data(skb, &header.addr3, 6);
838 } 838 }
839 839
840 if (priv->use_wpa) 840 if (priv->use_wpa)
@@ -920,7 +920,6 @@ static void fast_rx_path(struct atmel_private *priv,
920 memcpy(&skbp[6], header->addr2, 6); /* source address */ 920 memcpy(&skbp[6], header->addr2, 6); /* source address */
921 921
922 priv->dev->last_rx = jiffies; 922 priv->dev->last_rx = jiffies;
923 skb->dev = priv->dev;
924 skb->protocol = eth_type_trans(skb, priv->dev); 923 skb->protocol = eth_type_trans(skb, priv->dev);
925 skb->ip_summed = CHECKSUM_NONE; 924 skb->ip_summed = CHECKSUM_NONE;
926 netif_rx(skb); 925 netif_rx(skb);
@@ -1028,7 +1027,6 @@ static void frag_rx_path(struct atmel_private *priv,
1028 priv->rx_buf, 1027 priv->rx_buf,
1029 priv->frag_len + 12); 1028 priv->frag_len + 12);
1030 priv->dev->last_rx = jiffies; 1029 priv->dev->last_rx = jiffies;
1031 skb->dev = priv->dev;
1032 skb->protocol = eth_type_trans(skb, priv->dev); 1030 skb->protocol = eth_type_trans(skb, priv->dev);
1033 skb->ip_summed = CHECKSUM_NONE; 1031 skb->ip_summed = CHECKSUM_NONE;
1034 netif_rx(skb); 1032 netif_rx(skb);
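
The skb_copy_from_linear_data() family used from here on is a set of direction-explicit wrappers around memcpy() on the skb's linear buffer. A sketch of the mapping each converted call site above follows:

	#include <linux/skbuff.h>

	/* Sketch of the assumed copy-helper semantics; the length and the
	 * offset 6 are illustrative values, not driver constants. */
	static void example_copy_helpers(struct sk_buff *skb, void *buf, int len)
	{
		/* was: memcpy(buf, skb->data, len) */
		skb_copy_from_linear_data(skb, buf, len);
		/* was: memcpy(buf, skb->data + 6, len) */
		skb_copy_from_linear_data_offset(skb, 6, buf, len);
		/* was: memcpy(skb->data, buf, len) */
		skb_copy_to_linear_data(skb, buf, len);
	}
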
diff --git a/drivers/net/wireless/bcm43xx/Kconfig b/drivers/net/wireless/bcm43xx/Kconfig
index 533993f538fc..ce397e4284f4 100644
--- a/drivers/net/wireless/bcm43xx/Kconfig
+++ b/drivers/net/wireless/bcm43xx/Kconfig
@@ -1,6 +1,7 @@
1config BCM43XX 1config BCM43XX
2 tristate "Broadcom BCM43xx wireless support" 2 tristate "Broadcom BCM43xx wireless support"
3 depends on PCI && IEEE80211 && IEEE80211_SOFTMAC && NET_RADIO && EXPERIMENTAL 3 depends on PCI && IEEE80211 && IEEE80211_SOFTMAC && WLAN_80211 && EXPERIMENTAL
4 select WIRELESS_EXT
4 select FW_LOADER 5 select FW_LOADER
5 select HW_RANDOM 6 select HW_RANDOM
6 ---help--- 7 ---help---
diff --git a/drivers/net/wireless/bcm43xx/bcm43xx_dma.c b/drivers/net/wireless/bcm43xx/bcm43xx_dma.c
index 6e0dc76400e5..e3d2e61a31ee 100644
--- a/drivers/net/wireless/bcm43xx/bcm43xx_dma.c
+++ b/drivers/net/wireless/bcm43xx/bcm43xx_dma.c
@@ -998,7 +998,8 @@ static void dma_tx_fragment(struct bcm43xx_dmaring *ring,
998 assert(0); 998 assert(0);
999 return; 999 return;
1000 } 1000 }
1001 memcpy(skb_put(bounce_skb, skb->len), skb->data, skb->len); 1001 skb_copy_from_linear_data(skb, skb_put(bounce_skb, skb->len),
1002 skb->len);
1002 dev_kfree_skb_any(skb); 1003 dev_kfree_skb_any(skb);
1003 skb = bounce_skb; 1004 skb = bounce_skb;
1004 } 1005 }
diff --git a/drivers/net/wireless/hostap/Kconfig b/drivers/net/wireless/hostap/Kconfig
index 308f773ad566..1fef33169fdd 100644
--- a/drivers/net/wireless/hostap/Kconfig
+++ b/drivers/net/wireless/hostap/Kconfig
@@ -1,6 +1,7 @@
1config HOSTAP 1config HOSTAP
2 tristate "IEEE 802.11 for Host AP (Prism2/2.5/3 and WEP/TKIP/CCMP)" 2 tristate "IEEE 802.11 for Host AP (Prism2/2.5/3 and WEP/TKIP/CCMP)"
3 depends on NET_RADIO 3 depends on WLAN_80211
4 select WIRELESS_EXT
4 select IEEE80211 5 select IEEE80211
5 select IEEE80211_CRYPT_WEP 6 select IEEE80211_CRYPT_WEP
6 ---help--- 7 ---help---
diff --git a/drivers/net/wireless/hostap/hostap_80211_rx.c b/drivers/net/wireless/hostap/hostap_80211_rx.c
index 7e04dc94b3bc..cbedc9ee740a 100644
--- a/drivers/net/wireless/hostap/hostap_80211_rx.c
+++ b/drivers/net/wireless/hostap/hostap_80211_rx.c
@@ -167,7 +167,7 @@ hdr->f.status = s; hdr->f.len = l; hdr->f.data = d
167 167
168 ret = skb->len - phdrlen; 168 ret = skb->len - phdrlen;
169 skb->dev = dev; 169 skb->dev = dev;
170 skb->mac.raw = skb->data; 170 skb_reset_mac_header(skb);
171 skb_pull(skb, hdrlen); 171 skb_pull(skb, hdrlen);
172 if (prism_header) 172 if (prism_header)
173 skb_pull(skb, phdrlen); 173 skb_pull(skb, phdrlen);
@@ -933,12 +933,14 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb,
933 if (frag == 0) { 933 if (frag == 0) {
934 /* copy first fragment (including full headers) into 934 /* copy first fragment (including full headers) into
935 * beginning of the fragment cache skb */ 935 * beginning of the fragment cache skb */
936 memcpy(skb_put(frag_skb, flen), skb->data, flen); 936 skb_copy_from_linear_data(skb, skb_put(frag_skb, flen),
937 flen);
937 } else { 938 } else {
938 /* append frame payload to the end of the fragment 939 /* append frame payload to the end of the fragment
939 * cache skb */ 940 * cache skb */
940 memcpy(skb_put(frag_skb, flen), skb->data + hdrlen, 941 skb_copy_from_linear_data_offset(skb, hdrlen,
941 flen); 942 skb_put(frag_skb,
943 flen), flen);
942 } 944 }
943 dev_kfree_skb(skb); 945 dev_kfree_skb(skb);
944 skb = NULL; 946 skb = NULL;
@@ -1044,8 +1046,9 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb,
1044 skb->len >= ETH_HLEN + ETH_ALEN) { 1046 skb->len >= ETH_HLEN + ETH_ALEN) {
1045 /* Non-standard frame: get addr4 from its bogus location after 1047 /* Non-standard frame: get addr4 from its bogus location after
1046 * the payload */ 1048 * the payload */
1047 memcpy(skb->data + ETH_ALEN, 1049 skb_copy_from_linear_data_offset(skb, skb->len - ETH_ALEN,
1048 skb->data + skb->len - ETH_ALEN, ETH_ALEN); 1050 skb->data + ETH_ALEN,
1051 ETH_ALEN);
1049 skb_trim(skb, skb->len - ETH_ALEN); 1052 skb_trim(skb, skb->len - ETH_ALEN);
1050 } 1053 }
1051 1054
@@ -1073,17 +1076,17 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb,
1073 1076
1074 if (skb2 != NULL) { 1077 if (skb2 != NULL) {
1075 /* send to wireless media */ 1078 /* send to wireless media */
1076 skb2->protocol = __constant_htons(ETH_P_802_3);
1077 skb2->mac.raw = skb2->nh.raw = skb2->data;
1078 /* skb2->nh.raw = skb2->data + ETH_HLEN; */
1079 skb2->dev = dev; 1079 skb2->dev = dev;
1080 skb2->protocol = __constant_htons(ETH_P_802_3);
1081 skb_reset_mac_header(skb2);
1082 skb_reset_network_header(skb2);
1083 /* skb2->network_header += ETH_HLEN; */
1080 dev_queue_xmit(skb2); 1084 dev_queue_xmit(skb2);
1081 } 1085 }
1082 1086
1083 if (skb) { 1087 if (skb) {
1084 skb->protocol = eth_type_trans(skb, dev); 1088 skb->protocol = eth_type_trans(skb, dev);
1085 memset(skb->cb, 0, sizeof(skb->cb)); 1089 memset(skb->cb, 0, sizeof(skb->cb));
1086 skb->dev = dev;
1087 netif_rx(skb); 1090 netif_rx(skb);
1088 } 1091 }
1089 1092
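
In the wireless-media branch above, both header marks are reset to the start of the raw 802.3 frame before it is queued; the commented-out line preserves the old hint that the network header could instead sit ETH_HLEN bytes in. A sketch of the idiom:

	#include <linux/if_ether.h>
	#include <linux/netdevice.h>
	#include <linux/skbuff.h>

	/* Sketch: hand a raw 802.3 frame to dev_queue_xmit(), marking both
	 * the mac and network headers at skb->data as the hunk above does. */
	static void example_queue_raw_8023(struct sk_buff *skb, struct net_device *dev)
	{
		skb->dev = dev;
		skb->protocol = htons(ETH_P_802_3);
		skb_reset_mac_header(skb);	/* was: skb->mac.raw = skb->data */
		skb_reset_network_header(skb);	/* was: skb->nh.raw = skb->data */
		dev_queue_xmit(skb);
	}
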
diff --git a/drivers/net/wireless/hostap/hostap_80211_tx.c b/drivers/net/wireless/hostap/hostap_80211_tx.c
index 4a5be70c0419..246fac0e8001 100644
--- a/drivers/net/wireless/hostap/hostap_80211_tx.c
+++ b/drivers/net/wireless/hostap/hostap_80211_tx.c
@@ -146,7 +146,8 @@ int hostap_data_start_xmit(struct sk_buff *skb, struct net_device *dev)
146 fc |= IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS; 146 fc |= IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS;
147 /* From&To DS: Addr1 = RA, Addr2 = TA, Addr3 = DA, 147 /* From&To DS: Addr1 = RA, Addr2 = TA, Addr3 = DA,
148 * Addr4 = SA */ 148 * Addr4 = SA */
149 memcpy(&hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); 149 skb_copy_from_linear_data_offset(skb, ETH_ALEN,
150 &hdr.addr4, ETH_ALEN);
150 hdr_len += ETH_ALEN; 151 hdr_len += ETH_ALEN;
151 } else { 152 } else {
152 /* bogus 4-addr format to workaround Prism2 station 153 /* bogus 4-addr format to workaround Prism2 station
@@ -159,7 +160,8 @@ int hostap_data_start_xmit(struct sk_buff *skb, struct net_device *dev)
159 /* SA from skb->data + ETH_ALEN will be added after 160 /* SA from skb->data + ETH_ALEN will be added after
160 * frame payload; use hdr.addr4 as a temporary buffer 161 * frame payload; use hdr.addr4 as a temporary buffer
161 */ 162 */
162 memcpy(&hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); 163 skb_copy_from_linear_data_offset(skb, ETH_ALEN,
164 &hdr.addr4, ETH_ALEN);
163 need_tailroom += ETH_ALEN; 165 need_tailroom += ETH_ALEN;
164 } 166 }
165 167
@@ -174,24 +176,27 @@ int hostap_data_start_xmit(struct sk_buff *skb, struct net_device *dev)
174 else 176 else
175 memcpy(&hdr.addr1, local->bssid, ETH_ALEN); 177 memcpy(&hdr.addr1, local->bssid, ETH_ALEN);
176 memcpy(&hdr.addr2, dev->dev_addr, ETH_ALEN); 178 memcpy(&hdr.addr2, dev->dev_addr, ETH_ALEN);
177 memcpy(&hdr.addr3, skb->data, ETH_ALEN); 179 skb_copy_from_linear_data(skb, &hdr.addr3, ETH_ALEN);
178 } else if (local->iw_mode == IW_MODE_MASTER && !to_assoc_ap) { 180 } else if (local->iw_mode == IW_MODE_MASTER && !to_assoc_ap) {
179 fc |= IEEE80211_FCTL_FROMDS; 181 fc |= IEEE80211_FCTL_FROMDS;
180 /* From DS: Addr1 = DA, Addr2 = BSSID, Addr3 = SA */ 182 /* From DS: Addr1 = DA, Addr2 = BSSID, Addr3 = SA */
181 memcpy(&hdr.addr1, skb->data, ETH_ALEN); 183 skb_copy_from_linear_data(skb, &hdr.addr1, ETH_ALEN);
182 memcpy(&hdr.addr2, dev->dev_addr, ETH_ALEN); 184 memcpy(&hdr.addr2, dev->dev_addr, ETH_ALEN);
183 memcpy(&hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN); 185 skb_copy_from_linear_data_offset(skb, ETH_ALEN, &hdr.addr3,
186 ETH_ALEN);
184 } else if (local->iw_mode == IW_MODE_INFRA || to_assoc_ap) { 187 } else if (local->iw_mode == IW_MODE_INFRA || to_assoc_ap) {
185 fc |= IEEE80211_FCTL_TODS; 188 fc |= IEEE80211_FCTL_TODS;
186 /* To DS: Addr1 = BSSID, Addr2 = SA, Addr3 = DA */ 189 /* To DS: Addr1 = BSSID, Addr2 = SA, Addr3 = DA */
187 memcpy(&hdr.addr1, to_assoc_ap ? 190 memcpy(&hdr.addr1, to_assoc_ap ?
188 local->assoc_ap_addr : local->bssid, ETH_ALEN); 191 local->assoc_ap_addr : local->bssid, ETH_ALEN);
189 memcpy(&hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); 192 skb_copy_from_linear_data_offset(skb, ETH_ALEN, &hdr.addr2,
190 memcpy(&hdr.addr3, skb->data, ETH_ALEN); 193 ETH_ALEN);
194 skb_copy_from_linear_data(skb, &hdr.addr3, ETH_ALEN);
191 } else if (local->iw_mode == IW_MODE_ADHOC) { 195 } else if (local->iw_mode == IW_MODE_ADHOC) {
192 /* not From/To DS: Addr1 = DA, Addr2 = SA, Addr3 = BSSID */ 196 /* not From/To DS: Addr1 = DA, Addr2 = SA, Addr3 = BSSID */
193 memcpy(&hdr.addr1, skb->data, ETH_ALEN); 197 skb_copy_from_linear_data(skb, &hdr.addr1, ETH_ALEN);
194 memcpy(&hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); 198 skb_copy_from_linear_data_offset(skb, ETH_ALEN, &hdr.addr2,
199 ETH_ALEN);
195 memcpy(&hdr.addr3, local->bssid, ETH_ALEN); 200 memcpy(&hdr.addr3, local->bssid, ETH_ALEN);
196 } 201 }
197 202
@@ -237,7 +242,7 @@ int hostap_data_start_xmit(struct sk_buff *skb, struct net_device *dev)
237 iface->stats.tx_packets++; 242 iface->stats.tx_packets++;
238 iface->stats.tx_bytes += skb->len; 243 iface->stats.tx_bytes += skb->len;
239 244
240 skb->mac.raw = skb->data; 245 skb_reset_mac_header(skb);
241 meta = (struct hostap_skb_tx_data *) skb->cb; 246 meta = (struct hostap_skb_tx_data *) skb->cb;
242 memset(meta, 0, sizeof(*meta)); 247 memset(meta, 0, sizeof(*meta));
243 meta->magic = HOSTAP_SKB_TX_DATA_MAGIC; 248 meta->magic = HOSTAP_SKB_TX_DATA_MAGIC;
diff --git a/drivers/net/wireless/hostap/hostap_ap.c b/drivers/net/wireless/hostap/hostap_ap.c
index efb8cf3bd8ad..4ca8a27b8c55 100644
--- a/drivers/net/wireless/hostap/hostap_ap.c
+++ b/drivers/net/wireless/hostap/hostap_ap.c
@@ -982,7 +982,8 @@ static void prism2_send_mgmt(struct net_device *dev,
982 meta->tx_cb_idx = tx_cb_idx; 982 meta->tx_cb_idx = tx_cb_idx;
983 983
984 skb->dev = dev; 984 skb->dev = dev;
985 skb->mac.raw = skb->nh.raw = skb->data; 985 skb_reset_mac_header(skb);
986 skb_reset_network_header(skb);
986 dev_queue_xmit(skb); 987 dev_queue_xmit(skb);
987} 988}
988#endif /* PRISM2_NO_KERNEL_IEEE80211_MGMT */ 989#endif /* PRISM2_NO_KERNEL_IEEE80211_MGMT */
@@ -1276,8 +1277,8 @@ static char * ap_auth_make_challenge(struct ap_data *ap)
1276 return NULL; 1277 return NULL;
1277 } 1278 }
1278 1279
1279 memcpy(tmpbuf, skb->data + ap->crypt->extra_mpdu_prefix_len, 1280 skb_copy_from_linear_data_offset(skb, ap->crypt->extra_mpdu_prefix_len,
1280 WLAN_AUTH_CHALLENGE_LEN); 1281 tmpbuf, WLAN_AUTH_CHALLENGE_LEN);
1281 dev_kfree_skb(skb); 1282 dev_kfree_skb(skb);
1282 1283
1283 return tmpbuf; 1284 return tmpbuf;
diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c
index 3079378fb8cd..fb01fb95a9f0 100644
--- a/drivers/net/wireless/hostap/hostap_hw.c
+++ b/drivers/net/wireless/hostap/hostap_hw.c
@@ -1838,13 +1838,14 @@ static int prism2_tx_80211(struct sk_buff *skb, struct net_device *dev)
1838 1838
1839 /* skb->data starts with txdesc->frame_control */ 1839 /* skb->data starts with txdesc->frame_control */
1840 hdr_len = 24; 1840 hdr_len = 24;
1841 memcpy(&txdesc.frame_control, skb->data, hdr_len); 1841 skb_copy_from_linear_data(skb, &txdesc.frame_control, hdr_len);
1842 fc = le16_to_cpu(txdesc.frame_control); 1842 fc = le16_to_cpu(txdesc.frame_control);
1843 if (WLAN_FC_GET_TYPE(fc) == IEEE80211_FTYPE_DATA && 1843 if (WLAN_FC_GET_TYPE(fc) == IEEE80211_FTYPE_DATA &&
1844 (fc & IEEE80211_FCTL_FROMDS) && (fc & IEEE80211_FCTL_TODS) && 1844 (fc & IEEE80211_FCTL_FROMDS) && (fc & IEEE80211_FCTL_TODS) &&
1845 skb->len >= 30) { 1845 skb->len >= 30) {
1846 /* Addr4 */ 1846 /* Addr4 */
1847 memcpy(txdesc.addr4, skb->data + hdr_len, ETH_ALEN); 1847 skb_copy_from_linear_data_offset(skb, hdr_len, txdesc.addr4,
1848 ETH_ALEN);
1848 hdr_len += ETH_ALEN; 1849 hdr_len += ETH_ALEN;
1849 } 1850 }
1850 1851
@@ -2217,7 +2218,7 @@ static void hostap_tx_callback(local_info_t *local,
2217 memcpy(skb_put(skb, len), payload, len); 2218 memcpy(skb_put(skb, len), payload, len);
2218 2219
2219 skb->dev = local->dev; 2220 skb->dev = local->dev;
2220 skb->mac.raw = skb->data; 2221 skb_reset_mac_header(skb);
2221 2222
2222 cb->func(skb, ok, cb->data); 2223 cb->func(skb, ok, cb->data);
2223} 2224}
diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c
index 9077e6edde34..1f9edd91565d 100644
--- a/drivers/net/wireless/hostap/hostap_main.c
+++ b/drivers/net/wireless/hostap/hostap_main.c
@@ -590,20 +590,20 @@ void hostap_dump_tx_header(const char *name, const struct hfa384x_tx_frame *tx)
590 590
591int hostap_80211_header_parse(struct sk_buff *skb, unsigned char *haddr) 591int hostap_80211_header_parse(struct sk_buff *skb, unsigned char *haddr)
592{ 592{
593 memcpy(haddr, skb->mac.raw + 10, ETH_ALEN); /* addr2 */ 593 memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN); /* addr2 */
594 return ETH_ALEN; 594 return ETH_ALEN;
595} 595}
596 596
597 597
598int hostap_80211_prism_header_parse(struct sk_buff *skb, unsigned char *haddr) 598int hostap_80211_prism_header_parse(struct sk_buff *skb, unsigned char *haddr)
599{ 599{
600 if (*(u32 *)skb->mac.raw == LWNG_CAP_DID_BASE) { 600 const unsigned char *mac = skb_mac_header(skb);
601 memcpy(haddr, skb->mac.raw + 601
602 sizeof(struct linux_wlan_ng_prism_hdr) + 10, 602 if (*(u32 *)mac == LWNG_CAP_DID_BASE) {
603 memcpy(haddr, mac + sizeof(struct linux_wlan_ng_prism_hdr) + 10,
603 ETH_ALEN); /* addr2 */ 604 ETH_ALEN); /* addr2 */
604 } else { /* (*(u32 *)skb->mac.raw == htonl(LWNG_CAPHDR_VERSION)) */ 605 } else { /* (*(u32 *)mac == htonl(LWNG_CAPHDR_VERSION)) */
605 memcpy(haddr, skb->mac.raw + 606 memcpy(haddr, mac + sizeof(struct linux_wlan_ng_cap_hdr) + 10,
606 sizeof(struct linux_wlan_ng_cap_hdr) + 10,
607 ETH_ALEN); /* addr2 */ 607 ETH_ALEN); /* addr2 */
608 } 608 }
609 return ETH_ALEN; 609 return ETH_ALEN;
@@ -1063,7 +1063,8 @@ int prism2_sta_send_mgmt(local_info_t *local, u8 *dst, u16 stype,
1063 meta->iface = netdev_priv(dev); 1063 meta->iface = netdev_priv(dev);
1064 1064
1065 skb->dev = dev; 1065 skb->dev = dev;
1066 skb->mac.raw = skb->nh.raw = skb->data; 1066 skb_reset_mac_header(skb);
1067 skb_reset_network_header(skb);
1067 dev_queue_xmit(skb); 1068 dev_queue_xmit(skb);
1068 1069
1069 return 0; 1070 return 0;
diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c
index ad6e4a428355..9137a4dd02eb 100644
--- a/drivers/net/wireless/ipw2100.c
+++ b/drivers/net/wireless/ipw2100.c
@@ -2416,8 +2416,9 @@ static void isr_rx(struct ipw2100_priv *priv, int i,
2416#ifdef IPW2100_RX_DEBUG 2416#ifdef IPW2100_RX_DEBUG
2417 /* Make a copy of the frame so we can dump it to the logs if 2417 /* Make a copy of the frame so we can dump it to the logs if
2418 * ieee80211_rx fails */ 2418 * ieee80211_rx fails */
2419 memcpy(packet_data, packet->skb->data, 2419 skb_copy_from_linear_data(packet->skb, packet_data,
2420 min_t(u32, status->frame_size, IPW_RX_NIC_BUFFER_LENGTH)); 2420 min_t(u32, status->frame_size,
2421 IPW_RX_NIC_BUFFER_LENGTH));
2421#endif 2422#endif
2422 2423
2423 if (!ieee80211_rx(priv->ieee, packet->skb, stats)) { 2424 if (!ieee80211_rx(priv->ieee, packet->skb, stats)) {
diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c
index c878a2f3239c..4839a45098cb 100644
--- a/drivers/net/wireless/ipw2200.c
+++ b/drivers/net/wireless/ipw2200.c
@@ -8133,7 +8133,7 @@ static void ipw_handle_mgmt_packet(struct ipw_priv *priv,
8133 skb->dev = priv->ieee->dev; 8133 skb->dev = priv->ieee->dev;
8134 8134
8135 /* Point raw at the ieee80211_stats */ 8135 /* Point raw at the ieee80211_stats */
8136 skb->mac.raw = skb->data; 8136 skb_reset_mac_header(skb);
8137 8137
8138 skb->pkt_type = PACKET_OTHERHOST; 8138 skb->pkt_type = PACKET_OTHERHOST;
8139 skb->protocol = __constant_htons(ETH_P_80211_STATS); 8139 skb->protocol = __constant_htons(ETH_P_80211_STATS);
@@ -10355,7 +10355,7 @@ static void ipw_handle_promiscuous_tx(struct ipw_priv *priv,
10355 10355
10356 rt_hdr->it_len = dst->len; 10356 rt_hdr->it_len = dst->len;
10357 10357
10358 memcpy(skb_put(dst, len), src->data, len); 10358 skb_copy_from_linear_data(src, skb_put(dst, len), len);
10359 10359
10360 if (!ieee80211_rx(priv->prom_priv->ieee, dst, &dummystats)) 10360 if (!ieee80211_rx(priv->prom_priv->ieee, dst, &dummystats))
10361 dev_kfree_skb_any(dst); 10361 dev_kfree_skb_any(dst);
diff --git a/drivers/net/wireless/netwave_cs.c b/drivers/net/wireless/netwave_cs.c
index a009ab517710..45b00e13ab2b 100644
--- a/drivers/net/wireless/netwave_cs.c
+++ b/drivers/net/wireless/netwave_cs.c
@@ -1283,7 +1283,6 @@ static int netwave_rx(struct net_device *dev)
1283 1283
1284 skb_reserve( skb, 2); /* Align IP on 16 byte */ 1284 skb_reserve( skb, 2); /* Align IP on 16 byte */
1285 skb_put( skb, rcvLen); 1285 skb_put( skb, rcvLen);
1286 skb->dev = dev;
1287 1286
1288 /* Copy packet fragments to the skb data area */ 1287 /* Copy packet fragments to the skb data area */
1289 ptr = (u_char*) skb->data; 1288 ptr = (u_char*) skb->data;
diff --git a/drivers/net/wireless/orinoco.c b/drivers/net/wireless/orinoco.c
index 4e7f6cf51436..062286dc8e15 100644
--- a/drivers/net/wireless/orinoco.c
+++ b/drivers/net/wireless/orinoco.c
@@ -689,7 +689,7 @@ static void orinoco_stat_gather(struct net_device *dev,
689 /* Note : gcc will optimise the whole section away if 689 /* Note : gcc will optimise the whole section away if
690 * WIRELESS_SPY is not defined... - Jean II */ 690 * WIRELESS_SPY is not defined... - Jean II */
691 if (SPY_NUMBER(priv)) { 691 if (SPY_NUMBER(priv)) {
692 orinoco_spy_gather(dev, skb->mac.raw + ETH_ALEN, 692 orinoco_spy_gather(dev, skb_mac_header(skb) + ETH_ALEN,
693 desc->signal, desc->silence); 693 desc->signal, desc->silence);
694 } 694 }
695} 695}
@@ -770,7 +770,7 @@ static void orinoco_rx_monitor(struct net_device *dev, u16 rxfid,
770 770
771 /* Copy the 802.11 header to the skb */ 771 /* Copy the 802.11 header to the skb */
772 memcpy(skb_put(skb, hdrlen), &(desc->frame_ctl), hdrlen); 772 memcpy(skb_put(skb, hdrlen), &(desc->frame_ctl), hdrlen);
773 skb->mac.raw = skb->data; 773 skb_reset_mac_header(skb);
774 774
775 /* If any, copy the data from the card to the skb */ 775 /* If any, copy the data from the card to the skb */
776 if (datalen > 0) { 776 if (datalen > 0) {
@@ -915,7 +915,6 @@ static void __orinoco_ev_rx(struct net_device *dev, hermes_t *hw)
915 memcpy(hdr->h_source, desc.addr2, ETH_ALEN); 915 memcpy(hdr->h_source, desc.addr2, ETH_ALEN);
916 916
917 dev->last_rx = jiffies; 917 dev->last_rx = jiffies;
918 skb->dev = dev;
919 skb->protocol = eth_type_trans(skb, dev); 918 skb->protocol = eth_type_trans(skb, dev);
920 skb->ip_summed = CHECKSUM_NONE; 919 skb->ip_summed = CHECKSUM_NONE;
921 if (fc & IEEE80211_FCTL_TODS) 920 if (fc & IEEE80211_FCTL_TODS)
diff --git a/drivers/net/wireless/prism54/islpci_eth.c b/drivers/net/wireless/prism54/islpci_eth.c
index b1122912ee2d..dd070cccf324 100644
--- a/drivers/net/wireless/prism54/islpci_eth.c
+++ b/drivers/net/wireless/prism54/islpci_eth.c
@@ -136,7 +136,7 @@ islpci_eth_transmit(struct sk_buff *skb, struct net_device *ndev)
136 printk("islpci_eth_transmit:wds_mac\n"); 136 printk("islpci_eth_transmit:wds_mac\n");
137#endif 137#endif
138 memmove(skb->data + 6, src, skb->len); 138 memmove(skb->data + 6, src, skb->len);
139 memcpy(skb->data, wds_mac, 6); 139 skb_copy_to_linear_data(skb, wds_mac, 6);
140 } else { 140 } else {
141 memmove(skb->data, src, skb->len); 141 memmove(skb->data, src, skb->len);
142 } 142 }
@@ -162,13 +162,16 @@ islpci_eth_transmit(struct sk_buff *skb, struct net_device *ndev)
162 162
163 skb_put(newskb, init_wds ? skb->len + 6 : skb->len); 163 skb_put(newskb, init_wds ? skb->len + 6 : skb->len);
164 if (init_wds) { 164 if (init_wds) {
165 memcpy(newskb->data + 6, skb->data, skb->len); 165 skb_copy_from_linear_data(skb,
166 memcpy(newskb->data, wds_mac, 6); 166 newskb->data + 6,
167 skb->len);
168 skb_copy_to_linear_data(newskb, wds_mac, 6);
167#ifdef ISLPCI_ETH_DEBUG 169#ifdef ISLPCI_ETH_DEBUG
168 printk("islpci_eth_transmit:wds_mac\n"); 170 printk("islpci_eth_transmit:wds_mac\n");
169#endif 171#endif
170 } else 172 } else
171 memcpy(newskb->data, skb->data, skb->len); 173 skb_copy_from_linear_data(skb, newskb->data,
174 skb->len);
172 175
173#if VERBOSE > SHOW_ERROR_MESSAGES 176#if VERBOSE > SHOW_ERROR_MESSAGES
174 DEBUG(SHOW_TRACING, "memcpy %p %p %i wds %i\n", 177 DEBUG(SHOW_TRACING, "memcpy %p %p %i wds %i\n",
@@ -303,7 +306,7 @@ islpci_monitor_rx(islpci_private *priv, struct sk_buff **skb)
303 skb_pull(*skb, sizeof (struct rfmon_header)); 306 skb_pull(*skb, sizeof (struct rfmon_header));
304 307
305 (*skb)->protocol = htons(ETH_P_802_2); 308 (*skb)->protocol = htons(ETH_P_802_2);
306 (*skb)->mac.raw = (*skb)->data; 309 skb_reset_mac_header(*skb);
307 (*skb)->pkt_type = PACKET_OTHERHOST; 310 (*skb)->pkt_type = PACKET_OTHERHOST;
308 311
309 return 0; 312 return 0;
@@ -374,10 +377,6 @@ islpci_eth_receive(islpci_private *priv)
374 DEBUG(SHOW_BUFFER_CONTENTS, "\nrx %p ", skb->data); 377 DEBUG(SHOW_BUFFER_CONTENTS, "\nrx %p ", skb->data);
375 display_buffer((char *) skb->data, skb->len); 378 display_buffer((char *) skb->data, skb->len);
376#endif 379#endif
377
378 /* do some additional sk_buff and network layer parameters */
379 skb->dev = ndev;
380
381 /* take care of monitor mode and spy monitoring. */ 380 /* take care of monitor mode and spy monitoring. */
382 if (unlikely(priv->iw_mode == IW_MODE_MONITOR)) 381 if (unlikely(priv->iw_mode == IW_MODE_MONITOR))
383 discard = islpci_monitor_rx(priv, &skb); 382 discard = islpci_monitor_rx(priv, &skb);
@@ -398,8 +397,10 @@ islpci_eth_receive(islpci_private *priv)
398 /* Update spy records */ 397 /* Update spy records */
399 wireless_spy_update(ndev, annex->addr2, &wstats); 398 wireless_spy_update(ndev, annex->addr2, &wstats);
400 399
401 memcpy(skb->data + sizeof (struct rfmon_header), 400 skb_copy_from_linear_data(skb,
402 skb->data, 2 * ETH_ALEN); 401 (skb->data +
402 sizeof(struct rfmon_header)),
403 2 * ETH_ALEN);
403 skb_pull(skb, sizeof (struct rfmon_header)); 404 skb_pull(skb, sizeof (struct rfmon_header));
404 } 405 }
405 skb->protocol = eth_type_trans(skb, ndev); 406 skb->protocol = eth_type_trans(skb, ndev);
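
islpci_eth.c also exercises the write direction, skb_copy_to_linear_data(), plus one same-buffer copy in the rfmon path; the helpers keep plain memcpy semantics, so that call still depends on source and destination not overlapping. A sketch of the write direction:

	#include <linux/skbuff.h>

	/* Sketch: the "to" helper writes into the skb's linear area. */
	static void example_set_wds_mac(struct sk_buff *skb, const unsigned char *mac)
	{
		skb_copy_to_linear_data(skb, mac, 6);	/* was: memcpy(skb->data, mac, 6) */
	}
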
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index 47b2ccb6a633..3be624295a1f 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -2232,7 +2232,6 @@ static void rx_data(struct net_device *dev, struct rcs __iomem *prcs, unsigned i
2232 return; 2232 return;
2233 } 2233 }
2234 skb_reserve( skb, 2); /* Align IP on 16 byte (TBD check this)*/ 2234 skb_reserve( skb, 2); /* Align IP on 16 byte (TBD check this)*/
2235 skb->dev = dev;
2236 2235
2237 DEBUG(4,"ray_cs rx_data total_len = %x, rx_len = %x\n",total_len,rx_len); 2236 DEBUG(4,"ray_cs rx_data total_len = %x, rx_len = %x\n",total_len,rx_len);
2238 2237
@@ -2243,7 +2242,8 @@ static void rx_data(struct net_device *dev, struct rcs __iomem *prcs, unsigned i
2243 rx_ptr += copy_from_rx_buff(local, rx_ptr, pkt_addr & RX_BUFF_END, rx_len); 2242 rx_ptr += copy_from_rx_buff(local, rx_ptr, pkt_addr & RX_BUFF_END, rx_len);
2244 /* Get source address */ 2243 /* Get source address */
2245#ifdef WIRELESS_SPY 2244#ifdef WIRELESS_SPY
2246 memcpy(linksrcaddr, ((struct mac_header *)skb->data)->addr_2, ETH_ALEN); 2245 skb_copy_from_linear_data_offset(skb, offsetof(struct mac_header, addr_2),
2246 linksrcaddr, ETH_ALEN);
2247#endif 2247#endif
2248 /* Now, deal with encapsulation/translation/sniffer */ 2248 /* Now, deal with encapsulation/translation/sniffer */
2249 if (!sniffer) { 2249 if (!sniffer) {
diff --git a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c
index f5ce1c6063d8..2a299a0676a6 100644
--- a/drivers/net/wireless/strip.c
+++ b/drivers/net/wireless/strip.c
@@ -2009,7 +2009,7 @@ static void deliver_packet(struct strip *strip_info, STRIP_Header * header,
2009 packetlen); 2009 packetlen);
2010 skb->dev = get_strip_dev(strip_info); 2010 skb->dev = get_strip_dev(strip_info);
2011 skb->protocol = header->protocol; 2011 skb->protocol = header->protocol;
2012 skb->mac.raw = skb->data; 2012 skb_reset_mac_header(skb);
2013 2013
2014 /* Having put a fake header on the front of the sk_buff for the */ 2014 /* Having put a fake header on the front of the sk_buff for the */
2015 /* benefit of tools like tcpdump, skb_pull now 'consumes' that */ 2015 /* benefit of tools like tcpdump, skb_pull now 'consumes' that */
diff --git a/drivers/net/wireless/wavelan.c b/drivers/net/wireless/wavelan.c
index 2aa3c761dd83..1cf090d60edc 100644
--- a/drivers/net/wireless/wavelan.c
+++ b/drivers/net/wireless/wavelan.c
@@ -2512,14 +2512,13 @@ wv_packet_read(struct net_device * dev, u16 buf_off, int sksize)
2512 return; 2512 return;
2513 } 2513 }
2514 2514
2515 skb->dev = dev;
2516
2517 /* Copy the packet to the buffer. */ 2515 /* Copy the packet to the buffer. */
2518 obram_read(ioaddr, buf_off, skb_put(skb, sksize), sksize); 2516 obram_read(ioaddr, buf_off, skb_put(skb, sksize), sksize);
2519 skb->protocol = eth_type_trans(skb, dev); 2517 skb->protocol = eth_type_trans(skb, dev);
2520 2518
2521#ifdef DEBUG_RX_INFO 2519#ifdef DEBUG_RX_INFO
2522 wv_packet_info(skb->mac.raw, sksize, dev->name, "wv_packet_read"); 2520 wv_packet_info(skb_mac_header(skb), sksize, dev->name,
2521 "wv_packet_read");
2523#endif /* DEBUG_RX_INFO */ 2522#endif /* DEBUG_RX_INFO */
2524 2523
2525 /* Statistics-gathering and associated stuff. 2524 /* Statistics-gathering and associated stuff.
@@ -2555,7 +2554,7 @@ wv_packet_read(struct net_device * dev, u16 buf_off, int sksize)
2555 2554
2556 /* Spying stuff */ 2555 /* Spying stuff */
2557#ifdef IW_WIRELESS_SPY 2556#ifdef IW_WIRELESS_SPY
2558 wl_spy_gather(dev, skb->mac.raw + WAVELAN_ADDR_SIZE, 2557 wl_spy_gather(dev, skb_mac_header(skb) + WAVELAN_ADDR_SIZE,
2559 stats); 2558 stats);
2560#endif /* IW_WIRELESS_SPY */ 2559#endif /* IW_WIRELESS_SPY */
2561#ifdef HISTOGRAM 2560#ifdef HISTOGRAM
@@ -2939,7 +2938,7 @@ static int wavelan_packet_xmit(struct sk_buff *skb, struct net_device * dev)
2939 * need to pad. Jean II */ 2938 * need to pad. Jean II */
2940 if (skb->len < ETH_ZLEN) { 2939 if (skb->len < ETH_ZLEN) {
2941 memset(data, 0, ETH_ZLEN); 2940 memset(data, 0, ETH_ZLEN);
2942 memcpy(data, skb->data, skb->len); 2941 skb_copy_from_linear_data(skb, data, skb->len);
2943 /* Write packet on the card */ 2942 /* Write packet on the card */
2944 if(wv_packet_write(dev, data, ETH_ZLEN)) 2943 if(wv_packet_write(dev, data, ETH_ZLEN))
2945 return 1; /* We failed */ 2944 return 1; /* We failed */
diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c
index b04239792f63..67b867f837ca 100644
--- a/drivers/net/wireless/wavelan_cs.c
+++ b/drivers/net/wireless/wavelan_cs.c
@@ -2884,14 +2884,12 @@ wv_packet_read(struct net_device * dev,
2884 return; 2884 return;
2885 } 2885 }
2886 2886
2887 skb->dev = dev;
2888
2889 skb_reserve(skb, 2); 2887 skb_reserve(skb, 2);
2890 fd_p = read_ringbuf(dev, fd_p, (char *) skb_put(skb, sksize), sksize); 2888 fd_p = read_ringbuf(dev, fd_p, (char *) skb_put(skb, sksize), sksize);
2891 skb->protocol = eth_type_trans(skb, dev); 2889 skb->protocol = eth_type_trans(skb, dev);
2892 2890
2893#ifdef DEBUG_RX_INFO 2891#ifdef DEBUG_RX_INFO
2894 wv_packet_info(skb->mac.raw, sksize, dev->name, "wv_packet_read"); 2892 wv_packet_info(skb_mac_header(skb), sksize, dev->name, "wv_packet_read");
2895#endif /* DEBUG_RX_INFO */ 2893#endif /* DEBUG_RX_INFO */
2896 2894
2897 /* Statistics gathering & stuff associated. 2895 /* Statistics gathering & stuff associated.
@@ -2925,7 +2923,7 @@ wv_packet_read(struct net_device * dev,
2925#endif /* WAVELAN_ROAMING */ 2923#endif /* WAVELAN_ROAMING */
2926 2924
2927#ifdef WIRELESS_SPY 2925#ifdef WIRELESS_SPY
2928 wl_spy_gather(dev, skb->mac.raw + WAVELAN_ADDR_SIZE, stats); 2926 wl_spy_gather(dev, skb_mac_header(skb) + WAVELAN_ADDR_SIZE, stats);
2929#endif /* WIRELESS_SPY */ 2927#endif /* WIRELESS_SPY */
2930#ifdef HISTOGRAM 2928#ifdef HISTOGRAM
2931 wl_his_gather(dev, stats); 2929 wl_his_gather(dev, stats);
diff --git a/drivers/net/wireless/zd1201.c b/drivers/net/wireless/zd1201.c
index 6cb66a356c96..935b144d9b56 100644
--- a/drivers/net/wireless/zd1201.c
+++ b/drivers/net/wireless/zd1201.c
@@ -327,7 +327,6 @@ static void zd1201_usbrx(struct urb *urb)
327 memcpy(skb_put(skb, 6), &data[datalen-8], 6); 327 memcpy(skb_put(skb, 6), &data[datalen-8], 6);
328 memcpy(skb_put(skb, 2), &data[datalen-24], 2); 328 memcpy(skb_put(skb, 2), &data[datalen-24], 2);
329 memcpy(skb_put(skb, len), data, len); 329 memcpy(skb_put(skb, len), data, len);
330 skb->dev = zd->dev;
331 skb->dev->last_rx = jiffies; 330 skb->dev->last_rx = jiffies;
332 skb->protocol = eth_type_trans(skb, zd->dev); 331 skb->protocol = eth_type_trans(skb, zd->dev);
333 zd->stats.rx_packets++; 332 zd->stats.rx_packets++;
@@ -385,7 +384,6 @@ static void zd1201_usbrx(struct urb *urb)
385 memcpy(skb_put(skb, 2), &data[6], 2); 384 memcpy(skb_put(skb, 2), &data[6], 2);
386 memcpy(skb_put(skb, len), data+8, len); 385 memcpy(skb_put(skb, len), data+8, len);
387 } 386 }
388 skb->dev = zd->dev;
389 skb->dev->last_rx = jiffies; 387 skb->dev->last_rx = jiffies;
390 skb->protocol = eth_type_trans(skb, zd->dev); 388 skb->protocol = eth_type_trans(skb, zd->dev);
391 zd->stats.rx_packets++; 389 zd->stats.rx_packets++;
@@ -809,10 +807,10 @@ static int zd1201_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
809 txbuf[4] = 0x00; 807 txbuf[4] = 0x00;
810 txbuf[5] = 0x00; 808 txbuf[5] = 0x00;
811 809
812 memcpy(txbuf+6, skb->data+12, skb->len-12); 810 skb_copy_from_linear_data_offset(skb, 12, txbuf + 6, skb->len - 12);
813 if (pad) 811 if (pad)
814 txbuf[skb->len-12+6]=0; 812 txbuf[skb->len-12+6]=0;
815 memcpy(txbuf+skb->len-12+6+pad, skb->data, 12); 813 skb_copy_from_linear_data(skb, txbuf + skb->len - 12 + 6 + pad, 12);
816 *(__be16*)&txbuf[skb->len+6+pad] = htons(skb->len-12+6); 814 *(__be16*)&txbuf[skb->len+6+pad] = htons(skb->len-12+6);
817 txbuf[txbuflen-1] = 0; 815 txbuf[txbuflen-1] = 0;
818 816
diff --git a/drivers/net/wireless/zd1211rw/Kconfig b/drivers/net/wireless/zd1211rw/Kconfig
index 66ed55bc5460..d1ab24a95630 100644
--- a/drivers/net/wireless/zd1211rw/Kconfig
+++ b/drivers/net/wireless/zd1211rw/Kconfig
@@ -1,6 +1,7 @@
1config ZD1211RW 1config ZD1211RW
2 tristate "ZyDAS ZD1211/ZD1211B USB-wireless support" 2 tristate "ZyDAS ZD1211/ZD1211B USB-wireless support"
3 depends on USB && IEEE80211 && IEEE80211_SOFTMAC && NET_RADIO && EXPERIMENTAL 3 depends on USB && IEEE80211_SOFTMAC && WLAN_80211 && EXPERIMENTAL
4 select WIRELESS_EXT
4 select FW_LOADER 5 select FW_LOADER
5 ---help--- 6 ---help---
6 This is an experimental driver for the ZyDAS ZD1211/ZD1211B wireless 7 This is an experimental driver for the ZyDAS ZD1211/ZD1211B wireless
diff --git a/drivers/net/yellowfin.c b/drivers/net/yellowfin.c
index 2412ce4917f2..3f4a7cf9efea 100644
--- a/drivers/net/yellowfin.c
+++ b/drivers/net/yellowfin.c
@@ -1137,7 +1137,6 @@ static int yellowfin_rx(struct net_device *dev)
1137 skb = dev_alloc_skb(pkt_len + 2); 1137 skb = dev_alloc_skb(pkt_len + 2);
1138 if (skb == NULL) 1138 if (skb == NULL)
1139 break; 1139 break;
1140 skb->dev = dev;
1141 skb_reserve(skb, 2); /* 16 byte align the IP header */ 1140 skb_reserve(skb, 2); /* 16 byte align the IP header */
1142 eth_copy_and_sum(skb, rx_skb->data, pkt_len, 0); 1141 eth_copy_and_sum(skb, rx_skb->data, pkt_len, 0);
1143 skb_put(skb, pkt_len); 1142 skb_put(skb, pkt_len);
diff --git a/drivers/net/znet.c b/drivers/net/znet.c
index b24b0727108c..4032e9f6f9b0 100644
--- a/drivers/net/znet.c
+++ b/drivers/net/znet.c
@@ -774,7 +774,6 @@ static void znet_rx(struct net_device *dev)
774 znet->stats.rx_dropped++; 774 znet->stats.rx_dropped++;
775 break; 775 break;
776 } 776 }
777 skb->dev = dev;
778 777
779 if (&znet->rx_cur[(pkt_len+1)>>1] > znet->rx_end) { 778 if (&znet->rx_cur[(pkt_len+1)>>1] > znet->rx_end) {
780 int semi_cnt = (znet->rx_end - znet->rx_cur)<<1; 779 int semi_cnt = (znet->rx_end - znet->rx_cur)<<1;
diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c
index d190c05d87ed..453e6829756c 100644
--- a/drivers/parisc/led.c
+++ b/drivers/parisc/led.c
@@ -372,9 +372,9 @@ static __inline__ int led_get_net_activity(void)
372 continue; 372 continue;
373 if (LOOPBACK(in_dev->ifa_list->ifa_local)) 373 if (LOOPBACK(in_dev->ifa_list->ifa_local))
374 continue; 374 continue;
375 if (!dev->get_stats)
376 continue;
377 stats = dev->get_stats(dev); 375 stats = dev->get_stats(dev);
376 if (!stats)
377 continue;
378 rx_total += stats->rx_packets; 378 rx_total += stats->rx_packets;
379 tx_total += stats->tx_packets; 379 tx_total += stats->tx_packets;
380 } 380 }
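
The led.c hunk moves the NULL check from the get_stats hook to its return value, matching the net-2.6 change that leaves every net_device with a callable get_stats. A sketch of the new calling convention (signature assumed from the hunk):

	#include <linux/netdevice.h>

	/* Sketch: the hook is assumed present; only its result is checked. */
	static void example_sum_stats(struct net_device *dev,
				      unsigned long *rx, unsigned long *tx)
	{
		struct net_device_stats *stats = dev->get_stats(dev);

		if (!stats)
			return;
		*rx += stats->rx_packets;
		*tx += stats->tx_packets;
	}
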
diff --git a/drivers/s390/net/claw.c b/drivers/s390/net/claw.c
index 7809a79feec7..6dd64d0c8d45 100644
--- a/drivers/s390/net/claw.c
+++ b/drivers/s390/net/claw.c
@@ -3525,8 +3525,8 @@ unpack_next:
3525 memcpy(skb_put(skb,len_of_data), 3525 memcpy(skb_put(skb,len_of_data),
3526 privptr->p_mtc_envelope, 3526 privptr->p_mtc_envelope,
3527 len_of_data); 3527 len_of_data);
3528 skb->mac.raw=skb->data;
3529 skb->dev=dev; 3528 skb->dev=dev;
3529 skb_reset_mac_header(skb);
3530 skb->protocol=htons(ETH_P_IP); 3530 skb->protocol=htons(ETH_P_IP);
3531 skb->ip_summed=CHECKSUM_UNNECESSARY; 3531 skb->ip_summed=CHECKSUM_UNNECESSARY;
3532 privptr->stats.rx_packets++; 3532 privptr->stats.rx_packets++;
diff --git a/drivers/s390/net/ctcmain.c b/drivers/s390/net/ctcmain.c
index 570a960bfb5b..b20fd0681733 100644
--- a/drivers/s390/net/ctcmain.c
+++ b/drivers/s390/net/ctcmain.c
@@ -455,7 +455,7 @@ ctc_unpack_skb(struct channel *ch, struct sk_buff *pskb)
455 return; 455 return;
456 } 456 }
457 skb_put(pskb, header->length); 457 skb_put(pskb, header->length);
458 pskb->mac.raw = pskb->data; 458 skb_reset_mac_header(pskb);
459 len -= header->length; 459 len -= header->length;
460 skb = dev_alloc_skb(pskb->len); 460 skb = dev_alloc_skb(pskb->len);
461 if (!skb) { 461 if (!skb) {
@@ -472,8 +472,9 @@ ctc_unpack_skb(struct channel *ch, struct sk_buff *pskb)
472 privptr->stats.rx_dropped++; 472 privptr->stats.rx_dropped++;
473 return; 473 return;
474 } 474 }
475 memcpy(skb_put(skb, pskb->len), pskb->data, pskb->len); 475 skb_copy_from_linear_data(pskb, skb_put(skb, pskb->len),
476 skb->mac.raw = skb->data; 476 pskb->len);
477 skb_reset_mac_header(skb);
477 skb->dev = pskb->dev; 478 skb->dev = pskb->dev;
478 skb->protocol = pskb->protocol; 479 skb->protocol = pskb->protocol;
479 pskb->ip_summed = CHECKSUM_UNNECESSARY; 480 pskb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -706,7 +707,8 @@ ch_action_txdone(fsm_instance * fi, int event, void *arg)
706 spin_unlock(&ch->collect_lock); 707 spin_unlock(&ch->collect_lock);
707 return; 708 return;
708 } 709 }
709 ch->trans_skb->tail = ch->trans_skb->data = ch->trans_skb_data; 710 ch->trans_skb->data = ch->trans_skb_data;
711 skb_reset_tail_pointer(ch->trans_skb);
710 ch->trans_skb->len = 0; 712 ch->trans_skb->len = 0;
711 if (ch->prof.maxmulti < (ch->collect_len + 2)) 713 if (ch->prof.maxmulti < (ch->collect_len + 2))
712 ch->prof.maxmulti = ch->collect_len + 2; 714 ch->prof.maxmulti = ch->collect_len + 2;
@@ -715,8 +717,9 @@ ch_action_txdone(fsm_instance * fi, int event, void *arg)
715 *((__u16 *) skb_put(ch->trans_skb, 2)) = ch->collect_len + 2; 717 *((__u16 *) skb_put(ch->trans_skb, 2)) = ch->collect_len + 2;
716 i = 0; 718 i = 0;
717 while ((skb = skb_dequeue(&ch->collect_queue))) { 719 while ((skb = skb_dequeue(&ch->collect_queue))) {
718 memcpy(skb_put(ch->trans_skb, skb->len), skb->data, 720 skb_copy_from_linear_data(skb, skb_put(ch->trans_skb,
719 skb->len); 721 skb->len),
722 skb->len);
720 privptr->stats.tx_packets++; 723 privptr->stats.tx_packets++;
721 privptr->stats.tx_bytes += skb->len - LL_HEADER_LENGTH; 724 privptr->stats.tx_bytes += skb->len - LL_HEADER_LENGTH;
722 atomic_dec(&skb->users); 725 atomic_dec(&skb->users);
@@ -831,7 +834,8 @@ ch_action_rx(fsm_instance * fi, int event, void *arg)
831 ctc_unpack_skb(ch, skb); 834 ctc_unpack_skb(ch, skb);
832 } 835 }
833 again: 836 again:
834 skb->data = skb->tail = ch->trans_skb_data; 837 skb->data = ch->trans_skb_data;
838 skb_reset_tail_pointer(skb);
835 skb->len = 0; 839 skb->len = 0;
836 if (ctc_checkalloc_buffer(ch, 1)) 840 if (ctc_checkalloc_buffer(ch, 1))
837 return; 841 return;
@@ -2223,7 +2227,8 @@ transmit_skb(struct channel *ch, struct sk_buff *skb)
2223 * IDAL support in CTC is broken, so we have to 2227 * IDAL support in CTC is broken, so we have to
2224 * care about skb's above 2G ourselves. 2228 * care about skb's above 2G ourselves.
2225 */ 2229 */
2226 hi = ((unsigned long) skb->tail + LL_HEADER_LENGTH) >> 31; 2230 hi = ((unsigned long)skb_tail_pointer(skb) +
2231 LL_HEADER_LENGTH) >> 31;
2227 if (hi) { 2232 if (hi) {
2228 nskb = alloc_skb(skb->len, GFP_ATOMIC | GFP_DMA); 2233 nskb = alloc_skb(skb->len, GFP_ATOMIC | GFP_DMA);
2229 if (!nskb) { 2234 if (!nskb) {
@@ -2259,11 +2264,12 @@ transmit_skb(struct channel *ch, struct sk_buff *skb)
2259 return -EBUSY; 2264 return -EBUSY;
2260 } 2265 }
2261 2266
2262 ch->trans_skb->tail = ch->trans_skb->data; 2267 skb_reset_tail_pointer(ch->trans_skb);
2263 ch->trans_skb->len = 0; 2268 ch->trans_skb->len = 0;
2264 ch->ccw[1].count = skb->len; 2269 ch->ccw[1].count = skb->len;
2265 memcpy(skb_put(ch->trans_skb, skb->len), skb->data, 2270 skb_copy_from_linear_data(skb, skb_put(ch->trans_skb,
2266 skb->len); 2271 skb->len),
2272 skb->len);
2267 atomic_dec(&skb->users); 2273 atomic_dec(&skb->users);
2268 dev_kfree_skb_irq(skb); 2274 dev_kfree_skb_irq(skb);
2269 ccw_idx = 0; 2275 ccw_idx = 0;
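
skb->tail likewise becomes accessor-managed: direct "tail = data" writes split into a data assignment plus skb_reset_tail_pointer(), and raw reads become skb_tail_pointer(). A sketch of the buffer-rewind idiom ch_action_txdone() uses for its reusable trans_skb:

	#include <linux/skbuff.h>

	/* Sketch: rewind a long-lived skb for refilling; "base" stands in
	 * for the driver's saved ch->trans_skb_data pointer. */
	static void example_rewind_skb(struct sk_buff *skb, unsigned char *base)
	{
		skb->data = base;
		skb_reset_tail_pointer(skb);	/* was: skb->tail = skb->data */
		skb->len = 0;
	}
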
diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c
index ecca1046714e..08a994fdd1a4 100644
--- a/drivers/s390/net/lcs.c
+++ b/drivers/s390/net/lcs.c
@@ -1576,7 +1576,7 @@ __lcs_start_xmit(struct lcs_card *card, struct sk_buff *skb,
1576 header->offset = card->tx_buffer->count; 1576 header->offset = card->tx_buffer->count;
1577 header->type = card->lan_type; 1577 header->type = card->lan_type;
1578 header->slot = card->portno; 1578 header->slot = card->portno;
1579 memcpy(header + 1, skb->data, skb->len); 1579 skb_copy_from_linear_data(skb, header + 1, skb->len);
1580 spin_unlock(&card->lock); 1580 spin_unlock(&card->lock);
1581 card->stats.tx_bytes += skb->len; 1581 card->stats.tx_bytes += skb->len;
1582 card->stats.tx_packets++; 1582 card->stats.tx_packets++;
@@ -1784,7 +1784,6 @@ lcs_get_skb(struct lcs_card *card, char *skb_data, unsigned int skb_len)
1784 card->stats.rx_dropped++; 1784 card->stats.rx_dropped++;
1785 return; 1785 return;
1786 } 1786 }
1787 skb->dev = card->dev;
1788 memcpy(skb_put(skb, skb_len), skb_data, skb_len); 1787 memcpy(skb_put(skb, skb_len), skb_data, skb_len);
1789 skb->protocol = card->lan_type_trans(skb, card->dev); 1788 skb->protocol = card->lan_type_trans(skb, card->dev);
1790 card->stats.rx_bytes += skb_len; 1789 card->stats.rx_bytes += skb_len;
diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index 594320ca1b7c..e10e85e85c84 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -635,7 +635,7 @@ static void netiucv_unpack_skb(struct iucv_connection *conn,
635 return; 635 return;
636 } 636 }
637 skb_put(pskb, header->next); 637 skb_put(pskb, header->next);
638 pskb->mac.raw = pskb->data; 638 skb_reset_mac_header(pskb);
639 skb = dev_alloc_skb(pskb->len); 639 skb = dev_alloc_skb(pskb->len);
640 if (!skb) { 640 if (!skb) {
641 PRINT_WARN("%s Out of memory in netiucv_unpack_skb\n", 641 PRINT_WARN("%s Out of memory in netiucv_unpack_skb\n",
@@ -645,8 +645,9 @@ static void netiucv_unpack_skb(struct iucv_connection *conn,
645 privptr->stats.rx_dropped++; 645 privptr->stats.rx_dropped++;
646 return; 646 return;
647 } 647 }
648 memcpy(skb_put(skb, pskb->len), pskb->data, pskb->len); 648 skb_copy_from_linear_data(pskb, skb_put(skb, pskb->len),
649 skb->mac.raw = skb->data; 649 pskb->len);
650 skb_reset_mac_header(skb);
650 skb->dev = pskb->dev; 651 skb->dev = pskb->dev;
651 skb->protocol = pskb->protocol; 652 skb->protocol = pskb->protocol;
652 pskb->ip_summed = CHECKSUM_UNNECESSARY; 653 pskb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -689,7 +690,8 @@ static void conn_action_rx(fsm_instance *fi, int event, void *arg)
689 msg->length, conn->max_buffsize); 690 msg->length, conn->max_buffsize);
690 return; 691 return;
691 } 692 }
692 conn->rx_buff->data = conn->rx_buff->tail = conn->rx_buff->head; 693 conn->rx_buff->data = conn->rx_buff->head;
694 skb_reset_tail_pointer(conn->rx_buff);
693 conn->rx_buff->len = 0; 695 conn->rx_buff->len = 0;
694 rc = iucv_message_receive(conn->path, msg, 0, conn->rx_buff->data, 696 rc = iucv_message_receive(conn->path, msg, 0, conn->rx_buff->data,
695 msg->length, NULL); 697 msg->length, NULL);
@@ -735,14 +737,17 @@ static void conn_action_txdone(fsm_instance *fi, int event, void *arg)
735 } 737 }
736 } 738 }
737 } 739 }
738 conn->tx_buff->data = conn->tx_buff->tail = conn->tx_buff->head; 740 conn->tx_buff->data = conn->tx_buff->head;
741 skb_reset_tail_pointer(conn->tx_buff);
739 conn->tx_buff->len = 0; 742 conn->tx_buff->len = 0;
740 spin_lock_irqsave(&conn->collect_lock, saveflags); 743 spin_lock_irqsave(&conn->collect_lock, saveflags);
741 while ((skb = skb_dequeue(&conn->collect_queue))) { 744 while ((skb = skb_dequeue(&conn->collect_queue))) {
742 header.next = conn->tx_buff->len + skb->len + NETIUCV_HDRLEN; 745 header.next = conn->tx_buff->len + skb->len + NETIUCV_HDRLEN;
743 memcpy(skb_put(conn->tx_buff, NETIUCV_HDRLEN), &header, 746 memcpy(skb_put(conn->tx_buff, NETIUCV_HDRLEN), &header,
744 NETIUCV_HDRLEN); 747 NETIUCV_HDRLEN);
745 memcpy(skb_put(conn->tx_buff, skb->len), skb->data, skb->len); 748 skb_copy_from_linear_data(skb,
749 skb_put(conn->tx_buff, skb->len),
750 skb->len);
746 txbytes += skb->len; 751 txbytes += skb->len;
747 txpackets++; 752 txpackets++;
748 stat_maxcq++; 753 stat_maxcq++;
@@ -1164,8 +1169,8 @@ static int netiucv_transmit_skb(struct iucv_connection *conn,
1164 * Copy the skb to a new allocated skb in lowmem only if the 1169 * Copy the skb to a new allocated skb in lowmem only if the
1165 * data is located above 2G in memory or tailroom is < 2. 1170 * data is located above 2G in memory or tailroom is < 2.
1166 */ 1171 */
1167 unsigned long hi = 1172 unsigned long hi = ((unsigned long)(skb_tail_pointer(skb) +
1168 ((unsigned long)(skb->tail + NETIUCV_HDRLEN)) >> 31; 1173 NETIUCV_HDRLEN)) >> 31;
1169 int copied = 0; 1174 int copied = 0;
1170 if (hi || (skb_tailroom(skb) < 2)) { 1175 if (hi || (skb_tailroom(skb) < 2)) {
1171 nskb = alloc_skb(skb->len + NETIUCV_HDRLEN + 1176 nskb = alloc_skb(skb->len + NETIUCV_HDRLEN +
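
The ">> 31" tests in ctcmain.c and netiucv.c guard the s390 2 GiB limit described by the surrounding comments (IDAL support in CTC is broken, so data above 2G must be bounced into a lowmem copy); the conversion only swaps the raw skb->tail read for skb_tail_pointer(). A sketch of the test:

	#include <linux/skbuff.h>

	/* Sketch: nonzero iff the buffer end, plus the header still to be
	 * appended, has bit 31 set, i.e. lies above the 2 GiB line. */
	static int example_needs_lowmem_bounce(struct sk_buff *skb, int hdrlen)
	{
		return (((unsigned long)skb_tail_pointer(skb) + hdrlen) >> 31) != 0;
	}
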
diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c
index 7c735e1fe063..dd7034fbfff8 100644
--- a/drivers/s390/net/qeth_eddp.c
+++ b/drivers/s390/net/qeth_eddp.c
@@ -267,7 +267,8 @@ qeth_eddp_copy_data_tcp(char *dst, struct qeth_eddp_data *eddp, int len,
267 267
268 QETH_DBF_TEXT(trace, 5, "eddpcdtc"); 268 QETH_DBF_TEXT(trace, 5, "eddpcdtc");
269 if (skb_shinfo(eddp->skb)->nr_frags == 0) { 269 if (skb_shinfo(eddp->skb)->nr_frags == 0) {
270 memcpy(dst, eddp->skb->data + eddp->skb_offset, len); 270 skb_copy_from_linear_data_offset(eddp->skb, eddp->skb_offset,
271 dst, len);
271 *hcsum = csum_partial(eddp->skb->data + eddp->skb_offset, len, 272 *hcsum = csum_partial(eddp->skb->data + eddp->skb_offset, len,
272 *hcsum); 273 *hcsum);
273 eddp->skb_offset += len; 274 eddp->skb_offset += len;
@@ -416,7 +417,7 @@ __qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx,
416 eddp->skb_offset += VLAN_HLEN; 417 eddp->skb_offset += VLAN_HLEN;
417#endif /* CONFIG_QETH_VLAN */ 418#endif /* CONFIG_QETH_VLAN */
418 } 419 }
419 tcph = eddp->skb->h.th; 420 tcph = tcp_hdr(eddp->skb);
420 while (eddp->skb_offset < eddp->skb->len) { 421 while (eddp->skb_offset < eddp->skb->len) {
421 data_len = min((int)skb_shinfo(eddp->skb)->gso_size, 422 data_len = min((int)skb_shinfo(eddp->skb)->gso_size,
422 (int)(eddp->skb->len - eddp->skb_offset)); 423 (int)(eddp->skb->len - eddp->skb_offset));
@@ -473,20 +474,24 @@ qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx,
473 QETH_DBF_TEXT(trace, 5, "eddpficx"); 474 QETH_DBF_TEXT(trace, 5, "eddpficx");
474 /* create our segmentation headers and copy original headers */ 475 /* create our segmentation headers and copy original headers */
475 if (skb->protocol == htons(ETH_P_IP)) 476 if (skb->protocol == htons(ETH_P_IP))
476 eddp = qeth_eddp_create_eddp_data(qhdr, (u8 *)skb->nh.iph, 477 eddp = qeth_eddp_create_eddp_data(qhdr,
477 skb->nh.iph->ihl*4, 478 skb_network_header(skb),
478 (u8 *)skb->h.th, skb->h.th->doff*4); 479 ip_hdrlen(skb),
480 skb_transport_header(skb),
481 tcp_hdrlen(skb));
479 else 482 else
480 eddp = qeth_eddp_create_eddp_data(qhdr, (u8 *)skb->nh.ipv6h, 483 eddp = qeth_eddp_create_eddp_data(qhdr,
481 sizeof(struct ipv6hdr), 484 skb_network_header(skb),
482 (u8 *)skb->h.th, skb->h.th->doff*4); 485 sizeof(struct ipv6hdr),
486 skb_transport_header(skb),
487 tcp_hdrlen(skb));
483 488
484 if (eddp == NULL) { 489 if (eddp == NULL) {
485 QETH_DBF_TEXT(trace, 2, "eddpfcnm"); 490 QETH_DBF_TEXT(trace, 2, "eddpfcnm");
486 return -ENOMEM; 491 return -ENOMEM;
487 } 492 }
488 if (qhdr->hdr.l2.id == QETH_HEADER_TYPE_LAYER2) { 493 if (qhdr->hdr.l2.id == QETH_HEADER_TYPE_LAYER2) {
489 skb->mac.raw = (skb->data) + sizeof(struct qeth_hdr); 494 skb_set_mac_header(skb, sizeof(struct qeth_hdr));
490 memcpy(&eddp->mac, eth_hdr(skb), ETH_HLEN); 495 memcpy(&eddp->mac, eth_hdr(skb), ETH_HLEN);
491#ifdef CONFIG_QETH_VLAN 496#ifdef CONFIG_QETH_VLAN
492 if (eddp->mac.h_proto == __constant_htons(ETH_P_8021Q)) { 497 if (eddp->mac.h_proto == __constant_htons(ETH_P_8021Q)) {
@@ -590,12 +595,13 @@ qeth_eddp_create_context_tcp(struct qeth_card *card, struct sk_buff *skb,
590 QETH_DBF_TEXT(trace, 5, "creddpct"); 595 QETH_DBF_TEXT(trace, 5, "creddpct");
591 if (skb->protocol == htons(ETH_P_IP)) 596 if (skb->protocol == htons(ETH_P_IP))
592 ctx = qeth_eddp_create_context_generic(card, skb, 597 ctx = qeth_eddp_create_context_generic(card, skb,
593 sizeof(struct qeth_hdr) + skb->nh.iph->ihl*4 + 598 (sizeof(struct qeth_hdr) +
594 skb->h.th->doff*4); 599 ip_hdrlen(skb) +
600 tcp_hdrlen(skb)));
595 else if (skb->protocol == htons(ETH_P_IPV6)) 601 else if (skb->protocol == htons(ETH_P_IPV6))
596 ctx = qeth_eddp_create_context_generic(card, skb, 602 ctx = qeth_eddp_create_context_generic(card, skb,
597 sizeof(struct qeth_hdr) + sizeof(struct ipv6hdr) + 603 sizeof(struct qeth_hdr) + sizeof(struct ipv6hdr) +
598 skb->h.th->doff*4); 604 tcp_hdrlen(skb));
599 else 605 else
600 QETH_DBF_TEXT(trace, 2, "cetcpinv"); 606 QETH_DBF_TEXT(trace, 2, "cetcpinv");
601 607
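
The qeth hunks retire the old skb->nh/skb->h unions in favour of typed accessors: ip_hdr(), ipv6_hdr() and tcp_hdr() return the marked network/transport headers, while ip_hdrlen() and tcp_hdrlen() fold the ihl*4 and doff*4 arithmetic into named helpers. A sketch of the correspondence these conversions rely on:

	#include <linux/ip.h>
	#include <linux/skbuff.h>
	#include <linux/tcp.h>

	/* Sketch: combined TCP/IPv4 header length, new spelling.
	 * old: skb->nh.iph->ihl*4 + skb->h.th->doff*4 */
	static unsigned int example_tcpip_hdrlen(const struct sk_buff *skb)
	{
		return ip_hdrlen(skb) + tcp_hdrlen(skb);
	}
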
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index d8a86f5af379..ad7792dc1a04 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -2278,7 +2278,7 @@ qeth_type_trans(struct sk_buff *skb, struct net_device *dev)
2278 (card->info.link_type == QETH_LINK_TYPE_LANE_TR)) 2278 (card->info.link_type == QETH_LINK_TYPE_LANE_TR))
2279 return tr_type_trans(skb,dev); 2279 return tr_type_trans(skb,dev);
2280#endif /* CONFIG_TR */ 2280#endif /* CONFIG_TR */
2281 skb->mac.raw = skb->data; 2281 skb_reset_mac_header(skb);
2282 skb_pull(skb, ETH_HLEN ); 2282 skb_pull(skb, ETH_HLEN );
2283 eth = eth_hdr(skb); 2283 eth = eth_hdr(skb);
2284 2284
@@ -2306,9 +2306,9 @@ qeth_rebuild_skb_fake_ll_tr(struct qeth_card *card, struct sk_buff *skb,
2306 struct iphdr *ip_hdr; 2306 struct iphdr *ip_hdr;
2307 2307
2308 QETH_DBF_TEXT(trace,5,"skbfktr"); 2308 QETH_DBF_TEXT(trace,5,"skbfktr");
2309 skb->mac.raw = skb->data - QETH_FAKE_LL_LEN_TR; 2309 skb_set_mac_header(skb, -QETH_FAKE_LL_LEN_TR);
2310 /* this is a fake ethernet header */ 2310 /* this is a fake ethernet header */
2311 fake_hdr = (struct trh_hdr *) skb->mac.raw; 2311 fake_hdr = tr_hdr(skb);
2312 2312
2313 /* the destination MAC address */ 2313 /* the destination MAC address */
2314 switch (skb->pkt_type){ 2314 switch (skb->pkt_type){
@@ -2359,9 +2359,9 @@ qeth_rebuild_skb_fake_ll_eth(struct qeth_card *card, struct sk_buff *skb,
2359 struct iphdr *ip_hdr; 2359 struct iphdr *ip_hdr;
2360 2360
2361 QETH_DBF_TEXT(trace,5,"skbfketh"); 2361 QETH_DBF_TEXT(trace,5,"skbfketh");
2362 skb->mac.raw = skb->data - QETH_FAKE_LL_LEN_ETH; 2362 skb_set_mac_header(skb, -QETH_FAKE_LL_LEN_ETH);
2363 /* this is a fake ethernet header */ 2363 /* this is a fake ethernet header */
2364 fake_hdr = (struct ethhdr *) skb->mac.raw; 2364 fake_hdr = eth_hdr(skb);
2365 2365
2366 /* the destination MAC address */ 2366 /* the destination MAC address */
2367 switch (skb->pkt_type){ 2367 switch (skb->pkt_type){
@@ -2461,7 +2461,7 @@ qeth_rebuild_skb(struct qeth_card *card, struct sk_buff *skb,
2461 if (card->options.fake_ll) 2461 if (card->options.fake_ll)
2462 qeth_rebuild_skb_fake_ll(card, skb, hdr); 2462 qeth_rebuild_skb_fake_ll(card, skb, hdr);
2463 else 2463 else
2464 skb->mac.raw = skb->data; 2464 skb_reset_mac_header(skb);
2465 skb->ip_summed = card->options.checksum_type; 2465 skb->ip_summed = card->options.checksum_type;
2466 if (card->options.checksum_type == HW_CHECKSUMMING){ 2466 if (card->options.checksum_type == HW_CHECKSUMMING){
2467 if ( (hdr->hdr.l3.ext_flags & 2467 if ( (hdr->hdr.l3.ext_flags &
@@ -2501,7 +2501,8 @@ qeth_process_inbound_buffer(struct qeth_card *card,
2501 vlan_tag = qeth_rebuild_skb(card, skb, hdr); 2501 vlan_tag = qeth_rebuild_skb(card, skb, hdr);
2502 else { /*in case of OSN*/ 2502 else { /*in case of OSN*/
2503 skb_push(skb, sizeof(struct qeth_hdr)); 2503 skb_push(skb, sizeof(struct qeth_hdr));
2504 memcpy(skb->data, hdr, sizeof(struct qeth_hdr)); 2504 skb_copy_to_linear_data(skb, hdr,
2505 sizeof(struct qeth_hdr));
2505 } 2506 }
2506 /* is device UP ? */ 2507 /* is device UP ? */
2507 if (!(card->dev->flags & IFF_UP)){ 2508 if (!(card->dev->flags & IFF_UP)){
@@ -3778,9 +3779,11 @@ qeth_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
3778 } 3779 }
3779 /* try something else */ 3780 /* try something else */
3780 if (skb->protocol == ETH_P_IPV6) 3781 if (skb->protocol == ETH_P_IPV6)
3781 return (skb->nh.raw[24] == 0xff) ? RTN_MULTICAST : 0; 3782 return (skb_network_header(skb)[24] == 0xff) ?
3783 RTN_MULTICAST : 0;
3782 else if (skb->protocol == ETH_P_IP) 3784 else if (skb->protocol == ETH_P_IP)
3783 return ((skb->nh.raw[16] & 0xf0) == 0xe0) ? RTN_MULTICAST : 0; 3785 return ((skb_network_header(skb)[16] & 0xf0) == 0xe0) ?
3786 RTN_MULTICAST : 0;
3784 /* ... */ 3787 /* ... */
3785 if (!memcmp(skb->data, skb->dev->broadcast, 6)) 3788 if (!memcmp(skb->data, skb->dev->broadcast, 6))
3786 return RTN_BROADCAST; 3789 return RTN_BROADCAST;
@@ -3818,18 +3821,20 @@ qeth_get_priority_queue(struct qeth_card *card, struct sk_buff *skb,
3818 return card->info.is_multicast_different & 3821 return card->info.is_multicast_different &
3819 (card->qdio.no_out_queues - 1); 3822 (card->qdio.no_out_queues - 1);
3820 if (card->qdio.do_prio_queueing && (ipv == 4)) { 3823 if (card->qdio.do_prio_queueing && (ipv == 4)) {
3824 const u8 tos = ip_hdr(skb)->tos;
3825
3821 if (card->qdio.do_prio_queueing==QETH_PRIO_Q_ING_TOS){ 3826 if (card->qdio.do_prio_queueing==QETH_PRIO_Q_ING_TOS){
3822 if (skb->nh.iph->tos & IP_TOS_NOTIMPORTANT) 3827 if (tos & IP_TOS_NOTIMPORTANT)
3823 return 3; 3828 return 3;
3824 if (skb->nh.iph->tos & IP_TOS_HIGHRELIABILITY) 3829 if (tos & IP_TOS_HIGHRELIABILITY)
3825 return 2; 3830 return 2;
3826 if (skb->nh.iph->tos & IP_TOS_HIGHTHROUGHPUT) 3831 if (tos & IP_TOS_HIGHTHROUGHPUT)
3827 return 1; 3832 return 1;
3828 if (skb->nh.iph->tos & IP_TOS_LOWDELAY) 3833 if (tos & IP_TOS_LOWDELAY)
3829 return 0; 3834 return 0;
3830 } 3835 }
3831 if (card->qdio.do_prio_queueing==QETH_PRIO_Q_ING_PREC) 3836 if (card->qdio.do_prio_queueing==QETH_PRIO_Q_ING_PREC)
3832 return 3 - (skb->nh.iph->tos >> 6); 3837 return 3 - (tos >> 6);
3833 } else if (card->qdio.do_prio_queueing && (ipv == 6)) { 3838 } else if (card->qdio.do_prio_queueing && (ipv == 6)) {
3834 /* TODO: IPv6!!! */ 3839 /* TODO: IPv6!!! */
3835 } 3840 }
@@ -3866,9 +3871,9 @@ __qeth_prepare_skb(struct qeth_card *card, struct sk_buff *skb, int ipv)
3866 * memcpys instead of one memmove to save cycles. 3871 * memcpys instead of one memmove to save cycles.
3867 */ 3872 */
3868 skb_push(skb, VLAN_HLEN); 3873 skb_push(skb, VLAN_HLEN);
3869 memcpy(skb->data, skb->data + 4, 4); 3874 skb_copy_to_linear_data(skb, skb->data + 4, 4);
3870 memcpy(skb->data + 4, skb->data + 8, 4); 3875 skb_copy_to_linear_data_offset(skb, 4, skb->data + 8, 4);
3871 memcpy(skb->data + 8, skb->data + 12, 4); 3876 skb_copy_to_linear_data_offset(skb, 8, skb->data + 12, 4);
3872 tag = (u16 *)(skb->data + 12); 3877 tag = (u16 *)(skb->data + 12);
3873 /* 3878 /*
3874 * first two bytes = ETH_P_8021Q (0x8100) 3879 * first two bytes = ETH_P_8021Q (0x8100)
@@ -4039,7 +4044,8 @@ qeth_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
4039 *((u32 *) skb->dst->neighbour->primary_key); 4044 *((u32 *) skb->dst->neighbour->primary_key);
4040 } else { 4045 } else {
4041 /* fill in destination address used in ip header */ 4046 /* fill in destination address used in ip header */
4042 *((u32 *) (&hdr->hdr.l3.dest_addr[12])) = skb->nh.iph->daddr; 4047 *((u32 *)(&hdr->hdr.l3.dest_addr[12])) =
4048 ip_hdr(skb)->daddr;
4043 } 4049 }
4044 } else if (ipv == 6) { /* IPv6 or passthru */ 4050 } else if (ipv == 6) { /* IPv6 or passthru */
4045 hdr->hdr.l3.flags = qeth_get_qeth_hdr_flags6(cast_type); 4051 hdr->hdr.l3.flags = qeth_get_qeth_hdr_flags6(cast_type);
@@ -4048,7 +4054,8 @@ qeth_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
4048 skb->dst->neighbour->primary_key, 16); 4054 skb->dst->neighbour->primary_key, 16);
4049 } else { 4055 } else {
4050 /* fill in destination address used in ip header */ 4056 /* fill in destination address used in ip header */
4051 memcpy(hdr->hdr.l3.dest_addr, &skb->nh.ipv6h->daddr, 16); 4057 memcpy(hdr->hdr.l3.dest_addr,
4058 &ipv6_hdr(skb)->daddr, 16);
4052 } 4059 }
4053 } else { /* passthrough */ 4060 } else { /* passthrough */
4054 if((skb->dev->type == ARPHRD_IEEE802_TR) && 4061 if((skb->dev->type == ARPHRD_IEEE802_TR) &&
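All of the qeth hunks above apply the same conversion: the old skb->nh union is gone, replaced by the skb_network_header()/ip_hdr()/ipv6_hdr() accessors, which keep working when sk_buff stores the header position as an offset rather than a pointer. A minimal sketch of the pattern (illustrative only, not part of the patch):

#include <linux/ip.h>
#include <linux/skbuff.h>

static u8 example_tos(const struct sk_buff *skb)
{
	/* old style: return skb->nh.iph->tos; */
	return ip_hdr(skb)->tos;	/* accessor hides pointer vs offset */
}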
diff --git a/drivers/s390/net/qeth_tso.h b/drivers/s390/net/qeth_tso.h
index 14504afb044e..c20e923cf9ad 100644
--- a/drivers/s390/net/qeth_tso.h
+++ b/drivers/s390/net/qeth_tso.h
@@ -40,8 +40,8 @@ qeth_tso_fill_header(struct qeth_card *card, struct sk_buff *skb)
40 QETH_DBF_TEXT(trace, 5, "tsofhdr"); 40 QETH_DBF_TEXT(trace, 5, "tsofhdr");
41 41
42 hdr = (struct qeth_hdr_tso *) skb->data; 42 hdr = (struct qeth_hdr_tso *) skb->data;
43 iph = skb->nh.iph; 43 iph = ip_hdr(skb);
44 tcph = skb->h.th; 44 tcph = tcp_hdr(skb);
45 /*fix header to TSO values ...*/ 45 /*fix header to TSO values ...*/
46 hdr->hdr.hdr.l3.id = QETH_HEADER_TYPE_TSO; 46 hdr->hdr.hdr.l3.id = QETH_HEADER_TYPE_TSO;
47 /*set values which are fix for the first approach ...*/ 47 /*set values which are fix for the first approach ...*/
@@ -63,13 +63,9 @@ qeth_tso_fill_header(struct qeth_card *card, struct sk_buff *skb)
63static inline void 63static inline void
64qeth_tso_set_tcpip_header(struct qeth_card *card, struct sk_buff *skb) 64qeth_tso_set_tcpip_header(struct qeth_card *card, struct sk_buff *skb)
65{ 65{
66 struct iphdr *iph; 66 struct iphdr *iph = ip_hdr(skb);
67 struct ipv6hdr *ip6h; 67 struct ipv6hdr *ip6h = ipv6_hdr(skb);
68 struct tcphdr *tcph; 68 struct tcphdr *tcph = tcp_hdr(skb);
69
70 iph = skb->nh.iph;
71 ip6h = skb->nh.ipv6h;
72 tcph = skb->h.th;
73 69
74 tcph->check = 0; 70 tcph->check = 0;
75 if (skb->protocol == ETH_P_IPV6) { 71 if (skb->protocol == ETH_P_IPV6) {
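The qeth_tso.h change also shows the idiomatic follow-on: once ip_hdr()/tcp_hdr() exist, the header pointers are fetched once at the top of the function instead of being re-derived at each use. A minimal sketch of that shape (illustrative; the helpers are only valid once the stack has set the skb's header offsets):

#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/skbuff.h>

static void example_tso_fixup(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *tcph = tcp_hdr(skb);

	iph->tot_len = 0;	/* left for the hardware to fill in */
	iph->check = 0;
	tcph->check = 0;	/* pseudo-header checksum computed later */
}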
diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c
index 1b59b27e887f..4bf9aa547c78 100644
--- a/drivers/scsi/scsi_netlink.c
+++ b/drivers/scsi/scsi_netlink.c
@@ -50,7 +50,7 @@ scsi_nl_rcv_msg(struct sk_buff *skb)
50 while (skb->len >= NLMSG_SPACE(0)) { 50 while (skb->len >= NLMSG_SPACE(0)) {
51 err = 0; 51 err = 0;
52 52
53 nlh = (struct nlmsghdr *) skb->data; 53 nlh = nlmsg_hdr(skb);
54 if ((nlh->nlmsg_len < (sizeof(*nlh) + sizeof(*hdr))) || 54 if ((nlh->nlmsg_len < (sizeof(*nlh) + sizeof(*hdr))) ||
55 (skb->len < nlh->nlmsg_len)) { 55 (skb->len < nlh->nlmsg_len)) {
56 printk(KERN_WARNING "%s: discarding partial skb\n", 56 printk(KERN_WARNING "%s: discarding partial skb\n",
@@ -168,7 +168,8 @@ scsi_netlink_init(void)
168 } 168 }
169 169
170 scsi_nl_sock = netlink_kernel_create(NETLINK_SCSITRANSPORT, 170 scsi_nl_sock = netlink_kernel_create(NETLINK_SCSITRANSPORT,
171 SCSI_NL_GRP_CNT, scsi_nl_rcv, THIS_MODULE); 171 SCSI_NL_GRP_CNT, scsi_nl_rcv, NULL,
172 THIS_MODULE);
172 if (!scsi_nl_sock) { 173 if (!scsi_nl_sock) {
173 printk(KERN_ERR "%s: register of recieve handler failed\n", 174 printk(KERN_ERR "%s: register of recieve handler failed\n",
174 __FUNCTION__); 175 __FUNCTION__);
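Two independent updates meet in this hunk: nlmsg_hdr() replaces the open-coded cast of skb->data, and netlink_kernel_create() gained a struct mutex * argument (the callback mutex) in this series, for which callers that need no serialisation of their own simply pass NULL. A minimal sketch of the 2.6.22-era calling convention (the function names here are illustrative):

#include <linux/module.h>
#include <linux/netlink.h>
#include <net/sock.h>

static void example_input(struct sock *sk, int len)
{
	/* drain sk->sk_receive_queue; nlmsg_hdr(skb) yields each header */
}

static struct sock *example_create(void)
{
	return netlink_kernel_create(NETLINK_USERSOCK, 1, example_input,
				     NULL /* cb_mutex */, THIS_MODULE);
}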
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index ce0d14af33c8..aabaa0576ab4 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -1081,7 +1081,7 @@ iscsi_if_rx(struct sock *sk, int len)
1081 struct nlmsghdr *nlh; 1081 struct nlmsghdr *nlh;
1082 struct iscsi_uevent *ev; 1082 struct iscsi_uevent *ev;
1083 1083
1084 nlh = (struct nlmsghdr *)skb->data; 1084 nlh = nlmsg_hdr(skb);
1085 if (nlh->nlmsg_len < sizeof(*nlh) || 1085 if (nlh->nlmsg_len < sizeof(*nlh) ||
1086 skb->len < nlh->nlmsg_len) { 1086 skb->len < nlh->nlmsg_len) {
1087 break; 1087 break;
@@ -1435,7 +1435,7 @@ static __init int iscsi_transport_init(void)
1435 if (err) 1435 if (err)
1436 goto unregister_conn_class; 1436 goto unregister_conn_class;
1437 1437
1438 nls = netlink_kernel_create(NETLINK_ISCSI, 1, iscsi_if_rx, 1438 nls = netlink_kernel_create(NETLINK_ISCSI, 1, iscsi_if_rx, NULL,
1439 THIS_MODULE); 1439 THIS_MODULE);
1440 if (!nls) { 1440 if (!nls) {
1441 err = -ENOBUFS; 1441 err = -ENOBUFS;
diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c
index ec63b0ee0743..d3e2c5f90a26 100644
--- a/drivers/usb/atm/usbatm.c
+++ b/drivers/usb/atm/usbatm.c
@@ -343,7 +343,7 @@ static void usbatm_extract_one_cell(struct usbatm_data *instance, unsigned char
343 UDSL_ASSERT(sarb->tail + ATM_CELL_PAYLOAD <= sarb->end); 343 UDSL_ASSERT(sarb->tail + ATM_CELL_PAYLOAD <= sarb->end);
344 } 344 }
345 345
346 memcpy(sarb->tail, source + ATM_CELL_HEADER, ATM_CELL_PAYLOAD); 346 memcpy(skb_tail_pointer(sarb), source + ATM_CELL_HEADER, ATM_CELL_PAYLOAD);
347 __skb_put(sarb, ATM_CELL_PAYLOAD); 347 __skb_put(sarb, ATM_CELL_PAYLOAD);
348 348
349 if (pti & 1) { 349 if (pti & 1) {
@@ -370,7 +370,7 @@ static void usbatm_extract_one_cell(struct usbatm_data *instance, unsigned char
370 goto out; 370 goto out;
371 } 371 }
372 372
373 if (crc32_be(~0, sarb->tail - pdu_length, pdu_length) != 0xc704dd7b) { 373 if (crc32_be(~0, skb_tail_pointer(sarb) - pdu_length, pdu_length) != 0xc704dd7b) {
374 atm_rldbg(instance, "%s: packet failed crc check (vcc: 0x%p)!\n", 374 atm_rldbg(instance, "%s: packet failed crc check (vcc: 0x%p)!\n",
375 __func__, vcc); 375 __func__, vcc);
376 atomic_inc(&vcc->stats->rx_err); 376 atomic_inc(&vcc->stats->rx_err);
@@ -396,7 +396,9 @@ static void usbatm_extract_one_cell(struct usbatm_data *instance, unsigned char
396 goto out; /* atm_charge increments rx_drop */ 396 goto out; /* atm_charge increments rx_drop */
397 } 397 }
398 398
399 memcpy(skb->data, sarb->tail - pdu_length, length); 399 skb_copy_to_linear_data(skb,
400 skb_tail_pointer(sarb) - pdu_length,
401 length);
400 __skb_put(skb, length); 402 __skb_put(skb, length);
401 403
402 vdbg("%s: sending skb 0x%p, skb->len %u, skb->truesize %u", 404 vdbg("%s: sending skb 0x%p, skb->len %u, skb->truesize %u",
@@ -484,7 +486,7 @@ static unsigned int usbatm_write_cells(struct usbatm_data *instance,
484 ptr[4] = 0xec; 486 ptr[4] = 0xec;
485 ptr += ATM_CELL_HEADER; 487 ptr += ATM_CELL_HEADER;
486 488
487 memcpy(ptr, skb->data, data_len); 489 skb_copy_from_linear_data(skb, ptr, data_len);
488 ptr += data_len; 490 ptr += data_len;
489 __skb_pull(skb, data_len); 491 __skb_pull(skb, data_len);
490 492
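The usbatm hunks are the tail-pointer half of the same sk_buff cleanup: skb_tail_pointer() must be used instead of touching skb->tail directly (which becomes an offset under CONFIG_NET_SKBUFF_DATA_USES_OFFSET), and the skb_copy_{to,from}_linear_data() helpers wrap the memcpy()s against skb->data. A minimal sketch, assuming a linear destination skb:

#include <linux/skbuff.h>

static void example_copy_pdu(struct sk_buff *dst, const struct sk_buff *sarb,
			     unsigned int pdu_len)
{
	/* old: memcpy(dst->data, sarb->tail - pdu_len, pdu_len); */
	skb_copy_to_linear_data(dst, skb_tail_pointer(sarb) - pdu_len,
				pdu_len);
	__skb_put(dst, pdu_len);
}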
diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c
index 04e6b8508fb6..8f9f217e0a68 100644
--- a/drivers/usb/gadget/ether.c
+++ b/drivers/usb/gadget/ether.c
@@ -1766,7 +1766,6 @@ static void rx_complete (struct usb_ep *ep, struct usb_request *req)
1766 break; 1766 break;
1767 } 1767 }
1768 1768
1769 skb->dev = dev->net;
1770 skb->protocol = eth_type_trans (skb, dev->net); 1769 skb->protocol = eth_type_trans (skb, dev->net);
1771 dev->stats.rx_packets++; 1770 dev->stats.rx_packets++;
1772 dev->stats.rx_bytes += skb->len; 1771 dev->stats.rx_bytes += skb->len;
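The deleted line is part of a tree-wide cleanup: eth_type_trans() already assigns skb->dev internally, so setting it beforehand was redundant on every ethernet receive path. A minimal sketch of the resulting shape:

#include <linux/etherdevice.h>
#include <linux/netdevice.h>

static void example_rx(struct net_device *netdev, struct sk_buff *skb)
{
	skb->protocol = eth_type_trans(skb, netdev);	/* also sets skb->dev */
	netif_rx(skb);
}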
diff --git a/drivers/usb/net/asix.c b/drivers/usb/net/asix.c
index 5808ea082459..d5ef97bc4d01 100644
--- a/drivers/usb/net/asix.c
+++ b/drivers/usb/net/asix.c
@@ -298,7 +298,7 @@ static int asix_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
298 if (ax_skb) { 298 if (ax_skb) {
299 ax_skb->len = size; 299 ax_skb->len = size;
300 ax_skb->data = packet; 300 ax_skb->data = packet;
301 ax_skb->tail = packet + size; 301 skb_set_tail_pointer(ax_skb, size);
302 usbnet_skb_return(dev, ax_skb); 302 usbnet_skb_return(dev, ax_skb);
303 } else { 303 } else {
304 return 0; 304 return 0;
@@ -338,7 +338,7 @@ static struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
338 && ((headroom + tailroom) >= (4 + padlen))) { 338 && ((headroom + tailroom) >= (4 + padlen))) {
339 if ((headroom < 4) || (tailroom < padlen)) { 339 if ((headroom < 4) || (tailroom < padlen)) {
340 skb->data = memmove(skb->head + 4, skb->data, skb->len); 340 skb->data = memmove(skb->head + 4, skb->data, skb->len);
341 skb->tail = skb->data + skb->len; 341 skb_set_tail_pointer(skb, skb->len);
342 } 342 }
343 } else { 343 } else {
344 struct sk_buff *skb2; 344 struct sk_buff *skb2;
@@ -352,11 +352,11 @@ static struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
352 skb_push(skb, 4); 352 skb_push(skb, 4);
353 packet_len = (((skb->len - 4) ^ 0x0000ffff) << 16) + (skb->len - 4); 353 packet_len = (((skb->len - 4) ^ 0x0000ffff) << 16) + (skb->len - 4);
354 cpu_to_le32s(&packet_len); 354 cpu_to_le32s(&packet_len);
355 memcpy(skb->data, &packet_len, sizeof(packet_len)); 355 skb_copy_to_linear_data(skb, &packet_len, sizeof(packet_len));
356 356
357 if ((skb->len % 512) == 0) { 357 if ((skb->len % 512) == 0) {
358 cpu_to_le32s(&padbytes); 358 cpu_to_le32s(&padbytes);
359 memcpy( skb->tail, &padbytes, sizeof(padbytes)); 359 memcpy(skb_tail_pointer(skb), &padbytes, sizeof(padbytes));
360 skb_put(skb, sizeof(padbytes)); 360 skb_put(skb, sizeof(padbytes));
361 } 361 }
362 return skb; 362 return skb;
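asix_tx_fixup() also demonstrates the headroom dance under the new accessors: when there is no room for the 4-byte length header, the payload is shifted up with memmove() and the tail is re-derived from the new data pointer via skb_set_tail_pointer(). A minimal sketch of that pattern:

#include <linux/skbuff.h>
#include <linux/string.h>

static void example_make_headroom(struct sk_buff *skb)
{
	if (skb_headroom(skb) < 4) {
		skb->data = memmove(skb->head + 4, skb->data, skb->len);
		skb_set_tail_pointer(skb, skb->len);	/* tail = data + len */
	}
	skb_push(skb, 4);	/* room for the 4-byte header now exists */
}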
diff --git a/drivers/usb/net/catc.c b/drivers/usb/net/catc.c
index 4852012735f6..ffec2e01b896 100644
--- a/drivers/usb/net/catc.c
+++ b/drivers/usb/net/catc.c
@@ -255,7 +255,6 @@ static void catc_rx_done(struct urb *urb)
255 if (!(skb = dev_alloc_skb(pkt_len))) 255 if (!(skb = dev_alloc_skb(pkt_len)))
256 return; 256 return;
257 257
258 skb->dev = catc->netdev;
259 eth_copy_and_sum(skb, pkt_start + pkt_offset, pkt_len, 0); 258 eth_copy_and_sum(skb, pkt_start + pkt_offset, pkt_len, 0);
260 skb_put(skb, pkt_len); 259 skb_put(skb, pkt_len);
261 260
@@ -419,7 +418,7 @@ static int catc_hard_start_xmit(struct sk_buff *skb, struct net_device *netdev)
419 catc->tx_ptr = (((catc->tx_ptr - 1) >> 6) + 1) << 6; 418 catc->tx_ptr = (((catc->tx_ptr - 1) >> 6) + 1) << 6;
420 tx_buf = catc->tx_buf[catc->tx_idx] + catc->tx_ptr; 419 tx_buf = catc->tx_buf[catc->tx_idx] + catc->tx_ptr;
421 *((u16*)tx_buf) = (catc->is_f5u011) ? cpu_to_be16((u16)skb->len) : cpu_to_le16((u16)skb->len); 420 *((u16*)tx_buf) = (catc->is_f5u011) ? cpu_to_be16((u16)skb->len) : cpu_to_le16((u16)skb->len);
422 memcpy(tx_buf + 2, skb->data, skb->len); 421 skb_copy_from_linear_data(skb, tx_buf + 2, skb->len);
423 catc->tx_ptr += skb->len + 2; 422 catc->tx_ptr += skb->len + 2;
424 423
425 if (!test_and_set_bit(TX_RUNNING, &catc->flags)) 424 if (!test_and_set_bit(TX_RUNNING, &catc->flags))
diff --git a/drivers/usb/net/gl620a.c b/drivers/usb/net/gl620a.c
index d257a8e026d6..031cf5ca4dbb 100644
--- a/drivers/usb/net/gl620a.c
+++ b/drivers/usb/net/gl620a.c
@@ -157,7 +157,7 @@ genelink_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
157 if ((headroom < (4 + 4*1)) || (tailroom < padlen)) { 157 if ((headroom < (4 + 4*1)) || (tailroom < padlen)) {
158 skb->data = memmove(skb->head + (4 + 4*1), 158 skb->data = memmove(skb->head + (4 + 4*1),
159 skb->data, skb->len); 159 skb->data, skb->len);
160 skb->tail = skb->data + skb->len; 160 skb_set_tail_pointer(skb, skb->len);
161 } 161 }
162 } else { 162 } else {
163 struct sk_buff *skb2; 163 struct sk_buff *skb2;
diff --git a/drivers/usb/net/kaweth.c b/drivers/usb/net/kaweth.c
index de95268ae4b8..a0cc05d21a6a 100644
--- a/drivers/usb/net/kaweth.c
+++ b/drivers/usb/net/kaweth.c
@@ -636,8 +636,6 @@ static void kaweth_usb_receive(struct urb *urb)
636 636
637 skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ 637 skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */
638 638
639 skb->dev = net;
640
641 eth_copy_and_sum(skb, kaweth->rx_buf + 2, pkt_len, 0); 639 eth_copy_and_sum(skb, kaweth->rx_buf + 2, pkt_len, 0);
642 640
643 skb_put(skb, pkt_len); 641 skb_put(skb, pkt_len);
diff --git a/drivers/usb/net/net1080.c b/drivers/usb/net/net1080.c
index ccebfdef4751..19bf8dae70c9 100644
--- a/drivers/usb/net/net1080.c
+++ b/drivers/usb/net/net1080.c
@@ -520,7 +520,7 @@ net1080_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
520 skb->data = memmove(skb->head 520 skb->data = memmove(skb->head
521 + sizeof (struct nc_header), 521 + sizeof (struct nc_header),
522 skb->data, skb->len); 522 skb->data, skb->len);
523 skb->tail = skb->data + len; 523 skb_set_tail_pointer(skb, len);
524 goto encapsulate; 524 goto encapsulate;
525 } 525 }
526 } 526 }
diff --git a/drivers/usb/net/pegasus.c b/drivers/usb/net/pegasus.c
index 6d12961cf9f9..1ad4ee54b186 100644
--- a/drivers/usb/net/pegasus.c
+++ b/drivers/usb/net/pegasus.c
@@ -575,7 +575,6 @@ static void fill_skb_pool(pegasus_t * pegasus)
575 */ 575 */
576 if (pegasus->rx_pool[i] == NULL) 576 if (pegasus->rx_pool[i] == NULL)
577 return; 577 return;
578 pegasus->rx_pool[i]->dev = pegasus->net;
579 skb_reserve(pegasus->rx_pool[i], 2); 578 skb_reserve(pegasus->rx_pool[i], 2);
580 } 579 }
581} 580}
@@ -890,7 +889,7 @@ static int pegasus_start_xmit(struct sk_buff *skb, struct net_device *net)
890 netif_stop_queue(net); 889 netif_stop_queue(net);
891 890
892 ((__le16 *) pegasus->tx_buff)[0] = cpu_to_le16(l16); 891 ((__le16 *) pegasus->tx_buff)[0] = cpu_to_le16(l16);
893 memcpy(pegasus->tx_buff + 2, skb->data, skb->len); 892 skb_copy_from_linear_data(skb, pegasus->tx_buff + 2, skb->len);
894 usb_fill_bulk_urb(pegasus->tx_urb, pegasus->usb, 893 usb_fill_bulk_urb(pegasus->tx_urb, pegasus->usb,
895 usb_sndbulkpipe(pegasus->usb, 2), 894 usb_sndbulkpipe(pegasus->usb, 2),
896 pegasus->tx_buff, count, 895 pegasus->tx_buff, count,
@@ -1415,8 +1414,10 @@ static void pegasus_disconnect(struct usb_interface *intf)
1415 unlink_all_urbs(pegasus); 1414 unlink_all_urbs(pegasus);
1416 free_all_urbs(pegasus); 1415 free_all_urbs(pegasus);
1417 free_skb_pool(pegasus); 1416 free_skb_pool(pegasus);
1418 if (pegasus->rx_skb) 1417 if (pegasus->rx_skb != NULL) {
1419 dev_kfree_skb(pegasus->rx_skb); 1418 dev_kfree_skb(pegasus->rx_skb);
1419 pegasus->rx_skb = NULL;
1420 }
1420 free_netdev(pegasus->net); 1421 free_netdev(pegasus->net);
1421} 1422}
1422 1423
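The pegasus_disconnect() change is a small hardening fix rather than an API conversion: clearing rx_skb after dev_kfree_skb() makes the teardown idempotent, so a second pass over the same state frees nothing twice. The shape of the guard, sketched with the driver's own field names:

static void example_free_rx_skb(pegasus_t *pegasus)
{
	if (pegasus->rx_skb != NULL) {
		dev_kfree_skb(pegasus->rx_skb);
		pegasus->rx_skb = NULL;	/* later frees become no-ops */
	}
}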
diff --git a/drivers/usb/net/rndis_host.c b/drivers/usb/net/rndis_host.c
index 39a21c74fdf4..1d36772ba6e1 100644
--- a/drivers/usb/net/rndis_host.c
+++ b/drivers/usb/net/rndis_host.c
@@ -588,7 +588,7 @@ rndis_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
588 if (likely((sizeof *hdr) <= room)) { 588 if (likely((sizeof *hdr) <= room)) {
589 skb->data = memmove(skb->head + sizeof *hdr, 589 skb->data = memmove(skb->head + sizeof *hdr,
590 skb->data, len); 590 skb->data, len);
591 skb->tail = skb->data + len; 591 skb_set_tail_pointer(skb, len);
592 goto fill; 592 goto fill;
593 } 593 }
594 } 594 }
diff --git a/drivers/usb/net/rtl8150.c b/drivers/usb/net/rtl8150.c
index ea153dc9b0ac..fa598f0340cf 100644
--- a/drivers/usb/net/rtl8150.c
+++ b/drivers/usb/net/rtl8150.c
@@ -646,7 +646,6 @@ static void fill_skb_pool(rtl8150_t *dev)
646 if (!skb) { 646 if (!skb) {
647 return; 647 return;
648 } 648 }
649 skb->dev = dev->netdev;
650 skb_reserve(skb, 2); 649 skb_reserve(skb, 2);
651 dev->rx_skb_pool[i] = skb; 650 dev->rx_skb_pool[i] = skb;
652 } 651 }
diff --git a/drivers/usb/net/usbnet.c b/drivers/usb/net/usbnet.c
index de69b183bd2f..0c5465a7909b 100644
--- a/drivers/usb/net/usbnet.c
+++ b/drivers/usb/net/usbnet.c
@@ -203,7 +203,6 @@ void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb)
203{ 203{
204 int status; 204 int status;
205 205
206 skb->dev = dev->net;
207 skb->protocol = eth_type_trans (skb, dev->net); 206 skb->protocol = eth_type_trans (skb, dev->net);
208 dev->stats.rx_packets++; 207 dev->stats.rx_packets++;
209 dev->stats.rx_bytes += skb->len; 208 dev->stats.rx_bytes += skb->len;
diff --git a/fs/Kconfig b/fs/Kconfig
index 3c4886b849f5..e33c08924572 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -2019,7 +2019,7 @@ config CODA_FS_OLD_API
2019config AFS_FS 2019config AFS_FS
2020 tristate "Andrew File System support (AFS) (EXPERIMENTAL)" 2020 tristate "Andrew File System support (AFS) (EXPERIMENTAL)"
2021 depends on INET && EXPERIMENTAL 2021 depends on INET && EXPERIMENTAL
2022 select RXRPC 2022 select AF_RXRPC
2023 help 2023 help
2024 If you say Y here, you will get an experimental Andrew File System 2024 If you say Y here, you will get an experimental Andrew File System
2025 driver. It currently only supports unsecured read-only AFS access. 2025 driver. It currently only supports unsecured read-only AFS access.
@@ -2028,8 +2028,15 @@ config AFS_FS
2028 2028
2029 If unsure, say N. 2029 If unsure, say N.
2030 2030
2031config RXRPC 2031config AFS_DEBUG
2032 tristate 2032 bool "AFS dynamic debugging"
2033 depends on AFS_FS
2034 help
2035 Say Y here to make runtime controllable debugging messages appear.
2036
2037 See <file:Documentation/filesystems/afs.txt> for more information.
2038
2039 If unsure, say N.
2033 2040
2034config 9P_FS 2041config 9P_FS
2035 tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)" 2042 tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)"
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 4029c9da4b86..01545eb1d872 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -2,8 +2,6 @@
2# Makefile for Red Hat Linux AFS client. 2# Makefile for Red Hat Linux AFS client.
3# 3#
4 4
5#CFLAGS += -finstrument-functions
6
7kafs-objs := \ 5kafs-objs := \
8 callback.o \ 6 callback.o \
9 cell.o \ 7 cell.o \
@@ -12,14 +10,15 @@ kafs-objs := \
12 file.o \ 10 file.o \
13 fsclient.o \ 11 fsclient.o \
14 inode.o \ 12 inode.o \
15 kafsasyncd.o \
16 kafstimod.o \
17 main.o \ 13 main.o \
18 misc.o \ 14 misc.o \
19 mntpt.o \ 15 mntpt.o \
20 proc.o \ 16 proc.o \
17 rxrpc.o \
18 security.o \
21 server.o \ 19 server.o \
22 super.o \ 20 super.o \
21 use-rtnetlink.o \
23 vlclient.o \ 22 vlclient.o \
24 vlocation.o \ 23 vlocation.o \
25 vnode.o \ 24 vnode.o \
diff --git a/fs/afs/afs.h b/fs/afs/afs.h
new file mode 100644
index 000000000000..52d0752265b8
--- /dev/null
+++ b/fs/afs/afs.h
@@ -0,0 +1,146 @@
1/* AFS common types
2 *
3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef AFS_H
13#define AFS_H
14
15#include <linux/in.h>
16
17#define AFS_MAXCELLNAME 64 /* maximum length of a cell name */
18#define AFS_MAXVOLNAME 64 /* maximum length of a volume name */
19
20typedef unsigned afs_volid_t;
21typedef unsigned afs_vnodeid_t;
22typedef unsigned long long afs_dataversion_t;
23
24typedef enum {
25 AFSVL_RWVOL, /* read/write volume */
26 AFSVL_ROVOL, /* read-only volume */
27 AFSVL_BACKVOL, /* backup volume */
28} __attribute__((packed)) afs_voltype_t;
29
30typedef enum {
31 AFS_FTYPE_INVALID = 0,
32 AFS_FTYPE_FILE = 1,
33 AFS_FTYPE_DIR = 2,
34 AFS_FTYPE_SYMLINK = 3,
35} afs_file_type_t;
36
37/*
38 * AFS file identifier
39 */
40struct afs_fid {
41 afs_volid_t vid; /* volume ID */
42 afs_vnodeid_t vnode; /* file index within volume */
43 unsigned unique; /* unique ID number (file index version) */
44};
45
46/*
47 * AFS callback notification
48 */
49typedef enum {
50 AFSCM_CB_UNTYPED = 0, /* no type set on CB break */
51 AFSCM_CB_EXCLUSIVE = 1, /* CB exclusive to CM [not implemented] */
52 AFSCM_CB_SHARED = 2, /* CB shared by other CM's */
53 AFSCM_CB_DROPPED = 3, /* CB promise cancelled by file server */
54} afs_callback_type_t;
55
56struct afs_callback {
57 struct afs_fid fid; /* file identifier */
58 unsigned version; /* callback version */
59 unsigned expiry; /* time at which expires */
60 afs_callback_type_t type; /* type of callback */
61};
62
63#define AFSCBMAX 50 /* maximum callbacks transferred per bulk op */
64
65/*
66 * AFS volume information
67 */
68struct afs_volume_info {
69 afs_volid_t vid; /* volume ID */
70 afs_voltype_t type; /* type of this volume */
71 afs_volid_t type_vids[5]; /* volume ID's for possible types for this vol */
72
73 /* list of fileservers serving this volume */
74 size_t nservers; /* number of entries used in servers[] */
75 struct {
76 struct in_addr addr; /* fileserver address */
77 } servers[8];
78};
79
80/*
81 * AFS security ACE access mask
82 */
83typedef u32 afs_access_t;
84#define AFS_ACE_READ 0x00000001U /* - permission to read a file/dir */
85#define AFS_ACE_WRITE 0x00000002U /* - permission to write/chmod a file */
86#define AFS_ACE_INSERT 0x00000004U /* - permission to create dirent in a dir */
87#define AFS_ACE_LOOKUP 0x00000008U /* - permission to lookup a file/dir in a dir */
88#define AFS_ACE_DELETE 0x00000010U /* - permission to delete a dirent from a dir */
89#define AFS_ACE_LOCK 0x00000020U /* - permission to lock a file */
90#define AFS_ACE_ADMINISTER 0x00000040U /* - permission to change ACL */
91#define AFS_ACE_USER_A 0x01000000U /* - 'A' user-defined permission */
92#define AFS_ACE_USER_B 0x02000000U /* - 'B' user-defined permission */
93#define AFS_ACE_USER_C 0x04000000U /* - 'C' user-defined permission */
94#define AFS_ACE_USER_D 0x08000000U /* - 'D' user-defined permission */
95#define AFS_ACE_USER_E 0x10000000U /* - 'E' user-defined permission */
96#define AFS_ACE_USER_F 0x20000000U /* - 'F' user-defined permission */
97#define AFS_ACE_USER_G 0x40000000U /* - 'G' user-defined permission */
98#define AFS_ACE_USER_H 0x80000000U /* - 'H' user-defined permission */
99
100/*
101 * AFS file status information
102 */
103struct afs_file_status {
104 unsigned if_version; /* interface version */
105#define AFS_FSTATUS_VERSION 1
106
107 afs_file_type_t type; /* file type */
108 unsigned nlink; /* link count */
109 u64 size; /* file size */
110 afs_dataversion_t data_version; /* current data version */
111 u32 author; /* author ID */
112 u32 owner; /* owner ID */
113 u32 group; /* group ID */
114 afs_access_t caller_access; /* access rights for authenticated caller */
115 afs_access_t anon_access; /* access rights for unauthenticated caller */
116 umode_t mode; /* UNIX mode */
117 struct afs_fid parent; /* parent dir ID for non-dirs only */
118 time_t mtime_client; /* last time client changed data */
119 time_t mtime_server; /* last time server changed data */
120};
121
122/*
123 * AFS file status change request
124 */
125struct afs_store_status {
126 u32 mask; /* which bits of the struct are set */
127 u32 mtime_client; /* last time client changed data */
128 u32 owner; /* owner ID */
129 u32 group; /* group ID */
130 umode_t mode; /* UNIX mode */
131};
132
133#define AFS_SET_MTIME 0x01 /* set the mtime */
134#define AFS_SET_OWNER 0x02 /* set the owner ID */
135#define AFS_SET_GROUP 0x04 /* set the group ID (unsupported?) */
136#define AFS_SET_MODE 0x08 /* set the UNIX mode */
137#define AFS_SET_SEG_SIZE 0x10 /* set the segment size (unsupported) */
138
139/*
140 * AFS volume synchronisation information
141 */
142struct afs_volsync {
143 time_t creation; /* volume creation time */
144};
145
146#endif /* AFS_H */
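struct afs_fid is the key type for much of what follows: the (vid, vnode, unique) triple is exactly what fs/afs/callback.c later compares, field by field, when walking a server's rb-tree of vnodes. A hypothetical comparison helper (not in the patch) makes that ordering explicit:

static int afs_fid_cmp(const struct afs_fid *a, const struct afs_fid *b)
{
	if (a->vid != b->vid)
		return a->vid < b->vid ? -1 : 1;
	if (a->vnode != b->vnode)
		return a->vnode < b->vnode ? -1 : 1;
	if (a->unique != b->unique)
		return a->unique < b->unique ? -1 : 1;
	return 0;
}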
diff --git a/fs/afs/afs_cm.h b/fs/afs/afs_cm.h
new file mode 100644
index 000000000000..7b4d4fab4c80
--- /dev/null
+++ b/fs/afs/afs_cm.h
@@ -0,0 +1,32 @@
1/* AFS Cache Manager definitions
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef AFS_CM_H
13#define AFS_CM_H
14
 15#define AFS_CM_PORT 7001 /* AFS cache manager port */
 16#define CM_SERVICE 1 /* AFS Cache Manager service ID */
17
18enum AFS_CM_Operations {
19 CBCallBack = 204, /* break callback promises */
20 CBInitCallBackState = 205, /* initialise callback state */
21 CBProbe = 206, /* probe client */
22 CBGetLock = 207, /* get contents of CM lock table */
23 CBGetCE = 208, /* get cache file description */
24 CBGetXStatsVersion = 209, /* get version of extended statistics */
25 CBGetXStats = 210, /* get contents of extended statistics data */
26 CBInitCallBackState3 = 213, /* initialise callback state, version 3 */
27 CBGetCapabilities = 65538, /* get client capabilities */
28};
29
30#define AFS_CAP_ERROR_TRANSLATION 0x1
31
 32#endif /* AFS_CM_H */
diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h
new file mode 100644
index 000000000000..89e0d1650a72
--- /dev/null
+++ b/fs/afs/afs_fs.h
@@ -0,0 +1,48 @@
1/* AFS File Service definitions
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef AFS_FS_H
13#define AFS_FS_H
14
15#define AFS_FS_PORT 7000 /* AFS file server port */
16#define FS_SERVICE 1 /* AFS File Service ID */
17
18enum AFS_FS_Operations {
19 FSFETCHDATA = 130, /* AFS Fetch file data */
20 FSFETCHSTATUS = 132, /* AFS Fetch file status */
21 FSREMOVEFILE = 136, /* AFS Remove a file */
22 FSCREATEFILE = 137, /* AFS Create a file */
23 FSRENAME = 138, /* AFS Rename or move a file or directory */
24 FSSYMLINK = 139, /* AFS Create a symbolic link */
25 FSLINK = 140, /* AFS Create a hard link */
26 FSMAKEDIR = 141, /* AFS Create a directory */
27 FSREMOVEDIR = 142, /* AFS Remove a directory */
28 FSGIVEUPCALLBACKS = 147, /* AFS Discard callback promises */
29 FSGETVOLUMEINFO = 148, /* AFS Get root volume information */
30 FSGETROOTVOLUME = 151, /* AFS Get root volume name */
31 FSLOOKUP = 161, /* AFS lookup file in directory */
32};
33
34enum AFS_FS_Errors {
35 VSALVAGE = 101, /* volume needs salvaging */
36 VNOVNODE = 102, /* no such file/dir (vnode) */
37 VNOVOL = 103, /* no such volume or volume unavailable */
38 VVOLEXISTS = 104, /* volume name already exists */
39 VNOSERVICE = 105, /* volume not currently in service */
40 VOFFLINE = 106, /* volume is currently offline (more info available [VVL-spec]) */
41 VONLINE = 107, /* volume is already online */
42 VDISKFULL = 108, /* disk partition is full */
43 VOVERQUOTA = 109, /* volume's maximum quota exceeded */
44 VBUSY = 110, /* volume is temporarily unavailable */
45 VMOVED = 111, /* volume moved to new server - ask this FS where */
46};
47
48#endif /* AFS_FS_H */
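The AFS_FS_Errors values are Rx abort codes sent by the fileserver, not errnos, so the client must translate them before anything is returned to the VFS. A hypothetical sketch of such a mapping (illustrative only; the client keeps its own table elsewhere):

static int example_abort_to_error(u32 abort_code)
{
	switch (abort_code) {
	case VNOVNODE:	 return -ENOENT;
	case VDISKFULL:	 return -ENOSPC;
	case VOVERQUOTA: return -EDQUOT;
	case VBUSY:	 return -EBUSY;
	default:	 return -EREMOTEIO;
	}
}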
diff --git a/fs/afs/vlclient.h b/fs/afs/afs_vl.h
index e3d601179c46..8bbefe009ed4 100644
--- a/fs/afs/vlclient.h
+++ b/fs/afs/afs_vl.h
@@ -1,6 +1,6 @@
1/* vlclient.h: Volume Location Service client interface 1/* AFS Volume Location Service client interface
2 * 2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -9,10 +9,19 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#ifndef _LINUX_AFS_VLCLIENT_H 12#ifndef AFS_VL_H
13#define _LINUX_AFS_VLCLIENT_H 13#define AFS_VL_H
14 14
15#include "types.h" 15#include "afs.h"
16
17#define AFS_VL_PORT 7003 /* volume location service port */
18#define VL_SERVICE 52 /* RxRPC service ID for the Volume Location service */
19
20enum AFSVL_Operations {
21 VLGETENTRYBYID = 503, /* AFS Get Cache Entry By ID operation ID */
22 VLGETENTRYBYNAME = 504, /* AFS Get Cache Entry By Name operation ID */
23 VLPROBE = 514, /* AFS Probe Volume Location Service operation ID */
24};
16 25
17enum AFSVL_Errors { 26enum AFSVL_Errors {
18 AFSVL_IDEXIST = 363520, /* Volume Id entry exists in vl database */ 27 AFSVL_IDEXIST = 363520, /* Volume Id entry exists in vl database */
@@ -40,14 +49,16 @@ enum AFSVL_Errors {
40 AFSVL_BADVOLOPER = 363542, /* Bad volume operation code */ 49 AFSVL_BADVOLOPER = 363542, /* Bad volume operation code */
41 AFSVL_BADRELLOCKTYPE = 363543, /* Bad release lock type */ 50 AFSVL_BADRELLOCKTYPE = 363543, /* Bad release lock type */
42 AFSVL_RERELEASE = 363544, /* Status report: last release was aborted */ 51 AFSVL_RERELEASE = 363544, /* Status report: last release was aborted */
 43 AFSVL_BADSERVERFLAG = 363545, /* Invalid replication site server flag */ 52 AFSVL_BADSERVERFLAG = 363545, /* Invalid replication site server flag */
44 AFSVL_PERM = 363546, /* No permission access */ 53 AFSVL_PERM = 363546, /* No permission access */
45 AFSVL_NOMEM = 363547, /* malloc/realloc failed to alloc enough memory */ 54 AFSVL_NOMEM = 363547, /* malloc/realloc failed to alloc enough memory */
46}; 55};
47 56
48/* maps to "struct vldbentry" in vvl-spec.pdf */ 57/*
58 * maps to "struct vldbentry" in vvl-spec.pdf
59 */
49struct afs_vldbentry { 60struct afs_vldbentry {
50 char name[65]; /* name of volume (including NUL char) */ 61 char name[65]; /* name of volume (with NUL char) */
51 afs_voltype_t type; /* volume type */ 62 afs_voltype_t type; /* volume type */
52 unsigned num_servers; /* num servers that hold instances of this vol */ 63 unsigned num_servers; /* num servers that hold instances of this vol */
53 unsigned clone_id; /* cloning ID */ 64 unsigned clone_id; /* cloning ID */
@@ -68,26 +79,6 @@ struct afs_vldbentry {
68#define AFS_VLSF_RWVOL 0x0004 /* this server holds a R/W instance of the volume */ 79#define AFS_VLSF_RWVOL 0x0004 /* this server holds a R/W instance of the volume */
69#define AFS_VLSF_BACKVOL 0x0008 /* this server holds a backup instance of the volume */ 80#define AFS_VLSF_BACKVOL 0x0008 /* this server holds a backup instance of the volume */
70 } servers[8]; 81 } servers[8];
71
72}; 82};
73 83
74/* look up a volume location database entry by name */ 84#endif /* AFS_VL_H */
75extern int afs_rxvl_get_entry_by_name(struct afs_server *server,
76 const char *volname,
77 unsigned volnamesz,
78 struct afs_cache_vlocation *entry);
79
80/* look up a volume location database entry by ID */
81extern int afs_rxvl_get_entry_by_id(struct afs_server *server,
82 afs_volid_t volid,
83 afs_voltype_t voltype,
84 struct afs_cache_vlocation *entry);
85
86extern int afs_rxvl_get_entry_by_id_async(struct afs_async_op *op,
87 afs_volid_t volid,
88 afs_voltype_t voltype);
89
90extern int afs_rxvl_get_entry_by_id_async2(struct afs_async_op *op,
91 struct afs_cache_vlocation *entry);
92
93#endif /* _LINUX_AFS_VLCLIENT_H */
diff --git a/fs/afs/cache.c b/fs/afs/cache.c
new file mode 100644
index 000000000000..de0d7de69edc
--- /dev/null
+++ b/fs/afs/cache.c
@@ -0,0 +1,256 @@
1/* AFS caching stuff
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifdef AFS_CACHING_SUPPORT
13static cachefs_match_val_t afs_cell_cache_match(void *target,
14 const void *entry);
15static void afs_cell_cache_update(void *source, void *entry);
16
17struct cachefs_index_def afs_cache_cell_index_def = {
18 .name = "cell_ix",
19 .data_size = sizeof(struct afs_cache_cell),
20 .keys[0] = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 },
21 .match = afs_cell_cache_match,
22 .update = afs_cell_cache_update,
23};
24#endif
25
26/*
27 * match a cell record obtained from the cache
28 */
29#ifdef AFS_CACHING_SUPPORT
30static cachefs_match_val_t afs_cell_cache_match(void *target,
31 const void *entry)
32{
33 const struct afs_cache_cell *ccell = entry;
34 struct afs_cell *cell = target;
35
36 _enter("{%s},{%s}", ccell->name, cell->name);
37
38 if (strncmp(ccell->name, cell->name, sizeof(ccell->name)) == 0) {
39 _leave(" = SUCCESS");
40 return CACHEFS_MATCH_SUCCESS;
41 }
42
43 _leave(" = FAILED");
44 return CACHEFS_MATCH_FAILED;
45}
46#endif
47
48/*
49 * update a cell record in the cache
50 */
51#ifdef AFS_CACHING_SUPPORT
52static void afs_cell_cache_update(void *source, void *entry)
53{
54 struct afs_cache_cell *ccell = entry;
55 struct afs_cell *cell = source;
56
57 _enter("%p,%p", source, entry);
58
59 strncpy(ccell->name, cell->name, sizeof(ccell->name));
60
61 memcpy(ccell->vl_servers,
62 cell->vl_addrs,
63 min(sizeof(ccell->vl_servers), sizeof(cell->vl_addrs)));
64
65}
66#endif
67
68#ifdef AFS_CACHING_SUPPORT
69static cachefs_match_val_t afs_vlocation_cache_match(void *target,
70 const void *entry);
71static void afs_vlocation_cache_update(void *source, void *entry);
72
73struct cachefs_index_def afs_vlocation_cache_index_def = {
74 .name = "vldb",
75 .data_size = sizeof(struct afs_cache_vlocation),
76 .keys[0] = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 },
77 .match = afs_vlocation_cache_match,
78 .update = afs_vlocation_cache_update,
79};
80#endif
81
82/*
83 * match a VLDB record stored in the cache
84 * - may also load target from entry
85 */
86#ifdef AFS_CACHING_SUPPORT
87static cachefs_match_val_t afs_vlocation_cache_match(void *target,
88 const void *entry)
89{
90 const struct afs_cache_vlocation *vldb = entry;
91 struct afs_vlocation *vlocation = target;
92
93 _enter("{%s},{%s}", vlocation->vldb.name, vldb->name);
94
95 if (strncmp(vlocation->vldb.name, vldb->name, sizeof(vldb->name)) == 0
96 ) {
97 if (!vlocation->valid ||
98 vlocation->vldb.rtime == vldb->rtime
99 ) {
100 vlocation->vldb = *vldb;
101 vlocation->valid = 1;
102 _leave(" = SUCCESS [c->m]");
103 return CACHEFS_MATCH_SUCCESS;
104 } else if (memcmp(&vlocation->vldb, vldb, sizeof(*vldb)) != 0) {
105 /* delete if VIDs for this name differ */
106 if (memcmp(&vlocation->vldb.vid,
107 &vldb->vid,
108 sizeof(vldb->vid)) != 0) {
109 _leave(" = DELETE");
110 return CACHEFS_MATCH_SUCCESS_DELETE;
111 }
112
113 _leave(" = UPDATE");
114 return CACHEFS_MATCH_SUCCESS_UPDATE;
115 } else {
116 _leave(" = SUCCESS");
117 return CACHEFS_MATCH_SUCCESS;
118 }
119 }
120
121 _leave(" = FAILED");
122 return CACHEFS_MATCH_FAILED;
123}
124#endif
125
126/*
127 * update a VLDB record stored in the cache
128 */
129#ifdef AFS_CACHING_SUPPORT
130static void afs_vlocation_cache_update(void *source, void *entry)
131{
132 struct afs_cache_vlocation *vldb = entry;
133 struct afs_vlocation *vlocation = source;
134
135 _enter("");
136
137 *vldb = vlocation->vldb;
138}
139#endif
140
141#ifdef AFS_CACHING_SUPPORT
142static cachefs_match_val_t afs_volume_cache_match(void *target,
143 const void *entry);
144static void afs_volume_cache_update(void *source, void *entry);
145
146struct cachefs_index_def afs_volume_cache_index_def = {
147 .name = "volume",
148 .data_size = sizeof(struct afs_cache_vhash),
149 .keys[0] = { CACHEFS_INDEX_KEYS_BIN, 1 },
150 .keys[1] = { CACHEFS_INDEX_KEYS_BIN, 1 },
151 .match = afs_volume_cache_match,
152 .update = afs_volume_cache_update,
153};
154#endif
155
156/*
157 * match a volume hash record stored in the cache
158 */
159#ifdef AFS_CACHING_SUPPORT
160static cachefs_match_val_t afs_volume_cache_match(void *target,
161 const void *entry)
162{
163 const struct afs_cache_vhash *vhash = entry;
164 struct afs_volume *volume = target;
165
166 _enter("{%u},{%u}", volume->type, vhash->vtype);
167
168 if (volume->type == vhash->vtype) {
169 _leave(" = SUCCESS");
170 return CACHEFS_MATCH_SUCCESS;
171 }
172
173 _leave(" = FAILED");
174 return CACHEFS_MATCH_FAILED;
175}
176#endif
177
178/*
179 * update a volume hash record stored in the cache
180 */
181#ifdef AFS_CACHING_SUPPORT
182static void afs_volume_cache_update(void *source, void *entry)
183{
184 struct afs_cache_vhash *vhash = entry;
185 struct afs_volume *volume = source;
186
187 _enter("");
188
189 vhash->vtype = volume->type;
190}
191#endif
192
193#ifdef AFS_CACHING_SUPPORT
194static cachefs_match_val_t afs_vnode_cache_match(void *target,
195 const void *entry);
196static void afs_vnode_cache_update(void *source, void *entry);
197
198struct cachefs_index_def afs_vnode_cache_index_def = {
199 .name = "vnode",
200 .data_size = sizeof(struct afs_cache_vnode),
201 .keys[0] = { CACHEFS_INDEX_KEYS_BIN, 4 },
202 .match = afs_vnode_cache_match,
203 .update = afs_vnode_cache_update,
204};
205#endif
206
207/*
208 * match a vnode record stored in the cache
209 */
210#ifdef AFS_CACHING_SUPPORT
211static cachefs_match_val_t afs_vnode_cache_match(void *target,
212 const void *entry)
213{
214 const struct afs_cache_vnode *cvnode = entry;
215 struct afs_vnode *vnode = target;
216
217 _enter("{%x,%x,%Lx},{%x,%x,%Lx}",
218 vnode->fid.vnode,
219 vnode->fid.unique,
220 vnode->status.version,
221 cvnode->vnode_id,
222 cvnode->vnode_unique,
223 cvnode->data_version);
224
225 if (vnode->fid.vnode != cvnode->vnode_id) {
226 _leave(" = FAILED");
227 return CACHEFS_MATCH_FAILED;
228 }
229
230 if (vnode->fid.unique != cvnode->vnode_unique ||
231 vnode->status.version != cvnode->data_version) {
232 _leave(" = DELETE");
233 return CACHEFS_MATCH_SUCCESS_DELETE;
234 }
235
236 _leave(" = SUCCESS");
237 return CACHEFS_MATCH_SUCCESS;
238}
239#endif
240
241/*
242 * update a vnode record stored in the cache
243 */
244#ifdef AFS_CACHING_SUPPORT
245static void afs_vnode_cache_update(void *source, void *entry)
246{
247 struct afs_cache_vnode *cvnode = entry;
248 struct afs_vnode *vnode = source;
249
250 _enter("");
251
252 cvnode->vnode_id = vnode->fid.vnode;
253 cvnode->vnode_unique = vnode->fid.unique;
254 cvnode->data_version = vnode->status.version;
255}
256#endif
diff --git a/fs/afs/cache.h b/fs/afs/cache.h
index 9eb7722b34d5..36a3642cf90e 100644
--- a/fs/afs/cache.h
+++ b/fs/afs/cache.h
@@ -1,4 +1,4 @@
1/* cache.h: AFS local cache management interface 1/* AFS local cache management interface
2 * 2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
@@ -9,8 +9,8 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#ifndef _LINUX_AFS_CACHE_H 12#ifndef AFS_CACHE_H
13#define _LINUX_AFS_CACHE_H 13#define AFS_CACHE_H
14 14
15#undef AFS_CACHING_SUPPORT 15#undef AFS_CACHING_SUPPORT
16 16
@@ -20,8 +20,4 @@
20#endif 20#endif
21#include "types.h" 21#include "types.h"
22 22
23#ifdef __KERNEL__ 23#endif /* AFS_CACHE_H */
24
25#endif /* __KERNEL__ */
26
27#endif /* _LINUX_AFS_CACHE_H */
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 9cb206e9d4be..639399f0ab6f 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2002 Red Hat, Inc. All rights reserved. 2 * Copyright (c) 2002, 2007 Red Hat, Inc. All rights reserved.
3 * 3 *
4 * This software may be freely redistributed under the terms of the 4 * This software may be freely redistributed under the terms of the
5 * GNU General Public License. 5 * GNU General Public License.
@@ -16,85 +16,187 @@
16#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/init.h> 18#include <linux/init.h>
19#include "server.h" 19#include <linux/circ_buf.h>
20#include "vnode.h"
21#include "internal.h" 20#include "internal.h"
22#include "cmservice.h"
23 21
24/*****************************************************************************/ 22unsigned afs_vnode_update_timeout = 10;
23
24#define afs_breakring_space(server) \
25 CIRC_SPACE((server)->cb_break_head, (server)->cb_break_tail, \
26 ARRAY_SIZE((server)->cb_break))
27
28//static void afs_callback_updater(struct work_struct *);
29
30static struct workqueue_struct *afs_callback_update_worker;
31
25/* 32/*
26 * allow the fileserver to request callback state (re-)initialisation 33 * allow the fileserver to request callback state (re-)initialisation
27 */ 34 */
28int SRXAFSCM_InitCallBackState(struct afs_server *server) 35void afs_init_callback_state(struct afs_server *server)
29{ 36{
30 struct list_head callbacks; 37 struct afs_vnode *vnode;
31 38
32 _enter("%p", server); 39 _enter("{%p}", server);
33 40
34 INIT_LIST_HEAD(&callbacks);
35
36 /* transfer the callback list from the server to a temp holding area */
37 spin_lock(&server->cb_lock); 41 spin_lock(&server->cb_lock);
38 42
39 list_add(&callbacks, &server->cb_promises); 43 /* kill all the promises on record from this server */
40 list_del_init(&server->cb_promises); 44 while (!RB_EMPTY_ROOT(&server->cb_promises)) {
45 vnode = rb_entry(server->cb_promises.rb_node,
46 struct afs_vnode, cb_promise);
47 _debug("UNPROMISE { vid=%x vn=%u uq=%u}",
48 vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
49 rb_erase(&vnode->cb_promise, &server->cb_promises);
50 vnode->cb_promised = false;
51 }
41 52
42 /* munch our way through the list, grabbing the inode, dropping all the 53 spin_unlock(&server->cb_lock);
43 * locks and regetting them in the right order 54 _leave("");
44 */ 55}
45 while (!list_empty(&callbacks)) {
46 struct afs_vnode *vnode;
47 struct inode *inode;
48 56
49 vnode = list_entry(callbacks.next, struct afs_vnode, cb_link); 57/*
50 list_del_init(&vnode->cb_link); 58 * handle the data invalidation side of a callback being broken
59 */
60void afs_broken_callback_work(struct work_struct *work)
61{
62 struct afs_vnode *vnode =
63 container_of(work, struct afs_vnode, cb_broken_work);
51 64
52 /* try and grab the inode - may fail */ 65 _enter("");
53 inode = igrab(AFS_VNODE_TO_I(vnode));
54 if (inode) {
55 int release = 0;
56 66
57 spin_unlock(&server->cb_lock); 67 if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
58 spin_lock(&vnode->lock); 68 return;
59 69
60 if (vnode->cb_server == server) { 70 /* we're only interested in dealing with a broken callback on *this*
61 vnode->cb_server = NULL; 71 * vnode and only if no-one else has dealt with it yet */
62 afs_kafstimod_del_timer(&vnode->cb_timeout); 72 if (!mutex_trylock(&vnode->validate_lock))
63 spin_lock(&afs_cb_hash_lock); 73 return; /* someone else is dealing with it */
64 list_del_init(&vnode->cb_hash_link);
65 spin_unlock(&afs_cb_hash_lock);
66 release = 1;
67 }
68 74
69 spin_unlock(&vnode->lock); 75 if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
76 if (S_ISDIR(vnode->vfs_inode.i_mode))
77 afs_clear_permits(vnode);
70 78
71 iput(inode); 79 if (afs_vnode_fetch_status(vnode, NULL, NULL) < 0)
72 afs_put_server(server); 80 goto out;
73 81
74 spin_lock(&server->cb_lock); 82 if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
83 goto out;
84
85 /* if the vnode's data version number changed then its contents
86 * are different */
87 if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
88 _debug("zap data {%x:%u}",
89 vnode->fid.vid, vnode->fid.vnode);
90 invalidate_remote_inode(&vnode->vfs_inode);
75 } 91 }
76 } 92 }
77 93
78 spin_unlock(&server->cb_lock); 94out:
95 mutex_unlock(&vnode->validate_lock);
79 96
80 _leave(" = 0"); 97 /* avoid the potential race whereby the mutex_trylock() in this
81 return 0; 98 * function happens again between the clear_bit() and the
82} /* end SRXAFSCM_InitCallBackState() */ 99 * mutex_unlock() */
100 if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
101 _debug("requeue");
102 queue_work(afs_callback_update_worker, &vnode->cb_broken_work);
103 }
104 _leave("");
105}
106
107/*
108 * actually break a callback
109 */
110static void afs_break_callback(struct afs_server *server,
111 struct afs_vnode *vnode)
112{
113 _enter("");
114
115 set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
116
117 if (vnode->cb_promised) {
118 spin_lock(&vnode->lock);
119
120 _debug("break callback");
121
122 spin_lock(&server->cb_lock);
123 if (vnode->cb_promised) {
124 rb_erase(&vnode->cb_promise, &server->cb_promises);
125 vnode->cb_promised = false;
126 }
127 spin_unlock(&server->cb_lock);
128
129 queue_work(afs_callback_update_worker, &vnode->cb_broken_work);
130 spin_unlock(&vnode->lock);
131 }
132}
133
134/*
135 * allow the fileserver to explicitly break one callback
136 * - happens when
137 * - the backing file is changed
138 * - a lock is released
139 */
140static void afs_break_one_callback(struct afs_server *server,
141 struct afs_fid *fid)
142{
143 struct afs_vnode *vnode;
144 struct rb_node *p;
145
146 _debug("find");
147 spin_lock(&server->fs_lock);
148 p = server->fs_vnodes.rb_node;
149 while (p) {
150 vnode = rb_entry(p, struct afs_vnode, server_rb);
151 if (fid->vid < vnode->fid.vid)
152 p = p->rb_left;
153 else if (fid->vid > vnode->fid.vid)
154 p = p->rb_right;
155 else if (fid->vnode < vnode->fid.vnode)
156 p = p->rb_left;
157 else if (fid->vnode > vnode->fid.vnode)
158 p = p->rb_right;
159 else if (fid->unique < vnode->fid.unique)
160 p = p->rb_left;
161 else if (fid->unique > vnode->fid.unique)
162 p = p->rb_right;
163 else
164 goto found;
165 }
166
167 /* not found so we just ignore it (it may have moved to another
168 * server) */
169not_available:
170 _debug("not avail");
171 spin_unlock(&server->fs_lock);
172 _leave("");
173 return;
174
175found:
176 _debug("found");
177 ASSERTCMP(server, ==, vnode->server);
178
179 if (!igrab(AFS_VNODE_TO_I(vnode)))
180 goto not_available;
181 spin_unlock(&server->fs_lock);
182
183 afs_break_callback(server, vnode);
184 iput(&vnode->vfs_inode);
185 _leave("");
186}
83 187
84/*****************************************************************************/
85/* 188/*
86 * allow the fileserver to break callback promises 189 * allow the fileserver to break callback promises
87 */ 190 */
88int SRXAFSCM_CallBack(struct afs_server *server, size_t count, 191void afs_break_callbacks(struct afs_server *server, size_t count,
89 struct afs_callback callbacks[]) 192 struct afs_callback callbacks[])
90{ 193{
91 _enter("%p,%u,", server, count); 194 _enter("%p,%zu,", server, count);
92 195
93 for (; count > 0; callbacks++, count--) { 196 ASSERT(server != NULL);
94 struct afs_vnode *vnode = NULL; 197 ASSERTCMP(count, <=, AFSCBMAX);
95 struct inode *inode = NULL;
96 int valid = 0;
97 198
199 for (; count > 0; callbacks++, count--) {
98 _debug("- Fid { vl=%08x n=%u u=%u } CB { v=%u x=%u t=%u }", 200 _debug("- Fid { vl=%08x n=%u u=%u } CB { v=%u x=%u t=%u }",
99 callbacks->fid.vid, 201 callbacks->fid.vid,
100 callbacks->fid.vnode, 202 callbacks->fid.vnode,
@@ -103,67 +205,270 @@ int SRXAFSCM_CallBack(struct afs_server *server, size_t count,
103 callbacks->expiry, 205 callbacks->expiry,
104 callbacks->type 206 callbacks->type
105 ); 207 );
208 afs_break_one_callback(server, &callbacks->fid);
209 }
106 210
107 /* find the inode for this fid */ 211 _leave("");
108 spin_lock(&afs_cb_hash_lock); 212 return;
213}
109 214
110 list_for_each_entry(vnode, 215/*
111 &afs_cb_hash(server, &callbacks->fid), 216 * record the callback for breaking
112 cb_hash_link) { 217 * - the caller must hold server->cb_lock
113 if (memcmp(&vnode->fid, &callbacks->fid, 218 */
114 sizeof(struct afs_fid)) != 0) 219static void afs_do_give_up_callback(struct afs_server *server,
115 continue; 220 struct afs_vnode *vnode)
221{
222 struct afs_callback *cb;
116 223
117 /* right vnode, but is it same server? */ 224 _enter("%p,%p", server, vnode);
118 if (vnode->cb_server != server)
119 break; /* no */
120 225
121 /* try and nail the inode down */ 226 cb = &server->cb_break[server->cb_break_head];
122 inode = igrab(AFS_VNODE_TO_I(vnode)); 227 cb->fid = vnode->fid;
123 break; 228 cb->version = vnode->cb_version;
229 cb->expiry = vnode->cb_expiry;
230 cb->type = vnode->cb_type;
231 smp_wmb();
232 server->cb_break_head =
233 (server->cb_break_head + 1) &
234 (ARRAY_SIZE(server->cb_break) - 1);
235
236 /* defer the breaking of callbacks to try and collect as many as
237 * possible to ship in one operation */
238 switch (atomic_inc_return(&server->cb_break_n)) {
239 case 1 ... AFSCBMAX - 1:
240 queue_delayed_work(afs_callback_update_worker,
241 &server->cb_break_work, HZ * 2);
242 break;
243 case AFSCBMAX:
244 afs_flush_callback_breaks(server);
245 break;
246 default:
247 break;
248 }
249
250 ASSERT(server->cb_promises.rb_node != NULL);
251 rb_erase(&vnode->cb_promise, &server->cb_promises);
252 vnode->cb_promised = false;
253 _leave("");
254}
255
256/*
257 * discard the callback on a deleted item
258 */
259void afs_discard_callback_on_delete(struct afs_vnode *vnode)
260{
261 struct afs_server *server = vnode->server;
262
263 _enter("%d", vnode->cb_promised);
264
265 if (!vnode->cb_promised) {
266 _leave(" [not promised]");
267 return;
268 }
269
270 ASSERT(server != NULL);
271
272 spin_lock(&server->cb_lock);
273 if (vnode->cb_promised) {
274 ASSERT(server->cb_promises.rb_node != NULL);
275 rb_erase(&vnode->cb_promise, &server->cb_promises);
276 vnode->cb_promised = false;
277 }
278 spin_unlock(&server->cb_lock);
279 _leave("");
280}
281
282/*
283 * give up the callback registered for a vnode on the file server when the
284 * inode is being cleared
285 */
286void afs_give_up_callback(struct afs_vnode *vnode)
287{
288 struct afs_server *server = vnode->server;
289
290 DECLARE_WAITQUEUE(myself, current);
291
292 _enter("%d", vnode->cb_promised);
293
294 _debug("GIVE UP INODE %p", &vnode->vfs_inode);
295
296 if (!vnode->cb_promised) {
297 _leave(" [not promised]");
298 return;
299 }
300
301 ASSERT(server != NULL);
302
303 spin_lock(&server->cb_lock);
304 if (vnode->cb_promised && afs_breakring_space(server) == 0) {
305 add_wait_queue(&server->cb_break_waitq, &myself);
306 for (;;) {
307 set_current_state(TASK_UNINTERRUPTIBLE);
308 if (!vnode->cb_promised ||
309 afs_breakring_space(server) != 0)
310 break;
311 spin_unlock(&server->cb_lock);
312 schedule();
313 spin_lock(&server->cb_lock);
124 } 314 }
315 remove_wait_queue(&server->cb_break_waitq, &myself);
316 __set_current_state(TASK_RUNNING);
317 }
318
319 /* of course, it's always possible for the server to break this vnode's
320 * callback first... */
321 if (vnode->cb_promised)
322 afs_do_give_up_callback(server, vnode);
323
324 spin_unlock(&server->cb_lock);
325 _leave("");
326}
327
328/*
329 * dispatch a deferred give up callbacks operation
330 */
331void afs_dispatch_give_up_callbacks(struct work_struct *work)
332{
333 struct afs_server *server =
334 container_of(work, struct afs_server, cb_break_work.work);
335
336 _enter("");
337
338 /* tell the fileserver to discard the callback promises it has
339 * - in the event of ENOMEM or some other error, we just forget that we
340 * had callbacks entirely, and the server will call us later to break
341 * them
342 */
343 afs_fs_give_up_callbacks(server, &afs_async_call);
344}
345
346/*
347 * flush the outstanding callback breaks on a server
348 */
349void afs_flush_callback_breaks(struct afs_server *server)
350{
351 cancel_delayed_work(&server->cb_break_work);
352 queue_delayed_work(afs_callback_update_worker,
353 &server->cb_break_work, 0);
354}
125 355
126 spin_unlock(&afs_cb_hash_lock); 356#if 0
127 357/*
128 if (inode) { 358 * update a bunch of callbacks
129 /* we've found the record for this vnode */ 359 */
130 spin_lock(&vnode->lock); 360static void afs_callback_updater(struct work_struct *work)
131 if (vnode->cb_server == server) { 361{
132 /* the callback _is_ on the calling server */ 362 struct afs_server *server;
133 vnode->cb_server = NULL; 363 struct afs_vnode *vnode, *xvnode;
134 valid = 1; 364 time_t now;
135 365 long timeout;
136 afs_kafstimod_del_timer(&vnode->cb_timeout); 366 int ret;
137 vnode->flags |= AFS_VNODE_CHANGED; 367
138 368 server = container_of(work, struct afs_server, updater);
139 spin_lock(&server->cb_lock); 369
140 list_del_init(&vnode->cb_link); 370 _enter("");
141 spin_unlock(&server->cb_lock); 371
142 372 now = get_seconds();
143 spin_lock(&afs_cb_hash_lock); 373
144 list_del_init(&vnode->cb_hash_link); 374 /* find the first vnode to update */
145 spin_unlock(&afs_cb_hash_lock); 375 spin_lock(&server->cb_lock);
146 } 376 for (;;) {
147 spin_unlock(&vnode->lock); 377 if (RB_EMPTY_ROOT(&server->cb_promises)) {
148 378 spin_unlock(&server->cb_lock);
149 if (valid) { 379 _leave(" [nothing]");
150 invalidate_remote_inode(inode); 380 return;
151 afs_put_server(server);
152 }
153 iput(inode);
154 } 381 }
382
383 vnode = rb_entry(rb_first(&server->cb_promises),
384 struct afs_vnode, cb_promise);
385 if (atomic_read(&vnode->usage) > 0)
386 break;
387 rb_erase(&vnode->cb_promise, &server->cb_promises);
388 vnode->cb_promised = false;
155 } 389 }
156 390
157 _leave(" = 0"); 391 timeout = vnode->update_at - now;
158 return 0; 392 if (timeout > 0) {
159} /* end SRXAFSCM_CallBack() */ 393 queue_delayed_work(afs_vnode_update_worker,
394 &afs_vnode_update, timeout * HZ);
395 spin_unlock(&server->cb_lock);
396 _leave(" [nothing]");
397 return;
398 }
399
400 list_del_init(&vnode->update);
401 atomic_inc(&vnode->usage);
402 spin_unlock(&server->cb_lock);
403
404 /* we can now perform the update */
405 _debug("update %s", vnode->vldb.name);
406 vnode->state = AFS_VL_UPDATING;
407 vnode->upd_rej_cnt = 0;
408 vnode->upd_busy_cnt = 0;
409
410 ret = afs_vnode_update_record(vl, &vldb);
411 switch (ret) {
412 case 0:
413 afs_vnode_apply_update(vl, &vldb);
414 vnode->state = AFS_VL_UPDATING;
415 break;
416 case -ENOMEDIUM:
417 vnode->state = AFS_VL_VOLUME_DELETED;
418 break;
419 default:
420 vnode->state = AFS_VL_UNCERTAIN;
421 break;
422 }
423
424 /* and then reschedule */
425 _debug("reschedule");
426 vnode->update_at = get_seconds() + afs_vnode_update_timeout;
427
428 spin_lock(&server->cb_lock);
429
430 if (!list_empty(&server->cb_promises)) {
431 /* next update in 10 minutes, but wait at least 1 second more
432 * than the newest record already queued so that we don't spam
433 * the VL server suddenly with lots of requests
434 */
435 xvnode = list_entry(server->cb_promises.prev,
436 struct afs_vnode, update);
437 if (vnode->update_at <= xvnode->update_at)
438 vnode->update_at = xvnode->update_at + 1;
439 xvnode = list_entry(server->cb_promises.next,
440 struct afs_vnode, update);
441 timeout = xvnode->update_at - now;
442 if (timeout < 0)
443 timeout = 0;
444 } else {
445 timeout = afs_vnode_update_timeout;
446 }
447
448 list_add_tail(&vnode->update, &server->cb_promises);
449
450 _debug("timeout %ld", timeout);
451 queue_delayed_work(afs_vnode_update_worker,
452 &afs_vnode_update, timeout * HZ);
453 spin_unlock(&server->cb_lock);
454 afs_put_vnode(vl);
455}
456#endif
457
458/*
459 * initialise the callback update process
460 */
461int __init afs_callback_update_init(void)
462{
463 afs_callback_update_worker =
464 create_singlethread_workqueue("kafs_callbackd");
465 return afs_callback_update_worker ? 0 : -ENOMEM;
466}
160 467
161/*****************************************************************************/
162/* 468/*
163 * allow the fileserver to see if the cache manager is still alive 469 * shut down the callback update process
164 */ 470 */
165int SRXAFSCM_Probe(struct afs_server *server) 471void __exit afs_callback_update_kill(void)
166{ 472{
167 _debug("SRXAFSCM_Probe(%p)\n", server); 473 destroy_workqueue(afs_callback_update_worker);
168 return 0; 474}
169} /* end SRXAFSCM_Probe() */
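afs_do_give_up_callback() above is a textbook single-producer circular buffer: CIRC_SPACE() (from linux/circ_buf.h, with a power-of-two size) gates admission, the record is written, an smp_wmb() publishes it, and only then is the head advanced. A minimal self-contained sketch of that producer step (assuming a single producer under a lock, as cb_lock provides above):

#include <linux/circ_buf.h>

#define EX_RING_SIZE 64			/* must be a power of two */

struct ex_ring {
	struct afs_callback slots[EX_RING_SIZE];
	unsigned head;			/* written by the producer */
	unsigned tail;			/* written by the consumer */
};

static int ex_ring_push(struct ex_ring *r, const struct afs_callback *cb)
{
	if (CIRC_SPACE(r->head, r->tail, EX_RING_SIZE) == 0)
		return -ENOBUFS;	/* full: the caller waits, as above */
	r->slots[r->head] = *cb;
	smp_wmb();			/* publish the record before moving head */
	r->head = (r->head + 1) & (EX_RING_SIZE - 1);
	return 0;
}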
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 1fc578372759..9b1311a1df51 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -1,4 +1,4 @@
1/* cell.c: AFS cell and server record management 1/* AFS cell and server record management
2 * 2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
@@ -11,15 +11,9 @@
11 11
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <rxrpc/peer.h> 14#include <linux/key.h>
15#include <rxrpc/connection.h> 15#include <linux/ctype.h>
16#include "volume.h" 16#include <keys/rxrpc-type.h>
17#include "cell.h"
18#include "server.h"
19#include "transport.h"
20#include "vlclient.h"
21#include "kafstimod.h"
22#include "super.h"
23#include "internal.h" 17#include "internal.h"
24 18
25DECLARE_RWSEM(afs_proc_cells_sem); 19DECLARE_RWSEM(afs_proc_cells_sem);
@@ -28,66 +22,47 @@ LIST_HEAD(afs_proc_cells);
28static struct list_head afs_cells = LIST_HEAD_INIT(afs_cells); 22static struct list_head afs_cells = LIST_HEAD_INIT(afs_cells);
29static DEFINE_RWLOCK(afs_cells_lock); 23static DEFINE_RWLOCK(afs_cells_lock);
30static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */ 24static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */
25static DECLARE_WAIT_QUEUE_HEAD(afs_cells_freeable_wq);
31static struct afs_cell *afs_cell_root; 26static struct afs_cell *afs_cell_root;
32 27
33#ifdef AFS_CACHING_SUPPORT
34static cachefs_match_val_t afs_cell_cache_match(void *target,
35 const void *entry);
36static void afs_cell_cache_update(void *source, void *entry);
37
38struct cachefs_index_def afs_cache_cell_index_def = {
39 .name = "cell_ix",
40 .data_size = sizeof(struct afs_cache_cell),
41 .keys[0] = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 },
42 .match = afs_cell_cache_match,
43 .update = afs_cell_cache_update,
44};
45#endif
46
47/*****************************************************************************/
48/* 28/*
49 * create a cell record 29 * allocate a cell record and fill in its name, VL server address list and
50 * - "name" is the name of the cell 30 * allocate an anonymous key
51 * - "vllist" is a colon separated list of IP addresses in "a.b.c.d" format
52 */ 31 */
53int afs_cell_create(const char *name, char *vllist, struct afs_cell **_cell) 32static struct afs_cell *afs_cell_alloc(const char *name, char *vllist)
54{ 33{
55 struct afs_cell *cell; 34 struct afs_cell *cell;
56 char *next; 35 size_t namelen;
36 char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next;
57 int ret; 37 int ret;
58 38
59 _enter("%s", name); 39 _enter("%s,%s", name, vllist);
60 40
61 BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */ 41 BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */
62 42
43 namelen = strlen(name);
44 if (namelen > AFS_MAXCELLNAME)
45 return ERR_PTR(-ENAMETOOLONG);
46
63 /* allocate and initialise a cell record */ 47 /* allocate and initialise a cell record */
64 cell = kmalloc(sizeof(struct afs_cell) + strlen(name) + 1, GFP_KERNEL); 48 cell = kzalloc(sizeof(struct afs_cell) + namelen + 1, GFP_KERNEL);
65 if (!cell) { 49 if (!cell) {
66 _leave(" = -ENOMEM"); 50 _leave(" = -ENOMEM");
67 return -ENOMEM; 51 return ERR_PTR(-ENOMEM);
68 } 52 }
69 53
70 down_write(&afs_cells_sem); 54 memcpy(cell->name, name, namelen);
71 55 cell->name[namelen] = 0;
72 memset(cell, 0, sizeof(struct afs_cell));
73 atomic_set(&cell->usage, 0);
74 56
57 atomic_set(&cell->usage, 1);
75 INIT_LIST_HEAD(&cell->link); 58 INIT_LIST_HEAD(&cell->link);
76 59 rwlock_init(&cell->servers_lock);
77 rwlock_init(&cell->sv_lock); 60 INIT_LIST_HEAD(&cell->servers);
78 INIT_LIST_HEAD(&cell->sv_list);
79 INIT_LIST_HEAD(&cell->sv_graveyard);
80 spin_lock_init(&cell->sv_gylock);
81
82 init_rwsem(&cell->vl_sem); 61 init_rwsem(&cell->vl_sem);
83 INIT_LIST_HEAD(&cell->vl_list); 62 INIT_LIST_HEAD(&cell->vl_list);
84 INIT_LIST_HEAD(&cell->vl_graveyard); 63 spin_lock_init(&cell->vl_lock);
85 spin_lock_init(&cell->vl_gylock);
86
87 strcpy(cell->name,name);
88 64
89 /* fill in the VL server list from the rest of the string */ 65 /* fill in the VL server list from the rest of the string */
90 ret = -EINVAL;
91 do { 66 do {
92 unsigned a, b, c, d; 67 unsigned a, b, c, d;
93 68
@@ -96,20 +71,75 @@ int afs_cell_create(const char *name, char *vllist, struct afs_cell **_cell)
96 *next++ = 0; 71 *next++ = 0;
97 72
98 if (sscanf(vllist, "%u.%u.%u.%u", &a, &b, &c, &d) != 4) 73 if (sscanf(vllist, "%u.%u.%u.%u", &a, &b, &c, &d) != 4)
99 goto badaddr; 74 goto bad_address;
100 75
101 if (a > 255 || b > 255 || c > 255 || d > 255) 76 if (a > 255 || b > 255 || c > 255 || d > 255)
102 goto badaddr; 77 goto bad_address;
103 78
104 cell->vl_addrs[cell->vl_naddrs++].s_addr = 79 cell->vl_addrs[cell->vl_naddrs++].s_addr =
105 htonl((a << 24) | (b << 16) | (c << 8) | d); 80 htonl((a << 24) | (b << 16) | (c << 8) | d);
106 81
107 if (cell->vl_naddrs >= AFS_CELL_MAX_ADDRS) 82 } while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (vllist = next));
108 break; 83
84 /* create a key to represent an anonymous user */
85 memcpy(keyname, "afs@", 4);
86 dp = keyname + 4;
87 cp = cell->name;
88 do {
89 *dp++ = toupper(*cp);
90 } while (*cp++);
91 cell->anonymous_key = key_alloc(&key_type_rxrpc, keyname, 0, 0, current,
92 KEY_POS_SEARCH, KEY_ALLOC_NOT_IN_QUOTA);
93 if (IS_ERR(cell->anonymous_key)) {
94 _debug("no key");
95 ret = PTR_ERR(cell->anonymous_key);
96 goto error;
97 }
98
99 ret = key_instantiate_and_link(cell->anonymous_key, NULL, 0,
100 NULL, NULL);
101 if (ret < 0) {
102 _debug("instantiate failed");
103 goto error;
104 }
105
106 _debug("anon key %p{%x}",
107 cell->anonymous_key, key_serial(cell->anonymous_key));
108
109 _leave(" = %p", cell);
110 return cell;
111
112bad_address:
113 printk(KERN_ERR "kAFS: bad VL server IP address\n");
114 ret = -EINVAL;
115error:
116 key_put(cell->anonymous_key);
117 kfree(cell);
118 _leave(" = %d", ret);
119 return ERR_PTR(ret);
120}
121
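
[Note: afs_cell_alloc() above does two pieces of string work. It splits the colon-separated "a.b.c.d" VL list with sscanf(), and it builds the key name with a do/while whose post-increment test deliberately copies the terminating NUL along with the upper-cased characters (keyname is sized 4 + AFS_MAXCELLNAME + 1 to fit the "afs@" prefix, the name and that NUL). A standalone userspace sketch of the address parsing, assuming the same 15-address cap as AFS_CELL_MAX_ADDRS:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <arpa/inet.h>

    #define MAX_ADDRS 15    /* stands in for AFS_CELL_MAX_ADDRS */

    /* Split a colon-separated list of dotted quads and pack each one
     * into a network-order 32-bit address.  Returns the number of
     * addresses parsed, or -1 on a malformed entry. */
    static int parse_vllist(char *vllist, uint32_t addrs[MAX_ADDRS])
    {
            int n = 0;

            do {
                    unsigned a, b, c, d;
                    char *next = strchr(vllist, ':');

                    if (next)
                            *next++ = 0;
                    if (sscanf(vllist, "%u.%u.%u.%u", &a, &b, &c, &d) != 4)
                            return -1;
                    if (a > 255 || b > 255 || c > 255 || d > 255)
                            return -1;
                    addrs[n++] = htonl((a << 24) | (b << 16) | (c << 8) | d);
                    vllist = next;
            } while (n < MAX_ADDRS && vllist);

            return n;
    }
]
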
122/*
123 * create a cell record
124 * - "name" is the name of the cell
125 * - "vllist" is a colon separated list of IP addresses in "a.b.c.d" format
126 */
127struct afs_cell *afs_cell_create(const char *name, char *vllist)
128{
129 struct afs_cell *cell;
130 int ret;
131
132 _enter("%s,%s", name, vllist);
109 133
110 } while(vllist = next, vllist); 134 cell = afs_cell_alloc(name, vllist);
135 if (IS_ERR(cell)) {
136 _leave(" = %ld", PTR_ERR(cell));
137 return cell;
138 }
139
140 down_write(&afs_cells_sem);
111 141
112 /* add a proc dir for this cell */ 142 /* add a proc directory for this cell */
113 ret = afs_proc_cell_setup(cell); 143 ret = afs_proc_cell_setup(cell);
114 if (ret < 0) 144 if (ret < 0)
115 goto error; 145 goto error;
@@ -130,31 +160,28 @@ int afs_cell_create(const char *name, char *vllist, struct afs_cell **_cell)
130 down_write(&afs_proc_cells_sem); 160 down_write(&afs_proc_cells_sem);
131 list_add_tail(&cell->proc_link, &afs_proc_cells); 161 list_add_tail(&cell->proc_link, &afs_proc_cells);
132 up_write(&afs_proc_cells_sem); 162 up_write(&afs_proc_cells_sem);
133
134 *_cell = cell;
135 up_write(&afs_cells_sem); 163 up_write(&afs_cells_sem);
136 164
137 _leave(" = 0 (%p)", cell); 165 _leave(" = %p", cell);
138 return 0; 166 return cell;
139 167
140 badaddr: 168error:
141 printk(KERN_ERR "kAFS: bad VL server IP address: '%s'\n", vllist);
142 error:
143 up_write(&afs_cells_sem); 169 up_write(&afs_cells_sem);
170 key_put(cell->anonymous_key);
144 kfree(cell); 171 kfree(cell);
145 _leave(" = %d", ret); 172 _leave(" = %d", ret);
146 return ret; 173 return ERR_PTR(ret);
147} /* end afs_cell_create() */ 174}
148 175
149/*****************************************************************************/
150/* 176/*
151 * initialise the cell database from module parameters 177 * set the root cell information
178 * - can be called with a module parameter string
179 * - can be called from a write to /proc/fs/afs/rootcell
152 */ 180 */
153int afs_cell_init(char *rootcell) 181int afs_cell_init(char *rootcell)
154{ 182{
155 struct afs_cell *old_root, *new_root; 183 struct afs_cell *old_root, *new_root;
156 char *cp; 184 char *cp;
157 int ret;
158 185
159 _enter(""); 186 _enter("");
160 187
@@ -162,82 +189,60 @@ int afs_cell_init(char *rootcell)
162 /* module is loaded with no parameters, or built statically. 189 /* module is loaded with no parameters, or built statically.
163 * - in the future we might initialize cell DB here. 190 * - in the future we might initialize cell DB here.
164 */ 191 */
165 _leave(" = 0 (but no root)"); 192 _leave(" = 0 [no root]");
166 return 0; 193 return 0;
167 } 194 }
168 195
169 cp = strchr(rootcell, ':'); 196 cp = strchr(rootcell, ':');
170 if (!cp) { 197 if (!cp) {
171 printk(KERN_ERR "kAFS: no VL server IP addresses specified\n"); 198 printk(KERN_ERR "kAFS: no VL server IP addresses specified\n");
172 _leave(" = %d (no colon)", -EINVAL); 199 _leave(" = -EINVAL");
173 return -EINVAL; 200 return -EINVAL;
174 } 201 }
175 202
176 /* allocate a cell record for the root cell */ 203 /* allocate a cell record for the root cell */
177 *cp++ = 0; 204 *cp++ = 0;
178 ret = afs_cell_create(rootcell, cp, &new_root); 205 new_root = afs_cell_create(rootcell, cp);
179 if (ret < 0) { 206 if (IS_ERR(new_root)) {
180 _leave(" = %d", ret); 207 _leave(" = %ld", PTR_ERR(new_root));
181 return ret; 208 return PTR_ERR(new_root);
182 } 209 }
183 210
184 /* as afs_put_cell() takes locks by itself, we have to do 211 /* install the new cell */
185 * a little gymnastics to be race-free.
186 */
187 afs_get_cell(new_root);
188
189 write_lock(&afs_cells_lock); 212 write_lock(&afs_cells_lock);
190 while (afs_cell_root) { 213 old_root = afs_cell_root;
191 old_root = afs_cell_root;
192 afs_cell_root = NULL;
193 write_unlock(&afs_cells_lock);
194 afs_put_cell(old_root);
195 write_lock(&afs_cells_lock);
196 }
197 afs_cell_root = new_root; 214 afs_cell_root = new_root;
198 write_unlock(&afs_cells_lock); 215 write_unlock(&afs_cells_lock);
216 afs_put_cell(old_root);
199 217
200 _leave(" = %d", ret); 218 _leave(" = 0");
201 return ret; 219 return 0;
202 220}
203} /* end afs_cell_init() */
204 221
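
[Note: the rewritten install step in afs_cell_init() is worth calling out. The old code looped, dropping and retaking afs_cells_lock around each afs_put_cell(); the new code swaps the pointer once under the write lock and drops the old reference only after unlocking, so the put — which may do more than a simple decrement — never runs inside the spinlock. afs_put_cell() is called unconditionally on old_root, so it evidently must tolerate a NULL pointer on first initialisation. The pattern in isolation, with illustrative names (struct obj and obj_put() are assumptions, not from the patch):

    static DEFINE_RWLOCK(example_lock);
    static struct obj *shared;

    void install(struct obj *new)
    {
            struct obj *old;

            write_lock(&example_lock);
            old = shared;                   /* may be NULL the first time */
            shared = new;                   /* new's reference is transferred */
            write_unlock(&example_lock);
            obj_put(old);                   /* outside the lock; must accept NULL */
    }
]
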
205/*****************************************************************************/
206/* 222/*
207 * lookup a cell record 223 * lookup a cell record
208 */ 224 */
209int afs_cell_lookup(const char *name, unsigned namesz, struct afs_cell **_cell) 225struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz)
210{ 226{
211 struct afs_cell *cell; 227 struct afs_cell *cell;
212 int ret;
213 228
214 _enter("\"%*.*s\",", namesz, namesz, name ? name : ""); 229 _enter("\"%*.*s\",", namesz, namesz, name ? name : "");
215 230
216 *_cell = NULL; 231 down_read(&afs_cells_sem);
232 read_lock(&afs_cells_lock);
217 233
218 if (name) { 234 if (name) {
219 /* if the cell was named, look for it in the cell record list */ 235 /* if the cell was named, look for it in the cell record list */
220 ret = -ENOENT;
221 cell = NULL;
222 read_lock(&afs_cells_lock);
223
224 list_for_each_entry(cell, &afs_cells, link) { 236 list_for_each_entry(cell, &afs_cells, link) {
225 if (strncmp(cell->name, name, namesz) == 0) { 237 if (strncmp(cell->name, name, namesz) == 0) {
226 afs_get_cell(cell); 238 afs_get_cell(cell);
227 goto found; 239 goto found;
228 } 240 }
229 } 241 }
230 cell = NULL; 242 cell = ERR_PTR(-ENOENT);
231 found: 243 found:
232 244 ;
233 read_unlock(&afs_cells_lock); 245 } else {
234
235 if (cell)
236 ret = 0;
237 }
238 else {
239 read_lock(&afs_cells_lock);
240
241 cell = afs_cell_root; 246 cell = afs_cell_root;
242 if (!cell) { 247 if (!cell) {
243 /* this should not happen unless user tries to mount 248 /* this should not happen unless user tries to mount
@@ -246,44 +251,35 @@ int afs_cell_lookup(const char *name, unsigned namesz, struct afs_cell **_cell)
246 * ENOENT might be "more appropriate" but they happen 251 * ENOENT might be "more appropriate" but they happen
247 * for other reasons. 252 * for other reasons.
248 */ 253 */
249 ret = -EDESTADDRREQ; 254 cell = ERR_PTR(-EDESTADDRREQ);
250 } 255 } else {
251 else {
252 afs_get_cell(cell); 256 afs_get_cell(cell);
253 ret = 0;
254 } 257 }
255 258
256 read_unlock(&afs_cells_lock);
257 } 259 }
258 260
259 *_cell = cell; 261 read_unlock(&afs_cells_lock);
260 _leave(" = %d (%p)", ret, cell); 262 up_read(&afs_cells_sem);
261 return ret; 263 _leave(" = %p", cell);
262 264 return cell;
263} /* end afs_cell_lookup() */ 265}
264 266
265/*****************************************************************************/
266/* 267/*
267 * try and get a cell record 268 * try and get a cell record
268 */ 269 */
269struct afs_cell *afs_get_cell_maybe(struct afs_cell **_cell) 270struct afs_cell *afs_get_cell_maybe(struct afs_cell *cell)
270{ 271{
271 struct afs_cell *cell;
272
273 write_lock(&afs_cells_lock); 272 write_lock(&afs_cells_lock);
274 273
275 cell = *_cell;
276 if (cell && !list_empty(&cell->link)) 274 if (cell && !list_empty(&cell->link))
277 afs_get_cell(cell); 275 afs_get_cell(cell);
278 else 276 else
279 cell = NULL; 277 cell = NULL;
280 278
281 write_unlock(&afs_cells_lock); 279 write_unlock(&afs_cells_lock);
282
283 return cell; 280 return cell;
284} /* end afs_get_cell_maybe() */ 281}
285 282
286/*****************************************************************************/
287/* 283/*
288 * destroy a cell record 284 * destroy a cell record
289 */ 285 */
@@ -294,8 +290,7 @@ void afs_put_cell(struct afs_cell *cell)
294 290
295 _enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name); 291 _enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name);
296 292
297 /* sanity check */ 293 ASSERTCMP(atomic_read(&cell->usage), >, 0);
298 BUG_ON(atomic_read(&cell->usage) <= 0);
299 294
300 /* to prevent a race, the decrement and the dequeue must be effectively 295 /* to prevent a race, the decrement and the dequeue must be effectively
301 * atomic */ 296 * atomic */
@@ -307,36 +302,49 @@ void afs_put_cell(struct afs_cell *cell)
307 return; 302 return;
308 } 303 }
309 304
305 ASSERT(list_empty(&cell->servers));
306 ASSERT(list_empty(&cell->vl_list));
307
310 write_unlock(&afs_cells_lock); 308 write_unlock(&afs_cells_lock);
311 309
312 BUG_ON(!list_empty(&cell->sv_list)); 310 wake_up(&afs_cells_freeable_wq);
313 BUG_ON(!list_empty(&cell->sv_graveyard));
314 BUG_ON(!list_empty(&cell->vl_list));
315 BUG_ON(!list_empty(&cell->vl_graveyard));
316 311
317 _leave(" [unused]"); 312 _leave(" [unused]");
318} /* end afs_put_cell() */ 313}
319 314
320/*****************************************************************************/
321/* 315/*
322 * destroy a cell record 316 * destroy a cell record
317 * - must be called with the afs_cells_sem write-locked
318 * - cell->link should have been broken by the caller
323 */ 319 */
324static void afs_cell_destroy(struct afs_cell *cell) 320static void afs_cell_destroy(struct afs_cell *cell)
325{ 321{
326 _enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name); 322 _enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name);
327 323
328 /* to prevent a race, the decrement and the dequeue must be effectively 324 ASSERTCMP(atomic_read(&cell->usage), >=, 0);
329 * atomic */ 325 ASSERT(list_empty(&cell->link));
330 write_lock(&afs_cells_lock);
331 326
332 /* sanity check */ 327 /* wait for everyone to stop using the cell */
333 BUG_ON(atomic_read(&cell->usage) != 0); 328 if (atomic_read(&cell->usage) > 0) {
329 DECLARE_WAITQUEUE(myself, current);
334 330
335 list_del_init(&cell->link); 331 _debug("wait for cell %s", cell->name);
332 set_current_state(TASK_UNINTERRUPTIBLE);
333 add_wait_queue(&afs_cells_freeable_wq, &myself);
336 334
337 write_unlock(&afs_cells_lock); 335 while (atomic_read(&cell->usage) > 0) {
336 schedule();
337 set_current_state(TASK_UNINTERRUPTIBLE);
338 }
338 339
339 down_write(&afs_cells_sem); 340 remove_wait_queue(&afs_cells_freeable_wq, &myself);
341 set_current_state(TASK_RUNNING);
342 }
343
344 _debug("cell dead");
345 ASSERTCMP(atomic_read(&cell->usage), ==, 0);
346 ASSERT(list_empty(&cell->servers));
347 ASSERT(list_empty(&cell->vl_list));
340 348
341 afs_proc_cell_remove(cell); 349 afs_proc_cell_remove(cell);
342 350
@@ -348,104 +356,26 @@ static void afs_cell_destroy(struct afs_cell *cell)
348 cachefs_relinquish_cookie(cell->cache, 0); 356 cachefs_relinquish_cookie(cell->cache, 0);
349#endif 357#endif
350 358
351 up_write(&afs_cells_sem); 359 key_put(cell->anonymous_key);
352
353 BUG_ON(!list_empty(&cell->sv_list));
354 BUG_ON(!list_empty(&cell->sv_graveyard));
355 BUG_ON(!list_empty(&cell->vl_list));
356 BUG_ON(!list_empty(&cell->vl_graveyard));
357
358 /* finish cleaning up the cell */
359 kfree(cell); 360 kfree(cell);
360 361
361 _leave(" [destroyed]"); 362 _leave(" [destroyed]");
362} /* end afs_cell_destroy() */ 363}
363
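
[Note: afs_cell_destroy() now drains the refcount with the classic wait-queue idiom instead of the old timer graveyards: the destroyer queues itself on afs_cells_freeable_wq and sleeps until afs_put_cell() — which, per the hunk above, does wake_up() on that queue when a cell becomes unused — lets it proceed. The canonical shape of the idiom follows; the memory barrier implied by set_current_state() is what prevents a lost wakeup between the condition test and schedule(). Names are illustrative:

    #include <linux/sched.h>
    #include <linux/wait.h>

    DECLARE_WAITQUEUE(myself, current);

    add_wait_queue(&freeable_wq, &myself);
    for (;;) {
            set_current_state(TASK_UNINTERRUPTIBLE);
            if (atomic_read(&obj->usage) == 0)      /* waker decrements, then wakes */
                    break;
            schedule();
    }
    set_current_state(TASK_RUNNING);
    remove_wait_queue(&freeable_wq, &myself);
]
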
364/*****************************************************************************/
365/*
366 * lookup the server record corresponding to an Rx RPC peer
367 */
368int afs_server_find_by_peer(const struct rxrpc_peer *peer,
369 struct afs_server **_server)
370{
371 struct afs_server *server;
372 struct afs_cell *cell;
373
374 _enter("%p{a=%08x},", peer, ntohl(peer->addr.s_addr));
375
376 /* search the cell list */
377 read_lock(&afs_cells_lock);
378
379 list_for_each_entry(cell, &afs_cells, link) {
380
381 _debug("? cell %s",cell->name);
382
383 write_lock(&cell->sv_lock);
384
385 /* check the active list */
386 list_for_each_entry(server, &cell->sv_list, link) {
387 _debug("?? server %08x", ntohl(server->addr.s_addr));
388
389 if (memcmp(&server->addr, &peer->addr,
390 sizeof(struct in_addr)) == 0)
391 goto found_server;
392 }
393 364
394 /* check the inactive list */
395 spin_lock(&cell->sv_gylock);
396 list_for_each_entry(server, &cell->sv_graveyard, link) {
397 _debug("?? dead server %08x",
398 ntohl(server->addr.s_addr));
399
400 if (memcmp(&server->addr, &peer->addr,
401 sizeof(struct in_addr)) == 0)
402 goto found_dead_server;
403 }
404 spin_unlock(&cell->sv_gylock);
405
406 write_unlock(&cell->sv_lock);
407 }
408 read_unlock(&afs_cells_lock);
409
410 _leave(" = -ENOENT");
411 return -ENOENT;
412
413 /* we found it in the graveyard - resurrect it */
414 found_dead_server:
415 list_move_tail(&server->link, &cell->sv_list);
416 afs_get_server(server);
417 afs_kafstimod_del_timer(&server->timeout);
418 spin_unlock(&cell->sv_gylock);
419 goto success;
420
421 /* we found it - increment its ref count and return it */
422 found_server:
423 afs_get_server(server);
424
425 success:
426 write_unlock(&cell->sv_lock);
427 read_unlock(&afs_cells_lock);
428
429 *_server = server;
430 _leave(" = 0 (s=%p c=%p)", server, cell);
431 return 0;
432
433} /* end afs_server_find_by_peer() */
434
435/*****************************************************************************/
436/* 365/*
437 * purge in-memory cell database on module unload or afs_init() failure 366 * purge in-memory cell database on module unload or afs_init() failure
438 * - the timeout daemon is stopped before calling this 367 * - the timeout daemon is stopped before calling this
439 */ 368 */
440void afs_cell_purge(void) 369void afs_cell_purge(void)
441{ 370{
442 struct afs_vlocation *vlocation;
443 struct afs_cell *cell; 371 struct afs_cell *cell;
444 372
445 _enter(""); 373 _enter("");
446 374
447 afs_put_cell(afs_cell_root); 375 afs_put_cell(afs_cell_root);
448 376
377 down_write(&afs_cells_sem);
378
449 while (!list_empty(&afs_cells)) { 379 while (!list_empty(&afs_cells)) {
450 cell = NULL; 380 cell = NULL;
451 381
@@ -464,104 +394,11 @@ void afs_cell_purge(void)
464 _debug("PURGING CELL %s (%d)", 394 _debug("PURGING CELL %s (%d)",
465 cell->name, atomic_read(&cell->usage)); 395 cell->name, atomic_read(&cell->usage));
466 396
467 BUG_ON(!list_empty(&cell->sv_list));
468 BUG_ON(!list_empty(&cell->vl_list));
469
470 /* purge the cell's VL graveyard list */
471 _debug(" - clearing VL graveyard");
472
473 spin_lock(&cell->vl_gylock);
474
475 while (!list_empty(&cell->vl_graveyard)) {
476 vlocation = list_entry(cell->vl_graveyard.next,
477 struct afs_vlocation,
478 link);
479 list_del_init(&vlocation->link);
480
481 afs_kafstimod_del_timer(&vlocation->timeout);
482
483 spin_unlock(&cell->vl_gylock);
484
485 afs_vlocation_do_timeout(vlocation);
486 /* TODO: race if move to use krxtimod instead
487 * of kafstimod */
488
489 spin_lock(&cell->vl_gylock);
490 }
491
492 spin_unlock(&cell->vl_gylock);
493
494 /* purge the cell's server graveyard list */
495 _debug(" - clearing server graveyard");
496
497 spin_lock(&cell->sv_gylock);
498
499 while (!list_empty(&cell->sv_graveyard)) {
500 struct afs_server *server;
501
502 server = list_entry(cell->sv_graveyard.next,
503 struct afs_server, link);
504 list_del_init(&server->link);
505
506 afs_kafstimod_del_timer(&server->timeout);
507
508 spin_unlock(&cell->sv_gylock);
509
510 afs_server_do_timeout(server);
511
512 spin_lock(&cell->sv_gylock);
513 }
514
515 spin_unlock(&cell->sv_gylock);
516
517 /* now the cell should be left with no references */ 397 /* now the cell should be left with no references */
518 afs_cell_destroy(cell); 398 afs_cell_destroy(cell);
519 } 399 }
520 } 400 }
521 401
402 up_write(&afs_cells_sem);
522 _leave(""); 403 _leave("");
523} /* end afs_cell_purge() */ 404}
524
525/*****************************************************************************/
526/*
527 * match a cell record obtained from the cache
528 */
529#ifdef AFS_CACHING_SUPPORT
530static cachefs_match_val_t afs_cell_cache_match(void *target,
531 const void *entry)
532{
533 const struct afs_cache_cell *ccell = entry;
534 struct afs_cell *cell = target;
535
536 _enter("{%s},{%s}", ccell->name, cell->name);
537
538 if (strncmp(ccell->name, cell->name, sizeof(ccell->name)) == 0) {
539 _leave(" = SUCCESS");
540 return CACHEFS_MATCH_SUCCESS;
541 }
542
543 _leave(" = FAILED");
544 return CACHEFS_MATCH_FAILED;
545} /* end afs_cell_cache_match() */
546#endif
547
548/*****************************************************************************/
549/*
550 * update a cell record in the cache
551 */
552#ifdef AFS_CACHING_SUPPORT
553static void afs_cell_cache_update(void *source, void *entry)
554{
555 struct afs_cache_cell *ccell = entry;
556 struct afs_cell *cell = source;
557
558 _enter("%p,%p", source, entry);
559
560 strncpy(ccell->name, cell->name, sizeof(ccell->name));
561
562 memcpy(ccell->vl_servers,
563 cell->vl_addrs,
564 min(sizeof(ccell->vl_servers), sizeof(cell->vl_addrs)));
565
566} /* end afs_cell_cache_update() */
567#endif
diff --git a/fs/afs/cell.h b/fs/afs/cell.h
deleted file mode 100644
index 48349108fb00..000000000000
--- a/fs/afs/cell.h
+++ /dev/null
@@ -1,78 +0,0 @@
1/* cell.h: AFS cell record
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_AFS_CELL_H
13#define _LINUX_AFS_CELL_H
14
15#include "types.h"
16#include "cache.h"
17
18#define AFS_CELL_MAX_ADDRS 15
19
20extern volatile int afs_cells_being_purged; /* T when cells are being purged by rmmod */
21
22/*****************************************************************************/
23/*
24 * entry in the cached cell catalogue
25 */
26struct afs_cache_cell
27{
28 char name[64]; /* cell name (padded with NULs) */
29 struct in_addr vl_servers[15]; /* cached cell VL servers */
30};
31
32/*****************************************************************************/
33/*
34 * AFS cell record
35 */
36struct afs_cell
37{
38 atomic_t usage;
39 struct list_head link; /* main cell list link */
40 struct list_head proc_link; /* /proc cell list link */
41 struct proc_dir_entry *proc_dir; /* /proc dir for this cell */
42#ifdef AFS_CACHING_SUPPORT
43 struct cachefs_cookie *cache; /* caching cookie */
44#endif
45
46 /* server record management */
47 rwlock_t sv_lock; /* active server list lock */
48 struct list_head sv_list; /* active server list */
49 struct list_head sv_graveyard; /* inactive server list */
50 spinlock_t sv_gylock; /* inactive server list lock */
51
52 /* volume location record management */
53 struct rw_semaphore vl_sem; /* volume management serialisation semaphore */
54 struct list_head vl_list; /* cell's active VL record list */
55 struct list_head vl_graveyard; /* cell's inactive VL record list */
56 spinlock_t vl_gylock; /* graveyard lock */
57 unsigned short vl_naddrs; /* number of VL servers in addr list */
58 unsigned short vl_curr_svix; /* current server index */
59 struct in_addr vl_addrs[AFS_CELL_MAX_ADDRS]; /* cell VL server addresses */
60
61 char name[0]; /* cell name - must go last */
62};
63
64extern int afs_cell_init(char *rootcell);
65
66extern int afs_cell_create(const char *name, char *vllist, struct afs_cell **_cell);
67
68extern int afs_cell_lookup(const char *name, unsigned nmsize, struct afs_cell **_cell);
69
70#define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0)
71
72extern struct afs_cell *afs_get_cell_maybe(struct afs_cell **_cell);
73
74extern void afs_put_cell(struct afs_cell *cell);
75
76extern void afs_cell_purge(void);
77
78#endif /* _LINUX_AFS_CELL_H */
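
[Note on the deleted header above: the "must go last" comment on name[0] is the trailing-array idiom that afs_cell_alloc() pairs with kzalloc(sizeof(struct afs_cell) + namelen + 1, GFP_KERNEL) — one allocation holds both the fixed header and the variable-length name. A standalone C sketch of the same layout; C99 spells the member name[] rather than the older GNU name[0]:

    #include <stdlib.h>
    #include <string.h>

    struct cell {
            int usage;
            char name[];            /* flexible array member - must go last */
    };

    static struct cell *cell_alloc(const char *name)
    {
            size_t namelen = strlen(name);
            struct cell *cell = calloc(1, sizeof(*cell) + namelen + 1);

            if (!cell)
                    return NULL;
            memcpy(cell->name, name, namelen);      /* calloc() zeroed the NUL */
            return cell;
    }
]
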
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 3d097fddcb7a..6685f4cbccb3 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -1,4 +1,4 @@
1/* cmservice.c: AFS Cache Manager Service 1/* AFS Cache Manager Service
2 * 2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
@@ -12,641 +12,463 @@
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/init.h> 13#include <linux/init.h>
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/completion.h> 15#include <linux/ip.h>
16#include "server.h"
17#include "cell.h"
18#include "transport.h"
19#include <rxrpc/rxrpc.h>
20#include <rxrpc/transport.h>
21#include <rxrpc/connection.h>
22#include <rxrpc/call.h>
23#include "cmservice.h"
24#include "internal.h" 16#include "internal.h"
17#include "afs_cm.h"
25 18
26static unsigned afscm_usage; /* AFS cache manager usage count */ 19struct workqueue_struct *afs_cm_workqueue;
27static struct rw_semaphore afscm_sem; /* AFS cache manager start/stop semaphore */
28
29static int afscm_new_call(struct rxrpc_call *call);
30static void afscm_attention(struct rxrpc_call *call);
31static void afscm_error(struct rxrpc_call *call);
32static void afscm_aemap(struct rxrpc_call *call);
33
34static void _SRXAFSCM_CallBack(struct rxrpc_call *call);
35static void _SRXAFSCM_InitCallBackState(struct rxrpc_call *call);
36static void _SRXAFSCM_Probe(struct rxrpc_call *call);
37
38typedef void (*_SRXAFSCM_xxxx_t)(struct rxrpc_call *call);
39
40static const struct rxrpc_operation AFSCM_ops[] = {
41 {
42 .id = 204,
43 .asize = RXRPC_APP_MARK_EOF,
44 .name = "CallBack",
45 .user = _SRXAFSCM_CallBack,
46 },
47 {
48 .id = 205,
49 .asize = RXRPC_APP_MARK_EOF,
50 .name = "InitCallBackState",
51 .user = _SRXAFSCM_InitCallBackState,
52 },
53 {
54 .id = 206,
55 .asize = RXRPC_APP_MARK_EOF,
56 .name = "Probe",
57 .user = _SRXAFSCM_Probe,
58 },
59#if 0
60 {
61 .id = 207,
62 .asize = RXRPC_APP_MARK_EOF,
63 .name = "GetLock",
64 .user = _SRXAFSCM_GetLock,
65 },
66 {
67 .id = 208,
68 .asize = RXRPC_APP_MARK_EOF,
69 .name = "GetCE",
70 .user = _SRXAFSCM_GetCE,
71 },
72 {
73 .id = 209,
74 .asize = RXRPC_APP_MARK_EOF,
75 .name = "GetXStatsVersion",
76 .user = _SRXAFSCM_GetXStatsVersion,
77 },
78 {
79 .id = 210,
80 .asize = RXRPC_APP_MARK_EOF,
81 .name = "GetXStats",
82 .user = _SRXAFSCM_GetXStats,
83 }
84#endif
85};
86 20
87static struct rxrpc_service AFSCM_service = { 21static int afs_deliver_cb_init_call_back_state(struct afs_call *,
88 .name = "AFS/CM", 22 struct sk_buff *, bool);
89 .owner = THIS_MODULE, 23static int afs_deliver_cb_init_call_back_state3(struct afs_call *,
90 .link = LIST_HEAD_INIT(AFSCM_service.link), 24 struct sk_buff *, bool);
91 .new_call = afscm_new_call, 25static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool);
92 .service_id = 1, 26static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool);
93 .attn_func = afscm_attention, 27static int afs_deliver_cb_get_capabilities(struct afs_call *, struct sk_buff *,
94 .error_func = afscm_error, 28 bool);
95 .aemap_func = afscm_aemap, 29static void afs_cm_destructor(struct afs_call *);
96 .ops_begin = &AFSCM_ops[0],
97 .ops_end = &AFSCM_ops[ARRAY_SIZE(AFSCM_ops)],
98};
99 30
100static DECLARE_COMPLETION(kafscmd_alive);
101static DECLARE_COMPLETION(kafscmd_dead);
102static DECLARE_WAIT_QUEUE_HEAD(kafscmd_sleepq);
103static LIST_HEAD(kafscmd_attention_list);
104static LIST_HEAD(afscm_calls);
105static DEFINE_SPINLOCK(afscm_calls_lock);
106static DEFINE_SPINLOCK(kafscmd_attention_lock);
107static int kafscmd_die;
108
109/*****************************************************************************/
110/* 31/*
111 * AFS Cache Manager kernel thread 32 * CB.CallBack operation type
112 */ 33 */
113static int kafscmd(void *arg) 34static const struct afs_call_type afs_SRXCBCallBack = {
114{ 35 .name = "CB.CallBack",
115 DECLARE_WAITQUEUE(myself, current); 36 .deliver = afs_deliver_cb_callback,
116 37 .abort_to_error = afs_abort_to_error,
117 struct rxrpc_call *call; 38 .destructor = afs_cm_destructor,
118 _SRXAFSCM_xxxx_t func; 39};
119 int die;
120
121 printk(KERN_INFO "kAFS: Started kafscmd %d\n", current->pid);
122
123 daemonize("kafscmd");
124
125 complete(&kafscmd_alive);
126
127 /* loop around looking for things to attend to */
128 do {
129 if (list_empty(&kafscmd_attention_list)) {
130 set_current_state(TASK_INTERRUPTIBLE);
131 add_wait_queue(&kafscmd_sleepq, &myself);
132
133 for (;;) {
134 set_current_state(TASK_INTERRUPTIBLE);
135 if (!list_empty(&kafscmd_attention_list) ||
136 signal_pending(current) ||
137 kafscmd_die)
138 break;
139
140 schedule();
141 }
142
143 remove_wait_queue(&kafscmd_sleepq, &myself);
144 set_current_state(TASK_RUNNING);
145 }
146
147 die = kafscmd_die;
148
149 /* dequeue the next call requiring attention */
150 call = NULL;
151 spin_lock(&kafscmd_attention_lock);
152
153 if (!list_empty(&kafscmd_attention_list)) {
154 call = list_entry(kafscmd_attention_list.next,
155 struct rxrpc_call,
156 app_attn_link);
157 list_del_init(&call->app_attn_link);
158 die = 0;
159 }
160
161 spin_unlock(&kafscmd_attention_lock);
162
163 if (call) {
164 /* act upon it */
165 _debug("@@@ Begin Attend Call %p", call);
166
167 func = call->app_user;
168 if (func)
169 func(call);
170
171 rxrpc_put_call(call);
172
173 _debug("@@@ End Attend Call %p", call);
174 }
175
176 } while(!die);
177
178 /* and that's all */
179 complete_and_exit(&kafscmd_dead, 0);
180
181} /* end kafscmd() */
182 40
183/*****************************************************************************/
184/* 41/*
185 * handle a call coming in to the cache manager 42 * CB.InitCallBackState operation type
186 * - if I want to keep the call, I must increment its usage count
187 * - the return value will be negated and passed back in an abort packet if
188 * non-zero
189 * - serialised by virtue of there only being one krxiod
190 */ 43 */
191static int afscm_new_call(struct rxrpc_call *call) 44static const struct afs_call_type afs_SRXCBInitCallBackState = {
192{ 45 .name = "CB.InitCallBackState",
193 _enter("%p{cid=%u u=%d}", 46 .deliver = afs_deliver_cb_init_call_back_state,
194 call, ntohl(call->call_id), atomic_read(&call->usage)); 47 .abort_to_error = afs_abort_to_error,
195 48 .destructor = afs_cm_destructor,
196 rxrpc_get_call(call); 49};
197
198 /* add to my current call list */
199 spin_lock(&afscm_calls_lock);
200 list_add(&call->app_link,&afscm_calls);
201 spin_unlock(&afscm_calls_lock);
202
203 _leave(" = 0");
204 return 0;
205
206} /* end afscm_new_call() */
207 50
208/*****************************************************************************/
209/* 51/*
210 * queue on the kafscmd queue for attention 52 * CB.InitCallBackState3 operation type
211 */ 53 */
212static void afscm_attention(struct rxrpc_call *call) 54static const struct afs_call_type afs_SRXCBInitCallBackState3 = {
213{ 55 .name = "CB.InitCallBackState3",
214 _enter("%p{cid=%u u=%d}", 56 .deliver = afs_deliver_cb_init_call_back_state3,
215 call, ntohl(call->call_id), atomic_read(&call->usage)); 57 .abort_to_error = afs_abort_to_error,
216 58 .destructor = afs_cm_destructor,
217 spin_lock(&kafscmd_attention_lock); 59};
218
219 if (list_empty(&call->app_attn_link)) {
220 list_add_tail(&call->app_attn_link, &kafscmd_attention_list);
221 rxrpc_get_call(call);
222 }
223
224 spin_unlock(&kafscmd_attention_lock);
225
226 wake_up(&kafscmd_sleepq);
227
228 _leave(" {u=%d}", atomic_read(&call->usage));
229} /* end afscm_attention() */
230 60
231/*****************************************************************************/
232/* 61/*
233 * handle my call being aborted 62 * CB.Probe operation type
234 * - clean up, dequeue and put my ref to the call
235 */ 63 */
236static void afscm_error(struct rxrpc_call *call) 64static const struct afs_call_type afs_SRXCBProbe = {
237{ 65 .name = "CB.Probe",
238 int removed; 66 .deliver = afs_deliver_cb_probe,
239 67 .abort_to_error = afs_abort_to_error,
240 _enter("%p{est=%s ac=%u er=%d}", 68 .destructor = afs_cm_destructor,
241 call, 69};
242 rxrpc_call_error_states[call->app_err_state],
243 call->app_abort_code,
244 call->app_errno);
245
246 spin_lock(&kafscmd_attention_lock);
247
248 if (list_empty(&call->app_attn_link)) {
249 list_add_tail(&call->app_attn_link, &kafscmd_attention_list);
250 rxrpc_get_call(call);
251 }
252
253 spin_unlock(&kafscmd_attention_lock);
254
255 removed = 0;
256 spin_lock(&afscm_calls_lock);
257 if (!list_empty(&call->app_link)) {
258 list_del_init(&call->app_link);
259 removed = 1;
260 }
261 spin_unlock(&afscm_calls_lock);
262
263 if (removed)
264 rxrpc_put_call(call);
265
266 wake_up(&kafscmd_sleepq);
267 70
268 _leave(""); 71/*
269} /* end afscm_error() */ 72 * CB.GetCapabilities operation type
73 */
74static const struct afs_call_type afs_SRXCBGetCapabilites = {
75 .name = "CB.GetCapabilities",
76 .deliver = afs_deliver_cb_get_capabilities,
77 .abort_to_error = afs_abort_to_error,
78 .destructor = afs_cm_destructor,
79};
270 80
271/*****************************************************************************/
272/* 81/*
273 * map afs abort codes to/from Linux error codes 82 * route an incoming cache manager call
274 * - called with call->lock held 83 * - return T if supported, F if not
275 */ 84 */
276static void afscm_aemap(struct rxrpc_call *call) 85bool afs_cm_incoming_call(struct afs_call *call)
277{ 86{
278 switch (call->app_err_state) { 87 u32 operation_id = ntohl(call->operation_ID);
279 case RXRPC_ESTATE_LOCAL_ABORT: 88
280 call->app_abort_code = -call->app_errno; 89 _enter("{CB.OP %u}", operation_id);
281 break; 90
282 case RXRPC_ESTATE_PEER_ABORT: 91 switch (operation_id) {
283 call->app_errno = -ECONNABORTED; 92 case CBCallBack:
284 break; 93 call->type = &afs_SRXCBCallBack;
94 return true;
95 case CBInitCallBackState:
96 call->type = &afs_SRXCBInitCallBackState;
97 return true;
98 case CBInitCallBackState3:
99 call->type = &afs_SRXCBInitCallBackState3;
100 return true;
101 case CBProbe:
102 call->type = &afs_SRXCBProbe;
103 return true;
104 case CBGetCapabilities:
105 call->type = &afs_SRXCBGetCapabilites;
106 return true;
285 default: 107 default:
286 break; 108 return false;
287 } 109 }
288} /* end afscm_aemap() */ 110}
289 111
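
[Note: afs_cm_incoming_call() above replaces the old rxrpc_operation array (IDs 204-206 in the deleted AFSCM_ops table) with a switch that binds each CB.* operation ID to an afs_call_type. The same dispatch could equally be table-driven; a sketch for comparison, reusing the type names from the patch (including its afs_SRXCBGetCapabilites spelling), not an alternative the patch itself contains:

    static const struct {
            u32 op;
            const struct afs_call_type *type;
    } afs_cm_ops[] = {
            { CBCallBack,            &afs_SRXCBCallBack },
            { CBInitCallBackState,   &afs_SRXCBInitCallBackState },
            { CBInitCallBackState3,  &afs_SRXCBInitCallBackState3 },
            { CBProbe,               &afs_SRXCBProbe },
            { CBGetCapabilities,     &afs_SRXCBGetCapabilites },
    };

    bool afs_cm_incoming_call(struct afs_call *call)
    {
            u32 op = ntohl(call->operation_ID);
            int i;

            for (i = 0; i < ARRAY_SIZE(afs_cm_ops); i++) {
                    if (afs_cm_ops[i].op == op) {
                            call->type = afs_cm_ops[i].type;
                            return true;
                    }
            }
            return false;
    }
]
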
290/*****************************************************************************/
291/* 112/*
292 * start the cache manager service if not already started 113 * clean up a cache manager call
293 */ 114 */
294int afscm_start(void) 115static void afs_cm_destructor(struct afs_call *call)
295{ 116{
296 int ret; 117 _enter("");
297
298 down_write(&afscm_sem);
299 if (!afscm_usage) {
300 ret = kernel_thread(kafscmd, NULL, 0);
301 if (ret < 0)
302 goto out;
303
304 wait_for_completion(&kafscmd_alive);
305
306 ret = rxrpc_add_service(afs_transport, &AFSCM_service);
307 if (ret < 0)
308 goto kill;
309
310 afs_kafstimod_add_timer(&afs_mntpt_expiry_timer,
311 afs_mntpt_expiry_timeout * HZ);
312 }
313
314 afscm_usage++;
315 up_write(&afscm_sem);
316
317 return 0;
318
319 kill:
320 kafscmd_die = 1;
321 wake_up(&kafscmd_sleepq);
322 wait_for_completion(&kafscmd_dead);
323
324 out:
325 up_write(&afscm_sem);
326 return ret;
327 118
328} /* end afscm_start() */ 119 afs_put_server(call->server);
120 call->server = NULL;
121 kfree(call->buffer);
122 call->buffer = NULL;
123}
329 124
330/*****************************************************************************/
331/* 125/*
332 * stop the cache manager service 126 * allow the fileserver to see if the cache manager is still alive
333 */ 127 */
334void afscm_stop(void) 128static void SRXAFSCB_CallBack(struct work_struct *work)
335{ 129{
336 struct rxrpc_call *call; 130 struct afs_call *call = container_of(work, struct afs_call, work);
337 131
338 down_write(&afscm_sem); 132 _enter("");
339 133
340 BUG_ON(afscm_usage == 0); 134 /* be sure to send the reply *before* attempting to spam the AFS server
341 afscm_usage--; 135 * with FSFetchStatus requests on the vnodes with broken callbacks lest
136 * the AFS server get into a vicious cycle of trying to break further
137 * callbacks because it hadn't received completion of the CBCallBack op
138 * yet */
139 afs_send_empty_reply(call);
342 140
343 if (afscm_usage == 0) { 141 afs_break_callbacks(call->server, call->count, call->request);
344 /* don't want more incoming calls */ 142 _leave("");
345 rxrpc_del_service(afs_transport, &AFSCM_service); 143}
346
347 /* abort any calls I've still got open (the afscm_error() will
348 * dequeue them) */
349 spin_lock(&afscm_calls_lock);
350 while (!list_empty(&afscm_calls)) {
351 call = list_entry(afscm_calls.next,
352 struct rxrpc_call,
353 app_link);
354 144
355 list_del_init(&call->app_link); 145/*
356 rxrpc_get_call(call); 146 * deliver request data to a CB.CallBack call
357 spin_unlock(&afscm_calls_lock); 147 */
148static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
149 bool last)
150{
151 struct afs_callback *cb;
152 struct afs_server *server;
153 struct in_addr addr;
154 __be32 *bp;
155 u32 tmp;
156 int ret, loop;
157
158 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
159
160 switch (call->unmarshall) {
161 case 0:
162 call->offset = 0;
163 call->unmarshall++;
164
165 /* extract the FID array and its count in two steps */
166 case 1:
167 _debug("extract FID count");
168 ret = afs_extract_data(call, skb, last, &call->tmp, 4);
169 switch (ret) {
170 case 0: break;
171 case -EAGAIN: return 0;
172 default: return ret;
173 }
358 174
359 rxrpc_call_abort(call, -ESRCH); /* abort, dequeue and 175 call->count = ntohl(call->tmp);
360 * put */ 176 _debug("FID count: %u", call->count);
177 if (call->count > AFSCBMAX)
178 return -EBADMSG;
179
180 call->buffer = kmalloc(call->count * 3 * 4, GFP_KERNEL);
181 if (!call->buffer)
182 return -ENOMEM;
183 call->offset = 0;
184 call->unmarshall++;
185
186 case 2:
187 _debug("extract FID array");
188 ret = afs_extract_data(call, skb, last, call->buffer,
189 call->count * 3 * 4);
190 switch (ret) {
191 case 0: break;
192 case -EAGAIN: return 0;
193 default: return ret;
194 }
361 195
362 _debug("nuking active call %08x.%d", 196 _debug("unmarshall FID array");
363 ntohl(call->conn->conn_id), 197 call->request = kcalloc(call->count,
364 ntohl(call->call_id)); 198 sizeof(struct afs_callback),
365 rxrpc_put_call(call); 199 GFP_KERNEL);
366 rxrpc_put_call(call); 200 if (!call->request)
201 return -ENOMEM;
202
203 cb = call->request;
204 bp = call->buffer;
205 for (loop = call->count; loop > 0; loop--, cb++) {
206 cb->fid.vid = ntohl(*bp++);
207 cb->fid.vnode = ntohl(*bp++);
208 cb->fid.unique = ntohl(*bp++);
209 cb->type = AFSCM_CB_UNTYPED;
210 }
367 211
368 spin_lock(&afscm_calls_lock); 212 call->offset = 0;
213 call->unmarshall++;
214
215 /* extract the callback array and its count in two steps */
216 case 3:
217 _debug("extract CB count");
218 ret = afs_extract_data(call, skb, last, &call->tmp, 4);
219 switch (ret) {
220 case 0: break;
221 case -EAGAIN: return 0;
222 default: return ret;
369 } 223 }
370 spin_unlock(&afscm_calls_lock);
371 224
372 /* get rid of my daemon */ 225 tmp = ntohl(call->tmp);
373 kafscmd_die = 1; 226 _debug("CB count: %u", tmp);
374 wake_up(&kafscmd_sleepq); 227 if (tmp != call->count && tmp != 0)
375 wait_for_completion(&kafscmd_dead); 228 return -EBADMSG;
229 call->offset = 0;
230 call->unmarshall++;
231 if (tmp == 0)
232 goto empty_cb_array;
233
234 case 4:
235 _debug("extract CB array");
236 ret = afs_extract_data(call, skb, last, call->request,
237 call->count * 3 * 4);
238 switch (ret) {
239 case 0: break;
240 case -EAGAIN: return 0;
241 default: return ret;
242 }
376 243
377 /* dispose of any calls waiting for attention */ 244 _debug("unmarshall CB array");
378 spin_lock(&kafscmd_attention_lock); 245 cb = call->request;
379 while (!list_empty(&kafscmd_attention_list)) { 246 bp = call->buffer;
380 call = list_entry(kafscmd_attention_list.next, 247 for (loop = call->count; loop > 0; loop--, cb++) {
381 struct rxrpc_call, 248 cb->version = ntohl(*bp++);
382 app_attn_link); 249 cb->expiry = ntohl(*bp++);
250 cb->type = ntohl(*bp++);
251 }
383 252
384 list_del_init(&call->app_attn_link); 253 empty_cb_array:
385 spin_unlock(&kafscmd_attention_lock); 254 call->offset = 0;
255 call->unmarshall++;
386 256
387 rxrpc_put_call(call); 257 case 5:
258 _debug("trailer");
259 if (skb->len != 0)
260 return -EBADMSG;
261 break;
262 }
388 263
389 spin_lock(&kafscmd_attention_lock); 264 if (!last)
390 } 265 return 0;
391 spin_unlock(&kafscmd_attention_lock);
392 266
393 afs_kafstimod_del_timer(&afs_mntpt_expiry_timer); 267 call->state = AFS_CALL_REPLYING;
394 }
395 268
396 up_write(&afscm_sem); 269 /* we'll need the file server record as that tells us which set of
270 * vnodes to operate upon */
271 memcpy(&addr, &ip_hdr(skb)->saddr, 4);
272 server = afs_find_server(&addr);
273 if (!server)
274 return -ENOTCONN;
275 call->server = server;
397 276
398} /* end afscm_stop() */ 277 INIT_WORK(&call->work, SRXAFSCB_CallBack);
278 schedule_work(&call->work);
279 return 0;
280}
399 281
400/*****************************************************************************/
401/* 282/*
402 * handle the fileserver breaking a set of callbacks 283 * allow the fileserver to request callback state (re-)initialisation
403 */ 284 */
404static void _SRXAFSCM_CallBack(struct rxrpc_call *call) 285static void SRXAFSCB_InitCallBackState(struct work_struct *work)
405{ 286{
406 struct afs_server *server; 287 struct afs_call *call = container_of(work, struct afs_call, work);
407 size_t count, qty, tmp;
408 int ret = 0, removed;
409
410 _enter("%p{acs=%s}", call, rxrpc_call_states[call->app_call_state]);
411
412 server = afs_server_get_from_peer(call->conn->peer);
413
414 switch (call->app_call_state) {
415 /* we've received the last packet
416 * - drain all the data from the call and send the reply
417 */
418 case RXRPC_CSTATE_SRVR_GOT_ARGS:
419 ret = -EBADMSG;
420 qty = call->app_ready_qty;
421 if (qty < 8 || qty > 50 * (6 * 4) + 8)
422 break;
423
424 {
425 struct afs_callback *cb, *pcb;
426 int loop;
427 __be32 *fp, *bp;
428
429 fp = rxrpc_call_alloc_scratch(call, qty);
430
431 /* drag the entire argument block out to the scratch
432 * space */
433 ret = rxrpc_call_read_data(call, fp, qty, 0);
434 if (ret < 0)
435 break;
436
437 /* and unmarshall the parameter block */
438 ret = -EBADMSG;
439 count = ntohl(*fp++);
440 if (count>AFSCBMAX ||
441 (count * (3 * 4) + 8 != qty &&
442 count * (6 * 4) + 8 != qty))
443 break;
444
445 bp = fp + count*3;
446 tmp = ntohl(*bp++);
447 if (tmp > 0 && tmp != count)
448 break;
449 if (tmp == 0)
450 bp = NULL;
451
452 pcb = cb = rxrpc_call_alloc_scratch_s(
453 call, struct afs_callback);
454
455 for (loop = count - 1; loop >= 0; loop--) {
456 pcb->fid.vid = ntohl(*fp++);
457 pcb->fid.vnode = ntohl(*fp++);
458 pcb->fid.unique = ntohl(*fp++);
459 if (bp) {
460 pcb->version = ntohl(*bp++);
461 pcb->expiry = ntohl(*bp++);
462 pcb->type = ntohl(*bp++);
463 }
464 else {
465 pcb->version = 0;
466 pcb->expiry = 0;
467 pcb->type = AFSCM_CB_UNTYPED;
468 }
469 pcb++;
470 }
471
472 /* invoke the actual service routine */
473 ret = SRXAFSCM_CallBack(server, count, cb);
474 if (ret < 0)
475 break;
476 }
477 288
478 /* send the reply */ 289 _enter("{%p}", call->server);
479 ret = rxrpc_call_write_data(call, 0, NULL, RXRPC_LAST_PACKET,
480 GFP_KERNEL, 0, &count);
481 if (ret < 0)
482 break;
483 break;
484
485 /* operation complete */
486 case RXRPC_CSTATE_COMPLETE:
487 call->app_user = NULL;
488 removed = 0;
489 spin_lock(&afscm_calls_lock);
490 if (!list_empty(&call->app_link)) {
491 list_del_init(&call->app_link);
492 removed = 1;
493 }
494 spin_unlock(&afscm_calls_lock);
495 290
496 if (removed) 291 afs_init_callback_state(call->server);
497 rxrpc_put_call(call); 292 afs_send_empty_reply(call);
498 break; 293 _leave("");
294}
499 295
500 /* operation terminated on error */ 296/*
501 case RXRPC_CSTATE_ERROR: 297 * deliver request data to a CB.InitCallBackState call
502 call->app_user = NULL; 298 */
503 break; 299static int afs_deliver_cb_init_call_back_state(struct afs_call *call,
300 struct sk_buff *skb,
301 bool last)
302{
303 struct afs_server *server;
304 struct in_addr addr;
504 305
505 default: 306 _enter(",{%u},%d", skb->len, last);
506 break;
507 }
508 307
509 if (ret < 0) 308 if (skb->len > 0)
510 rxrpc_call_abort(call, ret); 309 return -EBADMSG;
310 if (!last)
311 return 0;
511 312
512 afs_put_server(server); 313 /* no unmarshalling required */
314 call->state = AFS_CALL_REPLYING;
513 315
514 _leave(" = %d", ret); 316 /* we'll need the file server record as that tells us which set of
317 * vnodes to operate upon */
318 memcpy(&addr, &ip_hdr(skb)->saddr, 4);
319 server = afs_find_server(&addr);
320 if (!server)
321 return -ENOTCONN;
322 call->server = server;
515 323
516} /* end _SRXAFSCM_CallBack() */ 324 INIT_WORK(&call->work, SRXAFSCB_InitCallBackState);
325 schedule_work(&call->work);
326 return 0;
327}
517 328
518/*****************************************************************************/
519/* 329/*
520 * handle the fileserver asking us to initialise our callback state 330 * deliver request data to a CB.InitCallBackState3 call
521 */ 331 */
522static void _SRXAFSCM_InitCallBackState(struct rxrpc_call *call) 332static int afs_deliver_cb_init_call_back_state3(struct afs_call *call,
333 struct sk_buff *skb,
334 bool last)
523{ 335{
524 struct afs_server *server; 336 struct afs_server *server;
525 size_t count; 337 struct in_addr addr;
526 int ret = 0, removed;
527 338
528 _enter("%p{acs=%s}", call, rxrpc_call_states[call->app_call_state]); 339 _enter(",{%u},%d", skb->len, last);
529 340
530 server = afs_server_get_from_peer(call->conn->peer); 341 if (!last)
342 return 0;
531 343
532 switch (call->app_call_state) { 344 /* no unmarshalling required */
533 /* we've received the last packet - drain all the data from the 345 call->state = AFS_CALL_REPLYING;
534 * call */
535 case RXRPC_CSTATE_SRVR_GOT_ARGS:
536 /* shouldn't be any args */
537 ret = -EBADMSG;
538 break;
539
540 /* send the reply when asked for it */
541 case RXRPC_CSTATE_SRVR_SND_REPLY:
542 /* invoke the actual service routine */
543 ret = SRXAFSCM_InitCallBackState(server);
544 if (ret < 0)
545 break;
546
547 ret = rxrpc_call_write_data(call, 0, NULL, RXRPC_LAST_PACKET,
548 GFP_KERNEL, 0, &count);
549 if (ret < 0)
550 break;
551 break;
552 346
553 /* operation complete */ 347 /* we'll need the file server record as that tells us which set of
554 case RXRPC_CSTATE_COMPLETE: 348 * vnodes to operate upon */
555 call->app_user = NULL; 349 memcpy(&addr, &ip_hdr(skb)->saddr, 4);
556 removed = 0; 350 server = afs_find_server(&addr);
557 spin_lock(&afscm_calls_lock); 351 if (!server)
558 if (!list_empty(&call->app_link)) { 352 return -ENOTCONN;
559 list_del_init(&call->app_link); 353 call->server = server;
560 removed = 1;
561 }
562 spin_unlock(&afscm_calls_lock);
563 354
564 if (removed) 355 INIT_WORK(&call->work, SRXAFSCB_InitCallBackState);
565 rxrpc_put_call(call); 356 schedule_work(&call->work);
566 break; 357 return 0;
567 358}
568 /* operation terminated on error */
569 case RXRPC_CSTATE_ERROR:
570 call->app_user = NULL;
571 break;
572
573 default:
574 break;
575 }
576
577 if (ret < 0)
578 rxrpc_call_abort(call, ret);
579
580 afs_put_server(server);
581 359
582 _leave(" = %d", ret); 360/*
361 * allow the fileserver to see if the cache manager is still alive
362 */
363static void SRXAFSCB_Probe(struct work_struct *work)
364{
365 struct afs_call *call = container_of(work, struct afs_call, work);
583 366
584} /* end _SRXAFSCM_InitCallBackState() */ 367 _enter("");
368 afs_send_empty_reply(call);
369 _leave("");
370}
585 371
586/*****************************************************************************/
587/* 372/*
588 * handle a probe from a fileserver 373 * deliver request data to a CB.Probe call
589 */ 374 */
590static void _SRXAFSCM_Probe(struct rxrpc_call *call) 375static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb,
376 bool last)
591{ 377{
592 struct afs_server *server; 378 _enter(",{%u},%d", skb->len, last);
593 size_t count;
594 int ret = 0, removed;
595
596 _enter("%p{acs=%s}", call, rxrpc_call_states[call->app_call_state]);
597 379
598 server = afs_server_get_from_peer(call->conn->peer); 380 if (skb->len > 0)
381 return -EBADMSG;
382 if (!last)
383 return 0;
599 384
600 switch (call->app_call_state) { 385 /* no unmarshalling required */
601 /* we've received the last packet - drain all the data from the 386 call->state = AFS_CALL_REPLYING;
602 * call */
603 case RXRPC_CSTATE_SRVR_GOT_ARGS:
604 /* shouldn't be any args */
605 ret = -EBADMSG;
606 break;
607 387
608 /* send the reply when asked for it */ 388 INIT_WORK(&call->work, SRXAFSCB_Probe);
609 case RXRPC_CSTATE_SRVR_SND_REPLY: 389 schedule_work(&call->work);
610 /* invoke the actual service routine */ 390 return 0;
611 ret = SRXAFSCM_Probe(server); 391}
612 if (ret < 0)
613 break;
614
615 ret = rxrpc_call_write_data(call, 0, NULL, RXRPC_LAST_PACKET,
616 GFP_KERNEL, 0, &count);
617 if (ret < 0)
618 break;
619 break;
620 392
621 /* operation complete */ 393/*
622 case RXRPC_CSTATE_COMPLETE: 394 * allow the fileserver to ask about the cache manager's capabilities
623 call->app_user = NULL; 395 */
624 removed = 0; 396static void SRXAFSCB_GetCapabilities(struct work_struct *work)
625 spin_lock(&afscm_calls_lock); 397{
626 if (!list_empty(&call->app_link)) { 398 struct afs_interface *ifs;
627 list_del_init(&call->app_link); 399 struct afs_call *call = container_of(work, struct afs_call, work);
628 removed = 1; 400 int loop, nifs;
401
402 struct {
403 struct /* InterfaceAddr */ {
404 __be32 nifs;
405 __be32 uuid[11];
406 __be32 ifaddr[32];
407 __be32 netmask[32];
408 __be32 mtu[32];
409 } ia;
410 struct /* Capabilities */ {
411 __be32 capcount;
412 __be32 caps[1];
413 } cap;
414 } reply;
415
416 _enter("");
417
418 nifs = 0;
419 ifs = kcalloc(32, sizeof(*ifs), GFP_KERNEL);
420 if (ifs) {
421 nifs = afs_get_ipv4_interfaces(ifs, 32, false);
422 if (nifs < 0) {
423 kfree(ifs);
424 ifs = NULL;
425 nifs = 0;
629 } 426 }
630 spin_unlock(&afscm_calls_lock); 427 }
631 428
632 if (removed) 429 memset(&reply, 0, sizeof(reply));
633 rxrpc_put_call(call); 430 reply.ia.nifs = htonl(nifs);
634 break; 431
432 reply.ia.uuid[0] = htonl(afs_uuid.time_low);
433 reply.ia.uuid[1] = htonl(afs_uuid.time_mid);
434 reply.ia.uuid[2] = htonl(afs_uuid.time_hi_and_version);
435 reply.ia.uuid[3] = htonl((s8) afs_uuid.clock_seq_hi_and_reserved);
436 reply.ia.uuid[4] = htonl((s8) afs_uuid.clock_seq_low);
437 for (loop = 0; loop < 6; loop++)
438 reply.ia.uuid[loop + 5] = htonl((s8) afs_uuid.node[loop]);
439
440 if (ifs) {
441 for (loop = 0; loop < nifs; loop++) {
442 reply.ia.ifaddr[loop] = ifs[loop].address.s_addr;
443 reply.ia.netmask[loop] = ifs[loop].netmask.s_addr;
444 reply.ia.mtu[loop] = htonl(ifs[loop].mtu);
445 }
446 }
635 447
636 /* operation terminated on error */ 448 reply.cap.capcount = htonl(1);
637 case RXRPC_CSTATE_ERROR: 449 reply.cap.caps[0] = htonl(AFS_CAP_ERROR_TRANSLATION);
638 call->app_user = NULL; 450 afs_send_simple_reply(call, &reply, sizeof(reply));
639 break;
640 451
641 default: 452 _leave("");
642 break; 453}
643 }
644 454
645 if (ret < 0) 455/*
646 rxrpc_call_abort(call, ret); 456 * deliver request data to a CB.GetCapabilities call
457 */
458static int afs_deliver_cb_get_capabilities(struct afs_call *call,
459 struct sk_buff *skb, bool last)
460{
461 _enter(",{%u},%d", skb->len, last);
647 462
648 afs_put_server(server); 463 if (skb->len > 0)
464 return -EBADMSG;
465 if (!last)
466 return 0;
649 467
650 _leave(" = %d", ret); 468 /* no unmarshalling required */
469 call->state = AFS_CALL_REPLYING;
651 470
652} /* end _SRXAFSCM_Probe() */ 471 INIT_WORK(&call->work, SRXAFSCB_GetCapabilities);
472 schedule_work(&call->work);
473 return 0;
474}
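
[Note on the file above: afs_deliver_cb_callback() is the template every deliver routine here follows. call->unmarshall records which wire field is being extracted, the switch cases deliberately fall through so a fresh call walks all phases in order, and afs_extract_data() returns -EAGAIN whenever the current sk_buff runs dry — which the caller maps to "return 0 and wait for the next packet", so parsing resumes mid-field on the next delivery. A self-contained userspace sketch of that resumable-parser idea; everything here is illustrative:

    #include <stddef.h>
    #include <string.h>

    enum { DONE = 0, AGAIN = 1, BAD = -1 };

    struct parser {
            int phase;              /* which field we are on (cf. call->unmarshall) */
            size_t offset;          /* bytes of that field copied so far */
            unsigned char count[4]; /* first wire field: a 4-byte counter */
    };

    /* Copy up to `want` bytes of the current field out of the chunk;
     * returns AGAIN if the chunk ran out before the field completed. */
    static int extract(struct parser *p, const unsigned char **data,
                       size_t *len, void *buf, size_t want)
    {
            size_t n = want - p->offset;

            if (n > *len)
                    n = *len;
            memcpy((unsigned char *)buf + p->offset, *data, n);
            p->offset += n;
            *data += n;
            *len -= n;
            return p->offset == want ? DONE : AGAIN;
    }

    /* Called once per arriving chunk; resumes where the last call stopped. */
    static int deliver(struct parser *p, const unsigned char *data, size_t len)
    {
            switch (p->phase) {
            case 0:
                    p->offset = 0;
                    p->phase++;
                    /* fall through */
            case 1:
                    if (extract(p, &data, &len, p->count, 4) == AGAIN)
                            return AGAIN;
                    p->offset = 0;
                    p->phase++;
                    /* further cases would extract the arrays the same way */
            }
            return len ? BAD : DONE;        /* trailing bytes = protocol error */
    }
]
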
diff --git a/fs/afs/cmservice.h b/fs/afs/cmservice.h
deleted file mode 100644
index af8d4d689cb2..000000000000
--- a/fs/afs/cmservice.h
+++ /dev/null
@@ -1,29 +0,0 @@
1/* cmservice.h: AFS Cache Manager Service declarations
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_AFS_CMSERVICE_H
13#define _LINUX_AFS_CMSERVICE_H
14
15#include <rxrpc/transport.h>
16#include "types.h"
17
18/* cache manager start/stop */
19extern int afscm_start(void);
20extern void afscm_stop(void);
21
22/* cache manager server functions */
23extern int SRXAFSCM_InitCallBackState(struct afs_server *server);
24extern int SRXAFSCM_CallBack(struct afs_server *server,
25 size_t count,
26 struct afs_callback callbacks[]);
27extern int SRXAFSCM_Probe(struct afs_server *server);
28
29#endif /* _LINUX_AFS_CMSERVICE_H */
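
[Note: with this header gone, the cache manager's entry points are no longer exported synchronous SRXAFSCM_* functions. Each deliver routine in the new cmservice.c instead validates the request in receive context and punts the real operation — and its reply — to process context with INIT_WORK() + schedule_work(). The shape of such a handler pair, distilled from the CB.Probe handler above with illustrative names:

    /* process-context half: perform the operation and send the reply */
    static void do_example_op(struct work_struct *work)
    {
            struct afs_call *call = container_of(work, struct afs_call, work);

            afs_send_empty_reply(call);
    }

    /* receive-side half: check the request, then defer the real work */
    static int deliver_example_op(struct afs_call *call, struct sk_buff *skb,
                                  bool last)
    {
            if (skb->len > 0)
                    return -EBADMSG;        /* this op carries no arguments */
            if (!last)
                    return 0;               /* wait for the rest of the request */

            call->state = AFS_CALL_REPLYING;
            INIT_WORK(&call->work, do_example_op);
            schedule_work(&call->work);
            return 0;
    }
]
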
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index b6dc2ebe47a8..dac5b990c0cd 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -15,45 +15,53 @@
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/pagemap.h> 17#include <linux/pagemap.h>
18#include <linux/smp_lock.h> 18#include <linux/ctype.h>
19#include "vnode.h"
20#include "volume.h"
21#include <rxrpc/call.h>
22#include "super.h"
23#include "internal.h" 19#include "internal.h"
24 20
25static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry, 21static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
26 struct nameidata *nd); 22 struct nameidata *nd);
27static int afs_dir_open(struct inode *inode, struct file *file); 23static int afs_dir_open(struct inode *inode, struct file *file);
28static int afs_dir_readdir(struct file *file, void *dirent, filldir_t filldir); 24static int afs_readdir(struct file *file, void *dirent, filldir_t filldir);
29static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd); 25static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd);
30static int afs_d_delete(struct dentry *dentry); 26static int afs_d_delete(struct dentry *dentry);
31static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen, 27static void afs_d_release(struct dentry *dentry);
28static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
32 loff_t fpos, u64 ino, unsigned dtype); 29 loff_t fpos, u64 ino, unsigned dtype);
30static int afs_create(struct inode *dir, struct dentry *dentry, int mode,
31 struct nameidata *nd);
32static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode);
33static int afs_rmdir(struct inode *dir, struct dentry *dentry);
34static int afs_unlink(struct inode *dir, struct dentry *dentry);
35static int afs_link(struct dentry *from, struct inode *dir,
36 struct dentry *dentry);
37static int afs_symlink(struct inode *dir, struct dentry *dentry,
38 const char *content);
39static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
40 struct inode *new_dir, struct dentry *new_dentry);
33 41
34const struct file_operations afs_dir_file_operations = { 42const struct file_operations afs_dir_file_operations = {
35 .open = afs_dir_open, 43 .open = afs_dir_open,
36 .readdir = afs_dir_readdir, 44 .release = afs_release,
45 .readdir = afs_readdir,
37}; 46};
38 47
39const struct inode_operations afs_dir_inode_operations = { 48const struct inode_operations afs_dir_inode_operations = {
40 .lookup = afs_dir_lookup, 49 .create = afs_create,
50 .lookup = afs_lookup,
51 .link = afs_link,
52 .unlink = afs_unlink,
53 .symlink = afs_symlink,
54 .mkdir = afs_mkdir,
55 .rmdir = afs_rmdir,
56 .rename = afs_rename,
57 .permission = afs_permission,
41 .getattr = afs_inode_getattr, 58 .getattr = afs_inode_getattr,
42#if 0 /* TODO */
43 .create = afs_dir_create,
44 .link = afs_dir_link,
45 .unlink = afs_dir_unlink,
46 .symlink = afs_dir_symlink,
47 .mkdir = afs_dir_mkdir,
48 .rmdir = afs_dir_rmdir,
49 .mknod = afs_dir_mknod,
50 .rename = afs_dir_rename,
51#endif
52}; 59};
53 60
54static struct dentry_operations afs_fs_dentry_operations = { 61static struct dentry_operations afs_fs_dentry_operations = {
55 .d_revalidate = afs_d_revalidate, 62 .d_revalidate = afs_d_revalidate,
56 .d_delete = afs_d_delete, 63 .d_delete = afs_d_delete,
64 .d_release = afs_d_release,
57}; 65};
58 66
59#define AFS_DIR_HASHTBL_SIZE 128 67#define AFS_DIR_HASHTBL_SIZE 128
@@ -105,14 +113,13 @@ struct afs_dir_page {
105 union afs_dir_block blocks[PAGE_SIZE / sizeof(union afs_dir_block)]; 113 union afs_dir_block blocks[PAGE_SIZE / sizeof(union afs_dir_block)];
106}; 114};
107 115
108struct afs_dir_lookup_cookie { 116struct afs_lookup_cookie {
109 struct afs_fid fid; 117 struct afs_fid fid;
110 const char *name; 118 const char *name;
111 size_t nlen; 119 size_t nlen;
112 int found; 120 int found;
113}; 121};
114 122
115/*****************************************************************************/
116/* 123/*
117 * check that a directory page is valid 124 * check that a directory page is valid
118 */ 125 */
@@ -128,9 +135,10 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page)
128 if (qty == 0) 135 if (qty == 0)
129 goto error; 136 goto error;
130 137
131 if (page->index==0 && qty!=ntohs(dbuf->blocks[0].pagehdr.npages)) { 138 if (page->index == 0 && qty != ntohs(dbuf->blocks[0].pagehdr.npages)) {
132 printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n", 139 printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n",
133 __FUNCTION__,dir->i_ino,qty,ntohs(dbuf->blocks[0].pagehdr.npages)); 140 __FUNCTION__, dir->i_ino, qty,
141 ntohs(dbuf->blocks[0].pagehdr.npages));
134 goto error; 142 goto error;
135 } 143 }
136#endif 144#endif
@@ -157,13 +165,11 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page)
157 SetPageChecked(page); 165 SetPageChecked(page);
158 return; 166 return;
159 167
160 error: 168error:
161 SetPageChecked(page); 169 SetPageChecked(page);
162 SetPageError(page); 170 SetPageError(page);
171}
163 172
164} /* end afs_dir_check_page() */
165
166/*****************************************************************************/
167/* 173/*
168 * discard a page cached in the pagecache 174 * discard a page cached in the pagecache
169 */ 175 */
@@ -171,20 +177,22 @@ static inline void afs_dir_put_page(struct page *page)
171{ 177{
172 kunmap(page); 178 kunmap(page);
173 page_cache_release(page); 179 page_cache_release(page);
180}
174 181
175} /* end afs_dir_put_page() */
176
177/*****************************************************************************/
178/* 182/*
179 * get a page into the pagecache 183 * get a page into the pagecache
180 */ 184 */
181static struct page *afs_dir_get_page(struct inode *dir, unsigned long index) 185static struct page *afs_dir_get_page(struct inode *dir, unsigned long index,
186 struct key *key)
182{ 187{
183 struct page *page; 188 struct page *page;
189 struct file file = {
190 .private_data = key,
191 };
184 192
185 _enter("{%lu},%lu", dir->i_ino, index); 193 _enter("{%lu},%lu", dir->i_ino, index);
186 194
187 page = read_mapping_page(dir->i_mapping, index, NULL); 195 page = read_mapping_page(dir->i_mapping, index, &file);
188 if (!IS_ERR(page)) { 196 if (!IS_ERR(page)) {
189 wait_on_page_locked(page); 197 wait_on_page_locked(page);
190 kmap(page); 198 kmap(page);
@@ -197,12 +205,12 @@ static struct page *afs_dir_get_page(struct inode *dir, unsigned long index)
197 } 205 }
198 return page; 206 return page;
199 207
200 fail: 208fail:
201 afs_dir_put_page(page); 209 afs_dir_put_page(page);
210 _leave(" = -EIO");
202 return ERR_PTR(-EIO); 211 return ERR_PTR(-EIO);
203} /* end afs_dir_get_page() */ 212}
204 213
205/*****************************************************************************/
206/* 214/*
207 * open an AFS directory file 215 * open an AFS directory file
208 */ 216 */
@@ -213,15 +221,12 @@ static int afs_dir_open(struct inode *inode, struct file *file)
213 BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048); 221 BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048);
214 BUILD_BUG_ON(sizeof(union afs_dirent) != 32); 222 BUILD_BUG_ON(sizeof(union afs_dirent) != 32);
215 223
216 if (AFS_FS_I(inode)->flags & AFS_VNODE_DELETED) 224 if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(inode)->flags))
217 return -ENOENT; 225 return -ENOENT;
218 226
219 _leave(" = 0"); 227 return afs_open(inode, file);
220 return 0; 228}
221
222} /* end afs_dir_open() */
223 229
224/*****************************************************************************/
225/* 230/*
226 * deal with one block in an AFS directory 231 * deal with one block in an AFS directory
227 */ 232 */
@@ -250,7 +255,7 @@ static int afs_dir_iterate_block(unsigned *fpos,
250 /* skip entries marked unused in the bitmap */ 255 /* skip entries marked unused in the bitmap */
251 if (!(block->pagehdr.bitmap[offset / 8] & 256 if (!(block->pagehdr.bitmap[offset / 8] &
252 (1 << (offset % 8)))) { 257 (1 << (offset % 8)))) {
253 _debug("ENT[%Zu.%u]: unused\n", 258 _debug("ENT[%Zu.%u]: unused",
254 blkoff / sizeof(union afs_dir_block), offset); 259 blkoff / sizeof(union afs_dir_block), offset);
255 if (offset >= curr) 260 if (offset >= curr)
256 *fpos = blkoff + 261 *fpos = blkoff +
@@ -264,7 +269,7 @@ static int afs_dir_iterate_block(unsigned *fpos,
264 sizeof(*block) - 269 sizeof(*block) -
265 offset * sizeof(union afs_dirent)); 270 offset * sizeof(union afs_dirent));
266 271
267 _debug("ENT[%Zu.%u]: %s %Zu \"%s\"\n", 272 _debug("ENT[%Zu.%u]: %s %Zu \"%s\"",
268 blkoff / sizeof(union afs_dir_block), offset, 273 blkoff / sizeof(union afs_dir_block), offset,
269 (offset < curr ? "skip" : "fill"), 274 (offset < curr ? "skip" : "fill"),
270 nlen, dire->u.name); 275 nlen, dire->u.name);
@@ -274,7 +279,7 @@ static int afs_dir_iterate_block(unsigned *fpos,
274 if (next >= AFS_DIRENT_PER_BLOCK) { 279 if (next >= AFS_DIRENT_PER_BLOCK) {
275 _debug("ENT[%Zu.%u]:" 280 _debug("ENT[%Zu.%u]:"
276 " %u travelled beyond end dir block" 281 " %u travelled beyond end dir block"
277 " (len %u/%Zu)\n", 282 " (len %u/%Zu)",
278 blkoff / sizeof(union afs_dir_block), 283 blkoff / sizeof(union afs_dir_block),
279 offset, next, tmp, nlen); 284 offset, next, tmp, nlen);
280 return -EIO; 285 return -EIO;
@@ -282,13 +287,13 @@ static int afs_dir_iterate_block(unsigned *fpos,
282 if (!(block->pagehdr.bitmap[next / 8] & 287 if (!(block->pagehdr.bitmap[next / 8] &
283 (1 << (next % 8)))) { 288 (1 << (next % 8)))) {
284 _debug("ENT[%Zu.%u]:" 289 _debug("ENT[%Zu.%u]:"
285 " %u unmarked extension (len %u/%Zu)\n", 290 " %u unmarked extension (len %u/%Zu)",
286 blkoff / sizeof(union afs_dir_block), 291 blkoff / sizeof(union afs_dir_block),
287 offset, next, tmp, nlen); 292 offset, next, tmp, nlen);
288 return -EIO; 293 return -EIO;
289 } 294 }
290 295
291 _debug("ENT[%Zu.%u]: ext %u/%Zu\n", 296 _debug("ENT[%Zu.%u]: ext %u/%Zu",
292 blkoff / sizeof(union afs_dir_block), 297 blkoff / sizeof(union afs_dir_block),
293 next, tmp, nlen); 298 next, tmp, nlen);
294 next++; 299 next++;
@@ -304,7 +309,7 @@ static int afs_dir_iterate_block(unsigned *fpos,
304 nlen, 309 nlen,
305 blkoff + offset * sizeof(union afs_dirent), 310 blkoff + offset * sizeof(union afs_dirent),
306 ntohl(dire->u.vnode), 311 ntohl(dire->u.vnode),
307 filldir == afs_dir_lookup_filldir ? 312 filldir == afs_lookup_filldir ?
308 ntohl(dire->u.unique) : DT_UNKNOWN); 313 ntohl(dire->u.unique) : DT_UNKNOWN);
309 if (ret < 0) { 314 if (ret < 0) {
310 _leave(" = 0 [full]"); 315 _leave(" = 0 [full]");
@@ -316,16 +321,15 @@ static int afs_dir_iterate_block(unsigned *fpos,
316 321
317 _leave(" = 1 [more]"); 322 _leave(" = 1 [more]");
318 return 1; 323 return 1;
319} /* end afs_dir_iterate_block() */ 324}
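The bitmap test above is what decides whether a dirent slot is live: one bit per 32-byte slot in pagehdr.bitmap. A minimal userspace sketch of the same arithmetic (structure and names hypothetical, not the kernel types):

#include <stdio.h>

/* hypothetical stand-in for block->pagehdr.bitmap: one bit per 32-byte
 * dirent slot, set when the slot is allocated */
static int slot_allocated(const unsigned char *bitmap, unsigned int offset)
{
        return bitmap[offset / 8] & (1 << (offset % 8));
}

int main(void)
{
        unsigned char bitmap[8] = { 0x07, 0x10 }; /* slots 0-2 and 12 in use */
        unsigned int offset;

        for (offset = 0; offset < 16; offset++)
                printf("slot %2u: %s\n", offset,
                       slot_allocated(bitmap, offset) ? "used" : "free");
        return 0;
}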
320 325
321/*****************************************************************************/
322/* 326/*
323 * read an AFS directory 327 * iterate through the data blob that lists the contents of an AFS directory
324 */ 328 */
325static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie, 329static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie,
326 filldir_t filldir) 330 filldir_t filldir, struct key *key)
327{ 331{
328 union afs_dir_block *dblock; 332 union afs_dir_block *dblock;
329 struct afs_dir_page *dbuf; 333 struct afs_dir_page *dbuf;
330 struct page *page; 334 struct page *page;
331 unsigned blkoff, limit; 335 unsigned blkoff, limit;
@@ -333,7 +337,7 @@ static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie,
333 337
334 _enter("{%lu},%u,,", dir->i_ino, *fpos); 338 _enter("{%lu},%u,,", dir->i_ino, *fpos);
335 339
336 if (AFS_FS_I(dir)->flags & AFS_VNODE_DELETED) { 340 if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dir)->flags)) {
337 _leave(" = -ESTALE"); 341 _leave(" = -ESTALE");
338 return -ESTALE; 342 return -ESTALE;
339 } 343 }
@@ -348,7 +352,7 @@ static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie,
348 blkoff = *fpos & ~(sizeof(union afs_dir_block) - 1); 352 blkoff = *fpos & ~(sizeof(union afs_dir_block) - 1);
349 353
350 /* fetch the appropriate page from the directory */ 354 /* fetch the appropriate page from the directory */
351 page = afs_dir_get_page(dir, blkoff / PAGE_SIZE); 355 page = afs_dir_get_page(dir, blkoff / PAGE_SIZE, key);
352 if (IS_ERR(page)) { 356 if (IS_ERR(page)) {
353 ret = PTR_ERR(page); 357 ret = PTR_ERR(page);
354 break; 358 break;
@@ -377,43 +381,50 @@ static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie,
377 ret = 0; 381 ret = 0;
378 } 382 }
379 383
380 out: 384out:
381 _leave(" = %d", ret); 385 _leave(" = %d", ret);
382 return ret; 386 return ret;
383} /* end afs_dir_iterate() */ 387}
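afs_dir_iterate() advances by rounding the file position down to a 2048-byte directory block and then mapping that block offset to a pagecache page. A standalone sketch of the arithmetic, assuming the 2048-byte union afs_dir_block from the BUILD_BUG_ON checks above and a typical 4096-byte page (both sizes assumptions of the demo):

#include <stdio.h>

#define DIR_BLOCK_SIZE 2048     /* sizeof(union afs_dir_block), per BUILD_BUG_ON */
#define PAGE_SIZE_DEMO 4096     /* a typical PAGE_SIZE; assumed for the demo */

int main(void)
{
        unsigned int fpos = 5000;       /* arbitrary byte position in the dir */

        /* round down to the start of the containing dir block */
        unsigned int blkoff = fpos & ~(DIR_BLOCK_SIZE - 1);

        /* which pagecache page holds that block, and which block within it */
        unsigned long page_index = blkoff / PAGE_SIZE_DEMO;
        unsigned int block_in_page = (blkoff % PAGE_SIZE_DEMO) / DIR_BLOCK_SIZE;

        printf("fpos=%u -> blkoff=%u page=%lu block=%u\n",
               fpos, blkoff, page_index, block_in_page);
        return 0;
}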
384 388
385/*****************************************************************************/
386/* 389/*
387 * read an AFS directory 390 * read an AFS directory
388 */ 391 */
389static int afs_dir_readdir(struct file *file, void *cookie, filldir_t filldir) 392static int afs_readdir(struct file *file, void *cookie, filldir_t filldir)
390{ 393{
391 unsigned fpos; 394 unsigned fpos;
392 int ret; 395 int ret;
393 396
394 _enter("{%Ld,{%lu}}", file->f_pos, file->f_path.dentry->d_inode->i_ino); 397 _enter("{%Ld,{%lu}}",
398 file->f_pos, file->f_path.dentry->d_inode->i_ino);
399
400 ASSERT(file->private_data != NULL);
395 401
396 fpos = file->f_pos; 402 fpos = file->f_pos;
397 ret = afs_dir_iterate(file->f_path.dentry->d_inode, &fpos, cookie, filldir); 403 ret = afs_dir_iterate(file->f_path.dentry->d_inode, &fpos,
404 cookie, filldir, file->private_data);
398 file->f_pos = fpos; 405 file->f_pos = fpos;
399 406
400 _leave(" = %d", ret); 407 _leave(" = %d", ret);
401 return ret; 408 return ret;
402} /* end afs_dir_readdir() */ 409}
403 410
404/*****************************************************************************/
405/* 411/*
406 * search the directory for a name 412 * search the directory for a name
407 * - if afs_dir_iterate_block() spots this function, it'll pass the FID 413 * - if afs_dir_iterate_block() spots this function, it'll pass the FID
408 * uniquifier through dtype 414 * uniquifier through dtype
409 */ 415 */
410static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen, 416static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
411 loff_t fpos, u64 ino, unsigned dtype) 417 loff_t fpos, u64 ino, unsigned dtype)
412{ 418{
413 struct afs_dir_lookup_cookie *cookie = _cookie; 419 struct afs_lookup_cookie *cookie = _cookie;
414 420
415 _enter("{%s,%Zu},%s,%u,,%lu,%u", 421 _enter("{%s,%Zu},%s,%u,,%llu,%u",
416 cookie->name, cookie->nlen, name, nlen, ino, dtype); 422 cookie->name, cookie->nlen, name, nlen,
423 (unsigned long long) ino, dtype);
424
425 /* insanity checks first */
426 BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048);
427 BUILD_BUG_ON(sizeof(union afs_dirent) != 32);
417 428
418 if (cookie->nlen != nlen || memcmp(cookie->name, name, nlen) != 0) { 429 if (cookie->nlen != nlen || memcmp(cookie->name, name, nlen) != 0) {
419 _leave(" = 0 [no]"); 430 _leave(" = 0 [no]");
@@ -426,216 +437,254 @@ static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen,
426 437
427 _leave(" = -1 [found]"); 438 _leave(" = -1 [found]");
428 return -1; 439 return -1;
429} /* end afs_dir_lookup_filldir() */ 440}
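afs_lookup_filldir() reuses the readdir machinery as a search: the cookie carries the wanted name, and the callback captures the FID on a match and returns non-zero to stop the walk. A self-contained userspace sketch of that callback-as-predicate pattern (all names hypothetical):

#include <stdio.h>
#include <string.h>

struct entry { const char *name; unsigned vnode, unique; };
struct lookup_cookie { const char *name; unsigned vnode, unique; int found; };

/* returning negative stops the iteration, as a filldir callback would */
static int lookup_filldir(void *_cookie, const char *name,
                          unsigned vnode, unsigned unique)
{
        struct lookup_cookie *cookie = _cookie;

        if (strcmp(cookie->name, name) != 0)
                return 0;               /* no match - keep going */
        cookie->vnode = vnode;
        cookie->unique = unique;
        cookie->found = 1;
        return -1;                      /* found - stop the walk */
}

static void iterate(const struct entry *dir, size_t n,
                    int (*actor)(void *, const char *, unsigned, unsigned),
                    void *cookie)
{
        for (size_t i = 0; i < n; i++)
                if (actor(cookie, dir[i].name, dir[i].vnode, dir[i].unique) < 0)
                        return;
}

int main(void)
{
        const struct entry dir[] = {
                { ".", 1, 1 }, { "..", 1, 1 }, { "notes.txt", 77, 3 },
        };
        struct lookup_cookie cookie = { .name = "notes.txt" };

        iterate(dir, 3, lookup_filldir, &cookie);
        if (cookie.found)
                printf("found vnode=%u unique=%u\n", cookie.vnode, cookie.unique);
        return 0;
}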
430 441
431/*****************************************************************************/
432/* 442/*
433 * look up an entry in a directory 443 * do a lookup in a directory
444 * - just returns the FID the dentry name maps to if found
434 */ 445 */
435static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry, 446static int afs_do_lookup(struct inode *dir, struct dentry *dentry,
436 struct nameidata *nd) 447 struct afs_fid *fid, struct key *key)
437{ 448{
438 struct afs_dir_lookup_cookie cookie; 449 struct afs_lookup_cookie cookie;
439 struct afs_super_info *as; 450 struct afs_super_info *as;
451 unsigned fpos;
452 int ret;
453
454 _enter("{%lu},%p{%s},", dir->i_ino, dentry, dentry->d_name.name);
455
456 as = dir->i_sb->s_fs_info;
457
458 /* search the directory */
459 cookie.name = dentry->d_name.name;
460 cookie.nlen = dentry->d_name.len;
461 cookie.fid.vid = as->volume->vid;
462 cookie.found = 0;
463
464 fpos = 0;
465 ret = afs_dir_iterate(dir, &fpos, &cookie, afs_lookup_filldir,
466 key);
467 if (ret < 0) {
468 _leave(" = %d [iter]", ret);
469 return ret;
470 }
471
472 ret = -ENOENT;
473 if (!cookie.found) {
474 _leave(" = -ENOENT [not found]");
475 return -ENOENT;
476 }
477
478 *fid = cookie.fid;
479 _leave(" = 0 { vn=%u u=%u }", fid->vnode, fid->unique);
480 return 0;
481}
482
483/*
484 * look up an entry in a directory
485 */
486static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
487 struct nameidata *nd)
488{
440 struct afs_vnode *vnode; 489 struct afs_vnode *vnode;
490 struct afs_fid fid;
441 struct inode *inode; 491 struct inode *inode;
442 unsigned fpos; 492 struct key *key;
443 int ret; 493 int ret;
444 494
445 _enter("{%lu},%p{%s}", dir->i_ino, dentry, dentry->d_name.name); 495 vnode = AFS_FS_I(dir);
446 496
447 /* insanity checks first */ 497 _enter("{%x:%d},%p{%s},",
448 BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048); 498 vnode->fid.vid, vnode->fid.vnode, dentry, dentry->d_name.name);
449 BUILD_BUG_ON(sizeof(union afs_dirent) != 32); 499
500 ASSERTCMP(dentry->d_inode, ==, NULL);
450 501
451 if (dentry->d_name.len > 255) { 502 if (dentry->d_name.len > 255) {
452 _leave(" = -ENAMETOOLONG"); 503 _leave(" = -ENAMETOOLONG");
453 return ERR_PTR(-ENAMETOOLONG); 504 return ERR_PTR(-ENAMETOOLONG);
454 } 505 }
455 506
456 vnode = AFS_FS_I(dir); 507 if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
457 if (vnode->flags & AFS_VNODE_DELETED) {
458 _leave(" = -ESTALE"); 508 _leave(" = -ESTALE");
459 return ERR_PTR(-ESTALE); 509 return ERR_PTR(-ESTALE);
460 } 510 }
461 511
462 as = dir->i_sb->s_fs_info; 512 key = afs_request_key(vnode->volume->cell);
463 513 if (IS_ERR(key)) {
464 /* search the directory */ 514 _leave(" = %ld [key]", PTR_ERR(key));
465 cookie.name = dentry->d_name.name; 515 return ERR_PTR(PTR_ERR(key));
466 cookie.nlen = dentry->d_name.len; 516 }
467 cookie.fid.vid = as->volume->vid;
468 cookie.found = 0;
469 517
470 fpos = 0; 518 ret = afs_validate(vnode, key);
471 ret = afs_dir_iterate(dir, &fpos, &cookie, afs_dir_lookup_filldir);
472 if (ret < 0) { 519 if (ret < 0) {
473 _leave(" = %d", ret); 520 key_put(key);
521 _leave(" = %d [val]", ret);
474 return ERR_PTR(ret); 522 return ERR_PTR(ret);
475 } 523 }
476 524
477 ret = -ENOENT; 525 ret = afs_do_lookup(dir, dentry, &fid, key);
478 if (!cookie.found) { 526 if (ret < 0) {
479 _leave(" = %d", ret); 527 key_put(key);
528 if (ret == -ENOENT) {
529 d_add(dentry, NULL);
530 _leave(" = NULL [negative]");
531 return NULL;
532 }
533 _leave(" = %d [do]", ret);
480 return ERR_PTR(ret); 534 return ERR_PTR(ret);
481 } 535 }
536 dentry->d_fsdata = (void *)(unsigned long) vnode->status.data_version;
482 537
483 /* instantiate the dentry */ 538 /* instantiate the dentry */
484 ret = afs_iget(dir->i_sb, &cookie.fid, &inode); 539 inode = afs_iget(dir->i_sb, key, &fid, NULL, NULL);
485 if (ret < 0) { 540 key_put(key);
486 _leave(" = %d", ret); 541 if (IS_ERR(inode)) {
487 return ERR_PTR(ret); 542 _leave(" = %ld", PTR_ERR(inode));
543 return ERR_PTR(PTR_ERR(inode));
488 } 544 }
489 545
490 dentry->d_op = &afs_fs_dentry_operations; 546 dentry->d_op = &afs_fs_dentry_operations;
491 dentry->d_fsdata = (void *) (unsigned long) vnode->status.version;
492 547
493 d_add(dentry, inode); 548 d_add(dentry, inode);
494 _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%lu }", 549 _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%lu }",
495 cookie.fid.vnode, 550 fid.vnode,
496 cookie.fid.unique, 551 fid.unique,
497 dentry->d_inode->i_ino, 552 dentry->d_inode->i_ino,
498 dentry->d_inode->i_version); 553 dentry->d_inode->i_version);
499 554
500 return NULL; 555 return NULL;
501} /* end afs_dir_lookup() */ 556}
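afs_lookup() leans heavily on the kernel's ERR_PTR()/IS_ERR()/PTR_ERR() convention to return either an inode pointer or an errno through the same value. A userspace sketch of how that encoding works (the real macros live in include/linux/err.h; MAX_ERRNO and lookup_demo are stand-ins for this demo):

#include <stdio.h>

/* sketch of the ERR_PTR convention: small negative errnos map into the
 * last page of the address space and travel through the pointer itself */
#define MAX_ERRNO 4095

static void *ERR_PTR(long error) { return (void *)error; }
static long PTR_ERR(const void *ptr) { return (long)ptr; }
static int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *lookup_demo(int fail)
{
        static int an_inode = 42;       /* stands in for a real inode */

        return fail ? ERR_PTR(-2 /* -ENOENT */) : &an_inode;
}

int main(void)
{
        void *p = lookup_demo(1);

        if (IS_ERR(p))
                printf("lookup failed: %ld\n", PTR_ERR(p));
        return 0;
}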
502 557
503/*****************************************************************************/
504/* 558/*
505 * check that a dentry lookup hit has found a valid entry 559 * check that a dentry lookup hit has found a valid entry
506 * - NOTE! the hit can be a negative hit too, so we can't assume we have an 560 * - NOTE! the hit can be a negative hit too, so we can't assume we have an
507 * inode 561 * inode
508 * (derived from nfs_lookup_revalidate)
509 */ 562 */
510static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd) 563static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
511{ 564{
512 struct afs_dir_lookup_cookie cookie; 565 struct afs_vnode *vnode, *dir;
566 struct afs_fid fid;
513 struct dentry *parent; 567 struct dentry *parent;
514 struct inode *inode, *dir; 568 struct key *key;
515 unsigned fpos; 569 void *dir_version;
516 int ret; 570 int ret;
517 571
518 _enter("{sb=%p n=%s},", dentry->d_sb, dentry->d_name.name); 572 vnode = AFS_FS_I(dentry->d_inode);
519 573
520 /* lock down the parent dentry so we can peer at it */ 574 if (dentry->d_inode)
521 parent = dget_parent(dentry->d_parent); 575 _enter("{v={%x:%u} n=%s fl=%lx},",
576 vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
577 vnode->flags);
578 else
579 _enter("{neg n=%s}", dentry->d_name.name);
522 580
523 dir = parent->d_inode; 581 key = afs_request_key(AFS_FS_S(dentry->d_sb)->volume->cell);
524 inode = dentry->d_inode; 582 if (IS_ERR(key))
583 key = NULL;
525 584
526 /* handle a negative dentry */ 585 /* lock down the parent dentry so we can peer at it */
527 if (!inode) 586 parent = dget_parent(dentry);
587 if (!parent->d_inode)
528 goto out_bad; 588 goto out_bad;
529 589
530 /* handle a bad inode */ 590 dir = AFS_FS_I(parent->d_inode);
531 if (is_bad_inode(inode)) {
532 printk("kAFS: afs_d_revalidate: %s/%s has bad inode\n",
533 dentry->d_parent->d_name.name, dentry->d_name.name);
534 goto out_bad;
535 }
536 591
537 /* force a full look up if the parent directory changed since last the 592 /* validate the parent directory */
538 * server was consulted 593 if (test_bit(AFS_VNODE_MODIFIED, &dir->flags))
539 * - otherwise this inode must still exist, even if the inode details 594 afs_validate(dir, key);
540 * themselves have changed
541 */
542 if (AFS_FS_I(dir)->flags & AFS_VNODE_CHANGED)
543 afs_vnode_fetch_status(AFS_FS_I(dir));
544 595
545 if (AFS_FS_I(dir)->flags & AFS_VNODE_DELETED) { 596 if (test_bit(AFS_VNODE_DELETED, &dir->flags)) {
546 _debug("%s: parent dir deleted", dentry->d_name.name); 597 _debug("%s: parent dir deleted", dentry->d_name.name);
547 goto out_bad; 598 goto out_bad;
548 } 599 }
549 600
550 if (AFS_FS_I(inode)->flags & AFS_VNODE_DELETED) { 601 dir_version = (void *) (unsigned long) dir->status.data_version;
551 _debug("%s: file already deleted", dentry->d_name.name); 602 if (dentry->d_fsdata == dir_version)
552 goto out_bad; 603 goto out_valid; /* the dir contents are unchanged */
553 }
554
555 if ((unsigned long) dentry->d_fsdata !=
556 (unsigned long) AFS_FS_I(dir)->status.version) {
557 _debug("%s: parent changed %lu -> %u",
558 dentry->d_name.name,
559 (unsigned long) dentry->d_fsdata,
560 (unsigned) AFS_FS_I(dir)->status.version);
561 604
562 /* search the directory for this vnode */ 605 _debug("dir modified");
563 cookie.name = dentry->d_name.name;
564 cookie.nlen = dentry->d_name.len;
565 cookie.fid.vid = AFS_FS_I(inode)->volume->vid;
566 cookie.found = 0;
567 606
568 fpos = 0; 607 /* search the directory for this vnode */
569 ret = afs_dir_iterate(dir, &fpos, &cookie, 608 ret = afs_do_lookup(&dir->vfs_inode, dentry, &fid, key);
570 afs_dir_lookup_filldir); 609 switch (ret) {
571 if (ret < 0) { 610 case 0:
572 _debug("failed to iterate dir %s: %d", 611 /* the filename maps to something */
573 parent->d_name.name, ret); 612 if (!dentry->d_inode)
613 goto out_bad;
614 if (is_bad_inode(dentry->d_inode)) {
615 printk("kAFS: afs_d_revalidate: %s/%s has bad inode\n",
616 parent->d_name.name, dentry->d_name.name);
574 goto out_bad; 617 goto out_bad;
575 }
576
577 if (!cookie.found) {
578 _debug("%s: dirent not found", dentry->d_name.name);
579 goto not_found;
580 } 618 }
581 619
582 /* if the vnode ID has changed, then the dirent points to a 620 /* if the vnode ID has changed, then the dirent points to a
583 * different file */ 621 * different file */
584 if (cookie.fid.vnode != AFS_FS_I(inode)->fid.vnode) { 622 if (fid.vnode != vnode->fid.vnode) {
585 _debug("%s: dirent changed", dentry->d_name.name); 623 _debug("%s: dirent changed [%u != %u]",
624 dentry->d_name.name, fid.vnode,
625 vnode->fid.vnode);
586 goto not_found; 626 goto not_found;
587 } 627 }
588 628
589 /* if the vnode ID uniquifier has changed, then the file has 629 * been deleted and replaced, and the original vnode ID has
590 * been deleted */ 630 * been deleted and replaced, and the original vnode ID has
591 if (cookie.fid.unique != AFS_FS_I(inode)->fid.unique) { 631 * been reused */
632 if (fid.unique != vnode->fid.unique) {
592 _debug("%s: file deleted (uq %u -> %u I:%lu)", 633 _debug("%s: file deleted (uq %u -> %u I:%lu)",
593 dentry->d_name.name, 634 dentry->d_name.name, fid.unique,
594 cookie.fid.unique, 635 vnode->fid.unique, dentry->d_inode->i_version);
595 AFS_FS_I(inode)->fid.unique, 636 spin_lock(&vnode->lock);
596 inode->i_version); 637 set_bit(AFS_VNODE_DELETED, &vnode->flags);
597 spin_lock(&AFS_FS_I(inode)->lock); 638 spin_unlock(&vnode->lock);
598 AFS_FS_I(inode)->flags |= AFS_VNODE_DELETED; 639 goto not_found;
599 spin_unlock(&AFS_FS_I(inode)->lock);
600 invalidate_remote_inode(inode);
601 goto out_bad;
602 } 640 }
641 goto out_valid;
642
643 case -ENOENT:
644 /* the filename is unknown */
645 _debug("%s: dirent not found", dentry->d_name.name);
646 if (dentry->d_inode)
647 goto not_found;
648 goto out_valid;
603 649
604 dentry->d_fsdata = 650 default:
605 (void *) (unsigned long) AFS_FS_I(dir)->status.version; 651 _debug("failed to iterate dir %s: %d",
652 parent->d_name.name, ret);
653 goto out_bad;
606 } 654 }
607 655
608 out_valid: 656out_valid:
657 dentry->d_fsdata = dir_version;
658out_skip:
609 dput(parent); 659 dput(parent);
660 key_put(key);
610 _leave(" = 1 [valid]"); 661 _leave(" = 1 [valid]");
611 return 1; 662 return 1;
612 663
613 /* the dirent, if it exists, now points to a different vnode */ 664 /* the dirent, if it exists, now points to a different vnode */
614 not_found: 665not_found:
615 spin_lock(&dentry->d_lock); 666 spin_lock(&dentry->d_lock);
616 dentry->d_flags |= DCACHE_NFSFS_RENAMED; 667 dentry->d_flags |= DCACHE_NFSFS_RENAMED;
617 spin_unlock(&dentry->d_lock); 668 spin_unlock(&dentry->d_lock);
618 669
619 out_bad: 670out_bad:
620 if (inode) { 671 if (dentry->d_inode) {
621 /* don't unhash if we have submounts */ 672 /* don't unhash if we have submounts */
622 if (have_submounts(dentry)) 673 if (have_submounts(dentry))
623 goto out_valid; 674 goto out_skip;
624 } 675 }
625 676
626 shrink_dcache_parent(dentry);
627
628 _debug("dropping dentry %s/%s", 677 _debug("dropping dentry %s/%s",
629 dentry->d_parent->d_name.name, dentry->d_name.name); 678 parent->d_name.name, dentry->d_name.name);
679 shrink_dcache_parent(dentry);
630 d_drop(dentry); 680 d_drop(dentry);
631
632 dput(parent); 681 dput(parent);
682 key_put(key);
633 683
634 _leave(" = 0 [bad]"); 684 _leave(" = 0 [bad]");
635 return 0; 685 return 0;
636} /* end afs_d_revalidate() */ 686}
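The fast path in afs_d_revalidate() rests on stashing the directory's data version in dentry->d_fsdata as a pointer-sized value: if it still matches, the cached name-to-FID mapping is trusted without rescanning the directory. A hedged sketch of that trick (demo structs, not the kernel ones); note that on 32-bit the 64-bit version is truncated, so a match is a strong hint rather than a proof:

#include <stdint.h>
#include <stdio.h>

/* hypothetical stand-ins for the fields involved */
struct demo_dentry { void *d_fsdata; };
struct demo_dir { uint64_t data_version; };

static void remember_version(struct demo_dentry *d, const struct demo_dir *dir)
{
        d->d_fsdata = (void *)(uintptr_t)dir->data_version;
}

static int still_valid(const struct demo_dentry *d, const struct demo_dir *dir)
{
        /* equality of the (possibly truncated) values means "unchanged" */
        return d->d_fsdata == (void *)(uintptr_t)dir->data_version;
}

int main(void)
{
        struct demo_dir dir = { .data_version = 100 };
        struct demo_dentry dentry;

        remember_version(&dentry, &dir);
        printf("valid: %d\n", still_valid(&dentry, &dir));      /* 1: fast path */

        dir.data_version++;             /* directory was modified */
        printf("valid: %d\n", still_valid(&dentry, &dir));      /* 0: re-lookup */
        return 0;
}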
637 687
638/*****************************************************************************/
639/* 688/*
640 * allow the VFS to enquire as to whether a dentry should be unhashed (mustn't 689 * allow the VFS to enquire as to whether a dentry should be unhashed (mustn't
641 * sleep) 690 * sleep)
@@ -649,15 +698,444 @@ static int afs_d_delete(struct dentry *dentry)
649 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) 698 if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
650 goto zap; 699 goto zap;
651 700
652 if (dentry->d_inode) { 701 if (dentry->d_inode &&
653 if (AFS_FS_I(dentry->d_inode)->flags & AFS_VNODE_DELETED) 702 test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dentry->d_inode)->flags))
654 goto zap; 703 goto zap;
655 }
656 704
657 _leave(" = 0 [keep]"); 705 _leave(" = 0 [keep]");
658 return 0; 706 return 0;
659 707
660 zap: 708zap:
661 _leave(" = 1 [zap]"); 709 _leave(" = 1 [zap]");
662 return 1; 710 return 1;
663} /* end afs_d_delete() */ 711}
712
713/*
714 * handle dentry release
715 */
716static void afs_d_release(struct dentry *dentry)
717{
718 _enter("%s", dentry->d_name.name);
719}
720
721/*
722 * create a directory on an AFS filesystem
723 */
724static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
725{
726 struct afs_file_status status;
727 struct afs_callback cb;
728 struct afs_server *server;
729 struct afs_vnode *dvnode, *vnode;
730 struct afs_fid fid;
731 struct inode *inode;
732 struct key *key;
733 int ret;
734
735 dvnode = AFS_FS_I(dir);
736
737 _enter("{%x:%d},{%s},%o",
738 dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
739
740 ret = -ENAMETOOLONG;
741 if (dentry->d_name.len > 255)
742 goto error;
743
744 key = afs_request_key(dvnode->volume->cell);
745 if (IS_ERR(key)) {
746 ret = PTR_ERR(key);
747 goto error;
748 }
749
750 mode |= S_IFDIR;
751 ret = afs_vnode_create(dvnode, key, dentry->d_name.name,
752 mode, &fid, &status, &cb, &server);
753 if (ret < 0)
754 goto mkdir_error;
755
756 inode = afs_iget(dir->i_sb, key, &fid, &status, &cb);
757 if (IS_ERR(inode)) {
758 /* ENOMEM at a really inconvenient time - just abandon the new
759 * directory on the server */
760 ret = PTR_ERR(inode);
761 goto iget_error;
762 }
763
764 /* apply the status report we've got for the new vnode */
765 vnode = AFS_FS_I(inode);
766 spin_lock(&vnode->lock);
767 vnode->update_cnt++;
768 spin_unlock(&vnode->lock);
769 afs_vnode_finalise_status_update(vnode, server);
770 afs_put_server(server);
771
772 d_instantiate(dentry, inode);
773 if (d_unhashed(dentry)) {
774 _debug("not hashed");
775 d_rehash(dentry);
776 }
777 key_put(key);
778 _leave(" = 0");
779 return 0;
780
781iget_error:
782 afs_put_server(server);
783mkdir_error:
784 key_put(key);
785error:
786 d_drop(dentry);
787 _leave(" = %d", ret);
788 return ret;
789}
790
791/*
792 * remove a directory from an AFS filesystem
793 */
794static int afs_rmdir(struct inode *dir, struct dentry *dentry)
795{
796 struct afs_vnode *dvnode, *vnode;
797 struct key *key;
798 int ret;
799
800 dvnode = AFS_FS_I(dir);
801
802 _enter("{%x:%d},{%s}",
803 dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name);
804
805 ret = -ENAMETOOLONG;
806 if (dentry->d_name.len > 255)
807 goto error;
808
809 key = afs_request_key(dvnode->volume->cell);
810 if (IS_ERR(key)) {
811 ret = PTR_ERR(key);
812 goto error;
813 }
814
815 ret = afs_vnode_remove(dvnode, key, dentry->d_name.name, true);
816 if (ret < 0)
817 goto rmdir_error;
818
819 if (dentry->d_inode) {
820 vnode = AFS_FS_I(dentry->d_inode);
821 clear_nlink(&vnode->vfs_inode);
822 set_bit(AFS_VNODE_DELETED, &vnode->flags);
823 afs_discard_callback_on_delete(vnode);
824 }
825
826 key_put(key);
827 _leave(" = 0");
828 return 0;
829
830rmdir_error:
831 key_put(key);
832error:
833 _leave(" = %d", ret);
834 return ret;
835}
836
837/*
838 * remove a file from an AFS filesystem
839 */
840static int afs_unlink(struct inode *dir, struct dentry *dentry)
841{
842 struct afs_vnode *dvnode, *vnode;
843 struct key *key;
844 int ret;
845
846 dvnode = AFS_FS_I(dir);
847
848 _enter("{%x:%d},{%s}",
849 dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name);
850
851 ret = -ENAMETOOLONG;
852 if (dentry->d_name.len > 255)
853 goto error;
854
855 key = afs_request_key(dvnode->volume->cell);
856 if (IS_ERR(key)) {
857 ret = PTR_ERR(key);
858 goto error;
859 }
860
861 if (dentry->d_inode) {
862 vnode = AFS_FS_I(dentry->d_inode);
863
864 /* make sure we have a callback promise on the victim */
865 ret = afs_validate(vnode, key);
866 if (ret < 0)
867 goto error;
868 }
869
870 ret = afs_vnode_remove(dvnode, key, dentry->d_name.name, false);
871 if (ret < 0)
872 goto remove_error;
873
874 if (dentry->d_inode) {
875 /* if the file wasn't deleted due to excess hard links, the
876 * fileserver will break the callback promise on the file - if
877 * it had one - before it returns to us, and if it was deleted,
878 * it won't
879 *
880 * however, if we didn't have a callback promise outstanding,
881 * or it was outstanding on a different server, then it won't
882 * break it either...
883 */
884 vnode = AFS_FS_I(dentry->d_inode);
885 if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
886 _debug("AFS_VNODE_DELETED");
887 if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags))
888 _debug("AFS_VNODE_CB_BROKEN");
889 set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
890 ret = afs_validate(vnode, key);
891 _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret);
892 }
893
894 key_put(key);
895 _leave(" = 0");
896 return 0;
897
898remove_error:
899 key_put(key);
900error:
901 _leave(" = %d", ret);
902 return ret;
903}
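afs_unlink(), like the other directory operations added here, uses the classic goto-unwind shape: acquire the key, call the server, and on failure jump to a label that releases exactly what was acquired so far. A compilable miniature of that structure (get_key and server_remove are hypothetical stand-ins, not AFS calls):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* hypothetical stand-ins for the key and the remote operation above */
static int *get_key(void) { return malloc(sizeof(int)); }
static void put_key(int *key) { free(key); }
static int server_remove(int *key, const char *name)
{
        (void)key; (void)name;
        return -EIO;            /* pretend the fileserver call failed */
}

static int demo_unlink(const char *name)
{
        int *key;
        int ret;

        ret = -ENAMETOOLONG;
        if (strlen(name) > 255)
                goto error;

        key = get_key();
        if (!key) {
                ret = -ENOMEM;
                goto error;
        }

        ret = server_remove(key, name);
        if (ret < 0)
                goto remove_error;

        put_key(key);
        return 0;

remove_error:
        put_key(key);           /* unwind exactly what was acquired */
error:
        printf("unlink '%s' -> %d\n", name, ret);
        return ret;
}

int main(void)
{
        demo_unlink("victim");
        return 0;
}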
904
905/*
906 * create a regular file on an AFS filesystem
907 */
908static int afs_create(struct inode *dir, struct dentry *dentry, int mode,
909 struct nameidata *nd)
910{
911 struct afs_file_status status;
912 struct afs_callback cb;
913 struct afs_server *server;
914 struct afs_vnode *dvnode, *vnode;
915 struct afs_fid fid;
916 struct inode *inode;
917 struct key *key;
918 int ret;
919
920 dvnode = AFS_FS_I(dir);
921
922 _enter("{%x:%d},{%s},%o,",
923 dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
924
925 ret = -ENAMETOOLONG;
926 if (dentry->d_name.len > 255)
927 goto error;
928
929 key = afs_request_key(dvnode->volume->cell);
930 if (IS_ERR(key)) {
931 ret = PTR_ERR(key);
932 goto error;
933 }
934
935 mode |= S_IFREG;
936 ret = afs_vnode_create(dvnode, key, dentry->d_name.name,
937 mode, &fid, &status, &cb, &server);
938 if (ret < 0)
939 goto create_error;
940
941 inode = afs_iget(dir->i_sb, key, &fid, &status, &cb);
942 if (IS_ERR(inode)) {
943 /* ENOMEM at a really inconvenient time - just abandon the new
944 * file on the server */
945 ret = PTR_ERR(inode);
946 goto iget_error;
947 }
948
949 /* apply the status report we've got for the new vnode */
950 vnode = AFS_FS_I(inode);
951 spin_lock(&vnode->lock);
952 vnode->update_cnt++;
953 spin_unlock(&vnode->lock);
954 afs_vnode_finalise_status_update(vnode, server);
955 afs_put_server(server);
956
957 d_instantiate(dentry, inode);
958 if (d_unhashed(dentry)) {
959 _debug("not hashed");
960 d_rehash(dentry);
961 }
962 key_put(key);
963 _leave(" = 0");
964 return 0;
965
966iget_error:
967 afs_put_server(server);
968create_error:
969 key_put(key);
970error:
971 d_drop(dentry);
972 _leave(" = %d", ret);
973 return ret;
974}
975
976/*
977 * create a hard link between files in an AFS filesystem
978 */
979static int afs_link(struct dentry *from, struct inode *dir,
980 struct dentry *dentry)
981{
982 struct afs_vnode *dvnode, *vnode;
983 struct key *key;
984 int ret;
985
986 vnode = AFS_FS_I(from->d_inode);
987 dvnode = AFS_FS_I(dir);
988
989 _enter("{%x:%d},{%x:%d},{%s}",
990 vnode->fid.vid, vnode->fid.vnode,
991 dvnode->fid.vid, dvnode->fid.vnode,
992 dentry->d_name.name);
993
994 ret = -ENAMETOOLONG;
995 if (dentry->d_name.len > 255)
996 goto error;
997
998 key = afs_request_key(dvnode->volume->cell);
999 if (IS_ERR(key)) {
1000 ret = PTR_ERR(key);
1001 goto error;
1002 }
1003
1004 ret = afs_vnode_link(dvnode, vnode, key, dentry->d_name.name);
1005 if (ret < 0)
1006 goto link_error;
1007
1008 atomic_inc(&vnode->vfs_inode.i_count);
1009 d_instantiate(dentry, &vnode->vfs_inode);
1010 key_put(key);
1011 _leave(" = 0");
1012 return 0;
1013
1014link_error:
1015 key_put(key);
1016error:
1017 d_drop(dentry);
1018 _leave(" = %d", ret);
1019 return ret;
1020}
1021
1022/*
1023 * create a symlink in an AFS filesystem
1024 */
1025static int afs_symlink(struct inode *dir, struct dentry *dentry,
1026 const char *content)
1027{
1028 struct afs_file_status status;
1029 struct afs_server *server;
1030 struct afs_vnode *dvnode, *vnode;
1031 struct afs_fid fid;
1032 struct inode *inode;
1033 struct key *key;
1034 int ret;
1035
1036 dvnode = AFS_FS_I(dir);
1037
1038 _enter("{%x:%d},{%s},%s",
1039 dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name,
1040 content);
1041
1042 ret = -ENAMETOOLONG;
1043 if (dentry->d_name.len > 255)
1044 goto error;
1045
1046 ret = -EINVAL;
1047 if (strlen(content) > 1023)
1048 goto error;
1049
1050 key = afs_request_key(dvnode->volume->cell);
1051 if (IS_ERR(key)) {
1052 ret = PTR_ERR(key);
1053 goto error;
1054 }
1055
1056 ret = afs_vnode_symlink(dvnode, key, dentry->d_name.name, content,
1057 &fid, &status, &server);
1058 if (ret < 0)
1059 goto create_error;
1060
1061 inode = afs_iget(dir->i_sb, key, &fid, &status, NULL);
1062 if (IS_ERR(inode)) {
1063 /* ENOMEM at a really inconvenient time - just abandon the new
1064 * symlink on the server */
1065 ret = PTR_ERR(inode);
1066 goto iget_error;
1067 }
1068
1069 /* apply the status report we've got for the new vnode */
1070 vnode = AFS_FS_I(inode);
1071 spin_lock(&vnode->lock);
1072 vnode->update_cnt++;
1073 spin_unlock(&vnode->lock);
1074 afs_vnode_finalise_status_update(vnode, server);
1075 afs_put_server(server);
1076
1077 d_instantiate(dentry, inode);
1078 if (d_unhashed(dentry)) {
1079 _debug("not hashed");
1080 d_rehash(dentry);
1081 }
1082 key_put(key);
1083 _leave(" = 0");
1084 return 0;
1085
1086iget_error:
1087 afs_put_server(server);
1088create_error:
1089 key_put(key);
1090error:
1091 d_drop(dentry);
1092 _leave(" = %d", ret);
1093 return ret;
1094}
1095
1096/*
1097 * rename a file in an AFS filesystem and/or move it between directories
1098 */
1099static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
1100 struct inode *new_dir, struct dentry *new_dentry)
1101{
1102 struct afs_vnode *orig_dvnode, *new_dvnode, *vnode;
1103 struct key *key;
1104 int ret;
1105
1106 vnode = AFS_FS_I(old_dentry->d_inode);
1107 orig_dvnode = AFS_FS_I(old_dir);
1108 new_dvnode = AFS_FS_I(new_dir);
1109
1110 _enter("{%x:%d},{%x:%d},{%x:%d},{%s}",
1111 orig_dvnode->fid.vid, orig_dvnode->fid.vnode,
1112 vnode->fid.vid, vnode->fid.vnode,
1113 new_dvnode->fid.vid, new_dvnode->fid.vnode,
1114 new_dentry->d_name.name);
1115
1116 ret = -ENAMETOOLONG;
1117 if (new_dentry->d_name.len > 255)
1118 goto error;
1119
1120 key = afs_request_key(orig_dvnode->volume->cell);
1121 if (IS_ERR(key)) {
1122 ret = PTR_ERR(key);
1123 goto error;
1124 }
1125
1126 ret = afs_vnode_rename(orig_dvnode, new_dvnode, key,
1127 old_dentry->d_name.name,
1128 new_dentry->d_name.name);
1129 if (ret < 0)
1130 goto rename_error;
1131 key_put(key);
1132 _leave(" = 0");
1133 return 0;
1134
1135rename_error:
1136 key_put(key);
1137error:
1138 d_drop(new_dentry);
1139 _leave(" = %d", ret);
1140 return ret;
1141}
diff --git a/fs/afs/errors.h b/fs/afs/errors.h
deleted file mode 100644
index 574d94ac8d05..000000000000
--- a/fs/afs/errors.h
+++ /dev/null
@@ -1,34 +0,0 @@
1/* errors.h: AFS abort/error codes
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_AFS_ERRORS_H
13#define _LINUX_AFS_ERRORS_H
14
15#include "types.h"
16
17/* file server abort codes */
18typedef enum {
19 VSALVAGE = 101, /* volume needs salvaging */
20 VNOVNODE = 102, /* no such file/dir (vnode) */
21 VNOVOL = 103, /* no such volume or volume unavailable */
22 VVOLEXISTS = 104, /* volume name already exists */
23 VNOSERVICE = 105, /* volume not currently in service */
24 VOFFLINE = 106, /* volume is currently offline (more info available [VVL-spec]) */
25 VONLINE = 107, /* volume is already online */
26 VDISKFULL = 108, /* disk partition is full */
27 VOVERQUOTA = 109, /* volume's maximum quota exceeded */
28 VBUSY = 110, /* volume is temporarily unavailable */
29 VMOVED = 111, /* volume moved to new server - ask this FS where */
30} afs_rxfs_abort_t;
31
32extern int afs_abort_to_error(int abortcode);
33
34#endif /* _LINUX_AFS_ERRORS_H */
diff --git a/fs/afs/file.c b/fs/afs/file.c
index b17634541f67..ae256498f4f7 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -1,6 +1,6 @@
1/* file.c: AFS filesystem file handling 1/* AFS filesystem file handling
2 * 2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -15,22 +15,25 @@
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/pagemap.h> 17#include <linux/pagemap.h>
18#include "volume.h"
19#include "vnode.h"
20#include <rxrpc/call.h>
21#include "internal.h" 18#include "internal.h"
22 19
23#if 0
24static int afs_file_open(struct inode *inode, struct file *file);
25static int afs_file_release(struct inode *inode, struct file *file);
26#endif
27
28static int afs_file_readpage(struct file *file, struct page *page); 20static int afs_file_readpage(struct file *file, struct page *page);
29static void afs_file_invalidatepage(struct page *page, unsigned long offset); 21static void afs_file_invalidatepage(struct page *page, unsigned long offset);
30static int afs_file_releasepage(struct page *page, gfp_t gfp_flags); 22static int afs_file_releasepage(struct page *page, gfp_t gfp_flags);
31 23
24const struct file_operations afs_file_operations = {
25 .open = afs_open,
26 .release = afs_release,
27 .llseek = generic_file_llseek,
28 .read = do_sync_read,
29 .aio_read = generic_file_aio_read,
30 .mmap = generic_file_readonly_mmap,
31 .sendfile = generic_file_sendfile,
32};
33
32const struct inode_operations afs_file_inode_operations = { 34const struct inode_operations afs_file_inode_operations = {
33 .getattr = afs_inode_getattr, 35 .getattr = afs_inode_getattr,
36 .permission = afs_permission,
34}; 37};
35 38
36const struct address_space_operations afs_fs_aops = { 39const struct address_space_operations afs_fs_aops = {
@@ -40,7 +43,48 @@ const struct address_space_operations afs_fs_aops = {
40 .invalidatepage = afs_file_invalidatepage, 43 .invalidatepage = afs_file_invalidatepage,
41}; 44};
42 45
43/*****************************************************************************/ 46/*
47 * open an AFS file or directory and attach a key to it
48 */
49int afs_open(struct inode *inode, struct file *file)
50{
51 struct afs_vnode *vnode = AFS_FS_I(inode);
52 struct key *key;
53 int ret;
54
55 _enter("{%x:%x},", vnode->fid.vid, vnode->fid.vnode);
56
57 key = afs_request_key(vnode->volume->cell);
58 if (IS_ERR(key)) {
59 _leave(" = %ld [key]", PTR_ERR(key));
60 return PTR_ERR(key);
61 }
62
63 ret = afs_validate(vnode, key);
64 if (ret < 0) {
65 _leave(" = %d [val]", ret);
66 return ret;
67 }
68
69 file->private_data = key;
70 _leave(" = 0");
71 return 0;
72}
73
74/*
75 * release an AFS file or directory and discard its key
76 */
77int afs_release(struct inode *inode, struct file *file)
78{
79 struct afs_vnode *vnode = AFS_FS_I(inode);
80
81 _enter("{%x:%x},", vnode->fid.vid, vnode->fid.vnode);
82
83 key_put(file->private_data);
84 _leave(" = 0");
85 return 0;
86}
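afs_open() pins a key to the open file via file->private_data and afs_release() drops it, so every later readpage or readdir call can reuse the opener's credentials. A userspace analogue of that per-handle refcounted-credential pattern (struct cred here is a hypothetical stand-in for struct key):

#include <stdio.h>

/* hypothetical refcounted credential standing in for struct key */
struct cred { int refs; };

static struct cred *cred_get(struct cred *c) { c->refs++; return c; }
static void cred_put(struct cred *c)
{
        if (--c->refs == 0)
                printf("credential freed\n");
}

/* per-open-file state, as file->private_data is used above */
struct open_file { struct cred *cred; };

static void demo_open(struct open_file *f, struct cred *session_cred)
{
        f->cred = cred_get(session_cred);  /* pin for the file's lifetime */
}

static void demo_read(struct open_file *f)
{
        printf("reading with cred %p (refs=%d)\n",
               (void *)f->cred, f->cred->refs);
}

static void demo_release(struct open_file *f)
{
        cred_put(f->cred);
}

int main(void)
{
        struct cred session = { .refs = 1 };
        struct open_file f;

        demo_open(&f, &session);
        demo_read(&f);
        demo_release(&f);
        cred_put(&session);
        return 0;
}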
87
44/* 88/*
45 * deal with notification that a page was read from the cache 89 * deal with notification that a page was read from the cache
46 */ 90 */
@@ -58,10 +102,9 @@ static void afs_file_readpage_read_complete(void *cookie_data,
58 SetPageUptodate(page); 102 SetPageUptodate(page);
59 unlock_page(page); 103 unlock_page(page);
60 104
61} /* end afs_file_readpage_read_complete() */ 105}
62#endif 106#endif
63 107
64/*****************************************************************************/
65/* 108/*
66 * deal with notification that a page was written to the cache 109 * deal with notification that a page was written to the cache
67 */ 110 */
@@ -74,41 +117,38 @@ static void afs_file_readpage_write_complete(void *cookie_data,
74 _enter("%p,%p,%p,%d", cookie_data, page, data, error); 117 _enter("%p,%p,%p,%d", cookie_data, page, data, error);
75 118
76 unlock_page(page); 119 unlock_page(page);
77 120}
78} /* end afs_file_readpage_write_complete() */
79#endif 121#endif
80 122
81/*****************************************************************************/
82/* 123/*
83 * AFS read page from file (or symlink) 124 * AFS read page from file (or symlink)
84 */ 125 */
85static int afs_file_readpage(struct file *file, struct page *page) 126static int afs_file_readpage(struct file *file, struct page *page)
86{ 127{
87 struct afs_rxfs_fetch_descriptor desc;
88#ifdef AFS_CACHING_SUPPORT
89 struct cachefs_page *pageio;
90#endif
91 struct afs_vnode *vnode; 128 struct afs_vnode *vnode;
92 struct inode *inode; 129 struct inode *inode;
130 struct key *key;
131 size_t len;
132 off_t offset;
93 int ret; 133 int ret;
94 134
95 inode = page->mapping->host; 135 inode = page->mapping->host;
96 136
97 _enter("{%lu},{%lu}", inode->i_ino, page->index); 137 ASSERT(file != NULL);
138 key = file->private_data;
139 ASSERT(key != NULL);
140
141 _enter("{%x},{%lu},{%lu}", key_serial(key), inode->i_ino, page->index);
98 142
99 vnode = AFS_FS_I(inode); 143 vnode = AFS_FS_I(inode);
100 144
101 BUG_ON(!PageLocked(page)); 145 BUG_ON(!PageLocked(page));
102 146
103 ret = -ESTALE; 147 ret = -ESTALE;
104 if (vnode->flags & AFS_VNODE_DELETED) 148 if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
105 goto error; 149 goto error;
106 150
107#ifdef AFS_CACHING_SUPPORT 151#ifdef AFS_CACHING_SUPPORT
108 ret = cachefs_page_get_private(page, &pageio, GFP_NOIO);
109 if (ret < 0)
110 goto error;
111
112 /* is it cached? */ 152 /* is it cached? */
113 ret = cachefs_read_or_alloc_page(vnode->cache, 153 ret = cachefs_read_or_alloc_page(vnode->cache,
114 page, 154 page,
@@ -132,26 +172,19 @@ static int afs_file_readpage(struct file *file, struct page *page)
132 case -ENOBUFS: 172 case -ENOBUFS:
133 case -ENODATA: 173 case -ENODATA:
134 default: 174 default:
135 desc.fid = vnode->fid; 175 offset = page->index << PAGE_CACHE_SHIFT;
136 desc.offset = page->index << PAGE_CACHE_SHIFT; 176 len = min_t(size_t, i_size_read(inode) - offset, PAGE_SIZE);
137 desc.size = min((size_t) (inode->i_size - desc.offset),
138 (size_t) PAGE_SIZE);
139 desc.buffer = kmap(page);
140
141 clear_page(desc.buffer);
142 177
143 /* read the contents of the file from the server into the 178 /* read the contents of the file from the server into the
144 * page */ 179 * page */
145 ret = afs_vnode_fetch_data(vnode, &desc); 180 ret = afs_vnode_fetch_data(vnode, key, offset, len, page);
146 kunmap(page);
147 if (ret < 0) { 181 if (ret < 0) {
148 if (ret==-ENOENT) { 182 if (ret == -ENOENT) {
149 _debug("got NOENT from server" 183 _debug("got NOENT from server"
150 " - marking file deleted and stale"); 184 " - marking file deleted and stale");
151 vnode->flags |= AFS_VNODE_DELETED; 185 set_bit(AFS_VNODE_DELETED, &vnode->flags);
152 ret = -ESTALE; 186 ret = -ESTALE;
153 } 187 }
154
155#ifdef AFS_CACHING_SUPPORT 188#ifdef AFS_CACHING_SUPPORT
156 cachefs_uncache_page(vnode->cache, page); 189 cachefs_uncache_page(vnode->cache, page);
157#endif 190#endif
@@ -178,16 +211,13 @@ static int afs_file_readpage(struct file *file, struct page *page)
178 _leave(" = 0"); 211 _leave(" = 0");
179 return 0; 212 return 0;
180 213
181 error: 214error:
182 SetPageError(page); 215 SetPageError(page);
183 unlock_page(page); 216 unlock_page(page);
184
185 _leave(" = %d", ret); 217 _leave(" = %d", ret);
186 return ret; 218 return ret;
219}
187 220
188} /* end afs_file_readpage() */
189
190/*****************************************************************************/
191/* 221/*
192 * get a page cookie for the specified page 222 * get a page cookie for the specified page
193 */ 223 */
@@ -202,10 +232,9 @@ int afs_cache_get_page_cookie(struct page *page,
202 232
203 _leave(" = %d", ret); 233 _leave(" = %d", ret);
204 return ret; 234 return ret;
205} /* end afs_cache_get_page_cookie() */ 235}
206#endif 236#endif
207 237
208/*****************************************************************************/
209/* 238/*
210 * invalidate part or all of a page 239 * invalidate part or all of a page
211 */ 240 */
@@ -240,9 +269,8 @@ static void afs_file_invalidatepage(struct page *page, unsigned long offset)
240 } 269 }
241 270
242 _leave(" = %d", ret); 271 _leave(" = %d", ret);
243} /* end afs_file_invalidatepage() */ 272}
244 273
245/*****************************************************************************/
246/* 274/*
247 * release a page and cleanup its private data 275 * release a page and cleanup its private data
248 */ 276 */
@@ -267,4 +295,4 @@ static int afs_file_releasepage(struct page *page, gfp_t gfp_flags)
267 295
268 _leave(" = 0"); 296 _leave(" = 0");
269 return 0; 297 return 0;
270} /* end afs_file_releasepage() */ 298}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 61bc371532ab..2393d2a08d79 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -1,6 +1,6 @@
1/* fsclient.c: AFS File Server client stubs 1/* AFS File Server client stubs
2 * 2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -11,827 +11,927 @@
11 11
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <rxrpc/rxrpc.h> 14#include <linux/circ_buf.h>
15#include <rxrpc/transport.h>
16#include <rxrpc/connection.h>
17#include <rxrpc/call.h>
18#include "fsclient.h"
19#include "cmservice.h"
20#include "vnode.h"
21#include "server.h"
22#include "errors.h"
23#include "internal.h" 15#include "internal.h"
16#include "afs_fs.h"
24 17
25#define FSFETCHSTATUS 132 /* AFS Fetch file status */
26#define FSFETCHDATA 130 /* AFS Fetch file data */
27#define FSGIVEUPCALLBACKS 147 /* AFS Discard callback promises */
28#define FSGETVOLUMEINFO 148 /* AFS Get root volume information */
29#define FSGETROOTVOLUME 151 /* AFS Get root volume name */
30#define FSLOOKUP 161 /* AFS lookup file in directory */
31
32/*****************************************************************************/
33/* 18/*
34 * map afs abort codes to/from Linux error codes 19 * decode an AFSFid block
35 * - called with call->lock held
36 */ 20 */
37static void afs_rxfs_aemap(struct rxrpc_call *call) 21static void xdr_decode_AFSFid(const __be32 **_bp, struct afs_fid *fid)
38{ 22{
39 switch (call->app_err_state) { 23 const __be32 *bp = *_bp;
40 case RXRPC_ESTATE_LOCAL_ABORT: 24
41 call->app_abort_code = -call->app_errno; 25 fid->vid = ntohl(*bp++);
42 break; 26 fid->vnode = ntohl(*bp++);
43 case RXRPC_ESTATE_PEER_ABORT: 27 fid->unique = ntohl(*bp++);
44 call->app_errno = afs_abort_to_error(call->app_abort_code); 28 *_bp = bp;
45 break; 29}
46 default:
47 break;
48 }
49} /* end afs_rxfs_aemap() */
50 30
51/*****************************************************************************/
52/* 31/*
53 * get the root volume name from a fileserver 32 * decode an AFSFetchStatus block
54 * - this operation doesn't seem to work correctly in OpenAFS server 1.2.2
55 */ 33 */
56#if 0 34static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
57int afs_rxfs_get_root_volume(struct afs_server *server, 35 struct afs_file_status *status,
58 char *buf, size_t *buflen) 36 struct afs_vnode *vnode)
59{ 37{
60 struct rxrpc_connection *conn; 38 const __be32 *bp = *_bp;
61 struct rxrpc_call *call; 39 umode_t mode;
62 struct kvec piov[2]; 40 u64 data_version, size;
63 size_t sent; 41 u32 changed = 0; /* becomes non-zero if ctime-type changes seen */
64 int ret; 42
65 u32 param[1]; 43#define EXTRACT(DST) \
44 do { \
45 u32 x = ntohl(*bp++); \
46 changed |= DST - x; \
47 DST = x; \
48 } while (0)
49
50 status->if_version = ntohl(*bp++);
51 EXTRACT(status->type);
52 EXTRACT(status->nlink);
53 size = ntohl(*bp++);
54 data_version = ntohl(*bp++);
55 EXTRACT(status->author);
56 EXTRACT(status->owner);
57 EXTRACT(status->caller_access); /* call ticket dependent */
58 EXTRACT(status->anon_access);
59 EXTRACT(status->mode);
60 EXTRACT(status->parent.vnode);
61 EXTRACT(status->parent.unique);
62 bp++; /* seg size */
63 status->mtime_client = ntohl(*bp++);
64 status->mtime_server = ntohl(*bp++);
65 EXTRACT(status->group);
66 bp++; /* sync counter */
67 data_version |= (u64) ntohl(*bp++) << 32;
68 bp++; /* lock count */
69 size |= (u64) ntohl(*bp++) << 32;
70 bp++; /* spare 4 */
71 *_bp = bp;
72
73 if (size != status->size) {
74 status->size = size;
75 changed |= true;
76 }
77 status->mode &= S_IALLUGO;
78
79 _debug("vnode time %lx, %lx",
80 status->mtime_client, status->mtime_server);
81
82 if (vnode) {
83 status->parent.vid = vnode->fid.vid;
84 if (changed && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
85 _debug("vnode changed");
86 i_size_write(&vnode->vfs_inode, size);
87 vnode->vfs_inode.i_uid = status->owner;
88 vnode->vfs_inode.i_gid = status->group;
89 vnode->vfs_inode.i_version = vnode->fid.unique;
90 vnode->vfs_inode.i_nlink = status->nlink;
91
92 mode = vnode->vfs_inode.i_mode;
93 mode &= ~S_IALLUGO;
94 mode |= status->mode;
95 barrier();
96 vnode->vfs_inode.i_mode = mode;
97 }
66 98
67 DECLARE_WAITQUEUE(myself, current); 99 vnode->vfs_inode.i_ctime.tv_sec = status->mtime_server;
100 vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime;
101 vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime;
102 }
68 103
69 kenter("%p,%p,%u",server, buf, *buflen); 104 if (status->data_version != data_version) {
105 status->data_version = data_version;
106 if (vnode && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
107 _debug("vnode modified %llx on {%x:%u}",
108 (unsigned long long) data_version,
109 vnode->fid.vid, vnode->fid.vnode);
110 set_bit(AFS_VNODE_MODIFIED, &vnode->flags);
111 set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
112 }
113 }
114}
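The XDR decoders above share two idioms: a cursor passed as a pointer-to-pointer that each decoder advances past what it consumed, and 64-bit fields carried as two big-endian 32-bit words, low half first. A runnable userspace sketch of both (struct fid_demo mirrors the AFSFid layout; the wire buffer is invented for the demo):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

/* mirrors the AFSFid wire layout: three big-endian 32-bit words */
struct fid_demo { uint32_t vid, vnode, unique; };

/* the cursor (*_bp) is advanced past whatever the decoder consumed,
 * exactly as the xdr_decode_* helpers above do */
static void decode_fid(const uint32_t **_bp, struct fid_demo *fid)
{
        const uint32_t *bp = *_bp;

        fid->vid = ntohl(*bp++);
        fid->vnode = ntohl(*bp++);
        fid->unique = ntohl(*bp++);
        *_bp = bp;
}

int main(void)
{
        const uint32_t wire[] = {
                htonl(0x2001), htonl(77), htonl(3),     /* an AFSFid */
                htonl(0x89abcdef), htonl(0x01234567),   /* u64: low, then high */
        };
        const uint32_t *bp = wire;
        struct fid_demo fid;
        uint64_t data_version;

        decode_fid(&bp, &fid);
        data_version = ntohl(*bp++);
        data_version |= (uint64_t)ntohl(*bp++) << 32;

        printf("fid {%x:%u.%u} dv=%llx\n",
               (unsigned)fid.vid, (unsigned)fid.vnode, (unsigned)fid.unique,
               (unsigned long long)data_version);
        return 0;
}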
70 115
71 /* get hold of the fileserver connection */ 116/*
72 ret = afs_server_get_fsconn(server, &conn); 117 * decode an AFSCallBack block
73 if (ret < 0) 118 */
74 goto out; 119static void xdr_decode_AFSCallBack(const __be32 **_bp, struct afs_vnode *vnode)
120{
121 const __be32 *bp = *_bp;
75 122
76 /* create a call through that connection */ 123 vnode->cb_version = ntohl(*bp++);
77 ret = rxrpc_create_call(conn, NULL, NULL, afs_rxfs_aemap, &call); 124 vnode->cb_expiry = ntohl(*bp++);
78 if (ret < 0) { 125 vnode->cb_type = ntohl(*bp++);
79 printk("kAFS: Unable to create call: %d\n", ret); 126 vnode->cb_expires = vnode->cb_expiry + get_seconds();
80 goto out_put_conn; 127 *_bp = bp;
81 } 128}
82 call->app_opcode = FSGETROOTVOLUME;
83 129
84 /* we want to get event notifications from the call */ 130static void xdr_decode_AFSCallBack_raw(const __be32 **_bp,
85 add_wait_queue(&call->waitq, &myself); 131 struct afs_callback *cb)
132{
133 const __be32 *bp = *_bp;
86 134
87 /* marshall the parameters */ 135 cb->version = ntohl(*bp++);
88 param[0] = htonl(FSGETROOTVOLUME); 136 cb->expiry = ntohl(*bp++);
89 137 cb->type = ntohl(*bp++);
90 piov[0].iov_len = sizeof(param); 138 *_bp = bp;
91 piov[0].iov_base = param; 139}
92
93 /* send the parameters to the server */
94 ret = rxrpc_call_write_data(call, 1, piov, RXRPC_LAST_PACKET, GFP_NOFS,
95 0, &sent);
96 if (ret < 0)
97 goto abort;
98
99 /* wait for the reply to completely arrive */
100 for (;;) {
101 set_current_state(TASK_INTERRUPTIBLE);
102 if (call->app_call_state != RXRPC_CSTATE_CLNT_RCV_REPLY ||
103 signal_pending(current))
104 break;
105 schedule();
106 }
107 set_current_state(TASK_RUNNING);
108 140
109 ret = -EINTR; 141/*
110 if (signal_pending(current)) 142 * decode an AFSVolSync block
111 goto abort; 143 */
144static void xdr_decode_AFSVolSync(const __be32 **_bp,
145 struct afs_volsync *volsync)
146{
147 const __be32 *bp = *_bp;
112 148
113 switch (call->app_call_state) { 149 volsync->creation = ntohl(*bp++);
114 case RXRPC_CSTATE_ERROR: 150 bp++; /* spare2 */
115 ret = call->app_errno; 151 bp++; /* spare3 */
116 kdebug("Got Error: %d", ret); 152 bp++; /* spare4 */
117 goto out_unwait; 153 bp++; /* spare5 */
154 bp++; /* spare6 */
155 *_bp = bp;
156}
118 157
119 case RXRPC_CSTATE_CLNT_GOT_REPLY: 158/*
120 /* read the reply */ 159 * deliver reply data to an FS.FetchStatus
121 kdebug("Got Reply: qty=%d", call->app_ready_qty); 160 */
161static int afs_deliver_fs_fetch_status(struct afs_call *call,
162 struct sk_buff *skb, bool last)
163{
164 struct afs_vnode *vnode = call->reply;
165 const __be32 *bp;
122 166
123 ret = -EBADMSG; 167 _enter(",,%u", last);
124 if (call->app_ready_qty <= 4)
125 goto abort;
126 168
127 ret = rxrpc_call_read_data(call, NULL, call->app_ready_qty, 0); 169 afs_transfer_reply(call, skb);
128 if (ret < 0) 170 if (!last)
129 goto abort; 171 return 0;
130 172
131#if 0 173 if (call->reply_size != call->reply_max)
132 /* unmarshall the reply */ 174 return -EBADMSG;
133 bp = buffer;
134 for (loop = 0; loop < 65; loop++)
135 entry->name[loop] = ntohl(*bp++);
136 entry->name[64] = 0;
137 175
138 entry->type = ntohl(*bp++); 176 /* unmarshall the reply once we've received all of it */
139 entry->num_servers = ntohl(*bp++); 177 bp = call->buffer;
178 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
179 xdr_decode_AFSCallBack(&bp, vnode);
180 if (call->reply2)
181 xdr_decode_AFSVolSync(&bp, call->reply2);
140 182
141 for (loop = 0; loop < 8; loop++) 183 _leave(" = 0 [done]");
142 entry->servers[loop].addr.s_addr = *bp++; 184 return 0;
185}
143 186
144 for (loop = 0; loop < 8; loop++) 187/*
145 entry->servers[loop].partition = ntohl(*bp++); 188 * FS.FetchStatus operation type
189 */
190static const struct afs_call_type afs_RXFSFetchStatus = {
191 .name = "FS.FetchStatus",
192 .deliver = afs_deliver_fs_fetch_status,
193 .abort_to_error = afs_abort_to_error,
194 .destructor = afs_flat_call_destructor,
195};
146 196
147 for (loop = 0; loop < 8; loop++) 197/*
148 entry->servers[loop].flags = ntohl(*bp++); 198 * fetch the status information for a file
199 */
200int afs_fs_fetch_file_status(struct afs_server *server,
201 struct key *key,
202 struct afs_vnode *vnode,
203 struct afs_volsync *volsync,
204 const struct afs_wait_mode *wait_mode)
205{
206 struct afs_call *call;
207 __be32 *bp;
149 208
150 for (loop = 0; loop < 3; loop++) 209 _enter(",%x,{%x:%d},,",
151 entry->volume_ids[loop] = ntohl(*bp++); 210 key_serial(key), vnode->fid.vid, vnode->fid.vnode);
152 211
153 entry->clone_id = ntohl(*bp++); 212 call = afs_alloc_flat_call(&afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4);
154 entry->flags = ntohl(*bp); 213 if (!call)
155#endif 214 return -ENOMEM;
156 215
157 /* success */ 216 call->key = key;
158 ret = 0; 217 call->reply = vnode;
159 goto out_unwait; 218 call->reply2 = volsync;
219 call->service_id = FS_SERVICE;
220 call->port = htons(AFS_FS_PORT);
160 221
161 default: 222 /* marshall the parameters */
162 BUG(); 223 bp = call->request;
163 } 224 bp[0] = htonl(FSFETCHSTATUS);
225 bp[1] = htonl(vnode->fid.vid);
226 bp[2] = htonl(vnode->fid.vnode);
227 bp[3] = htonl(vnode->fid.unique);
228
229 return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
230}
164 231
165 abort:
166 set_current_state(TASK_UNINTERRUPTIBLE);
167 rxrpc_call_abort(call, ret);
168 schedule();
169 out_unwait:
170 set_current_state(TASK_RUNNING);
171 remove_wait_queue(&call->waitq, &myself);
172 rxrpc_put_call(call);
173 out_put_conn:
174 afs_server_release_fsconn(server, conn);
175 out:
176 kleave("");
177 return ret;
178} /* end afs_rxfs_get_root_volume() */
179#endif
180
181/*****************************************************************************/
182/* 232/*
183 * get information about a volume 233 * deliver reply data to an FS.FetchData
184 */ 234 */
185#if 0 235static int afs_deliver_fs_fetch_data(struct afs_call *call,
186int afs_rxfs_get_volume_info(struct afs_server *server, 236 struct sk_buff *skb, bool last)
187 const char *name,
188 struct afs_volume_info *vinfo)
189{ 237{
190 struct rxrpc_connection *conn; 238 struct afs_vnode *vnode = call->reply;
191 struct rxrpc_call *call; 239 const __be32 *bp;
192 struct kvec piov[3]; 240 struct page *page;
193 size_t sent; 241 void *buffer;
194 int ret; 242 int ret;
195 u32 param[2], *bp, zero;
196 243
197 DECLARE_WAITQUEUE(myself, current); 244 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
245
246 switch (call->unmarshall) {
247 case 0:
248 call->offset = 0;
249 call->unmarshall++;
250
251 /* extract the returned data length */
252 case 1:
253 _debug("extract data length");
254 ret = afs_extract_data(call, skb, last, &call->tmp, 4);
255 switch (ret) {
256 case 0: break;
257 case -EAGAIN: return 0;
258 default: return ret;
259 }
198
199	_enter("%p,%s,%p", server, name, vinfo);
260
261	call->count = ntohl(call->tmp);
262 _debug("DATA length: %u", call->count);
263 if (call->count > PAGE_SIZE)
264 return -EBADMSG;
265 call->offset = 0;
266 call->unmarshall++;
267
268 if (call->count < PAGE_SIZE) {
269 buffer = kmap_atomic(call->reply3, KM_USER0);
270 memset(buffer + PAGE_SIZE - call->count, 0,
271 call->count);
272 kunmap_atomic(buffer, KM_USER0);
273 }
200
201	/* get hold of the fileserver connection */
202	ret = afs_server_get_fsconn(server, &conn);
203	if (ret < 0)
204		goto out;
274
275	/* extract the returned data */
276	case 2:
277		_debug("extract data");
278		page = call->reply3;
279 buffer = kmap_atomic(page, KM_USER0);
280 ret = afs_extract_data(call, skb, last, buffer, call->count);
281 kunmap_atomic(buffer, KM_USER0);
282 switch (ret) {
283 case 0: break;
284 case -EAGAIN: return 0;
285 default: return ret;
286 }
205
206	/* create a call through that connection */
207	ret = rxrpc_create_call(conn, NULL, NULL, afs_rxfs_aemap, &call);
208	if (ret < 0) {
209		printk("kAFS: Unable to create call: %d\n", ret);
210		goto out_put_conn;
211	}
212	call->app_opcode = FSGETVOLUMEINFO;
287
288		call->offset = 0;
289		call->unmarshall++;
290
291		/* extract the metadata */
292	case 3:
293		ret = afs_extract_data(call, skb, last, call->buffer,
294				       (21 + 3 + 6) * 4);
295 switch (ret) {
296 case 0: break;
297 case -EAGAIN: return 0;
298 default: return ret;
299 }
300
301		bp = call->buffer;
302		xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
303		xdr_decode_AFSCallBack(&bp, vnode);
304		if (call->reply2)
305			xdr_decode_AFSVolSync(&bp, call->reply2);
306
307		call->offset = 0;
308		call->unmarshall++;
309
310	case 4:
311		_debug("trailer");
312		if (skb->len != 0)
313			return -EBADMSG;
314		break;
213
214	/* we want to get event notifications from the call */
215	add_wait_queue(&call->waitq, &myself);
216
217	/* marshall the parameters */
218	piov[1].iov_len = strlen(name);
219	piov[1].iov_base = (char *) name;
220
221	zero = 0;
222	piov[2].iov_len = (4 - (piov[1].iov_len & 3)) & 3;
223	piov[2].iov_base = &zero;
224
225 param[0] = htonl(FSGETVOLUMEINFO);
226 param[1] = htonl(piov[1].iov_len);
227
228 piov[0].iov_len = sizeof(param);
229 piov[0].iov_base = param;
230
231 /* send the parameters to the server */
232 ret = rxrpc_call_write_data(call, 3, piov, RXRPC_LAST_PACKET, GFP_NOFS,
233 0, &sent);
234 if (ret < 0)
235 goto abort;
236
237 /* wait for the reply to completely arrive */
238 bp = rxrpc_call_alloc_scratch(call, 64);
239
240 ret = rxrpc_call_read_data(call, bp, 64,
241 RXRPC_CALL_READ_BLOCK |
242 RXRPC_CALL_READ_ALL);
243 if (ret < 0) {
244 if (ret == -ECONNABORTED) {
245 ret = call->app_errno;
246 goto out_unwait;
247 }
248 goto abort;
315	}
316
317	if (!last)
318		return 0;
319
320	_leave(" = 0 [done]");
321	return 0;
322}
323
249	}
250
251	/* unmarshall the reply */
252	vinfo->vid = ntohl(*bp++);
253	vinfo->type = ntohl(*bp++);
254
255	vinfo->type_vids[0] = ntohl(*bp++);
256	vinfo->type_vids[1] = ntohl(*bp++);
257	vinfo->type_vids[2] = ntohl(*bp++);
258 vinfo->type_vids[3] = ntohl(*bp++);
259 vinfo->type_vids[4] = ntohl(*bp++);
260
261 vinfo->nservers = ntohl(*bp++);
262 vinfo->servers[0].addr.s_addr = *bp++;
263 vinfo->servers[1].addr.s_addr = *bp++;
264 vinfo->servers[2].addr.s_addr = *bp++;
265 vinfo->servers[3].addr.s_addr = *bp++;
266 vinfo->servers[4].addr.s_addr = *bp++;
267 vinfo->servers[5].addr.s_addr = *bp++;
268 vinfo->servers[6].addr.s_addr = *bp++;
269 vinfo->servers[7].addr.s_addr = *bp++;
270
271 ret = -EBADMSG;
272 if (vinfo->nservers > 8)
273 goto abort;
274
275 /* success */
276 ret = 0;
277
278 out_unwait:
279 set_current_state(TASK_RUNNING);
280 remove_wait_queue(&call->waitq, &myself);
281 rxrpc_put_call(call);
282 out_put_conn:
283 afs_server_release_fsconn(server, conn);
284 out:
285 _leave("");
286 return ret;
287
288 abort:
289 set_current_state(TASK_UNINTERRUPTIBLE);
290 rxrpc_call_abort(call, ret);
291 schedule();
292 goto out_unwait;
293
294} /* end afs_rxfs_get_volume_info() */
295#endif
296
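The new deliverer above drains reply packets incrementally, using call->unmarshall as a resume point and call->offset as progress within the current item. A minimal stand-alone sketch of the same pattern (names and the extract() helper are hypothetical stand-ins; afs_extract_data() semantics are assumed to be 0 = item complete, -EAGAIN = more packets needed):

#include <arpa/inet.h>		/* ntohl() */
#include <errno.h>
#include <stddef.h>
#include <stdint.h>

struct call_state {
	unsigned int unmarshall;	/* phase to resume at */
	size_t offset;			/* progress within current item */
	uint32_t tmp;			/* scratch for the length word */
	uint32_t count;			/* decoded payload length */
	void *buffer;			/* payload destination */
};

/* stand-in for afs_extract_data(): copies up to 'want' bytes of the
 * item, returning 0 when complete and -EAGAIN when more is needed */
int extract(struct call_state *c, const void *data, size_t len,
	    void *buf, size_t want);

int deliver(struct call_state *c, const void *data, size_t len)
{
	int ret;

	switch (c->unmarshall) {
	case 0:
		c->offset = 0;
		c->unmarshall++;
		/* fall through, as the kernel deliverers do */
	case 1:	/* extract the 4-byte payload length */
		ret = extract(c, data, len, &c->tmp, 4);
		if (ret < 0)
			return ret == -EAGAIN ? 0 : ret;
		c->count = ntohl(c->tmp);
		c->offset = 0;
		c->unmarshall++;
		/* fall through */
	case 2:	/* extract the payload itself */
		ret = extract(c, data, len, c->buffer, c->count);
		if (ret < 0)
			return ret == -EAGAIN ? 0 : ret;
		c->unmarshall++;
	}
	return 0;
}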
297/*****************************************************************************/
298/*
299 * fetch the status information for a file
300 */
301int afs_rxfs_fetch_file_status(struct afs_server *server,
302			       struct afs_vnode *vnode,
303			       struct afs_volsync *volsync)
304{
305	struct afs_server_callslot callslot;
306	struct rxrpc_call *call;
307	struct kvec piov[1];
308	size_t sent;
309	int ret;
310	__be32 *bp;
311
312	DECLARE_WAITQUEUE(myself, current);
313
314	_enter("%p,{%u,%u,%u}",
315	       server, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
316
317	/* get hold of the fileserver connection */
318	ret = afs_server_request_callslot(server, &callslot);
319	if (ret < 0)
320		goto out;
321
322	/* create a call through that connection */
323	ret = rxrpc_create_call(callslot.conn, NULL, NULL, afs_rxfs_aemap,
324				&call);
325	if (ret < 0) {
326		printk("kAFS: Unable to create call: %d\n", ret);
327		goto out_put_conn;
328	}
329	call->app_opcode = FSFETCHSTATUS;
330
331	/* we want to get event notifications from the call */
332	add_wait_queue(&call->waitq, &myself);
333
334	/* marshall the parameters */
335	bp = rxrpc_call_alloc_scratch(call, 16);
336	bp[0] = htonl(FSFETCHSTATUS);
337	bp[1] = htonl(vnode->fid.vid);
338	bp[2] = htonl(vnode->fid.vnode);
339	bp[3] = htonl(vnode->fid.unique);
340
341	piov[0].iov_len = 16;
342	piov[0].iov_base = bp;
324/*
325 * FS.FetchData operation type
326 */
327static const struct afs_call_type afs_RXFSFetchData = {
328	.name		= "FS.FetchData",
329	.deliver	= afs_deliver_fs_fetch_data,
330	.abort_to_error	= afs_abort_to_error,
331	.destructor	= afs_flat_call_destructor,
332};
333
334/*
335 * fetch data from a file
336 */
337int afs_fs_fetch_data(struct afs_server *server,
338		      struct key *key,
339		      struct afs_vnode *vnode,
340		      off_t offset, size_t length,
341		      struct page *buffer,
342		      const struct afs_wait_mode *wait_mode)
343{
344	struct afs_call *call;
345	__be32 *bp;
346
347	_enter("");
348
349	call = afs_alloc_flat_call(&afs_RXFSFetchData, 24, (21 + 3 + 6) * 4);
350	if (!call)
351		return -ENOMEM;
352
353	call->key = key;
354	call->reply = vnode;
355	call->reply2 = NULL; /* volsync */
356	call->reply3 = buffer;
357	call->service_id = FS_SERVICE;
358	call->port = htons(AFS_FS_PORT);
359
360	/* marshall the parameters */
361	bp = call->request;
362	bp[0] = htonl(FSFETCHDATA);
363	bp[1] = htonl(vnode->fid.vid);
364	bp[2] = htonl(vnode->fid.vnode);
365	bp[3] = htonl(vnode->fid.unique);
366	bp[4] = htonl(offset);
367	bp[5] = htonl(length);
368
369	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
370}
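For orientation, a hypothetical caller of the new afs_fs_fetch_data() might look roughly like this; the surrounding helpers and error handling are elided, so this is a sketch rather than code from the patch:

/* Hypothetical caller sketch: read one page of a vnode synchronously.
 * afs_sync_call is the synchronous wait mode declared in internal.h. */
static int read_one_page(struct afs_server *server, struct key *key,
			 struct afs_vnode *vnode, pgoff_t index,
			 struct page *page)
{
	/* offset/length are marshalled with htonl() above, so this form
	 * of the RPC is limited to 32-bit file offsets */
	return afs_fs_fetch_data(server, key, vnode,
				 (off_t)index << PAGE_SHIFT, PAGE_SIZE,
				 page, &afs_sync_call);
}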
343
344 /* send the parameters to the server */
345 ret = rxrpc_call_write_data(call, 1, piov, RXRPC_LAST_PACKET, GFP_NOFS,
346 0, &sent);
347 if (ret < 0)
348 goto abort;
349
350 /* wait for the reply to completely arrive */
351 bp = rxrpc_call_alloc_scratch(call, 120);
352
353 ret = rxrpc_call_read_data(call, bp, 120,
354 RXRPC_CALL_READ_BLOCK |
355 RXRPC_CALL_READ_ALL);
356 if (ret < 0) {
357 if (ret == -ECONNABORTED) {
358 ret = call->app_errno;
359 goto out_unwait;
360 }
361 goto abort;
362 }
371
372/*
373 * deliver reply data to an FS.GiveUpCallBacks
374 */
375static int afs_deliver_fs_give_up_callbacks(struct afs_call *call,
376					    struct sk_buff *skb, bool last)
377{
378	_enter(",{%u},%d", skb->len, last);
379
380	if (skb->len > 0)
381		return -EBADMSG; /* shouldn't be any reply data */
382	return 0;
383}
363
364	/* unmarshall the reply */
365	vnode->status.if_version = ntohl(*bp++);
366	vnode->status.type = ntohl(*bp++);
367	vnode->status.nlink = ntohl(*bp++);
368	vnode->status.size = ntohl(*bp++);
369	vnode->status.version = ntohl(*bp++);
370	vnode->status.author = ntohl(*bp++);
371	vnode->status.owner = ntohl(*bp++);
372	vnode->status.caller_access = ntohl(*bp++);
373	vnode->status.anon_access = ntohl(*bp++);
374	vnode->status.mode = ntohl(*bp++);
375	vnode->status.parent.vid = vnode->fid.vid;
376	vnode->status.parent.vnode = ntohl(*bp++);
377	vnode->status.parent.unique = ntohl(*bp++);
378	bp++; /* seg size */
379	vnode->status.mtime_client = ntohl(*bp++);
380	vnode->status.mtime_server = ntohl(*bp++);
381	bp++; /* group */
382	bp++; /* sync counter */
383	vnode->status.version |= ((unsigned long long) ntohl(*bp++)) << 32;
384	bp++; /* spare2 */
385	bp++; /* spare3 */
386	bp++; /* spare4 */
387
388	vnode->cb_version = ntohl(*bp++);
389	vnode->cb_expiry = ntohl(*bp++);
390	vnode->cb_type = ntohl(*bp++);
391
392 if (volsync) {
393 volsync->creation = ntohl(*bp++);
394 bp++; /* spare2 */
395 bp++; /* spare3 */
396 bp++; /* spare4 */
397 bp++; /* spare5 */
398 bp++; /* spare6 */
399 }
400 384
401 /* success */
402 ret = 0;
403
404 out_unwait:
405 set_current_state(TASK_RUNNING);
406 remove_wait_queue(&call->waitq, &myself);
407 rxrpc_put_call(call);
408 out_put_conn:
409 afs_server_release_callslot(server, &callslot);
410 out:
411 _leave("");
412 return ret;
413
414 abort:
415 set_current_state(TASK_UNINTERRUPTIBLE);
416 rxrpc_call_abort(call, ret);
417 schedule();
418 goto out_unwait;
419} /* end afs_rxfs_fetch_file_status() */
420
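Note how the old decode above reassembles a 64-bit data version from two XDR words that are not even adjacent: the low 32 bits arrive early in the status block and the high 32 bits arrive after the sync-counter slot. A compact illustration of that split:

#include <arpa/inet.h>
#include <stdint.h>

/* Mirror of the two 'status.version' assignments in the decode
 * sequence above: AFS carries a 64-bit quantity as two big-endian
 * 32-bit XDR words in separate positions. */
static uint64_t decode_split_u64(uint32_t lo_wire, uint32_t hi_wire)
{
	uint64_t v = ntohl(lo_wire);		/* early word: low 32 bits */
	v |= (uint64_t)ntohl(hi_wire) << 32;	/* later word: high 32 bits */
	return v;
}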
385/*
386 * FS.GiveUpCallBacks operation type
387 */
388static const struct afs_call_type afs_RXFSGiveUpCallBacks = {
389	.name		= "FS.GiveUpCallBacks",
390	.deliver	= afs_deliver_fs_give_up_callbacks,
391	.abort_to_error	= afs_abort_to_error,
392	.destructor	= afs_flat_call_destructor,
393};
394
395/*
396 * give up a set of callbacks
397 * - the callbacks are held in the server->cb_break ring
398 */
399int afs_fs_give_up_callbacks(struct afs_server *server,
400			     const struct afs_wait_mode *wait_mode)
401{
402	struct afs_call *call;
403	size_t ncallbacks;
404	__be32 *bp, *tp;
405	int loop;
406
407	ncallbacks = CIRC_CNT(server->cb_break_head, server->cb_break_tail,
408			      ARRAY_SIZE(server->cb_break));
409
410	_enter("{%zu},", ncallbacks);
411
412	if (ncallbacks == 0)
413		return 0;
414	if (ncallbacks > AFSCBMAX)
415		ncallbacks = AFSCBMAX;
416
417	_debug("break %zu callbacks", ncallbacks);
421/*****************************************************************************/
422/*
423 * fetch the contents of a file or directory
424 */
425int afs_rxfs_fetch_file_data(struct afs_server *server,
426			     struct afs_vnode *vnode,
427			     struct afs_rxfs_fetch_descriptor *desc,
428			     struct afs_volsync *volsync)
429{
430	struct afs_server_callslot callslot;
431	struct rxrpc_call *call;
432	struct kvec piov[1];
433	size_t sent;
434	int ret;
435	__be32 *bp;
436
437	DECLARE_WAITQUEUE(myself, current);
438
439	_enter("%p,{fid={%u,%u,%u},sz=%Zu,of=%lu}",
440	       server,
441	       desc->fid.vid,
442	       desc->fid.vnode,
443	       desc->fid.unique,
444	       desc->size,
445	       desc->offset);
446
447	/* get hold of the fileserver connection */
448 ret = afs_server_request_callslot(server, &callslot);
449 if (ret < 0)
450 goto out;
451
452 /* create a call through that connection */
453 ret = rxrpc_create_call(callslot.conn, NULL, NULL, afs_rxfs_aemap, &call);
454 if (ret < 0) {
455 printk("kAFS: Unable to create call: %d\n", ret);
456 goto out_put_conn;
457 }
458 call->app_opcode = FSFETCHDATA;
459 418
460 /* we want to get event notifications from the call */ 419 call = afs_alloc_flat_call(&afs_RXFSGiveUpCallBacks,
461 add_wait_queue(&call->waitq, &myself); 420 12 + ncallbacks * 6 * 4, 0);
421 if (!call)
422 return -ENOMEM;
423
424 call->service_id = FS_SERVICE;
425 call->port = htons(AFS_FS_PORT);
462 426
463 /* marshall the parameters */ 427 /* marshall the parameters */
464 bp = rxrpc_call_alloc_scratch(call, 24); 428 bp = call->request;
465 bp[0] = htonl(FSFETCHDATA); 429 tp = bp + 2 + ncallbacks * 3;
466 bp[1] = htonl(desc->fid.vid); 430 *bp++ = htonl(FSGIVEUPCALLBACKS);
467 bp[2] = htonl(desc->fid.vnode); 431 *bp++ = htonl(ncallbacks);
468 bp[3] = htonl(desc->fid.unique); 432 *tp++ = htonl(ncallbacks);
469 bp[4] = htonl(desc->offset); 433
470 bp[5] = htonl(desc->size); 434 atomic_sub(ncallbacks, &server->cb_break_n);
471 435 for (loop = ncallbacks; loop > 0; loop--) {
472 piov[0].iov_len = 24; 436 struct afs_callback *cb =
473 piov[0].iov_base = bp; 437 &server->cb_break[server->cb_break_tail];
474 438
475 /* send the parameters to the server */ 439 *bp++ = htonl(cb->fid.vid);
476 ret = rxrpc_call_write_data(call, 1, piov, RXRPC_LAST_PACKET, GFP_NOFS, 440 *bp++ = htonl(cb->fid.vnode);
477 0, &sent); 441 *bp++ = htonl(cb->fid.unique);
478 if (ret < 0) 442 *tp++ = htonl(cb->version);
479 goto abort; 443 *tp++ = htonl(cb->expiry);
480 444 *tp++ = htonl(cb->type);
481 /* wait for the data count to arrive */ 445 smp_mb();
482 ret = rxrpc_call_read_data(call, bp, 4, RXRPC_CALL_READ_BLOCK); 446 server->cb_break_tail =
483 if (ret < 0) 447 (server->cb_break_tail + 1) &
484 goto read_failed; 448 (ARRAY_SIZE(server->cb_break) - 1);
485
486 desc->actual = ntohl(bp[0]);
487 if (desc->actual != desc->size) {
488 ret = -EBADMSG;
489 goto abort;
490 } 449 }
491 450
492 /* call the app to read the actual data */ 451 ASSERT(ncallbacks > 0);
493 rxrpc_call_reset_scratch(call); 452 wake_up_nr(&server->cb_break_waitq, ncallbacks);
494
495 ret = rxrpc_call_read_data(call, desc->buffer, desc->actual,
496 RXRPC_CALL_READ_BLOCK);
497 if (ret < 0)
498 goto read_failed;
499
500 /* wait for the rest of the reply to completely arrive */
501 rxrpc_call_reset_scratch(call);
502 bp = rxrpc_call_alloc_scratch(call, 120);
503
504 ret = rxrpc_call_read_data(call, bp, 120,
505 RXRPC_CALL_READ_BLOCK |
506 RXRPC_CALL_READ_ALL);
507 if (ret < 0)
508 goto read_failed;
509
510 /* unmarshall the reply */
511 vnode->status.if_version = ntohl(*bp++);
512 vnode->status.type = ntohl(*bp++);
513 vnode->status.nlink = ntohl(*bp++);
514 vnode->status.size = ntohl(*bp++);
515 vnode->status.version = ntohl(*bp++);
516 vnode->status.author = ntohl(*bp++);
517 vnode->status.owner = ntohl(*bp++);
518 vnode->status.caller_access = ntohl(*bp++);
519 vnode->status.anon_access = ntohl(*bp++);
520 vnode->status.mode = ntohl(*bp++);
521 vnode->status.parent.vid = desc->fid.vid;
522 vnode->status.parent.vnode = ntohl(*bp++);
523 vnode->status.parent.unique = ntohl(*bp++);
524 bp++; /* seg size */
525 vnode->status.mtime_client = ntohl(*bp++);
526 vnode->status.mtime_server = ntohl(*bp++);
527 bp++; /* group */
528 bp++; /* sync counter */
529 vnode->status.version |= ((unsigned long long) ntohl(*bp++)) << 32;
530 bp++; /* spare2 */
531 bp++; /* spare3 */
532 bp++; /* spare4 */
533 453
534 vnode->cb_version = ntohl(*bp++); 454 return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
535 vnode->cb_expiry = ntohl(*bp++); 455}
536 vnode->cb_type = ntohl(*bp++);
537
538 if (volsync) {
539 volsync->creation = ntohl(*bp++);
540 bp++; /* spare2 */
541 bp++; /* spare3 */
542 bp++; /* spare4 */
543 bp++; /* spare5 */
544 bp++; /* spare6 */
545 }
546 456
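afs_fs_give_up_callbacks() above consumes entries from a power-of-two ring using CIRC_CNT() and a masked tail advance. A stand-alone sketch of that consumer pattern (ring size and element type here are hypothetical, not the patch's):

#include <stddef.h>

#define RING_SIZE 64			/* must be a power of two */
#define CIRC_CNT(head, tail, size) (((head) - (tail)) & ((size) - 1))

struct ring {
	int slot[RING_SIZE];
	unsigned int head;		/* advanced by the producer */
	unsigned int tail;		/* advanced by the consumer */
};

/* Consume up to 'max' entries, mirroring the cb_break loop above:
 * read each slot, then advance the masked tail (the kernel code
 * additionally orders the two with smp_mb() before moving the tail). */
static size_t ring_consume(struct ring *r, int *out, size_t max)
{
	size_t n = CIRC_CNT(r->head, r->tail, RING_SIZE);
	size_t i;

	if (n > max)
		n = max;
	for (i = 0; i < n; i++) {
		out[i] = r->slot[r->tail];
		r->tail = (r->tail + 1) & (RING_SIZE - 1);
	}
	return n;
}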
547 /* success */ 457/*
548 ret = 0; 458 * deliver reply data to an FS.CreateFile or an FS.MakeDir
549 459 */
550 out_unwait: 460static int afs_deliver_fs_create_vnode(struct afs_call *call,
551 set_current_state(TASK_RUNNING); 461 struct sk_buff *skb, bool last)
552 remove_wait_queue(&call->waitq,&myself); 462{
553 rxrpc_put_call(call); 463 struct afs_vnode *vnode = call->reply;
554 out_put_conn: 464 const __be32 *bp;
555 afs_server_release_callslot(server, &callslot); 465
556 out: 466 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
557 _leave(" = %d", ret);
558 return ret;
559
560 read_failed:
561 if (ret == -ECONNABORTED) {
562 ret = call->app_errno;
563 goto out_unwait;
564 }
565 467
566 abort: 468 afs_transfer_reply(call, skb);
567 set_current_state(TASK_UNINTERRUPTIBLE); 469 if (!last)
568 rxrpc_call_abort(call, ret); 470 return 0;
569 schedule();
570 goto out_unwait;
571 471
572} /* end afs_rxfs_fetch_file_data() */ 472 if (call->reply_size != call->reply_max)
473 return -EBADMSG;
474
475 /* unmarshall the reply once we've received all of it */
476 bp = call->buffer;
477 xdr_decode_AFSFid(&bp, call->reply2);
478 xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL);
479 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
480 xdr_decode_AFSCallBack_raw(&bp, call->reply4);
481 /* xdr_decode_AFSVolSync(&bp, call->replyX); */
482
483 _leave(" = 0 [done]");
484 return 0;
485}
486
487/*
488 * FS.CreateFile and FS.MakeDir operation type
489 */
490static const struct afs_call_type afs_RXFSCreateXXXX = {
491 .name = "FS.CreateXXXX",
492 .deliver = afs_deliver_fs_create_vnode,
493 .abort_to_error = afs_abort_to_error,
494 .destructor = afs_flat_call_destructor,
495};
573 496
574/*****************************************************************************/
575/* 497/*
576 * ask the AFS fileserver to discard a callback request on a file 498 * create a file or make a directory
577 */ 499 */
578int afs_rxfs_give_up_callback(struct afs_server *server, 500int afs_fs_create(struct afs_server *server,
579 struct afs_vnode *vnode) 501 struct key *key,
502 struct afs_vnode *vnode,
503 const char *name,
504 umode_t mode,
505 struct afs_fid *newfid,
506 struct afs_file_status *newstatus,
507 struct afs_callback *newcb,
508 const struct afs_wait_mode *wait_mode)
580{ 509{
581 struct afs_server_callslot callslot; 510 struct afs_call *call;
582 struct rxrpc_call *call; 511 size_t namesz, reqsz, padsz;
583 struct kvec piov[1];
584 size_t sent;
585 int ret;
586 __be32 *bp; 512 __be32 *bp;
587 513
588 DECLARE_WAITQUEUE(myself, current); 514 _enter("");
589 515
590 _enter("%p,{%u,%u,%u}", 516 namesz = strlen(name);
591 server, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); 517 padsz = (4 - (namesz & 3)) & 3;
518 reqsz = (5 * 4) + namesz + padsz + (6 * 4);
592 519
593 /* get hold of the fileserver connection */ 520 call = afs_alloc_flat_call(&afs_RXFSCreateXXXX, reqsz,
594 ret = afs_server_request_callslot(server, &callslot); 521 (3 + 21 + 21 + 3 + 6) * 4);
595 if (ret < 0) 522 if (!call)
596 goto out; 523 return -ENOMEM;
597 524
598 /* create a call through that connection */ 525 call->key = key;
599 ret = rxrpc_create_call(callslot.conn, NULL, NULL, afs_rxfs_aemap, &call); 526 call->reply = vnode;
600 if (ret < 0) { 527 call->reply2 = newfid;
601 printk("kAFS: Unable to create call: %d\n", ret); 528 call->reply3 = newstatus;
602 goto out_put_conn; 529 call->reply4 = newcb;
530 call->service_id = FS_SERVICE;
531 call->port = htons(AFS_FS_PORT);
532
533 /* marshall the parameters */
534 bp = call->request;
535 *bp++ = htonl(S_ISDIR(mode) ? FSMAKEDIR : FSCREATEFILE);
536 *bp++ = htonl(vnode->fid.vid);
537 *bp++ = htonl(vnode->fid.vnode);
538 *bp++ = htonl(vnode->fid.unique);
539 *bp++ = htonl(namesz);
540 memcpy(bp, name, namesz);
541 bp = (void *) bp + namesz;
542 if (padsz > 0) {
543 memset(bp, 0, padsz);
544 bp = (void *) bp + padsz;
603 } 545 }
604 call->app_opcode = FSGIVEUPCALLBACKS; 546 *bp++ = htonl(AFS_SET_MODE);
547 *bp++ = 0; /* mtime */
548 *bp++ = 0; /* owner */
549 *bp++ = 0; /* group */
550 *bp++ = htonl(mode & S_IALLUGO); /* unix mode */
551 *bp++ = 0; /* segment size */
605 552
606 /* we want to get event notifications from the call */ 553 return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
607 add_wait_queue(&call->waitq, &myself); 554}
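The name-marshalling idiom in afs_fs_create() above (32-bit length word, the bytes, then zero padding to a 4-byte boundary) recurs in every namespace-modifying call below. As a self-contained sketch of that idiom:

#include <arpa/inet.h>
#include <stdint.h>
#include <string.h>

/* Marshal an XDR string the way the calls above do: a 32-bit length,
 * the bytes themselves, then 0-3 NUL bytes so the next field starts
 * on a 4-byte boundary.  Returns the advanced write pointer. */
static void *xdr_put_name(void *p, const char *name)
{
	size_t namesz = strlen(name);
	size_t padsz = (4 - (namesz & 3)) & 3;	/* 0..3 pad bytes */
	uint32_t *len = p;

	*len = htonl(namesz);
	p = len + 1;
	memcpy(p, name, namesz);
	p = (char *)p + namesz;
	if (padsz > 0) {
		memset(p, 0, padsz);
		p = (char *)p + padsz;
	}
	return p;
}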
608 555
609 /* marshall the parameters */ 556/*
610 bp = rxrpc_call_alloc_scratch(call, (1 + 4 + 4) * 4); 557 * deliver reply data to an FS.RemoveFile or FS.RemoveDir
558 */
559static int afs_deliver_fs_remove(struct afs_call *call,
560 struct sk_buff *skb, bool last)
561{
562 struct afs_vnode *vnode = call->reply;
563 const __be32 *bp;
611 564
612 piov[0].iov_len = (1 + 4 + 4) * 4; 565 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
613 piov[0].iov_base = bp;
614 566
615 *bp++ = htonl(FSGIVEUPCALLBACKS); 567 afs_transfer_reply(call, skb);
616 *bp++ = htonl(1); 568 if (!last)
569 return 0;
570
571 if (call->reply_size != call->reply_max)
572 return -EBADMSG;
573
574 /* unmarshall the reply once we've received all of it */
575 bp = call->buffer;
576 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
577 /* xdr_decode_AFSVolSync(&bp, call->replyX); */
578
579 _leave(" = 0 [done]");
580 return 0;
581}
582
583/*
584 * FS.RemoveDir/FS.RemoveFile operation type
585 */
586static const struct afs_call_type afs_RXFSRemoveXXXX = {
587 .name = "FS.RemoveXXXX",
588 .deliver = afs_deliver_fs_remove,
589 .abort_to_error = afs_abort_to_error,
590 .destructor = afs_flat_call_destructor,
591};
592
593/*
594 * remove a file or directory
595 */
596int afs_fs_remove(struct afs_server *server,
597 struct key *key,
598 struct afs_vnode *vnode,
599 const char *name,
600 bool isdir,
601 const struct afs_wait_mode *wait_mode)
602{
603 struct afs_call *call;
604 size_t namesz, reqsz, padsz;
605 __be32 *bp;
606
607 _enter("");
608
609 namesz = strlen(name);
610 padsz = (4 - (namesz & 3)) & 3;
611 reqsz = (5 * 4) + namesz + padsz;
612
613 call = afs_alloc_flat_call(&afs_RXFSRemoveXXXX, reqsz, (21 + 6) * 4);
614 if (!call)
615 return -ENOMEM;
616
617 call->key = key;
618 call->reply = vnode;
619 call->service_id = FS_SERVICE;
620 call->port = htons(AFS_FS_PORT);
621
622 /* marshall the parameters */
623 bp = call->request;
624 *bp++ = htonl(isdir ? FSREMOVEDIR : FSREMOVEFILE);
617 *bp++ = htonl(vnode->fid.vid); 625 *bp++ = htonl(vnode->fid.vid);
618 *bp++ = htonl(vnode->fid.vnode); 626 *bp++ = htonl(vnode->fid.vnode);
619 *bp++ = htonl(vnode->fid.unique); 627 *bp++ = htonl(vnode->fid.unique);
620 *bp++ = htonl(1); 628 *bp++ = htonl(namesz);
621 *bp++ = htonl(vnode->cb_version); 629 memcpy(bp, name, namesz);
622 *bp++ = htonl(vnode->cb_expiry); 630 bp = (void *) bp + namesz;
623 *bp++ = htonl(vnode->cb_type); 631 if (padsz > 0) {
624 632 memset(bp, 0, padsz);
625 /* send the parameters to the server */ 633 bp = (void *) bp + padsz;
626 ret = rxrpc_call_write_data(call, 1, piov, RXRPC_LAST_PACKET, GFP_NOFS,
627 0, &sent);
628 if (ret < 0)
629 goto abort;
630
631 /* wait for the reply to completely arrive */
632 for (;;) {
633 set_current_state(TASK_INTERRUPTIBLE);
634 if (call->app_call_state != RXRPC_CSTATE_CLNT_RCV_REPLY ||
635 signal_pending(current))
636 break;
637 schedule();
638 } 634 }
639 set_current_state(TASK_RUNNING);
640 635
641 ret = -EINTR; 636 return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
642 if (signal_pending(current)) 637}
643 goto abort;
644 638
645 switch (call->app_call_state) { 639/*
646 case RXRPC_CSTATE_ERROR: 640 * deliver reply data to an FS.Link
647 ret = call->app_errno; 641 */
648 goto out_unwait; 642static int afs_deliver_fs_link(struct afs_call *call,
643 struct sk_buff *skb, bool last)
644{
645 struct afs_vnode *dvnode = call->reply, *vnode = call->reply2;
646 const __be32 *bp;
649 647
650 case RXRPC_CSTATE_CLNT_GOT_REPLY: 648 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
651 ret = 0;
652 goto out_unwait;
653 649
654 default: 650 afs_transfer_reply(call, skb);
655 BUG(); 651 if (!last)
656 } 652 return 0;
653
654 if (call->reply_size != call->reply_max)
655 return -EBADMSG;
656
657 /* unmarshall the reply once we've received all of it */
658 bp = call->buffer;
659 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
660 xdr_decode_AFSFetchStatus(&bp, &dvnode->status, dvnode);
661 /* xdr_decode_AFSVolSync(&bp, call->replyX); */
662
663 _leave(" = 0 [done]");
664 return 0;
665}
666
667/*
668 * FS.Link operation type
669 */
670static const struct afs_call_type afs_RXFSLink = {
671 .name = "FS.Link",
672 .deliver = afs_deliver_fs_link,
673 .abort_to_error = afs_abort_to_error,
674 .destructor = afs_flat_call_destructor,
675};
657 676
658 out_unwait:
659 set_current_state(TASK_RUNNING);
660 remove_wait_queue(&call->waitq, &myself);
661 rxrpc_put_call(call);
662 out_put_conn:
663 afs_server_release_callslot(server, &callslot);
664 out:
665 _leave("");
666 return ret;
667
668 abort:
669 set_current_state(TASK_UNINTERRUPTIBLE);
670 rxrpc_call_abort(call, ret);
671 schedule();
672 goto out_unwait;
673} /* end afs_rxfs_give_up_callback() */
674
675/*****************************************************************************/
676/* 677/*
677 * look a filename up in a directory 678 * make a hard link
678 * - this operation doesn't seem to work correctly in OpenAFS server 1.2.2
679 */ 679 */
680#if 0 680int afs_fs_link(struct afs_server *server,
681int afs_rxfs_lookup(struct afs_server *server, 681 struct key *key,
682 struct afs_vnode *dir, 682 struct afs_vnode *dvnode,
683 const char *filename, 683 struct afs_vnode *vnode,
684 struct afs_vnode *vnode, 684 const char *name,
685 struct afs_volsync *volsync) 685 const struct afs_wait_mode *wait_mode)
686{ 686{
687 struct rxrpc_connection *conn; 687 struct afs_call *call;
688 struct rxrpc_call *call; 688 size_t namesz, reqsz, padsz;
689 struct kvec piov[3]; 689 __be32 *bp;
690 size_t sent;
691 int ret;
692 u32 *bp, zero;
693 690
694 DECLARE_WAITQUEUE(myself, current); 691 _enter("");
695 692
696 kenter("%p,{%u,%u,%u},%s", 693 namesz = strlen(name);
697 server, fid->vid, fid->vnode, fid->unique, filename); 694 padsz = (4 - (namesz & 3)) & 3;
695 reqsz = (5 * 4) + namesz + padsz + (3 * 4);
698 696
699 /* get hold of the fileserver connection */ 697 call = afs_alloc_flat_call(&afs_RXFSLink, reqsz, (21 + 21 + 6) * 4);
700 ret = afs_server_get_fsconn(server, &conn); 698 if (!call)
701 if (ret < 0) 699 return -ENOMEM;
702 goto out;
703 700
704 /* create a call through that connection */ 701 call->key = key;
705 ret = rxrpc_create_call(conn, NULL, NULL, afs_rxfs_aemap, &call); 702 call->reply = dvnode;
706 if (ret < 0) { 703 call->reply2 = vnode;
707 printk("kAFS: Unable to create call: %d\n", ret); 704 call->service_id = FS_SERVICE;
708 goto out_put_conn; 705 call->port = htons(AFS_FS_PORT);
706
707 /* marshall the parameters */
708 bp = call->request;
709 *bp++ = htonl(FSLINK);
710 *bp++ = htonl(dvnode->fid.vid);
711 *bp++ = htonl(dvnode->fid.vnode);
712 *bp++ = htonl(dvnode->fid.unique);
713 *bp++ = htonl(namesz);
714 memcpy(bp, name, namesz);
715 bp = (void *) bp + namesz;
716 if (padsz > 0) {
717 memset(bp, 0, padsz);
718 bp = (void *) bp + padsz;
709 } 719 }
710 call->app_opcode = FSLOOKUP; 720 *bp++ = htonl(vnode->fid.vid);
721 *bp++ = htonl(vnode->fid.vnode);
722 *bp++ = htonl(vnode->fid.unique);
723
724 return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
725}
726
727/*
728 * deliver reply data to an FS.Symlink
729 */
730static int afs_deliver_fs_symlink(struct afs_call *call,
731 struct sk_buff *skb, bool last)
732{
733 struct afs_vnode *vnode = call->reply;
734 const __be32 *bp;
711 735
712 /* we want to get event notifications from the call */ 736 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
713 add_wait_queue(&call->waitq,&myself); 737
738 afs_transfer_reply(call, skb);
739 if (!last)
740 return 0;
741
742 if (call->reply_size != call->reply_max)
743 return -EBADMSG;
744
745 /* unmarshall the reply once we've received all of it */
746 bp = call->buffer;
747 xdr_decode_AFSFid(&bp, call->reply2);
748 xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL);
749 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
750 /* xdr_decode_AFSVolSync(&bp, call->replyX); */
751
752 _leave(" = 0 [done]");
753 return 0;
754}
755
756/*
757 * FS.Symlink operation type
758 */
759static const struct afs_call_type afs_RXFSSymlink = {
760 .name = "FS.Symlink",
761 .deliver = afs_deliver_fs_symlink,
762 .abort_to_error = afs_abort_to_error,
763 .destructor = afs_flat_call_destructor,
764};
765
766/*
767 * create a symbolic link
768 */
769int afs_fs_symlink(struct afs_server *server,
770 struct key *key,
771 struct afs_vnode *vnode,
772 const char *name,
773 const char *contents,
774 struct afs_fid *newfid,
775 struct afs_file_status *newstatus,
776 const struct afs_wait_mode *wait_mode)
777{
778 struct afs_call *call;
779 size_t namesz, reqsz, padsz, c_namesz, c_padsz;
780 __be32 *bp;
781
782 _enter("");
783
784 namesz = strlen(name);
785 padsz = (4 - (namesz & 3)) & 3;
786
787 c_namesz = strlen(contents);
788 c_padsz = (4 - (c_namesz & 3)) & 3;
789
790 reqsz = (6 * 4) + namesz + padsz + c_namesz + c_padsz + (6 * 4);
791
792 call = afs_alloc_flat_call(&afs_RXFSSymlink, reqsz,
793 (3 + 21 + 21 + 6) * 4);
794 if (!call)
795 return -ENOMEM;
796
797 call->key = key;
798 call->reply = vnode;
799 call->reply2 = newfid;
800 call->reply3 = newstatus;
801 call->service_id = FS_SERVICE;
802 call->port = htons(AFS_FS_PORT);
714 803
715 /* marshall the parameters */ 804 /* marshall the parameters */
716 bp = rxrpc_call_alloc_scratch(call, 20); 805 bp = call->request;
717 806 *bp++ = htonl(FSSYMLINK);
718 zero = 0; 807 *bp++ = htonl(vnode->fid.vid);
719 808 *bp++ = htonl(vnode->fid.vnode);
720 piov[0].iov_len = 20; 809 *bp++ = htonl(vnode->fid.unique);
721 piov[0].iov_base = bp; 810 *bp++ = htonl(namesz);
722 piov[1].iov_len = strlen(filename); 811 memcpy(bp, name, namesz);
723 piov[1].iov_base = (char *) filename; 812 bp = (void *) bp + namesz;
724 piov[2].iov_len = (4 - (piov[1].iov_len & 3)) & 3; 813 if (padsz > 0) {
725 piov[2].iov_base = &zero; 814 memset(bp, 0, padsz);
726 815 bp = (void *) bp + padsz;
727 *bp++ = htonl(FSLOOKUP);
728 *bp++ = htonl(dirfid->vid);
729 *bp++ = htonl(dirfid->vnode);
730 *bp++ = htonl(dirfid->unique);
731 *bp++ = htonl(piov[1].iov_len);
732
733 /* send the parameters to the server */
734 ret = rxrpc_call_write_data(call, 3, piov, RXRPC_LAST_PACKET, GFP_NOFS,
735 0, &sent);
736 if (ret < 0)
737 goto abort;
738
739 /* wait for the reply to completely arrive */
740 bp = rxrpc_call_alloc_scratch(call, 220);
741
742 ret = rxrpc_call_read_data(call, bp, 220,
743 RXRPC_CALL_READ_BLOCK |
744 RXRPC_CALL_READ_ALL);
745 if (ret < 0) {
746 if (ret == -ECONNABORTED) {
747 ret = call->app_errno;
748 goto out_unwait;
749 }
750 goto abort;
751 } 816 }
817 *bp++ = htonl(c_namesz);
818 memcpy(bp, contents, c_namesz);
819 bp = (void *) bp + c_namesz;
820 if (c_padsz > 0) {
821 memset(bp, 0, c_padsz);
822 bp = (void *) bp + c_padsz;
823 }
824 *bp++ = htonl(AFS_SET_MODE);
825 *bp++ = 0; /* mtime */
826 *bp++ = 0; /* owner */
827 *bp++ = 0; /* group */
828 *bp++ = htonl(S_IRWXUGO); /* unix mode */
829 *bp++ = 0; /* segment size */
752 830
753 /* unmarshall the reply */ 831 return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
754 fid->vid = ntohl(*bp++); 832}
755 fid->vnode = ntohl(*bp++);
756 fid->unique = ntohl(*bp++);
757 833
758 vnode->status.if_version = ntohl(*bp++); 834/*
759 vnode->status.type = ntohl(*bp++); 835 * deliver reply data to an FS.Rename
760 vnode->status.nlink = ntohl(*bp++); 836 */
761 vnode->status.size = ntohl(*bp++); 837static int afs_deliver_fs_rename(struct afs_call *call,
762 vnode->status.version = ntohl(*bp++); 838 struct sk_buff *skb, bool last)
763 vnode->status.author = ntohl(*bp++); 839{
764 vnode->status.owner = ntohl(*bp++); 840 struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2;
765 vnode->status.caller_access = ntohl(*bp++); 841 const __be32 *bp;
766 vnode->status.anon_access = ntohl(*bp++);
767 vnode->status.mode = ntohl(*bp++);
768 vnode->status.parent.vid = dirfid->vid;
769 vnode->status.parent.vnode = ntohl(*bp++);
770 vnode->status.parent.unique = ntohl(*bp++);
771 bp++; /* seg size */
772 vnode->status.mtime_client = ntohl(*bp++);
773 vnode->status.mtime_server = ntohl(*bp++);
774 bp++; /* group */
775 bp++; /* sync counter */
776 vnode->status.version |= ((unsigned long long) ntohl(*bp++)) << 32;
777 bp++; /* spare2 */
778 bp++; /* spare3 */
779 bp++; /* spare4 */
780 842
781 dir->status.if_version = ntohl(*bp++); 843 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
782 dir->status.type = ntohl(*bp++); 844
783 dir->status.nlink = ntohl(*bp++); 845 afs_transfer_reply(call, skb);
784 dir->status.size = ntohl(*bp++); 846 if (!last)
785 dir->status.version = ntohl(*bp++); 847 return 0;
786 dir->status.author = ntohl(*bp++); 848
787 dir->status.owner = ntohl(*bp++); 849 if (call->reply_size != call->reply_max)
788 dir->status.caller_access = ntohl(*bp++); 850 return -EBADMSG;
789 dir->status.anon_access = ntohl(*bp++); 851
790 dir->status.mode = ntohl(*bp++); 852 /* unmarshall the reply once we've received all of it */
791 dir->status.parent.vid = dirfid->vid; 853 bp = call->buffer;
792 dir->status.parent.vnode = ntohl(*bp++); 854 xdr_decode_AFSFetchStatus(&bp, &orig_dvnode->status, orig_dvnode);
793 dir->status.parent.unique = ntohl(*bp++); 855 if (new_dvnode != orig_dvnode)
794 bp++; /* seg size */ 856 xdr_decode_AFSFetchStatus(&bp, &new_dvnode->status, new_dvnode);
795 dir->status.mtime_client = ntohl(*bp++); 857 /* xdr_decode_AFSVolSync(&bp, call->replyX); */
796 dir->status.mtime_server = ntohl(*bp++); 858
797 bp++; /* group */ 859 _leave(" = 0 [done]");
798 bp++; /* sync counter */ 860 return 0;
799 dir->status.version |= ((unsigned long long) ntohl(*bp++)) << 32; 861}
800 bp++; /* spare2 */ 862
801 bp++; /* spare3 */ 863/*
802 bp++; /* spare4 */ 864 * FS.Rename operation type
865 */
866static const struct afs_call_type afs_RXFSRename = {
867 .name = "FS.Rename",
868 .deliver = afs_deliver_fs_rename,
869 .abort_to_error = afs_abort_to_error,
870 .destructor = afs_flat_call_destructor,
871};
872
873/*
874 * rename a file or directory
875 */
876int afs_fs_rename(struct afs_server *server,
877 struct key *key,
878 struct afs_vnode *orig_dvnode,
879 const char *orig_name,
880 struct afs_vnode *new_dvnode,
881 const char *new_name,
882 const struct afs_wait_mode *wait_mode)
883{
884 struct afs_call *call;
885 size_t reqsz, o_namesz, o_padsz, n_namesz, n_padsz;
886 __be32 *bp;
887
888 _enter("");
889
890 o_namesz = strlen(orig_name);
891 o_padsz = (4 - (o_namesz & 3)) & 3;
892
893 n_namesz = strlen(new_name);
894 n_padsz = (4 - (n_namesz & 3)) & 3;
895
896 reqsz = (4 * 4) +
897 4 + o_namesz + o_padsz +
898 (3 * 4) +
899 4 + n_namesz + n_padsz;
900
901 call = afs_alloc_flat_call(&afs_RXFSRename, reqsz, (21 + 21 + 6) * 4);
902 if (!call)
903 return -ENOMEM;
904
905 call->key = key;
906 call->reply = orig_dvnode;
907 call->reply2 = new_dvnode;
908 call->service_id = FS_SERVICE;
909 call->port = htons(AFS_FS_PORT);
910
911 /* marshall the parameters */
912 bp = call->request;
913 *bp++ = htonl(FSRENAME);
914 *bp++ = htonl(orig_dvnode->fid.vid);
915 *bp++ = htonl(orig_dvnode->fid.vnode);
916 *bp++ = htonl(orig_dvnode->fid.unique);
917 *bp++ = htonl(o_namesz);
918 memcpy(bp, orig_name, o_namesz);
919 bp = (void *) bp + o_namesz;
920 if (o_padsz > 0) {
921 memset(bp, 0, o_padsz);
922 bp = (void *) bp + o_padsz;
923 }
803 924
804 callback->fid = *fid; 925 *bp++ = htonl(new_dvnode->fid.vid);
805 callback->version = ntohl(*bp++); 926 *bp++ = htonl(new_dvnode->fid.vnode);
806 callback->expiry = ntohl(*bp++); 927 *bp++ = htonl(new_dvnode->fid.unique);
807 callback->type = ntohl(*bp++); 928 *bp++ = htonl(n_namesz);
808 929 memcpy(bp, new_name, n_namesz);
809 if (volsync) { 930 bp = (void *) bp + n_namesz;
810 volsync->creation = ntohl(*bp++); 931 if (n_padsz > 0) {
811 bp++; /* spare2 */ 932 memset(bp, 0, n_padsz);
812 bp++; /* spare3 */ 933 bp = (void *) bp + n_padsz;
813 bp++; /* spare4 */
814 bp++; /* spare5 */
815 bp++; /* spare6 */
816 } 934 }
817 935
818 /* success */ 936 return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
819 ret = 0; 937}
820
821 out_unwait:
822 set_current_state(TASK_RUNNING);
823 remove_wait_queue(&call->waitq, &myself);
824 rxrpc_put_call(call);
825 out_put_conn:
826 afs_server_release_fsconn(server, conn);
827 out:
828 kleave("");
829 return ret;
830
831 abort:
832 set_current_state(TASK_UNINTERRUPTIBLE);
833 rxrpc_call_abort(call, ret);
834 schedule();
835 goto out_unwait;
836} /* end afs_rxfs_lookup() */
837#endif
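Every operation type above points .abort_to_error at afs_abort_to_error(), which turns an RxRPC abort code sent by the server into a local errno. Illustratively only; the real mapping lives elsewhere in the patch and the codes below are placeholders, not the actual table:

#include <errno.h>
#include <stdint.h>

/* Placeholder abort-code mapping; see afs_abort_to_error() in the
 * patch for the real AFS abort-code table. */
static int example_abort_to_error(uint32_t abort_code)
{
	switch (abort_code) {
	case 1:  return -ENOENT;	/* placeholder: "no such object" */
	case 2:  return -EACCES;	/* placeholder: permission failure */
	default: return -EREMOTEIO;	/* anything unrecognised */
	}
}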
diff --git a/fs/afs/fsclient.h b/fs/afs/fsclient.h
deleted file mode 100644
index 8ba3e749ee3c..000000000000
--- a/fs/afs/fsclient.h
+++ /dev/null
@@ -1,54 +0,0 @@
1/* fsclient.h: AFS File Server client stub declarations
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_AFS_FSCLIENT_H
13#define _LINUX_AFS_FSCLIENT_H
14
15#include "server.h"
16
17extern int afs_rxfs_get_volume_info(struct afs_server *server,
18 const char *name,
19 struct afs_volume_info *vinfo);
20
21extern int afs_rxfs_fetch_file_status(struct afs_server *server,
22 struct afs_vnode *vnode,
23 struct afs_volsync *volsync);
24
25struct afs_rxfs_fetch_descriptor {
26 struct afs_fid fid; /* file ID to fetch */
27 size_t size; /* total number of bytes to fetch */
28 off_t offset; /* offset in file to start from */
29 void *buffer; /* read buffer */
30 size_t actual; /* actual size sent back by server */
31};
32
33extern int afs_rxfs_fetch_file_data(struct afs_server *server,
34 struct afs_vnode *vnode,
35 struct afs_rxfs_fetch_descriptor *desc,
36 struct afs_volsync *volsync);
37
38extern int afs_rxfs_give_up_callback(struct afs_server *server,
39 struct afs_vnode *vnode);
40
41/* this doesn't appear to work in OpenAFS server */
42extern int afs_rxfs_lookup(struct afs_server *server,
43 struct afs_vnode *dir,
44 const char *filename,
45 struct afs_vnode *vnode,
46 struct afs_volsync *volsync);
47
48/* this is apparently mis-implemented in OpenAFS server */
49extern int afs_rxfs_get_root_volume(struct afs_server *server,
50 char *buf,
51 size_t *buflen);
52
53
54#endif /* _LINUX_AFS_FSCLIENT_H */
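The deleted header makes the shape of the rewrite visible: the old client bundled its parameters into a descriptor and always blocked in-line, while the new one takes explicit arguments plus a caller-chosen wait mode. Side by side, with both signatures copied from this diff:

/* Old API: caller fills a descriptor and the call always blocks */
int afs_rxfs_fetch_file_data(struct afs_server *server,
			     struct afs_vnode *vnode,
			     struct afs_rxfs_fetch_descriptor *desc,
			     struct afs_volsync *volsync);

/* New API: explicit key, range and buffer; completion strategy is
 * selected by the caller via afs_sync_call / afs_async_call */
int afs_fs_fetch_data(struct afs_server *server,
		      struct key *key,
		      struct afs_vnode *vnode,
		      off_t offset, size_t length,
		      struct page *buffer,
		      const struct afs_wait_mode *wait_mode);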
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 9d9bca6c28b5..c184a4ee5995 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -19,9 +19,6 @@
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/pagemap.h> 21#include <linux/pagemap.h>
22#include "volume.h"
23#include "vnode.h"
24#include "super.h"
25#include "internal.h" 22#include "internal.h"
26 23
27struct afs_iget_data { 24struct afs_iget_data {
@@ -29,26 +26,25 @@ struct afs_iget_data {
29 struct afs_volume *volume; /* volume on which resides */ 26 struct afs_volume *volume; /* volume on which resides */
30}; 27};
31 28
32/*****************************************************************************/
33/* 29/*
34 * map the AFS file status to the inode member variables 30 * map the AFS file status to the inode member variables
35 */ 31 */
36static int afs_inode_map_status(struct afs_vnode *vnode) 32static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
37{ 33{
38 struct inode *inode = AFS_VNODE_TO_I(vnode); 34 struct inode *inode = AFS_VNODE_TO_I(vnode);
39 35
40 _debug("FS: ft=%d lk=%d sz=%Zu ver=%Lu mod=%hu", 36 _debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu",
41 vnode->status.type, 37 vnode->status.type,
42 vnode->status.nlink, 38 vnode->status.nlink,
43 vnode->status.size, 39 (unsigned long long) vnode->status.size,
44 vnode->status.version, 40 vnode->status.data_version,
45 vnode->status.mode); 41 vnode->status.mode);
46 42
47 switch (vnode->status.type) { 43 switch (vnode->status.type) {
48 case AFS_FTYPE_FILE: 44 case AFS_FTYPE_FILE:
49 inode->i_mode = S_IFREG | vnode->status.mode; 45 inode->i_mode = S_IFREG | vnode->status.mode;
50 inode->i_op = &afs_file_inode_operations; 46 inode->i_op = &afs_file_inode_operations;
51 inode->i_fop = &generic_ro_fops; 47 inode->i_fop = &afs_file_operations;
52 break; 48 break;
53 case AFS_FTYPE_DIR: 49 case AFS_FTYPE_DIR:
54 inode->i_mode = S_IFDIR | vnode->status.mode; 50 inode->i_mode = S_IFDIR | vnode->status.mode;
@@ -77,9 +73,9 @@ static int afs_inode_map_status(struct afs_vnode *vnode)
77 73
78 /* check to see whether a symbolic link is really a mountpoint */ 74 /* check to see whether a symbolic link is really a mountpoint */
79 if (vnode->status.type == AFS_FTYPE_SYMLINK) { 75 if (vnode->status.type == AFS_FTYPE_SYMLINK) {
80 afs_mntpt_check_symlink(vnode); 76 afs_mntpt_check_symlink(vnode, key);
81 77
82 if (vnode->flags & AFS_VNODE_MOUNTPOINT) { 78 if (test_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags)) {
83 inode->i_mode = S_IFDIR | vnode->status.mode; 79 inode->i_mode = S_IFDIR | vnode->status.mode;
84 inode->i_op = &afs_mntpt_inode_operations; 80 inode->i_op = &afs_mntpt_inode_operations;
85 inode->i_fop = &afs_mntpt_file_operations; 81 inode->i_fop = &afs_mntpt_file_operations;
@@ -87,30 +83,8 @@ static int afs_inode_map_status(struct afs_vnode *vnode)
87 } 83 }
88 84
89 return 0; 85 return 0;
90} /* end afs_inode_map_status() */ 86}
91 87
92/*****************************************************************************/
93/*
94 * attempt to fetch the status of an inode, coalescing multiple simultaneous
95 * fetches
96 */
97static int afs_inode_fetch_status(struct inode *inode)
98{
99 struct afs_vnode *vnode;
100 int ret;
101
102 vnode = AFS_FS_I(inode);
103
104 ret = afs_vnode_fetch_status(vnode);
105
106 if (ret == 0)
107 ret = afs_inode_map_status(vnode);
108
109 return ret;
110
111} /* end afs_inode_fetch_status() */
112
113/*****************************************************************************/
114/* 88/*
115 * iget5() comparator 89 * iget5() comparator
116 */ 90 */
@@ -120,9 +94,8 @@ static int afs_iget5_test(struct inode *inode, void *opaque)
120 94
121 return inode->i_ino == data->fid.vnode && 95 return inode->i_ino == data->fid.vnode &&
122 inode->i_version == data->fid.unique; 96 inode->i_version == data->fid.unique;
123} /* end afs_iget5_test() */ 97}
124 98
125/*****************************************************************************/
126/* 99/*
127 * iget5() inode initialiser 100 * iget5() inode initialiser
128 */ 101 */
@@ -137,14 +110,14 @@ static int afs_iget5_set(struct inode *inode, void *opaque)
137 vnode->volume = data->volume; 110 vnode->volume = data->volume;
138 111
139 return 0; 112 return 0;
140} /* end afs_iget5_set() */ 113}
141 114
142/*****************************************************************************/
143/* 115/*
144 * inode retrieval 116 * inode retrieval
145 */ 117 */
146inline int afs_iget(struct super_block *sb, struct afs_fid *fid, 118struct inode *afs_iget(struct super_block *sb, struct key *key,
147 struct inode **_inode) 119 struct afs_fid *fid, struct afs_file_status *status,
120 struct afs_callback *cb)
148{ 121{
149 struct afs_iget_data data = { .fid = *fid }; 122 struct afs_iget_data data = { .fid = *fid };
150 struct afs_super_info *as; 123 struct afs_super_info *as;
@@ -161,20 +134,18 @@ inline int afs_iget(struct super_block *sb, struct afs_fid *fid,
161 &data); 134 &data);
162 if (!inode) { 135 if (!inode) {
163 _leave(" = -ENOMEM"); 136 _leave(" = -ENOMEM");
164 return -ENOMEM; 137 return ERR_PTR(-ENOMEM);
165 } 138 }
166 139
140 _debug("GOT INODE %p { vl=%x vn=%x, u=%x }",
141 inode, fid->vid, fid->vnode, fid->unique);
142
167 vnode = AFS_FS_I(inode); 143 vnode = AFS_FS_I(inode);
168 144
169 /* deal with an existing inode */ 145 /* deal with an existing inode */
170 if (!(inode->i_state & I_NEW)) { 146 if (!(inode->i_state & I_NEW)) {
171 ret = afs_vnode_fetch_status(vnode); 147 _leave(" = %p", inode);
172 if (ret==0) 148 return inode;
173 *_inode = inode;
174 else
175 iput(inode);
176 _leave(" = %d", ret);
177 return ret;
178 } 149 }
179 150
180#ifdef AFS_CACHING_SUPPORT 151#ifdef AFS_CACHING_SUPPORT
@@ -186,100 +157,185 @@ inline int afs_iget(struct super_block *sb, struct afs_fid *fid,
186 &vnode->cache); 157 &vnode->cache);
187#endif 158#endif
188 159
189 /* okay... it's a new inode */ 160 if (!status) {
190 inode->i_flags |= S_NOATIME; 161 /* it's a remotely extant inode */
191 vnode->flags |= AFS_VNODE_CHANGED; 162 set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
192 ret = afs_inode_fetch_status(inode); 163 ret = afs_vnode_fetch_status(vnode, NULL, key);
193 if (ret<0) 164 if (ret < 0)
165 goto bad_inode;
166 } else {
167 /* it's an inode we just created */
168 memcpy(&vnode->status, status, sizeof(vnode->status));
169
170 if (!cb) {
171 /* it's a symlink we just created (the fileserver
172 * didn't give us a callback) */
173 vnode->cb_version = 0;
174 vnode->cb_expiry = 0;
175 vnode->cb_type = 0;
176 vnode->cb_expires = get_seconds();
177 } else {
178 vnode->cb_version = cb->version;
179 vnode->cb_expiry = cb->expiry;
180 vnode->cb_type = cb->type;
181 vnode->cb_expires = vnode->cb_expiry + get_seconds();
182 }
183 }
184
185 ret = afs_inode_map_status(vnode, key);
186 if (ret < 0)
194 goto bad_inode; 187 goto bad_inode;
195 188
196 /* success */ 189 /* success */
190 clear_bit(AFS_VNODE_UNSET, &vnode->flags);
191 inode->i_flags |= S_NOATIME;
197 unlock_new_inode(inode); 192 unlock_new_inode(inode);
198 193 _leave(" = %p [CB { v=%u t=%u }]", inode, vnode->cb_version, vnode->cb_type);
199 *_inode = inode; 194 return inode;
200 _leave(" = 0 [CB { v=%u x=%lu t=%u }]",
201 vnode->cb_version,
202 vnode->cb_timeout.timo_jif,
203 vnode->cb_type);
204 return 0;
205 195
206 /* failure */ 196 /* failure */
207 bad_inode: 197bad_inode:
208 make_bad_inode(inode); 198 make_bad_inode(inode);
209 unlock_new_inode(inode); 199 unlock_new_inode(inode);
210 iput(inode); 200 iput(inode);
211 201
212 _leave(" = %d [bad]", ret); 202 _leave(" = %d [bad]", ret);
203 return ERR_PTR(ret);
204}
205
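The new afs_iget() returns either the inode or an error encoded in the pointer itself. A minimal sketch of how a hypothetical caller unpacks that, using the standard kernel ERR_PTR/IS_ERR/PTR_ERR convention rather than anything specific to this patch:

/* Hypothetical caller of the new-style afs_iget() */
static int example_lookup(struct super_block *sb, struct key *key,
			  struct afs_fid *fid)
{
	struct inode *inode;

	/* passing NULL status/cb takes the 'remotely extant inode'
	 * branch above, fetching the status from the server */
	inode = afs_iget(sb, key, fid, NULL, NULL);
	if (IS_ERR(inode))
		return PTR_ERR(inode);	/* errno encoded in the pointer */

	iput(inode);
	return 0;
}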
206/*
207 * validate a vnode/inode
208 * - there are several things we need to check
209 * - parent dir data changes (rm, rmdir, rename, mkdir, create, link,
210 * symlink)
211 * - parent dir metadata changed (security changes)
212 * - dentry data changed (write, truncate)
213 * - dentry metadata changed (security changes)
214 */
215int afs_validate(struct afs_vnode *vnode, struct key *key)
216{
217 int ret;
218
219 _enter("{v={%x:%u} fl=%lx},%x",
220 vnode->fid.vid, vnode->fid.vnode, vnode->flags,
221 key_serial(key));
222
223 if (vnode->cb_promised &&
224 !test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
225 !test_bit(AFS_VNODE_MODIFIED, &vnode->flags) &&
226 !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
227 if (vnode->cb_expires < get_seconds() + 10) {
228 _debug("callback expired");
229 set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
230 } else {
231 goto valid;
232 }
233 }
234
235 if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
236 goto valid;
237
238 mutex_lock(&vnode->validate_lock);
239
240 /* if the promise has expired, we need to check the server again to get
241 * a new promise - note that if the (parent) directory's metadata was
242 * changed then the security may be different and we may no longer have
243 * access */
244 if (!vnode->cb_promised ||
245 test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
246 _debug("not promised");
247 ret = afs_vnode_fetch_status(vnode, NULL, key);
248 if (ret < 0)
249 goto error_unlock;
250 _debug("new promise [fl=%lx]", vnode->flags);
251 }
252
253 if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
254 _debug("file already deleted");
255 ret = -ESTALE;
256 goto error_unlock;
257 }
258
259 /* if the vnode's data version number changed then its contents are
260 * different */
261 if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
262 _debug("zap data {%x:%d}", vnode->fid.vid, vnode->fid.vnode);
263 invalidate_remote_inode(&vnode->vfs_inode);
264 }
265
266 clear_bit(AFS_VNODE_MODIFIED, &vnode->flags);
267 mutex_unlock(&vnode->validate_lock);
268valid:
269 _leave(" = 0");
270 return 0;
271
272error_unlock:
273 mutex_unlock(&vnode->validate_lock);
274 _leave(" = %d", ret);
213 return ret; 275 return ret;
214} /* end afs_iget() */ 276}
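afs_validate() is the gatekeeper the rest of the patch calls before trusting cached state. A hypothetical call site (key acquisition and the actual operation are elided; this is a sketch, not a call site from the patch):

/* Hypothetical pre-access check using afs_validate() above */
static int example_pre_access(struct afs_vnode *vnode, struct key *key)
{
	int ret;

	ret = afs_validate(vnode, key);
	if (ret < 0)
		return ret;	/* -ESTALE if deleted, or a fetch error */

	/* the promise is good for now; note it is re-broken once it
	 * comes within 10 seconds of expiry, per the cb_expires test */
	return 0;
}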
215 277
216/*****************************************************************************/
217/* 278/*
218 * read the attributes of an inode 279 * read the attributes of an inode
219 */ 280 */
220int afs_inode_getattr(struct vfsmount *mnt, struct dentry *dentry, 281int afs_inode_getattr(struct vfsmount *mnt, struct dentry *dentry,
221 struct kstat *stat) 282 struct kstat *stat)
222{ 283{
223 struct afs_vnode *vnode;
224 struct inode *inode; 284 struct inode *inode;
225 int ret;
226 285
227 inode = dentry->d_inode; 286 inode = dentry->d_inode;
228 287
229 _enter("{ ino=%lu v=%lu }", inode->i_ino, inode->i_version); 288 _enter("{ ino=%lu v=%lu }", inode->i_ino, inode->i_version);
230 289
231 vnode = AFS_FS_I(inode);
232
233 ret = afs_inode_fetch_status(inode);
234 if (ret == -ENOENT) {
235 _leave(" = %d [%d %p]",
236 ret, atomic_read(&dentry->d_count), dentry->d_inode);
237 return ret;
238 }
239 else if (ret < 0) {
240 make_bad_inode(inode);
241 _leave(" = %d", ret);
242 return ret;
243 }
244
245 /* transfer attributes from the inode structure to the stat
246 * structure */
247 generic_fillattr(inode, stat); 290 generic_fillattr(inode, stat);
248
249 _leave(" = 0 CB { v=%u x=%u t=%u }",
250 vnode->cb_version,
251 vnode->cb_expiry,
252 vnode->cb_type);
253
254 return 0; 291 return 0;
255} /* end afs_inode_getattr() */ 292}
256 293
257/*****************************************************************************/
258/* 294/*
259 * clear an AFS inode 295 * clear an AFS inode
260 */ 296 */
261void afs_clear_inode(struct inode *inode) 297void afs_clear_inode(struct inode *inode)
262{ 298{
299 struct afs_permits *permits;
263 struct afs_vnode *vnode; 300 struct afs_vnode *vnode;
264 301
265 vnode = AFS_FS_I(inode); 302 vnode = AFS_FS_I(inode);
266 303
267 _enter("ino=%lu { vn=%08x v=%u x=%u t=%u }", 304 _enter("{%x:%d.%d} v=%u x=%u t=%u }",
268 inode->i_ino, 305 vnode->fid.vid,
269 vnode->fid.vnode, 306 vnode->fid.vnode,
307 vnode->fid.unique,
270 vnode->cb_version, 308 vnode->cb_version,
271 vnode->cb_expiry, 309 vnode->cb_expiry,
272 vnode->cb_type 310 vnode->cb_type);
273 );
274 311
275 BUG_ON(inode->i_ino != vnode->fid.vnode); 312 _debug("CLEAR INODE %p", inode);
276 313
277 afs_vnode_give_up_callback(vnode); 314 ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
315
316 afs_give_up_callback(vnode);
317
318 if (vnode->server) {
319 spin_lock(&vnode->server->fs_lock);
320 rb_erase(&vnode->server_rb, &vnode->server->fs_vnodes);
321 spin_unlock(&vnode->server->fs_lock);
322 afs_put_server(vnode->server);
323 vnode->server = NULL;
324 }
325
326 ASSERT(!vnode->cb_promised);
278 327
279#ifdef AFS_CACHING_SUPPORT 328#ifdef AFS_CACHING_SUPPORT
280 cachefs_relinquish_cookie(vnode->cache, 0); 329 cachefs_relinquish_cookie(vnode->cache, 0);
281 vnode->cache = NULL; 330 vnode->cache = NULL;
282#endif 331#endif
283 332
333 mutex_lock(&vnode->permits_lock);
334 permits = vnode->permits;
335 rcu_assign_pointer(vnode->permits, NULL);
336 mutex_unlock(&vnode->permits_lock);
337 if (permits)
338 call_rcu(&permits->rcu, afs_zap_permits);
339
284 _leave(""); 340 _leave("");
285} /* end afs_clear_inode() */ 341}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5151d5da2c2f..6dd3197d1d8d 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -1,6 +1,6 @@
1/* internal.h: internal AFS stuff 1/* internal AFS stuff
2 * 2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -9,48 +9,391 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#ifndef AFS_INTERNAL_H
13#define AFS_INTERNAL_H
14
15#include <linux/compiler.h>
16#include <linux/kernel.h>
17#include <linux/fs.h>
18#include <linux/pagemap.h>
12#include <linux/compiler.h>
13#include <linux/kernel.h>
14#include <linux/fs.h>
15#include <linux/pagemap.h>
16#include <linux/skbuff.h>
17#include <linux/rxrpc.h>
18#include <linux/key.h>
19#include "afs.h"
20#include "afs_vl.h"
21
22#define AFS_CELL_MAX_ADDRS 15
23
24struct afs_call;
25
26typedef enum {
27 AFS_VL_NEW, /* new, uninitialised record */
28 AFS_VL_CREATING, /* creating record */
29 AFS_VL_VALID, /* record is valid */
30 AFS_VL_NO_VOLUME, /* no such volume available */
31 AFS_VL_UPDATING, /* update in progress */
32 AFS_VL_VOLUME_DELETED, /* volume was deleted */
33 AFS_VL_UNCERTAIN, /* uncertain state (update failed) */
34} __attribute__((packed)) afs_vlocation_state_t;
35
36struct afs_mount_params {
37 bool rwpath; /* T if the parent should be considered R/W */
38 bool force; /* T to force cell type */
39 afs_voltype_t type; /* type of volume requested */
40 int volnamesz; /* size of volume name */
41 const char *volname; /* name of volume to mount */
42 struct afs_cell *cell; /* cell in which to find volume */
43 struct afs_volume *volume; /* volume record */
44 struct key *key; /* key to use for secure mounting */
45};
19
20/*
21 * debug tracing
22 */
23#define kenter(FMT, a...) printk("==> %s("FMT")\n",__FUNCTION__ , ## a)
24#define kleave(FMT, a...) printk("<== %s()"FMT"\n",__FUNCTION__ , ## a)
25#define kdebug(FMT, a...) printk(FMT"\n" , ## a)
26#define kproto(FMT, a...) printk("### "FMT"\n" , ## a)
27#define knet(FMT, a...) printk(FMT"\n" , ## a)
28
29#ifdef __KDEBUG
30#define _enter(FMT, a...) kenter(FMT , ## a)
31#define _leave(FMT, a...) kleave(FMT , ## a)
32#define _debug(FMT, a...) kdebug(FMT , ## a)
33#define _proto(FMT, a...) kproto(FMT , ## a)
34#define _net(FMT, a...) knet(FMT , ## a)
35#else
36#define _enter(FMT, a...) do { } while(0)
37#define _leave(FMT, a...) do { } while(0)
38#define _debug(FMT, a...) do { } while(0)
39#define _proto(FMT, a...) do { } while(0)
40#define _net(FMT, a...) do { } while(0)
41#endif
42
43static inline void afs_discard_my_signals(void)
44{
45	while (signal_pending(current)) {
46		siginfo_t sinfo;
47
48		spin_lock_irq(&current->sighand->siglock);
49		dequeue_signal(current,&current->blocked, &sinfo);
50		spin_unlock_irq(&current->sighand->siglock);
51	}
46
47/*
48 * definition of how to wait for the completion of an operation
49 */
50struct afs_wait_mode {
51	/* RxRPC received message notification */
52	void (*rx_wakeup)(struct afs_call *call);
53
54	/* synchronous call waiter and call dispatched notification */
55	int (*wait)(struct afs_call *call);
56
57	/* asynchronous call completion */
58	void (*async_complete)(void *reply, int error);
59};
60
61extern const struct afs_wait_mode afs_sync_call;
62extern const struct afs_wait_mode afs_async_call;
63
64/*
65 * a record of an in-progress RxRPC call
66 */
67struct afs_call {
68 const struct afs_call_type *type; /* type of call */
69 const struct afs_wait_mode *wait_mode; /* completion wait mode */
70 wait_queue_head_t waitq; /* processes awaiting completion */
71 struct work_struct async_work; /* asynchronous work processor */
72 struct work_struct work; /* actual work processor */
73 struct sk_buff_head rx_queue; /* received packets */
74 struct rxrpc_call *rxcall; /* RxRPC call handle */
75 struct key *key; /* security for this call */
76 struct afs_server *server; /* server affected by incoming CM call */
77 void *request; /* request data (first part) */
78 void *request2; /* request data (second part) */
79 void *buffer; /* reply receive buffer */
80 void *reply; /* reply buffer (first part) */
81 void *reply2; /* reply buffer (second part) */
82 void *reply3; /* reply buffer (third part) */
83 void *reply4; /* reply buffer (fourth part) */
84 enum { /* call state */
85 AFS_CALL_REQUESTING, /* request is being sent for outgoing call */
86 AFS_CALL_AWAIT_REPLY, /* awaiting reply to outgoing call */
87 AFS_CALL_AWAIT_OP_ID, /* awaiting op ID on incoming call */
88 AFS_CALL_AWAIT_REQUEST, /* awaiting request data on incoming call */
89 AFS_CALL_REPLYING, /* replying to incoming call */
90 AFS_CALL_AWAIT_ACK, /* awaiting final ACK of incoming call */
91 AFS_CALL_COMPLETE, /* successfully completed */
92 AFS_CALL_BUSY, /* server was busy */
93 AFS_CALL_ABORTED, /* call was aborted */
94 AFS_CALL_ERROR, /* call failed due to error */
95 } state;
96 int error; /* error code */
97 unsigned request_size; /* size of request data */
98 unsigned reply_max; /* maximum size of reply */
99 unsigned reply_size; /* current size of reply */
100 unsigned short offset; /* offset into received data store */
101 unsigned char unmarshall; /* unmarshalling phase */
102 bool incoming; /* T if incoming call */
103 u16 service_id; /* RxRPC service ID to call */
104 __be16 port; /* target UDP port */
105 __be32 operation_ID; /* operation ID for an incoming call */
106 u32 count; /* count for use in unmarshalling */
107 __be32 tmp; /* place to extract temporary data */
108};
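The two exported wait modes give every afs_fs_* dispatcher its synchronous and asynchronous flavours. A sketch of how a caller chooses between them for the same RPC (with afs_sync_call the thread is assumed to sleep in afs_make_call() until the reply is unmarshalled; with afs_async_call it returns once the request is queued and the wait mode's async_complete hook fires later):

/* Sketch: one RPC, two completion styles */
int sync_status(struct afs_server *server, struct key *key,
		struct afs_vnode *vnode)
{
	return afs_fs_fetch_file_status(server, key, vnode, NULL,
					&afs_sync_call);
}

int async_status(struct afs_server *server, struct key *key,
		 struct afs_vnode *vnode)
{
	return afs_fs_fetch_file_status(server, key, vnode, NULL,
					&afs_async_call);
}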
109
110struct afs_call_type {
111 const char *name;
112
113 /* deliver request or reply data to an call
114 * - returning an error will cause the call to be aborted
115 */
116 int (*deliver)(struct afs_call *call, struct sk_buff *skb,
117 bool last);
118
119 /* map an abort code to an error number */
120 int (*abort_to_error)(u32 abort_code);
121
122 /* clean up a call */
123 void (*destructor)(struct afs_call *call);
124};
125
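The function pointers in afs_call_type form a per-operation dispatch table: each RPC the client speaks supplies its own deliver routine, abort-code translator and destructor, and the generic call machinery only ever invokes them through the table. A minimal standalone sketch of that pattern follows (invented names, compilable with any C compiler; it shows the shape of the mechanism, not the kernel API):

#include <stdio.h>

struct call;

/* per-operation vtable, analogous to struct afs_call_type */
struct call_type {
	const char *name;
	int (*deliver)(struct call *call, const void *data, int last);
	void (*destructor)(struct call *call);
};

struct call {
	const struct call_type *type;
	int error;
};

static int demo_deliver(struct call *call, const void *data, int last)
{
	printf("%s: got %s packet\n", call->type->name, last ? "final" : "partial");
	return 0; /* non-zero would abort the call */
}

static void demo_destructor(struct call *call)
{
	printf("%s: cleaned up\n", call->type->name);
}

static const struct call_type demo_type = {
	.name = "Demo.Op",
	.deliver = demo_deliver,
	.destructor = demo_destructor,
};

int main(void)
{
	struct call c = { .type = &demo_type };

	c.type->deliver(&c, "x", 1); /* dispatch through the table */
	c.type->destructor(&c);
	return 0;
}

Returning non-zero from deliver is what lets one operation's parser abort the call without the generic code knowing anything about that operation's wire format.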
126/*
127 * AFS superblock private data
128 * - there's one superblock per volume
129 */
130struct afs_super_info {
131 struct afs_volume *volume; /* volume record */
132 char rwparent; /* T if parent is R/W AFS volume */
133};
134
135static inline struct afs_super_info *AFS_FS_S(struct super_block *sb)
136{
137 return sb->s_fs_info;
138}
139
140extern struct file_system_type afs_fs_type;
141
142/*
143 * entry in the cached cell catalogue
144 */
145struct afs_cache_cell {
146 char name[AFS_MAXCELLNAME]; /* cell name (padded with NULs) */
147 struct in_addr vl_servers[15]; /* cached cell VL servers */
148};
149
150/*
151 * AFS cell record
152 */
153struct afs_cell {
154 atomic_t usage;
155 struct list_head link; /* main cell list link */
156 struct key *anonymous_key; /* anonymous user key for this cell */
157 struct list_head proc_link; /* /proc cell list link */
158 struct proc_dir_entry *proc_dir; /* /proc dir for this cell */
159#ifdef AFS_CACHING_SUPPORT
160 struct cachefs_cookie *cache; /* caching cookie */
161#endif
162
163 /* server record management */
164 rwlock_t servers_lock; /* active server list lock */
165 struct list_head servers; /* active server list */
166
167 /* volume location record management */
168 struct rw_semaphore vl_sem; /* volume management serialisation semaphore */
169 struct list_head vl_list; /* cell's active VL record list */
170 spinlock_t vl_lock; /* vl_list lock */
171 unsigned short vl_naddrs; /* number of VL servers in addr list */
172 unsigned short vl_curr_svix; /* current server index */
173 struct in_addr vl_addrs[AFS_CELL_MAX_ADDRS]; /* cell VL server addresses */
174
175 char name[0]; /* cell name - must go last */
176};
177
178/*
179 * entry in the cached volume location catalogue
180 */
181struct afs_cache_vlocation {
182 /* volume name (lowercase, padded with NULs) */
183 uint8_t name[AFS_MAXVOLNAME + 1];
184
185 uint8_t nservers; /* number of entries used in servers[] */
186 uint8_t vidmask; /* voltype mask for vid[] */
187 uint8_t srvtmask[8]; /* voltype masks for servers[] */
188#define AFS_VOL_VTM_RW 0x01 /* R/W version of the volume is available (on this server) */
189#define AFS_VOL_VTM_RO 0x02 /* R/O version of the volume is available (on this server) */
190#define AFS_VOL_VTM_BAK 0x04 /* backup version of the volume is available (on this server) */
191
192 afs_volid_t vid[3]; /* volume IDs for R/W, R/O and Bak volumes */
193 struct in_addr servers[8]; /* fileserver addresses */
194 time_t rtime; /* last retrieval time */
195};
196
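In the record above, vidmask says which of the three vid[] slots hold a valid volume ID, and each srvtmask[] byte says which volume variants a given fileserver slot offers. A standalone sketch of scanning those masks, reusing the AFS_VOL_VTM_* bit values defined above:

#include <stdio.h>
#include <stdint.h>

#define VTM_RW  0x01
#define VTM_RO  0x02
#define VTM_BAK 0x04

int main(void)
{
	/* three servers: first is R/W only, the others also carry the R/O clone */
	uint8_t srvtmask[3] = { VTM_RW, VTM_RW | VTM_RO, VTM_RO };
	int i;

	for (i = 0; i < 3; i++)
		if (srvtmask[i] & VTM_RO)
			printf("server %d offers the R/O volume\n", i);
	return 0;
}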
197/*
198 * volume -> vnode hash table entry
199 */
200struct afs_cache_vhash {
201 afs_voltype_t vtype; /* which volume variation */
202 uint8_t hash_bucket; /* which hash bucket this represents */
203} __attribute__((packed));
204
205/*
206 * AFS volume location record
207 */
208struct afs_vlocation {
209 atomic_t usage;
210 time_t time_of_death; /* time at which put reduced usage to 0 */
211 struct list_head link; /* link in cell volume location list */
212 struct list_head grave; /* link in master graveyard list */
213 struct list_head update; /* link in master update list */
214 struct afs_cell *cell; /* cell to which volume belongs */
215#ifdef AFS_CACHING_SUPPORT
216 struct cachefs_cookie *cache; /* caching cookie */
217#endif
218 struct afs_cache_vlocation vldb; /* volume information DB record */
219 struct afs_volume *vols[3]; /* volume access record pointer (index by type) */
220 wait_queue_head_t waitq; /* status change waitqueue */
221 time_t update_at; /* time at which record should be updated */
222 spinlock_t lock; /* access lock */
223 afs_vlocation_state_t state; /* volume location state */
224 unsigned short upd_rej_cnt; /* ENOMEDIUM count during update */
225 unsigned short upd_busy_cnt; /* EBUSY count during update */
226 bool valid; /* T if valid */
227};
228
229/*
230 * AFS fileserver record
231 */
232struct afs_server {
233 atomic_t usage;
234 time_t time_of_death; /* time at which put reduced usage to 0 */
235 struct in_addr addr; /* server address */
236 struct afs_cell *cell; /* cell in which server resides */
237 struct list_head link; /* link in cell's server list */
238 struct list_head grave; /* link in master graveyard list */
239 struct rb_node master_rb; /* link in master by-addr tree */
240 struct rw_semaphore sem; /* access lock */
241
242 /* file service access */
243 struct rb_root fs_vnodes; /* vnodes backed by this server (ordered by FID) */
244 unsigned long fs_act_jif; /* time at which last activity occurred */
245 unsigned long fs_dead_jif; /* time at which no longer to be considered dead */
246 spinlock_t fs_lock; /* access lock */
247 int fs_state; /* 0 or reason FS currently marked dead (-errno) */
248
249 /* callback promise management */
250 struct rb_root cb_promises; /* vnode expiration list (ordered earliest first) */
251 struct delayed_work cb_updater; /* callback updater */
252 struct delayed_work cb_break_work; /* collected break dispatcher */
253 wait_queue_head_t cb_break_waitq; /* space available in cb_break waitqueue */
254 spinlock_t cb_lock; /* access lock */
255 struct afs_callback cb_break[64]; /* ring of callbacks awaiting breaking */
256 atomic_t cb_break_n; /* number of pending breaks */
257 u8 cb_break_head; /* head of callback breaking ring */
258 u8 cb_break_tail; /* tail of callback breaking ring */
259};
260
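cb_break[] is a 64-slot ring of callbacks awaiting breaking, indexed by free-running u8 head/tail counters, with cb_break_n tracking occupancy. Because 64 divides 256, u8 counters can wrap naturally and only need masking on array access. A standalone sketch of that arithmetic, on the assumption that this masking scheme is how the ring is consumed:

#include <stdio.h>
#include <stdint.h>

#define RING_SIZE 64
#define RING_MASK (RING_SIZE - 1)

static int ring[RING_SIZE];
static uint8_t head, tail; /* free-running, wrap naturally at 256 */

static int ring_space(void)  { return RING_SIZE - (uint8_t)(head - tail); }

static void ring_push(int v) { ring[head++ & RING_MASK] = v; }
static int  ring_pop(void)   { return ring[tail++ & RING_MASK]; }

int main(void)
{
	for (int i = 0; i < 70; i++) {   /* push/pop across the 64-slot wrap point */
		if (ring_space() > 0)
			ring_push(i);
		printf("popped %d\n", ring_pop());
	}
	return 0;
}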
261/*
262 * AFS volume access record
263 */
264struct afs_volume {
265 atomic_t usage;
266 struct afs_cell *cell; /* cell to which belongs (unrefd ptr) */
267 struct afs_vlocation *vlocation; /* volume location */
268#ifdef AFS_CACHING_SUPPORT
269 struct cachefs_cookie *cache; /* caching cookie */
270#endif
271 afs_volid_t vid; /* volume ID */
272 afs_voltype_t type; /* type of volume */
273 char type_force; /* force volume type (suppress R/O -> R/W) */
274 unsigned short nservers; /* number of server slots filled */
275 unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */
276 struct afs_server *servers[8]; /* servers on which volume resides (ordered) */
277 struct rw_semaphore server_sem; /* lock for accessing current server */
278};
279
280/*
281 * vnode catalogue entry
282 */
283struct afs_cache_vnode {
284 afs_vnodeid_t vnode_id; /* vnode ID */
285 unsigned vnode_unique; /* vnode ID uniquifier */
286 afs_dataversion_t data_version; /* data version */
287};
288
289/*
290 * AFS inode private data
291 */
292struct afs_vnode {
293 struct inode vfs_inode; /* the VFS's inode record */
294
295 struct afs_volume *volume; /* volume on which vnode resides */
296 struct afs_server *server; /* server currently supplying this file */
297 struct afs_fid fid; /* the file identifier for this inode */
298 struct afs_file_status status; /* AFS status info for this file */
299#ifdef AFS_CACHING_SUPPORT
300 struct cachefs_cookie *cache; /* caching cookie */
301#endif
302 struct afs_permits *permits; /* cache of permits so far obtained */
303 struct mutex permits_lock; /* lock for altering permits list */
304 struct mutex validate_lock; /* lock for validating this vnode */
305 wait_queue_head_t update_waitq; /* status fetch waitqueue */
306 int update_cnt; /* number of outstanding ops that will update the
307 * status */
308 spinlock_t lock; /* waitqueue/flags lock */
309 unsigned long flags;
310#define AFS_VNODE_CB_BROKEN 0 /* set if vnode's callback was broken */
311#define AFS_VNODE_UNSET 1 /* set if vnode attributes not yet set */
312#define AFS_VNODE_MODIFIED 2 /* set if vnode's data modified */
313#define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */
314#define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */
315#define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */
316
317 long acl_order; /* ACL check count (callback break count) */
318
319 /* outstanding callback notification on this file */
320 struct rb_node server_rb; /* link in server->fs_vnodes */
321 struct rb_node cb_promise; /* link in server->cb_promises */
322 struct work_struct cb_broken_work; /* work to be done on callback break */
323 time_t cb_expires; /* time at which callback expires */
324 time_t cb_expires_at; /* time used to order cb_promise */
325 unsigned cb_version; /* callback version */
326 unsigned cb_expiry; /* callback expiry time */
327 afs_callback_type_t cb_type; /* type of callback */
328 bool cb_promised; /* true if promise still holds */
329};
330
331/*
332 * cached security record for one user's attempt to access a vnode
333 */
334struct afs_permit {
335 struct key *key; /* RxRPC ticket holding a security context */
336 afs_access_t access_mask; /* access mask for this key */
337};
338
339/*
340 * cache of security records from attempts to access a vnode
341 */
342struct afs_permits {
343 struct rcu_head rcu; /* disposal procedure */
344 int count; /* number of records */
345 struct afs_permit permits[0]; /* the permits so far examined */
346};
347
348/*
349 * record of one of a system's set of network interfaces
350 */
351struct afs_interface {
352 unsigned index; /* interface index */
353 struct in_addr address; /* IPv4 address bound to interface */
354 struct in_addr netmask; /* netmask applied to address */
355 unsigned mtu; /* MTU of interface */
356};
357
358/*
359 * UUID definition [internet draft]
360 * - the timestamp is a 60-bit value, split 32/16/12, and goes in 100ns
361 * increments since midnight 15th October 1582
362 * - add AFS_UUID_TO_UNIX_TIME to convert unix time in 100ns units to UUID
363 * time
364 * - the clock sequence is a 14-bit counter to avoid duplicate times
365 */
366struct afs_uuid {
367 u32 time_low; /* low part of timestamp */
368 u16 time_mid; /* mid part of timestamp */
369 u16 time_hi_and_version; /* high part of timestamp and version */
370#define AFS_UUID_TO_UNIX_TIME 0x01b21dd213814000ULL
371#define AFS_UUID_TIMEHI_MASK 0x0fff
372#define AFS_UUID_VERSION_TIME 0x1000 /* time-based UUID */
373#define AFS_UUID_VERSION_NAME 0x3000 /* name-based UUID */
374#define AFS_UUID_VERSION_RANDOM 0x4000 /* (pseudo-)random generated UUID */
375 u8 clock_seq_hi_and_reserved; /* clock seq hi and variant */
376#define AFS_UUID_CLOCKHI_MASK 0x3f
377#define AFS_UUID_VARIANT_STD 0x80
378 u8 clock_seq_low; /* clock seq low */
379 u8 node[6]; /* spatially unique node ID (MAC addr) */
380};
381
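AFS_UUID_TO_UNIX_TIME is the standard RFC 4122 offset: the count of 100 ns intervals between the Gregorian reform (15 October 1582) and the Unix epoch, 141427 days, which is exactly 0x01b21dd213814000. A quick standalone check of the constant plus the 32/16/12 timestamp split described in the comment above:

#include <stdio.h>
#include <stdint.h>

#define AFS_UUID_TO_UNIX_TIME 0x01b21dd213814000ULL
#define AFS_UUID_TIMEHI_MASK  0x0fff
#define AFS_UUID_VERSION_TIME 0x1000

int main(void)
{
	/* 141427 days between 1582-10-15 and 1970-01-01, in 100 ns units */
	uint64_t days = 141427;
	uint64_t offset = days * 86400ULL * 10000000ULL;

	printf("offset matches: %d\n", offset == AFS_UUID_TO_UNIX_TIME);

	/* split a 60-bit timestamp 32/16/12 and stamp in the version */
	uint64_t t = AFS_UUID_TO_UNIX_TIME;    /* any 60-bit value works */
	uint32_t time_low = (uint32_t)t;
	uint16_t time_mid = (uint16_t)(t >> 32);
	uint16_t time_hi  = ((t >> 48) & AFS_UUID_TIMEHI_MASK) | AFS_UUID_VERSION_TIME;

	printf("%08x-%04x-%04x\n", time_low, time_mid, time_hi);
	return 0;
}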
382/*****************************************************************************/
383/*
384 * callback.c
385 */
386extern void afs_init_callback_state(struct afs_server *);
387extern void afs_broken_callback_work(struct work_struct *);
388extern void afs_break_callbacks(struct afs_server *, size_t,
389 struct afs_callback[]);
390extern void afs_discard_callback_on_delete(struct afs_vnode *);
391extern void afs_give_up_callback(struct afs_vnode *);
392extern void afs_dispatch_give_up_callbacks(struct work_struct *);
393extern void afs_flush_callback_breaks(struct afs_server *);
394extern int __init afs_callback_update_init(void);
395extern void __exit afs_callback_update_kill(void);
396
54/* 397/*
55 * cell.c 398 * cell.c
56 */ 399 */
@@ -60,57 +403,156 @@ extern struct list_head afs_proc_cells;
60extern struct cachefs_index_def afs_cache_cell_index_def; 403extern struct cachefs_index_def afs_cache_cell_index_def;
61#endif 404#endif
62 405
406#define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0)
407extern int afs_cell_init(char *);
408extern struct afs_cell *afs_cell_create(const char *, char *);
409extern struct afs_cell *afs_cell_lookup(const char *, unsigned);
410extern struct afs_cell *afs_grab_cell(struct afs_cell *);
411extern void afs_put_cell(struct afs_cell *);
412extern void afs_cell_purge(void);
413
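afs_get_cell() is plain reference counting: bump the atomic usage count while holding a pointer, and let afs_put_cell() drop it again. A standalone sketch of the same get/put convention in C11 atomics (the kernel defers the actual teardown to a reaper rather than freeing inline, so the free() here is illustrative only):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct cell {
	atomic_int usage;
	char name[16];
};

static struct cell *get_cell(struct cell *c)
{
	atomic_fetch_add(&c->usage, 1);
	return c;
}

static void put_cell(struct cell *c)
{
	if (atomic_fetch_sub(&c->usage, 1) == 1) {
		printf("last ref to %s dropped\n", c->name);
		free(c);
	}
}

int main(void)
{
	struct cell *c = calloc(1, sizeof(*c));

	atomic_init(&c->usage, 1);
	snprintf(c->name, sizeof(c->name), "example.org");
	get_cell(c);
	put_cell(c);
	put_cell(c); /* count hits zero, record is released */
	return 0;
}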
414/*
415 * cmservice.c
416 */
417extern bool afs_cm_incoming_call(struct afs_call *);
418
63/* 419/*
64 * dir.c 420 * dir.c
65 */ 421 */
66extern const struct inode_operations afs_dir_inode_operations; 422extern const struct inode_operations afs_dir_inode_operations;
67extern const struct file_operations afs_dir_file_operations; 423extern const struct file_operations afs_dir_file_operations;
68 424
425extern int afs_permission(struct inode *, int, struct nameidata *);
426
69/* 427/*
70 * file.c 428 * file.c
71 */ 429 */
72extern const struct address_space_operations afs_fs_aops; 430extern const struct address_space_operations afs_fs_aops;
73extern const struct inode_operations afs_file_inode_operations; 431extern const struct inode_operations afs_file_inode_operations;
432extern const struct file_operations afs_file_operations;
433
434extern int afs_open(struct inode *, struct file *);
435extern int afs_release(struct inode *, struct file *);
74 436
75#ifdef AFS_CACHING_SUPPORT 437#ifdef AFS_CACHING_SUPPORT
76extern int afs_cache_get_page_cookie(struct page *page, 438extern int afs_cache_get_page_cookie(struct page *, struct cachefs_page **);
77 struct cachefs_page **_page_cookie);
78#endif 439#endif
79 440
80/* 441/*
81 * inode.c 442 * fsclient.c
82 */ 443 */
83extern int afs_iget(struct super_block *sb, struct afs_fid *fid, 444extern int afs_fs_fetch_file_status(struct afs_server *, struct key *,
84 struct inode **_inode); 445 struct afs_vnode *, struct afs_volsync *,
85extern int afs_inode_getattr(struct vfsmount *mnt, struct dentry *dentry, 446 const struct afs_wait_mode *);
86 struct kstat *stat); 447extern int afs_fs_give_up_callbacks(struct afs_server *,
87extern void afs_clear_inode(struct inode *inode); 448 const struct afs_wait_mode *);
449extern int afs_fs_fetch_data(struct afs_server *, struct key *,
450 struct afs_vnode *, off_t, size_t, struct page *,
451 const struct afs_wait_mode *);
452extern int afs_fs_create(struct afs_server *, struct key *,
453 struct afs_vnode *, const char *, umode_t,
454 struct afs_fid *, struct afs_file_status *,
455 struct afs_callback *,
456 const struct afs_wait_mode *);
457extern int afs_fs_remove(struct afs_server *, struct key *,
458 struct afs_vnode *, const char *, bool,
459 const struct afs_wait_mode *);
460extern int afs_fs_link(struct afs_server *, struct key *, struct afs_vnode *,
461 struct afs_vnode *, const char *,
462 const struct afs_wait_mode *);
463extern int afs_fs_symlink(struct afs_server *, struct key *,
464 struct afs_vnode *, const char *, const char *,
465 struct afs_fid *, struct afs_file_status *,
466 const struct afs_wait_mode *);
467extern int afs_fs_rename(struct afs_server *, struct key *,
468 struct afs_vnode *, const char *,
469 struct afs_vnode *, const char *,
470 const struct afs_wait_mode *);
88 471
89/* 472/*
90 * key_afs.c 473 * inode.c
91 */ 474 */
92#ifdef CONFIG_KEYS 475extern struct inode *afs_iget(struct super_block *, struct key *,
93extern int afs_key_register(void); 476 struct afs_fid *, struct afs_file_status *,
94extern void afs_key_unregister(void); 477 struct afs_callback *);
95#endif 478extern int afs_validate(struct afs_vnode *, struct key *);
479extern int afs_inode_getattr(struct vfsmount *, struct dentry *,
480 struct kstat *);
481extern void afs_zap_permits(struct rcu_head *);
482extern void afs_clear_inode(struct inode *);
96 483
97/* 484/*
98 * main.c 485 * main.c
99 */ 486 */
487extern struct afs_uuid afs_uuid;
100#ifdef AFS_CACHING_SUPPORT 488#ifdef AFS_CACHING_SUPPORT
101extern struct cachefs_netfs afs_cache_netfs; 489extern struct cachefs_netfs afs_cache_netfs;
102#endif 490#endif
103 491
104/* 492/*
493 * misc.c
494 */
495extern int afs_abort_to_error(u32);
496
497/*
105 * mntpt.c 498 * mntpt.c
106 */ 499 */
107extern const struct inode_operations afs_mntpt_inode_operations; 500extern const struct inode_operations afs_mntpt_inode_operations;
108extern const struct file_operations afs_mntpt_file_operations; 501extern const struct file_operations afs_mntpt_file_operations;
109extern struct afs_timer afs_mntpt_expiry_timer;
110extern struct afs_timer_ops afs_mntpt_expiry_timer_ops;
111extern unsigned long afs_mntpt_expiry_timeout; 502extern unsigned long afs_mntpt_expiry_timeout;
112 503
113extern int afs_mntpt_check_symlink(struct afs_vnode *vnode); 504extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *);
505extern void afs_mntpt_kill_timer(void);
506extern void afs_umount_begin(struct vfsmount *, int);
507
508/*
509 * proc.c
510 */
511extern int afs_proc_init(void);
512extern void afs_proc_cleanup(void);
513extern int afs_proc_cell_setup(struct afs_cell *);
514extern void afs_proc_cell_remove(struct afs_cell *);
515
516/*
517 * rxrpc.c
518 */
519extern int afs_open_socket(void);
520extern void afs_close_socket(void);
521extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t,
522 const struct afs_wait_mode *);
523extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *,
524 size_t, size_t);
525extern void afs_flat_call_destructor(struct afs_call *);
526extern void afs_transfer_reply(struct afs_call *, struct sk_buff *);
527extern void afs_send_empty_reply(struct afs_call *);
528extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
529extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *,
530 size_t);
531
532/*
533 * security.c
534 */
535extern void afs_clear_permits(struct afs_vnode *);
536extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
537extern struct key *afs_request_key(struct afs_cell *);
538extern int afs_permission(struct inode *, int, struct nameidata *);
539
540/*
541 * server.c
542 */
543extern spinlock_t afs_server_peer_lock;
544
545#define afs_get_server(S) \
546do { \
547 _debug("GET SERVER %d", atomic_read(&(S)->usage)); \
548 atomic_inc(&(S)->usage); \
549} while(0)
550
551extern struct afs_server *afs_lookup_server(struct afs_cell *,
552 const struct in_addr *);
553extern struct afs_server *afs_find_server(const struct in_addr *);
554extern void afs_put_server(struct afs_server *);
555extern void __exit afs_purge_servers(void);
114 556
115/* 557/*
116 * super.c 558 * super.c
@@ -118,22 +560,211 @@ extern int afs_mntpt_check_symlink(struct afs_vnode *vnode);
118extern int afs_fs_init(void); 560extern int afs_fs_init(void);
119extern void afs_fs_exit(void); 561extern void afs_fs_exit(void);
120 562
121#define AFS_CB_HASH_COUNT (PAGE_SIZE / sizeof(struct list_head)) 563/*
564 * use-rtnetlink.c
565 */
566extern int afs_get_ipv4_interfaces(struct afs_interface *, size_t, bool);
567extern int afs_get_MAC_address(u8 [6]);
122 568
123extern struct list_head afs_cb_hash_tbl[]; 569/*
124extern spinlock_t afs_cb_hash_lock; 570 * vlclient.c
571 */
572#ifdef AFS_CACHING_SUPPORT
573extern struct cachefs_index_def afs_vlocation_cache_index_def;
574#endif
125 575
126#define afs_cb_hash(SRV,FID) \ 576extern int afs_vl_get_entry_by_name(struct in_addr *, struct key *,
127 afs_cb_hash_tbl[((unsigned long)(SRV) + \ 577 const char *, struct afs_cache_vlocation *,
128 (FID)->vid + (FID)->vnode + (FID)->unique) % \ 578 const struct afs_wait_mode *);
129 AFS_CB_HASH_COUNT] 579extern int afs_vl_get_entry_by_id(struct in_addr *, struct key *,
580 afs_volid_t, afs_voltype_t,
581 struct afs_cache_vlocation *,
582 const struct afs_wait_mode *);
130 583
131/* 584/*
132 * proc.c 585 * vlocation.c
133 */ 586 */
134extern int afs_proc_init(void); 587#define afs_get_vlocation(V) do { atomic_inc(&(V)->usage); } while(0)
135extern void afs_proc_cleanup(void); 588
136extern int afs_proc_cell_setup(struct afs_cell *cell); 589extern int __init afs_vlocation_update_init(void);
137extern void afs_proc_cell_remove(struct afs_cell *cell); 590extern struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *,
591 struct key *,
592 const char *, size_t);
593extern void afs_put_vlocation(struct afs_vlocation *);
594extern void __exit afs_vlocation_purge(void);
595
596/*
597 * vnode.c
598 */
599#ifdef AFS_CACHING_SUPPORT
600extern struct cachefs_index_def afs_vnode_cache_index_def;
601#endif
602
603extern struct afs_timer_ops afs_vnode_cb_timed_out_ops;
604
605static inline struct afs_vnode *AFS_FS_I(struct inode *inode)
606{
607 return container_of(inode, struct afs_vnode, vfs_inode);
608}
609
610static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
611{
612 return &vnode->vfs_inode;
613}
614
615extern void afs_vnode_finalise_status_update(struct afs_vnode *,
616 struct afs_server *);
617extern int afs_vnode_fetch_status(struct afs_vnode *, struct afs_vnode *,
618 struct key *);
619extern int afs_vnode_fetch_data(struct afs_vnode *, struct key *,
620 off_t, size_t, struct page *);
621extern int afs_vnode_create(struct afs_vnode *, struct key *, const char *,
622 umode_t, struct afs_fid *, struct afs_file_status *,
623 struct afs_callback *, struct afs_server **);
624extern int afs_vnode_remove(struct afs_vnode *, struct key *, const char *,
625 bool);
626extern int afs_vnode_link(struct afs_vnode *, struct afs_vnode *, struct key *,
627 const char *);
628extern int afs_vnode_symlink(struct afs_vnode *, struct key *, const char *,
629 const char *, struct afs_fid *,
630 struct afs_file_status *, struct afs_server **);
631extern int afs_vnode_rename(struct afs_vnode *, struct afs_vnode *,
632 struct key *, const char *, const char *);
633
634/*
635 * volume.c
636 */
637#ifdef AFS_CACHING_SUPPORT
638extern struct cachefs_index_def afs_volume_cache_index_def;
639#endif
640
641#define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0)
642
643extern void afs_put_volume(struct afs_volume *);
644extern struct afs_volume *afs_volume_lookup(struct afs_mount_params *);
645extern struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *);
646extern int afs_volume_release_fileserver(struct afs_vnode *,
647 struct afs_server *, int);
648
649/*****************************************************************************/
650/*
651 * debug tracing
652 */
653extern unsigned afs_debug;
654
655#define dbgprintk(FMT,...) \
656 printk("[%x%-6.6s] "FMT"\n", smp_processor_id(), current->comm ,##__VA_ARGS__)
657
658/* make sure we maintain the format strings, even when debugging is disabled */
659static inline __attribute__((format(printf,1,2)))
660void _dbprintk(const char *fmt, ...)
661{
662}
663
664#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__)
665#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__)
666#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__)
667
668
669#if defined(__KDEBUG)
670#define _enter(FMT,...) kenter(FMT,##__VA_ARGS__)
671#define _leave(FMT,...) kleave(FMT,##__VA_ARGS__)
672#define _debug(FMT,...) kdebug(FMT,##__VA_ARGS__)
673
674#elif defined(CONFIG_AFS_DEBUG)
675#define AFS_DEBUG_KENTER 0x01
676#define AFS_DEBUG_KLEAVE 0x02
677#define AFS_DEBUG_KDEBUG 0x04
678
679#define _enter(FMT,...) \
680do { \
681 if (unlikely(afs_debug & AFS_DEBUG_KENTER)) \
682 kenter(FMT,##__VA_ARGS__); \
683} while (0)
684
685#define _leave(FMT,...) \
686do { \
687 if (unlikely(afs_debug & AFS_DEBUG_KLEAVE)) \
688 kleave(FMT,##__VA_ARGS__); \
689} while (0)
690
691#define _debug(FMT,...) \
692do { \
693 if (unlikely(afs_debug & AFS_DEBUG_KDEBUG)) \
694 kdebug(FMT,##__VA_ARGS__); \
695} while (0)
696
697#else
698#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__)
699#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__)
700#define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__)
701#endif
702
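Under CONFIG_AFS_DEBUG each macro family above is gated by its own bit in afs_debug, which main.c exposes as a writable module parameter, so tracing can be switched per-class at runtime. The same gating reduced to a standalone form (bit values copied from above; printf stands in for dbgprintk):

#include <stdio.h>

static unsigned debug_mask; /* would be the afs_debug module parameter */

#define DEBUG_KENTER 0x01
#define DEBUG_KLEAVE 0x02

#define _enter(FMT, ...) \
	do { \
		if (debug_mask & DEBUG_KENTER) \
			printf("==> %s(" FMT ")\n", __func__, ##__VA_ARGS__); \
	} while (0)

#define _leave(FMT, ...) \
	do { \
		if (debug_mask & DEBUG_KLEAVE) \
			printf("<== %s()" FMT "\n", __func__, ##__VA_ARGS__); \
	} while (0)

static void frob(int x)
{
	_enter("%d", x);
	_leave(" = %d", x * 2);
}

int main(void)
{
	frob(1);                /* silent: mask is clear */
	debug_mask = DEBUG_KENTER | DEBUG_KLEAVE;
	frob(2);                /* both trace lines print */
	return 0;
}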
703/*
704 * debug assertion checking
705 */
706#if 1 // defined(__KDEBUGALL)
707
708#define ASSERT(X) \
709do { \
710 if (unlikely(!(X))) { \
711 printk(KERN_ERR "\n"); \
712 printk(KERN_ERR "AFS: Assertion failed\n"); \
713 BUG(); \
714 } \
715} while(0)
716
717#define ASSERTCMP(X, OP, Y) \
718do { \
719 if (unlikely(!((X) OP (Y)))) { \
720 printk(KERN_ERR "\n"); \
721 printk(KERN_ERR "AFS: Assertion failed\n"); \
722 printk(KERN_ERR "%lu " #OP " %lu is false\n", \
723 (unsigned long)(X), (unsigned long)(Y)); \
724 printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n", \
725 (unsigned long)(X), (unsigned long)(Y)); \
726 BUG(); \
727 } \
728} while(0)
729
730#define ASSERTIF(C, X) \
731do { \
732 if (unlikely((C) && !(X))) { \
733 printk(KERN_ERR "\n"); \
734 printk(KERN_ERR "AFS: Assertion failed\n"); \
735 BUG(); \
736 } \
737} while(0)
738
739#define ASSERTIFCMP(C, X, OP, Y) \
740do { \
741 if (unlikely((C) && !((X) OP (Y)))) { \
742 printk(KERN_ERR "\n"); \
743 printk(KERN_ERR "AFS: Assertion failed\n"); \
744 printk(KERN_ERR "%lu " #OP " %lu is false\n", \
745 (unsigned long)(X), (unsigned long)(Y)); \
746 printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n", \
747 (unsigned long)(X), (unsigned long)(Y)); \
748 BUG(); \
749 } \
750} while(0)
751
752#else
753
754#define ASSERT(X) \
755do { \
756} while(0)
757
758#define ASSERTCMP(X, OP, Y) \
759do { \
760} while(0)
761
762#define ASSERTIF(C, X) \
763do { \
764} while(0)
765
766#define ASSERTIFCMP(C, X, OP, Y) \
767do { \
768} while(0)
138 769
139#endif /* AFS_INTERNAL_H */ 770#endif /* __KDEBUGALL */
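ASSERTCMP is worth its verbosity: on failure it reports both operands in decimal and in hex before hitting BUG(), so a bad refcount or ring index is visible without a rebuild. A standalone analogue with abort() standing in for BUG():

#include <stdio.h>
#include <stdlib.h>

#define ASSERTCMP(X, OP, Y) \
	do { \
		if (!((X) OP (Y))) { \
			fprintf(stderr, "Assertion failed: %lu " #OP " %lu " \
				"(0x%lx " #OP " 0x%lx)\n", \
				(unsigned long)(X), (unsigned long)(Y), \
				(unsigned long)(X), (unsigned long)(Y)); \
			abort(); \
		} \
	} while (0)

int main(void)
{
	int usage = 1;

	ASSERTCMP(usage, >, 0);  /* passes silently */
	ASSERTCMP(usage, ==, 2); /* prints both forms, then aborts */
	return 0;
}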
diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c
deleted file mode 100644
index 615df2407cb2..000000000000
--- a/fs/afs/kafsasyncd.c
+++ /dev/null
@@ -1,255 +0,0 @@
1/* kafsasyncd.c: AFS asynchronous operation daemon
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 *
12 * The AFS async daemon is used to do the following:
13 * - probe "dead" servers to see whether they've come back to life yet.
14 * - probe "live" servers that we haven't talked to for a while to see if they are better
15 * candidates for serving than what we're currently using
16 * - poll volume location servers to keep up to date volume location lists
17 */
18
19#include <linux/module.h>
20#include <linux/init.h>
21#include <linux/sched.h>
22#include <linux/completion.h>
23#include <linux/freezer.h>
24#include "cell.h"
25#include "server.h"
26#include "volume.h"
27#include "kafsasyncd.h"
28#include "kafstimod.h"
29#include <rxrpc/call.h>
30#include <asm/errno.h>
31#include "internal.h"
32
33static DECLARE_COMPLETION(kafsasyncd_alive);
34static DECLARE_COMPLETION(kafsasyncd_dead);
35static DECLARE_WAIT_QUEUE_HEAD(kafsasyncd_sleepq);
36static struct task_struct *kafsasyncd_task;
37static int kafsasyncd_die;
38
39static int kafsasyncd(void *arg);
40
41static LIST_HEAD(kafsasyncd_async_attnq);
42static LIST_HEAD(kafsasyncd_async_busyq);
43static DEFINE_SPINLOCK(kafsasyncd_async_lock);
44
45static void kafsasyncd_null_call_attn_func(struct rxrpc_call *call)
46{
47}
48
49static void kafsasyncd_null_call_error_func(struct rxrpc_call *call)
50{
51}
52
53/*****************************************************************************/
54/*
55 * start the async daemon
56 */
57int afs_kafsasyncd_start(void)
58{
59 int ret;
60
61 ret = kernel_thread(kafsasyncd, NULL, 0);
62 if (ret < 0)
63 return ret;
64
65 wait_for_completion(&kafsasyncd_alive);
66
67 return ret;
68} /* end afs_kafsasyncd_start() */
69
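Note the startup handshake: the caller blocks on kafsasyncd_alive until the daemon thread, further down in kafsasyncd(), calls complete(&kafsasyncd_alive), so the start routine can never return with a half-initialised daemon. The same rendezvous expressed with POSIX threads, as a rough userspace sketch:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int alive;

static void *daemon_fn(void *arg)
{
	/* ... set up state the caller depends on ... */
	pthread_mutex_lock(&lock);
	alive = 1;                      /* analogous to complete() */
	pthread_cond_signal(&cond);
	pthread_mutex_unlock(&lock);
	/* ... main service loop would run here ... */
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, daemon_fn, NULL);
	pthread_mutex_lock(&lock);      /* analogous to wait_for_completion() */
	while (!alive)
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
	printf("daemon is up\n");
	pthread_join(t, NULL);
	return 0;
}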
70/*****************************************************************************/
71/*
72 * stop the async daemon
73 */
74void afs_kafsasyncd_stop(void)
75{
76 /* get rid of my daemon */
77 kafsasyncd_die = 1;
78 wake_up(&kafsasyncd_sleepq);
79 wait_for_completion(&kafsasyncd_dead);
80
81} /* end afs_kafsasyncd_stop() */
82
83/*****************************************************************************/
84/*
85 * probing daemon
86 */
87static int kafsasyncd(void *arg)
88{
89 struct afs_async_op *op;
90 int die;
91
92 DECLARE_WAITQUEUE(myself, current);
93
94 kafsasyncd_task = current;
95
96 printk("kAFS: Started kafsasyncd %d\n", current->pid);
97
98 daemonize("kafsasyncd");
99
100 complete(&kafsasyncd_alive);
101
102 /* loop around looking for things to attend to */
103 do {
104 set_current_state(TASK_INTERRUPTIBLE);
105 add_wait_queue(&kafsasyncd_sleepq, &myself);
106
107 for (;;) {
108 if (!list_empty(&kafsasyncd_async_attnq) ||
109 signal_pending(current) ||
110 kafsasyncd_die)
111 break;
112
113 schedule();
114 set_current_state(TASK_INTERRUPTIBLE);
115 }
116
117 remove_wait_queue(&kafsasyncd_sleepq, &myself);
118 set_current_state(TASK_RUNNING);
119
120 try_to_freeze();
121
122 /* discard pending signals */
123 afs_discard_my_signals();
124
125 die = kafsasyncd_die;
126
127 /* deal with the next asynchronous operation requiring
128 * attention */
129 if (!list_empty(&kafsasyncd_async_attnq)) {
130 struct afs_async_op *op;
131
132 _debug("@@@ Begin Asynchronous Operation");
133
134 op = NULL;
135 spin_lock(&kafsasyncd_async_lock);
136
137 if (!list_empty(&kafsasyncd_async_attnq)) {
138 op = list_entry(kafsasyncd_async_attnq.next,
139 struct afs_async_op, link);
140 list_move_tail(&op->link,
141 &kafsasyncd_async_busyq);
142 }
143
144 spin_unlock(&kafsasyncd_async_lock);
145
146 _debug("@@@ Operation %p {%p}\n",
147 op, op ? op->ops : NULL);
148
149 if (op)
150 op->ops->attend(op);
151
152 _debug("@@@ End Asynchronous Operation");
153 }
154
155 } while(!die);
156
157 /* need to kill all outstanding asynchronous operations before
158 * exiting */
159 kafsasyncd_task = NULL;
160 spin_lock(&kafsasyncd_async_lock);
161
162 /* fold the busy and attention queues together */
163 list_splice_init(&kafsasyncd_async_busyq,
164 &kafsasyncd_async_attnq);
165
166 /* dequeue kafsasyncd from all their wait queues */
167 list_for_each_entry(op, &kafsasyncd_async_attnq, link) {
168 op->call->app_attn_func = kafsasyncd_null_call_attn_func;
169 op->call->app_error_func = kafsasyncd_null_call_error_func;
170 remove_wait_queue(&op->call->waitq, &op->waiter);
171 }
172
173 spin_unlock(&kafsasyncd_async_lock);
174
175 /* abort all the operations */
176 while (!list_empty(&kafsasyncd_async_attnq)) {
177 op = list_entry(kafsasyncd_async_attnq.next, struct afs_async_op, link);
178 list_del_init(&op->link);
179
180 rxrpc_call_abort(op->call, -EIO);
181 rxrpc_put_call(op->call);
182 op->call = NULL;
183
184 op->ops->discard(op);
185 }
186
187 /* and that's all */
188 _leave("");
189 complete_and_exit(&kafsasyncd_dead, 0);
190
191} /* end kafsasyncd() */
192
193/*****************************************************************************/
194/*
195 * begin an operation
196 * - place operation on busy queue
197 */
198void afs_kafsasyncd_begin_op(struct afs_async_op *op)
199{
200 _enter("");
201
202 spin_lock(&kafsasyncd_async_lock);
203
204 init_waitqueue_entry(&op->waiter, kafsasyncd_task);
205 add_wait_queue(&op->call->waitq, &op->waiter);
206
207 list_move_tail(&op->link, &kafsasyncd_async_busyq);
208
209 spin_unlock(&kafsasyncd_async_lock);
210
211 _leave("");
212} /* end afs_kafsasyncd_begin_op() */
213
214/*****************************************************************************/
215/*
216 * request attention for an operation
217 * - move to attention queue
218 */
219void afs_kafsasyncd_attend_op(struct afs_async_op *op)
220{
221 _enter("");
222
223 spin_lock(&kafsasyncd_async_lock);
224
225 list_move_tail(&op->link, &kafsasyncd_async_attnq);
226
227 spin_unlock(&kafsasyncd_async_lock);
228
229 wake_up(&kafsasyncd_sleepq);
230
231 _leave("");
232} /* end afs_kafsasyncd_attend_op() */
233
234/*****************************************************************************/
235/*
236 * terminate an operation
237 * - remove from either queue
238 */
239void afs_kafsasyncd_terminate_op(struct afs_async_op *op)
240{
241 _enter("");
242
243 spin_lock(&kafsasyncd_async_lock);
244
245 if (!list_empty(&op->link)) {
246 list_del_init(&op->link);
247 remove_wait_queue(&op->call->waitq, &op->waiter);
248 }
249
250 spin_unlock(&kafsasyncd_async_lock);
251
252 wake_up(&kafsasyncd_sleepq);
253
254 _leave("");
255} /* end afs_kafsasyncd_terminate_op() */
diff --git a/fs/afs/kafsasyncd.h b/fs/afs/kafsasyncd.h
deleted file mode 100644
index 791803f9a6fb..000000000000
--- a/fs/afs/kafsasyncd.h
+++ /dev/null
@@ -1,52 +0,0 @@
1/* kafsasyncd.h: AFS asynchronous operation daemon
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_AFS_KAFSASYNCD_H
13#define _LINUX_AFS_KAFSASYNCD_H
14
15#include "types.h"
16
17struct afs_async_op;
18
19struct afs_async_op_ops {
20 void (*attend)(struct afs_async_op *op);
21 void (*discard)(struct afs_async_op *op);
22};
23
24/*****************************************************************************/
25/*
26 * asynchronous operation record
27 */
28struct afs_async_op
29{
30 struct list_head link;
31 struct afs_server *server; /* server being contacted */
32 struct rxrpc_call *call; /* RxRPC call performing op */
33 wait_queue_t waiter; /* wait queue for kafsasyncd */
34 const struct afs_async_op_ops *ops; /* operations */
35};
36
37static inline void afs_async_op_init(struct afs_async_op *op,
38 const struct afs_async_op_ops *ops)
39{
40 INIT_LIST_HEAD(&op->link);
41 op->call = NULL;
42 op->ops = ops;
43}
44
45extern int afs_kafsasyncd_start(void);
46extern void afs_kafsasyncd_stop(void);
47
48extern void afs_kafsasyncd_begin_op(struct afs_async_op *op);
49extern void afs_kafsasyncd_attend_op(struct afs_async_op *op);
50extern void afs_kafsasyncd_terminate_op(struct afs_async_op *op);
51
52#endif /* _LINUX_AFS_KAFSASYNCD_H */
diff --git a/fs/afs/kafstimod.c b/fs/afs/kafstimod.c
deleted file mode 100644
index 694344e4d3c7..000000000000
--- a/fs/afs/kafstimod.c
+++ /dev/null
@@ -1,205 +0,0 @@
1/* kafstimod.c: AFS timeout daemon
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/sched.h>
15#include <linux/completion.h>
16#include <linux/freezer.h>
17#include "cell.h"
18#include "volume.h"
19#include "kafstimod.h"
20#include <asm/errno.h>
21#include "internal.h"
22
23static DECLARE_COMPLETION(kafstimod_alive);
24static DECLARE_COMPLETION(kafstimod_dead);
25static DECLARE_WAIT_QUEUE_HEAD(kafstimod_sleepq);
26static int kafstimod_die;
27
28static LIST_HEAD(kafstimod_list);
29static DEFINE_SPINLOCK(kafstimod_lock);
30
31static int kafstimod(void *arg);
32
33/*****************************************************************************/
34/*
35 * start the timeout daemon
36 */
37int afs_kafstimod_start(void)
38{
39 int ret;
40
41 ret = kernel_thread(kafstimod, NULL, 0);
42 if (ret < 0)
43 return ret;
44
45 wait_for_completion(&kafstimod_alive);
46
47 return ret;
48} /* end afs_kafstimod_start() */
49
50/*****************************************************************************/
51/*
52 * stop the timeout daemon
53 */
54void afs_kafstimod_stop(void)
55{
56 /* get rid of my daemon */
57 kafstimod_die = 1;
58 wake_up(&kafstimod_sleepq);
59 wait_for_completion(&kafstimod_dead);
60
61} /* end afs_kafstimod_stop() */
62
63/*****************************************************************************/
64/*
65 * timeout processing daemon
66 */
67static int kafstimod(void *arg)
68{
69 struct afs_timer *timer;
70
71 DECLARE_WAITQUEUE(myself, current);
72
73 printk("kAFS: Started kafstimod %d\n", current->pid);
74
75 daemonize("kafstimod");
76
77 complete(&kafstimod_alive);
78
79 /* loop around looking for things to attend to */
80 loop:
81 set_current_state(TASK_INTERRUPTIBLE);
82 add_wait_queue(&kafstimod_sleepq, &myself);
83
84 for (;;) {
85 unsigned long jif;
86 signed long timeout;
87
88 /* deal with the server being asked to die */
89 if (kafstimod_die) {
90 remove_wait_queue(&kafstimod_sleepq, &myself);
91 _leave("");
92 complete_and_exit(&kafstimod_dead, 0);
93 }
94
95 try_to_freeze();
96
97 /* discard pending signals */
98 afs_discard_my_signals();
99
100 /* work out the time to elapse before the next event */
101 spin_lock(&kafstimod_lock);
102 if (list_empty(&kafstimod_list)) {
103 timeout = MAX_SCHEDULE_TIMEOUT;
104 }
105 else {
106 timer = list_entry(kafstimod_list.next,
107 struct afs_timer, link);
108 timeout = timer->timo_jif;
109 jif = jiffies;
110
111 if (time_before_eq((unsigned long) timeout, jif))
112 goto immediate;
113
114 else {
115 timeout = (long) timeout - (long) jiffies;
116 }
117 }
118 spin_unlock(&kafstimod_lock);
119
120 schedule_timeout(timeout);
121
122 set_current_state(TASK_INTERRUPTIBLE);
123 }
124
125 /* the thing on the front of the queue needs processing
126 * - we come here with the lock held and timer pointing to the expired
127 * entry
128 */
129 immediate:
130 remove_wait_queue(&kafstimod_sleepq, &myself);
131 set_current_state(TASK_RUNNING);
132
133 _debug("@@@ Begin Timeout of %p", timer);
134
135 /* dequeue the timer */
136 list_del_init(&timer->link);
137 spin_unlock(&kafstimod_lock);
138
139 /* call the timeout function */
140 timer->ops->timed_out(timer);
141
142 _debug("@@@ End Timeout");
143 goto loop;
144
145} /* end kafstimod() */
146
147/*****************************************************************************/
148/*
149 * (re-)queue a timer
150 */
151void afs_kafstimod_add_timer(struct afs_timer *timer, unsigned long timeout)
152{
153 struct afs_timer *ptimer;
154 struct list_head *_p;
155
156 _enter("%p,%lu", timer, timeout);
157
158 spin_lock(&kafstimod_lock);
159
160 list_del(&timer->link);
161
162 /* the timer was deferred or reset - put it back in the queue at the
163 * right place */
164 timer->timo_jif = jiffies + timeout;
165
166 list_for_each(_p, &kafstimod_list) {
167 ptimer = list_entry(_p, struct afs_timer, link);
168 if (time_before(timer->timo_jif, ptimer->timo_jif))
169 break;
170 }
171
172 list_add_tail(&timer->link, _p); /* insert before stopping point */
173
174 spin_unlock(&kafstimod_lock);
175
176 wake_up(&kafstimod_sleepq);
177
178 _leave("");
179} /* end afs_kafstimod_add_timer() */
180
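The walk above keeps kafstimod_list sorted by expiry, inserting the (re)queued timer just before the first entry due later, so the list head is always the next event. A standalone sketch of the same ordering rule (the kernel compares with time_before() to survive jiffies wraparound; plain <= is used here for brevity):

#include <stdio.h>
#include <stdlib.h>

struct timer {
	unsigned long timo;
	struct timer *next;
};

/* insert so the list stays sorted by expiry, earliest first */
static void add_timer(struct timer **head, struct timer *t)
{
	struct timer **p = head;

	while (*p && (*p)->timo <= t->timo) /* stop before the first later entry */
		p = &(*p)->next;
	t->next = *p;
	*p = t;
}

int main(void)
{
	struct timer *head = NULL;
	unsigned long expiries[] = { 30, 10, 20 };

	for (int i = 0; i < 3; i++) {
		struct timer *t = malloc(sizeof(*t));
		t->timo = expiries[i];
		add_timer(&head, t);
	}
	for (struct timer *t = head; t; t = t->next)
		printf("fires at %lu\n", t->timo); /* 10, 20, 30 */
	return 0;
}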
181/*****************************************************************************/
182/*
183 * dequeue a timer
184 * - returns 0 if the timer was deleted or -ENOENT if it wasn't queued
185 */
186int afs_kafstimod_del_timer(struct afs_timer *timer)
187{
188 int ret = 0;
189
190 _enter("%p", timer);
191
192 spin_lock(&kafstimod_lock);
193
194 if (list_empty(&timer->link))
195 ret = -ENOENT;
196 else
197 list_del_init(&timer->link);
198
199 spin_unlock(&kafstimod_lock);
200
201 wake_up(&kafstimod_sleepq);
202
203 _leave(" = %d", ret);
204 return ret;
205} /* end afs_kafstimod_del_timer() */
diff --git a/fs/afs/kafstimod.h b/fs/afs/kafstimod.h
deleted file mode 100644
index e312f1a61a7f..000000000000
--- a/fs/afs/kafstimod.h
+++ /dev/null
@@ -1,49 +0,0 @@
1/* kafstimod.h: AFS timeout daemon
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_AFS_KAFSTIMOD_H
13#define _LINUX_AFS_KAFSTIMOD_H
14
15#include "types.h"
16
17struct afs_timer;
18
19struct afs_timer_ops {
20 /* called when the front of the timer queue has timed out */
21 void (*timed_out)(struct afs_timer *timer);
22};
23
24/*****************************************************************************/
25/*
26 * AFS timer/timeout record
27 */
28struct afs_timer
29{
30 struct list_head link; /* link in timer queue */
31 unsigned long timo_jif; /* timeout time */
32 const struct afs_timer_ops *ops; /* timeout expiry function */
33};
34
35static inline void afs_timer_init(struct afs_timer *timer,
36 const struct afs_timer_ops *ops)
37{
38 INIT_LIST_HEAD(&timer->link);
39 timer->ops = ops;
40}
41
42extern int afs_kafstimod_start(void);
43extern void afs_kafstimod_stop(void);
44
45extern void afs_kafstimod_add_timer(struct afs_timer *timer,
46 unsigned long timeout);
47extern int afs_kafstimod_del_timer(struct afs_timer *timer);
48
49#endif /* _LINUX_AFS_KAFSTIMOD_H */
diff --git a/fs/afs/main.c b/fs/afs/main.c
index f2704ba53857..40c2704e7557 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -1,4 +1,4 @@
1/* main.c: AFS client file system 1/* AFS client file system
2 * 2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
@@ -13,43 +13,21 @@
13#include <linux/moduleparam.h> 13#include <linux/moduleparam.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/completion.h> 15#include <linux/completion.h>
16#include <rxrpc/rxrpc.h>
17#include <rxrpc/transport.h>
18#include <rxrpc/call.h>
19#include <rxrpc/peer.h>
20#include "cache.h"
21#include "cell.h"
22#include "server.h"
23#include "fsclient.h"
24#include "cmservice.h"
25#include "kafstimod.h"
26#include "kafsasyncd.h"
27#include "internal.h" 16#include "internal.h"
28 17
29struct rxrpc_transport *afs_transport;
30
31static int afs_adding_peer(struct rxrpc_peer *peer);
32static void afs_discarding_peer(struct rxrpc_peer *peer);
33
34
35MODULE_DESCRIPTION("AFS Client File System"); 18MODULE_DESCRIPTION("AFS Client File System");
36MODULE_AUTHOR("Red Hat, Inc."); 19MODULE_AUTHOR("Red Hat, Inc.");
37MODULE_LICENSE("GPL"); 20MODULE_LICENSE("GPL");
38 21
22unsigned afs_debug;
23module_param_named(debug, afs_debug, uint, S_IWUSR | S_IRUGO);
24MODULE_PARM_DESC(afs_debug, "AFS debugging mask");
25
39static char *rootcell; 26static char *rootcell;
40 27
41module_param(rootcell, charp, 0); 28module_param(rootcell, charp, 0);
42MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list"); 29MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list");
43 30
44
45static struct rxrpc_peer_ops afs_peer_ops = {
46 .adding = afs_adding_peer,
47 .discarding = afs_discarding_peer,
48};
49
50struct list_head afs_cb_hash_tbl[AFS_CB_HASH_COUNT];
51DEFINE_SPINLOCK(afs_cb_hash_lock);
52
53#ifdef AFS_CACHING_SUPPORT 31#ifdef AFS_CACHING_SUPPORT
54static struct cachefs_netfs_operations afs_cache_ops = { 32static struct cachefs_netfs_operations afs_cache_ops = {
55 .get_page_cookie = afs_cache_get_page_cookie, 33 .get_page_cookie = afs_cache_get_page_cookie,
@@ -62,20 +40,63 @@ struct cachefs_netfs afs_cache_netfs = {
62}; 40};
63#endif 41#endif
64 42
65/*****************************************************************************/ 43struct afs_uuid afs_uuid;
44
45/*
46 * get a client UUID
47 */
48static int __init afs_get_client_UUID(void)
49{
50 struct timespec ts;
51 u64 uuidtime;
52 u16 clockseq;
53 int ret;
54
55 /* read the MAC address of one of the external interfaces and construct
56 * a UUID from it */
57 ret = afs_get_MAC_address(afs_uuid.node);
58 if (ret < 0)
59 return ret;
60
61 getnstimeofday(&ts);
62 uuidtime = (u64) ts.tv_sec * 1000 * 1000 * 10;
63 uuidtime += ts.tv_nsec / 100;
64 uuidtime += AFS_UUID_TO_UNIX_TIME;
65 afs_uuid.time_low = uuidtime;
66 afs_uuid.time_mid = uuidtime >> 32;
67 afs_uuid.time_hi_and_version = (uuidtime >> 48) & AFS_UUID_TIMEHI_MASK;
68 afs_uuid.time_hi_and_version |= AFS_UUID_VERSION_TIME;
69
70 get_random_bytes(&clockseq, 2);
71 afs_uuid.clock_seq_low = clockseq;
72 afs_uuid.clock_seq_hi_and_reserved =
73 (clockseq >> 8) & AFS_UUID_CLOCKHI_MASK;
74 afs_uuid.clock_seq_hi_and_reserved |= AFS_UUID_VARIANT_STD;
75
76 _debug("AFS UUID: %08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
77 afs_uuid.time_low,
78 afs_uuid.time_mid,
79 afs_uuid.time_hi_and_version,
80 afs_uuid.clock_seq_hi_and_reserved,
81 afs_uuid.clock_seq_low,
82 afs_uuid.node[0], afs_uuid.node[1], afs_uuid.node[2],
83 afs_uuid.node[3], afs_uuid.node[4], afs_uuid.node[5]);
84
85 return 0;
86}
87
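The arithmetic above converts a struct timespec to RFC 4122 time: seconds scaled to 100 ns units, the nanosecond remainder divided down, then the 1582-to-1970 offset added, after which the version and variant bits are OR'd into the high fields. The same computation standalone, with clock_gettime() standing in for getnstimeofday():

#include <stdio.h>
#include <stdint.h>
#include <time.h>

#define AFS_UUID_TO_UNIX_TIME 0x01b21dd213814000ULL

int main(void)
{
	struct timespec ts;
	uint64_t uuidtime;

	clock_gettime(CLOCK_REALTIME, &ts);
	uuidtime  = (uint64_t)ts.tv_sec * 1000 * 1000 * 10; /* s -> 100 ns units */
	uuidtime += ts.tv_nsec / 100;                       /* ns -> 100 ns units */
	uuidtime += AFS_UUID_TO_UNIX_TIME;                  /* rebase to 1582 */

	printf("60-bit UUID time: %015llx\n",
	       (unsigned long long)(uuidtime & 0xfffffffffffffffULL));
	return 0;
}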
66/* 88/*
67 * initialise the AFS client FS module 89 * initialise the AFS client FS module
68 */ 90 */
69static int __init afs_init(void) 91static int __init afs_init(void)
70{ 92{
71 int loop, ret; 93 int ret;
72 94
73 printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n"); 95 printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n");
74 96
75 /* initialise the callback hash table */ 97 ret = afs_get_client_UUID();
76 spin_lock_init(&afs_cb_hash_lock); 98 if (ret < 0)
77 for (loop = AFS_CB_HASH_COUNT - 1; loop >= 0; loop--) 99 return ret;
78 INIT_LIST_HEAD(&afs_cb_hash_tbl[loop]);
79 100
80 /* register the /proc stuff */ 101 /* register the /proc stuff */
81 ret = afs_proc_init(); 102 ret = afs_proc_init();
@@ -87,70 +108,56 @@ static int __init afs_init(void)
87 ret = cachefs_register_netfs(&afs_cache_netfs, 108 ret = cachefs_register_netfs(&afs_cache_netfs,
88 &afs_cache_cell_index_def); 109 &afs_cache_cell_index_def);
89 if (ret < 0) 110 if (ret < 0)
90 goto error;
91#endif
92
93#ifdef CONFIG_KEYS_TURNED_OFF
94 ret = afs_key_register();
95 if (ret < 0)
96 goto error_cache; 111 goto error_cache;
97#endif 112#endif
98 113
99 /* initialise the cell DB */ 114 /* initialise the cell DB */
100 ret = afs_cell_init(rootcell); 115 ret = afs_cell_init(rootcell);
101 if (ret < 0) 116 if (ret < 0)
102 goto error_keys; 117 goto error_cell_init;
103 118
104 /* start the timeout daemon */ 119 /* initialise the VL update process */
105 ret = afs_kafstimod_start(); 120 ret = afs_vlocation_update_init();
106 if (ret < 0) 121 if (ret < 0)
107 goto error_keys; 122 goto error_vl_update_init;
108 123
109 /* start the async operation daemon */ 124 /* initialise the callback update process */
110 ret = afs_kafsasyncd_start(); 125 ret = afs_callback_update_init();
111 if (ret < 0)
112 goto error_kafstimod;
113 126
114 /* create the RxRPC transport */ 127 /* create the RxRPC transport */
115 ret = rxrpc_create_transport(7001, &afs_transport); 128 ret = afs_open_socket();
116 if (ret < 0) 129 if (ret < 0)
117 goto error_kafsasyncd; 130 goto error_open_socket;
118
119 afs_transport->peer_ops = &afs_peer_ops;
120 131
121 /* register the filesystems */ 132 /* register the filesystems */
122 ret = afs_fs_init(); 133 ret = afs_fs_init();
123 if (ret < 0) 134 if (ret < 0)
124 goto error_transport; 135 goto error_fs;
125 136
126 return ret; 137 return ret;
127 138
128 error_transport: 139error_fs:
129 rxrpc_put_transport(afs_transport); 140 afs_close_socket();
130 error_kafsasyncd: 141error_open_socket:
131 afs_kafsasyncd_stop(); 142error_vl_update_init:
132 error_kafstimod: 143error_cell_init:
133 afs_kafstimod_stop();
134 error_keys:
135#ifdef CONFIG_KEYS_TURNED_OFF
136 afs_key_unregister();
137 error_cache:
138#endif
139#ifdef AFS_CACHING_SUPPORT 144#ifdef AFS_CACHING_SUPPORT
140 cachefs_unregister_netfs(&afs_cache_netfs); 145 cachefs_unregister_netfs(&afs_cache_netfs);
141 error: 146error_cache:
142#endif 147#endif
148 afs_callback_update_kill();
149 afs_vlocation_purge();
143 afs_cell_purge(); 150 afs_cell_purge();
144 afs_proc_cleanup(); 151 afs_proc_cleanup();
145 printk(KERN_ERR "kAFS: failed to register: %d\n", ret); 152 printk(KERN_ERR "kAFS: failed to register: %d\n", ret);
146 return ret; 153 return ret;
147} /* end afs_init() */ 154}
148 155
149/* XXX late_initcall is kludgy, but the only alternative seems to create 156/* XXX late_initcall is kludgy, but the only alternative seems to create
150 * a transport upon the first mount, which is worse. Or is it? 157 * a transport upon the first mount, which is worse. Or is it?
151 */ 158 */
152late_initcall(afs_init); /* must be called after net/ to create socket */ 159late_initcall(afs_init); /* must be called after net/ to create socket */
153/*****************************************************************************/ 160
154/* 161/*
155 * clean up on module removal 162 * clean up on module removal
156 */ 163 */
@@ -159,127 +166,16 @@ static void __exit afs_exit(void)
159 printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n"); 166 printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n");
160 167
161 afs_fs_exit(); 168 afs_fs_exit();
162 rxrpc_put_transport(afs_transport); 169 afs_close_socket();
163 afs_kafstimod_stop(); 170 afs_purge_servers();
164 afs_kafsasyncd_stop(); 171 afs_callback_update_kill();
172 afs_vlocation_purge();
173 flush_scheduled_work();
165 afs_cell_purge(); 174 afs_cell_purge();
166#ifdef CONFIG_KEYS_TURNED_OFF
167 afs_key_unregister();
168#endif
169#ifdef AFS_CACHING_SUPPORT 175#ifdef AFS_CACHING_SUPPORT
170 cachefs_unregister_netfs(&afs_cache_netfs); 176 cachefs_unregister_netfs(&afs_cache_netfs);
171#endif 177#endif
172 afs_proc_cleanup(); 178 afs_proc_cleanup();
173
174} /* end afs_exit() */
175
176module_exit(afs_exit);
177
178/*****************************************************************************/
179/*
180 * notification that new peer record is being added
181 * - called from krxsecd
182 * - return an error to induce an abort
183 * - mustn't sleep (caller holds an rwlock)
184 */
185static int afs_adding_peer(struct rxrpc_peer *peer)
186{
187 struct afs_server *server;
188 int ret;
189
190 _debug("kAFS: Adding new peer %08x\n", ntohl(peer->addr.s_addr));
191
192 /* determine which server the peer resides in (if any) */
193 ret = afs_server_find_by_peer(peer, &server);
194 if (ret < 0)
195 return ret; /* none that we recognise, so abort */
196
197 _debug("Server %p{u=%d}\n", server, atomic_read(&server->usage));
198
199 _debug("Cell %p{u=%d}\n",
200 server->cell, atomic_read(&server->cell->usage));
201
202 /* cross-point the structs under a global lock */
203 spin_lock(&afs_server_peer_lock);
204 peer->user = server;
205 server->peer = peer;
206 spin_unlock(&afs_server_peer_lock);
207
208 afs_put_server(server);
209
210 return 0;
211} /* end afs_adding_peer() */
212
213/*****************************************************************************/
214/*
215 * notification that a peer record is being discarded
216 * - called from krxiod or krxsecd
217 */
218static void afs_discarding_peer(struct rxrpc_peer *peer)
219{
220 struct afs_server *server;
221
222 _enter("%p",peer);
223
224 _debug("Discarding peer %08x (rtt=%lu.%lumS)\n",
225 ntohl(peer->addr.s_addr),
226 (long) (peer->rtt / 1000),
227 (long) (peer->rtt % 1000));
228
229 /* uncross-point the structs under a global lock */
230 spin_lock(&afs_server_peer_lock);
231 server = peer->user;
232 if (server) {
233 peer->user = NULL;
234 server->peer = NULL;
235 }
236 spin_unlock(&afs_server_peer_lock);
237
238 _leave("");
239
240} /* end afs_discarding_peer() */
241
242/*****************************************************************************/
243/*
244 * clear the dead space between task_struct and kernel stack
245 * - called by supplying -finstrument-functions to gcc
246 */
247#if 0
248void __cyg_profile_func_enter (void *this_fn, void *call_site)
249__attribute__((no_instrument_function));
250
251void __cyg_profile_func_enter (void *this_fn, void *call_site)
252{
253 asm volatile(" movl %%esp,%%edi \n"
254 " andl %0,%%edi \n"
255 " addl %1,%%edi \n"
256 " movl %%esp,%%ecx \n"
257 " subl %%edi,%%ecx \n"
258 " shrl $2,%%ecx \n"
259 " movl $0xedededed,%%eax \n"
260 " rep stosl \n"
261 :
262 : "i"(~(THREAD_SIZE - 1)), "i"(sizeof(struct thread_info))
263 : "eax", "ecx", "edi", "memory", "cc"
264 );
265} 179}
266 180
267void __cyg_profile_func_exit(void *this_fn, void *call_site) 181module_exit(afs_exit);
268__attribute__((no_instrument_function));
269
270void __cyg_profile_func_exit(void *this_fn, void *call_site)
271{
272 asm volatile(" movl %%esp,%%edi \n"
273 " andl %0,%%edi \n"
274 " addl %1,%%edi \n"
275 " movl %%esp,%%ecx \n"
276 " subl %%edi,%%ecx \n"
277 " shrl $2,%%ecx \n"
278 " movl $0xdadadada,%%eax \n"
279 " rep stosl \n"
280 :
281 : "i"(~(THREAD_SIZE - 1)), "i"(sizeof(struct thread_info))
282 : "eax", "ecx", "edi", "memory", "cc"
283 );
284}
285#endif
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index e4fce66d76e0..cdb9792d8161 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -1,6 +1,6 @@
1/* misc.c: miscellaneous bits 1/* miscellaneous bits
2 * 2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -12,19 +12,20 @@
12#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/errno.h> 14#include <linux/errno.h>
15#include "errors.h"
16#include "internal.h" 15#include "internal.h"
16#include "afs_fs.h"
17 17
18/*****************************************************************************/
19/* 18/*
20 * convert an AFS abort code to a Linux error number 19 * convert an AFS abort code to a Linux error number
21 */ 20 */
22int afs_abort_to_error(int abortcode) 21int afs_abort_to_error(u32 abort_code)
23{ 22{
24 switch (abortcode) { 23 switch (abort_code) {
24 case 13: return -EACCES;
25 case 30: return -EROFS;
25 case VSALVAGE: return -EIO; 26 case VSALVAGE: return -EIO;
26 case VNOVNODE: return -ENOENT; 27 case VNOVNODE: return -ENOENT;
27 case VNOVOL: return -ENXIO; 28 case VNOVOL: return -ENOMEDIUM;
28 case VVOLEXISTS: return -EEXIST; 29 case VVOLEXISTS: return -EEXIST;
29 case VNOSERVICE: return -EIO; 30 case VNOSERVICE: return -EIO;
30 case VOFFLINE: return -ENOENT; 31 case VOFFLINE: return -ENOENT;
@@ -33,7 +34,24 @@ int afs_abort_to_error(int abortcode)
33 case VOVERQUOTA: return -EDQUOT; 34 case VOVERQUOTA: return -EDQUOT;
34 case VBUSY: return -EBUSY; 35 case VBUSY: return -EBUSY;
35 case VMOVED: return -ENXIO; 36 case VMOVED: return -ENXIO;
36 default: return -EIO; 37 case 0x2f6df0c: return -EACCES;
38 case 0x2f6df0f: return -EBUSY;
39 case 0x2f6df10: return -EEXIST;
40 case 0x2f6df11: return -EXDEV;
41 case 0x2f6df13: return -ENOTDIR;
42 case 0x2f6df14: return -EISDIR;
43 case 0x2f6df15: return -EINVAL;
44 case 0x2f6df1a: return -EFBIG;
45 case 0x2f6df1b: return -ENOSPC;
46 case 0x2f6df1d: return -EROFS;
47 case 0x2f6df1e: return -EMLINK;
48 case 0x2f6df20: return -EDOM;
49 case 0x2f6df21: return -ERANGE;
50 case 0x2f6df22: return -EDEADLK;
51 case 0x2f6df23: return -ENAMETOOLONG;
52 case 0x2f6df24: return -ENOLCK;
53 case 0x2f6df26: return -ENOTEMPTY;
54 case 0x2f6df78: return -EDQUOT;
55 default: return -EREMOTEIO;
37 } 56 }
38 57}
39} /* end afs_abort_to_error() */
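Most of the new 0x2f6df* cases line up with classic errno numbering (0x2f6df00 + errno - 1: 0x2f6df0c is EACCES = 13, 0x2f6df0f is EBUSY = 16), but not all of them do, so an explicit table is the only safe mapping; the default also changes from -EIO to -EREMOTEIO so an unknown remote abort remains distinguishable from a local I/O failure. A standalone table-driven sketch of the same lookup:

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* a few of the abort codes from the switch above */
static const struct { unsigned abort_code; int error; } uae_map[] = {
	{ 0x2f6df0c, EACCES },
	{ 0x2f6df1b, ENOSPC },
	{ 0x2f6df78, EDQUOT },
};

static int abort_to_error(unsigned abort_code)
{
	for (size_t i = 0; i < sizeof(uae_map) / sizeof(uae_map[0]); i++)
		if (uae_map[i].abort_code == abort_code)
			return -uae_map[i].error;
	return -EREMOTEIO; /* same catch-all as the kernel function */
}

int main(void)
{
	printf("0x2f6df0c -> %s\n", strerror(-abort_to_error(0x2f6df0c)));
	printf("0xdeadbeef -> %s\n", strerror(-abort_to_error(0xdeadbeef)));
	return 0;
}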
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 68495f0de7b3..b905ae37f912 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -1,4 +1,4 @@
1/* mntpt.c: mountpoint management 1/* mountpoint management
2 * 2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
@@ -18,10 +18,6 @@
18#include <linux/mount.h> 18#include <linux/mount.h>
19#include <linux/namei.h> 19#include <linux/namei.h>
20#include <linux/mnt_namespace.h> 20#include <linux/mnt_namespace.h>
21#include "super.h"
22#include "cell.h"
23#include "volume.h"
24#include "vnode.h"
25#include "internal.h" 21#include "internal.h"
26 22
27 23
@@ -30,6 +26,7 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir,
30 struct nameidata *nd); 26 struct nameidata *nd);
31static int afs_mntpt_open(struct inode *inode, struct file *file); 27static int afs_mntpt_open(struct inode *inode, struct file *file);
32static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd); 28static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd);
29static void afs_mntpt_expiry_timed_out(struct work_struct *work);
33 30
34const struct file_operations afs_mntpt_file_operations = { 31const struct file_operations afs_mntpt_file_operations = {
35 .open = afs_mntpt_open, 32 .open = afs_mntpt_open,
@@ -43,24 +40,19 @@ const struct inode_operations afs_mntpt_inode_operations = {
43}; 40};
44 41
45static LIST_HEAD(afs_vfsmounts); 42static LIST_HEAD(afs_vfsmounts);
43static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out);
46 44
47static void afs_mntpt_expiry_timed_out(struct afs_timer *timer); 45unsigned long afs_mntpt_expiry_timeout = 10 * 60;
48 46
49struct afs_timer_ops afs_mntpt_expiry_timer_ops = {
50 .timed_out = afs_mntpt_expiry_timed_out,
51};
52
53struct afs_timer afs_mntpt_expiry_timer;
54
55unsigned long afs_mntpt_expiry_timeout = 20;
56
57/*****************************************************************************/
58/* 47/*
59 * check a symbolic link to see whether it actually encodes a mountpoint 48 * check a symbolic link to see whether it actually encodes a mountpoint
60 * - sets the AFS_VNODE_MOUNTPOINT flag on the vnode appropriately 49 * - sets the AFS_VNODE_MOUNTPOINT flag on the vnode appropriately
61 */ 50 */
62int afs_mntpt_check_symlink(struct afs_vnode *vnode) 51int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key)
63{ 52{
53 struct file file = {
54 .private_data = key,
55 };
64 struct page *page; 56 struct page *page;
65 size_t size; 57 size_t size;
66 char *buf; 58 char *buf;
@@ -69,7 +61,7 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode)
69 _enter("{%u,%u}", vnode->fid.vnode, vnode->fid.unique); 61 _enter("{%u,%u}", vnode->fid.vnode, vnode->fid.unique);
70 62
71 /* read the contents of the symlink into the pagecache */ 63 /* read the contents of the symlink into the pagecache */
72 page = read_mapping_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, NULL); 64 page = read_mapping_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, &file);
73 if (IS_ERR(page)) { 65 if (IS_ERR(page)) {
74 ret = PTR_ERR(page); 66 ret = PTR_ERR(page);
75 goto out; 67 goto out;
@@ -85,7 +77,7 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode)
85 77
86 /* examine the symlink's contents */ 78 /* examine the symlink's contents */
87 size = vnode->status.size; 79 size = vnode->status.size;
88 _debug("symlink to %*.*s", size, (int) size, buf); 80 _debug("symlink to %*.*s", (int) size, (int) size, buf);
89 81
90 if (size > 2 && 82 if (size > 2 &&
91 (buf[0] == '%' || buf[0] == '#') && 83 (buf[0] == '%' || buf[0] == '#') &&
@@ -93,22 +85,20 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode)
93 ) { 85 ) {
94 _debug("symlink is a mountpoint"); 86 _debug("symlink is a mountpoint");
95 spin_lock(&vnode->lock); 87 spin_lock(&vnode->lock);
96 vnode->flags |= AFS_VNODE_MOUNTPOINT; 88 set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
97 spin_unlock(&vnode->lock); 89 spin_unlock(&vnode->lock);
98 } 90 }
99 91
100 ret = 0; 92 ret = 0;
101 93
102 out_free: 94out_free:
103 kunmap(page); 95 kunmap(page);
104 page_cache_release(page); 96 page_cache_release(page);
105 out: 97out:
106 _leave(" = %d", ret); 98 _leave(" = %d", ret);
107 return ret; 99 return ret;
100}
108 101
109} /* end afs_mntpt_check_symlink() */
110
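
Note the on-stack struct file above: it exists purely to smuggle the key into ->readpage via read_mapping_page(). A minimal sketch of the pattern, assuming the filesystem's readpage implementation consults file->private_data:

    struct file file = { .private_data = key };
    struct page *page = read_mapping_page(mapping, 0, &file);
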
111/*****************************************************************************/
112/* 102/*
113 * no valid lookup procedure on this sort of dir 103 * no valid lookup procedure on this sort of dir
114 */ 104 */
@@ -116,7 +106,7 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir,
116 struct dentry *dentry, 106 struct dentry *dentry,
117 struct nameidata *nd) 107 struct nameidata *nd)
118{ 108{
119 kenter("%p,%p{%p{%s},%s}", 109 _enter("%p,%p{%p{%s},%s}",
120 dir, 110 dir,
121 dentry, 111 dentry,
122 dentry->d_parent, 112 dentry->d_parent,
@@ -125,15 +115,14 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir,
125 dentry->d_name.name); 115 dentry->d_name.name);
126 116
127 return ERR_PTR(-EREMOTE); 117 return ERR_PTR(-EREMOTE);
128} /* end afs_mntpt_lookup() */ 118}
129 119
130/*****************************************************************************/
131/* 120/*
132 * no valid open procedure on this sort of dir 121 * no valid open procedure on this sort of dir
133 */ 122 */
134static int afs_mntpt_open(struct inode *inode, struct file *file) 123static int afs_mntpt_open(struct inode *inode, struct file *file)
135{ 124{
136 kenter("%p,%p{%p{%s},%s}", 125 _enter("%p,%p{%p{%s},%s}",
137 inode, file, 126 inode, file,
138 file->f_path.dentry->d_parent, 127 file->f_path.dentry->d_parent,
139 file->f_path.dentry->d_parent ? 128 file->f_path.dentry->d_parent ?
@@ -142,9 +131,8 @@ static int afs_mntpt_open(struct inode *inode, struct file *file)
142 file->f_path.dentry->d_name.name); 131 file->f_path.dentry->d_name.name);
143 132
144 return -EREMOTE; 133 return -EREMOTE;
145} /* end afs_mntpt_open() */ 134}
146 135
147/*****************************************************************************/
148/* 136/*
149 * create a vfsmount to be automounted 137 * create a vfsmount to be automounted
150 */ 138 */
@@ -157,7 +145,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
157 char *buf, *devname = NULL, *options = NULL; 145 char *buf, *devname = NULL, *options = NULL;
158 int ret; 146 int ret;
159 147
160 kenter("{%s}", mntpt->d_name.name); 148 _enter("{%s}", mntpt->d_name.name);
161 149
162 BUG_ON(!mntpt->d_inode); 150 BUG_ON(!mntpt->d_inode);
163 151
@@ -201,79 +189,108 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
201 strcat(options, ",rwpath"); 189 strcat(options, ",rwpath");
202 190
203 /* try and do the mount */ 191 /* try and do the mount */
204 kdebug("--- attempting mount %s -o %s ---", devname, options); 192 _debug("--- attempting mount %s -o %s ---", devname, options);
205 mnt = vfs_kern_mount(&afs_fs_type, 0, devname, options); 193 mnt = vfs_kern_mount(&afs_fs_type, 0, devname, options);
206 kdebug("--- mount result %p ---", mnt); 194 _debug("--- mount result %p ---", mnt);
207 195
208 free_page((unsigned long) devname); 196 free_page((unsigned long) devname);
209 free_page((unsigned long) options); 197 free_page((unsigned long) options);
210 kleave(" = %p", mnt); 198 _leave(" = %p", mnt);
211 return mnt; 199 return mnt;
212 200
213 error: 201error:
214 if (page) 202 if (page)
215 page_cache_release(page); 203 page_cache_release(page);
216 if (devname) 204 if (devname)
217 free_page((unsigned long) devname); 205 free_page((unsigned long) devname);
218 if (options) 206 if (options)
219 free_page((unsigned long) options); 207 free_page((unsigned long) options);
220 kleave(" = %d", ret); 208 _leave(" = %d", ret);
221 return ERR_PTR(ret); 209 return ERR_PTR(ret);
222} /* end afs_mntpt_do_automount() */ 210}
223 211
224/*****************************************************************************/
225/* 212/*
226 * follow a link from a mountpoint directory, thus causing it to be mounted 213 * follow a link from a mountpoint directory, thus causing it to be mounted
227 */ 214 */
228static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd) 215static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
229{ 216{
230 struct vfsmount *newmnt; 217 struct vfsmount *newmnt;
231 struct dentry *old_dentry;
232 int err; 218 int err;
233 219
234 kenter("%p{%s},{%s:%p{%s}}", 220 _enter("%p{%s},{%s:%p{%s},}",
235 dentry, 221 dentry,
236 dentry->d_name.name, 222 dentry->d_name.name,
237 nd->mnt->mnt_devname, 223 nd->mnt->mnt_devname,
238 dentry, 224 dentry,
239 nd->dentry->d_name.name); 225 nd->dentry->d_name.name);
240 226
241 newmnt = afs_mntpt_do_automount(dentry); 227 dput(nd->dentry);
228 nd->dentry = dget(dentry);
229
230 newmnt = afs_mntpt_do_automount(nd->dentry);
242 if (IS_ERR(newmnt)) { 231 if (IS_ERR(newmnt)) {
243 path_release(nd); 232 path_release(nd);
244 return (void *)newmnt; 233 return (void *)newmnt;
245 } 234 }
246 235
247 old_dentry = nd->dentry; 236 mntget(newmnt);
248 nd->dentry = dentry; 237 err = do_add_mount(newmnt, nd, MNT_SHRINKABLE, &afs_vfsmounts);
249 err = do_add_mount(newmnt, nd, 0, &afs_vfsmounts); 238 switch (err) {
250 nd->dentry = old_dentry; 239 case 0:
251 240 mntput(nd->mnt);
252 path_release(nd); 241 dput(nd->dentry);
253
254 if (!err) {
255 mntget(newmnt);
256 nd->mnt = newmnt; 242 nd->mnt = newmnt;
257 dget(newmnt->mnt_root); 243 nd->dentry = dget(newmnt->mnt_root);
258 nd->dentry = newmnt->mnt_root; 244 schedule_delayed_work(&afs_mntpt_expiry_timer,
245 afs_mntpt_expiry_timeout * HZ);
246 break;
247 case -EBUSY:
248 /* someone else made a mount here whilst we were busy */
249 while (d_mountpoint(nd->dentry) &&
250 follow_down(&nd->mnt, &nd->dentry))
251 ;
252 err = 0;
253 default:
254 mntput(newmnt);
255 break;
259 } 256 }
260 257
261 kleave(" = %d", err); 258 _leave(" = %d", err);
262 return ERR_PTR(err); 259 return ERR_PTR(err);
263} /* end afs_mntpt_follow_link() */ 260}
264 261
265/*****************************************************************************/
266/* 262/*
267 * handle mountpoint expiry timer going off 263 * handle mountpoint expiry timer going off
268 */ 264 */
269static void afs_mntpt_expiry_timed_out(struct afs_timer *timer) 265static void afs_mntpt_expiry_timed_out(struct work_struct *work)
270{ 266{
271 kenter(""); 267 _enter("");
272 268
273 mark_mounts_for_expiry(&afs_vfsmounts); 269 if (!list_empty(&afs_vfsmounts)) {
270 mark_mounts_for_expiry(&afs_vfsmounts);
271 schedule_delayed_work(&afs_mntpt_expiry_timer,
272 afs_mntpt_expiry_timeout * HZ);
273 }
274
275 _leave("");
276}
274 277
275 afs_kafstimod_add_timer(&afs_mntpt_expiry_timer, 278/*
276 afs_mntpt_expiry_timeout * HZ); 279 * kill the AFS mountpoint timer if it's still running
280 */
281void afs_mntpt_kill_timer(void)
282{
283 _enter("");
277 284
278 kleave(""); 285 ASSERT(list_empty(&afs_vfsmounts));
279} /* end afs_mntpt_expiry_timed_out() */ 286 cancel_delayed_work(&afs_mntpt_expiry_timer);
287 flush_scheduled_work();
288}
289
290/*
291 * begin unmount by attempting to remove all automounted mountpoints we added
292 */
293void afs_umount_begin(struct vfsmount *vfsmnt, int flags)
294{
295 shrink_submounts(vfsmnt, &afs_vfsmounts);
296}
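
The expiry handling above is the standard self-rearming delayed-work pattern; a generic sketch under assumed names (my_list, my_worker, my_timer):

    static void my_worker(struct work_struct *work);
    static DECLARE_DELAYED_WORK(my_timer, my_worker);

    static void my_worker(struct work_struct *work)
    {
            /* do the periodic job, then re-arm only while work remains */
            if (!list_empty(&my_list))
                    schedule_delayed_work(&my_timer, 10 * 60 * HZ);
    }

    static void my_kill_timer(void)
    {
            cancel_delayed_work(&my_timer);
            flush_scheduled_work(); /* wait out any instance still running */
    }
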
diff --git a/fs/afs/mount.h b/fs/afs/mount.h
deleted file mode 100644
index 9d2f46ec549f..000000000000
--- a/fs/afs/mount.h
+++ /dev/null
@@ -1,23 +0,0 @@
1/* mount.h: mount parameters
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_AFS_MOUNT_H
13#define _LINUX_AFS_MOUNT_H
14
15struct afs_mountdata {
16 const char *volume; /* name of volume */
17 const char *cell; /* name of cell containing volume */
18 const char *cache; /* name of cache block device */
19 size_t nservers; /* number of server addresses listed */
20 uint32_t servers[10]; /* IP addresses of servers in this cell */
21};
22
23#endif /* _LINUX_AFS_MOUNT_H */
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index ae6b85b1e484..d5601f617cdb 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -1,4 +1,4 @@
1/* proc.c: /proc interface for AFS 1/* /proc interface for AFS
2 * 2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
@@ -13,8 +13,6 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/proc_fs.h> 14#include <linux/proc_fs.h>
15#include <linux/seq_file.h> 15#include <linux/seq_file.h>
16#include "cell.h"
17#include "volume.h"
18#include <asm/uaccess.h> 16#include <asm/uaccess.h>
19#include "internal.h" 17#include "internal.h"
20 18
@@ -130,7 +128,6 @@ static const struct file_operations afs_proc_cell_servers_fops = {
130 .release = afs_proc_cell_servers_release, 128 .release = afs_proc_cell_servers_release,
131}; 129};
132 130
133/*****************************************************************************/
134/* 131/*
135 * initialise the /proc/fs/afs/ directory 132 * initialise the /proc/fs/afs/ directory
136 */ 133 */
@@ -142,47 +139,43 @@ int afs_proc_init(void)
142 139
143 proc_afs = proc_mkdir("fs/afs", NULL); 140 proc_afs = proc_mkdir("fs/afs", NULL);
144 if (!proc_afs) 141 if (!proc_afs)
145 goto error; 142 goto error_dir;
146 proc_afs->owner = THIS_MODULE; 143 proc_afs->owner = THIS_MODULE;
147 144
148 p = create_proc_entry("cells", 0, proc_afs); 145 p = create_proc_entry("cells", 0, proc_afs);
149 if (!p) 146 if (!p)
150 goto error_proc; 147 goto error_cells;
151 p->proc_fops = &afs_proc_cells_fops; 148 p->proc_fops = &afs_proc_cells_fops;
152 p->owner = THIS_MODULE; 149 p->owner = THIS_MODULE;
153 150
154 p = create_proc_entry("rootcell", 0, proc_afs); 151 p = create_proc_entry("rootcell", 0, proc_afs);
155 if (!p) 152 if (!p)
156 goto error_cells; 153 goto error_rootcell;
157 p->proc_fops = &afs_proc_rootcell_fops; 154 p->proc_fops = &afs_proc_rootcell_fops;
158 p->owner = THIS_MODULE; 155 p->owner = THIS_MODULE;
159 156
160 _leave(" = 0"); 157 _leave(" = 0");
161 return 0; 158 return 0;
162 159
163 error_cells: 160error_rootcell:
164 remove_proc_entry("cells", proc_afs); 161 remove_proc_entry("cells", proc_afs);
165 error_proc: 162error_cells:
166 remove_proc_entry("fs/afs", NULL); 163 remove_proc_entry("fs/afs", NULL);
167 error: 164error_dir:
168 _leave(" = -ENOMEM"); 165 _leave(" = -ENOMEM");
169 return -ENOMEM; 166 return -ENOMEM;
167}
170 168
171} /* end afs_proc_init() */
172
173/*****************************************************************************/
174/* 169/*
175 * clean up the /proc/fs/afs/ directory 170 * clean up the /proc/fs/afs/ directory
176 */ 171 */
177void afs_proc_cleanup(void) 172void afs_proc_cleanup(void)
178{ 173{
174 remove_proc_entry("rootcell", proc_afs);
179 remove_proc_entry("cells", proc_afs); 175 remove_proc_entry("cells", proc_afs);
180
181 remove_proc_entry("fs/afs", NULL); 176 remove_proc_entry("fs/afs", NULL);
177}
182 178
183} /* end afs_proc_cleanup() */
184
185/*****************************************************************************/
186/* 179/*
187 * open "/proc/fs/afs/cells" which provides a summary of extant cells 180 * open "/proc/fs/afs/cells" which provides a summary of extant cells
188 */ 181 */
@@ -199,9 +192,8 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file)
199 m->private = PDE(inode)->data; 192 m->private = PDE(inode)->data;
200 193
201 return 0; 194 return 0;
202} /* end afs_proc_cells_open() */ 195}
203 196
204/*****************************************************************************/
205/* 197/*
206 * set up the iterator to start reading from the cells list and return the 198 * set up the iterator to start reading from the cells list and return the
207 * first item 199 * first item
@@ -225,9 +217,8 @@ static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos)
225 break; 217 break;
226 218
227 return _p != &afs_proc_cells ? _p : NULL; 219 return _p != &afs_proc_cells ? _p : NULL;
228} /* end afs_proc_cells_start() */ 220}
229 221
230/*****************************************************************************/
231/* 222/*
232 * move to next cell in cells list 223 * move to next cell in cells list
233 */ 224 */
@@ -241,19 +232,16 @@ static void *afs_proc_cells_next(struct seq_file *p, void *v, loff_t *pos)
241 _p = v == (void *) 1 ? afs_proc_cells.next : _p->next; 232 _p = v == (void *) 1 ? afs_proc_cells.next : _p->next;
242 233
243 return _p != &afs_proc_cells ? _p : NULL; 234 return _p != &afs_proc_cells ? _p : NULL;
244} /* end afs_proc_cells_next() */ 235}
245 236
246/*****************************************************************************/
247/* 237/*
248 * clean up after reading from the cells list 238 * clean up after reading from the cells list
249 */ 239 */
250static void afs_proc_cells_stop(struct seq_file *p, void *v) 240static void afs_proc_cells_stop(struct seq_file *p, void *v)
251{ 241{
252 up_read(&afs_proc_cells_sem); 242 up_read(&afs_proc_cells_sem);
243}
253 244
254} /* end afs_proc_cells_stop() */
255
256/*****************************************************************************/
257/* 245/*
258 * display a header line followed by a load of cell lines 246 * display a header line followed by a load of cell lines
259 */ 247 */
@@ -261,19 +249,18 @@ static int afs_proc_cells_show(struct seq_file *m, void *v)
261{ 249{
262 struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link); 250 struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link);
263 251
264 /* display header on line 1 */
265 if (v == (void *) 1) { 252 if (v == (void *) 1) {
253 /* display header on line 1 */
266 seq_puts(m, "USE NAME\n"); 254 seq_puts(m, "USE NAME\n");
267 return 0; 255 return 0;
268 } 256 }
269 257
270 /* display one cell per line on subsequent lines */ 258 /* display one cell per line on subsequent lines */
271 seq_printf(m, "%3d %s\n", atomic_read(&cell->usage), cell->name); 259 seq_printf(m, "%3d %s\n",
272 260 atomic_read(&cell->usage), cell->name);
273 return 0; 261 return 0;
274} /* end afs_proc_cells_show() */ 262}
275 263
276/*****************************************************************************/
277/* 264/*
278 * handle writes to /proc/fs/afs/cells 265 * handle writes to /proc/fs/afs/cells
279 * - to add cells: echo "add <cellname> <IP>[:<IP>][:<IP>]" 266 * - to add cells: echo "add <cellname> <IP>[:<IP>][:<IP>]"
@@ -326,30 +313,32 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf,
326 313
327 if (strcmp(kbuf, "add") == 0) { 314 if (strcmp(kbuf, "add") == 0) {
328 struct afs_cell *cell; 315 struct afs_cell *cell;
329 ret = afs_cell_create(name, args, &cell); 316
330 if (ret < 0) 317 cell = afs_cell_create(name, args);
318 if (IS_ERR(cell)) {
319 ret = PTR_ERR(cell);
331 goto done; 320 goto done;
321 }
332 322
323 afs_put_cell(cell);
333 printk("kAFS: Added new cell '%s'\n", name); 324 printk("kAFS: Added new cell '%s'\n", name);
334 } 325 } else {
335 else {
336 goto inval; 326 goto inval;
337 } 327 }
338 328
339 ret = size; 329 ret = size;
340 330
341 done: 331done:
342 kfree(kbuf); 332 kfree(kbuf);
343 _leave(" = %d", ret); 333 _leave(" = %d", ret);
344 return ret; 334 return ret;
345 335
346 inval: 336inval:
347 ret = -EINVAL; 337 ret = -EINVAL;
348 printk("kAFS: Invalid Command on /proc/fs/afs/cells file\n"); 338 printk("kAFS: Invalid Command on /proc/fs/afs/cells file\n");
349 goto done; 339 goto done;
350} /* end afs_proc_cells_write() */ 340}
351 341
352/*****************************************************************************/
353/* 342/*
354 * Stubs for /proc/fs/afs/rootcell 343 * Stubs for /proc/fs/afs/rootcell
355 */ 344 */
@@ -369,7 +358,6 @@ static ssize_t afs_proc_rootcell_read(struct file *file, char __user *buf,
369 return 0; 358 return 0;
370} 359}
371 360
372/*****************************************************************************/
373/* 361/*
374 * handle writes to /proc/fs/afs/rootcell 362 * handle writes to /proc/fs/afs/rootcell
375 * - to initialize rootcell: echo "cell.name:192.168.231.14" 363 * - to initialize rootcell: echo "cell.name:192.168.231.14"
@@ -407,14 +395,13 @@ static ssize_t afs_proc_rootcell_write(struct file *file,
407 if (ret >= 0) 395 if (ret >= 0)
408 ret = size; /* consume everything, always */ 396 ret = size; /* consume everything, always */
409 397
410 infault: 398infault:
411 kfree(kbuf); 399 kfree(kbuf);
412 nomem: 400nomem:
413 _leave(" = %d", ret); 401 _leave(" = %d", ret);
414 return ret; 402 return ret;
415} /* end afs_proc_rootcell_write() */ 403}
416 404
417/*****************************************************************************/
418/* 405/*
419 * initialise /proc/fs/afs/<cell>/ 406 * initialise /proc/fs/afs/<cell>/
420 */ 407 */
@@ -426,25 +413,25 @@ int afs_proc_cell_setup(struct afs_cell *cell)
426 413
427 cell->proc_dir = proc_mkdir(cell->name, proc_afs); 414 cell->proc_dir = proc_mkdir(cell->name, proc_afs);
428 if (!cell->proc_dir) 415 if (!cell->proc_dir)
429 return -ENOMEM; 416 goto error_dir;
430 417
431 p = create_proc_entry("servers", 0, cell->proc_dir); 418 p = create_proc_entry("servers", 0, cell->proc_dir);
432 if (!p) 419 if (!p)
433 goto error_proc; 420 goto error_servers;
434 p->proc_fops = &afs_proc_cell_servers_fops; 421 p->proc_fops = &afs_proc_cell_servers_fops;
435 p->owner = THIS_MODULE; 422 p->owner = THIS_MODULE;
436 p->data = cell; 423 p->data = cell;
437 424
438 p = create_proc_entry("vlservers", 0, cell->proc_dir); 425 p = create_proc_entry("vlservers", 0, cell->proc_dir);
439 if (!p) 426 if (!p)
440 goto error_servers; 427 goto error_vlservers;
441 p->proc_fops = &afs_proc_cell_vlservers_fops; 428 p->proc_fops = &afs_proc_cell_vlservers_fops;
442 p->owner = THIS_MODULE; 429 p->owner = THIS_MODULE;
443 p->data = cell; 430 p->data = cell;
444 431
445 p = create_proc_entry("volumes", 0, cell->proc_dir); 432 p = create_proc_entry("volumes", 0, cell->proc_dir);
446 if (!p) 433 if (!p)
447 goto error_vlservers; 434 goto error_volumes;
448 p->proc_fops = &afs_proc_cell_volumes_fops; 435 p->proc_fops = &afs_proc_cell_volumes_fops;
449 p->owner = THIS_MODULE; 436 p->owner = THIS_MODULE;
450 p->data = cell; 437 p->data = cell;
@@ -452,17 +439,17 @@ int afs_proc_cell_setup(struct afs_cell *cell)
452 _leave(" = 0"); 439 _leave(" = 0");
453 return 0; 440 return 0;
454 441
455 error_vlservers: 442error_volumes:
456 remove_proc_entry("vlservers", cell->proc_dir); 443 remove_proc_entry("vlservers", cell->proc_dir);
457 error_servers: 444error_vlservers:
458 remove_proc_entry("servers", cell->proc_dir); 445 remove_proc_entry("servers", cell->proc_dir);
459 error_proc: 446error_servers:
460 remove_proc_entry(cell->name, proc_afs); 447 remove_proc_entry(cell->name, proc_afs);
448error_dir:
461 _leave(" = -ENOMEM"); 449 _leave(" = -ENOMEM");
462 return -ENOMEM; 450 return -ENOMEM;
463} /* end afs_proc_cell_setup() */ 451}
464 452
465/*****************************************************************************/
466/* 453/*
467 * remove /proc/fs/afs/<cell>/ 454 * remove /proc/fs/afs/<cell>/
468 */ 455 */
@@ -476,9 +463,8 @@ void afs_proc_cell_remove(struct afs_cell *cell)
476 remove_proc_entry(cell->name, proc_afs); 463 remove_proc_entry(cell->name, proc_afs);
477 464
478 _leave(""); 465 _leave("");
479} /* end afs_proc_cell_remove() */ 466}
480 467
481/*****************************************************************************/
482/* 468/*
483 * open "/proc/fs/afs/<cell>/volumes" which provides a summary of extant volumes 469 * open "/proc/fs/afs/<cell>/volumes" which provides a summary of extant volumes
484 */ 470 */
@@ -488,7 +474,7 @@ static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file)
488 struct seq_file *m; 474 struct seq_file *m;
489 int ret; 475 int ret;
490 476
491 cell = afs_get_cell_maybe((struct afs_cell **) &PDE(inode)->data); 477 cell = PDE(inode)->data;
492 if (!cell) 478 if (!cell)
493 return -ENOENT; 479 return -ENOENT;
494 480
@@ -500,25 +486,16 @@ static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file)
500 m->private = cell; 486 m->private = cell;
501 487
502 return 0; 488 return 0;
503} /* end afs_proc_cell_volumes_open() */ 489}
504 490
505/*****************************************************************************/
506/* 491/*
507 * close the file and release the ref to the cell 492 * close the file and release the ref to the cell
508 */ 493 */
509static int afs_proc_cell_volumes_release(struct inode *inode, struct file *file) 494static int afs_proc_cell_volumes_release(struct inode *inode, struct file *file)
510{ 495{
511 struct afs_cell *cell = PDE(inode)->data; 496 return seq_release(inode, file);
512 int ret; 497}
513
514 ret = seq_release(inode,file);
515
516 afs_put_cell(cell);
517
518 return ret;
519} /* end afs_proc_cell_volumes_release() */
520 498
521/*****************************************************************************/
522/* 499/*
523 * set up the iterator to start reading from the cells list and return the 500 * set up the iterator to start reading from the cells list and return the
524 * first item 501 * first item
@@ -545,9 +522,8 @@ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
545 break; 522 break;
546 523
547 return _p != &cell->vl_list ? _p : NULL; 524 return _p != &cell->vl_list ? _p : NULL;
548} /* end afs_proc_cell_volumes_start() */ 525}
549 526
550/*****************************************************************************/
551/* 527/*
552 * move to next volume in the cell's volume list 528 * move to next volume in the cell's volume list
553 */ 529 */
@@ -562,12 +538,11 @@ static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
562 (*_pos)++; 538 (*_pos)++;
563 539
564 _p = v; 540 _p = v;
565 _p = v == (void *) 1 ? cell->vl_list.next : _p->next; 541 _p = (v == (void *) 1) ? cell->vl_list.next : _p->next;
566 542
567 return _p != &cell->vl_list ? _p : NULL; 543 return (_p != &cell->vl_list) ? _p : NULL;
568} /* end afs_proc_cell_volumes_next() */ 544}
569 545
570/*****************************************************************************/
571/* 546/*
572 * clean up after reading from the cells list 547 * clean up after reading from the cells list
573 */ 548 */
@@ -576,10 +551,18 @@ static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v)
576 struct afs_cell *cell = p->private; 551 struct afs_cell *cell = p->private;
577 552
578 up_read(&cell->vl_sem); 553 up_read(&cell->vl_sem);
554}
579 555
580} /* end afs_proc_cell_volumes_stop() */ 556const char afs_vlocation_states[][4] = {
557 [AFS_VL_NEW] = "New",
558 [AFS_VL_CREATING] = "Crt",
559 [AFS_VL_VALID] = "Val",
560 [AFS_VL_NO_VOLUME] = "NoV",
561 [AFS_VL_UPDATING] = "Upd",
562 [AFS_VL_VOLUME_DELETED] = "Del",
563 [AFS_VL_UNCERTAIN] = "Unc",
564};
581 565
582/*****************************************************************************/
583/* 566/*
584 * display a header line followed by a load of volume lines 567 * display a header line followed by a load of volume lines
585 */ 568 */
@@ -590,23 +573,22 @@ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
590 573
591 /* display header on line 1 */ 574 /* display header on line 1 */
592 if (v == (void *) 1) { 575 if (v == (void *) 1) {
593 seq_puts(m, "USE VLID[0] VLID[1] VLID[2] NAME\n"); 576 seq_puts(m, "USE STT VLID[0] VLID[1] VLID[2] NAME\n");
594 return 0; 577 return 0;
595 } 578 }
596 579
597 /* display one cell per line on subsequent lines */ 580 /* display one cell per line on subsequent lines */
598 seq_printf(m, "%3d %08x %08x %08x %s\n", 581 seq_printf(m, "%3d %s %08x %08x %08x %s\n",
599 atomic_read(&vlocation->usage), 582 atomic_read(&vlocation->usage),
583 afs_vlocation_states[vlocation->state],
600 vlocation->vldb.vid[0], 584 vlocation->vldb.vid[0],
601 vlocation->vldb.vid[1], 585 vlocation->vldb.vid[1],
602 vlocation->vldb.vid[2], 586 vlocation->vldb.vid[2],
603 vlocation->vldb.name 587 vlocation->vldb.name);
604 );
605 588
606 return 0; 589 return 0;
607} /* end afs_proc_cell_volumes_show() */ 590}
608 591
609/*****************************************************************************/
610/* 592/*
611 * open "/proc/fs/afs/<cell>/vlservers" which provides a list of volume 593 * open "/proc/fs/afs/<cell>/vlservers" which provides a list of volume
612 * location servers 594 * location servers
@@ -617,11 +599,11 @@ static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file)
617 struct seq_file *m; 599 struct seq_file *m;
618 int ret; 600 int ret;
619 601
620 cell = afs_get_cell_maybe((struct afs_cell**)&PDE(inode)->data); 602 cell = PDE(inode)->data;
621 if (!cell) 603 if (!cell)
622 return -ENOENT; 604 return -ENOENT;
623 605
624 ret = seq_open(file,&afs_proc_cell_vlservers_ops); 606 ret = seq_open(file, &afs_proc_cell_vlservers_ops);
625 if (ret<0) 607 if (ret<0)
626 return ret; 608 return ret;
627 609
@@ -629,26 +611,17 @@ static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file)
629 m->private = cell; 611 m->private = cell;
630 612
631 return 0; 613 return 0;
632} /* end afs_proc_cell_vlservers_open() */ 614}
633 615
634/*****************************************************************************/
635/* 616/*
636 * close the file and release the ref to the cell 617 * close the file and release the ref to the cell
637 */ 618 */
638static int afs_proc_cell_vlservers_release(struct inode *inode, 619static int afs_proc_cell_vlservers_release(struct inode *inode,
639 struct file *file) 620 struct file *file)
640{ 621{
641 struct afs_cell *cell = PDE(inode)->data; 622 return seq_release(inode, file);
642 int ret; 623}
643
644 ret = seq_release(inode,file);
645
646 afs_put_cell(cell);
647
648 return ret;
649} /* end afs_proc_cell_vlservers_release() */
650 624
651/*****************************************************************************/
652/* 625/*
653 * set up the iterator to start reading from the cells list and return the 626 * set up the iterator to start reading from the cells list and return the
654 * first item 627 * first item
@@ -672,9 +645,8 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
672 return NULL; 645 return NULL;
673 646
674 return &cell->vl_addrs[pos]; 647 return &cell->vl_addrs[pos];
675} /* end afs_proc_cell_vlservers_start() */ 648}
676 649
677/*****************************************************************************/
678/* 650/*
679 * move to next volume location server in the cell's list 651 * move to next volume location server in the cell's list
680 */ 652 */
@@ -692,9 +664,8 @@ static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
692 return NULL; 664 return NULL;
693 665
694 return &cell->vl_addrs[pos]; 666 return &cell->vl_addrs[pos];
695} /* end afs_proc_cell_vlservers_next() */ 667}
696 668
697/*****************************************************************************/
698/* 669/*
699 * clean up after reading from the cells list 670 * clean up after reading from the cells list
700 */ 671 */
@@ -703,10 +674,8 @@ static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v)
703 struct afs_cell *cell = p->private; 674 struct afs_cell *cell = p->private;
704 675
705 up_read(&cell->vl_sem); 676 up_read(&cell->vl_sem);
677}
706 678
707} /* end afs_proc_cell_vlservers_stop() */
708
709/*****************************************************************************/
710/* 679/*
711 * display a header line followed by a load of volume location server lines 680 * display a header line followed by a load of volume location server lines
712 */ 681 */
@@ -722,11 +691,9 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
722 691
723 /* display one cell per line on subsequent lines */ 692 /* display one cell per line on subsequent lines */
724 seq_printf(m, "%u.%u.%u.%u\n", NIPQUAD(addr->s_addr)); 693 seq_printf(m, "%u.%u.%u.%u\n", NIPQUAD(addr->s_addr));
725
726 return 0; 694 return 0;
727} /* end afs_proc_cell_vlservers_show() */ 695}
728 696
729/*****************************************************************************/
730/* 697/*
731 * open "/proc/fs/afs/<cell>/servers" which provides a summary of active 698 * open "/proc/fs/afs/<cell>/servers" which provides a summary of active
732 * servers 699 * servers
@@ -737,7 +704,7 @@ static int afs_proc_cell_servers_open(struct inode *inode, struct file *file)
737 struct seq_file *m; 704 struct seq_file *m;
738 int ret; 705 int ret;
739 706
740 cell = afs_get_cell_maybe((struct afs_cell **) &PDE(inode)->data); 707 cell = PDE(inode)->data;
741 if (!cell) 708 if (!cell)
742 return -ENOENT; 709 return -ENOENT;
743 710
@@ -747,34 +714,24 @@ static int afs_proc_cell_servers_open(struct inode *inode, struct file *file)
747 714
748 m = file->private_data; 715 m = file->private_data;
749 m->private = cell; 716 m->private = cell;
750
751 return 0; 717 return 0;
752} /* end afs_proc_cell_servers_open() */ 718}
753 719
754/*****************************************************************************/
755/* 720/*
756 * close the file and release the ref to the cell 721 * close the file and release the ref to the cell
757 */ 722 */
758static int afs_proc_cell_servers_release(struct inode *inode, 723static int afs_proc_cell_servers_release(struct inode *inode,
759 struct file *file) 724 struct file *file)
760{ 725{
761 struct afs_cell *cell = PDE(inode)->data; 726 return seq_release(inode, file);
762 int ret; 727}
763
764 ret = seq_release(inode, file);
765
766 afs_put_cell(cell);
767
768 return ret;
769} /* end afs_proc_cell_servers_release() */
770 728
771/*****************************************************************************/
772/* 729/*
773 * set up the iterator to start reading from the cells list and return the 730 * set up the iterator to start reading from the cells list and return the
774 * first item 731 * first item
775 */ 732 */
776static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos) 733static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos)
777 __acquires(m->private->sv_lock) 734 __acquires(m->private->servers_lock)
778{ 735{
779 struct list_head *_p; 736 struct list_head *_p;
780 struct afs_cell *cell = m->private; 737 struct afs_cell *cell = m->private;
@@ -783,7 +740,7 @@ static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos)
783 _enter("cell=%p pos=%Ld", cell, *_pos); 740 _enter("cell=%p pos=%Ld", cell, *_pos);
784 741
785 /* lock the list against modification */ 742 /* lock the list against modification */
786 read_lock(&cell->sv_lock); 743 read_lock(&cell->servers_lock);
787 744
788 /* allow for the header line */ 745 /* allow for the header line */
789 if (!pos) 746 if (!pos)
@@ -791,14 +748,13 @@ static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos)
791 pos--; 748 pos--;
792 749
793 /* find the n'th element in the list */ 750 /* find the n'th element in the list */
794 list_for_each(_p, &cell->sv_list) 751 list_for_each(_p, &cell->servers)
795 if (!pos--) 752 if (!pos--)
796 break; 753 break;
797 754
798 return _p != &cell->sv_list ? _p : NULL; 755 return _p != &cell->servers ? _p : NULL;
799} /* end afs_proc_cell_servers_start() */ 756}
800 757
801/*****************************************************************************/
802/* 758/*
803 * move to next server in the cell's server list 759 * move to next server in the cell's server list
804 */ 760 */
@@ -813,25 +769,22 @@ static void *afs_proc_cell_servers_next(struct seq_file *p, void *v,
813 (*_pos)++; 769 (*_pos)++;
814 770
815 _p = v; 771 _p = v;
816 _p = v == (void *) 1 ? cell->sv_list.next : _p->next; 772 _p = v == (void *) 1 ? cell->servers.next : _p->next;
817 773
818 return _p != &cell->sv_list ? _p : NULL; 774 return _p != &cell->servers ? _p : NULL;
819} /* end afs_proc_cell_servers_next() */ 775}
820 776
821/*****************************************************************************/
822/* 777/*
823 * clean up after reading from the cells list 778 * clean up after reading from the cells list
824 */ 779 */
825static void afs_proc_cell_servers_stop(struct seq_file *p, void *v) 780static void afs_proc_cell_servers_stop(struct seq_file *p, void *v)
826 __releases(p->private->sv_lock) 781 __releases(p->private->servers_lock)
827{ 782{
828 struct afs_cell *cell = p->private; 783 struct afs_cell *cell = p->private;
829 784
830 read_unlock(&cell->sv_lock); 785 read_unlock(&cell->servers_lock);
831 786}
832} /* end afs_proc_cell_servers_stop() */
833 787
834/*****************************************************************************/
835/* 788/*
836 * display a header line followed by a load of server lines 789 * display a header line followed by a load of server lines
837 */ 790 */
@@ -849,10 +802,7 @@ static int afs_proc_cell_servers_show(struct seq_file *m, void *v)
849 /* display one cell per line on subsequent lines */ 802 /* display one cell per line on subsequent lines */
850 sprintf(ipaddr, "%u.%u.%u.%u", NIPQUAD(server->addr)); 803 sprintf(ipaddr, "%u.%u.%u.%u", NIPQUAD(server->addr));
851 seq_printf(m, "%3d %-15.15s %5d\n", 804 seq_printf(m, "%3d %-15.15s %5d\n",
852 atomic_read(&server->usage), 805 atomic_read(&server->usage), ipaddr, server->fs_state);
853 ipaddr,
854 server->fs_state
855 );
856 806
857 return 0; 807 return 0;
858} /* end afs_proc_cell_servers_show() */ 808}
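
All of the /proc files above share the seq_file iterator quartet; reduced to a sketch (the my_* names are hypothetical, the open routine mirrors the ones in this file):

    static const struct seq_operations my_seq_ops = {
            .start = my_start, /* take the lock, skip to *pos (pos 0 = header) */
            .next  = my_next,  /* step to the following list element */
            .stop  = my_stop,  /* drop the lock taken in ->start */
            .show  = my_show,  /* emit the header line or one record */
    };

    static int my_open(struct inode *inode, struct file *file)
    {
            struct seq_file *m;
            int ret;

            ret = seq_open(file, &my_seq_ops);
            if (ret < 0)
                    return ret;
            m = file->private_data;
            m->private = PDE(inode)->data; /* the cell this file describes */
            return 0;
    }
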
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
new file mode 100644
index 000000000000..e7b047328a39
--- /dev/null
+++ b/fs/afs/rxrpc.c
@@ -0,0 +1,782 @@
1/* Maintain an RxRPC server socket to do AFS communications through
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <net/sock.h>
13#include <net/af_rxrpc.h>
14#include <rxrpc/packet.h>
15#include "internal.h"
16#include "afs_cm.h"
17
18static struct socket *afs_socket; /* my RxRPC socket */
19static struct workqueue_struct *afs_async_calls;
20static atomic_t afs_outstanding_calls;
21static atomic_t afs_outstanding_skbs;
22
23static void afs_wake_up_call_waiter(struct afs_call *);
24static int afs_wait_for_call_to_complete(struct afs_call *);
25static void afs_wake_up_async_call(struct afs_call *);
26static int afs_dont_wait_for_call_to_complete(struct afs_call *);
27static void afs_process_async_call(struct work_struct *);
28static void afs_rx_interceptor(struct sock *, unsigned long, struct sk_buff *);
29static int afs_deliver_cm_op_id(struct afs_call *, struct sk_buff *, bool);
30
31/* synchronous call management */
32const struct afs_wait_mode afs_sync_call = {
33 .rx_wakeup = afs_wake_up_call_waiter,
34 .wait = afs_wait_for_call_to_complete,
35};
36
37/* asynchronous call management */
38const struct afs_wait_mode afs_async_call = {
39 .rx_wakeup = afs_wake_up_async_call,
40 .wait = afs_dont_wait_for_call_to_complete,
41};
42
43/* asynchronous incoming call management */
44static const struct afs_wait_mode afs_async_incoming_call = {
45 .rx_wakeup = afs_wake_up_async_call,
46};
47
48/* asynchronous incoming call initial processing */
49static const struct afs_call_type afs_RXCMxxxx = {
50 .name = "CB.xxxx",
51 .deliver = afs_deliver_cm_op_id,
52 .abort_to_error = afs_abort_to_error,
53};
54
55static void afs_collect_incoming_call(struct work_struct *);
56
57static struct sk_buff_head afs_incoming_calls;
58static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call);
59
60/*
61 * open an RxRPC socket and bind it to be a server for callback notifications
62 * - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT
63 */
64int afs_open_socket(void)
65{
66 struct sockaddr_rxrpc srx;
67 struct socket *socket;
68 int ret;
69
70 _enter("");
71
72 skb_queue_head_init(&afs_incoming_calls);
73
74 afs_async_calls = create_singlethread_workqueue("kafsd");
75 if (!afs_async_calls) {
76 _leave(" = -ENOMEM [wq]");
77 return -ENOMEM;
78 }
79
80 ret = sock_create_kern(AF_RXRPC, SOCK_DGRAM, PF_INET, &socket);
81 if (ret < 0) {
82 destroy_workqueue(afs_async_calls);
83 _leave(" = %d [socket]", ret);
84 return ret;
85 }
86
87 socket->sk->sk_allocation = GFP_NOFS;
88
89 /* bind the callback manager's address to make this a server socket */
90 srx.srx_family = AF_RXRPC;
91 srx.srx_service = CM_SERVICE;
92 srx.transport_type = SOCK_DGRAM;
93 srx.transport_len = sizeof(srx.transport.sin);
94 srx.transport.sin.sin_family = AF_INET;
95 srx.transport.sin.sin_port = htons(AFS_CM_PORT);
96 memset(&srx.transport.sin.sin_addr, 0,
97 sizeof(srx.transport.sin.sin_addr));
98
99 ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
100 if (ret < 0) {
101 sock_release(socket);
102 _leave(" = %d [bind]", ret);
103 return ret;
104 }
105
106 rxrpc_kernel_intercept_rx_messages(socket, afs_rx_interceptor);
107
108 afs_socket = socket;
109 _leave(" = 0");
110 return 0;
111}
112
113/*
114 * close the RxRPC socket AFS was using
115 */
116void afs_close_socket(void)
117{
118 _enter("");
119
120 sock_release(afs_socket);
121
122 _debug("dework");
123 destroy_workqueue(afs_async_calls);
124
125 ASSERTCMP(atomic_read(&afs_outstanding_skbs), ==, 0);
126 ASSERTCMP(atomic_read(&afs_outstanding_calls), ==, 0);
127 _leave("");
128}
129
130/*
131 * note that the data in a socket buffer is now delivered and that the buffer
132 * should be freed
133 */
134static void afs_data_delivered(struct sk_buff *skb)
135{
136 if (!skb) {
137 _debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs));
138 dump_stack();
139 } else {
140 _debug("DLVR %p{%u} [%d]",
141 skb, skb->mark, atomic_read(&afs_outstanding_skbs));
142 if (atomic_dec_return(&afs_outstanding_skbs) == -1)
143 BUG();
144 rxrpc_kernel_data_delivered(skb);
145 }
146}
147
148/*
149 * free a socket buffer
150 */
151static void afs_free_skb(struct sk_buff *skb)
152{
153 if (!skb) {
154 _debug("FREE NULL [%d]", atomic_read(&afs_outstanding_skbs));
155 dump_stack();
156 } else {
157 _debug("FREE %p{%u} [%d]",
158 skb, skb->mark, atomic_read(&afs_outstanding_skbs));
159 if (atomic_dec_return(&afs_outstanding_skbs) == -1)
160 BUG();
161 rxrpc_kernel_free_skb(skb);
162 }
163}
164
165/*
166 * free a call
167 */
168static void afs_free_call(struct afs_call *call)
169{
170 _debug("DONE %p{%s} [%d]",
171 call, call->type->name, atomic_read(&afs_outstanding_calls));
172 if (atomic_dec_return(&afs_outstanding_calls) == -1)
173 BUG();
174
175 ASSERTCMP(call->rxcall, ==, NULL);
176 ASSERT(!work_pending(&call->async_work));
177 ASSERT(skb_queue_empty(&call->rx_queue));
178 ASSERT(call->type->name != NULL);
179
180 kfree(call->request);
181 kfree(call);
182}
183
184/*
185 * allocate a call with flat request and reply buffers
186 */
187struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type,
188 size_t request_size, size_t reply_size)
189{
190 struct afs_call *call;
191
192 call = kzalloc(sizeof(*call), GFP_NOFS);
193 if (!call)
194 goto nomem_call;
195
196 _debug("CALL %p{%s} [%d]",
197 call, type->name, atomic_read(&afs_outstanding_calls));
198 atomic_inc(&afs_outstanding_calls);
199
200 call->type = type;
201 call->request_size = request_size;
202 call->reply_max = reply_size;
203
204 if (request_size) {
205 call->request = kmalloc(request_size, GFP_NOFS);
206 if (!call->request)
207 goto nomem_free;
208 }
209
210 if (reply_size) {
211 call->buffer = kmalloc(reply_size, GFP_NOFS);
212 if (!call->buffer)
213 goto nomem_free;
214 }
215
216 init_waitqueue_head(&call->waitq);
217 skb_queue_head_init(&call->rx_queue);
218 return call;
219
220nomem_free:
221 afs_free_call(call);
222nomem_call:
223 return NULL;
224}
225
226/*
227 * clean up a call with flat buffer
228 */
229void afs_flat_call_destructor(struct afs_call *call)
230{
231 _enter("");
232
233 kfree(call->request);
234 call->request = NULL;
235 kfree(call->buffer);
236 call->buffer = NULL;
237}
238
239/*
240 * initiate a call
241 */
242int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
243 const struct afs_wait_mode *wait_mode)
244{
245 struct sockaddr_rxrpc srx;
246 struct rxrpc_call *rxcall;
247 struct msghdr msg;
248 struct kvec iov[1];
249 int ret;
250
251 _enter("%x,{%d},", addr->s_addr, ntohs(call->port));
252
253 ASSERT(call->type != NULL);
254 ASSERT(call->type->name != NULL);
255
256 _debug("MAKE %p{%s} [%d]",
257 call, call->type->name, atomic_read(&afs_outstanding_calls));
258
259 call->wait_mode = wait_mode;
260 INIT_WORK(&call->async_work, afs_process_async_call);
261
262 memset(&srx, 0, sizeof(srx));
263 srx.srx_family = AF_RXRPC;
264 srx.srx_service = call->service_id;
265 srx.transport_type = SOCK_DGRAM;
266 srx.transport_len = sizeof(srx.transport.sin);
267 srx.transport.sin.sin_family = AF_INET;
268 srx.transport.sin.sin_port = call->port;
269 memcpy(&srx.transport.sin.sin_addr, addr, 4);
270
271 /* create a call */
272 rxcall = rxrpc_kernel_begin_call(afs_socket, &srx, call->key,
273 (unsigned long) call, gfp);
274 call->key = NULL;
275 if (IS_ERR(rxcall)) {
276 ret = PTR_ERR(rxcall);
277 goto error_kill_call;
278 }
279
280 call->rxcall = rxcall;
281
282 /* send the request */
283 iov[0].iov_base = call->request;
284 iov[0].iov_len = call->request_size;
285
286 msg.msg_name = NULL;
287 msg.msg_namelen = 0;
288 msg.msg_iov = (struct iovec *) iov;
289 msg.msg_iovlen = 1;
290 msg.msg_control = NULL;
291 msg.msg_controllen = 0;
292 msg.msg_flags = 0;
293
294 /* have to change the state *before* sending the last packet as RxRPC
295 * might give us the reply before it returns from sending the
296 * request */
297 call->state = AFS_CALL_AWAIT_REPLY;
298 ret = rxrpc_kernel_send_data(rxcall, &msg, call->request_size);
299 if (ret < 0)
300 goto error_do_abort;
301
302 /* at this point, an async call may no longer exist as it may have
303 * already completed */
304 return wait_mode->wait(call);
305
306error_do_abort:
307 rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT);
308 rxrpc_kernel_end_call(rxcall);
309 call->rxcall = NULL;
310error_kill_call:
311 call->type->destructor(call);
312 afs_free_call(call);
313 _leave(" = %d", ret);
314 return ret;
315}
316
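
A caller drives this interface roughly as follows; a hedged sketch in which my_call_type, MY_SERVICE_ID and MY_PORT are placeholders, while the afs_call fields are the ones consumed by afs_make_call() above:

    static int my_operation(struct in_addr *addr, struct key *key,
                            const void *request, size_t reqsize, size_t replymax)
    {
            struct afs_call *call;

            call = afs_alloc_flat_call(&my_call_type, reqsize, replymax);
            if (!call)
                    return -ENOMEM;

            call->key        = key;            /* ownership passes to the call */
            call->service_id = MY_SERVICE_ID;  /* placeholder constant */
            call->port       = htons(MY_PORT); /* placeholder constant */
            memcpy(call->request, request, reqsize);

            /* blocks until the reply is unmarshalled or the call fails */
            return afs_make_call(addr, call, GFP_NOFS, &afs_sync_call);
    }
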
317/*
318 * handles intercepted messages that were arriving in the socket's Rx queue
319 * - called with the socket receive queue lock held to ensure message ordering
320 * - called with softirqs disabled
321 */
322static void afs_rx_interceptor(struct sock *sk, unsigned long user_call_ID,
323 struct sk_buff *skb)
324{
325 struct afs_call *call = (struct afs_call *) user_call_ID;
326
327 _enter("%p,,%u", call, skb->mark);
328
329 _debug("ICPT %p{%u} [%d]",
330 skb, skb->mark, atomic_read(&afs_outstanding_skbs));
331
332 ASSERTCMP(sk, ==, afs_socket->sk);
333 atomic_inc(&afs_outstanding_skbs);
334
335 if (!call) {
336 /* it's an incoming call for our callback service */
337 skb_queue_tail(&afs_incoming_calls, skb);
338 schedule_work(&afs_collect_incoming_call_work);
339 } else {
340 /* route the messages directly to the appropriate call */
341 skb_queue_tail(&call->rx_queue, skb);
342 call->wait_mode->rx_wakeup(call);
343 }
344
345 _leave("");
346}
347
348/*
349 * deliver messages to a call
350 */
351static void afs_deliver_to_call(struct afs_call *call)
352{
353 struct sk_buff *skb;
354 bool last;
355 u32 abort_code;
356 int ret;
357
358 _enter("");
359
360 while ((call->state == AFS_CALL_AWAIT_REPLY ||
361 call->state == AFS_CALL_AWAIT_OP_ID ||
362 call->state == AFS_CALL_AWAIT_REQUEST ||
363 call->state == AFS_CALL_AWAIT_ACK) &&
364 (skb = skb_dequeue(&call->rx_queue))) {
365 switch (skb->mark) {
366 case RXRPC_SKB_MARK_DATA:
367 _debug("Rcv DATA");
368 last = rxrpc_kernel_is_data_last(skb);
369 ret = call->type->deliver(call, skb, last);
370 switch (ret) {
371 case 0:
372 if (last &&
373 call->state == AFS_CALL_AWAIT_REPLY)
374 call->state = AFS_CALL_COMPLETE;
375 break;
376 case -ENOTCONN:
377 abort_code = RX_CALL_DEAD;
378 goto do_abort;
379 case -ENOTSUPP:
380 abort_code = RX_INVALID_OPERATION;
381 goto do_abort;
382 default:
383 abort_code = RXGEN_CC_UNMARSHAL;
384 if (call->state != AFS_CALL_AWAIT_REPLY)
385 abort_code = RXGEN_SS_UNMARSHAL;
386 do_abort:
387 rxrpc_kernel_abort_call(call->rxcall,
388 abort_code);
389 call->error = ret;
390 call->state = AFS_CALL_ERROR;
391 break;
392 }
393 afs_data_delivered(skb);
394 skb = NULL;
395 continue;
396 case RXRPC_SKB_MARK_FINAL_ACK:
397 _debug("Rcv ACK");
398 call->state = AFS_CALL_COMPLETE;
399 break;
400 case RXRPC_SKB_MARK_BUSY:
401 _debug("Rcv BUSY");
402 call->error = -EBUSY;
403 call->state = AFS_CALL_BUSY;
404 break;
405 case RXRPC_SKB_MARK_REMOTE_ABORT:
406 abort_code = rxrpc_kernel_get_abort_code(skb);
407 call->error = call->type->abort_to_error(abort_code);
408 call->state = AFS_CALL_ABORTED;
409 _debug("Rcv ABORT %u -> %d", abort_code, call->error);
410 break;
411 case RXRPC_SKB_MARK_NET_ERROR:
412 call->error = -rxrpc_kernel_get_error_number(skb);
413 call->state = AFS_CALL_ERROR;
414 _debug("Rcv NET ERROR %d", call->error);
415 break;
416 case RXRPC_SKB_MARK_LOCAL_ERROR:
417 call->error = -rxrpc_kernel_get_error_number(skb);
418 call->state = AFS_CALL_ERROR;
419 _debug("Rcv LOCAL ERROR %d", call->error);
420 break;
421 default:
422 BUG();
423 break;
424 }
425
426 afs_free_skb(skb);
427 }
428
429 /* make sure the queue is empty if the call is done with (we might have
430 * aborted the call early because of an unmarshalling error) */
431 if (call->state >= AFS_CALL_COMPLETE) {
432 while ((skb = skb_dequeue(&call->rx_queue)))
433 afs_free_skb(skb);
434 if (call->incoming) {
435 rxrpc_kernel_end_call(call->rxcall);
436 call->rxcall = NULL;
437 call->type->destructor(call);
438 afs_free_call(call);
439 }
440 }
441
442 _leave("");
443}
444
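
The loop above leans on the numeric ordering of call states (note the state >= AFS_CALL_COMPLETE test that groups the terminal states at the top of the range); a plausible sketch of the enum, inferred from the states this file references:

    enum afs_call_state {
            AFS_CALL_AWAIT_OP_ID,   /* incoming: awaiting operation ID */
            AFS_CALL_AWAIT_REQUEST, /* incoming: awaiting request data */
            AFS_CALL_AWAIT_REPLY,   /* outgoing: awaiting reply data */
            AFS_CALL_AWAIT_ACK,     /* awaiting final ACK of our reply */
            AFS_CALL_COMPLETE,      /* successfully completed */
            AFS_CALL_BUSY,          /* server was busy */
            AFS_CALL_ABORTED,       /* call was aborted by the peer */
            AFS_CALL_ERROR,         /* call failed locally or on the wire */
    };
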
445/*
446 * wait synchronously for a call to complete
447 */
448static int afs_wait_for_call_to_complete(struct afs_call *call)
449{
450 struct sk_buff *skb;
451 int ret;
452
453 DECLARE_WAITQUEUE(myself, current);
454
455 _enter("");
456
457 add_wait_queue(&call->waitq, &myself);
458 for (;;) {
459 set_current_state(TASK_INTERRUPTIBLE);
460
461 /* deliver any messages that are in the queue */
462 if (!skb_queue_empty(&call->rx_queue)) {
463 __set_current_state(TASK_RUNNING);
464 afs_deliver_to_call(call);
465 continue;
466 }
467
468 ret = call->error;
469 if (call->state >= AFS_CALL_COMPLETE)
470 break;
471 ret = -EINTR;
472 if (signal_pending(current))
473 break;
474 schedule();
475 }
476
477 remove_wait_queue(&call->waitq, &myself);
478 __set_current_state(TASK_RUNNING);
479
480 /* kill the call */
481 if (call->state < AFS_CALL_COMPLETE) {
482 _debug("call incomplete");
483 rxrpc_kernel_abort_call(call->rxcall, RX_CALL_DEAD);
484 while ((skb = skb_dequeue(&call->rx_queue)))
485 afs_free_skb(skb);
486 }
487
488 _debug("call complete");
489 rxrpc_kernel_end_call(call->rxcall);
490 call->rxcall = NULL;
491 call->type->destructor(call);
492 afs_free_call(call);
493 _leave(" = %d", ret);
494 return ret;
495}
496
497/*
498 * wake up a waiting call
499 */
500static void afs_wake_up_call_waiter(struct afs_call *call)
501{
502 wake_up(&call->waitq);
503}
504
505/*
506 * wake up an asynchronous call
507 */
508static void afs_wake_up_async_call(struct afs_call *call)
509{
510 _enter("");
511 queue_work(afs_async_calls, &call->async_work);
512}
513
514/*
515 * put a call into asynchronous mode
516 * - mustn't touch the call descriptor as the call may have completed by the
517 * time we get here
518 */
519static int afs_dont_wait_for_call_to_complete(struct afs_call *call)
520{
521 _enter("");
522 return -EINPROGRESS;
523}
524
525/*
526 * delete an asynchronous call
527 */
528static void afs_delete_async_call(struct work_struct *work)
529{
530 struct afs_call *call =
531 container_of(work, struct afs_call, async_work);
532
533 _enter("");
534
535 afs_free_call(call);
536
537 _leave("");
538}
539
540/*
541 * perform processing on an asynchronous call
542 * - on a multiple-thread workqueue this work item may try to run on several
543 * CPUs at the same time
544 */
545static void afs_process_async_call(struct work_struct *work)
546{
547 struct afs_call *call =
548 container_of(work, struct afs_call, async_work);
549
550 _enter("");
551
552 if (!skb_queue_empty(&call->rx_queue))
553 afs_deliver_to_call(call);
554
555 if (call->state >= AFS_CALL_COMPLETE && call->wait_mode) {
556 if (call->wait_mode->async_complete)
557 call->wait_mode->async_complete(call->reply,
558 call->error);
559 call->reply = NULL;
560
561 /* kill the call */
562 rxrpc_kernel_end_call(call->rxcall);
563 call->rxcall = NULL;
564 if (call->type->destructor)
565 call->type->destructor(call);
566
567 /* we can't just delete the call because the work item may be
568 * queued */
569 PREPARE_WORK(&call->async_work, afs_delete_async_call);
570 queue_work(afs_async_calls, &call->async_work);
571 }
572
573 _leave("");
574}
575
576/*
577 * empty a socket buffer into a flat reply buffer
578 */
579void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb)
580{
581 size_t len = skb->len;
582
583 if (skb_copy_bits(skb, 0, call->buffer + call->reply_size, len) < 0)
584 BUG();
585 call->reply_size += len;
586}
587
588/*
589 * accept the backlog of incoming calls
590 */
591static void afs_collect_incoming_call(struct work_struct *work)
592{
593 struct rxrpc_call *rxcall;
594 struct afs_call *call = NULL;
595 struct sk_buff *skb;
596
597 while ((skb = skb_dequeue(&afs_incoming_calls))) {
598 _debug("new call");
599
600 /* don't need the notification */
601 afs_free_skb(skb);
602
603 if (!call) {
604 call = kzalloc(sizeof(struct afs_call), GFP_KERNEL);
605 if (!call) {
606 rxrpc_kernel_reject_call(afs_socket);
607 return;
608 }
609
610 INIT_WORK(&call->async_work, afs_process_async_call);
611 call->wait_mode = &afs_async_incoming_call;
612 call->type = &afs_RXCMxxxx;
613 init_waitqueue_head(&call->waitq);
614 skb_queue_head_init(&call->rx_queue);
615 call->state = AFS_CALL_AWAIT_OP_ID;
616
617 _debug("CALL %p{%s} [%d]",
618 call, call->type->name,
619 atomic_read(&afs_outstanding_calls));
620 atomic_inc(&afs_outstanding_calls);
621 }
622
623 rxcall = rxrpc_kernel_accept_call(afs_socket,
624 (unsigned long) call);
625 if (!IS_ERR(rxcall)) {
626 call->rxcall = rxcall;
627 call = NULL;
628 }
629 }
630
631 if (call)
632 afs_free_call(call);
633}
634
635/*
636 * grab the operation ID from an incoming cache manager call
637 */
638static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
639 bool last)
640{
641 size_t len = skb->len;
642 void *oibuf = (void *) &call->operation_ID;
643
644 _enter("{%u},{%zu},%d", call->offset, len, last);
645
646 ASSERTCMP(call->offset, <, 4);
647
648 /* the operation ID forms the first four bytes of the request data */
649 len = min_t(size_t, len, 4 - call->offset);
650 if (skb_copy_bits(skb, 0, oibuf + call->offset, len) < 0)
651 BUG();
652 if (!pskb_pull(skb, len))
653 BUG();
654 call->offset += len;
655
656 if (call->offset < 4) {
657 if (last) {
658 _leave(" = -EBADMSG [op ID short]");
659 return -EBADMSG;
660 }
661 _leave(" = 0 [incomplete]");
662 return 0;
663 }
664
665 call->state = AFS_CALL_AWAIT_REQUEST;
666
667 /* ask the cache manager to route the call (it'll change the call type
668 * if successful) */
669 if (!afs_cm_incoming_call(call))
670 return -ENOTSUPP;
671
672 /* pass responsibility for the remainder of this message off to the
673 * cache manager op */
674 return call->type->deliver(call, skb, last);
675}
676
677/*
678 * send an empty reply
679 */
680void afs_send_empty_reply(struct afs_call *call)
681{
682 struct msghdr msg;
683 struct iovec iov[1];
684
685 _enter("");
686
687 iov[0].iov_base = NULL;
688 iov[0].iov_len = 0;
689 msg.msg_name = NULL;
690 msg.msg_namelen = 0;
691 msg.msg_iov = iov;
692 msg.msg_iovlen = 0;
693 msg.msg_control = NULL;
694 msg.msg_controllen = 0;
695 msg.msg_flags = 0;
696
697 call->state = AFS_CALL_AWAIT_ACK;
698 switch (rxrpc_kernel_send_data(call->rxcall, &msg, 0)) {
699 case 0:
700 _leave(" [replied]");
701 return;
702
703 case -ENOMEM:
704 _debug("oom");
705 rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT);
706 default:
707 rxrpc_kernel_end_call(call->rxcall);
708 call->rxcall = NULL;
709 call->type->destructor(call);
710 afs_free_call(call);
711 _leave(" [error]");
712 return;
713 }
714}
715
716/*
717 * send a simple reply
718 */
719void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
720{
721 struct msghdr msg;
722 struct iovec iov[1];
723
724 _enter("");
725
726 iov[0].iov_base = (void *) buf;
727 iov[0].iov_len = len;
728 msg.msg_name = NULL;
729 msg.msg_namelen = 0;
730 msg.msg_iov = iov;
731 msg.msg_iovlen = 1;
732 msg.msg_control = NULL;
733 msg.msg_controllen = 0;
734 msg.msg_flags = 0;
735
736 call->state = AFS_CALL_AWAIT_ACK;
737 switch (rxrpc_kernel_send_data(call->rxcall, &msg, len)) {
738 case 0:
739 _leave(" [replied]");
740 return;
741
742 case -ENOMEM:
743 _debug("oom");
744 rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT);
745 default:
746 rxrpc_kernel_end_call(call->rxcall);
747 call->rxcall = NULL;
748 call->type->destructor(call);
749 afs_free_call(call);
750 _leave(" [error]");
751 return;
752 }
753}
754
755/*
756 * extract a piece of data from the received data socket buffers
757 */
758int afs_extract_data(struct afs_call *call, struct sk_buff *skb,
759 bool last, void *buf, size_t count)
760{
761 size_t len = skb->len;
762
763 _enter("{%u},{%zu},%d,,%zu", call->offset, len, last, count);
764
765 ASSERTCMP(call->offset, <, count);
766
767 len = min_t(size_t, len, count - call->offset);
768 if (skb_copy_bits(skb, 0, buf + call->offset, len) < 0 ||
769 !pskb_pull(skb, len))
770 BUG();
771 call->offset += len;
772
773 if (call->offset < count) {
774 if (last) {
775 _leave(" = -EBADMSG [%d < %zu]", call->offset, count);
776 return -EBADMSG;
777 }
778 _leave(" = -EAGAIN");
779 return -EAGAIN;
780 }
781 return 0;
782}
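
A cache-manager deliver routine built on afs_extract_data() treats -EAGAIN as "wait for the next packet"; a sketch under assumed names, collecting an eight-byte request:

    static int my_deliver(struct afs_call *call, struct sk_buff *skb, bool last)
    {
            int ret;

            /* accumulate the first 8 bytes of the request across packets */
            ret = afs_extract_data(call, skb, last, call->buffer, 8);
            switch (ret) {
            case 0:         break;      /* got everything */
            case -EAGAIN:   return 0;   /* more packets to come */
            default:        return ret; /* protocol error aborts the call */
            }

            /* ... unmarshal call->buffer, do the work, then reply ... */
            afs_send_empty_reply(call);
            return 0;
    }
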
diff --git a/fs/afs/security.c b/fs/afs/security.c
new file mode 100644
index 000000000000..f9f424d80458
--- /dev/null
+++ b/fs/afs/security.c
@@ -0,0 +1,356 @@
1/* AFS security handling
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/init.h>
13#include <linux/slab.h>
14#include <linux/fs.h>
15#include <linux/ctype.h>
16#include <keys/rxrpc-type.h>
17#include "internal.h"
18
19/*
20 * get a key
21 */
22struct key *afs_request_key(struct afs_cell *cell)
23{
24 struct key *key;
25
26 _enter("{%x}", key_serial(cell->anonymous_key));
27
28 _debug("key %s", cell->anonymous_key->description);
29 key = request_key(&key_type_rxrpc, cell->anonymous_key->description,
30 NULL);
31 if (IS_ERR(key)) {
32 if (PTR_ERR(key) != -ENOKEY) {
33 _leave(" = %ld", PTR_ERR(key));
34 return key;
35 }
36
37 /* act as anonymous user */
38 _leave(" = {%x} [anon]", key_serial(cell->anonymous_key));
39 return key_get(cell->anonymous_key);
40 } else {
41 /* act as authorised user */
42 _leave(" = {%x} [auth]", key_serial(key));
43 return key;
44 }
45}
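Whichever branch supplies the key, the caller gets a reference that it must drop with key_put() when the operation is done. For instance (a hypothetical caller, borrowing afs_vnode_fetch_status() as used elsewhere in this patch):

	/* Hypothetical one-shot operation under the user's key for
	 * this cell, falling back to the anonymous key. */
	static int example_fetch(struct afs_vnode *vnode)
	{
		struct key *key;
		int ret;

		key = afs_request_key(vnode->volume->cell);
		if (IS_ERR(key))
			return PTR_ERR(key);

		ret = afs_vnode_fetch_status(vnode, NULL, key);
		key_put(key);
		return ret;
	}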
46
47/*
48 * dispose of a permits list
49 */
50void afs_zap_permits(struct rcu_head *rcu)
51{
52 struct afs_permits *permits =
53 container_of(rcu, struct afs_permits, rcu);
54 int loop;
55
56 _enter("{%d}", permits->count);
57
58 for (loop = permits->count - 1; loop >= 0; loop--)
59 key_put(permits->permits[loop].key);
60 kfree(permits);
61}
62
63/*
64 * dispose of a permits list in which all the key pointers have been copied
65 */
66static void afs_dispose_of_permits(struct rcu_head *rcu)
67{
68 struct afs_permits *permits =
69 container_of(rcu, struct afs_permits, rcu);
70
71 _enter("{%d}", permits->count);
72
73 kfree(permits);
74}
75
76/*
77 * get the authorising vnode - this is the specified inode itself if it's a
78 * directory or it's the parent directory if the specified inode is a file or
79 * symlink
80 * - the caller must release the ref on the inode
81 */
82static struct afs_vnode *afs_get_auth_inode(struct afs_vnode *vnode,
83 struct key *key)
84{
85 struct afs_vnode *auth_vnode;
86 struct inode *auth_inode;
87
88 _enter("");
89
90 if (S_ISDIR(vnode->vfs_inode.i_mode)) {
91 auth_inode = igrab(&vnode->vfs_inode);
92 ASSERT(auth_inode != NULL);
93 } else {
94 auth_inode = afs_iget(vnode->vfs_inode.i_sb, key,
95 &vnode->status.parent, NULL, NULL);
96 if (IS_ERR(auth_inode))
97 return ERR_PTR(PTR_ERR(auth_inode));
98 }
99
100 auth_vnode = AFS_FS_I(auth_inode);
101 _leave(" = {%x}", auth_vnode->fid.vnode);
102 return auth_vnode;
103}
104
105/*
106 * clear the permit cache on a directory vnode
107 */
108void afs_clear_permits(struct afs_vnode *vnode)
109{
110 struct afs_permits *permits;
111
112 _enter("{%x}", vnode->fid.vnode);
113
114 mutex_lock(&vnode->permits_lock);
115 permits = vnode->permits;
116 rcu_assign_pointer(vnode->permits, NULL);
117 mutex_unlock(&vnode->permits_lock);
118
119 if (permits)
120 call_rcu(&permits->rcu, afs_zap_permits);
121 _leave("");
122}
123
124/*
125 * add the result obtained for a vnode to its own cache (if a directory) or
126 * to its parent directory's cache, for the key used to access it
127 */
128void afs_cache_permit(struct afs_vnode *vnode, struct key *key, long acl_order)
129{
130 struct afs_permits *permits, *xpermits;
131 struct afs_permit *permit;
132 struct afs_vnode *auth_vnode;
133 int count, loop;
134
135 _enter("{%x},%x,%lx", vnode->fid.vnode, key_serial(key), acl_order);
136
137 auth_vnode = afs_get_auth_inode(vnode, key);
138 if (IS_ERR(auth_vnode)) {
139 _leave(" [get error %ld]", PTR_ERR(auth_vnode));
140 return;
141 }
142
143 mutex_lock(&auth_vnode->permits_lock);
144
145 /* guard against a rename being detected whilst we waited for the
146 * lock */
147 if (memcmp(&auth_vnode->fid, &vnode->status.parent,
148 sizeof(struct afs_fid)) != 0) {
149 _debug("renamed");
150 goto out_unlock;
151 }
152
153 /* have to be careful as the directory's callback may be broken between
154 * us receiving the status we're trying to cache and us getting the
155 * lock to update the cache for the status */
156 if (auth_vnode->acl_order - acl_order > 0) {
157 _debug("ACL changed?");
158 goto out_unlock;
159 }
160
161 /* always update the anonymous mask */
162 _debug("anon access %x", vnode->status.anon_access);
163 auth_vnode->status.anon_access = vnode->status.anon_access;
164 if (key == vnode->volume->cell->anonymous_key)
165 goto out_unlock;
166
167 xpermits = auth_vnode->permits;
168 count = 0;
169 if (xpermits) {
170 /* see if the permit is already in the list
171 * - if it is then we just amend the list
172 */
173 count = xpermits->count;
174 permit = xpermits->permits;
175 for (loop = count; loop > 0; loop--) {
176 if (permit->key == key) {
177 permit->access_mask =
178 vnode->status.caller_access;
179 goto out_unlock;
180 }
181 permit++;
182 }
183 }
184
185 permits = kmalloc(sizeof(*permits) + sizeof(*permit) * (count + 1),
186 GFP_NOFS);
187 if (!permits)
188 goto out_unlock;
189
190 if (xpermits) /* may be NULL, in which case count is 0 */
191 memcpy(permits->permits, xpermits->permits, count * sizeof(*permit));
192
193 _debug("key %x access %x",
194 key_serial(key), vnode->status.caller_access);
195 permits->permits[count].access_mask = vnode->status.caller_access;
196 permits->permits[count].key = key_get(key);
197 permits->count = count + 1;
198
199 rcu_assign_pointer(auth_vnode->permits, permits);
200 if (xpermits)
201 call_rcu(&xpermits->rcu, afs_dispose_of_permits);
202
203out_unlock:
204 mutex_unlock(&auth_vnode->permits_lock);
205 iput(&auth_vnode->vfs_inode);
206 _leave("");
207}
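The permit cache uses the classic RCU copy-and-replace idiom: readers walk the array under rcu_read_lock() alone (see afs_check_permit() below), while the updater, serialised by permits_lock, builds a complete replacement array, publishes it with rcu_assign_pointer() and lets the old one go after a grace period. The publish step, condensed from the function above:

	/* Condensed sketch of the publish step; permits_lock is held. */
	static void example_replace_permits(struct afs_vnode *auth_vnode,
					    struct afs_permits *new_permits)
	{
		struct afs_permits *old = auth_vnode->permits;

		rcu_assign_pointer(auth_vnode->permits, new_permits);
		if (old)
			call_rcu(&old->rcu, afs_dispose_of_permits);
	}

Note that afs_dispose_of_permits() deliberately leaves the keys alone: their references were copied into the replacement array, so only the old container needs freeing.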
208
209/*
210 * check whether the key is permitted to access the directory (or, for a
211 * file or symlink, its parent directory), consulting the permit cache and
212 * falling back to the fileserver, and find out what access is granted
213 */
214static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
215 afs_access_t *_access)
216{
217 struct afs_permits *permits;
218 struct afs_permit *permit;
219 struct afs_vnode *auth_vnode;
220 bool valid;
221 int loop, ret;
222
223 _enter("");
224
225 auth_vnode = afs_get_auth_inode(vnode, key);
226 if (IS_ERR(auth_vnode)) {
227 *_access = 0;
228 _leave(" = %ld", PTR_ERR(auth_vnode));
229 return PTR_ERR(auth_vnode);
230 }
231
232 ASSERT(S_ISDIR(auth_vnode->vfs_inode.i_mode));
233
234 /* check the permits to see if we've got one yet */
235 if (key == auth_vnode->volume->cell->anonymous_key) {
236 _debug("anon");
237 *_access = auth_vnode->status.anon_access;
238 valid = true;
239 } else {
240 valid = false;
241 rcu_read_lock();
242 permits = rcu_dereference(auth_vnode->permits);
243 if (permits) {
244 permit = permits->permits;
245 for (loop = permits->count; loop > 0; loop--) {
246 if (permit->key == key) {
247 _debug("found in cache");
248 *_access = permit->access_mask;
249 valid = true;
250 break;
251 }
252 permit++;
253 }
254 }
255 rcu_read_unlock();
256 }
257
258 if (!valid) {
259 /* check the status on the file we're actually interested in
260 * (the post-processing will cache the result on auth_vnode) */
261 _debug("no valid permit");
262
263 set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
264 ret = afs_vnode_fetch_status(vnode, auth_vnode, key);
265 if (ret < 0) {
266 iput(&auth_vnode->vfs_inode);
267 *_access = 0;
268 _leave(" = %d", ret);
269 return ret;
270 }
271 }
272 if (!valid)
273 *_access = vnode->status.caller_access;
274 iput(&auth_vnode->vfs_inode);
275 _leave(" = 0 [access %x]", *_access);
276 return 0;
277}
278
279/*
280 * check the permissions on an AFS file
281 * - AFS ACLs are attached to directories only, and a file is controlled by its
282 * parent directory's ACL
283 */
284int afs_permission(struct inode *inode, int mask, struct nameidata *nd)
285{
286 struct afs_vnode *vnode = AFS_FS_I(inode);
287 afs_access_t access;
288 struct key *key;
289 int ret;
290
291 _enter("{{%x:%x},%lx},%x,",
292 vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask);
293
294 key = afs_request_key(vnode->volume->cell);
295 if (IS_ERR(key)) {
296 _leave(" = %ld [key]", PTR_ERR(key));
297 return PTR_ERR(key);
298 }
299
300 /* if the promise has expired, we need to check the server again */
301 if (!vnode->cb_promised) {
302 _debug("not promised");
303 ret = afs_vnode_fetch_status(vnode, NULL, key);
304 if (ret < 0)
305 goto error;
306 _debug("new promise [fl=%lx]", vnode->flags);
307 }
308
309 /* check the permits to see if we've got one yet */
310 ret = afs_check_permit(vnode, key, &access);
311 if (ret < 0)
312 goto error;
313
314 /* interpret the access mask */
315 _debug("REQ %x ACC %x on %s",
316 mask, access, S_ISDIR(inode->i_mode) ? "dir" : "file");
317
318 if (S_ISDIR(inode->i_mode)) {
319 if (mask & MAY_EXEC) {
320 if (!(access & AFS_ACE_LOOKUP))
321 goto permission_denied;
322 } else if (mask & MAY_READ) {
323 if (!(access & AFS_ACE_READ))
324 goto permission_denied;
325 } else if (mask & MAY_WRITE) {
326 if (!(access & (AFS_ACE_DELETE | /* rmdir, unlink, rename from */
327 AFS_ACE_INSERT | /* create, mkdir, symlink, rename to */
328 AFS_ACE_WRITE))) /* chmod */
329 goto permission_denied;
330 } else {
331 BUG();
332 }
333 } else {
334 if (!(access & AFS_ACE_LOOKUP))
335 goto permission_denied;
336 if (mask & (MAY_EXEC | MAY_READ)) {
337 if (!(access & AFS_ACE_READ))
338 goto permission_denied;
339 } else if (mask & MAY_WRITE) {
340 if (!(access & AFS_ACE_WRITE))
341 goto permission_denied;
342 }
343 }
344
345 key_put(key);
346 ret = generic_permission(inode, mask, NULL);
347 _leave(" = %d", ret);
348 return ret;
349
350permission_denied:
351 ret = -EACCES;
352error:
353 key_put(key);
354 _leave(" = %d", ret);
355 return ret;
356}
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 44aff81dc6a7..96bb23b476a2 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -1,6 +1,6 @@
1/* server.c: AFS server record management 1/* AFS server record management
2 * 2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -11,489 +11,314 @@
11 11
12#include <linux/sched.h> 12#include <linux/sched.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <rxrpc/peer.h>
15#include <rxrpc/connection.h>
16#include "volume.h"
17#include "cell.h"
18#include "server.h"
19#include "transport.h"
20#include "vlclient.h"
21#include "kafstimod.h"
22#include "internal.h" 14#include "internal.h"
23 15
24DEFINE_SPINLOCK(afs_server_peer_lock); 16unsigned afs_server_timeout = 10; /* server timeout in seconds */
25 17
26#define FS_SERVICE_ID 1 /* AFS Volume Location Service ID */ 18static void afs_reap_server(struct work_struct *);
27#define VL_SERVICE_ID 52 /* AFS Volume Location Service ID */
28 19
29static void __afs_server_timeout(struct afs_timer *timer) 20/* tree of all the servers, indexed by IP address */
21static struct rb_root afs_servers = RB_ROOT;
22static DEFINE_RWLOCK(afs_servers_lock);
23
24/* LRU list of all the servers not currently in use */
25static LIST_HEAD(afs_server_graveyard);
26static DEFINE_SPINLOCK(afs_server_graveyard_lock);
27static DECLARE_DELAYED_WORK(afs_server_reaper, afs_reap_server);
28
29/*
30 * install a server record in the master tree
31 */
32static int afs_install_server(struct afs_server *server)
30{ 33{
31 struct afs_server *server = 34 struct afs_server *xserver;
32 list_entry(timer, struct afs_server, timeout); 35 struct rb_node **pp, *p;
36 int ret;
33 37
34 _debug("SERVER TIMEOUT [%p{u=%d}]", 38 _enter("%p", server);
35 server, atomic_read(&server->usage));
36 39
37 afs_server_do_timeout(server); 40 write_lock(&afs_servers_lock);
38} 41
42 ret = -EEXIST;
43 pp = &afs_servers.rb_node;
44 p = NULL;
45 while (*pp) {
46 p = *pp;
47 _debug("- consider %p", p);
48 xserver = rb_entry(p, struct afs_server, master_rb);
49 if (server->addr.s_addr < xserver->addr.s_addr)
50 pp = &(*pp)->rb_left;
51 else if (server->addr.s_addr > xserver->addr.s_addr)
52 pp = &(*pp)->rb_right;
53 else
54 goto error;
55 }
39 56
40static const struct afs_timer_ops afs_server_timer_ops = { 57 rb_link_node(&server->master_rb, p, pp);
41 .timed_out = __afs_server_timeout, 58 rb_insert_color(&server->master_rb, &afs_servers);
42}; 59 ret = 0;
60
61error:
62 write_unlock(&afs_servers_lock);
63 return ret;
64}
43 65
44/*****************************************************************************/
45/* 66/*
46 * lookup a server record in a cell 67 * allocate a new server record
47 * - TODO: search the cell's server list
48 */ 68 */
49int afs_server_lookup(struct afs_cell *cell, const struct in_addr *addr, 69static struct afs_server *afs_alloc_server(struct afs_cell *cell,
50 struct afs_server **_server) 70 const struct in_addr *addr)
51{ 71{
52 struct afs_server *server, *active, *zombie; 72 struct afs_server *server;
53 int loop;
54 73
55 _enter("%p,%08x,", cell, ntohl(addr->s_addr)); 74 _enter("");
56 75
57 /* allocate and initialise a server record */
58 server = kzalloc(sizeof(struct afs_server), GFP_KERNEL); 76 server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
59 if (!server) { 77 if (server) {
60 _leave(" = -ENOMEM"); 78 atomic_set(&server->usage, 1);
61 return -ENOMEM; 79 server->cell = cell;
80
81 INIT_LIST_HEAD(&server->link);
82 INIT_LIST_HEAD(&server->grave);
83 init_rwsem(&server->sem);
84 spin_lock_init(&server->fs_lock);
85 server->fs_vnodes = RB_ROOT;
86 server->cb_promises = RB_ROOT;
87 spin_lock_init(&server->cb_lock);
88 init_waitqueue_head(&server->cb_break_waitq);
89 INIT_DELAYED_WORK(&server->cb_break_work,
90 afs_dispatch_give_up_callbacks);
91
92 memcpy(&server->addr, addr, sizeof(struct in_addr));
93 server->addr.s_addr = addr->s_addr;
62 } 94 }
63 95
64 atomic_set(&server->usage, 1); 96 _leave(" = %p{%d}", server, atomic_read(&server->usage));
65 97 return server;
66 INIT_LIST_HEAD(&server->link); 98}
67 init_rwsem(&server->sem);
68 INIT_LIST_HEAD(&server->fs_callq);
69 spin_lock_init(&server->fs_lock);
70 INIT_LIST_HEAD(&server->cb_promises);
71 spin_lock_init(&server->cb_lock);
72
73 for (loop = 0; loop < AFS_SERVER_CONN_LIST_SIZE; loop++)
74 server->fs_conn_cnt[loop] = 4;
75 99
76 memcpy(&server->addr, addr, sizeof(struct in_addr)); 100/*
77 server->addr.s_addr = addr->s_addr; 101 * get an FS-server record for a cell
102 */
103struct afs_server *afs_lookup_server(struct afs_cell *cell,
104 const struct in_addr *addr)
105{
106 struct afs_server *server, *candidate;
78 107
79 afs_timer_init(&server->timeout, &afs_server_timer_ops); 108 _enter("%p,"NIPQUAD_FMT, cell, NIPQUAD(addr->s_addr));
80 109
81 /* add to the cell */ 110 /* quick scan of the list to see if we already have the server */
82 write_lock(&cell->sv_lock); 111 read_lock(&cell->servers_lock);
83 112
84 /* check the active list */ 113 list_for_each_entry(server, &cell->servers, link) {
85 list_for_each_entry(active, &cell->sv_list, link) { 114 if (server->addr.s_addr == addr->s_addr)
86 if (active->addr.s_addr == addr->s_addr) 115 goto found_server_quickly;
87 goto use_active_server;
88 } 116 }
117 read_unlock(&cell->servers_lock);
89 118
90 /* check the inactive list */ 119 candidate = afs_alloc_server(cell, addr);
91 spin_lock(&cell->sv_gylock); 120 if (!candidate) {
92 list_for_each_entry(zombie, &cell->sv_graveyard, link) { 121 _leave(" = -ENOMEM");
93 if (zombie->addr.s_addr == addr->s_addr) 122 return ERR_PTR(-ENOMEM);
94 goto resurrect_server;
95 } 123 }
96 spin_unlock(&cell->sv_gylock);
97 124
98 afs_get_cell(cell); 125 write_lock(&cell->servers_lock);
99 server->cell = cell;
100 list_add_tail(&server->link, &cell->sv_list);
101 126
102 write_unlock(&cell->sv_lock); 127 /* check the cell's server list again */
128 list_for_each_entry(server, &cell->servers, link) {
129 if (server->addr.s_addr == addr->s_addr)
130 goto found_server;
131 }
103 132
104 *_server = server; 133 _debug("new");
105 _leave(" = 0 (%p)", server); 134 server = candidate;
106 return 0; 135 if (afs_install_server(server) < 0)
136 goto server_in_two_cells;
107 137
108 /* found a matching active server */ 138 afs_get_cell(cell);
109 use_active_server: 139 list_add_tail(&server->link, &cell->servers);
110 _debug("active server"); 140
111 afs_get_server(active); 141 write_unlock(&cell->servers_lock);
112 write_unlock(&cell->sv_lock); 142 _leave(" = %p{%d}", server, atomic_read(&server->usage));
143 return server;
144
145 /* found a matching server quickly */
146found_server_quickly:
147 _debug("found quickly");
148 afs_get_server(server);
149 read_unlock(&cell->servers_lock);
150no_longer_unused:
151 if (!list_empty(&server->grave)) {
152 spin_lock(&afs_server_graveyard_lock);
153 list_del_init(&server->grave);
154 spin_unlock(&afs_server_graveyard_lock);
155 }
156 _leave(" = %p{%d}", server, atomic_read(&server->usage));
157 return server;
158
159 /* found a matching server on the second pass */
160found_server:
161 _debug("found");
162 afs_get_server(server);
163 write_unlock(&cell->servers_lock);
164 kfree(candidate);
165 goto no_longer_unused;
166
167 /* found a server that seems to be in two cells */
168server_in_two_cells:
169 write_unlock(&cell->servers_lock);
170 kfree(candidate);
171 printk(KERN_NOTICE "kAFS:"
172 " Server "NIPQUAD_FMT" appears to be in two cells\n",
173 NIPQUAD(*addr));
174 _leave(" = -EEXIST");
175 return ERR_PTR(-EEXIST);
176}
113 177
114 kfree(server); 178/*
179 * look up a server by its IP address
180 */
181struct afs_server *afs_find_server(const struct in_addr *_addr)
182{
183 struct afs_server *server = NULL;
184 struct rb_node *p;
185 struct in_addr addr = *_addr;
115 186
116 *_server = active; 187 _enter(NIPQUAD_FMT, NIPQUAD(addr.s_addr));
117 _leave(" = 0 (%p)", active);
118 return 0;
119 188
120 /* found a matching server in the graveyard, so resurrect it and 189 read_lock(&afs_servers_lock);
121 * dispose of the new record */
122 resurrect_server:
123 _debug("resurrecting server");
124 190
125 list_move_tail(&zombie->link, &cell->sv_list); 191 p = afs_servers.rb_node;
126 afs_get_server(zombie); 192 while (p) {
127 afs_kafstimod_del_timer(&zombie->timeout); 193 server = rb_entry(p, struct afs_server, master_rb);
128 spin_unlock(&cell->sv_gylock);
129 write_unlock(&cell->sv_lock);
130 194
131 kfree(server); 195 _debug("- consider %p", p);
132 196
133 *_server = zombie; 197 if (addr.s_addr < server->addr.s_addr) {
134 _leave(" = 0 (%p)", zombie); 198 p = p->rb_left;
135 return 0; 199 } else if (addr.s_addr > server->addr.s_addr) {
200 p = p->rb_right;
201 } else {
202 afs_get_server(server);
203 goto found;
204 }
205 }
136 206
137} /* end afs_server_lookup() */ 207 server = NULL;
208found:
209 read_unlock(&afs_servers_lock);
210 ASSERTIFCMP(server, server->addr.s_addr, ==, addr.s_addr);
211 _leave(" = %p", server);
212 return server;
213}
138 214
139/*****************************************************************************/
140/* 215/*
141 * destroy a server record 216 * destroy a server record
142 * - removes from the cell list 217 * - removes from the cell list
143 */ 218 */
144void afs_put_server(struct afs_server *server) 219void afs_put_server(struct afs_server *server)
145{ 220{
146 struct afs_cell *cell;
147
148 if (!server) 221 if (!server)
149 return; 222 return;
150 223
151 _enter("%p", server); 224 _enter("%p{%d}", server, atomic_read(&server->usage));
152
153 cell = server->cell;
154 225
155 /* sanity check */ 226 _debug("PUT SERVER %d", atomic_read(&server->usage));
156 BUG_ON(atomic_read(&server->usage) <= 0);
157 227
158 /* to prevent a race, the decrement and the dequeue must be effectively 228 ASSERTCMP(atomic_read(&server->usage), >, 0);
159 * atomic */
160 write_lock(&cell->sv_lock);
161 229
162 if (likely(!atomic_dec_and_test(&server->usage))) { 230 if (likely(!atomic_dec_and_test(&server->usage))) {
163 write_unlock(&cell->sv_lock);
164 _leave(""); 231 _leave("");
165 return; 232 return;
166 } 233 }
167 234
168 spin_lock(&cell->sv_gylock); 235 afs_flush_callback_breaks(server);
169 list_move_tail(&server->link, &cell->sv_graveyard);
170 236
171 /* time out in 10 secs */ 237 spin_lock(&afs_server_graveyard_lock);
172 afs_kafstimod_add_timer(&server->timeout, 10 * HZ); 238 if (atomic_read(&server->usage) == 0) {
173 239 list_move_tail(&server->grave, &afs_server_graveyard);
174 spin_unlock(&cell->sv_gylock); 240 server->time_of_death = get_seconds();
175 write_unlock(&cell->sv_lock); 241 schedule_delayed_work(&afs_server_reaper,
176 242 afs_server_timeout * HZ);
177 _leave(" [killed]"); 243 }
178} /* end afs_put_server() */ 244 spin_unlock(&afs_server_graveyard_lock);
245 _leave(" [dead]");
246}
179 247
180/*****************************************************************************/
181/* 248/*
182 * timeout server record 249 * destroy a dead server
183 * - removes from the cell's graveyard if the usage count is zero
184 */ 250 */
185void afs_server_do_timeout(struct afs_server *server) 251static void afs_destroy_server(struct afs_server *server)
186{ 252{
187 struct rxrpc_peer *peer;
188 struct afs_cell *cell;
189 int loop;
190
191 _enter("%p", server); 253 _enter("%p", server);
192 254
193 cell = server->cell; 255 ASSERTCMP(server->fs_vnodes.rb_node, ==, NULL);
194 256 ASSERTCMP(server->cb_promises.rb_node, ==, NULL);
195 BUG_ON(atomic_read(&server->usage) < 0); 257 ASSERTCMP(server->cb_break_head, ==, server->cb_break_tail);
196 258 ASSERTCMP(atomic_read(&server->cb_break_n), ==, 0);
197 /* remove from graveyard if still dead */
198 spin_lock(&cell->vl_gylock);
199 if (atomic_read(&server->usage) == 0)
200 list_del_init(&server->link);
201 else
202 server = NULL;
203 spin_unlock(&cell->vl_gylock);
204
205 if (!server) {
206 _leave("");
207 return; /* resurrected */
208 }
209
210 /* we can now destroy it properly */
211 afs_put_cell(cell);
212
213 /* uncross-point the structs under a global lock */
214 spin_lock(&afs_server_peer_lock);
215 peer = server->peer;
216 if (peer) {
217 server->peer = NULL;
218 peer->user = NULL;
219 }
220 spin_unlock(&afs_server_peer_lock);
221
222 /* finish cleaning up the server */
223 for (loop = AFS_SERVER_CONN_LIST_SIZE - 1; loop >= 0; loop--)
224 if (server->fs_conn[loop])
225 rxrpc_put_connection(server->fs_conn[loop]);
226
227 if (server->vlserver)
228 rxrpc_put_connection(server->vlserver);
229 259
260 afs_put_cell(server->cell);
230 kfree(server); 261 kfree(server);
262}
231 263
232 _leave(" [destroyed]");
233} /* end afs_server_do_timeout() */
234
235/*****************************************************************************/
236/* 264/*
237 * get a callslot on a connection to the fileserver on the specified server 265 * reap dead server records
238 */ 266 */
239int afs_server_request_callslot(struct afs_server *server, 267static void afs_reap_server(struct work_struct *work)
240 struct afs_server_callslot *callslot)
241{ 268{
242 struct afs_server_callslot *pcallslot; 269 LIST_HEAD(corpses);
243 struct rxrpc_connection *conn; 270 struct afs_server *server;
244 int nconn, ret; 271 unsigned long delay, expiry;
245 272 time_t now;
246 _enter("%p,",server); 273
247 274 now = get_seconds();
248 INIT_LIST_HEAD(&callslot->link); 275 spin_lock(&afs_server_graveyard_lock);
249 callslot->task = current; 276
250 callslot->conn = NULL; 277 while (!list_empty(&afs_server_graveyard)) {
251 callslot->nconn = -1; 278 server = list_entry(afs_server_graveyard.next,
252 callslot->ready = 0; 279 struct afs_server, grave);
253 280
254 ret = 0; 281 /* the queue is ordered most dead first */
255 conn = NULL; 282 expiry = server->time_of_death + afs_server_timeout;
256 283 if (expiry > now) {
257 /* get hold of a callslot first */ 284 delay = (expiry - now) * HZ;
258 spin_lock(&server->fs_lock); 285 if (!schedule_delayed_work(&afs_server_reaper, delay)) {
259 286 cancel_delayed_work(&afs_server_reaper);
260 /* resurrect the server if it's death timeout has expired */ 287 schedule_delayed_work(&afs_server_reaper,
261 if (server->fs_state) { 288 delay);
262 if (time_before(jiffies, server->fs_dead_jif)) { 289 }
263 ret = server->fs_state; 290 break;
264 spin_unlock(&server->fs_lock);
265 _leave(" = %d [still dead]", ret);
266 return ret;
267 } 291 }
268 292
269 server->fs_state = 0; 293 write_lock(&server->cell->servers_lock);
270 } 294 write_lock(&afs_servers_lock);
271 295 if (atomic_read(&server->usage) > 0) {
272 /* try and find a connection that has spare callslots */ 296 list_del_init(&server->grave);
273 for (nconn = 0; nconn < AFS_SERVER_CONN_LIST_SIZE; nconn++) { 297 } else {
274 if (server->fs_conn_cnt[nconn] > 0) { 298 list_move_tail(&server->grave, &corpses);
275 server->fs_conn_cnt[nconn]--; 299 list_del_init(&server->link);
276 spin_unlock(&server->fs_lock); 300 rb_erase(&server->master_rb, &afs_servers);
277 callslot->nconn = nconn;
278 goto obtained_slot;
279 } 301 }
302 write_unlock(&afs_servers_lock);
303 write_unlock(&server->cell->servers_lock);
280 } 304 }
281 305
282 /* none were available - wait interruptibly for one to become 306 spin_unlock(&afs_server_graveyard_lock);
283 * available */
284 set_current_state(TASK_INTERRUPTIBLE);
285 list_add_tail(&callslot->link, &server->fs_callq);
286 spin_unlock(&server->fs_lock);
287
288 while (!callslot->ready && !signal_pending(current)) {
289 schedule();
290 set_current_state(TASK_INTERRUPTIBLE);
291 }
292
293 set_current_state(TASK_RUNNING);
294
295 /* even if we were interrupted we may still be queued */
296 if (!callslot->ready) {
297 spin_lock(&server->fs_lock);
298 list_del_init(&callslot->link);
299 spin_unlock(&server->fs_lock);
300 }
301
302 nconn = callslot->nconn;
303 307
304 /* if interrupted, we must release any slot we also got before 308 /* now reap the corpses we've extracted */
305 * returning an error */ 309 while (!list_empty(&corpses)) {
306 if (signal_pending(current)) { 310 server = list_entry(corpses.next, struct afs_server, grave);
307 ret = -EINTR; 311 list_del(&server->grave);
308 goto error_release; 312 afs_destroy_server(server);
309 } 313 }
314}
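Note the double check in afs_put_server() above: between atomic_dec_and_test() and taking the graveyard lock, afs_lookup_server() may already have taken a fresh reference, so the usage count is re-read under the lock before the record is buried, and the reaper applies the same test before erasing anything. The rescue on re-lookup is then just (condensed from afs_lookup_server() above):

	/* An empty ->grave list head means "not currently buried". */
	if (!list_empty(&server->grave)) {
		spin_lock(&afs_server_graveyard_lock);
		list_del_init(&server->grave);
		spin_unlock(&afs_server_graveyard_lock);
	}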
310 315
311 /* if we were woken up with an error, then pass that error back to the
312 * called */
313 if (nconn < 0) {
314 _leave(" = %d", callslot->errno);
315 return callslot->errno;
316 }
317
318 /* were we given a connection directly? */
319 if (callslot->conn) {
320 /* yes - use it */
321 _leave(" = 0 (nc=%d)", nconn);
322 return 0;
323 }
324
325 /* got a callslot, but no connection */
326 obtained_slot:
327
328 /* need to get hold of the RxRPC connection */
329 down_write(&server->sem);
330
331 /* quick check to see if there's an outstanding error */
332 ret = server->fs_state;
333 if (ret)
334 goto error_release_upw;
335
336 if (server->fs_conn[nconn]) {
337 /* reuse an existing connection */
338 rxrpc_get_connection(server->fs_conn[nconn]);
339 callslot->conn = server->fs_conn[nconn];
340 }
341 else {
342 /* create a new connection */
343 ret = rxrpc_create_connection(afs_transport,
344 htons(7000),
345 server->addr.s_addr,
346 FS_SERVICE_ID,
347 NULL,
348 &server->fs_conn[nconn]);
349
350 if (ret < 0)
351 goto error_release_upw;
352
353 callslot->conn = server->fs_conn[0];
354 rxrpc_get_connection(callslot->conn);
355 }
356
357 up_write(&server->sem);
358
359 _leave(" = 0");
360 return 0;
361
362 /* handle an error occurring */
363 error_release_upw:
364 up_write(&server->sem);
365
366 error_release:
367 /* either release the callslot or pass it along to another deserving
368 * task */
369 spin_lock(&server->fs_lock);
370
371 if (nconn < 0) {
372 /* no callslot allocated */
373 }
374 else if (list_empty(&server->fs_callq)) {
375 /* no one waiting */
376 server->fs_conn_cnt[nconn]++;
377 spin_unlock(&server->fs_lock);
378 }
379 else {
380 /* someone's waiting - dequeue them and wake them up */
381 pcallslot = list_entry(server->fs_callq.next,
382 struct afs_server_callslot, link);
383 list_del_init(&pcallslot->link);
384
385 pcallslot->errno = server->fs_state;
386 if (!pcallslot->errno) {
387 /* pass them out callslot details */
388 callslot->conn = xchg(&pcallslot->conn,
389 callslot->conn);
390 pcallslot->nconn = nconn;
391 callslot->nconn = nconn = -1;
392 }
393 pcallslot->ready = 1;
394 wake_up_process(pcallslot->task);
395 spin_unlock(&server->fs_lock);
396 }
397
398 rxrpc_put_connection(callslot->conn);
399 callslot->conn = NULL;
400
401 _leave(" = %d", ret);
402 return ret;
403
404} /* end afs_server_request_callslot() */
405
406/*****************************************************************************/
407/*
408 * release a callslot back to the server
409 * - transfers the RxRPC connection to the next pending callslot if possible
410 */
411void afs_server_release_callslot(struct afs_server *server,
412 struct afs_server_callslot *callslot)
413{
414 struct afs_server_callslot *pcallslot;
415
416 _enter("{ad=%08x,cnt=%u},{%d}",
417 ntohl(server->addr.s_addr),
418 server->fs_conn_cnt[callslot->nconn],
419 callslot->nconn);
420
421 BUG_ON(callslot->nconn < 0);
422
423 spin_lock(&server->fs_lock);
424
425 if (list_empty(&server->fs_callq)) {
426 /* no one waiting */
427 server->fs_conn_cnt[callslot->nconn]++;
428 spin_unlock(&server->fs_lock);
429 }
430 else {
431 /* someone's waiting - dequeue them and wake them up */
432 pcallslot = list_entry(server->fs_callq.next,
433 struct afs_server_callslot, link);
434 list_del_init(&pcallslot->link);
435
436 pcallslot->errno = server->fs_state;
437 if (!pcallslot->errno) {
438 /* pass them out callslot details */
439 callslot->conn = xchg(&pcallslot->conn, callslot->conn);
440 pcallslot->nconn = callslot->nconn;
441 callslot->nconn = -1;
442 }
443
444 pcallslot->ready = 1;
445 wake_up_process(pcallslot->task);
446 spin_unlock(&server->fs_lock);
447 }
448
449 rxrpc_put_connection(callslot->conn);
450
451 _leave("");
452} /* end afs_server_release_callslot() */
453
454/*****************************************************************************/
455/* 316/*
456 * get a handle to a connection to the vlserver (volume location) on the 317 * discard all the server records for rmmod
457 * specified server
458 */ 318 */
459int afs_server_get_vlconn(struct afs_server *server, 319void __exit afs_purge_servers(void)
460 struct rxrpc_connection **_conn)
461{ 320{
462 struct rxrpc_connection *conn; 321 afs_server_timeout = 0;
463 int ret; 322 cancel_delayed_work(&afs_server_reaper);
464 323 schedule_delayed_work(&afs_server_reaper, 0);
465 _enter("%p,", server); 324}
466
467 ret = 0;
468 conn = NULL;
469 down_read(&server->sem);
470
471 if (server->vlserver) {
472 /* reuse an existing connection */
473 rxrpc_get_connection(server->vlserver);
474 conn = server->vlserver;
475 up_read(&server->sem);
476 }
477 else {
478 /* create a new connection */
479 up_read(&server->sem);
480 down_write(&server->sem);
481 if (!server->vlserver) {
482 ret = rxrpc_create_connection(afs_transport,
483 htons(7003),
484 server->addr.s_addr,
485 VL_SERVICE_ID,
486 NULL,
487 &server->vlserver);
488 }
489 if (ret == 0) {
490 rxrpc_get_connection(server->vlserver);
491 conn = server->vlserver;
492 }
493 up_write(&server->sem);
494 }
495
496 *_conn = conn;
497 _leave(" = %d", ret);
498 return ret;
499} /* end afs_server_get_vlconn() */
diff --git a/fs/afs/server.h b/fs/afs/server.h
deleted file mode 100644
index c3d24115578f..000000000000
--- a/fs/afs/server.h
+++ /dev/null
@@ -1,102 +0,0 @@
1/* server.h: AFS server record
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_AFS_SERVER_H
13#define _LINUX_AFS_SERVER_H
14
15#include "types.h"
16#include "kafstimod.h"
17#include <rxrpc/peer.h>
18#include <linux/rwsem.h>
19
20extern spinlock_t afs_server_peer_lock;
21
22/*****************************************************************************/
23/*
24 * AFS server record
25 */
26struct afs_server
27{
28 atomic_t usage;
29 struct afs_cell *cell; /* cell in which server resides */
30 struct list_head link; /* link in cell's server list */
31 struct rw_semaphore sem; /* access lock */
32 struct afs_timer timeout; /* graveyard timeout */
33 struct in_addr addr; /* server address */
34 struct rxrpc_peer *peer; /* peer record for this server */
35 struct rxrpc_connection *vlserver; /* connection to the volume location service */
36
37 /* file service access */
38#define AFS_SERVER_CONN_LIST_SIZE 2
39 struct rxrpc_connection *fs_conn[AFS_SERVER_CONN_LIST_SIZE]; /* FS connections */
40 unsigned fs_conn_cnt[AFS_SERVER_CONN_LIST_SIZE]; /* per conn call count */
41 struct list_head fs_callq; /* queue of processes waiting to make a call */
42 spinlock_t fs_lock; /* access lock */
43 int fs_state; /* 0 or reason FS currently marked dead (-errno) */
44 unsigned fs_rtt; /* FS round trip time */
45 unsigned long fs_act_jif; /* time at which last activity occurred */
46 unsigned long fs_dead_jif; /* time at which no longer to be considered dead */
47
48 /* callback promise management */
49 struct list_head cb_promises; /* as yet unbroken promises from this server */
50 spinlock_t cb_lock; /* access lock */
51};
52
53extern int afs_server_lookup(struct afs_cell *cell,
54 const struct in_addr *addr,
55 struct afs_server **_server);
56
57#define afs_get_server(S) do { atomic_inc(&(S)->usage); } while(0)
58
59extern void afs_put_server(struct afs_server *server);
60extern void afs_server_do_timeout(struct afs_server *server);
61
62extern int afs_server_find_by_peer(const struct rxrpc_peer *peer,
63 struct afs_server **_server);
64
65extern int afs_server_get_vlconn(struct afs_server *server,
66 struct rxrpc_connection **_conn);
67
68static inline
69struct afs_server *afs_server_get_from_peer(struct rxrpc_peer *peer)
70{
71 struct afs_server *server;
72
73 spin_lock(&afs_server_peer_lock);
74 server = peer->user;
75 if (server)
76 afs_get_server(server);
77 spin_unlock(&afs_server_peer_lock);
78
79 return server;
80}
81
82/*****************************************************************************/
83/*
84 * AFS server callslot grant record
85 */
86struct afs_server_callslot
87{
88 struct list_head link; /* link in server's list */
89 struct task_struct *task; /* process waiting to make call */
90 struct rxrpc_connection *conn; /* connection to use (or NULL on error) */
91 short nconn; /* connection slot number (-1 on error) */
92 char ready; /* T when ready */
93 int errno; /* error number if nconn==-1 */
94};
95
96extern int afs_server_request_callslot(struct afs_server *server,
97 struct afs_server_callslot *callslot);
98
99extern void afs_server_release_callslot(struct afs_server *server,
100 struct afs_server_callslot *callslot);
101
102#endif /* _LINUX_AFS_SERVER_H */
diff --git a/fs/afs/super.c b/fs/afs/super.c
index eb7e32349da3..cebd03c91f57 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -1,5 +1,6 @@
1/* 1/* AFS superblock handling
2 * Copyright (c) 2002 Red Hat, Inc. All rights reserved. 2 *
3 * Copyright (c) 2002, 2007 Red Hat, Inc. All rights reserved.
3 * 4 *
4 * This software may be freely redistributed under the terms of the 5 * This software may be freely redistributed under the terms of the
5 * GNU General Public License. 6 * GNU General Public License.
@@ -9,7 +10,7 @@
9 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 10 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
10 * 11 *
11 * Authors: David Howells <dhowells@redhat.com> 12 * Authors: David Howells <dhowells@redhat.com>
12 * David Woodhouse <dwmw2@cambridge.redhat.com> 13 * David Woodhouse <dwmw2@redhat.com>
13 * 14 *
14 */ 15 */
15 16
@@ -19,22 +20,10 @@
19#include <linux/slab.h> 20#include <linux/slab.h>
20#include <linux/fs.h> 21#include <linux/fs.h>
21#include <linux/pagemap.h> 22#include <linux/pagemap.h>
22#include "vnode.h"
23#include "volume.h"
24#include "cell.h"
25#include "cmservice.h"
26#include "fsclient.h"
27#include "super.h"
28#include "internal.h" 23#include "internal.h"
29 24
30#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */ 25#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
31 26
32struct afs_mount_params {
33 int rwpath;
34 struct afs_cell *default_cell;
35 struct afs_volume *volume;
36};
37
38static void afs_i_init_once(void *foo, struct kmem_cache *cachep, 27static void afs_i_init_once(void *foo, struct kmem_cache *cachep,
39 unsigned long flags); 28 unsigned long flags);
40 29
@@ -62,13 +51,13 @@ static const struct super_operations afs_super_ops = {
62 .drop_inode = generic_delete_inode, 51 .drop_inode = generic_delete_inode,
63 .destroy_inode = afs_destroy_inode, 52 .destroy_inode = afs_destroy_inode,
64 .clear_inode = afs_clear_inode, 53 .clear_inode = afs_clear_inode,
54 .umount_begin = afs_umount_begin,
65 .put_super = afs_put_super, 55 .put_super = afs_put_super,
66}; 56};
67 57
68static struct kmem_cache *afs_inode_cachep; 58static struct kmem_cache *afs_inode_cachep;
69static atomic_t afs_count_active_inodes; 59static atomic_t afs_count_active_inodes;
70 60
71/*****************************************************************************/
72/* 61/*
73 * initialise the filesystem 62 * initialise the filesystem
74 */ 63 */
@@ -78,8 +67,6 @@ int __init afs_fs_init(void)
78 67
79 _enter(""); 68 _enter("");
80 69
81 afs_timer_init(&afs_mntpt_expiry_timer, &afs_mntpt_expiry_timer_ops);
82
83 /* create ourselves an inode cache */ 70 /* create ourselves an inode cache */
84 atomic_set(&afs_count_active_inodes, 0); 71 atomic_set(&afs_count_active_inodes, 0);
85 72
@@ -99,20 +86,22 @@ int __init afs_fs_init(void)
99 ret = register_filesystem(&afs_fs_type); 86 ret = register_filesystem(&afs_fs_type);
100 if (ret < 0) { 87 if (ret < 0) {
101 kmem_cache_destroy(afs_inode_cachep); 88 kmem_cache_destroy(afs_inode_cachep);
102 kleave(" = %d", ret); 89 _leave(" = %d", ret);
103 return ret; 90 return ret;
104 } 91 }
105 92
106 kleave(" = 0"); 93 _leave(" = 0");
107 return 0; 94 return 0;
108} /* end afs_fs_init() */ 95}
109 96
110/*****************************************************************************/
111/* 97/*
112 * clean up the filesystem 98 * clean up the filesystem
113 */ 99 */
114void __exit afs_fs_exit(void) 100void __exit afs_fs_exit(void)
115{ 101{
102 _enter("");
103
104 afs_mntpt_kill_timer();
116 unregister_filesystem(&afs_fs_type); 105 unregister_filesystem(&afs_fs_type);
117 106
118 if (atomic_read(&afs_count_active_inodes) != 0) { 107 if (atomic_read(&afs_count_active_inodes) != 0) {
@@ -122,10 +111,9 @@ void __exit afs_fs_exit(void)
122 } 111 }
123 112
124 kmem_cache_destroy(afs_inode_cachep); 113 kmem_cache_destroy(afs_inode_cachep);
114 _leave("");
115}
125 116
126} /* end afs_fs_exit() */
127
128/*****************************************************************************/
129/* 117/*
130 * check that an argument has a value 118 * check that an argument has a value
131 */ 119 */
@@ -136,9 +124,8 @@ static int want_arg(char **_value, const char *option)
136 return 0; 124 return 0;
137 } 125 }
138 return 1; 126 return 1;
139} /* end want_arg() */ 127}
140 128
141/*****************************************************************************/
142/* 129/*
143 * check that there's no subsequent value 130 * check that there's no subsequent value
144 */ 131 */
@@ -150,18 +137,17 @@ static int want_no_value(char *const *_value, const char *option)
150 return 0; 137 return 0;
151 } 138 }
152 return 1; 139 return 1;
153} /* end want_no_value() */ 140}
154 141
155/*****************************************************************************/
156/* 142/*
157 * parse the mount options 143 * parse the mount options
158 * - this function has been shamelessly adapted from the ext3 fs which 144 * - this function has been shamelessly adapted from the ext3 fs which
159 * shamelessly adapted it from the msdos fs 145 * shamelessly adapted it from the msdos fs
160 */ 146 */
161static int afs_super_parse_options(struct afs_mount_params *params, 147static int afs_parse_options(struct afs_mount_params *params,
162 char *options, 148 char *options, const char **devname)
163 const char **devname)
164{ 149{
150 struct afs_cell *cell;
165 char *key, *value; 151 char *key, *value;
166 int ret; 152 int ret;
167 153
@@ -170,51 +156,135 @@ static int afs_super_parse_options(struct afs_mount_params *params,
170 options[PAGE_SIZE - 1] = 0; 156 options[PAGE_SIZE - 1] = 0;
171 157
172 ret = 0; 158 ret = 0;
173 while ((key = strsep(&options, ",")) != 0) 159 while ((key = strsep(&options, ","))) {
174 {
175 value = strchr(key, '='); 160 value = strchr(key, '=');
176 if (value) 161 if (value)
177 *value++ = 0; 162 *value++ = 0;
178 163
179 printk("kAFS: KEY: %s, VAL:%s\n", key, value ?: "-"); 164 _debug("kAFS: KEY: %s, VAL:%s", key, value ?: "-");
180 165
181 if (strcmp(key, "rwpath") == 0) { 166 if (strcmp(key, "rwpath") == 0) {
182 if (!want_no_value(&value, "rwpath")) 167 if (!want_no_value(&value, "rwpath"))
183 return -EINVAL; 168 return -EINVAL;
184 params->rwpath = 1; 169 params->rwpath = 1;
185 continue; 170 } else if (strcmp(key, "vol") == 0) {
186 }
187 else if (strcmp(key, "vol") == 0) {
188 if (!want_arg(&value, "vol")) 171 if (!want_arg(&value, "vol"))
189 return -EINVAL; 172 return -EINVAL;
190 *devname = value; 173 *devname = value;
191 continue; 174 } else if (strcmp(key, "cell") == 0) {
192 }
193 else if (strcmp(key, "cell") == 0) {
194 if (!want_arg(&value, "cell")) 175 if (!want_arg(&value, "cell"))
195 return -EINVAL; 176 return -EINVAL;
196 afs_put_cell(params->default_cell); 177 cell = afs_cell_lookup(value, strlen(value));
197 ret = afs_cell_lookup(value, 178 if (IS_ERR(cell))
198 strlen(value), 179 return PTR_ERR(cell);
199 &params->default_cell); 180 afs_put_cell(params->cell);
200 if (ret < 0) 181 params->cell = cell;
201 return -EINVAL; 182 } else {
202 continue; 183 printk("kAFS: Unknown mount option: '%s'\n", key);
184 ret = -EINVAL;
185 goto error;
203 } 186 }
204
205 printk("kAFS: Unknown mount option: '%s'\n", key);
206 ret = -EINVAL;
207 goto error;
208 } 187 }
209 188
210 ret = 0; 189 ret = 0;
211 190error:
212 error:
213 _leave(" = %d", ret); 191 _leave(" = %d", ret);
214 return ret; 192 return ret;
215} /* end afs_super_parse_options() */ 193}
194
195/*
196 * parse a device name to get cell name, volume name, volume type and R/W
197 * selector
198 * - this can be one of the following:
199 * "%[cell:]volume[.]" R/W volume
200 * "#[cell:]volume[.]" R/O or R/W volume (rwpath=0),
201 * or R/W (rwpath=1) volume
202 * "%[cell:]volume.readonly" R/O volume
203 * "#[cell:]volume.readonly" R/O volume
204 * "%[cell:]volume.backup" Backup volume
205 * "#[cell:]volume.backup" Backup volume
206 */
207static int afs_parse_device_name(struct afs_mount_params *params,
208 const char *name)
209{
210 struct afs_cell *cell;
211 const char *cellname, *suffix;
212 int cellnamesz;
213
214 _enter(",%s", name);
215
216 if (!name) {
217 printk(KERN_ERR "kAFS: no volume name specified\n");
218 return -EINVAL;
219 }
220
221 if ((name[0] != '%' && name[0] != '#') || !name[1]) {
222 printk(KERN_ERR "kAFS: unparsable volume name\n");
223 return -EINVAL;
224 }
225
226 /* determine the type of volume we're looking for */
227 params->type = AFSVL_ROVOL;
228 params->force = false;
229 if (params->rwpath || name[0] == '%') {
230 params->type = AFSVL_RWVOL;
231 params->force = true;
232 }
233 name++;
234
235 /* split the cell name out if there is one */
236 params->volname = strchr(name, ':');
237 if (params->volname) {
238 cellname = name;
239 cellnamesz = params->volname - name;
240 params->volname++;
241 } else {
242 params->volname = name;
243 cellname = NULL;
244 cellnamesz = 0;
245 }
246
247 /* the volume type is further affected by a possible suffix */
248 suffix = strrchr(params->volname, '.');
249 if (suffix) {
250 if (strcmp(suffix, ".readonly") == 0) {
251 params->type = AFSVL_ROVOL;
252 params->force = true;
253 } else if (strcmp(suffix, ".backup") == 0) {
254 params->type = AFSVL_BACKVOL;
255 params->force = true;
256 } else if (suffix[1] == 0) { /* bare trailing dot: strip it */
257 } else {
258 suffix = NULL;
259 }
260 }
261
262 params->volnamesz = suffix ?
263 suffix - params->volname : strlen(params->volname);
264
265 _debug("cell %*.*s [%p]",
266 cellnamesz, cellnamesz, cellname ?: "", params->cell);
267
268 /* lookup the cell record */
269 if (cellname || !params->cell) {
270 cell = afs_cell_lookup(cellname, cellnamesz);
271 if (IS_ERR(cell)) {
272 printk(KERN_ERR "kAFS: unable to lookup cell '%s'\n",
273 cellname ?: "");
274 return PTR_ERR(cell);
275 }
276 afs_put_cell(params->cell);
277 params->cell = cell;
278 }
279
280 _debug("CELL:%s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s",
281 params->cell->name, params->cell,
282 params->volnamesz, params->volnamesz, params->volname,
283 suffix ?: "-", params->type, params->force ? " FORCE" : "");
284
285 return 0;
286}
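To make the suffix rules concrete, a few example device names and the fields they should yield, assuming rwpath=0 and no cell set by the mount options (worked examples only, not part of the patch):

	/*
	 *  "#grand.central.org:root.cell"  cell "grand.central.org",
	 *                                  volume "root.cell",
	 *                                  AFSVL_ROVOL, force=false
	 *  "%root.cell"                    volume "root.cell" in the
	 *                                  default cell, AFSVL_RWVOL,
	 *                                  force=true
	 *  "#root.cell.readonly"           volume "root.cell",
	 *                                  AFSVL_ROVOL, force=true
	 */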
216 287
217/*****************************************************************************/
218/* 288/*
219 * check a superblock to see if it's the one we're looking for 289 * check a superblock to see if it's the one we're looking for
220 */ 290 */
@@ -224,13 +294,12 @@ static int afs_test_super(struct super_block *sb, void *data)
224 struct afs_super_info *as = sb->s_fs_info; 294 struct afs_super_info *as = sb->s_fs_info;
225 295
226 return as->volume == params->volume; 296 return as->volume == params->volume;
227} /* end afs_test_super() */ 297}
228 298
229/*****************************************************************************/
230/* 299/*
231 * fill in the superblock 300 * fill in the superblock
232 */ 301 */
233static int afs_fill_super(struct super_block *sb, void *data, int silent) 302static int afs_fill_super(struct super_block *sb, void *data)
234{ 303{
235 struct afs_mount_params *params = data; 304 struct afs_mount_params *params = data;
236 struct afs_super_info *as = NULL; 305 struct afs_super_info *as = NULL;
@@ -239,7 +308,7 @@ static int afs_fill_super(struct super_block *sb, void *data, int silent)
239 struct inode *inode = NULL; 308 struct inode *inode = NULL;
240 int ret; 309 int ret;
241 310
242 kenter(""); 311 _enter("");
243 312
244 /* allocate a superblock info record */ 313 /* allocate a superblock info record */
245 as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); 314 as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
@@ -262,9 +331,9 @@ static int afs_fill_super(struct super_block *sb, void *data, int silent)
262 fid.vid = as->volume->vid; 331 fid.vid = as->volume->vid;
263 fid.vnode = 1; 332 fid.vnode = 1;
264 fid.unique = 1; 333 fid.unique = 1;
265 ret = afs_iget(sb, &fid, &inode); 334 inode = afs_iget(sb, params->key, &fid, NULL, NULL);
266 if (ret < 0) 335 if (IS_ERR(inode))
267 goto error; 336 goto error_inode;
268 337
269 ret = -ENOMEM; 338 ret = -ENOMEM;
270 root = d_alloc_root(inode); 339 root = d_alloc_root(inode);
@@ -273,21 +342,23 @@ static int afs_fill_super(struct super_block *sb, void *data, int silent)
273 342
274 sb->s_root = root; 343 sb->s_root = root;
275 344
276 kleave(" = 0"); 345 _leave(" = 0");
277 return 0; 346 return 0;
278 347
279 error: 348error_inode:
349 ret = PTR_ERR(inode);
350 inode = NULL;
351error:
280 iput(inode); 352 iput(inode);
281 afs_put_volume(as->volume); 353 afs_put_volume(as->volume);
282 kfree(as); 354 kfree(as);
283 355
284 sb->s_fs_info = NULL; 356 sb->s_fs_info = NULL;
285 357
286 kleave(" = %d", ret); 358 _leave(" = %d", ret);
287 return ret; 359 return ret;
288} /* end afs_fill_super() */ 360}
289 361
290/*****************************************************************************/
291/* 362/*
292 * get an AFS superblock 363 * get an AFS superblock
293 * - TODO: don't use get_sb_nodev(), but rather call sget() directly 364 * - TODO: don't use get_sb_nodev(), but rather call sget() directly
@@ -300,69 +371,80 @@ static int afs_get_sb(struct file_system_type *fs_type,
300{ 371{
301 struct afs_mount_params params; 372 struct afs_mount_params params;
302 struct super_block *sb; 373 struct super_block *sb;
374 struct afs_volume *vol;
375 struct key *key;
303 int ret; 376 int ret;
304 377
305 _enter(",,%s,%p", dev_name, options); 378 _enter(",,%s,%p", dev_name, options);
306 379
307 memset(&params, 0, sizeof(params)); 380 memset(&params, 0, sizeof(params));
308 381
309 /* start the cache manager */ 382 /* parse the options and device name */
310 ret = afscm_start();
311 if (ret < 0) {
312 _leave(" = %d", ret);
313 return ret;
314 }
315
316 /* parse the options */
317 if (options) { 383 if (options) {
318 ret = afs_super_parse_options(&params, options, &dev_name); 384 ret = afs_parse_options(&params, options, &dev_name);
319 if (ret < 0) 385 if (ret < 0)
320 goto error; 386 goto error;
321 if (!dev_name) {
322 printk("kAFS: no volume name specified\n");
323 ret = -EINVAL;
324 goto error;
325 }
326 } 387 }
327 388
328 /* parse the device name */ 389
329 ret = afs_volume_lookup(dev_name, 390 ret = afs_parse_device_name(&params, dev_name);
330 params.default_cell,
331 params.rwpath,
332 &params.volume);
333 if (ret < 0) 391 if (ret < 0)
334 goto error; 392 goto error;
335 393
336 /* allocate a deviceless superblock */ 394 /* try and do the mount securely */
337 sb = sget(fs_type, afs_test_super, set_anon_super, &params); 395 key = afs_request_key(params.cell);
338 if (IS_ERR(sb)) 396 if (IS_ERR(key)) {
397 _leave(" = %ld [key]", PTR_ERR(key));
398 ret = PTR_ERR(key);
339 goto error; 399 goto error;
400 }
401 params.key = key;
340 402
341 sb->s_flags = flags; 403 /* look up the volume record */
404 vol = afs_volume_lookup(&params);
405 if (IS_ERR(vol)) {
406 ret = PTR_ERR(vol);
407 goto error;
408 }
409 params.volume = vol;
342 410
343 ret = afs_fill_super(sb, &params, flags & MS_SILENT ? 1 : 0); 411 /* allocate a deviceless superblock */
344 if (ret < 0) { 412 sb = sget(fs_type, afs_test_super, set_anon_super, &params);
345 up_write(&sb->s_umount); 413 if (IS_ERR(sb)) {
346 deactivate_super(sb); 414 ret = PTR_ERR(sb);
347 goto error; 415 goto error;
348 } 416 }
349 sb->s_flags |= MS_ACTIVE;
350 simple_set_mnt(mnt, sb);
351 417
418 if (!sb->s_root) {
419 /* initial superblock/root creation */
420 _debug("create");
421 sb->s_flags = flags;
422 ret = afs_fill_super(sb, &params);
423 if (ret < 0) {
424 up_write(&sb->s_umount);
425 deactivate_super(sb);
426 goto error;
427 }
428 sb->s_flags |= MS_ACTIVE;
429 } else {
430 _debug("reuse");
431 ASSERTCMP(sb->s_flags, &, MS_ACTIVE);
432 }
433
434 simple_set_mnt(mnt, sb);
352 afs_put_volume(params.volume); 435 afs_put_volume(params.volume);
353 afs_put_cell(params.default_cell); 436 afs_put_cell(params.cell);
354 _leave(" = 0 [%p]", 0, sb); 437 _leave(" = 0 [%p]", sb);
355 return 0; 438 return 0;
356 439
357 error: 440error:
358 afs_put_volume(params.volume); 441 afs_put_volume(params.volume);
359 afs_put_cell(params.default_cell); 442 afs_put_cell(params.cell);
360 afscm_stop(); 443 key_put(params.key);
361 _leave(" = %d", ret); 444 _leave(" = %d", ret);
362 return ret; 445 return ret;
363} /* end afs_get_sb() */ 446}
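Because afs_test_super() matches superblocks by volume, sget() may hand back an already-mounted superblock; the s_root test above is what separates first-time creation (fill it in, then set MS_ACTIVE) from reuse (just assert that it is active). The idiom in miniature, condensed from the code above:

	sb = sget(fs_type, afs_test_super, set_anon_super, &params);
	if (IS_ERR(sb))
		return PTR_ERR(sb);
	if (!sb->s_root) {
		/* new superblock: fill it in and activate it */
		ret = afs_fill_super(sb, &params);
		if (ret < 0) {
			up_write(&sb->s_umount);
			deactivate_super(sb);
			return ret;
		}
		sb->s_flags |= MS_ACTIVE;
	}
	simple_set_mnt(mnt, sb);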
364 447
365/*****************************************************************************/
366/* 448/*
367 * finish the unmounting process on the superblock 449 * finish the unmounting process on the superblock
368 */ 450 */
@@ -373,35 +455,30 @@ static void afs_put_super(struct super_block *sb)
373 _enter(""); 455 _enter("");
374 456
375 afs_put_volume(as->volume); 457 afs_put_volume(as->volume);
376 afscm_stop();
377 458
378 _leave(""); 459 _leave("");
379} /* end afs_put_super() */ 460}
380 461
381/*****************************************************************************/
382/* 462/*
383 * initialise an inode cache slab element prior to any use 463 * initialise an inode cache slab element prior to any use
384 */ 464 */
385static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep, 465static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep,
386 unsigned long flags) 466 unsigned long flags)
387{ 467{
388 struct afs_vnode *vnode = (struct afs_vnode *) _vnode; 468 struct afs_vnode *vnode = _vnode;
389 469
390 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 470 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
391 SLAB_CTOR_CONSTRUCTOR) { 471 SLAB_CTOR_CONSTRUCTOR) {
392 memset(vnode, 0, sizeof(*vnode)); 472 memset(vnode, 0, sizeof(*vnode));
393 inode_init_once(&vnode->vfs_inode); 473 inode_init_once(&vnode->vfs_inode);
394 init_waitqueue_head(&vnode->update_waitq); 474 init_waitqueue_head(&vnode->update_waitq);
475 mutex_init(&vnode->permits_lock);
476 mutex_init(&vnode->validate_lock);
395 spin_lock_init(&vnode->lock); 477 spin_lock_init(&vnode->lock);
396 INIT_LIST_HEAD(&vnode->cb_link); 478 INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work);
397 INIT_LIST_HEAD(&vnode->cb_hash_link);
398 afs_timer_init(&vnode->cb_timeout,
399 &afs_vnode_cb_timed_out_ops);
400 } 479 }
480}
401 481
402} /* end afs_i_init_once() */
403
404/*****************************************************************************/
405/* 482/*
406 * allocate an AFS inode struct from our slab cache 483 * allocate an AFS inode struct from our slab cache
407 */ 484 */
@@ -409,8 +486,7 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
409{ 486{
410 struct afs_vnode *vnode; 487 struct afs_vnode *vnode;
411 488
412 vnode = (struct afs_vnode *) 489 vnode = kmem_cache_alloc(afs_inode_cachep, GFP_KERNEL);
413 kmem_cache_alloc(afs_inode_cachep, GFP_KERNEL);
414 if (!vnode) 490 if (!vnode)
415 return NULL; 491 return NULL;
416 492
@@ -421,21 +497,25 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
421 497
422 vnode->volume = NULL; 498 vnode->volume = NULL;
423 vnode->update_cnt = 0; 499 vnode->update_cnt = 0;
424 vnode->flags = 0; 500 vnode->flags = 1 << AFS_VNODE_UNSET;
501 vnode->cb_promised = false;
425 502
426 return &vnode->vfs_inode; 503 return &vnode->vfs_inode;
427} /* end afs_alloc_inode() */ 504}
428 505
429/*****************************************************************************/
430/* 506/*
431 * destroy an AFS inode struct 507 * destroy an AFS inode struct
432 */ 508 */
433static void afs_destroy_inode(struct inode *inode) 509static void afs_destroy_inode(struct inode *inode)
434{ 510{
511 struct afs_vnode *vnode = AFS_FS_I(inode);
512
435 _enter("{%lu}", inode->i_ino); 513 _enter("{%lu}", inode->i_ino);
436 514
437 kmem_cache_free(afs_inode_cachep, AFS_FS_I(inode)); 515 _debug("DESTROY INODE %p", inode);
438 516
439 atomic_dec(&afs_count_active_inodes); 517 ASSERTCMP(vnode->server, ==, NULL);
440 518
441} /* end afs_destroy_inode() */ 519 kmem_cache_free(afs_inode_cachep, vnode);
520 atomic_dec(&afs_count_active_inodes);
521}
diff --git a/fs/afs/super.h b/fs/afs/super.h
deleted file mode 100644
index 32de8cc6fae8..000000000000
--- a/fs/afs/super.h
+++ /dev/null
@@ -1,45 +0,0 @@
1/* super.h: AFS filesystem internal private data
2 *
3 * Copyright (c) 2002 Red Hat, Inc. All rights reserved.
4 *
5 * This software may be freely redistributed under the terms of the
6 * GNU General Public License.
7 *
8 * You should have received a copy of the GNU General Public License
9 * along with this program; if not, write to the Free Software
10 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
11 *
12 * Authors: David Woodhouse <dwmw2@cambridge.redhat.com>
13 * David Howells <dhowells@redhat.com>
14 *
15 */
16
17#ifndef _LINUX_AFS_SUPER_H
18#define _LINUX_AFS_SUPER_H
19
20#include <linux/fs.h>
21#include "server.h"
22
23#ifdef __KERNEL__
24
25/*****************************************************************************/
26/*
27 * AFS superblock private data
28 * - there's one superblock per volume
29 */
30struct afs_super_info
31{
32 struct afs_volume *volume; /* volume record */
33 char rwparent; /* T if parent is R/W AFS volume */
34};
35
36static inline struct afs_super_info *AFS_FS_S(struct super_block *sb)
37{
38 return sb->s_fs_info;
39}
40
41extern struct file_system_type afs_fs_type;
42
43#endif /* __KERNEL__ */
44
45#endif /* _LINUX_AFS_SUPER_H */
diff --git a/fs/afs/transport.h b/fs/afs/transport.h
deleted file mode 100644
index 7013ae6ccc8c..000000000000
--- a/fs/afs/transport.h
+++ /dev/null
@@ -1,21 +0,0 @@
1/* transport.h: AFS transport management
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_AFS_TRANSPORT_H
13#define _LINUX_AFS_TRANSPORT_H
14
15#include "types.h"
16#include <rxrpc/transport.h>
17
18/* the cache manager transport endpoint */
19extern struct rxrpc_transport *afs_transport;
20
21#endif /* _LINUX_AFS_TRANSPORT_H */
diff --git a/fs/afs/types.h b/fs/afs/types.h
deleted file mode 100644
index b1a2367c7587..000000000000
--- a/fs/afs/types.h
+++ /dev/null
@@ -1,125 +0,0 @@
1/* types.h: AFS types
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_AFS_TYPES_H
13#define _LINUX_AFS_TYPES_H
14
15#ifdef __KERNEL__
16#include <rxrpc/types.h>
17#endif /* __KERNEL__ */
18
19typedef unsigned afs_volid_t;
20typedef unsigned afs_vnodeid_t;
21typedef unsigned long long afs_dataversion_t;
22
23typedef enum {
24 AFSVL_RWVOL, /* read/write volume */
25 AFSVL_ROVOL, /* read-only volume */
26 AFSVL_BACKVOL, /* backup volume */
27} __attribute__((packed)) afs_voltype_t;
28
29typedef enum {
30 AFS_FTYPE_INVALID = 0,
31 AFS_FTYPE_FILE = 1,
32 AFS_FTYPE_DIR = 2,
33 AFS_FTYPE_SYMLINK = 3,
34} afs_file_type_t;
35
36#ifdef __KERNEL__
37
38struct afs_cell;
39struct afs_vnode;
40
41/*****************************************************************************/
42/*
43 * AFS file identifier
44 */
45struct afs_fid
46{
47 afs_volid_t vid; /* volume ID */
48 afs_vnodeid_t vnode; /* file index within volume */
49 unsigned unique; /* unique ID number (file index version) */
50};
51
52/*****************************************************************************/
53/*
54 * AFS callback notification
55 */
56typedef enum {
57 AFSCM_CB_UNTYPED = 0, /* no type set on CB break */
58 AFSCM_CB_EXCLUSIVE = 1, /* CB exclusive to CM [not implemented] */
59 AFSCM_CB_SHARED = 2, /* CB shared by other CM's */
60 AFSCM_CB_DROPPED = 3, /* CB promise cancelled by file server */
61} afs_callback_type_t;
62
63struct afs_callback
64{
65 struct afs_server *server; /* server that made the promise */
66 struct afs_fid fid; /* file identifier */
67 unsigned version; /* callback version */
68 unsigned expiry; /* time at which expires */
69 afs_callback_type_t type; /* type of callback */
70};
71
72#define AFSCBMAX 50
73
74/*****************************************************************************/
75/*
76 * AFS volume information
77 */
78struct afs_volume_info
79{
80 afs_volid_t vid; /* volume ID */
81 afs_voltype_t type; /* type of this volume */
82 afs_volid_t type_vids[5]; /* volume ID's for possible types for this vol */
83
84 /* list of fileservers serving this volume */
85 size_t nservers; /* number of entries used in servers[] */
86 struct {
87 struct in_addr addr; /* fileserver address */
88 } servers[8];
89};
90
91/*****************************************************************************/
92/*
93 * AFS file status information
94 */
95struct afs_file_status
96{
97 unsigned if_version; /* interface version */
98#define AFS_FSTATUS_VERSION 1
99
100 afs_file_type_t type; /* file type */
101 unsigned nlink; /* link count */
102 size_t size; /* file size */
103 afs_dataversion_t version; /* current data version */
104 unsigned author; /* author ID */
105 unsigned owner; /* owner ID */
106 unsigned caller_access; /* access rights for authenticated caller */
107 unsigned anon_access; /* access rights for unauthenticated caller */
108 umode_t mode; /* UNIX mode */
109 struct afs_fid parent; /* parent file ID */
110 time_t mtime_client; /* last time client changed data */
111 time_t mtime_server; /* last time server changed data */
112};
113
114/*****************************************************************************/
115/*
116 * AFS volume synchronisation information
117 */
118struct afs_volsync
119{
120 time_t creation; /* volume creation time */
121};
122
123#endif /* __KERNEL__ */
124
125#endif /* _LINUX_AFS_TYPES_H */
diff --git a/fs/afs/use-rtnetlink.c b/fs/afs/use-rtnetlink.c
new file mode 100644
index 000000000000..82f0daa28970
--- /dev/null
+++ b/fs/afs/use-rtnetlink.c
@@ -0,0 +1,473 @@
1/* RTNETLINK client
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/netlink.h>
12#include <linux/rtnetlink.h>
13#include <linux/if_addr.h>
14#include <linux/if_arp.h>
15#include <linux/inetdevice.h>
16#include <net/netlink.h>
17#include "internal.h"
18
19struct afs_rtm_desc {
20 struct socket *nlsock;
21 struct afs_interface *bufs;
22 u8 *mac;
23 size_t nbufs;
24 size_t maxbufs;
25 void *data;
26 ssize_t datalen;
27 size_t datamax;
28 int msg_seq;
29 unsigned mac_index;
30 bool wantloopback;
31 int (*parse)(struct afs_rtm_desc *, struct nlmsghdr *);
32};
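
struct afs_rtm_desc bundles the receive buffers with a parse callback, so the single read loop (afs_read_rtm(), further down) can service RTM_GETADDR and both RTM_GETLINK passes without knowing what it is parsing. A stripped-down sketch of that descriptor-plus-callback shape (hypothetical names, plain C):

#include <stdio.h>

struct desc {
	const char *data;		/* message payload */
	int (*parse)(struct desc *);	/* per-message-type parser */
};

static int parse_addr(struct desc *d) { printf("addr: %s\n", d->data); return 0; }
static int parse_link(struct desc *d) { printf("link: %s\n", d->data); return 0; }

/* one generic loop, many parsers */
static int read_loop(struct desc *d)
{
	return d->parse(d);
}

int main(void)
{
	struct desc d = { .data = "eth0", .parse = parse_addr };

	read_loop(&d);		/* first pass: addresses */
	d.parse = parse_link;	/* second pass: links, same loop */
	return read_loop(&d);
}
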
33
34/*
35 * parse an RTM_GETADDR response
36 */
37static int afs_rtm_getaddr_parse(struct afs_rtm_desc *desc,
38 struct nlmsghdr *nlhdr)
39{
40 struct afs_interface *this;
41 struct ifaddrmsg *ifa;
42 struct rtattr *rtattr;
43 const char *name;
44 size_t len;
45
46 ifa = (struct ifaddrmsg *) NLMSG_DATA(nlhdr);
47
48 _enter("{ix=%d,af=%d}", ifa->ifa_index, ifa->ifa_family);
49
50 if (ifa->ifa_family != AF_INET) {
51 _leave(" = 0 [family %d]", ifa->ifa_family);
52 return 0;
53 }
54 if (desc->nbufs >= desc->maxbufs) {
55 _leave(" = 0 [max %zu/%zu]", desc->nbufs, desc->maxbufs);
56 return 0;
57 }
58
59 this = &desc->bufs[desc->nbufs];
60
61 this->index = ifa->ifa_index;
62 this->netmask.s_addr = inet_make_mask(ifa->ifa_prefixlen);
63 this->mtu = 0;
64
65 rtattr = NLMSG_DATA(nlhdr) + NLMSG_ALIGN(sizeof(struct ifaddrmsg));
66 len = NLMSG_PAYLOAD(nlhdr, sizeof(struct ifaddrmsg));
67
68 name = "unknown";
69 for (; RTA_OK(rtattr, len); rtattr = RTA_NEXT(rtattr, len)) {
70 switch (rtattr->rta_type) {
71 case IFA_ADDRESS:
72 memcpy(&this->address, RTA_DATA(rtattr), 4);
73 break;
74 case IFA_LABEL:
75 name = RTA_DATA(rtattr);
76 break;
77 }
78 }
79
80 _debug("%s: "NIPQUAD_FMT"/"NIPQUAD_FMT,
81 name, NIPQUAD(this->address), NIPQUAD(this->netmask));
82
83 desc->nbufs++;
84 _leave(" = 0");
85 return 0;
86}
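
The parser above walks the attributes trailing the ifaddrmsg with the standard RTA_OK/RTA_NEXT macros. A self-contained sketch that hand-builds one attribute and walks it the same way (Linux UAPI headers assumed; the buffer contents are made up):

#include <stdio.h>
#include <string.h>
#include <linux/if_addr.h>
#include <linux/rtnetlink.h>

int main(void)
{
	unsigned char buf[64];
	struct rtattr *rta = (struct rtattr *) buf;
	int len;

	/* hand-build a single IFA_LABEL attribute */
	memset(buf, 0, sizeof(buf));
	rta->rta_type = IFA_LABEL;
	rta->rta_len = RTA_LENGTH(5);		/* "eth0" plus NUL */
	memcpy(RTA_DATA(rta), "eth0", 5);

	/* walk it exactly as afs_rtm_getaddr_parse() does */
	len = RTA_ALIGN(rta->rta_len);
	for (rta = (struct rtattr *) buf; RTA_OK(rta, len);
	     rta = RTA_NEXT(rta, len))
		if (rta->rta_type == IFA_LABEL)
			printf("label: %s\n", (char *) RTA_DATA(rta));
	return 0;
}
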
87
88/*
89 * parse an RTM_GETLINK response for MTUs
90 */
91static int afs_rtm_getlink_if_parse(struct afs_rtm_desc *desc,
92 struct nlmsghdr *nlhdr)
93{
94 struct afs_interface *this;
95 struct ifinfomsg *ifi;
96 struct rtattr *rtattr;
97 const char *name;
98 size_t len, loop;
99
100 ifi = (struct ifinfomsg *) NLMSG_DATA(nlhdr);
101
102 _enter("{ix=%d}", ifi->ifi_index);
103
104 for (loop = 0; loop < desc->nbufs; loop++) {
105 this = &desc->bufs[loop];
106 if (this->index == ifi->ifi_index)
107 goto found;
108 }
109
110 _leave(" = 0 [no match]");
111 return 0;
112
113found:
114 if (ifi->ifi_type == ARPHRD_LOOPBACK && !desc->wantloopback) {
115 _leave(" = 0 [loopback]");
116 return 0;
117 }
118
119 rtattr = NLMSG_DATA(nlhdr) + NLMSG_ALIGN(sizeof(struct ifinfomsg));
120 len = NLMSG_PAYLOAD(nlhdr, sizeof(struct ifinfomsg));
121
122 name = "unknown";
123 for (; RTA_OK(rtattr, len); rtattr = RTA_NEXT(rtattr, len)) {
124 switch (rtattr->rta_type) {
125 case IFLA_MTU:
126 memcpy(&this->mtu, RTA_DATA(rtattr), 4);
127 break;
128 case IFLA_IFNAME:
129 name = RTA_DATA(rtattr);
130 break;
131 }
132 }
133
134 _debug("%s: "NIPQUAD_FMT"/"NIPQUAD_FMT" mtu %u",
135 name, NIPQUAD(this->address), NIPQUAD(this->netmask),
136 this->mtu);
137
138 _leave(" = 0");
139 return 0;
140}
141
142/*
143 * parse an RTM_GETLINK response for the MAC address belonging to the lowest
144 * non-internal interface
145 */
146static int afs_rtm_getlink_mac_parse(struct afs_rtm_desc *desc,
147 struct nlmsghdr *nlhdr)
148{
149 struct ifinfomsg *ifi;
150 struct rtattr *rtattr;
151 const char *name;
152 size_t remain, len;
153 bool set;
154
155 ifi = (struct ifinfomsg *) NLMSG_DATA(nlhdr);
156
157 _enter("{ix=%d}", ifi->ifi_index);
158
159 if (ifi->ifi_index >= desc->mac_index) {
160 _leave(" = 0 [high]");
161 return 0;
162 }
163 if (ifi->ifi_type == ARPHRD_LOOPBACK) {
164 _leave(" = 0 [loopback]");
165 return 0;
166 }
167
168 rtattr = NLMSG_DATA(nlhdr) + NLMSG_ALIGN(sizeof(struct ifinfomsg));
169 remain = NLMSG_PAYLOAD(nlhdr, sizeof(struct ifinfomsg));
170
171 name = "unknown";
172 set = false;
173 for (; RTA_OK(rtattr, remain); rtattr = RTA_NEXT(rtattr, remain)) {
174 switch (rtattr->rta_type) {
175 case IFLA_ADDRESS:
176 len = RTA_PAYLOAD(rtattr);
177 memcpy(desc->mac, RTA_DATA(rtattr),
178 min_t(size_t, len, 6));
179 desc->mac_index = ifi->ifi_index;
180 set = true;
181 break;
182 case IFLA_IFNAME:
183 name = RTA_DATA(rtattr);
184 break;
185 }
186 }
187
188 if (set)
189 _debug("%s: %02x:%02x:%02x:%02x:%02x:%02x",
190 name,
191 desc->mac[0], desc->mac[1], desc->mac[2],
192 desc->mac[3], desc->mac[4], desc->mac[5]);
193
194 _leave(" = 0");
195 return 0;
196}
197
198/*
199 * read the rtnetlink response and pass to parsing routine
200 */
201static int afs_read_rtm(struct afs_rtm_desc *desc)
202{
203 struct nlmsghdr *nlhdr, tmphdr;
204 struct msghdr msg;
205 struct kvec iov[1];
206 void *data;
207 bool last = false;
208 int len, ret, remain;
209
210 _enter("");
211
212 do {
213 /* first of all peek to see how big the packet is */
214 memset(&msg, 0, sizeof(msg));
215 iov[0].iov_base = &tmphdr;
216 iov[0].iov_len = sizeof(tmphdr);
217 len = kernel_recvmsg(desc->nlsock, &msg, iov, 1,
218 sizeof(tmphdr), MSG_PEEK | MSG_TRUNC);
219 if (len < 0) {
220 _leave(" = %d [peek]", len);
221 return len;
222 }
223 if (len == 0)
224 continue;
225 if (len < sizeof(tmphdr) || len < NLMSG_PAYLOAD(&tmphdr, 0)) {
226 _leave(" = -EMSGSIZE");
227 return -EMSGSIZE;
228 }
229
230 if (desc->datamax < len) {
231 kfree(desc->data);
232 desc->data = NULL;
233 data = kmalloc(len, GFP_KERNEL);
234 if (!data)
235 return -ENOMEM;
236 desc->data = data;
237 }
238 desc->datamax = len;
239
240 /* read all the data from this packet */
241 iov[0].iov_base = desc->data;
242 iov[0].iov_len = desc->datamax;
243 desc->datalen = kernel_recvmsg(desc->nlsock, &msg, iov, 1,
244 desc->datamax, 0);
245 if (desc->datalen < 0) {
246 _leave(" = %ld [recv]", desc->datalen);
247 return desc->datalen;
248 }
249
250 nlhdr = desc->data;
251
252 /* check if the header is valid */
253 if (!NLMSG_OK(nlhdr, desc->datalen) ||
254 nlhdr->nlmsg_type == NLMSG_ERROR) {
255 _leave(" = -EIO");
256 return -EIO;
257 }
258
259 /* see if this is the last message */
260 if (nlhdr->nlmsg_type == NLMSG_DONE ||
261 !(nlhdr->nlmsg_flags & NLM_F_MULTI))
262 last = true;
263
264 /* parse the bits we got this time */
265 nlmsg_for_each_msg(nlhdr, desc->data, desc->datalen, remain) {
266 ret = desc->parse(desc, nlhdr);
267 if (ret < 0) {
268 _leave(" = %d [parse]", ret);
269 return ret;
270 }
271 }
272
273 } while (!last);
274
275 _leave(" = 0");
276 return 0;
277}
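
afs_read_rtm() sizes its buffer by peeking: with MSG_PEEK | MSG_TRUNC the kernel reports the full datagram length without consuming it, so the real read can go into an exactly-sized buffer. A user-space sketch of the same two-step receive over a datagram socketpair (error handling trimmed):

#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>

int main(void)
{
	int sv[2];
	char *buf;
	ssize_t len;

	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) < 0)
		return 1;
	send(sv[0], "a datagram of unknown size", 26, 0);

	/* step 1: peek; MSG_TRUNC makes recv() report the full length */
	len = recv(sv[1], NULL, 0, MSG_PEEK | MSG_TRUNC);
	if (len < 0)
		return 1;

	/* step 2: the datagram is still queued; read it for real */
	buf = malloc(len + 1);
	len = recv(sv[1], buf, len, 0);
	buf[len] = '\0';
	printf("%zd bytes: %s\n", len, buf);
	free(buf);
	return 0;
}
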
278
279/*
280 * list the interface bound addresses to get the address and netmask
281 */
282static int afs_rtm_getaddr(struct afs_rtm_desc *desc)
283{
284 struct msghdr msg;
285 struct kvec iov[1];
286 int ret;
287
288 struct {
289 struct nlmsghdr nl_msg __attribute__((aligned(NLMSG_ALIGNTO)));
290 struct ifaddrmsg addr_msg __attribute__((aligned(NLMSG_ALIGNTO)));
291 } request;
292
293 _enter("");
294
295 memset(&request, 0, sizeof(request));
296
297 request.nl_msg.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
298 request.nl_msg.nlmsg_type = RTM_GETADDR;
299 request.nl_msg.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
300 request.nl_msg.nlmsg_seq = desc->msg_seq++;
301 request.nl_msg.nlmsg_pid = 0;
302
303 memset(&msg, 0, sizeof(msg));
304 iov[0].iov_base = &request;
305 iov[0].iov_len = sizeof(request);
306
307 ret = kernel_sendmsg(desc->nlsock, &msg, iov, 1, iov[0].iov_len);
308 _leave(" = %d", ret);
309 return ret;
310}
311
312/*
313 * list the interface link statuses to get the MTUs
314 */
315static int afs_rtm_getlink(struct afs_rtm_desc *desc)
316{
317 struct msghdr msg;
318 struct kvec iov[1];
319 int ret;
320
321 struct {
322 struct nlmsghdr nl_msg __attribute__((aligned(NLMSG_ALIGNTO)));
323 struct ifinfomsg link_msg __attribute__((aligned(NLMSG_ALIGNTO)));
324 } request;
325
326 _enter("");
327
328 memset(&request, 0, sizeof(request));
329
330 request.nl_msg.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
331 request.nl_msg.nlmsg_type = RTM_GETLINK;
332 request.nl_msg.nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
333 request.nl_msg.nlmsg_seq = desc->msg_seq++;
334 request.nl_msg.nlmsg_pid = 0;
335
336 memset(&msg, 0, sizeof(msg));
337 iov[0].iov_base = &request;
338 iov[0].iov_len = sizeof(request);
339
340 ret = kernel_sendmsg(desc->nlsock, &msg, iov, 1, iov[0].iov_len);
341 _leave(" = %d", ret);
342 return ret;
343}
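
Both request builders marshal a bare nlmsghdr plus a family-specific header and send it down the netlink socket; note GETADDR uses NLM_F_DUMP while GETLINK uses NLM_F_ROOT (NLM_F_DUMP is NLM_F_ROOT | NLM_F_MATCH, so the two amount to the same dump here). A user-space sketch of the equivalent RTM_GETLINK dump (a real client would loop recv() until NLMSG_DONE for multipart replies; one recv() keeps the sketch short):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

int main(void)
{
	struct { struct nlmsghdr nh; struct rtgenmsg g; } req;
	char buf[16384];
	struct nlmsghdr *nh;
	ssize_t len;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	if (fd < 0)
		return 1;

	memset(&req, 0, sizeof(req));
	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.g));
	req.nh.nlmsg_type = RTM_GETLINK;
	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.g.rtgen_family = AF_UNSPEC;
	if (send(fd, &req, req.nh.nlmsg_len, 0) < 0)
		return 1;

	len = recv(fd, buf, sizeof(buf), 0);
	for (nh = (struct nlmsghdr *) buf; NLMSG_OK(nh, len);
	     nh = NLMSG_NEXT(nh, len))
		printf("msg type %u, len %u\n", nh->nlmsg_type, nh->nlmsg_len);
	close(fd);
	return 0;
}
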
344
345/*
346 * cull any interface records for which there isn't an MTU value
347 */
348static void afs_cull_interfaces(struct afs_rtm_desc *desc)
349{
350 struct afs_interface *bufs = desc->bufs;
351 size_t nbufs = desc->nbufs;
352 int loop, point = 0;
353
354 _enter("{%zu}", nbufs);
355
356 for (loop = 0; loop < nbufs; loop++) {
357 if (desc->bufs[loop].mtu != 0) {
358 if (loop != point) {
359 ASSERTCMP(loop, >, point);
360 bufs[point] = bufs[loop];
361 }
362 point++;
363 }
364 }
365
366 desc->nbufs = point;
367 _leave(" [%zu/%zu]", desc->nbufs, nbufs);
368}
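
afs_cull_interfaces() compacts the array in place: a write pointer (point) trails the read pointer (loop) and only records with an MTU are copied down. The same idiom in a tiny runnable form:

#include <stdio.h>

/* compact an array in place, keeping only nonzero entries */
static int cull(int *a, int n)
{
	int i, point = 0;

	for (i = 0; i < n; i++)
		if (a[i] != 0)
			a[point++] = a[i];	/* safe: point <= i always */
	return point;
}

int main(void)
{
	int mtus[] = { 1500, 0, 9000, 0, 1280 };
	int i, n = cull(mtus, 5);

	for (i = 0; i < n; i++)
		printf("%d\n", mtus[i]);
	return 0;
}
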
369
370/*
371 * get a list of this system's interface IPv4 addresses, netmasks and MTUs
372 * - returns the number of interface records in the buffer
373 */
374int afs_get_ipv4_interfaces(struct afs_interface *bufs, size_t maxbufs,
375 bool wantloopback)
376{
377 struct afs_rtm_desc desc;
378 int ret, loop;
379
380 _enter("");
381
382 memset(&desc, 0, sizeof(desc));
383 desc.bufs = bufs;
384 desc.maxbufs = maxbufs;
385 desc.wantloopback = wantloopback;
386
387 ret = sock_create_kern(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE,
388 &desc.nlsock);
389 if (ret < 0) {
390 _leave(" = %d [sock]", ret);
391 return ret;
392 }
393
394 /* issue RTM_GETADDR */
395 desc.parse = afs_rtm_getaddr_parse;
396 ret = afs_rtm_getaddr(&desc);
397 if (ret < 0)
398 goto error;
399 ret = afs_read_rtm(&desc);
400 if (ret < 0)
401 goto error;
402
403 /* issue RTM_GETLINK */
404 desc.parse = afs_rtm_getlink_if_parse;
405 ret = afs_rtm_getlink(&desc);
406 if (ret < 0)
407 goto error;
408 ret = afs_read_rtm(&desc);
409 if (ret < 0)
410 goto error;
411
412 afs_cull_interfaces(&desc);
413 ret = desc.nbufs;
414
415 for (loop = 0; loop < ret; loop++)
416 _debug("[%d] "NIPQUAD_FMT"/"NIPQUAD_FMT" mtu %u",
417 bufs[loop].index,
418 NIPQUAD(bufs[loop].address),
419 NIPQUAD(bufs[loop].netmask),
420 bufs[loop].mtu);
421
422error:
423 kfree(desc.data);
424 sock_release(desc.nlsock);
425 _leave(" = %d", ret);
426 return ret;
427}
428
429/*
430 * get a MAC address from the lowest-indexed non-loopback ethernet interface that has a real one
431 * - the buffer should be 6 bytes in size
432 */
433int afs_get_MAC_address(u8 mac[6])
434{
435 struct afs_rtm_desc desc;
436 int ret;
437
438 _enter("");
439
440 memset(&desc, 0, sizeof(desc));
441 desc.mac = mac;
442 desc.mac_index = UINT_MAX;
443
444 ret = sock_create_kern(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE,
445 &desc.nlsock);
446 if (ret < 0) {
447 _leave(" = %d [sock]", ret);
448 return ret;
449 }
450
451 /* issue RTM_GETLINK */
452 desc.parse = afs_rtm_getlink_mac_parse;
453 ret = afs_rtm_getlink(&desc);
454 if (ret < 0)
455 goto error;
456 ret = afs_read_rtm(&desc);
457 if (ret < 0)
458 goto error;
459
460 if (desc.mac_index < UINT_MAX) {
461 /* got a MAC address */
462 _debug("[%d] %02x:%02x:%02x:%02x:%02x:%02x",
463 desc.mac_index,
464 mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
465 } else {
466 ret = -ENONET;
467 }
468
469error:
470 sock_release(desc.nlsock);
471 _leave(" = %d", ret);
472 return ret;
473}
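
For comparison, user space usually fetches a MAC address with the SIOCGIFHWADDR ioctl rather than by parsing an RTM_GETLINK dump; a sketch of that route ("eth0" is an assumed interface name):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>

int main(void)
{
	struct ifreq ifr;
	unsigned char *mac;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);	/* assumed name */
	if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0)
		return 1;

	mac = (unsigned char *) ifr.ifr_hwaddr.sa_data;
	printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
	       mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
	close(fd);
	return 0;
}
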
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 7b0e3192ee39..36c1306e09e0 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -1,4 +1,4 @@
1/* vlclient.c: AFS Volume Location Service client 1/* AFS Volume Location Service client
2 * 2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
@@ -11,247 +11,76 @@
11 11
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <rxrpc/rxrpc.h>
15#include <rxrpc/transport.h>
16#include <rxrpc/connection.h>
17#include <rxrpc/call.h>
18#include "server.h"
19#include "volume.h"
20#include "vlclient.h"
21#include "kafsasyncd.h"
22#include "kafstimod.h"
23#include "errors.h"
24#include "internal.h" 14#include "internal.h"
25 15
26#define VLGETENTRYBYID 503 /* AFS Get Cache Entry By ID operation ID */
27#define VLGETENTRYBYNAME 504 /* AFS Get Cache Entry By Name operation ID */
28#define VLPROBE 514 /* AFS Probe Volume Location Service operation ID */
29
30static void afs_rxvl_get_entry_by_id_attn(struct rxrpc_call *call);
31static void afs_rxvl_get_entry_by_id_error(struct rxrpc_call *call);
32
33/*****************************************************************************/
34/* 16/*
35 * map afs VL abort codes to/from Linux error codes 17 * map volume locator abort codes to error codes
36 * - called with call->lock held
37 */ 18 */
38static void afs_rxvl_aemap(struct rxrpc_call *call) 19static int afs_vl_abort_to_error(u32 abort_code)
39{ 20{
40 int err; 21 _enter("%u", abort_code);
41 22
42 _enter("{%u,%u,%d}", 23 switch (abort_code) {
43 call->app_err_state, call->app_abort_code, call->app_errno); 24 case AFSVL_IDEXIST: return -EEXIST;
44 25 case AFSVL_IO: return -EREMOTEIO;
45 switch (call->app_err_state) { 26 case AFSVL_NAMEEXIST: return -EEXIST;
46 case RXRPC_ESTATE_LOCAL_ABORT: 27 case AFSVL_CREATEFAIL: return -EREMOTEIO;
47 call->app_abort_code = -call->app_errno; 28 case AFSVL_NOENT: return -ENOMEDIUM;
48 return; 29 case AFSVL_EMPTY: return -ENOMEDIUM;
49 30 case AFSVL_ENTDELETED: return -ENOMEDIUM;
50 case RXRPC_ESTATE_PEER_ABORT: 31 case AFSVL_BADNAME: return -EINVAL;
51 switch (call->app_abort_code) { 32 case AFSVL_BADINDEX: return -EINVAL;
52 case AFSVL_IDEXIST: err = -EEXIST; break; 33 case AFSVL_BADVOLTYPE: return -EINVAL;
53 case AFSVL_IO: err = -EREMOTEIO; break; 34 case AFSVL_BADSERVER: return -EINVAL;
54 case AFSVL_NAMEEXIST: err = -EEXIST; break; 35 case AFSVL_BADPARTITION: return -EINVAL;
55 case AFSVL_CREATEFAIL: err = -EREMOTEIO; break; 36 case AFSVL_REPSFULL: return -EFBIG;
56 case AFSVL_NOENT: err = -ENOMEDIUM; break; 37 case AFSVL_NOREPSERVER: return -ENOENT;
57 case AFSVL_EMPTY: err = -ENOMEDIUM; break; 38 case AFSVL_DUPREPSERVER: return -EEXIST;
58 case AFSVL_ENTDELETED: err = -ENOMEDIUM; break; 39 case AFSVL_RWNOTFOUND: return -ENOENT;
59 case AFSVL_BADNAME: err = -EINVAL; break; 40 case AFSVL_BADREFCOUNT: return -EINVAL;
60 case AFSVL_BADINDEX: err = -EINVAL; break; 41 case AFSVL_SIZEEXCEEDED: return -EINVAL;
61 case AFSVL_BADVOLTYPE: err = -EINVAL; break; 42 case AFSVL_BADENTRY: return -EINVAL;
62 case AFSVL_BADSERVER: err = -EINVAL; break; 43 case AFSVL_BADVOLIDBUMP: return -EINVAL;
63 case AFSVL_BADPARTITION: err = -EINVAL; break; 44 case AFSVL_IDALREADYHASHED: return -EINVAL;
64 case AFSVL_REPSFULL: err = -EFBIG; break; 45 case AFSVL_ENTRYLOCKED: return -EBUSY;
65 case AFSVL_NOREPSERVER: err = -ENOENT; break; 46 case AFSVL_BADVOLOPER: return -EBADRQC;
66 case AFSVL_DUPREPSERVER: err = -EEXIST; break; 47 case AFSVL_BADRELLOCKTYPE: return -EINVAL;
67 case AFSVL_RWNOTFOUND: err = -ENOENT; break; 48 case AFSVL_RERELEASE: return -EREMOTEIO;
68 case AFSVL_BADREFCOUNT: err = -EINVAL; break; 49 case AFSVL_BADSERVERFLAG: return -EINVAL;
69 case AFSVL_SIZEEXCEEDED: err = -EINVAL; break; 50 case AFSVL_PERM: return -EACCES;
70 case AFSVL_BADENTRY: err = -EINVAL; break; 51 case AFSVL_NOMEM: return -EREMOTEIO;
71 case AFSVL_BADVOLIDBUMP: err = -EINVAL; break;
72 case AFSVL_IDALREADYHASHED: err = -EINVAL; break;
73 case AFSVL_ENTRYLOCKED: err = -EBUSY; break;
74 case AFSVL_BADVOLOPER: err = -EBADRQC; break;
75 case AFSVL_BADRELLOCKTYPE: err = -EINVAL; break;
76 case AFSVL_RERELEASE: err = -EREMOTEIO; break;
77 case AFSVL_BADSERVERFLAG: err = -EINVAL; break;
78 case AFSVL_PERM: err = -EACCES; break;
79 case AFSVL_NOMEM: err = -EREMOTEIO; break;
80 default:
81 err = afs_abort_to_error(call->app_abort_code);
82 break;
83 }
84 call->app_errno = err;
85 return;
86
87 default: 52 default:
88 return; 53 return afs_abort_to_error(abort_code);
89 } 54 }
90} /* end afs_rxvl_aemap() */ 55}
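
The rewrite turns the callback-driven aemap into a pure abort-code-to-errno function. The same mapping could equally be table-driven, which keeps the data and the lookup separate; a sketch (Linux errno values; the abort codes below are placeholders standing in for the real AFSVL_* constants, not taken from the source):

#include <stdio.h>
#include <errno.h>

struct abort_map { unsigned abort_code; int error; };

static const struct abort_map vl_map[] = {
	{ 363520, -EEXIST },	/* placeholder for AFSVL_IDEXIST */
	{ 363521, -EREMOTEIO },	/* placeholder for AFSVL_IO */
};

static int abort_to_error(unsigned abort_code)
{
	size_t i;

	for (i = 0; i < sizeof(vl_map) / sizeof(vl_map[0]); i++)
		if (vl_map[i].abort_code == abort_code)
			return vl_map[i].error;
	return -EREMOTEIO;	/* fallback for unknown codes */
}

int main(void)
{
	printf("%d\n", abort_to_error(363520));
	return 0;
}
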
91 56
92#if 0
93/*****************************************************************************/
94/* 57/*
95 * probe a volume location server to see if it is still alive -- unused 58 * deliver reply data to a VL.GetEntryByXXX call
96 */ 59 */
97static int afs_rxvl_probe(struct afs_server *server, int alloc_flags) 60static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call,
61 struct sk_buff *skb, bool last)
98{ 62{
99 struct rxrpc_connection *conn; 63 struct afs_cache_vlocation *entry;
100 struct rxrpc_call *call; 64 __be32 *bp;
101 struct kvec piov[1]; 65 u32 tmp;
102 size_t sent; 66 int loop;
103 int ret;
104 __be32 param[1];
105
106 DECLARE_WAITQUEUE(myself, current);
107
108 /* get hold of the vlserver connection */
109 ret = afs_server_get_vlconn(server, &conn);
110 if (ret < 0)
111 goto out;
112
113 /* create a call through that connection */
114 ret = rxrpc_create_call(conn, NULL, NULL, afs_rxvl_aemap, &call);
115 if (ret < 0) {
116 printk("kAFS: Unable to create call: %d\n", ret);
117 goto out_put_conn;
118 }
119 call->app_opcode = VLPROBE;
120
121 /* we want to get event notifications from the call */
122 add_wait_queue(&call->waitq, &myself);
123
124 /* marshall the parameters */
125 param[0] = htonl(VLPROBE);
126 piov[0].iov_len = sizeof(param);
127 piov[0].iov_base = param;
128
129 /* send the parameters to the server */
130 ret = rxrpc_call_write_data(call, 1, piov, RXRPC_LAST_PACKET,
131 alloc_flags, 0, &sent);
132 if (ret < 0)
133 goto abort;
134
135 /* wait for the reply to completely arrive */
136 for (;;) {
137 set_current_state(TASK_INTERRUPTIBLE);
138 if (call->app_call_state != RXRPC_CSTATE_CLNT_RCV_REPLY ||
139 signal_pending(current))
140 break;
141 schedule();
142 }
143 set_current_state(TASK_RUNNING);
144
145 ret = -EINTR;
146 if (signal_pending(current))
147 goto abort;
148
149 switch (call->app_call_state) {
150 case RXRPC_CSTATE_ERROR:
151 ret = call->app_errno;
152 goto out_unwait;
153
154 case RXRPC_CSTATE_CLNT_GOT_REPLY:
155 ret = 0;
156 goto out_unwait;
157
158 default:
159 BUG();
160 }
161
162 abort:
163 set_current_state(TASK_UNINTERRUPTIBLE);
164 rxrpc_call_abort(call, ret);
165 schedule();
166
167 out_unwait:
168 set_current_state(TASK_RUNNING);
169 remove_wait_queue(&call->waitq, &myself);
170 rxrpc_put_call(call);
171 out_put_conn:
172 rxrpc_put_connection(conn);
173 out:
174 return ret;
175 67
176} /* end afs_rxvl_probe() */ 68 _enter(",,%u", last);
177#endif
178 69
179/*****************************************************************************/ 70 afs_transfer_reply(call, skb);
180/* 71 if (!last)
181 * look up a volume location database entry by name 72 return 0;
182 */
183int afs_rxvl_get_entry_by_name(struct afs_server *server,
184 const char *volname,
185 unsigned volnamesz,
186 struct afs_cache_vlocation *entry)
187{
188 DECLARE_WAITQUEUE(myself, current);
189
190 struct rxrpc_connection *conn;
191 struct rxrpc_call *call;
192 struct kvec piov[3];
193 unsigned tmp;
194 size_t sent;
195 int ret, loop;
196 __be32 *bp, param[2], zero;
197
198 _enter(",%*.*s,%u,", volnamesz, volnamesz, volname, volnamesz);
199
200 memset(entry, 0, sizeof(*entry));
201
202 /* get hold of the vlserver connection */
203 ret = afs_server_get_vlconn(server, &conn);
204 if (ret < 0)
205 goto out;
206
207 /* create a call through that connection */
208 ret = rxrpc_create_call(conn, NULL, NULL, afs_rxvl_aemap, &call);
209 if (ret < 0) {
210 printk("kAFS: Unable to create call: %d\n", ret);
211 goto out_put_conn;
212 }
213 call->app_opcode = VLGETENTRYBYNAME;
214 73
215 /* we want to get event notifications from the call */ 74 if (call->reply_size != call->reply_max)
216 add_wait_queue(&call->waitq, &myself); 75 return -EBADMSG;
217 76
218 /* marshall the parameters */ 77 /* unmarshall the reply once we've received all of it */
219 piov[1].iov_len = volnamesz; 78 entry = call->reply;
220 piov[1].iov_base = (char *) volname; 79 bp = call->buffer;
221
222 zero = 0;
223 piov[2].iov_len = (4 - (piov[1].iov_len & 3)) & 3;
224 piov[2].iov_base = &zero;
225
226 param[0] = htonl(VLGETENTRYBYNAME);
227 param[1] = htonl(piov[1].iov_len);
228
229 piov[0].iov_len = sizeof(param);
230 piov[0].iov_base = param;
231
232 /* send the parameters to the server */
233 ret = rxrpc_call_write_data(call, 3, piov, RXRPC_LAST_PACKET, GFP_NOFS,
234 0, &sent);
235 if (ret < 0)
236 goto abort;
237
238 /* wait for the reply to completely arrive */
239 bp = rxrpc_call_alloc_scratch(call, 384);
240
241 ret = rxrpc_call_read_data(call, bp, 384,
242 RXRPC_CALL_READ_BLOCK |
243 RXRPC_CALL_READ_ALL);
244 if (ret < 0) {
245 if (ret == -ECONNABORTED) {
246 ret = call->app_errno;
247 goto out_unwait;
248 }
249 goto abort;
250 }
251 80
252 /* unmarshall the reply */
253 for (loop = 0; loop < 64; loop++) 81 for (loop = 0; loop < 64; loop++)
254 entry->name[loop] = ntohl(*bp++); 82 entry->name[loop] = ntohl(*bp++);
83 entry->name[loop] = 0;
255 bp++; /* final NUL */ 84 bp++; /* final NUL */
256 85
257 bp++; /* type */ 86 bp++; /* type */
@@ -264,6 +93,7 @@ int afs_rxvl_get_entry_by_name(struct afs_server *server,
264 93
265 for (loop = 0; loop < 8; loop++) { 94 for (loop = 0; loop < 8; loop++) {
266 tmp = ntohl(*bp++); 95 tmp = ntohl(*bp++);
96 entry->srvtmask[loop] = 0;
267 if (tmp & AFS_VLSF_RWVOL) 97 if (tmp & AFS_VLSF_RWVOL)
268 entry->srvtmask[loop] |= AFS_VOL_VTM_RW; 98 entry->srvtmask[loop] |= AFS_VOL_VTM_RW;
269 if (tmp & AFS_VLSF_ROVOL) 99 if (tmp & AFS_VLSF_ROVOL)
@@ -279,417 +109,110 @@ int afs_rxvl_get_entry_by_name(struct afs_server *server,
279 bp++; /* clone ID */ 109 bp++; /* clone ID */
280 110
281 tmp = ntohl(*bp++); /* flags */ 111 tmp = ntohl(*bp++); /* flags */
112 entry->vidmask = 0;
282 if (tmp & AFS_VLF_RWEXISTS) 113 if (tmp & AFS_VLF_RWEXISTS)
283 entry->vidmask |= AFS_VOL_VTM_RW; 114 entry->vidmask |= AFS_VOL_VTM_RW;
284 if (tmp & AFS_VLF_ROEXISTS) 115 if (tmp & AFS_VLF_ROEXISTS)
285 entry->vidmask |= AFS_VOL_VTM_RO; 116 entry->vidmask |= AFS_VOL_VTM_RO;
286 if (tmp & AFS_VLF_BACKEXISTS) 117 if (tmp & AFS_VLF_BACKEXISTS)
287 entry->vidmask |= AFS_VOL_VTM_BAK; 118 entry->vidmask |= AFS_VOL_VTM_BAK;
288
289 ret = -ENOMEDIUM;
290 if (!entry->vidmask) 119 if (!entry->vidmask)
291 goto abort; 120 return -EBADMSG;
292
293 /* success */
294 entry->rtime = get_seconds();
295 ret = 0;
296
297 out_unwait:
298 set_current_state(TASK_RUNNING);
299 remove_wait_queue(&call->waitq, &myself);
300 rxrpc_put_call(call);
301 out_put_conn:
302 rxrpc_put_connection(conn);
303 out:
304 _leave(" = %d", ret);
305 return ret;
306
307 abort:
308 set_current_state(TASK_UNINTERRUPTIBLE);
309 rxrpc_call_abort(call, ret);
310 schedule();
311 goto out_unwait;
312} /* end afs_rxvl_get_entry_by_name() */
313
314/*****************************************************************************/
315/*
316 * look up a volume location database entry by ID
317 */
318int afs_rxvl_get_entry_by_id(struct afs_server *server,
319 afs_volid_t volid,
320 afs_voltype_t voltype,
321 struct afs_cache_vlocation *entry)
322{
323 DECLARE_WAITQUEUE(myself, current);
324
325 struct rxrpc_connection *conn;
326 struct rxrpc_call *call;
327 struct kvec piov[1];
328 unsigned tmp;
329 size_t sent;
330 int ret, loop;
331 __be32 *bp, param[3];
332
333 _enter(",%x,%d,", volid, voltype);
334
335 memset(entry, 0, sizeof(*entry));
336
337 /* get hold of the vlserver connection */
338 ret = afs_server_get_vlconn(server, &conn);
339 if (ret < 0)
340 goto out;
341
342 /* create a call through that connection */
343 ret = rxrpc_create_call(conn, NULL, NULL, afs_rxvl_aemap, &call);
344 if (ret < 0) {
345 printk("kAFS: Unable to create call: %d\n", ret);
346 goto out_put_conn;
347 }
348 call->app_opcode = VLGETENTRYBYID;
349
350 /* we want to get event notifications from the call */
351 add_wait_queue(&call->waitq, &myself);
352
353 /* marshall the parameters */
354 param[0] = htonl(VLGETENTRYBYID);
355 param[1] = htonl(volid);
356 param[2] = htonl(voltype);
357
358 piov[0].iov_len = sizeof(param);
359 piov[0].iov_base = param;
360
361 /* send the parameters to the server */
362 ret = rxrpc_call_write_data(call, 1, piov, RXRPC_LAST_PACKET, GFP_NOFS,
363 0, &sent);
364 if (ret < 0)
365 goto abort;
366
367 /* wait for the reply to completely arrive */
368 bp = rxrpc_call_alloc_scratch(call, 384);
369
370 ret = rxrpc_call_read_data(call, bp, 384,
371 RXRPC_CALL_READ_BLOCK |
372 RXRPC_CALL_READ_ALL);
373 if (ret < 0) {
374 if (ret == -ECONNABORTED) {
375 ret = call->app_errno;
376 goto out_unwait;
377 }
378 goto abort;
379 }
380
381 /* unmarshall the reply */
382 for (loop = 0; loop < 64; loop++)
383 entry->name[loop] = ntohl(*bp++);
384 bp++; /* final NUL */
385 121
386 bp++; /* type */ 122 _leave(" = 0 [done]");
387 entry->nservers = ntohl(*bp++); 123 return 0;
388 124}
389 for (loop = 0; loop < 8; loop++)
390 entry->servers[loop].s_addr = *bp++;
391
392 bp += 8; /* partition IDs */
393 125
394 for (loop = 0; loop < 8; loop++) {
395 tmp = ntohl(*bp++);
396 if (tmp & AFS_VLSF_RWVOL)
397 entry->srvtmask[loop] |= AFS_VOL_VTM_RW;
398 if (tmp & AFS_VLSF_ROVOL)
399 entry->srvtmask[loop] |= AFS_VOL_VTM_RO;
400 if (tmp & AFS_VLSF_BACKVOL)
401 entry->srvtmask[loop] |= AFS_VOL_VTM_BAK;
402 }
403
404 entry->vid[0] = ntohl(*bp++);
405 entry->vid[1] = ntohl(*bp++);
406 entry->vid[2] = ntohl(*bp++);
407
408 bp++; /* clone ID */
409
410 tmp = ntohl(*bp++); /* flags */
411 if (tmp & AFS_VLF_RWEXISTS)
412 entry->vidmask |= AFS_VOL_VTM_RW;
413 if (tmp & AFS_VLF_ROEXISTS)
414 entry->vidmask |= AFS_VOL_VTM_RO;
415 if (tmp & AFS_VLF_BACKEXISTS)
416 entry->vidmask |= AFS_VOL_VTM_BAK;
417
418 ret = -ENOMEDIUM;
419 if (!entry->vidmask)
420 goto abort;
421
422#if 0 /* TODO: remove */
423 entry->nservers = 3;
424 entry->servers[0].s_addr = htonl(0xac101249);
425 entry->servers[1].s_addr = htonl(0xac101243);
426 entry->servers[2].s_addr = htonl(0xac10125b /*0xac10125b*/);
427
428 entry->srvtmask[0] = AFS_VOL_VTM_RO;
429 entry->srvtmask[1] = AFS_VOL_VTM_RO;
430 entry->srvtmask[2] = AFS_VOL_VTM_RO | AFS_VOL_VTM_RW;
431#endif
432
433 /* success */
434 entry->rtime = get_seconds();
435 ret = 0;
436
437 out_unwait:
438 set_current_state(TASK_RUNNING);
439 remove_wait_queue(&call->waitq, &myself);
440 rxrpc_put_call(call);
441 out_put_conn:
442 rxrpc_put_connection(conn);
443 out:
444 _leave(" = %d", ret);
445 return ret;
446
447 abort:
448 set_current_state(TASK_UNINTERRUPTIBLE);
449 rxrpc_call_abort(call, ret);
450 schedule();
451 goto out_unwait;
452} /* end afs_rxvl_get_entry_by_id() */
453
454/*****************************************************************************/
455/* 126/*
456 * look up a volume location database entry by ID asynchronously 127 * VL.GetEntryByName operation type
457 */ 128 */
458int afs_rxvl_get_entry_by_id_async(struct afs_async_op *op, 129static const struct afs_call_type afs_RXVLGetEntryByName = {
459 afs_volid_t volid, 130 .name = "VL.GetEntryByName",
460 afs_voltype_t voltype) 131 .deliver = afs_deliver_vl_get_entry_by_xxx,
461{ 132 .abort_to_error = afs_vl_abort_to_error,
462 struct rxrpc_connection *conn; 133 .destructor = afs_flat_call_destructor,
463 struct rxrpc_call *call; 134};
464 struct kvec piov[1];
465 size_t sent;
466 int ret;
467 __be32 param[3];
468
469 _enter(",%x,%d,", volid, voltype);
470
471 /* get hold of the vlserver connection */
472 ret = afs_server_get_vlconn(op->server, &conn);
473 if (ret < 0) {
474 _leave(" = %d", ret);
475 return ret;
476 }
477
478 /* create a call through that connection */
479 ret = rxrpc_create_call(conn,
480 afs_rxvl_get_entry_by_id_attn,
481 afs_rxvl_get_entry_by_id_error,
482 afs_rxvl_aemap,
483 &op->call);
484 rxrpc_put_connection(conn);
485
486 if (ret < 0) {
487 printk("kAFS: Unable to create call: %d\n", ret);
488 _leave(" = %d", ret);
489 return ret;
490 }
491 135
492 op->call->app_opcode = VLGETENTRYBYID; 136/*
493 op->call->app_user = op; 137 * VL.GetEntryById operation type
494 138 */
495 call = op->call; 139static const struct afs_call_type afs_RXVLGetEntryById = {
496 rxrpc_get_call(call); 140 .name = "VL.GetEntryById",
497 141 .deliver = afs_deliver_vl_get_entry_by_xxx,
498 /* send event notifications from the call to kafsasyncd */ 142 .abort_to_error = afs_vl_abort_to_error,
499 afs_kafsasyncd_begin_op(op); 143 .destructor = afs_flat_call_destructor,
500 144};
501 /* marshall the parameters */
502 param[0] = htonl(VLGETENTRYBYID);
503 param[1] = htonl(volid);
504 param[2] = htonl(voltype);
505
506 piov[0].iov_len = sizeof(param);
507 piov[0].iov_base = param;
508
509 /* allocate result read buffer in scratch space */
510 call->app_scr_ptr = rxrpc_call_alloc_scratch(op->call, 384);
511
512 /* send the parameters to the server */
513 ret = rxrpc_call_write_data(call, 1, piov, RXRPC_LAST_PACKET, GFP_NOFS,
514 0, &sent);
515 if (ret < 0) {
516 rxrpc_call_abort(call, ret); /* handle from kafsasyncd */
517 ret = 0;
518 goto out;
519 }
520
521 /* wait for the reply to completely arrive */
522 ret = rxrpc_call_read_data(call, call->app_scr_ptr, 384, 0);
523 switch (ret) {
524 case 0:
525 case -EAGAIN:
526 case -ECONNABORTED:
527 ret = 0;
528 break; /* all handled by kafsasyncd */
529
530 default:
531 rxrpc_call_abort(call, ret); /* make kafsasyncd handle it */
532 ret = 0;
533 break;
534 }
535
536 out:
537 rxrpc_put_call(call);
538 _leave(" = %d", ret);
539 return ret;
540
541} /* end afs_rxvl_get_entry_by_id_async() */
542 145
543/*****************************************************************************/
544/* 146/*
545 * attend to the asynchronous get VLDB entry by ID 147 * dispatch a get volume entry by name operation
546 */ 148 */
547int afs_rxvl_get_entry_by_id_async2(struct afs_async_op *op, 149int afs_vl_get_entry_by_name(struct in_addr *addr,
548 struct afs_cache_vlocation *entry) 150 struct key *key,
151 const char *volname,
152 struct afs_cache_vlocation *entry,
153 const struct afs_wait_mode *wait_mode)
549{ 154{
155 struct afs_call *call;
156 size_t volnamesz, reqsz, padsz;
550 __be32 *bp; 157 __be32 *bp;
551 __u32 tmp;
552 int loop, ret;
553
554 _enter("{op=%p cst=%u}", op, op->call->app_call_state);
555
556 memset(entry, 0, sizeof(*entry));
557
558 if (op->call->app_call_state == RXRPC_CSTATE_COMPLETE) {
559 /* operation finished */
560 afs_kafsasyncd_terminate_op(op);
561
562 bp = op->call->app_scr_ptr;
563
564 /* unmarshall the reply */
565 for (loop = 0; loop < 64; loop++)
566 entry->name[loop] = ntohl(*bp++);
567 bp++; /* final NUL */
568
569 bp++; /* type */
570 entry->nservers = ntohl(*bp++);
571
572 for (loop = 0; loop < 8; loop++)
573 entry->servers[loop].s_addr = *bp++;
574
575 bp += 8; /* partition IDs */
576
577 for (loop = 0; loop < 8; loop++) {
578 tmp = ntohl(*bp++);
579 if (tmp & AFS_VLSF_RWVOL)
580 entry->srvtmask[loop] |= AFS_VOL_VTM_RW;
581 if (tmp & AFS_VLSF_ROVOL)
582 entry->srvtmask[loop] |= AFS_VOL_VTM_RO;
583 if (tmp & AFS_VLSF_BACKVOL)
584 entry->srvtmask[loop] |= AFS_VOL_VTM_BAK;
585 }
586
587 entry->vid[0] = ntohl(*bp++);
588 entry->vid[1] = ntohl(*bp++);
589 entry->vid[2] = ntohl(*bp++);
590
591 bp++; /* clone ID */
592
593 tmp = ntohl(*bp++); /* flags */
594 if (tmp & AFS_VLF_RWEXISTS)
595 entry->vidmask |= AFS_VOL_VTM_RW;
596 if (tmp & AFS_VLF_ROEXISTS)
597 entry->vidmask |= AFS_VOL_VTM_RO;
598 if (tmp & AFS_VLF_BACKEXISTS)
599 entry->vidmask |= AFS_VOL_VTM_BAK;
600
601 ret = -ENOMEDIUM;
602 if (!entry->vidmask) {
603 rxrpc_call_abort(op->call, ret);
604 goto done;
605 }
606
607#if 0 /* TODO: remove */
608 entry->nservers = 3;
609 entry->servers[0].s_addr = htonl(0xac101249);
610 entry->servers[1].s_addr = htonl(0xac101243);
611 entry->servers[2].s_addr = htonl(0xac10125b /*0xac10125b*/);
612
613 entry->srvtmask[0] = AFS_VOL_VTM_RO;
614 entry->srvtmask[1] = AFS_VOL_VTM_RO;
615 entry->srvtmask[2] = AFS_VOL_VTM_RO | AFS_VOL_VTM_RW;
616#endif
617
618 /* success */
619 entry->rtime = get_seconds();
620 ret = 0;
621 goto done;
622 }
623 158
624 if (op->call->app_call_state == RXRPC_CSTATE_ERROR) { 159 _enter("");
625 /* operation error */
626 ret = op->call->app_errno;
627 goto done;
628 }
629 160
630 _leave(" = -EAGAIN"); 161 volnamesz = strlen(volname);
631 return -EAGAIN; 162 padsz = (4 - (volnamesz & 3)) & 3;
163 reqsz = 8 + volnamesz + padsz;
632 164
633 done: 165 call = afs_alloc_flat_call(&afs_RXVLGetEntryByName, reqsz, 384);
634 rxrpc_put_call(op->call); 166 if (!call)
635 op->call = NULL; 167 return -ENOMEM;
636 _leave(" = %d", ret);
637 return ret;
638} /* end afs_rxvl_get_entry_by_id_async2() */
639 168
640/*****************************************************************************/ 169 call->key = key;
641/* 170 call->reply = entry;
642 * handle attention events on an async get-entry-by-ID op 171 call->service_id = VL_SERVICE;
643 * - called from krxiod 172 call->port = htons(AFS_VL_PORT);
644 */
645static void afs_rxvl_get_entry_by_id_attn(struct rxrpc_call *call)
646{
647 struct afs_async_op *op = call->app_user;
648
649 _enter("{op=%p cst=%u}", op, call->app_call_state);
650
651 switch (call->app_call_state) {
652 case RXRPC_CSTATE_COMPLETE:
653 afs_kafsasyncd_attend_op(op);
654 break;
655 case RXRPC_CSTATE_CLNT_RCV_REPLY:
656 if (call->app_async_read)
657 break;
658 case RXRPC_CSTATE_CLNT_GOT_REPLY:
659 if (call->app_read_count == 0)
660 break;
661 printk("kAFS: Reply bigger than expected"
662 " {cst=%u asyn=%d mark=%Zu rdy=%Zu pr=%u%s}",
663 call->app_call_state,
664 call->app_async_read,
665 call->app_mark,
666 call->app_ready_qty,
667 call->pkt_rcv_count,
668 call->app_last_rcv ? " last" : "");
669
670 rxrpc_call_abort(call, -EBADMSG);
671 break;
672 default:
673 BUG();
674 }
675 173
676 _leave(""); 174 /* marshall the parameters */
175 bp = call->request;
176 *bp++ = htonl(VLGETENTRYBYNAME);
177 *bp++ = htonl(volnamesz);
178 memcpy(bp, volname, volnamesz);
179 if (padsz > 0)
180 memset((void *) bp + volnamesz, 0, padsz);
677 181
678} /* end afs_rxvl_get_entry_by_id_attn() */ 182 /* initiate the call */
183 return afs_make_call(addr, call, GFP_KERNEL, wait_mode);
184}
679 185
680/*****************************************************************************/
681/* 186/*
682 * handle error events on an async get-entry-by-ID op 187 * dispatch a get volume entry by ID operation
683 * - called from krxiod
684 */ 188 */
685static void afs_rxvl_get_entry_by_id_error(struct rxrpc_call *call) 189int afs_vl_get_entry_by_id(struct in_addr *addr,
190 struct key *key,
191 afs_volid_t volid,
192 afs_voltype_t voltype,
193 struct afs_cache_vlocation *entry,
194 const struct afs_wait_mode *wait_mode)
686{ 195{
687 struct afs_async_op *op = call->app_user; 196 struct afs_call *call;
197 __be32 *bp;
688 198
689 _enter("{op=%p cst=%u}", op, call->app_call_state); 199 _enter("");
690 200
691 afs_kafsasyncd_attend_op(op); 201 call = afs_alloc_flat_call(&afs_RXVLGetEntryById, 12, 384);
202 if (!call)
203 return -ENOMEM;
692 204
693 _leave(""); 205 call->key = key;
206 call->reply = entry;
207 call->service_id = VL_SERVICE;
208 call->port = htons(AFS_VL_PORT);
694 209
695} /* end afs_rxvl_get_entry_by_id_error() */ 210 /* marshall the parameters */
211 bp = call->request;
212 *bp++ = htonl(VLGETENTRYBYID);
213 *bp++ = htonl(volid);
214 *bp = htonl(voltype);
215
216 /* initiate the call */
217 return afs_make_call(addr, call, GFP_KERNEL, wait_mode);
218}
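
Both GetEntryByName and GetEntryById come back as the same 384-byte XDR blob that afs_deliver_vl_get_entry_by_xxx() decodes: 64 words of name plus a NUL word, a type word, a server count, 8 server addresses, 8 partition IDs, 8 flag words, 3 volume IDs, a clone ID and a flags word. A sketch that decodes the first few fields from a synthetic buffer, mirroring the pointer walk above (layout inferred from the parser; buffer contents are made up):

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

int main(void)
{
	uint32_t wire[96];		/* 384 bytes, as in the kernel code */
	const uint32_t *bp = wire;
	char name[65];
	uint32_t nservers;
	int i;

	/* build a synthetic reply: a name, one char per word, then nservers */
	memset(wire, 0, sizeof(wire));
	for (i = 0; i < 9; i++)
		wire[i] = htonl((unsigned char) "root.cell"[i]);
	wire[66] = htonl(2);		/* word 66: nservers (64 name + NUL + type) */

	/* decode it the way the deliver routine does */
	for (i = 0; i < 64; i++)
		name[i] = ntohl(*bp++);
	name[64] = '\0';
	bp++;				/* final NUL word */
	bp++;				/* type */
	nservers = ntohl(*bp++);

	printf("name=%s nservers=%u\n", name, nservers);
	return 0;
}
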
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 782ee7c600ca..74cce174882a 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -1,6 +1,6 @@
1/* vlocation.c: volume location management 1/* AFS volume location management
2 * 2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -12,131 +12,61 @@
12#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/slab.h>
16#include <linux/fs.h>
17#include <linux/pagemap.h>
18#include "volume.h"
19#include "cell.h"
20#include "cmservice.h"
21#include "fsclient.h"
22#include "vlclient.h"
23#include "kafstimod.h"
24#include <rxrpc/connection.h>
25#include "internal.h" 15#include "internal.h"
26 16
27#define AFS_VLDB_TIMEOUT HZ*1000 17unsigned afs_vlocation_timeout = 10; /* volume location timeout in seconds */
18unsigned afs_vlocation_update_timeout = 10 * 60;
28 19
29static void afs_vlocation_update_timer(struct afs_timer *timer); 20static void afs_vlocation_reaper(struct work_struct *);
30static void afs_vlocation_update_attend(struct afs_async_op *op); 21static void afs_vlocation_updater(struct work_struct *);
31static void afs_vlocation_update_discard(struct afs_async_op *op);
32static void __afs_put_vlocation(struct afs_vlocation *vlocation);
33 22
34static void __afs_vlocation_timeout(struct afs_timer *timer) 23static LIST_HEAD(afs_vlocation_updates);
35{ 24static LIST_HEAD(afs_vlocation_graveyard);
36 struct afs_vlocation *vlocation = 25static DEFINE_SPINLOCK(afs_vlocation_updates_lock);
37 list_entry(timer, struct afs_vlocation, timeout); 26static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock);
38 27static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper);
39 _debug("VL TIMEOUT [%s{u=%d}]", 28static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater);
40 vlocation->vldb.name, atomic_read(&vlocation->usage)); 29static struct workqueue_struct *afs_vlocation_update_worker;
41
42 afs_vlocation_do_timeout(vlocation);
43}
44
45static const struct afs_timer_ops afs_vlocation_timer_ops = {
46 .timed_out = __afs_vlocation_timeout,
47};
48 30
49static const struct afs_timer_ops afs_vlocation_update_timer_ops = {
50 .timed_out = afs_vlocation_update_timer,
51};
52
53static const struct afs_async_op_ops afs_vlocation_update_op_ops = {
54 .attend = afs_vlocation_update_attend,
55 .discard = afs_vlocation_update_discard,
56};
57
58static LIST_HEAD(afs_vlocation_update_pendq); /* queue of VLs awaiting update */
59static struct afs_vlocation *afs_vlocation_update; /* VL currently being updated */
60static DEFINE_SPINLOCK(afs_vlocation_update_lock); /* lock guarding update queue */
61
62#ifdef AFS_CACHING_SUPPORT
63static cachefs_match_val_t afs_vlocation_cache_match(void *target,
64 const void *entry);
65static void afs_vlocation_cache_update(void *source, void *entry);
66
67struct cachefs_index_def afs_vlocation_cache_index_def = {
68 .name = "vldb",
69 .data_size = sizeof(struct afs_cache_vlocation),
70 .keys[0] = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 },
71 .match = afs_vlocation_cache_match,
72 .update = afs_vlocation_cache_update,
73};
74#endif
75
76/*****************************************************************************/
77/* 31/*
78 * iterate through the VL servers in a cell until one of them admits knowing 32 * iterate through the VL servers in a cell until one of them admits knowing
79 * about the volume in question 33 * about the volume in question
80 * - caller must have cell->vl_sem write-locked
81 */ 34 */
82static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vlocation, 35static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
83 const char *name, 36 struct key *key,
84 unsigned namesz,
85 struct afs_cache_vlocation *vldb) 37 struct afs_cache_vlocation *vldb)
86{ 38{
87 struct afs_server *server = NULL; 39 struct afs_cell *cell = vl->cell;
88 struct afs_cell *cell = vlocation->cell; 40 struct in_addr addr;
89 int count, ret; 41 int count, ret;
90 42
91 _enter("%s,%*.*s,%u", cell->name, namesz, namesz, name, namesz); 43 _enter("%s,%s", cell->name, vl->vldb.name);
92 44
45 down_write(&vl->cell->vl_sem);
93 ret = -ENOMEDIUM; 46 ret = -ENOMEDIUM;
94 for (count = cell->vl_naddrs; count > 0; count--) { 47 for (count = cell->vl_naddrs; count > 0; count--) {
95 _debug("CellServ[%hu]: %08x", 48 addr = cell->vl_addrs[cell->vl_curr_svix];
96 cell->vl_curr_svix, 49
97 cell->vl_addrs[cell->vl_curr_svix].s_addr); 50 _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
98
99 /* try and create a server */
100 ret = afs_server_lookup(cell,
101 &cell->vl_addrs[cell->vl_curr_svix],
102 &server);
103 switch (ret) {
104 case 0:
105 break;
106 case -ENOMEM:
107 case -ENONET:
108 goto out;
109 default:
110 goto rotate;
111 }
112 51
113 /* attempt to access the VL server */ 52 /* attempt to access the VL server */
114 ret = afs_rxvl_get_entry_by_name(server, name, namesz, vldb); 53 ret = afs_vl_get_entry_by_name(&addr, key, vl->vldb.name, vldb,
54 &afs_sync_call);
115 switch (ret) { 55 switch (ret) {
116 case 0: 56 case 0:
117 afs_put_server(server);
118 goto out; 57 goto out;
119 case -ENOMEM: 58 case -ENOMEM:
120 case -ENONET: 59 case -ENONET:
121 case -ENETUNREACH: 60 case -ENETUNREACH:
122 case -EHOSTUNREACH: 61 case -EHOSTUNREACH:
123 case -ECONNREFUSED: 62 case -ECONNREFUSED:
124 down_write(&server->sem);
125 if (server->vlserver) {
126 rxrpc_put_connection(server->vlserver);
127 server->vlserver = NULL;
128 }
129 up_write(&server->sem);
130 afs_put_server(server);
131 if (ret == -ENOMEM || ret == -ENONET) 63 if (ret == -ENOMEM || ret == -ENONET)
132 goto out; 64 goto out;
133 goto rotate; 65 goto rotate;
134 case -ENOMEDIUM: 66 case -ENOMEDIUM:
135 afs_put_server(server);
136 goto out; 67 goto out;
137 default: 68 default:
138 afs_put_server(server); 69 ret = -EIO;
139 ret = -ENOMEDIUM;
140 goto rotate; 70 goto rotate;
141 } 71 }
142 72
@@ -146,76 +76,66 @@ static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vlocation,
146 cell->vl_curr_svix %= cell->vl_naddrs; 76 cell->vl_curr_svix %= cell->vl_naddrs;
147 } 77 }
148 78
149 out: 79out:
80 up_write(&vl->cell->vl_sem);
150 _leave(" = %d", ret); 81 _leave(" = %d", ret);
151 return ret; 82 return ret;
83}
152 84
153} /* end afs_vlocation_access_vl_by_name() */
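
The lookup walks the cell's VL server list, retrying transient failures (-ENETUNREACH, -ECONNREFUSED and friends) on the next server and bailing out on hard errors; vl_curr_svix persists in the cell so the next lookup starts with the server that last answered. A stripped-down sketch of that rotate-on-failure loop (try_server() is a stub standing in for the RPC):

#include <stdio.h>

#define NSERVERS 3

/* stub: pretend only server 2 knows the answer */
static int try_server(int ix)
{
	return ix == 2 ? 0 : -1;
}

int main(void)
{
	static int curr_svix;	/* persists across lookups, like vl_curr_svix */
	int count, ret = -1;

	for (count = NSERVERS; count > 0; count--) {
		ret = try_server(curr_svix);
		if (ret == 0)
			break;				/* this server answered */
		curr_svix = (curr_svix + 1) % NSERVERS;	/* rotate and retry */
	}
	printf("ret=%d, next server to ask first: %d\n", ret, curr_svix);
	return 0;
}
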
154
155/*****************************************************************************/
156/* 85/*
157 * iterate through the VL servers in a cell until one of them admits knowing 86 * iterate through the VL servers in a cell until one of them admits knowing
158 * about the volume in question 87 * about the volume in question
159 * - caller must have cell->vl_sem write-locked
160 */ 88 */
161static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vlocation, 89static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
90 struct key *key,
162 afs_volid_t volid, 91 afs_volid_t volid,
163 afs_voltype_t voltype, 92 afs_voltype_t voltype,
164 struct afs_cache_vlocation *vldb) 93 struct afs_cache_vlocation *vldb)
165{ 94{
166 struct afs_server *server = NULL; 95 struct afs_cell *cell = vl->cell;
167 struct afs_cell *cell = vlocation->cell; 96 struct in_addr addr;
168 int count, ret; 97 int count, ret;
169 98
170 _enter("%s,%x,%d,", cell->name, volid, voltype); 99 _enter("%s,%x,%d,", cell->name, volid, voltype);
171 100
101 down_write(&vl->cell->vl_sem);
172 ret = -ENOMEDIUM; 102 ret = -ENOMEDIUM;
173 for (count = cell->vl_naddrs; count > 0; count--) { 103 for (count = cell->vl_naddrs; count > 0; count--) {
174 _debug("CellServ[%hu]: %08x", 104 addr = cell->vl_addrs[cell->vl_curr_svix];
175 cell->vl_curr_svix, 105
176 cell->vl_addrs[cell->vl_curr_svix].s_addr); 106 _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
177
178 /* try and create a server */
179 ret = afs_server_lookup(cell,
180 &cell->vl_addrs[cell->vl_curr_svix],
181 &server);
182 switch (ret) {
183 case 0:
184 break;
185 case -ENOMEM:
186 case -ENONET:
187 goto out;
188 default:
189 goto rotate;
190 }
191 107
192 /* attempt to access the VL server */ 108 /* attempt to access the VL server */
193 ret = afs_rxvl_get_entry_by_id(server, volid, voltype, vldb); 109 ret = afs_vl_get_entry_by_id(&addr, key, volid, voltype, vldb,
110 &afs_sync_call);
194 switch (ret) { 111 switch (ret) {
195 case 0: 112 case 0:
196 afs_put_server(server);
197 goto out; 113 goto out;
198 case -ENOMEM: 114 case -ENOMEM:
199 case -ENONET: 115 case -ENONET:
200 case -ENETUNREACH: 116 case -ENETUNREACH:
201 case -EHOSTUNREACH: 117 case -EHOSTUNREACH:
202 case -ECONNREFUSED: 118 case -ECONNREFUSED:
203 down_write(&server->sem);
204 if (server->vlserver) {
205 rxrpc_put_connection(server->vlserver);
206 server->vlserver = NULL;
207 }
208 up_write(&server->sem);
209 afs_put_server(server);
210 if (ret == -ENOMEM || ret == -ENONET) 119 if (ret == -ENOMEM || ret == -ENONET)
211 goto out; 120 goto out;
212 goto rotate; 121 goto rotate;
122 case -EBUSY:
123 vl->upd_busy_cnt++;
124 if (vl->upd_busy_cnt <= 3) {
125 if (vl->upd_busy_cnt > 1) {
126 /* second+ BUSY - sleep a little bit */
127 set_current_state(TASK_UNINTERRUPTIBLE);
128 schedule_timeout(1);
129 __set_current_state(TASK_RUNNING);
130 }
131 continue;
132 }
133 break;
213 case -ENOMEDIUM: 134 case -ENOMEDIUM:
214 afs_put_server(server); 135 vl->upd_rej_cnt++;
215 goto out; 136 goto rotate;
216 default: 137 default:
217 afs_put_server(server); 138 ret = -EIO;
218 ret = -ENOMEDIUM;
219 goto rotate; 139 goto rotate;
220 } 140 }
221 141
@@ -223,729 +143,580 @@ static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vlocation,
223 rotate: 143 rotate:
224 cell->vl_curr_svix++; 144 cell->vl_curr_svix++;
225 cell->vl_curr_svix %= cell->vl_naddrs; 145 cell->vl_curr_svix %= cell->vl_naddrs;
146 vl->upd_busy_cnt = 0;
226 } 147 }
227 148
228 out: 149out:
150 if (ret < 0 && vl->upd_rej_cnt > 0) {
151 printk(KERN_NOTICE "kAFS:"
152 " Active volume no longer valid '%s'\n",
153 vl->vldb.name);
154 vl->valid = 0;
155 ret = -ENOMEDIUM;
156 }
157
158 up_write(&vl->cell->vl_sem);
229 _leave(" = %d", ret); 159 _leave(" = %d", ret);
230 return ret; 160 return ret;
161}
231 162
232} /* end afs_vlocation_access_vl_by_id() */
233
234/*****************************************************************************/
235/* 163/*
236 * lookup volume location 164 * allocate a volume location record
237 * - caller must have cell->vol_sem write-locked
238 * - iterate through the VL servers in a cell until one of them admits knowing
239 * about the volume in question
240 * - lookup in the local cache if not able to find on the VL server
241 * - insert/update in the local cache if did get a VL response
242 */ 165 */
243int afs_vlocation_lookup(struct afs_cell *cell, 166static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell,
244 const char *name, 167 const char *name,
245 unsigned namesz, 168 size_t namesz)
246 struct afs_vlocation **_vlocation)
247{ 169{
248 struct afs_cache_vlocation vldb; 170 struct afs_vlocation *vl;
249 struct afs_vlocation *vlocation; 171
250 afs_voltype_t voltype; 172 vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL);
251 afs_volid_t vid; 173 if (vl) {
252 int active = 0, ret; 174 vl->cell = cell;
253 175 vl->state = AFS_VL_NEW;
254 _enter("{%s},%*.*s,%u,", cell->name, namesz, namesz, name, namesz); 176 atomic_set(&vl->usage, 1);
255 177 INIT_LIST_HEAD(&vl->link);
256 if (namesz > sizeof(vlocation->vldb.name)) { 178 INIT_LIST_HEAD(&vl->grave);
257 _leave(" = -ENAMETOOLONG"); 179 INIT_LIST_HEAD(&vl->update);
258 return -ENAMETOOLONG; 180 init_waitqueue_head(&vl->waitq);
259 } 181 spin_lock_init(&vl->lock);
260 182 memcpy(vl->vldb.name, name, namesz);
261 /* search the cell's active list first */
262 list_for_each_entry(vlocation, &cell->vl_list, link) {
263 if (namesz < sizeof(vlocation->vldb.name) &&
264 vlocation->vldb.name[namesz] != '\0')
265 continue;
266
267 if (memcmp(vlocation->vldb.name, name, namesz) == 0)
268 goto found_in_memory;
269 }
270
271 /* search the cell's graveyard list second */
272 spin_lock(&cell->vl_gylock);
273 list_for_each_entry(vlocation, &cell->vl_graveyard, link) {
274 if (namesz < sizeof(vlocation->vldb.name) &&
275 vlocation->vldb.name[namesz] != '\0')
276 continue;
277
278 if (memcmp(vlocation->vldb.name, name, namesz) == 0)
279 goto found_in_graveyard;
280 }
281 spin_unlock(&cell->vl_gylock);
282
283 /* not in the cell's in-memory lists - create a new record */
284 vlocation = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL);
285 if (!vlocation)
286 return -ENOMEM;
287
288 atomic_set(&vlocation->usage, 1);
289 INIT_LIST_HEAD(&vlocation->link);
290 rwlock_init(&vlocation->lock);
291 memcpy(vlocation->vldb.name, name, namesz);
292
293 afs_timer_init(&vlocation->timeout, &afs_vlocation_timer_ops);
294 afs_timer_init(&vlocation->upd_timer, &afs_vlocation_update_timer_ops);
295 afs_async_op_init(&vlocation->upd_op, &afs_vlocation_update_op_ops);
296
297 afs_get_cell(cell);
298 vlocation->cell = cell;
299
300 list_add_tail(&vlocation->link, &cell->vl_list);
301
302#ifdef AFS_CACHING_SUPPORT
303 /* we want to store it in the cache, plus it might already be
304 * encached */
305 cachefs_acquire_cookie(cell->cache,
306 &afs_volume_cache_index_def,
307 vlocation,
308 &vlocation->cache);
309
310 if (vlocation->valid)
311 goto found_in_cache;
312#endif
313
314 /* try to look up an unknown volume in the cell VL databases by name */
315 ret = afs_vlocation_access_vl_by_name(vlocation, name, namesz, &vldb);
316 if (ret < 0) {
317 printk("kAFS: failed to locate '%*.*s' in cell '%s'\n",
318 namesz, namesz, name, cell->name);
319 goto error;
320 } 183 }
321 184
322 goto found_on_vlserver; 185 _leave(" = %p", vl);
323 186 return vl;
324 found_in_graveyard: 187}
325 /* found in the graveyard - resurrect */
326 _debug("found in graveyard");
327 atomic_inc(&vlocation->usage);
328 list_move_tail(&vlocation->link, &cell->vl_list);
329 spin_unlock(&cell->vl_gylock);
330
331 afs_kafstimod_del_timer(&vlocation->timeout);
332 goto active;
333
334 found_in_memory:
335 /* found in memory - check to see if it's active */
336 _debug("found in memory");
337 atomic_inc(&vlocation->usage);
338 188
339 active: 189/*
340 active = 1; 190 * update record if we found it in the cache
191 */
192static int afs_vlocation_update_record(struct afs_vlocation *vl,
193 struct key *key,
194 struct afs_cache_vlocation *vldb)
195{
196 afs_voltype_t voltype;
197 afs_volid_t vid;
198 int ret;
341 199
342#ifdef AFS_CACHING_SUPPORT
343 found_in_cache:
344#endif
345 /* try to look up a cached volume in the cell VL databases by ID */ 200 /* try to look up a cached volume in the cell VL databases by ID */
346 _debug("found in cache");
347
348 _debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }", 201 _debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
349 vlocation->vldb.name, 202 vl->vldb.name,
350 vlocation->vldb.vidmask, 203 vl->vldb.vidmask,
351 ntohl(vlocation->vldb.servers[0].s_addr), 204 ntohl(vl->vldb.servers[0].s_addr),
352 vlocation->vldb.srvtmask[0], 205 vl->vldb.srvtmask[0],
353 ntohl(vlocation->vldb.servers[1].s_addr), 206 ntohl(vl->vldb.servers[1].s_addr),
354 vlocation->vldb.srvtmask[1], 207 vl->vldb.srvtmask[1],
355 ntohl(vlocation->vldb.servers[2].s_addr), 208 ntohl(vl->vldb.servers[2].s_addr),
356 vlocation->vldb.srvtmask[2] 209 vl->vldb.srvtmask[2]);
357 );
358 210
359 _debug("Vids: %08x %08x %08x", 211 _debug("Vids: %08x %08x %08x",
360 vlocation->vldb.vid[0], 212 vl->vldb.vid[0],
361 vlocation->vldb.vid[1], 213 vl->vldb.vid[1],
362 vlocation->vldb.vid[2]); 214 vl->vldb.vid[2]);
363 215
364 if (vlocation->vldb.vidmask & AFS_VOL_VTM_RW) { 216 if (vl->vldb.vidmask & AFS_VOL_VTM_RW) {
365 vid = vlocation->vldb.vid[0]; 217 vid = vl->vldb.vid[0];
366 voltype = AFSVL_RWVOL; 218 voltype = AFSVL_RWVOL;
367 } 219 } else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) {
368 else if (vlocation->vldb.vidmask & AFS_VOL_VTM_RO) { 220 vid = vl->vldb.vid[1];
369 vid = vlocation->vldb.vid[1];
370 voltype = AFSVL_ROVOL; 221 voltype = AFSVL_ROVOL;
371 } 222 } else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) {
372 else if (vlocation->vldb.vidmask & AFS_VOL_VTM_BAK) { 223 vid = vl->vldb.vid[2];
373 vid = vlocation->vldb.vid[2];
374 voltype = AFSVL_BACKVOL; 224 voltype = AFSVL_BACKVOL;
375 } 225 } else {
376 else {
377 BUG(); 226 BUG();
378 vid = 0; 227 vid = 0;
379 voltype = 0; 228 voltype = 0;
380 } 229 }
381 230
382 ret = afs_vlocation_access_vl_by_id(vlocation, vid, voltype, &vldb); 231 /* contact the server to make sure the volume is still available
232 * - TODO: need to handle disconnected operation here
233 */
234 ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb);
383 switch (ret) { 235 switch (ret) {
384 /* net error */ 236 /* net error */
385 default: 237 default:
386 printk("kAFS: failed to volume '%*.*s' (%x) up in '%s': %d\n", 238 printk(KERN_WARNING "kAFS:"
387 namesz, namesz, name, vid, cell->name, ret); 239 " failed to update volume '%s' (%x) in '%s': %d\n",
388 goto error; 240 vl->vldb.name, vid, vl->cell->name, ret);
241 _leave(" = %d", ret);
242 return ret;
389 243
390 /* pulled from local cache into memory */ 244 /* pulled from local cache into memory */
391 case 0: 245 case 0:
392 goto found_on_vlserver; 246 _leave(" = 0");
247 return 0;
393 248
394 /* uh oh... looks like the volume got deleted */ 249 /* uh oh... looks like the volume got deleted */
395 case -ENOMEDIUM: 250 case -ENOMEDIUM:
396 printk("kAFS: volume '%*.*s' (%x) does not exist '%s'\n", 251 printk(KERN_ERR "kAFS:"
397 namesz, namesz, name, vid, cell->name); 252 " volume '%s' (%x) does not exist in '%s'\n",
253 vl->vldb.name, vid, vl->cell->name);
398 254
399 /* TODO: make existing record unavailable */ 255 /* TODO: make existing record unavailable */
400 goto error; 256 _leave(" = %d", ret);
257 return ret;
401 } 258 }
259}
402 260
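The switch above always prefers a read-write volume over a read-only one, and a read-only one over a backup, when deciding which ID to revalidate. The same decode as a standalone helper - illustrative only (the helper name is ours; the AFS_VOL_VTM_* flags and the three-slot vid[] layout are those of struct afs_cache_vlocation):

	static int afs_vldb_pick_vid(const struct afs_cache_vlocation *vldb,
				     afs_volid_t *vid, afs_voltype_t *voltype)
	{
		if (vldb->vidmask & AFS_VOL_VTM_RW) {
			*vid = vldb->vid[0];	/* slot 0: read-write */
			*voltype = AFSVL_RWVOL;
		} else if (vldb->vidmask & AFS_VOL_VTM_RO) {
			*vid = vldb->vid[1];	/* slot 1: read-only */
			*voltype = AFSVL_ROVOL;
		} else if (vldb->vidmask & AFS_VOL_VTM_BAK) {
			*vid = vldb->vid[2];	/* slot 2: backup */
			*voltype = AFSVL_BACKVOL;
		} else {
			return -ENOMEDIUM;	/* no usable volume type recorded */
		}
		return 0;
	}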
403 found_on_vlserver: 261/*
404 _debug("Done VL Lookup: %*.*s %02x { %08x(%x) %08x(%x) %08x(%x) }", 262 * apply the update to a VL record
405 namesz, namesz, name, 263 */
406 vldb.vidmask, 264static void afs_vlocation_apply_update(struct afs_vlocation *vl,
407 ntohl(vldb.servers[0].s_addr), vldb.srvtmask[0], 265 struct afs_cache_vlocation *vldb)
408 ntohl(vldb.servers[1].s_addr), vldb.srvtmask[1], 266{
409 ntohl(vldb.servers[2].s_addr), vldb.srvtmask[2] 267 _debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
410 ); 268 vldb->name, vldb->vidmask,
411 269 ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0],
412 _debug("Vids: %08x %08x %08x", vldb.vid[0], vldb.vid[1], vldb.vid[2]); 270 ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1],
271 ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]);
413 272
414 if ((namesz < sizeof(vlocation->vldb.name) && 273 _debug("Vids: %08x %08x %08x",
415 vlocation->vldb.name[namesz] != '\0') || 274 vldb->vid[0], vldb->vid[1], vldb->vid[2]);
416 memcmp(vldb.name, name, namesz) != 0)
417 printk("kAFS: name of volume '%*.*s' changed to '%s' on server\n",
418 namesz, namesz, name, vldb.name);
419 275
420 memcpy(&vlocation->vldb, &vldb, sizeof(vlocation->vldb)); 276 if (strcmp(vldb->name, vl->vldb.name) != 0)
277 printk(KERN_NOTICE "kAFS:"
278 " name of volume '%s' changed to '%s' on server\n",
279 vl->vldb.name, vldb->name);
421 280
422 afs_kafstimod_add_timer(&vlocation->upd_timer, 10 * HZ); 281 vl->vldb = *vldb;
423 282
424#ifdef AFS_CACHING_SUPPORT 283#ifdef AFS_CACHING_SUPPORT
425 /* update volume entry in local cache */ 284 /* update volume entry in local cache */
426 cachefs_update_cookie(vlocation->cache); 285 cachefs_update_cookie(vl->cache);
427#endif
428
429 *_vlocation = vlocation;
430 _leave(" = 0 (%p)",vlocation);
431 return 0;
432
433 error:
434 if (vlocation) {
435 if (active) {
436 __afs_put_vlocation(vlocation);
437 }
438 else {
439 list_del(&vlocation->link);
440#ifdef AFS_CACHING_SUPPORT
441 cachefs_relinquish_cookie(vlocation->cache, 0);
442#endif 286#endif
443 afs_put_cell(vlocation->cell); 287}
444 kfree(vlocation);
445 }
446 }
447
448 _leave(" = %d", ret);
449 return ret;
450} /* end afs_vlocation_lookup() */
451 288
452/*****************************************************************************/
453/* 289/*
454 * finish using a volume location record 290 * fill in a volume location record, consulting both the cache and
455 * - caller must have cell->vol_sem write-locked 291 * the VL server
456 */ 292 */
457static void __afs_put_vlocation(struct afs_vlocation *vlocation) 293static int afs_vlocation_fill_in_record(struct afs_vlocation *vl,
294 struct key *key)
458{ 295{
459 struct afs_cell *cell; 296 struct afs_cache_vlocation vldb;
297 int ret;
460 298
461 if (!vlocation) 299 _enter("");
462 return;
463 300
464 _enter("%s", vlocation->vldb.name); 301 ASSERTCMP(vl->valid, ==, 0);
465 302
466 cell = vlocation->cell; 303 memset(&vldb, 0, sizeof(vldb));
467 304
468 /* sanity check */ 305 /* see if we have an in-cache copy (will set vl->valid if there is) */
469 BUG_ON(atomic_read(&vlocation->usage) <= 0); 306#ifdef AFS_CACHING_SUPPORT
307 cachefs_acquire_cookie(vl->cell->cache,
308 &afs_volume_cache_index_def,
309 vl,
310 &vl->cache);
311#endif
470 312
471 spin_lock(&cell->vl_gylock); 313 if (vl->valid) {
472 if (likely(!atomic_dec_and_test(&vlocation->usage))) { 314 /* try to update a known volume in the cell VL databases by
473 spin_unlock(&cell->vl_gylock); 315 * ID as the name may have changed */
474 _leave(""); 316 _debug("found in cache");
475 return; 317 ret = afs_vlocation_update_record(vl, key, &vldb);
318 } else {
319 /* try to look up an unknown volume in the cell VL databases by
320 * name */
321 ret = afs_vlocation_access_vl_by_name(vl, key, &vldb);
322 if (ret < 0) {
323 printk("kAFS: failed to locate '%s' in cell '%s'\n",
324 vl->vldb.name, vl->cell->name);
325 return ret;
326 }
476 } 327 }
477 328
478 /* move to graveyard queue */ 329 afs_vlocation_apply_update(vl, &vldb);
479 list_move_tail(&vlocation->link,&cell->vl_graveyard); 330 _leave(" = 0");
480 331 return 0;
481 /* remove from pending timeout queue (refcounted if actually being 332}
482 * updated) */
483 list_del_init(&vlocation->upd_op.link);
484
485 /* time out in 10 secs */
486 afs_kafstimod_del_timer(&vlocation->upd_timer);
487 afs_kafstimod_add_timer(&vlocation->timeout, 10 * HZ);
488
489 spin_unlock(&cell->vl_gylock);
490
491 _leave(" [killed]");
492} /* end __afs_put_vlocation() */
493
494/*****************************************************************************/
495/*
496 * finish using a volume location record
497 */
498void afs_put_vlocation(struct afs_vlocation *vlocation)
499{
500 if (vlocation) {
501 struct afs_cell *cell = vlocation->cell;
502
503 down_write(&cell->vl_sem);
504 __afs_put_vlocation(vlocation);
505 up_write(&cell->vl_sem);
506 }
507} /* end afs_put_vlocation() */
508 333
509/*****************************************************************************/
510/* 334/*
511 * timeout vlocation record 335 * queue a vlocation record for updates
512 * - removes from the cell's graveyard if the usage count is zero
513 */ 336 */
514void afs_vlocation_do_timeout(struct afs_vlocation *vlocation) 337void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
515{ 338{
516 struct afs_cell *cell; 339 struct afs_vlocation *xvl;
517 340
518 _enter("%s", vlocation->vldb.name); 341 /* wait at least 10 minutes before updating... */
342 vl->update_at = get_seconds() + afs_vlocation_update_timeout;
519 343
520 cell = vlocation->cell; 344 spin_lock(&afs_vlocation_updates_lock);
521 345
522 BUG_ON(atomic_read(&vlocation->usage) < 0); 346 if (!list_empty(&afs_vlocation_updates)) {
523 347 /* ... but wait at least 1 second more than the newest record
524 /* remove from graveyard if still dead */ 348 * already queued so that we don't spam the VL server suddenly
525 spin_lock(&cell->vl_gylock); 349 * with lots of requests
526 if (atomic_read(&vlocation->usage) == 0) 350 */
527 list_del_init(&vlocation->link); 351 xvl = list_entry(afs_vlocation_updates.prev,
528 else 352 struct afs_vlocation, update);
529 vlocation = NULL; 353 if (vl->update_at <= xvl->update_at)
530 spin_unlock(&cell->vl_gylock); 354 vl->update_at = xvl->update_at + 1;
531 355 } else {
532 if (!vlocation) { 356 queue_delayed_work(afs_vlocation_update_worker,
533 _leave(""); 357 &afs_vlocation_update,
534 return; /* resurrected */ 358 afs_vlocation_update_timeout * HZ);
535 } 359 }
536 360
537 /* we can now destroy it properly */ 361 list_add_tail(&vl->update, &afs_vlocation_updates);
538#ifdef AFS_CACHING_SUPPORT 362 spin_unlock(&afs_vlocation_updates_lock);
539 cachefs_relinquish_cookie(vlocation->cache, 0); 363}
540#endif
541 afs_put_cell(cell);
542
543 kfree(vlocation);
544
545 _leave(" [destroyed]");
546} /* end afs_vlocation_do_timeout() */
547 364
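afs_vlocation_queue_for_updates() above encodes a simple pacing rule: a record is revisited roughly every afs_vlocation_update_timeout seconds, but never less than one second behind the newest entry already queued, so a burst of lookups cannot become a burst of VL RPCs. The rule in isolation (a sketch, not a function from the patch; newest may be NULL when the queue is empty):

	static time_t afs_vlocation_next_update(time_t now,
						const struct afs_vlocation *newest)
	{
		time_t at = now + afs_vlocation_update_timeout;

		/* stay at least 1s behind the current tail of the queue */
		if (newest && at <= newest->update_at)
			at = newest->update_at + 1;
		return at;
	}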
548/*****************************************************************************/
549/* 365/*
550 * send an update operation to the currently selected server 366 * lookup volume location
367 * - iterate through the VL servers in a cell until one of them admits knowing
368 * about the volume in question
369 * - lookup in the local cache if not able to find on the VL server
370 * - insert/update in the local cache if did get a VL response
551 */ 371 */
552static int afs_vlocation_update_begin(struct afs_vlocation *vlocation) 372struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell,
373 struct key *key,
374 const char *name,
375 size_t namesz)
553{ 376{
554 afs_voltype_t voltype; 377 struct afs_vlocation *vl;
555 afs_volid_t vid;
556 int ret; 378 int ret;
557 379
558 _enter("%s{ufs=%u ucs=%u}", 380 _enter("{%s},{%x},%*.*s,%zu",
559 vlocation->vldb.name, 381 cell->name, key_serial(key),
560 vlocation->upd_first_svix, 382 (int) namesz, (int) namesz, name, namesz);
561 vlocation->upd_curr_svix);
562 383
563 /* try to look up a cached volume in the cell VL databases by ID */ 384 if (namesz > sizeof(vl->vldb.name)) {
564 if (vlocation->vldb.vidmask & AFS_VOL_VTM_RW) { 385 _leave(" = -ENAMETOOLONG");
565 vid = vlocation->vldb.vid[0]; 386 return ERR_PTR(-ENAMETOOLONG);
566 voltype = AFSVL_RWVOL;
567 }
568 else if (vlocation->vldb.vidmask & AFS_VOL_VTM_RO) {
569 vid = vlocation->vldb.vid[1];
570 voltype = AFSVL_ROVOL;
571 } 387 }
572 else if (vlocation->vldb.vidmask & AFS_VOL_VTM_BAK) { 388
573 vid = vlocation->vldb.vid[2]; 389 /* see if we have an in-memory copy first */
574 voltype = AFSVL_BACKVOL; 390 down_write(&cell->vl_sem);
391 spin_lock(&cell->vl_lock);
392 list_for_each_entry(vl, &cell->vl_list, link) {
393 if (vl->vldb.name[namesz] != '\0')
394 continue;
395 if (memcmp(vl->vldb.name, name, namesz) == 0)
396 goto found_in_memory;
575 } 397 }
576 else { 398 spin_unlock(&cell->vl_lock);
577 BUG(); 399
578 vid = 0; 400 /* not in the cell's in-memory lists - create a new record */
579 voltype = 0; 401 vl = afs_vlocation_alloc(cell, name, namesz);
402 if (!vl) {
403 up_write(&cell->vl_sem);
404 return ERR_PTR(-ENOMEM);
580 } 405 }
581 406
582 /* contact the chosen server */ 407 afs_get_cell(cell);
583 ret = afs_server_lookup(
584 vlocation->cell,
585 &vlocation->cell->vl_addrs[vlocation->upd_curr_svix],
586 &vlocation->upd_op.server);
587 408
588 switch (ret) { 409 list_add_tail(&vl->link, &cell->vl_list);
589 case 0: 410 vl->state = AFS_VL_CREATING;
590 break; 411 up_write(&cell->vl_sem);
591 case -ENOMEM:
592 case -ENONET:
593 default:
594 _leave(" = %d", ret);
595 return ret;
596 }
597 412
598 /* initiate the update operation */ 413fill_in_record:
599 ret = afs_rxvl_get_entry_by_id_async(&vlocation->upd_op, vid, voltype); 414 ret = afs_vlocation_fill_in_record(vl, key);
600 if (ret < 0) { 415 if (ret < 0)
601 _leave(" = %d", ret); 416 goto error_abandon;
602 return ret; 417 spin_lock(&vl->lock);
418 vl->state = AFS_VL_VALID;
419 wake_up(&vl->waitq);
420 spin_unlock(&vl->lock);
421
422 /* schedule for regular updates */
423 afs_vlocation_queue_for_updates(vl);
424 goto success;
425
426found_in_memory:
427 /* found in memory */
428 _debug("found in memory");
429 atomic_inc(&vl->usage);
430 spin_unlock(&cell->vl_lock);
431 if (!list_empty(&vl->grave)) {
432 spin_lock(&afs_vlocation_graveyard_lock);
433 list_del_init(&vl->grave);
434 spin_unlock(&afs_vlocation_graveyard_lock);
603 } 435 }
436 up_write(&cell->vl_sem);
437
438 /* see if it was an abandoned record that we might try filling in */
439 spin_lock(&vl->lock);
440 while (vl->state != AFS_VL_VALID) {
441 afs_vlocation_state_t state = vl->state;
442
443 _debug("invalid [state %d]", state);
444
445 if (state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME) {
446 vl->state = AFS_VL_CREATING;
447 spin_unlock(&vl->lock);
448 goto fill_in_record;
449 }
450
451 /* must now wait for creation or update by someone else to
452 * complete */
453 _debug("wait");
604 454
455 spin_unlock(&vl->lock);
456 ret = wait_event_interruptible(
457 vl->waitq,
458 vl->state == AFS_VL_NEW ||
459 vl->state == AFS_VL_VALID ||
460 vl->state == AFS_VL_NO_VOLUME);
461 if (ret < 0)
462 goto error;
463 spin_lock(&vl->lock);
464 }
465 spin_unlock(&vl->lock);
466
467success:
468 _leave(" = %p", vl);
469 return vl;
470
471error_abandon:
472 spin_lock(&vl->lock);
473 vl->state = AFS_VL_NEW;
474 wake_up(&vl->waitq);
475 spin_unlock(&vl->lock);
476error:
477 ASSERT(vl != NULL);
478 afs_put_vlocation(vl);
605 _leave(" = %d", ret); 479 _leave(" = %d", ret);
606 return ret; 480 return ERR_PTR(ret);
607} /* end afs_vlocation_update_begin() */ 481}
608 482
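The lookup above is built around a claim-or-wait handshake on vl->state: a caller either claims an unfilled record (NEW or NO_VOLUME becomes CREATING) or sleeps until whoever holds it reaches a settled state. The two halves of that handshake, pulled out as a sketch - these helpers do not exist in the patch, though the state names and fields do:

	/* setter side: move to a settled state and wake any sleepers */
	static void afs_vl_settle(struct afs_vlocation *vl,
				  afs_vlocation_state_t state)
	{
		spin_lock(&vl->lock);
		vl->state = state;
		wake_up(&vl->waitq);	/* pairs with the waiter below */
		spin_unlock(&vl->lock);
	}

	/* waiter side: sleep until the record settles (or a signal arrives) */
	static int afs_vl_wait_settled(struct afs_vlocation *vl)
	{
		return wait_event_interruptible(vl->waitq,
						vl->state == AFS_VL_NEW ||
						vl->state == AFS_VL_VALID ||
						vl->state == AFS_VL_NO_VOLUME);
	}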
609/*****************************************************************************/
610/* 483/*
611 * abandon updating a VL record 484 * finish using a volume location record
612 * - does not restart the update timer
613 */ 485 */
614static void afs_vlocation_update_abandon(struct afs_vlocation *vlocation, 486void afs_put_vlocation(struct afs_vlocation *vl)
615 afs_vlocation_upd_t state,
616 int ret)
617{ 487{
618 _enter("%s,%u", vlocation->vldb.name, state); 488 if (!vl)
619 489 return;
620 if (ret < 0)
621 printk("kAFS: Abandoning VL update '%s': %d\n",
622 vlocation->vldb.name, ret);
623
624 /* discard the server record */
625 afs_put_server(vlocation->upd_op.server);
626 vlocation->upd_op.server = NULL;
627 490
628 spin_lock(&afs_vlocation_update_lock); 491 _enter("%s", vl->vldb.name);
629 afs_vlocation_update = NULL;
630 vlocation->upd_state = state;
631 492
632 /* TODO: start updating next VL record on pending list */ 493 ASSERTCMP(atomic_read(&vl->usage), >, 0);
633 494
634 spin_unlock(&afs_vlocation_update_lock); 495 if (likely(!atomic_dec_and_test(&vl->usage))) {
496 _leave("");
497 return;
498 }
635 499
636 _leave(""); 500 spin_lock(&afs_vlocation_graveyard_lock);
637} /* end afs_vlocation_update_abandon() */ 501 if (atomic_read(&vl->usage) == 0) {
502 _debug("buried");
503 list_move_tail(&vl->grave, &afs_vlocation_graveyard);
504 vl->time_of_death = get_seconds();
505 schedule_delayed_work(&afs_vlocation_reap,
506 afs_vlocation_timeout * HZ);
507
508 /* suspend updates on this record */
509 if (!list_empty(&vl->update)) {
510 spin_lock(&afs_vlocation_updates_lock);
511 list_del_init(&vl->update);
512 spin_unlock(&afs_vlocation_updates_lock);
513 }
514 }
515 spin_unlock(&afs_vlocation_graveyard_lock);
516 _leave(" [killed?]");
517}
638 518
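Dropping the last reference above does not free the record: it is buried in a global graveyard and only reaped after afs_vlocation_timeout seconds, so a prompt re-lookup can resurrect it cheaply by taking a reference and unlinking vl->grave (as afs_vlocation_lookup() does). The put side of the pattern in outline - a sketch of the function above, with the resurrection re-check kept:

	if (atomic_dec_and_test(&vl->usage)) {
		spin_lock(&afs_vlocation_graveyard_lock);
		/* re-check: a concurrent lookup may have resurrected it */
		if (atomic_read(&vl->usage) == 0) {
			vl->time_of_death = get_seconds();
			list_move_tail(&vl->grave, &afs_vlocation_graveyard);
			schedule_delayed_work(&afs_vlocation_reap,
					      afs_vlocation_timeout * HZ);
		}
		spin_unlock(&afs_vlocation_graveyard_lock);
	}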
639/*****************************************************************************/
640/* 519/*
641 * handle periodic update timeouts and busy retry timeouts 520 * destroy a dead volume location record
642 * - called from kafstimod
643 */ 521 */
644static void afs_vlocation_update_timer(struct afs_timer *timer) 522static void afs_vlocation_destroy(struct afs_vlocation *vl)
645{ 523{
646 struct afs_vlocation *vlocation = 524 _enter("%p", vl);
647 list_entry(timer, struct afs_vlocation, upd_timer);
648 int ret;
649 525
650 _enter("%s", vlocation->vldb.name); 526#ifdef AFS_CACHING_SUPPORT
527 cachefs_relinquish_cookie(vl->cache, 0);
528#endif
651 529
652 /* only update if not in the graveyard (defend against putting too) */ 530 afs_put_cell(vl->cell);
653 spin_lock(&vlocation->cell->vl_gylock); 531 kfree(vl);
532}
654 533
655 if (!atomic_read(&vlocation->usage)) 534/*
656 goto out_unlock1; 535 * reap dead volume location records
536 */
537static void afs_vlocation_reaper(struct work_struct *work)
538{
539 LIST_HEAD(corpses);
540 struct afs_vlocation *vl;
541 unsigned long delay, expiry;
542 time_t now;
657 543
658 spin_lock(&afs_vlocation_update_lock); 544 _enter("");
659 545
660 /* if we were woken up due to EBUSY sleep then restart immediately if 546 now = get_seconds();
661 * possible or else jump to front of pending queue */ 547 spin_lock(&afs_vlocation_graveyard_lock);
662 if (vlocation->upd_state == AFS_VLUPD_BUSYSLEEP) { 548
663 if (afs_vlocation_update) { 549 while (!list_empty(&afs_vlocation_graveyard)) {
664 list_add(&vlocation->upd_op.link, 550 vl = list_entry(afs_vlocation_graveyard.next,
665 &afs_vlocation_update_pendq); 551 struct afs_vlocation, grave);
552
553 _debug("check %p", vl);
554
555 /* the queue is ordered most dead first */
556 expiry = vl->time_of_death + afs_vlocation_timeout;
557 if (expiry > now) {
558 delay = (expiry - now) * HZ;
559 _debug("delay %lu", delay);
560 if (!schedule_delayed_work(&afs_vlocation_reap,
561 delay)) {
562 cancel_delayed_work(&afs_vlocation_reap);
563 schedule_delayed_work(&afs_vlocation_reap,
564 delay);
565 }
566 break;
666 } 567 }
667 else { 568
668 afs_get_vlocation(vlocation); 569 spin_lock(&vl->cell->vl_lock);
669 afs_vlocation_update = vlocation; 570 if (atomic_read(&vl->usage) > 0) {
670 vlocation->upd_state = AFS_VLUPD_INPROGRESS; 571 _debug("no reap");
572 list_del_init(&vl->grave);
573 } else {
574 _debug("reap");
575 list_move_tail(&vl->grave, &corpses);
576 list_del_init(&vl->link);
671 } 577 }
672 goto out_unlock2; 578 spin_unlock(&vl->cell->vl_lock);
673 } 579 }
674 580
675 /* put on pending queue if there's already another update in progress */ 581 spin_unlock(&afs_vlocation_graveyard_lock);
676 if (afs_vlocation_update) {
677 vlocation->upd_state = AFS_VLUPD_PENDING;
678 list_add_tail(&vlocation->upd_op.link,
679 &afs_vlocation_update_pendq);
680 goto out_unlock2;
681 }
682 582
683 /* hold a ref on it while actually updating */ 583 /* now reap the corpses we've extracted */
684 afs_get_vlocation(vlocation); 584 while (!list_empty(&corpses)) {
685 afs_vlocation_update = vlocation; 585 vl = list_entry(corpses.next, struct afs_vlocation, grave);
686 vlocation->upd_state = AFS_VLUPD_INPROGRESS; 586 list_del(&vl->grave);
687 587 afs_vlocation_destroy(vl);
688 spin_unlock(&afs_vlocation_update_lock);
689 spin_unlock(&vlocation->cell->vl_gylock);
690
691 /* okay... we can start the update */
692 _debug("BEGIN VL UPDATE [%s]", vlocation->vldb.name);
693 vlocation->upd_first_svix = vlocation->cell->vl_curr_svix;
694 vlocation->upd_curr_svix = vlocation->upd_first_svix;
695 vlocation->upd_rej_cnt = 0;
696 vlocation->upd_busy_cnt = 0;
697
698 ret = afs_vlocation_update_begin(vlocation);
699 if (ret < 0) {
700 afs_vlocation_update_abandon(vlocation, AFS_VLUPD_SLEEP, ret);
701 afs_kafstimod_add_timer(&vlocation->upd_timer,
702 AFS_VLDB_TIMEOUT);
703 afs_put_vlocation(vlocation);
704 } 588 }
705 589
706 _leave(""); 590 _leave("");
707 return; 591}
708 592
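The reaper above is a standard two-phase collector: expired records are moved onto a private corpses list while the graveyard lock is held, and destroyed only after the lock is dropped, since afs_vlocation_destroy() may sleep. Reduced to its skeleton (a sketch; the per-record resurrection check under cell->vl_lock is elided here):

	LIST_HEAD(corpses);

	spin_lock(&afs_vlocation_graveyard_lock);
	while (!list_empty(&afs_vlocation_graveyard)) {
		vl = list_entry(afs_vlocation_graveyard.next,
				struct afs_vlocation, grave);
		/* the queue is ordered most-dead-first */
		if (vl->time_of_death + afs_vlocation_timeout > now)
			break;
		list_move_tail(&vl->grave, &corpses);
	}
	spin_unlock(&afs_vlocation_graveyard_lock);

	while (!list_empty(&corpses)) {
		vl = list_entry(corpses.next, struct afs_vlocation, grave);
		list_del(&vl->grave);
		afs_vlocation_destroy(vl);	/* no locks held here */
	}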
709 out_unlock2: 593/*
710 spin_unlock(&afs_vlocation_update_lock); 594 * initialise the VL update process
711 out_unlock1: 595 */
712 spin_unlock(&vlocation->cell->vl_gylock); 596int __init afs_vlocation_update_init(void)
713 _leave(""); 597{
714 return; 598 afs_vlocation_update_worker =
599 create_singlethread_workqueue("kafs_vlupdated");
600 return afs_vlocation_update_worker ? 0 : -ENOMEM;
601}
715 602
716} /* end afs_vlocation_update_timer() */ 603/*
604 * discard all the volume location records for rmmod
605 */
606void __exit afs_vlocation_purge(void)
607{
608 afs_vlocation_timeout = 0;
609
610 spin_lock(&afs_vlocation_updates_lock);
611 list_del_init(&afs_vlocation_updates);
612 spin_unlock(&afs_vlocation_updates_lock);
613 cancel_delayed_work(&afs_vlocation_update);
614 queue_delayed_work(afs_vlocation_update_worker,
615 &afs_vlocation_update, 0);
616 destroy_workqueue(afs_vlocation_update_worker);
617
618 cancel_delayed_work(&afs_vlocation_reap);
619 schedule_delayed_work(&afs_vlocation_reap, 0);
620}
717 621
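The purge above leans on a small delayed-work idiom: with afs_vlocation_timeout forced to zero nothing will re-arm itself, so cancelling the pending timer and requeueing the work with a zero delay makes it run at once, and destroying the single-threaded workqueue then waits for it to finish. The idiom in isolation (a sketch; dwork and wq are placeholders, not names from the patch):

	cancel_delayed_work(&dwork);		/* disarm any pending timer */
	queue_delayed_work(wq, &dwork, 0);	/* run the handler immediately */
	destroy_workqueue(wq);			/* blocks until it has finished */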
718/*****************************************************************************/
719/* 622/*
720 * attend to an update operation upon which an event happened 623 * update a volume location
721 * - called in kafsasyncd context
722 */ 624 */
723static void afs_vlocation_update_attend(struct afs_async_op *op) 625static void afs_vlocation_updater(struct work_struct *work)
724{ 626{
725 struct afs_cache_vlocation vldb; 627 struct afs_cache_vlocation vldb;
726 struct afs_vlocation *vlocation = 628 struct afs_vlocation *vl, *xvl;
727 list_entry(op, struct afs_vlocation, upd_op); 629 time_t now;
728 unsigned tmp; 630 long timeout;
729 int ret; 631 int ret;
730 632
731 _enter("%s", vlocation->vldb.name); 633 _enter("");
732
733 ret = afs_rxvl_get_entry_by_id_async2(op, &vldb);
734 switch (ret) {
735 case -EAGAIN:
736 _leave(" [unfinished]");
737 return;
738
739 case 0:
740 _debug("END VL UPDATE: %d\n", ret);
741 vlocation->valid = 1;
742
743 _debug("Done VL Lookup: %02x { %08x(%x) %08x(%x) %08x(%x) }",
744 vldb.vidmask,
745 ntohl(vldb.servers[0].s_addr), vldb.srvtmask[0],
746 ntohl(vldb.servers[1].s_addr), vldb.srvtmask[1],
747 ntohl(vldb.servers[2].s_addr), vldb.srvtmask[2]
748 );
749
750 _debug("Vids: %08x %08x %08x",
751 vldb.vid[0], vldb.vid[1], vldb.vid[2]);
752
753 afs_vlocation_update_abandon(vlocation, AFS_VLUPD_SLEEP, 0);
754
755 down_write(&vlocation->cell->vl_sem);
756
757 /* actually update the cache */
758 if (strncmp(vldb.name, vlocation->vldb.name,
759 sizeof(vlocation->vldb.name)) != 0)
760 printk("kAFS: name of volume '%s'"
761 " changed to '%s' on server\n",
762 vlocation->vldb.name, vldb.name);
763
764 memcpy(&vlocation->vldb, &vldb, sizeof(vlocation->vldb));
765
766#if 0
767 /* TODO update volume entry in local cache */
768#endif
769
770 up_write(&vlocation->cell->vl_sem);
771
772 if (ret < 0)
773 printk("kAFS: failed to update local cache: %d\n", ret);
774
775 afs_kafstimod_add_timer(&vlocation->upd_timer,
776 AFS_VLDB_TIMEOUT);
777 afs_put_vlocation(vlocation);
778 _leave(" [found]");
779 return;
780
781 case -ENOMEDIUM:
782 vlocation->upd_rej_cnt++;
783 goto try_next;
784
785 /* the server is locked - retry in a very short while */
786 case -EBUSY:
787 vlocation->upd_busy_cnt++;
788 if (vlocation->upd_busy_cnt > 3)
789 goto try_next; /* too many retries */
790
791 afs_vlocation_update_abandon(vlocation,
792 AFS_VLUPD_BUSYSLEEP, 0);
793 afs_kafstimod_add_timer(&vlocation->upd_timer, HZ / 2);
794 afs_put_vlocation(vlocation);
795 _leave(" [busy]");
796 return;
797
798 case -ENETUNREACH:
799 case -EHOSTUNREACH:
800 case -ECONNREFUSED:
801 case -EREMOTEIO:
802 /* record bad vlserver info in the cell too
803 * - TODO: use down_write_trylock() if available
804 */
805 if (vlocation->upd_curr_svix == vlocation->cell->vl_curr_svix)
806 vlocation->cell->vl_curr_svix =
807 vlocation->cell->vl_curr_svix %
808 vlocation->cell->vl_naddrs;
809
810 case -EBADRQC:
811 case -EINVAL:
812 case -EACCES:
813 case -EBADMSG:
814 goto try_next;
815
816 default:
817 goto abandon;
818 }
819
820 /* try contacting the next server */
821 try_next:
822 vlocation->upd_busy_cnt = 0;
823
824 /* discard the server record */
825 afs_put_server(vlocation->upd_op.server);
826 vlocation->upd_op.server = NULL;
827 634
828 tmp = vlocation->cell->vl_naddrs; 635 now = get_seconds();
829 if (tmp == 0)
830 goto abandon;
831 636
832 vlocation->upd_curr_svix++; 637 /* find a record to update */
833 if (vlocation->upd_curr_svix >= tmp) 638 spin_lock(&afs_vlocation_updates_lock);
834 vlocation->upd_curr_svix = 0; 639 for (;;) {
835 if (vlocation->upd_first_svix >= tmp) 640 if (list_empty(&afs_vlocation_updates)) {
836 vlocation->upd_first_svix = tmp - 1; 641 spin_unlock(&afs_vlocation_updates_lock);
642 _leave(" [nothing]");
643 return;
644 }
837 645
838 /* move to the next server */ 646 vl = list_entry(afs_vlocation_updates.next,
839 if (vlocation->upd_curr_svix != vlocation->upd_first_svix) { 647 struct afs_vlocation, update);
840 afs_vlocation_update_begin(vlocation); 648 if (atomic_read(&vl->usage) > 0)
841 _leave(" [next]"); 649 break;
842 return; 650 list_del_init(&vl->update);
843 } 651 }
844 652
845 /* run out of servers to try - was the volume rejected? */ 653 timeout = vl->update_at - now;
846 if (vlocation->upd_rej_cnt > 0) { 654 if (timeout > 0) {
847 printk("kAFS: Active volume no longer valid '%s'\n", 655 queue_delayed_work(afs_vlocation_update_worker,
848 vlocation->vldb.name); 656 &afs_vlocation_update, timeout * HZ);
849 vlocation->valid = 0; 657 spin_unlock(&afs_vlocation_updates_lock);
850 afs_vlocation_update_abandon(vlocation, AFS_VLUPD_SLEEP, 0); 658 _leave(" [nothing]");
851 afs_kafstimod_add_timer(&vlocation->upd_timer,
852 AFS_VLDB_TIMEOUT);
853 afs_put_vlocation(vlocation);
854 _leave(" [invalidated]");
855 return; 659 return;
856 } 660 }
857 661
858 /* abandon the update */ 662 list_del_init(&vl->update);
859 abandon: 663 atomic_inc(&vl->usage);
860 afs_vlocation_update_abandon(vlocation, AFS_VLUPD_SLEEP, ret); 664 spin_unlock(&afs_vlocation_updates_lock);
861 afs_kafstimod_add_timer(&vlocation->upd_timer, HZ * 10);
862 afs_put_vlocation(vlocation);
863 _leave(" [abandoned]");
864
865} /* end afs_vlocation_update_attend() */
866
867/*****************************************************************************/
868/*
869 * deal with an update operation being discarded
870 * - called in kafsasyncd context when it's dying due to rmmod
871 * - the call has already been aborted and put()'d
872 */
873static void afs_vlocation_update_discard(struct afs_async_op *op)
874{
875 struct afs_vlocation *vlocation =
876 list_entry(op, struct afs_vlocation, upd_op);
877 665
878 _enter("%s", vlocation->vldb.name); 666 /* we can now perform the update */
667 _debug("update %s", vl->vldb.name);
668 vl->state = AFS_VL_UPDATING;
669 vl->upd_rej_cnt = 0;
670 vl->upd_busy_cnt = 0;
879 671
880 afs_put_server(op->server); 672 ret = afs_vlocation_update_record(vl, NULL, &vldb);
881 op->server = NULL; 673 spin_lock(&vl->lock);
674 switch (ret) {
675 case 0:
676 afs_vlocation_apply_update(vl, &vldb);
677 vl->state = AFS_VL_VALID;
678 wake_up(&vl->waitq);
679 break;
680 case -ENOMEDIUM:
681 vl->state = AFS_VL_VOLUME_DELETED;
682 break;
683 default:
684 vl->state = AFS_VL_UNCERTAIN;
685 break;
686 }
687 spin_unlock(&vl->lock);
882 688
883 afs_put_vlocation(vlocation); 689 /* and then reschedule */
690 _debug("reschedule");
691 vl->update_at = get_seconds() + afs_vlocation_update_timeout;
884 692
885 _leave(""); 693 spin_lock(&afs_vlocation_updates_lock);
886} /* end afs_vlocation_update_discard() */
887 694
888/*****************************************************************************/ 695 if (!list_empty(&afs_vlocation_updates)) {
889/* 696 /* next update in 10 minutes, but wait at least 1 second more
890 * match a VLDB record stored in the cache 697 * than the newest record already queued so that we don't spam
891 * - may also load target from entry 698 * the VL server suddenly with lots of requests
892 */ 699 */
893#ifdef AFS_CACHING_SUPPORT 700 xvl = list_entry(afs_vlocation_updates.prev,
894static cachefs_match_val_t afs_vlocation_cache_match(void *target, 701 struct afs_vlocation, update);
895 const void *entry) 702 if (vl->update_at <= xvl->update_at)
896{ 703 vl->update_at = xvl->update_at + 1;
897 const struct afs_cache_vlocation *vldb = entry; 704 xvl = list_entry(afs_vlocation_updates.next,
898 struct afs_vlocation *vlocation = target; 705 struct afs_vlocation, update);
899 706 timeout = xvl->update_at - now;
900 _enter("{%s},{%s}", vlocation->vldb.name, vldb->name); 707 if (timeout < 0)
901 708 timeout = 0;
902 if (strncmp(vlocation->vldb.name, vldb->name, sizeof(vldb->name)) == 0 709 } else {
903 ) { 710 timeout = afs_vlocation_update_timeout;
904 if (!vlocation->valid ||
905 vlocation->vldb.rtime == vldb->rtime
906 ) {
907 vlocation->vldb = *vldb;
908 vlocation->valid = 1;
909 _leave(" = SUCCESS [c->m]");
910 return CACHEFS_MATCH_SUCCESS;
911 }
912 /* need to update cache if cached info differs */
913 else if (memcmp(&vlocation->vldb, vldb, sizeof(*vldb)) != 0) {
914 /* delete if VIDs for this name differ */
915 if (memcmp(&vlocation->vldb.vid,
916 &vldb->vid,
917 sizeof(vldb->vid)) != 0) {
918 _leave(" = DELETE");
919 return CACHEFS_MATCH_SUCCESS_DELETE;
920 }
921
922 _leave(" = UPDATE");
923 return CACHEFS_MATCH_SUCCESS_UPDATE;
924 }
925 else {
926 _leave(" = SUCCESS");
927 return CACHEFS_MATCH_SUCCESS;
928 }
929 } 711 }
930 712
931 _leave(" = FAILED"); 713 ASSERT(list_empty(&vl->update));
932 return CACHEFS_MATCH_FAILED;
933} /* end afs_vlocation_cache_match() */
934#endif
935
936/*****************************************************************************/
937/*
938 * update a VLDB record stored in the cache
939 */
940#ifdef AFS_CACHING_SUPPORT
941static void afs_vlocation_cache_update(void *source, void *entry)
942{
943 struct afs_cache_vlocation *vldb = entry;
944 struct afs_vlocation *vlocation = source;
945 714
946 _enter(""); 715 list_add_tail(&vl->update, &afs_vlocation_updates);
947
948 *vldb = vlocation->vldb;
949 716
950} /* end afs_vlocation_cache_update() */ 717 _debug("timeout %ld", timeout);
951#endif 718 queue_delayed_work(afs_vlocation_update_worker,
719 &afs_vlocation_update, timeout * HZ);
720 spin_unlock(&afs_vlocation_updates_lock);
721 afs_put_vlocation(vl);
722}
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index cf62da5d7825..a1904ab8426a 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -1,6 +1,6 @@
1/* vnode.c: AFS vnode management 1/* AFS vnode management
2 * 2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -14,142 +14,237 @@
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/pagemap.h>
18#include "volume.h"
19#include "cell.h"
20#include "cmservice.h"
21#include "fsclient.h"
22#include "vlclient.h"
23#include "vnode.h"
24#include "internal.h" 17#include "internal.h"
25 18
26static void afs_vnode_cb_timed_out(struct afs_timer *timer); 19#if 0
20static noinline bool dump_tree_aux(struct rb_node *node, struct rb_node *parent,
21 int depth, char lr)
22{
23 struct afs_vnode *vnode;
24 bool bad = false;
25
26 if (!node)
27 return false;
28
29 if (node->rb_left)
30 bad = dump_tree_aux(node->rb_left, node, depth + 2, '/');
31
32 vnode = rb_entry(node, struct afs_vnode, cb_promise);
33 _debug("%c %*.*s%c%p {%d}",
34 rb_is_red(node) ? 'R' : 'B',
35 depth, depth, "", lr,
36 vnode, vnode->cb_expires_at);
37 if (rb_parent(node) != parent) {
38 printk("BAD: %p != %p\n", rb_parent(node), parent);
39 bad = true;
40 }
27 41
28struct afs_timer_ops afs_vnode_cb_timed_out_ops = { 42 if (node->rb_right)
29 .timed_out = afs_vnode_cb_timed_out, 43 bad |= dump_tree_aux(node->rb_right, node, depth + 2, '\\');
30};
31 44
32#ifdef AFS_CACHING_SUPPORT 45 return bad;
33static cachefs_match_val_t afs_vnode_cache_match(void *target, 46}
34 const void *entry);
35static void afs_vnode_cache_update(void *source, void *entry);
36 47
37struct cachefs_index_def afs_vnode_cache_index_def = { 48static noinline void dump_tree(const char *name, struct afs_server *server)
38 .name = "vnode", 49{
39 .data_size = sizeof(struct afs_cache_vnode), 50 _enter("%s", name);
40 .keys[0] = { CACHEFS_INDEX_KEYS_BIN, 4 }, 51 if (dump_tree_aux(server->cb_promises.rb_node, NULL, 0, '-'))
41 .match = afs_vnode_cache_match, 52 BUG();
42 .update = afs_vnode_cache_update, 53}
43};
44#endif 54#endif
45 55
46/*****************************************************************************/
47/* 56/*
48 * handle a callback timing out 57 * insert a vnode into the backing server's vnode tree
49 * TODO: retain a ref to vnode struct for an outstanding callback timeout
50 */ 58 */
51static void afs_vnode_cb_timed_out(struct afs_timer *timer) 59static void afs_install_vnode(struct afs_vnode *vnode,
60 struct afs_server *server)
52{ 61{
53 struct afs_server *oldserver; 62 struct afs_server *old_server = vnode->server;
54 struct afs_vnode *vnode; 63 struct afs_vnode *xvnode;
64 struct rb_node *parent, **p;
55 65
56 vnode = list_entry(timer, struct afs_vnode, cb_timeout); 66 _enter("%p,%p", vnode, server);
57 67
58 _enter("%p", vnode); 68 if (old_server) {
69 spin_lock(&old_server->fs_lock);
70 rb_erase(&vnode->server_rb, &old_server->fs_vnodes);
71 spin_unlock(&old_server->fs_lock);
72 }
59 73
60 /* set the changed flag in the vnode and release the server */ 74 afs_get_server(server);
61 spin_lock(&vnode->lock); 75 vnode->server = server;
76 afs_put_server(old_server);
77
78 /* insert into the server's vnode tree in FID order */
79 spin_lock(&server->fs_lock);
80
81 parent = NULL;
82 p = &server->fs_vnodes.rb_node;
83 while (*p) {
84 parent = *p;
85 xvnode = rb_entry(parent, struct afs_vnode, server_rb);
86 if (vnode->fid.vid < xvnode->fid.vid)
87 p = &(*p)->rb_left;
88 else if (vnode->fid.vid > xvnode->fid.vid)
89 p = &(*p)->rb_right;
90 else if (vnode->fid.vnode < xvnode->fid.vnode)
91 p = &(*p)->rb_left;
92 else if (vnode->fid.vnode > xvnode->fid.vnode)
93 p = &(*p)->rb_right;
94 else if (vnode->fid.unique < xvnode->fid.unique)
95 p = &(*p)->rb_left;
96 else if (vnode->fid.unique > xvnode->fid.unique)
97 p = &(*p)->rb_right;
98 else
99 BUG(); /* can't happen unless afs_iget() malfunctions */
100 }
101
102 rb_link_node(&vnode->server_rb, parent, p);
103 rb_insert_color(&vnode->server_rb, &server->fs_vnodes);
62 104
63 oldserver = xchg(&vnode->cb_server, NULL); 105 spin_unlock(&server->fs_lock);
64 if (oldserver) { 106 _leave("");
65 vnode->flags |= AFS_VNODE_CHANGED; 107}
66 108
67 spin_lock(&afs_cb_hash_lock); 109/*
68 list_del_init(&vnode->cb_hash_link); 110 * insert a vnode into the promising server's update/expiration tree
69 spin_unlock(&afs_cb_hash_lock); 111 * - caller must hold vnode->lock
112 */
113static void afs_vnode_note_promise(struct afs_vnode *vnode,
114 struct afs_server *server)
115{
116 struct afs_server *old_server;
117 struct afs_vnode *xvnode;
118 struct rb_node *parent, **p;
70 119
71 spin_lock(&oldserver->cb_lock); 120 _enter("%p,%p", vnode, server);
72 list_del_init(&vnode->cb_link); 121
73 spin_unlock(&oldserver->cb_lock); 122 ASSERT(server != NULL);
123
124 old_server = vnode->server;
125 if (vnode->cb_promised) {
126 if (server == old_server &&
127 vnode->cb_expires == vnode->cb_expires_at) {
128 _leave(" [no change]");
129 return;
130 }
131
132 spin_lock(&old_server->cb_lock);
133 if (vnode->cb_promised) {
134 _debug("delete");
135 rb_erase(&vnode->cb_promise, &old_server->cb_promises);
136 vnode->cb_promised = false;
137 }
138 spin_unlock(&old_server->cb_lock);
74 } 139 }
75 140
76 spin_unlock(&vnode->lock); 141 if (vnode->server != server)
142 afs_install_vnode(vnode, server);
143
144 vnode->cb_expires_at = vnode->cb_expires;
145 _debug("PROMISE on %p {%lu}",
146 vnode, (unsigned long) vnode->cb_expires_at);
147
148 /* abuse an RB-tree to hold the expiration order (we may have multiple
149 * items with the same expiration time) */
150 spin_lock(&server->cb_lock);
151
152 parent = NULL;
153 p = &server->cb_promises.rb_node;
154 while (*p) {
155 parent = *p;
156 xvnode = rb_entry(parent, struct afs_vnode, cb_promise);
157 if (vnode->cb_expires_at < xvnode->cb_expires_at)
158 p = &(*p)->rb_left;
159 else
160 p = &(*p)->rb_right;
161 }
77 162
78 afs_put_server(oldserver); 163 rb_link_node(&vnode->cb_promise, parent, p);
164 rb_insert_color(&vnode->cb_promise, &server->cb_promises);
165 vnode->cb_promised = true;
79 166
167 spin_unlock(&server->cb_lock);
80 _leave(""); 168 _leave("");
81} /* end afs_vnode_cb_timed_out() */ 169}
82 170
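Both rb-tree walks above encode their keys inline: fs_vnodes orders vnodes by FID (vid, then vnode, then unique), while cb_promises orders them by expiry time, breaking ties by always descending right so duplicate expiry times are tolerated. The FID ordering written as a comparison helper - illustrative only, the patch open-codes it in afs_install_vnode():

	static int afs_fid_cmp(const struct afs_fid *a, const struct afs_fid *b)
	{
		if (a->vid != b->vid)
			return a->vid < b->vid ? -1 : 1;
		if (a->vnode != b->vnode)
			return a->vnode < b->vnode ? -1 : 1;
		if (a->unique != b->unique)
			return a->unique < b->unique ? -1 : 1;
		return 0;	/* same file - afs_iget() should prevent this */
	}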
83/*****************************************************************************/
84/* 171/*
85 * finish off updating the recorded status of a file 172 * handle remote file deletion by discarding the callback promise
173 */
174static void afs_vnode_deleted_remotely(struct afs_vnode *vnode)
175{
176 struct afs_server *server;
177
178 set_bit(AFS_VNODE_DELETED, &vnode->flags);
179
180 server = vnode->server;
181 if (vnode->cb_promised) {
182 spin_lock(&server->cb_lock);
183 if (vnode->cb_promised) {
184 rb_erase(&vnode->cb_promise, &server->cb_promises);
185 vnode->cb_promised = false;
186 }
187 spin_unlock(&server->cb_lock);
188 }
189
190 spin_lock(&vnode->server->fs_lock);
191 rb_erase(&vnode->server_rb, &vnode->server->fs_vnodes);
192 spin_unlock(&vnode->server->fs_lock);
193
194 vnode->server = NULL;
195 afs_put_server(server);
196}
197
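Note the double-checked discard used above (and again in afs_vnode_note_promise()): cb_promised is tested once outside the server's cb_lock as a cheap fast path, then re-tested under the lock before the rb_erase, because the callback-break path may clear the promise concurrently. The shape of that discard as a sketch:

	if (vnode->cb_promised) {			/* racy fast-path test */
		spin_lock(&server->cb_lock);
		if (vnode->cb_promised) {		/* authoritative re-test */
			rb_erase(&vnode->cb_promise, &server->cb_promises);
			vnode->cb_promised = false;
		}
		spin_unlock(&server->cb_lock);
	}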
198/*
199 * finish off updating the recorded status of a file after a successful
200 * operation completion
86 * - starts callback expiry timer 201 * - starts callback expiry timer
87 * - adds to server's callback list 202 * - adds to server's callback list
88 */ 203 */
89static void afs_vnode_finalise_status_update(struct afs_vnode *vnode, 204void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
90 struct afs_server *server, 205 struct afs_server *server)
91 int ret)
92{ 206{
93 struct afs_server *oldserver = NULL; 207 struct afs_server *oldserver = NULL;
94 208
95 _enter("%p,%p,%d", vnode, server, ret); 209 _enter("%p,%p", vnode, server);
96 210
97 spin_lock(&vnode->lock); 211 spin_lock(&vnode->lock);
212 clear_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
213 afs_vnode_note_promise(vnode, server);
214 vnode->update_cnt--;
215 ASSERTCMP(vnode->update_cnt, >=, 0);
216 spin_unlock(&vnode->lock);
217
218 wake_up_all(&vnode->update_waitq);
219 afs_put_server(oldserver);
220 _leave("");
221}
98 222
99 vnode->flags &= ~AFS_VNODE_CHANGED; 223/*
224 * finish off updating the recorded status of a file after an operation failed
225 */
226static void afs_vnode_status_update_failed(struct afs_vnode *vnode, int ret)
227{
228 _enter("%p,%d", vnode, ret);
100 229
101 if (ret == 0) { 230 spin_lock(&vnode->lock);
102 /* adjust the callback timeout appropriately */
103 afs_kafstimod_add_timer(&vnode->cb_timeout,
104 vnode->cb_expiry * HZ);
105
106 spin_lock(&afs_cb_hash_lock);
107 list_move_tail(&vnode->cb_hash_link,
108 &afs_cb_hash(server, &vnode->fid));
109 spin_unlock(&afs_cb_hash_lock);
110
111 /* swap ref to old callback server with that for new callback
112 * server */
113 oldserver = xchg(&vnode->cb_server, server);
114 if (oldserver != server) {
115 if (oldserver) {
116 spin_lock(&oldserver->cb_lock);
117 list_del_init(&vnode->cb_link);
118 spin_unlock(&oldserver->cb_lock);
119 }
120 231
121 afs_get_server(server); 232 clear_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
122 spin_lock(&server->cb_lock);
123 list_add_tail(&vnode->cb_link, &server->cb_promises);
124 spin_unlock(&server->cb_lock);
125 }
126 else {
127 /* same server */
128 oldserver = NULL;
129 }
130 }
131 else if (ret == -ENOENT) {
132 /* the file was deleted - clear the callback timeout */
133 oldserver = xchg(&vnode->cb_server, NULL);
134 afs_kafstimod_del_timer(&vnode->cb_timeout);
135 233
234 if (ret == -ENOENT) {
235 /* the file was deleted on the server */
136 _debug("got NOENT from server - marking file deleted"); 236 _debug("got NOENT from server - marking file deleted");
137 vnode->flags |= AFS_VNODE_DELETED; 237 afs_vnode_deleted_remotely(vnode);
138 } 238 }
139 239
140 vnode->update_cnt--; 240 vnode->update_cnt--;
141 241 ASSERTCMP(vnode->update_cnt, >=, 0);
142 spin_unlock(&vnode->lock); 242 spin_unlock(&vnode->lock);
143 243
144 wake_up_all(&vnode->update_waitq); 244 wake_up_all(&vnode->update_waitq);
145
146 afs_put_server(oldserver);
147
148 _leave(""); 245 _leave("");
246}
149 247
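Between them, the two completion paths above maintain the invariant that the fetch path depends on: a vnode's cached status is trustworthy only while it holds an unbroken callback promise. The predicate that afs_vnode_fetch_status() tests inline, written out as a sketch (the helper is ours; the flag and field are the patch's):

	static bool afs_vnode_status_valid(const struct afs_vnode *vnode)
	{
		return !test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
			vnode->cb_promised;
	}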
150} /* end afs_vnode_finalise_status_update() */
151
152/*****************************************************************************/
153/* 248/*
154 * fetch file status from the volume 249 * fetch file status from the volume
155 * - don't issue a fetch if: 250 * - don't issue a fetch if:
@@ -157,9 +252,11 @@ static void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
157 * - there are any outstanding ops that will fetch the status 252 * - there are any outstanding ops that will fetch the status
158 * - TODO implement local caching 253 * - TODO implement local caching
159 */ 254 */
160int afs_vnode_fetch_status(struct afs_vnode *vnode) 255int afs_vnode_fetch_status(struct afs_vnode *vnode,
256 struct afs_vnode *auth_vnode, struct key *key)
161{ 257{
162 struct afs_server *server; 258 struct afs_server *server;
259 unsigned long acl_order;
163 int ret; 260 int ret;
164 261
165 DECLARE_WAITQUEUE(myself, current); 262 DECLARE_WAITQUEUE(myself, current);
@@ -168,38 +265,49 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode)
168 vnode->volume->vlocation->vldb.name, 265 vnode->volume->vlocation->vldb.name,
169 vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); 266 vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
170 267
171 if (!(vnode->flags & AFS_VNODE_CHANGED) && vnode->cb_server) { 268 if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
269 vnode->cb_promised) {
172 _leave(" [unchanged]"); 270 _leave(" [unchanged]");
173 return 0; 271 return 0;
174 } 272 }
175 273
176 if (vnode->flags & AFS_VNODE_DELETED) { 274 if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
177 _leave(" [deleted]"); 275 _leave(" [deleted]");
178 return -ENOENT; 276 return -ENOENT;
179 } 277 }
180 278
279 acl_order = 0;
280 if (auth_vnode)
281 acl_order = auth_vnode->acl_order;
282
181 spin_lock(&vnode->lock); 283 spin_lock(&vnode->lock);
182 284
183 if (!(vnode->flags & AFS_VNODE_CHANGED)) { 285 if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
286 vnode->cb_promised) {
184 spin_unlock(&vnode->lock); 287 spin_unlock(&vnode->lock);
185 _leave(" [unchanged]"); 288 _leave(" [unchanged]");
186 return 0; 289 return 0;
187 } 290 }
188 291
292 ASSERTCMP(vnode->update_cnt, >=, 0);
293
189 if (vnode->update_cnt > 0) { 294 if (vnode->update_cnt > 0) {
190 /* someone else started a fetch */ 295 /* someone else started a fetch */
296 _debug("wait on fetch %d", vnode->update_cnt);
297
191 set_current_state(TASK_UNINTERRUPTIBLE); 298 set_current_state(TASK_UNINTERRUPTIBLE);
299 ASSERT(myself.func != NULL);
192 add_wait_queue(&vnode->update_waitq, &myself); 300 add_wait_queue(&vnode->update_waitq, &myself);
193 301
194 /* wait for the status to be updated */ 302 /* wait for the status to be updated */
195 for (;;) { 303 for (;;) {
196 if (!(vnode->flags & AFS_VNODE_CHANGED)) 304 if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags))
197 break; 305 break;
198 if (vnode->flags & AFS_VNODE_DELETED) 306 if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
199 break; 307 break;
200 308
201 /* it got updated and invalidated all before we saw 309 /* check to see if it got updated and invalidated all
202 * it */ 310 * before we saw it */
203 if (vnode->update_cnt == 0) { 311 if (vnode->update_cnt == 0) {
204 remove_wait_queue(&vnode->update_waitq, 312 remove_wait_queue(&vnode->update_waitq,
205 &myself); 313 &myself);
@@ -219,10 +327,11 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode)
219 spin_unlock(&vnode->lock); 327 spin_unlock(&vnode->lock);
220 set_current_state(TASK_RUNNING); 328 set_current_state(TASK_RUNNING);
221 329
222 return vnode->flags & AFS_VNODE_DELETED ? -ENOENT : 0; 330 return test_bit(AFS_VNODE_DELETED, &vnode->flags) ?
331 -ENOENT : 0;
223 } 332 }
224 333
225 get_anyway: 334get_anyway:
226 /* okay... we're going to have to initiate the op */ 335 /* okay... we're going to have to initiate the op */
227 vnode->update_cnt++; 336 vnode->update_cnt++;
228 337
@@ -232,39 +341,60 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode)
232 * vnode */ 341 * vnode */
233 do { 342 do {
234 /* pick a server to query */ 343 /* pick a server to query */
235 ret = afs_volume_pick_fileserver(vnode->volume, &server); 344 server = afs_volume_pick_fileserver(vnode);
236 if (ret<0) 345 if (IS_ERR(server))
237 return ret; 346 goto no_server;
238 347
239 _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); 348 _debug("USING SERVER: %p{%08x}",
349 server, ntohl(server->addr.s_addr));
240 350
241 ret = afs_rxfs_fetch_file_status(server, vnode, NULL); 351 ret = afs_fs_fetch_file_status(server, key, vnode, NULL,
352 &afs_sync_call);
242 353
243 } while (!afs_volume_release_fileserver(vnode->volume, server, ret)); 354 } while (!afs_volume_release_fileserver(vnode, server, ret));
244 355
245 /* adjust the flags */ 356 /* adjust the flags */
246 afs_vnode_finalise_status_update(vnode, server, ret); 357 if (ret == 0) {
358 _debug("adjust");
359 if (auth_vnode)
360 afs_cache_permit(vnode, key, acl_order);
361 afs_vnode_finalise_status_update(vnode, server);
362 afs_put_server(server);
363 } else {
364 _debug("failed [%d]", ret);
365 afs_vnode_status_update_failed(vnode, ret);
366 }
247 367
248 _leave(" = %d", ret); 368 ASSERTCMP(vnode->update_cnt, >=, 0);
369
370 _leave(" = %d [cnt %d]", ret, vnode->update_cnt);
249 return ret; 371 return ret;
250} /* end afs_vnode_fetch_status() */
251 372
252/*****************************************************************************/ 373no_server:
374 spin_lock(&vnode->lock);
375 vnode->update_cnt--;
376 ASSERTCMP(vnode->update_cnt, >=, 0);
377 spin_unlock(&vnode->lock);
378 _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
379 return PTR_ERR(server);
380}
381
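afs_vnode_fetch_status() above establishes the loop that every operation in this file now follows: bump update_cnt, iterate servers via afs_volume_pick_fileserver() until afs_volume_release_fileserver() says to stop retrying, then settle the status one way or the other. The skeleton, with the actual RPC abstracted out (do_op is a stand-in for any of the afs_fs_* calls, not a function in the patch):

	spin_lock(&vnode->lock);
	vnode->update_cnt++;		/* this op will fetch the status */
	spin_unlock(&vnode->lock);

	do {
		server = afs_volume_pick_fileserver(vnode);
		if (IS_ERR(server))
			goto no_server;	/* undo update_cnt and bail */

		ret = do_op(server, key, vnode);	/* e.g. afs_fs_fetch_file_status() */

	} while (!afs_volume_release_fileserver(vnode, server, ret));

	if (ret == 0)
		afs_vnode_finalise_status_update(vnode, server);	/* drops update_cnt */
	else
		afs_vnode_status_update_failed(vnode, ret);		/* likewise */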
253/* 382/*
254 * fetch file data from the volume 383 * fetch file data from the volume
255 * - TODO implement caching and server failover 384 * - TODO implement caching
256 */ 385 */
257int afs_vnode_fetch_data(struct afs_vnode *vnode, 386int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key,
258 struct afs_rxfs_fetch_descriptor *desc) 387 off_t offset, size_t length, struct page *page)
259{ 388{
260 struct afs_server *server; 389 struct afs_server *server;
261 int ret; 390 int ret;
262 391
263 _enter("%s,{%u,%u,%u}", 392 _enter("%s{%u,%u,%u},%x,,,",
264 vnode->volume->vlocation->vldb.name, 393 vnode->volume->vlocation->vldb.name,
265 vnode->fid.vid, 394 vnode->fid.vid,
266 vnode->fid.vnode, 395 vnode->fid.vnode,
267 vnode->fid.unique); 396 vnode->fid.unique,
397 key_serial(key));
268 398
269 /* this op will fetch the status */ 399 /* this op will fetch the status */
270 spin_lock(&vnode->lock); 400 spin_lock(&vnode->lock);
@@ -275,120 +405,351 @@ int afs_vnode_fetch_data(struct afs_vnode *vnode,
275 * vnode */ 405 * vnode */
276 do { 406 do {
277 /* pick a server to query */ 407 /* pick a server to query */
278 ret = afs_volume_pick_fileserver(vnode->volume, &server); 408 server = afs_volume_pick_fileserver(vnode);
279 if (ret < 0) 409 if (IS_ERR(server))
280 return ret; 410 goto no_server;
281 411
282 _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr)); 412 _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
283 413
284 ret = afs_rxfs_fetch_file_data(server, vnode, desc, NULL); 414 ret = afs_fs_fetch_data(server, key, vnode, offset, length,
415 page, &afs_sync_call);
285 416
286 } while (!afs_volume_release_fileserver(vnode->volume, server, ret)); 417 } while (!afs_volume_release_fileserver(vnode, server, ret));
287 418
288 /* adjust the flags */ 419 /* adjust the flags */
289 afs_vnode_finalise_status_update(vnode, server, ret); 420 if (ret == 0) {
421 afs_vnode_finalise_status_update(vnode, server);
422 afs_put_server(server);
423 } else {
424 afs_vnode_status_update_failed(vnode, ret);
425 }
290 426
291 _leave(" = %d", ret); 427 _leave(" = %d", ret);
292 return ret; 428 return ret;
293 429
294} /* end afs_vnode_fetch_data() */ 430no_server:
431 spin_lock(&vnode->lock);
432 vnode->update_cnt--;
433 ASSERTCMP(vnode->update_cnt, >=, 0);
434 spin_unlock(&vnode->lock);
435 return PTR_ERR(server);
436}
295 437
296/*****************************************************************************/
297/* 438/*
298 * break any outstanding callback on a vnode 439 * make a file or a directory
299 * - only relevent to server that issued it
300 */ 440 */
301int afs_vnode_give_up_callback(struct afs_vnode *vnode) 441int afs_vnode_create(struct afs_vnode *vnode, struct key *key,
442 const char *name, umode_t mode, struct afs_fid *newfid,
443 struct afs_file_status *newstatus,
444 struct afs_callback *newcb, struct afs_server **_server)
302{ 445{
303 struct afs_server *server; 446 struct afs_server *server;
304 int ret; 447 int ret;
305 448
306 _enter("%s,{%u,%u,%u}", 449 _enter("%s{%u,%u,%u},%x,%s,,",
307 vnode->volume->vlocation->vldb.name, 450 vnode->volume->vlocation->vldb.name,
308 vnode->fid.vid, 451 vnode->fid.vid,
309 vnode->fid.vnode, 452 vnode->fid.vnode,
310 vnode->fid.unique); 453 vnode->fid.unique,
311 454 key_serial(key),
312 spin_lock(&afs_cb_hash_lock); 455 name);
313 list_del_init(&vnode->cb_hash_link);
314 spin_unlock(&afs_cb_hash_lock);
315 456
316 /* set the changed flag in the vnode and release the server */ 457 /* this op will fetch the status on the directory we're creating in */
317 spin_lock(&vnode->lock); 458 spin_lock(&vnode->lock);
459 vnode->update_cnt++;
460 spin_unlock(&vnode->lock);
318 461
319 afs_kafstimod_del_timer(&vnode->cb_timeout); 462 do {
463 /* pick a server to query */
464 server = afs_volume_pick_fileserver(vnode);
465 if (IS_ERR(server))
466 goto no_server;
467
468 _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
320 469
321 server = xchg(&vnode->cb_server, NULL); 470 ret = afs_fs_create(server, key, vnode, name, mode, newfid,
322 if (server) { 471 newstatus, newcb, &afs_sync_call);
323 vnode->flags |= AFS_VNODE_CHANGED;
324 472
325 spin_lock(&server->cb_lock); 473 } while (!afs_volume_release_fileserver(vnode, server, ret));
326 list_del_init(&vnode->cb_link); 474
327 spin_unlock(&server->cb_lock); 475 /* adjust the flags */
476 if (ret == 0) {
477 afs_vnode_finalise_status_update(vnode, server);
478 *_server = server;
479 } else {
480 afs_vnode_status_update_failed(vnode, ret);
481 *_server = NULL;
328 } 482 }
329 483
484 _leave(" = %d [cnt %d]", ret, vnode->update_cnt);
485 return ret;
486
487no_server:
488 spin_lock(&vnode->lock);
489 vnode->update_cnt--;
490 ASSERTCMP(vnode->update_cnt, >=, 0);
330 spin_unlock(&vnode->lock); 491 spin_unlock(&vnode->lock);
492 _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
493 return PTR_ERR(server);
494}
331 495
332 ret = 0; 496/*
333 if (server) { 497 * remove a file or directory
334 ret = afs_rxfs_give_up_callback(server, vnode); 498 */
499int afs_vnode_remove(struct afs_vnode *vnode, struct key *key, const char *name,
500 bool isdir)
501{
502 struct afs_server *server;
503 int ret;
504
505 _enter("%s{%u,%u,%u},%x,%s",
506 vnode->volume->vlocation->vldb.name,
507 vnode->fid.vid,
508 vnode->fid.vnode,
509 vnode->fid.unique,
510 key_serial(key),
511 name);
512
513 /* this op will fetch the status on the directory we're removing from */
514 spin_lock(&vnode->lock);
515 vnode->update_cnt++;
516 spin_unlock(&vnode->lock);
517
518 do {
519 /* pick a server to query */
520 server = afs_volume_pick_fileserver(vnode);
521 if (IS_ERR(server))
522 goto no_server;
523
524 _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
525
526 ret = afs_fs_remove(server, key, vnode, name, isdir,
527 &afs_sync_call);
528
529 } while (!afs_volume_release_fileserver(vnode, server, ret));
530
531 /* adjust the flags */
532 if (ret == 0) {
533 afs_vnode_finalise_status_update(vnode, server);
335 afs_put_server(server); 534 afs_put_server(server);
535 } else {
536 afs_vnode_status_update_failed(vnode, ret);
336 } 537 }
337 538
338 _leave(" = %d", ret); 539 _leave(" = %d [cnt %d]", ret, vnode->update_cnt);
339 return ret; 540 return ret;
340} /* end afs_vnode_give_up_callback() */
341 541
342/*****************************************************************************/ 542no_server:
543 spin_lock(&vnode->lock);
544 vnode->update_cnt--;
545 ASSERTCMP(vnode->update_cnt, >=, 0);
546 spin_unlock(&vnode->lock);
547 _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
548 return PTR_ERR(server);
549}
550
343/* 551/*
344 * match a vnode record stored in the cache 552 * create a hard link
345 */ 553 */
346#ifdef AFS_CACHING_SUPPORT 554int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode,
347static cachefs_match_val_t afs_vnode_cache_match(void *target, 555 struct key *key, const char *name)
348 const void *entry)
349{ 556{
350 const struct afs_cache_vnode *cvnode = entry; 557 struct afs_server *server;
351 struct afs_vnode *vnode = target; 558 int ret;
352 559
353 _enter("{%x,%x,%Lx},{%x,%x,%Lx}", 560 _enter("%s{%u,%u,%u},%s{%u,%u,%u},%x,%s",
561 dvnode->volume->vlocation->vldb.name,
562 dvnode->fid.vid,
563 dvnode->fid.vnode,
564 dvnode->fid.unique,
565 vnode->volume->vlocation->vldb.name,
566 vnode->fid.vid,
354 vnode->fid.vnode, 567 vnode->fid.vnode,
355 vnode->fid.unique, 568 vnode->fid.unique,
356 vnode->status.version, 569 key_serial(key),
357 cvnode->vnode_id, 570 name);
358 cvnode->vnode_unique, 571
359 cvnode->data_version); 572 /* this op will fetch the status on the directory we're adding the link to */
360 573 spin_lock(&vnode->lock);
361 if (vnode->fid.vnode != cvnode->vnode_id) { 574 vnode->update_cnt++;
362 _leave(" = FAILED"); 575 spin_unlock(&vnode->lock);
363 return CACHEFS_MATCH_FAILED; 576 spin_lock(&dvnode->lock);
577 dvnode->update_cnt++;
578 spin_unlock(&dvnode->lock);
579
580 do {
581 /* pick a server to query */
582 server = afs_volume_pick_fileserver(dvnode);
583 if (IS_ERR(server))
584 goto no_server;
585
586 _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
587
588 ret = afs_fs_link(server, key, dvnode, vnode, name,
589 &afs_sync_call);
590
591 } while (!afs_volume_release_fileserver(dvnode, server, ret));
592
593 /* adjust the flags */
594 if (ret == 0) {
595 afs_vnode_finalise_status_update(vnode, server);
596 afs_vnode_finalise_status_update(dvnode, server);
597 afs_put_server(server);
598 } else {
599 afs_vnode_status_update_failed(vnode, ret);
600 afs_vnode_status_update_failed(dvnode, ret);
364 } 601 }
365 602
366 if (vnode->fid.unique != cvnode->vnode_unique || 603 _leave(" = %d [cnt %d]", ret, vnode->update_cnt);
367 vnode->status.version != cvnode->data_version) { 604 return ret;
368 _leave(" = DELETE"); 605
369 return CACHEFS_MATCH_SUCCESS_DELETE; 606no_server:
607 spin_lock(&vnode->lock);
608 vnode->update_cnt--;
609 ASSERTCMP(vnode->update_cnt, >=, 0);
610 spin_unlock(&vnode->lock);
611 spin_lock(&dvnode->lock);
612 dvnode->update_cnt--;
613 ASSERTCMP(dvnode->update_cnt, >=, 0);
614 spin_unlock(&dvnode->lock);
615 _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
616 return PTR_ERR(server);
617}
618
619/*
620 * create a symbolic link
621 */
622int afs_vnode_symlink(struct afs_vnode *vnode, struct key *key,
623 const char *name, const char *content,
624 struct afs_fid *newfid,
625 struct afs_file_status *newstatus,
626 struct afs_server **_server)
627{
628 struct afs_server *server;
629 int ret;
630
631 _enter("%s{%u,%u,%u},%x,%s,%s,,,",
632 vnode->volume->vlocation->vldb.name,
633 vnode->fid.vid,
634 vnode->fid.vnode,
635 vnode->fid.unique,
636 key_serial(key),
637 name, content);
638
639 /* this op will fetch the status on the directory we're creating in */
640 spin_lock(&vnode->lock);
641 vnode->update_cnt++;
642 spin_unlock(&vnode->lock);
643
644 do {
645 /* pick a server to query */
646 server = afs_volume_pick_fileserver(vnode);
647 if (IS_ERR(server))
648 goto no_server;
649
650 _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
651
652 ret = afs_fs_symlink(server, key, vnode, name, content,
653 newfid, newstatus, &afs_sync_call);
654
655 } while (!afs_volume_release_fileserver(vnode, server, ret));
656
657 /* adjust the flags */
658 if (ret == 0) {
659 afs_vnode_finalise_status_update(vnode, server);
660 *_server = server;
661 } else {
662 afs_vnode_status_update_failed(vnode, ret);
663 *_server = NULL;
370 } 664 }
371 665
372 _leave(" = SUCCESS"); 666 _leave(" = %d [cnt %d]", ret, vnode->update_cnt);
373 return CACHEFS_MATCH_SUCCESS; 667 return ret;
374} /* end afs_vnode_cache_match() */ 668
375#endif 669no_server:
670 spin_lock(&vnode->lock);
671 vnode->update_cnt--;
672 ASSERTCMP(vnode->update_cnt, >=, 0);
673 spin_unlock(&vnode->lock);
674 _leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
675 return PTR_ERR(server);
676}
376 677
377/*****************************************************************************/
378/* 678/*
379 * update a vnode record stored in the cache 679 * rename a file
380 */ 680 */
381#ifdef AFS_CACHING_SUPPORT 681int afs_vnode_rename(struct afs_vnode *orig_dvnode,
382static void afs_vnode_cache_update(void *source, void *entry) 682 struct afs_vnode *new_dvnode,
683 struct key *key,
684 const char *orig_name,
685 const char *new_name)
383{ 686{
384 struct afs_cache_vnode *cvnode = entry; 687 struct afs_server *server;
385 struct afs_vnode *vnode = source; 688 int ret;
386 689
387 _enter(""); 690 _enter("%s{%u,%u,%u},%s{%u,%u,%u},%x,%s,%s",
691 orig_dvnode->volume->vlocation->vldb.name,
692 orig_dvnode->fid.vid,
693 orig_dvnode->fid.vnode,
694 orig_dvnode->fid.unique,
695 new_dvnode->volume->vlocation->vldb.name,
696 new_dvnode->fid.vid,
697 new_dvnode->fid.vnode,
698 new_dvnode->fid.unique,
699 key_serial(key),
700 orig_name,
701 new_name);
702
703 /* this op will fetch the status on both the directories we're dealing
704 * with */
705 spin_lock(&orig_dvnode->lock);
706 orig_dvnode->update_cnt++;
707 spin_unlock(&orig_dvnode->lock);
708 if (new_dvnode != orig_dvnode) {
709 spin_lock(&new_dvnode->lock);
710 new_dvnode->update_cnt++;
711 spin_unlock(&new_dvnode->lock);
712 }
388 713
389 cvnode->vnode_id = vnode->fid.vnode; 714 do {
390 cvnode->vnode_unique = vnode->fid.unique; 715 /* pick a server to query */
391 cvnode->data_version = vnode->status.version; 716 server = afs_volume_pick_fileserver(orig_dvnode);
717 if (IS_ERR(server))
718 goto no_server;
392 719
393} /* end afs_vnode_cache_update() */ 720 _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
394#endif 721
722 ret = afs_fs_rename(server, key, orig_dvnode, orig_name,
723 new_dvnode, new_name, &afs_sync_call);
724
725 } while (!afs_volume_release_fileserver(orig_dvnode, server, ret));
726
727 /* adjust the flags */
728 if (ret == 0) {
729 afs_vnode_finalise_status_update(orig_dvnode, server);
730 if (new_dvnode != orig_dvnode)
731 afs_vnode_finalise_status_update(new_dvnode, server);
732 afs_put_server(server);
733 } else {
734 afs_vnode_status_update_failed(orig_dvnode, ret);
735 if (new_dvnode != orig_dvnode)
736 afs_vnode_status_update_failed(new_dvnode, ret);
737 }
738
739 _leave(" = %d [cnt %d]", ret, orig_dvnode->update_cnt);
740 return ret;
741
742no_server:
743 spin_lock(&orig_dvnode->lock);
744 orig_dvnode->update_cnt--;
745 ASSERTCMP(orig_dvnode->update_cnt, >=, 0);
746 spin_unlock(&orig_dvnode->lock);
747 if (new_dvnode != orig_dvnode) {
748 spin_lock(&new_dvnode->lock);
749 new_dvnode->update_cnt--;
750 ASSERTCMP(new_dvnode->update_cnt, >=, 0);
751 spin_unlock(&new_dvnode->lock);
752 }
753 _leave(" = %ld [cnt %d]", PTR_ERR(server), orig_dvnode->update_cnt);
754 return PTR_ERR(server);
755}
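
The vnode operations above (symlink, rename, and their siblings) all share one idiom: bump update_cnt so status updates stay serialised, then loop picking a fileserver and issuing the RPC until afs_volume_release_fileserver() reports that the result should stand. A minimal sketch of that pattern follows; afs_fs_do_op() is a hypothetical stand-in for any per-server RPC such as afs_fs_symlink() or afs_fs_rename(), everything else matches the calls visible in this diff:

/* Sketch only: afs_fs_do_op() is a placeholder, not a real function. */
static int afs_vnode_do_op(struct afs_vnode *vnode, struct key *key)
{
	struct afs_server *server;
	int ret;

	spin_lock(&vnode->lock);
	vnode->update_cnt++;		/* an update is now outstanding */
	spin_unlock(&vnode->lock);

	do {
		/* pick a server to query */
		server = afs_volume_pick_fileserver(vnode);
		if (IS_ERR(server))
			goto no_server;

		ret = afs_fs_do_op(server, key, vnode, &afs_sync_call);

		/* release returns 0 to retry on another server, 1 to stop */
	} while (!afs_volume_release_fileserver(vnode, server, ret));

	if (ret == 0) {
		afs_vnode_finalise_status_update(vnode, server);
		afs_put_server(server);	/* ref survives a successful release */
	} else {
		afs_vnode_status_update_failed(vnode, ret);
	}
	return ret;

no_server:
	spin_lock(&vnode->lock);
	vnode->update_cnt--;
	spin_unlock(&vnode->lock);
	return PTR_ERR(server);
}
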
diff --git a/fs/afs/vnode.h b/fs/afs/vnode.h
deleted file mode 100644
index b86a97102e8b..000000000000
--- a/fs/afs/vnode.h
+++ /dev/null
@@ -1,94 +0,0 @@
1/* vnode.h: AFS vnode record
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_AFS_VNODE_H
13#define _LINUX_AFS_VNODE_H
14
15#include <linux/fs.h>
16#include "server.h"
17#include "kafstimod.h"
18#include "cache.h"
19
20#ifdef __KERNEL__
21
22struct afs_rxfs_fetch_descriptor;
23
24/*****************************************************************************/
25/*
26 * vnode catalogue entry
27 */
28struct afs_cache_vnode
29{
30 afs_vnodeid_t vnode_id; /* vnode ID */
31 unsigned vnode_unique; /* vnode ID uniquifier */
32 afs_dataversion_t data_version; /* data version */
33};
34
35#ifdef AFS_CACHING_SUPPORT
36extern struct cachefs_index_def afs_vnode_cache_index_def;
37#endif
38
39/*****************************************************************************/
40/*
41 * AFS inode private data
42 */
43struct afs_vnode
44{
45 struct inode vfs_inode; /* the VFS's inode record */
46
47 struct afs_volume *volume; /* volume on which vnode resides */
48 struct afs_fid fid; /* the file identifier for this inode */
49 struct afs_file_status status; /* AFS status info for this file */
50#ifdef AFS_CACHING_SUPPORT
51 struct cachefs_cookie *cache; /* caching cookie */
52#endif
53
54 wait_queue_head_t update_waitq; /* status fetch waitqueue */
55 unsigned update_cnt; /* number of outstanding ops that will update the
56 * status */
57 spinlock_t lock; /* waitqueue/flags lock */
58 unsigned flags;
59#define AFS_VNODE_CHANGED 0x00000001 /* set if vnode reported changed by callback */
60#define AFS_VNODE_DELETED 0x00000002 /* set if vnode deleted on server */
61#define AFS_VNODE_MOUNTPOINT 0x00000004 /* set if vnode is a mountpoint symlink */
62
63 /* outstanding callback notification on this file */
64 struct afs_server *cb_server; /* server that made the current promise */
65 struct list_head cb_link; /* link in server's promises list */
66 struct list_head cb_hash_link; /* link in master callback hash */
67 struct afs_timer cb_timeout; /* timeout on promise */
68 unsigned cb_version; /* callback version */
69 unsigned cb_expiry; /* callback expiry time */
70 afs_callback_type_t cb_type; /* type of callback */
71};
72
73static inline struct afs_vnode *AFS_FS_I(struct inode *inode)
74{
75 return container_of(inode,struct afs_vnode,vfs_inode);
76}
77
78static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
79{
80 return &vnode->vfs_inode;
81}
82
83extern int afs_vnode_fetch_status(struct afs_vnode *vnode);
84
85extern int afs_vnode_fetch_data(struct afs_vnode *vnode,
86 struct afs_rxfs_fetch_descriptor *desc);
87
88extern int afs_vnode_give_up_callback(struct afs_vnode *vnode);
89
90extern struct afs_timer_ops afs_vnode_cb_timed_out_ops;
91
92#endif /* __KERNEL__ */
93
94#endif /* _LINUX_AFS_VNODE_H */
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 768c6dbd323a..dd160cada45d 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -1,6 +1,6 @@
1/* volume.c: AFS volume management 1/* AFS volume management
2 * 2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -15,35 +15,10 @@
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/pagemap.h> 17#include <linux/pagemap.h>
18#include "volume.h"
19#include "vnode.h"
20#include "cell.h"
21#include "cache.h"
22#include "cmservice.h"
23#include "fsclient.h"
24#include "vlclient.h"
25#include "internal.h" 18#include "internal.h"
26 19
27#ifdef __KDEBUG
28static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" }; 20static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" };
29#endif
30
31#ifdef AFS_CACHING_SUPPORT
32static cachefs_match_val_t afs_volume_cache_match(void *target,
33 const void *entry);
34static void afs_volume_cache_update(void *source, void *entry);
35
36struct cachefs_index_def afs_volume_cache_index_def = {
37 .name = "volume",
38 .data_size = sizeof(struct afs_cache_vhash),
39 .keys[0] = { CACHEFS_INDEX_KEYS_BIN, 1 },
40 .keys[1] = { CACHEFS_INDEX_KEYS_BIN, 1 },
41 .match = afs_volume_cache_match,
42 .update = afs_volume_cache_update,
43};
44#endif
45 21
46/*****************************************************************************/
47/* 22/*
48 * lookup a volume by name 23 * lookup a volume by name
49 * - this can be one of the following: 24 * - this can be one of the following:
@@ -66,118 +41,52 @@ struct cachefs_index_def afs_volume_cache_index_def = {
66 * - Rule 3: If parent volume is R/W, then only mount R/W volume unless 41 * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
67 * explicitly told otherwise 42 * explicitly told otherwise
68 */ 43 */
69int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath, 44struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
70 struct afs_volume **_volume)
71{ 45{
72 struct afs_vlocation *vlocation = NULL; 46 struct afs_vlocation *vlocation = NULL;
73 struct afs_volume *volume = NULL; 47 struct afs_volume *volume = NULL;
74 afs_voltype_t type; 48 struct afs_server *server = NULL;
75 const char *cellname, *volname, *suffix;
76 char srvtmask; 49 char srvtmask;
77 int force, ret, loop, cellnamesz, volnamesz; 50 int ret, loop;
78
79 _enter("%s,,%d,", name, rwpath);
80
81 if (!name || (name[0] != '%' && name[0] != '#') || !name[1]) {
82 printk("kAFS: unparsable volume name\n");
83 return -EINVAL;
84 }
85
86 /* determine the type of volume we're looking for */
87 force = 0;
88 type = AFSVL_ROVOL;
89
90 if (rwpath || name[0] == '%') {
91 type = AFSVL_RWVOL;
92 force = 1;
93 }
94
95 suffix = strrchr(name, '.');
96 if (suffix) {
97 if (strcmp(suffix, ".readonly") == 0) {
98 type = AFSVL_ROVOL;
99 force = 1;
100 }
101 else if (strcmp(suffix, ".backup") == 0) {
102 type = AFSVL_BACKVOL;
103 force = 1;
104 }
105 else if (suffix[1] == 0) {
106 }
107 else {
108 suffix = NULL;
109 }
110 }
111 51
112 /* split the cell and volume names */ 52 _enter("{%*.*s,%d}",
113 name++; 53 params->volnamesz, params->volnamesz, params->volname, params->rwpath);
114 volname = strchr(name, ':');
115 if (volname) {
116 cellname = name;
117 cellnamesz = volname - name;
118 volname++;
119 }
120 else {
121 volname = name;
122 cellname = NULL;
123 cellnamesz = 0;
124 }
125
126 volnamesz = suffix ? suffix - volname : strlen(volname);
127
128 _debug("CELL:%*.*s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s",
129 cellnamesz, cellnamesz, cellname ?: "", cell,
130 volnamesz, volnamesz, volname, suffix ?: "-",
131 type,
132 force ? " FORCE" : "");
133
134 /* lookup the cell record */
135 if (cellname || !cell) {
136 ret = afs_cell_lookup(cellname, cellnamesz, &cell);
137 if (ret<0) {
138 printk("kAFS: unable to lookup cell '%s'\n",
139 cellname ?: "");
140 goto error;
141 }
142 }
143 else {
144 afs_get_cell(cell);
145 }
146 54
147 /* lookup the volume location record */ 55 /* lookup the volume location record */
148 ret = afs_vlocation_lookup(cell, volname, volnamesz, &vlocation); 56 vlocation = afs_vlocation_lookup(params->cell, params->key,
149 if (ret < 0) 57 params->volname, params->volnamesz);
58 if (IS_ERR(vlocation)) {
59 ret = PTR_ERR(vlocation);
60 vlocation = NULL;
150 goto error; 61 goto error;
62 }
151 63
152 /* make the final decision on the type we want */ 64 /* make the final decision on the type we want */
153 ret = -ENOMEDIUM; 65 ret = -ENOMEDIUM;
154 if (force && !(vlocation->vldb.vidmask & (1 << type))) 66 if (params->force && !(vlocation->vldb.vidmask & (1 << params->type)))
155 goto error; 67 goto error;
156 68
157 srvtmask = 0; 69 srvtmask = 0;
158 for (loop = 0; loop < vlocation->vldb.nservers; loop++) 70 for (loop = 0; loop < vlocation->vldb.nservers; loop++)
159 srvtmask |= vlocation->vldb.srvtmask[loop]; 71 srvtmask |= vlocation->vldb.srvtmask[loop];
160 72
161 if (force) { 73 if (params->force) {
162 if (!(srvtmask & (1 << type))) 74 if (!(srvtmask & (1 << params->type)))
163 goto error; 75 goto error;
164 } 76 } else if (srvtmask & AFS_VOL_VTM_RO) {
165 else if (srvtmask & AFS_VOL_VTM_RO) { 77 params->type = AFSVL_ROVOL;
166 type = AFSVL_ROVOL; 78 } else if (srvtmask & AFS_VOL_VTM_RW) {
167 } 79 params->type = AFSVL_RWVOL;
168 else if (srvtmask & AFS_VOL_VTM_RW) { 80 } else {
169 type = AFSVL_RWVOL;
170 }
171 else {
172 goto error; 81 goto error;
173 } 82 }
174 83
175 down_write(&cell->vl_sem); 84 down_write(&params->cell->vl_sem);
176 85
177 /* is the volume already active? */ 86 /* is the volume already active? */
178 if (vlocation->vols[type]) { 87 if (vlocation->vols[params->type]) {
179 /* yes - re-use it */ 88 /* yes - re-use it */
180 volume = vlocation->vols[type]; 89 volume = vlocation->vols[params->type];
181 afs_get_volume(volume); 90 afs_get_volume(volume);
182 goto success; 91 goto success;
183 } 92 }
@@ -191,23 +100,24 @@ int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath,
191 goto error_up; 100 goto error_up;
192 101
193 atomic_set(&volume->usage, 1); 102 atomic_set(&volume->usage, 1);
194 volume->type = type; 103 volume->type = params->type;
195 volume->type_force = force; 104 volume->type_force = params->force;
196 volume->cell = cell; 105 volume->cell = params->cell;
197 volume->vid = vlocation->vldb.vid[type]; 106 volume->vid = vlocation->vldb.vid[params->type];
198 107
199 init_rwsem(&volume->server_sem); 108 init_rwsem(&volume->server_sem);
200 109
201 /* look up all the applicable server records */ 110 /* look up all the applicable server records */
202 for (loop = 0; loop < 8; loop++) { 111 for (loop = 0; loop < 8; loop++) {
203 if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) { 112 if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) {
204 ret = afs_server_lookup( 113 server = afs_lookup_server(
205 volume->cell, 114 volume->cell, &vlocation->vldb.servers[loop]);
206 &vlocation->vldb.servers[loop], 115 if (IS_ERR(server)) {
207 &volume->servers[volume->nservers]); 116 ret = PTR_ERR(server);
208 if (ret < 0)
209 goto error_discard; 117 goto error_discard;
118 }
210 119
120 volume->servers[volume->nservers] = server;
211 volume->nservers++; 121 volume->nservers++;
212 } 122 }
213 } 123 }
@@ -223,35 +133,34 @@ int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath,
223 afs_get_vlocation(vlocation); 133 afs_get_vlocation(vlocation);
224 volume->vlocation = vlocation; 134 volume->vlocation = vlocation;
225 135
226 vlocation->vols[type] = volume; 136 vlocation->vols[volume->type] = volume;
227 137
228 success: 138success:
229 _debug("kAFS selected %s volume %08x", 139 _debug("kAFS selected %s volume %08x",
230 afs_voltypes[volume->type], volume->vid); 140 afs_voltypes[volume->type], volume->vid);
231 *_volume = volume; 141 up_write(&params->cell->vl_sem);
232 ret = 0; 142 afs_put_vlocation(vlocation);
143 _leave(" = %p", volume);
144 return volume;
233 145
234 /* clean up */ 146 /* clean up */
235 error_up: 147error_up:
236 up_write(&cell->vl_sem); 148 up_write(&params->cell->vl_sem);
237 error: 149error:
238 afs_put_vlocation(vlocation); 150 afs_put_vlocation(vlocation);
239 afs_put_cell(cell); 151 _leave(" = %d", ret);
240 152 return ERR_PTR(ret);
241 _leave(" = %d (%p)", ret, volume);
242 return ret;
243 153
244 error_discard: 154error_discard:
245 up_write(&cell->vl_sem); 155 up_write(&params->cell->vl_sem);
246 156
247 for (loop = volume->nservers - 1; loop >= 0; loop--) 157 for (loop = volume->nservers - 1; loop >= 0; loop--)
248 afs_put_server(volume->servers[loop]); 158 afs_put_server(volume->servers[loop]);
249 159
250 kfree(volume); 160 kfree(volume);
251 goto error; 161 goto error;
252} /* end afs_volume_lookup() */ 162}
253 163
254/*****************************************************************************/
255/* 164/*
256 * destroy a volume record 165 * destroy a volume record
257 */ 166 */
@@ -265,10 +174,9 @@ void afs_put_volume(struct afs_volume *volume)
265 174
266 _enter("%p", volume); 175 _enter("%p", volume);
267 176
268 vlocation = volume->vlocation; 177 ASSERTCMP(atomic_read(&volume->usage), >, 0);
269 178
270 /* sanity check */ 179 vlocation = volume->vlocation;
271 BUG_ON(atomic_read(&volume->usage) <= 0);
272 180
273 /* to prevent a race, the decrement and the dequeue must be effectively 181 /* to prevent a race, the decrement and the dequeue must be effectively
274 * atomic */ 182 * atomic */
@@ -296,21 +204,27 @@ void afs_put_volume(struct afs_volume *volume)
296 kfree(volume); 204 kfree(volume);
297 205
298 _leave(" [destroyed]"); 206 _leave(" [destroyed]");
299} /* end afs_put_volume() */ 207}
300 208
301/*****************************************************************************/
302/* 209/*
303 * pick a server to use to try accessing this volume 210 * pick a server to use to try accessing this volume
304 * - returns with an elevated usage count on the server chosen 211 * - returns with an elevated usage count on the server chosen
305 */ 212 */
306int afs_volume_pick_fileserver(struct afs_volume *volume, 213struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
307 struct afs_server **_server)
308{ 214{
215 struct afs_volume *volume = vnode->volume;
309 struct afs_server *server; 216 struct afs_server *server;
310 int ret, state, loop; 217 int ret, state, loop;
311 218
312 _enter("%s", volume->vlocation->vldb.name); 219 _enter("%s", volume->vlocation->vldb.name);
313 220
221 /* stick with the server we're already using if we can */
222 if (vnode->server && vnode->server->fs_state == 0) {
223 afs_get_server(vnode->server);
224 _leave(" = %p [current]", vnode->server);
225 return vnode->server;
226 }
227
314 down_read(&volume->server_sem); 228 down_read(&volume->server_sem);
315 229
316 /* handle the no-server case */ 230 /* handle the no-server case */
@@ -318,7 +232,7 @@ int afs_volume_pick_fileserver(struct afs_volume *volume,
318 ret = volume->rjservers ? -ENOMEDIUM : -ESTALE; 232 ret = volume->rjservers ? -ENOMEDIUM : -ESTALE;
319 up_read(&volume->server_sem); 233 up_read(&volume->server_sem);
320 _leave(" = %d [no servers]", ret); 234 _leave(" = %d [no servers]", ret);
321 return ret; 235 return ERR_PTR(ret);
322 } 236 }
323 237
324 /* basically, just search the list for the first live server and use 238 /* basically, just search the list for the first live server and use
@@ -328,15 +242,16 @@ int afs_volume_pick_fileserver(struct afs_volume *volume,
328 server = volume->servers[loop]; 242 server = volume->servers[loop];
329 state = server->fs_state; 243 state = server->fs_state;
330 244
245 _debug("consider %d [%d]", loop, state);
246
331 switch (state) { 247 switch (state) {
332 /* found an apparently healthy server */ 248 /* found an apparently healthy server */
333 case 0: 249 case 0:
334 afs_get_server(server); 250 afs_get_server(server);
335 up_read(&volume->server_sem); 251 up_read(&volume->server_sem);
336 *_server = server; 252 _leave(" = %p (picked %08x)",
337 _leave(" = 0 (picked %08x)", 253 server, ntohl(server->addr.s_addr));
338 ntohl(server->addr.s_addr)); 254 return server;
339 return 0;
340 255
341 case -ENETUNREACH: 256 case -ENETUNREACH:
342 if (ret == 0) 257 if (ret == 0)
@@ -372,20 +287,21 @@ int afs_volume_pick_fileserver(struct afs_volume *volume,
372 */ 287 */
373 up_read(&volume->server_sem); 288 up_read(&volume->server_sem);
374 _leave(" = %d", ret); 289 _leave(" = %d", ret);
375 return ret; 290 return ERR_PTR(ret);
376} /* end afs_volume_pick_fileserver() */ 291}
377 292
378/*****************************************************************************/
379/* 293/*
380 * release a server after use 294 * release a server after use
381 * - releases the ref on the server struct that was acquired by picking 295 * - releases the ref on the server struct that was acquired by picking
382 * - records result of using a particular server to access a volume 296 * - records result of using a particular server to access a volume
383 * - return 0 to try again, 1 if okay or to issue error 297 * - return 0 to try again, 1 if okay or to issue error
298 * - the caller must release the server struct if result was 0
384 */ 299 */
385int afs_volume_release_fileserver(struct afs_volume *volume, 300int afs_volume_release_fileserver(struct afs_vnode *vnode,
386 struct afs_server *server, 301 struct afs_server *server,
387 int result) 302 int result)
388{ 303{
304 struct afs_volume *volume = vnode->volume;
389 unsigned loop; 305 unsigned loop;
390 306
391 _enter("%s,%08x,%d", 307 _enter("%s,%08x,%d",
@@ -396,14 +312,16 @@ int afs_volume_release_fileserver(struct afs_volume *volume,
396 /* success */ 312 /* success */
397 case 0: 313 case 0:
398 server->fs_act_jif = jiffies; 314 server->fs_act_jif = jiffies;
399 break; 315 server->fs_state = 0;
316 _leave("");
317 return 1;
400 318
401 /* the fileserver denied all knowledge of the volume */ 319 /* the fileserver denied all knowledge of the volume */
402 case -ENOMEDIUM: 320 case -ENOMEDIUM:
403 server->fs_act_jif = jiffies; 321 server->fs_act_jif = jiffies;
404 down_write(&volume->server_sem); 322 down_write(&volume->server_sem);
405 323
406 /* first, find where the server is in the active list (if it 324 /* firstly, find where the server is in the active list (if it
407 * is) */ 325 * is) */
408 for (loop = 0; loop < volume->nservers; loop++) 326 for (loop = 0; loop < volume->nservers; loop++)
409 if (volume->servers[loop] == server) 327 if (volume->servers[loop] == server)
@@ -441,6 +359,7 @@ int afs_volume_release_fileserver(struct afs_volume *volume,
441 case -ENETUNREACH: 359 case -ENETUNREACH:
442 case -EHOSTUNREACH: 360 case -EHOSTUNREACH:
443 case -ECONNREFUSED: 361 case -ECONNREFUSED:
362 case -ETIME:
444 case -ETIMEDOUT: 363 case -ETIMEDOUT:
445 case -EREMOTEIO: 364 case -EREMOTEIO:
446 /* mark the server as dead 365 /* mark the server as dead
@@ -460,60 +379,17 @@ int afs_volume_release_fileserver(struct afs_volume *volume,
460 server->fs_act_jif = jiffies; 379 server->fs_act_jif = jiffies;
461 case -ENOMEM: 380 case -ENOMEM:
462 case -ENONET: 381 case -ENONET:
463 break; 382 /* tell the caller to accept the result */
383 afs_put_server(server);
384 _leave(" [local failure]");
385 return 1;
464 } 386 }
465 387
466 /* tell the caller to accept the result */
467 afs_put_server(server);
468 _leave("");
469 return 1;
470
471 /* tell the caller to loop around and try the next server */ 388 /* tell the caller to loop around and try the next server */
472 try_next_server_upw: 389try_next_server_upw:
473 up_write(&volume->server_sem); 390 up_write(&volume->server_sem);
474 try_next_server: 391try_next_server:
475 afs_put_server(server); 392 afs_put_server(server);
476 _leave(" [try next server]"); 393 _leave(" [try next server]");
477 return 0; 394 return 0;
478 395}
479} /* end afs_volume_release_fileserver() */
480
481/*****************************************************************************/
482/*
483 * match a volume hash record stored in the cache
484 */
485#ifdef AFS_CACHING_SUPPORT
486static cachefs_match_val_t afs_volume_cache_match(void *target,
487 const void *entry)
488{
489 const struct afs_cache_vhash *vhash = entry;
490 struct afs_volume *volume = target;
491
492 _enter("{%u},{%u}", volume->type, vhash->vtype);
493
494 if (volume->type == vhash->vtype) {
495 _leave(" = SUCCESS");
496 return CACHEFS_MATCH_SUCCESS;
497 }
498
499 _leave(" = FAILED");
500 return CACHEFS_MATCH_FAILED;
501} /* end afs_volume_cache_match() */
502#endif
503
504/*****************************************************************************/
505/*
506 * update a volume hash record stored in the cache
507 */
508#ifdef AFS_CACHING_SUPPORT
509static void afs_volume_cache_update(void *source, void *entry)
510{
511 struct afs_cache_vhash *vhash = entry;
512 struct afs_volume *volume = source;
513
514 _enter("");
515
516 vhash->vtype = volume->type;
517
518} /* end afs_volume_cache_update() */
519#endif
diff --git a/fs/afs/volume.h b/fs/afs/volume.h
deleted file mode 100644
index bfdcf19ba3f3..000000000000
--- a/fs/afs/volume.h
+++ /dev/null
@@ -1,140 +0,0 @@
1/* volume.h: AFS volume management
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_AFS_VOLUME_H
13#define _LINUX_AFS_VOLUME_H
14
15#include "types.h"
16#include "fsclient.h"
17#include "kafstimod.h"
18#include "kafsasyncd.h"
19#include "cache.h"
20
21typedef enum {
22 AFS_VLUPD_SLEEP, /* sleeping waiting for update timer to fire */
23 AFS_VLUPD_PENDING, /* on pending queue */
24 AFS_VLUPD_INPROGRESS, /* op in progress */
25 AFS_VLUPD_BUSYSLEEP, /* sleeping because server returned EBUSY */
26
27} __attribute__((packed)) afs_vlocation_upd_t;
28
29/*****************************************************************************/
30/*
31 * entry in the cached volume location catalogue
32 */
33struct afs_cache_vlocation
34{
35 uint8_t name[64]; /* volume name (lowercase, padded with NULs) */
36 uint8_t nservers; /* number of entries used in servers[] */
37 uint8_t vidmask; /* voltype mask for vid[] */
38 uint8_t srvtmask[8]; /* voltype masks for servers[] */
39#define AFS_VOL_VTM_RW 0x01 /* R/W version of the volume is available (on this server) */
40#define AFS_VOL_VTM_RO 0x02 /* R/O version of the volume is available (on this server) */
41#define AFS_VOL_VTM_BAK 0x04 /* backup version of the volume is available (on this server) */
42
43 afs_volid_t vid[3]; /* volume IDs for R/W, R/O and Bak volumes */
44 struct in_addr servers[8]; /* fileserver addresses */
45 time_t rtime; /* last retrieval time */
46};
47
48#ifdef AFS_CACHING_SUPPORT
49extern struct cachefs_index_def afs_vlocation_cache_index_def;
50#endif
51
52/*****************************************************************************/
53/*
54 * volume -> vnode hash table entry
55 */
56struct afs_cache_vhash
57{
58 afs_voltype_t vtype; /* which volume variation */
59 uint8_t hash_bucket; /* which hash bucket this represents */
60} __attribute__((packed));
61
62#ifdef AFS_CACHING_SUPPORT
63extern struct cachefs_index_def afs_volume_cache_index_def;
64#endif
65
66/*****************************************************************************/
67/*
68 * AFS volume location record
69 */
70struct afs_vlocation
71{
72 atomic_t usage;
73 struct list_head link; /* link in cell volume location list */
74 struct afs_timer timeout; /* decaching timer */
75 struct afs_cell *cell; /* cell to which volume belongs */
76#ifdef AFS_CACHING_SUPPORT
77 struct cachefs_cookie *cache; /* caching cookie */
78#endif
79 struct afs_cache_vlocation vldb; /* volume information DB record */
80 struct afs_volume *vols[3]; /* volume access record pointer (index by type) */
81 rwlock_t lock; /* access lock */
82 unsigned long read_jif; /* time at which last read from vlserver */
83 struct afs_timer upd_timer; /* update timer */
84 struct afs_async_op upd_op; /* update operation */
85 afs_vlocation_upd_t upd_state; /* update state */
86 unsigned short upd_first_svix; /* first server index during update */
87 unsigned short upd_curr_svix; /* current server index during update */
88 unsigned short upd_rej_cnt; /* ENOMEDIUM count during update */
89 unsigned short upd_busy_cnt; /* EBUSY count during update */
90 unsigned short valid; /* T if valid */
91};
92
93extern int afs_vlocation_lookup(struct afs_cell *cell,
94 const char *name,
95 unsigned namesz,
96 struct afs_vlocation **_vlocation);
97
98#define afs_get_vlocation(V) do { atomic_inc(&(V)->usage); } while(0)
99
100extern void afs_put_vlocation(struct afs_vlocation *vlocation);
101extern void afs_vlocation_do_timeout(struct afs_vlocation *vlocation);
102
103/*****************************************************************************/
104/*
105 * AFS volume access record
106 */
107struct afs_volume
108{
109 atomic_t usage;
110 struct afs_cell *cell; /* cell to which belongs (unrefd ptr) */
111 struct afs_vlocation *vlocation; /* volume location */
112#ifdef AFS_CACHING_SUPPORT
113 struct cachefs_cookie *cache; /* caching cookie */
114#endif
115 afs_volid_t vid; /* volume ID */
116 afs_voltype_t type; /* type of volume */
117 char type_force; /* force volume type (suppress R/O -> R/W) */
118 unsigned short nservers; /* number of server slots filled */
119 unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */
120 struct afs_server *servers[8]; /* servers on which volume resides (ordered) */
121 struct rw_semaphore server_sem; /* lock for accessing current server */
122};
123
124extern int afs_volume_lookup(const char *name,
125 struct afs_cell *cell,
126 int rwpath,
127 struct afs_volume **_volume);
128
129#define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0)
130
131extern void afs_put_volume(struct afs_volume *volume);
132
133extern int afs_volume_pick_fileserver(struct afs_volume *volume,
134 struct afs_server **_server);
135
136extern int afs_volume_release_fileserver(struct afs_volume *volume,
137 struct afs_server *server,
138 int result);
139
140#endif /* _LINUX_AFS_VOLUME_H */
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 8b1c5d8bf4ef..c68b055fa26e 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -266,6 +266,23 @@ static int do_siocgstamp(unsigned int fd, unsigned int cmd, unsigned long arg)
266 return err; 266 return err;
267} 267}
268 268
269static int do_siocgstampns(unsigned int fd, unsigned int cmd, unsigned long arg)
270{
271 struct compat_timespec __user *up = compat_ptr(arg);
272 struct timespec kts;
273 mm_segment_t old_fs = get_fs();
274 int err;
275
276 set_fs(KERNEL_DS);
277 err = sys_ioctl(fd, cmd, (unsigned long)&kts);
278 set_fs(old_fs);
279 if (!err) {
280 err = put_user(kts.tv_sec, &up->tv_sec);
281 err |= __put_user(kts.tv_nsec, &up->tv_nsec);
282 }
283 return err;
284}
285
269struct ifmap32 { 286struct ifmap32 {
270 compat_ulong_t mem_start; 287 compat_ulong_t mem_start;
271 compat_ulong_t mem_end; 288 compat_ulong_t mem_end;
@@ -2437,6 +2454,7 @@ HANDLE_IOCTL(SIOCBRDELIF, dev_ifsioc)
2437/* Note SIOCRTMSG is no longer, so this is safe and * the user would have seen just an -EINVAL anyways. */ 2454/* Note SIOCRTMSG is no longer, so this is safe and * the user would have seen just an -EINVAL anyways. */
2438HANDLE_IOCTL(SIOCRTMSG, ret_einval) 2455HANDLE_IOCTL(SIOCRTMSG, ret_einval)
2439HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp) 2456HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp)
2457HANDLE_IOCTL(SIOCGSTAMPNS, do_siocgstampns)
2440#endif 2458#endif
2441#ifdef CONFIG_BLOCK 2459#ifdef CONFIG_BLOCK
2442HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo) 2460HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo)
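
The wrapper above follows the usual compat pattern: switch to KERNEL_DS, let the native ioctl fill a kernel struct timespec, then copy the fields out to a compat_timespec in 32-bit userspace layout. From userspace the new call looks like the sketch below; the fallback constant is the value used by most architectures in the sockios.h hunks further down:

#include <stdio.h>
#include <sys/ioctl.h>
#include <time.h>

#ifndef SIOCGSTAMPNS
#define SIOCGSTAMPNS 0x8907	/* most architectures; sh/sh64 use _IOR() encodings */
#endif

/* Print the receive timestamp of the last packet seen on sockfd. */
static int print_last_rx_stamp(int sockfd)
{
	struct timespec ts;

	if (ioctl(sockfd, SIOCGSTAMPNS, &ts) < 0) {
		perror("SIOCGSTAMPNS");
		return -1;
	}
	printf("last packet: %ld.%09ld\n", (long)ts.tv_sec, ts.tv_nsec);
	return 0;
}
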
diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c
index e3aa2253c850..fe9186312d7c 100644
--- a/fs/ecryptfs/netlink.c
+++ b/fs/ecryptfs/netlink.c
@@ -97,7 +97,7 @@ out:
97 */ 97 */
98static int ecryptfs_process_nl_response(struct sk_buff *skb) 98static int ecryptfs_process_nl_response(struct sk_buff *skb)
99{ 99{
100 struct nlmsghdr *nlh = (struct nlmsghdr*)skb->data; 100 struct nlmsghdr *nlh = nlmsg_hdr(skb);
101 struct ecryptfs_message *msg = NLMSG_DATA(nlh); 101 struct ecryptfs_message *msg = NLMSG_DATA(nlh);
102 int rc; 102 int rc;
103 103
@@ -181,7 +181,7 @@ receive:
181 "rc = [%d]\n", rc); 181 "rc = [%d]\n", rc);
182 return; 182 return;
183 } 183 }
184 nlh = (struct nlmsghdr *)skb->data; 184 nlh = nlmsg_hdr(skb);
185 if (!NLMSG_OK(nlh, skb->len)) { 185 if (!NLMSG_OK(nlh, skb->len)) {
186 ecryptfs_printk(KERN_ERR, "Received corrupt netlink " 186 ecryptfs_printk(KERN_ERR, "Received corrupt netlink "
187 "message\n"); 187 "message\n");
@@ -229,7 +229,7 @@ int ecryptfs_init_netlink(void)
229 229
230 ecryptfs_nl_sock = netlink_kernel_create(NETLINK_ECRYPTFS, 0, 230 ecryptfs_nl_sock = netlink_kernel_create(NETLINK_ECRYPTFS, 0,
231 ecryptfs_receive_nl_message, 231 ecryptfs_receive_nl_message,
232 THIS_MODULE); 232 NULL, THIS_MODULE);
233 if (!ecryptfs_nl_sock) { 233 if (!ecryptfs_nl_sock) {
234 rc = -EIO; 234 rc = -EIO;
235 ecryptfs_printk(KERN_ERR, "Failed to create netlink socket\n"); 235 ecryptfs_printk(KERN_ERR, "Failed to create netlink socket\n");
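
Two mechanical API updates here: the open-coded (struct nlmsghdr *)skb->data casts become the nlmsg_hdr() accessor, and netlink_kernel_create() grows an extra argument, with the NULL filling the new cb_mutex parameter that lets callers override the netlink callback mutex. For reference, the accessor is just a typed wrapper around the old cast, roughly:

/* Shape of nlmsg_hdr() as added in include/linux/netlink.h. */
static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb)
{
	return (struct nlmsghdr *)skb->data;
}
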
diff --git a/include/asm-alpha/socket.h b/include/asm-alpha/socket.h
index d22ab97ea72e..1fede7f92860 100644
--- a/include/asm-alpha/socket.h
+++ b/include/asm-alpha/socket.h
@@ -52,6 +52,8 @@
52 52
53#define SO_PEERSEC 30 53#define SO_PEERSEC 30
54#define SO_PASSSEC 34 54#define SO_PASSSEC 34
55#define SO_TIMESTAMPNS 35
56#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
55 57
56/* Security levels - as per NRL IPv6 - don't actually do anything */ 58/* Security levels - as per NRL IPv6 - don't actually do anything */
57#define SO_SECURITY_AUTHENTICATION 19 59#define SO_SECURITY_AUTHENTICATION 19
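
SO_TIMESTAMPNS is the nanosecond-resolution sibling of SO_TIMESTAMP: enable it once with setsockopt() and every recvmsg() then carries an SCM_TIMESTAMPNS control message holding the packet's receive time as a struct timespec. A minimal userspace sketch, with error handling trimmed; the fallback defines match the value of 35 used by most of the per-arch hunks that follow:

#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <time.h>

#ifndef SO_TIMESTAMPNS
#define SO_TIMESTAMPNS 35
#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
#endif

/* Receive one datagram and report its kernel receive timestamp. */
static ssize_t recv_with_nsec_stamp(int fd, void *buf, size_t len,
				    struct timespec *ts)
{
	int on = 1;
	char cbuf[CMSG_SPACE(sizeof(struct timespec))];
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cm;
	ssize_t n;

	setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS, &on, sizeof(on));

	n = recvmsg(fd, &msg, 0);
	if (n < 0)
		return n;
	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
		if (cm->cmsg_level == SOL_SOCKET &&
		    cm->cmsg_type == SCM_TIMESTAMPNS)
			memcpy(ts, CMSG_DATA(cm), sizeof(*ts));
	return n;
}
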
diff --git a/include/asm-alpha/sockios.h b/include/asm-alpha/sockios.h
index e4961a740e5f..7932c7ab4a4d 100644
--- a/include/asm-alpha/sockios.h
+++ b/include/asm-alpha/sockios.h
@@ -10,6 +10,7 @@
10#define SIOCSPGRP _IOW('s', 8, pid_t) 10#define SIOCSPGRP _IOW('s', 8, pid_t)
11#define SIOCGPGRP _IOR('s', 9, pid_t) 11#define SIOCGPGRP _IOR('s', 9, pid_t)
12 12
13#define SIOCGSTAMP 0x8906 /* Get stamp - linux-specific */ 13#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
14#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
14 15
15#endif /* _ASM_ALPHA_SOCKIOS_H */ 16#endif /* _ASM_ALPHA_SOCKIOS_H */
diff --git a/include/asm-arm/div64.h b/include/asm-arm/div64.h
index 37e0a96e8789..0b5f881c3d85 100644
--- a/include/asm-arm/div64.h
+++ b/include/asm-arm/div64.h
@@ -2,6 +2,7 @@
2#define __ASM_ARM_DIV64 2#define __ASM_ARM_DIV64
3 3
4#include <asm/system.h> 4#include <asm/system.h>
5#include <linux/types.h>
5 6
6/* 7/*
7 * The semantics of do_div() are: 8 * The semantics of do_div() are:
@@ -223,4 +224,6 @@
223 224
224#endif 225#endif
225 226
227extern uint64_t div64_64(uint64_t dividend, uint64_t divisor);
228
226#endif 229#endif
diff --git a/include/asm-arm/socket.h b/include/asm-arm/socket.h
index 19f7df702b06..65a1a64bf934 100644
--- a/include/asm-arm/socket.h
+++ b/include/asm-arm/socket.h
@@ -49,5 +49,7 @@
49 49
50#define SO_PEERSEC 31 50#define SO_PEERSEC 31
51#define SO_PASSSEC 34 51#define SO_PASSSEC 34
52#define SO_TIMESTAMPNS 35
53#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
52 54
53#endif /* _ASM_SOCKET_H */ 55#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-arm/sockios.h b/include/asm-arm/sockios.h
index 77c34087d513..a2588a2512df 100644
--- a/include/asm-arm/sockios.h
+++ b/include/asm-arm/sockios.h
@@ -7,6 +7,7 @@
7#define FIOGETOWN 0x8903 7#define FIOGETOWN 0x8903
8#define SIOCGPGRP 0x8904 8#define SIOCGPGRP 0x8904
9#define SIOCATMARK 0x8905 9#define SIOCATMARK 0x8905
10#define SIOCGSTAMP 0x8906 /* Get stamp */ 10#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
11#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
11 12
12#endif 13#endif
diff --git a/include/asm-arm26/socket.h b/include/asm-arm26/socket.h
index 19f7df702b06..65a1a64bf934 100644
--- a/include/asm-arm26/socket.h
+++ b/include/asm-arm26/socket.h
@@ -49,5 +49,7 @@
49 49
50#define SO_PEERSEC 31 50#define SO_PEERSEC 31
51#define SO_PASSSEC 34 51#define SO_PASSSEC 34
52#define SO_TIMESTAMPNS 35
53#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
52 54
53#endif /* _ASM_SOCKET_H */ 55#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-arm26/sockios.h b/include/asm-arm26/sockios.h
index 77c34087d513..a2588a2512df 100644
--- a/include/asm-arm26/sockios.h
+++ b/include/asm-arm26/sockios.h
@@ -7,6 +7,7 @@
7#define FIOGETOWN 0x8903 7#define FIOGETOWN 0x8903
8#define SIOCGPGRP 0x8904 8#define SIOCGPGRP 0x8904
9#define SIOCATMARK 0x8905 9#define SIOCATMARK 0x8905
10#define SIOCGSTAMP 0x8906 /* Get stamp */ 10#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
11#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
11 12
12#endif 13#endif
diff --git a/include/asm-avr32/socket.h b/include/asm-avr32/socket.h
index 543229de8173..a0d0507a5034 100644
--- a/include/asm-avr32/socket.h
+++ b/include/asm-avr32/socket.h
@@ -49,5 +49,7 @@
49 49
50#define SO_PEERSEC 31 50#define SO_PEERSEC 31
51#define SO_PASSSEC 34 51#define SO_PASSSEC 34
52#define SO_TIMESTAMPNS 35
53#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
52 54
53#endif /* __ASM_AVR32_SOCKET_H */ 55#endif /* __ASM_AVR32_SOCKET_H */
diff --git a/include/asm-avr32/sockios.h b/include/asm-avr32/sockios.h
index 84f3d65b3b3b..0802d742f97d 100644
--- a/include/asm-avr32/sockios.h
+++ b/include/asm-avr32/sockios.h
@@ -7,6 +7,7 @@
7#define FIOGETOWN 0x8903 7#define FIOGETOWN 0x8903
8#define SIOCGPGRP 0x8904 8#define SIOCGPGRP 0x8904
9#define SIOCATMARK 0x8905 9#define SIOCATMARK 0x8905
10#define SIOCGSTAMP 0x8906 /* Get stamp */ 10#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
11#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
11 12
12#endif /* __ASM_AVR32_SOCKIOS_H */ 13#endif /* __ASM_AVR32_SOCKIOS_H */
diff --git a/include/asm-cris/socket.h b/include/asm-cris/socket.h
index 01cfdf1d6d33..5b18dfdf1748 100644
--- a/include/asm-cris/socket.h
+++ b/include/asm-cris/socket.h
@@ -51,6 +51,8 @@
51 51
52#define SO_PEERSEC 31 52#define SO_PEERSEC 31
53#define SO_PASSSEC 34 53#define SO_PASSSEC 34
54#define SO_TIMESTAMPNS 35
55#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
54 56
55#endif /* _ASM_SOCKET_H */ 57#endif /* _ASM_SOCKET_H */
56 58
diff --git a/include/asm-cris/sockios.h b/include/asm-cris/sockios.h
index 6c4012f0b29f..cfe7bfecf599 100644
--- a/include/asm-cris/sockios.h
+++ b/include/asm-cris/sockios.h
@@ -7,6 +7,7 @@
7#define FIOGETOWN 0x8903 7#define FIOGETOWN 0x8903
8#define SIOCGPGRP 0x8904 8#define SIOCGPGRP 0x8904
9#define SIOCATMARK 0x8905 9#define SIOCATMARK 0x8905
10#define SIOCGSTAMP 0x8906 /* Get stamp */ 10#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
11#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
11 12
12#endif 13#endif
diff --git a/include/asm-frv/socket.h b/include/asm-frv/socket.h
index 31db18fc871f..a823befd11dd 100644
--- a/include/asm-frv/socket.h
+++ b/include/asm-frv/socket.h
@@ -49,6 +49,8 @@
49 49
50#define SO_PEERSEC 31 50#define SO_PEERSEC 31
51#define SO_PASSSEC 34 51#define SO_PASSSEC 34
52#define SO_TIMESTAMPNS 35
53#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
52 54
53#endif /* _ASM_SOCKET_H */ 55#endif /* _ASM_SOCKET_H */
54 56
diff --git a/include/asm-frv/sockios.h b/include/asm-frv/sockios.h
index 8a6e4b2074b7..5dbdd13e6de3 100644
--- a/include/asm-frv/sockios.h
+++ b/include/asm-frv/sockios.h
@@ -7,7 +7,8 @@
7#define FIOGETOWN 0x8903 7#define FIOGETOWN 0x8903
8#define SIOCGPGRP 0x8904 8#define SIOCGPGRP 0x8904
9#define SIOCATMARK 0x8905 9#define SIOCATMARK 0x8905
10#define SIOCGSTAMP 0x8906 /* Get stamp */ 10#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
11#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
11 12
12#endif /* _ASM_SOCKIOS__ */ 13#endif /* _ASM_SOCKIOS__ */
13 14
diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h
index 8f4e3193342e..a4a49370793c 100644
--- a/include/asm-generic/div64.h
+++ b/include/asm-generic/div64.h
@@ -30,6 +30,11 @@
30 __rem; \ 30 __rem; \
31 }) 31 })
32 32
33static inline uint64_t div64_64(uint64_t dividend, uint64_t divisor)
34{
35 return dividend / divisor;
36}
37
33#elif BITS_PER_LONG == 32 38#elif BITS_PER_LONG == 32
34 39
35extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor); 40extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor);
@@ -49,6 +54,8 @@ extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor);
49 __rem; \ 54 __rem; \
50 }) 55 })
51 56
57extern uint64_t div64_64(uint64_t dividend, uint64_t divisor);
58
52#else /* BITS_PER_LONG == ?? */ 59#else /* BITS_PER_LONG == ?? */
53 60
54# error do_div() does not yet support the C64 61# error do_div() does not yet support the C64
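
div64_64() rounds out the do_div() family: do_div() only divides a 64-bit value by a 32-bit divisor, while div64_64() handles a full 64-by-64 division, implemented as a trivial inline on 64-bit builds and an out-of-line helper on 32-bit ones. A hypothetical caller, with illustrative names not taken from the tree:

/* Bytes-per-second from 64-bit counters; usable on 32-bit platforms
 * where a native u64/u64 division would not link. */
static inline uint64_t bytes_per_sec(uint64_t bytes, uint64_t nsecs)
{
	if (!nsecs)
		return 0;
	/* note: bytes * 1e9 can overflow for very large counts */
	return div64_64(bytes * 1000000000ULL, nsecs);
}
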
diff --git a/include/asm-h8300/socket.h b/include/asm-h8300/socket.h
index ebc830fee0d0..39911d8c9684 100644
--- a/include/asm-h8300/socket.h
+++ b/include/asm-h8300/socket.h
@@ -49,5 +49,7 @@
49 49
50#define SO_PEERSEC 31 50#define SO_PEERSEC 31
51#define SO_PASSSEC 34 51#define SO_PASSSEC 34
52#define SO_TIMESTAMPNS 35
53#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
52 54
53#endif /* _ASM_SOCKET_H */ 55#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-h8300/sockios.h b/include/asm-h8300/sockios.h
index d005d9594cc6..e9c7ec810c23 100644
--- a/include/asm-h8300/sockios.h
+++ b/include/asm-h8300/sockios.h
@@ -7,6 +7,7 @@
7#define FIOGETOWN 0x8903 7#define FIOGETOWN 0x8903
8#define SIOCGPGRP 0x8904 8#define SIOCGPGRP 0x8904
9#define SIOCATMARK 0x8905 9#define SIOCATMARK 0x8905
10#define SIOCGSTAMP 0x8906 /* Get stamp */ 10#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
11#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
11 12
12#endif /* __ARCH_H8300_SOCKIOS__ */ 13#endif /* __ARCH_H8300_SOCKIOS__ */
diff --git a/include/asm-i386/div64.h b/include/asm-i386/div64.h
index 75c67c785bb8..438e980068bd 100644
--- a/include/asm-i386/div64.h
+++ b/include/asm-i386/div64.h
@@ -1,6 +1,8 @@
1#ifndef __I386_DIV64 1#ifndef __I386_DIV64
2#define __I386_DIV64 2#define __I386_DIV64
3 3
4#include <linux/types.h>
5
4/* 6/*
5 * do_div() is NOT a C function. It wants to return 7 * do_div() is NOT a C function. It wants to return
6 * two values (the quotient and the remainder), but 8 * two values (the quotient and the remainder), but
@@ -45,4 +47,6 @@ div_ll_X_l_rem(long long divs, long div, long *rem)
45 return dum2; 47 return dum2;
46 48
47} 49}
50
51extern uint64_t div64_64(uint64_t dividend, uint64_t divisor);
48#endif 52#endif
diff --git a/include/asm-i386/socket.h b/include/asm-i386/socket.h
index 5755d57c4e95..99ca648b94c5 100644
--- a/include/asm-i386/socket.h
+++ b/include/asm-i386/socket.h
@@ -49,5 +49,7 @@
49 49
50#define SO_PEERSEC 31 50#define SO_PEERSEC 31
51#define SO_PASSSEC 34 51#define SO_PASSSEC 34
52#define SO_TIMESTAMPNS 35
53#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
52 54
53#endif /* _ASM_SOCKET_H */ 55#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-i386/sockios.h b/include/asm-i386/sockios.h
index 6b747f8e228b..ff528c7d255c 100644
--- a/include/asm-i386/sockios.h
+++ b/include/asm-i386/sockios.h
@@ -7,6 +7,7 @@
7#define FIOGETOWN 0x8903 7#define FIOGETOWN 0x8903
8#define SIOCGPGRP 0x8904 8#define SIOCGPGRP 0x8904
9#define SIOCATMARK 0x8905 9#define SIOCATMARK 0x8905
10#define SIOCGSTAMP 0x8906 /* Get stamp */ 10#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
11#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
11 12
12#endif 13#endif
diff --git a/include/asm-ia64/socket.h b/include/asm-ia64/socket.h
index d638ef3d50c3..9e42ce43cfbe 100644
--- a/include/asm-ia64/socket.h
+++ b/include/asm-ia64/socket.h
@@ -58,5 +58,7 @@
58 58
59#define SO_PEERSEC 31 59#define SO_PEERSEC 31
60#define SO_PASSSEC 34 60#define SO_PASSSEC 34
61#define SO_TIMESTAMPNS 35
62#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
61 63
62#endif /* _ASM_IA64_SOCKET_H */ 64#endif /* _ASM_IA64_SOCKET_H */
diff --git a/include/asm-ia64/sockios.h b/include/asm-ia64/sockios.h
index cf94857c8a54..15c92468ad38 100644
--- a/include/asm-ia64/sockios.h
+++ b/include/asm-ia64/sockios.h
@@ -14,6 +14,7 @@
14#define FIOGETOWN 0x8903 14#define FIOGETOWN 0x8903
15#define SIOCGPGRP 0x8904 15#define SIOCGPGRP 0x8904
16#define SIOCATMARK 0x8905 16#define SIOCATMARK 0x8905
17#define SIOCGSTAMP 0x8906 /* Get stamp */ 17#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
18#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
18 19
19#endif /* _ASM_IA64_SOCKIOS_H */ 20#endif /* _ASM_IA64_SOCKIOS_H */
diff --git a/include/asm-m32r/socket.h b/include/asm-m32r/socket.h
index acdf748fcdc8..793d5d30c850 100644
--- a/include/asm-m32r/socket.h
+++ b/include/asm-m32r/socket.h
@@ -49,5 +49,7 @@
49 49
50#define SO_PEERSEC 31 50#define SO_PEERSEC 31
51#define SO_PASSSEC 34 51#define SO_PASSSEC 34
52#define SO_TIMESTAMPNS 35
53#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
52 54
53#endif /* _ASM_M32R_SOCKET_H */ 55#endif /* _ASM_M32R_SOCKET_H */
diff --git a/include/asm-m32r/sockios.h b/include/asm-m32r/sockios.h
index f89962e231fe..6c1fb9b43bdb 100644
--- a/include/asm-m32r/sockios.h
+++ b/include/asm-m32r/sockios.h
@@ -7,6 +7,7 @@
7#define FIOGETOWN 0x8903 7#define FIOGETOWN 0x8903
8#define SIOCGPGRP 0x8904 8#define SIOCGPGRP 0x8904
9#define SIOCATMARK 0x8905 9#define SIOCATMARK 0x8905
10#define SIOCGSTAMP 0x8906 /* Get stamp */ 10#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
11#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
11 12
12#endif /* _ASM_M32R_SOCKIOS_H */ 13#endif /* _ASM_M32R_SOCKIOS_H */
diff --git a/include/asm-m68k/div64.h b/include/asm-m68k/div64.h
index 9f65de1a2480..33caad1628d4 100644
--- a/include/asm-m68k/div64.h
+++ b/include/asm-m68k/div64.h
@@ -1,6 +1,8 @@
1#ifndef _M68K_DIV64_H 1#ifndef _M68K_DIV64_H
2#define _M68K_DIV64_H 2#define _M68K_DIV64_H
3 3
4#include <linux/types.h>
5
4/* n = n / base; return rem; */ 6/* n = n / base; return rem; */
5 7
6#define do_div(n, base) ({ \ 8#define do_div(n, base) ({ \
@@ -23,4 +25,5 @@
23 __rem; \ 25 __rem; \
24}) 26})
25 27
28extern uint64_t div64_64(uint64_t dividend, uint64_t divisor);
26#endif /* _M68K_DIV64_H */ 29#endif /* _M68K_DIV64_H */
diff --git a/include/asm-m68k/socket.h b/include/asm-m68k/socket.h
index a5966ec005ae..6d21b90863ad 100644
--- a/include/asm-m68k/socket.h
+++ b/include/asm-m68k/socket.h
@@ -49,5 +49,7 @@
49 49
50#define SO_PEERSEC 31 50#define SO_PEERSEC 31
51#define SO_PASSSEC 34 51#define SO_PASSSEC 34
52#define SO_TIMESTAMPNS 35
53#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
52 54
53#endif /* _ASM_SOCKET_H */ 55#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-m68k/sockios.h b/include/asm-m68k/sockios.h
index 9b9ed973c24e..c04a23943cb7 100644
--- a/include/asm-m68k/sockios.h
+++ b/include/asm-m68k/sockios.h
@@ -7,6 +7,7 @@
7#define FIOGETOWN 0x8903 7#define FIOGETOWN 0x8903
8#define SIOCGPGRP 0x8904 8#define SIOCGPGRP 0x8904
9#define SIOCATMARK 0x8905 9#define SIOCATMARK 0x8905
10#define SIOCGSTAMP 0x8906 /* Get stamp */ 10#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
11#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
11 12
12#endif /* __ARCH_M68K_SOCKIOS__ */ 13#endif /* __ARCH_M68K_SOCKIOS__ */
diff --git a/include/asm-mips/div64.h b/include/asm-mips/div64.h
index d107832de1b6..66189f5f6399 100644
--- a/include/asm-mips/div64.h
+++ b/include/asm-mips/div64.h
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) 2000, 2004 Maciej W. Rozycki 2 * Copyright (C) 2000, 2004 Maciej W. Rozycki
3 * Copyright (C) 2003 Ralf Baechle 3 * Copyright (C) 2003, 07 Ralf Baechle (ralf@linux-mips.org)
4 * 4 *
5 * This file is subject to the terms and conditions of the GNU General Public 5 * This file is subject to the terms and conditions of the GNU General Public
6 * License. See the file "COPYING" in the main directory of this archive 6 * License. See the file "COPYING" in the main directory of this archive
@@ -9,6 +9,8 @@
9#ifndef _ASM_DIV64_H 9#ifndef _ASM_DIV64_H
10#define _ASM_DIV64_H 10#define _ASM_DIV64_H
11 11
12#include <linux/types.h>
13
12#if (_MIPS_SZLONG == 32) 14#if (_MIPS_SZLONG == 32)
13 15
14#include <asm/compiler.h> 16#include <asm/compiler.h>
@@ -78,6 +80,8 @@
78 __quot = __quot << 32 | __low; \ 80 __quot = __quot << 32 | __low; \
79 (n) = __quot; \ 81 (n) = __quot; \
80 __mod; }) 82 __mod; })
83
84extern uint64_t div64_64(uint64_t dividend, uint64_t divisor);
81#endif /* (_MIPS_SZLONG == 32) */ 85#endif /* (_MIPS_SZLONG == 32) */
82 86
83#if (_MIPS_SZLONG == 64) 87#if (_MIPS_SZLONG == 64)
@@ -101,6 +105,11 @@
101 (n) = __quot; \ 105 (n) = __quot; \
102 __mod; }) 106 __mod; })
103 107
108static inline uint64_t div64_64(uint64_t dividend, uint64_t divisor)
109{
110 return dividend / divisor;
111}
112
104#endif /* (_MIPS_SZLONG == 64) */ 113#endif /* (_MIPS_SZLONG == 64) */
105 114
106#endif /* _ASM_DIV64_H */ 115#endif /* _ASM_DIV64_H */
diff --git a/include/asm-mips/socket.h b/include/asm-mips/socket.h
index 36ebe4e186a7..95945689b1c6 100644
--- a/include/asm-mips/socket.h
+++ b/include/asm-mips/socket.h
@@ -70,6 +70,8 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */
70#define SO_SNDBUFFORCE 31 70#define SO_SNDBUFFORCE 31
71#define SO_RCVBUFFORCE 33 71#define SO_RCVBUFFORCE 33
72#define SO_PASSSEC 34 72#define SO_PASSSEC 34
73#define SO_TIMESTAMPNS 35
74#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
73 75
74#ifdef __KERNEL__ 76#ifdef __KERNEL__
75 77
diff --git a/include/asm-mips/sockios.h b/include/asm-mips/sockios.h
index 87a50bf039ed..ed1a5f78d22f 100644
--- a/include/asm-mips/sockios.h
+++ b/include/asm-mips/sockios.h
@@ -20,6 +20,7 @@
20#define SIOCSPGRP _IOW('s', 8, pid_t) 20#define SIOCSPGRP _IOW('s', 8, pid_t)
21#define SIOCGPGRP _IOR('s', 9, pid_t) 21#define SIOCGPGRP _IOR('s', 9, pid_t)
22 22
23#define SIOCGSTAMP 0x8906 /* Get stamp - linux-specific */ 23#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
24#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
24 25
25#endif /* _ASM_SOCKIOS_H */ 26#endif /* _ASM_SOCKIOS_H */
diff --git a/include/asm-parisc/socket.h b/include/asm-parisc/socket.h
index ce2eae1708b5..99e868f6a8f5 100644
--- a/include/asm-parisc/socket.h
+++ b/include/asm-parisc/socket.h
@@ -33,6 +33,8 @@
33#define SO_PEERCRED 0x4011 33#define SO_PEERCRED 0x4011
34#define SO_TIMESTAMP 0x4012 34#define SO_TIMESTAMP 0x4012
35#define SCM_TIMESTAMP SO_TIMESTAMP 35#define SCM_TIMESTAMP SO_TIMESTAMP
36#define SO_TIMESTAMPNS 0x4013
37#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
36 38
37/* Security levels - as per NRL IPv6 - don't actually do anything */ 39/* Security levels - as per NRL IPv6 - don't actually do anything */
38#define SO_SECURITY_AUTHENTICATION 0x4016 40#define SO_SECURITY_AUTHENTICATION 0x4016
diff --git a/include/asm-parisc/sockios.h b/include/asm-parisc/sockios.h
index aace49629949..dabfbc7483f6 100644
--- a/include/asm-parisc/sockios.h
+++ b/include/asm-parisc/sockios.h
@@ -7,6 +7,7 @@
7#define FIOGETOWN 0x8903 7#define FIOGETOWN 0x8903
8#define SIOCGPGRP 0x8904 8#define SIOCGPGRP 0x8904
9#define SIOCATMARK 0x8905 9#define SIOCATMARK 0x8905
10#define SIOCGSTAMP 0x8906 /* Get stamp */ 10#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
11#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
11 12
12#endif 13#endif
diff --git a/include/asm-powerpc/socket.h b/include/asm-powerpc/socket.h
index c8b1da50e72d..403e9fde2eb5 100644
--- a/include/asm-powerpc/socket.h
+++ b/include/asm-powerpc/socket.h
@@ -56,5 +56,7 @@
56 56
57#define SO_PEERSEC 31 57#define SO_PEERSEC 31
58#define SO_PASSSEC 34 58#define SO_PASSSEC 34
59#define SO_TIMESTAMPNS 35
60#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
59 61
60#endif /* _ASM_POWERPC_SOCKET_H */ 62#endif /* _ASM_POWERPC_SOCKET_H */
diff --git a/include/asm-powerpc/sockios.h b/include/asm-powerpc/sockios.h
index 590078d8ed28..55cef7675a31 100644
--- a/include/asm-powerpc/sockios.h
+++ b/include/asm-powerpc/sockios.h
@@ -14,6 +14,7 @@
14#define FIOGETOWN 0x8903 14#define FIOGETOWN 0x8903
15#define SIOCGPGRP 0x8904 15#define SIOCGPGRP 0x8904
16#define SIOCATMARK 0x8905 16#define SIOCATMARK 0x8905
17#define SIOCGSTAMP 0x8906 /* Get stamp */ 17#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
18#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
18 19
19#endif /* _ASM_POWERPC_SOCKIOS_H */ 20#endif /* _ASM_POWERPC_SOCKIOS_H */
diff --git a/include/asm-s390/socket.h b/include/asm-s390/socket.h
index 1778a49a74c5..1161ebe3dec9 100644
--- a/include/asm-s390/socket.h
+++ b/include/asm-s390/socket.h
@@ -57,5 +57,7 @@
57 57
58#define SO_PEERSEC 31 58#define SO_PEERSEC 31
59#define SO_PASSSEC 34 59#define SO_PASSSEC 34
60#define SO_TIMESTAMPNS 35
61#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
60 62
61#endif /* _ASM_SOCKET_H */ 63#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-s390/sockios.h b/include/asm-s390/sockios.h
index 412aeb4dd6ce..f4fc16c7da59 100644
--- a/include/asm-s390/sockios.h
+++ b/include/asm-s390/sockios.h
@@ -15,6 +15,7 @@
15#define FIOGETOWN 0x8903 15#define FIOGETOWN 0x8903
16#define SIOCGPGRP 0x8904 16#define SIOCGPGRP 0x8904
17#define SIOCATMARK 0x8905 17#define SIOCATMARK 0x8905
18#define SIOCGSTAMP 0x8906 /* Get stamp */ 18#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
19#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
19 20
20#endif 21#endif
diff --git a/include/asm-sh/socket.h b/include/asm-sh/socket.h
index ca70362eb563..c48d6fc9da38 100644
--- a/include/asm-sh/socket.h
+++ b/include/asm-sh/socket.h
@@ -49,5 +49,7 @@
49 49
50#define SO_PEERSEC 31 50#define SO_PEERSEC 31
51#define SO_PASSSEC 34 51#define SO_PASSSEC 34
52#define SO_TIMESTAMPNS 35
53#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
52 54
53#endif /* __ASM_SH_SOCKET_H */ 55#endif /* __ASM_SH_SOCKET_H */
diff --git a/include/asm-sh/sockios.h b/include/asm-sh/sockios.h
index 08a71df8a8be..cf8b96b1f9ab 100644
--- a/include/asm-sh/sockios.h
+++ b/include/asm-sh/sockios.h
@@ -9,5 +9,6 @@
9#define SIOCSPGRP _IOW('s', 8, pid_t) 9#define SIOCSPGRP _IOW('s', 8, pid_t)
10#define SIOCGPGRP _IOR('s', 9, pid_t) 10#define SIOCGPGRP _IOR('s', 9, pid_t)
11 11
12#define SIOCGSTAMP _IOR('s', 100, struct timeval) /* Get stamp - linux-specific */ 12#define SIOCGSTAMP _IOR('s', 100, struct timeval) /* Get stamp (timeval) */
13#define SIOCGSTAMPNS _IOR('s', 101, struct timespec) /* Get stamp (timespec) */
13#endif /* __ASM_SH_SOCKIOS_H */ 14#endif /* __ASM_SH_SOCKIOS_H */
diff --git a/include/asm-sh64/sockios.h b/include/asm-sh64/sockios.h
index 1ae23ae82977..419e76f12f41 100644
--- a/include/asm-sh64/sockios.h
+++ b/include/asm-sh64/sockios.h
@@ -20,5 +20,6 @@
20#define SIOCSPGRP _IOW('s', 8, pid_t) 20#define SIOCSPGRP _IOW('s', 8, pid_t)
21#define SIOCGPGRP _IOR('s', 9, pid_t) 21#define SIOCGPGRP _IOR('s', 9, pid_t)
22 22
23#define SIOCGSTAMP _IOR('s', 100, struct timeval) /* Get stamp - linux-specific */ 23#define SIOCGSTAMP _IOR('s', 100, struct timeval) /* Get stamp (timeval) */
24#define SIOCGSTAMPNS _IOR('s', 101, struct timespec) /* Get stamp (timespec) */
24#endif /* __ASM_SH64_SOCKIOS_H */ 25#endif /* __ASM_SH64_SOCKIOS_H */
diff --git a/include/asm-sparc/socket.h b/include/asm-sparc/socket.h
index f6c4e5baf3f7..7c1423997cf0 100644
--- a/include/asm-sparc/socket.h
+++ b/include/asm-sparc/socket.h
@@ -49,6 +49,8 @@
49 49
50#define SO_PEERSEC 0x001e 50#define SO_PEERSEC 0x001e
51#define SO_PASSSEC 0x001f 51#define SO_PASSSEC 0x001f
52#define SO_TIMESTAMPNS 0x0021
53#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
52 54
53/* Security levels - as per NRL IPv6 - don't actually do anything */ 55/* Security levels - as per NRL IPv6 - don't actually do anything */
54#define SO_SECURITY_AUTHENTICATION 0x5001 56#define SO_SECURITY_AUTHENTICATION 0x5001
diff --git a/include/asm-sparc/sockios.h b/include/asm-sparc/sockios.h
index 0c01b597b06f..990ea746486b 100644
--- a/include/asm-sparc/sockios.h
+++ b/include/asm-sparc/sockios.h
@@ -7,7 +7,8 @@
7#define FIOGETOWN 0x8903 7#define FIOGETOWN 0x8903
8#define SIOCGPGRP 0x8904 8#define SIOCGPGRP 0x8904
9#define SIOCATMARK 0x8905 9#define SIOCATMARK 0x8905
10#define SIOCGSTAMP 0x8906 /* Get stamp */ 10#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
11#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
11 12
12#endif /* !(_ASM_SPARC_SOCKIOS_H) */ 13#endif /* !(_ASM_SPARC_SOCKIOS_H) */
13 14
diff --git a/include/asm-sparc64/socket.h b/include/asm-sparc64/socket.h
index 754d46a50af3..986441dcb8f0 100644
--- a/include/asm-sparc64/socket.h
+++ b/include/asm-sparc64/socket.h
@@ -49,6 +49,8 @@
49 49
50#define SO_PEERSEC 0x001e 50#define SO_PEERSEC 0x001e
51#define SO_PASSSEC 0x001f 51#define SO_PASSSEC 0x001f
52#define SO_TIMESTAMPNS 0x0021
53#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
52 54
53/* Security levels - as per NRL IPv6 - don't actually do anything */ 55/* Security levels - as per NRL IPv6 - don't actually do anything */
54#define SO_SECURITY_AUTHENTICATION 0x5001 56#define SO_SECURITY_AUTHENTICATION 0x5001
diff --git a/include/asm-sparc64/sockios.h b/include/asm-sparc64/sockios.h
index 6735bab4f39d..c7d9900638d0 100644
--- a/include/asm-sparc64/sockios.h
+++ b/include/asm-sparc64/sockios.h
@@ -7,7 +7,8 @@
7#define FIOGETOWN 0x8903 7#define FIOGETOWN 0x8903
8#define SIOCGPGRP 0x8904 8#define SIOCGPGRP 0x8904
9#define SIOCATMARK 0x8905 9#define SIOCATMARK 0x8905
10#define SIOCGSTAMP 0x8906 /* Get stamp */ 10#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
11#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
11 12
12#endif /* !(_ASM_SPARC64_SOCKIOS_H) */ 13#endif /* !(_ASM_SPARC64_SOCKIOS_H) */
13 14
diff --git a/include/asm-um/div64.h b/include/asm-um/div64.h
index 1e17f7409cab..7b73b2cd5b34 100644
--- a/include/asm-um/div64.h
+++ b/include/asm-um/div64.h
@@ -3,4 +3,5 @@
3 3
4#include "asm/arch/div64.h" 4#include "asm/arch/div64.h"
5 5
6extern uint64_t div64_64(uint64_t dividend, uint64_t divisor);
6#endif 7#endif
diff --git a/include/asm-v850/socket.h b/include/asm-v850/socket.h
index 0dfe55ac2ef2..a4c2493b025f 100644
--- a/include/asm-v850/socket.h
+++ b/include/asm-v850/socket.h
@@ -49,5 +49,7 @@
49 49
50#define SO_PEERSEC 31 50#define SO_PEERSEC 31
51#define SO_PASSSEC 34 51#define SO_PASSSEC 34
52#define SO_TIMESTAMPNS 35
53#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
52 54
53#endif /* __V850_SOCKET_H__ */ 55#endif /* __V850_SOCKET_H__ */
diff --git a/include/asm-v850/sockios.h b/include/asm-v850/sockios.h
index cf4874c2fd8a..823e106e6cd0 100644
--- a/include/asm-v850/sockios.h
+++ b/include/asm-v850/sockios.h
@@ -7,6 +7,7 @@
7#define FIOGETOWN 0x8903 7#define FIOGETOWN 0x8903
8#define SIOCGPGRP 0x8904 8#define SIOCGPGRP 0x8904
9#define SIOCATMARK 0x8905 9#define SIOCATMARK 0x8905
10#define SIOCGSTAMP 0x8906 /* Get stamp */ 10#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
11#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
11 12
12#endif /* __V850_SOCKIOS_H__ */ 13#endif /* __V850_SOCKIOS_H__ */
diff --git a/include/asm-x86_64/socket.h b/include/asm-x86_64/socket.h
index b46702607933..90af60cf3c0e 100644
--- a/include/asm-x86_64/socket.h
+++ b/include/asm-x86_64/socket.h
@@ -49,5 +49,7 @@
49 49
50#define SO_PEERSEC 31 50#define SO_PEERSEC 31
51#define SO_PASSSEC 34 51#define SO_PASSSEC 34
52#define SO_TIMESTAMPNS 35
53#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
52 54
53#endif /* _ASM_SOCKET_H */ 55#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-x86_64/sockios.h b/include/asm-x86_64/sockios.h
index 2eefd10d4f48..d726ba2513e3 100644
--- a/include/asm-x86_64/sockios.h
+++ b/include/asm-x86_64/sockios.h
@@ -7,6 +7,7 @@
 #define FIOGETOWN	0x8903
 #define SIOCGPGRP	0x8904
 #define SIOCATMARK	0x8905
-#define SIOCGSTAMP	0x8906		/* Get stamp */
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
 #endif
diff --git a/include/asm-xtensa/div64.h b/include/asm-xtensa/div64.h
index c4a105776383..20965e3af1dd 100644
--- a/include/asm-xtensa/div64.h
+++ b/include/asm-xtensa/div64.h
@@ -11,9 +11,15 @@
 #ifndef _XTENSA_DIV64_H
 #define _XTENSA_DIV64_H
 
+#include <linux/types.h>
+
 #define do_div(n,base) ({ \
 	int __res = n % ((unsigned int) base); \
 	n /= (unsigned int) base; \
 	__res; })
 
+static inline uint64_t div64_64(uint64_t dividend, uint64_t divisor)
+{
+	return dividend / divisor;
+}
 #endif
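
do_div() only divides a 64-bit value by a 32-bit one; the div64_64()
helper added above covers the full 64-by-64 case. An illustrative
kernel-style use (not from this patch):

    #include <linux/types.h>
    #include <asm/div64.h>

    /* Bytes per second over an interval measured in nanoseconds; the
     * divisor can exceed 32 bits, which rules out do_div().  (The
     * multiply can overflow for huge byte counts; a real caller would
     * clamp or rescale.) */
    static u64 throughput_bps(u64 bytes, u64 elapsed_ns)
    {
            return div64_64(bytes * 1000000000ULL, elapsed_ns);
    }
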
diff --git a/include/asm-xtensa/socket.h b/include/asm-xtensa/socket.h
index 971d231be60e..1f5aeacb9da2 100644
--- a/include/asm-xtensa/socket.h
+++ b/include/asm-xtensa/socket.h
@@ -60,5 +60,7 @@
 #define SO_ACCEPTCONN		30
 #define SO_PEERSEC		31
 #define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 #endif	/* _XTENSA_SOCKET_H */
diff --git a/include/asm-xtensa/sockios.h b/include/asm-xtensa/sockios.h
index 20d2ba10ecd1..efe0af379f01 100644
--- a/include/asm-xtensa/sockios.h
+++ b/include/asm-xtensa/sockios.h
@@ -25,6 +25,7 @@
 #define SIOCSPGRP	_IOW('s', 8, pid_t)
 #define SIOCGPGRP	_IOR('s', 9, pid_t)
 
-#define SIOCGSTAMP	0x8906		/* Get stamp - linux-specific */
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
 #endif	/* _XTENSA_SOCKIOS_H */
diff --git a/include/keys/rxrpc-type.h b/include/keys/rxrpc-type.h
new file mode 100644
index 000000000000..e2ee73aef0ee
--- /dev/null
+++ b/include/keys/rxrpc-type.h
@@ -0,0 +1,22 @@
+/* RxRPC key type
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _KEYS_RXRPC_TYPE_H
+#define _KEYS_RXRPC_TYPE_H
+
+#include <linux/key.h>
+
+/*
+ * key type for AF_RXRPC keys
+ */
+extern struct key_type key_type_rxrpc;
+
+#endif /* _KEYS_RXRPC_TYPE_H */
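
(The closing guard comment above is corrected from the copy-pasted
_KEYS_USER_TYPE_H.) Userspace hands AF_RXRPC its security material
through this key type. A hypothetical libkeyutils sketch; the
"<service>@<realm>" description and 8-byte kaserver-style secret follow
the AFS convention, but the exact payload format is defined by the
rxrpc key parser:

    #include <keyutils.h>

    static int add_afs_key(const unsigned char secret[8])
    {
            key_serial_t key;

            key = add_key("rxrpc", "afs@MY.REALM.ORG",
                          secret, 8, KEY_SPEC_PROCESS_KEYRING);
            return key < 0 ? -1 : 0;
    }
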
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index e81e301a4d71..4ff0f57d0add 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -69,9 +69,7 @@ header-y += hdsmart.h
 header-y += hysdn_if.h
 header-y += i2c-dev.h
 header-y += i8k.h
-header-y += icmp.h
 header-y += if_arcnet.h
-header-y += if_arp.h
 header-y += if_bonding.h
 header-y += if_cablemodem.h
 header-y += if_fc.h
@@ -88,7 +86,6 @@ header-y += if_tunnel.h
 header-y += in6.h
 header-y += in_route.h
 header-y += ioctl.h
-header-y += ip.h
 header-y += ipmi_msgdefs.h
 header-y += ip_mp_alg.h
 header-y += ipsec.h
@@ -116,6 +113,7 @@ header-y += netrom.h
 header-y += nfs2.h
 header-y += nfs4_mount.h
 header-y += nfs_mount.h
+header-y += nl80211.h
 header-y += oom.h
 header-y += param.h
 header-y += pci_regs.h
@@ -210,8 +208,10 @@ unifdef-y += hiddev.h
 unifdef-y += hpet.h
 unifdef-y += i2c.h
 unifdef-y += i2o-dev.h
+unifdef-y += icmp.h
 unifdef-y += icmpv6.h
 unifdef-y += if_addr.h
+unifdef-y += if_arp.h
 unifdef-y += if_bridge.h
 unifdef-y += if_ec.h
 unifdef-y += if_eql.h
@@ -231,6 +231,7 @@ unifdef-y += inet_diag.h
 unifdef-y += in.h
 unifdef-y += inotify.h
 unifdef-y += input.h
+unifdef-y += ip.h
 unifdef-y += ipc.h
 unifdef-y += ipmi.h
 unifdef-y += ipv6.h
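
icmp.h, if_arp.h and ip.h move from header-y to unifdef-y because the
hunks below add kernel-only inline helpers to them: unifdef-y headers
are run through unifdef(1) by "make headers_install", so the __KERNEL__
block disappears from the exported copy. The shape such a header takes
(example_hdr is a made-up stand-in):

    struct example_hdr {
            __u8    type;
            __u8    code;
    };

    #ifdef __KERNEL__
    #include <linux/skbuff.h>

    /* stripped from the userspace copy by headers_install */
    static inline struct example_hdr *example_hdr(const struct sk_buff *skb)
    {
            return (struct example_hdr *)skb_transport_header(skb);
    }
    #endif
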
diff --git a/include/linux/atalk.h b/include/linux/atalk.h
index d12984ddaa9f..ced8a1ed080c 100644
--- a/include/linux/atalk.h
+++ b/include/linux/atalk.h
@@ -101,7 +101,7 @@ struct ddpehdr {
 
 static __inline__ struct ddpehdr *ddp_hdr(struct sk_buff *skb)
 {
-	return (struct ddpehdr *)skb->h.raw;
+	return (struct ddpehdr *)skb_transport_header(skb);
 }
 
 /* AppleTalk AARP headers */
@@ -129,7 +129,7 @@ struct elapaarp {
 
 static __inline__ struct elapaarp *aarp_hdr(struct sk_buff *skb)
 {
-	return (struct elapaarp *)skb->h.raw;
+	return (struct elapaarp *)skb_transport_header(skb);
 }
 
 /* Not specified - how long till we drop a resolved entry */
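
These two hunks show the conversion pattern repeated throughout this
merge: direct pokes at the old skb->h.raw / skb->nh.raw / skb->mac.raw
union fields become calls to typed accessors. On a hypothetical header
it looks like:

    #include <linux/skbuff.h>

    struct foohdr { __u8 type; };        /* stand-in protocol header */

    static inline struct foohdr *foo_hdr(const struct sk_buff *skb)
    {
            /* was: return (struct foohdr *)skb->h.raw; */
            return (struct foohdr *)skb_transport_header(skb);
    }
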
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 1cb054bd93f2..fda2148d8c85 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -260,19 +260,20 @@ enum {
 
 static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb)
 {
-	return (struct dccp_hdr *)skb->h.raw;
+	return (struct dccp_hdr *)skb_transport_header(skb);
 }
 
 static inline struct dccp_hdr *dccp_zeroed_hdr(struct sk_buff *skb, int headlen)
 {
-	skb->h.raw = skb_push(skb, headlen);
-	memset(skb->h.raw, 0, headlen);
-	return dccp_hdr(skb);
+	skb_push(skb, headlen);
+	skb_reset_transport_header(skb);
+	return memset(skb_transport_header(skb), 0, headlen);
 }
 
 static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb)
 {
-	return (struct dccp_hdr_ext *)(skb->h.raw + sizeof(struct dccp_hdr));
+	return (struct dccp_hdr_ext *)(skb_transport_header(skb) +
+				       sizeof(struct dccp_hdr));
 }
 
 static inline unsigned int __dccp_basic_hdr_len(const struct dccp_hdr *dh)
@@ -301,12 +302,14 @@ static inline __u64 dccp_hdr_seq(const struct sk_buff *skb)
 
 static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb)
 {
-	return (struct dccp_hdr_request *)(skb->h.raw + dccp_basic_hdr_len(skb));
+	return (struct dccp_hdr_request *)(skb_transport_header(skb) +
+					   dccp_basic_hdr_len(skb));
 }
 
 static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb)
 {
-	return (struct dccp_hdr_ack_bits *)(skb->h.raw + dccp_basic_hdr_len(skb));
+	return (struct dccp_hdr_ack_bits *)(skb_transport_header(skb) +
+					    dccp_basic_hdr_len(skb));
 }
 
 static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb)
@@ -317,12 +320,14 @@ static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb)
 
 static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb)
 {
-	return (struct dccp_hdr_response *)(skb->h.raw + dccp_basic_hdr_len(skb));
+	return (struct dccp_hdr_response *)(skb_transport_header(skb) +
+					    dccp_basic_hdr_len(skb));
 }
 
 static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb)
 {
-	return (struct dccp_hdr_reset *)(skb->h.raw + dccp_basic_hdr_len(skb));
+	return (struct dccp_hdr_reset *)(skb_transport_header(skb) +
+					 dccp_basic_hdr_len(skb));
 }
 
 static inline unsigned int __dccp_hdr_len(const struct dccp_hdr *dh)
@@ -460,26 +465,27 @@ struct dccp_ackvec;
  * @dccps_service_list - second .. last service code on passive socket
  * @dccps_timestamp_time - time of latest TIMESTAMP option
  * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option
- * @dccps_l_ack_ratio -
- * @dccps_r_ack_ratio -
+ * @dccps_l_ack_ratio - feature-local Ack Ratio
+ * @dccps_r_ack_ratio - feature-remote Ack Ratio
  * @dccps_pcslen - sender   partial checksum coverage (via sockopt)
  * @dccps_pcrlen - receiver partial checksum coverage (via sockopt)
  * @dccps_ndp_count - number of Non Data Packets since last data packet
- * @dccps_mss_cache -
- * @dccps_minisock -
+ * @dccps_mss_cache - current value of MSS (path MTU minus header sizes)
+ * @dccps_minisock - associated minisock (accessed via dccp_msk)
  * @dccps_hc_rx_ackvec - rx half connection ack vector
- * @dccps_hc_rx_ccid -
- * @dccps_hc_tx_ccid -
- * @dccps_options_received -
- * @dccps_epoch -
- * @dccps_role - Role of this sock, one of %dccp_role
- * @dccps_hc_rx_insert_options -
- * @dccps_hc_tx_insert_options -
+ * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection)
+ * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection)
+ * @dccps_options_received - parsed set of retrieved options
+ * @dccps_role - role of this sock, one of %dccp_role
+ * @dccps_hc_rx_insert_options - receiver wants to add options when acking
+ * @dccps_hc_tx_insert_options - sender wants to add options when sending
  * @dccps_xmit_timer - timer for when CCID is not ready to send
+ * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs)
  */
 struct dccp_sock {
 	/* inet_connection_sock has to be the first member of dccp_sock */
 	struct inet_connection_sock	dccps_inet_connection;
+#define dccps_syn_rtt			dccps_inet_connection.icsk_ack.lrcvtime
 	__u64				dccps_swl;
 	__u64				dccps_swh;
 	__u64				dccps_awl;
diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
index 8270aac2aa5d..87b606b63f1e 100644
--- a/include/linux/fib_rules.h
+++ b/include/linux/fib_rules.h
@@ -5,8 +5,13 @@
 #include <linux/rtnetlink.h>
 
 /* rule is permanent, and cannot be deleted */
-#define FIB_RULE_PERMANENT	1
-#define FIB_RULE_INVERT		2
+#define FIB_RULE_PERMANENT	0x00000001
+#define FIB_RULE_INVERT		0x00000002
+#define FIB_RULE_UNRESOLVED	0x00000004
+#define FIB_RULE_DEV_DETACHED	0x00000008
+
+/* try to find source address in routing lookups */
+#define FIB_RULE_FIND_SADDR	0x00010000
 
 struct fib_rule_hdr
 {
@@ -29,7 +34,7 @@ enum
 	FRA_DST,	/* destination address */
 	FRA_SRC,	/* source address */
 	FRA_IFNAME,	/* interface name */
-	FRA_UNUSED1,
+	FRA_GOTO,	/* target to jump to (FR_ACT_GOTO) */
 	FRA_UNUSED2,
 	FRA_PRIORITY,	/* priority/preference */
 	FRA_UNUSED3,
@@ -51,8 +56,8 @@ enum
 {
 	FR_ACT_UNSPEC,
 	FR_ACT_TO_TBL,		/* Pass to fixed table */
-	FR_ACT_RES1,
-	FR_ACT_RES2,
+	FR_ACT_GOTO,		/* Jump to another rule */
+	FR_ACT_NOP,		/* No operation */
 	FR_ACT_RES3,
 	FR_ACT_RES4,
 	FR_ACT_BLACKHOLE,	/* Drop without notification */
diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h
index d4b333938f73..0fe562af9c8c 100644
--- a/include/linux/hdlc.h
+++ b/include/linux/hdlc.h
@@ -132,8 +132,8 @@ static __inline__ __be16 hdlc_type_trans(struct sk_buff *skb,
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
 
-	skb->mac.raw = skb->data;
-	skb->dev = dev;
+	skb->dev = dev;
+	skb_reset_mac_header(skb);
 
 	if (hdlc->proto->type_trans)
 		return hdlc->proto->type_trans(skb, dev);
diff --git a/include/linux/icmp.h b/include/linux/icmp.h
index 24da4fbc1a2f..474f2a51cf0a 100644
--- a/include/linux/icmp.h
+++ b/include/linux/icmp.h
@@ -82,6 +82,15 @@ struct icmphdr {
 	} un;
 };
 
+#ifdef __KERNEL__
+#include <linux/skbuff.h>
+
+static inline struct icmphdr *icmp_hdr(const struct sk_buff *skb)
+{
+	return (struct icmphdr *)skb_transport_header(skb);
+}
+#endif
+
 /*
  *	constants for (set|get)sockopt
  */
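
With the accessor in place, receive-path code no longer casts the raw
union field itself. A sketch (assumes the IP input path has already set
the transport header offset):

    #include <linux/icmp.h>
    #include <linux/skbuff.h>

    static int is_echo_request(const struct sk_buff *skb)
    {
            return icmp_hdr(skb)->type == ICMP_ECHO;
    }
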
diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h
index 68d3526c3a05..7c5e9817e998 100644
--- a/include/linux/icmpv6.h
+++ b/include/linux/icmpv6.h
@@ -75,6 +75,15 @@ struct icmp6hdr {
 #define icmp6_router_pref	icmp6_dataun.u_nd_ra.router_pref
 };
 
+#ifdef __KERNEL__
+#include <linux/skbuff.h>
+
+static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb)
+{
+	return (struct icmp6hdr *)skb_transport_header(skb);
+}
+#endif
+
 #define ICMPV6_ROUTER_PREF_LOW		0x3
 #define ICMPV6_ROUTER_PREF_MEDIUM	0x0
 #define ICMPV6_ROUTER_PREF_HIGH		0x1
diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h
index d557e4ce9b6b..43f3bedaafd3 100644
--- a/include/linux/if_addr.h
+++ b/include/linux/if_addr.h
@@ -39,6 +39,7 @@ enum
 #define IFA_F_TEMPORARY		IFA_F_SECONDARY
 
 #define	IFA_F_NODAD		0x02
+#define IFA_F_OPTIMISTIC	0x04
 #define	IFA_F_HOMEADDRESS	0x10
 #define IFA_F_DEPRECATED	0x20
 #define IFA_F_TENTATIVE		0x40
diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h
index 7f5714214ee3..ed7b93c3083a 100644
--- a/include/linux/if_arp.h
+++ b/include/linux/if_arp.h
@@ -148,4 +148,13 @@ struct arphdr
 
 };
 
+#ifdef __KERNEL__
+#include <linux/skbuff.h>
+
+static inline struct arphdr *arp_hdr(const struct sk_buff *skb)
+{
+	return (struct arphdr *)skb_network_header(skb);
+}
+#endif
+
 #endif	/* _LINUX_IF_ARP_H */
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index fd1b6eb94a5f..4ff211d98769 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -105,7 +105,8 @@ struct __fdb_entry
 #include <linux/netdevice.h>
 
 extern void brioctl_set(int (*ioctl_hook)(unsigned int, void __user *));
-extern int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
+extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
+					       struct sk_buff *skb);
 extern int (*br_should_route_hook)(struct sk_buff **pskb);
 
 #endif
diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index ab08f35cbc35..1db774cf9dc2 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -61,6 +61,7 @@
 #define ETH_P_8021Q	0x8100		/* 802.1Q VLAN Extended Header  */
 #define ETH_P_IPX	0x8137		/* IPX over DIX			*/
 #define ETH_P_IPV6	0x86DD		/* IPv6 over bluebook		*/
+#define ETH_P_PAUSE	0x8808		/* IEEE Pause frames. See 802.3 31B */
 #define ETH_P_SLOW	0x8809		/* Slow Protocol. See 802.3ad 43B */
 #define ETH_P_WCCP	0x883E		/* Web-cache coordination protocol
 					 * defined in draft-wilson-wrec-wccp-v2-00.txt */
@@ -112,7 +113,7 @@ struct ethhdr {
 
 static inline struct ethhdr *eth_hdr(const struct sk_buff *skb)
 {
-	return (struct ethhdr *)skb->mac.raw;
+	return (struct ethhdr *)skb_mac_header(skb);
 }
 
 #ifdef CONFIG_SYSCTL
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 35ed3b5467f3..604c2434f71c 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -126,6 +126,7 @@ enum
 	IFLA_INET6_STATS,	/* statistics			*/
 	IFLA_INET6_MCAST,	/* MC things. What of them?	*/
 	IFLA_INET6_CACHEINFO,	/* time values and max reasm size */
+	IFLA_INET6_ICMP6STATS,	/* statistics (icmpv6)		*/
 	__IFLA_INET6_MAX
 };
 
diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index f3de05c30678..ad09609227ff 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -42,6 +42,7 @@ struct sockaddr_ll
 #define PACKET_STATISTICS		6
 #define PACKET_COPY_THRESH		7
 #define PACKET_AUXDATA			8
+#define PACKET_ORIGDEV			9
 
 struct tpacket_stats
 {
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index e33ee763c052..6f987be60fe2 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -111,7 +111,17 @@ struct pppoe_hdr {
 	struct pppoe_tag tag[0];
 } __attribute__ ((packed));
 
+/* Length of entire PPPoE + PPP header */
+#define PPPOE_SES_HLEN	8
+
 #ifdef __KERNEL__
+#include <linux/skbuff.h>
+
+static inline struct pppoe_hdr *pppoe_hdr(const struct sk_buff *skb)
+{
+	return (struct pppoe_hdr *)skb_network_header(skb);
+}
+
 struct pppoe_opt {
 	struct net_device      *dev;	  /* device associated with socket*/
 	int			ifindex;  /* ifindex of device associated with socket */
diff --git a/include/linux/if_tr.h b/include/linux/if_tr.h
index 2f94cf2c7abb..046e9d95ba9a 100644
--- a/include/linux/if_tr.h
+++ b/include/linux/if_tr.h
@@ -47,7 +47,7 @@ struct trh_hdr {
 
 static inline struct trh_hdr *tr_hdr(const struct sk_buff *skb)
 {
-	return (struct trh_hdr *)skb->mac.raw;
+	return (struct trh_hdr *)skb_mac_header(skb);
 }
 #ifdef CONFIG_SYSCTL
 extern struct ctl_table tr_table[];
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index d103580c72d2..81e9bc93569b 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -51,7 +51,7 @@ struct vlan_ethhdr {
 
 static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb)
 {
-	return (struct vlan_ethhdr *)skb->mac.raw;
+	return (struct vlan_ethhdr *)skb_mac_header(skb);
 }
 
 struct vlan_hdr {
@@ -275,8 +275,8 @@ static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, unsigned short
 	veth->h_vlan_TCI = htons(tag);
 
 	skb->protocol = __constant_htons(ETH_P_8021Q);
-	skb->mac.raw -= VLAN_HLEN;
-	skb->nh.raw -= VLAN_HLEN;
+	skb->mac_header -= VLAN_HLEN;
+	skb->network_header -= VLAN_HLEN;
 
 	return skb;
 }
diff --git a/include/linux/if_wanpipe_common.h b/include/linux/if_wanpipe_common.h
deleted file mode 100644
index 6e5461d69fdd..000000000000
--- a/include/linux/if_wanpipe_common.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*****************************************************************************
-* if_wanipe_common.h   Sangoma Driver/Socket common area definitions.
-*
-* Author:       Nenad Corbic <ncorbic@sangoma.com>
-*
-* Copyright:    (c) 2000 Sangoma Technologies Inc.
-*
-*               This program is free software; you can redistribute it and/or
-*               modify it under the terms of the GNU General Public License
-*               as published by the Free Software Foundation; either version
-*               2 of the License, or (at your option) any later version.
-* ============================================================================
-* Jan 13, 2000  Nenad Corbic    Initial version
-*****************************************************************************/
-
-
-#ifndef _WANPIPE_SOCK_DRIVER_COMMON_H
-#define _WANPIPE_SOCK_DRIVER_COMMON_H
-
-typedef struct {
-	struct net_device *slave;
-	atomic_t packet_sent;
-	atomic_t receive_block;
-	atomic_t command;
-	atomic_t disconnect;
-	atomic_t driver_busy;
-	long common_critical;
-	struct timer_list *tx_timer;
-	struct sock *sk;		/* Wanpipe Sock bind's here */
-	int (*func)(struct sk_buff *skb, struct net_device *dev,
-		    struct sock *sk);
-
-	struct work_struct wanpipe_work;    /* deferred keventd work */
-	unsigned char rw_bind;		    /* Sock bind state */
-	unsigned char usedby;
-	unsigned char state;
-	unsigned char svc;
-	unsigned short lcn;
-	void *mbox;
-} wanpipe_common_t;
-
-
-enum {
-	WANSOCK_UNCONFIGURED,	/* link/channel is not configured */
-	WANSOCK_DISCONNECTED,	/* link/channel is disconnected */
-	WANSOCK_CONNECTING,	/* connection is in progress */
-	WANSOCK_CONNECTED,	/* link/channel is operational */
-	WANSOCK_LIMIT,		/* for verification only */
-	WANSOCK_DUALPORT,	/* for Dual Port cards */
-	WANSOCK_DISCONNECTING,
-	WANSOCK_BINDED,
-	WANSOCK_BIND_LISTEN,
-	WANSOCK_LISTEN
-};
-
-#endif
-
-
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index a113fe68d8a1..f510e7e382a8 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -80,6 +80,27 @@ struct igmpv3_query {
 	__be32 srcs[0];
 };
 
+#ifdef __KERNEL__
+#include <linux/skbuff.h>
+
+static inline struct igmphdr *igmp_hdr(const struct sk_buff *skb)
+{
+	return (struct igmphdr *)skb_transport_header(skb);
+}
+
+static inline struct igmpv3_report *
+			igmpv3_report_hdr(const struct sk_buff *skb)
+{
+	return (struct igmpv3_report *)skb_transport_header(skb);
+}
+
+static inline struct igmpv3_query *
+			igmpv3_query_hdr(const struct sk_buff *skb)
+{
+	return (struct igmpv3_query *)skb_transport_header(skb);
+}
+#endif
+
 #define IGMP_HOST_MEMBERSHIP_QUERY	0x11	/* From RFC1112 */
 #define IGMP_HOST_MEMBERSHIP_REPORT	0x12	/* Ditto */
 #define IGMP_DVMRP			0x13	/* DVMRP routing */
diff --git a/include/linux/in.h b/include/linux/in.h
index 1912e7c0bc26..3975cbf52f20 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -83,6 +83,7 @@ struct in_addr {
 #define IP_PMTUDISC_DONT		0	/* Never send DF frames */
 #define IP_PMTUDISC_WANT		1	/* Use per route hints	*/
 #define IP_PMTUDISC_DO			2	/* Always DF		*/
+#define IP_PMTUDISC_PROBE		3	/* Ignore dst pmtu	*/
 
 #define IP_MULTICAST_IF			32
 #define IP_MULTICAST_TTL 		33
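
IP_PMTUDISC_PROBE sends with DF set while ignoring the cached path MTU,
which is what MTU-probing tools want. A hypothetical userspace sketch
(older libcs may not define the constant yet, in which case it comes
from linux/in.h):

    #include <netinet/in.h>
    #include <sys/socket.h>

    static int enable_mtu_probing(int fd)
    {
            int val = IP_PMTUDISC_PROBE;

            return setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER,
                              &val, sizeof(val));
    }
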
diff --git a/include/linux/in6.h b/include/linux/in6.h
index 4e8350ae8869..2a61c82af115 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -44,10 +44,8 @@ struct in6_addr
  * NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined
  * in network byte order, not in host byte order as are the IPv4 equivalents
  */
-#if 0
 extern const struct in6_addr in6addr_any;
 #define IN6ADDR_ANY_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } } }
-#endif
 extern const struct in6_addr in6addr_loopback;
 #define IN6ADDR_LOOPBACK_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 } } }
 
@@ -179,6 +177,7 @@ struct in6_flowlabel_req
 #define IPV6_PMTUDISC_DONT		0
 #define IPV6_PMTUDISC_WANT		1
 #define IPV6_PMTUDISC_DO		2
+#define IPV6_PMTUDISC_PROBE		3
 
 /* Flowlabel */
 #define IPV6_FLOWLABEL_MGR	32
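
Dropping the "#if 0" gives kernel code a real in6addr_any to copy from,
matching what userspace has long had from libc, e.g.:

    #include <string.h>
    #include <netinet/in.h>
    #include <sys/socket.h>

    static int bind_any_v6(int fd, unsigned short port)
    {
            struct sockaddr_in6 sa;

            memset(&sa, 0, sizeof(sa));
            sa.sin6_family = AF_INET6;
            sa.sin6_port = htons(port);
            sa.sin6_addr = in6addr_any;
            return bind(fd, (struct sockaddr *)&sa, sizeof(sa));
    }
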
diff --git a/include/linux/ip.h b/include/linux/ip.h
index 1d36b971a8b5..bd0a2a8631c6 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -104,6 +104,20 @@ struct iphdr {
 	/*The options start here. */
 };
 
+#ifdef __KERNEL__
+#include <linux/skbuff.h>
+
+static inline struct iphdr *ip_hdr(const struct sk_buff *skb)
+{
+	return (struct iphdr *)skb_network_header(skb);
+}
+
+static inline struct iphdr *ipip_hdr(const struct sk_buff *skb)
+{
+	return (struct iphdr *)skb_transport_header(skb);
+}
+#endif
+
 struct ip_auth_hdr {
 	__u8  nexthdr;
 	__u8  hdrlen;		/* This one is measured in 32 bit units! */
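
Two helpers exist because tunnel decapsulation leaves two IP headers in
one skb: the network header offset marks the outer header, the transport
header offset the encapsulated one. A sketch along the lines of the ipip
receive path (illustrative only):

    #include <linux/ip.h>
    #include <linux/skbuff.h>

    static void tunnel_addrs(const struct sk_buff *skb,
                             __be32 *outer_src, __be32 *inner_src)
    {
            *outer_src = ip_hdr(skb)->saddr;
            *inner_src = ipip_hdr(skb)->saddr;
    }
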
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 713eb5eaa81f..09ea01a8a99c 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -178,6 +178,9 @@ struct ipv6_devconf {
 #endif
 	__s32		proxy_ndp;
 	__s32		accept_source_route;
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+	__s32		optimistic_dad;
+#endif
 	void		*sysctl;
 };
 
183 186
@@ -208,6 +211,7 @@ enum {
 	DEVCONF_PROXY_NDP,
 	__DEVCONF_OPTIMISTIC_DAD,
 	DEVCONF_ACCEPT_SOURCE_ROUTE,
+	DEVCONF_OPTIMISTIC_DAD,
 	DEVCONF_MAX
 };
 
@@ -219,6 +223,16 @@ enum {
 #include <net/if_inet6.h>	/* struct ipv6_mc_socklist */
 #include <net/inet_sock.h>
 
+static inline struct ipv6hdr *ipv6_hdr(const struct sk_buff *skb)
+{
+	return (struct ipv6hdr *)skb_network_header(skb);
+}
+
+static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb)
+{
+	return (struct ipv6hdr *)skb_transport_header(skb);
+}
+
 /*
    This structure contains results of exthdrs parsing
    as offsets from skb->nh.
diff --git a/include/linux/jhash.h b/include/linux/jhash.h
index 82c7ae412eec..2a2f99fbcb16 100644
--- a/include/linux/jhash.h
+++ b/include/linux/jhash.h
@@ -84,7 +84,7 @@ static inline u32 jhash(const void *key, u32 length, u32 initval)
84/* A special optimized version that handles 1 or more of u32s. 84/* A special optimized version that handles 1 or more of u32s.
85 * The length parameter here is the number of u32s in the key. 85 * The length parameter here is the number of u32s in the key.
86 */ 86 */
87static inline u32 jhash2(u32 *k, u32 length, u32 initval) 87static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
88{ 88{
89 u32 a, b, c, len; 89 u32 a, b, c, len;
90 90
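
The added const lets read-only keys be hashed without a cast, e.g.:

    #include <linux/jhash.h>

    static u32 flow_hash(const u32 key[4], u32 seed)
    {
            return jhash2(key, 4, seed);
    }
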
diff --git a/include/linux/key.h b/include/linux/key.h
index 169f05e4863e..a9220e75782e 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -160,6 +160,8 @@ struct key {
 	 */
 	union {
 		struct list_head	link;
+		unsigned long		x[2];
+		void			*p[2];
 	} type_data;
 
 	/* key data
diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 248305bb9a18..81bb9c7a4eb3 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -259,6 +259,12 @@ static inline s64 ktime_to_ns(const ktime_t kt)
 
 #endif
 
+static inline s64 ktime_to_us(const ktime_t kt)
+{
+	struct timeval tv = ktime_to_timeval(kt);
+	return (s64) tv.tv_sec * USEC_PER_SEC + tv.tv_usec;
+}
+
 /*
  * The resolution of the clocks. The resolution value is returned in
  * the clock_getres() system call to give application programmers an
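
ktime_to_us() converts via timeval to a flat microsecond count, the form
the DCCP syn-RTT bookkeeping earlier in this merge wants. Sketch:

    #include <linux/ktime.h>

    static s64 rtt_us(ktime_t sent, ktime_t acked)
    {
            return ktime_to_us(ktime_sub(acked, sent));
    }
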
diff --git a/include/linux/net.h b/include/linux/net.h
index 4db21e63d8d2..efc45177b503 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -24,7 +24,7 @@
 struct poll_table_struct;
 struct inode;
 
-#define NPROTO		33		/* should be enough for now..	*/
+#define NPROTO		34		/* should be enough for now..	*/
 
 #define SYS_SOCKET	1		/* sys_socket(2)		*/
 #define SYS_BIND	2		/* sys_bind(2)			*/
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1a528548cd1d..e027a3750a77 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -42,6 +42,8 @@
 struct vlan_group;
 struct ethtool_ops;
 struct netpoll_info;
+/* 802.11 specific */
+struct wireless_dev;
 					/* source back-compat hooks */
 #define SET_ETHTOOL_OPS(netdev,ops) \
 	( (netdev)->ethtool_ops = (ops) )
@@ -323,6 +325,7 @@ struct net_device
 #define NETIF_F_VLAN_CHALLENGED	1024	/* Device cannot handle VLAN packets */
 #define NETIF_F_GSO		2048	/* Enable software GSO. */
 #define NETIF_F_LLTX		4096	/* LockLess TX */
+#define NETIF_F_INTERNAL_STATS	8192	/* Use stats structure in net_device */
 
 	/* Segmentation offload features */
 #define NETIF_F_GSO_SHIFT	16
@@ -347,13 +350,15 @@ struct net_device
 
 
 	struct net_device_stats* (*get_stats)(struct net_device *dev);
+	struct net_device_stats	stats;
 
+#ifdef CONFIG_WIRELESS_EXT
 	/* List of functions to handle Wireless Extensions (instead of ioctl).
 	 * See <net/iw_handler.h> for details. Jean II */
 	const struct iw_handler_def *	wireless_handlers;
 	/* Instance data managed by the core of Wireless Extensions. */
 	struct iw_public_data *	wireless_data;
-
+#endif
 	const struct ethtool_ops *ethtool_ops;
 
 	/*
@@ -398,6 +403,8 @@ struct net_device
 	void			*ip6_ptr;	/* IPv6 specific data */
 	void			*ec_ptr;	/* Econet specific data */
 	void			*ax25_ptr;	/* AX.25 specific data */
+	struct wireless_dev	*ieee80211_ptr;	/* IEEE 802.11 specific data,
+						   assign before registering */
 
 /*
  * Cache line mostly used on receive path (including eth_type_trans())
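
With a stats structure embedded in net_device (and NETIF_F_INTERNAL_STATS
to advertise its use), a driver can count into dev->stats instead of
carrying a private copy. A hypothetical driver fragment:

    #include <linux/netdevice.h>

    static struct net_device_stats *example_get_stats(struct net_device *dev)
    {
            return &dev->stats;
    }

    static void example_rx_account(struct net_device *dev, unsigned int len)
    {
            dev->stats.rx_packets++;
            dev->stats.rx_bytes += len;
    }
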
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 70d3b4f1e48d..10b5c6275706 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -281,9 +281,6 @@ extern void nf_reinject(struct sk_buff *skb,
281 struct nf_info *info, 281 struct nf_info *info,
282 unsigned int verdict); 282 unsigned int verdict);
283 283
284extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
285extern void nf_ct_attach(struct sk_buff *, struct sk_buff *);
286
287/* FIXME: Before cache is ever used, this must be implemented for real. */ 284/* FIXME: Before cache is ever used, this must be implemented for real. */
288extern void nf_invalidate_cache(int pf); 285extern void nf_invalidate_cache(int pf);
289 286
@@ -388,11 +385,18 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb,
388{ 385{
389 return 1; 386 return 1;
390} 387}
391static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
392struct flowi; 388struct flowi;
393static inline void 389static inline void
394nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family) {} 390nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family) {}
395#endif /*CONFIG_NETFILTER*/ 391#endif /*CONFIG_NETFILTER*/
396 392
393#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
394extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
395extern void nf_ct_attach(struct sk_buff *, struct sk_buff *);
396extern void (*nf_ct_destroy)(struct nf_conntrack *);
397#else
398static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
399#endif
400
397#endif /*__KERNEL__*/ 401#endif /*__KERNEL__*/
398#endif /*__LINUX_NETFILTER_H*/ 402#endif /*__LINUX_NETFILTER_H*/
diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h
index 007af4c2770b..22ce29995f13 100644
--- a/include/linux/netfilter/nf_conntrack_tcp.h
+++ b/include/linux/netfilter/nf_conntrack_tcp.h
@@ -30,6 +30,11 @@ enum tcp_conntrack {
 /* Be liberal in window checking */
 #define IP_CT_TCP_FLAG_BE_LIBERAL		0x08
 
+struct nf_ct_tcp_flags {
+	u_int8_t flags;
+	u_int8_t mask;
+};
+
 #ifdef __KERNEL__
 
 struct ip_ct_tcp_state {
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 1e9c821f152d..0f9311df1559 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -62,11 +62,11 @@ struct nfattr
 #define NFA_DATA(nfa)   ((void *)(((char *)(nfa)) + NFA_LENGTH(0)))
 #define NFA_PAYLOAD(nfa) ((int)((nfa)->nfa_len) - NFA_LENGTH(0))
 #define NFA_NEST(skb, type) \
-({	struct nfattr *__start = (struct nfattr *) (skb)->tail; \
+({	struct nfattr *__start = (struct nfattr *)skb_tail_pointer(skb); \
 	NFA_PUT(skb, (NFNL_NFA_NEST | type), 0, NULL); \
 	__start;  })
 #define NFA_NEST_END(skb, start) \
-({      (start)->nfa_len = ((skb)->tail - (unsigned char *) (start)); \
+({      (start)->nfa_len = skb_tail_pointer(skb) - (unsigned char *)(start); \
 	(skb)->len; })
 #define NFA_NEST_CANCEL(skb, start) \
 ({      if (start) \
@@ -111,7 +111,7 @@ struct nfgenmsg {
 struct nfnl_callback
 {
 	int (*call)(struct sock *nl, struct sk_buff *skb, 
-		struct nlmsghdr *nlh, struct nfattr *cda[], int *errp);
+		struct nlmsghdr *nlh, struct nfattr *cda[]);
 	u_int16_t attr_count;	/* number of nfattr's */
 };
 
@@ -129,19 +129,6 @@ extern void __nfa_fill(struct sk_buff *skb, int attrtype,
 ({	if (skb_tailroom(skb) < (int)NFA_SPACE(attrlen)) goto nfattr_failure; \
 	__nfa_fill(skb, attrtype, attrlen, data); })
 
-extern struct semaphore nfnl_sem;
-
-#define nfnl_shlock()		down(&nfnl_sem)
-#define nfnl_shlock_nowait()	down_trylock(&nfnl_sem)
-
-#define nfnl_shunlock()		do { up(&nfnl_sem); \
-				     if(nfnl && nfnl->sk_receive_queue.qlen) \
-					    nfnl->sk_data_ready(nfnl, 0); \
-				} while(0)
-
-extern void nfnl_lock(void);
-extern void nfnl_unlock(void);
-
 extern int nfnetlink_subsys_register(struct nfnetlink_subsystem *n);
 extern int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n);
 
diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h
index b5883ccee295..d7c35039721e 100644
--- a/include/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/linux/netfilter/nfnetlink_conntrack.h
@@ -83,6 +83,10 @@ enum ctattr_protoinfo {
 enum ctattr_protoinfo_tcp {
 	CTA_PROTOINFO_TCP_UNSPEC,
 	CTA_PROTOINFO_TCP_STATE,
+	CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
+	CTA_PROTOINFO_TCP_WSCALE_REPLY,
+	CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
+	CTA_PROTOINFO_TCP_FLAGS_REPLY,
 	__CTA_PROTOINFO_TCP_MAX
 };
 #define CTA_PROTOINFO_TCP_MAX (__CTA_PROTOINFO_TCP_MAX - 1)
diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index 55689f39f77a..19060030bac9 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -7,6 +7,7 @@
 #include <linux/netfilter.h>
 #include <linux/if_ether.h>
 #include <linux/if_vlan.h>
+#include <linux/if_pppox.h>
 
 /* Bridge Hooks */
 /* After promisc drops, checksum checks. */
@@ -58,8 +59,14 @@ static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
  * enough room for the encapsulating header (if there is one). */
 static inline int nf_bridge_pad(const struct sk_buff *skb)
 {
-	return (skb->nf_bridge && skb->protocol == htons(ETH_P_8021Q))
-		? VLAN_HLEN : 0;
+	int padding = 0;
+
+	if (skb->nf_bridge && skb->protocol == htons(ETH_P_8021Q))
+		padding = VLAN_HLEN;
+	else if (skb->nf_bridge && skb->protocol == htons(ETH_P_PPP_SES))
+		padding = PPPOE_SES_HLEN;
+
+	return padding;
 }
 
 struct bridge_skb_cb {
diff --git a/include/linux/netfilter_bridge/ebt_802_3.h b/include/linux/netfilter_bridge/ebt_802_3.h
index 07f044ff1a6b..a11b0c2017fd 100644
--- a/include/linux/netfilter_bridge/ebt_802_3.h
+++ b/include/linux/netfilter_bridge/ebt_802_3.h
@@ -54,7 +54,7 @@ struct ebt_802_3_hdr {
 
 static inline struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb)
 {
-	return (struct ebt_802_3_hdr *)skb->mac.raw;
+	return (struct ebt_802_3_hdr *)skb_mac_header(skb);
 }
 #endif
 
diff --git a/include/linux/netfilter_bridge/ebt_arp.h b/include/linux/netfilter_bridge/ebt_arp.h
index 97e4dbde1f89..cbf4843b6b0f 100644
--- a/include/linux/netfilter_bridge/ebt_arp.h
+++ b/include/linux/netfilter_bridge/ebt_arp.h
@@ -8,8 +8,10 @@
 #define EBT_ARP_DST_IP 0x10
 #define EBT_ARP_SRC_MAC 0x20
 #define EBT_ARP_DST_MAC 0x40
+#define EBT_ARP_GRAT 0x80
 #define EBT_ARP_MASK (EBT_ARP_OPCODE | EBT_ARP_HTYPE | EBT_ARP_PTYPE | \
-   EBT_ARP_SRC_IP | EBT_ARP_DST_IP | EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)
+   EBT_ARP_SRC_IP | EBT_ARP_DST_IP | EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC | \
+   EBT_ARP_GRAT)
 #define EBT_ARP_MATCH "arp"
 
 struct ebt_arp_info
diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild
index 180337801a86..7185792b900f 100644
--- a/include/linux/netfilter_ipv4/Kbuild
+++ b/include/linux/netfilter_ipv4/Kbuild
@@ -1,9 +1,3 @@
-header-y += ip_conntrack_helper.h
-header-y += ip_conntrack_protocol.h
-header-y += ip_conntrack_sctp.h
-header-y += ip_conntrack_tcp.h
-header-y += ip_conntrack_tftp.h
-header-y += ip_nat_pptp.h
 header-y += ipt_addrtype.h
 header-y += ipt_ah.h
 header-y += ipt_CLASSIFY.h
@@ -49,13 +43,5 @@ header-y += ipt_ttl.h
 header-y += ipt_TTL.h
 header-y += ipt_ULOG.h
 
-unifdef-y += ip_conntrack.h
-unifdef-y += ip_conntrack_h323.h
-unifdef-y += ip_conntrack_irc.h
-unifdef-y += ip_conntrack_pptp.h
-unifdef-y += ip_conntrack_proto_gre.h
-unifdef-y += ip_conntrack_tuple.h
-unifdef-y += ip_nat.h
-unifdef-y += ip_nat_rule.h
 unifdef-y += ip_queue.h
 unifdef-y += ip_tables.h
diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h
deleted file mode 100644
index da9274e6bf12..000000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack.h
+++ /dev/null
@@ -1,402 +0,0 @@
1#ifndef _IP_CONNTRACK_H
2#define _IP_CONNTRACK_H
3
4#include <linux/netfilter/nf_conntrack_common.h>
5
6#ifdef __KERNEL__
7#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
8#include <linux/bitops.h>
9#include <linux/compiler.h>
10#include <asm/atomic.h>
11
12#include <linux/timer.h>
13#include <linux/netfilter_ipv4/ip_conntrack_tcp.h>
14#include <linux/netfilter_ipv4/ip_conntrack_icmp.h>
15#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
16#include <linux/netfilter_ipv4/ip_conntrack_sctp.h>
17
18/* per conntrack: protocol private data */
19union ip_conntrack_proto {
20 /* insert conntrack proto private data here */
21 struct ip_ct_gre gre;
22 struct ip_ct_sctp sctp;
23 struct ip_ct_tcp tcp;
24 struct ip_ct_icmp icmp;
25};
26
27union ip_conntrack_expect_proto {
28 /* insert expect proto private data here */
29};
30
31/* Add protocol helper include file here */
32#include <linux/netfilter_ipv4/ip_conntrack_h323.h>
33#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
34#include <linux/netfilter_ipv4/ip_conntrack_amanda.h>
35#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
36#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
37
38/* per conntrack: application helper private data */
39union ip_conntrack_help {
40 /* insert conntrack helper private data (master) here */
41 struct ip_ct_h323_master ct_h323_info;
42 struct ip_ct_pptp_master ct_pptp_info;
43 struct ip_ct_ftp_master ct_ftp_info;
44 struct ip_ct_irc_master ct_irc_info;
45};
46
47#ifdef CONFIG_IP_NF_NAT_NEEDED
48#include <linux/netfilter_ipv4/ip_nat.h>
49#include <linux/netfilter_ipv4/ip_nat_pptp.h>
50
51/* per conntrack: nat application helper private data */
52union ip_conntrack_nat_help {
53 /* insert nat helper private data here */
54 struct ip_nat_pptp nat_pptp_info;
55};
56#endif
57
58#include <linux/types.h>
59#include <linux/skbuff.h>
60
61#ifdef CONFIG_NETFILTER_DEBUG
62#define IP_NF_ASSERT(x) \
63do { \
64 if (!(x)) \
65 /* Wooah! I'm tripping my conntrack in a frenzy of \
66 netplay... */ \
67 printk("NF_IP_ASSERT: %s:%i(%s)\n", \
68 __FILE__, __LINE__, __FUNCTION__); \
69} while(0)
70#else
71#define IP_NF_ASSERT(x)
72#endif
73
74struct ip_conntrack_helper;
75
76struct ip_conntrack
77{
78 /* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
79 plus 1 for any connection(s) we are `master' for */
80 struct nf_conntrack ct_general;
81
82 /* Have we seen traffic both ways yet? (bitset) */
83 unsigned long status;
84
85 /* Timer function; drops refcnt when it goes off. */
86 struct timer_list timeout;
87
88#ifdef CONFIG_IP_NF_CT_ACCT
89 /* Accounting Information (same cache line as other written members) */
90 struct ip_conntrack_counter counters[IP_CT_DIR_MAX];
91#endif
92 /* If we were expected by an expectation, this will be it */
93 struct ip_conntrack *master;
94
95 /* Current number of expected connections */
96 unsigned int expecting;
97
98 /* Unique ID that identifies this conntrack*/
99 unsigned int id;
100
101 /* Helper, if any. */
102 struct ip_conntrack_helper *helper;
103
104 /* Storage reserved for other modules: */
105 union ip_conntrack_proto proto;
106
107 union ip_conntrack_help help;
108
109#ifdef CONFIG_IP_NF_NAT_NEEDED
110 struct {
111 struct ip_nat_info info;
112 union ip_conntrack_nat_help help;
113#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
114 defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
115 int masq_index;
116#endif
117 } nat;
118#endif /* CONFIG_IP_NF_NAT_NEEDED */
119
120#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
121 u_int32_t mark;
122#endif
123
124#ifdef CONFIG_IP_NF_CONNTRACK_SECMARK
125 u_int32_t secmark;
126#endif
127
128 /* Traversed often, so hopefully in different cacheline to top */
129 /* These are my tuples; original and reply */
130 struct ip_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX];
131};
132
133struct ip_conntrack_expect
134{
135 /* Internal linked list (global expectation list) */
136 struct list_head list;
137
138 /* We expect this tuple, with the following mask */
139 struct ip_conntrack_tuple tuple, mask;
140
141 /* Function to call after setup and insertion */
142 void (*expectfn)(struct ip_conntrack *new,
143 struct ip_conntrack_expect *this);
144
145 /* The conntrack of the master connection */
146 struct ip_conntrack *master;
147
148 /* Timer function; deletes the expectation. */
149 struct timer_list timeout;
150
151 /* Usage count. */
152 atomic_t use;
153
154 /* Unique ID */
155 unsigned int id;
156
157 /* Flags */
158 unsigned int flags;
159
160#ifdef CONFIG_IP_NF_NAT_NEEDED
161 __be32 saved_ip;
162 /* This is the original per-proto part, used to map the
163 * expected connection the way the recipient expects. */
164 union ip_conntrack_manip_proto saved_proto;
165 /* Direction relative to the master connection. */
166 enum ip_conntrack_dir dir;
167#endif
168};
169
170#define IP_CT_EXPECT_PERMANENT 0x1
171
172static inline struct ip_conntrack *
173tuplehash_to_ctrack(const struct ip_conntrack_tuple_hash *hash)
174{
175 return container_of(hash, struct ip_conntrack,
176 tuplehash[hash->tuple.dst.dir]);
177}
178
179/* get master conntrack via master expectation */
180#define master_ct(conntr) (conntr->master)
181
182/* Alter reply tuple (maybe alter helper). */
183extern void
184ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
185 const struct ip_conntrack_tuple *newreply);
186
187/* Is this tuple taken? (ignoring any belonging to the given
188 conntrack). */
189extern int
190ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
191 const struct ip_conntrack *ignored_conntrack);
192
193/* Return conntrack_info and tuple hash for given skb. */
194static inline struct ip_conntrack *
195ip_conntrack_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
196{
197 *ctinfo = skb->nfctinfo;
198 return (struct ip_conntrack *)skb->nfct;
199}
200
201/* decrement reference count on a conntrack */
202static inline void
203ip_conntrack_put(struct ip_conntrack *ct)
204{
205 IP_NF_ASSERT(ct);
206 nf_conntrack_put(&ct->ct_general);
207}
208
209extern int invert_tuplepr(struct ip_conntrack_tuple *inverse,
210 const struct ip_conntrack_tuple *orig);
211
212extern void __ip_ct_refresh_acct(struct ip_conntrack *ct,
213 enum ip_conntrack_info ctinfo,
214 const struct sk_buff *skb,
215 unsigned long extra_jiffies,
216 int do_acct);
217
218/* Refresh conntrack for this many jiffies and do accounting */
219static inline void ip_ct_refresh_acct(struct ip_conntrack *ct,
220 enum ip_conntrack_info ctinfo,
221 const struct sk_buff *skb,
222 unsigned long extra_jiffies)
223{
224 __ip_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, 1);
225}
226
227/* Refresh conntrack for this many jiffies */
228static inline void ip_ct_refresh(struct ip_conntrack *ct,
229 const struct sk_buff *skb,
230 unsigned long extra_jiffies)
231{
232 __ip_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0);
233}
234
235/* These are for NAT. Icky. */
236/* Update TCP window tracking data when NAT mangles the packet */
237extern void ip_conntrack_tcp_update(struct sk_buff *skb,
238 struct ip_conntrack *conntrack,
239 enum ip_conntrack_dir dir);
240
241/* Call me when a conntrack is destroyed. */
242extern void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack);
243
244/* Fake conntrack entry for untracked connections */
245extern struct ip_conntrack ip_conntrack_untracked;
246
247/* Returns new sk_buff, or NULL */
248struct sk_buff *
249ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user);
250
251/* Iterate over all conntracks: if iter returns true, it's deleted. */
252extern void
253ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *data),
254 void *data);
255
256extern struct ip_conntrack_helper *
257__ip_conntrack_helper_find_byname(const char *);
258extern struct ip_conntrack_helper *
259ip_conntrack_helper_find_get(const struct ip_conntrack_tuple *tuple);
260extern void ip_conntrack_helper_put(struct ip_conntrack_helper *helper);
261
262extern struct ip_conntrack_protocol *
263__ip_conntrack_proto_find(u_int8_t protocol);
264extern struct ip_conntrack_protocol *
265ip_conntrack_proto_find_get(u_int8_t protocol);
266extern void ip_conntrack_proto_put(struct ip_conntrack_protocol *proto);
267
268extern void ip_ct_remove_expectations(struct ip_conntrack *ct);
269
270extern struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *,
271 struct ip_conntrack_tuple *);
272
273extern void ip_conntrack_free(struct ip_conntrack *ct);
274
275extern void ip_conntrack_hash_insert(struct ip_conntrack *ct);
276
277extern struct ip_conntrack_expect *
278__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple);
279
280extern struct ip_conntrack_expect *
281ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple);
282
283extern struct ip_conntrack_tuple_hash *
284__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
285 const struct ip_conntrack *ignored_conntrack);
286
287extern void ip_conntrack_flush(void);
288
289/* It's confirmed if it is, or has been in the hash table. */
290static inline int is_confirmed(struct ip_conntrack *ct)
291{
292 return test_bit(IPS_CONFIRMED_BIT, &ct->status);
293}
294
295static inline int is_dying(struct ip_conntrack *ct)
296{
297 return test_bit(IPS_DYING_BIT, &ct->status);
298}
299
300extern unsigned int ip_conntrack_htable_size;
301extern int ip_conntrack_checksum;
302
303#define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++)
304#define CONNTRACK_STAT_INC_ATOMIC(count) \
305do { \
306 local_bh_disable(); \
307 __get_cpu_var(ip_conntrack_stat).count++; \
308 local_bh_enable(); \
309} while (0)
310
311#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
312#include <linux/notifier.h>
313#include <linux/interrupt.h>
314
315struct ip_conntrack_ecache {
316 struct ip_conntrack *ct;
317 unsigned int events;
318};
319DECLARE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
320
321#define CONNTRACK_ECACHE(x) (__get_cpu_var(ip_conntrack_ecache).x)
322
323extern struct atomic_notifier_head ip_conntrack_chain;
324extern struct atomic_notifier_head ip_conntrack_expect_chain;
325
326static inline int ip_conntrack_register_notifier(struct notifier_block *nb)
327{
328 return atomic_notifier_chain_register(&ip_conntrack_chain, nb);
329}
330
331static inline int ip_conntrack_unregister_notifier(struct notifier_block *nb)
332{
333 return atomic_notifier_chain_unregister(&ip_conntrack_chain, nb);
334}
335
336static inline int
337ip_conntrack_expect_register_notifier(struct notifier_block *nb)
338{
339 return atomic_notifier_chain_register(&ip_conntrack_expect_chain, nb);
340}
341
342static inline int
343ip_conntrack_expect_unregister_notifier(struct notifier_block *nb)
344{
345 return atomic_notifier_chain_unregister(&ip_conntrack_expect_chain,
346 nb);
347}
348
349extern void ip_ct_deliver_cached_events(const struct ip_conntrack *ct);
350extern void __ip_ct_event_cache_init(struct ip_conntrack *ct);
351
352static inline void
353ip_conntrack_event_cache(enum ip_conntrack_events event,
354 const struct sk_buff *skb)
355{
356 struct ip_conntrack *ct = (struct ip_conntrack *)skb->nfct;
357 struct ip_conntrack_ecache *ecache;
358
359 local_bh_disable();
360 ecache = &__get_cpu_var(ip_conntrack_ecache);
361 if (ct != ecache->ct)
362 __ip_ct_event_cache_init(ct);
363 ecache->events |= event;
364 local_bh_enable();
365}
366
367static inline void ip_conntrack_event(enum ip_conntrack_events event,
368 struct ip_conntrack *ct)
369{
370 if (is_confirmed(ct) && !is_dying(ct))
371 atomic_notifier_call_chain(&ip_conntrack_chain, event, ct);
372}
373
374static inline void
375ip_conntrack_expect_event(enum ip_conntrack_expect_events event,
376 struct ip_conntrack_expect *exp)
377{
378 atomic_notifier_call_chain(&ip_conntrack_expect_chain, event, exp);
379}
380#else /* CONFIG_IP_NF_CONNTRACK_EVENTS */
381static inline void ip_conntrack_event_cache(enum ip_conntrack_events event,
382 const struct sk_buff *skb) {}
383static inline void ip_conntrack_event(enum ip_conntrack_events event,
384 struct ip_conntrack *ct) {}
385static inline void ip_ct_deliver_cached_events(const struct ip_conntrack *ct) {}
386static inline void
387ip_conntrack_expect_event(enum ip_conntrack_expect_events event,
388 struct ip_conntrack_expect *exp) {}
389#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
390
391#ifdef CONFIG_IP_NF_NAT_NEEDED
392static inline int ip_nat_initialized(struct ip_conntrack *conntrack,
393 enum ip_nat_manip_type manip)
394{
395 if (manip == IP_NAT_MANIP_SRC)
396 return test_bit(IPS_SRC_NAT_DONE_BIT, &conntrack->status);
397 return test_bit(IPS_DST_NAT_DONE_BIT, &conntrack->status);
398}
399#endif /* CONFIG_IP_NF_NAT_NEEDED */
400
401#endif /* __KERNEL__ */
402#endif /* _IP_CONNTRACK_H */
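
The notifier inlines above are thin wrappers around a standard atomic notifier chain, so consumers of conntrack events follow the usual notifier pattern. A minimal sketch of a subscriber against this (since-removed) API; the callback and module names are illustrative, not from the tree:

#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>

/* Hypothetical subscriber: 'events' is the mask of cached event bits,
 * 'ptr' is the struct ip_conntrack that generated them. */
static int example_ct_event(struct notifier_block *nb,
			    unsigned long events, void *ptr)
{
	struct ip_conntrack *ct = ptr;

	if (is_confirmed(ct))
		printk(KERN_DEBUG "conntrack %p: events %lx\n", ct, events);
	return NOTIFY_DONE;
}

static struct notifier_block example_ct_nb = {
	.notifier_call	= example_ct_event,
};

static int __init example_init(void)
{
	return ip_conntrack_register_notifier(&example_ct_nb);
}

static void __exit example_exit(void)
{
	ip_conntrack_unregister_notifier(&example_ct_nb);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
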
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_amanda.h b/include/linux/netfilter_ipv4/ip_conntrack_amanda.h
deleted file mode 100644
index de3e41f51aec..000000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_amanda.h
+++ /dev/null
@@ -1,11 +0,0 @@
1#ifndef _IP_CONNTRACK_AMANDA_H
2#define _IP_CONNTRACK_AMANDA_H
3/* AMANDA tracking. */
4
5struct ip_conntrack_expect;
6extern unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb,
7 enum ip_conntrack_info ctinfo,
8 unsigned int matchoff,
9 unsigned int matchlen,
10 struct ip_conntrack_expect *exp);
11#endif /* _IP_CONNTRACK_AMANDA_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h
deleted file mode 100644
index e3a6df07aa4b..000000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_core.h
+++ /dev/null
@@ -1,61 +0,0 @@
1#ifndef _IP_CONNTRACK_CORE_H
2#define _IP_CONNTRACK_CORE_H
3#include <linux/netfilter.h>
4
5#define MAX_IP_CT_PROTO 256
6extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
7
8/* This header is used to share core functionality between the
9 standalone connection tracking module, and the compatibility layer's use
10 of connection tracking. */
11extern unsigned int ip_conntrack_in(unsigned int hooknum,
12 struct sk_buff **pskb,
13 const struct net_device *in,
14 const struct net_device *out,
15 int (*okfn)(struct sk_buff *));
16
17extern int ip_conntrack_init(void);
18extern void ip_conntrack_cleanup(void);
19
20struct ip_conntrack_protocol;
21
22extern int
23ip_ct_get_tuple(const struct iphdr *iph,
24 const struct sk_buff *skb,
25 unsigned int dataoff,
26 struct ip_conntrack_tuple *tuple,
27 const struct ip_conntrack_protocol *protocol);
28
29extern int
30ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
31 const struct ip_conntrack_tuple *orig,
32 const struct ip_conntrack_protocol *protocol);
33
34/* Find a connection corresponding to a tuple. */
35struct ip_conntrack_tuple_hash *
36ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
37 const struct ip_conntrack *ignored_conntrack);
38
39extern int __ip_conntrack_confirm(struct sk_buff **pskb);
40
41/* Confirm a connection: returns NF_DROP if packet must be dropped. */
42static inline int ip_conntrack_confirm(struct sk_buff **pskb)
43{
44 struct ip_conntrack *ct = (struct ip_conntrack *)(*pskb)->nfct;
45 int ret = NF_ACCEPT;
46
47 if (ct) {
48 if (!is_confirmed(ct) && !is_dying(ct))
49 ret = __ip_conntrack_confirm(pskb);
50 ip_ct_deliver_cached_events(ct);
51 }
52 return ret;
53}
54
55extern void ip_ct_unlink_expect(struct ip_conntrack_expect *exp);
56
57extern struct list_head *ip_conntrack_hash;
58extern struct list_head ip_conntrack_expect_list;
59extern rwlock_t ip_conntrack_lock;
60#endif /* _IP_CONNTRACK_CORE_H */
61
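
ip_conntrack_confirm() is what the final POST_ROUTING/LOCAL_IN hook calls, so a connection only enters the hash table once its first packet has survived all other hooks. A sketch of such a hook, assuming the 2.6.21-era nf_hookfn signature (the function name is hypothetical):

static unsigned int example_confirm_hook(unsigned int hooknum,
					 struct sk_buff **pskb,
					 const struct net_device *in,
					 const struct net_device *out,
					 int (*okfn)(struct sk_buff *))
{
	/* Moves an unconfirmed, live connection into the hash table and
	 * delivers any cached events; NF_DROP means drop the packet. */
	return ip_conntrack_confirm(pskb);
}
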
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_ftp.h b/include/linux/netfilter_ipv4/ip_conntrack_ftp.h
deleted file mode 100644
index 2129fc3972ac..000000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_ftp.h
+++ /dev/null
@@ -1,44 +0,0 @@
1#ifndef _IP_CONNTRACK_FTP_H
2#define _IP_CONNTRACK_FTP_H
3/* FTP tracking. */
4
5/* This enum is exposed to userspace */
6enum ip_ct_ftp_type
7{
8 /* PORT command from client */
9 IP_CT_FTP_PORT,
10 /* PASV response from server */
11 IP_CT_FTP_PASV,
12 /* EPRT command from client */
13 IP_CT_FTP_EPRT,
14 /* EPSV response from server */
15 IP_CT_FTP_EPSV,
16};
17
18#ifdef __KERNEL__
19
20#define FTP_PORT 21
21
22#define NUM_SEQ_TO_REMEMBER 2
23/* This structure exists only once per master */
24struct ip_ct_ftp_master {
25 /* Valid seq positions for cmd matching after newline */
26 u_int32_t seq_aft_nl[IP_CT_DIR_MAX][NUM_SEQ_TO_REMEMBER];
27 /* 0 means seq_aft_nl not set */
28 int seq_aft_nl_num[IP_CT_DIR_MAX];
29};
30
31struct ip_conntrack_expect;
32
33/* For NAT to hook in when we find a packet which describes what other
34 * connection we should expect. */
35extern unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
36 enum ip_conntrack_info ctinfo,
37 enum ip_ct_ftp_type type,
38 unsigned int matchoff,
39 unsigned int matchlen,
40 struct ip_conntrack_expect *exp,
41 u32 *seq);
42#endif /* __KERNEL__ */
43
44#endif /* _IP_CONNTRACK_FTP_H */
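
ip_nat_ftp_hook is the runtime plug point for the optional NAT helper: the tracking module calls through the pointer only if it is non-NULL. A sketch of the pattern with illustrative names (the real ip_nat_ftp module does the actual command rewriting):

#include <linux/netfilter.h>
#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>

static unsigned int example_nat_ftp(struct sk_buff **pskb,
				    enum ip_conntrack_info ctinfo,
				    enum ip_ct_ftp_type type,
				    unsigned int matchoff,
				    unsigned int matchlen,
				    struct ip_conntrack_expect *exp,
				    u32 *seq)
{
	/* Rewrite the address/port carried in the FTP command at
	 * [matchoff, matchoff + matchlen) and adjust 'exp' to match. */
	return NF_ACCEPT;
}

static int __init example_init(void)
{
	BUG_ON(ip_nat_ftp_hook);	/* only one NAT helper at a time */
	ip_nat_ftp_hook = example_nat_ftp;
	return 0;
}

static void __exit example_exit(void)
{
	ip_nat_ftp_hook = NULL;
	synchronize_net();		/* wait out in-flight callers */
}
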
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_h323.h b/include/linux/netfilter_ipv4/ip_conntrack_h323.h
deleted file mode 100644
index 18f769818f4e..000000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_h323.h
+++ /dev/null
@@ -1,89 +0,0 @@
1#ifndef _IP_CONNTRACK_H323_H
2#define _IP_CONNTRACK_H323_H
3
4#ifdef __KERNEL__
5
6#include <linux/netfilter/nf_conntrack_h323_asn1.h>
7
8#define RAS_PORT 1719
9#define Q931_PORT 1720
10#define H323_RTP_CHANNEL_MAX 4 /* Audio, video, FAX and other */
11
12/* This structure exists only once per master */
13struct ip_ct_h323_master {
14
15 /* Original and NATed Q.931 or H.245 signal ports */
16 u_int16_t sig_port[IP_CT_DIR_MAX];
17
18 /* Original and NATed RTP ports */
19 u_int16_t rtp_port[H323_RTP_CHANNEL_MAX][IP_CT_DIR_MAX];
20
21 union {
22 /* RAS connection timeout */
23 u_int32_t timeout;
24
25 /* Next TPKT length (for separate TPKT header and data) */
26 u_int16_t tpkt_len[IP_CT_DIR_MAX];
27 };
28};
29
30struct ip_conntrack_expect;
31
32extern int get_h225_addr(unsigned char *data, TransportAddress * addr,
33 __be32 * ip, u_int16_t * port);
34extern void ip_conntrack_h245_expect(struct ip_conntrack *new,
35 struct ip_conntrack_expect *this);
36extern void ip_conntrack_q931_expect(struct ip_conntrack *new,
37 struct ip_conntrack_expect *this);
38extern int (*set_h245_addr_hook) (struct sk_buff ** pskb,
39 unsigned char **data, int dataoff,
40 H245_TransportAddress * addr,
41 __be32 ip, u_int16_t port);
42extern int (*set_h225_addr_hook) (struct sk_buff ** pskb,
43 unsigned char **data, int dataoff,
44 TransportAddress * addr,
45 __be32 ip, u_int16_t port);
46extern int (*set_sig_addr_hook) (struct sk_buff ** pskb,
47 struct ip_conntrack * ct,
48 enum ip_conntrack_info ctinfo,
49 unsigned char **data,
50 TransportAddress * addr, int count);
51extern int (*set_ras_addr_hook) (struct sk_buff ** pskb,
52 struct ip_conntrack * ct,
53 enum ip_conntrack_info ctinfo,
54 unsigned char **data,
55 TransportAddress * addr, int count);
56extern int (*nat_rtp_rtcp_hook) (struct sk_buff ** pskb,
57 struct ip_conntrack * ct,
58 enum ip_conntrack_info ctinfo,
59 unsigned char **data, int dataoff,
60 H245_TransportAddress * addr,
61 u_int16_t port, u_int16_t rtp_port,
62 struct ip_conntrack_expect * rtp_exp,
63 struct ip_conntrack_expect * rtcp_exp);
64extern int (*nat_t120_hook) (struct sk_buff ** pskb, struct ip_conntrack * ct,
65 enum ip_conntrack_info ctinfo,
66 unsigned char **data, int dataoff,
67 H245_TransportAddress * addr, u_int16_t port,
68 struct ip_conntrack_expect * exp);
69extern int (*nat_h245_hook) (struct sk_buff ** pskb, struct ip_conntrack * ct,
70 enum ip_conntrack_info ctinfo,
71 unsigned char **data, int dataoff,
72 TransportAddress * addr, u_int16_t port,
73 struct ip_conntrack_expect * exp);
74extern int (*nat_callforwarding_hook) (struct sk_buff ** pskb,
75 struct ip_conntrack * ct,
76 enum ip_conntrack_info ctinfo,
77 unsigned char **data, int dataoff,
78 TransportAddress * addr,
79 u_int16_t port,
80 struct ip_conntrack_expect * exp);
81extern int (*nat_q931_hook) (struct sk_buff ** pskb, struct ip_conntrack * ct,
82 enum ip_conntrack_info ctinfo,
83 unsigned char **data, TransportAddress * addr,
84 int idx, u_int16_t port,
85 struct ip_conntrack_expect * exp);
86
87#endif
88
89#endif
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_helper.h b/include/linux/netfilter_ipv4/ip_conntrack_helper.h
deleted file mode 100644
index 77fe868d36ff..000000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_helper.h
+++ /dev/null
@@ -1,46 +0,0 @@
1/* IP connection tracking helpers. */
2#ifndef _IP_CONNTRACK_HELPER_H
3#define _IP_CONNTRACK_HELPER_H
4#include <linux/netfilter_ipv4/ip_conntrack.h>
5
6struct module;
7
8struct ip_conntrack_helper
9{
10 struct list_head list; /* Internal use. */
11
12 const char *name; /* name of the module */
13 struct module *me; /* pointer to self */
14 unsigned int max_expected; /* Maximum number of concurrent
15 * expected connections */
16 unsigned int timeout; /* timeout for expecteds */
17
18 /* Mask of things we will help (compared against server response) */
19 struct ip_conntrack_tuple tuple;
20 struct ip_conntrack_tuple mask;
21
22 /* Function to call when data passes; return verdict, or -1 to
23 invalidate. */
24 int (*help)(struct sk_buff **pskb,
25 struct ip_conntrack *ct,
26 enum ip_conntrack_info conntrackinfo);
27
28 void (*destroy)(struct ip_conntrack *ct);
29
30 int (*to_nfattr)(struct sk_buff *skb, const struct ip_conntrack *ct);
31};
32
33extern int ip_conntrack_helper_register(struct ip_conntrack_helper *);
34extern void ip_conntrack_helper_unregister(struct ip_conntrack_helper *);
35
36/* Allocate space for an expectation: this is mandatory before calling
37 ip_conntrack_expect_related. You will have to call put afterwards. */
38extern struct ip_conntrack_expect *
39ip_conntrack_expect_alloc(struct ip_conntrack *master);
40extern void ip_conntrack_expect_put(struct ip_conntrack_expect *exp);
41
42/* Add an expected connection: can have more than one per connection */
43extern int ip_conntrack_expect_related(struct ip_conntrack_expect *exp);
44extern void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp);
45
46#endif /*_IP_CONNTRACK_HELPER_H*/
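
Registering a helper means describing which connections it should attach to (a tuple plus a mask) and supplying the help() callback. A minimal sketch for an invented protocol on TCP port 1234; every name and number here is illustrative:

#include <linux/in.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>

static int example_help(struct sk_buff **pskb, struct ip_conntrack *ct,
			enum ip_conntrack_info ctinfo)
{
	/* Parse the payload, possibly registering expectations via
	 * ip_conntrack_expect_alloc()/ip_conntrack_expect_related(). */
	return NF_ACCEPT;
}

static struct ip_conntrack_helper example_helper;

static int __init example_init(void)
{
	example_helper.name = "example";
	example_helper.me = THIS_MODULE;
	example_helper.max_expected = 1;
	example_helper.timeout = 5 * 60;	/* seconds */
	example_helper.tuple.dst.protonum = IPPROTO_TCP;
	example_helper.tuple.dst.u.tcp.port = htons(1234);
	example_helper.mask.dst.protonum = 0xff;	 /* proto must match */
	example_helper.mask.dst.u.tcp.port = htons(0xffff); /* port too */
	example_helper.help = example_help;
	return ip_conntrack_helper_register(&example_helper);
}
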
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_icmp.h b/include/linux/netfilter_ipv4/ip_conntrack_icmp.h
deleted file mode 100644
index eed5ee3e4744..000000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_icmp.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef _IP_CONNTRACK_ICMP_H
2#define _IP_CONNTRACK_ICMP_H
3
4#include <net/netfilter/ipv4/nf_conntrack_icmp.h>
5
6#endif /* _IP_CONNTRACK_ICMP_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_irc.h b/include/linux/netfilter_ipv4/ip_conntrack_irc.h
deleted file mode 100644
index 16601e0d5626..000000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_irc.h
+++ /dev/null
@@ -1,32 +0,0 @@
1/* IRC extension for IP connection tracking.
2 * (C) 2000 by Harald Welte <laforge@gnumonks.org>
3 * based on RR's ip_conntrack_ftp.h
4 *
5 * ip_conntrack_irc.h,v 1.6 2000/11/07 18:26:42 laforge Exp
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 *
13 */
14#ifndef _IP_CONNTRACK_IRC_H
15#define _IP_CONNTRACK_IRC_H
16
17/* This structure exists only once per master */
18struct ip_ct_irc_master {
19};
20
21#ifdef __KERNEL__
22extern unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
23 enum ip_conntrack_info ctinfo,
24 unsigned int matchoff,
25 unsigned int matchlen,
26 struct ip_conntrack_expect *exp);
27
28#define IRC_PORT 6667
29
30#endif /* __KERNEL__ */
31
32#endif /* _IP_CONNTRACK_IRC_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h
deleted file mode 100644
index 2644b1faddd6..000000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h
+++ /dev/null
@@ -1,326 +0,0 @@
1/* PPTP constants and structs */
2#ifndef _CONNTRACK_PPTP_H
3#define _CONNTRACK_PPTP_H
4
5/* state of the control session */
6enum pptp_ctrlsess_state {
7 PPTP_SESSION_NONE, /* no session present */
8 PPTP_SESSION_ERROR, /* some session error */
9 PPTP_SESSION_STOPREQ, /* stop_sess request seen */
10 PPTP_SESSION_REQUESTED, /* start_sess request seen */
11 PPTP_SESSION_CONFIRMED, /* session established */
12};
13
14/* state of the call inside the control session */
15enum pptp_ctrlcall_state {
16 PPTP_CALL_NONE,
17 PPTP_CALL_ERROR,
18 PPTP_CALL_OUT_REQ,
19 PPTP_CALL_OUT_CONF,
20 PPTP_CALL_IN_REQ,
21 PPTP_CALL_IN_REP,
22 PPTP_CALL_IN_CONF,
23 PPTP_CALL_CLEAR_REQ,
24};
25
26
27/* conntrack private data */
28struct ip_ct_pptp_master {
29 enum pptp_ctrlsess_state sstate; /* session state */
30
31 /* everything below is going to be per-expectation in newnat,
32 * since there could be more than one call within one session */
33 enum pptp_ctrlcall_state cstate; /* call state */
34 __be16 pac_call_id; /* call id of PAC, host byte order */
35 __be16 pns_call_id; /* call id of PNS, host byte order */
36
37 /* in pre-2.6.11 this used to be per-expect. Now it is per-conntrack
38 * and therefore imposes a fixed limit on the number of maps */
39 struct ip_ct_gre_keymap *keymap_orig, *keymap_reply;
40};
41
42/* conntrack_expect private member */
43struct ip_ct_pptp_expect {
44 enum pptp_ctrlcall_state cstate; /* call state */
45 __be16 pac_call_id; /* call id of PAC */
46 __be16 pns_call_id; /* call id of PNS */
47};
48
49
50#ifdef __KERNEL__
51
52#define IP_CONNTR_PPTP PPTP_CONTROL_PORT
53
54#define PPTP_CONTROL_PORT 1723
55
56#define PPTP_PACKET_CONTROL 1
57#define PPTP_PACKET_MGMT 2
58
59#define PPTP_MAGIC_COOKIE 0x1a2b3c4d
60
61struct pptp_pkt_hdr {
62 __u16 packetLength;
63 __be16 packetType;
64 __be32 magicCookie;
65};
66
67/* PptpControlMessageType values */
68#define PPTP_START_SESSION_REQUEST 1
69#define PPTP_START_SESSION_REPLY 2
70#define PPTP_STOP_SESSION_REQUEST 3
71#define PPTP_STOP_SESSION_REPLY 4
72#define PPTP_ECHO_REQUEST 5
73#define PPTP_ECHO_REPLY 6
74#define PPTP_OUT_CALL_REQUEST 7
75#define PPTP_OUT_CALL_REPLY 8
76#define PPTP_IN_CALL_REQUEST 9
77#define PPTP_IN_CALL_REPLY 10
78#define PPTP_IN_CALL_CONNECT 11
79#define PPTP_CALL_CLEAR_REQUEST 12
80#define PPTP_CALL_DISCONNECT_NOTIFY 13
81#define PPTP_WAN_ERROR_NOTIFY 14
82#define PPTP_SET_LINK_INFO 15
83
84#define PPTP_MSG_MAX 15
85
86/* PptpGeneralError values */
87#define PPTP_ERROR_CODE_NONE 0
88#define PPTP_NOT_CONNECTED 1
89#define PPTP_BAD_FORMAT 2
90#define PPTP_BAD_VALUE 3
91#define PPTP_NO_RESOURCE 4
92#define PPTP_BAD_CALLID 5
93#define PPTP_REMOVE_DEVICE_ERROR 6
94
95struct PptpControlHeader {
96 __be16 messageType;
97 __u16 reserved;
98};
99
100/* FramingCapability Bitmap Values */
101#define PPTP_FRAME_CAP_ASYNC 0x1
102#define PPTP_FRAME_CAP_SYNC 0x2
103
104/* BearerCapability Bitmap Values */
105#define PPTP_BEARER_CAP_ANALOG 0x1
106#define PPTP_BEARER_CAP_DIGITAL 0x2
107
108struct PptpStartSessionRequest {
109 __be16 protocolVersion;
110 __u16 reserved1;
111 __be32 framingCapability;
112 __be32 bearerCapability;
113 __be16 maxChannels;
114 __be16 firmwareRevision;
115 __u8 hostName[64];
116 __u8 vendorString[64];
117};
118
119/* PptpStartSessionResultCode Values */
120#define PPTP_START_OK 1
121#define PPTP_START_GENERAL_ERROR 2
122#define PPTP_START_ALREADY_CONNECTED 3
123#define PPTP_START_NOT_AUTHORIZED 4
124#define PPTP_START_UNKNOWN_PROTOCOL 5
125
126struct PptpStartSessionReply {
127 __be16 protocolVersion;
128 __u8 resultCode;
129 __u8 generalErrorCode;
130 __be32 framingCapability;
131 __be32 bearerCapability;
132 __be16 maxChannels;
133 __be16 firmwareRevision;
134 __u8 hostName[64];
135 __u8 vendorString[64];
136};
137
138/* PptpStopReasons */
139#define PPTP_STOP_NONE 1
140#define PPTP_STOP_PROTOCOL 2
141#define PPTP_STOP_LOCAL_SHUTDOWN 3
142
143struct PptpStopSessionRequest {
144 __u8 reason;
145 __u8 reserved1;
146 __u16 reserved2;
147};
148
149/* PptpStopSessionResultCode */
150#define PPTP_STOP_OK 1
151#define PPTP_STOP_GENERAL_ERROR 2
152
153struct PptpStopSessionReply {
154 __u8 resultCode;
155 __u8 generalErrorCode;
156 __u16 reserved1;
157};
158
159struct PptpEchoRequest {
160 __be32 identNumber;
161};
162
163/* PptpEchoReplyResultCode */
164#define PPTP_ECHO_OK 1
165#define PPTP_ECHO_GENERAL_ERROR 2
166
167struct PptpEchoReply {
168 __be32 identNumber;
169 __u8 resultCode;
170 __u8 generalErrorCode;
171 __u16 reserved;
172};
173
174/* PptpFramingType */
175#define PPTP_ASYNC_FRAMING 1
176#define PPTP_SYNC_FRAMING 2
177#define PPTP_DONT_CARE_FRAMING 3
178
179/* PptpCallBearerType */
180#define PPTP_ANALOG_TYPE 1
181#define PPTP_DIGITAL_TYPE 2
182#define PPTP_DONT_CARE_BEARER_TYPE 3
183
184struct PptpOutCallRequest {
185 __be16 callID;
186 __be16 callSerialNumber;
187 __be32 minBPS;
188 __be32 maxBPS;
189 __be32 bearerType;
190 __be32 framingType;
191 __be16 packetWindow;
192 __be16 packetProcDelay;
193 __be16 phoneNumberLength;
194 __u16 reserved1;
195 __u8 phoneNumber[64];
196 __u8 subAddress[64];
197};
198
199/* PptpCallResultCode */
200#define PPTP_OUTCALL_CONNECT 1
201#define PPTP_OUTCALL_GENERAL_ERROR 2
202#define PPTP_OUTCALL_NO_CARRIER 3
203#define PPTP_OUTCALL_BUSY 4
204#define PPTP_OUTCALL_NO_DIAL_TONE 5
205#define PPTP_OUTCALL_TIMEOUT 6
206#define PPTP_OUTCALL_DONT_ACCEPT 7
207
208struct PptpOutCallReply {
209 __be16 callID;
210 __be16 peersCallID;
211 __u8 resultCode;
212 __u8 generalErrorCode;
213 __be16 causeCode;
214 __be32 connectSpeed;
215 __be16 packetWindow;
216 __be16 packetProcDelay;
217 __be32 physChannelID;
218};
219
220struct PptpInCallRequest {
221 __be16 callID;
222 __be16 callSerialNumber;
223 __be32 callBearerType;
224 __be32 physChannelID;
225 __be16 dialedNumberLength;
226 __be16 dialingNumberLength;
227 __u8 dialedNumber[64];
228 __u8 dialingNumber[64];
229 __u8 subAddress[64];
230};
231
232/* PptpInCallResultCode */
233#define PPTP_INCALL_ACCEPT 1
234#define PPTP_INCALL_GENERAL_ERROR 2
235#define PPTP_INCALL_DONT_ACCEPT 3
236
237struct PptpInCallReply {
238 __be16 callID;
239 __be16 peersCallID;
240 __u8 resultCode;
241 __u8 generalErrorCode;
242 __be16 packetWindow;
243 __be16 packetProcDelay;
244 __u16 reserved;
245};
246
247struct PptpInCallConnected {
248 __be16 peersCallID;
249 __u16 reserved;
250 __be32 connectSpeed;
251 __be16 packetWindow;
252 __be16 packetProcDelay;
253 __be32 callFramingType;
254};
255
256struct PptpClearCallRequest {
257 __be16 callID;
258 __u16 reserved;
259};
260
261struct PptpCallDisconnectNotify {
262 __be16 callID;
263 __u8 resultCode;
264 __u8 generalErrorCode;
265 __be16 causeCode;
266 __u16 reserved;
267 __u8 callStatistics[128];
268};
269
270struct PptpWanErrorNotify {
271 __be16 peersCallID;
272 __u16 reserved;
273 __be32 crcErrors;
274 __be32 framingErrors;
275 __be32 hardwareOverRuns;
276 __be32 bufferOverRuns;
277 __be32 timeoutErrors;
278 __be32 alignmentErrors;
279};
280
281struct PptpSetLinkInfo {
282 __be16 peersCallID;
283 __u16 reserved;
284 __be32 sendAccm;
285 __be32 recvAccm;
286};
287
288union pptp_ctrl_union {
289 struct PptpStartSessionRequest sreq;
290 struct PptpStartSessionReply srep;
291 struct PptpStopSessionRequest streq;
292 struct PptpStopSessionReply strep;
293 struct PptpOutCallRequest ocreq;
294 struct PptpOutCallReply ocack;
295 struct PptpInCallRequest icreq;
296 struct PptpInCallReply icack;
297 struct PptpInCallConnected iccon;
298 struct PptpClearCallRequest clrreq;
299 struct PptpCallDisconnectNotify disc;
300 struct PptpWanErrorNotify wanerr;
301 struct PptpSetLinkInfo setlink;
302};
303
304extern int
305(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb,
306 struct ip_conntrack *ct,
307 enum ip_conntrack_info ctinfo,
308 struct PptpControlHeader *ctlh,
309 union pptp_ctrl_union *pptpReq);
310
311extern int
312(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb,
313 struct ip_conntrack *ct,
314 enum ip_conntrack_info ctinfo,
315 struct PptpControlHeader *ctlh,
316 union pptp_ctrl_union *pptpReq);
317
318extern void
319(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *exp_orig,
320 struct ip_conntrack_expect *exp_reply);
321
322extern void
323(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct,
324 struct ip_conntrack_expect *exp);
325#endif /* __KERNEL__ */
326#endif /* _CONNTRACK_PPTP_H */
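
Before a tracker can dispatch on pptpReq it has to check that the TCP payload really carries a PPTP control message. A condensed, illustrative sketch of that validation using the constants above (the caller is hypothetical):

static int example_pptp_msg_ok(const struct pptp_pkt_hdr *pkth,
			       const struct PptpControlHeader *ctlh)
{
	u_int16_t msg;

	if (ntohs(pkth->packetType) != PPTP_PACKET_CONTROL ||
	    ntohl(pkth->magicCookie) != PPTP_MAGIC_COOKIE)
		return 0;

	msg = ntohs(ctlh->messageType);
	return msg >= PPTP_START_SESSION_REQUEST && msg <= PPTP_MSG_MAX;
}
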
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h b/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h
deleted file mode 100644
index e371e0fc1672..000000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h
+++ /dev/null
@@ -1,114 +0,0 @@
1#ifndef _CONNTRACK_PROTO_GRE_H
2#define _CONNTRACK_PROTO_GRE_H
3#include <asm/byteorder.h>
4
5/* GRE PROTOCOL HEADER */
6
7/* GRE Version field */
8#define GRE_VERSION_1701 0x0
9#define GRE_VERSION_PPTP 0x1
10
11/* GRE Protocol field */
12#define GRE_PROTOCOL_PPTP 0x880B
13
14/* GRE Flags */
15#define GRE_FLAG_C 0x80
16#define GRE_FLAG_R 0x40
17#define GRE_FLAG_K 0x20
18#define GRE_FLAG_S 0x10
19#define GRE_FLAG_A 0x80
20
21#define GRE_IS_C(f) ((f)&GRE_FLAG_C)
22#define GRE_IS_R(f) ((f)&GRE_FLAG_R)
23#define GRE_IS_K(f) ((f)&GRE_FLAG_K)
24#define GRE_IS_S(f) ((f)&GRE_FLAG_S)
25#define GRE_IS_A(f) ((f)&GRE_FLAG_A)
26
27/* GRE is a mess: Four different standards */
28struct gre_hdr {
29#if defined(__LITTLE_ENDIAN_BITFIELD)
30 __u16 rec:3,
31 srr:1,
32 seq:1,
33 key:1,
34 routing:1,
35 csum:1,
36 version:3,
37 reserved:4,
38 ack:1;
39#elif defined(__BIG_ENDIAN_BITFIELD)
40 __u16 csum:1,
41 routing:1,
42 key:1,
43 seq:1,
44 srr:1,
45 rec:3,
46 ack:1,
47 reserved:4,
48 version:3;
49#else
50#error "Adjust your <asm/byteorder.h> defines"
51#endif
52 __be16 protocol;
53};
54
55/* modified GRE header for PPTP */
56struct gre_hdr_pptp {
57 __u8 flags; /* bitfield */
58 __u8 version; /* should be GRE_VERSION_PPTP */
59 __be16 protocol; /* should be GRE_PROTOCOL_PPTP */
60 __be16 payload_len; /* size of ppp payload, not inc. gre header */
61 __be16 call_id; /* peer's call_id for this session */
62 __be32 seq; /* sequence number. Present if S==1 */
63 __be32 ack; /* seq number of highest packet received by */
64 /* sender in this session */
65};
66
67
68/* this is part of ip_conntrack */
69struct ip_ct_gre {
70 unsigned int stream_timeout;
71 unsigned int timeout;
72};
73
74#ifdef __KERNEL__
75struct ip_conntrack_expect;
76struct ip_conntrack;
77
78/* structure for original <-> reply keymap */
79struct ip_ct_gre_keymap {
80 struct list_head list;
81
82 struct ip_conntrack_tuple tuple;
83};
84
85/* add new tuple->key_reply pair to keymap */
86int ip_ct_gre_keymap_add(struct ip_conntrack *ct,
87 struct ip_conntrack_tuple *t,
88 int reply);
89
90/* delete keymap entries */
91void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct);
92
93
94/* get pointer to gre key, if present */
95static inline __be32 *gre_key(struct gre_hdr *greh)
96{
97 if (!greh->key)
98 return NULL;
99 if (greh->csum || greh->routing)
100 return (__be32 *)((void *)greh + sizeof(*greh) + 4);
101 return (__be32 *)((void *)greh + sizeof(*greh));
102}
103
104/* get pointer to gre csum, if present */
105static inline __sum16 *gre_csum(struct gre_hdr *greh)
106{
107 if (!greh->csum)
108 return NULL;
109 return (__sum16 *)((void *)greh + sizeof(*greh));
110}
111
112#endif /* __KERNEL__ */
113
114#endif /* _CONNTRACK_PROTO_GRE_H */
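
For version-1 (PPTP) GRE, the 32-bit key word returned by gre_key() is really the payload_len/call_id pair from struct gre_hdr_pptp. A sketch of extracting the call ID, assuming greh already points at a validated, linear GRE header (the helper name is illustrative):

static u_int16_t example_gre_call_id(struct gre_hdr *greh)
{
	__be32 *key = gre_key(greh);

	if (greh->version != GRE_VERSION_PPTP || key == NULL)
		return 0;
	/* the low 16 bits of the key word carry the peer's call ID */
	return ntohl(*key) & 0xffff;
}
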
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
deleted file mode 100644
index 2c76b879e3dc..000000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
+++ /dev/null
@@ -1,98 +0,0 @@
1/* Header for use in defining a given protocol for connection tracking. */
2#ifndef _IP_CONNTRACK_PROTOCOL_H
3#define _IP_CONNTRACK_PROTOCOL_H
4#include <linux/netfilter_ipv4/ip_conntrack.h>
5#include <linux/netfilter/nfnetlink_conntrack.h>
6
7struct seq_file;
8
9struct ip_conntrack_protocol
10{
11 /* Protocol number. */
12 u_int8_t proto;
13
14 /* Protocol name */
15 const char *name;
16
17 /* Try to fill in the third arg: dataoff is offset past IP
18 hdr. Return true if possible. */
19 int (*pkt_to_tuple)(const struct sk_buff *skb,
20 unsigned int dataoff,
21 struct ip_conntrack_tuple *tuple);
22
23 /* Invert the per-proto part of the tuple: i.e. turn xmit into reply.
24 * Some packets can't be inverted: return 0 in that case.
25 */
26 int (*invert_tuple)(struct ip_conntrack_tuple *inverse,
27 const struct ip_conntrack_tuple *orig);
28
29 /* Print out the per-protocol part of the tuple. Return like seq_* */
30 int (*print_tuple)(struct seq_file *,
31 const struct ip_conntrack_tuple *);
32
33 /* Print out the private part of the conntrack. */
34 int (*print_conntrack)(struct seq_file *, const struct ip_conntrack *);
35
36 /* Returns verdict for packet, or -1 for invalid. */
37 int (*packet)(struct ip_conntrack *conntrack,
38 const struct sk_buff *skb,
39 enum ip_conntrack_info ctinfo);
40
41 /* Called when a new connection for this protocol is found;
42 * returns TRUE if it's OK. If so, packet() is called next. */
43 int (*new)(struct ip_conntrack *conntrack, const struct sk_buff *skb);
44
45 /* Called when a conntrack entry is destroyed */
46 void (*destroy)(struct ip_conntrack *conntrack);
47
48 int (*error)(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
49 unsigned int hooknum);
50
51 /* convert protoinfo to nfnetink attributes */
52 int (*to_nfattr)(struct sk_buff *skb, struct nfattr *nfa,
53 const struct ip_conntrack *ct);
54
55 /* convert nfnetlink attributes to protoinfo */
56 int (*from_nfattr)(struct nfattr *tb[], struct ip_conntrack *ct);
57
58 int (*tuple_to_nfattr)(struct sk_buff *skb,
59 const struct ip_conntrack_tuple *t);
60 int (*nfattr_to_tuple)(struct nfattr *tb[],
61 struct ip_conntrack_tuple *t);
62
63 /* Module (if any) which this is connected to. */
64 struct module *me;
65};
66
67/* Protocol registration. */
68extern int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto);
69extern void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto);
70/* Existing built-in protocols */
71extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp;
72extern struct ip_conntrack_protocol ip_conntrack_protocol_udp;
73extern struct ip_conntrack_protocol ip_conntrack_protocol_icmp;
74extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
75extern int ip_conntrack_protocol_tcp_init(void);
76
77/* Log invalid packets */
78extern unsigned int ip_ct_log_invalid;
79
80extern int ip_ct_port_tuple_to_nfattr(struct sk_buff *,
81 const struct ip_conntrack_tuple *);
82extern int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[],
83 struct ip_conntrack_tuple *);
84
85#ifdef CONFIG_SYSCTL
86#ifdef DEBUG_INVALID_PACKETS
87#define LOG_INVALID(proto) \
88 (ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW)
89#else
90#define LOG_INVALID(proto) \
91 ((ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW) \
92 && net_ratelimit())
93#endif
94#else
95#define LOG_INVALID(proto) 0
96#endif /* CONFIG_SYSCTL */
97
98#endif /*_IP_CONNTRACK_PROTOCOL_H*/
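
A layer-4 tracker is just this ops structure plus registration. A skeletal sketch for a hypothetical protocol (253 is an unassigned protocol number, used purely for illustration); a real tracker would also refresh the connection timeout in packet():

#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>

static int example_pkt_to_tuple(const struct sk_buff *skb,
				unsigned int dataoff,
				struct ip_conntrack_tuple *tuple)
{
	tuple->src.u.all = 0;	/* no per-protocol identifiers */
	tuple->dst.u.all = 0;
	return 1;
}

static int example_invert_tuple(struct ip_conntrack_tuple *inverse,
				const struct ip_conntrack_tuple *orig)
{
	inverse->src.u.all = inverse->dst.u.all = 0;
	return 1;
}

static int example_packet(struct ip_conntrack *conntrack,
			  const struct sk_buff *skb,
			  enum ip_conntrack_info ctinfo)
{
	return NF_ACCEPT;
}

static int example_new(struct ip_conntrack *conntrack,
		       const struct sk_buff *skb)
{
	return 1;		/* accept the new connection */
}

static struct ip_conntrack_protocol example_proto = {
	.proto		= 253,
	.name		= "example",
	.pkt_to_tuple	= example_pkt_to_tuple,
	.invert_tuple	= example_invert_tuple,
	.packet		= example_packet,
	.new		= example_new,
	.me		= THIS_MODULE,
};

/* in module init: ip_conntrack_protocol_register(&example_proto); */
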
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_sctp.h b/include/linux/netfilter_ipv4/ip_conntrack_sctp.h
deleted file mode 100644
index 4099a041a32a..000000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_sctp.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef _IP_CONNTRACK_SCTP_H
2#define _IP_CONNTRACK_SCTP_H
3
4#include <linux/netfilter/nf_conntrack_sctp.h>
5
6#endif /* _IP_CONNTRACK_SCTP_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_sip.h b/include/linux/netfilter_ipv4/ip_conntrack_sip.h
deleted file mode 100644
index bef6c646defa..000000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_sip.h
+++ /dev/null
@@ -1,40 +0,0 @@
1#ifndef __IP_CONNTRACK_SIP_H__
2#define __IP_CONNTRACK_SIP_H__
3#ifdef __KERNEL__
4
5#define SIP_PORT 5060
6#define SIP_TIMEOUT 3600
7
8enum sip_header_pos {
9 POS_REG_REQ_URI,
10 POS_REQ_URI,
11 POS_FROM,
12 POS_TO,
13 POS_VIA,
14 POS_CONTACT,
15 POS_CONTENT,
16 POS_MEDIA,
17 POS_OWNER,
18 POS_CONNECTION,
19 POS_SDP_HEADER,
20};
21
22extern unsigned int (*ip_nat_sip_hook)(struct sk_buff **pskb,
23 enum ip_conntrack_info ctinfo,
24 struct ip_conntrack *ct,
25 const char **dptr);
26extern unsigned int (*ip_nat_sdp_hook)(struct sk_buff **pskb,
27 enum ip_conntrack_info ctinfo,
28 struct ip_conntrack_expect *exp,
29 const char *dptr);
30
31extern int ct_sip_get_info(const char *dptr, size_t dlen,
32 unsigned int *matchoff,
33 unsigned int *matchlen,
34 enum sip_header_pos pos);
35extern int ct_sip_lnlen(const char *line, const char *limit);
36extern const char *ct_sip_search(const char *needle, const char *haystack,
37 size_t needle_len, size_t haystack_len,
38 int case_sensitive);
39#endif /* __KERNEL__ */
40#endif /* __IP_CONNTRACK_SIP_H__ */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tcp.h b/include/linux/netfilter_ipv4/ip_conntrack_tcp.h
deleted file mode 100644
index 876b8fb17e68..000000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_tcp.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef _IP_CONNTRACK_TCP_H
2#define _IP_CONNTRACK_TCP_H
3
4#include <linux/netfilter/nf_conntrack_tcp.h>
5
6#endif /* _IP_CONNTRACK_TCP_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tftp.h b/include/linux/netfilter_ipv4/ip_conntrack_tftp.h
deleted file mode 100644
index a404fc0abf0e..000000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_tftp.h
+++ /dev/null
@@ -1,20 +0,0 @@
1#ifndef _IP_CT_TFTP
2#define _IP_CT_TFTP
3
4#define TFTP_PORT 69
5
6struct tftphdr {
7 __be16 opcode;
8};
9
10#define TFTP_OPCODE_READ 1
11#define TFTP_OPCODE_WRITE 2
12#define TFTP_OPCODE_DATA 3
13#define TFTP_OPCODE_ACK 4
14#define TFTP_OPCODE_ERROR 5
15
16extern unsigned int (*ip_nat_tftp_hook)(struct sk_buff **pskb,
17 enum ip_conntrack_info ctinfo,
18 struct ip_conntrack_expect *exp);
19
20#endif /* _IP_CT_TFTP */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h
deleted file mode 100644
index c228bde74c33..000000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h
+++ /dev/null
@@ -1,146 +0,0 @@
1#ifndef _IP_CONNTRACK_TUPLE_H
2#define _IP_CONNTRACK_TUPLE_H
3
4#include <linux/types.h>
5#include <linux/netfilter/nf_conntrack_tuple_common.h>
6
7/* A `tuple' is a structure containing the information to uniquely
8 identify a connection, i.e. if two packets have the same tuple, they
9 are in the same connection; if not, they are not.
10
11 We divide the structure along "manipulatable" and
12 "non-manipulatable" lines, for the benefit of the NAT code.
13*/
14
15/* The protocol-specific manipulable parts of the tuple: always in
16 network order! */
17union ip_conntrack_manip_proto
18{
19 /* Add other protocols here. */
20 u_int16_t all;
21
22 struct {
23 __be16 port;
24 } tcp;
25 struct {
26 __be16 port;
27 } udp;
28 struct {
29 __be16 id;
30 } icmp;
31 struct {
32 __be16 port;
33 } sctp;
34 struct {
35 __be16 key; /* key is 32bit, pptp only uses 16 */
36 } gre;
37};
38
39/* The manipulable part of the tuple. */
40struct ip_conntrack_manip
41{
42 __be32 ip;
43 union ip_conntrack_manip_proto u;
44};
45
46/* This contains the information to distinguish a connection. */
47struct ip_conntrack_tuple
48{
49 struct ip_conntrack_manip src;
50
51 /* These are the parts of the tuple which are fixed. */
52 struct {
53 __be32 ip;
54 union {
55 /* Add other protocols here. */
56 u_int16_t all;
57
58 struct {
59 __be16 port;
60 } tcp;
61 struct {
62 __be16 port;
63 } udp;
64 struct {
65 u_int8_t type, code;
66 } icmp;
67 struct {
68 __be16 port;
69 } sctp;
70 struct {
71 __be16 key; /* key is 32bit,
72 * pptp only uses 16 */
73 } gre;
74 } u;
75
76 /* The protocol. */
77 u_int8_t protonum;
78
79 /* The direction (for tuplehash) */
80 u_int8_t dir;
81 } dst;
82};
83
84/* This is optimized, as opposed to a memset of the whole structure: all we
85 * really care about are the source/destination unions. */
86#define IP_CT_TUPLE_U_BLANK(tuple) \
87 do { \
88 (tuple)->src.u.all = 0; \
89 (tuple)->dst.u.all = 0; \
90 } while (0)
91
92#ifdef __KERNEL__
93
94#define DUMP_TUPLE(tp) \
95DEBUGP("tuple %p: %u %u.%u.%u.%u:%hu -> %u.%u.%u.%u:%hu\n", \
96 (tp), (tp)->dst.protonum, \
97 NIPQUAD((tp)->src.ip), ntohs((tp)->src.u.all), \
98 NIPQUAD((tp)->dst.ip), ntohs((tp)->dst.u.all))
99
100/* If we're the first tuple, it's the original dir. */
101#define DIRECTION(h) ((enum ip_conntrack_dir)(h)->tuple.dst.dir)
102
103/* Connections have two entries in the hash table: one for each way */
104struct ip_conntrack_tuple_hash
105{
106 struct list_head list;
107
108 struct ip_conntrack_tuple tuple;
109};
110
111#endif /* __KERNEL__ */
112
113static inline int ip_ct_tuple_src_equal(const struct ip_conntrack_tuple *t1,
114 const struct ip_conntrack_tuple *t2)
115{
116 return t1->src.ip == t2->src.ip
117 && t1->src.u.all == t2->src.u.all;
118}
119
120static inline int ip_ct_tuple_dst_equal(const struct ip_conntrack_tuple *t1,
121 const struct ip_conntrack_tuple *t2)
122{
123 return t1->dst.ip == t2->dst.ip
124 && t1->dst.u.all == t2->dst.u.all
125 && t1->dst.protonum == t2->dst.protonum;
126}
127
128static inline int ip_ct_tuple_equal(const struct ip_conntrack_tuple *t1,
129 const struct ip_conntrack_tuple *t2)
130{
131 return ip_ct_tuple_src_equal(t1, t2) && ip_ct_tuple_dst_equal(t1, t2);
132}
133
134static inline int ip_ct_tuple_mask_cmp(const struct ip_conntrack_tuple *t,
135 const struct ip_conntrack_tuple *tuple,
136 const struct ip_conntrack_tuple *mask)
137{
138 return !(((t->src.ip ^ tuple->src.ip) & mask->src.ip)
139 || ((t->dst.ip ^ tuple->dst.ip) & mask->dst.ip)
140 || ((t->src.u.all ^ tuple->src.u.all) & mask->src.u.all)
141 || ((t->dst.u.all ^ tuple->dst.u.all) & mask->dst.u.all)
142 || ((t->dst.protonum ^ tuple->dst.protonum)
143 & mask->dst.protonum));
144}
145
146#endif /* _IP_CONNTRACK_TUPLE_H */
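
ip_ct_tuple_mask_cmp() treats set mask bits as "must match", which is how helpers express "any client, specific server". A sketch using FTP's control port as the illustrative match (the function itself is hypothetical):

#include <linux/in.h>
#include <linux/string.h>
#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>

static int example_is_ftp_ctrl(const struct ip_conntrack_tuple *t)
{
	struct ip_conntrack_tuple match, mask;

	memset(&match, 0, sizeof(match));
	memset(&mask, 0, sizeof(mask));
	match.dst.protonum = IPPROTO_TCP;
	match.dst.u.tcp.port = htons(21);
	mask.dst.protonum = 0xff;		/* protocol must match */
	mask.dst.u.tcp.port = htons(0xffff);	/* dst port must match */
	/* src fields left zero in the mask: any client IP/port matches */

	return ip_ct_tuple_mask_cmp(t, &match, &mask);
}
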
diff --git a/include/linux/netfilter_ipv4/ip_nat.h b/include/linux/netfilter_ipv4/ip_nat.h
deleted file mode 100644
index bbca89aab813..000000000000
--- a/include/linux/netfilter_ipv4/ip_nat.h
+++ /dev/null
@@ -1,79 +0,0 @@
1#ifndef _IP_NAT_H
2#define _IP_NAT_H
3#include <linux/netfilter_ipv4.h>
4#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
5
6#define IP_NAT_MAPPING_TYPE_MAX_NAMELEN 16
7
8enum ip_nat_manip_type
9{
10 IP_NAT_MANIP_SRC,
11 IP_NAT_MANIP_DST
12};
13
14/* SRC manip occurs POST_ROUTING or LOCAL_IN */
15#define HOOK2MANIP(hooknum) ((hooknum) != NF_IP_POST_ROUTING && (hooknum) != NF_IP_LOCAL_IN)
16
17#define IP_NAT_RANGE_MAP_IPS 1
18#define IP_NAT_RANGE_PROTO_SPECIFIED 2
19#define IP_NAT_RANGE_PROTO_RANDOM 4 /* add randomness to "port" selection */
20
21/* NAT sequence number modifications */
22struct ip_nat_seq {
23 /* position of the last TCP sequence number
24 * modification (if any) */
25 u_int32_t correction_pos;
26 /* sequence number offset before and after last modification */
27 int16_t offset_before, offset_after;
28};
29
30/* Single range specification. */
31struct ip_nat_range
32{
33 /* Set to OR of flags above. */
34 unsigned int flags;
35
36 /* Inclusive: network order. */
37 __be32 min_ip, max_ip;
38
39 /* Inclusive: network order */
40 union ip_conntrack_manip_proto min, max;
41};
42
43/* For backwards compat: don't use in modern code. */
44struct ip_nat_multi_range_compat
45{
46 unsigned int rangesize; /* Must be 1. */
47
48 /* hangs off end. */
49 struct ip_nat_range range[1];
50};
51
52#ifdef __KERNEL__
53#include <linux/list.h>
54
55/* Protects NAT hash tables, and NAT-private part of conntracks. */
56extern rwlock_t ip_nat_lock;
57
58/* The structure embedded in the conntrack structure. */
59struct ip_nat_info
60{
61 struct list_head bysource;
62 struct ip_nat_seq seq[IP_CT_DIR_MAX];
63};
64
65struct ip_conntrack;
66
67/* Set up the info structure to map into this range. */
68extern unsigned int ip_nat_setup_info(struct ip_conntrack *conntrack,
69 const struct ip_nat_range *range,
70 unsigned int hooknum);
71
72/* Is this tuple already taken? (not by us)*/
73extern int ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
74 const struct ip_conntrack *ignored_conntrack);
75
76#else /* !__KERNEL__: iptables wants this to compile. */
77#define ip_nat_multi_range ip_nat_multi_range_compat
78#endif /*__KERNEL__*/
79#endif
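
A NAT target builds one of these ranges and hands it to ip_nat_setup_info() from its hook. A sketch that maps everything to a single source address with ports restricted to the unprivileged range; the address, port bounds, and function name are illustrative:

#include <linux/netfilter_ipv4/ip_nat.h>

static unsigned int example_setup_snat(struct ip_conntrack *ct,
				       unsigned int hooknum, __be32 addr)
{
	struct ip_nat_range range = {
		.flags	= IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED,
		.min_ip	= addr,		/* single-address "range" */
		.max_ip	= addr,
	};

	range.min.tcp.port = htons(1024);
	range.max.tcp.port = htons(65535);
	return ip_nat_setup_info(ct, &range, hooknum);
}
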
diff --git a/include/linux/netfilter_ipv4/ip_nat_core.h b/include/linux/netfilter_ipv4/ip_nat_core.h
deleted file mode 100644
index 60566f9fd7b3..000000000000
--- a/include/linux/netfilter_ipv4/ip_nat_core.h
+++ /dev/null
@@ -1,18 +0,0 @@
1#ifndef _IP_NAT_CORE_H
2#define _IP_NAT_CORE_H
3#include <linux/list.h>
4#include <linux/netfilter_ipv4/ip_conntrack.h>
5
6/* This header is used to share core functionality between the standalone
7 NAT module, and the compatibility layer's use of NAT for masquerading. */
8
9extern unsigned int ip_nat_packet(struct ip_conntrack *ct,
10 enum ip_conntrack_info conntrackinfo,
11 unsigned int hooknum,
12 struct sk_buff **pskb);
13
14extern int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
15 enum ip_conntrack_info ctinfo,
16 unsigned int hooknum,
17 struct sk_buff **pskb);
18#endif /* _IP_NAT_CORE_H */
diff --git a/include/linux/netfilter_ipv4/ip_nat_helper.h b/include/linux/netfilter_ipv4/ip_nat_helper.h
deleted file mode 100644
index bf9cb105c885..000000000000
--- a/include/linux/netfilter_ipv4/ip_nat_helper.h
+++ /dev/null
@@ -1,33 +0,0 @@
1#ifndef _IP_NAT_HELPER_H
2#define _IP_NAT_HELPER_H
3/* NAT protocol helper routines. */
4
5#include <linux/netfilter_ipv4/ip_conntrack.h>
6#include <linux/module.h>
7
8struct sk_buff;
9
10/* These return true or false. */
11extern int ip_nat_mangle_tcp_packet(struct sk_buff **skb,
12 struct ip_conntrack *ct,
13 enum ip_conntrack_info ctinfo,
14 unsigned int match_offset,
15 unsigned int match_len,
16 const char *rep_buffer,
17 unsigned int rep_len);
18extern int ip_nat_mangle_udp_packet(struct sk_buff **skb,
19 struct ip_conntrack *ct,
20 enum ip_conntrack_info ctinfo,
21 unsigned int match_offset,
22 unsigned int match_len,
23 const char *rep_buffer,
24 unsigned int rep_len);
25extern int ip_nat_seq_adjust(struct sk_buff **pskb,
26 struct ip_conntrack *ct,
27 enum ip_conntrack_info ctinfo);
28
29/* Setup NAT on this expected conntrack so it follows master, but goes
30 * to port ct->master->saved_proto. */
31extern void ip_nat_follow_master(struct ip_conntrack *ct,
32 struct ip_conntrack_expect *this);
33#endif
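
The mangle routines do the heavy lifting for payload-rewriting helpers: they resize the skb, patch checksums, and for TCP record the sequence-number delta so later packets can be adjusted by ip_nat_seq_adjust(). A sketch of a caller, where the offsets come from the conntrack helper's parser and the replacement string is illustrative:

static int example_rewrite(struct sk_buff **pskb, struct ip_conntrack *ct,
			   enum ip_conntrack_info ctinfo,
			   unsigned int matchoff, unsigned int matchlen)
{
	static const char repl[] = "192.0.2.1";	/* TEST-NET address */

	/* returns true on success, false if the packet couldn't be mangled */
	return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
					matchoff, matchlen,
					repl, sizeof(repl) - 1);
}
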
diff --git a/include/linux/netfilter_ipv4/ip_nat_pptp.h b/include/linux/netfilter_ipv4/ip_nat_pptp.h
deleted file mode 100644
index 36668bf0f373..000000000000
--- a/include/linux/netfilter_ipv4/ip_nat_pptp.h
+++ /dev/null
@@ -1,11 +0,0 @@
1/* PPTP constants and structs */
2#ifndef _NAT_PPTP_H
3#define _NAT_PPTP_H
4
5/* conntrack private data */
6struct ip_nat_pptp {
7 __be16 pns_call_id; /* NAT'ed PNS call id */
8 __be16 pac_call_id; /* NAT'ed PAC call id */
9};
10
11#endif /* _NAT_PPTP_H */
diff --git a/include/linux/netfilter_ipv4/ip_nat_protocol.h b/include/linux/netfilter_ipv4/ip_nat_protocol.h
deleted file mode 100644
index 612a43614e7b..000000000000
--- a/include/linux/netfilter_ipv4/ip_nat_protocol.h
+++ /dev/null
@@ -1,74 +0,0 @@
1/* Header for use in defining a given protocol. */
2#ifndef _IP_NAT_PROTOCOL_H
3#define _IP_NAT_PROTOCOL_H
4#include <linux/init.h>
5#include <linux/list.h>
6
7#include <linux/netfilter_ipv4/ip_nat.h>
8#include <linux/netfilter/nfnetlink_conntrack.h>
9
10struct iphdr;
11struct ip_nat_range;
12
13struct ip_nat_protocol
14{
15 /* Protocol name */
16 const char *name;
17
18 /* Protocol number. */
19 unsigned int protonum;
20
21 struct module *me;
22
23 /* Translate a packet to the target according to manip type.
24 Return true if succeeded. */
25 int (*manip_pkt)(struct sk_buff **pskb,
26 unsigned int iphdroff,
27 const struct ip_conntrack_tuple *tuple,
28 enum ip_nat_manip_type maniptype);
29
30 /* Is the manipulable part of the tuple between min and max incl? */
31 int (*in_range)(const struct ip_conntrack_tuple *tuple,
32 enum ip_nat_manip_type maniptype,
33 const union ip_conntrack_manip_proto *min,
34 const union ip_conntrack_manip_proto *max);
35
36 /* Alter the per-proto part of the tuple (depending on
37 maniptype), to give a unique tuple in the given range if
38 possible; return false if not. Per-protocol part of tuple
39 is initialized to the incoming packet. */
40 int (*unique_tuple)(struct ip_conntrack_tuple *tuple,
41 const struct ip_nat_range *range,
42 enum ip_nat_manip_type maniptype,
43 const struct ip_conntrack *conntrack);
44
45 int (*range_to_nfattr)(struct sk_buff *skb,
46 const struct ip_nat_range *range);
47
48 int (*nfattr_to_range)(struct nfattr *tb[],
49 struct ip_nat_range *range);
50};
51
52/* Protocol registration. */
53extern int ip_nat_protocol_register(struct ip_nat_protocol *proto);
54extern void ip_nat_protocol_unregister(struct ip_nat_protocol *proto);
55
56extern struct ip_nat_protocol *ip_nat_proto_find_get(u_int8_t protocol);
57extern void ip_nat_proto_put(struct ip_nat_protocol *proto);
58
59/* Built-in protocols. */
60extern struct ip_nat_protocol ip_nat_protocol_tcp;
61extern struct ip_nat_protocol ip_nat_protocol_udp;
62extern struct ip_nat_protocol ip_nat_protocol_icmp;
63extern struct ip_nat_protocol ip_nat_unknown_protocol;
64
65extern int init_protocols(void) __init;
66extern void cleanup_protocols(void);
67extern struct ip_nat_protocol *find_nat_proto(u_int16_t protonum);
68
69extern int ip_nat_port_range_to_nfattr(struct sk_buff *skb,
70 const struct ip_nat_range *range);
71extern int ip_nat_port_nfattr_to_range(struct nfattr *tb[],
72 struct ip_nat_range *range);
73
74#endif /*_IP_NAT_PROTO_H*/
diff --git a/include/linux/netfilter_ipv4/ip_nat_rule.h b/include/linux/netfilter_ipv4/ip_nat_rule.h
deleted file mode 100644
index 73b9552e6a89..000000000000
--- a/include/linux/netfilter_ipv4/ip_nat_rule.h
+++ /dev/null
@@ -1,28 +0,0 @@
1#ifndef _IP_NAT_RULE_H
2#define _IP_NAT_RULE_H
3#include <linux/netfilter_ipv4/ip_conntrack.h>
4#include <linux/netfilter_ipv4/ip_tables.h>
5#include <linux/netfilter_ipv4/ip_nat.h>
6
7#ifdef __KERNEL__
8
9extern int ip_nat_rule_init(void) __init;
10extern void ip_nat_rule_cleanup(void);
11extern int ip_nat_rule_find(struct sk_buff **pskb,
12 unsigned int hooknum,
13 const struct net_device *in,
14 const struct net_device *out,
15 struct ip_conntrack *ct,
16 struct ip_nat_info *info);
17
18extern unsigned int
19alloc_null_binding(struct ip_conntrack *conntrack,
20 struct ip_nat_info *info,
21 unsigned int hooknum);
22
23extern unsigned int
24alloc_null_binding_confirmed(struct ip_conntrack *conntrack,
25 struct ip_nat_info *info,
26 unsigned int hooknum);
27#endif
28#endif /* _IP_NAT_RULE_H */
diff --git a/include/linux/netfilter_ipv4/ipt_SAME.h b/include/linux/netfilter_ipv4/ipt_SAME.h
index cc4c0b2269af..be6e682a85ec 100644
--- a/include/linux/netfilter_ipv4/ipt_SAME.h
+++ b/include/linux/netfilter_ipv4/ipt_SAME.h
@@ -13,7 +13,7 @@ struct ipt_same_info
13 u_int32_t *iparray; 13 u_int32_t *iparray;
14 14
15 /* hangs off end. */ 15 /* hangs off end. */
16 struct ip_nat_range range[IPT_SAME_MAX_RANGE]; 16 struct nf_nat_range range[IPT_SAME_MAX_RANGE];
17}; 17};
18 18
19#endif /*_IPT_SAME_H*/ 19#endif /*_IPT_SAME_H*/
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 2a20f488ac1b..f41688f56632 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -138,6 +138,11 @@ struct nlattr
138#include <linux/capability.h> 138#include <linux/capability.h>
139#include <linux/skbuff.h> 139#include <linux/skbuff.h>
140 140
141static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb)
142{
143 return (struct nlmsghdr *)skb->data;
144}
145
141struct netlink_skb_parms 146struct netlink_skb_parms
142{ 147{
143 struct ucred creds; /* Skb credentials */ 148 struct ucred creds; /* Skb credentials */
@@ -152,7 +157,10 @@ struct netlink_skb_parms
152#define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) 157#define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds)
153 158
154 159
155extern struct sock *netlink_kernel_create(int unit, unsigned int groups, void (*input)(struct sock *sk, int len), struct module *module); 160extern struct sock *netlink_kernel_create(int unit, unsigned int groups,
161 void (*input)(struct sock *sk, int len),
162 struct mutex *cb_mutex,
163 struct module *module);
156extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); 164extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
157extern int netlink_has_listeners(struct sock *sk, unsigned int group); 165extern int netlink_has_listeners(struct sock *sk, unsigned int group);
158extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); 166extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock);
@@ -171,9 +179,16 @@ int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol);
171 179
172/* 180/*
173 * skb should fit one page. This choice is good for headerless malloc. 181 * skb should fit one page. This choice is good for headerless malloc.
182 * But we should limit to 8K so that userspace does not have to
183 * use enormous buffer sizes on recvmsg() calls just to avoid
184 * MSG_TRUNC when PAGE_SIZE is very large.
174 */ 185 */
175#define NLMSG_GOODORDER 0 186#if PAGE_SIZE < 8192UL
176#define NLMSG_GOODSIZE (SKB_MAX_ORDER(0, NLMSG_GOODORDER)) 187#define NLMSG_GOODSIZE SKB_WITH_OVERHEAD(PAGE_SIZE)
188#else
189#define NLMSG_GOODSIZE SKB_WITH_OVERHEAD(8192UL)
190#endif
191
177#define NLMSG_DEFAULT_SIZE (NLMSG_GOODSIZE - NLMSG_HDRLEN) 192#define NLMSG_DEFAULT_SIZE (NLMSG_GOODSIZE - NLMSG_HDRLEN)
178 193
179 194
@@ -217,18 +232,6 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags)
217#define NLMSG_PUT(skb, pid, seq, type, len) \ 232#define NLMSG_PUT(skb, pid, seq, type, len) \
218 NLMSG_NEW(skb, pid, seq, type, len, 0) 233 NLMSG_NEW(skb, pid, seq, type, len, 0)
219 234
220#define NLMSG_NEW_ANSWER(skb, cb, type, len, flags) \
221 NLMSG_NEW(skb, NETLINK_CB((cb)->skb).pid, \
222 (cb)->nlh->nlmsg_seq, type, len, flags)
223
224#define NLMSG_END(skb, nlh) \
225({ (nlh)->nlmsg_len = (skb)->tail - (unsigned char *) (nlh); \
226 (skb)->len; })
227
228#define NLMSG_CANCEL(skb, nlh) \
229({ skb_trim(skb, (unsigned char *) (nlh) - (skb)->data); \
230 -1; })
231
232extern int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, 235extern int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
233 struct nlmsghdr *nlh, 236 struct nlmsghdr *nlh,
234 int (*dump)(struct sk_buff *skb, struct netlink_callback*), 237 int (*dump)(struct sk_buff *skb, struct netlink_callback*),
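
The netlink_kernel_create() change is the API break in this hunk: callers gain a cb_mutex argument, where NULL selects the default per-socket callback mutex. A sketch of an updated in-kernel user, with illustrative names:

#include <linux/module.h>
#include <linux/netlink.h>

static struct sock *example_nl_sock;

static void example_nl_input(struct sock *sk, int len)
{
	/* drain sk->sk_receive_queue and dispatch via nlmsg_hdr(skb) */
}

static int __init example_init(void)
{
	example_nl_sock = netlink_kernel_create(NETLINK_USERSOCK, 0,
						example_nl_input,
						NULL,	/* default cb_mutex */
						THIS_MODULE);
	return example_nl_sock ? 0 : -ENOMEM;
}
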
diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
new file mode 100644
index 000000000000..9a30ba2ca75e
--- /dev/null
+++ b/include/linux/nl80211.h
@@ -0,0 +1,38 @@
1#ifndef __LINUX_NL80211_H
2#define __LINUX_NL80211_H
3/*
4 * 802.11 netlink interface public header
5 *
6 * Copyright 2006, 2007 Johannes Berg <johannes@sipsolutions.net>
7 */
8
9/**
10 * enum nl80211_iftype - (virtual) interface types
11 * @NL80211_IFTYPE_UNSPECIFIED: unspecified type, driver decides
12 * @NL80211_IFTYPE_ADHOC: independent BSS member
13 * @NL80211_IFTYPE_STATION: managed BSS member
14 * @NL80211_IFTYPE_AP: access point
15 * @NL80211_IFTYPE_AP_VLAN: VLAN interface for access points
16 * @NL80211_IFTYPE_WDS: wireless distribution interface
17 * @NL80211_IFTYPE_MONITOR: monitor interface receiving all frames
18 * @__NL80211_IFTYPE_AFTER_LAST: internal use
19 *
20 * These values are used with the NL80211_ATTR_IFTYPE
21 * to set the type of an interface.
22 *
23 */
24enum nl80211_iftype {
25 NL80211_IFTYPE_UNSPECIFIED,
26 NL80211_IFTYPE_ADHOC,
27 NL80211_IFTYPE_STATION,
28 NL80211_IFTYPE_AP,
29 NL80211_IFTYPE_AP_VLAN,
30 NL80211_IFTYPE_WDS,
31 NL80211_IFTYPE_MONITOR,
32
33 /* keep last */
34 __NL80211_IFTYPE_AFTER_LAST
35};
36#define NL80211_IFTYPE_MAX (__NL80211_IFTYPE_AFTER_LAST - 1)
37
38#endif /* __LINUX_NL80211_H */
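
Anything receiving an NL80211_ATTR_IFTYPE value should range-check it before trusting it. A minimal, illustrative sketch of validating and naming an iftype:

#include <linux/nl80211.h>

static const char *example_iftype_name(enum nl80211_iftype t)
{
	static const char *names[] = {
		"unspecified", "adhoc", "station", "ap",
		"ap-vlan", "wds", "monitor",
	};

	if (t > NL80211_IFTYPE_MAX)
		return "invalid";
	return names[t];
}
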
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 4a629ea70cc4..1fae30af91f3 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -574,13 +574,6 @@ extern int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, in
574#define rtattr_parse_nested(tb, max, rta) \ 574#define rtattr_parse_nested(tb, max, rta) \
575 rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta))) 575 rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta)))
576 576
577struct rtnetlink_link
578{
579 int (*doit)(struct sk_buff *, struct nlmsghdr*, void *attr);
580 int (*dumpit)(struct sk_buff *, struct netlink_callback *cb);
581};
582
583extern struct rtnetlink_link * rtnetlink_links[NPROTO];
584extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo); 577extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo);
585extern int rtnl_unicast(struct sk_buff *skb, u32 pid); 578extern int rtnl_unicast(struct sk_buff *skb, u32 pid);
586extern int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, 579extern int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
@@ -605,7 +598,7 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi
605 598
606#define RTA_PUT_NOHDR(skb, attrlen, data) \ 599#define RTA_PUT_NOHDR(skb, attrlen, data) \
607({ RTA_APPEND(skb, RTA_ALIGN(attrlen), data); \ 600({ RTA_APPEND(skb, RTA_ALIGN(attrlen), data); \
608 memset(skb->tail - (RTA_ALIGN(attrlen) - attrlen), 0, \ 601 memset(skb_tail_pointer(skb) - (RTA_ALIGN(attrlen) - attrlen), 0, \
609 RTA_ALIGN(attrlen) - attrlen); }) 602 RTA_ALIGN(attrlen) - attrlen); })
610 603
611#define RTA_PUT_U8(skb, attrtype, value) \ 604#define RTA_PUT_U8(skb, attrtype, value) \
@@ -637,12 +630,12 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi
637 RTA_PUT(skb, attrtype, 0, NULL); 630 RTA_PUT(skb, attrtype, 0, NULL);
638 631
639#define RTA_NEST(skb, type) \ 632#define RTA_NEST(skb, type) \
640({ struct rtattr *__start = (struct rtattr *) (skb)->tail; \ 633({ struct rtattr *__start = (struct rtattr *)skb_tail_pointer(skb); \
641 RTA_PUT(skb, type, 0, NULL); \ 634 RTA_PUT(skb, type, 0, NULL); \
642 __start; }) 635 __start; })
643 636
644#define RTA_NEST_END(skb, start) \ 637#define RTA_NEST_END(skb, start) \
645({ (start)->rta_len = ((skb)->tail - (unsigned char *) (start)); \ 638({ (start)->rta_len = skb_tail_pointer(skb) - (unsigned char *)(start); \
646 (skb)->len; }) 639 (skb)->len; })
647 640
648#define RTA_NEST_CANCEL(skb, start) \ 641#define RTA_NEST_CANCEL(skb, start) \
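
The RTA_NEST macros rely on the caller providing an rtattr_failure label, since RTA_PUT jumps there on overflow. A sketch of nesting a single u32 attribute; the attribute type numbers are illustrative:

static int example_fill_nested(struct sk_buff *skb)
{
	struct rtattr *nest = NULL;

	nest = RTA_NEST(skb, 1);	/* illustrative container type */
	RTA_PUT_U32(skb, 2, 42);	/* illustrative nested attribute */
	RTA_NEST_END(skb, nest);
	return 0;

rtattr_failure:
	if (nest)
		RTA_NEST_CANCEL(skb, nest);	/* trim partial nest */
	return -EMSGSIZE;
}
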
diff --git a/include/linux/rxrpc.h b/include/linux/rxrpc.h
new file mode 100644
index 000000000000..f7b826b565c7
--- /dev/null
+++ b/include/linux/rxrpc.h
@@ -0,0 +1,62 @@
1/* AF_RXRPC parameters
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_RXRPC_H
13#define _LINUX_RXRPC_H
14
15#include <linux/in.h>
16#include <linux/in6.h>
17
18/*
19 * RxRPC socket address
20 */
21struct sockaddr_rxrpc {
22 sa_family_t srx_family; /* address family */
23 u16 srx_service; /* service desired */
24 u16 transport_type; /* type of transport socket (SOCK_DGRAM) */
25 u16 transport_len; /* length of transport address */
26 union {
27 sa_family_t family; /* transport address family */
28 struct sockaddr_in sin; /* IPv4 transport address */
29 struct sockaddr_in6 sin6; /* IPv6 transport address */
30 } transport;
31};
32
33/*
34 * RxRPC socket options
35 */
36#define RXRPC_SECURITY_KEY 1 /* [clnt] set client security key */
37#define RXRPC_SECURITY_KEYRING 2 /* [srvr] set ring of server security keys */
38#define RXRPC_EXCLUSIVE_CONNECTION 3 /* [clnt] use exclusive RxRPC connection */
39#define RXRPC_MIN_SECURITY_LEVEL 4 /* minimum security level */
40
41/*
42 * RxRPC control messages
43 * - terminal messages mean that a user call ID tag can be recycled
44 */
45#define RXRPC_USER_CALL_ID 1 /* user call ID specifier */
46#define RXRPC_ABORT 2 /* abort request / notification [terminal] */
47#define RXRPC_ACK 3 /* [Server] RPC op final ACK received [terminal] */
48#define RXRPC_NET_ERROR 5 /* network error received [terminal] */
49#define RXRPC_BUSY 6 /* server busy received [terminal] */
50#define RXRPC_LOCAL_ERROR 7 /* local error generated [terminal] */
51#define RXRPC_NEW_CALL 8 /* [Server] new incoming call notification */
52#define RXRPC_ACCEPT 9 /* [Server] accept request */
53
54/*
55 * RxRPC security levels
56 */
57#define RXRPC_SECURITY_PLAIN 0 /* plain secure-checksummed packets only */
58#define RXRPC_SECURITY_AUTH 1 /* authenticated packets */
59#define RXRPC_SECURITY_ENCRYPT 2 /* encrypted packets */
60
61
62#endif /* _LINUX_RXRPC_H */
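
From userspace these parameters ride on an AF_RXRPC socket: bind() takes a sockaddr_rxrpc naming the service and the UDP transport. A sketch of a client-side socket, assuming AF_RXRPC is defined by the socket headers (it is not part of this file) and following the pattern in Documentation/networking/rxrpc.txt:

#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/rxrpc.h>

int example_rxrpc_client(void)
{
	struct sockaddr_rxrpc srx;
	int fd = socket(AF_RXRPC, SOCK_DGRAM, PF_INET);

	if (fd < 0)
		return -1;

	memset(&srx, 0, sizeof(srx));
	srx.srx_family = AF_RXRPC;
	srx.srx_service = 0;			/* 0 = client-only socket */
	srx.transport_type = SOCK_DGRAM;
	srx.transport_len = sizeof(srx.transport.sin);
	srx.transport.sin.sin_family = AF_INET;
	srx.transport.sin.sin_port = 0;		/* any local UDP port */

	if (bind(fd, (struct sockaddr *)&srx, sizeof(srx)) < 0)
		return -1;
	return fd;
}
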
diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index d4f86560bfff..d70df61a029f 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -63,6 +63,15 @@ typedef struct sctphdr {
63 __be32 checksum; 63 __be32 checksum;
64} __attribute__((packed)) sctp_sctphdr_t; 64} __attribute__((packed)) sctp_sctphdr_t;
65 65
66#ifdef __KERNEL__
67#include <linux/skbuff.h>
68
69static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb)
70{
71 return (struct sctphdr *)skb_transport_header(skb);
72}
73#endif
74
66/* Section 3.2. Chunk Field Descriptions. */ 75/* Section 3.2. Chunk Field Descriptions. */
67typedef struct sctp_chunkhdr { 76typedef struct sctp_chunkhdr {
68 __u8 type; 77 __u8 type;
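
The sctp_hdr() accessor added above mirrors the tcp_hdr()/udp_hdr() pattern this merge introduces elsewhere: it is only valid once skb->transport_header has been set. A sketch of a hypothetical consumer:

static inline __be16 example_sctp_dport(const struct sk_buff *skb)
{
	/* valid only after the transport header offset is set */
	return sctp_hdr(skb)->dest;
}
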
diff --git a/include/linux/sdla_fr.h b/include/linux/sdla_fr.h
deleted file mode 100644
index cdfa77fcb06b..000000000000
--- a/include/linux/sdla_fr.h
+++ /dev/null
@@ -1,638 +0,0 @@
1/*****************************************************************************
2* sdla_fr.h Sangoma frame relay firmware API definitions.
3*
4* Author: Gideon Hack
5* Nenad Corbic <ncorbic@sangoma.com>
6*
7* Copyright: (c) 1995-2000 Sangoma Technologies Inc.
8*
9* This program is free software; you can redistribute it and/or
10* modify it under the terms of the GNU General Public License
11* as published by the Free Software Foundation; either version
12* 2 of the License, or (at your option) any later version.
13* ============================================================================
14* Oct 04, 1999 Gideon Hack Updated API structures
15* Jun 02, 1999 Gideon Hack Modifications for S514 support
16* Oct 12, 1997 Jaspreet Singh Added FR_READ_DLCI_IB_MAPPING
17* Jul 21, 1997 Jaspreet Singh Changed FRRES_TOO_LONG and FRRES_TOO_MANY to
18* 0x05 and 0x06 respectively.
19* Dec 23, 1996 Gene Kozin v2.0
20* Apr 29, 1996 Gene Kozin v1.0 (merged version S502 & S508 definitions).
21* Sep 26, 1995 Gene Kozin Initial version.
22*****************************************************************************/
23#ifndef _SDLA_FR_H
24#define _SDLA_FR_H
25
26/*----------------------------------------------------------------------------
27 * Notes:
28 * ------
29 * 1. All structures defined in this file are byte-aligned.
30 *
31 * Compiler Platform
32 * -------- --------
33 * GNU C Linux
34 */
35
36#ifndef PACKED
37# define PACKED __attribute__((packed))
38#endif /* PACKED */
39
40/* Adapter memory layout */
41#define FR_MB_VECTOR 0xE000 /* mailbox window vector */
42#define FR502_RX_VECTOR 0xA000 /* S502 direct receive window vector */
43#define FR502_MBOX_OFFS 0xF60 /* S502 mailbox offset */
44#define FR508_MBOX_OFFS 0 /* S508 mailbox offset */
45#define FR502_FLAG_OFFS 0x1FF0 /* S502 status flags offset */
46#define FR508_FLAG_OFFS 0x1000 /* S508 status flags offset */
47#define FR502_RXMB_OFFS 0x900 /* S502 direct receive mailbox offset */
48#define FR508_TXBC_OFFS 0x1100 /* S508 Tx buffer info offset */
49#define FR508_RXBC_OFFS 0x1120 /* S508 Rx buffer info offset */
50
51/* Important constants */
52#define FR502_MAX_DATA 4096 /* maximum data buffer length */
53#define FR508_MAX_DATA 4080 /* maximum data buffer length */
54#define MIN_LGTH_FR_DATA_CFG 300 /* min Information frame length
55					 * (for configuration purposes) */
56#define FR_MAX_NO_DATA_BYTES_IN_FRAME 15354 /* max Information frame length */
57
58#define HIGHEST_VALID_DLCI 991
59
60/****** Data Structures *****************************************************/
61
62/*----------------------------------------------------------------------------
63 * Frame relay command block.
64 */
65typedef struct fr_cmd
66{
67 unsigned char command PACKED; /* command code */
68 unsigned short length PACKED; /* length of data buffer */
69 unsigned char result PACKED; /* return code */
70 unsigned short dlci PACKED; /* DLCI number */
71 unsigned char attr PACKED; /* FECN, BECN, DE and C/R bits */
72 unsigned short rxlost1 PACKED; /* frames discarded at int. level */
73 unsigned long rxlost2 PACKED; /* frames discarded at app. level */
74 unsigned char rsrv[2] PACKED; /* reserved for future use */
75} fr_cmd_t;
76
77/* 'command' field defines */
78#define FR_WRITE 0x01
79#define FR_READ 0x02
80#define FR_ISSUE_IS_FRAME 0x03
81#define FR_SET_CONFIG 0x10
82#define FR_READ_CONFIG 0x11
83#define FR_COMM_DISABLE 0x12
84#define FR_COMM_ENABLE 0x13
85#define FR_READ_STATUS 0x14
86#define FR_READ_STATISTICS 0x15
87#define FR_FLUSH_STATISTICS 0x16
88#define FR_LIST_ACTIVE_DLCI 0x17
89#define FR_FLUSH_DATA_BUFFERS 0x18
90#define FR_READ_ADD_DLC_STATS 0x19
91#define FR_ADD_DLCI 0x20
92#define FR_DELETE_DLCI 0x21
93#define FR_ACTIVATE_DLCI 0x22
 94#define FR_DEACTIVATE_DLCI 0x23
95#define FR_READ_MODEM_STATUS 0x30
96#define FR_SET_MODEM_STATUS 0x31
97#define FR_READ_ERROR_STATS 0x32
98#define FR_FLUSH_ERROR_STATS 0x33
99#define FR_READ_DLCI_IB_MAPPING 0x34
100#define FR_READ_CODE_VERSION 0x40
101#define FR_SET_INTR_MODE 0x50
102#define FR_READ_INTR_MODE 0x51
103#define FR_SET_TRACE_CONFIG 0x60
104#define FR_FT1_STATUS_CTRL 0x80
105#define FR_SET_FT1_MODE 0x81
106
107/* Special UDP drivers management commands */
108#define FPIPE_ENABLE_TRACING 0x41
109#define FPIPE_DISABLE_TRACING 0x42
110#define FPIPE_GET_TRACE_INFO 0x43
111#define FPIPE_FT1_READ_STATUS 0x44
112#define FPIPE_DRIVER_STAT_IFSEND 0x45
113#define FPIPE_DRIVER_STAT_INTR 0x46
114#define FPIPE_DRIVER_STAT_GEN 0x47
115#define FPIPE_FLUSH_DRIVER_STATS 0x48
116#define FPIPE_ROUTER_UP_TIME 0x49
117
118/* 'result' field defines */
119#define FRRES_OK 0x00 /* command executed successfully */
120#define FRRES_DISABLED 0x01 /* communications not enabled */
121#define FRRES_INOPERATIVE 0x02 /* channel inoperative */
122#define FRRES_DLCI_INACTIVE 0x03 /* DLCI is inactive */
123#define FRRES_DLCI_INVALID 0x04 /* DLCI is not configured */
124#define FRRES_TOO_LONG 0x05
125#define FRRES_TOO_MANY 0x06
126#define FRRES_CIR_OVERFLOW 0x07 /* Tx throughput has exceeded CIR */
127#define FRRES_BUFFER_OVERFLOW 0x08
128#define FRRES_MODEM_FAILURE 0x10 /* DCD and/or CTS dropped */
129#define FRRES_CHANNEL_DOWN 0x11 /* channel became inoperative */
130#define FRRES_CHANNEL_UP 0x12 /* channel became operative */
131#define FRRES_DLCI_CHANGE 0x13 /* DLCI status (or number) changed */
132#define FRRES_DLCI_MISMATCH 0x14
133#define FRRES_INVALID_CMD 0x1F /* invalid command */
134
135/* 'attr' field defines */
136#define FRATTR_
137
138/*----------------------------------------------------------------------------
139 * Frame relay mailbox.
140 * This structure is located at offset FR50?_MBOX_OFFS into FR_MB_VECTOR.
141 * For S502 it is also located at offset FR502_RXMB_OFFS into
142 * FR502_RX_VECTOR.
143 */
144typedef struct fr_mbox
145{
146 unsigned char opflag PACKED; /* 00h: execution flag */
147 fr_cmd_t cmd PACKED; /* 01h: command block */
148 unsigned char data[1] PACKED; /* 10h: variable length data buffer */
149} fr_mbox_t;
150
151/*----------------------------------------------------------------------------
152 * S502 frame relay status flags.
153 * This structure is located at offset FR502_FLAG_OFFS into FR_MB_VECTOR.
154 */
155typedef struct fr502_flags
156{
157 unsigned char rsrv1[1] PACKED; /* 00h: */
158 unsigned char tx_ready PACKED; /* 01h: Tx buffer available */
159 unsigned char rx_ready PACKED; /* 02h: Rx frame available */
160 unsigned char event PACKED; /* 03h: asynchronous event */
161 unsigned char mstatus PACKED; /* 04h: modem status */
162 unsigned char rsrv2[8] PACKED; /* 05h: */
163 unsigned char iflag PACKED; /* 0Dh: interrupt flag */
164 unsigned char imask PACKED; /* 0Eh: interrupt mask */
165} fr502_flags_t;
166
167/*----------------------------------------------------------------------------
168 * S508 frame relay status flags.
169 * This structure is located at offset FR508_FLAG_OFFS into FR_MB_VECTOR.
170 */
171typedef struct fr508_flags
172{
173 unsigned char rsrv1[3] PACKED; /* 00h: reserved */
174 unsigned char event PACKED; /* 03h: asynchronous event */
175 unsigned char mstatus PACKED; /* 04h: modem status */
176 unsigned char rsrv2[11] PACKED; /* 05h: reserved */
177 unsigned char iflag PACKED; /* 10h: interrupt flag */
178 unsigned char imask PACKED; /* 11h: interrupt mask */
179 unsigned long tse_offs PACKED; /* 12h: Tx status element */
180 unsigned short dlci PACKED; /* 16h: DLCI NUMBER */
181} fr508_flags_t;
182
183/* 'event' field defines */
184#define FR_EVENT_STATUS 0x01 /* channel status change */
185#define FR_EVENT_DLC_STATUS 0x02 /* DLC status change */
186#define FR_EVENT_BAD_DLCI 0x04 /* FSR included wrong DLCI */
187#define FR_EVENT_LINK_DOWN 0x40 /* DCD or CTS low */
188
189/* 'mstatus' field defines */
190#define FR_MDM_DCD 0x08 /* mdm_status: DCD */
191#define FR_MDM_CTS 0x20 /* mdm_status: CTS */
192
193/* 'iflag' & 'imask' fields defines */
194#define FR_INTR_RXRDY 0x01 /* Rx ready */
195#define FR_INTR_TXRDY 0x02 /* Tx ready */
196#define FR_INTR_MODEM 0x04 /* modem status change (DCD, CTS) */
197#define FR_INTR_READY 0x08 /* interface command completed */
198#define FR_INTR_DLC 0x10 /* DLC status change */
199#define FR_INTR_TIMER 0x20 /* millisecond timer */
200#define FR_INTR_TX_MULT_DLCIs 0x80 /* Tx interrupt on multiple DLCIs */
201
202
203/*----------------------------------------------------------------------------
204 * Receive Buffer Configuration Info. S508 only!
205 * This structure is located at offset FR508_RXBC_OFFS into FR_MB_VECTOR.
206 */
207typedef struct fr_buf_info
208{
209 unsigned short rse_num PACKED; /* 00h: number of status elements */
210 unsigned long rse_base PACKED; /* 02h: receive status array base */
211 unsigned long rse_next PACKED; /* 06h: next status element */
212 unsigned long buf_base PACKED; /* 0Ah: rotational buffer base */
213 unsigned short reserved PACKED; /* 0Eh: */
214 unsigned long buf_top PACKED; /* 10h: rotational buffer top */
215} fr_buf_info_t;
216
217/*----------------------------------------------------------------------------
218 * Buffer Status Element. S508 only!
219 * Array of structures of this type is located at offset defined by the
220 * 'rse_base' field of the frBufInfo_t structure into absolute adapter
221 * memory address space.
222 */
223typedef struct fr_rx_buf_ctl
224{
225 unsigned char flag PACKED; /* 00h: ready flag */
226 unsigned short length PACKED; /* 01h: frame length */
227 unsigned short dlci PACKED; /* 03h: DLCI */
228 unsigned char attr PACKED; /* 05h: FECN/BECN/DE/CR */
229 unsigned short tmstamp PACKED; /* 06h: time stamp */
230 unsigned short rsrv[2] PACKED; /* 08h: */
231 unsigned long offset PACKED; /* 0Ch: buffer absolute address */
232} fr_rx_buf_ctl_t;
233
234typedef struct fr_tx_buf_ctl
235{
236 unsigned char flag PACKED; /* 00h: ready flag */
237 unsigned short rsrv0[2] PACKED; /* 01h: */
238 unsigned short length PACKED; /* 05h: frame length */
239 unsigned short dlci PACKED; /* 07h: DLCI */
240 unsigned char attr PACKED; /* 09h: FECN/BECN/DE/CR */
241 unsigned short rsrv1 PACKED; /* 0Ah: */
242 unsigned long offset PACKED; /* 0Ch: buffer absolute address */
243} fr_tx_buf_ctl_t;
244
245/*----------------------------------------------------------------------------
246 * Global Configuration Block. Passed to FR_SET_CONFIG command when dlci == 0.
247 */
248typedef struct fr_conf
249{
250 unsigned short station PACKED; /* 00h: CPE/Node */
251 unsigned short options PACKED; /* 02h: configuration options */
252 unsigned short kbps PACKED; /* 04h: baud rate in kbps */
253 unsigned short port PACKED; /* 06h: RS-232/V.35 */
254 unsigned short mtu PACKED; /* 08h: max. transmit length */
255 unsigned short t391 PACKED; /* 0Ah: */
256 unsigned short t392 PACKED; /* 0Ch: */
257 unsigned short n391 PACKED; /* 0Eh: */
258 unsigned short n392 PACKED; /* 10h: */
259 unsigned short n393 PACKED; /* 12h: */
260 unsigned short cir_fwd PACKED; /* 14h: */
261 unsigned short bc_fwd PACKED; /* 16h: */
262 unsigned short be_fwd PACKED; /* 18h: */
263 unsigned short cir_bwd PACKED; /* 1Ah: */
264 unsigned short bc_bwd PACKED; /* 1Ch: */
265 unsigned short be_bwd PACKED; /* 1Eh: */
266 unsigned short dlci[0] PACKED; /* 20h: */
267} fr_conf_t;
268
269/* 'station_type' defines */
270#define FRCFG_STATION_CPE 0
271#define FRCFG_STATION_NODE 1
272
273/* 'conf_flags' defines */
274#define FRCFG_IGNORE_TX_CIR 0x0001
275#define FRCFG_IGNORE_RX_CIR 0x0002
276#define FRCFG_DONT_RETRANSMIT 0x0004
277#define FRCFG_IGNORE_CBS 0x0008
278#define FRCFG_THROUGHPUT 0x0010 /* enable throughput calculation */
279#define FRCFG_DIRECT_RX 0x0080 /* enable direct receive buffer */
280#define FRCFG_AUTO_CONFIG 0x8000 /* enable auto DLCI configuration */
281
282/* 'baud_rate' defines */
283#define FRCFG_BAUD_1200 12
284#define FRCFG_BAUD_2400 24
285#define FRCFG_BAUD_4800 48
286#define FRCFG_BAUD_9600 96
287#define FRCFG_BAUD_19200 19
288#define FRCFG_BAUD_38400 38
289#define FRCFG_BAUD_56000 56
290#define FRCFG_BAUD_64000 64
291#define FRCFG_BAUD_128000 128
292
293/* 'port_mode' defines */
294#define FRCFG_MODE_EXT_CLK 0x0000
295#define FRCFG_MODE_INT_CLK 0x0001
296#define FRCFG_MODE_V35 0x0000 /* S508 only */
297#define FRCFG_MODE_RS232 0x0002 /* S508 only */
298
299/* defines for line tracing */
300
301/* the line trace status element presented by the frame relay code */
302typedef struct {
303 unsigned char flag PACKED; /* ready flag */
304 unsigned short length PACKED; /* trace length */
305 unsigned char rsrv0[2] PACKED; /* reserved */
306 unsigned char attr PACKED; /* trace attributes */
307 unsigned short tmstamp PACKED; /* time stamp */
308 unsigned char rsrv1[4] PACKED; /* reserved */
309 unsigned long offset PACKED; /* buffer absolute address */
310} fr_trc_el_t;
311
312typedef struct {
313 unsigned char status PACKED; /* status flag */
314 unsigned char data_passed PACKED; /* 0 if no data passed, 1 if */
315 /* data passed */
316 unsigned short length PACKED; /* frame length */
317 unsigned short tmstamp PACKED; /* time stamp */
318} fpipemon_trc_hdr_t;
319
320typedef struct {
321 fpipemon_trc_hdr_t fpipemon_trc_hdr PACKED;
322 unsigned char data[FR_MAX_NO_DATA_BYTES_IN_FRAME] PACKED;
323} fpipemon_trc_t;
324
325/* bit settings for the 'status' byte - note that bits 1, 2 and 3 are used */
326/* for returning the number of frames being passed to fpipemon */
327#define TRC_OUTGOING_FRM 0x01
328#define TRC_ABORT_ERROR 0x10
329#define TRC_CRC_ERROR 0x20
330#define TRC_OVERRUN_ERROR 0x40
331#define MORE_TRC_DATA 0x80
332
333#define MAX_FRMS_TRACED 0x07
334
335#define NO_TRC_ELEMENTS_OFF 0x9000
336#define BASE_TRC_ELEMENTS_OFF 0x9002
337#define TRC_ACTIVE 0x01
338#define FLUSH_TRC_BUFFERS 0x02
339#define FLUSH_TRC_STATISTICS 0x04
340#define TRC_SIGNALLING_FRMS 0x10
341#define TRC_INFO_FRMS 0x20
342#define ACTIVATE_TRC (TRC_ACTIVE | TRC_SIGNALLING_FRMS | TRC_INFO_FRMS)
343#define RESET_TRC (FLUSH_TRC_BUFFERS | FLUSH_TRC_STATISTICS)
344
345/*----------------------------------------------------------------------------
346 * Channel configuration.
347 * This structure is passed to the FR_SET_CONFIG command when dlci != 0.
348 */
349typedef struct fr_dlc_conf
350{
351 unsigned short conf_flags PACKED; /* 00h: configuration bits */
352 unsigned short cir_fwd PACKED; /* 02h: */
353 unsigned short bc_fwd PACKED; /* 04h: */
354 unsigned short be_fwd PACKED; /* 06h: */
355 unsigned short cir_bwd PACKED; /* 08h: */
356 unsigned short bc_bwd PACKED; /* 0Ah: */
357 unsigned short be_bwd PACKED; /* 0Ch: */
358} fr_dlc_conf_t;
359
360/*----------------------------------------------------------------------------
361 * S502 interrupt mode control block.
362 * This structure is passed to the FR_SET_INTR_MODE and returned by the
363 * FR_READ_INTR_MODE commands.
364 */
365typedef struct fr502_intr_ctl
366{
367 unsigned char mode PACKED; /* 00h: interrupt enable flags */
368 unsigned short tx_len PACKED; /* 01h: required Tx buffer size */
369} fr502_intr_ctl_t;
370
371/*----------------------------------------------------------------------------
372 * S508 interrupt mode control block.
373 * This structure is passed to the FR_SET_INTR_MODE and returned by the
374 * FR_READ_INTR_MODE commands.
375 */
376typedef struct fr508_intr_ctl
377{
378 unsigned char mode PACKED; /* 00h: interrupt enable flags */
379 unsigned short tx_len PACKED; /* 01h: required Tx buffer size */
380 unsigned char irq PACKED; /* 03h: IRQ level to activate */
381 unsigned char flags PACKED; /* 04h: ?? */
382 unsigned short timeout PACKED; /* 05h: ms, for timer interrupt */
383} fr508_intr_ctl_t;
384
385/*----------------------------------------------------------------------------
386 * Channel status.
387 * This structure is returned by the FR_READ_STATUS command.
388 */
389typedef struct fr_dlc_Status
390{
391 unsigned char status PACKED; /* 00h: link/DLCI status */
392 struct
393 {
394 unsigned short dlci PACKED; /* 01h: DLCI number */
395 unsigned char status PACKED; /* 03h: DLCI status */
396 } circuit[1] PACKED;
397} fr_dlc_status_t;
398
399/* 'status' defines */
400#define FR_LINK_INOPER 0x00 /* for global status (DLCI == 0) */
401#define FR_LINK_OPER 0x01
402#define FR_DLCI_DELETED 0x01 /* for circuit status (DLCI != 0) */
403#define FR_DLCI_ACTIVE 0x02
404#define FR_DLCI_WAITING 0x04
405#define FR_DLCI_NEW 0x08
406#define FR_DLCI_REPORT 0x40
407
408/*----------------------------------------------------------------------------
409 * Global Statistics Block.
410 * This structure is returned by the FR_READ_STATISTICS command when
411 * dlci == 0.
412 */
413typedef struct fr_link_stat
414{
415 unsigned short rx_too_long PACKED; /* 00h: */
416 unsigned short rx_dropped PACKED; /* 02h: */
417 unsigned short rx_dropped2 PACKED; /* 04h: */
418 unsigned short rx_bad_dlci PACKED; /* 06h: */
419 unsigned short rx_bad_format PACKED; /* 08h: */
420 unsigned short retransmitted PACKED; /* 0Ah: */
421 unsigned short cpe_tx_FSE PACKED; /* 0Ch: */
422 unsigned short cpe_tx_LIV PACKED; /* 0Eh: */
423 unsigned short cpe_rx_FSR PACKED; /* 10h: */
424 unsigned short cpe_rx_LIV PACKED; /* 12h: */
425 unsigned short node_rx_FSE PACKED; /* 14h: */
426 unsigned short node_rx_LIV PACKED; /* 16h: */
427 unsigned short node_tx_FSR PACKED; /* 18h: */
428 unsigned short node_tx_LIV PACKED; /* 1Ah: */
429 unsigned short rx_ISF_err PACKED; /* 1Ch: */
430 unsigned short rx_unsolicited PACKED; /* 1Eh: */
431 unsigned short rx_SSN_err PACKED; /* 20h: */
432 unsigned short rx_RSN_err PACKED; /* 22h: */
433 unsigned short T391_timeouts PACKED; /* 24h: */
434 unsigned short T392_timeouts PACKED; /* 26h: */
435 unsigned short N392_reached PACKED; /* 28h: */
436 unsigned short cpe_SSN_RSN PACKED; /* 2Ah: */
437 unsigned short current_SSN PACKED; /* 2Ch: */
438 unsigned short current_RSN PACKED; /* 2Eh: */
439 unsigned short current_T391 PACKED; /* 30h: */
440 unsigned short current_T392 PACKED; /* 32h: */
441 unsigned short current_N392 PACKED; /* 34h: */
442 unsigned short current_N393 PACKED; /* 36h: */
443} fr_link_stat_t;
444
445/*----------------------------------------------------------------------------
446 * DLCI statistics.
447 * This structure is returned by the FR_READ_STATISTICS command when
448 * dlci != 0.
449 */
450typedef struct fr_dlci_stat
451{
452 unsigned long tx_frames PACKED; /* 00h: */
453 unsigned long tx_bytes PACKED; /* 04h: */
454 unsigned long rx_frames PACKED; /* 08h: */
455 unsigned long rx_bytes PACKED; /* 0Ch: */
456 unsigned long rx_dropped PACKED; /* 10h: */
457 unsigned long rx_inactive PACKED; /* 14h: */
458 unsigned long rx_exceed_CIR PACKED; /* 18h: */
459 unsigned long rx_DE_set PACKED; /* 1Ch: */
460 unsigned long tx_throughput PACKED; /* 20h: */
461 unsigned long tx_calc_timer PACKED; /* 24h: */
462 unsigned long rx_throughput PACKED; /* 28h: */
463 unsigned long rx_calc_timer PACKED; /* 2Ch: */
464} fr_dlci_stat_t;
465
466/*----------------------------------------------------------------------------
467 * Communications error statistics.
468 * This structure is returned by the FR_READ_ERROR_STATS command.
469 */
470typedef struct fr_comm_stat
471{
472 unsigned char rx_overruns PACKED; /* 00h: */
473 unsigned char rx_bad_crc PACKED; /* 01h: */
474 unsigned char rx_aborts PACKED; /* 02h: */
475 unsigned char rx_too_long PACKED; /* 03h: */
476 unsigned char tx_aborts PACKED; /* 04h: */
477 unsigned char tx_underruns PACKED; /* 05h: */
478 unsigned char tx_missed_undr PACKED; /* 06h: */
479 unsigned char dcd_dropped PACKED; /* 07h: */
480 unsigned char cts_dropped PACKED; /* 08h: */
481} fr_comm_stat_t;
482
483/*----------------------------------------------------------------------------
484 * Defines for the FR_ISSUE_IS_FRAME command.
485 */
486#define FR_ISF_LVE 2 /* issue Link Verification Enquiry */
487#define FR_ISF_FSE 3 /* issue Full Status Enquiry */
488
489/*----------------------------------------------------------------------------
490 * Frame Relay ARP Header -- Used for Dynamic route creation with InvARP
491 */
492
493typedef struct arphdr_fr
494 {
495 unsigned short ar_hrd PACKED; /* format of hardware addr */
496 unsigned short ar_pro PACKED; /* format of protocol addr */
497 unsigned char ar_hln PACKED; /* length of hardware addr */
498 unsigned char ar_pln PACKED; /* length of protocol addr */
499 unsigned short ar_op PACKED; /* ARP opcode */
500 unsigned short ar_sha PACKED; /* Sender DLCI addr 2 bytes */
501 unsigned long ar_sip PACKED; /* Sender IP addr 4 bytes */
502 unsigned short ar_tha PACKED; /* Target DLCI addr 2 bytes */
503 unsigned long ar_tip PACKED; /* Target IP addr 4 bytes */
504 } arphdr_fr_t;
505
506/*----------------------------------------------------------------------------
507 * Frame Relay RFC 1490 SNAP Header -- Used to check for ARP packets
508 */
509typedef struct arphdr_1490
510 {
511 unsigned char control PACKED; /* UI, etc... */
512 unsigned char pad PACKED; /* Pad */
513 unsigned char NLPID PACKED; /* SNAP */
514 unsigned char OUI[3] PACKED; /* Ethertype, etc... */
515 unsigned short PID PACKED; /* ARP, IP, etc... */
516 } arphdr_1490_t;
517
518/* UDP/IP packet (for UDP management) layout */
519
520/* The embedded control block for UDP mgmt
521 This is essentially a mailbox structure, without the large data field */
522
523typedef struct {
524 unsigned char opp_flag PACKED; /* the opp flag */
525 unsigned char command PACKED; /* command code */
526 unsigned short length PACKED; /* length of data buffer */
527 unsigned char result PACKED; /* return code */
528 unsigned short dlci PACKED; /* DLCI number */
529 unsigned char attr PACKED; /* FECN, BECN, DE and C/R bits */
530 unsigned short rxlost1 PACKED; /* frames discarded at int. level */
531 unsigned long rxlost2 PACKED; /* frames discarded at app. level */
532 unsigned char rsrv[2] PACKED; /* reserved for future use */
533} cblock_t;
534
535
536/* UDP management packet layout (data area of ip packet) */
537
538typedef struct {
539 unsigned char control PACKED;
540 unsigned char NLPID PACKED;
541} fr_encap_hdr_t;
542
543typedef struct {
544// fr_encap_hdr_t fr_encap_hdr PACKED;
545 ip_pkt_t ip_pkt PACKED;
546 udp_pkt_t udp_pkt PACKED;
547 wp_mgmt_t wp_mgmt PACKED;
548 cblock_t cblock PACKED;
549 unsigned char data[4080] PACKED;
550} fr_udp_pkt_t;
551
552
553/* valid ip_protocol for UDP management */
554#define UDPMGMT_UDP_PROTOCOL 0x11
555
556#define UDPMGMT_FPIPE_SIGNATURE "FPIPE8ND"
557#define UDPMGMT_DRVRSTATS_SIGNATURE "DRVSTATS"
558
559/* values for request/reply byte */
560#define UDPMGMT_REQUEST 0x01
561#define UDPMGMT_REPLY 0x02
562#define UDP_OFFSET 12
563
564typedef struct {
565 unsigned long if_send_entry;
566 unsigned long if_send_skb_null;
567 unsigned long if_send_broadcast;
568 unsigned long if_send_multicast;
569 unsigned long if_send_critical_ISR;
570 unsigned long if_send_critical_non_ISR;
571 unsigned long if_send_busy;
572 unsigned long if_send_busy_timeout;
573 unsigned long if_send_DRVSTATS_request;
574 unsigned long if_send_FPIPE_request;
575 unsigned long if_send_wan_disconnected;
576 unsigned long if_send_dlci_disconnected;
577 unsigned long if_send_no_bfrs;
578 unsigned long if_send_adptr_bfrs_full;
579 unsigned long if_send_bfrs_passed_to_adptr;
580 unsigned long if_send_consec_send_fail;
581} drvstats_if_send_t;
582
583typedef struct {
584 unsigned long rx_intr_no_socket;
585 unsigned long rx_intr_dev_not_started;
586 unsigned long rx_intr_DRVSTATS_request;
587 unsigned long rx_intr_FPIPE_request;
588 unsigned long rx_intr_bfr_not_passed_to_stack;
589 unsigned long rx_intr_bfr_passed_to_stack;
590 } drvstats_rx_intr_t;
591
592typedef struct {
593 unsigned long UDP_FPIPE_mgmt_kmalloc_err;
594 unsigned long UDP_FPIPE_mgmt_direction_err;
595 unsigned long UDP_FPIPE_mgmt_adptr_type_err;
596 unsigned long UDP_FPIPE_mgmt_adptr_cmnd_OK;
597 unsigned long UDP_FPIPE_mgmt_adptr_cmnd_timeout;
598 unsigned long UDP_FPIPE_mgmt_adptr_send_passed;
599 unsigned long UDP_FPIPE_mgmt_adptr_send_failed;
600 unsigned long UDP_FPIPE_mgmt_not_passed_to_stack;
601 unsigned long UDP_FPIPE_mgmt_passed_to_stack;
602 unsigned long UDP_FPIPE_mgmt_no_socket;
603 unsigned long UDP_DRVSTATS_mgmt_kmalloc_err;
604 unsigned long UDP_DRVSTATS_mgmt_adptr_cmnd_OK;
605 unsigned long UDP_DRVSTATS_mgmt_adptr_cmnd_timeout;
606 unsigned long UDP_DRVSTATS_mgmt_adptr_send_passed;
607 unsigned long UDP_DRVSTATS_mgmt_adptr_send_failed;
608 unsigned long UDP_DRVSTATS_mgmt_not_passed_to_stack;
609 unsigned long UDP_DRVSTATS_mgmt_passed_to_stack;
610 unsigned long UDP_DRVSTATS_mgmt_no_socket;
611} drvstats_gen_t;
612
613typedef struct {
614 unsigned char attr PACKED;
615 unsigned short time_stamp PACKED;
616 unsigned char reserved[13] PACKED;
617} api_rx_hdr_t;
618
619typedef struct {
620 api_rx_hdr_t api_rx_hdr PACKED;
621 void * data PACKED;
622} api_rx_element_t;
623
624typedef struct {
625 unsigned char attr PACKED;
626 unsigned char reserved[15] PACKED;
627} api_tx_hdr_t;
628
629typedef struct {
630 api_tx_hdr_t api_tx_hdr PACKED;
631 void * data PACKED;
632} api_tx_element_t;
633
634#ifdef _MSC_
635# pragma pack()
636#endif
637#endif /* _SDLA_FR_H */
638
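
The deleted header wraps every structure member in PACKED because these layouts mirror the adapter's on-card memory, where the byte-aligned promise in the Notes above must hold exactly. A minimal userspace sketch, not part of the patch and with the struct contents abridged from fr_cmd above, of what the attribute buys:

	#include <stdio.h>

	struct fr_cmd_packed {
		unsigned char  command;	/* 1 byte */
		unsigned short length;	/* would otherwise be padded to offset 2 */
		unsigned char  result;
	} __attribute__((packed));

	struct fr_cmd_natural {
		unsigned char  command;
		unsigned short length;
		unsigned char  result;
	};

	int main(void)
	{
		/* Typically prints "packed=4 natural=6" on common ABIs; only
		 * the packed layout matches a byte-aligned firmware mailbox. */
		printf("packed=%zu natural=%zu\n",
		       sizeof(struct fr_cmd_packed),
		       sizeof(struct fr_cmd_natural));
		return 0;
	}
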
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5992f65b4184..2694cb3ca763 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -27,20 +27,24 @@
27#include <net/checksum.h> 27#include <net/checksum.h>
28#include <linux/rcupdate.h> 28#include <linux/rcupdate.h>
29#include <linux/dmaengine.h> 29#include <linux/dmaengine.h>
30#include <linux/hrtimer.h>
30 31
31#define HAVE_ALLOC_SKB /* For the drivers to know */ 32#define HAVE_ALLOC_SKB /* For the drivers to know */
32#define HAVE_ALIGNABLE_SKB /* Ditto 8) */ 33#define HAVE_ALIGNABLE_SKB /* Ditto 8) */
33 34
35/* Don't change this without changing skb_csum_unnecessary! */
34#define CHECKSUM_NONE 0 36#define CHECKSUM_NONE 0
35#define CHECKSUM_PARTIAL 1 37#define CHECKSUM_UNNECESSARY 1
36#define CHECKSUM_UNNECESSARY 2 38#define CHECKSUM_COMPLETE 2
37#define CHECKSUM_COMPLETE 3 39#define CHECKSUM_PARTIAL 3
38 40
39#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \ 41#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \
40 ~(SMP_CACHE_BYTES - 1)) 42 ~(SMP_CACHE_BYTES - 1))
41#define SKB_MAX_ORDER(X, ORDER) (((PAGE_SIZE << (ORDER)) - (X) - \ 43#define SKB_WITH_OVERHEAD(X) \
42 sizeof(struct skb_shared_info)) & \ 44 (((X) - sizeof(struct skb_shared_info)) & \
43 ~(SMP_CACHE_BYTES - 1)) 45 ~(SMP_CACHE_BYTES - 1))
46#define SKB_MAX_ORDER(X, ORDER) \
47 SKB_WITH_OVERHEAD((PAGE_SIZE << (ORDER)) - (X))
44#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0)) 48#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0))
45#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2)) 49#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2))
46 50
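
The renumbering above is not cosmetic, which is what the new "Don't change this" warning is about: with CHECKSUM_UNNECESSARY at 1 and CHECKSUM_PARTIAL at 3, the skb_csum_unnecessary() helper added later in this patch reduces to a single AND, and it deliberately also matches PARTIAL, which on receive marks a locally generated packet whose checksum is already known to be good. A standalone sketch of this rationale:

	#define CHECKSUM_NONE		0
	#define CHECKSUM_UNNECESSARY	1
	#define CHECKSUM_COMPLETE	2
	#define CHECKSUM_PARTIAL	3

	static inline int csum_unnecessary(unsigned int ip_summed)
	{
		/* true for UNNECESSARY (1) and PARTIAL (3), false for
		 * NONE (0) and COMPLETE (2) */
		return ip_summed & CHECKSUM_UNNECESSARY;
	}
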
@@ -66,8 +70,8 @@
66 * NONE: skb is checksummed by protocol or csum is not required. 70 * NONE: skb is checksummed by protocol or csum is not required.
67 * 71 *
68 * PARTIAL: device is required to csum packet as seen by hard_start_xmit 72 * PARTIAL: device is required to csum packet as seen by hard_start_xmit
69 * from skb->h.raw to the end and to record the checksum 73 * from skb->transport_header to the end and to record the checksum
70 * at skb->h.raw+skb->csum. 74 * at skb->transport_header + skb->csum.
71 * 75 *
72 * Device must show its capabilities in dev->features, set 76 * Device must show its capabilities in dev->features, set
73 * at device setup time. 77 * at device setup time.
@@ -83,12 +87,13 @@
83 */ 87 */
84 88
85struct net_device; 89struct net_device;
90struct scatterlist;
86 91
87#ifdef CONFIG_NETFILTER 92#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
88struct nf_conntrack { 93struct nf_conntrack {
89 atomic_t use; 94 atomic_t use;
90 void (*destroy)(struct nf_conntrack *);
91}; 95};
96#endif
92 97
93#ifdef CONFIG_BRIDGE_NETFILTER 98#ifdef CONFIG_BRIDGE_NETFILTER
94struct nf_bridge_info { 99struct nf_bridge_info {
@@ -103,8 +108,6 @@ struct nf_bridge_info {
103}; 108};
104#endif 109#endif
105 110
106#endif
107
108struct sk_buff_head { 111struct sk_buff_head {
109 /* These two members must be first. */ 112 /* These two members must be first. */
110 struct sk_buff *next; 113 struct sk_buff *next;
@@ -156,11 +159,6 @@ struct skb_shared_info {
156#define SKB_DATAREF_SHIFT 16 159#define SKB_DATAREF_SHIFT 16
157#define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1) 160#define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1)
158 161
159struct skb_timeval {
160 u32 off_sec;
161 u32 off_usec;
162};
163
164 162
165enum { 163enum {
166 SKB_FCLONE_UNAVAILABLE, 164 SKB_FCLONE_UNAVAILABLE,
@@ -181,6 +179,16 @@ enum {
181 SKB_GSO_TCPV6 = 1 << 4, 179 SKB_GSO_TCPV6 = 1 << 4,
182}; 180};
183 181
182#if BITS_PER_LONG > 32
183#define NET_SKBUFF_DATA_USES_OFFSET 1
184#endif
185
186#ifdef NET_SKBUFF_DATA_USES_OFFSET
187typedef unsigned int sk_buff_data_t;
188#else
189typedef unsigned char *sk_buff_data_t;
190#endif
191
184/** 192/**
185 * struct sk_buff - socket buffer 193 * struct sk_buff - socket buffer
186 * @next: Next buffer in list 194 * @next: Next buffer in list
@@ -190,15 +198,17 @@ enum {
190 * @dev: Device we arrived on/are leaving by 198 * @dev: Device we arrived on/are leaving by
191 * @iif: ifindex of device we arrived on 199 * @iif: ifindex of device we arrived on
192 * @h: Transport layer header 200 * @h: Transport layer header
193 * @nh: Network layer header 201 * @network_header: Network layer header
194 * @mac: Link layer header 202 * @mac_header: Link layer header
195 * @dst: destination entry 203 * @dst: destination entry
196 * @sp: the security path, used for xfrm 204 * @sp: the security path, used for xfrm
197 * @cb: Control buffer. Free for use by every layer. Put private vars here 205 * @cb: Control buffer. Free for use by every layer. Put private vars here
198 * @len: Length of actual data 206 * @len: Length of actual data
199 * @data_len: Data length 207 * @data_len: Data length
200 * @mac_len: Length of link layer header 208 * @mac_len: Length of link layer header
201 * @csum: Checksum 209 * @csum: Checksum (must include start/offset pair)
210 * @csum_start: Offset from skb->head where checksumming should start
211 * @csum_offset: Offset from csum_start where checksum should be stored
202 * @local_df: allow local fragmentation 212 * @local_df: allow local fragmentation
203 * @cloned: Head may be cloned (check refcnt to be sure) 213 * @cloned: Head may be cloned (check refcnt to be sure)
204 * @nohdr: Payload reference only, must not modify header 214 * @nohdr: Payload reference only, must not modify header
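
Per the new kernel-doc above, a CHECKSUM_PARTIAL packet now carries an explicit (csum_start, csum_offset) pair instead of deriving everything from skb->h.raw. A sketch of how a driver would locate the slot where the computed checksum must be stored; the helper name is hypothetical, only the offset rule comes from the comments above:

	static inline __sum16 *csum_store_slot(const struct sk_buff *skb)
	{
		/* csum_start is relative to skb->head; csum_offset is
		 * relative to csum_start */
		return (__sum16 *)(skb->head + skb->csum_start +
				   skb->csum_offset);
	}
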
@@ -233,32 +243,11 @@ struct sk_buff {
233 struct sk_buff *prev; 243 struct sk_buff *prev;
234 244
235 struct sock *sk; 245 struct sock *sk;
236 struct skb_timeval tstamp; 246 ktime_t tstamp;
237 struct net_device *dev; 247 struct net_device *dev;
238 int iif; 248 int iif;
239 /* 4 byte hole on 64 bit*/ 249 /* 4 byte hole on 64 bit*/
240 250
241 union {
242 struct tcphdr *th;
243 struct udphdr *uh;
244 struct icmphdr *icmph;
245 struct igmphdr *igmph;
246 struct iphdr *ipiph;
247 struct ipv6hdr *ipv6h;
248 unsigned char *raw;
249 } h;
250
251 union {
252 struct iphdr *iph;
253 struct ipv6hdr *ipv6h;
254 struct arphdr *arph;
255 unsigned char *raw;
256 } nh;
257
258 union {
259 unsigned char *raw;
260 } mac;
261
262 struct dst_entry *dst; 251 struct dst_entry *dst;
263 struct sec_path *sp; 252 struct sec_path *sp;
264 253
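
With the h/nh/mac unions removed, code that dereferenced them directly is converted throughout this merge to the new accessor helpers; the per-protocol wrappers added later in this diff (tcp_hdr(), udp_hdr()) follow the same pattern. A conversion sketch with a hypothetical helper name:

	#include <linux/ip.h>
	#include <linux/skbuff.h>

	static inline struct iphdr *example_ip_header(const struct sk_buff *skb)
	{
		/* old: return skb->nh.iph; */
		return (struct iphdr *)skb_network_header(skb);
	}
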
@@ -275,7 +264,10 @@ struct sk_buff {
275 mac_len; 264 mac_len;
276 union { 265 union {
277 __wsum csum; 266 __wsum csum;
278 __u32 csum_offset; 267 struct {
268 __u16 csum_start;
269 __u16 csum_offset;
270 };
279 }; 271 };
280 __u32 priority; 272 __u32 priority;
281 __u8 local_df:1, 273 __u8 local_df:1,
@@ -289,15 +281,13 @@ struct sk_buff {
289 __be16 protocol; 281 __be16 protocol;
290 282
291 void (*destructor)(struct sk_buff *skb); 283 void (*destructor)(struct sk_buff *skb);
292#ifdef CONFIG_NETFILTER
293 struct nf_conntrack *nfct;
294#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 284#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
285 struct nf_conntrack *nfct;
295 struct sk_buff *nfct_reasm; 286 struct sk_buff *nfct_reasm;
296#endif 287#endif
297#ifdef CONFIG_BRIDGE_NETFILTER 288#ifdef CONFIG_BRIDGE_NETFILTER
298 struct nf_bridge_info *nf_bridge; 289 struct nf_bridge_info *nf_bridge;
299#endif 290#endif
300#endif /* CONFIG_NETFILTER */
301#ifdef CONFIG_NET_SCHED 291#ifdef CONFIG_NET_SCHED
302 __u16 tc_index; /* traffic control index */ 292 __u16 tc_index; /* traffic control index */
303#ifdef CONFIG_NET_CLS_ACT 293#ifdef CONFIG_NET_CLS_ACT
@@ -313,13 +303,16 @@ struct sk_buff {
313 303
314 __u32 mark; 304 __u32 mark;
315 305
306 sk_buff_data_t transport_header;
307 sk_buff_data_t network_header;
308 sk_buff_data_t mac_header;
316 /* These elements must be at the end, see alloc_skb() for details. */ 309 /* These elements must be at the end, see alloc_skb() for details. */
310 sk_buff_data_t tail;
311 sk_buff_data_t end;
312 unsigned char *head,
313 *data;
317 unsigned int truesize; 314 unsigned int truesize;
318 atomic_t users; 315 atomic_t users;
319 unsigned char *head,
320 *data,
321 *tail,
322 *end;
323}; 316};
324 317
325#ifdef __KERNEL__ 318#ifdef __KERNEL__
@@ -361,6 +354,11 @@ extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
361extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb, 354extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
362 int newheadroom, int newtailroom, 355 int newheadroom, int newtailroom,
363 gfp_t priority); 356 gfp_t priority);
357extern int skb_to_sgvec(struct sk_buff *skb,
358 struct scatterlist *sg, int offset,
359 int len);
360extern int skb_cow_data(struct sk_buff *skb, int tailbits,
361 struct sk_buff **trailer);
364extern int skb_pad(struct sk_buff *skb, int pad); 362extern int skb_pad(struct sk_buff *skb, int pad);
365#define dev_kfree_skb(a) kfree_skb(a) 363#define dev_kfree_skb(a) kfree_skb(a)
366extern void skb_over_panic(struct sk_buff *skb, int len, 364extern void skb_over_panic(struct sk_buff *skb, int len,
@@ -402,8 +400,20 @@ extern unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
402 unsigned int to, struct ts_config *config, 400 unsigned int to, struct ts_config *config,
403 struct ts_state *state); 401 struct ts_state *state);
404 402
403#ifdef NET_SKBUFF_DATA_USES_OFFSET
404static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
405{
406 return skb->head + skb->end;
407}
408#else
409static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
410{
411 return skb->end;
412}
413#endif
414
405/* Internal */ 415/* Internal */
406#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) 416#define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB)))
407 417
408/** 418/**
409 * skb_queue_empty - check if a queue is empty 419 * skb_queue_empty - check if a queue is empty
@@ -822,12 +832,46 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
822#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_shinfo(skb)->frag_list) 832#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_shinfo(skb)->frag_list)
823#define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb)) 833#define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb))
824 834
835#ifdef NET_SKBUFF_DATA_USES_OFFSET
836static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
837{
838 return skb->head + skb->tail;
839}
840
841static inline void skb_reset_tail_pointer(struct sk_buff *skb)
842{
843 skb->tail = skb->data - skb->head;
844}
845
846static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
847{
848 skb_reset_tail_pointer(skb);
849 skb->tail += offset;
850}
851#else /* NET_SKBUFF_DATA_USES_OFFSET */
852static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
853{
854 return skb->tail;
855}
856
857static inline void skb_reset_tail_pointer(struct sk_buff *skb)
858{
859 skb->tail = skb->data;
860}
861
862static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
863{
864 skb->tail = skb->data + offset;
865}
866
867#endif /* NET_SKBUFF_DATA_USES_OFFSET */
868
825/* 869/*
826 * Add data to an sk_buff 870 * Add data to an sk_buff
827 */ 871 */
828static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) 872static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
829{ 873{
830 unsigned char *tmp = skb->tail; 874 unsigned char *tmp = skb_tail_pointer(skb);
831 SKB_LINEAR_ASSERT(skb); 875 SKB_LINEAR_ASSERT(skb);
832 skb->tail += len; 876 skb->tail += len;
833 skb->len += len; 877 skb->len += len;
@@ -845,11 +889,11 @@ static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
845 */ 889 */
846static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len) 890static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
847{ 891{
848 unsigned char *tmp = skb->tail; 892 unsigned char *tmp = skb_tail_pointer(skb);
849 SKB_LINEAR_ASSERT(skb); 893 SKB_LINEAR_ASSERT(skb);
850 skb->tail += len; 894 skb->tail += len;
851 skb->len += len; 895 skb->len += len;
852 if (unlikely(skb->tail>skb->end)) 896 if (unlikely(skb->tail > skb->end))
853 skb_over_panic(skb, len, current_text_addr()); 897 skb_over_panic(skb, len, current_text_addr());
854 return tmp; 898 return tmp;
855} 899}
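
Since 'tail' may now be an offset rather than a pointer, only the helpers may read it as an address; the __skb_put()/skb_put() bodies above keep the raw "skb->tail += len" because that increment is valid in both representations. A conversion sketch for a hypothetical driver that used the raw field:

	static void example_fill(struct sk_buff *skb,
				 const void *buf, unsigned int len)
	{
		/* old: memcpy(skb->tail, buf, len);
		 *	skb->tail += len;
		 *	skb->len  += len;
		 * new: skb_put() returns the old tail pointer and advances */
		memcpy(skb_put(skb, len), buf, len);
	}
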
@@ -962,6 +1006,130 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
962 skb->tail += len; 1006 skb->tail += len;
963} 1007}
964 1008
1009#ifdef NET_SKBUFF_DATA_USES_OFFSET
1010static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
1011{
1012 return skb->head + skb->transport_header;
1013}
1014
1015static inline void skb_reset_transport_header(struct sk_buff *skb)
1016{
1017 skb->transport_header = skb->data - skb->head;
1018}
1019
1020static inline void skb_set_transport_header(struct sk_buff *skb,
1021 const int offset)
1022{
1023 skb_reset_transport_header(skb);
1024 skb->transport_header += offset;
1025}
1026
1027static inline unsigned char *skb_network_header(const struct sk_buff *skb)
1028{
1029 return skb->head + skb->network_header;
1030}
1031
1032static inline void skb_reset_network_header(struct sk_buff *skb)
1033{
1034 skb->network_header = skb->data - skb->head;
1035}
1036
1037static inline void skb_set_network_header(struct sk_buff *skb, const int offset)
1038{
1039 skb_reset_network_header(skb);
1040 skb->network_header += offset;
1041}
1042
1043static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
1044{
1045 return skb->head + skb->mac_header;
1046}
1047
1048static inline int skb_mac_header_was_set(const struct sk_buff *skb)
1049{
1050 return skb->mac_header != ~0U;
1051}
1052
1053static inline void skb_reset_mac_header(struct sk_buff *skb)
1054{
1055 skb->mac_header = skb->data - skb->head;
1056}
1057
1058static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
1059{
1060 skb_reset_mac_header(skb);
1061 skb->mac_header += offset;
1062}
1063
1064#else /* NET_SKBUFF_DATA_USES_OFFSET */
1065
1066static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
1067{
1068 return skb->transport_header;
1069}
1070
1071static inline void skb_reset_transport_header(struct sk_buff *skb)
1072{
1073 skb->transport_header = skb->data;
1074}
1075
1076static inline void skb_set_transport_header(struct sk_buff *skb,
1077 const int offset)
1078{
1079 skb->transport_header = skb->data + offset;
1080}
1081
1082static inline unsigned char *skb_network_header(const struct sk_buff *skb)
1083{
1084 return skb->network_header;
1085}
1086
1087static inline void skb_reset_network_header(struct sk_buff *skb)
1088{
1089 skb->network_header = skb->data;
1090}
1091
1092static inline void skb_set_network_header(struct sk_buff *skb, const int offset)
1093{
1094 skb->network_header = skb->data + offset;
1095}
1096
1097static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
1098{
1099 return skb->mac_header;
1100}
1101
1102static inline int skb_mac_header_was_set(const struct sk_buff *skb)
1103{
1104 return skb->mac_header != NULL;
1105}
1106
1107static inline void skb_reset_mac_header(struct sk_buff *skb)
1108{
1109 skb->mac_header = skb->data;
1110}
1111
1112static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
1113{
1114 skb->mac_header = skb->data + offset;
1115}
1116#endif /* NET_SKBUFF_DATA_USES_OFFSET */
1117
1118static inline int skb_transport_offset(const struct sk_buff *skb)
1119{
1120 return skb_transport_header(skb) - skb->data;
1121}
1122
1123static inline u32 skb_network_header_len(const struct sk_buff *skb)
1124{
1125 return skb->transport_header - skb->network_header;
1126}
1127
1128static inline int skb_network_offset(const struct sk_buff *skb)
1129{
1130 return skb_network_header(skb) - skb->data;
1131}
1132
965/* 1133/*
966 * CPUs often take a performance hit when accessing unaligned memory 1134 * CPUs often take a performance hit when accessing unaligned memory
967 * locations. The actual performance hit varies, it can be small if the 1135 * locations. The actual performance hit varies, it can be small if the
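
The block above is the core of the conversion: each layer header gets a get/reset/set triple whose offset flavour compiles to head-relative arithmetic on 64-bit and whose pointer flavour stays a plain assignment on 32-bit. A usage sketch for a hypothetical receive path; function and parameter names are illustrative only:

	static void example_rx(struct sk_buff *skb, unsigned int llhdr_len)
	{
		skb_reset_mac_header(skb);	/* link header starts at data */
		skb_pull(skb, llhdr_len);	/* consume the link header */
		skb_reset_network_header(skb);	/* network header at new data */

		/* skb_network_offset(skb) is now 0, and the link header
		 * length is skb_network_header(skb) - skb_mac_header(skb). */
	}
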
@@ -1013,8 +1181,8 @@ static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
1013 WARN_ON(1); 1181 WARN_ON(1);
1014 return; 1182 return;
1015 } 1183 }
1016 skb->len = len; 1184 skb->len = len;
1017 skb->tail = skb->data + len; 1185 skb_set_tail_pointer(skb, len);
1018} 1186}
1019 1187
1020/** 1188/**
@@ -1326,8 +1494,8 @@ extern __wsum skb_checksum(const struct sk_buff *skb, int offset,
1326 int len, __wsum csum); 1494 int len, __wsum csum);
1327extern int skb_copy_bits(const struct sk_buff *skb, int offset, 1495extern int skb_copy_bits(const struct sk_buff *skb, int offset,
1328 void *to, int len); 1496 void *to, int len);
1329extern int skb_store_bits(const struct sk_buff *skb, int offset, 1497extern int skb_store_bits(struct sk_buff *skb, int offset,
1330 void *from, int len); 1498 const void *from, int len);
1331extern __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, 1499extern __wsum skb_copy_and_csum_bits(const struct sk_buff *skb,
1332 int offset, u8 *to, int len, 1500 int offset, u8 *to, int len,
1333 __wsum csum); 1501 __wsum csum);
@@ -1351,8 +1519,36 @@ static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
1351 return buffer; 1519 return buffer;
1352} 1520}
1353 1521
1522static inline void skb_copy_from_linear_data(const struct sk_buff *skb,
1523 void *to,
1524 const unsigned int len)
1525{
1526 memcpy(to, skb->data, len);
1527}
1528
1529static inline void skb_copy_from_linear_data_offset(const struct sk_buff *skb,
1530 const int offset, void *to,
1531 const unsigned int len)
1532{
1533 memcpy(to, skb->data + offset, len);
1534}
1535
1536static inline void skb_copy_to_linear_data(struct sk_buff *skb,
1537 const void *from,
1538 const unsigned int len)
1539{
1540 memcpy(skb->data, from, len);
1541}
1542
1543static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb,
1544 const int offset,
1545 const void *from,
1546 const unsigned int len)
1547{
1548 memcpy(skb->data + offset, from, len);
1549}
1550
1354extern void skb_init(void); 1551extern void skb_init(void);
1355extern void skb_add_mtu(int mtu);
1356 1552
1357/** 1553/**
1358 * skb_get_timestamp - get timestamp from a skb 1554 * skb_get_timestamp - get timestamp from a skb
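
The four skb_copy_*_linear_data helpers introduced in the hunk above are plain memcpy() wrappers over the linear area; the merge converts drivers to them so every linear-data access point is named and easy to instrument later. A conversion sketch with a hypothetical caller:

	static void example_read_header(const struct sk_buff *skb,
					void *hdr, int off, unsigned int n)
	{
		/* old: memcpy(hdr, skb->data + off, n); */
		skb_copy_from_linear_data_offset(skb, off, hdr, n);
	}
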
@@ -1365,29 +1561,28 @@ extern void skb_add_mtu(int mtu);
1365 */ 1561 */
1366static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval *stamp) 1562static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval *stamp)
1367{ 1563{
1368 stamp->tv_sec = skb->tstamp.off_sec; 1564 *stamp = ktime_to_timeval(skb->tstamp);
1369 stamp->tv_usec = skb->tstamp.off_usec;
1370} 1565}
1371 1566
1372/** 1567static inline void __net_timestamp(struct sk_buff *skb)
1373 * skb_set_timestamp - set timestamp of a skb 1568{
1374 * @skb: skb to set stamp of 1569 skb->tstamp = ktime_get_real();
1375 * @stamp: pointer to struct timeval to get stamp from 1570}
1376 * 1571
1377 * Timestamps are stored in the skb as offsets to a base timestamp. 1572static inline ktime_t net_timedelta(ktime_t t)
1378 * This function converts a struct timeval to an offset and stores
1379 * it in the skb.
1380 */
1381static inline void skb_set_timestamp(struct sk_buff *skb, const struct timeval *stamp)
1382{ 1573{
1383 skb->tstamp.off_sec = stamp->tv_sec; 1574 return ktime_sub(ktime_get_real(), t);
1384 skb->tstamp.off_usec = stamp->tv_usec;
1385} 1575}
1386 1576
1387extern void __net_timestamp(struct sk_buff *skb);
1388 1577
1578extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
1389extern __sum16 __skb_checksum_complete(struct sk_buff *skb); 1579extern __sum16 __skb_checksum_complete(struct sk_buff *skb);
1390 1580
1581static inline int skb_csum_unnecessary(const struct sk_buff *skb)
1582{
1583 return skb->ip_summed & CHECKSUM_UNNECESSARY;
1584}
1585
1391/** 1586/**
1392 * skb_checksum_complete - Calculate checksum of an entire packet 1587 * skb_checksum_complete - Calculate checksum of an entire packet
1393 * @skb: packet to process 1588 * @skb: packet to process
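
skb->tstamp is now a ktime_t, so skb_get_timestamp() degrades to a ktime_to_timeval() conversion, skb_set_timestamp() disappears, and the new net_timedelta() measures a stamp's age against the realtime clock. A sketch of the assumed usage, with a hypothetical caller:

	static s64 example_skb_age_ns(const struct sk_buff *skb)
	{
		/* nanoseconds elapsed since the packet was stamped */
		return ktime_to_ns(net_timedelta(skb->tstamp));
	}
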
@@ -1406,22 +1601,22 @@ extern __sum16 __skb_checksum_complete(struct sk_buff *skb);
1406 */ 1601 */
1407static inline unsigned int skb_checksum_complete(struct sk_buff *skb) 1602static inline unsigned int skb_checksum_complete(struct sk_buff *skb)
1408{ 1603{
1409 return skb->ip_summed != CHECKSUM_UNNECESSARY && 1604 return skb_csum_unnecessary(skb) ?
1410 __skb_checksum_complete(skb); 1605 0 : __skb_checksum_complete(skb);
1411} 1606}
1412 1607
1413#ifdef CONFIG_NETFILTER 1608#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
1609extern void nf_conntrack_destroy(struct nf_conntrack *nfct);
1414static inline void nf_conntrack_put(struct nf_conntrack *nfct) 1610static inline void nf_conntrack_put(struct nf_conntrack *nfct)
1415{ 1611{
1416 if (nfct && atomic_dec_and_test(&nfct->use)) 1612 if (nfct && atomic_dec_and_test(&nfct->use))
1417 nfct->destroy(nfct); 1613 nf_conntrack_destroy(nfct);
1418} 1614}
1419static inline void nf_conntrack_get(struct nf_conntrack *nfct) 1615static inline void nf_conntrack_get(struct nf_conntrack *nfct)
1420{ 1616{
1421 if (nfct) 1617 if (nfct)
1422 atomic_inc(&nfct->use); 1618 atomic_inc(&nfct->use);
1423} 1619}
1424#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
1425static inline void nf_conntrack_get_reasm(struct sk_buff *skb) 1620static inline void nf_conntrack_get_reasm(struct sk_buff *skb)
1426{ 1621{
1427 if (skb) 1622 if (skb)
@@ -1447,9 +1642,9 @@ static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge)
1447#endif /* CONFIG_BRIDGE_NETFILTER */ 1642#endif /* CONFIG_BRIDGE_NETFILTER */
1448static inline void nf_reset(struct sk_buff *skb) 1643static inline void nf_reset(struct sk_buff *skb)
1449{ 1644{
1645#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
1450 nf_conntrack_put(skb->nfct); 1646 nf_conntrack_put(skb->nfct);
1451 skb->nfct = NULL; 1647 skb->nfct = NULL;
1452#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
1453 nf_conntrack_put_reasm(skb->nfct_reasm); 1648 nf_conntrack_put_reasm(skb->nfct_reasm);
1454 skb->nfct_reasm = NULL; 1649 skb->nfct_reasm = NULL;
1455#endif 1650#endif
@@ -1459,9 +1654,33 @@ static inline void nf_reset(struct sk_buff *skb)
1459#endif 1654#endif
1460} 1655}
1461 1656
1462#else /* CONFIG_NETFILTER */ 1657/* Note: This doesn't put any conntrack and bridge info in dst. */
1463static inline void nf_reset(struct sk_buff *skb) {} 1658static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
1464#endif /* CONFIG_NETFILTER */ 1659{
1660#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
1661 dst->nfct = src->nfct;
1662 nf_conntrack_get(src->nfct);
1663 dst->nfctinfo = src->nfctinfo;
1664 dst->nfct_reasm = src->nfct_reasm;
1665 nf_conntrack_get_reasm(src->nfct_reasm);
1666#endif
1667#ifdef CONFIG_BRIDGE_NETFILTER
1668 dst->nf_bridge = src->nf_bridge;
1669 nf_bridge_get(src->nf_bridge);
1670#endif
1671}
1672
1673static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
1674{
1675#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
1676 nf_conntrack_put(dst->nfct);
1677 nf_conntrack_put_reasm(dst->nfct_reasm);
1678#endif
1679#ifdef CONFIG_BRIDGE_NETFILTER
1680 nf_bridge_put(dst->nf_bridge);
1681#endif
1682 __nf_copy(dst, src);
1683}
1465 1684
1466#ifdef CONFIG_NETWORK_SECMARK 1685#ifdef CONFIG_NETWORK_SECMARK
1467static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from) 1686static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from)
@@ -1486,5 +1705,12 @@ static inline int skb_is_gso(const struct sk_buff *skb)
1486 return skb_shinfo(skb)->gso_size; 1705 return skb_shinfo(skb)->gso_size;
1487} 1706}
1488 1707
1708static inline void skb_forward_csum(struct sk_buff *skb)
1709{
1710 /* Unfortunately we don't support this one. Any brave souls? */
1711 if (skb->ip_summed == CHECKSUM_COMPLETE)
1712 skb->ip_summed = CHECKSUM_NONE;
1713}
1714
1489#endif /* __KERNEL__ */ 1715#endif /* __KERNEL__ */
1490#endif /* _LINUX_SKBUFF_H */ 1716#endif /* _LINUX_SKBUFF_H */
diff --git a/include/linux/socket.h b/include/linux/socket.h
index fcd35a210e7f..6e7c9483a6a6 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -188,7 +188,8 @@ struct ucred {
188#define AF_TIPC 30 /* TIPC sockets */ 188#define AF_TIPC 30 /* TIPC sockets */
189#define AF_BLUETOOTH 31 /* Bluetooth sockets */ 189#define AF_BLUETOOTH 31 /* Bluetooth sockets */
190#define AF_IUCV 32 /* IUCV sockets */ 190#define AF_IUCV 32 /* IUCV sockets */
191#define AF_MAX 33 /* For now.. */ 191#define AF_RXRPC 33 /* RxRPC sockets */
192#define AF_MAX 34 /* For now.. */
192 193
193/* Protocol families, same as address families. */ 194/* Protocol families, same as address families. */
194#define PF_UNSPEC AF_UNSPEC 195#define PF_UNSPEC AF_UNSPEC
@@ -222,6 +223,7 @@ struct ucred {
222#define PF_TIPC AF_TIPC 223#define PF_TIPC AF_TIPC
223#define PF_BLUETOOTH AF_BLUETOOTH 224#define PF_BLUETOOTH AF_BLUETOOTH
224#define PF_IUCV AF_IUCV 225#define PF_IUCV AF_IUCV
226#define PF_RXRPC AF_RXRPC
225#define PF_MAX AF_MAX 227#define PF_MAX AF_MAX
226 228
227/* Maximum queue length specifiable by listen. */ 229/* Maximum queue length specifiable by listen. */
@@ -284,6 +286,7 @@ struct ucred {
284#define SOL_DCCP 269 286#define SOL_DCCP 269
285#define SOL_NETLINK 270 287#define SOL_NETLINK 270
286#define SOL_TIPC 271 288#define SOL_TIPC 271
289#define SOL_RXRPC 272
287 290
288/* IPX options */ 291/* IPX options */
289#define IPX_TYPE 1 292#define IPX_TYPE 1
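
The three hunks above register the new rxrpc address family, protocol family and socket-option level. A userspace sketch of opening such a socket; the SOCK_DGRAM/PF_INET combination is the one the new rxrpc code is written for, but treat it here as an assumption, and the fallback define exists only for pre-patch headers:

	#include <stdio.h>
	#include <sys/socket.h>

	#ifndef AF_RXRPC
	#define AF_RXRPC 33	/* value added by this hunk */
	#endif

	int main(void)
	{
		/* the protocol argument selects the transport the calls
		 * ride on */
		int fd = socket(AF_RXRPC, SOCK_DGRAM, PF_INET);

		if (fd < 0)
			perror("socket(AF_RXRPC)");
		return 0;
	}
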
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 9a8970bf99a6..47f1c53332ce 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -290,6 +290,7 @@ enum
290 NET_CORE_BUDGET=19, 290 NET_CORE_BUDGET=19,
291 NET_CORE_AEVENT_ETIME=20, 291 NET_CORE_AEVENT_ETIME=20,
292 NET_CORE_AEVENT_RSEQTH=21, 292 NET_CORE_AEVENT_RSEQTH=21,
293 NET_CORE_WARNINGS=22,
293}; 294};
294 295
295/* /proc/sys/net/ethernet */ 296/* /proc/sys/net/ethernet */
@@ -438,6 +439,8 @@ enum
438 NET_CIPSOV4_RBM_STRICTVALID=121, 439 NET_CIPSOV4_RBM_STRICTVALID=121,
439 NET_TCP_AVAIL_CONG_CONTROL=122, 440 NET_TCP_AVAIL_CONG_CONTROL=122,
440 NET_TCP_ALLOWED_CONG_CONTROL=123, 441 NET_TCP_ALLOWED_CONG_CONTROL=123,
442 NET_TCP_MAX_SSTHRESH=124,
443 NET_TCP_FRTO_RESPONSE=125,
441}; 444};
442 445
443enum { 446enum {
@@ -789,6 +792,7 @@ enum {
789 NET_BRIDGE_NF_CALL_IPTABLES = 2, 792 NET_BRIDGE_NF_CALL_IPTABLES = 2,
790 NET_BRIDGE_NF_CALL_IP6TABLES = 3, 793 NET_BRIDGE_NF_CALL_IP6TABLES = 3,
791 NET_BRIDGE_NF_FILTER_VLAN_TAGGED = 4, 794 NET_BRIDGE_NF_FILTER_VLAN_TAGGED = 4,
795 NET_BRIDGE_NF_FILTER_PPPOE_TAGGED = 5,
792}; 796};
793 797
794/* CTL_FS names: */ 798/* CTL_FS names: */
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 29d3089038ab..c6b9f92e8289 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -178,6 +178,21 @@ struct tcp_md5sig {
178#include <net/inet_connection_sock.h> 178#include <net/inet_connection_sock.h>
179#include <net/inet_timewait_sock.h> 179#include <net/inet_timewait_sock.h>
180 180
181static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb)
182{
183 return (struct tcphdr *)skb_transport_header(skb);
184}
185
186static inline unsigned int tcp_hdrlen(const struct sk_buff *skb)
187{
188 return tcp_hdr(skb)->doff * 4;
189}
190
191static inline unsigned int tcp_optlen(const struct sk_buff *skb)
192{
193 return (tcp_hdr(skb)->doff - 5) * 4;
194}
195
181/* This defines a selective acknowledgement block. */ 196/* This defines a selective acknowledgement block. */
182struct tcp_sack_block_wire { 197struct tcp_sack_block_wire {
183 __be32 start_seq; 198 __be32 start_seq;
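
The new tcp_hdr()/tcp_hdrlen()/tcp_optlen() helpers encode the header geometry once: doff counts 32-bit words, so a bare 20-byte header has doff == 5, and a header with doff == 8 is 32 bytes long, 12 of them options. A validation sketch, assuming the usual kernel includes (<linux/tcp.h>, <linux/errno.h>) and a hypothetical caller:

	static int example_tcp_options(const struct sk_buff *skb)
	{
		if (tcp_hdrlen(skb) < sizeof(struct tcphdr))
			return -EINVAL;	/* doff below the legal minimum of 5 */
		return tcp_optlen(skb);	/* 0 for an option-less header */
	}
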
@@ -242,6 +257,8 @@ struct tcp_sock {
242 * See RFC793 and RFC1122. The RFC writes these in capitals. 257 * See RFC793 and RFC1122. The RFC writes these in capitals.
243 */ 258 */
244 u32 rcv_nxt; /* What we want to receive next */ 259 u32 rcv_nxt; /* What we want to receive next */
260 u32 copied_seq; /* Head of yet unread data */
261 u32 rcv_wup; /* rcv_nxt on last window update sent */
245 u32 snd_nxt; /* Next sequence we send */ 262 u32 snd_nxt; /* Next sequence we send */
246 263
247 u32 snd_una; /* First byte we want an ack for */ 264 u32 snd_una; /* First byte we want an ack for */
@@ -300,17 +317,15 @@ struct tcp_sock {
300 u32 snd_ssthresh; /* Slow start size threshold */ 317 u32 snd_ssthresh; /* Slow start size threshold */
301 u32 snd_cwnd; /* Sending congestion window */ 318 u32 snd_cwnd; /* Sending congestion window */
302 u16 snd_cwnd_cnt; /* Linear increase counter */ 319 u16 snd_cwnd_cnt; /* Linear increase counter */
303 u16 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ 320 u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
304 u32 snd_cwnd_used; 321 u32 snd_cwnd_used;
305 u32 snd_cwnd_stamp; 322 u32 snd_cwnd_stamp;
306 323
307 struct sk_buff_head out_of_order_queue; /* Out of order segments go here */ 324 struct sk_buff_head out_of_order_queue; /* Out of order segments go here */
308 325
309 u32 rcv_wnd; /* Current receiver window */ 326 u32 rcv_wnd; /* Current receiver window */
310 u32 rcv_wup; /* rcv_nxt on last window update sent */
311 u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ 327 u32 write_seq; /* Tail(+1) of data held in tcp send buffer */
312 u32 pushed_seq; /* Last pushed seq, required to talk to windows */ 328 u32 pushed_seq; /* Last pushed seq, required to talk to windows */
313 u32 copied_seq; /* Head of yet unread data */
314 329
315/* SACKs data */ 330/* SACKs data */
316 struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ 331 struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 7e08c07efe0f..6de445c31a64 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -26,6 +26,15 @@ struct udphdr {
26 __sum16 check; 26 __sum16 check;
27}; 27};
28 28
29#ifdef __KERNEL__
30#include <linux/skbuff.h>
31
32static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
33{
34 return (struct udphdr *)skb_transport_header(skb);
35}
36#endif
37
29/* UDP socket options */ 38/* UDP socket options */
30#define UDP_CORK 1 /* Never send partially complete segments */ 39#define UDP_CORK 1 /* Never send partially complete segments */
31#define UDP_ENCAP 100 /* Set the socket to accept encapsulated packets */ 40#define UDP_ENCAP 100 /* Set the socket to accept encapsulated packets */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 2a7b38d87018..b8abfc74d038 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -191,14 +191,15 @@ int execute_in_process_context(work_func_t fn, struct execute_work *);
191 191
192/* 192/*
193 * Kill off a pending schedule_delayed_work(). Note that the work callback 193 * Kill off a pending schedule_delayed_work(). Note that the work callback
194 * function may still be running on return from cancel_delayed_work(). Run 194 * function may still be running on return from cancel_delayed_work(), unless
195 * flush_scheduled_work() to wait on it. 195 * it returns 1 and the work doesn't re-arm itself. Run flush_workqueue() or
196 * cancel_work_sync() to wait on it.
196 */ 197 */
197static inline int cancel_delayed_work(struct delayed_work *work) 198static inline int cancel_delayed_work(struct delayed_work *work)
198{ 199{
199 int ret; 200 int ret;
200 201
201 ret = del_timer_sync(&work->timer); 202 ret = del_timer(&work->timer);
202 if (ret) 203 if (ret)
203 work_release(&work->work); 204 work_release(&work->work);
204 return ret; 205 return ret;
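
The switch from del_timer_sync() to del_timer() above is a behaviour change, which is why the comment stops promising synchronisation: a callback already running when the timer is deleted keeps running. A tear-down sketch matching the updated comment, with a hypothetical delayed work item:

	static void example_teardown(struct delayed_work *dw)
	{
		if (!cancel_delayed_work(dw))		/* timer already fired */
			cancel_work_sync(&dw->work);	/* wait for handler */
	}
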
diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 15ca89e9961b..9c656a5cf842 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -181,6 +181,10 @@ enum {
181 XFRM_MSG_MIGRATE, 181 XFRM_MSG_MIGRATE,
182#define XFRM_MSG_MIGRATE XFRM_MSG_MIGRATE 182#define XFRM_MSG_MIGRATE XFRM_MSG_MIGRATE
183 183
184 XFRM_MSG_NEWSADINFO,
185#define XFRM_MSG_NEWSADINFO XFRM_MSG_NEWSADINFO
186 XFRM_MSG_GETSADINFO,
187#define XFRM_MSG_GETSADINFO XFRM_MSG_GETSADINFO
184 __XFRM_MSG_MAX 188 __XFRM_MSG_MAX
185}; 189};
186#define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1) 190#define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1)
@@ -234,6 +238,17 @@ enum xfrm_ae_ftype_t {
234#define XFRM_AE_MAX (__XFRM_AE_MAX - 1) 238#define XFRM_AE_MAX (__XFRM_AE_MAX - 1)
235}; 239};
236 240
241/* SAD Table filter flags */
242enum xfrm_sad_ftype_t {
243 XFRM_SAD_UNSPEC,
244 XFRM_SAD_HMASK=1,
245 XFRM_SAD_HMAX=2,
246 XFRM_SAD_CNT=4,
247 __XFRM_SAD_MAX
248
249#define XFRM_SAD_MAX (__XFRM_SAD_MAX - 1)
250};
251
237struct xfrm_userpolicy_type { 252struct xfrm_userpolicy_type {
238 __u8 type; 253 __u8 type;
239 __u16 reserved1; 254 __u16 reserved1;
@@ -265,6 +280,16 @@ enum xfrm_attr_type_t {
265#define XFRMA_MAX (__XFRMA_MAX - 1) 280#define XFRMA_MAX (__XFRMA_MAX - 1)
266}; 281};
267 282
283enum xfrm_sadattr_type_t {
284 XFRMA_SAD_UNSPEC,
285 XFRMA_SADHMASK,
286 XFRMA_SADHMAX,
287 XFRMA_SADCNT,
288 __XFRMA_SAD_MAX
289
290#define XFRMA_SAD_MAX (__XFRMA_SAD_MAX - 1)
291};
292
268struct xfrm_usersa_info { 293struct xfrm_usersa_info {
269 struct xfrm_selector sel; 294 struct xfrm_selector sel;
270 struct xfrm_id id; 295 struct xfrm_id id;
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 88df8fc814e4..f3531d0bcd05 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -73,7 +73,9 @@ extern int ipv6_get_saddr(struct dst_entry *dst,
73extern int ipv6_dev_get_saddr(struct net_device *dev, 73extern int ipv6_dev_get_saddr(struct net_device *dev,
74 struct in6_addr *daddr, 74 struct in6_addr *daddr,
75 struct in6_addr *saddr); 75 struct in6_addr *saddr);
76extern int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *); 76extern int ipv6_get_lladdr(struct net_device *dev,
77 struct in6_addr *addr,
78 unsigned char banned_flags);
77extern int ipv6_rcv_saddr_equal(const struct sock *sk, 79extern int ipv6_rcv_saddr_equal(const struct sock *sk,
78 const struct sock *sk2); 80 const struct sock *sk2);
79extern void addrconf_join_solict(struct net_device *dev, 81extern void addrconf_join_solict(struct net_device *dev,
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
new file mode 100644
index 000000000000..00c2eaa07c25
--- /dev/null
+++ b/include/net/af_rxrpc.h
@@ -0,0 +1,57 @@
1/* RxRPC kernel service interface definitions
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _NET_RXRPC_H
13#define _NET_RXRPC_H
14
15#ifdef __KERNEL__
16
17#include <linux/rxrpc.h>
18
19struct rxrpc_call;
20
21/*
22 * the mark applied to socket buffers that may be intercepted
23 */
24enum {
25 RXRPC_SKB_MARK_DATA, /* data message */
26 RXRPC_SKB_MARK_FINAL_ACK, /* final ACK received message */
27 RXRPC_SKB_MARK_BUSY, /* server busy message */
28 RXRPC_SKB_MARK_REMOTE_ABORT, /* remote abort message */
29 RXRPC_SKB_MARK_NET_ERROR, /* network error message */
30 RXRPC_SKB_MARK_LOCAL_ERROR, /* local error message */
31 RXRPC_SKB_MARK_NEW_CALL, /* new incoming call message */
32};
33
34typedef void (*rxrpc_interceptor_t)(struct sock *, unsigned long,
35 struct sk_buff *);
36extern void rxrpc_kernel_intercept_rx_messages(struct socket *,
37 rxrpc_interceptor_t);
38extern struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *,
39 struct sockaddr_rxrpc *,
40 struct key *,
41 unsigned long,
42 gfp_t);
43extern int rxrpc_kernel_send_data(struct rxrpc_call *, struct msghdr *,
44 size_t);
45extern void rxrpc_kernel_abort_call(struct rxrpc_call *, u32);
46extern void rxrpc_kernel_end_call(struct rxrpc_call *);
47extern bool rxrpc_kernel_is_data_last(struct sk_buff *);
48extern u32 rxrpc_kernel_get_abort_code(struct sk_buff *);
49extern int rxrpc_kernel_get_error_number(struct sk_buff *);
50extern void rxrpc_kernel_data_delivered(struct sk_buff *);
51extern void rxrpc_kernel_free_skb(struct sk_buff *);
52extern struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *,
53 unsigned long);
54extern int rxrpc_kernel_reject_call(struct socket *);
55
56#endif /* __KERNEL__ */
57#endif /* _NET_RXRPC_H */
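
The interface above reads most easily next to a caller. Below is a minimal
sketch of an in-kernel client, loosely modelled on how a service such as AFS
might drive it; the socket, address, buffer and abort code are assumptions,
not part of this header.

/* Sketch only: issue a one-segment request over an rxrpc socket. */
static int example_rxrpc_request(struct socket *sock,
				 struct sockaddr_rxrpc *srx,
				 struct key *key,
				 void *request, size_t reqlen)
{
	struct rxrpc_call *call;
	struct msghdr msg;
	struct kvec iov[1];
	int ret;

	call = rxrpc_kernel_begin_call(sock, srx, key,
				       0 /* caller's call ID */, GFP_KERNEL);
	if (IS_ERR(call))
		return PTR_ERR(call);

	iov[0].iov_base = request;
	iov[0].iov_len = reqlen;
	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_iov = (struct iovec *) iov;
	msg.msg_iovlen = 1;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = 0;

	ret = rxrpc_kernel_send_data(call, &msg, reqlen);
	if (ret < 0)
		rxrpc_kernel_abort_call(call, 1 /* assumed abort code */);

	rxrpc_kernel_end_call(call);
	return ret;
}

Replies would then arrive via the interceptor installed with
rxrpc_kernel_intercept_rx_messages(), tagged with the skb marks above.
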
diff --git a/include/net/ax25.h b/include/net/ax25.h
index 47ff2f46e908..99a4e364c74a 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -263,8 +263,8 @@ static __inline__ void ax25_cb_put(ax25_cb *ax25)
263static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev) 263static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev)
264{ 264{
265 skb->dev = dev; 265 skb->dev = dev;
266 skb_reset_mac_header(skb);
266 skb->pkt_type = PACKET_HOST; 267 skb->pkt_type = PACKET_HOST;
267 skb->mac.raw = skb->data;
268 return htons(ETH_P_AX25); 268 return htons(ETH_P_AX25);
269} 269}
270 270
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 41456c148842..93ce272a5d27 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -709,6 +709,24 @@ struct hci_sco_hdr {
709 __u8 dlen; 709 __u8 dlen;
710} __attribute__ ((packed)); 710} __attribute__ ((packed));
711 711
712#ifdef __KERNEL__
713#include <linux/skbuff.h>
714static inline struct hci_event_hdr *hci_event_hdr(const struct sk_buff *skb)
715{
716 return (struct hci_event_hdr *)skb->data;
717}
718
719static inline struct hci_acl_hdr *hci_acl_hdr(const struct sk_buff *skb)
720{
721 return (struct hci_acl_hdr *)skb->data;
722}
723
724static inline struct hci_sco_hdr *hci_sco_hdr(const struct sk_buff *skb)
725{
726 return (struct hci_sco_hdr *)skb->data;
727}
728#endif
729
712/* Command opcode pack/unpack */ 730/* Command opcode pack/unpack */
713#define hci_opcode_pack(ogf, ocf) (__u16) ((ocf & 0x03ff)|(ogf << 10)) 731#define hci_opcode_pack(ogf, ocf) (__u16) ((ocf & 0x03ff)|(ogf << 10))
714#define hci_opcode_ogf(op) (op >> 10) 732#define hci_opcode_ogf(op) (op >> 10)
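
These helpers replace open-coded (struct hci_event_hdr *)skb->data casts in
the Bluetooth core and drivers. A sketch of the intended call pattern; the
handler itself is invented, only the accessor and HCI_EVENT_HDR_SIZE are
existing API:

/* Sketch only: peel the event header off an incoming HCI skb. */
static void example_hci_rx_event(struct sk_buff *skb)
{
	struct hci_event_hdr *hdr;

	if (skb->len < HCI_EVENT_HDR_SIZE)
		return;

	hdr = hci_event_hdr(skb);
	skb_pull(skb, HCI_EVENT_HDR_SIZE);

	/* hdr->evt identifies the event; hdr->plen bytes of event
	 * parameters now begin at skb->data */
}
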
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
new file mode 100644
index 000000000000..88171f8ce58a
--- /dev/null
+++ b/include/net/cfg80211.h
@@ -0,0 +1,40 @@
1#ifndef __NET_CFG80211_H
2#define __NET_CFG80211_H
3
4#include <linux/netlink.h>
5#include <linux/skbuff.h>
6#include <net/genetlink.h>
7
8/*
9 * 802.11 configuration in-kernel interface
10 *
11 * Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
12 */
13
14/* from net/wireless.h */
15struct wiphy;
16
17/**
18 * struct cfg80211_ops - backend description for wireless configuration
19 *
20 * This struct is registered by fullmac card drivers and/or wireless stacks
21 * in order to handle configuration requests on their interfaces.
22 *
23 * All callbacks except where otherwise noted should return 0
24 * on success or a negative error code.
25 *
26 * All operations are currently invoked under rtnl for consistency with the
27 * wireless extensions but this is subject to reevaluation as soon as this
28 * code is used more widely and we have a first user without wext.
29 *
30 * @add_virtual_intf: create a new virtual interface with the given name
31 *
32 * @del_virtual_intf: remove the virtual interface determined by ifindex.
33 */
34struct cfg80211_ops {
35 int (*add_virtual_intf)(struct wiphy *wiphy, char *name,
36 unsigned int type);
37 int (*del_virtual_intf)(struct wiphy *wiphy, int ifindex);
38};
39
40#endif /* __NET_CFG80211_H */
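
A fullmac driver backend built on this would fill in the ops table roughly as
below; the driver name is invented, and handing the table to wiphy_new()
(added elsewhere in this merge) is only hinted at in a comment.

/* Sketch only: a minimal cfg80211 backend. */
static int mydrv_add_virtual_intf(struct wiphy *wiphy, char *name,
				  unsigned int type)
{
	/* create the interface here; 0 on success, negative error code */
	return -EOPNOTSUPP;
}

static int mydrv_del_virtual_intf(struct wiphy *wiphy, int ifindex)
{
	return -EOPNOTSUPP;
}

static struct cfg80211_ops mydrv_cfg_ops = {
	.add_virtual_intf = mydrv_add_virtual_intf,
	.del_virtual_intf = mydrv_del_virtual_intf,
	/* the table is then passed to wiphy_new() at probe time */
};
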
diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
index 4c9522c5178f..4f90f5554fac 100644
--- a/include/net/cipso_ipv4.h
+++ b/include/net/cipso_ipv4.h
@@ -120,7 +120,7 @@ extern int cipso_v4_rbm_strictvalid;
120 */ 120 */
121 121
122#define CIPSO_V4_OPTEXIST(x) (IPCB(x)->opt.cipso != 0) 122#define CIPSO_V4_OPTEXIST(x) (IPCB(x)->opt.cipso != 0)
123#define CIPSO_V4_OPTPTR(x) ((x)->nh.raw + IPCB(x)->opt.cipso) 123#define CIPSO_V4_OPTPTR(x) (skb_network_header(x) + IPCB(x)->opt.cipso)
124 124
125/* 125/*
126 * DOI List Functions 126 * DOI List Functions
diff --git a/include/net/compat.h b/include/net/compat.h
index 9859b60280d5..406db242f73a 100644
--- a/include/net/compat.h
+++ b/include/net/compat.h
@@ -25,6 +25,7 @@ struct compat_cmsghdr {
25}; 25};
26 26
27extern int compat_sock_get_timestamp(struct sock *, struct timeval __user *); 27extern int compat_sock_get_timestamp(struct sock *, struct timeval __user *);
28extern int compat_sock_get_timestampns(struct sock *, struct timespec __user *);
28 29
29#else /* defined(CONFIG_COMPAT) */ 30#else /* defined(CONFIG_COMPAT) */
30#define compat_msghdr msghdr /* to avoid compiler warnings */ 31#define compat_msghdr msghdr /* to avoid compiler warnings */
diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h
index f01626cbbed6..30125119c950 100644
--- a/include/net/dn_fib.h
+++ b/include/net/dn_fib.h
@@ -148,17 +148,8 @@ extern void dn_fib_rules_cleanup(void);
148extern unsigned dnet_addr_type(__le16 addr); 148extern unsigned dnet_addr_type(__le16 addr);
149extern int dn_fib_lookup(struct flowi *fl, struct dn_fib_res *res); 149extern int dn_fib_lookup(struct flowi *fl, struct dn_fib_res *res);
150 150
151/*
152 * rtnetlink interface
153 */
154extern int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
155extern int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
156extern int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb); 151extern int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb);
157 152
158extern int dn_fib_rtm_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
159extern int dn_fib_rtm_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
160extern int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb);
161
162extern void dn_fib_free_info(struct dn_fib_info *fi); 153extern void dn_fib_free_info(struct dn_fib_info *fi);
163 154
164static inline void dn_fib_info_put(struct dn_fib_info *fi) 155static inline void dn_fib_info_put(struct dn_fib_info *fi)
diff --git a/include/net/dn_route.h b/include/net/dn_route.h
index a566944c4962..c10e8e7e59a7 100644
--- a/include/net/dn_route.h
+++ b/include/net/dn_route.h
@@ -18,7 +18,6 @@
18extern struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri); 18extern struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri);
19extern int dn_route_output_sock(struct dst_entry **pprt, struct flowi *, struct sock *sk, int flags); 19extern int dn_route_output_sock(struct dst_entry **pprt, struct flowi *, struct sock *sk, int flags);
20extern int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb); 20extern int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb);
21extern int dn_cache_getroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
22extern void dn_rt_cache_flush(int delay); 21extern void dn_rt_cache_flush(int delay);
23 22
24/* Masks for flags field */ 23/* Masks for flags field */
diff --git a/include/net/esp.h b/include/net/esp.h
index 713d039f4af7..d05d8d2c78f4 100644
--- a/include/net/esp.h
+++ b/include/net/esp.h
@@ -40,8 +40,6 @@ struct esp_data
40 } auth; 40 } auth;
41}; 41};
42 42
43extern int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len);
44extern int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer);
45extern void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len); 43extern void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len);
46 44
47static inline int esp_mac_digest(struct esp_data *esp, struct sk_buff *skb, 45static inline int esp_mac_digest(struct esp_data *esp, struct sk_buff *skb,
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index d585ea9fa97d..ed3a8872c6ca 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -5,7 +5,7 @@
5#include <linux/netdevice.h> 5#include <linux/netdevice.h>
6#include <linux/fib_rules.h> 6#include <linux/fib_rules.h>
7#include <net/flow.h> 7#include <net/flow.h>
8#include <net/netlink.h> 8#include <net/rtnetlink.h>
9 9
10struct fib_rule 10struct fib_rule
11{ 11{
@@ -19,6 +19,8 @@ struct fib_rule
19 u32 flags; 19 u32 flags;
20 u32 table; 20 u32 table;
21 u8 action; 21 u8 action;
22 u32 target;
23 struct fib_rule * ctarget;
22 struct rcu_head rcu; 24 struct rcu_head rcu;
23}; 25};
24 26
@@ -35,6 +37,8 @@ struct fib_rules_ops
35 struct list_head list; 37 struct list_head list;
36 int rule_size; 38 int rule_size;
37 int addr_size; 39 int addr_size;
40 int unresolved_rules;
41 int nr_goto_rules;
38 42
39 int (*action)(struct fib_rule *, 43 int (*action)(struct fib_rule *,
40 struct flowi *, int, 44 struct flowi *, int,
@@ -55,6 +59,10 @@ struct fib_rules_ops
55 u32 (*default_pref)(void); 59 u32 (*default_pref)(void);
56 size_t (*nlmsg_payload)(struct fib_rule *); 60 size_t (*nlmsg_payload)(struct fib_rule *);
57 61
62 /* Called after modifications to the rules set, must flush
63 * the route cache if one exists. */
64 void (*flush_cache)(void);
65
58 int nlgroup; 66 int nlgroup;
59 struct nla_policy *policy; 67 struct nla_policy *policy;
60 struct list_head *rules_list; 68 struct list_head *rules_list;
@@ -66,7 +74,8 @@ struct fib_rules_ops
66 [FRA_PRIORITY] = { .type = NLA_U32 }, \ 74 [FRA_PRIORITY] = { .type = NLA_U32 }, \
67 [FRA_FWMARK] = { .type = NLA_U32 }, \ 75 [FRA_FWMARK] = { .type = NLA_U32 }, \
68 [FRA_FWMASK] = { .type = NLA_U32 }, \ 76 [FRA_FWMASK] = { .type = NLA_U32 }, \
69 [FRA_TABLE] = { .type = NLA_U32 } 77 [FRA_TABLE] = { .type = NLA_U32 }, \
78 [FRA_GOTO] = { .type = NLA_U32 }
70 79
71static inline void fib_rule_get(struct fib_rule *rule) 80static inline void fib_rule_get(struct fib_rule *rule)
72{ 81{
@@ -98,11 +107,4 @@ extern int fib_rules_unregister(struct fib_rules_ops *);
98extern int fib_rules_lookup(struct fib_rules_ops *, 107extern int fib_rules_lookup(struct fib_rules_ops *,
99 struct flowi *, int flags, 108 struct flowi *, int flags,
100 struct fib_lookup_arg *); 109 struct fib_lookup_arg *);
101
102extern int fib_nl_newrule(struct sk_buff *,
103 struct nlmsghdr *, void *);
104extern int fib_nl_delrule(struct sk_buff *,
105 struct nlmsghdr *, void *);
106extern int fib_rules_dump(struct sk_buff *,
107 struct netlink_callback *, int);
108#endif 110#endif
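
The new target/ctarget fields and the FRA_GOTO policy entry implement rule
chaining, with unresolved_rules and nr_goto_rules tracking rules whose jump
target is not (yet) present. The flush_cache hook is easiest to see with a
concrete, assumed implementation; for IPv4 it would plausibly just invalidate
the routing cache:

/* Sketch only: how a family might satisfy ->flush_cache. */
static void example_fib4_flush_cache(void)
{
	rt_cache_flush(-1);	/* flush the IPv4 route cache at once */
}

static struct fib_rules_ops example_fib4_rules_ops = {
	/* lookup/configure callbacks omitted from this sketch */
	.flush_cache	= example_fib4_flush_cache,
};
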
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index c28e424f53d9..668056b4bb0b 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -19,6 +19,9 @@
19#include <linux/in6.h> 19#include <linux/in6.h>
20#include <linux/ipv6.h> 20#include <linux/ipv6.h>
21#include <linux/types.h> 21#include <linux/types.h>
22#include <linux/jhash.h>
23
24#include <net/inet_sock.h>
22 25
23#include <net/ipv6.h> 26#include <net/ipv6.h>
24 27
@@ -28,12 +31,11 @@ struct inet_hashinfo;
28static inline unsigned int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport, 31static inline unsigned int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport,
29 const struct in6_addr *faddr, const __be16 fport) 32 const struct in6_addr *faddr, const __be16 fport)
30{ 33{
31 unsigned int hashent = (lport ^ (__force u16)fport); 34 u32 ports = (lport ^ (__force u16)fport);
32 35
33 hashent ^= (__force u32)(laddr->s6_addr32[3] ^ faddr->s6_addr32[3]); 36 return jhash_3words((__force u32)laddr->s6_addr32[3],
34 hashent ^= hashent >> 16; 37 (__force u32)faddr->s6_addr32[3],
35 hashent ^= hashent >> 8; 38 ports, inet_ehash_secret);
36 return hashent;
37} 39}
38 40
39static inline int inet6_sk_ehashfn(const struct sock *sk) 41static inline int inet6_sk_ehashfn(const struct sock *sk)
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index 10117c8503e8..de8399a79774 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -114,13 +114,13 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb)
114{ 114{
115 switch (skb->protocol) { 115 switch (skb->protocol) {
116 case __constant_htons(ETH_P_IP): 116 case __constant_htons(ETH_P_IP):
117 if (skb->nh.raw + sizeof(struct iphdr) <= skb->tail) 117 if (skb->network_header + sizeof(struct iphdr) <= skb->tail)
118 return IP_ECN_set_ce(skb->nh.iph); 118 return IP_ECN_set_ce(ip_hdr(skb));
119 break; 119 break;
120 120
121 case __constant_htons(ETH_P_IPV6): 121 case __constant_htons(ETH_P_IPV6):
122 if (skb->nh.raw + sizeof(struct ipv6hdr) <= skb->tail) 122 if (skb->network_header + sizeof(struct ipv6hdr) <= skb->tail)
123 return IP6_ECN_set_ce(skb->nh.ipv6h); 123 return IP6_ECN_set_ce(ipv6_hdr(skb));
124 break; 124 break;
125 } 125 }
126 126
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index ce6da97bc848..62daf214931f 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -19,6 +19,7 @@
19 19
20#include <linux/string.h> 20#include <linux/string.h>
21#include <linux/types.h> 21#include <linux/types.h>
22#include <linux/jhash.h>
22 23
23#include <net/flow.h> 24#include <net/flow.h>
24#include <net/sock.h> 25#include <net/sock.h>
@@ -167,13 +168,15 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to,
167 168
168extern int inet_sk_rebuild_header(struct sock *sk); 169extern int inet_sk_rebuild_header(struct sock *sk);
169 170
171extern u32 inet_ehash_secret;
172extern void build_ehash_secret(void);
173
170static inline unsigned int inet_ehashfn(const __be32 laddr, const __u16 lport, 174static inline unsigned int inet_ehashfn(const __be32 laddr, const __u16 lport,
171 const __be32 faddr, const __be16 fport) 175 const __be32 faddr, const __be16 fport)
172{ 176{
173 unsigned int h = ((__force __u32)laddr ^ lport) ^ ((__force __u32)faddr ^ (__force __u32)fport); 177 return jhash_2words((__force __u32) laddr ^ (__force __u32) faddr,
174 h ^= h >> 16; 178 ((__u32) lport) << 16 | (__force __u32)fport,
175 h ^= h >> 8; 179 inet_ehash_secret);
176 return h;
177} 180}
178 181
179static inline int inet_sk_ehashfn(const struct sock *sk) 182static inline int inet_sk_ehashfn(const struct sock *sk)
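
Both ehash functions now mix the endpoints through jhash keyed by
inet_ehash_secret, so bucket placement is no longer predictable to a remote
peer that could previously construct colliding flows. The header only
declares the secret; a sketch of what build_ehash_secret() might do (the
real definition lives in the IPv4 core, and its writer serialisation is
elided here):

/* Sketch only: lazy, one-shot initialisation of the hash secret. */
u32 inet_ehash_secret __read_mostly;

void build_ehash_secret(void)
{
	u32 rnd;

	do {
		get_random_bytes(&rnd, sizeof(rnd));
	} while (rnd == 0);		/* zero means "not initialised" */

	if (!inet_ehash_secret)		/* keep the first published value */
		inet_ehash_secret = rnd;
}
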
diff --git a/include/net/ip.h b/include/net/ip.h
index e79c3e3aa4f6..bb207db03675 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -25,6 +25,7 @@
25#include <linux/types.h> 25#include <linux/types.h>
26#include <linux/ip.h> 26#include <linux/ip.h>
27#include <linux/in.h> 27#include <linux/in.h>
28#include <linux/skbuff.h>
28 29
29#include <net/inet_sock.h> 30#include <net/inet_sock.h>
30#include <net/snmp.h> 31#include <net/snmp.h>
@@ -43,6 +44,11 @@ struct inet_skb_parm
43#define IPSKB_REROUTED 16 44#define IPSKB_REROUTED 16
44}; 45};
45 46
47static inline unsigned int ip_hdrlen(const struct sk_buff *skb)
48{
49 return ip_hdr(skb)->ihl * 4;
50}
51
46struct ipcm_cookie 52struct ipcm_cookie
47{ 53{
48 __be32 addr; 54 __be32 addr;
@@ -74,7 +80,6 @@ struct msghdr;
74struct net_device; 80struct net_device;
75struct packet_type; 81struct packet_type;
76struct rtable; 82struct rtable;
77struct sk_buff;
78struct sockaddr; 83struct sockaddr;
79 84
80extern void ip_mc_dropsocket(struct sock *); 85extern void ip_mc_dropsocket(struct sock *);
@@ -161,6 +166,10 @@ DECLARE_SNMP_STAT(struct linux_mib, net_statistics);
161#define NET_ADD_STATS_BH(field, adnd) SNMP_ADD_STATS_BH(net_statistics, field, adnd) 166#define NET_ADD_STATS_BH(field, adnd) SNMP_ADD_STATS_BH(net_statistics, field, adnd)
162#define NET_ADD_STATS_USER(field, adnd) SNMP_ADD_STATS_USER(net_statistics, field, adnd) 167#define NET_ADD_STATS_USER(field, adnd) SNMP_ADD_STATS_USER(net_statistics, field, adnd)
163 168
169extern unsigned long snmp_fold_field(void *mib[], int offt);
170extern int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign);
171extern void snmp_mib_free(void *ptr[2]);
172
164extern int sysctl_local_port_range[2]; 173extern int sysctl_local_port_range[2];
165extern int sysctl_ip_default_ttl; 174extern int sysctl_ip_default_ttl;
166extern int sysctl_ip_nonlocal_bind; 175extern int sysctl_ip_nonlocal_bind;
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index cf355a3c2ad5..c48ea873f1e0 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -219,8 +219,6 @@ extern void fib6_init(void);
219 219
220extern void fib6_rules_init(void); 220extern void fib6_rules_init(void);
221extern void fib6_rules_cleanup(void); 221extern void fib6_rules_cleanup(void);
222extern int fib6_rules_dump(struct sk_buff *,
223 struct netlink_callback *);
224 222
225#endif 223#endif
226#endif 224#endif
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 4e927ebd1cb3..5456fdd6d047 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -116,12 +116,7 @@ extern void rt6_pmtu_discovery(struct in6_addr *daddr,
116 struct net_device *dev, 116 struct net_device *dev,
117 u32 pmtu); 117 u32 pmtu);
118 118
119struct nlmsghdr;
120struct netlink_callback; 119struct netlink_callback;
121extern int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb);
122extern int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
123extern int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
124extern int inet6_rtm_getroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
125 120
126struct rt6_rtnl_dump_arg 121struct rt6_rtnl_dump_arg
127{ 122{
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 36c635ca1aa6..5a4a0366c24f 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -215,10 +215,6 @@ extern void fib_select_default(const struct flowi *flp, struct fib_result *res);
215/* Exported by fib_frontend.c */ 215/* Exported by fib_frontend.c */
216extern struct nla_policy rtm_ipv4_policy[]; 216extern struct nla_policy rtm_ipv4_policy[];
217extern void ip_fib_init(void); 217extern void ip_fib_init(void);
218extern int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
219extern int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
220extern int inet_rtm_getroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
221extern int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb);
222extern int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, 218extern int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
223 struct net_device *dev, __be32 *spec_dst, u32 *itag); 219 struct net_device *dev, __be32 *spec_dst, u32 *itag);
224extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res); 220extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res);
@@ -235,8 +231,6 @@ extern __be32 __fib_res_prefsrc(struct fib_result *res);
235extern struct fib_table *fib_hash_init(u32 id); 231extern struct fib_table *fib_hash_init(u32 id);
236 232
237#ifdef CONFIG_IP_MULTIPLE_TABLES 233#ifdef CONFIG_IP_MULTIPLE_TABLES
238extern int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb);
239
240extern void __init fib4_rules_init(void); 234extern void __init fib4_rules_init(void);
241 235
242#ifdef CONFIG_NET_CLS_ROUTE 236#ifdef CONFIG_NET_CLS_ROUTE
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 00328b71a08c..f70afef9c3cc 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -166,13 +166,6 @@ DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
166 if (is_udplite) SNMP_INC_STATS_USER(udplite_stats_in6, field); \ 166 if (is_udplite) SNMP_INC_STATS_USER(udplite_stats_in6, field); \
167 else SNMP_INC_STATS_USER(udp_stats_in6, field); } while(0) 167 else SNMP_INC_STATS_USER(udp_stats_in6, field); } while(0)
168 168
169int snmp6_register_dev(struct inet6_dev *idev);
170int snmp6_unregister_dev(struct inet6_dev *idev);
171int snmp6_alloc_dev(struct inet6_dev *idev);
172int snmp6_free_dev(struct inet6_dev *idev);
173int snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign);
174void snmp6_mib_free(void *ptr[2]);
175
176struct ip6_ra_chain 169struct ip6_ra_chain
177{ 170{
178 struct ip6_ra_chain *next; 171 struct ip6_ra_chain *next;
@@ -605,8 +598,20 @@ extern int udplite6_proc_init(void);
605extern void udplite6_proc_exit(void); 598extern void udplite6_proc_exit(void);
606extern int ipv6_misc_proc_init(void); 599extern int ipv6_misc_proc_init(void);
607extern void ipv6_misc_proc_exit(void); 600extern void ipv6_misc_proc_exit(void);
601extern int snmp6_register_dev(struct inet6_dev *idev);
602extern int snmp6_unregister_dev(struct inet6_dev *idev);
608 603
609extern struct rt6_statistics rt6_stats; 604extern struct rt6_statistics rt6_stats;
605#else
606static inline int snmp6_register_dev(struct inet6_dev *idev)
607{
608 return 0;
609}
610
611static inline int snmp6_unregister_dev(struct inet6_dev *idev)
612{
613 return 0;
614}
610#endif 615#endif
611 616
612#ifdef CONFIG_SYSCTL 617#ifdef CONFIG_SYSCTL
diff --git a/include/net/ipx.h b/include/net/ipx.h
index c6b2ee610866..4cc0b4eca948 100644
--- a/include/net/ipx.h
+++ b/include/net/ipx.h
@@ -43,7 +43,7 @@ struct ipxhdr {
43 43
44static __inline__ struct ipxhdr *ipx_hdr(struct sk_buff *skb) 44static __inline__ struct ipxhdr *ipx_hdr(struct sk_buff *skb)
45{ 45{
46 return (struct ipxhdr *)skb->h.raw; 46 return (struct ipxhdr *)skb_transport_header(skb);
47} 47}
48 48
49struct ipx_interface { 49struct ipx_interface {
diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h
index 8a830188354d..f23d07ca7c59 100644
--- a/include/net/iw_handler.h
+++ b/include/net/iw_handler.h
@@ -431,26 +431,7 @@ struct iw_public_data {
431 * Those may be called only within the kernel. 431 * Those may be called only within the kernel.
432 */ 432 */
433 433
434/* First : function strictly used inside the kernel */ 434/* functions that may be called by driver modules */
435
436/* Handle /proc/net/wireless, called in net/code/dev.c */
437extern int dev_get_wireless_info(char * buffer, char **start, off_t offset,
438 int length);
439
440/* Handle IOCTLs, called in net/core/dev.c */
441extern int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd);
442
443/* Handle RtNetlink requests, called in net/core/rtnetlink.c */
444extern int wireless_rtnetlink_set(struct net_device * dev,
445 char * data,
446 int len);
447extern int wireless_rtnetlink_get(struct net_device * dev,
448 char * data,
449 int len,
450 char ** p_buf,
451 int * p_len);
452
453/* Second : functions that may be called by driver modules */
454 435
455/* Send a single event to user space */ 436/* Send a single event to user space */
456extern void wireless_send_event(struct net_device * dev, 437extern void wireless_send_event(struct net_device * dev,
diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h
index aa33a477c3fb..4a8f58b17e43 100644
--- a/include/net/llc_pdu.h
+++ b/include/net/llc_pdu.h
@@ -203,7 +203,7 @@ struct llc_pdu_sn {
203 203
204static inline struct llc_pdu_sn *llc_pdu_sn_hdr(struct sk_buff *skb) 204static inline struct llc_pdu_sn *llc_pdu_sn_hdr(struct sk_buff *skb)
205{ 205{
206 return (struct llc_pdu_sn *)skb->nh.raw; 206 return (struct llc_pdu_sn *)skb_network_header(skb);
207} 207}
208 208
209/* Un-numbered PDU format (3 bytes in length) */ 209/* Un-numbered PDU format (3 bytes in length) */
@@ -215,12 +215,7 @@ struct llc_pdu_un {
215 215
216static inline struct llc_pdu_un *llc_pdu_un_hdr(struct sk_buff *skb) 216static inline struct llc_pdu_un *llc_pdu_un_hdr(struct sk_buff *skb)
217{ 217{
218 return (struct llc_pdu_un *)skb->nh.raw; 218 return (struct llc_pdu_un *)skb_network_header(skb);
219}
220
221static inline void *llc_set_pdu_hdr(struct sk_buff *skb, void *ptr)
222{
223 return skb->nh.raw = ptr;
224} 219}
225 220
226/** 221/**
@@ -237,7 +232,11 @@ static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type,
237 u8 ssap, u8 dsap, u8 cr) 232 u8 ssap, u8 dsap, u8 cr)
238{ 233{
239 const int hlen = type == LLC_PDU_TYPE_U ? 3 : 4; 234 const int hlen = type == LLC_PDU_TYPE_U ? 3 : 4;
240 struct llc_pdu_un *pdu = llc_set_pdu_hdr(skb, skb_push(skb, hlen)); 235 struct llc_pdu_un *pdu;
236
237 skb_push(skb, hlen);
238 skb_reset_network_header(skb);
239 pdu = llc_pdu_un_hdr(skb);
241 pdu->dsap = dsap; 240 pdu->dsap = dsap;
242 pdu->ssap = ssap; 241 pdu->ssap = ssap;
243 pdu->ssap |= cr; 242 pdu->ssap |= cr;
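
This hunk shows the conversion pattern applied throughout the merge: code
stops assigning skb->nh.raw by hand and instead pairs skb_push() with
skb_reset_network_header(), then reads the header back through the typed
accessor. Condensed onto an invented header, with only the helper calls
being real API:

/* Sketch only: build space for an hlen-byte network header. */
static void example_push_hdr(struct sk_buff *skb, int hlen)
{
	skb_push(skb, hlen);		/* make room at the front */
	skb_reset_network_header(skb);	/* was: skb->nh.raw = skb->data */

	memset(skb_network_header(skb), 0, hlen);	/* typed read-back */
}
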
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index ad7fe1121412..a4f26187fc1a 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -24,6 +24,7 @@
24 24
25#include <linux/err.h> 25#include <linux/err.h>
26#include <linux/sysctl.h> 26#include <linux/sysctl.h>
27#include <net/rtnetlink.h>
27 28
28#define NUD_IN_TIMER (NUD_INCOMPLETE|NUD_REACHABLE|NUD_DELAY|NUD_PROBE) 29#define NUD_IN_TIMER (NUD_INCOMPLETE|NUD_REACHABLE|NUD_DELAY|NUD_PROBE)
29#define NUD_VALID (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE|NUD_PROBE|NUD_STALE|NUD_DELAY) 30#define NUD_VALID (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE|NUD_PROBE|NUD_STALE|NUD_DELAY)
@@ -213,16 +214,7 @@ extern void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
213extern struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, const void *key, struct net_device *dev, int creat); 214extern struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, const void *key, struct net_device *dev, int creat);
214extern int pneigh_delete(struct neigh_table *tbl, const void *key, struct net_device *dev); 215extern int pneigh_delete(struct neigh_table *tbl, const void *key, struct net_device *dev);
215 216
216struct netlink_callback;
217struct nlmsghdr;
218extern int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb);
219extern int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
220extern int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
221extern void neigh_app_ns(struct neighbour *n); 217extern void neigh_app_ns(struct neighbour *n);
222
223extern int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb);
224extern int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
225
226extern void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie); 218extern void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie);
227extern void __neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *)); 219extern void __neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *));
228extern void pneigh_for_each(struct neigh_table *tbl, void (*cb)(struct pneigh_entry *)); 220extern void pneigh_for_each(struct neigh_table *tbl, void (*cb)(struct pneigh_entry *));
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 0e690e34c00b..1c6b8bd09b9a 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -250,6 +250,11 @@ static inline int nf_ct_is_dying(struct nf_conn *ct)
250 return test_bit(IPS_DYING_BIT, &ct->status); 250 return test_bit(IPS_DYING_BIT, &ct->status);
251} 251}
252 252
253static inline int nf_ct_is_untracked(const struct sk_buff *skb)
254{
255 return (skb->nfct == &nf_conntrack_untracked.ct_general);
256}
257
253extern unsigned int nf_conntrack_htable_size; 258extern unsigned int nf_conntrack_htable_size;
254extern int nf_conntrack_checksum; 259extern int nf_conntrack_checksum;
255extern atomic_t nf_conntrack_count; 260extern atomic_t nf_conntrack_count;
diff --git a/include/net/netfilter/nf_conntrack_compat.h b/include/net/netfilter/nf_conntrack_compat.h
deleted file mode 100644
index 6f84c1f7fcd4..000000000000
--- a/include/net/netfilter/nf_conntrack_compat.h
+++ /dev/null
@@ -1,145 +0,0 @@
1#ifndef _NF_CONNTRACK_COMPAT_H
2#define _NF_CONNTRACK_COMPAT_H
3
4#ifdef __KERNEL__
5
6#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
7
8#include <linux/netfilter_ipv4/ip_conntrack.h>
9#include <linux/socket.h>
10
11#ifdef CONFIG_IP_NF_CONNTRACK_MARK
12static inline u_int32_t *nf_ct_get_mark(const struct sk_buff *skb,
13 u_int32_t *ctinfo)
14{
15 struct ip_conntrack *ct = ip_conntrack_get(skb, ctinfo);
16
17 if (ct)
18 return &ct->mark;
19 else
20 return NULL;
21}
22#endif /* CONFIG_IP_NF_CONNTRACK_MARK */
23
24#ifdef CONFIG_IP_NF_CONNTRACK_SECMARK
25static inline u_int32_t *nf_ct_get_secmark(const struct sk_buff *skb,
26 u_int32_t *ctinfo)
27{
28 struct ip_conntrack *ct = ip_conntrack_get(skb, ctinfo);
29
30 if (ct)
31 return &ct->secmark;
32 else
33 return NULL;
34}
35#endif /* CONFIG_IP_NF_CONNTRACK_SECMARK */
36
37#ifdef CONFIG_IP_NF_CT_ACCT
38static inline struct ip_conntrack_counter *
39nf_ct_get_counters(const struct sk_buff *skb)
40{
41 enum ip_conntrack_info ctinfo;
42 struct ip_conntrack *ct = ip_conntrack_get(skb, &ctinfo);
43
44 if (ct)
45 return ct->counters;
46 else
47 return NULL;
48}
49#endif /* CONFIG_IP_NF_CT_ACCT */
50
51static inline int nf_ct_is_untracked(const struct sk_buff *skb)
52{
53 return (skb->nfct == &ip_conntrack_untracked.ct_general);
54}
55
56static inline void nf_ct_untrack(struct sk_buff *skb)
57{
58 skb->nfct = &ip_conntrack_untracked.ct_general;
59}
60
61static inline int nf_ct_get_ctinfo(const struct sk_buff *skb,
62 enum ip_conntrack_info *ctinfo)
63{
64 struct ip_conntrack *ct = ip_conntrack_get(skb, ctinfo);
65 return (ct != NULL);
66}
67
68static inline int nf_ct_l3proto_try_module_get(unsigned short l3proto)
69{
70 need_conntrack();
71 return l3proto == PF_INET ? 0 : -1;
72}
73
74static inline void nf_ct_l3proto_module_put(unsigned short l3proto)
75{
76}
77
78#else /* CONFIG_IP_NF_CONNTRACK */
79
80#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
81#include <net/netfilter/nf_conntrack.h>
82
83#ifdef CONFIG_NF_CONNTRACK_MARK
84
85static inline u_int32_t *nf_ct_get_mark(const struct sk_buff *skb,
86 u_int32_t *ctinfo)
87{
88 struct nf_conn *ct = nf_ct_get(skb, ctinfo);
89
90 if (ct)
91 return &ct->mark;
92 else
93 return NULL;
94}
95#endif /* CONFIG_NF_CONNTRACK_MARK */
96
97#ifdef CONFIG_NF_CONNTRACK_SECMARK
98static inline u_int32_t *nf_ct_get_secmark(const struct sk_buff *skb,
99 u_int32_t *ctinfo)
100{
101 struct nf_conn *ct = nf_ct_get(skb, ctinfo);
102
103 if (ct)
104 return &ct->secmark;
105 else
106 return NULL;
107}
108#endif /* CONFIG_NF_CONNTRACK_MARK */
109
110#ifdef CONFIG_NF_CT_ACCT
111static inline struct ip_conntrack_counter *
112nf_ct_get_counters(const struct sk_buff *skb)
113{
114 enum ip_conntrack_info ctinfo;
115 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
116
117 if (ct)
118 return ct->counters;
119 else
120 return NULL;
121}
122#endif /* CONFIG_NF_CT_ACCT */
123
124static inline int nf_ct_is_untracked(const struct sk_buff *skb)
125{
126 return (skb->nfct == &nf_conntrack_untracked.ct_general);
127}
128
129static inline void nf_ct_untrack(struct sk_buff *skb)
130{
131 skb->nfct = &nf_conntrack_untracked.ct_general;
132}
133
134static inline int nf_ct_get_ctinfo(const struct sk_buff *skb,
135 enum ip_conntrack_info *ctinfo)
136{
137 struct nf_conn *ct = nf_ct_get(skb, ctinfo);
138 return (ct != NULL);
139}
140
141#endif /* CONFIG_IP_NF_CONNTRACK */
142
143#endif /* __KERNEL__ */
144
145#endif /* _NF_CONNTRACK_COMPAT_H */
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 85634e1865c3..9fb906688ffa 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -27,6 +27,9 @@ extern unsigned int nf_conntrack_in(int pf,
27extern int nf_conntrack_init(void); 27extern int nf_conntrack_init(void);
28extern void nf_conntrack_cleanup(void); 28extern void nf_conntrack_cleanup(void);
29 29
30extern int nf_conntrack_proto_init(void);
31extern void nf_conntrack_proto_fini(void);
32
30struct nf_conntrack_l3proto; 33struct nf_conntrack_l3proto;
31extern struct nf_conntrack_l3proto *nf_ct_find_l3proto(u_int16_t pf); 34extern struct nf_conntrack_l3proto *nf_ct_find_l3proto(u_int16_t pf);
32/* Like above, but you already have conntrack read lock. */ 35/* Like above, but you already have conntrack read lock. */
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index b62a8a9ec9d8..811c9073c532 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -20,30 +20,8 @@ DECLARE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
20#define CONNTRACK_ECACHE(x) (__get_cpu_var(nf_conntrack_ecache).x) 20#define CONNTRACK_ECACHE(x) (__get_cpu_var(nf_conntrack_ecache).x)
21 21
22extern struct atomic_notifier_head nf_conntrack_chain; 22extern struct atomic_notifier_head nf_conntrack_chain;
23extern struct atomic_notifier_head nf_conntrack_expect_chain; 23extern int nf_conntrack_register_notifier(struct notifier_block *nb);
24 24extern int nf_conntrack_unregister_notifier(struct notifier_block *nb);
25static inline int nf_conntrack_register_notifier(struct notifier_block *nb)
26{
27 return atomic_notifier_chain_register(&nf_conntrack_chain, nb);
28}
29
30static inline int nf_conntrack_unregister_notifier(struct notifier_block *nb)
31{
32 return atomic_notifier_chain_unregister(&nf_conntrack_chain, nb);
33}
34
35static inline int
36nf_conntrack_expect_register_notifier(struct notifier_block *nb)
37{
38 return atomic_notifier_chain_register(&nf_conntrack_expect_chain, nb);
39}
40
41static inline int
42nf_conntrack_expect_unregister_notifier(struct notifier_block *nb)
43{
44 return atomic_notifier_chain_unregister(&nf_conntrack_expect_chain,
45 nb);
46}
47 25
48extern void nf_ct_deliver_cached_events(const struct nf_conn *ct); 26extern void nf_ct_deliver_cached_events(const struct nf_conn *ct);
49extern void __nf_ct_event_cache_init(struct nf_conn *ct); 27extern void __nf_ct_event_cache_init(struct nf_conn *ct);
@@ -71,6 +49,10 @@ static inline void nf_conntrack_event(enum ip_conntrack_events event,
71 atomic_notifier_call_chain(&nf_conntrack_chain, event, ct); 49 atomic_notifier_call_chain(&nf_conntrack_chain, event, ct);
72} 50}
73 51
52extern struct atomic_notifier_head nf_conntrack_expect_chain;
53extern int nf_conntrack_expect_register_notifier(struct notifier_block *nb);
54extern int nf_conntrack_expect_unregister_notifier(struct notifier_block *nb);
55
74static inline void 56static inline void
75nf_conntrack_expect_event(enum ip_conntrack_expect_events event, 57nf_conntrack_expect_event(enum ip_conntrack_expect_events event,
76 struct nf_conntrack_expect *exp) 58 struct nf_conntrack_expect *exp)
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index eb575cbd4c95..f32f714e5d92 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -90,10 +90,7 @@ extern struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX];
90/* Protocol registration. */ 90/* Protocol registration. */
91extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto); 91extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto);
92extern void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto); 92extern void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto);
93 93extern struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto);
94extern struct nf_conntrack_l3proto *
95nf_ct_l3proto_find_get(u_int16_t l3proto);
96
97extern void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p); 94extern void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p);
98 95
99/* Existing built-in protocols */ 96/* Existing built-in protocols */
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 8415182ec126..f46cb930414c 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -97,7 +97,6 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6;
97extern struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; 97extern struct nf_conntrack_l4proto nf_conntrack_l4proto_generic;
98 98
99#define MAX_NF_CT_PROTO 256 99#define MAX_NF_CT_PROTO 256
100extern struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX];
101 100
102extern struct nf_conntrack_l4proto * 101extern struct nf_conntrack_l4proto *
103__nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto); 102__nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto);
diff --git a/include/net/netfilter/nf_nat_rule.h b/include/net/netfilter/nf_nat_rule.h
index f191c672bcc6..e76565459ad9 100644
--- a/include/net/netfilter/nf_nat_rule.h
+++ b/include/net/netfilter/nf_nat_rule.h
@@ -4,16 +4,6 @@
4#include <net/netfilter/nf_nat.h> 4#include <net/netfilter/nf_nat.h>
5#include <linux/netfilter_ipv4/ip_tables.h> 5#include <linux/netfilter_ipv4/ip_tables.h>
6 6
7/* Compatibility definitions for ipt_FOO modules */
8#define ip_nat_range nf_nat_range
9#define ip_conntrack_tuple nf_conntrack_tuple
10#define ip_conntrack_get nf_ct_get
11#define ip_conntrack nf_conn
12#define ip_nat_setup_info nf_nat_setup_info
13#define ip_nat_multi_range_compat nf_nat_multi_range_compat
14#define ip_ct_iterate_cleanup nf_ct_iterate_cleanup
15#define IP_NF_ASSERT NF_CT_ASSERT
16
17extern int nf_nat_rule_init(void) __init; 7extern int nf_nat_rule_init(void) __init;
18extern void nf_nat_rule_cleanup(void); 8extern void nf_nat_rule_cleanup(void);
19extern int nf_nat_rule_find(struct sk_buff **pskb, 9extern int nf_nat_rule_find(struct sk_buff **pskb,
diff --git a/include/net/netlink.h b/include/net/netlink.h
index bcaf67b7a19d..0bf325c29aff 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -171,6 +171,7 @@ enum {
171 NLA_MSECS, 171 NLA_MSECS,
172 NLA_NESTED, 172 NLA_NESTED,
173 NLA_NUL_STRING, 173 NLA_NUL_STRING,
174 NLA_BINARY,
174 __NLA_TYPE_MAX, 175 __NLA_TYPE_MAX,
175}; 176};
176 177
@@ -188,12 +189,13 @@ enum {
188 * NLA_STRING Maximum length of string 189 * NLA_STRING Maximum length of string
189 * NLA_NUL_STRING Maximum length of string (excluding NUL) 190 * NLA_NUL_STRING Maximum length of string (excluding NUL)
190 * NLA_FLAG Unused 191 * NLA_FLAG Unused
192 * NLA_BINARY Maximum length of attribute payload
191 * All other Exact length of attribute payload 193 * All other Exact length of attribute payload
192 * 194 *
193 * Example: 195 * Example:
194 * static struct nla_policy my_policy[ATTR_MAX+1] __read_mostly = { 196 * static struct nla_policy my_policy[ATTR_MAX+1] __read_mostly = {
195 * [ATTR_FOO] = { .type = NLA_U16 }, 197 * [ATTR_FOO] = { .type = NLA_U16 },
196 * [ATTR_BAR] = { .type = NLA_STRING, len = BARSIZ }, 198 * [ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ },
197 * [ATTR_BAZ] = { .len = sizeof(struct mystruct) }, 199 * [ATTR_BAZ] = { .len = sizeof(struct mystruct) },
198 * }; 200 * };
199 */ 201 */
@@ -214,9 +216,7 @@ struct nl_info {
214 216
215extern void netlink_run_queue(struct sock *sk, unsigned int *qlen, 217extern void netlink_run_queue(struct sock *sk, unsigned int *qlen,
216 int (*cb)(struct sk_buff *, 218 int (*cb)(struct sk_buff *,
217 struct nlmsghdr *, int *)); 219 struct nlmsghdr *));
218extern void netlink_queue_skip(struct nlmsghdr *nlh,
219 struct sk_buff *skb);
220extern int nlmsg_notify(struct sock *sk, struct sk_buff *skb, 220extern int nlmsg_notify(struct sock *sk, struct sk_buff *skb,
221 u32 pid, unsigned int group, int report, 221 u32 pid, unsigned int group, int report,
222 gfp_t flags); 222 gfp_t flags);
@@ -525,7 +525,7 @@ static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags)
525 */ 525 */
526static inline int nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh) 526static inline int nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh)
527{ 527{
528 nlh->nlmsg_len = skb->tail - (unsigned char *) nlh; 528 nlh->nlmsg_len = skb_tail_pointer(skb) - (unsigned char *)nlh;
529 529
530 return skb->len; 530 return skb->len;
531} 531}
@@ -538,7 +538,7 @@ static inline int nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh)
538 */ 538 */
539static inline void *nlmsg_get_pos(struct sk_buff *skb) 539static inline void *nlmsg_get_pos(struct sk_buff *skb)
540{ 540{
541 return skb->tail; 541 return skb_tail_pointer(skb);
542} 542}
543 543
544/** 544/**
@@ -548,7 +548,7 @@ static inline void *nlmsg_get_pos(struct sk_buff *skb)
548 * 548 *
549 * Trims the message to the provided mark. Returns -1. 549 * Trims the message to the provided mark. Returns -1.
550 */ 550 */
551static inline int nlmsg_trim(struct sk_buff *skb, void *mark) 551static inline int nlmsg_trim(struct sk_buff *skb, const void *mark)
552{ 552{
553 if (mark) 553 if (mark)
554 skb_trim(skb, (unsigned char *) mark - skb->data); 554 skb_trim(skb, (unsigned char *) mark - skb->data);
@@ -940,7 +940,7 @@ static inline unsigned long nla_get_msecs(struct nlattr *nla)
940 */ 940 */
941static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype) 941static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype)
942{ 942{
943 struct nlattr *start = (struct nlattr *) skb->tail; 943 struct nlattr *start = (struct nlattr *)skb_tail_pointer(skb);
944 944
945 if (nla_put(skb, attrtype, 0, NULL) < 0) 945 if (nla_put(skb, attrtype, 0, NULL) < 0)
946 return NULL; 946 return NULL;
@@ -960,7 +960,7 @@ static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype)
960 */ 960 */
961static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start) 961static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start)
962{ 962{
963 start->nla_len = skb->tail - (unsigned char *) start; 963 start->nla_len = skb_tail_pointer(skb) - (unsigned char *)start;
964 return skb->len; 964 return skb->len;
965} 965}
966 966
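
Unlike the exact-length default, NLA_BINARY validates only a maximum payload
length. Extending the policy example from the comment above (ATTR_KEY and
KEYSIZ are invented names):

static struct nla_policy example_policy[ATTR_MAX + 1] __read_mostly = {
	[ATTR_FOO] = { .type = NLA_U16 },
	[ATTR_KEY] = { .type = NLA_BINARY, .len = KEYSIZ }, /* at most KEYSIZ bytes */
};
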
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 02647fe3d74b..4129df708079 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -326,18 +326,18 @@ static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer)
326 case TCF_LAYER_LINK: 326 case TCF_LAYER_LINK:
327 return skb->data; 327 return skb->data;
328 case TCF_LAYER_NETWORK: 328 case TCF_LAYER_NETWORK:
329 return skb->nh.raw; 329 return skb_network_header(skb);
330 case TCF_LAYER_TRANSPORT: 330 case TCF_LAYER_TRANSPORT:
331 return skb->h.raw; 331 return skb_transport_header(skb);
332 } 332 }
333 333
334 return NULL; 334 return NULL;
335} 335}
336 336
337static inline int tcf_valid_offset(struct sk_buff *skb, unsigned char *ptr, 337static inline int tcf_valid_offset(const struct sk_buff *skb,
338 int len) 338 const unsigned char *ptr, const int len)
339{ 339{
340 return unlikely((ptr + len) < skb->tail && ptr > skb->head); 340 return unlikely((ptr + len) < skb_tail_pointer(skb) && ptr > skb->head);
341} 341}
342 342
343#ifdef CONFIG_NET_CLS_IND 343#ifdef CONFIG_NET_CLS_IND
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index f6afee73235d..5754d53d9efc 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -2,6 +2,7 @@
2#define __NET_PKT_SCHED_H 2#define __NET_PKT_SCHED_H
3 3
4#include <linux/jiffies.h> 4#include <linux/jiffies.h>
5#include <linux/ktime.h>
5#include <net/sch_generic.h> 6#include <net/sch_generic.h>
6 7
7struct qdisc_walker 8struct qdisc_walker
@@ -12,8 +13,6 @@ struct qdisc_walker
12 int (*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *); 13 int (*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *);
13}; 14};
14 15
15extern rwlock_t qdisc_tree_lock;
16
17#define QDISC_ALIGNTO 32 16#define QDISC_ALIGNTO 32
18#define QDISC_ALIGN(len) (((len) + QDISC_ALIGNTO-1) & ~(QDISC_ALIGNTO-1)) 17#define QDISC_ALIGN(len) (((len) + QDISC_ALIGNTO-1) & ~(QDISC_ALIGNTO-1))
19 18
@@ -37,175 +36,38 @@ static inline void *qdisc_priv(struct Qdisc *q)
37 The things are not so bad, because we may use artificial 36 The things are not so bad, because we may use artificial
38 clock evaluated by integration of network data flow 37 clock evaluated by integration of network data flow
39 in the most critical places. 38 in the most critical places.
40
41 Note: we do not use fastgettimeofday.
42 The reason is that, when it is not the same thing as
43 gettimeofday, it returns invalid timestamp, which is
44 not updated, when net_bh is active.
45 */
46
47/* General note about internal clock.
48
49 Any clock source returns time intervals, measured in units
50 close to 1usec. With source CONFIG_NET_SCH_CLK_GETTIMEOFDAY it is precisely
51 microseconds, otherwise something close but different chosen to minimize
52 arithmetic cost. Ratio usec/internal untis in form nominator/denominator
53 may be read from /proc/net/psched.
54 */ 39 */
55 40
56
57#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
58
59typedef struct timeval psched_time_t;
60typedef long psched_tdiff_t;
61
62#define PSCHED_GET_TIME(stamp) do_gettimeofday(&(stamp))
63#define PSCHED_US2JIFFIE(usecs) usecs_to_jiffies(usecs)
64#define PSCHED_JIFFIE2US(delay) jiffies_to_usecs(delay)
65
66#else /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */
67
68typedef u64 psched_time_t; 41typedef u64 psched_time_t;
69typedef long psched_tdiff_t; 42typedef long psched_tdiff_t;
70 43
71#ifdef CONFIG_NET_SCH_CLK_JIFFIES 44/* Avoid doing 64 bit divide by 1000 */
72 45#define PSCHED_US2NS(x) ((s64)(x) << 10)
73#if HZ < 96 46#define PSCHED_NS2US(x) ((x) >> 10)
74#define PSCHED_JSCALE 14
75#elif HZ >= 96 && HZ < 192
76#define PSCHED_JSCALE 13
77#elif HZ >= 192 && HZ < 384
78#define PSCHED_JSCALE 12
79#elif HZ >= 384 && HZ < 768
80#define PSCHED_JSCALE 11
81#elif HZ >= 768
82#define PSCHED_JSCALE 10
83#endif
84 47
85#define PSCHED_GET_TIME(stamp) ((stamp) = (get_jiffies_64()<<PSCHED_JSCALE)) 48#define PSCHED_TICKS_PER_SEC PSCHED_NS2US(NSEC_PER_SEC)
86#define PSCHED_US2JIFFIE(delay) (((delay)+(1<<PSCHED_JSCALE)-1)>>PSCHED_JSCALE) 49#define PSCHED_PASTPERFECT 0
87#define PSCHED_JIFFIE2US(delay) ((delay)<<PSCHED_JSCALE) 50
88 51static inline psched_time_t psched_get_time(void)
89#endif /* CONFIG_NET_SCH_CLK_JIFFIES */
90#ifdef CONFIG_NET_SCH_CLK_CPU
91#include <asm/timex.h>
92
93extern psched_tdiff_t psched_clock_per_hz;
94extern int psched_clock_scale;
95extern psched_time_t psched_time_base;
96extern cycles_t psched_time_mark;
97
98#define PSCHED_GET_TIME(stamp) \
99do { \
100 cycles_t cur = get_cycles(); \
101 if (sizeof(cycles_t) == sizeof(u32)) { \
102 if (cur <= psched_time_mark) \
103 psched_time_base += 0x100000000ULL; \
104 psched_time_mark = cur; \
105 (stamp) = (psched_time_base + cur)>>psched_clock_scale; \
106 } else { \
107 (stamp) = cur>>psched_clock_scale; \
108 } \
109} while (0)
110#define PSCHED_US2JIFFIE(delay) (((delay)+psched_clock_per_hz-1)/psched_clock_per_hz)
111#define PSCHED_JIFFIE2US(delay) ((delay)*psched_clock_per_hz)
112
113#endif /* CONFIG_NET_SCH_CLK_CPU */
114
115#endif /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */
116
117#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
118#define PSCHED_TDIFF(tv1, tv2) \
119({ \
120 int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \
121 int __delta = (tv1).tv_usec - (tv2).tv_usec; \
122 if (__delta_sec) { \
123 switch (__delta_sec) { \
124 default: \
125 __delta = 0; \
126 case 2: \
127 __delta += USEC_PER_SEC; \
128 case 1: \
129 __delta += USEC_PER_SEC; \
130 } \
131 } \
132 __delta; \
133})
134
135static inline int
136psched_tod_diff(int delta_sec, int bound)
137{ 52{
138 int delta; 53 return PSCHED_NS2US(ktime_to_ns(ktime_get()));
139
140 if (bound <= USEC_PER_SEC || delta_sec > (0x7FFFFFFF/USEC_PER_SEC)-1)
141 return bound;
142 delta = delta_sec * USEC_PER_SEC;
143 if (delta > bound || delta < 0)
144 delta = bound;
145 return delta;
146} 54}
147 55
148#define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \ 56static inline psched_tdiff_t
149({ \ 57psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2, psched_time_t bound)
150 int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \ 58{
151 int __delta = (tv1).tv_usec - (tv2).tv_usec; \ 59 return min(tv1 - tv2, bound);
152 switch (__delta_sec) { \ 60}
153 default: \
154 __delta = psched_tod_diff(__delta_sec, bound); break; \
155 case 2: \
156 __delta += USEC_PER_SEC; \
157 case 1: \
158 __delta += USEC_PER_SEC; \
159 case 0: \
160 if (__delta > bound || __delta < 0) \
161 __delta = bound; \
162 } \
163 __delta; \
164})
165
166#define PSCHED_TLESS(tv1, tv2) (((tv1).tv_usec < (tv2).tv_usec && \
167 (tv1).tv_sec <= (tv2).tv_sec) || \
168 (tv1).tv_sec < (tv2).tv_sec)
169
170#define PSCHED_TADD2(tv, delta, tv_res) \
171({ \
172 int __delta = (tv).tv_usec + (delta); \
173 (tv_res).tv_sec = (tv).tv_sec; \
174 while (__delta >= USEC_PER_SEC) { (tv_res).tv_sec++; __delta -= USEC_PER_SEC; } \
175 (tv_res).tv_usec = __delta; \
176})
177
178#define PSCHED_TADD(tv, delta) \
179({ \
180 (tv).tv_usec += (delta); \
181 while ((tv).tv_usec >= USEC_PER_SEC) { (tv).tv_sec++; \
182 (tv).tv_usec -= USEC_PER_SEC; } \
183})
184
185/* Set/check that time is in the "past perfect";
186 it depends on concrete representation of system time
187 */
188
189#define PSCHED_SET_PASTPERFECT(t) ((t).tv_sec = 0)
190#define PSCHED_IS_PASTPERFECT(t) ((t).tv_sec == 0)
191
192#define PSCHED_AUDIT_TDIFF(t) ({ if ((t) > 2000000) (t) = 2000000; })
193
194#else /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */
195
196#define PSCHED_TDIFF(tv1, tv2) (long)((tv1) - (tv2))
197#define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \
198 min_t(long long, (tv1) - (tv2), bound)
199
200 61
201#define PSCHED_TLESS(tv1, tv2) ((tv1) < (tv2)) 62struct qdisc_watchdog {
202#define PSCHED_TADD2(tv, delta, tv_res) ((tv_res) = (tv) + (delta)) 63 struct hrtimer timer;
203#define PSCHED_TADD(tv, delta) ((tv) += (delta)) 64 struct Qdisc *qdisc;
204#define PSCHED_SET_PASTPERFECT(t) ((t) = 0) 65};
205#define PSCHED_IS_PASTPERFECT(t) ((t) == 0)
206#define PSCHED_AUDIT_TDIFF(t)
207 66
208#endif /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */ 67extern void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc);
68extern void qdisc_watchdog_schedule(struct qdisc_watchdog *wd,
69 psched_time_t expires);
70extern void qdisc_watchdog_cancel(struct qdisc_watchdog *wd);
209 71
210extern struct Qdisc_ops pfifo_qdisc_ops; 72extern struct Qdisc_ops pfifo_qdisc_ops;
211extern struct Qdisc_ops bfifo_qdisc_ops; 73extern struct Qdisc_ops bfifo_qdisc_ops;
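
The replacement clock derives psched time from ktime_get(): one tick is the
nanosecond value shifted right by 10, i.e. 1024 ns, so PSCHED_TICKS_PER_SEC
comes out to NSEC_PER_SEC >> 10 = 976562. The shift trades a 64-bit divide
by 1000 for a cheap shift; the roughly 2.4% unit skew cancels out because
both conversion directions use the same scale. A sketch of a qdisc using the
new hrtimer-backed watchdog (the qdisc and its one-second delay are
assumptions):

/* Sketch only: arm the watchdog when nothing is eligible to send. */
struct example_sched {
	struct qdisc_watchdog watchdog;
};

static int example_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct example_sched *q = qdisc_priv(sch);

	qdisc_watchdog_init(&q->watchdog, sch);
	return 0;
}

static struct sk_buff *example_dequeue(struct Qdisc *sch)
{
	struct example_sched *q = qdisc_priv(sch);
	psched_time_t now = psched_get_time();

	/* no packet is due yet: schedule a wakeup one second out and
	 * return NULL; the timer reschedules the qdisc */
	qdisc_watchdog_schedule(&q->watchdog, now + PSCHED_TICKS_PER_SEC);
	return NULL;
}
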
diff --git a/include/net/red.h b/include/net/red.h
index a4eb37946f2c..3cf31d466a81 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -151,17 +151,17 @@ static inline void red_set_parms(struct red_parms *p,
151 151
152static inline int red_is_idling(struct red_parms *p) 152static inline int red_is_idling(struct red_parms *p)
153{ 153{
154 return !PSCHED_IS_PASTPERFECT(p->qidlestart); 154 return p->qidlestart != PSCHED_PASTPERFECT;
155} 155}
156 156
157static inline void red_start_of_idle_period(struct red_parms *p) 157static inline void red_start_of_idle_period(struct red_parms *p)
158{ 158{
159 PSCHED_GET_TIME(p->qidlestart); 159 p->qidlestart = psched_get_time();
160} 160}
161 161
162static inline void red_end_of_idle_period(struct red_parms *p) 162static inline void red_end_of_idle_period(struct red_parms *p)
163{ 163{
164 PSCHED_SET_PASTPERFECT(p->qidlestart); 164 p->qidlestart = PSCHED_PASTPERFECT;
165} 165}
166 166
167static inline void red_restart(struct red_parms *p) 167static inline void red_restart(struct red_parms *p)
@@ -177,8 +177,8 @@ static inline unsigned long red_calc_qavg_from_idle_time(struct red_parms *p)
177 long us_idle; 177 long us_idle;
178 int shift; 178 int shift;
179 179
180 PSCHED_GET_TIME(now); 180 now = psched_get_time();
181 us_idle = PSCHED_TDIFF_SAFE(now, p->qidlestart, p->Scell_max); 181 us_idle = psched_tdiff_bounded(now, p->qidlestart, p->Scell_max);
182 182
183 /* 183 /*
184 * The problem: ideally, average queue length recalculation should 184 * The problem: ideally, average queue length recalculation should
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
new file mode 100644
index 000000000000..3b3d4745618d
--- /dev/null
+++ b/include/net/rtnetlink.h
@@ -0,0 +1,25 @@
1#ifndef __NET_RTNETLINK_H
2#define __NET_RTNETLINK_H
3
4#include <linux/rtnetlink.h>
5#include <net/netlink.h>
6
7typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, void *);
8typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *);
9
10extern int __rtnl_register(int protocol, int msgtype,
11 rtnl_doit_func, rtnl_dumpit_func);
12extern void rtnl_register(int protocol, int msgtype,
13 rtnl_doit_func, rtnl_dumpit_func);
14extern int rtnl_unregister(int protocol, int msgtype);
15extern void rtnl_unregister_all(int protocol);
16
17static inline int rtnl_msg_family(struct nlmsghdr *nlh)
18{
19 if (nlmsg_len(nlh) >= sizeof(struct rtgenmsg))
20 return ((struct rtgenmsg *) nlmsg_data(nlh))->rtgen_family;
21 else
22 return AF_UNSPEC;
23}
24
25#endif
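
This registration interface is why so many headers in this merge (dn_fib.h,
dn_route.h, ip6_route.h, ip_fib.h, neighbour.h, fib_rules.h above) lose
their exported doit/dumpit prototypes: each family now registers its
handlers at init time instead of having them wired up centrally in
rtnetlink.c. A sketch of the calling convention, with an invented family and
handlers:

/* Sketch only: hook RTM_NEWROUTE and RTM_GETROUTE for a family. */
static int example_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
			    void *arg)
{
	return -EOPNOTSUPP;
}

static int example_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	return skb->len;	/* nothing (more) to dump */
}

static void __init example_rtnl_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWROUTE, example_newroute, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, example_dumproute);
}
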
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 82086392735a..1b8e35197ebe 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -5,10 +5,10 @@
5#include <linux/types.h> 5#include <linux/types.h>
6#include <linux/rcupdate.h> 6#include <linux/rcupdate.h>
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/rtnetlink.h>
9#include <linux/pkt_sched.h> 8#include <linux/pkt_sched.h>
10#include <linux/pkt_cls.h> 9#include <linux/pkt_cls.h>
11#include <net/gen_stats.h> 10#include <net/gen_stats.h>
11#include <net/rtnetlink.h>
12 12
13struct Qdisc_ops; 13struct Qdisc_ops;
14struct qdisc_walker; 14struct qdisc_walker;
@@ -177,14 +177,8 @@ extern void qdisc_tree_decrease_qlen(struct Qdisc *qdisc, unsigned int n);
177extern struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops); 177extern struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops);
178extern struct Qdisc *qdisc_create_dflt(struct net_device *dev, 178extern struct Qdisc *qdisc_create_dflt(struct net_device *dev,
179 struct Qdisc_ops *ops, u32 parentid); 179 struct Qdisc_ops *ops, u32 parentid);
180 180extern void tcf_destroy(struct tcf_proto *tp);
181static inline void 181extern void tcf_destroy_chain(struct tcf_proto *fl);
182tcf_destroy(struct tcf_proto *tp)
183{
184 tp->ops->destroy(tp);
185 module_put(tp->ops->owner);
186 kfree(tp);
187}
188 182
189static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, 183static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
190 struct sk_buff_head *list) 184 struct sk_buff_head *list)
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 5ddb85599863..bb37724495a5 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -283,7 +283,7 @@ enum { SCTP_MAX_GABS = 16 };
283#define SCTP_RTO_BETA 2 /* 1/4 when converted to right shifts. */ 283#define SCTP_RTO_BETA 2 /* 1/4 when converted to right shifts. */
284 284
285/* Maximum number of new data packets that can be sent in a burst. */ 285/* Maximum number of new data packets that can be sent in a burst. */
286#define SCTP_MAX_BURST 4 286#define SCTP_DEFAULT_MAX_BURST 4
287 287
288#define SCTP_CLOCK_GRANULARITY 1 /* 1 jiffy */ 288#define SCTP_CLOCK_GRANULARITY 1 /* 1 jiffy */
289 289
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index f431acf3dcea..7b4fff93ba7f 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -276,6 +276,7 @@ struct sctp_sock {
276 __u32 default_context; 276 __u32 default_context;
277 __u32 default_timetolive; 277 __u32 default_timetolive;
278 __u32 default_rcv_context; 278 __u32 default_rcv_context;
279 int max_burst;
279 280
280 /* Heartbeat interval: The endpoint sends out a Heartbeat chunk to 281 /* Heartbeat interval: The endpoint sends out a Heartbeat chunk to
281 * the destination address every heartbeat interval. This value 282 * the destination address every heartbeat interval. This value
@@ -304,10 +305,12 @@ struct sctp_sock {
304 __u32 autoclose; 305 __u32 autoclose;
305 __u8 nodelay; 306 __u8 nodelay;
306 __u8 disable_fragments; 307 __u8 disable_fragments;
307 __u8 pd_mode;
308 __u8 v4mapped; 308 __u8 v4mapped;
309 __u8 frag_interleave;
309 __u32 adaptation_ind; 310 __u32 adaptation_ind;
311 __u32 pd_point;
310 312
313 atomic_t pd_mode;
311 /* Receive to here while partial delivery is in effect. */ 314 /* Receive to here while partial delivery is in effect. */
312 struct sk_buff_head pd_lobby; 315 struct sk_buff_head pd_lobby;
313}; 316};
diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h
index 2923e3d31a08..de88ed5b0ba6 100644
--- a/include/net/sctp/ulpevent.h
+++ b/include/net/sctp/ulpevent.h
@@ -89,6 +89,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_assoc_change(
89 __u16 error, 89 __u16 error,
90 __u16 outbound, 90 __u16 outbound,
91 __u16 inbound, 91 __u16 inbound,
92 struct sctp_chunk *chunk,
92 gfp_t gfp); 93 gfp_t gfp);
93 94
94struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change( 95struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change(
diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h
index ab26ab3adae1..39ea3f442b47 100644
--- a/include/net/sctp/ulpqueue.h
+++ b/include/net/sctp/ulpqueue.h
@@ -78,7 +78,7 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *, struct sctp_chunk *, gfp_t);
78void sctp_ulpq_abort_pd(struct sctp_ulpq *, gfp_t); 78void sctp_ulpq_abort_pd(struct sctp_ulpq *, gfp_t);
79 79
80/* Clear the partial data delivery condition on this socket. */ 80/* Clear the partial data delivery condition on this socket. */
81int sctp_clear_pd(struct sock *sk); 81int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc);
82 82
83/* Skip over an SSN. */ 83/* Skip over an SSN. */
84void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn); 84void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn);
diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h
index 67a30eb2b3a4..6d2b57758cca 100644
--- a/include/net/sctp/user.h
+++ b/include/net/sctp/user.h
@@ -97,6 +97,12 @@ enum sctp_optname {
97#define SCTP_DELAYED_ACK_TIME SCTP_DELAYED_ACK_TIME 97#define SCTP_DELAYED_ACK_TIME SCTP_DELAYED_ACK_TIME
98 SCTP_CONTEXT, /* Receive Context */ 98 SCTP_CONTEXT, /* Receive Context */
99#define SCTP_CONTEXT SCTP_CONTEXT 99#define SCTP_CONTEXT SCTP_CONTEXT
100 SCTP_FRAGMENT_INTERLEAVE,
101#define SCTP_FRAGMENT_INTERLEAVE SCTP_FRAGMENT_INTERLEAVE
102 SCTP_PARTIAL_DELIVERY_POINT, /* Set/Get partial delivery point */
103#define SCTP_PARTIAL_DELIVERY_POINT SCTP_PARTIAL_DELIVERY_POINT
104 SCTP_MAX_BURST, /* Set/Get max burst */
105#define SCTP_MAX_BURST SCTP_MAX_BURST
100 106
101 /* Internal Socket Options. Some of the sctp library functions are 107 /* Internal Socket Options. Some of the sctp library functions are
102 * implemented using these socket options. 108 * implemented using these socket options.
@@ -213,6 +219,7 @@ struct sctp_assoc_change {
213 __u16 sac_outbound_streams; 219 __u16 sac_outbound_streams;
214 __u16 sac_inbound_streams; 220 __u16 sac_inbound_streams;
215 sctp_assoc_t sac_assoc_id; 221 sctp_assoc_t sac_assoc_id;
222 __u8 sac_info[0];
216}; 223};
217 224
218/* 225/*
@@ -261,6 +268,7 @@ enum sctp_spc_state {
261 SCTP_ADDR_REMOVED, 268 SCTP_ADDR_REMOVED,
262 SCTP_ADDR_ADDED, 269 SCTP_ADDR_ADDED,
263 SCTP_ADDR_MADE_PRIM, 270 SCTP_ADDR_MADE_PRIM,
271 SCTP_ADDR_CONFIRMED,
264}; 272};
265 273
266 274
@@ -508,16 +516,17 @@ struct sctp_setadaptation {
508 * address's parameters: 516 * address's parameters:
509 */ 517 */
510enum sctp_spp_flags { 518enum sctp_spp_flags {
511 SPP_HB_ENABLE = 1, /*Enable heartbeats*/ 519 SPP_HB_ENABLE = 1<<0, /*Enable heartbeats*/
512 SPP_HB_DISABLE = 2, /*Disable heartbeats*/ 520 SPP_HB_DISABLE = 1<<1, /*Disable heartbeats*/
513 SPP_HB = SPP_HB_ENABLE | SPP_HB_DISABLE, 521 SPP_HB = SPP_HB_ENABLE | SPP_HB_DISABLE,
514 SPP_HB_DEMAND = 4, /*Send heartbeat immediately*/ 522 SPP_HB_DEMAND = 1<<2, /*Send heartbeat immediately*/
515 SPP_PMTUD_ENABLE = 8, /*Enable PMTU discovery*/ 523 SPP_PMTUD_ENABLE = 1<<3, /*Enable PMTU discovery*/
516 SPP_PMTUD_DISABLE = 16, /*Disable PMTU discovery*/ 524 SPP_PMTUD_DISABLE = 1<<4, /*Disable PMTU discovery*/
517 SPP_PMTUD = SPP_PMTUD_ENABLE | SPP_PMTUD_DISABLE, 525 SPP_PMTUD = SPP_PMTUD_ENABLE | SPP_PMTUD_DISABLE,
518 SPP_SACKDELAY_ENABLE = 32, /*Enable SACK*/ 526 SPP_SACKDELAY_ENABLE = 1<<5, /*Enable SACK*/
519 SPP_SACKDELAY_DISABLE = 64, /*Disable SACK*/ 527 SPP_SACKDELAY_DISABLE = 1<<6, /*Disable SACK*/
520 SPP_SACKDELAY = SPP_SACKDELAY_ENABLE | SPP_SACKDELAY_DISABLE, 528 SPP_SACKDELAY = SPP_SACKDELAY_ENABLE | SPP_SACKDELAY_DISABLE,
529 SPP_HB_TIME_IS_ZERO = 1<<7, /* Set HB delay to 0 */
521}; 530};
522 531
523struct sctp_paddrparams { 532struct sctp_paddrparams {
@@ -530,7 +539,7 @@ struct sctp_paddrparams {
530 __u32 spp_flags; 539 __u32 spp_flags;
531} __attribute__((packed, aligned(4))); 540} __attribute__((packed, aligned(4)));
532 541
533/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME) 542/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
534 * 543 *
535 * This options will get or set the delayed ack timer. The time is set 544 * This options will get or set the delayed ack timer. The time is set
536 * in milliseconds. If the assoc_id is 0, then this sets or gets the 545 * in milliseconds. If the assoc_id is 0, then this sets or gets the
diff --git a/include/net/sock.h b/include/net/sock.h
index 2c7d60ca3548..25c37e34bfdc 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -202,6 +202,15 @@ struct sock {
202 unsigned short sk_type; 202 unsigned short sk_type;
203 int sk_rcvbuf; 203 int sk_rcvbuf;
204 socket_lock_t sk_lock; 204 socket_lock_t sk_lock;
205 /*
206 * The backlog queue is special, it is always used with
207 * the per-socket spinlock held and requires low latency
208 * access. Therefore we special case it's implementation.
209 */
210 struct {
211 struct sk_buff *head;
212 struct sk_buff *tail;
213 } sk_backlog;
205 wait_queue_head_t *sk_sleep; 214 wait_queue_head_t *sk_sleep;
206 struct dst_entry *sk_dst_cache; 215 struct dst_entry *sk_dst_cache;
207 struct xfrm_policy *sk_policy[2]; 216 struct xfrm_policy *sk_policy[2];
@@ -221,15 +230,6 @@ struct sock {
221 int sk_rcvlowat; 230 int sk_rcvlowat;
222 unsigned long sk_flags; 231 unsigned long sk_flags;
223 unsigned long sk_lingertime; 232 unsigned long sk_lingertime;
224 /*
225 * The backlog queue is special, it is always used with
226 * the per-socket spinlock held and requires low latency
227 * access. Therefore we special case it's implementation.
228 */
229 struct {
230 struct sk_buff *head;
231 struct sk_buff *tail;
232 } sk_backlog;
233 struct sk_buff_head sk_error_queue; 233 struct sk_buff_head sk_error_queue;
234 struct proto *sk_prot_creator; 234 struct proto *sk_prot_creator;
235 rwlock_t sk_callback_lock; 235 rwlock_t sk_callback_lock;
@@ -244,7 +244,7 @@ struct sock {
244 struct sk_filter *sk_filter; 244 struct sk_filter *sk_filter;
245 void *sk_protinfo; 245 void *sk_protinfo;
246 struct timer_list sk_timer; 246 struct timer_list sk_timer;
247 struct timeval sk_stamp; 247 ktime_t sk_stamp;
248 struct socket *sk_socket; 248 struct socket *sk_socket;
249 void *sk_user_data; 249 void *sk_user_data;
250 struct page *sk_sndmsg_page; 250 struct page *sk_sndmsg_page;
@@ -390,6 +390,7 @@ enum sock_flags {
390 SOCK_USE_WRITE_QUEUE, /* whether to call sk->sk_write_space in sock_wfree */ 390 SOCK_USE_WRITE_QUEUE, /* whether to call sk->sk_write_space in sock_wfree */
391 SOCK_DBG, /* %SO_DEBUG setting */ 391 SOCK_DBG, /* %SO_DEBUG setting */
392 SOCK_RCVTSTAMP, /* %SO_TIMESTAMP setting */ 392 SOCK_RCVTSTAMP, /* %SO_TIMESTAMP setting */
393 SOCK_RCVTSTAMPNS, /* %SO_TIMESTAMPNS setting */
393 SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */ 394 SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
394 SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */ 395 SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
395}; 396};
@@ -710,15 +711,6 @@ static inline void sk_stream_mem_reclaim(struct sock *sk)
710 __sk_stream_mem_reclaim(sk); 711 __sk_stream_mem_reclaim(sk);
711} 712}
712 713
713static inline void sk_stream_writequeue_purge(struct sock *sk)
714{
715 struct sk_buff *skb;
716
717 while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL)
718 sk_stream_free_skb(sk, skb);
719 sk_stream_mem_reclaim(sk);
720}
721
722static inline int sk_stream_rmem_schedule(struct sock *sk, struct sk_buff *skb) 714static inline int sk_stream_rmem_schedule(struct sock *sk, struct sk_buff *skb)
723{ 715{
724 return (int)skb->truesize <= sk->sk_forward_alloc || 716 return (int)skb->truesize <= sk->sk_forward_alloc ||
@@ -1083,19 +1075,7 @@ static inline int sk_can_gso(const struct sock *sk)
1083 return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type); 1075 return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type);
1084} 1076}
1085 1077
1086static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst) 1078extern void sk_setup_caps(struct sock *sk, struct dst_entry *dst);
1087{
1088 __sk_dst_set(sk, dst);
1089 sk->sk_route_caps = dst->dev->features;
1090 if (sk->sk_route_caps & NETIF_F_GSO)
1091 sk->sk_route_caps |= NETIF_F_GSO_MASK;
1092 if (sk_can_gso(sk)) {
1093 if (dst->header_len)
1094 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1095 else
1096 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
1097 }
1098}
1099 1079
1100static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb) 1080static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb)
1101{ 1081{
@@ -1256,18 +1236,6 @@ static inline struct page *sk_stream_alloc_page(struct sock *sk)
1256 return page; 1236 return page;
1257} 1237}
1258 1238
1259#define sk_stream_for_retrans_queue(skb, sk) \
1260 for (skb = (sk)->sk_write_queue.next; \
1261 (skb != (sk)->sk_send_head) && \
1262 (skb != (struct sk_buff *)&(sk)->sk_write_queue); \
1263 skb = skb->next)
1264
1265/*from STCP for fast SACK Process*/
1266#define sk_stream_for_retrans_queue_from(skb, sk) \
1267 for (; (skb != (sk)->sk_send_head) && \
1268 (skb != (struct sk_buff *)&(sk)->sk_write_queue); \
1269 skb = skb->next)
1270
1271/* 1239/*
1272 * Default write policy as shown to user space via poll/select/SIGIO 1240 * Default write policy as shown to user space via poll/select/SIGIO
1273 */ 1241 */
@@ -1304,22 +1272,18 @@ static inline int sock_intr_errno(long timeo)
1304 return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR; 1272 return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
1305} 1273}
1306 1274
1275extern void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
1276 struct sk_buff *skb);
1277
1307static __inline__ void 1278static __inline__ void
1308sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) 1279sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
1309{ 1280{
1310 struct timeval stamp; 1281 ktime_t kt = skb->tstamp;
1311 1282
1312 skb_get_timestamp(skb, &stamp); 1283 if (sock_flag(sk, SOCK_RCVTSTAMP))
1313 if (sock_flag(sk, SOCK_RCVTSTAMP)) { 1284 __sock_recv_timestamp(msg, sk, skb);
1314 /* Race occurred between timestamp enabling and packet 1285 else
1315 receiving. Fill in the current time for now. */ 1286 sk->sk_stamp = kt;
1316 if (stamp.tv_sec == 0)
1317 do_gettimeofday(&stamp);
1318 skb_set_timestamp(skb, &stamp);
1319 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval),
1320 &stamp);
1321 } else
1322 sk->sk_stamp = stamp;
1323} 1287}
1324 1288
1325/** 1289/**
@@ -1350,18 +1314,17 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_e
1350 1314
1351extern void sock_enable_timestamp(struct sock *sk); 1315extern void sock_enable_timestamp(struct sock *sk);
1352extern int sock_get_timestamp(struct sock *, struct timeval __user *); 1316extern int sock_get_timestamp(struct sock *, struct timeval __user *);
1317extern int sock_get_timestampns(struct sock *, struct timespec __user *);
1353 1318
1354/* 1319/*
1355 * Enable debug/info messages 1320 * Enable debug/info messages
1356 */ 1321 */
1322extern int net_msg_warn;
1323#define NETDEBUG(fmt, args...) \
1324 do { if (net_msg_warn) printk(fmt,##args); } while (0)
1357 1325
1358#ifdef CONFIG_NETDEBUG 1326#define LIMIT_NETDEBUG(fmt, args...) \
1359#define NETDEBUG(fmt, args...) printk(fmt,##args) 1327 do { if (net_msg_warn && net_ratelimit()) printk(fmt,##args); } while(0)
1360#define LIMIT_NETDEBUG(fmt, args...) do { if (net_ratelimit()) printk(fmt,##args); } while(0)
1361#else
1362#define NETDEBUG(fmt, args...) do { } while (0)
1363#define LIMIT_NETDEBUG(fmt, args...) do { } while(0)
1364#endif
1365 1328
1366/* 1329/*
1367 * Macros for sleeping on a socket. Use them like this: 1330 * Macros for sleeping on a socket. Use them like this:
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5c472f255b77..a385797f160a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -220,6 +220,7 @@ extern int sysctl_tcp_app_win;
220extern int sysctl_tcp_adv_win_scale; 220extern int sysctl_tcp_adv_win_scale;
221extern int sysctl_tcp_tw_reuse; 221extern int sysctl_tcp_tw_reuse;
222extern int sysctl_tcp_frto; 222extern int sysctl_tcp_frto;
223extern int sysctl_tcp_frto_response;
223extern int sysctl_tcp_low_latency; 224extern int sysctl_tcp_low_latency;
224extern int sysctl_tcp_dma_copybreak; 225extern int sysctl_tcp_dma_copybreak;
225extern int sysctl_tcp_nometrics_save; 226extern int sysctl_tcp_nometrics_save;
@@ -230,6 +231,7 @@ extern int sysctl_tcp_mtu_probing;
230extern int sysctl_tcp_base_mss; 231extern int sysctl_tcp_base_mss;
231extern int sysctl_tcp_workaround_signed_windows; 232extern int sysctl_tcp_workaround_signed_windows;
232extern int sysctl_tcp_slow_start_after_idle; 233extern int sysctl_tcp_slow_start_after_idle;
234extern int sysctl_tcp_max_ssthresh;
233 235
234extern atomic_t tcp_memory_allocated; 236extern atomic_t tcp_memory_allocated;
235extern atomic_t tcp_sockets_allocated; 237extern atomic_t tcp_sockets_allocated;
@@ -341,6 +343,7 @@ extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb,
341extern int tcp_child_process(struct sock *parent, 343extern int tcp_child_process(struct sock *parent,
342 struct sock *child, 344 struct sock *child,
343 struct sk_buff *skb); 345 struct sk_buff *skb);
346extern int tcp_use_frto(struct sock *sk);
344extern void tcp_enter_frto(struct sock *sk); 347extern void tcp_enter_frto(struct sock *sk);
345extern void tcp_enter_loss(struct sock *sk, int how); 348extern void tcp_enter_loss(struct sock *sk, int how);
346extern void tcp_clear_retrans(struct tcp_sock *tp); 349extern void tcp_clear_retrans(struct tcp_sock *tp);
@@ -417,9 +420,9 @@ extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
417 420
418/* tcp_output.c */ 421/* tcp_output.c */
419 422
420extern void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp, 423extern void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
421 unsigned int cur_mss, int nonagle); 424 int nonagle);
422extern int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp); 425extern int tcp_may_send_now(struct sock *sk);
423extern int tcp_retransmit_skb(struct sock *, struct sk_buff *); 426extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
424extern void tcp_xmit_retransmit_queue(struct sock *); 427extern void tcp_xmit_retransmit_queue(struct sock *);
425extern void tcp_simple_retransmit(struct sock *); 428extern void tcp_simple_retransmit(struct sock *);
@@ -476,8 +479,10 @@ static inline void tcp_fast_path_on(struct tcp_sock *tp)
476 __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale); 479 __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
477} 480}
478 481
479static inline void tcp_fast_path_check(struct sock *sk, struct tcp_sock *tp) 482static inline void tcp_fast_path_check(struct sock *sk)
480{ 483{
484 struct tcp_sock *tp = tcp_sk(sk);
485
481 if (skb_queue_empty(&tp->out_of_order_queue) && 486 if (skb_queue_empty(&tp->out_of_order_queue) &&
482 tp->rcv_wnd && 487 tp->rcv_wnd &&
483 atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && 488 atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
@@ -588,10 +593,10 @@ static inline void tcp_dec_pcount_approx(__u32 *count,
588 } 593 }
589} 594}
590 595
591static inline void tcp_packets_out_inc(struct sock *sk, 596static inline void tcp_packets_out_inc(struct sock *sk,
592 struct tcp_sock *tp,
593 const struct sk_buff *skb) 597 const struct sk_buff *skb)
594{ 598{
599 struct tcp_sock *tp = tcp_sk(sk);
595 int orig = tp->packets_out; 600 int orig = tp->packets_out;
596 601
597 tp->packets_out += tcp_skb_pcount(skb); 602 tp->packets_out += tcp_skb_pcount(skb);
@@ -624,9 +629,12 @@ enum tcp_ca_event {
624#define TCP_CA_MAX 128 629#define TCP_CA_MAX 128
625#define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX) 630#define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX)
626 631
632#define TCP_CONG_NON_RESTRICTED 0x1
633#define TCP_CONG_RTT_STAMP 0x2
634
627struct tcp_congestion_ops { 635struct tcp_congestion_ops {
628 struct list_head list; 636 struct list_head list;
629 int non_restricted; 637 unsigned long flags;
630 638
631 /* initialize private data (optional) */ 639 /* initialize private data (optional) */
632 void (*init)(struct sock *sk); 640 void (*init)(struct sock *sk);
@@ -640,8 +648,6 @@ struct tcp_congestion_ops {
640 /* do new cwnd calculation (required) */ 648 /* do new cwnd calculation (required) */
641 void (*cong_avoid)(struct sock *sk, u32 ack, 649 void (*cong_avoid)(struct sock *sk, u32 ack,
642 u32 rtt, u32 in_flight, int good_ack); 650 u32 rtt, u32 in_flight, int good_ack);
643 /* round trip time sample per acked packet (optional) */
644 void (*rtt_sample)(struct sock *sk, u32 usrtt);
645 /* call before changing ca_state (optional) */ 651 /* call before changing ca_state (optional) */
646 void (*set_state)(struct sock *sk, u8 new_state); 652 void (*set_state)(struct sock *sk, u8 new_state);
647 /* call when cwnd event occurs (optional) */ 653 /* call when cwnd event occurs (optional) */
@@ -649,7 +655,7 @@ struct tcp_congestion_ops {
649 /* new value of cwnd after loss (optional) */ 655 /* new value of cwnd after loss (optional) */
650 u32 (*undo_cwnd)(struct sock *sk); 656 u32 (*undo_cwnd)(struct sock *sk);
651 /* hook for packet ack accounting (optional) */ 657 /* hook for packet ack accounting (optional) */
652 void (*pkts_acked)(struct sock *sk, u32 num_acked); 658 void (*pkts_acked)(struct sock *sk, u32 num_acked, ktime_t last);
653 /* get info for inet_diag (optional) */ 659 /* get info for inet_diag (optional) */
654 void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb); 660 void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);
655 661
@@ -736,7 +742,7 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp)
736 tp->left_out = tp->sacked_out + tp->lost_out; 742 tp->left_out = tp->sacked_out + tp->lost_out;
737} 743}
738 744
739extern void tcp_enter_cwr(struct sock *sk); 745extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh);
740extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); 746extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);
741 747
742/* Slow start with delack produces 3 packets of burst, so that 748/* Slow start with delack produces 3 packets of burst, so that
@@ -775,18 +781,21 @@ static inline void tcp_minshall_update(struct tcp_sock *tp, int mss,
775 tp->snd_sml = TCP_SKB_CB(skb)->end_seq; 781 tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
776} 782}
777 783
778static inline void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) 784static inline void tcp_check_probe_timer(struct sock *sk)
779{ 785{
786 struct tcp_sock *tp = tcp_sk(sk);
780 const struct inet_connection_sock *icsk = inet_csk(sk); 787 const struct inet_connection_sock *icsk = inet_csk(sk);
788
781 if (!tp->packets_out && !icsk->icsk_pending) 789 if (!tp->packets_out && !icsk->icsk_pending)
782 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, 790 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
783 icsk->icsk_rto, TCP_RTO_MAX); 791 icsk->icsk_rto, TCP_RTO_MAX);
784} 792}
785 793
786static inline void tcp_push_pending_frames(struct sock *sk, 794static inline void tcp_push_pending_frames(struct sock *sk)
787 struct tcp_sock *tp)
788{ 795{
789 __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle); 796 struct tcp_sock *tp = tcp_sk(sk);
797
798 __tcp_push_pending_frames(sk, tcp_current_mss(sk, 1), tp->nonagle);
790} 799}
791 800
792static inline void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq) 801static inline void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq)
@@ -815,7 +824,7 @@ static inline __sum16 __tcp_checksum_complete(struct sk_buff *skb)
815 824
816static inline int tcp_checksum_complete(struct sk_buff *skb) 825static inline int tcp_checksum_complete(struct sk_buff *skb)
817{ 826{
818 return skb->ip_summed != CHECKSUM_UNNECESSARY && 827 return !skb_csum_unnecessary(skb) &&
819 __tcp_checksum_complete(skb); 828 __tcp_checksum_complete(skb);
820} 829}
821 830
@@ -918,21 +927,7 @@ static inline void tcp_set_state(struct sock *sk, int state)
918#endif 927#endif
919} 928}
920 929
921static inline void tcp_done(struct sock *sk) 930extern void tcp_done(struct sock *sk);
922{
923 if(sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
924 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
925
926 tcp_set_state(sk, TCP_CLOSE);
927 tcp_clear_xmit_timers(sk);
928
929 sk->sk_shutdown = SHUTDOWN_MASK;
930
931 if (!sock_flag(sk, SOCK_DEAD))
932 sk->sk_state_change(sk);
933 else
934 inet_csk_destroy_sock(sk);
935}
936 931
937static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) 932static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
938{ 933{
@@ -981,7 +976,7 @@ static inline void tcp_openreq_init(struct request_sock *req,
981 ireq->wscale_ok = rx_opt->wscale_ok; 976 ireq->wscale_ok = rx_opt->wscale_ok;
982 ireq->acked = 0; 977 ireq->acked = 0;
983 ireq->ecn_ok = 0; 978 ireq->ecn_ok = 0;
984 ireq->rmt_port = skb->h.th->source; 979 ireq->rmt_port = tcp_hdr(skb)->source;
985} 980}
986 981
987extern void tcp_enter_memory_pressure(void); 982extern void tcp_enter_memory_pressure(void);
@@ -1011,7 +1006,7 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int
1011{ 1006{
1012 if ((s32)(rx_opt->rcv_tsval - rx_opt->ts_recent) >= 0) 1007 if ((s32)(rx_opt->rcv_tsval - rx_opt->ts_recent) >= 0)
1013 return 0; 1008 return 0;
1014 if (xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS) 1009 if (get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)
1015 return 0; 1010 return 0;
1016 1011
1017 /* RST segments are not recommended to carry timestamp, 1012 /* RST segments are not recommended to carry timestamp,
@@ -1026,26 +1021,13 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int
1026 1021
1027 However, we can relax time bounds for RST segments to MSL. 1022 However, we can relax time bounds for RST segments to MSL.
1028 */ 1023 */
1029 if (rst && xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL) 1024 if (rst && get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
1030 return 0; 1025 return 0;
1031 return 1; 1026 return 1;
1032} 1027}
1033 1028
1034#define TCP_CHECK_TIMER(sk) do { } while (0) 1029#define TCP_CHECK_TIMER(sk) do { } while (0)
1035 1030
1036static inline int tcp_use_frto(const struct sock *sk)
1037{
1038 const struct tcp_sock *tp = tcp_sk(sk);
1039
1040 /* F-RTO must be activated in sysctl and there must be some
1041 * unsent new data, and the advertised window should allow
1042 * sending it.
1043 */
1044 return (sysctl_tcp_frto && sk->sk_send_head &&
1045 !after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
1046 tp->snd_una + tp->snd_wnd));
1047}
1048
1049static inline void tcp_mib_init(void) 1031static inline void tcp_mib_init(void)
1050{ 1032{
1051 /* See RFC 2012 */ 1033 /* See RFC 2012 */
@@ -1172,6 +1154,120 @@ static inline void tcp_put_md5sig_pool(void)
1172 put_cpu(); 1154 put_cpu();
1173} 1155}
1174 1156
1157/* write queue abstraction */
1158static inline void tcp_write_queue_purge(struct sock *sk)
1159{
1160 struct sk_buff *skb;
1161
1162 while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL)
1163 sk_stream_free_skb(sk, skb);
1164 sk_stream_mem_reclaim(sk);
1165}
1166
1167static inline struct sk_buff *tcp_write_queue_head(struct sock *sk)
1168{
1169 struct sk_buff *skb = sk->sk_write_queue.next;
1170 if (skb == (struct sk_buff *) &sk->sk_write_queue)
1171 return NULL;
1172 return skb;
1173}
1174
1175static inline struct sk_buff *tcp_write_queue_tail(struct sock *sk)
1176{
1177 struct sk_buff *skb = sk->sk_write_queue.prev;
1178 if (skb == (struct sk_buff *) &sk->sk_write_queue)
1179 return NULL;
1180 return skb;
1181}
1182
1183static inline struct sk_buff *tcp_write_queue_next(struct sock *sk, struct sk_buff *skb)
1184{
1185 return skb->next;
1186}
1187
1188#define tcp_for_write_queue(skb, sk) \
1189 for (skb = (sk)->sk_write_queue.next; \
1190 (skb != (struct sk_buff *)&(sk)->sk_write_queue); \
1191 skb = skb->next)
1192
1193#define tcp_for_write_queue_from(skb, sk) \
1194 for (; (skb != (struct sk_buff *)&(sk)->sk_write_queue);\
1195 skb = skb->next)
1196
1197static inline struct sk_buff *tcp_send_head(struct sock *sk)
1198{
1199 return sk->sk_send_head;
1200}
1201
1202static inline void tcp_advance_send_head(struct sock *sk, struct sk_buff *skb)
1203{
1204 sk->sk_send_head = skb->next;
1205 if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
1206 sk->sk_send_head = NULL;
1207}
1208
1209static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked)
1210{
1211 if (sk->sk_send_head == skb_unlinked)
1212 sk->sk_send_head = NULL;
1213}
1214
1215static inline void tcp_init_send_head(struct sock *sk)
1216{
1217 sk->sk_send_head = NULL;
1218}
1219
1220static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
1221{
1222 __skb_queue_tail(&sk->sk_write_queue, skb);
1223}
1224
1225static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
1226{
1227 __tcp_add_write_queue_tail(sk, skb);
1228
1229 /* Queue it, remembering where we must start sending. */
1230 if (sk->sk_send_head == NULL)
1231 sk->sk_send_head = skb;
1232}
1233
1234static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *skb)
1235{
1236 __skb_queue_head(&sk->sk_write_queue, skb);
1237}
1238
1239/* Insert buff after skb on the write queue of sk. */
1240static inline void tcp_insert_write_queue_after(struct sk_buff *skb,
1241 struct sk_buff *buff,
1242 struct sock *sk)
1243{
1244 __skb_append(skb, buff, &sk->sk_write_queue);
1245}
1246
1247/* Insert skb between prev and next on the write queue of sk. */
1248static inline void tcp_insert_write_queue_before(struct sk_buff *new,
1249 struct sk_buff *skb,
1250 struct sock *sk)
1251{
1252 __skb_insert(new, skb->prev, skb, &sk->sk_write_queue);
1253}
1254
1255static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk)
1256{
1257 __skb_unlink(skb, &sk->sk_write_queue);
1258}
1259
1260static inline int tcp_skb_is_last(const struct sock *sk,
1261 const struct sk_buff *skb)
1262{
1263 return skb->next == (struct sk_buff *)&sk->sk_write_queue;
1264}
1265
1266static inline int tcp_write_queue_empty(struct sock *sk)
1267{
1268 return skb_queue_empty(&sk->sk_write_queue);
1269}
1270
1175/* /proc */ 1271/* /proc */
1176enum tcp_seq_states { 1272enum tcp_seq_states {
1177 TCP_SEQ_STATE_LISTENING, 1273 TCP_SEQ_STATE_LISTENING,
diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h
index 4629d77173f2..89eb3e05116d 100644
--- a/include/net/tcp_ecn.h
+++ b/include/net/tcp_ecn.h
@@ -27,9 +27,10 @@ static inline void TCP_ECN_send_synack(struct tcp_sock *tp,
27 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE; 27 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE;
28} 28}
29 29
30static inline void TCP_ECN_send_syn(struct sock *sk, struct tcp_sock *tp, 30static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
31 struct sk_buff *skb)
32{ 31{
32 struct tcp_sock *tp = tcp_sk(sk);
33
33 tp->ecn_flags = 0; 34 tp->ecn_flags = 0;
34 if (sysctl_tcp_ecn) { 35 if (sysctl_tcp_ecn) {
35 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE|TCPCB_FLAG_CWR; 36 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE|TCPCB_FLAG_CWR;
@@ -44,9 +45,11 @@ TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th)
44 th->ece = 1; 45 th->ece = 1;
45} 46}
46 47
47static inline void TCP_ECN_send(struct sock *sk, struct tcp_sock *tp, 48static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
48 struct sk_buff *skb, int tcp_header_len) 49 int tcp_header_len)
49{ 50{
51 struct tcp_sock *tp = tcp_sk(sk);
52
50 if (tp->ecn_flags & TCP_ECN_OK) { 53 if (tp->ecn_flags & TCP_ECN_OK) {
51 /* Not-retransmitted data segment: set ECT and inject CWR. */ 54 /* Not-retransmitted data segment: set ECT and inject CWR. */
52 if (skb->len != tcp_header_len && 55 if (skb->len != tcp_header_len &&
@@ -54,7 +57,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct tcp_sock *tp,
54 INET_ECN_xmit(sk); 57 INET_ECN_xmit(sk);
55 if (tp->ecn_flags&TCP_ECN_QUEUE_CWR) { 58 if (tp->ecn_flags&TCP_ECN_QUEUE_CWR) {
56 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; 59 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
57 skb->h.th->cwr = 1; 60 tcp_hdr(skb)->cwr = 1;
58 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; 61 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
59 } 62 }
60 } else { 63 } else {
@@ -62,7 +65,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct tcp_sock *tp,
62 INET_ECN_dontxmit(sk); 65 INET_ECN_dontxmit(sk);
63 } 66 }
64 if (tp->ecn_flags & TCP_ECN_DEMAND_CWR) 67 if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
65 skb->h.th->ece = 1; 68 tcp_hdr(skb)->ece = 1;
66 } 69 }
67} 70}
68 71
@@ -70,7 +73,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct tcp_sock *tp,
70 73
71static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb) 74static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb)
72{ 75{
73 if (skb->h.th->cwr) 76 if (tcp_hdr(skb)->cwr)
74 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; 77 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
75} 78}
76 79
diff --git a/include/net/udp.h b/include/net/udp.h
index 1b921fa81474..98755ebaf163 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -72,15 +72,12 @@ struct sk_buff;
72 */ 72 */
73static inline __sum16 __udp_lib_checksum_complete(struct sk_buff *skb) 73static inline __sum16 __udp_lib_checksum_complete(struct sk_buff *skb)
74{ 74{
75 if (! UDP_SKB_CB(skb)->partial_cov) 75 return __skb_checksum_complete_head(skb, UDP_SKB_CB(skb)->cscov);
76 return __skb_checksum_complete(skb);
77 return csum_fold(skb_checksum(skb, 0, UDP_SKB_CB(skb)->cscov,
78 skb->csum));
79} 76}
80 77
81static inline int udp_lib_checksum_complete(struct sk_buff *skb) 78static inline int udp_lib_checksum_complete(struct sk_buff *skb)
82{ 79{
83 return skb->ip_summed != CHECKSUM_UNNECESSARY && 80 return !skb_csum_unnecessary(skb) &&
84 __udp_lib_checksum_complete(skb); 81 __udp_lib_checksum_complete(skb);
85} 82}
86 83
@@ -92,8 +89,8 @@ static inline int udp_lib_checksum_complete(struct sk_buff *skb)
92 */ 89 */
93static inline __wsum udp_csum_outgoing(struct sock *sk, struct sk_buff *skb) 90static inline __wsum udp_csum_outgoing(struct sock *sk, struct sk_buff *skb)
94{ 91{
95 __wsum csum = csum_partial(skb->h.raw, sizeof(struct udphdr), 0); 92 __wsum csum = csum_partial(skb_transport_header(skb),
96 93 sizeof(struct udphdr), 0);
97 skb_queue_walk(&sk->sk_write_queue, skb) { 94 skb_queue_walk(&sk->sk_write_queue, skb) {
98 csum = csum_add(csum, skb->csum); 95 csum = csum_add(csum, skb->csum);
99 } 96 }
diff --git a/include/net/udplite.h b/include/net/udplite.h
index 67ac51424307..635b0eafca95 100644
--- a/include/net/udplite.h
+++ b/include/net/udplite.h
@@ -47,11 +47,10 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh)
47 return 1; 47 return 1;
48 } 48 }
49 49
50 UDP_SKB_CB(skb)->partial_cov = 0;
51 cscov = ntohs(uh->len); 50 cscov = ntohs(uh->len);
52 51
53 if (cscov == 0) /* Indicates that full coverage is required. */ 52 if (cscov == 0) /* Indicates that full coverage is required. */
54 cscov = skb->len; 53 ;
55 else if (cscov < 8 || cscov > skb->len) { 54 else if (cscov < 8 || cscov > skb->len) {
56 /* 55 /*
57 * Coverage length violates RFC 3828: log and discard silently. 56 * Coverage length violates RFC 3828: log and discard silently.
@@ -60,42 +59,16 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh)
60 cscov, skb->len); 59 cscov, skb->len);
61 return 1; 60 return 1;
62 61
63 } else if (cscov < skb->len) 62 } else if (cscov < skb->len) {
64 UDP_SKB_CB(skb)->partial_cov = 1; 63 UDP_SKB_CB(skb)->partial_cov = 1;
65 64 UDP_SKB_CB(skb)->cscov = cscov;
66 UDP_SKB_CB(skb)->cscov = cscov; 65 if (skb->ip_summed == CHECKSUM_COMPLETE)
67 66 skb->ip_summed = CHECKSUM_NONE;
68 /* 67 }
69 * There is no known NIC manufacturer supporting UDP-Lite yet,
70 * hence ip_summed is always (re-)set to CHECKSUM_NONE.
71 */
72 skb->ip_summed = CHECKSUM_NONE;
73 68
74 return 0; 69 return 0;
75} 70}
76 71
77static __inline__ int udplite4_csum_init(struct sk_buff *skb, struct udphdr *uh)
78{
79 int rc = udplite_checksum_init(skb, uh);
80
81 if (!rc)
82 skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr,
83 skb->nh.iph->daddr,
84 skb->len, IPPROTO_UDPLITE, 0);
85 return rc;
86}
87
88static __inline__ int udplite6_csum_init(struct sk_buff *skb, struct udphdr *uh)
89{
90 int rc = udplite_checksum_init(skb, uh);
91
92 if (!rc)
93 skb->csum = ~csum_unfold(csum_ipv6_magic(&skb->nh.ipv6h->saddr,
94 &skb->nh.ipv6h->daddr,
95 skb->len, IPPROTO_UDPLITE, 0));
96 return rc;
97}
98
99static inline int udplite_sender_cscov(struct udp_sock *up, struct udphdr *uh) 72static inline int udplite_sender_cscov(struct udp_sock *up, struct udphdr *uh)
100{ 73{
101 int cscov = up->len; 74 int cscov = up->len;
@@ -128,14 +101,14 @@ static inline int udplite_sender_cscov(struct udp_sock *up, struct udphdr *uh)
128 101
129static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb) 102static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb)
130{ 103{
131 int off, len, cscov = udplite_sender_cscov(udp_sk(sk), skb->h.uh); 104 int cscov = udplite_sender_cscov(udp_sk(sk), udp_hdr(skb));
132 __wsum csum = 0; 105 __wsum csum = 0;
133 106
134 skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */ 107 skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */
135 108
136 skb_queue_walk(&sk->sk_write_queue, skb) { 109 skb_queue_walk(&sk->sk_write_queue, skb) {
137 off = skb->h.raw - skb->data; 110 const int off = skb_transport_offset(skb);
138 len = skb->len - off; 111 const int len = skb->len - off;
139 112
140 csum = skb_checksum(skb, off, (cscov > len)? len : cscov, csum); 113 csum = skb_checksum(skb, off, (cscov > len)? len : cscov, csum);
141 114
diff --git a/include/net/wext.h b/include/net/wext.h
new file mode 100644
index 000000000000..55741836a675
--- /dev/null
+++ b/include/net/wext.h
@@ -0,0 +1,24 @@
1#ifndef __NET_WEXT_H
2#define __NET_WEXT_H
3
4/*
5 * wireless extensions interface to the core code
6 */
7
8#ifdef CONFIG_WIRELESS_EXT
9extern int wext_proc_init(void);
10extern int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd,
11 void __user *arg);
12#else
13static inline int wext_proc_init()
14{
15 return 0;
16}
17static inline int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd,
18 void __user *arg)
19{
20 return -EINVAL;
21}
22#endif
23
24#endif /* __NET_WEXT_H */
diff --git a/include/net/wireless.h b/include/net/wireless.h
new file mode 100644
index 000000000000..d30c4ba8fd99
--- /dev/null
+++ b/include/net/wireless.h
@@ -0,0 +1,139 @@
1#ifndef __NET_WIRELESS_H
2#define __NET_WIRELESS_H
3
4/*
5 * 802.11 device management
6 *
7 * Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
8 */
9
10#include <linux/netdevice.h>
11#include <linux/debugfs.h>
12#include <linux/list.h>
13#include <net/cfg80211.h>
14
15/**
16 * struct wiphy - wireless hardware description
17 * @idx: the wiphy index assigned to this item
18 * @class_dev: the class device representing /sys/class/ieee80211/<wiphy-name>
19 */
20struct wiphy {
21 /* assign these fields before you register the wiphy */
22
23 /* permanent MAC address */
24 u8 perm_addr[ETH_ALEN];
25
26 /* If multiple wiphys are registered and you're handed e.g.
27 * a regular netdev with assigned ieee80211_ptr, you won't
28 * know whether it points to a wiphy your driver has registered
29 * or not. Assign this to something global to your driver to
30 * help determine whether you own this wiphy or not. */
31 void *privid;
32
33 /* fields below are read-only, assigned by cfg80211 */
34
35 /* the item in /sys/class/ieee80211/ points to this,
36 * you need use set_wiphy_dev() (see below) */
37 struct device dev;
38
39 /* dir in debugfs: ieee80211/<wiphyname> */
40 struct dentry *debugfsdir;
41
42 char priv[0] __attribute__((__aligned__(NETDEV_ALIGN)));
43};
44
45/** struct wireless_dev - wireless per-netdev state
46 *
47 * This structure must be allocated by the driver/stack
48 * that uses the ieee80211_ptr field in struct net_device
49 * (this is intentional so it can be allocated along with
50 * the netdev.)
51 *
52 * @wiphy: pointer to hardware description
53 */
54struct wireless_dev {
55 struct wiphy *wiphy;
56
57 /* private to the generic wireless code */
58 struct list_head list;
59 struct net_device *netdev;
60};
61
62/**
63 * wiphy_priv - return priv from wiphy
64 */
65static inline void *wiphy_priv(struct wiphy *wiphy)
66{
67 BUG_ON(!wiphy);
68 return &wiphy->priv;
69}
70
71/**
72 * set_wiphy_dev - set device pointer for wiphy
73 */
74static inline void set_wiphy_dev(struct wiphy *wiphy, struct device *dev)
75{
76 wiphy->dev.parent = dev;
77}
78
79/**
80 * wiphy_dev - get wiphy dev pointer
81 */
82static inline struct device *wiphy_dev(struct wiphy *wiphy)
83{
84 return wiphy->dev.parent;
85}
86
87/**
88 * wiphy_name - get wiphy name
89 */
90static inline char *wiphy_name(struct wiphy *wiphy)
91{
92 return wiphy->dev.bus_id;
93}
94
95/**
96 * wdev_priv - return wiphy priv from wireless_dev
97 */
98static inline void *wdev_priv(struct wireless_dev *wdev)
99{
100 BUG_ON(!wdev);
101 return wiphy_priv(wdev->wiphy);
102}
103
104/**
105 * wiphy_new - create a new wiphy for use with cfg80211
106 *
107 * create a new wiphy and associate the given operations with it.
108 * @sizeof_priv bytes are allocated for private use.
109 *
110 * the returned pointer must be assigned to each netdev's
111 * ieee80211_ptr for proper operation.
112 */
113struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv);
114
115/**
116 * wiphy_register - register a wiphy with cfg80211
117 *
118 * register the given wiphy
119 *
120 * Returns a non-negative wiphy index or a negative error code.
121 */
122extern int wiphy_register(struct wiphy *wiphy);
123
124/**
125 * wiphy_unregister - deregister a wiphy from cfg80211
126 *
127 * unregister a device with the given priv pointer.
128 * After this call, no more requests can be made with this priv
129 * pointer, but the call may sleep to wait for an outstanding
130 * request that is being handled.
131 */
132extern void wiphy_unregister(struct wiphy *wiphy);
133
134/**
135 * wiphy_free - free wiphy
136 */
137extern void wiphy_free(struct wiphy *wiphy);
138
139#endif /* __NET_WIRELESS_H */
diff --git a/include/net/x25device.h b/include/net/x25device.h
index 1d10c879f7e2..1415bcf93980 100644
--- a/include/net/x25device.h
+++ b/include/net/x25device.h
@@ -7,8 +7,8 @@
7 7
8static inline __be16 x25_type_trans(struct sk_buff *skb, struct net_device *dev) 8static inline __be16 x25_type_trans(struct sk_buff *skb, struct net_device *dev)
9{ 9{
10 skb->mac.raw = skb->data;
11 skb->dev = dev; 10 skb->dev = dev;
11 skb_reset_mac_header(skb);
12 skb->pkt_type = PACKET_HOST; 12 skb->pkt_type = PACKET_HOST;
13 13
14 return htons(ETH_P_X25); 14 return htons(ETH_P_X25);
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 5a00aa85b756..8287081d77f2 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -279,7 +279,7 @@ struct xfrm_type
279 xfrm_address_t *(*local_addr)(struct xfrm_state *, xfrm_address_t *); 279 xfrm_address_t *(*local_addr)(struct xfrm_state *, xfrm_address_t *);
280 xfrm_address_t *(*remote_addr)(struct xfrm_state *, xfrm_address_t *); 280 xfrm_address_t *(*remote_addr)(struct xfrm_state *, xfrm_address_t *);
281 /* Estimate maximal size of result of transformation of a dgram */ 281 /* Estimate maximal size of result of transformation of a dgram */
282 u32 (*get_max_size)(struct xfrm_state *, int size); 282 u32 (*get_mtu)(struct xfrm_state *, int size);
283}; 283};
284 284
285extern int xfrm_register_type(struct xfrm_type *type, unsigned short family); 285extern int xfrm_register_type(struct xfrm_type *type, unsigned short family);
@@ -416,6 +416,13 @@ struct xfrm_audit
416 u32 secid; 416 u32 secid;
417}; 417};
418 418
419/* SAD metadata, add more later */
420struct xfrm_sadinfo
421{
422 u32 sadhcnt; /* current hash bkts */
423 u32 sadhmcnt; /* max allowed hash bkts */
424 u32 sadcnt; /* current running count */
425};
419#ifdef CONFIG_AUDITSYSCALL 426#ifdef CONFIG_AUDITSYSCALL
420extern void xfrm_audit_log(uid_t auid, u32 secid, int type, int result, 427extern void xfrm_audit_log(uid_t auid, u32 secid, int type, int result,
421 struct xfrm_policy *xp, struct xfrm_state *x); 428 struct xfrm_policy *xp, struct xfrm_state *x);
@@ -938,6 +945,7 @@ static inline int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **s
938extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); 945extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq);
939extern int xfrm_state_delete(struct xfrm_state *x); 946extern int xfrm_state_delete(struct xfrm_state *x);
940extern void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info); 947extern void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info);
948extern void xfrm_sad_getinfo(struct xfrm_sadinfo *si);
941extern int xfrm_replay_check(struct xfrm_state *x, __be32 seq); 949extern int xfrm_replay_check(struct xfrm_state *x, __be32 seq);
942extern void xfrm_replay_advance(struct xfrm_state *x, __be32 seq); 950extern void xfrm_replay_advance(struct xfrm_state *x, __be32 seq);
943extern void xfrm_replay_notify(struct xfrm_state *x, int event); 951extern void xfrm_replay_notify(struct xfrm_state *x, int event);
diff --git a/include/rxrpc/call.h b/include/rxrpc/call.h
deleted file mode 100644
index b86f83743510..000000000000
--- a/include/rxrpc/call.h
+++ /dev/null
@@ -1,212 +0,0 @@
1/* call.h: Rx call record
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_RXRPC_CALL_H
13#define _LINUX_RXRPC_CALL_H
14
15#include <rxrpc/types.h>
16#include <rxrpc/rxrpc.h>
17#include <rxrpc/packet.h>
18#include <linux/timer.h>
19
20#define RXRPC_CALL_ACK_WINDOW_SIZE 16
21
22extern unsigned rxrpc_call_rcv_timeout; /* receive activity timeout (secs) */
23
24/* application call state
25 * - only state 0 and ffff are reserved, the state is set to 1 after an opid is received
26 */
27enum rxrpc_app_cstate {
28 RXRPC_CSTATE_COMPLETE = 0, /* operation complete */
29 RXRPC_CSTATE_ERROR, /* operation ICMP error or aborted */
30 RXRPC_CSTATE_SRVR_RCV_OPID, /* [SERVER] receiving operation ID */
31 RXRPC_CSTATE_SRVR_RCV_ARGS, /* [SERVER] receiving operation data */
32 RXRPC_CSTATE_SRVR_GOT_ARGS, /* [SERVER] completely received operation data */
33 RXRPC_CSTATE_SRVR_SND_REPLY, /* [SERVER] sending operation reply */
34 RXRPC_CSTATE_SRVR_RCV_FINAL_ACK, /* [SERVER] receiving final ACK */
35 RXRPC_CSTATE_CLNT_SND_ARGS, /* [CLIENT] sending operation args */
36 RXRPC_CSTATE_CLNT_RCV_REPLY, /* [CLIENT] receiving operation reply */
37 RXRPC_CSTATE_CLNT_GOT_REPLY, /* [CLIENT] completely received operation reply */
38} __attribute__((packed));
39
40extern const char *rxrpc_call_states[];
41
42enum rxrpc_app_estate {
43 RXRPC_ESTATE_NO_ERROR = 0, /* no error */
44 RXRPC_ESTATE_LOCAL_ABORT, /* aborted locally by application layer */
45 RXRPC_ESTATE_PEER_ABORT, /* aborted remotely by peer */
46 RXRPC_ESTATE_LOCAL_ERROR, /* local ICMP network error */
47 RXRPC_ESTATE_REMOTE_ERROR, /* remote ICMP network error */
48} __attribute__((packed));
49
50extern const char *rxrpc_call_error_states[];
51
52/*****************************************************************************/
53/*
54 * Rx call record and application scratch buffer
55 * - the call record occupies the bottom of a complete page
56 * - the application scratch buffer occupies the rest
57 */
58struct rxrpc_call
59{
60 atomic_t usage;
61 struct rxrpc_connection *conn; /* connection upon which active */
62 spinlock_t lock; /* access lock */
63 struct module *owner; /* owner module */
64 wait_queue_head_t waitq; /* wait queue for events to happen */
65 struct list_head link; /* general internal list link */
66 struct list_head call_link; /* master call list link */
67 __be32 chan_ix; /* connection channel index */
68 __be32 call_id; /* call ID on connection */
69 unsigned long cjif; /* jiffies at call creation */
70 unsigned long flags; /* control flags */
71#define RXRPC_CALL_ACKS_TIMO 0x00000001 /* ACKS timeout reached */
72#define RXRPC_CALL_ACKR_TIMO 0x00000002 /* ACKR timeout reached */
73#define RXRPC_CALL_RCV_TIMO 0x00000004 /* RCV timeout reached */
74#define RXRPC_CALL_RCV_PKT 0x00000008 /* received packet */
75
76 /* transmission */
77 rxrpc_seq_t snd_seq_count; /* outgoing packet sequence number counter */
78 struct rxrpc_message *snd_nextmsg; /* next message being constructed for sending */
79 struct rxrpc_message *snd_ping; /* last ping message sent */
80 unsigned short snd_resend_cnt; /* count of resends since last ACK */
81
82 /* transmission ACK tracking */
83 struct list_head acks_pendq; /* messages pending ACK (ordered by seq) */
84 unsigned acks_pend_cnt; /* number of un-ACK'd packets */
85 rxrpc_seq_t acks_dftv_seq; /* highest definitively ACK'd msg seq */
86 struct timer_list acks_timeout; /* timeout on expected ACK */
87
88 /* reception */
89 struct list_head rcv_receiveq; /* messages pending reception (ordered by seq) */
90 struct list_head rcv_krxiodq_lk; /* krxiod queue for new inbound packets */
91 struct timer_list rcv_timeout; /* call receive activity timeout */
92
93 /* reception ACK'ing */
94 rxrpc_seq_t ackr_win_bot; /* bottom of ACK window */
95 rxrpc_seq_t ackr_win_top; /* top of ACK window */
96 rxrpc_seq_t ackr_high_seq; /* highest seqno yet received */
97 rxrpc_seq_net_t ackr_prev_seq; /* previous seqno received */
98 unsigned ackr_pend_cnt; /* number of pending ACKs */
99 struct timer_list ackr_dfr_timo; /* timeout on deferred ACK */
100 char ackr_dfr_perm; /* request for deferred ACKs permitted */
101 rxrpc_seq_t ackr_dfr_seq; /* seqno for deferred ACK */
102 struct rxrpc_ackpacket ackr; /* pending normal ACK packet */
103 uint8_t ackr_array[RXRPC_CALL_ACK_WINDOW_SIZE]; /* ACK records */
104
105 /* presentation layer */
106 char app_last_rcv; /* T if received last packet from remote end */
107 enum rxrpc_app_cstate app_call_state; /* call state */
108 enum rxrpc_app_estate app_err_state; /* abort/error state */
109 struct list_head app_readyq; /* ordered ready received packet queue */
110 struct list_head app_unreadyq; /* ordered post-hole recv'd packet queue */
111 rxrpc_seq_t app_ready_seq; /* last seq number dropped into readyq */
112 size_t app_ready_qty; /* amount of data ready in readyq */
113 unsigned app_opcode; /* operation ID */
114 unsigned app_abort_code; /* abort code (when aborted) */
115 int app_errno; /* error number (when ICMP error received) */
116
117 /* statisics */
118 unsigned pkt_rcv_count; /* count of received packets on this call */
119 unsigned pkt_snd_count; /* count of sent packets on this call */
120 unsigned app_read_count; /* number of reads issued */
121
122 /* bits for the application to use */
123 rxrpc_call_attn_func_t app_attn_func; /* callback when attention required */
124 rxrpc_call_error_func_t app_error_func; /* callback when abort sent (cleanup and put) */
125 rxrpc_call_aemap_func_t app_aemap_func; /* callback to map abort code to/from errno */
126 void *app_user; /* application data */
127 struct list_head app_link; /* application list linkage */
128 struct list_head app_attn_link; /* application attention list linkage */
129 size_t app_mark; /* trigger callback when app_ready_qty>=app_mark */
130 char app_async_read; /* T if in async-read mode */
131 uint8_t *app_read_buf; /* application async read buffer (app_mark size) */
132 uint8_t *app_scr_alloc; /* application scratch allocation pointer */
133 void *app_scr_ptr; /* application pointer into scratch buffer */
134
135#define RXRPC_APP_MARK_EOF 0xFFFFFFFFU /* mark at end of input */
136
137 /* application scratch buffer */
138 uint8_t app_scratch[0] __attribute__((aligned(sizeof(long))));
139};
140
141#define RXRPC_CALL_SCRATCH_SIZE (PAGE_SIZE - sizeof(struct rxrpc_call))
142
143#define rxrpc_call_reset_scratch(CALL) \
144do { (CALL)->app_scr_alloc = (CALL)->app_scratch; } while(0)
145
146#define rxrpc_call_alloc_scratch(CALL,SIZE) \
147({ \
148 void *ptr; \
149 ptr = (CALL)->app_scr_alloc; \
150 (CALL)->app_scr_alloc += (SIZE); \
151 if ((SIZE)>RXRPC_CALL_SCRATCH_SIZE || \
152 (size_t)((CALL)->app_scr_alloc - (u8*)(CALL)) > RXRPC_CALL_SCRATCH_SIZE) { \
153 printk("rxrpc_call_alloc_scratch(%p,%Zu)\n",(CALL),(size_t)(SIZE)); \
154 BUG(); \
155 } \
156 ptr; \
157})
158
159#define rxrpc_call_alloc_scratch_s(CALL,TYPE) \
160({ \
161 size_t size = sizeof(TYPE); \
162 TYPE *ptr; \
163 ptr = (TYPE*)(CALL)->app_scr_alloc; \
164 (CALL)->app_scr_alloc += size; \
165 if (size>RXRPC_CALL_SCRATCH_SIZE || \
166 (size_t)((CALL)->app_scr_alloc - (u8*)(CALL)) > RXRPC_CALL_SCRATCH_SIZE) { \
167 printk("rxrpc_call_alloc_scratch(%p,%Zu)\n",(CALL),size); \
168 BUG(); \
169 } \
170 ptr; \
171})
172
173#define rxrpc_call_is_ack_pending(CALL) ((CALL)->ackr.reason != 0)
174
175extern int rxrpc_create_call(struct rxrpc_connection *conn,
176 rxrpc_call_attn_func_t attn,
177 rxrpc_call_error_func_t error,
178 rxrpc_call_aemap_func_t aemap,
179 struct rxrpc_call **_call);
180
181extern int rxrpc_incoming_call(struct rxrpc_connection *conn,
182 struct rxrpc_message *msg,
183 struct rxrpc_call **_call);
184
185static inline void rxrpc_get_call(struct rxrpc_call *call)
186{
187 BUG_ON(atomic_read(&call->usage)<=0);
188 atomic_inc(&call->usage);
189 /*printk("rxrpc_get_call(%p{u=%d})\n",(C),atomic_read(&(C)->usage));*/
190}
191
192extern void rxrpc_put_call(struct rxrpc_call *call);
193
194extern void rxrpc_call_do_stuff(struct rxrpc_call *call);
195
196extern int rxrpc_call_abort(struct rxrpc_call *call, int error);
197
198#define RXRPC_CALL_READ_BLOCK 0x0001 /* block if not enough data and not yet EOF */
199#define RXRPC_CALL_READ_ALL 0x0002 /* error if insufficient data received */
200extern int rxrpc_call_read_data(struct rxrpc_call *call, void *buffer, size_t size, int flags);
201
202extern int rxrpc_call_write_data(struct rxrpc_call *call,
203 size_t sioc,
204 struct kvec *siov,
205 uint8_t rxhdr_flags,
206 gfp_t alloc_flags,
207 int dup_data,
208 size_t *size_sent);
209
210extern void rxrpc_call_handle_error(struct rxrpc_call *conn, int local, int errno);
211
212#endif /* _LINUX_RXRPC_CALL_H */
diff --git a/include/rxrpc/connection.h b/include/rxrpc/connection.h
deleted file mode 100644
index 41e6781ad067..000000000000
--- a/include/rxrpc/connection.h
+++ /dev/null
@@ -1,83 +0,0 @@
1/* connection.h: Rx connection record
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_RXRPC_CONNECTION_H
13#define _LINUX_RXRPC_CONNECTION_H
14
15#include <rxrpc/types.h>
16#include <rxrpc/krxtimod.h>
17
18struct sk_buff;
19
20/*****************************************************************************/
21/*
22 * Rx connection
23 * - connections are matched by (rmt_port,rmt_addr,service_id,conn_id,clientflag)
24 * - connections only retain a refcount on the peer when they are active
25 * - connections with refcount==0 are inactive and reside in the peer's graveyard
26 */
27struct rxrpc_connection
28{
29 atomic_t usage;
30 struct rxrpc_transport *trans; /* transport endpoint */
31 struct rxrpc_peer *peer; /* peer from/to which connected */
32 struct rxrpc_service *service; /* responsible service (inbound conns) */
33 struct rxrpc_timer timeout; /* decaching timer */
34 struct list_head link; /* link in peer's list */
35 struct list_head proc_link; /* link in proc list */
36 struct list_head err_link; /* link in ICMP error processing list */
37 struct list_head id_link; /* link in ID grant list */
38 struct sockaddr_in addr; /* remote address */
39 struct rxrpc_call *channels[4]; /* channels (active calls) */
40 wait_queue_head_t chanwait; /* wait for channel to become available */
41 spinlock_t lock; /* access lock */
42 struct timeval atime; /* last access time */
43 size_t mtu_size; /* MTU size for outbound messages */
44 unsigned call_counter; /* call ID counter */
45 rxrpc_serial_t serial_counter; /* packet serial number counter */
46
47 /* the following should all be in net order */
48 __be32 in_epoch; /* peer's epoch */
49 __be32 out_epoch; /* my epoch */
50 __be32 conn_id; /* connection ID, appropriately shifted */
51 __be16 service_id; /* service ID */
52 uint8_t security_ix; /* security ID */
53 uint8_t in_clientflag; /* RXRPC_CLIENT_INITIATED if we are server */
54 uint8_t out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */
55};
56
57extern int rxrpc_create_connection(struct rxrpc_transport *trans,
58 __be16 port,
59 __be32 addr,
60 uint16_t service_id,
61 void *security,
62 struct rxrpc_connection **_conn);
63
64extern int rxrpc_connection_lookup(struct rxrpc_peer *peer,
65 struct rxrpc_message *msg,
66 struct rxrpc_connection **_conn);
67
68static inline void rxrpc_get_connection(struct rxrpc_connection *conn)
69{
70 BUG_ON(atomic_read(&conn->usage)<0);
71 atomic_inc(&conn->usage);
72 //printk("rxrpc_get_conn(%p{u=%d})\n",conn,atomic_read(&conn->usage));
73}
74
75extern void rxrpc_put_connection(struct rxrpc_connection *conn);
76
77extern int rxrpc_conn_receive_call_packet(struct rxrpc_connection *conn,
78 struct rxrpc_call *call,
79 struct rxrpc_message *msg);
80
81extern void rxrpc_conn_handle_error(struct rxrpc_connection *conn, int local, int errno);
82
83#endif /* _LINUX_RXRPC_CONNECTION_H */
diff --git a/include/rxrpc/krxiod.h b/include/rxrpc/krxiod.h
deleted file mode 100644
index c0e0e82e4df2..000000000000
--- a/include/rxrpc/krxiod.h
+++ /dev/null
@@ -1,27 +0,0 @@
1/* krxiod.h: Rx RPC I/O kernel thread interface
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_RXRPC_KRXIOD_H
13#define _LINUX_RXRPC_KRXIOD_H
14
15#include <rxrpc/types.h>
16
17extern int rxrpc_krxiod_init(void);
18extern void rxrpc_krxiod_kill(void);
19extern void rxrpc_krxiod_queue_transport(struct rxrpc_transport *trans);
20extern void rxrpc_krxiod_dequeue_transport(struct rxrpc_transport *trans);
21extern void rxrpc_krxiod_queue_peer(struct rxrpc_peer *peer);
22extern void rxrpc_krxiod_dequeue_peer(struct rxrpc_peer *peer);
23extern void rxrpc_krxiod_clear_peers(struct rxrpc_transport *trans);
24extern void rxrpc_krxiod_queue_call(struct rxrpc_call *call);
25extern void rxrpc_krxiod_dequeue_call(struct rxrpc_call *call);
26
27#endif /* _LINUX_RXRPC_KRXIOD_H */
diff --git a/include/rxrpc/krxsecd.h b/include/rxrpc/krxsecd.h
deleted file mode 100644
index 55ce43a25b38..000000000000
--- a/include/rxrpc/krxsecd.h
+++ /dev/null
@@ -1,22 +0,0 @@
1/* krxsecd.h: Rx RPC security kernel thread interface
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_RXRPC_KRXSECD_H
13#define _LINUX_RXRPC_KRXSECD_H
14
15#include <rxrpc/types.h>
16
17extern int rxrpc_krxsecd_init(void);
18extern void rxrpc_krxsecd_kill(void);
19extern void rxrpc_krxsecd_clear_transport(struct rxrpc_transport *trans);
20extern void rxrpc_krxsecd_queue_incoming_call(struct rxrpc_message *msg);
21
22#endif /* _LINUX_RXRPC_KRXSECD_H */
diff --git a/include/rxrpc/krxtimod.h b/include/rxrpc/krxtimod.h
deleted file mode 100644
index b3d298b612f2..000000000000
--- a/include/rxrpc/krxtimod.h
+++ /dev/null
@@ -1,45 +0,0 @@
1/* krxtimod.h: RxRPC timeout daemon
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_RXRPC_KRXTIMOD_H
13#define _LINUX_RXRPC_KRXTIMOD_H
14
15#include <rxrpc/types.h>
16
17struct rxrpc_timer_ops {
18 /* called when the front of the timer queue has timed out */
19 void (*timed_out)(struct rxrpc_timer *timer);
20};
21
22/*****************************************************************************/
23/*
24 * RXRPC timer/timeout record
25 */
26struct rxrpc_timer
27{
28 struct list_head link; /* link in timer queue */
29 unsigned long timo_jif; /* timeout time */
30 const struct rxrpc_timer_ops *ops; /* timeout expiry function */
31};
32
33static inline void rxrpc_timer_init(rxrpc_timer_t *timer, const struct rxrpc_timer_ops *ops)
34{
35 INIT_LIST_HEAD(&timer->link);
36 timer->ops = ops;
37}
38
39extern int rxrpc_krxtimod_start(void);
40extern void rxrpc_krxtimod_kill(void);
41
42extern void rxrpc_krxtimod_add_timer(rxrpc_timer_t *timer, unsigned long timeout);
43extern int rxrpc_krxtimod_del_timer(rxrpc_timer_t *timer);
44
45#endif /* _LINUX_RXRPC_KRXTIMOD_H */
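
Arming one of these timers takes three steps: supply an ops table whose timed_out callback runs when the timer reaches the front of krxtimod's queue, initialise the record, then add it with a jiffies-relative timeout. A hedged sketch, assuming rxrpc_timer_t is the types.h typedef for struct rxrpc_timer; the callback body and the 5*HZ timeout are invented for illustration:

	static void example_timed_out(struct rxrpc_timer *timer)
	{
		/* invoked by krxtimod once this timer's timeout expires */
	}

	static const struct rxrpc_timer_ops example_timer_ops = {
		.timed_out	= example_timed_out,
	};

	static struct rxrpc_timer example_timer;

	static void example_arm_timer(void)
	{
		rxrpc_timer_init(&example_timer, &example_timer_ops);
		rxrpc_krxtimod_add_timer(&example_timer, 5 * HZ);	/* ~5s */
	}

	static void example_cancel_timer(void)
	{
		/* return value says whether the timer was still queued */
		rxrpc_krxtimod_del_timer(&example_timer);
	}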
diff --git a/include/rxrpc/message.h b/include/rxrpc/message.h
deleted file mode 100644
index b318f273d4f2..000000000000
--- a/include/rxrpc/message.h
+++ /dev/null
@@ -1,71 +0,0 @@
1/* message.h: Rx message caching
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_RXRPC_MESSAGE_H
13#define _LINUX_RXRPC_MESSAGE_H
14
15#include <rxrpc/packet.h>
16
17/*****************************************************************************/
18/*
19 * Rx message record
20 */
21struct rxrpc_message
22{
23 atomic_t usage;
24 struct list_head link; /* list link */
25 struct timeval stamp; /* time received or last sent */
26 rxrpc_seq_t seq; /* message sequence number */
27
28 int state; /* the state the message is currently in */
29#define RXRPC_MSG_PREPARED 0
30#define RXRPC_MSG_SENT 1
31#define RXRPC_MSG_ACKED 2 /* provisionally ACK'd */
32#define RXRPC_MSG_DONE 3 /* definitively ACK'd (msg->seq<ack.firstPacket) */
33#define RXRPC_MSG_RECEIVED 4
34#define RXRPC_MSG_ERROR -1
35 char rttdone; /* used for RTT */
36
37 struct rxrpc_transport *trans; /* transport received through */
38 struct rxrpc_connection *conn; /* connection received over */
39 struct sk_buff *pkt; /* received packet */
40 off_t offset; /* offset into pkt of next byte of data */
41
42 struct rxrpc_header hdr; /* message header */
43
44 int dcount; /* data part count */
45 size_t dsize; /* data size */
46#define RXRPC_MSG_MAX_IOCS 8
47 struct kvec data[RXRPC_MSG_MAX_IOCS]; /* message data */
48 unsigned long dfree; /* bit mask indicating kfree(data[x]) if T */
49};
50
51#define rxrpc_get_message(M) do { atomic_inc(&(M)->usage); } while(0)
52
53extern void __rxrpc_put_message(struct rxrpc_message *msg);
54static inline void rxrpc_put_message(struct rxrpc_message *msg)
55{
56 BUG_ON(atomic_read(&msg->usage)<=0);
57 if (atomic_dec_and_test(&msg->usage))
58 __rxrpc_put_message(msg);
59}
60
61extern int rxrpc_conn_newmsg(struct rxrpc_connection *conn,
62 struct rxrpc_call *call,
63 uint8_t type,
64 int count,
65 struct kvec *diov,
66 gfp_t alloc_flags,
67 struct rxrpc_message **_msg);
68
69extern int rxrpc_conn_sendmsg(struct rxrpc_connection *conn, struct rxrpc_message *msg);
70
71#endif /* _LINUX_RXRPC_MESSAGE_H */
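
A message's payload travels as up to RXRPC_MSG_MAX_IOCS kvecs: rxrpc_conn_newmsg() builds a message around the caller's iovec and rxrpc_conn_sendmsg() transmits it. A sketch of queueing one data blob on a call; the helper name is invented, RXRPC_PACKET_TYPE_DATA is assumed from packet.h, and it further assumes newmsg hands back a referenced message that the caller drops via the inline rxrpc_put_message() above:

	int example_send_blob(struct rxrpc_connection *conn,
			      struct rxrpc_call *call,
			      void *blob, size_t len)
	{
		struct rxrpc_message *msg;
		struct kvec iov[1];
		int ret;

		iov[0].iov_base = blob;
		iov[0].iov_len  = len;

		ret = rxrpc_conn_newmsg(conn, call, RXRPC_PACKET_TYPE_DATA,
					1, iov, GFP_KERNEL, &msg);
		if (ret < 0)
			return ret;

		ret = rxrpc_conn_sendmsg(conn, msg);
		rxrpc_put_message(msg);	/* drop the reference from newmsg */
		return ret;
	}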
diff --git a/include/rxrpc/packet.h b/include/rxrpc/packet.h
index 1447f0aaa0eb..b69e6e173ea1 100644
--- a/include/rxrpc/packet.h
+++ b/include/rxrpc/packet.h
@@ -1,6 +1,6 @@
1/* packet.h: Rx packet layout and definitions 1/* packet.h: Rx packet layout and definitions
2 * 2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -12,28 +12,25 @@
12#ifndef _LINUX_RXRPC_PACKET_H 12#ifndef _LINUX_RXRPC_PACKET_H
13#define _LINUX_RXRPC_PACKET_H 13#define _LINUX_RXRPC_PACKET_H
14 14
15#include <rxrpc/types.h> 15typedef u32 rxrpc_seq_t; /* Rx message sequence number */
16 16typedef u32 rxrpc_serial_t; /* Rx message serial number */
17#define RXRPC_IPUDP_SIZE 28 17typedef __be32 rxrpc_seq_net_t; /* on-the-wire Rx message sequence number */
18extern size_t RXRPC_MAX_PACKET_SIZE; 18typedef __be32 rxrpc_serial_net_t; /* on-the-wire Rx message serial number */
19#define RXRPC_MAX_PACKET_DATA_SIZE (RXRPC_MAX_PACKET_SIZE - sizeof(struct rxrpc_header))
20#define RXRPC_LOCAL_PACKET_SIZE RXRPC_MAX_PACKET_SIZE
21#define RXRPC_REMOTE_PACKET_SIZE (576 - RXRPC_IPUDP_SIZE)
22 19
23/*****************************************************************************/ 20/*****************************************************************************/
24/* 21/*
25 * on-the-wire Rx packet header 22 * on-the-wire Rx packet header
26 * - all multibyte fields should be in network byte order 23 * - all multibyte fields should be in network byte order
27 */ 24 */
28struct rxrpc_header 25struct rxrpc_header {
29{
30 __be32 epoch; /* client boot timestamp */ 26 __be32 epoch; /* client boot timestamp */
31 27
32 __be32 cid; /* connection and channel ID */ 28 __be32 cid; /* connection and channel ID */
33#define RXRPC_MAXCALLS 4 /* max active calls per conn */ 29#define RXRPC_MAXCALLS 4 /* max active calls per conn */
34#define RXRPC_CHANNELMASK (RXRPC_MAXCALLS-1) /* mask for channel ID */ 30#define RXRPC_CHANNELMASK (RXRPC_MAXCALLS-1) /* mask for channel ID */
35#define RXRPC_CIDMASK (~RXRPC_CHANNELMASK) /* mask for connection ID */ 31#define RXRPC_CIDMASK (~RXRPC_CHANNELMASK) /* mask for connection ID */
36#define RXRPC_CIDSHIFT 2 /* shift for connection ID */ 32#define RXRPC_CIDSHIFT ilog2(RXRPC_MAXCALLS) /* shift for connection ID */
33#define RXRPC_CID_INC (1 << RXRPC_CIDSHIFT) /* connection ID increment */
37 34
38 __be32 callNumber; /* call ID (0 for connection-level packets) */ 35 __be32 callNumber; /* call ID (0 for connection-level packets) */
39#define RXRPC_PROCESS_MAXCALLS (1<<2) /* maximum number of active calls per conn (power of 2) */ 36#define RXRPC_PROCESS_MAXCALLS (1<<2) /* maximum number of active calls per conn (power of 2) */
@@ -62,7 +59,10 @@ struct rxrpc_header
62 59
63 uint8_t userStatus; /* app-layer defined status */ 60 uint8_t userStatus; /* app-layer defined status */
64 uint8_t securityIndex; /* security protocol ID */ 61 uint8_t securityIndex; /* security protocol ID */
65 __be16 _rsvd; /* reserved (used by kerberos security as cksum) */ 62 union {
63 __be16 _rsvd; /* reserved */
64 __be16 cksum; /* kerberos security checksum */
65 };
66 __be16 serviceId; /* service ID */ 66 __be16 serviceId; /* service ID */
67 67
68} __attribute__((packed)); 68} __attribute__((packed));
@@ -81,8 +81,7 @@ extern const char *rxrpc_pkts[];
81 * - new__rsvd = j__rsvd 81 * - new__rsvd = j__rsvd
82 * - duplicating all other fields 82 * - duplicating all other fields
83 */ 83 */
84struct rxrpc_jumbo_header 84struct rxrpc_jumbo_header {
85{
86 uint8_t flags; /* packet flags (as per rxrpc_header) */ 85 uint8_t flags; /* packet flags (as per rxrpc_header) */
87 uint8_t pad; 86 uint8_t pad;
88 __be16 _rsvd; /* reserved (used by kerberos security as cksum) */ 87 __be16 _rsvd; /* reserved (used by kerberos security as cksum) */
@@ -95,8 +94,7 @@ struct rxrpc_jumbo_header
95 * on-the-wire Rx ACK packet data payload 94 * on-the-wire Rx ACK packet data payload
96 * - all multibyte fields should be in network byte order 95 * - all multibyte fields should be in network byte order
97 */ 96 */
98struct rxrpc_ackpacket 97struct rxrpc_ackpacket {
99{
100 __be16 bufferSpace; /* number of packet buffers available */ 98 __be16 bufferSpace; /* number of packet buffers available */
101 __be16 maxSkew; /* diff between serno being ACK'd and highest serial no 99 __be16 maxSkew; /* diff between serno being ACK'd and highest serial no
102 * received */ 100 * received */
@@ -124,4 +122,93 @@ struct rxrpc_ackpacket
124 122
125} __attribute__((packed)); 123} __attribute__((packed));
126 124
125/*
126 * ACK packets can have a further piece of information tagged on the end
127 */
128struct rxrpc_ackinfo {
129 __be32 rxMTU; /* maximum Rx MTU size (bytes) [AFS 3.3] */
130 __be32 maxMTU; /* maximum interface MTU size (bytes) [AFS 3.3] */
131 __be32 rwind; /* Rx window size (packets) [AFS 3.4] */
132 __be32 jumbo_max; /* max packets to stick into a jumbo packet [AFS 3.5] */
133};
134
135/*****************************************************************************/
136/*
137 * Kerberos security type-2 challenge packet
138 */
139struct rxkad_challenge {
140 __be32 version; /* version of this challenge type */
141 __be32 nonce; /* encrypted random number */
142 __be32 min_level; /* minimum security level */
143 __be32 __padding; /* padding to 8-byte boundary */
144} __attribute__((packed));
145
146/*****************************************************************************/
147/*
148 * Kerberos security type-2 response packet
149 */
150struct rxkad_response {
151 __be32 version; /* version of this response type */
152 __be32 __pad;
153
154 /* encrypted bit of the response */
155 struct {
156 __be32 epoch; /* current epoch */
157 __be32 cid; /* parent connection ID */
158 __be32 checksum; /* checksum */
159 __be32 securityIndex; /* security type */
160 __be32 call_id[4]; /* encrypted call IDs */
161 __be32 inc_nonce; /* challenge nonce + 1 */
162 __be32 level; /* desired level */
163 } encrypted;
164
165 __be32 kvno; /* Kerberos key version number */
166 __be32 ticket_len; /* Kerberos ticket length */
167} __attribute__((packed));
168
169/*****************************************************************************/
170/*
171 * RxRPC-level abort codes
172 */
173#define RX_CALL_DEAD -1 /* call/conn has been inactive and is shut down */
174#define RX_INVALID_OPERATION -2 /* invalid operation requested / attempted */
175#define RX_CALL_TIMEOUT -3 /* call timeout exceeded */
176#define RX_EOF -4 /* unexpected end of data on read op */
177#define RX_PROTOCOL_ERROR -5 /* low-level protocol error */
178#define RX_USER_ABORT -6 /* generic user abort */
179#define RX_ADDRINUSE -7 /* UDP port in use */
180#define RX_DEBUGI_BADTYPE -8 /* bad debugging packet type */
181
182/*
183 * (un)marshalling abort codes (rxgen)
184 */
185#define RXGEN_CC_MARSHAL -450
186#define RXGEN_CC_UNMARSHAL -451
187#define RXGEN_SS_MARSHAL -452
188#define RXGEN_SS_UNMARSHAL -453
189#define RXGEN_DECODE -454
190#define RXGEN_OPCODE -455
191#define RXGEN_SS_XDRFREE -456
192#define RXGEN_CC_XDRFREE -457
193
194/*
195 * Rx kerberos security abort codes
196 * - unfortunately we have no generalised security abort codes to say things
197 * like "unsupported security", so we have to use these instead and hope the
198 * other side understands
199 */
200#define RXKADINCONSISTENCY 19270400 /* security module structure inconsistent */
201#define RXKADPACKETSHORT 19270401 /* packet too short for security challenge */
202#define RXKADLEVELFAIL 19270402 /* security level negotiation failed */
203#define RXKADTICKETLEN 19270403 /* ticket length too short or too long */
204#define RXKADOUTOFSEQUENCE 19270404 /* packet had bad sequence number */
205#define RXKADNOAUTH 19270405 /* caller not authorised */
206#define RXKADBADKEY 19270406 /* illegal key: bad parity or weak */
207#define RXKADBADTICKET 19270407 /* security object was passed a bad ticket */
208#define RXKADUNKNOWNKEY 19270408 /* ticket contained unknown key version number */
209#define RXKADEXPIRED 19270409 /* authentication expired */
210#define RXKADSEALEDINCON 19270410 /* sealed data inconsistent */
211#define RXKADDATALEN 19270411 /* user data too long */
212#define RXKADILLEGALLEVEL 19270412 /* caller not authorised to use encrypted conns */
213
127#endif /* _LINUX_RXRPC_PACKET_H */ 214#endif /* _LINUX_RXRPC_PACKET_H */
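
With RXRPC_CIDSHIFT now derived as ilog2(RXRPC_MAXCALLS), the cid field splits mechanically: the low two bits select one of the four channels on the connection and the rest form the connection ID, with RXRPC_CID_INC the stride between consecutive connection IDs. A small sketch of the decomposition for a received header:

	static void example_decode_cid(const struct rxrpc_header *hdr)
	{
		u32 cid     = ntohl(hdr->cid);
		u32 channel = cid & RXRPC_CHANNELMASK;	/* 0..RXRPC_MAXCALLS-1 */
		u32 conn_id = cid & RXRPC_CIDMASK;	/* connection ID (still shifted) */

		/* the next client connection would use conn_id + RXRPC_CID_INC */
		printk(KERN_DEBUG "conn %08x channel %u\n", conn_id, channel);
	}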
diff --git a/include/rxrpc/peer.h b/include/rxrpc/peer.h
deleted file mode 100644
index 8b8fe97cbbcc..000000000000
--- a/include/rxrpc/peer.h
+++ /dev/null
@@ -1,82 +0,0 @@
1/* peer.h: Rx RPC per-transport peer record
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_RXRPC_PEER_H
13#define _LINUX_RXRPC_PEER_H
14
15#include <linux/wait.h>
16#include <rxrpc/types.h>
17#include <rxrpc/krxtimod.h>
18
19struct rxrpc_peer_ops
20{
21 /* peer record being added */
22 int (*adding)(struct rxrpc_peer *peer);
23
24 /* peer record being discarded from graveyard */
25 void (*discarding)(struct rxrpc_peer *peer);
26
27 /* change of epoch detected on connection */
28 void (*change_of_epoch)(struct rxrpc_connection *conn);
29};
30
31/*****************************************************************************/
32/*
33 * Rx RPC per-transport peer record
34 * - peers only retain a refcount on the transport when they are active
35 * - peers with refcount==0 are inactive and reside in the transport's graveyard
36 */
37struct rxrpc_peer
38{
39 atomic_t usage;
40 struct rxrpc_peer_ops *ops; /* operations on this peer */
41 struct rxrpc_transport *trans; /* owner transport */
42 struct rxrpc_timer timeout; /* timeout for grave destruction */
43 struct list_head link; /* link in transport's peer list */
44 struct list_head proc_link; /* link in /proc list */
45 rwlock_t conn_idlock; /* lock for connection IDs */
46 struct list_head conn_idlist; /* list of connections granted IDs */
47 uint32_t conn_idcounter; /* connection ID counter */
48 rwlock_t conn_lock; /* lock for active/dead connections */
49 struct list_head conn_active; /* active connections to/from this peer */
50 struct list_head conn_graveyard; /* graveyard for inactive connections */
51 spinlock_t conn_gylock; /* lock for conn_graveyard */
52 wait_queue_head_t conn_gy_waitq; /* wait queue hit when graveyard is empty */
53 atomic_t conn_count; /* number of attached connections */
54 struct in_addr addr; /* remote address */
55 size_t if_mtu; /* interface MTU for this peer */
56 spinlock_t lock; /* access lock */
57
58 void *user; /* application layer data */
59
60 /* calculated RTT cache */
61#define RXRPC_RTT_CACHE_SIZE 32
62 suseconds_t rtt; /* current RTT estimate (in us) */
63 unsigned rtt_point; /* next entry at which to insert */
64 unsigned rtt_usage; /* amount of cache actually used */
65 suseconds_t rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* calculated RTT cache */
66};
67
68
69extern int rxrpc_peer_lookup(struct rxrpc_transport *trans,
70 __be32 addr,
71 struct rxrpc_peer **_peer);
72
73static inline void rxrpc_get_peer(struct rxrpc_peer *peer)
74{
75 BUG_ON(atomic_read(&peer->usage)<0);
76 atomic_inc(&peer->usage);
77 //printk("rxrpc_get_peer(%p{u=%d})\n",peer,atomic_read(&peer->usage));
78}
79
80extern void rxrpc_put_peer(struct rxrpc_peer *peer);
81
82#endif /* _LINUX_RXRPC_PEER_H */
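
The last four fields form a fixed ring of RTT samples: rtt_point is the next slot to overwrite, rtt_usage counts how much of the ring is valid, and rtt caches the current estimate. A hypothetical update that keeps rtt as a plain mean of the ring (the smoothing in the removed net/rxrpc code may well differ):

	static void example_record_rtt(struct rxrpc_peer *peer, suseconds_t sample)
	{
		suseconds_t sum = 0;
		unsigned n;

		peer->rtt_cache[peer->rtt_point] = sample;
		peer->rtt_point = (peer->rtt_point + 1) % RXRPC_RTT_CACHE_SIZE;
		if (peer->rtt_usage < RXRPC_RTT_CACHE_SIZE)
			peer->rtt_usage++;

		for (n = 0; n < peer->rtt_usage; n++)
			sum += peer->rtt_cache[n];
		peer->rtt = sum / peer->rtt_usage;	/* mean, in microseconds */
	}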
diff --git a/include/rxrpc/rxrpc.h b/include/rxrpc/rxrpc.h
deleted file mode 100644
index 8d9874cef991..000000000000
--- a/include/rxrpc/rxrpc.h
+++ /dev/null
@@ -1,36 +0,0 @@
1/* rx.h: Rx RPC interface
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_RXRPC_RXRPC_H
13#define _LINUX_RXRPC_RXRPC_H
14
15#ifdef __KERNEL__
16
17extern __be32 rxrpc_epoch;
18
19#ifdef CONFIG_SYSCTL
20extern int rxrpc_ktrace;
21extern int rxrpc_kdebug;
22extern int rxrpc_kproto;
23extern int rxrpc_knet;
24#else
25#define rxrpc_ktrace 0
26#define rxrpc_kdebug 0
27#define rxrpc_kproto 0
28#define rxrpc_knet 0
29#endif
30
31extern int rxrpc_sysctl_init(void);
32extern void rxrpc_sysctl_cleanup(void);
33
34#endif /* __KERNEL__ */
35
36#endif /* _LINUX_RXRPC_RXRPC_H */
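
The #else branch keeps the knobs valid as expressions when sysctl support is not built: they become compile-time zeroes, so any debug statement gated on them is discarded entirely by the compiler. A sketch of the usual consumption pattern; the macro name is invented:

	#define _example_ktrace(fmt, ...)					\
	do {									\
		if (rxrpc_ktrace)	/* constant 0 without CONFIG_SYSCTL */	\
			printk(KERN_DEBUG "rxrpc: " fmt, ##__VA_ARGS__);	\
	} while (0)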
diff --git a/include/rxrpc/transport.h b/include/rxrpc/transport.h
deleted file mode 100644
index 7c7b9683fa39..000000000000
--- a/include/rxrpc/transport.h
+++ /dev/null
@@ -1,106 +0,0 @@
1/* transport.h: Rx transport management
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifndef _LINUX_RXRPC_TRANSPORT_H
13#define _LINUX_RXRPC_TRANSPORT_H
14
15#include <rxrpc/types.h>
16#include <rxrpc/krxiod.h>
17#include <rxrpc/rxrpc.h>
18#include <linux/skbuff.h>
19#include <linux/rwsem.h>
20
21typedef int (*rxrpc_newcall_fnx_t)(struct rxrpc_call *call);
22
23extern wait_queue_head_t rxrpc_krxiod_wq;
24
25/*****************************************************************************/
26/*
27 * Rx operation specification
28 * - tables of these must be sorted by op ID so that they can be binary-chop searched
29 */
30struct rxrpc_operation
31{
32 unsigned id; /* operation ID */
33 size_t asize; /* minimum size of argument block */
34 const char *name; /* name of operation */
35 void *user; /* initial user data */
36};
37
38/*****************************************************************************/
39/*
40 * Rx transport service record
41 */
42struct rxrpc_service
43{
44 struct list_head link; /* link in services list on transport */
45 struct module *owner; /* owner module */
46 rxrpc_newcall_fnx_t new_call; /* new call handler function */
47 const char *name; /* name of service */
48 unsigned short service_id; /* Rx service ID */
49 rxrpc_call_attn_func_t attn_func; /* call requires attention callback */
50 rxrpc_call_error_func_t error_func; /* call error callback */
51 rxrpc_call_aemap_func_t aemap_func; /* abort -> errno mapping callback */
52
53 const struct rxrpc_operation *ops_begin; /* beginning of operations table */
54 const struct rxrpc_operation *ops_end; /* end of operations table */
55};
56
57/*****************************************************************************/
58/*
59 * Rx transport endpoint record
60 */
61struct rxrpc_transport
62{
63 atomic_t usage;
64 struct socket *socket; /* my UDP socket */
65 struct list_head services; /* services listening on this socket */
66 struct list_head link; /* link in transport list */
67 struct list_head proc_link; /* link in transport proc list */
68 struct list_head krxiodq_link; /* krxiod attention queue link */
69 spinlock_t lock; /* access lock */
70 struct list_head peer_active; /* active peers connected to over this socket */
71 struct list_head peer_graveyard; /* inactive peer list */
72 spinlock_t peer_gylock; /* peer graveyard lock */
73 wait_queue_head_t peer_gy_waitq; /* wait queue hit when peer graveyard is empty */
74 rwlock_t peer_lock; /* peer list access lock */
75 atomic_t peer_count; /* number of peers */
76 struct rxrpc_peer_ops *peer_ops; /* default peer operations */
77 unsigned short port; /* port upon which listening */
78 volatile char error_rcvd; /* T if received ICMP error outstanding */
79};
80
81extern int rxrpc_create_transport(unsigned short port,
82 struct rxrpc_transport **_trans);
83
84static inline void rxrpc_get_transport(struct rxrpc_transport *trans)
85{
86 BUG_ON(atomic_read(&trans->usage) <= 0);
87 atomic_inc(&trans->usage);
88 //printk("rxrpc_get_transport(%p{u=%d})\n",
89 // trans, atomic_read(&trans->usage));
90}
91
92extern void rxrpc_put_transport(struct rxrpc_transport *trans);
93
94extern int rxrpc_add_service(struct rxrpc_transport *trans,
95 struct rxrpc_service *srv);
96
97extern void rxrpc_del_service(struct rxrpc_transport *trans,
98 struct rxrpc_service *srv);
99
100extern void rxrpc_trans_receive_packet(struct rxrpc_transport *trans);
101
102extern int rxrpc_trans_immediate_abort(struct rxrpc_transport *trans,
103 struct rxrpc_message *msg,
104 int error);
105
106#endif /* _LINUX_RXRPC_TRANSPORT_H */
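
The sorted-table requirement noted above lets a service's operation be located in O(log n) between ops_begin and ops_end. A sketch of such a binary-chop lookup, assuming ops_end points one past the final entry:

	static const struct rxrpc_operation *
	example_find_op(const struct rxrpc_service *srv, unsigned id)
	{
		const struct rxrpc_operation *lo = srv->ops_begin;
		const struct rxrpc_operation *hi = srv->ops_end;

		while (lo < hi) {
			const struct rxrpc_operation *mid = lo + (hi - lo) / 2;

			if (id < mid->id)
				hi = mid;
			else if (id > mid->id)
				lo = mid + 1;
			else
				return mid;
		}
		return NULL;	/* no such operation ID */
	}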
diff --git a/kernel/audit.c b/kernel/audit.c
index 76c9a11b72d6..4e9d20829681 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -151,7 +151,7 @@ struct audit_buffer {
151 151
152static void audit_set_pid(struct audit_buffer *ab, pid_t pid) 152static void audit_set_pid(struct audit_buffer *ab, pid_t pid)
153{ 153{
154 struct nlmsghdr *nlh = (struct nlmsghdr *)ab->skb->data; 154 struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
155 nlh->nlmsg_pid = pid; 155 nlh->nlmsg_pid = pid;
156} 156}
157 157
@@ -750,7 +750,7 @@ static void audit_receive_skb(struct sk_buff *skb)
750 u32 rlen; 750 u32 rlen;
751 751
752 while (skb->len >= NLMSG_SPACE(0)) { 752 while (skb->len >= NLMSG_SPACE(0)) {
753 nlh = (struct nlmsghdr *)skb->data; 753 nlh = nlmsg_hdr(skb);
754 if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) 754 if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
755 return; 755 return;
756 rlen = NLMSG_ALIGN(nlh->nlmsg_len); 756 rlen = NLMSG_ALIGN(nlh->nlmsg_len);
@@ -795,7 +795,7 @@ static int __init audit_init(void)
795 printk(KERN_INFO "audit: initializing netlink socket (%s)\n", 795 printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
796 audit_default ? "enabled" : "disabled"); 796 audit_default ? "enabled" : "disabled");
797 audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive, 797 audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive,
798 THIS_MODULE); 798 NULL, THIS_MODULE);
799 if (!audit_sock) 799 if (!audit_sock)
800 audit_panic("cannot initialize netlink socket"); 800 audit_panic("cannot initialize netlink socket");
801 else 801 else
@@ -1073,7 +1073,7 @@ static void audit_log_vformat(struct audit_buffer *ab, const char *fmt,
1073 goto out; 1073 goto out;
1074 } 1074 }
1075 va_copy(args2, args); 1075 va_copy(args2, args);
1076 len = vsnprintf(skb->tail, avail, fmt, args); 1076 len = vsnprintf(skb_tail_pointer(skb), avail, fmt, args);
1077 if (len >= avail) { 1077 if (len >= avail) {
1078 /* The printk buffer is 1024 bytes long, so if we get 1078 /* The printk buffer is 1024 bytes long, so if we get
1079 * here and AUDIT_BUFSIZ is at least 1024, then we can 1079 * here and AUDIT_BUFSIZ is at least 1024, then we can
@@ -1082,7 +1082,7 @@ static void audit_log_vformat(struct audit_buffer *ab, const char *fmt,
1082 max_t(unsigned, AUDIT_BUFSIZ, 1+len-avail)); 1082 max_t(unsigned, AUDIT_BUFSIZ, 1+len-avail));
1083 if (!avail) 1083 if (!avail)
1084 goto out; 1084 goto out;
1085 len = vsnprintf(skb->tail, avail, fmt, args2); 1085 len = vsnprintf(skb_tail_pointer(skb), avail, fmt, args2);
1086 } 1086 }
1087 if (len > 0) 1087 if (len > 0)
1088 skb_put(skb, len); 1088 skb_put(skb, len);
@@ -1143,7 +1143,7 @@ void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf,
1143 return; 1143 return;
1144 } 1144 }
1145 1145
1146 ptr = skb->tail; 1146 ptr = skb_tail_pointer(skb);
1147 for (i=0; i<len; i++) { 1147 for (i=0; i<len; i++) {
1148 *ptr++ = hex[(buf[i] & 0xF0)>>4]; /* Upper nibble */ 1148 *ptr++ = hex[(buf[i] & 0xF0)>>4]; /* Upper nibble */
1149 *ptr++ = hex[buf[i] & 0x0F]; /* Lower nibble */ 1149 *ptr++ = hex[buf[i] & 0x0F]; /* Lower nibble */
@@ -1175,7 +1175,7 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
1175 if (!avail) 1175 if (!avail)
1176 return; 1176 return;
1177 } 1177 }
1178 ptr = skb->tail; 1178 ptr = skb_tail_pointer(skb);
1179 *ptr++ = '"'; 1179 *ptr++ = '"';
1180 memcpy(ptr, string, slen); 1180 memcpy(ptr, string, slen);
1181 ptr += slen; 1181 ptr += slen;
@@ -1268,7 +1268,7 @@ void audit_log_end(struct audit_buffer *ab)
1268 audit_log_lost("rate limit exceeded"); 1268 audit_log_lost("rate limit exceeded");
1269 } else { 1269 } else {
1270 if (audit_pid) { 1270 if (audit_pid) {
1271 struct nlmsghdr *nlh = (struct nlmsghdr *)ab->skb->data; 1271 struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
1272 nlh->nlmsg_len = ab->skb->len - NLMSG_SPACE(0); 1272 nlh->nlmsg_len = ab->skb->len - NLMSG_SPACE(0);
1273 skb_queue_tail(&audit_skb_queue, ab->skb); 1273 skb_queue_tail(&audit_skb_queue, ab->skb);
1274 ab->skb = NULL; 1274 ab->skb = NULL;
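
These audit.c hunks belong to a tree-wide sweep replacing open-coded casts and pointer fields with accessors. To the best of recollection the 2.6.22-era nlmsg_hdr() is a thin wrapper, so the change is purely one of abstraction:

	static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb)
	{
		return (struct nlmsghdr *)skb->data;	/* netlink header leads the data */
	}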
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index b74860aaf5f1..f5cfde8c9025 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -59,6 +59,7 @@ ktime_t ktime_get(void)
59 59
60 return timespec_to_ktime(now); 60 return timespec_to_ktime(now);
61} 61}
62EXPORT_SYMBOL_GPL(ktime_get);
62 63
63/** 64/**
64 * ktime_get_real - get the real (wall-) time in ktime_t format 65 * ktime_get_real - get the real (wall-) time in ktime_t format
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 4c3476fa058d..ad7d2392cb0e 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -102,7 +102,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
102 */ 102 */
103static int send_reply(struct sk_buff *skb, pid_t pid) 103static int send_reply(struct sk_buff *skb, pid_t pid)
104{ 104{
105 struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); 105 struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb));
106 void *reply = genlmsg_data(genlhdr); 106 void *reply = genlmsg_data(genlhdr);
107 int rc; 107 int rc;
108 108
@@ -121,7 +121,7 @@ static int send_reply(struct sk_buff *skb, pid_t pid)
121static void send_cpu_listeners(struct sk_buff *skb, 121static void send_cpu_listeners(struct sk_buff *skb,
122 struct listener_list *listeners) 122 struct listener_list *listeners)
123{ 123{
124 struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); 124 struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb));
125 struct listener *s, *tmp; 125 struct listener *s, *tmp;
126 struct sk_buff *skb_next, *skb_cur = skb; 126 struct sk_buff *skb_next, *skb_cur = skb;
127 void *reply = genlmsg_data(genlhdr); 127 void *reply = genlmsg_data(genlhdr);
diff --git a/kernel/time.c b/kernel/time.c
index 2f47888e46c9..ba18ec4899bd 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -452,6 +452,7 @@ struct timespec ns_to_timespec(const s64 nsec)
452 452
453 return ts; 453 return ts;
454} 454}
455EXPORT_SYMBOL(ns_to_timespec);
455 456
456/** 457/**
457 * ns_to_timeval - Convert nanoseconds to timeval 458 * ns_to_timeval - Convert nanoseconds to timeval
@@ -469,6 +470,7 @@ struct timeval ns_to_timeval(const s64 nsec)
469 470
470 return tv; 471 return tv;
471} 472}
473EXPORT_SYMBOL(ns_to_timeval);
472 474
473/* 475/*
474 * Convert jiffies to milliseconds and back. 476 * Convert jiffies to milliseconds and back.
diff --git a/kernel/timer.c b/kernel/timer.c
index dd6c2c1c561b..b22bd39740dd 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -505,6 +505,8 @@ out:
505 return ret; 505 return ret;
506} 506}
507 507
508EXPORT_SYMBOL(try_to_del_timer_sync);
509
508/** 510/**
509 * del_timer_sync - deactivate a timer and wait for the handler to finish. 511 * del_timer_sync - deactivate a timer and wait for the handler to finish.
510 * @timer: the timer to be deactivated 512 * @timer: the timer to be deactivated
diff --git a/lib/Makefile b/lib/Makefile
index 992a39ef9ffd..ae57f357fec0 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -4,7 +4,7 @@
4 4
5lib-y := ctype.o string.o vsprintf.o cmdline.o \ 5lib-y := ctype.o string.o vsprintf.o cmdline.o \
6 rbtree.o radix-tree.o dump_stack.o \ 6 rbtree.o radix-tree.o dump_stack.o \
7 idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \ 7 idr.o int_sqrt.o bitmap.o extable.o prio_tree.o \
8 sha1.o irq_regs.o reciprocal_div.o 8 sha1.o irq_regs.o reciprocal_div.o
9 9
10lib-$(CONFIG_MMU) += ioremap.o 10lib-$(CONFIG_MMU) += ioremap.o
@@ -12,7 +12,8 @@ lib-$(CONFIG_SMP) += cpumask.o
12 12
13lib-y += kobject.o kref.o kobject_uevent.o klist.o 13lib-y += kobject.o kref.o kobject_uevent.o klist.o
14 14
15obj-y += sort.o parser.o halfmd4.o debug_locks.o random32.o bust_spinlocks.o 15obj-y += div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
16 bust_spinlocks.o
16 17
17ifeq ($(CONFIG_DEBUG_KOBJECT),y) 18ifeq ($(CONFIG_DEBUG_KOBJECT),y)
18CFLAGS_kobject.o += -DDEBUG 19CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/div64.c b/lib/div64.c
index 365719f84832..b71cf93c529a 100644
--- a/lib/div64.c
+++ b/lib/div64.c
@@ -23,7 +23,7 @@
23/* Not needed on 64bit architectures */ 23/* Not needed on 64bit architectures */
24#if BITS_PER_LONG == 32 24#if BITS_PER_LONG == 32
25 25
26uint32_t __div64_32(uint64_t *n, uint32_t base) 26uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base)
27{ 27{
28 uint64_t rem = *n; 28 uint64_t rem = *n;
29 uint64_t b = base; 29 uint64_t b = base;
@@ -58,4 +58,24 @@ uint32_t __div64_32(uint64_t *n, uint32_t base)
58 58
59EXPORT_SYMBOL(__div64_32); 59EXPORT_SYMBOL(__div64_32);
60 60
61/* 64bit divisor, dividend and result. dynamic precision */
62uint64_t div64_64(uint64_t dividend, uint64_t divisor)
63{
64 uint32_t high, d;
65
66 high = divisor >> 32;
67 if (high) {
68 unsigned int shift = fls(high);
69
70 d = divisor >> shift;
71 dividend >>= shift;
72 } else
73 d = divisor;
74
75 do_div(dividend, d);
76
77 return dividend;
78}
79EXPORT_SYMBOL(div64_64);
80
61#endif /* BITS_PER_LONG == 32 */ 81#endif /* BITS_PER_LONG == 32 */
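
Two things stand out in this div64.c hunk: __div64_32 gains a weak attribute so an architecture can override it with an optimised version, and the new div64_64() achieves a full 64/64 division on 32-bit machines by shifting both operands right until the divisor fits in 32 bits, which makes the quotient a close approximation whenever the discarded divisor bits are non-zero. A usage sketch with operands wider than 32 bits:

	static u64 example_avg_packet_size(void)
	{
		u64 total_bytes = 0x200000000ULL;	/* 2^33 bytes (8 GiB) */
		u64 total_pkts  = 0x100000000ULL;	/* 2^32 packets */

		/* divisor > 32 bits: shift = fls(1) = 1, so both operands
		 * are halved before do_div(); lossless here, returns 2 */
		return div64_64(total_bytes, total_pkts);
	}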
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 84272ed77f03..82fc1794b691 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -293,7 +293,7 @@ EXPORT_SYMBOL_GPL(add_uevent_var);
293static int __init kobject_uevent_init(void) 293static int __init kobject_uevent_init(void)
294{ 294{
295 uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, 1, NULL, 295 uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, 1, NULL,
296 THIS_MODULE); 296 NULL, THIS_MODULE);
297 297
298 if (!uevent_sock) { 298 if (!uevent_sock) {
299 printk(KERN_ERR 299 printk(KERN_ERR
diff --git a/net/802/fddi.c b/net/802/fddi.c
index ace6386384bc..91dde41b5481 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -100,7 +100,7 @@ static int fddi_rebuild_header(struct sk_buff *skb)
100 struct fddihdr *fddi = (struct fddihdr *)skb->data; 100 struct fddihdr *fddi = (struct fddihdr *)skb->data;
101 101
102#ifdef CONFIG_INET 102#ifdef CONFIG_INET
103 if (fddi->hdr.llc_snap.ethertype == __constant_htons(ETH_P_IP)) 103 if (fddi->hdr.llc_snap.ethertype == htons(ETH_P_IP))
104 /* Try to get ARP to resolve the header and fill destination address */ 104 /* Try to get ARP to resolve the header and fill destination address */
105 return arp_find(fddi->daddr, skb); 105 return arp_find(fddi->daddr, skb);
106 else 106 else
@@ -130,12 +130,13 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev)
130 * to start of packet data. Assume 802.2 SNAP frames for now. 130 * to start of packet data. Assume 802.2 SNAP frames for now.
131 */ 131 */
132 132
133 skb->mac.raw = skb->data; /* point to frame control (FC) */ 133 skb->dev = dev;
134 skb_reset_mac_header(skb); /* point to frame control (FC) */
134 135
135 if(fddi->hdr.llc_8022_1.dsap==0xe0) 136 if(fddi->hdr.llc_8022_1.dsap==0xe0)
136 { 137 {
137 skb_pull(skb, FDDI_K_8022_HLEN-3); 138 skb_pull(skb, FDDI_K_8022_HLEN-3);
138 type = __constant_htons(ETH_P_802_2); 139 type = htons(ETH_P_802_2);
139 } 140 }
140 else 141 else
141 { 142 {
diff --git a/net/802/hippi.c b/net/802/hippi.c
index 578f2a3d692d..87ffc12b6891 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -60,7 +60,7 @@ static int hippi_header(struct sk_buff *skb, struct net_device *dev,
60 * Due to the stupidity of the little endian byte-order we 60 * Due to the stupidity of the little endian byte-order we
61 * have to set the fp field this way. 61 * have to set the fp field this way.
62 */ 62 */
63 hip->fp.fixed = __constant_htonl(0x04800018); 63 hip->fp.fixed = htonl(0x04800018);
64 hip->fp.d2_size = htonl(len + 8); 64 hip->fp.d2_size = htonl(len + 8);
65 hip->le.fc = 0; 65 hip->le.fc = 0;
66 hip->le.double_wide = 0; /* only HIPPI 800 for the time being */ 66 hip->le.double_wide = 0; /* only HIPPI 800 for the time being */
@@ -104,7 +104,7 @@ static int hippi_rebuild_header(struct sk_buff *skb)
104 * Only IP is currently supported 104 * Only IP is currently supported
105 */ 105 */
106 106
107 if(hip->snap.ethertype != __constant_htons(ETH_P_IP)) 107 if(hip->snap.ethertype != htons(ETH_P_IP))
108 { 108 {
109 printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n",skb->dev->name,ntohs(hip->snap.ethertype)); 109 printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n",skb->dev->name,ntohs(hip->snap.ethertype));
110 return 0; 110 return 0;
@@ -126,14 +126,14 @@ __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev)
126{ 126{
127 struct hippi_hdr *hip; 127 struct hippi_hdr *hip;
128 128
129 hip = (struct hippi_hdr *) skb->data;
130
131 /* 129 /*
132 * This is actually wrong ... question is if we really should 130 * This is actually wrong ... question is if we really should
133 * set the raw address here. 131 * set the raw address here.
134 */ 132 */
135 skb->mac.raw = skb->data; 133 skb->dev = dev;
136 skb_pull(skb, HIPPI_HLEN); 134 skb_reset_mac_header(skb);
135 hip = (struct hippi_hdr *)skb_mac_header(skb);
136 skb_pull(skb, HIPPI_HLEN);
137 137
138 /* 138 /*
139 * No fancy promisc stuff here now. 139 * No fancy promisc stuff here now.
diff --git a/net/802/psnap.c b/net/802/psnap.c
index 6e7c2120b83f..04ee43e7538f 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -56,10 +56,10 @@ static int snap_rcv(struct sk_buff *skb, struct net_device *dev,
56 }; 56 };
57 57
58 rcu_read_lock(); 58 rcu_read_lock();
59 proto = find_snap_client(skb->h.raw); 59 proto = find_snap_client(skb_transport_header(skb));
60 if (proto) { 60 if (proto) {
61 /* Pass the frame on. */ 61 /* Pass the frame on. */
62 skb->h.raw += 5; 62 skb->transport_header += 5;
63 skb_pull_rcsum(skb, 5); 63 skb_pull_rcsum(skb, 5);
64 rc = proto->rcvfunc(skb, dev, &snap_packet_type, orig_dev); 64 rc = proto->rcvfunc(skb, dev, &snap_packet_type, orig_dev);
65 } else { 65 } else {
diff --git a/net/802/tr.c b/net/802/tr.c
index 96bd14452c55..0ba1946211c9 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -189,11 +189,13 @@ static int tr_rebuild_header(struct sk_buff *skb)
189__be16 tr_type_trans(struct sk_buff *skb, struct net_device *dev) 189__be16 tr_type_trans(struct sk_buff *skb, struct net_device *dev)
190{ 190{
191 191
192 struct trh_hdr *trh=(struct trh_hdr *)skb->data; 192 struct trh_hdr *trh;
193 struct trllc *trllc; 193 struct trllc *trllc;
194 unsigned riflen=0; 194 unsigned riflen=0;
195 195
196 skb->mac.raw = skb->data; 196 skb->dev = dev;
197 skb_reset_mac_header(skb);
198 trh = tr_hdr(skb);
197 199
198 if(trh->saddr[0] & TR_RII) 200 if(trh->saddr[0] & TR_RII)
199 riflen = (ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8; 201 riflen = (ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8;
@@ -552,7 +554,8 @@ static int rif_seq_show(struct seq_file *seq, void *v)
552 if(j==1) { 554 if(j==1) {
553 segment=ntohs(entry->rseg[j-1])>>4; 555 segment=ntohs(entry->rseg[j-1])>>4;
554 seq_printf(seq," %03X",segment); 556 seq_printf(seq," %03X",segment);
555 }; 557 }
558
556 segment=ntohs(entry->rseg[j])>>4; 559 segment=ntohs(entry->rseg[j])>>4;
557 brdgnmb=ntohs(entry->rseg[j-1])&0x00f; 560 brdgnmb=ntohs(entry->rseg[j-1])&0x00f;
558 seq_printf(seq,"-%01X-%03X",brdgnmb,segment); 561 seq_printf(seq,"-%01X-%03X",brdgnmb,segment);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index eb1c71ed7dfe..c0c7bb8e9f07 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -470,7 +470,7 @@ static struct net_device *register_vlan_device(const char *eth_IF_name,
470 */ 470 */
471 default: 471 default:
472 snprintf(name, IFNAMSIZ, "vlan%.4i", VLAN_ID); 472 snprintf(name, IFNAMSIZ, "vlan%.4i", VLAN_ID);
473 }; 473 }
474 474
475 new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name, 475 new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name,
476 vlan_setup); 476 vlan_setup);
@@ -685,7 +685,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
685 break; 685 break;
686 } 686 }
687 break; 687 break;
688 }; 688 }
689 689
690out: 690out:
691 return NOTIFY_DONE; 691 return NOTIFY_DONE;
@@ -819,7 +819,7 @@ static int vlan_ioctl_handler(void __user *arg)
819 printk(VLAN_DBG "%s: Unknown VLAN CMD: %x \n", 819 printk(VLAN_DBG "%s: Unknown VLAN CMD: %x \n",
820 __FUNCTION__, args.cmd); 820 __FUNCTION__, args.cmd);
821 return -EINVAL; 821 return -EINVAL;
822 }; 822 }
823out: 823out:
824 return err; 824 return err;
825} 825}
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index b6e0eea1e39e..ec46084f44b4 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -66,7 +66,7 @@ int vlan_dev_rebuild_header(struct sk_buff *skb)
66 66
67 memcpy(veth->h_source, dev->dev_addr, ETH_ALEN); 67 memcpy(veth->h_source, dev->dev_addr, ETH_ALEN);
68 break; 68 break;
69 }; 69 }
70 70
71 return 0; 71 return 0;
72} 72}
@@ -83,7 +83,7 @@ static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
83 /* Lifted from Gleb's VLAN code... */ 83 /* Lifted from Gleb's VLAN code... */
84 memmove(skb->data - ETH_HLEN, 84 memmove(skb->data - ETH_HLEN,
85 skb->data - VLAN_ETH_HLEN, 12); 85 skb->data - VLAN_ETH_HLEN, 12);
86 skb->mac.raw += VLAN_HLEN; 86 skb->mac_header += VLAN_HLEN;
87 } 87 }
88 } 88 }
89 89
@@ -219,7 +219,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
219 break; 219 break;
220 default: 220 default:
221 break; 221 break;
222 }; 222 }
223 223
224 /* Was a VLAN packet, grab the encapsulated protocol, which the layer 224 /* Was a VLAN packet, grab the encapsulated protocol, which the layer
225 * three protocols care about. 225 * three protocols care about.
@@ -258,7 +258,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
258 * won't work for fault tolerant netware but does for the rest. 258 * won't work for fault tolerant netware but does for the rest.
259 */ 259 */
260 if (*(unsigned short *)rawp == 0xFFFF) { 260 if (*(unsigned short *)rawp == 0xFFFF) {
261 skb->protocol = __constant_htons(ETH_P_802_3); 261 skb->protocol = htons(ETH_P_802_3);
262 /* place it back on the queue to be handled by true layer 3 protocols. 262 /* place it back on the queue to be handled by true layer 3 protocols.
263 */ 263 */
264 264
@@ -281,7 +281,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
281 /* 281 /*
282 * Real 802.2 LLC 282 * Real 802.2 LLC
283 */ 283 */
284 skb->protocol = __constant_htons(ETH_P_802_2); 284 skb->protocol = htons(ETH_P_802_2);
285 /* place it back on the queue to be handled by upper layer protocols. 285 /* place it back on the queue to be handled by upper layer protocols.
286 */ 286 */
287 287
@@ -382,7 +382,7 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
382 } 382 }
383 383
384 skb->protocol = htons(ETH_P_8021Q); 384 skb->protocol = htons(ETH_P_8021Q);
385 skb->nh.raw = skb->data; 385 skb_reset_network_header(skb);
386 } 386 }
387 387
388 /* Before delegating work to the lower layer, enter our MAC-address */ 388 /* Before delegating work to the lower layer, enter our MAC-address */
@@ -448,7 +448,7 @@ int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
448 * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs... 448 * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
449 */ 449 */
450 450
451 if (veth->h_vlan_proto != __constant_htons(ETH_P_8021Q)) { 451 if (veth->h_vlan_proto != htons(ETH_P_8021Q)) {
452 int orig_headroom = skb_headroom(skb); 452 int orig_headroom = skb_headroom(skb);
453 unsigned short veth_TCI; 453 unsigned short veth_TCI;
454 454
diff --git a/net/Kconfig b/net/Kconfig
index 915657832d94..2fc8e77b1e62 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -27,13 +27,6 @@ if NET
27 27
28menu "Networking options" 28menu "Networking options"
29 29
30config NETDEBUG
31 bool "Network packet debugging"
32 help
33 You can say Y here if you want to get additional messages useful in
34 debugging bad packets, but can overwhelm logs under denial of service
35 attacks.
36
37source "net/packet/Kconfig" 30source "net/packet/Kconfig"
38source "net/unix/Kconfig" 31source "net/unix/Kconfig"
39source "net/xfrm/Kconfig" 32source "net/xfrm/Kconfig"
@@ -219,14 +212,18 @@ endmenu
219source "net/ax25/Kconfig" 212source "net/ax25/Kconfig"
220source "net/irda/Kconfig" 213source "net/irda/Kconfig"
221source "net/bluetooth/Kconfig" 214source "net/bluetooth/Kconfig"
222source "net/ieee80211/Kconfig" 215source "net/rxrpc/Kconfig"
223
224config WIRELESS_EXT
225 bool
226 216
227config FIB_RULES 217config FIB_RULES
228 bool 218 bool
229 219
220menu "Wireless"
221
222source "net/wireless/Kconfig"
223source "net/ieee80211/Kconfig"
224
225endmenu
226
230endif # if NET 227endif # if NET
231endmenu # Networking 228endmenu # Networking
232 229
diff --git a/net/Makefile b/net/Makefile
index 4854ac506313..6b74d4118c5b 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_IRDA) += irda/
38obj-$(CONFIG_BT) += bluetooth/ 38obj-$(CONFIG_BT) += bluetooth/
39obj-$(CONFIG_SUNRPC) += sunrpc/ 39obj-$(CONFIG_SUNRPC) += sunrpc/
40obj-$(CONFIG_RXRPC) += rxrpc/ 40obj-$(CONFIG_RXRPC) += rxrpc/
41obj-$(CONFIG_AF_RXRPC) += rxrpc/
41obj-$(CONFIG_ATM) += atm/ 42obj-$(CONFIG_ATM) += atm/
42obj-$(CONFIG_DECNET) += decnet/ 43obj-$(CONFIG_DECNET) += decnet/
43obj-$(CONFIG_ECONET) += econet/ 44obj-$(CONFIG_ECONET) += econet/
@@ -52,3 +53,5 @@ obj-$(CONFIG_IUCV) += iucv/
52ifeq ($(CONFIG_NET),y) 53ifeq ($(CONFIG_NET),y)
53obj-$(CONFIG_SYSCTL) += sysctl_net.o 54obj-$(CONFIG_SYSCTL) += sysctl_net.o
54endif 55endif
56
57obj-y += wireless/
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index d89d62f3702f..5ef6a238bdbc 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -118,7 +118,9 @@ static void __aarp_send_query(struct aarp_entry *a)
118 118
119 /* Set up the buffer */ 119 /* Set up the buffer */
120 skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length); 120 skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length);
121 skb->nh.raw = skb->h.raw = skb_put(skb, sizeof(*eah)); 121 skb_reset_network_header(skb);
122 skb_reset_transport_header(skb);
123 skb_put(skb, sizeof(*eah));
122 skb->protocol = htons(ETH_P_ATALK); 124 skb->protocol = htons(ETH_P_ATALK);
123 skb->dev = dev; 125 skb->dev = dev;
124 eah = aarp_hdr(skb); 126 eah = aarp_hdr(skb);
@@ -163,7 +165,9 @@ static void aarp_send_reply(struct net_device *dev, struct atalk_addr *us,
163 165
164 /* Set up the buffer */ 166 /* Set up the buffer */
165 skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length); 167 skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length);
166 skb->nh.raw = skb->h.raw = skb_put(skb, sizeof(*eah)); 168 skb_reset_network_header(skb);
169 skb_reset_transport_header(skb);
170 skb_put(skb, sizeof(*eah));
167 skb->protocol = htons(ETH_P_ATALK); 171 skb->protocol = htons(ETH_P_ATALK);
168 skb->dev = dev; 172 skb->dev = dev;
169 eah = aarp_hdr(skb); 173 eah = aarp_hdr(skb);
@@ -212,7 +216,9 @@ static void aarp_send_probe(struct net_device *dev, struct atalk_addr *us)
212 216
213 /* Set up the buffer */ 217 /* Set up the buffer */
214 skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length); 218 skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length);
215 skb->nh.raw = skb->h.raw = skb_put(skb, sizeof(*eah)); 219 skb_reset_network_header(skb);
220 skb_reset_transport_header(skb);
221 skb_put(skb, sizeof(*eah));
216 skb->protocol = htons(ETH_P_ATALK); 222 skb->protocol = htons(ETH_P_ATALK);
217 skb->dev = dev; 223 skb->dev = dev;
218 eah = aarp_hdr(skb); 224 eah = aarp_hdr(skb);
@@ -539,7 +545,7 @@ int aarp_send_ddp(struct net_device *dev, struct sk_buff *skb,
539 int hash; 545 int hash;
540 struct aarp_entry *a; 546 struct aarp_entry *a;
541 547
542 skb->nh.raw = skb->data; 548 skb_reset_network_header(skb);
543 549
544 /* Check for LocalTalk first */ 550 /* Check for LocalTalk first */
545 if (dev->type == ARPHRD_LOCALTLK) { 551 if (dev->type == ARPHRD_LOCALTLK) {
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index c8b7dc2c3257..16eda21fb38c 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -937,11 +937,11 @@ static unsigned long atalk_sum_partial(const unsigned char *data,
937static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset, 937static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset,
938 int len, unsigned long sum) 938 int len, unsigned long sum)
939{ 939{
940 int start = skb_headlen(skb); 940 int end = skb_headlen(skb);
941 int i, copy; 941 int i, copy;
942 942
943 /* checksum stuff in header space */ 943 /* checksum stuff in header space */
944 if ( (copy = start - offset) > 0) { 944 if ((copy = end - offset) > 0) {
945 if (copy > len) 945 if (copy > len)
946 copy = len; 946 copy = len;
947 sum = atalk_sum_partial(skb->data + offset, copy, sum); 947 sum = atalk_sum_partial(skb->data + offset, copy, sum);
@@ -953,11 +953,9 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset,
953 953
954 /* checksum stuff in frags */ 954 /* checksum stuff in frags */
955 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 955 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
956 int end; 956 BUG_TRAP(len >= 0);
957 957
958 BUG_TRAP(start <= offset + len); 958 end = offset + skb_shinfo(skb)->frags[i].size;
959
960 end = start + skb_shinfo(skb)->frags[i].size;
961 if ((copy = end - offset) > 0) { 959 if ((copy = end - offset) > 0) {
962 u8 *vaddr; 960 u8 *vaddr;
963 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 961 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
@@ -965,36 +963,31 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset,
965 if (copy > len) 963 if (copy > len)
966 copy = len; 964 copy = len;
967 vaddr = kmap_skb_frag(frag); 965 vaddr = kmap_skb_frag(frag);
968 sum = atalk_sum_partial(vaddr + frag->page_offset + 966 sum = atalk_sum_partial(vaddr + frag->page_offset,
969 offset - start, copy, sum); 967 copy, sum);
970 kunmap_skb_frag(vaddr); 968 kunmap_skb_frag(vaddr);
971 969
972 if (!(len -= copy)) 970 if (!(len -= copy))
973 return sum; 971 return sum;
974 offset += copy; 972 offset += copy;
975 } 973 }
976 start = end;
977 } 974 }
978 975
979 if (skb_shinfo(skb)->frag_list) { 976 if (skb_shinfo(skb)->frag_list) {
980 struct sk_buff *list = skb_shinfo(skb)->frag_list; 977 struct sk_buff *list = skb_shinfo(skb)->frag_list;
981 978
982 for (; list; list = list->next) { 979 for (; list; list = list->next) {
983 int end; 980 BUG_TRAP(len >= 0);
984
985 BUG_TRAP(start <= offset + len);
986 981
987 end = start + list->len; 982 end = offset + list->len;
988 if ((copy = end - offset) > 0) { 983 if ((copy = end - offset) > 0) {
989 if (copy > len) 984 if (copy > len)
990 copy = len; 985 copy = len;
991 sum = atalk_sum_skb(list, offset - start, 986 sum = atalk_sum_skb(list, 0, copy, sum);
992 copy, sum);
993 if ((len -= copy) == 0) 987 if ((len -= copy) == 0)
994 return sum; 988 return sum;
995 offset += copy; 989 offset += copy;
996 } 990 }
997 start = end;
998 } 991 }
999 } 992 }
1000 993
@@ -1275,7 +1268,7 @@ static int handle_ip_over_ddp(struct sk_buff *skb)
1275 skb->protocol = htons(ETH_P_IP); 1268 skb->protocol = htons(ETH_P_IP);
1276 skb_pull(skb, 13); 1269 skb_pull(skb, 13);
1277 skb->dev = dev; 1270 skb->dev = dev;
1278 skb->h.raw = skb->data; 1271 skb_reset_transport_header(skb);
1279 1272
1280 stats = dev->priv; 1273 stats = dev->priv;
1281 stats->rx_packets++; 1274 stats->rx_packets++;
@@ -1383,10 +1376,10 @@ free_it:
1383 * @pt - packet type 1376 * @pt - packet type
1384 * 1377 *
1385 * Receive a packet (in skb) from device dev. This has come from the SNAP 1378 * Receive a packet (in skb) from device dev. This has come from the SNAP
1386 * decoder, and on entry skb->h.raw is the DDP header, skb->len is the DDP 1379 * decoder, and on entry skb->transport_header is the DDP header and
1387 * header, skb->len is the DDP length. The physical headers have been 1380 * skb->len is the DDP length. The physical headers have been
1388 * extracted. PPP should probably pass frames marked as for this layer. 1381 * extracted. PPP should probably pass frames marked as for this
1389 * [ie ARPHRD_ETHERTALK] 1382 * layer. [ie ARPHRD_ETHERTALK]
1390 */ 1383 */
1391static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, 1384static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
1392 struct packet_type *pt, struct net_device *orig_dev) 1385 struct packet_type *pt, struct net_device *orig_dev)
@@ -1484,7 +1477,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
1484 struct packet_type *pt, struct net_device *orig_dev) 1477 struct packet_type *pt, struct net_device *orig_dev)
1485{ 1478{
1486 /* Expand any short form frames */ 1479 /* Expand any short form frames */
1487 if (skb->mac.raw[2] == 1) { 1480 if (skb_mac_header(skb)[2] == 1) {
1488 struct ddpehdr *ddp; 1481 struct ddpehdr *ddp;
1489 /* Find our address */ 1482 /* Find our address */
1490 struct atalk_addr *ap = atalk_find_dev_addr(dev); 1483 struct atalk_addr *ap = atalk_find_dev_addr(dev);
@@ -1510,8 +1503,8 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
1510 * we write the network numbers ! 1503 * we write the network numbers !
1511 */ 1504 */
1512 1505
1513 ddp->deh_dnode = skb->mac.raw[0]; /* From physical header */ 1506 ddp->deh_dnode = skb_mac_header(skb)[0]; /* From physical header */
1514 ddp->deh_snode = skb->mac.raw[1]; /* From physical header */ 1507 ddp->deh_snode = skb_mac_header(skb)[1]; /* From physical header */
1515 1508
1516 ddp->deh_dnet = ap->s_net; /* Network number */ 1509 ddp->deh_dnet = ap->s_net; /* Network number */
1517 ddp->deh_snet = ap->s_net; 1510 ddp->deh_snet = ap->s_net;
@@ -1522,7 +1515,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
1522 /* Non routable, so force a drop if we slip up later */ 1515 /* Non routable, so force a drop if we slip up later */
1523 ddp->deh_len_hops = htons(skb->len + (DDP_MAXHOPS << 10)); 1516 ddp->deh_len_hops = htons(skb->len + (DDP_MAXHOPS << 10));
1524 } 1517 }
1525 skb->h.raw = skb->data; 1518 skb_reset_transport_header(skb);
1526 1519
1527 return atalk_rcv(skb, dev, pt, orig_dev); 1520 return atalk_rcv(skb, dev, pt, orig_dev);
1528freeit: 1521freeit:
@@ -1771,6 +1764,9 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1771 case SIOCGSTAMP: 1764 case SIOCGSTAMP:
1772 rc = sock_get_timestamp(sk, argp); 1765 rc = sock_get_timestamp(sk, argp);
1773 break; 1766 break;
1767 case SIOCGSTAMPNS:
1768 rc = sock_get_timestampns(sk, argp);
1769 break;
1774 /* Routing */ 1770 /* Routing */
1775 case SIOCADDRT: 1771 case SIOCADDRT:
1776 case SIOCDELRT: 1772 case SIOCDELRT:
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index ec4ebd3299e3..0e9f00c5c899 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -173,7 +173,7 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
173 } 173 }
174 skb_push(skb, minheadroom); 174 skb_push(skb, minheadroom);
175 if (brvcc->encaps == e_llc) 175 if (brvcc->encaps == e_llc)
176 memcpy(skb->data, llc_oui_pid_pad, 10); 176 skb_copy_to_linear_data(skb, llc_oui_pid_pad, 10);
177 else 177 else
178 memset(skb->data, 0, 2); 178 memset(skb->data, 0, 2);
179#endif /* FASTER_VERSION */ 179#endif /* FASTER_VERSION */
@@ -375,11 +375,11 @@ packet_fails_filter(__be16 type, struct br2684_vcc *brvcc, struct sk_buff *skb)
375{ 375{
376 if (brvcc->filter.netmask == 0) 376 if (brvcc->filter.netmask == 0)
377 return 0; /* no filter in place */ 377 return 0; /* no filter in place */
378 if (type == __constant_htons(ETH_P_IP) && 378 if (type == htons(ETH_P_IP) &&
379 (((struct iphdr *) (skb->data))->daddr & brvcc->filter. 379 (((struct iphdr *) (skb->data))->daddr & brvcc->filter.
380 netmask) == brvcc->filter.prefix) 380 netmask) == brvcc->filter.prefix)
381 return 0; 381 return 0;
382 if (type == __constant_htons(ETH_P_ARP)) 382 if (type == htons(ETH_P_ARP))
383 return 0; 383 return 0;
384 /* TODO: we should probably filter ARPs too.. don't want to have 384 /* TODO: we should probably filter ARPs too.. don't want to have
385 * them returning values that don't make sense, or is that ok? 385 * them returning values that don't make sense, or is that ok?
@@ -458,7 +458,7 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
458 /* FIXME: tcpdump shows that pointer to mac header is 2 bytes earlier, 458 /* FIXME: tcpdump shows that pointer to mac header is 2 bytes earlier,
459 than should be. What else should I set? */ 459 than should be. What else should I set? */
460 skb_pull(skb, plen); 460 skb_pull(skb, plen);
461 skb->mac.raw = ((char *) (skb->data)) - ETH_HLEN; 461 skb_set_mac_header(skb, -ETH_HLEN);
462 skb->pkt_type = PACKET_HOST; 462 skb->pkt_type = PACKET_HOST;
463#ifdef CONFIG_BR2684_FAST_TRANS 463#ifdef CONFIG_BR2684_FAST_TRANS
464 skb->protocol = ((u16 *) skb->data)[-1]; 464 skb->protocol = ((u16 *) skb->data)[-1];
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 8c3825816085..876b77f14745 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -213,7 +213,7 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb)
213 return; 213 return;
214 } 214 }
215 ATM_SKB(skb)->vcc = vcc; 215 ATM_SKB(skb)->vcc = vcc;
216 skb->mac.raw = skb->data; 216 skb_reset_mac_header(skb);
217 if (!clip_vcc->encap 217 if (!clip_vcc->encap
218 || skb->len < RFC1483LLC_LEN 218 || skb->len < RFC1483LLC_LEN
219 || memcmp(skb->data, llc_oui, sizeof (llc_oui))) 219 || memcmp(skb->data, llc_oui, sizeof (llc_oui)))
@@ -702,7 +702,7 @@ static struct atm_dev atmarpd_dev = {
702 .ops = &atmarpd_dev_ops, 702 .ops = &atmarpd_dev_ops,
703 .type = "arpd", 703 .type = "arpd",
704 .number = 999, 704 .number = 999,
705 .lock = SPIN_LOCK_UNLOCKED 705 .lock = __SPIN_LOCK_UNLOCKED(atmarpd_dev.lock)
706}; 706};
707 707
708 708
diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index 8ccee4591f65..7afd8e7754fd 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -82,6 +82,9 @@ int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
82 case SIOCGSTAMP: /* borrowed from IP */ 82 case SIOCGSTAMP: /* borrowed from IP */
83 error = sock_get_timestamp(sk, argp); 83 error = sock_get_timestamp(sk, argp);
84 goto done; 84 goto done;
85 case SIOCGSTAMPNS: /* borrowed from IP */
86 error = sock_get_timestampns(sk, argp);
87 goto done;
85 case ATM_SETSC: 88 case ATM_SETSC:
86 printk(KERN_WARNING "ATM_SETSC is obsolete\n"); 89 printk(KERN_WARNING "ATM_SETSC is obsolete\n");
87 error = 0; 90 error = 0;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 3d804d61f656..4dc5f2b8c43c 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -283,8 +283,8 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev)
283 } 283 }
284 284
285 DPRINTK("skbuff head:%lx data:%lx tail:%lx end:%lx\n", 285 DPRINTK("skbuff head:%lx data:%lx tail:%lx end:%lx\n",
286 (long)skb->head, (long)skb->data, (long)skb->tail, 286 (long)skb->head, (long)skb->data, (long)skb_tail_pointer(skb),
287 (long)skb->end); 287 (long)skb_end_pointer(skb));
288#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) 288#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
289 if (memcmp(skb->data, bridge_ula_lec, sizeof(bridge_ula_lec)) == 0) 289 if (memcmp(skb->data, bridge_ula_lec, sizeof(bridge_ula_lec)) == 0)
290 lec_handle_bridge(skb, dev); 290 lec_handle_bridge(skb, dev);
@@ -576,8 +576,8 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
576 break; 576 break;
577 } 577 }
578 skb2->len = sizeof(struct atmlec_msg); 578 skb2->len = sizeof(struct atmlec_msg);
579 memcpy(skb2->data, mesg, 579 skb_copy_to_linear_data(skb2, mesg,
580 sizeof(struct atmlec_msg)); 580 sizeof(*mesg));
581 atm_force_charge(priv->lecd, skb2->truesize); 581 atm_force_charge(priv->lecd, skb2->truesize);
582 sk = sk_atm(priv->lecd); 582 sk = sk_atm(priv->lecd);
583 skb_queue_tail(&sk->sk_receive_queue, skb2); 583 skb_queue_tail(&sk->sk_receive_queue, skb2);
@@ -630,7 +630,7 @@ static struct atm_dev lecatm_dev = {
630 .ops = &lecdev_ops, 630 .ops = &lecdev_ops,
631 .type = "lec", 631 .type = "lec",
632 .number = 999, /* dummy device number */ 632 .number = 999, /* dummy device number */
633 .lock = SPIN_LOCK_UNLOCKED 633 .lock = __SPIN_LOCK_UNLOCKED(lecatm_dev.lock)
634}; 634};
635 635
636/* 636/*
@@ -825,7 +825,6 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb)
825 if (!hlist_empty(&priv->lec_arp_empty_ones)) { 825 if (!hlist_empty(&priv->lec_arp_empty_ones)) {
826 lec_arp_check_empties(priv, vcc, skb); 826 lec_arp_check_empties(priv, vcc, skb);
827 } 827 }
828 skb->dev = dev;
829 skb_pull(skb, 2); /* skip lec_id */ 828 skb_pull(skb, 2); /* skip lec_id */
830#ifdef CONFIG_TR 829#ifdef CONFIG_TR
831 if (priv->is_trdev) 830 if (priv->is_trdev)
@@ -1338,7 +1337,7 @@ static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force,
1338 if (skb == NULL) 1337 if (skb == NULL)
1339 return -1; 1338 return -1;
1340 skb->len = *sizeoftlvs; 1339 skb->len = *sizeoftlvs;
1341 memcpy(skb->data, *tlvs, *sizeoftlvs); 1340 skb_copy_to_linear_data(skb, *tlvs, *sizeoftlvs);
1342 retval = send_to_lecd(priv, l_arp_xmt, dst_mac, NULL, skb); 1341 retval = send_to_lecd(priv, l_arp_xmt, dst_mac, NULL, skb);
1343 } 1342 }
1344 return retval; 1343 return retval;
@@ -1372,7 +1371,7 @@ static int lane2_associate_req(struct net_device *dev, u8 *lan_dst,
1372 if (skb == NULL) 1371 if (skb == NULL)
1373 return 0; 1372 return 0;
1374 skb->len = sizeoftlvs; 1373 skb->len = sizeoftlvs;
1375 memcpy(skb->data, tlvs, sizeoftlvs); 1374 skb_copy_to_linear_data(skb, tlvs, sizeoftlvs);
1376 retval = send_to_lecd(priv, l_associate_req, NULL, NULL, skb); 1375 retval = send_to_lecd(priv, l_associate_req, NULL, NULL, skb);
1377 if (retval != 0) 1376 if (retval != 0)
1378 printk("lec.c: lane2_associate_req() failed\n"); 1377 printk("lec.c: lane2_associate_req() failed\n");
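
The memcpy()-to-skb_copy_to_linear_data() substitutions in lec.c (and the skb_tail_pointer()/skb_end_pointer() accessors in the DPRINTK) change no behaviour; they make the "this copy touches only the linear data area" assumption explicit and give it one central definition. Simplified sketch of the helpers, per include/linux/skbuff.h of this era:

        static inline void skb_copy_to_linear_data(struct sk_buff *skb,
                                                   const void *from,
                                                   const unsigned int len)
        {
                memcpy(skb->data, from, len);
        }

        static inline void skb_copy_from_linear_data(const struct sk_buff *skb,
                                                     void *to,
                                                     const unsigned int len)
        {
                memcpy(to, skb->data, len);
        }
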
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index cb3c004ff022..7c85aa551d5e 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -504,11 +504,13 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc)
504 tagged_llc_snap_hdr.tag = entry->ctrl_info.tag; 504 tagged_llc_snap_hdr.tag = entry->ctrl_info.tag;
505 skb_pull(skb, ETH_HLEN); /* get rid of Eth header */ 505 skb_pull(skb, ETH_HLEN); /* get rid of Eth header */
506 skb_push(skb, sizeof(tagged_llc_snap_hdr)); /* add LLC/SNAP header */ 506 skb_push(skb, sizeof(tagged_llc_snap_hdr)); /* add LLC/SNAP header */
507 memcpy(skb->data, &tagged_llc_snap_hdr, sizeof(tagged_llc_snap_hdr)); 507 skb_copy_to_linear_data(skb, &tagged_llc_snap_hdr,
508 sizeof(tagged_llc_snap_hdr));
508 } else { 509 } else {
509 skb_pull(skb, ETH_HLEN); /* get rid of Eth header */ 510 skb_pull(skb, ETH_HLEN); /* get rid of Eth header */
510 skb_push(skb, sizeof(struct llc_snap_hdr)); /* add LLC/SNAP header + tag */ 511 skb_push(skb, sizeof(struct llc_snap_hdr)); /* add LLC/SNAP header + tag */
511 memcpy(skb->data, &llc_snap_mpoa_data, sizeof(struct llc_snap_hdr)); 512 skb_copy_to_linear_data(skb, &llc_snap_mpoa_data,
513 sizeof(struct llc_snap_hdr));
512 } 514 }
513 515
514 atomic_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc); 516 atomic_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc);
@@ -711,11 +713,12 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb)
711 return; 713 return;
712 } 714 }
713 skb_push(new_skb, eg->ctrl_info.DH_length); /* add MAC header */ 715 skb_push(new_skb, eg->ctrl_info.DH_length); /* add MAC header */
714 memcpy(new_skb->data, eg->ctrl_info.DLL_header, eg->ctrl_info.DH_length); 716 skb_copy_to_linear_data(new_skb, eg->ctrl_info.DLL_header,
717 eg->ctrl_info.DH_length);
715 new_skb->protocol = eth_type_trans(new_skb, dev); 718 new_skb->protocol = eth_type_trans(new_skb, dev);
716 new_skb->nh.raw = new_skb->data; 719 skb_reset_network_header(new_skb);
717 720
718 eg->latest_ip_addr = new_skb->nh.iph->saddr; 721 eg->latest_ip_addr = ip_hdr(new_skb)->saddr;
719 eg->packets_rcvd++; 722 eg->packets_rcvd++;
720 mpc->eg_ops->put(eg); 723 mpc->eg_ops->put(eg);
721 724
@@ -734,7 +737,7 @@ static struct atm_dev mpc_dev = {
734 .ops = &mpc_ops, 737 .ops = &mpc_ops,
735 .type = "mpc", 738 .type = "mpc",
736 .number = 42, 739 .number = 42,
737 .lock = SPIN_LOCK_UNLOCKED 740 .lock = __SPIN_LOCK_UNLOCKED(mpc_dev.lock)
738 /* members not explicitly initialised will be 0 */ 741 /* members not explicitly initialised will be 0 */
739}; 742};
740 743
@@ -936,7 +939,7 @@ int msg_to_mpoad(struct k_message *mesg, struct mpoa_client *mpc)
936 if (skb == NULL) 939 if (skb == NULL)
937 return -ENOMEM; 940 return -ENOMEM;
938 skb_put(skb, sizeof(struct k_message)); 941 skb_put(skb, sizeof(struct k_message));
939 memcpy(skb->data, mesg, sizeof(struct k_message)); 942 skb_copy_to_linear_data(skb, mesg, sizeof(*mesg));
940 atm_force_charge(mpc->mpoad_vcc, skb->truesize); 943 atm_force_charge(mpc->mpoad_vcc, skb->truesize);
941 944
942 sk = sk_atm(mpc->mpoad_vcc); 945 sk = sk_atm(mpc->mpoad_vcc);
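
mpc_push() above shows the receive-path idiom this conversion standardises: once eth_type_trans() has consumed the MAC header, skb_reset_network_header() pins the network header at skb->data, and IP fields are then read through the typed ip_hdr() accessor instead of skb->nh.iph. The accessor itself is a thin cast, as in include/linux/ip.h:

        static inline struct iphdr *ip_hdr(const struct sk_buff *skb)
        {
                return (struct iphdr *)skb_network_header(skb);
        }
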
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index 31d98b57e1de..d14baaf1f4c3 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -256,7 +256,7 @@ static struct atm_dev sigd_dev = {
256 .ops = &sigd_dev_ops, 256 .ops = &sigd_dev_ops,
257 .type = "sig", 257 .type = "sig",
258 .number = 999, 258 .number = 999,
259 .lock = SPIN_LOCK_UNLOCKED 259 .lock = __SPIN_LOCK_UNLOCKED(sigd_dev.lock)
260}; 260};
261 261
262 262
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 1c07c6a50eb8..6ded95272a53 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1127,22 +1127,22 @@ static int __must_check ax25_connect(struct socket *sock,
1127 switch (sk->sk_state) { 1127 switch (sk->sk_state) {
1128 case TCP_SYN_SENT: /* still trying */ 1128 case TCP_SYN_SENT: /* still trying */
1129 err = -EINPROGRESS; 1129 err = -EINPROGRESS;
1130 goto out; 1130 goto out_release;
1131 1131
1132 case TCP_ESTABLISHED: /* connection established */ 1132 case TCP_ESTABLISHED: /* connection established */
1133 sock->state = SS_CONNECTED; 1133 sock->state = SS_CONNECTED;
1134 goto out; 1134 goto out_release;
1135 1135
1136 case TCP_CLOSE: /* connection refused */ 1136 case TCP_CLOSE: /* connection refused */
1137 sock->state = SS_UNCONNECTED; 1137 sock->state = SS_UNCONNECTED;
1138 err = -ECONNREFUSED; 1138 err = -ECONNREFUSED;
1139 goto out; 1139 goto out_release;
1140 } 1140 }
1141 } 1141 }
1142 1142
1143 if (sk->sk_state == TCP_ESTABLISHED && sk->sk_type == SOCK_SEQPACKET) { 1143 if (sk->sk_state == TCP_ESTABLISHED && sk->sk_type == SOCK_SEQPACKET) {
1144 err = -EISCONN; /* No reconnect on a seqpacket socket */ 1144 err = -EISCONN; /* No reconnect on a seqpacket socket */
1145 goto out; 1145 goto out_release;
1146 } 1146 }
1147 1147
1148 sk->sk_state = TCP_CLOSE; 1148 sk->sk_state = TCP_CLOSE;
@@ -1159,12 +1159,12 @@ static int __must_check ax25_connect(struct socket *sock,
1159 /* Valid number of digipeaters ? */ 1159 /* Valid number of digipeaters ? */
1160 if (fsa->fsa_ax25.sax25_ndigis < 1 || fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS) { 1160 if (fsa->fsa_ax25.sax25_ndigis < 1 || fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS) {
1161 err = -EINVAL; 1161 err = -EINVAL;
1162 goto out; 1162 goto out_release;
1163 } 1163 }
1164 1164
1165 if ((digi = kmalloc(sizeof(ax25_digi), GFP_KERNEL)) == NULL) { 1165 if ((digi = kmalloc(sizeof(ax25_digi), GFP_KERNEL)) == NULL) {
1166 err = -ENOBUFS; 1166 err = -ENOBUFS;
1167 goto out; 1167 goto out_release;
1168 } 1168 }
1169 1169
1170 digi->ndigi = fsa->fsa_ax25.sax25_ndigis; 1170 digi->ndigi = fsa->fsa_ax25.sax25_ndigis;
@@ -1194,7 +1194,7 @@ static int __must_check ax25_connect(struct socket *sock,
1194 current->comm); 1194 current->comm);
1195 if ((err = ax25_rt_autobind(ax25, &fsa->fsa_ax25.sax25_call)) < 0) { 1195 if ((err = ax25_rt_autobind(ax25, &fsa->fsa_ax25.sax25_call)) < 0) {
1196 kfree(digi); 1196 kfree(digi);
1197 goto out; 1197 goto out_release;
1198 } 1198 }
1199 1199
1200 ax25_fillin_cb(ax25, ax25->ax25_dev); 1200 ax25_fillin_cb(ax25, ax25->ax25_dev);
@@ -1203,7 +1203,7 @@ static int __must_check ax25_connect(struct socket *sock,
1203 if (ax25->ax25_dev == NULL) { 1203 if (ax25->ax25_dev == NULL) {
1204 kfree(digi); 1204 kfree(digi);
1205 err = -EHOSTUNREACH; 1205 err = -EHOSTUNREACH;
1206 goto out; 1206 goto out_release;
1207 } 1207 }
1208 } 1208 }
1209 1209
@@ -1213,7 +1213,7 @@ static int __must_check ax25_connect(struct socket *sock,
1213 kfree(digi); 1213 kfree(digi);
1214 err = -EADDRINUSE; /* Already such a connection */ 1214 err = -EADDRINUSE; /* Already such a connection */
1215 ax25_cb_put(ax25t); 1215 ax25_cb_put(ax25t);
1216 goto out; 1216 goto out_release;
1217 } 1217 }
1218 1218
1219 ax25->dest_addr = fsa->fsa_ax25.sax25_call; 1219 ax25->dest_addr = fsa->fsa_ax25.sax25_call;
@@ -1223,7 +1223,7 @@ static int __must_check ax25_connect(struct socket *sock,
1223 if (sk->sk_type != SOCK_SEQPACKET) { 1223 if (sk->sk_type != SOCK_SEQPACKET) {
1224 sock->state = SS_CONNECTED; 1224 sock->state = SS_CONNECTED;
1225 sk->sk_state = TCP_ESTABLISHED; 1225 sk->sk_state = TCP_ESTABLISHED;
1226 goto out; 1226 goto out_release;
1227 } 1227 }
1228 1228
1229 /* Move to connecting socket, ax.25 lapb WAIT_UA.. */ 1229 /* Move to connecting socket, ax.25 lapb WAIT_UA.. */
@@ -1255,55 +1255,53 @@ static int __must_check ax25_connect(struct socket *sock,
1255 /* Now the loop */ 1255 /* Now the loop */
1256 if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) { 1256 if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) {
1257 err = -EINPROGRESS; 1257 err = -EINPROGRESS;
1258 goto out; 1258 goto out_release;
1259 } 1259 }
1260 1260
1261 if (sk->sk_state == TCP_SYN_SENT) { 1261 if (sk->sk_state == TCP_SYN_SENT) {
1262 struct task_struct *tsk = current; 1262 DEFINE_WAIT(wait);
1263 DECLARE_WAITQUEUE(wait, tsk);
1264 1263
1265 add_wait_queue(sk->sk_sleep, &wait);
1266 for (;;) { 1264 for (;;) {
1265 prepare_to_wait(sk->sk_sleep, &wait,
1266 TASK_INTERRUPTIBLE);
1267 if (sk->sk_state != TCP_SYN_SENT) 1267 if (sk->sk_state != TCP_SYN_SENT)
1268 break; 1268 break;
1269 set_current_state(TASK_INTERRUPTIBLE); 1269 if (!signal_pending(current)) {
1270 release_sock(sk); 1270 release_sock(sk);
1271 if (!signal_pending(tsk)) {
1272 schedule(); 1271 schedule();
1273 lock_sock(sk); 1272 lock_sock(sk);
1274 continue; 1273 continue;
1275 } 1274 }
1276 current->state = TASK_RUNNING; 1275 err = -ERESTARTSYS;
1277 remove_wait_queue(sk->sk_sleep, &wait); 1276 break;
1278 return -ERESTARTSYS;
1279 } 1277 }
1280 current->state = TASK_RUNNING; 1278 finish_wait(sk->sk_sleep, &wait);
1281 remove_wait_queue(sk->sk_sleep, &wait); 1279
1280 if (err)
1281 goto out_release;
1282 } 1282 }
1283 1283
1284 if (sk->sk_state != TCP_ESTABLISHED) { 1284 if (sk->sk_state != TCP_ESTABLISHED) {
1285 /* Not in ABM, not in WAIT_UA -> failed */ 1285 /* Not in ABM, not in WAIT_UA -> failed */
1286 sock->state = SS_UNCONNECTED; 1286 sock->state = SS_UNCONNECTED;
1287 err = sock_error(sk); /* Always set at this point */ 1287 err = sock_error(sk); /* Always set at this point */
1288 goto out; 1288 goto out_release;
1289 } 1289 }
1290 1290
1291 sock->state = SS_CONNECTED; 1291 sock->state = SS_CONNECTED;
1292 1292
1293 err=0; 1293 err = 0;
1294out: 1294out_release:
1295 release_sock(sk); 1295 release_sock(sk);
1296 1296
1297 return err; 1297 return err;
1298} 1298}
1299 1299
1300
1301static int ax25_accept(struct socket *sock, struct socket *newsock, int flags) 1300static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
1302{ 1301{
1303 struct task_struct *tsk = current;
1304 DECLARE_WAITQUEUE(wait, tsk);
1305 struct sk_buff *skb; 1302 struct sk_buff *skb;
1306 struct sock *newsk; 1303 struct sock *newsk;
1304 DEFINE_WAIT(wait);
1307 struct sock *sk; 1305 struct sock *sk;
1308 int err = 0; 1306 int err = 0;
1309 1307
@@ -1328,30 +1326,29 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
1328 * The read queue this time is holding sockets ready to use 1326 * The read queue this time is holding sockets ready to use
1329 * hooked into the SABM we saved 1327 * hooked into the SABM we saved
1330 */ 1328 */
1331 add_wait_queue(sk->sk_sleep, &wait);
1332 for (;;) { 1329 for (;;) {
1330 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1333 skb = skb_dequeue(&sk->sk_receive_queue); 1331 skb = skb_dequeue(&sk->sk_receive_queue);
1334 if (skb) 1332 if (skb)
1335 break; 1333 break;
1336 1334
1337 release_sock(sk);
1338 current->state = TASK_INTERRUPTIBLE;
1339 if (flags & O_NONBLOCK) { 1335 if (flags & O_NONBLOCK) {
1340 current->state = TASK_RUNNING; 1336 err = -EWOULDBLOCK;
1341 remove_wait_queue(sk->sk_sleep, &wait); 1337 break;
1342 return -EWOULDBLOCK;
1343 } 1338 }
1344 if (!signal_pending(tsk)) { 1339 if (!signal_pending(current)) {
1340 release_sock(sk);
1345 schedule(); 1341 schedule();
1346 lock_sock(sk); 1342 lock_sock(sk);
1347 continue; 1343 continue;
1348 } 1344 }
1349 current->state = TASK_RUNNING; 1345 err = -ERESTARTSYS;
1350 remove_wait_queue(sk->sk_sleep, &wait); 1346 break;
1351 return -ERESTARTSYS;
1352 } 1347 }
1353 current->state = TASK_RUNNING; 1348 finish_wait(sk->sk_sleep, &wait);
1354 remove_wait_queue(sk->sk_sleep, &wait); 1349
1350 if (err)
1351 goto out;
1355 1352
1356 newsk = skb->sk; 1353 newsk = skb->sk;
1357 newsk->sk_socket = newsock; 1354 newsk->sk_socket = newsock;
@@ -1425,7 +1422,6 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
1425 struct sockaddr_ax25 sax; 1422 struct sockaddr_ax25 sax;
1426 struct sk_buff *skb; 1423 struct sk_buff *skb;
1427 ax25_digi dtmp, *dp; 1424 ax25_digi dtmp, *dp;
1428 unsigned char *asmptr;
1429 ax25_cb *ax25; 1425 ax25_cb *ax25;
1430 size_t size; 1426 size_t size;
1431 int lv, err, addr_len = msg->msg_namelen; 1427 int lv, err, addr_len = msg->msg_namelen;
@@ -1548,13 +1544,11 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
1548 goto out; 1544 goto out;
1549 } 1545 }
1550 1546
1551 skb->nh.raw = skb->data; 1547 skb_reset_network_header(skb);
1552 1548
1553 /* Add the PID if one is not supplied by the user in the skb */ 1549 /* Add the PID if one is not supplied by the user in the skb */
1554 if (!ax25->pidincl) { 1550 if (!ax25->pidincl)
1555 asmptr = skb_push(skb, 1); 1551 *skb_push(skb, 1) = sk->sk_protocol;
1556 *asmptr = sk->sk_protocol;
1557 }
1558 1552
1559 SOCK_DEBUG(sk, "AX.25: Transmitting buffer\n"); 1553 SOCK_DEBUG(sk, "AX.25: Transmitting buffer\n");
1560 1554
@@ -1573,7 +1567,7 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
1573 goto out; 1567 goto out;
1574 } 1568 }
1575 1569
1576 asmptr = skb_push(skb, 1 + ax25_addr_size(dp)); 1570 skb_push(skb, 1 + ax25_addr_size(dp));
1577 1571
1578 SOCK_DEBUG(sk, "Building AX.25 Header (dp=%p).\n", dp); 1572 SOCK_DEBUG(sk, "Building AX.25 Header (dp=%p).\n", dp);
1579 1573
@@ -1581,17 +1575,17 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
1581 SOCK_DEBUG(sk, "Num digipeaters=%d\n", dp->ndigi); 1575 SOCK_DEBUG(sk, "Num digipeaters=%d\n", dp->ndigi);
1582 1576
1583 /* Build an AX.25 header */ 1577 /* Build an AX.25 header */
1584 asmptr += (lv = ax25_addr_build(asmptr, &ax25->source_addr, 1578 lv = ax25_addr_build(skb->data, &ax25->source_addr, &sax.sax25_call,
1585 &sax.sax25_call, dp, 1579 dp, AX25_COMMAND, AX25_MODULUS);
1586 AX25_COMMAND, AX25_MODULUS));
1587 1580
1588 SOCK_DEBUG(sk, "Built header (%d bytes)\n",lv); 1581 SOCK_DEBUG(sk, "Built header (%d bytes)\n",lv);
1589 1582
1590 skb->h.raw = asmptr; 1583 skb_set_transport_header(skb, lv);
1591 1584
1592 SOCK_DEBUG(sk, "base=%p pos=%p\n", skb->data, asmptr); 1585 SOCK_DEBUG(sk, "base=%p pos=%p\n",
1586 skb->data, skb_transport_header(skb));
1593 1587
1594 *asmptr = AX25_UI; 1588 *skb_transport_header(skb) = AX25_UI;
1595 1589
1596 /* Datagram frames go straight out of the door as UI */ 1590 /* Datagram frames go straight out of the door as UI */
1597 ax25_queue_xmit(skb, ax25->ax25_dev->dev); 1591 ax25_queue_xmit(skb, ax25->ax25_dev->dev);
@@ -1631,8 +1625,8 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock,
1631 if (!ax25_sk(sk)->pidincl) 1625 if (!ax25_sk(sk)->pidincl)
1632 skb_pull(skb, 1); /* Remove PID */ 1626 skb_pull(skb, 1); /* Remove PID */
1633 1627
1634 skb->h.raw = skb->data; 1628 skb_reset_transport_header(skb);
1635 copied = skb->len; 1629 copied = skb->len;
1636 1630
1637 if (copied > size) { 1631 if (copied > size) {
1638 copied = size; 1632 copied = size;
@@ -1645,9 +1639,10 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock,
1645 struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)msg->msg_name; 1639 struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)msg->msg_name;
1646 ax25_digi digi; 1640 ax25_digi digi;
1647 ax25_address src; 1641 ax25_address src;
1642 const unsigned char *mac = skb_mac_header(skb);
1648 1643
1649 ax25_addr_parse(skb->mac.raw+1, skb->data-skb->mac.raw-1, &src, NULL, &digi, NULL, NULL); 1644 ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL,
1650 1645 &digi, NULL, NULL);
1651 sax->sax25_family = AF_AX25; 1646 sax->sax25_family = AF_AX25;
1652 /* We set this correctly, even though we may not let the 1647 /* We set this correctly, even though we may not let the
1653 application know the digi calls further down (because it 1648 application know the digi calls further down (because it
@@ -1711,6 +1706,10 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1711 res = sock_get_timestamp(sk, argp); 1706 res = sock_get_timestamp(sk, argp);
1712 break; 1707 break;
1713 1708
1709 case SIOCGSTAMPNS:
1710 res = sock_get_timestampns(sk, argp);
1711 break;
1712
1714 case SIOCAX25ADDUID: /* Add a uid to the uid/call map table */ 1713 case SIOCAX25ADDUID: /* Add a uid to the uid/call map table */
1715 case SIOCAX25DELUID: /* Delete a uid from the uid/call map table */ 1714 case SIOCAX25DELUID: /* Delete a uid from the uid/call map table */
1716 case SIOCAX25GETUID: { 1715 case SIOCAX25GETUID: {
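
The ax25_connect()/ax25_accept() rewrites above replace the open-coded DECLARE_WAITQUEUE/add_wait_queue loops with the prepare_to_wait()/finish_wait() idiom, which sets the task state atomically with respect to wakeups and funnels every exit through one unwind path (hence the out → out_release relabelling). A hedged sketch of the resulting shape; condition_met() is a hypothetical predicate standing in for the socket-state checks:

        #include <linux/wait.h>
        #include <linux/sched.h>
        #include <net/sock.h>

        static int wait_for_condition(struct sock *sk)
        {
                DEFINE_WAIT(wait);
                int err = 0;

                for (;;) {
                        prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
                        if (condition_met(sk))      /* hypothetical predicate */
                                break;
                        if (signal_pending(current)) {
                                err = -ERESTARTSYS; /* interrupted: bail out */
                                break;
                        }
                        release_sock(sk);           /* sleep without the lock */
                        schedule();
                        lock_sock(sk);
                }
                finish_wait(sk->sk_sleep, &wait);
                return err;                 /* 0 once the condition holds */
        }
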
diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c
index 9569dd3fa466..a49773ff2b92 100644
--- a/net/ax25/ax25_ds_subr.c
+++ b/net/ax25/ax25_ds_subr.c
@@ -136,7 +136,7 @@ static void ax25_kiss_cmd(ax25_dev *ax25_dev, unsigned char cmd, unsigned char p
136 if ((skb = alloc_skb(2, GFP_ATOMIC)) == NULL) 136 if ((skb = alloc_skb(2, GFP_ATOMIC)) == NULL)
137 return; 137 return;
138 138
139 skb->nh.raw = skb->data; 139 skb_reset_network_header(skb);
140 p = skb_put(skb, 2); 140 p = skb_put(skb, 2);
141 141
142 *p++ = cmd; 142 *p++ = cmd;
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index 4a6b26becadc..0ddaff0df217 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -61,12 +61,14 @@ static int ax25_rx_fragment(ax25_cb *ax25, struct sk_buff *skb)
61 skb_reserve(skbn, AX25_MAX_HEADER_LEN); 61 skb_reserve(skbn, AX25_MAX_HEADER_LEN);
62 62
63 skbn->dev = ax25->ax25_dev->dev; 63 skbn->dev = ax25->ax25_dev->dev;
64 skbn->h.raw = skbn->data; 64 skb_reset_network_header(skbn);
65 skbn->nh.raw = skbn->data; 65 skb_reset_transport_header(skbn);
66 66
67 /* Copy data from the fragments */ 67 /* Copy data from the fragments */
68 while ((skbo = skb_dequeue(&ax25->frag_queue)) != NULL) { 68 while ((skbo = skb_dequeue(&ax25->frag_queue)) != NULL) {
69 memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len); 69 skb_copy_from_linear_data(skbo,
70 skb_put(skbn, skbo->len),
71 skbo->len);
70 kfree_skb(skbo); 72 kfree_skb(skbo);
71 } 73 }
72 74
@@ -122,8 +124,8 @@ int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb)
122 } 124 }
123 125
124 skb_pull(skb, 1); /* Remove PID */ 126 skb_pull(skb, 1); /* Remove PID */
125 skb->mac.raw = skb->nh.raw; 127 skb_reset_mac_header(skb);
126 skb->nh.raw = skb->data; 128 skb_reset_network_header(skb);
127 skb->dev = ax25->ax25_dev->dev; 129 skb->dev = ax25->ax25_dev->dev;
128 skb->pkt_type = PACKET_HOST; 130 skb->pkt_type = PACKET_HOST;
129 skb->protocol = htons(ETH_P_IP); 131 skb->protocol = htons(ETH_P_IP);
@@ -196,7 +198,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
196 * Process the AX.25/LAPB frame. 198 * Process the AX.25/LAPB frame.
197 */ 199 */
198 200
199 skb->h.raw = skb->data; 201 skb_reset_transport_header(skb);
200 202
201 if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) { 203 if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) {
202 kfree_skb(skb); 204 kfree_skb(skb);
@@ -233,7 +235,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
233 235
234 /* UI frame - bypass LAPB processing */ 236 /* UI frame - bypass LAPB processing */
235 if ((*skb->data & ~0x10) == AX25_UI && dp.lastrepeat + 1 == dp.ndigi) { 237 if ((*skb->data & ~0x10) == AX25_UI && dp.lastrepeat + 1 == dp.ndigi) {
236 skb->h.raw = skb->data + 2; /* skip control and pid */ 238 skb_set_transport_header(skb, 2); /* skip control and pid */
237 239
238 ax25_send_to_raw(&dest, skb, skb->data[1]); 240 ax25_send_to_raw(&dest, skb, skb->data[1]);
239 241
@@ -246,8 +248,8 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
246 switch (skb->data[1]) { 248 switch (skb->data[1]) {
247 case AX25_P_IP: 249 case AX25_P_IP:
248 skb_pull(skb,2); /* drop PID/CTRL */ 250 skb_pull(skb,2); /* drop PID/CTRL */
249 skb->h.raw = skb->data; 251 skb_reset_transport_header(skb);
250 skb->nh.raw = skb->data; 252 skb_reset_network_header(skb);
251 skb->dev = dev; 253 skb->dev = dev;
252 skb->pkt_type = PACKET_HOST; 254 skb->pkt_type = PACKET_HOST;
253 skb->protocol = htons(ETH_P_IP); 255 skb->protocol = htons(ETH_P_IP);
@@ -256,8 +258,8 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
256 258
257 case AX25_P_ARP: 259 case AX25_P_ARP:
258 skb_pull(skb,2); 260 skb_pull(skb,2);
259 skb->h.raw = skb->data; 261 skb_reset_transport_header(skb);
260 skb->nh.raw = skb->data; 262 skb_reset_network_header(skb);
261 skb->dev = dev; 263 skb->dev = dev;
262 skb->pkt_type = PACKET_HOST; 264 skb->pkt_type = PACKET_HOST;
263 skb->protocol = htons(ETH_P_ARP); 265 skb->protocol = htons(ETH_P_ARP);
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index 7f818bbcd1c5..930e4918037f 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -121,7 +121,7 @@ int ax25_rebuild_header(struct sk_buff *skb)
121 digipeat = route->digipeat; 121 digipeat = route->digipeat;
122 dev = route->dev; 122 dev = route->dev;
123 ip_mode = route->ip_mode; 123 ip_mode = route->ip_mode;
124 }; 124 }
125 125
126 if (dev == NULL) 126 if (dev == NULL)
127 dev = skb->dev; 127 dev = skb->dev;
@@ -171,7 +171,7 @@ int ax25_rebuild_header(struct sk_buff *skb)
171 src_c = *(ax25_address *)(bp + 8); 171 src_c = *(ax25_address *)(bp + 8);
172 172
173 skb_pull(ourskb, AX25_HEADER_LEN - 1); /* Keep PID */ 173 skb_pull(ourskb, AX25_HEADER_LEN - 1); /* Keep PID */
174 ourskb->nh.raw = ourskb->data; 174 skb_reset_network_header(ourskb);
175 175
176 ax25=ax25_send_frame( 176 ax25=ax25_send_frame(
177 ourskb, 177 ourskb,
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index 223835092b7a..92b517af7260 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -148,8 +148,9 @@ void ax25_output(ax25_cb *ax25, int paclen, struct sk_buff *skb)
148 148
149 if (ka9qfrag == 1) { 149 if (ka9qfrag == 1) {
150 skb_reserve(skbn, frontlen + 2); 150 skb_reserve(skbn, frontlen + 2);
151 skbn->nh.raw = skbn->data + (skb->nh.raw - skb->data); 151 skb_set_network_header(skbn,
152 memcpy(skb_put(skbn, len), skb->data, len); 152 skb_network_offset(skb));
153 skb_copy_from_linear_data(skb, skb_put(skbn, len), len);
153 p = skb_push(skbn, 2); 154 p = skb_push(skbn, 2);
154 155
155 *p++ = AX25_P_SEGMENT; 156 *p++ = AX25_P_SEGMENT;
@@ -161,8 +162,9 @@ void ax25_output(ax25_cb *ax25, int paclen, struct sk_buff *skb)
161 } 162 }
162 } else { 163 } else {
163 skb_reserve(skbn, frontlen + 1); 164 skb_reserve(skbn, frontlen + 1);
164 skbn->nh.raw = skbn->data + (skb->nh.raw - skb->data); 165 skb_set_network_header(skbn,
165 memcpy(skb_put(skbn, len), skb->data, len); 166 skb_network_offset(skb));
167 skb_copy_from_linear_data(skb, skb_put(skbn, len), len);
166 p = skb_push(skbn, 1); 168 p = skb_push(skbn, 1);
167 *p = AX25_P_TEXT; 169 *p = AX25_P_TEXT;
168 } 170 }
@@ -205,7 +207,7 @@ static void ax25_send_iframe(ax25_cb *ax25, struct sk_buff *skb, int poll_bit)
205 if (skb == NULL) 207 if (skb == NULL)
206 return; 208 return;
207 209
208 skb->nh.raw = skb->data; 210 skb_reset_network_header(skb);
209 211
210 if (ax25->modulus == AX25_MODULUS) { 212 if (ax25->modulus == AX25_MODULUS) {
211 frame = skb_push(skb, 1); 213 frame = skb_push(skb, 1);
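
In ax25_output() the fragment's network header is now placed by copying an offset rather than recomputing a pointer: skb_set_network_header(skbn, skb_network_offset(skb)). Simplified era definitions of the two helpers (pointer-based variant):

        static inline int skb_network_offset(const struct sk_buff *skb)
        {
                return skb->nh.raw - skb->data;
        }

        static inline void skb_set_network_header(struct sk_buff *skb,
                                                  const int offset)
        {
                skb->nh.raw = skb->data + offset;
        }
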
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index b6c577e3c914..5fe9b2a6697d 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -162,7 +162,7 @@ void ax25_send_control(ax25_cb *ax25, int frametype, int poll_bit, int type)
162 162
163 skb_reserve(skb, ax25->ax25_dev->dev->hard_header_len); 163 skb_reserve(skb, ax25->ax25_dev->dev->hard_header_len);
164 164
165 skb->nh.raw = skb->data; 165 skb_reset_network_header(skb);
166 166
167 /* Assume a response - address structure for DTE */ 167 /* Assume a response - address structure for DTE */
168 if (ax25->modulus == AX25_MODULUS) { 168 if (ax25->modulus == AX25_MODULUS) {
@@ -205,7 +205,7 @@ void ax25_return_dm(struct net_device *dev, ax25_address *src, ax25_address *des
205 return; /* Next SABM will get DM'd */ 205 return; /* Next SABM will get DM'd */
206 206
207 skb_reserve(skb, dev->hard_header_len); 207 skb_reserve(skb, dev->hard_header_len);
208 skb->nh.raw = skb->data; 208 skb_reset_network_header(skb);
209 209
210 ax25_digi_invert(digi, &retdigi); 210 ax25_digi_invert(digi, &retdigi);
211 211
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index c7228cfc6218..d942b946ba07 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -221,7 +221,7 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
221 copied = len; 221 copied = len;
222 } 222 }
223 223
224 skb->h.raw = skb->data; 224 skb_reset_transport_header(skb);
225 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); 225 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
226 226
227 skb_free_datagram(sk, skb); 227 skb_free_datagram(sk, skb);
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index b85d1492c357..ab2db55982ca 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -326,7 +326,7 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
326 return 0; 326 return 0;
327 } 327 }
328 328
329 skb->mac.raw = skb->data; 329 skb_reset_mac_header(skb);
330 330
331 /* Verify and pull out header */ 331 /* Verify and pull out header */
332 if (!skb_pull(skb, __bnep_rx_hlen[type & BNEP_TYPE_MASK])) 332 if (!skb_pull(skb, __bnep_rx_hlen[type & BNEP_TYPE_MASK]))
@@ -364,26 +364,28 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
364 364
365 case BNEP_COMPRESSED_SRC_ONLY: 365 case BNEP_COMPRESSED_SRC_ONLY:
366 memcpy(__skb_put(nskb, ETH_ALEN), s->eh.h_dest, ETH_ALEN); 366 memcpy(__skb_put(nskb, ETH_ALEN), s->eh.h_dest, ETH_ALEN);
367 memcpy(__skb_put(nskb, ETH_ALEN), skb->mac.raw, ETH_ALEN); 367 memcpy(__skb_put(nskb, ETH_ALEN), skb_mac_header(skb), ETH_ALEN);
368 put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2)); 368 put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2));
369 break; 369 break;
370 370
371 case BNEP_COMPRESSED_DST_ONLY: 371 case BNEP_COMPRESSED_DST_ONLY:
372 memcpy(__skb_put(nskb, ETH_ALEN), skb->mac.raw, ETH_ALEN); 372 memcpy(__skb_put(nskb, ETH_ALEN), skb_mac_header(skb),
373 memcpy(__skb_put(nskb, ETH_ALEN + 2), s->eh.h_source, ETH_ALEN + 2); 373 ETH_ALEN);
374 memcpy(__skb_put(nskb, ETH_ALEN + 2), s->eh.h_source,
375 ETH_ALEN + 2);
374 break; 376 break;
375 377
376 case BNEP_GENERAL: 378 case BNEP_GENERAL:
377 memcpy(__skb_put(nskb, ETH_ALEN * 2), skb->mac.raw, ETH_ALEN * 2); 379 memcpy(__skb_put(nskb, ETH_ALEN * 2), skb_mac_header(skb),
380 ETH_ALEN * 2);
378 put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2)); 381 put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2));
379 break; 382 break;
380 } 383 }
381 384
382 memcpy(__skb_put(nskb, skb->len), skb->data, skb->len); 385 skb_copy_from_linear_data(skb, __skb_put(nskb, skb->len), skb->len);
383 kfree_skb(skb); 386 kfree_skb(skb);
384 387
385 s->stats.rx_packets++; 388 s->stats.rx_packets++;
386 nskb->dev = dev;
387 nskb->ip_summed = CHECKSUM_NONE; 389 nskb->ip_summed = CHECKSUM_NONE;
388 nskb->protocol = eth_type_trans(nskb, dev); 390 nskb->protocol = eth_type_trans(nskb, dev);
389 netif_rx_ni(nskb); 391 netif_rx_ni(nskb);
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 3933608a9296..66bef1ccee2a 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -124,7 +124,7 @@ static inline void cmtp_add_msgpart(struct cmtp_session *session, int id, const
124 } 124 }
125 125
126 if (skb && (skb->len > 0)) 126 if (skb && (skb->len > 0))
127 memcpy(skb_put(nskb, skb->len), skb->data, skb->len); 127 skb_copy_from_linear_data(skb, skb_put(nskb, skb->len), skb->len);
128 128
129 memcpy(skb_put(nskb, count), buf, count); 129 memcpy(skb_put(nskb, count), buf, count);
130 130
@@ -256,7 +256,7 @@ static void cmtp_process_transmit(struct cmtp_session *session)
256 hdr[2] = size >> 8; 256 hdr[2] = size >> 8;
257 } 257 }
258 258
259 memcpy(skb_put(nskb, size), skb->data, size); 259 skb_copy_from_linear_data(skb, skb_put(nskb, size), size);
260 skb_pull(skb, size); 260 skb_pull(skb, size);
261 261
262 if (skb->len > 0) { 262 if (skb->len > 0) {
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index f3403fdb59f8..63980bd6b5f2 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -72,11 +72,11 @@ void hci_acl_connect(struct hci_conn *conn)
72 inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) { 72 inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) {
73 cp.pscan_rep_mode = ie->data.pscan_rep_mode; 73 cp.pscan_rep_mode = ie->data.pscan_rep_mode;
74 cp.pscan_mode = ie->data.pscan_mode; 74 cp.pscan_mode = ie->data.pscan_mode;
75 cp.clock_offset = ie->data.clock_offset | __cpu_to_le16(0x8000); 75 cp.clock_offset = ie->data.clock_offset | cpu_to_le16(0x8000);
76 memcpy(conn->dev_class, ie->data.dev_class, 3); 76 memcpy(conn->dev_class, ie->data.dev_class, 3);
77 } 77 }
78 78
79 cp.pkt_type = __cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK); 79 cp.pkt_type = cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK);
80 if (lmp_rswitch_capable(hdev) && !(hdev->link_mode & HCI_LM_MASTER)) 80 if (lmp_rswitch_capable(hdev) && !(hdev->link_mode & HCI_LM_MASTER))
81 cp.role_switch = 0x01; 81 cp.role_switch = 0x01;
82 else 82 else
@@ -107,7 +107,7 @@ void hci_acl_disconn(struct hci_conn *conn, __u8 reason)
107 107
108 conn->state = BT_DISCONN; 108 conn->state = BT_DISCONN;
109 109
110 cp.handle = __cpu_to_le16(conn->handle); 110 cp.handle = cpu_to_le16(conn->handle);
111 cp.reason = reason; 111 cp.reason = reason;
112 hci_send_cmd(conn->hdev, OGF_LINK_CTL, 112 hci_send_cmd(conn->hdev, OGF_LINK_CTL,
113 OCF_DISCONNECT, sizeof(cp), &cp); 113 OCF_DISCONNECT, sizeof(cp), &cp);
@@ -123,8 +123,8 @@ void hci_add_sco(struct hci_conn *conn, __u16 handle)
123 conn->state = BT_CONNECT; 123 conn->state = BT_CONNECT;
124 conn->out = 1; 124 conn->out = 1;
125 125
126 cp.pkt_type = __cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK); 126 cp.pkt_type = cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
127 cp.handle = __cpu_to_le16(handle); 127 cp.handle = cpu_to_le16(handle);
128 128
129 hci_send_cmd(hdev, OGF_LINK_CTL, OCF_ADD_SCO, sizeof(cp), &cp); 129 hci_send_cmd(hdev, OGF_LINK_CTL, OCF_ADD_SCO, sizeof(cp), &cp);
130} 130}
@@ -348,7 +348,7 @@ int hci_conn_auth(struct hci_conn *conn)
348 348
349 if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) { 349 if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {
350 struct hci_cp_auth_requested cp; 350 struct hci_cp_auth_requested cp;
351 cp.handle = __cpu_to_le16(conn->handle); 351 cp.handle = cpu_to_le16(conn->handle);
352 hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_AUTH_REQUESTED, sizeof(cp), &cp); 352 hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_AUTH_REQUESTED, sizeof(cp), &cp);
353 } 353 }
354 return 0; 354 return 0;
@@ -368,7 +368,7 @@ int hci_conn_encrypt(struct hci_conn *conn)
368 368
369 if (hci_conn_auth(conn)) { 369 if (hci_conn_auth(conn)) {
370 struct hci_cp_set_conn_encrypt cp; 370 struct hci_cp_set_conn_encrypt cp;
371 cp.handle = __cpu_to_le16(conn->handle); 371 cp.handle = cpu_to_le16(conn->handle);
372 cp.encrypt = 1; 372 cp.encrypt = 1;
373 hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_SET_CONN_ENCRYPT, sizeof(cp), &cp); 373 hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_SET_CONN_ENCRYPT, sizeof(cp), &cp);
374 } 374 }
@@ -383,7 +383,7 @@ int hci_conn_change_link_key(struct hci_conn *conn)
383 383
384 if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) { 384 if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {
385 struct hci_cp_change_conn_link_key cp; 385 struct hci_cp_change_conn_link_key cp;
386 cp.handle = __cpu_to_le16(conn->handle); 386 cp.handle = cpu_to_le16(conn->handle);
387 hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_CHANGE_CONN_LINK_KEY, sizeof(cp), &cp); 387 hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_CHANGE_CONN_LINK_KEY, sizeof(cp), &cp);
388 } 388 }
389 return 0; 389 return 0;
@@ -423,7 +423,7 @@ void hci_conn_enter_active_mode(struct hci_conn *conn)
423 423
424 if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) { 424 if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) {
425 struct hci_cp_exit_sniff_mode cp; 425 struct hci_cp_exit_sniff_mode cp;
426 cp.handle = __cpu_to_le16(conn->handle); 426 cp.handle = cpu_to_le16(conn->handle);
427 hci_send_cmd(hdev, OGF_LINK_POLICY, 427 hci_send_cmd(hdev, OGF_LINK_POLICY,
428 OCF_EXIT_SNIFF_MODE, sizeof(cp), &cp); 428 OCF_EXIT_SNIFF_MODE, sizeof(cp), &cp);
429 } 429 }
@@ -452,21 +452,21 @@ void hci_conn_enter_sniff_mode(struct hci_conn *conn)
452 452
453 if (lmp_sniffsubr_capable(hdev) && lmp_sniffsubr_capable(conn)) { 453 if (lmp_sniffsubr_capable(hdev) && lmp_sniffsubr_capable(conn)) {
454 struct hci_cp_sniff_subrate cp; 454 struct hci_cp_sniff_subrate cp;
455 cp.handle = __cpu_to_le16(conn->handle); 455 cp.handle = cpu_to_le16(conn->handle);
456 cp.max_latency = __constant_cpu_to_le16(0); 456 cp.max_latency = cpu_to_le16(0);
457 cp.min_remote_timeout = __constant_cpu_to_le16(0); 457 cp.min_remote_timeout = cpu_to_le16(0);
458 cp.min_local_timeout = __constant_cpu_to_le16(0); 458 cp.min_local_timeout = cpu_to_le16(0);
459 hci_send_cmd(hdev, OGF_LINK_POLICY, 459 hci_send_cmd(hdev, OGF_LINK_POLICY,
460 OCF_SNIFF_SUBRATE, sizeof(cp), &cp); 460 OCF_SNIFF_SUBRATE, sizeof(cp), &cp);
461 } 461 }
462 462
463 if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) { 463 if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) {
464 struct hci_cp_sniff_mode cp; 464 struct hci_cp_sniff_mode cp;
465 cp.handle = __cpu_to_le16(conn->handle); 465 cp.handle = cpu_to_le16(conn->handle);
466 cp.max_interval = __cpu_to_le16(hdev->sniff_max_interval); 466 cp.max_interval = cpu_to_le16(hdev->sniff_max_interval);
467 cp.min_interval = __cpu_to_le16(hdev->sniff_min_interval); 467 cp.min_interval = cpu_to_le16(hdev->sniff_min_interval);
468 cp.attempt = __constant_cpu_to_le16(4); 468 cp.attempt = cpu_to_le16(4);
469 cp.timeout = __constant_cpu_to_le16(1); 469 cp.timeout = cpu_to_le16(1);
470 hci_send_cmd(hdev, OGF_LINK_POLICY, 470 hci_send_cmd(hdev, OGF_LINK_POLICY,
471 OCF_SNIFF_MODE, sizeof(cp), &cp); 471 OCF_SNIFF_MODE, sizeof(cp), &cp);
472 } 472 }
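
The __cpu_to_le16()/__constant_cpu_to_le16() → cpu_to_le16() changes across the Bluetooth code are spelling cleanups: the unprefixed macro folds constant arguments at compile time and byte-swaps only on big-endian hosts, so the double-underscore and __constant_ variants buy nothing in driver code. A runnable userspace sketch of the semantics (my_cpu_to_le16 is an illustrative reimplementation, not the kernel macro):

        #include <stdint.h>
        #include <stdio.h>

        static uint16_t my_cpu_to_le16(uint16_t v)
        {
                union { uint16_t u16; uint8_t u8[2]; } probe = { .u16 = 1 };

                if (probe.u8[0] == 1)               /* little-endian: identity */
                        return v;
                return (uint16_t)((v << 8) | (v >> 8)); /* big-endian: swap */
        }

        int main(void)
        {
                printf("0x%04x -> 0x%04x\n", 0x1234,
                       (unsigned)my_cpu_to_le16(0x1234));
                return 0;
        }
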
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 4917919d86a6..aa4b56a8c3ea 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -149,7 +149,7 @@ static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev,
149 default: 149 default:
150 err = -ETIMEDOUT; 150 err = -ETIMEDOUT;
151 break; 151 break;
152 }; 152 }
153 153
154 hdev->req_status = hdev->req_result = 0; 154 hdev->req_status = hdev->req_result = 0;
155 155
@@ -216,10 +216,10 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
216 /* Host buffer size */ 216 /* Host buffer size */
217 { 217 {
218 struct hci_cp_host_buffer_size cp; 218 struct hci_cp_host_buffer_size cp;
219 cp.acl_mtu = __cpu_to_le16(HCI_MAX_ACL_SIZE); 219 cp.acl_mtu = cpu_to_le16(HCI_MAX_ACL_SIZE);
220 cp.sco_mtu = HCI_MAX_SCO_SIZE; 220 cp.sco_mtu = HCI_MAX_SCO_SIZE;
221 cp.acl_max_pkt = __cpu_to_le16(0xffff); 221 cp.acl_max_pkt = cpu_to_le16(0xffff);
222 cp.sco_max_pkt = __cpu_to_le16(0xffff); 222 cp.sco_max_pkt = cpu_to_le16(0xffff);
223 hci_send_cmd(hdev, OGF_HOST_CTL, OCF_HOST_BUFFER_SIZE, sizeof(cp), &cp); 223 hci_send_cmd(hdev, OGF_HOST_CTL, OCF_HOST_BUFFER_SIZE, sizeof(cp), &cp);
224 } 224 }
225#endif 225#endif
@@ -240,11 +240,11 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
240 } 240 }
241 241
242 /* Page timeout ~20 secs */ 242 /* Page timeout ~20 secs */
243 param = __cpu_to_le16(0x8000); 243 param = cpu_to_le16(0x8000);
244 hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_PG_TIMEOUT, 2, &param); 244 hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_PG_TIMEOUT, 2, &param);
245 245
246 /* Connection accept timeout ~20 secs */ 246 /* Connection accept timeout ~20 secs */
247 param = __cpu_to_le16(0x7d00); 247 param = cpu_to_le16(0x7d00);
248 hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_CA_TIMEOUT, 2, &param); 248 hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_CA_TIMEOUT, 2, &param);
249} 249}
250 250
@@ -1034,7 +1034,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 ogf, __u16 ocf, __u32 plen, void *p
1034 } 1034 }
1035 1035
1036 hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE); 1036 hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE);
1037 hdr->opcode = __cpu_to_le16(hci_opcode_pack(ogf, ocf)); 1037 hdr->opcode = cpu_to_le16(hci_opcode_pack(ogf, ocf));
1038 hdr->plen = plen; 1038 hdr->plen = plen;
1039 1039
1040 if (plen) 1040 if (plen)
@@ -1060,7 +1060,7 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 ogf, __u16 ocf)
1060 1060
1061 hdr = (void *) hdev->sent_cmd->data; 1061 hdr = (void *) hdev->sent_cmd->data;
1062 1062
1063 if (hdr->opcode != __cpu_to_le16(hci_opcode_pack(ogf, ocf))) 1063 if (hdr->opcode != cpu_to_le16(hci_opcode_pack(ogf, ocf)))
1064 return NULL; 1064 return NULL;
1065 1065
1066 BT_DBG("%s ogf 0x%x ocf 0x%x", hdev->name, ogf, ocf); 1066 BT_DBG("%s ogf 0x%x ocf 0x%x", hdev->name, ogf, ocf);
@@ -1074,11 +1074,11 @@ static void hci_add_acl_hdr(struct sk_buff *skb, __u16 handle, __u16 flags)
1074 struct hci_acl_hdr *hdr; 1074 struct hci_acl_hdr *hdr;
1075 int len = skb->len; 1075 int len = skb->len;
1076 1076
1077 hdr = (struct hci_acl_hdr *) skb_push(skb, HCI_ACL_HDR_SIZE); 1077 skb_push(skb, HCI_ACL_HDR_SIZE);
1078 hdr->handle = __cpu_to_le16(hci_handle_pack(handle, flags)); 1078 skb_reset_transport_header(skb);
1079 hdr->dlen = __cpu_to_le16(len); 1079 hdr = (struct hci_acl_hdr *)skb_transport_header(skb);
1080 1080 hdr->handle = cpu_to_le16(hci_handle_pack(handle, flags));
1081 skb->h.raw = (void *) hdr; 1081 hdr->dlen = cpu_to_le16(len);
1082} 1082}
1083 1083
1084int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags) 1084int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
@@ -1140,11 +1140,12 @@ int hci_send_sco(struct hci_conn *conn, struct sk_buff *skb)
1140 return -EINVAL; 1140 return -EINVAL;
1141 } 1141 }
1142 1142
1143 hdr.handle = __cpu_to_le16(conn->handle); 1143 hdr.handle = cpu_to_le16(conn->handle);
1144 hdr.dlen = skb->len; 1144 hdr.dlen = skb->len;
1145 1145
1146 skb->h.raw = skb_push(skb, HCI_SCO_HDR_SIZE); 1146 skb_push(skb, HCI_SCO_HDR_SIZE);
1147 memcpy(skb->h.raw, &hdr, HCI_SCO_HDR_SIZE); 1147 skb_reset_transport_header(skb);
1148 memcpy(skb_transport_header(skb), &hdr, HCI_SCO_HDR_SIZE);
1148 1149
1149 skb->dev = (void *) hdev; 1150 skb->dev = (void *) hdev;
1150 bt_cb(skb)->pkt_type = HCI_SCODATA_PKT; 1151 bt_cb(skb)->pkt_type = HCI_SCODATA_PKT;
@@ -1387,7 +1388,7 @@ static void hci_rx_task(unsigned long arg)
1387 case HCI_SCODATA_PKT: 1388 case HCI_SCODATA_PKT:
1388 kfree_skb(skb); 1389 kfree_skb(skb);
1389 continue; 1390 continue;
1390 }; 1391 }
1391 } 1392 }
1392 1393
1393 /* Process frame */ 1394 /* Process frame */
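
The hci_add_acl_hdr() and hci_send_sco() hunks illustrate the ordering that matters in the push-and-reset idiom: skb_push() moves skb->data backwards, so the transport-header reset must follow it, and only then is the header filled in through the accessor. The same logic as the hunk, restated as a kernel-style sketch with comments:

        static void acl_hdr_sketch(struct sk_buff *skb, __u16 handle,
                                   __u16 flags, int len)
        {
                struct hci_acl_hdr *hdr;

                skb_push(skb, HCI_ACL_HDR_SIZE);  /* 1: grow the front first */
                skb_reset_transport_header(skb);  /* 2: h now == new skb->data */
                hdr = (struct hci_acl_hdr *)skb_transport_header(skb);
                hdr->handle = cpu_to_le16(hci_handle_pack(handle, flags));
                hdr->dlen   = cpu_to_le16(len);
        }
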
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 936d3fc479cd..447ba7131220 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -783,7 +783,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
783 if (conn->type == ACL_LINK && hdev->link_policy) { 783 if (conn->type == ACL_LINK && hdev->link_policy) {
784 struct hci_cp_write_link_policy cp; 784 struct hci_cp_write_link_policy cp;
785 cp.handle = ev->handle; 785 cp.handle = ev->handle;
786 cp.policy = __cpu_to_le16(hdev->link_policy); 786 cp.policy = cpu_to_le16(hdev->link_policy);
787 hci_send_cmd(hdev, OGF_LINK_POLICY, 787 hci_send_cmd(hdev, OGF_LINK_POLICY,
788 OCF_WRITE_LINK_POLICY, sizeof(cp), &cp); 788 OCF_WRITE_LINK_POLICY, sizeof(cp), &cp);
789 } 789 }
@@ -793,8 +793,8 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
793 struct hci_cp_change_conn_ptype cp; 793 struct hci_cp_change_conn_ptype cp;
794 cp.handle = ev->handle; 794 cp.handle = ev->handle;
795 cp.pkt_type = (conn->type == ACL_LINK) ? 795 cp.pkt_type = (conn->type == ACL_LINK) ?
796 __cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK): 796 cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK):
797 __cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK); 797 cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
798 798
799 hci_send_cmd(hdev, OGF_LINK_CTL, 799 hci_send_cmd(hdev, OGF_LINK_CTL,
800 OCF_CHANGE_CONN_PTYPE, sizeof(cp), &cp); 800 OCF_CHANGE_CONN_PTYPE, sizeof(cp), &cp);
@@ -970,7 +970,7 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s
970 if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) { 970 if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) {
971 if (!ev->status) { 971 if (!ev->status) {
972 struct hci_cp_set_conn_encrypt cp; 972 struct hci_cp_set_conn_encrypt cp;
973 cp.handle = __cpu_to_le16(conn->handle); 973 cp.handle = cpu_to_le16(conn->handle);
974 cp.encrypt = 1; 974 cp.encrypt = 1;
975 hci_send_cmd(conn->hdev, OGF_LINK_CTL, 975 hci_send_cmd(conn->hdev, OGF_LINK_CTL,
976 OCF_SET_CONN_ENCRYPT, sizeof(cp), &cp); 976 OCF_SET_CONN_ENCRYPT, sizeof(cp), &cp);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 71f5cfbbebb8..832b5f44be5c 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -375,7 +375,7 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
375 copied = len; 375 copied = len;
376 } 376 }
377 377
378 skb->h.raw = skb->data; 378 skb_reset_transport_header(skb);
379 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); 379 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
380 380
381 hci_sock_cmsg(sk, msg, skb); 381 hci_sock_cmsg(sk, msg, skb);
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index e83ee82440d3..a5867879b615 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -459,8 +459,8 @@ static void __l2cap_sock_close(struct sock *sk, int reason)
459 sk->sk_state = BT_DISCONN; 459 sk->sk_state = BT_DISCONN;
460 l2cap_sock_set_timer(sk, sk->sk_sndtimeo); 460 l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
461 461
462 req.dcid = __cpu_to_le16(l2cap_pi(sk)->dcid); 462 req.dcid = cpu_to_le16(l2cap_pi(sk)->dcid);
463 req.scid = __cpu_to_le16(l2cap_pi(sk)->scid); 463 req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
464 l2cap_send_cmd(conn, l2cap_get_ident(conn), 464 l2cap_send_cmd(conn, l2cap_get_ident(conn),
465 L2CAP_DISCONN_REQ, sizeof(req), &req); 465 L2CAP_DISCONN_REQ, sizeof(req), &req);
466 } else { 466 } else {
@@ -652,7 +652,7 @@ static int l2cap_do_connect(struct sock *sk)
652 if (sk->sk_type == SOCK_SEQPACKET) { 652 if (sk->sk_type == SOCK_SEQPACKET) {
653 struct l2cap_conn_req req; 653 struct l2cap_conn_req req;
654 l2cap_pi(sk)->ident = l2cap_get_ident(conn); 654 l2cap_pi(sk)->ident = l2cap_get_ident(conn);
655 req.scid = __cpu_to_le16(l2cap_pi(sk)->scid); 655 req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
656 req.psm = l2cap_pi(sk)->psm; 656 req.psm = l2cap_pi(sk)->psm;
657 l2cap_send_cmd(conn, l2cap_pi(sk)->ident, 657 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
658 L2CAP_CONN_REQ, sizeof(req), &req); 658 L2CAP_CONN_REQ, sizeof(req), &req);
@@ -868,8 +868,8 @@ static inline int l2cap_do_send(struct sock *sk, struct msghdr *msg, int len)
868 868
869 /* Create L2CAP header */ 869 /* Create L2CAP header */
870 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); 870 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
871 lh->cid = __cpu_to_le16(l2cap_pi(sk)->dcid); 871 lh->cid = cpu_to_le16(l2cap_pi(sk)->dcid);
872 lh->len = __cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE)); 872 lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE));
873 873
874 if (sk->sk_type == SOCK_DGRAM) 874 if (sk->sk_type == SOCK_DGRAM)
875 put_unaligned(l2cap_pi(sk)->psm, (u16 *) skb_put(skb, 2)); 875 put_unaligned(l2cap_pi(sk)->psm, (u16 *) skb_put(skb, 2));
@@ -1096,7 +1096,7 @@ static void l2cap_conn_ready(struct l2cap_conn *conn)
1096 } else if (sk->sk_state == BT_CONNECT) { 1096 } else if (sk->sk_state == BT_CONNECT) {
1097 struct l2cap_conn_req req; 1097 struct l2cap_conn_req req;
1098 l2cap_pi(sk)->ident = l2cap_get_ident(conn); 1098 l2cap_pi(sk)->ident = l2cap_get_ident(conn);
1099 req.scid = __cpu_to_le16(l2cap_pi(sk)->scid); 1099 req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
1100 req.psm = l2cap_pi(sk)->psm; 1100 req.psm = l2cap_pi(sk)->psm;
1101 l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_REQ, sizeof(req), &req); 1101 l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_REQ, sizeof(req), &req);
1102 } 1102 }
@@ -1192,13 +1192,13 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
1192 return NULL; 1192 return NULL;
1193 1193
1194 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); 1194 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
1195 lh->len = __cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen); 1195 lh->len = cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen);
1196 lh->cid = __cpu_to_le16(0x0001); 1196 lh->cid = cpu_to_le16(0x0001);
1197 1197
1198 cmd = (struct l2cap_cmd_hdr *) skb_put(skb, L2CAP_CMD_HDR_SIZE); 1198 cmd = (struct l2cap_cmd_hdr *) skb_put(skb, L2CAP_CMD_HDR_SIZE);
1199 cmd->code = code; 1199 cmd->code = code;
1200 cmd->ident = ident; 1200 cmd->ident = ident;
1201 cmd->len = __cpu_to_le16(dlen); 1201 cmd->len = cpu_to_le16(dlen);
1202 1202
1203 if (dlen) { 1203 if (dlen) {
1204 count -= L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE; 1204 count -= L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE;
@@ -1316,11 +1316,11 @@ static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val)
1316 break; 1316 break;
1317 1317
1318 case 2: 1318 case 2:
1319 *((u16 *) opt->val) = __cpu_to_le16(val); 1319 *((u16 *) opt->val) = cpu_to_le16(val);
1320 break; 1320 break;
1321 1321
1322 case 4: 1322 case 4:
1323 *((u32 *) opt->val) = __cpu_to_le32(val); 1323 *((u32 *) opt->val) = cpu_to_le32(val);
1324 break; 1324 break;
1325 1325
1326 default: 1326 default:
@@ -1346,8 +1346,8 @@ static int l2cap_build_conf_req(struct sock *sk, void *data)
1346 //if (flush_to != L2CAP_DEFAULT_FLUSH_TO) 1346 //if (flush_to != L2CAP_DEFAULT_FLUSH_TO)
1347 // l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, pi->flush_to); 1347 // l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, pi->flush_to);
1348 1348
1349 req->dcid = __cpu_to_le16(pi->dcid); 1349 req->dcid = cpu_to_le16(pi->dcid);
1350 req->flags = __cpu_to_le16(0); 1350 req->flags = cpu_to_le16(0);
1351 1351
1352 return ptr - data; 1352 return ptr - data;
1353} 1353}
@@ -1383,9 +1383,9 @@ static int l2cap_build_conf_rsp(struct sock *sk, void *data, int *result)
1383 else 1383 else
1384 flags = 0x0001; 1384 flags = 0x0001;
1385 1385
1386 rsp->scid = __cpu_to_le16(l2cap_pi(sk)->dcid); 1386 rsp->scid = cpu_to_le16(l2cap_pi(sk)->dcid);
1387 rsp->result = __cpu_to_le16(result ? *result : 0); 1387 rsp->result = cpu_to_le16(result ? *result : 0);
1388 rsp->flags = __cpu_to_le16(flags); 1388 rsp->flags = cpu_to_le16(flags);
1389 1389
1390 return ptr - data; 1390 return ptr - data;
1391} 1391}
@@ -1470,10 +1470,10 @@ response:
1470 bh_unlock_sock(parent); 1470 bh_unlock_sock(parent);
1471 1471
1472sendresp: 1472sendresp:
1473 rsp.scid = __cpu_to_le16(scid); 1473 rsp.scid = cpu_to_le16(scid);
1474 rsp.dcid = __cpu_to_le16(dcid); 1474 rsp.dcid = cpu_to_le16(dcid);
1475 rsp.result = __cpu_to_le16(result); 1475 rsp.result = cpu_to_le16(result);
1476 rsp.status = __cpu_to_le16(status); 1476 rsp.status = cpu_to_le16(status);
1477 l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp); 1477 l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp);
1478 return 0; 1478 return 0;
1479} 1479}
@@ -1613,8 +1613,8 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr
1613 l2cap_sock_set_timer(sk, HZ * 5); 1613 l2cap_sock_set_timer(sk, HZ * 5);
1614 { 1614 {
1615 struct l2cap_disconn_req req; 1615 struct l2cap_disconn_req req;
1616 req.dcid = __cpu_to_le16(l2cap_pi(sk)->dcid); 1616 req.dcid = cpu_to_le16(l2cap_pi(sk)->dcid);
1617 req.scid = __cpu_to_le16(l2cap_pi(sk)->scid); 1617 req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
1618 l2cap_send_cmd(conn, l2cap_get_ident(conn), 1618 l2cap_send_cmd(conn, l2cap_get_ident(conn),
1619 L2CAP_DISCONN_REQ, sizeof(req), &req); 1619 L2CAP_DISCONN_REQ, sizeof(req), &req);
1620 } 1620 }
@@ -1652,8 +1652,8 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd
1652 if (!(sk = l2cap_get_chan_by_scid(&conn->chan_list, dcid))) 1652 if (!(sk = l2cap_get_chan_by_scid(&conn->chan_list, dcid)))
1653 return 0; 1653 return 0;
1654 1654
1655 rsp.dcid = __cpu_to_le16(l2cap_pi(sk)->scid); 1655 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
1656 rsp.scid = __cpu_to_le16(l2cap_pi(sk)->dcid); 1656 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
1657 l2cap_send_cmd(conn, cmd->ident, L2CAP_DISCONN_RSP, sizeof(rsp), &rsp); 1657 l2cap_send_cmd(conn, cmd->ident, L2CAP_DISCONN_RSP, sizeof(rsp), &rsp);
1658 1658
1659 sk->sk_shutdown = SHUTDOWN_MASK; 1659 sk->sk_shutdown = SHUTDOWN_MASK;
@@ -1696,8 +1696,8 @@ static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cm
1696 1696
1697 BT_DBG("type 0x%4.4x", type); 1697 BT_DBG("type 0x%4.4x", type);
1698 1698
1699 rsp.type = __cpu_to_le16(type); 1699 rsp.type = cpu_to_le16(type);
1700 rsp.result = __cpu_to_le16(L2CAP_IR_NOTSUPP); 1700 rsp.result = cpu_to_le16(L2CAP_IR_NOTSUPP);
1701 l2cap_send_cmd(conn, cmd->ident, L2CAP_INFO_RSP, sizeof(rsp), &rsp); 1701 l2cap_send_cmd(conn, cmd->ident, L2CAP_INFO_RSP, sizeof(rsp), &rsp);
1702 1702
1703 return 0; 1703 return 0;
@@ -1794,7 +1794,7 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *sk
1794 BT_DBG("error %d", err); 1794 BT_DBG("error %d", err);
1795 1795
1796 /* FIXME: Map err to a valid reason */ 1796 /* FIXME: Map err to a valid reason */
1797 rej.reason = __cpu_to_le16(0); 1797 rej.reason = cpu_to_le16(0);
1798 l2cap_send_cmd(conn, cmd.ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej); 1798 l2cap_send_cmd(conn, cmd.ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej);
1799 } 1799 }
1800 1800
@@ -1993,10 +1993,10 @@ static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status)
1993 result = L2CAP_CR_SEC_BLOCK; 1993 result = L2CAP_CR_SEC_BLOCK;
1994 } 1994 }
1995 1995
1996 rsp.scid = __cpu_to_le16(l2cap_pi(sk)->dcid); 1996 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
1997 rsp.dcid = __cpu_to_le16(l2cap_pi(sk)->scid); 1997 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
1998 rsp.result = __cpu_to_le16(result); 1998 rsp.result = cpu_to_le16(result);
1999 rsp.status = __cpu_to_le16(0); 1999 rsp.status = cpu_to_le16(0);
2000 l2cap_send_cmd(conn, l2cap_pi(sk)->ident, 2000 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
2001 L2CAP_CONN_RSP, sizeof(rsp), &rsp); 2001 L2CAP_CONN_RSP, sizeof(rsp), &rsp);
2002 2002
@@ -2041,10 +2041,10 @@ static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status)
2041 result = L2CAP_CR_SEC_BLOCK; 2041 result = L2CAP_CR_SEC_BLOCK;
2042 } 2042 }
2043 2043
2044 rsp.scid = __cpu_to_le16(l2cap_pi(sk)->dcid); 2044 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
2045 rsp.dcid = __cpu_to_le16(l2cap_pi(sk)->scid); 2045 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
2046 rsp.result = __cpu_to_le16(result); 2046 rsp.result = cpu_to_le16(result);
2047 rsp.status = __cpu_to_le16(0); 2047 rsp.status = cpu_to_le16(0);
2048 l2cap_send_cmd(conn, l2cap_pi(sk)->ident, 2048 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
2049 L2CAP_CONN_RSP, sizeof(rsp), &rsp); 2049 L2CAP_CONN_RSP, sizeof(rsp), &rsp);
2050 2050
@@ -2107,7 +2107,8 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
2107 if (!(conn->rx_skb = bt_skb_alloc(len, GFP_ATOMIC))) 2107 if (!(conn->rx_skb = bt_skb_alloc(len, GFP_ATOMIC)))
2108 goto drop; 2108 goto drop;
2109 2109
2110 memcpy(skb_put(conn->rx_skb, skb->len), skb->data, skb->len); 2110 skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
2111 skb->len);
2111 conn->rx_len = len - skb->len; 2112 conn->rx_len = len - skb->len;
2112 } else { 2113 } else {
2113 BT_DBG("Cont: frag len %d (expecting %d)", skb->len, conn->rx_len); 2114 BT_DBG("Cont: frag len %d (expecting %d)", skb->len, conn->rx_len);
@@ -2128,7 +2129,8 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
2128 goto drop; 2129 goto drop;
2129 } 2130 }
2130 2131
2131 memcpy(skb_put(conn->rx_skb, skb->len), skb->data, skb->len); 2132 skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
2133 skb->len);
2132 conn->rx_len -= skb->len; 2134 conn->rx_len -= skb->len;
2133 2135
2134 if (!conn->rx_len) { 2136 if (!conn->rx_len) {
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 94f457360560..fe7df90eb707 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1567,7 +1567,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)
1567 1567
1568 /* Trim FCS */ 1568 /* Trim FCS */
1569 skb->len--; skb->tail--; 1569 skb->len--; skb->tail--;
1570 fcs = *(u8 *) skb->tail; 1570 fcs = *(u8 *)skb_tail_pointer(skb);
1571 1571
1572 if (__check_fcs(skb->data, type, fcs)) { 1572 if (__check_fcs(skb->data, type, fcs)) {
1573 BT_ERR("bad checksum in packet"); 1573 BT_ERR("bad checksum in packet");
@@ -1851,18 +1851,18 @@ static void rfcomm_worker(void)
1851 BT_DBG(""); 1851 BT_DBG("");
1852 1852
1853 while (!atomic_read(&terminate)) { 1853 while (!atomic_read(&terminate)) {
1854 set_current_state(TASK_INTERRUPTIBLE);
1854 if (!test_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event)) { 1855 if (!test_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event)) {
1855 /* No pending events. Let's sleep. 1856 /* No pending events. Let's sleep.
1856 * Incoming connections and data will wake us up. */ 1857 * Incoming connections and data will wake us up. */
1857 set_current_state(TASK_INTERRUPTIBLE);
1858 schedule(); 1858 schedule();
1859 } 1859 }
1860 set_current_state(TASK_RUNNING);
1860 1861
1861 /* Process stuff */ 1862 /* Process stuff */
1862 clear_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event); 1863 clear_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event);
1863 rfcomm_process_sessions(); 1864 rfcomm_process_sessions();
1864 } 1865 }
1865 set_current_state(TASK_RUNNING);
1866 return; 1866 return;
1867} 1867}
1868 1868
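
The set_current_state() move in rfcomm_worker() closes a lost-wakeup window: the task state must be declared before the event flag is tested. Sketch of the race the old ordering allowed (the waker side is sketched generically; the exact call lives in rfcomm's scheduling helper):

        /*
         *   worker thread                          waker
         *   -------------                          -----
         *   test_bit(RFCOMM_SCHED_WAKEUP) -> 0
         *                                          set_bit(RFCOMM_SCHED_WAKEUP);
         *                                          wake worker (no-op: it is
         *                                            still TASK_RUNNING)
         *   set_current_state(TASK_INTERRUPTIBLE);
         *   schedule();      <- sleeps with an event already pending
         *
         * With TASK_INTERRUPTIBLE set before the test, a wakeup landing in
         * that window flips the task back to TASK_RUNNING, so schedule()
         * returns immediately instead of stalling.
         */
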
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index ae4391440950..3f5163e725ed 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -393,7 +393,7 @@ static void sco_sock_close(struct sock *sk)
393 default: 393 default:
394 sock_set_flag(sk, SOCK_ZAPPED); 394 sock_set_flag(sk, SOCK_ZAPPED);
395 break; 395 break;
396 }; 396 }
397 397
398 release_sock(sk); 398 release_sock(sk);
399 399
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 2994387999a8..848b8fa8bedd 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -37,7 +37,9 @@ static int __init br_init(void)
37 return -EADDRINUSE; 37 return -EADDRINUSE;
38 } 38 }
39 39
40 br_fdb_init(); 40 err = br_fdb_init();
41 if (err)
42 goto err_out1;
41 43
42 err = br_netfilter_init(); 44 err = br_netfilter_init();
43 if (err) 45 if (err)
@@ -47,7 +49,10 @@ static int __init br_init(void)
47 if (err) 49 if (err)
48 goto err_out2; 50 goto err_out2;
49 51
50 br_netlink_init(); 52 err = br_netlink_init();
53 if (err)
54 goto err_out3;
55
51 brioctl_set(br_ioctl_deviceless_stub); 56 brioctl_set(br_ioctl_deviceless_stub);
52 br_handle_frame_hook = br_handle_frame; 57 br_handle_frame_hook = br_handle_frame;
53 58
@@ -55,7 +60,8 @@ static int __init br_init(void)
55 br_fdb_put_hook = br_fdb_put; 60 br_fdb_put_hook = br_fdb_put;
56 61
57 return 0; 62 return 0;
58 63err_out3:
64 unregister_netdevice_notifier(&br_device_notifier);
59err_out2: 65err_out2:
60 br_netfilter_fini(); 66 br_netfilter_fini();
61err_out1: 67err_out1:
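
br_fdb_init() and br_netlink_init() can now fail, so br_init() grows the usual goto unwind ladder: each setup step that can fail jumps to a label that tears down everything before it, in reverse order. A minimal sketch of the pattern, with hypothetical init_a()/init_b() helpers:

    /* Goto-unwind ladder (sketch): one cleanup path per resource;
     * a failure at step N falls through the labels undoing N-1..1. */
    static int example_init(void)
    {
            int err;

            err = init_a();
            if (err)
                    goto err_a;

            err = init_b();
            if (err)
                    goto err_b;

            return 0;

    err_b:
            fini_a();       /* undo init_a() */
    err_a:
            return err;
    }
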
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 905a39c33a16..5e1892d8d874 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -37,7 +37,7 @@ int br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
37 br->statistics.tx_packets++; 37 br->statistics.tx_packets++;
38 br->statistics.tx_bytes += skb->len; 38 br->statistics.tx_bytes += skb->len;
39 39
40 skb->mac.raw = skb->data; 40 skb_reset_mac_header(skb);
41 skb_pull(skb, ETH_HLEN); 41 skb_pull(skb, ETH_HLEN);
42 42
43 if (dest[0] & 1) 43 if (dest[0] & 1)
@@ -83,27 +83,21 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
83 return 0; 83 return 0;
84} 84}
85 85
86/* Allow setting mac address of pseudo-bridge to be same as 86/* Allow setting mac address to any valid ethernet address. */
87 * any of the bound interfaces
88 */
89static int br_set_mac_address(struct net_device *dev, void *p) 87static int br_set_mac_address(struct net_device *dev, void *p)
90{ 88{
91 struct net_bridge *br = netdev_priv(dev); 89 struct net_bridge *br = netdev_priv(dev);
92 struct sockaddr *addr = p; 90 struct sockaddr *addr = p;
93 struct net_bridge_port *port; 91
94 int err = -EADDRNOTAVAIL; 92 if (!is_valid_ether_addr(addr->sa_data))
93 return -EINVAL;
95 94
96 spin_lock_bh(&br->lock); 95 spin_lock_bh(&br->lock);
97 list_for_each_entry(port, &br->port_list, list) { 96 memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
98 if (!compare_ether_addr(port->dev->dev_addr, addr->sa_data)) { 97 br_stp_change_bridge_id(br, addr->sa_data);
99 br_stp_change_bridge_id(br, addr->sa_data);
100 err = 0;
101 break;
102 }
103 }
104 spin_unlock_bh(&br->lock); 98 spin_unlock_bh(&br->lock);
105 99
106 return err; 100 return 0;
107} 101}
108 102
109static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info) 103static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info)
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 8d566c13cc73..91b017016d5b 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -20,19 +20,28 @@
20#include <linux/netdevice.h> 20#include <linux/netdevice.h>
21#include <linux/etherdevice.h> 21#include <linux/etherdevice.h>
22#include <linux/jhash.h> 22#include <linux/jhash.h>
23#include <linux/random.h>
23#include <asm/atomic.h> 24#include <asm/atomic.h>
25#include <asm/unaligned.h>
24#include "br_private.h" 26#include "br_private.h"
25 27
26static struct kmem_cache *br_fdb_cache __read_mostly; 28static struct kmem_cache *br_fdb_cache __read_mostly;
27static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, 29static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
28 const unsigned char *addr); 30 const unsigned char *addr);
29 31
30void __init br_fdb_init(void) 32static u32 fdb_salt __read_mostly;
33
34int __init br_fdb_init(void)
31{ 35{
32 br_fdb_cache = kmem_cache_create("bridge_fdb_cache", 36 br_fdb_cache = kmem_cache_create("bridge_fdb_cache",
33 sizeof(struct net_bridge_fdb_entry), 37 sizeof(struct net_bridge_fdb_entry),
34 0, 38 0,
35 SLAB_HWCACHE_ALIGN, NULL, NULL); 39 SLAB_HWCACHE_ALIGN, NULL, NULL);
40 if (!br_fdb_cache)
41 return -ENOMEM;
42
43 get_random_bytes(&fdb_salt, sizeof(fdb_salt));
44 return 0;
36} 45}
37 46
38void __exit br_fdb_fini(void) 47void __exit br_fdb_fini(void)
@@ -44,24 +53,26 @@ void __exit br_fdb_fini(void)
44/* if topology_changing then use forward_delay (default 15 sec) 53/* if topology_changing then use forward_delay (default 15 sec)
45 * otherwise keep longer (default 5 minutes) 54 * otherwise keep longer (default 5 minutes)
46 */ 55 */
47static __inline__ unsigned long hold_time(const struct net_bridge *br) 56static inline unsigned long hold_time(const struct net_bridge *br)
48{ 57{
49 return br->topology_change ? br->forward_delay : br->ageing_time; 58 return br->topology_change ? br->forward_delay : br->ageing_time;
50} 59}
51 60
52static __inline__ int has_expired(const struct net_bridge *br, 61static inline int has_expired(const struct net_bridge *br,
53 const struct net_bridge_fdb_entry *fdb) 62 const struct net_bridge_fdb_entry *fdb)
54{ 63{
55 return !fdb->is_static 64 return !fdb->is_static
56 && time_before_eq(fdb->ageing_timer + hold_time(br), jiffies); 65 && time_before_eq(fdb->ageing_timer + hold_time(br), jiffies);
57} 66}
58 67
59static __inline__ int br_mac_hash(const unsigned char *mac) 68static inline int br_mac_hash(const unsigned char *mac)
60{ 69{
 61 return jhash(mac, ETH_ALEN, 0) & (BR_HASH_SIZE - 1); 70 /* use 1 byte of OUI and 3 bytes of NIC */
71 u32 key = get_unaligned((u32 *)(mac + 2));
72 return jhash_1word(key, fdb_salt) & (BR_HASH_SIZE - 1);
62} 73}
63 74
64static __inline__ void fdb_delete(struct net_bridge_fdb_entry *f) 75static inline void fdb_delete(struct net_bridge_fdb_entry *f)
65{ 76{
66 hlist_del_rcu(&f->hlist); 77 hlist_del_rcu(&f->hlist);
67 br_fdb_put(f); 78 br_fdb_put(f);
@@ -128,7 +139,26 @@ void br_fdb_cleanup(unsigned long _data)
128 mod_timer(&br->gc_timer, jiffies + HZ/10); 139 mod_timer(&br->gc_timer, jiffies + HZ/10);
129} 140}
130 141
 142/* Completely flush all dynamic entries in forwarding database. */
143void br_fdb_flush(struct net_bridge *br)
144{
145 int i;
131 146
147 spin_lock_bh(&br->hash_lock);
148 for (i = 0; i < BR_HASH_SIZE; i++) {
149 struct net_bridge_fdb_entry *f;
150 struct hlist_node *h, *n;
151 hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) {
152 if (!f->is_static)
153 fdb_delete(f);
154 }
155 }
156 spin_unlock_bh(&br->hash_lock);
157}
158
 159/* Flush all entries referring to a specific port.
 160 * if do_all is set, also flush static entries
161 */
132void br_fdb_delete_by_port(struct net_bridge *br, 162void br_fdb_delete_by_port(struct net_bridge *br,
133 const struct net_bridge_port *p, 163 const struct net_bridge_port *p,
134 int do_all) 164 int do_all)
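
The new br_mac_hash() stops hashing all six MAC bytes with a fixed seed and instead mixes the low four bytes (one OUI byte plus the three NIC bytes, the part that actually varies) with fdb_salt, randomized once at init. Without the salt, a host on the segment could craft source addresses that all land in one of the 256 buckets and degrade FDB lookups to a list walk. A userspace sketch of the idea, with toy_mix() standing in for the kernel's jhash_1word():

    #include <stdint.h>
    #include <string.h>

    #define BR_HASH_SIZE 256                /* bridge FDB bucket count */

    static uint32_t fdb_salt;               /* randomized once at startup */

    /* Stand-in for jhash_1word(); the point is only that the salt
     * enters the mix, making bucket indices unpredictable. */
    static uint32_t toy_mix(uint32_t key, uint32_t salt)
    {
            uint32_t h = (key ^ salt) * 0x9e3779b1u;
            return h ^ (h >> 16);
    }

    static int mac_hash(const unsigned char *mac)
    {
            uint32_t key;

            /* 1 byte of OUI + 3 bytes of NIC, alignment-safe load */
            memcpy(&key, mac + 2, sizeof(key));
            return toy_mix(key, fdb_salt) & (BR_HASH_SIZE - 1);
    }
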
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 3e45c1a1aa96..ada7f495445c 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -71,7 +71,7 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
71 71
72 indev = skb->dev; 72 indev = skb->dev;
73 skb->dev = to->dev; 73 skb->dev = to->dev;
74 skb->ip_summed = CHECKSUM_NONE; 74 skb_forward_csum(skb);
75 75
76 NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev, 76 NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev,
77 br_forward_finish); 77 br_forward_finish);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f3a2e29be40c..690573bbf012 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -152,6 +152,8 @@ static void del_nbp(struct net_bridge_port *p)
152 br_stp_disable_port(p); 152 br_stp_disable_port(p);
153 spin_unlock_bh(&br->lock); 153 spin_unlock_bh(&br->lock);
154 154
155 br_ifinfo_notify(RTM_DELLINK, p);
156
155 br_fdb_delete_by_port(br, p, 1); 157 br_fdb_delete_by_port(br, p, 1);
156 158
157 list_del_rcu(&p->list); 159 list_del_rcu(&p->list);
@@ -203,7 +205,7 @@ static struct net_device *new_bridge_dev(const char *name)
203 memcpy(br->group_addr, br_group_address, ETH_ALEN); 205 memcpy(br->group_addr, br_group_address, ETH_ALEN);
204 206
205 br->feature_mask = dev->features; 207 br->feature_mask = dev->features;
206 br->stp_enabled = 0; 208 br->stp_enabled = BR_NO_STP;
207 br->designated_root = br->bridge_id; 209 br->designated_root = br->bridge_id;
208 br->root_path_cost = 0; 210 br->root_path_cost = 0;
209 br->root_port = 0; 211 br->root_port = 0;
@@ -434,6 +436,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
434 br_stp_enable_port(p); 436 br_stp_enable_port(p);
435 spin_unlock_bh(&br->lock); 437 spin_unlock_bh(&br->lock);
436 438
439 br_ifinfo_notify(RTM_NEWLINK, p);
440
437 dev_set_mtu(br->dev, br_min_mtu(br)); 441 dev_set_mtu(br->dev, br_min_mtu(br));
438 442
439 kobject_uevent(&p->kobj, KOBJ_ADD); 443 kobject_uevent(&p->kobj, KOBJ_ADD);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 35b94f9a1ac5..420bbb9955e9 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -112,46 +112,59 @@ static int br_handle_local_finish(struct sk_buff *skb)
112 */ 112 */
113static inline int is_link_local(const unsigned char *dest) 113static inline int is_link_local(const unsigned char *dest)
114{ 114{
115 return memcmp(dest, br_group_address, 5) == 0 && (dest[5] & 0xf0) == 0; 115 const u16 *a = (const u16 *) dest;
116 static const u16 *const b = (const u16 *const ) br_group_address;
117 static const u16 m = __constant_cpu_to_be16(0xfff0);
118
119 return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0;
116} 120}
117 121
118/* 122/*
119 * Called via br_handle_frame_hook. 123 * Called via br_handle_frame_hook.
 120 * Return 0 if *pskb should be processed further 124 * Return NULL if skb is handled
121 * 1 if *pskb is handled
122 * note: already called with rcu_read_lock (preempt_disabled) 125 * note: already called with rcu_read_lock (preempt_disabled)
123 */ 126 */
124int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb) 127struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
125{ 128{
126 struct sk_buff *skb = *pskb;
127 const unsigned char *dest = eth_hdr(skb)->h_dest; 129 const unsigned char *dest = eth_hdr(skb)->h_dest;
128 130
129 if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) 131 if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
130 goto err; 132 goto drop;
131 133
132 if (unlikely(is_link_local(dest))) { 134 if (unlikely(is_link_local(dest))) {
133 skb->pkt_type = PACKET_HOST; 135 /* Pause frames shouldn't be passed up by driver anyway */
134 return NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, 136 if (skb->protocol == htons(ETH_P_PAUSE))
135 NULL, br_handle_local_finish) != 0; 137 goto drop;
138
139 /* Process STP BPDU's through normal netif_receive_skb() path */
140 if (p->br->stp_enabled != BR_NO_STP) {
141 if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
142 NULL, br_handle_local_finish))
143 return NULL;
144 else
145 return skb;
146 }
136 } 147 }
137 148
138 if (p->state == BR_STATE_FORWARDING || p->state == BR_STATE_LEARNING) { 149 switch (p->state) {
150 case BR_STATE_FORWARDING:
151
139 if (br_should_route_hook) { 152 if (br_should_route_hook) {
140 if (br_should_route_hook(pskb)) 153 if (br_should_route_hook(&skb))
141 return 0; 154 return skb;
142 skb = *pskb;
143 dest = eth_hdr(skb)->h_dest; 155 dest = eth_hdr(skb)->h_dest;
144 } 156 }
145 157 /* fall through */
158 case BR_STATE_LEARNING:
146 if (!compare_ether_addr(p->br->dev->dev_addr, dest)) 159 if (!compare_ether_addr(p->br->dev->dev_addr, dest))
147 skb->pkt_type = PACKET_HOST; 160 skb->pkt_type = PACKET_HOST;
148 161
149 NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, 162 NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
150 br_handle_frame_finish); 163 br_handle_frame_finish);
151 return 1; 164 break;
165 default:
166drop:
167 kfree_skb(skb);
152 } 168 }
153 169 return NULL;
154err:
155 kfree_skb(skb);
156 return 1;
157} 170}
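
is_link_local() above is rewritten from memcmp() plus a nibble test into three 16-bit XOR/mask compares: the bridge group addresses 01:80:c2:00:00:00 through 01:80:c2:00:00:0f differ only in the low nibble of the last byte, which the big-endian 0xfff0 mask discards. The kernel version casts the Ethernet header straight to u16 (safe there, since the MAC header is at least 2-byte aligned); a standalone, alignment-safe rendering:

    #include <arpa/inet.h>  /* htons */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static const unsigned char br_group_address[6] = { 0x01, 0x80, 0xc2, 0, 0, 0 };

    /* Word-at-a-time test: the htons(0xfff0) mask drops the low nibble
     * of the final byte, so 00..0f all compare equal. */
    static int is_link_local(const unsigned char *dest)
    {
            uint16_t a[3], b[3];
            const uint16_t m = htons(0xfff0);

            memcpy(a, dest, sizeof(a));
            memcpy(b, br_group_address, sizeof(b));
            return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0;
    }

    int main(void)
    {
            const unsigned char stp[6]   = { 0x01, 0x80, 0xc2, 0, 0, 0x0e };
            const unsigned char bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

            printf("%d %d\n", is_link_local(stp), is_link_local(bcast)); /* 1 0 */
            return 0;
    }
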
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 147015fe5c75..eda0fbfc923a 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -137,7 +137,8 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
137 b.topology_change = br->topology_change; 137 b.topology_change = br->topology_change;
138 b.topology_change_detected = br->topology_change_detected; 138 b.topology_change_detected = br->topology_change_detected;
139 b.root_port = br->root_port; 139 b.root_port = br->root_port;
140 b.stp_enabled = br->stp_enabled; 140
141 b.stp_enabled = (br->stp_enabled != BR_NO_STP);
141 b.ageing_time = jiffies_to_clock_t(br->ageing_time); 142 b.ageing_time = jiffies_to_clock_t(br->ageing_time);
142 b.hello_timer_value = br_timer_value(&br->hello_timer); 143 b.hello_timer_value = br_timer_value(&br->hello_timer);
143 b.tcn_timer_value = br_timer_value(&br->tcn_timer); 144 b.tcn_timer_value = br_timer_value(&br->tcn_timer);
@@ -251,7 +252,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
251 if (!capable(CAP_NET_ADMIN)) 252 if (!capable(CAP_NET_ADMIN))
252 return -EPERM; 253 return -EPERM;
253 254
254 br->stp_enabled = args[1]?1:0; 255 br_stp_set_enabled(br, args[1]);
255 return 0; 256 return 0;
256 257
257 case BRCTL_SET_BRIDGE_PRIORITY: 258 case BRCTL_SET_BRIDGE_PRIORITY:
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 5439a3c46c3e..9b2986b182ba 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -29,6 +29,8 @@
29#include <linux/if_arp.h> 29#include <linux/if_arp.h>
30#include <linux/if_ether.h> 30#include <linux/if_ether.h>
31#include <linux/if_vlan.h> 31#include <linux/if_vlan.h>
32#include <linux/if_pppox.h>
33#include <linux/ppp_defs.h>
32#include <linux/netfilter_bridge.h> 34#include <linux/netfilter_bridge.h>
33#include <linux/netfilter_ipv4.h> 35#include <linux/netfilter_ipv4.h>
34#include <linux/netfilter_ipv6.h> 36#include <linux/netfilter_ipv6.h>
@@ -48,8 +50,8 @@
48 50
49#define skb_origaddr(skb) (((struct bridge_skb_cb *) \ 51#define skb_origaddr(skb) (((struct bridge_skb_cb *) \
50 (skb->nf_bridge->data))->daddr.ipv4) 52 (skb->nf_bridge->data))->daddr.ipv4)
51#define store_orig_dstaddr(skb) (skb_origaddr(skb) = (skb)->nh.iph->daddr) 53#define store_orig_dstaddr(skb) (skb_origaddr(skb) = ip_hdr(skb)->daddr)
52#define dnat_took_place(skb) (skb_origaddr(skb) != (skb)->nh.iph->daddr) 54#define dnat_took_place(skb) (skb_origaddr(skb) != ip_hdr(skb)->daddr)
53 55
54#ifdef CONFIG_SYSCTL 56#ifdef CONFIG_SYSCTL
55static struct ctl_table_header *brnf_sysctl_header; 57static struct ctl_table_header *brnf_sysctl_header;
@@ -57,8 +59,10 @@ static int brnf_call_iptables __read_mostly = 1;
57static int brnf_call_ip6tables __read_mostly = 1; 59static int brnf_call_ip6tables __read_mostly = 1;
58static int brnf_call_arptables __read_mostly = 1; 60static int brnf_call_arptables __read_mostly = 1;
59static int brnf_filter_vlan_tagged __read_mostly = 1; 61static int brnf_filter_vlan_tagged __read_mostly = 1;
62static int brnf_filter_pppoe_tagged __read_mostly = 1;
60#else 63#else
61#define brnf_filter_vlan_tagged 1 64#define brnf_filter_vlan_tagged 1
65#define brnf_filter_pppoe_tagged 1
62#endif 66#endif
63 67
64static inline __be16 vlan_proto(const struct sk_buff *skb) 68static inline __be16 vlan_proto(const struct sk_buff *skb)
@@ -81,6 +85,22 @@ static inline __be16 vlan_proto(const struct sk_buff *skb)
81 vlan_proto(skb) == htons(ETH_P_ARP) && \ 85 vlan_proto(skb) == htons(ETH_P_ARP) && \
82 brnf_filter_vlan_tagged) 86 brnf_filter_vlan_tagged)
83 87
88static inline __be16 pppoe_proto(const struct sk_buff *skb)
89{
90 return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
91 sizeof(struct pppoe_hdr)));
92}
93
94#define IS_PPPOE_IP(skb) \
95 (skb->protocol == htons(ETH_P_PPP_SES) && \
96 pppoe_proto(skb) == htons(PPP_IP) && \
97 brnf_filter_pppoe_tagged)
98
99#define IS_PPPOE_IPV6(skb) \
100 (skb->protocol == htons(ETH_P_PPP_SES) && \
101 pppoe_proto(skb) == htons(PPP_IPV6) && \
102 brnf_filter_pppoe_tagged)
103
84/* We need these fake structures to make netfilter happy -- 104/* We need these fake structures to make netfilter happy --
85 * lots of places assume that skb->dst != NULL, which isn't 105 * lots of places assume that skb->dst != NULL, which isn't
86 * all that unreasonable. 106 * all that unreasonable.
@@ -128,8 +148,11 @@ static inline void nf_bridge_save_header(struct sk_buff *skb)
128 148
129 if (skb->protocol == htons(ETH_P_8021Q)) 149 if (skb->protocol == htons(ETH_P_8021Q))
130 header_size += VLAN_HLEN; 150 header_size += VLAN_HLEN;
151 else if (skb->protocol == htons(ETH_P_PPP_SES))
152 header_size += PPPOE_SES_HLEN;
131 153
132 memcpy(skb->nf_bridge->data, skb->data - header_size, header_size); 154 skb_copy_from_linear_data_offset(skb, -header_size,
155 skb->nf_bridge->data, header_size);
133} 156}
134 157
135/* 158/*
@@ -143,15 +166,20 @@ int nf_bridge_copy_header(struct sk_buff *skb)
143 166
144 if (skb->protocol == htons(ETH_P_8021Q)) 167 if (skb->protocol == htons(ETH_P_8021Q))
145 header_size += VLAN_HLEN; 168 header_size += VLAN_HLEN;
169 else if (skb->protocol == htons(ETH_P_PPP_SES))
170 header_size += PPPOE_SES_HLEN;
146 171
147 err = skb_cow(skb, header_size); 172 err = skb_cow(skb, header_size);
148 if (err) 173 if (err)
149 return err; 174 return err;
150 175
151 memcpy(skb->data - header_size, skb->nf_bridge->data, header_size); 176 skb_copy_to_linear_data_offset(skb, -header_size,
177 skb->nf_bridge->data, header_size);
152 178
153 if (skb->protocol == htons(ETH_P_8021Q)) 179 if (skb->protocol == htons(ETH_P_8021Q))
154 __skb_push(skb, VLAN_HLEN); 180 __skb_push(skb, VLAN_HLEN);
181 else if (skb->protocol == htons(ETH_P_PPP_SES))
182 __skb_push(skb, PPPOE_SES_HLEN);
155 return 0; 183 return 0;
156} 184}
157 185
@@ -174,7 +202,10 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
174 skb->dev = nf_bridge->physindev; 202 skb->dev = nf_bridge->physindev;
175 if (skb->protocol == htons(ETH_P_8021Q)) { 203 if (skb->protocol == htons(ETH_P_8021Q)) {
176 skb_push(skb, VLAN_HLEN); 204 skb_push(skb, VLAN_HLEN);
177 skb->nh.raw -= VLAN_HLEN; 205 skb->network_header -= VLAN_HLEN;
206 } else if (skb->protocol == htons(ETH_P_PPP_SES)) {
207 skb_push(skb, PPPOE_SES_HLEN);
208 skb->network_header -= PPPOE_SES_HLEN;
178 } 209 }
179 NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, 210 NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
180 br_handle_frame_finish, 1); 211 br_handle_frame_finish, 1);
@@ -255,7 +286,10 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
255 else { 286 else {
256 if (skb->protocol == htons(ETH_P_8021Q)) { 287 if (skb->protocol == htons(ETH_P_8021Q)) {
257 skb_pull(skb, VLAN_HLEN); 288 skb_pull(skb, VLAN_HLEN);
258 skb->nh.raw += VLAN_HLEN; 289 skb->network_header += VLAN_HLEN;
290 } else if (skb->protocol == htons(ETH_P_PPP_SES)) {
291 skb_pull(skb, PPPOE_SES_HLEN);
292 skb->network_header += PPPOE_SES_HLEN;
259 } 293 }
260 skb->dst->output(skb); 294 skb->dst->output(skb);
261 } 295 }
@@ -265,7 +299,7 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
265static int br_nf_pre_routing_finish(struct sk_buff *skb) 299static int br_nf_pre_routing_finish(struct sk_buff *skb)
266{ 300{
267 struct net_device *dev = skb->dev; 301 struct net_device *dev = skb->dev;
268 struct iphdr *iph = skb->nh.iph; 302 struct iphdr *iph = ip_hdr(skb);
269 struct nf_bridge_info *nf_bridge = skb->nf_bridge; 303 struct nf_bridge_info *nf_bridge = skb->nf_bridge;
270 int err; 304 int err;
271 305
@@ -325,7 +359,11 @@ bridged_dnat:
325 if (skb->protocol == 359 if (skb->protocol ==
326 htons(ETH_P_8021Q)) { 360 htons(ETH_P_8021Q)) {
327 skb_push(skb, VLAN_HLEN); 361 skb_push(skb, VLAN_HLEN);
328 skb->nh.raw -= VLAN_HLEN; 362 skb->network_header -= VLAN_HLEN;
363 } else if(skb->protocol ==
364 htons(ETH_P_PPP_SES)) {
365 skb_push(skb, PPPOE_SES_HLEN);
366 skb->network_header -= PPPOE_SES_HLEN;
329 } 367 }
330 NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, 368 NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING,
331 skb, skb->dev, NULL, 369 skb, skb->dev, NULL,
@@ -344,7 +382,10 @@ bridged_dnat:
344 skb->dev = nf_bridge->physindev; 382 skb->dev = nf_bridge->physindev;
345 if (skb->protocol == htons(ETH_P_8021Q)) { 383 if (skb->protocol == htons(ETH_P_8021Q)) {
346 skb_push(skb, VLAN_HLEN); 384 skb_push(skb, VLAN_HLEN);
347 skb->nh.raw -= VLAN_HLEN; 385 skb->network_header -= VLAN_HLEN;
386 } else if (skb->protocol == htons(ETH_P_PPP_SES)) {
387 skb_push(skb, PPPOE_SES_HLEN);
388 skb->network_header -= PPPOE_SES_HLEN;
348 } 389 }
349 NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, 390 NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
350 br_handle_frame_finish, 1); 391 br_handle_frame_finish, 1);
@@ -372,9 +413,10 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb)
372/* We only check the length. A bridge shouldn't do any hop-by-hop stuff anyway */ 413/* We only check the length. A bridge shouldn't do any hop-by-hop stuff anyway */
373static int check_hbh_len(struct sk_buff *skb) 414static int check_hbh_len(struct sk_buff *skb)
374{ 415{
375 unsigned char *raw = (u8 *) (skb->nh.ipv6h + 1); 416 unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1);
376 u32 pkt_len; 417 u32 pkt_len;
377 int off = raw - skb->nh.raw; 418 const unsigned char *nh = skb_network_header(skb);
419 int off = raw - nh;
378 int len = (raw[1] + 1) << 3; 420 int len = (raw[1] + 1) << 3;
379 421
380 if ((raw + len) - skb->data > skb_headlen(skb)) 422 if ((raw + len) - skb->data > skb_headlen(skb))
@@ -384,9 +426,9 @@ static int check_hbh_len(struct sk_buff *skb)
384 len -= 2; 426 len -= 2;
385 427
386 while (len > 0) { 428 while (len > 0) {
387 int optlen = skb->nh.raw[off + 1] + 2; 429 int optlen = nh[off + 1] + 2;
388 430
389 switch (skb->nh.raw[off]) { 431 switch (nh[off]) {
390 case IPV6_TLV_PAD0: 432 case IPV6_TLV_PAD0:
391 optlen = 1; 433 optlen = 1;
392 break; 434 break;
@@ -395,17 +437,18 @@ static int check_hbh_len(struct sk_buff *skb)
395 break; 437 break;
396 438
397 case IPV6_TLV_JUMBO: 439 case IPV6_TLV_JUMBO:
398 if (skb->nh.raw[off + 1] != 4 || (off & 3) != 2) 440 if (nh[off + 1] != 4 || (off & 3) != 2)
399 goto bad; 441 goto bad;
400 pkt_len = ntohl(*(__be32 *) (skb->nh.raw + off + 2)); 442 pkt_len = ntohl(*(__be32 *) (nh + off + 2));
401 if (pkt_len <= IPV6_MAXPLEN || 443 if (pkt_len <= IPV6_MAXPLEN ||
402 skb->nh.ipv6h->payload_len) 444 ipv6_hdr(skb)->payload_len)
403 goto bad; 445 goto bad;
404 if (pkt_len > skb->len - sizeof(struct ipv6hdr)) 446 if (pkt_len > skb->len - sizeof(struct ipv6hdr))
405 goto bad; 447 goto bad;
406 if (pskb_trim_rcsum(skb, 448 if (pskb_trim_rcsum(skb,
407 pkt_len + sizeof(struct ipv6hdr))) 449 pkt_len + sizeof(struct ipv6hdr)))
408 goto bad; 450 goto bad;
451 nh = skb_network_header(skb);
409 break; 452 break;
410 default: 453 default:
411 if (optlen > len) 454 if (optlen > len)
@@ -439,7 +482,7 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
439 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 482 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
440 goto inhdr_error; 483 goto inhdr_error;
441 484
442 hdr = skb->nh.ipv6h; 485 hdr = ipv6_hdr(skb);
443 486
444 if (hdr->version != 6) 487 if (hdr->version != 6)
445 goto inhdr_error; 488 goto inhdr_error;
@@ -485,7 +528,8 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
485 __u32 len; 528 __u32 len;
486 struct sk_buff *skb = *pskb; 529 struct sk_buff *skb = *pskb;
487 530
488 if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb)) { 531 if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) ||
532 IS_PPPOE_IPV6(skb)) {
489#ifdef CONFIG_SYSCTL 533#ifdef CONFIG_SYSCTL
490 if (!brnf_call_ip6tables) 534 if (!brnf_call_ip6tables)
491 return NF_ACCEPT; 535 return NF_ACCEPT;
@@ -495,7 +539,10 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
495 539
496 if (skb->protocol == htons(ETH_P_8021Q)) { 540 if (skb->protocol == htons(ETH_P_8021Q)) {
497 skb_pull_rcsum(skb, VLAN_HLEN); 541 skb_pull_rcsum(skb, VLAN_HLEN);
498 skb->nh.raw += VLAN_HLEN; 542 skb->network_header += VLAN_HLEN;
543 } else if (skb->protocol == htons(ETH_P_PPP_SES)) {
544 skb_pull_rcsum(skb, PPPOE_SES_HLEN);
545 skb->network_header += PPPOE_SES_HLEN;
499 } 546 }
500 return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn); 547 return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn);
501 } 548 }
@@ -504,7 +551,8 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
504 return NF_ACCEPT; 551 return NF_ACCEPT;
505#endif 552#endif
506 553
507 if (skb->protocol != htons(ETH_P_IP) && !IS_VLAN_IP(skb)) 554 if (skb->protocol != htons(ETH_P_IP) && !IS_VLAN_IP(skb) &&
555 !IS_PPPOE_IP(skb))
508 return NF_ACCEPT; 556 return NF_ACCEPT;
509 557
510 if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL) 558 if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL)
@@ -512,20 +560,23 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
512 560
513 if (skb->protocol == htons(ETH_P_8021Q)) { 561 if (skb->protocol == htons(ETH_P_8021Q)) {
514 skb_pull_rcsum(skb, VLAN_HLEN); 562 skb_pull_rcsum(skb, VLAN_HLEN);
515 skb->nh.raw += VLAN_HLEN; 563 skb->network_header += VLAN_HLEN;
564 } else if (skb->protocol == htons(ETH_P_PPP_SES)) {
565 skb_pull_rcsum(skb, PPPOE_SES_HLEN);
566 skb->network_header += PPPOE_SES_HLEN;
516 } 567 }
517 568
518 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 569 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
519 goto inhdr_error; 570 goto inhdr_error;
520 571
521 iph = skb->nh.iph; 572 iph = ip_hdr(skb);
522 if (iph->ihl < 5 || iph->version != 4) 573 if (iph->ihl < 5 || iph->version != 4)
523 goto inhdr_error; 574 goto inhdr_error;
524 575
525 if (!pskb_may_pull(skb, 4 * iph->ihl)) 576 if (!pskb_may_pull(skb, 4 * iph->ihl))
526 goto inhdr_error; 577 goto inhdr_error;
527 578
528 iph = skb->nh.iph; 579 iph = ip_hdr(skb);
529 if (ip_fast_csum((__u8 *) iph, iph->ihl) != 0) 580 if (ip_fast_csum((__u8 *) iph, iph->ihl) != 0)
530 goto inhdr_error; 581 goto inhdr_error;
531 582
@@ -593,7 +644,10 @@ static int br_nf_forward_finish(struct sk_buff *skb)
593 } 644 }
594 if (skb->protocol == htons(ETH_P_8021Q)) { 645 if (skb->protocol == htons(ETH_P_8021Q)) {
595 skb_push(skb, VLAN_HLEN); 646 skb_push(skb, VLAN_HLEN);
596 skb->nh.raw -= VLAN_HLEN; 647 skb->network_header -= VLAN_HLEN;
648 } else if (skb->protocol == htons(ETH_P_PPP_SES)) {
649 skb_push(skb, PPPOE_SES_HLEN);
650 skb->network_header -= PPPOE_SES_HLEN;
597 } 651 }
598 NF_HOOK_THRESH(PF_BRIDGE, NF_BR_FORWARD, skb, in, 652 NF_HOOK_THRESH(PF_BRIDGE, NF_BR_FORWARD, skb, in,
599 skb->dev, br_forward_finish, 1); 653 skb->dev, br_forward_finish, 1);
@@ -622,14 +676,18 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb,
622 if (!parent) 676 if (!parent)
623 return NF_DROP; 677 return NF_DROP;
624 678
625 if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb)) 679 if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) ||
680 IS_PPPOE_IP(skb))
626 pf = PF_INET; 681 pf = PF_INET;
627 else 682 else
628 pf = PF_INET6; 683 pf = PF_INET6;
629 684
630 if (skb->protocol == htons(ETH_P_8021Q)) { 685 if (skb->protocol == htons(ETH_P_8021Q)) {
631 skb_pull(*pskb, VLAN_HLEN); 686 skb_pull(*pskb, VLAN_HLEN);
632 (*pskb)->nh.raw += VLAN_HLEN; 687 (*pskb)->network_header += VLAN_HLEN;
688 } else if (skb->protocol == htons(ETH_P_PPP_SES)) {
689 skb_pull(*pskb, PPPOE_SES_HLEN);
690 (*pskb)->network_header += PPPOE_SES_HLEN;
633 } 691 }
634 692
635 nf_bridge = skb->nf_bridge; 693 nf_bridge = skb->nf_bridge;
@@ -665,13 +723,13 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb,
665 if (!IS_VLAN_ARP(skb)) 723 if (!IS_VLAN_ARP(skb))
666 return NF_ACCEPT; 724 return NF_ACCEPT;
667 skb_pull(*pskb, VLAN_HLEN); 725 skb_pull(*pskb, VLAN_HLEN);
668 (*pskb)->nh.raw += VLAN_HLEN; 726 (*pskb)->network_header += VLAN_HLEN;
669 } 727 }
670 728
671 if (skb->nh.arph->ar_pln != 4) { 729 if (arp_hdr(skb)->ar_pln != 4) {
672 if (IS_VLAN_ARP(skb)) { 730 if (IS_VLAN_ARP(skb)) {
673 skb_push(*pskb, VLAN_HLEN); 731 skb_push(*pskb, VLAN_HLEN);
674 (*pskb)->nh.raw -= VLAN_HLEN; 732 (*pskb)->network_header -= VLAN_HLEN;
675 } 733 }
676 return NF_ACCEPT; 734 return NF_ACCEPT;
677 } 735 }
@@ -721,7 +779,10 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
721 } 779 }
722 if (skb->protocol == htons(ETH_P_8021Q)) { 780 if (skb->protocol == htons(ETH_P_8021Q)) {
723 skb_push(skb, VLAN_HLEN); 781 skb_push(skb, VLAN_HLEN);
724 skb->nh.raw -= VLAN_HLEN; 782 skb->network_header -= VLAN_HLEN;
783 } else if (skb->protocol == htons(ETH_P_PPP_SES)) {
784 skb_push(skb, PPPOE_SES_HLEN);
785 skb->network_header -= PPPOE_SES_HLEN;
725 } 786 }
726 787
727 NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev, 788 NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev,
@@ -753,7 +814,8 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
753#ifdef CONFIG_NETFILTER_DEBUG 814#ifdef CONFIG_NETFILTER_DEBUG
754 /* Be very paranoid. This probably won't happen anymore, but let's 815 /* Be very paranoid. This probably won't happen anymore, but let's
755 * keep the check just to be sure... */ 816 * keep the check just to be sure... */
756 if (skb->mac.raw < skb->head || skb->mac.raw + ETH_HLEN > skb->data) { 817 if (skb_mac_header(skb) < skb->head ||
818 skb_mac_header(skb) + ETH_HLEN > skb->data) {
757 printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: " 819 printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: "
758 "bad mac.raw pointer.\n"); 820 "bad mac.raw pointer.\n");
759 goto print_error; 821 goto print_error;
@@ -766,7 +828,8 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
766 if (!realoutdev) 828 if (!realoutdev)
767 return NF_DROP; 829 return NF_DROP;
768 830
769 if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb)) 831 if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) ||
832 IS_PPPOE_IP(skb))
770 pf = PF_INET; 833 pf = PF_INET;
771 else 834 else
772 pf = PF_INET6; 835 pf = PF_INET6;
@@ -787,7 +850,10 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
787 850
788 if (skb->protocol == htons(ETH_P_8021Q)) { 851 if (skb->protocol == htons(ETH_P_8021Q)) {
789 skb_pull(skb, VLAN_HLEN); 852 skb_pull(skb, VLAN_HLEN);
790 skb->nh.raw += VLAN_HLEN; 853 skb->network_header += VLAN_HLEN;
854 } else if (skb->protocol == htons(ETH_P_PPP_SES)) {
855 skb_pull(skb, PPPOE_SES_HLEN);
856 skb->network_header += PPPOE_SES_HLEN;
791 } 857 }
792 858
793 nf_bridge_save_header(skb); 859 nf_bridge_save_header(skb);
@@ -808,7 +874,7 @@ print_error:
808 if (realoutdev) 874 if (realoutdev)
809 printk("[%s]", realoutdev->name); 875 printk("[%s]", realoutdev->name);
810 } 876 }
811 printk(" head:%p, raw:%p, data:%p\n", skb->head, skb->mac.raw, 877 printk(" head:%p, raw:%p, data:%p\n", skb->head, skb_mac_header(skb),
812 skb->data); 878 skb->data);
813 dump_stack(); 879 dump_stack();
814 return NF_ACCEPT; 880 return NF_ACCEPT;
@@ -925,6 +991,14 @@ static ctl_table brnf_table[] = {
925 .mode = 0644, 991 .mode = 0644,
926 .proc_handler = &brnf_sysctl_call_tables, 992 .proc_handler = &brnf_sysctl_call_tables,
927 }, 993 },
994 {
995 .ctl_name = NET_BRIDGE_NF_FILTER_PPPOE_TAGGED,
996 .procname = "bridge-nf-filter-pppoe-tagged",
997 .data = &brnf_filter_pppoe_tagged,
998 .maxlen = sizeof(int),
999 .mode = 0644,
1000 .proc_handler = &brnf_sysctl_call_tables,
1001 },
928 { .ctl_name = 0 } 1002 { .ctl_name = 0 }
929}; 1003};
930 1004
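
Every VLAN_HLEN push/pull in br_netfilter now has a matching PPPOE_SES_HLEN branch because a bridged PPPoE session frame carries eight extra bytes between the Ethernet header and the IP header: the six-byte PPPoE session header plus the two-byte PPP protocol word that pppoe_proto() dereferences. A condensed sketch of the layout (the kernel's struct pppoe_hdr in <linux/if_pppox.h> splits ver/type into bitfields):

    #include <stdint.h>

    /* offset  0: Ethernet header, h_proto = ETH_P_PPP_SES (0x8864)
     * offset 14: PPPoE session header (6 bytes, below)
     * offset 20: PPP protocol word, PPP_IP (0x0021) or PPP_IPV6 (0x0057)
     * offset 22: IP/IPv6 header */
    struct pppoe_hdr_sketch {
            uint8_t  ver_type;      /* version (4 bits) | type (4 bits) */
            uint8_t  code;          /* 0x00 for session data */
            uint16_t sid;           /* session id, big-endian on the wire */
            uint16_t length;        /* payload length incl. protocol word */
    };
    /* PPPOE_SES_HLEN = sizeof(pppoe_hdr) + 2 = 8, so the network header
     * moves by 8 wherever it moved by VLAN_HLEN (4) in the 802.1Q case. */
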
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 7d68b24b5654..35facc0c11c2 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -11,8 +11,7 @@
11 */ 11 */
12 12
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/rtnetlink.h> 14#include <net/rtnetlink.h>
15#include <net/netlink.h>
16#include "br_private.h" 15#include "br_private.h"
17 16
18static inline size_t br_nlmsg_size(void) 17static inline size_t br_nlmsg_size(void)
@@ -110,7 +109,6 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
110 struct net_device *dev; 109 struct net_device *dev;
111 int idx; 110 int idx;
112 111
113 read_lock(&dev_base_lock);
114 for (dev = dev_base, idx = 0; dev; dev = dev->next) { 112 for (dev = dev_base, idx = 0; dev; dev = dev->next) {
115 /* not a bridge port */ 113 /* not a bridge port */
116 if (dev->br_port == NULL || idx < cb->args[0]) 114 if (dev->br_port == NULL || idx < cb->args[0])
@@ -123,7 +121,6 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
123skip: 121skip:
124 ++idx; 122 ++idx;
125 } 123 }
126 read_unlock(&dev_base_lock);
127 124
128 cb->args[0] = idx; 125 cb->args[0] = idx;
129 126
@@ -166,7 +163,7 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
166 return -EINVAL; 163 return -EINVAL;
167 164
168 /* if kernel STP is running, don't allow changes */ 165 /* if kernel STP is running, don't allow changes */
169 if (p->br->stp_enabled) 166 if (p->br->stp_enabled == BR_KERNEL_STP)
170 return -EBUSY; 167 return -EBUSY;
171 168
172 if (!netif_running(dev) || 169 if (!netif_running(dev) ||
@@ -179,18 +176,19 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
179} 176}
180 177
181 178
182static struct rtnetlink_link bridge_rtnetlink_table[RTM_NR_MSGTYPES] = { 179int __init br_netlink_init(void)
183 [RTM_GETLINK - RTM_BASE] = { .dumpit = br_dump_ifinfo, },
184 [RTM_SETLINK - RTM_BASE] = { .doit = br_rtm_setlink, },
185};
186
187void __init br_netlink_init(void)
188{ 180{
189 rtnetlink_links[PF_BRIDGE] = bridge_rtnetlink_table; 181 if (__rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, br_dump_ifinfo))
182 return -ENOBUFS;
183
184 /* Only the first call to __rtnl_register can fail */
185 __rtnl_register(PF_BRIDGE, RTM_SETLINK, br_rtm_setlink, NULL);
186
187 return 0;
190} 188}
191 189
192void __exit br_netlink_fini(void) 190void __exit br_netlink_fini(void)
193{ 191{
194 rtnetlink_links[PF_BRIDGE] = NULL; 192 rtnl_unregister_all(PF_BRIDGE);
195} 193}
196 194
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 37357ed2149b..c8451d3a070c 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -50,7 +50,6 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
50 case NETDEV_CHANGEADDR: 50 case NETDEV_CHANGEADDR:
51 spin_lock_bh(&br->lock); 51 spin_lock_bh(&br->lock);
52 br_fdb_changeaddr(p, dev->dev_addr); 52 br_fdb_changeaddr(p, dev->dev_addr);
53 br_ifinfo_notify(RTM_NEWLINK, p);
54 br_stp_recalculate_bridge_id(br); 53 br_stp_recalculate_bridge_id(br);
55 spin_unlock_bh(&br->lock); 54 spin_unlock_bh(&br->lock);
56 break; 55 break;
@@ -74,10 +73,11 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
74 break; 73 break;
75 74
76 case NETDEV_UP: 75 case NETDEV_UP:
77 spin_lock_bh(&br->lock); 76 if (netif_carrier_ok(dev) && (br->dev->flags & IFF_UP)) {
78 if (netif_carrier_ok(dev) && (br->dev->flags & IFF_UP)) 77 spin_lock_bh(&br->lock);
79 br_stp_enable_port(p); 78 br_stp_enable_port(p);
80 spin_unlock_bh(&br->lock); 79 spin_unlock_bh(&br->lock);
80 }
81 break; 81 break;
82 82
83 case NETDEV_UNREGISTER: 83 case NETDEV_UNREGISTER:
@@ -85,5 +85,10 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
85 break; 85 break;
86 } 86 }
87 87
88 /* Events that may cause spanning tree to refresh */
89 if (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
90 event == NETDEV_CHANGE || event == NETDEV_DOWN)
91 br_ifinfo_notify(RTM_NEWLINK, p);
92
88 return NOTIFY_DONE; 93 return NOTIFY_DONE;
89} 94}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index cc3f1c99261a..21bf3a9a03fd 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -26,7 +26,10 @@
26#define BR_PORT_BITS 10 26#define BR_PORT_BITS 10
27#define BR_MAX_PORTS (1<<BR_PORT_BITS) 27#define BR_MAX_PORTS (1<<BR_PORT_BITS)
28 28
29#define BR_VERSION "2.2" 29#define BR_VERSION "2.3"
30
31/* Path to usermode spanning tree program */
32#define BR_STP_PROG "/sbin/bridge-stp"
30 33
31typedef struct bridge_id bridge_id; 34typedef struct bridge_id bridge_id;
32typedef struct mac_addr mac_addr; 35typedef struct mac_addr mac_addr;
@@ -107,7 +110,13 @@ struct net_bridge
107 110
108 u8 group_addr[ETH_ALEN]; 111 u8 group_addr[ETH_ALEN];
109 u16 root_port; 112 u16 root_port;
110 unsigned char stp_enabled; 113
114 enum {
115 BR_NO_STP, /* no spanning tree */
116 BR_KERNEL_STP, /* old STP in kernel */
117 BR_USER_STP, /* new RSTP in userspace */
118 } stp_enabled;
119
111 unsigned char topology_change; 120 unsigned char topology_change;
112 unsigned char topology_change_detected; 121 unsigned char topology_change_detected;
113 122
@@ -127,14 +136,14 @@ static inline int br_is_root_bridge(const struct net_bridge *br)
127 return !memcmp(&br->bridge_id, &br->designated_root, 8); 136 return !memcmp(&br->bridge_id, &br->designated_root, 8);
128} 137}
129 138
130
131/* br_device.c */ 139/* br_device.c */
132extern void br_dev_setup(struct net_device *dev); 140extern void br_dev_setup(struct net_device *dev);
133extern int br_dev_xmit(struct sk_buff *skb, struct net_device *dev); 141extern int br_dev_xmit(struct sk_buff *skb, struct net_device *dev);
134 142
135/* br_fdb.c */ 143/* br_fdb.c */
136extern void br_fdb_init(void); 144extern int br_fdb_init(void);
137extern void br_fdb_fini(void); 145extern void br_fdb_fini(void);
146extern void br_fdb_flush(struct net_bridge *br);
138extern void br_fdb_changeaddr(struct net_bridge_port *p, 147extern void br_fdb_changeaddr(struct net_bridge_port *p,
139 const unsigned char *newaddr); 148 const unsigned char *newaddr);
140extern void br_fdb_cleanup(unsigned long arg); 149extern void br_fdb_cleanup(unsigned long arg);
@@ -182,7 +191,8 @@ extern void br_features_recompute(struct net_bridge *br);
182 191
183/* br_input.c */ 192/* br_input.c */
184extern int br_handle_frame_finish(struct sk_buff *skb); 193extern int br_handle_frame_finish(struct sk_buff *skb);
185extern int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb); 194extern struct sk_buff *br_handle_frame(struct net_bridge_port *p,
195 struct sk_buff *skb);
186 196
187/* br_ioctl.c */ 197/* br_ioctl.c */
188extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); 198extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
@@ -207,6 +217,7 @@ extern void br_become_designated_port(struct net_bridge_port *p);
207/* br_stp_if.c */ 217/* br_stp_if.c */
208extern void br_stp_enable_bridge(struct net_bridge *br); 218extern void br_stp_enable_bridge(struct net_bridge *br);
209extern void br_stp_disable_bridge(struct net_bridge *br); 219extern void br_stp_disable_bridge(struct net_bridge *br);
220extern void br_stp_set_enabled(struct net_bridge *br, unsigned long val);
210extern void br_stp_enable_port(struct net_bridge_port *p); 221extern void br_stp_enable_port(struct net_bridge_port *p);
211extern void br_stp_disable_port(struct net_bridge_port *p); 222extern void br_stp_disable_port(struct net_bridge_port *p);
212extern void br_stp_recalculate_bridge_id(struct net_bridge *br); 223extern void br_stp_recalculate_bridge_id(struct net_bridge *br);
@@ -235,7 +246,7 @@ extern void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
235 246
236 247
237/* br_netlink.c */ 248/* br_netlink.c */
238extern void br_netlink_init(void); 249extern int br_netlink_init(void);
239extern void br_netlink_fini(void); 250extern void br_netlink_fini(void);
240extern void br_ifinfo_notify(int event, struct net_bridge_port *port); 251extern void br_ifinfo_notify(int event, struct net_bridge_port *port);
241 252
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index f9ff4d57b0d7..ebb0861e9bd5 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -370,11 +370,11 @@ static void br_make_blocking(struct net_bridge_port *p)
370static void br_make_forwarding(struct net_bridge_port *p) 370static void br_make_forwarding(struct net_bridge_port *p)
371{ 371{
372 if (p->state == BR_STATE_BLOCKING) { 372 if (p->state == BR_STATE_BLOCKING) {
373 if (p->br->stp_enabled) { 373 if (p->br->stp_enabled == BR_KERNEL_STP)
374 p->state = BR_STATE_LISTENING; 374 p->state = BR_STATE_LISTENING;
375 } else { 375 else
376 p->state = BR_STATE_LEARNING; 376 p->state = BR_STATE_LEARNING;
377 } 377
378 br_log_state(p); 378 br_log_state(p);
379 mod_timer(&p->forward_delay_timer, jiffies + p->br->forward_delay); } 379 mod_timer(&p->forward_delay_timer, jiffies + p->br->forward_delay); }
380} 380}
@@ -384,6 +384,10 @@ void br_port_state_selection(struct net_bridge *br)
384{ 384{
385 struct net_bridge_port *p; 385 struct net_bridge_port *p;
386 386
387 /* Don't change port states if userspace is handling STP */
388 if (br->stp_enabled == BR_USER_STP)
389 return;
390
387 list_for_each_entry(p, &br->port_list, list) { 391 list_for_each_entry(p, &br->port_list, list) {
388 if (p->state != BR_STATE_DISABLED) { 392 if (p->state != BR_STATE_DISABLED) {
389 if (p->port_no == br->root_port) { 393 if (p->port_no == br->root_port) {
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index b9fb0dc4ab12..60112bce6698 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -33,9 +33,6 @@ static void br_send_bpdu(struct net_bridge_port *p,
33{ 33{
34 struct sk_buff *skb; 34 struct sk_buff *skb;
35 35
36 if (!p->br->stp_enabled)
37 return;
38
39 skb = dev_alloc_skb(length+LLC_RESERVE); 36 skb = dev_alloc_skb(length+LLC_RESERVE);
40 if (!skb) 37 if (!skb)
41 return; 38 return;
@@ -75,6 +72,9 @@ void br_send_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu)
75{ 72{
76 unsigned char buf[35]; 73 unsigned char buf[35];
77 74
75 if (p->br->stp_enabled != BR_KERNEL_STP)
76 return;
77
78 buf[0] = 0; 78 buf[0] = 0;
79 buf[1] = 0; 79 buf[1] = 0;
80 buf[2] = 0; 80 buf[2] = 0;
@@ -117,6 +117,9 @@ void br_send_tcn_bpdu(struct net_bridge_port *p)
117{ 117{
118 unsigned char buf[4]; 118 unsigned char buf[4];
119 119
120 if (p->br->stp_enabled != BR_KERNEL_STP)
121 return;
122
120 buf[0] = 0; 123 buf[0] = 0;
121 buf[1] = 0; 124 buf[1] = 0;
122 buf[2] = 0; 125 buf[2] = 0;
@@ -157,9 +160,13 @@ int br_stp_rcv(struct sk_buff *skb, struct net_device *dev,
157 br = p->br; 160 br = p->br;
158 spin_lock(&br->lock); 161 spin_lock(&br->lock);
159 162
160 if (p->state == BR_STATE_DISABLED 163 if (br->stp_enabled != BR_KERNEL_STP)
161 || !br->stp_enabled 164 goto out;
162 || !(br->dev->flags & IFF_UP)) 165
166 if (!(br->dev->flags & IFF_UP))
167 goto out;
168
169 if (p->state == BR_STATE_DISABLED)
163 goto out; 170 goto out;
164 171
165 if (compare_ether_addr(dest, br->group_addr) != 0) 172 if (compare_ether_addr(dest, br->group_addr) != 0)
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index a285897a2fb4..3e246b37020e 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -87,7 +87,6 @@ void br_stp_disable_bridge(struct net_bridge *br)
87void br_stp_enable_port(struct net_bridge_port *p) 87void br_stp_enable_port(struct net_bridge_port *p)
88{ 88{
89 br_init_port(p); 89 br_init_port(p);
90 br_ifinfo_notify(RTM_NEWLINK, p);
91 br_port_state_selection(p->br); 90 br_port_state_selection(p->br);
92} 91}
93 92
@@ -101,8 +100,6 @@ void br_stp_disable_port(struct net_bridge_port *p)
101 printk(KERN_INFO "%s: port %i(%s) entering %s state\n", 100 printk(KERN_INFO "%s: port %i(%s) entering %s state\n",
102 br->dev->name, p->port_no, p->dev->name, "disabled"); 101 br->dev->name, p->port_no, p->dev->name, "disabled");
103 102
104 br_ifinfo_notify(RTM_DELLINK, p);
105
106 wasroot = br_is_root_bridge(br); 103 wasroot = br_is_root_bridge(br);
107 br_become_designated_port(p); 104 br_become_designated_port(p);
108 p->state = BR_STATE_DISABLED; 105 p->state = BR_STATE_DISABLED;
@@ -123,6 +120,62 @@ void br_stp_disable_port(struct net_bridge_port *p)
123 br_become_root_bridge(br); 120 br_become_root_bridge(br);
124} 121}
125 122
123static void br_stp_start(struct net_bridge *br)
124{
125 int r;
126 char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL };
127 char *envp[] = { NULL };
128
129 r = call_usermodehelper(BR_STP_PROG, argv, envp, 1);
130 if (r == 0) {
131 br->stp_enabled = BR_USER_STP;
132 printk(KERN_INFO "%s: userspace STP started\n", br->dev->name);
133 } else {
134 br->stp_enabled = BR_KERNEL_STP;
135 printk(KERN_INFO "%s: starting userspace STP failed, "
 136 "starting kernel STP\n", br->dev->name);
137
138 /* To start timers on any ports left in blocking */
139 spin_lock_bh(&br->lock);
140 br_port_state_selection(br);
141 spin_unlock_bh(&br->lock);
142 }
143}
144
145static void br_stp_stop(struct net_bridge *br)
146{
147 int r;
148 char *argv[] = { BR_STP_PROG, br->dev->name, "stop", NULL };
149 char *envp[] = { NULL };
150
151 if (br->stp_enabled == BR_USER_STP) {
152 r = call_usermodehelper(BR_STP_PROG, argv, envp, 1);
153 printk(KERN_INFO "%s: userspace STP stopped, return code %d\n",
154 br->dev->name, r);
155
156
157 /* To start timers on any ports left in blocking */
158 spin_lock_bh(&br->lock);
159 br_port_state_selection(br);
160 spin_unlock_bh(&br->lock);
161 }
162
163 br->stp_enabled = BR_NO_STP;
164}
165
166void br_stp_set_enabled(struct net_bridge *br, unsigned long val)
167{
168 ASSERT_RTNL();
169
170 if (val) {
171 if (br->stp_enabled == BR_NO_STP)
172 br_stp_start(br);
173 } else {
174 if (br->stp_enabled != BR_NO_STP)
175 br_stp_stop(br);
176 }
177}
178
126/* called under bridge lock */ 179/* called under bridge lock */
127void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr) 180void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr)
128{ 181{
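
br_stp_start()/br_stp_stop() hand spanning tree to userspace via call_usermodehelper(), with the wait flag set so the helper's exit status can pick between BR_USER_STP and the kernel fallback. The contract for /sbin/bridge-stp is small: argv[1] is the bridge name, argv[2] is "start" or "stop", and a zero exit from "start" means userspace has taken over. A hypothetical stand-in that satisfies it:

    #include <stdio.h>
    #include <string.h>

    /* Hypothetical /sbin/bridge-stp stand-in: a real one would launch
     * or signal an RSTP daemon for argv[1]. A non-zero exit from
     * "start" makes the kernel fall back to kernel STP. */
    int main(int argc, char *argv[])
    {
            if (argc != 3 ||
                (strcmp(argv[2], "start") != 0 && strcmp(argv[2], "stop") != 0)) {
                    fprintf(stderr, "usage: bridge-stp <bridge> start|stop\n");
                    return 1;
            }
            fprintf(stderr, "bridge-stp: %s %s\n", argv[1], argv[2]);
            return 0;
    }
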
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 01a22ad0cc75..33c6c4a7c689 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -149,7 +149,11 @@ static ssize_t show_stp_state(struct device *d,
149 149
150static void set_stp_state(struct net_bridge *br, unsigned long val) 150static void set_stp_state(struct net_bridge *br, unsigned long val)
151{ 151{
152 br->stp_enabled = val; 152 rtnl_lock();
153 spin_unlock_bh(&br->lock);
154 br_stp_set_enabled(br, val);
155 spin_lock_bh(&br->lock);
156 rtnl_unlock();
153} 157}
154 158
155static ssize_t store_stp_state(struct device *d, 159static ssize_t store_stp_state(struct device *d,
@@ -309,6 +313,19 @@ static ssize_t store_group_addr(struct device *d,
309static DEVICE_ATTR(group_addr, S_IRUGO | S_IWUSR, 313static DEVICE_ATTR(group_addr, S_IRUGO | S_IWUSR,
310 show_group_addr, store_group_addr); 314 show_group_addr, store_group_addr);
311 315
316static ssize_t store_flush(struct device *d,
317 struct device_attribute *attr,
318 const char *buf, size_t len)
319{
320 struct net_bridge *br = to_bridge(d);
321
322 if (!capable(CAP_NET_ADMIN))
323 return -EPERM;
324
325 br_fdb_flush(br);
326 return len;
327}
328static DEVICE_ATTR(flush, S_IWUSR, NULL, store_flush);
312 329
313static struct attribute *bridge_attrs[] = { 330static struct attribute *bridge_attrs[] = {
314 &dev_attr_forward_delay.attr, 331 &dev_attr_forward_delay.attr,
@@ -328,6 +345,7 @@ static struct attribute *bridge_attrs[] = {
328 &dev_attr_topology_change_timer.attr, 345 &dev_attr_topology_change_timer.attr,
329 &dev_attr_gc_timer.attr, 346 &dev_attr_gc_timer.attr,
330 &dev_attr_group_addr.attr, 347 &dev_attr_group_addr.attr,
348 &dev_attr_flush.attr,
331 NULL 349 NULL
332}; 350};
333 351
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 0bc2aef8f9f3..2da22927d8dd 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -137,6 +137,13 @@ static ssize_t show_hold_timer(struct net_bridge_port *p,
137} 137}
138static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL); 138static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL);
139 139
140static ssize_t store_flush(struct net_bridge_port *p, unsigned long v)
141{
142 br_fdb_delete_by_port(p->br, p, 0); // Don't delete local entry
143 return 0;
144}
145static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush);
146
140static struct brport_attribute *brport_attrs[] = { 147static struct brport_attribute *brport_attrs[] = {
141 &brport_attr_path_cost, 148 &brport_attr_path_cost,
142 &brport_attr_priority, 149 &brport_attr_priority,
@@ -152,6 +159,7 @@ static struct brport_attribute *brport_attrs[] = {
152 &brport_attr_message_age_timer, 159 &brport_attr_message_age_timer,
153 &brport_attr_forward_delay_timer, 160 &brport_attr_forward_delay_timer,
154 &brport_attr_hold_timer, 161 &brport_attr_hold_timer,
162 &brport_attr_flush,
155 NULL 163 NULL
156}; 164};
157 165
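
The two new write-only attributes let an administrator drop learned entries without downing the bridge: a write to /sys/class/net/<bridge>/bridge/flush clears every dynamic FDB entry, and /sys/class/net/<port>/brport/flush does the same for one port (static and local entries survive). A minimal userspace trigger, assuming a bridge named br0:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Equivalent to `echo 1 > /sys/class/net/br0/bridge/flush` as root;
     * the written bytes are ignored, CAP_NET_ADMIN is required. */
    int main(void)
    {
            int fd = open("/sys/class/net/br0/bridge/flush", O_WRONLY);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            if (write(fd, "1", 1) != 1)
                    perror("write");
            close(fd);
            return 0;
    }
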
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index 9c599800a900..1a46952a56d9 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -35,40 +35,36 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in
35 return EBT_NOMATCH; 35 return EBT_NOMATCH;
36 36
37 if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP)) { 37 if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP)) {
38 __be32 _addr, *ap; 38 __be32 saddr, daddr, *sap, *dap;
39 39
40 /* IPv4 addresses are always 4 bytes */ 40 if (ah->ar_pln != sizeof(__be32) || ah->ar_pro != htons(ETH_P_IP))
41 if (ah->ar_pln != sizeof(__be32)) 41 return EBT_NOMATCH;
42 sap = skb_header_pointer(skb, sizeof(struct arphdr) +
43 ah->ar_hln, sizeof(saddr),
44 &saddr);
45 if (sap == NULL)
46 return EBT_NOMATCH;
47 dap = skb_header_pointer(skb, sizeof(struct arphdr) +
48 2*ah->ar_hln+sizeof(saddr),
49 sizeof(daddr), &daddr);
50 if (dap == NULL)
51 return EBT_NOMATCH;
52 if (info->bitmask & EBT_ARP_SRC_IP &&
53 FWINV(info->saddr != (*sap & info->smsk), EBT_ARP_SRC_IP))
54 return EBT_NOMATCH;
55 if (info->bitmask & EBT_ARP_DST_IP &&
56 FWINV(info->daddr != (*dap & info->dmsk), EBT_ARP_DST_IP))
57 return EBT_NOMATCH;
58 if (info->bitmask & EBT_ARP_GRAT &&
59 FWINV(*dap != *sap, EBT_ARP_GRAT))
42 return EBT_NOMATCH; 60 return EBT_NOMATCH;
43 if (info->bitmask & EBT_ARP_SRC_IP) {
44 ap = skb_header_pointer(skb, sizeof(struct arphdr) +
45 ah->ar_hln, sizeof(_addr),
46 &_addr);
47 if (ap == NULL)
48 return EBT_NOMATCH;
49 if (FWINV(info->saddr != (*ap & info->smsk),
50 EBT_ARP_SRC_IP))
51 return EBT_NOMATCH;
52 }
53
54 if (info->bitmask & EBT_ARP_DST_IP) {
55 ap = skb_header_pointer(skb, sizeof(struct arphdr) +
56 2*ah->ar_hln+sizeof(__be32),
57 sizeof(_addr), &_addr);
58 if (ap == NULL)
59 return EBT_NOMATCH;
60 if (FWINV(info->daddr != (*ap & info->dmsk),
61 EBT_ARP_DST_IP))
62 return EBT_NOMATCH;
63 }
64 } 61 }
65 62
66 if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) { 63 if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) {
67 unsigned char _mac[ETH_ALEN], *mp; 64 unsigned char _mac[ETH_ALEN], *mp;
68 uint8_t verdict, i; 65 uint8_t verdict, i;
69 66
70 /* MAC addresses are 6 bytes */ 67 if (ah->ar_hln != ETH_ALEN || ah->ar_hrd != htons(ARPHRD_ETHER))
71 if (ah->ar_hln != ETH_ALEN)
72 return EBT_NOMATCH; 68 return EBT_NOMATCH;
73 if (info->bitmask & EBT_ARP_SRC_MAC) { 69 if (info->bitmask & EBT_ARP_SRC_MAC) {
74 mp = skb_header_pointer(skb, sizeof(struct arphdr), 70 mp = skb_header_pointer(skb, sizeof(struct arphdr),
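
The ebt_arp rewrite fetches both IPv4 addresses up front with skb_header_pointer() (which returns a direct pointer when the bytes sit in the linear skb area and copies into the caller's buffer otherwise), restricts the match to Ethernet/IPv4 ARP, and adds the EBT_ARP_GRAT test: a gratuitous ARP is simply one whose sender and target IP are equal. The offsets used above, for the ar_hln = 6 / ar_pln = 4 case:

    #include <stdio.h>

    /* ARP address offsets behind the 8-byte fixed header, matching the
     * skb_header_pointer() calls above:
     *   sender MAC @ 8, sender IP @ 8+hln,
     *   target MAC @ 8+hln+pln, target IP @ 8+2*hln+pln */
    int main(void)
    {
            const int arphdr = 8, hln = 6, pln = 4;

            printf("sender IP at offset %d\n", arphdr + hln);           /* 14 */
            printf("target IP at offset %d\n", arphdr + 2 * hln + pln); /* 24 */
            return 0;
    }
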
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 45712aec6a0e..031bfa4a51fc 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -196,14 +196,10 @@ static int __init ebt_log_init(void)
196 ret = ebt_register_watcher(&log); 196 ret = ebt_register_watcher(&log);
197 if (ret < 0) 197 if (ret < 0)
198 return ret; 198 return ret;
199 if (nf_log_register(PF_BRIDGE, &ebt_log_logger) < 0) { 199 ret = nf_log_register(PF_BRIDGE, &ebt_log_logger);
200 printk(KERN_WARNING "ebt_log: not logging via system console " 200 if (ret < 0 && ret != -EEXIST)
201 "since somebody else already registered for PF_INET\n"); 201 ebt_unregister_watcher(&log);
202 /* we cannot make module load fail here, since otherwise 202 return ret;
203 * ebtables userspace would abort */
204 }
205
206 return 0;
207} 203}
208 204
209static void __exit ebt_log_fini(void) 205static void __exit ebt_log_fini(void)
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 8e15cc47f6c0..9411db625917 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -130,6 +130,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
130 unsigned int group = uloginfo->nlgroup; 130 unsigned int group = uloginfo->nlgroup;
131 ebt_ulog_buff_t *ub = &ulog_buffers[group]; 131 ebt_ulog_buff_t *ub = &ulog_buffers[group];
132 spinlock_t *lock = &ub->lock; 132 spinlock_t *lock = &ub->lock;
133 ktime_t kt;
133 134
134 if ((uloginfo->cprange == 0) || 135 if ((uloginfo->cprange == 0) ||
135 (uloginfo->cprange > skb->len + ETH_HLEN)) 136 (uloginfo->cprange > skb->len + ETH_HLEN))
@@ -164,9 +165,10 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
164 165
165 /* Fill in the ulog data */ 166 /* Fill in the ulog data */
166 pm->version = EBT_ULOG_VERSION; 167 pm->version = EBT_ULOG_VERSION;
167 do_gettimeofday(&pm->stamp); 168 kt = ktime_get_real();
169 pm->stamp = ktime_to_timeval(kt);
168 if (ub->qlen == 1) 170 if (ub->qlen == 1)
169 skb_set_timestamp(ub->skb, &pm->stamp); 171 ub->skb->tstamp = kt;
170 pm->data_len = copy_len; 172 pm->data_len = copy_len;
171 pm->mark = skb->mark; 173 pm->mark = skb->mark;
172 pm->hook = hooknr; 174 pm->hook = hooknr;
@@ -295,14 +297,12 @@ static int __init ebt_ulog_init(void)
295 297
296 /* initialize ulog_buffers */ 298 /* initialize ulog_buffers */
297 for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { 299 for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
298 init_timer(&ulog_buffers[i].timer); 300 setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
299 ulog_buffers[i].timer.function = ulog_timer;
300 ulog_buffers[i].timer.data = i;
301 spin_lock_init(&ulog_buffers[i].lock); 301 spin_lock_init(&ulog_buffers[i].lock);
302 } 302 }
303 303
304 ebtulognl = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS, 304 ebtulognl = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS,
305 NULL, THIS_MODULE); 305 NULL, NULL, THIS_MODULE);
306 if (!ebtulognl) 306 if (!ebtulognl)
307 ret = -ENOMEM; 307 ret = -ENOMEM;
308 else if ((ret = ebt_register_watcher(&ulog))) 308 else if ((ret = ebt_register_watcher(&ulog)))
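
Two modernizations in ebt_ulog: the three-line open-coded timer init collapses into setup_timer(), and the timestamp is taken once as a ktime_t so the same instant feeds both the packet record (as a timeval) and the queue skb's tstamp. setup_timer() of this era is essentially:

    /* Sketch of setup_timer() as defined in <linux/timer.h> at the
     * time: assign callback and cookie, then init the timer proper. */
    static inline void setup_timer(struct timer_list *timer,
                                   void (*function)(unsigned long),
                                   unsigned long data)
    {
            timer->function = function;
            timer->data = data;
            init_timer(timer);
    }
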
diff --git a/net/compat.c b/net/compat.c
index 1f32866d09b7..9a0f5f2b90c8 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -34,11 +34,11 @@ static inline int iov_from_user_compat_to_kern(struct iovec *kiov,
34{ 34{
35 int tot_len = 0; 35 int tot_len = 0;
36 36
37 while(niov > 0) { 37 while (niov > 0) {
38 compat_uptr_t buf; 38 compat_uptr_t buf;
39 compat_size_t len; 39 compat_size_t len;
40 40
41 if(get_user(len, &uiov32->iov_len) || 41 if (get_user(len, &uiov32->iov_len) ||
42 get_user(buf, &uiov32->iov_base)) { 42 get_user(buf, &uiov32->iov_base)) {
43 tot_len = -EFAULT; 43 tot_len = -EFAULT;
44 break; 44 break;
@@ -78,12 +78,12 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
78{ 78{
79 int tot_len; 79 int tot_len;
80 80
81 if(kern_msg->msg_namelen) { 81 if (kern_msg->msg_namelen) {
82 if(mode==VERIFY_READ) { 82 if (mode==VERIFY_READ) {
83 int err = move_addr_to_kernel(kern_msg->msg_name, 83 int err = move_addr_to_kernel(kern_msg->msg_name,
84 kern_msg->msg_namelen, 84 kern_msg->msg_namelen,
85 kern_address); 85 kern_address);
86 if(err < 0) 86 if (err < 0)
87 return err; 87 return err;
88 } 88 }
89 kern_msg->msg_name = kern_address; 89 kern_msg->msg_name = kern_address;
@@ -93,7 +93,7 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
93 tot_len = iov_from_user_compat_to_kern(kern_iov, 93 tot_len = iov_from_user_compat_to_kern(kern_iov,
94 (struct compat_iovec __user *)kern_msg->msg_iov, 94 (struct compat_iovec __user *)kern_msg->msg_iov,
95 kern_msg->msg_iovlen); 95 kern_msg->msg_iovlen);
96 if(tot_len >= 0) 96 if (tot_len >= 0)
97 kern_msg->msg_iov = kern_iov; 97 kern_msg->msg_iov = kern_iov;
98 98
99 return tot_len; 99 return tot_len;
@@ -146,8 +146,8 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
146 kcmlen = 0; 146 kcmlen = 0;
147 kcmsg_base = kcmsg = (struct cmsghdr *)stackbuf; 147 kcmsg_base = kcmsg = (struct cmsghdr *)stackbuf;
148 ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg); 148 ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg);
149 while(ucmsg != NULL) { 149 while (ucmsg != NULL) {
150 if(get_user(ucmlen, &ucmsg->cmsg_len)) 150 if (get_user(ucmlen, &ucmsg->cmsg_len))
151 return -EFAULT; 151 return -EFAULT;
152 152
153 /* Catch bogons. */ 153 /* Catch bogons. */
@@ -160,7 +160,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
160 kcmlen += tmp; 160 kcmlen += tmp;
161 ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen); 161 ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen);
162 } 162 }
163 if(kcmlen == 0) 163 if (kcmlen == 0)
164 return -EINVAL; 164 return -EINVAL;
165 165
166 /* The kcmlen holds the 64-bit version of the control length. 166 /* The kcmlen holds the 64-bit version of the control length.
@@ -176,7 +176,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
176 /* Now copy them over neatly. */ 176 /* Now copy them over neatly. */
177 memset(kcmsg, 0, kcmlen); 177 memset(kcmsg, 0, kcmlen);
178 ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg); 178 ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg);
179 while(ucmsg != NULL) { 179 while (ucmsg != NULL) {
180 if (__get_user(ucmlen, &ucmsg->cmsg_len)) 180 if (__get_user(ucmlen, &ucmsg->cmsg_len))
181 goto Efault; 181 goto Efault;
182 if (!CMSG_COMPAT_OK(ucmlen, ucmsg, kmsg)) 182 if (!CMSG_COMPAT_OK(ucmlen, ucmsg, kmsg))
@@ -215,11 +215,12 @@ Efault:
215int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *data) 215int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *data)
216{ 216{
217 struct compat_timeval ctv; 217 struct compat_timeval ctv;
218 struct compat_timespec cts;
218 struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control; 219 struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control;
219 struct compat_cmsghdr cmhdr; 220 struct compat_cmsghdr cmhdr;
220 int cmlen; 221 int cmlen;
221 222
222 if(cm == NULL || kmsg->msg_controllen < sizeof(*cm)) { 223 if (cm == NULL || kmsg->msg_controllen < sizeof(*cm)) {
223 kmsg->msg_flags |= MSG_CTRUNC; 224 kmsg->msg_flags |= MSG_CTRUNC;
224 return 0; /* XXX: return error? check spec. */ 225 return 0; /* XXX: return error? check spec. */
225 } 226 }
@@ -229,11 +230,18 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
229 ctv.tv_sec = tv->tv_sec; 230 ctv.tv_sec = tv->tv_sec;
230 ctv.tv_usec = tv->tv_usec; 231 ctv.tv_usec = tv->tv_usec;
231 data = &ctv; 232 data = &ctv;
232 len = sizeof(struct compat_timeval); 233 len = sizeof(ctv);
234 }
235 if (level == SOL_SOCKET && type == SO_TIMESTAMPNS) {
236 struct timespec *ts = (struct timespec *)data;
237 cts.tv_sec = ts->tv_sec;
238 cts.tv_nsec = ts->tv_nsec;
239 data = &cts;
240 len = sizeof(cts);
233 } 241 }
234 242
235 cmlen = CMSG_COMPAT_LEN(len); 243 cmlen = CMSG_COMPAT_LEN(len);
236 if(kmsg->msg_controllen < cmlen) { 244 if (kmsg->msg_controllen < cmlen) {
237 kmsg->msg_flags |= MSG_CTRUNC; 245 kmsg->msg_flags |= MSG_CTRUNC;
238 cmlen = kmsg->msg_controllen; 246 cmlen = kmsg->msg_controllen;
239 } 247 }
@@ -241,9 +249,9 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
241 cmhdr.cmsg_type = type; 249 cmhdr.cmsg_type = type;
242 cmhdr.cmsg_len = cmlen; 250 cmhdr.cmsg_len = cmlen;
243 251
244 if(copy_to_user(cm, &cmhdr, sizeof cmhdr)) 252 if (copy_to_user(cm, &cmhdr, sizeof cmhdr))
245 return -EFAULT; 253 return -EFAULT;
246 if(copy_to_user(CMSG_COMPAT_DATA(cm), data, cmlen - sizeof(struct compat_cmsghdr))) 254 if (copy_to_user(CMSG_COMPAT_DATA(cm), data, cmlen - sizeof(struct compat_cmsghdr)))
247 return -EFAULT; 255 return -EFAULT;
248 cmlen = CMSG_COMPAT_SPACE(len); 256 cmlen = CMSG_COMPAT_SPACE(len);
249 kmsg->msg_control += cmlen; 257 kmsg->msg_control += cmlen;
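put_cmsg_compat() now mirrors SO_TIMESTAMP's timeval narrowing for the new SO_TIMESTAMPNS: the native struct timespec is rewritten field by field into the 32-bit compat layout before being copied into the cmsg payload, and len is taken from the narrowed object so CMSG_COMPAT_LEN() sizes the message for the 32-bit receiver. A self-contained user-space sketch of the narrowing step (the 32-bit struct layout here is illustrative, standing in for compat_timespec):

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    /* illustrative 32-bit layout, standing in for compat_timespec */
    struct compat_timespec32 {
        int32_t tv_sec;
        int32_t tv_nsec;
    };

    static void narrow(const struct timespec *ts,
                       struct compat_timespec32 *cts)
    {
        cts->tv_sec  = (int32_t)ts->tv_sec;   /* truncates past 2038 */
        cts->tv_nsec = (int32_t)ts->tv_nsec;
    }

    int main(void)
    {
        struct timespec ts;
        struct compat_timespec32 cts;

        clock_gettime(CLOCK_REALTIME, &ts);
        narrow(&ts, &cts);
        printf("%d.%09d\n", (int)cts.tv_sec, (int)cts.tv_nsec);
        return 0;
    }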
@@ -545,20 +553,49 @@ int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
545 struct compat_timeval __user *ctv = 553 struct compat_timeval __user *ctv =
546 (struct compat_timeval __user*) userstamp; 554 (struct compat_timeval __user*) userstamp;
547 int err = -ENOENT; 555 int err = -ENOENT;
556 struct timeval tv;
548 557
549 if (!sock_flag(sk, SOCK_TIMESTAMP)) 558 if (!sock_flag(sk, SOCK_TIMESTAMP))
550 sock_enable_timestamp(sk); 559 sock_enable_timestamp(sk);
551 if (sk->sk_stamp.tv_sec == -1) 560 tv = ktime_to_timeval(sk->sk_stamp);
561 if (tv.tv_sec == -1)
552 return err; 562 return err;
553 if (sk->sk_stamp.tv_sec == 0) 563 if (tv.tv_sec == 0) {
554 do_gettimeofday(&sk->sk_stamp); 564 sk->sk_stamp = ktime_get_real();
555 if (put_user(sk->sk_stamp.tv_sec, &ctv->tv_sec) || 565 tv = ktime_to_timeval(sk->sk_stamp);
556 put_user(sk->sk_stamp.tv_usec, &ctv->tv_usec)) 566 }
567 err = 0;
568 if (put_user(tv.tv_sec, &ctv->tv_sec) ||
569 put_user(tv.tv_usec, &ctv->tv_usec))
557 err = -EFAULT; 570 err = -EFAULT;
558 return err; 571 return err;
559} 572}
560EXPORT_SYMBOL(compat_sock_get_timestamp); 573EXPORT_SYMBOL(compat_sock_get_timestamp);
561 574
575int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
576{
577 struct compat_timespec __user *ctv =
578 (struct compat_timespec __user*) userstamp;
579 int err = -ENOENT;
580 struct timespec ts;
581
582 if (!sock_flag(sk, SOCK_TIMESTAMP))
583 sock_enable_timestamp(sk);
584 ts = ktime_to_timespec(sk->sk_stamp);
585 if (ts.tv_sec == -1)
586 return err;
587 if (ts.tv_sec == 0) {
588 sk->sk_stamp = ktime_get_real();
589 ts = ktime_to_timespec(sk->sk_stamp);
590 }
591 err = 0;
592 if (put_user(ts.tv_sec, &ctv->tv_sec) ||
593 put_user(ts.tv_nsec, &ctv->tv_nsec))
594 err = -EFAULT;
595 return err;
596}
597EXPORT_SYMBOL(compat_sock_get_timestampns);
598
562asmlinkage long compat_sys_getsockopt(int fd, int level, int optname, 599asmlinkage long compat_sys_getsockopt(int fd, int level, int optname,
563 char __user *optval, int __user *optlen) 600 char __user *optval, int __user *optlen)
564{ 601{
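compat_sock_get_timestamp() and the new compat_sock_get_timestampns() now share one shape: read sk->sk_stamp as a ktime_t, convert it once into a local, and stamp the socket lazily only if it has never been timestamped. A condensed kernel-style sketch of that pattern (restating the hunks above, not new API):

    ts = ktime_to_timespec(sk->sk_stamp);
    if (ts.tv_sec == -1)                /* timestamping was never enabled */
        return -ENOENT;
    if (ts.tv_sec == 0) {               /* enabled but never stamped */
        sk->sk_stamp = ktime_get_real();
        ts = ktime_to_timespec(sk->sk_stamp);
    }
    /* copy ts.tv_sec / ts.tv_nsec out with put_user() */

Converting into a local first also means the two put_user() calls no longer read sk_stamp twice, so a concurrent restamp cannot hand userspace a mismatched sec/nsec pair.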
@@ -617,7 +654,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
617 a0 = a[0]; 654 a0 = a[0];
618 a1 = a[1]; 655 a1 = a[1];
619 656
620 switch(call) { 657 switch (call) {
621 case SYS_SOCKET: 658 case SYS_SOCKET:
622 ret = sys_socket(a0, a1, a[2]); 659 ret = sys_socket(a0, a1, a[2]);
623 break; 660 break;
diff --git a/net/core/Makefile b/net/core/Makefile
index 73272d506e93..4751613e1b59 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -13,7 +13,6 @@ obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
13obj-$(CONFIG_XFRM) += flow.o 13obj-$(CONFIG_XFRM) += flow.o
14obj-$(CONFIG_SYSFS) += net-sysfs.o 14obj-$(CONFIG_SYSFS) += net-sysfs.o
15obj-$(CONFIG_NET_PKTGEN) += pktgen.o 15obj-$(CONFIG_NET_PKTGEN) += pktgen.o
16obj-$(CONFIG_WIRELESS_EXT) += wireless.o
17obj-$(CONFIG_NETPOLL) += netpoll.o 16obj-$(CONFIG_NETPOLL) += netpoll.o
18obj-$(CONFIG_NET_DMA) += user_dma.o 17obj-$(CONFIG_NET_DMA) += user_dma.o
19obj-$(CONFIG_FIB_RULES) += fib_rules.o 18obj-$(CONFIG_FIB_RULES) += fib_rules.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 186212b5b7da..e1afa7679445 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -247,8 +247,8 @@ EXPORT_SYMBOL(skb_kill_datagram);
247int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, 247int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
248 struct iovec *to, int len) 248 struct iovec *to, int len)
249{ 249{
250 int start = skb_headlen(skb); 250 int end = skb_headlen(skb);
251 int i, copy = start - offset; 251 int i, copy = end - offset;
252 252
253 /* Copy header. */ 253 /* Copy header. */
254 if (copy > 0) { 254 if (copy > 0) {
@@ -263,11 +263,9 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
263 263
264 /* Copy paged appendix. Hmm... why does this look so complicated? */ 264 /* Copy paged appendix. Hmm... why does this look so complicated? */
265 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 265 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
266 int end; 266 BUG_TRAP(len >= 0);
267 267
268 BUG_TRAP(start <= offset + len); 268 end = offset + skb_shinfo(skb)->frags[i].size;
269
270 end = start + skb_shinfo(skb)->frags[i].size;
271 if ((copy = end - offset) > 0) { 269 if ((copy = end - offset) > 0) {
272 int err; 270 int err;
273 u8 *vaddr; 271 u8 *vaddr;
@@ -277,8 +275,8 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
277 if (copy > len) 275 if (copy > len)
278 copy = len; 276 copy = len;
279 vaddr = kmap(page); 277 vaddr = kmap(page);
280 err = memcpy_toiovec(to, vaddr + frag->page_offset + 278 err = memcpy_toiovec(to, vaddr + frag->page_offset,
281 offset - start, copy); 279 copy);
282 kunmap(page); 280 kunmap(page);
283 if (err) 281 if (err)
284 goto fault; 282 goto fault;
@@ -286,30 +284,24 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
286 return 0; 284 return 0;
287 offset += copy; 285 offset += copy;
288 } 286 }
289 start = end;
290 } 287 }
291 288
292 if (skb_shinfo(skb)->frag_list) { 289 if (skb_shinfo(skb)->frag_list) {
293 struct sk_buff *list = skb_shinfo(skb)->frag_list; 290 struct sk_buff *list = skb_shinfo(skb)->frag_list;
294 291
295 for (; list; list = list->next) { 292 for (; list; list = list->next) {
296 int end; 293 BUG_TRAP(len >= 0);
297
298 BUG_TRAP(start <= offset + len);
299 294
300 end = start + list->len; 295 end = offset + list->len;
301 if ((copy = end - offset) > 0) { 296 if ((copy = end - offset) > 0) {
302 if (copy > len) 297 if (copy > len)
303 copy = len; 298 copy = len;
304 if (skb_copy_datagram_iovec(list, 299 if (skb_copy_datagram_iovec(list, 0, to, copy))
305 offset - start,
306 to, copy))
307 goto fault; 300 goto fault;
308 if ((len -= copy) == 0) 301 if ((len -= copy) == 0)
309 return 0; 302 return 0;
310 offset += copy; 303 offset += copy;
311 } 304 }
312 start = end;
313 } 305 }
314 } 306 }
315 if (!len) 307 if (!len)
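With the running `start` accumulator gone from skb_copy_datagram_iovec(), each paged fragment's window is computed directly as end = offset + frag size, the copy length clamps to the caller's remaining len, and the BUG_TRAP now simply asserts len has not gone negative. A loose, self-contained user-space analogue of that gather-copy walk over a fragment array (struct and names hypothetical):

    #include <stdio.h>
    #include <string.h>

    struct frag { const char *data; int size; };

    /* gather up to len bytes from a fragment array into out */
    static int gather(const struct frag *f, int nfrags,
                      char *out, int len)
    {
        int copied = 0;

        for (int i = 0; i < nfrags && copied < len; i++) {
            int copy = f[i].size;

            if (copy > len - copied)
                copy = len - copied;    /* clamp to caller's len */
            memcpy(out + copied, f[i].data, copy);
            copied += copy;
        }
        return copied;
    }

    int main(void)
    {
        struct frag f[] = { { "net", 3 }, { "work", 4 } };
        char buf[8] = { 0 };

        printf("%d %s\n", gather(f, 2, buf, 7));    /* 7 network */
        return 0;
    }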
@@ -323,9 +315,9 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
323 u8 __user *to, int len, 315 u8 __user *to, int len,
324 __wsum *csump) 316 __wsum *csump)
325{ 317{
326 int start = skb_headlen(skb); 318 int end = skb_headlen(skb);
327 int pos = 0; 319 int pos = 0;
328 int i, copy = start - offset; 320 int i, copy = end - offset;
329 321
330 /* Copy header. */ 322 /* Copy header. */
331 if (copy > 0) { 323 if (copy > 0) {
@@ -344,11 +336,9 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
344 } 336 }
345 337
346 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 338 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
347 int end; 339 BUG_TRAP(len >= 0);
348
349 BUG_TRAP(start <= offset + len);
350 340
351 end = start + skb_shinfo(skb)->frags[i].size; 341 end = offset + skb_shinfo(skb)->frags[i].size;
352 if ((copy = end - offset) > 0) { 342 if ((copy = end - offset) > 0) {
353 __wsum csum2; 343 __wsum csum2;
354 int err = 0; 344 int err = 0;
@@ -360,8 +350,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
360 copy = len; 350 copy = len;
361 vaddr = kmap(page); 351 vaddr = kmap(page);
362 csum2 = csum_and_copy_to_user(vaddr + 352 csum2 = csum_and_copy_to_user(vaddr +
363 frag->page_offset + 353 frag->page_offset,
364 offset - start,
365 to, copy, 0, &err); 354 to, copy, 0, &err);
366 kunmap(page); 355 kunmap(page);
367 if (err) 356 if (err)
@@ -373,24 +362,20 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
373 to += copy; 362 to += copy;
374 pos += copy; 363 pos += copy;
375 } 364 }
376 start = end;
377 } 365 }
378 366
379 if (skb_shinfo(skb)->frag_list) { 367 if (skb_shinfo(skb)->frag_list) {
380 struct sk_buff *list = skb_shinfo(skb)->frag_list; 368 struct sk_buff *list = skb_shinfo(skb)->frag_list;
381 369
382 for (; list; list=list->next) { 370 for (; list; list=list->next) {
383 int end; 371 BUG_TRAP(len >= 0);
384 372
385 BUG_TRAP(start <= offset + len); 373 end = offset + list->len;
386
387 end = start + list->len;
388 if ((copy = end - offset) > 0) { 374 if ((copy = end - offset) > 0) {
389 __wsum csum2 = 0; 375 __wsum csum2 = 0;
390 if (copy > len) 376 if (copy > len)
391 copy = len; 377 copy = len;
392 if (skb_copy_and_csum_datagram(list, 378 if (skb_copy_and_csum_datagram(list, 0,
393 offset - start,
394 to, copy, 379 to, copy,
395 &csum2)) 380 &csum2))
396 goto fault; 381 goto fault;
@@ -401,7 +386,6 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
401 to += copy; 386 to += copy;
402 pos += copy; 387 pos += copy;
403 } 388 }
404 start = end;
405 } 389 }
406 } 390 }
407 if (!len) 391 if (!len)
@@ -411,11 +395,11 @@ fault:
411 return -EFAULT; 395 return -EFAULT;
412} 396}
413 397
414__sum16 __skb_checksum_complete(struct sk_buff *skb) 398__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
415{ 399{
416 __sum16 sum; 400 __sum16 sum;
417 401
418 sum = csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); 402 sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
419 if (likely(!sum)) { 403 if (likely(!sum)) {
420 if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) 404 if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
421 netdev_rx_csum_fault(skb->dev); 405 netdev_rx_csum_fault(skb->dev);
@@ -423,6 +407,12 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb)
423 } 407 }
424 return sum; 408 return sum;
425} 409}
410EXPORT_SYMBOL(__skb_checksum_complete_head);
411
412__sum16 __skb_checksum_complete(struct sk_buff *skb)
413{
414 return __skb_checksum_complete_head(skb, skb->len);
415}
426EXPORT_SYMBOL(__skb_checksum_complete); 416EXPORT_SYMBOL(__skb_checksum_complete);
427 417
428/** 418/**
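The checksum-complete path is generalised by extracting a length-taking core and keeping the old symbol as a one-line wrapper, so every existing caller compiles unchanged while new callers can validate just the first len bytes (useful for partial-coverage checksums such as UDP-Lite's). The refactoring shape, as applied above:

    __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);

    __sum16 __skb_checksum_complete(struct sk_buff *skb)
    {
        return __skb_checksum_complete_head(skb, skb->len);
    }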
diff --git a/net/core/dev.c b/net/core/dev.c
index 4dc93cc4d5b7..d5e42d13bd67 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -109,7 +109,7 @@
109#include <linux/netpoll.h> 109#include <linux/netpoll.h>
110#include <linux/rcupdate.h> 110#include <linux/rcupdate.h>
111#include <linux/delay.h> 111#include <linux/delay.h>
112#include <linux/wireless.h> 112#include <net/wext.h>
113#include <net/iw_handler.h> 113#include <net/iw_handler.h>
114#include <asm/current.h> 114#include <asm/current.h>
115#include <linux/audit.h> 115#include <linux/audit.h>
@@ -146,8 +146,8 @@
146 */ 146 */
147 147
148static DEFINE_SPINLOCK(ptype_lock); 148static DEFINE_SPINLOCK(ptype_lock);
149static struct list_head ptype_base[16]; /* 16 way hashed list */ 149static struct list_head ptype_base[16] __read_mostly; /* 16 way hashed list */
150static struct list_head ptype_all; /* Taps */ 150static struct list_head ptype_all __read_mostly; /* Taps */
151 151
152#ifdef CONFIG_NET_DMA 152#ifdef CONFIG_NET_DMA
153static struct dma_client *net_dma_client; 153static struct dma_client *net_dma_client;
@@ -226,12 +226,6 @@ extern void netdev_unregister_sysfs(struct net_device *);
226*******************************************************************************/ 226*******************************************************************************/
227 227
228/* 228/*
229 * For efficiency
230 */
231
232static int netdev_nit;
233
234/*
235 * Add a protocol ID to the list. Now that the input handler is 229 * Add a protocol ID to the list. Now that the input handler is
236 * smarter we can dispense with all the messy stuff that used to be 230 * smarter we can dispense with all the messy stuff that used to be
237 * here. 231 * here.
@@ -265,10 +259,9 @@ void dev_add_pack(struct packet_type *pt)
265 int hash; 259 int hash;
266 260
267 spin_lock_bh(&ptype_lock); 261 spin_lock_bh(&ptype_lock);
268 if (pt->type == htons(ETH_P_ALL)) { 262 if (pt->type == htons(ETH_P_ALL))
269 netdev_nit++;
270 list_add_rcu(&pt->list, &ptype_all); 263 list_add_rcu(&pt->list, &ptype_all);
271 } else { 264 else {
272 hash = ntohs(pt->type) & 15; 265 hash = ntohs(pt->type) & 15;
273 list_add_rcu(&pt->list, &ptype_base[hash]); 266 list_add_rcu(&pt->list, &ptype_base[hash]);
274 } 267 }
@@ -295,10 +288,9 @@ void __dev_remove_pack(struct packet_type *pt)
295 288
296 spin_lock_bh(&ptype_lock); 289 spin_lock_bh(&ptype_lock);
297 290
298 if (pt->type == htons(ETH_P_ALL)) { 291 if (pt->type == htons(ETH_P_ALL))
299 netdev_nit--;
300 head = &ptype_all; 292 head = &ptype_all;
301 } else 293 else
302 head = &ptype_base[ntohs(pt->type) & 15]; 294 head = &ptype_base[ntohs(pt->type) & 15];
303 295
304 list_for_each_entry(pt1, head, list) { 296 list_for_each_entry(pt1, head, list) {
@@ -817,7 +809,6 @@ static int default_rebuild_header(struct sk_buff *skb)
817 return 1; 809 return 1;
818} 810}
819 811
820
821/** 812/**
822 * dev_open - prepare an interface for use. 813 * dev_open - prepare an interface for use.
823 * @dev: device to open 814 * @dev: device to open
@@ -1031,23 +1022,12 @@ void net_disable_timestamp(void)
1031 atomic_dec(&netstamp_needed); 1022 atomic_dec(&netstamp_needed);
1032} 1023}
1033 1024
1034void __net_timestamp(struct sk_buff *skb)
1035{
1036 struct timeval tv;
1037
1038 do_gettimeofday(&tv);
1039 skb_set_timestamp(skb, &tv);
1040}
1041EXPORT_SYMBOL(__net_timestamp);
1042
1043static inline void net_timestamp(struct sk_buff *skb) 1025static inline void net_timestamp(struct sk_buff *skb)
1044{ 1026{
1045 if (atomic_read(&netstamp_needed)) 1027 if (atomic_read(&netstamp_needed))
1046 __net_timestamp(skb); 1028 __net_timestamp(skb);
1047 else { 1029 else
1048 skb->tstamp.off_sec = 0; 1030 skb->tstamp.tv64 = 0;
1049 skb->tstamp.off_usec = 0;
1050 }
1051} 1031}
1052 1032
1053/* 1033/*
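skb timestamps move from the two-field off_sec/off_usec pair to a single ktime_t, so "no timestamp yet" becomes one 64-bit zero; that is what the skb->tstamp.tv64 tests in the receive paths below check. A tiny sketch of why the union view makes that a single compare (simplified stand-in for ktime_t):

    #include <stdint.h>

    union ktime { int64_t tv64; };      /* simplified view of ktime_t */

    static inline int stamped(union ktime t)
    {
        return t.tv64 != 0;             /* one word, no (sec,usec) pair test */
    }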
@@ -1077,18 +1057,18 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1077 set by sender, so that the second statement is 1057 set by sender, so that the second statement is
1078 just protection against buggy protocols. 1058 just protection against buggy protocols.
1079 */ 1059 */
1080 skb2->mac.raw = skb2->data; 1060 skb_reset_mac_header(skb2);
1081 1061
1082 if (skb2->nh.raw < skb2->data || 1062 if (skb_network_header(skb2) < skb2->data ||
1083 skb2->nh.raw > skb2->tail) { 1063 skb2->network_header > skb2->tail) {
1084 if (net_ratelimit()) 1064 if (net_ratelimit())
1085 printk(KERN_CRIT "protocol %04x is " 1065 printk(KERN_CRIT "protocol %04x is "
1086 "buggy, dev %s\n", 1066 "buggy, dev %s\n",
1087 skb2->protocol, dev->name); 1067 skb2->protocol, dev->name);
1088 skb2->nh.raw = skb2->data; 1068 skb_reset_network_header(skb2);
1089 } 1069 }
1090 1070
1091 skb2->h.raw = skb2->nh.raw; 1071 skb2->transport_header = skb2->network_header;
1092 skb2->pkt_type = PACKET_OUTGOING; 1072 skb2->pkt_type = PACKET_OUTGOING;
1093 ptype->func(skb2, skb->dev, ptype, skb->dev); 1073 ptype->func(skb2, skb->dev, ptype, skb->dev);
1094 } 1074 }
@@ -1167,7 +1147,7 @@ EXPORT_SYMBOL(netif_device_attach);
1167int skb_checksum_help(struct sk_buff *skb) 1147int skb_checksum_help(struct sk_buff *skb)
1168{ 1148{
1169 __wsum csum; 1149 __wsum csum;
1170 int ret = 0, offset = skb->h.raw - skb->data; 1150 int ret = 0, offset;
1171 1151
1172 if (skb->ip_summed == CHECKSUM_COMPLETE) 1152 if (skb->ip_summed == CHECKSUM_COMPLETE)
1173 goto out_set_summed; 1153 goto out_set_summed;
@@ -1183,15 +1163,16 @@ int skb_checksum_help(struct sk_buff *skb)
1183 goto out; 1163 goto out;
1184 } 1164 }
1185 1165
1166 offset = skb->csum_start - skb_headroom(skb);
1186 BUG_ON(offset > (int)skb->len); 1167 BUG_ON(offset > (int)skb->len);
1187 csum = skb_checksum(skb, offset, skb->len-offset, 0); 1168 csum = skb_checksum(skb, offset, skb->len-offset, 0);
1188 1169
1189 offset = skb->tail - skb->h.raw; 1170 offset = skb_headlen(skb) - offset;
1190 BUG_ON(offset <= 0); 1171 BUG_ON(offset <= 0);
1191 BUG_ON(skb->csum_offset + 2 > offset); 1172 BUG_ON(skb->csum_offset + 2 > offset);
1192 1173
1193 *(__sum16*)(skb->h.raw + skb->csum_offset) = csum_fold(csum); 1174 *(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) =
1194 1175 csum_fold(csum);
1195out_set_summed: 1176out_set_summed:
1196 skb->ip_summed = CHECKSUM_NONE; 1177 skb->ip_summed = CHECKSUM_NONE;
1197out: 1178out:
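skb_checksum_help() stops deriving the transport offset from the old h.raw pointer: it recomputes it as csum_start - skb_headroom(skb), checksums from there to the end of the packet, folds the 32-bit partial sum, and stores the result at head + csum_start + csum_offset. A self-contained user-space demo of the fold step itself, the ones'-complement reduction csum_fold() performs:

    #include <stdint.h>
    #include <stdio.h>

    /* fold a 32-bit ones'-complement sum to 16 bits and invert */
    static uint16_t fold(uint32_t sum)
    {
        sum = (sum & 0xffff) + (sum >> 16);
        sum = (sum & 0xffff) + (sum >> 16);     /* absorb the carry */
        return (uint16_t)~sum;
    }

    int main(void)
    {
        printf("%04x\n", fold(0x1a2b3c4d));
        return 0;
    }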
@@ -1217,11 +1198,11 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1217 1198
1218 BUG_ON(skb_shinfo(skb)->frag_list); 1199 BUG_ON(skb_shinfo(skb)->frag_list);
1219 1200
1220 skb->mac.raw = skb->data; 1201 skb_reset_mac_header(skb);
1221 skb->mac_len = skb->nh.raw - skb->data; 1202 skb->mac_len = skb->network_header - skb->mac_header;
1222 __skb_pull(skb, skb->mac_len); 1203 __skb_pull(skb, skb->mac_len);
1223 1204
1224 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { 1205 if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
1225 if (skb_header_cloned(skb) && 1206 if (skb_header_cloned(skb) &&
1226 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) 1207 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1227 return ERR_PTR(err); 1208 return ERR_PTR(err);
@@ -1235,7 +1216,8 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1235 segs = ERR_PTR(err); 1216 segs = ERR_PTR(err);
1236 if (err || skb_gso_ok(skb, features)) 1217 if (err || skb_gso_ok(skb, features))
1237 break; 1218 break;
1238 __skb_push(skb, skb->data - skb->nh.raw); 1219 __skb_push(skb, (skb->data -
1220 skb_network_header(skb)));
1239 } 1221 }
1240 segs = ptype->gso_segment(skb, features); 1222 segs = ptype->gso_segment(skb, features);
1241 break; 1223 break;
@@ -1243,7 +1225,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1243 } 1225 }
1244 rcu_read_unlock(); 1226 rcu_read_unlock();
1245 1227
1246 __skb_push(skb, skb->data - skb->mac.raw); 1228 __skb_push(skb, skb->data - skb_mac_header(skb));
1247 1229
1248 return segs; 1230 return segs;
1249} 1231}
@@ -1340,7 +1322,7 @@ static int dev_gso_segment(struct sk_buff *skb)
1340int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) 1322int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
1341{ 1323{
1342 if (likely(!skb->next)) { 1324 if (likely(!skb->next)) {
1343 if (netdev_nit) 1325 if (!list_empty(&ptype_all))
1344 dev_queue_xmit_nit(skb, dev); 1326 dev_queue_xmit_nit(skb, dev);
1345 1327
1346 if (netif_needs_gso(dev, skb)) { 1328 if (netif_needs_gso(dev, skb)) {
@@ -1442,12 +1424,16 @@ int dev_queue_xmit(struct sk_buff *skb)
1442 /* If packet is not checksummed and device does not support 1424 /* If packet is not checksummed and device does not support
1443 * checksumming for this protocol, complete checksumming here. 1425 * checksumming for this protocol, complete checksumming here.
1444 */ 1426 */
1445 if (skb->ip_summed == CHECKSUM_PARTIAL && 1427 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1446 (!(dev->features & NETIF_F_GEN_CSUM) && 1428 skb_set_transport_header(skb, skb->csum_start -
1447 (!(dev->features & NETIF_F_IP_CSUM) || 1429 skb_headroom(skb));
1448 skb->protocol != htons(ETH_P_IP)))) 1430
1449 if (skb_checksum_help(skb)) 1431 if (!(dev->features & NETIF_F_GEN_CSUM) &&
1450 goto out_kfree_skb; 1432 (!(dev->features & NETIF_F_IP_CSUM) ||
1433 skb->protocol != htons(ETH_P_IP)))
1434 if (skb_checksum_help(skb))
1435 goto out_kfree_skb;
1436 }
1451 1437
1452gso: 1438gso:
1453 spin_lock_prefetch(&dev->queue_lock); 1439 spin_lock_prefetch(&dev->queue_lock);
@@ -1543,9 +1529,9 @@ out:
1543 Receiver routines 1529 Receiver routines
1544 =======================================================================*/ 1530 =======================================================================*/
1545 1531
1546int netdev_max_backlog = 1000; 1532int netdev_max_backlog __read_mostly = 1000;
1547int netdev_budget = 300; 1533int netdev_budget __read_mostly = 300;
1548int weight_p = 64; /* old backlog weight */ 1534int weight_p __read_mostly = 64; /* old backlog weight */
1549 1535
1550DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; 1536DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1551 1537
@@ -1577,7 +1563,7 @@ int netif_rx(struct sk_buff *skb)
1577 if (netpoll_rx(skb)) 1563 if (netpoll_rx(skb))
1578 return NET_RX_DROP; 1564 return NET_RX_DROP;
1579 1565
1580 if (!skb->tstamp.off_sec) 1566 if (!skb->tstamp.tv64)
1581 net_timestamp(skb); 1567 net_timestamp(skb);
1582 1568
1583 /* 1569 /*
@@ -1684,40 +1670,46 @@ static void net_tx_action(struct softirq_action *h)
1684 } 1670 }
1685} 1671}
1686 1672
1687static __inline__ int deliver_skb(struct sk_buff *skb, 1673static inline int deliver_skb(struct sk_buff *skb,
1688 struct packet_type *pt_prev, 1674 struct packet_type *pt_prev,
1689 struct net_device *orig_dev) 1675 struct net_device *orig_dev)
1690{ 1676{
1691 atomic_inc(&skb->users); 1677 atomic_inc(&skb->users);
1692 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 1678 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1693} 1679}
1694 1680
1695#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) 1681#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
1696int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb); 1682/* These hooks defined here for ATM */
1697struct net_bridge; 1683struct net_bridge;
1698struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br, 1684struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
1699 unsigned char *addr); 1685 unsigned char *addr);
1700void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent); 1686void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
1701 1687
1702static __inline__ int handle_bridge(struct sk_buff **pskb, 1688/*
1703 struct packet_type **pt_prev, int *ret, 1689 * If bridge module is loaded call bridging hook.
1704 struct net_device *orig_dev) 1690 * returns NULL if packet was consumed.
1691 */
1692struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
1693 struct sk_buff *skb) __read_mostly;
1694static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
1695 struct packet_type **pt_prev, int *ret,
1696 struct net_device *orig_dev)
1705{ 1697{
1706 struct net_bridge_port *port; 1698 struct net_bridge_port *port;
1707 1699
1708 if ((*pskb)->pkt_type == PACKET_LOOPBACK || 1700 if (skb->pkt_type == PACKET_LOOPBACK ||
1709 (port = rcu_dereference((*pskb)->dev->br_port)) == NULL) 1701 (port = rcu_dereference(skb->dev->br_port)) == NULL)
1710 return 0; 1702 return skb;
1711 1703
1712 if (*pt_prev) { 1704 if (*pt_prev) {
1713 *ret = deliver_skb(*pskb, *pt_prev, orig_dev); 1705 *ret = deliver_skb(skb, *pt_prev, orig_dev);
1714 *pt_prev = NULL; 1706 *pt_prev = NULL;
1715 } 1707 }
1716 1708
1717 return br_handle_frame_hook(port, pskb); 1709 return br_handle_frame_hook(port, skb);
1718} 1710}
1719#else 1711#else
1720#define handle_bridge(skb, pt_prev, ret, orig_dev) (0) 1712#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
1721#endif 1713#endif
1722 1714
1723#ifdef CONFIG_NET_CLS_ACT 1715#ifdef CONFIG_NET_CLS_ACT
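handle_bridge() moves from the old "int consumed" protocol with a struct sk_buff ** out-parameter to a plainer contract: return the skb if the caller should keep processing, or NULL when the hook consumed it. That removes a pointer indirection on every received frame. A user-space analogue of the new convention (struct and names hypothetical):

    #include <stdio.h>
    #include <stdlib.h>

    struct pkt { int id; };

    /* returns NULL when it consumed the packet */
    static struct pkt *hook(struct pkt *p)
    {
        if (p->id % 2) {        /* pretend odd ids are bridged away */
            free(p);
            return NULL;
        }
        return p;               /* caller keeps processing */
    }

    int main(void)
    {
        struct pkt *p = malloc(sizeof(*p));

        p->id = 2;
        p = hook(p);
        if (!p)
            return 0;           /* consumed: stop, as netif_receive_skb does */
        printf("deliver %d\n", p->id);
        free(p);
        return 0;
    }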
@@ -1747,10 +1739,10 @@ static int ing_filter(struct sk_buff *skb)
1747 1739
1748 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS); 1740 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
1749 1741
1750 spin_lock(&dev->queue_lock); 1742 spin_lock(&dev->ingress_lock);
1751 if ((q = dev->qdisc_ingress) != NULL) 1743 if ((q = dev->qdisc_ingress) != NULL)
1752 result = q->enqueue(skb, q); 1744 result = q->enqueue(skb, q);
1753 spin_unlock(&dev->queue_lock); 1745 spin_unlock(&dev->ingress_lock);
1754 1746
1755 } 1747 }
1756 1748
@@ -1769,7 +1761,7 @@ int netif_receive_skb(struct sk_buff *skb)
1769 if (skb->dev->poll && netpoll_rx(skb)) 1761 if (skb->dev->poll && netpoll_rx(skb))
1770 return NET_RX_DROP; 1762 return NET_RX_DROP;
1771 1763
1772 if (!skb->tstamp.off_sec) 1764 if (!skb->tstamp.tv64)
1773 net_timestamp(skb); 1765 net_timestamp(skb);
1774 1766
1775 if (!skb->iif) 1767 if (!skb->iif)
@@ -1782,8 +1774,9 @@ int netif_receive_skb(struct sk_buff *skb)
1782 1774
1783 __get_cpu_var(netdev_rx_stat).total++; 1775 __get_cpu_var(netdev_rx_stat).total++;
1784 1776
1785 skb->h.raw = skb->nh.raw = skb->data; 1777 skb_reset_network_header(skb);
1786 skb->mac_len = skb->nh.raw - skb->mac.raw; 1778 skb_reset_transport_header(skb);
1779 skb->mac_len = skb->network_header - skb->mac_header;
1787 1780
1788 pt_prev = NULL; 1781 pt_prev = NULL;
1789 1782
@@ -1823,7 +1816,8 @@ int netif_receive_skb(struct sk_buff *skb)
1823ncls: 1816ncls:
1824#endif 1817#endif
1825 1818
1826 if (handle_bridge(&skb, &pt_prev, &ret, orig_dev)) 1819 skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
1820 if (!skb)
1827 goto out; 1821 goto out;
1828 1822
1829 type = skb->protocol; 1823 type = skb->protocol;
@@ -2076,7 +2070,7 @@ static int dev_ifconf(char __user *arg)
2076 * This is invoked by the /proc filesystem handler to display a device 2070 * This is invoked by the /proc filesystem handler to display a device
2077 * in detail. 2071 * in detail.
2078 */ 2072 */
2079static __inline__ struct net_device *dev_get_idx(loff_t pos) 2073static struct net_device *dev_get_idx(loff_t pos)
2080{ 2074{
2081 struct net_device *dev; 2075 struct net_device *dev;
2082 loff_t i; 2076 loff_t i;
@@ -2105,9 +2099,9 @@ void dev_seq_stop(struct seq_file *seq, void *v)
2105 2099
2106static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) 2100static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2107{ 2101{
2108 if (dev->get_stats) { 2102 struct net_device_stats *stats = dev->get_stats(dev);
2109 struct net_device_stats *stats = dev->get_stats(dev);
2110 2103
2104 if (stats) {
2111 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " 2105 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2112 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", 2106 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2113 dev->name, stats->rx_bytes, stats->rx_packets, 2107 dev->name, stats->rx_bytes, stats->rx_packets,
@@ -2185,7 +2179,7 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
2185 return 0; 2179 return 0;
2186} 2180}
2187 2181
2188static struct seq_operations dev_seq_ops = { 2182static const struct seq_operations dev_seq_ops = {
2189 .start = dev_seq_start, 2183 .start = dev_seq_start,
2190 .next = dev_seq_next, 2184 .next = dev_seq_next,
2191 .stop = dev_seq_stop, 2185 .stop = dev_seq_stop,
@@ -2205,7 +2199,7 @@ static const struct file_operations dev_seq_fops = {
2205 .release = seq_release, 2199 .release = seq_release,
2206}; 2200};
2207 2201
2208static struct seq_operations softnet_seq_ops = { 2202static const struct seq_operations softnet_seq_ops = {
2209 .start = softnet_seq_start, 2203 .start = softnet_seq_start,
2210 .next = softnet_seq_next, 2204 .next = softnet_seq_next,
2211 .stop = softnet_seq_stop, 2205 .stop = softnet_seq_stop,
@@ -2225,12 +2219,135 @@ static const struct file_operations softnet_seq_fops = {
2225 .release = seq_release, 2219 .release = seq_release,
2226}; 2220};
2227 2221
2228#ifdef CONFIG_WIRELESS_EXT 2222static void *ptype_get_idx(loff_t pos)
2229extern int wireless_proc_init(void); 2223{
2230#else 2224 struct packet_type *pt = NULL;
2231#define wireless_proc_init() 0 2225 loff_t i = 0;
2226 int t;
2227
2228 list_for_each_entry_rcu(pt, &ptype_all, list) {
2229 if (i == pos)
2230 return pt;
2231 ++i;
2232 }
2233
2234 for (t = 0; t < 16; t++) {
2235 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2236 if (i == pos)
2237 return pt;
2238 ++i;
2239 }
2240 }
2241 return NULL;
2242}
2243
2244static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2245{
2246 rcu_read_lock();
2247 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2248}
2249
2250static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2251{
2252 struct packet_type *pt;
2253 struct list_head *nxt;
2254 int hash;
2255
2256 ++*pos;
2257 if (v == SEQ_START_TOKEN)
2258 return ptype_get_idx(0);
2259
2260 pt = v;
2261 nxt = pt->list.next;
2262 if (pt->type == htons(ETH_P_ALL)) {
2263 if (nxt != &ptype_all)
2264 goto found;
2265 hash = 0;
2266 nxt = ptype_base[0].next;
2267 } else
2268 hash = ntohs(pt->type) & 15;
2269
2270 while (nxt == &ptype_base[hash]) {
2271 if (++hash >= 16)
2272 return NULL;
2273 nxt = ptype_base[hash].next;
2274 }
2275found:
2276 return list_entry(nxt, struct packet_type, list);
2277}
2278
2279static void ptype_seq_stop(struct seq_file *seq, void *v)
2280{
2281 rcu_read_unlock();
2282}
2283
2284static void ptype_seq_decode(struct seq_file *seq, void *sym)
2285{
2286#ifdef CONFIG_KALLSYMS
2287 unsigned long offset = 0, symsize;
2288 const char *symname;
2289 char *modname;
2290 char namebuf[128];
2291
2292 symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
2293 &modname, namebuf);
2294
2295 if (symname) {
2296 char *delim = ":";
2297
2298 if (!modname)
2299 modname = delim = "";
2300 seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
2301 symname, offset);
2302 return;
2303 }
2232#endif 2304#endif
2233 2305
2306 seq_printf(seq, "[%p]", sym);
2307}
2308
2309static int ptype_seq_show(struct seq_file *seq, void *v)
2310{
2311 struct packet_type *pt = v;
2312
2313 if (v == SEQ_START_TOKEN)
2314 seq_puts(seq, "Type Device Function\n");
2315 else {
2316 if (pt->type == htons(ETH_P_ALL))
2317 seq_puts(seq, "ALL ");
2318 else
2319 seq_printf(seq, "%04x", ntohs(pt->type));
2320
2321 seq_printf(seq, " %-8s ",
2322 pt->dev ? pt->dev->name : "");
2323 ptype_seq_decode(seq, pt->func);
2324 seq_putc(seq, '\n');
2325 }
2326
2327 return 0;
2328}
2329
2330static const struct seq_operations ptype_seq_ops = {
2331 .start = ptype_seq_start,
2332 .next = ptype_seq_next,
2333 .stop = ptype_seq_stop,
2334 .show = ptype_seq_show,
2335};
2336
2337static int ptype_seq_open(struct inode *inode, struct file *file)
2338{
2339 return seq_open(file, &ptype_seq_ops);
2340}
2341
2342static const struct file_operations ptype_seq_fops = {
2343 .owner = THIS_MODULE,
2344 .open = ptype_seq_open,
2345 .read = seq_read,
2346 .llseek = seq_lseek,
2347 .release = seq_release,
2348};
2349
2350
2234static int __init dev_proc_init(void) 2351static int __init dev_proc_init(void)
2235{ 2352{
2236 int rc = -ENOMEM; 2353 int rc = -ENOMEM;
@@ -2239,13 +2356,18 @@ static int __init dev_proc_init(void)
2239 goto out; 2356 goto out;
2240 if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops)) 2357 if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2241 goto out_dev; 2358 goto out_dev;
2242 if (wireless_proc_init()) 2359 if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops))
2360 goto out_dev2;
2361
2362 if (wext_proc_init())
2243 goto out_softnet; 2363 goto out_softnet;
2244 rc = 0; 2364 rc = 0;
2245out: 2365out:
2246 return rc; 2366 return rc;
2247out_softnet: 2367out_softnet:
2248 proc_net_remove("softnet_stat"); 2368 proc_net_remove("softnet_stat");
2369out_dev2:
2370 proc_net_remove("ptype");
2249out_dev: 2371out_dev:
2250 proc_net_remove("dev"); 2372 proc_net_remove("dev");
2251 goto out; 2373 goto out;
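The new /proc/net/ptype file is a textbook seq_file: start() takes rcu_read_lock() and re-derives the cursor from the absolute position (SEQ_START_TOKEN for the header line), next() advances across ptype_all and then the 16 hash buckets, stop() drops the lock, and show() prints one packet handler, resolving the function pointer through kallsyms when available. The restartability hinges on ptype_get_idx() re-walking from the head each time. A user-space analogue of that position-based resume over several lists (data and names hypothetical):

    #include <stdio.h>

    /* two buckets, standing in for ptype_all plus the ptype_base[] hash */
    static const char *all[]  = { "ALL tap" };
    static const char *base[] = { "0800 ip_rcv", "0806 arp_rcv" };

    /* re-derive the record at absolute position pos, or NULL past the end */
    static const char *get_idx(long pos)
    {
        if (pos < 1)
            return all[pos];
        pos -= 1;
        if (pos < 2)
            return base[pos];
        return NULL;
    }

    int main(void)
    {
        const char *rec;

        /* each iteration re-enters by position, as seq_file reads do */
        for (long pos = 0; (rec = get_idx(pos)) != NULL; pos++)
            printf("%s\n", rec);
        return 0;
    }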
@@ -2795,29 +2917,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
2795 ret = -EFAULT; 2917 ret = -EFAULT;
2796 return ret; 2918 return ret;
2797 } 2919 }
2798#ifdef CONFIG_WIRELESS_EXT
2799 /* Take care of Wireless Extensions */ 2920 /* Take care of Wireless Extensions */
2800 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { 2921 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
2801 /* If command is `set a parameter', or 2922 return wext_handle_ioctl(&ifr, cmd, arg);
2802 * `get the encoding parameters', check if
2803 * the user has the right to do it */
2804 if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE
2805 || cmd == SIOCGIWENCODEEXT) {
2806 if (!capable(CAP_NET_ADMIN))
2807 return -EPERM;
2808 }
2809 dev_load(ifr.ifr_name);
2810 rtnl_lock();
2811 /* Follow me in net/core/wireless.c */
2812 ret = wireless_process_ioctl(&ifr, cmd);
2813 rtnl_unlock();
2814 if (IW_IS_GET(cmd) &&
2815 copy_to_user(arg, &ifr,
2816 sizeof(struct ifreq)))
2817 ret = -EFAULT;
2818 return ret;
2819 }
2820#endif /* CONFIG_WIRELESS_EXT */
2821 return -EINVAL; 2923 return -EINVAL;
2822 } 2924 }
2823} 2925}
@@ -2847,7 +2949,7 @@ static int dev_boot_phase = 1;
2847static DEFINE_SPINLOCK(net_todo_list_lock); 2949static DEFINE_SPINLOCK(net_todo_list_lock);
2848static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list); 2950static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
2849 2951
2850static inline void net_set_todo(struct net_device *dev) 2952static void net_set_todo(struct net_device *dev)
2851{ 2953{
2852 spin_lock(&net_todo_list_lock); 2954 spin_lock(&net_todo_list_lock);
2853 list_add_tail(&dev->todo_list, &net_todo_list); 2955 list_add_tail(&dev->todo_list, &net_todo_list);
@@ -2888,9 +2990,7 @@ int register_netdevice(struct net_device *dev)
2888 spin_lock_init(&dev->queue_lock); 2990 spin_lock_init(&dev->queue_lock);
2889 spin_lock_init(&dev->_xmit_lock); 2991 spin_lock_init(&dev->_xmit_lock);
2890 dev->xmit_lock_owner = -1; 2992 dev->xmit_lock_owner = -1;
2891#ifdef CONFIG_NET_CLS_ACT
2892 spin_lock_init(&dev->ingress_lock); 2993 spin_lock_init(&dev->ingress_lock);
2893#endif
2894 2994
2895 dev->iflink = -1; 2995 dev->iflink = -1;
2896 2996
@@ -3002,7 +3102,7 @@ out:
3002 * chain. 0 is returned on success. A negative errno code is returned 3102 * chain. 0 is returned on success. A negative errno code is returned
3003 * on a failure to set up the device, or if the name is a duplicate. 3103 * on a failure to set up the device, or if the name is a duplicate.
3004 * 3104 *
3005 * This is a wrapper around register_netdev that takes the rtnl semaphore 3105 * This is a wrapper around register_netdevice that takes the rtnl semaphore
3006 * and expands the device name if you passed a format string to 3106 * and expands the device name if you passed a format string to
3007 * alloc_netdev. 3107 * alloc_netdev.
3008 */ 3108 */
@@ -3157,6 +3257,13 @@ out:
3157 mutex_unlock(&net_todo_run_mutex); 3257 mutex_unlock(&net_todo_run_mutex);
3158} 3258}
3159 3259
3260static struct net_device_stats *maybe_internal_stats(struct net_device *dev)
3261{
3262 if (dev->features & NETIF_F_INTERNAL_STATS)
3263 return &dev->stats;
3264 return NULL;
3265}
3266
3160/** 3267/**
3161 * alloc_netdev - allocate network device 3268 * alloc_netdev - allocate network device
3162 * @sizeof_priv: size of private data to allocate space for 3269 * @sizeof_priv: size of private data to allocate space for
@@ -3192,6 +3299,7 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name,
3192 if (sizeof_priv) 3299 if (sizeof_priv)
3193 dev->priv = netdev_priv(dev); 3300 dev->priv = netdev_priv(dev);
3194 3301
3302 dev->get_stats = maybe_internal_stats;
3195 setup(dev); 3303 setup(dev);
3196 strcpy(dev->name, name); 3304 strcpy(dev->name, name);
3197 return dev; 3305 return dev;
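alloc_netdev() now installs maybe_internal_stats() as every device's initial get_stats, which is why dev_seq_printf_stats() earlier in this file could drop its NULL test on the hook and check the returned pointer instead: devices that set NETIF_F_INTERNAL_STATS serve the embedded dev->stats, all others return NULL (or whatever their setup() installs). A user-space sketch of the default-callback idiom (types and names hypothetical):

    #include <stdio.h>

    struct dev {
        int internal_stats;             /* stands in for the feature flag */
        long stats;                     /* embedded counters */
        long *(*get_stats)(struct dev *);
    };

    static long *default_stats(struct dev *d)
    {
        return d->internal_stats ? &d->stats : NULL;
    }

    static struct dev alloc_dev(void)
    {
        /* default installed at allocation, as alloc_netdev() now does */
        struct dev d = { .internal_stats = 1, .stats = 42,
                         .get_stats = default_stats };
        return d;
    }

    int main(void)
    {
        struct dev d = alloc_dev();
        long *s = d.get_stats(&d);      /* no NULL-check of the hook itself */

        if (s)
            printf("%ld\n", *s);
        return 0;
    }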
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index 56b310c0c860..7d57bf77f3a3 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -264,7 +264,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
264 return 0; 264 return 0;
265} 265}
266 266
267static struct seq_operations dev_mc_seq_ops = { 267static const struct seq_operations dev_mc_seq_ops = {
268 .start = dev_mc_seq_start, 268 .start = dev_mc_seq_start,
269 .next = dev_mc_seq_next, 269 .next = dev_mc_seq_next,
270 .stop = dev_mc_seq_stop, 270 .stop = dev_mc_seq_stop,
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 6168edd137dd..8d5e5a09b576 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -836,7 +836,7 @@ int dev_ethtool(struct ifreq *ifr)
836 return -EPERM; 836 return -EPERM;
837 } 837 }
838 838
839 if(dev->ethtool_ops->begin) 839 if (dev->ethtool_ops->begin)
840 if ((rc = dev->ethtool_ops->begin(dev)) < 0) 840 if ((rc = dev->ethtool_ops->begin(dev)) < 0)
841 return rc; 841 return rc;
842 842
@@ -952,7 +952,7 @@ int dev_ethtool(struct ifreq *ifr)
952 rc = -EOPNOTSUPP; 952 rc = -EOPNOTSUPP;
953 } 953 }
954 954
955 if(dev->ethtool_ops->complete) 955 if (dev->ethtool_ops->complete)
956 dev->ethtool_ops->complete(dev); 956 dev->ethtool_ops->complete(dev);
957 957
958 if (old_features != dev->features) 958 if (old_features != dev->features)
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 7174ced75efc..8c5474e16683 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -44,6 +44,12 @@ static void rules_ops_put(struct fib_rules_ops *ops)
44 module_put(ops->owner); 44 module_put(ops->owner);
45} 45}
46 46
47static void flush_route_cache(struct fib_rules_ops *ops)
48{
49 if (ops->flush_cache)
50 ops->flush_cache();
51}
52
47int fib_rules_register(struct fib_rules_ops *ops) 53int fib_rules_register(struct fib_rules_ops *ops)
48{ 54{
49 int err = -EEXIST; 55 int err = -EEXIST;
@@ -132,10 +138,25 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
132 rcu_read_lock(); 138 rcu_read_lock();
133 139
134 list_for_each_entry_rcu(rule, ops->rules_list, list) { 140 list_for_each_entry_rcu(rule, ops->rules_list, list) {
141jumped:
135 if (!fib_rule_match(rule, ops, fl, flags)) 142 if (!fib_rule_match(rule, ops, fl, flags))
136 continue; 143 continue;
137 144
138 err = ops->action(rule, fl, flags, arg); 145 if (rule->action == FR_ACT_GOTO) {
146 struct fib_rule *target;
147
148 target = rcu_dereference(rule->ctarget);
149 if (target == NULL) {
150 continue;
151 } else {
152 rule = target;
153 goto jumped;
154 }
155 } else if (rule->action == FR_ACT_NOP)
156 continue;
157 else
158 err = ops->action(rule, fl, flags, arg);
159
139 if (err != -EAGAIN) { 160 if (err != -EAGAIN) {
140 fib_rule_get(rule); 161 fib_rule_get(rule);
141 arg->rule = rule; 162 arg->rule = rule;
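fib_rules_lookup() learns two new actions: FR_ACT_GOTO re-enters the match loop at a pre-resolved target rule (rule->ctarget, read under RCU), and FR_ACT_NOP simply skips the rule. Because fib_nl_newrule() below rejects target <= pref, jumps can only move forward in priority order, so the `goto jumped` can never cycle. A compact user-space analogue, with the target pre-resolved to an index at insert time (names hypothetical):

    #include <stdio.h>

    enum act { ACT_MATCH, ACT_NOP, ACT_GOTO };

    struct rule { int pref; enum act act; int jump_to; /* index, or -1 */ };

    static int eval(const struct rule *r, int n)
    {
        int i = 0;

        while (i < n) {
            switch (r[i].act) {
            case ACT_GOTO:
                i = r[i].jump_to;       /* forward-only, so no cycles */
                continue;
            case ACT_NOP:
                i++;
                continue;
            case ACT_MATCH:
                return r[i].pref;       /* first terminal rule wins */
            }
        }
        return -1;                      /* ran off the end */
    }

    int main(void)
    {
        struct rule r[] = {
            { 10, ACT_GOTO,  2 },       /* jump over the NOP */
            { 20, ACT_NOP,  -1 },
            { 30, ACT_MATCH, -1 },
        };

        printf("matched pref %d\n", eval(r, 3));    /* 30 */
        return 0;
    }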
@@ -174,13 +195,13 @@ errout:
174 return err; 195 return err;
175} 196}
176 197
177int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 198static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
178{ 199{
179 struct fib_rule_hdr *frh = nlmsg_data(nlh); 200 struct fib_rule_hdr *frh = nlmsg_data(nlh);
180 struct fib_rules_ops *ops = NULL; 201 struct fib_rules_ops *ops = NULL;
181 struct fib_rule *rule, *r, *last = NULL; 202 struct fib_rule *rule, *r, *last = NULL;
182 struct nlattr *tb[FRA_MAX+1]; 203 struct nlattr *tb[FRA_MAX+1];
183 int err = -EINVAL; 204 int err = -EINVAL, unresolved = 0;
184 205
185 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) 206 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
186 goto errout; 207 goto errout;
@@ -237,6 +258,28 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
237 if (!rule->pref && ops->default_pref) 258 if (!rule->pref && ops->default_pref)
238 rule->pref = ops->default_pref(); 259 rule->pref = ops->default_pref();
239 260
261 err = -EINVAL;
262 if (tb[FRA_GOTO]) {
263 if (rule->action != FR_ACT_GOTO)
264 goto errout_free;
265
266 rule->target = nla_get_u32(tb[FRA_GOTO]);
267 /* Backward jumps are prohibited to avoid endless loops */
268 if (rule->target <= rule->pref)
269 goto errout_free;
270
271 list_for_each_entry(r, ops->rules_list, list) {
272 if (r->pref == rule->target) {
273 rule->ctarget = r;
274 break;
275 }
276 }
277
278 if (rule->ctarget == NULL)
279 unresolved = 1;
280 } else if (rule->action == FR_ACT_GOTO)
281 goto errout_free;
282
240 err = ops->configure(rule, skb, nlh, frh, tb); 283 err = ops->configure(rule, skb, nlh, frh, tb);
241 if (err < 0) 284 if (err < 0)
242 goto errout_free; 285 goto errout_free;
@@ -249,12 +292,35 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
249 292
250 fib_rule_get(rule); 293 fib_rule_get(rule);
251 294
295 if (ops->unresolved_rules) {
296 /*
297 * There are unresolved goto rules in the list, check if
298 * any of them are pointing to this new rule.
299 */
300 list_for_each_entry(r, ops->rules_list, list) {
301 if (r->action == FR_ACT_GOTO &&
302 r->target == rule->pref) {
303 BUG_ON(r->ctarget != NULL);
304 rcu_assign_pointer(r->ctarget, rule);
305 if (--ops->unresolved_rules == 0)
306 break;
307 }
308 }
309 }
310
311 if (rule->action == FR_ACT_GOTO)
312 ops->nr_goto_rules++;
313
314 if (unresolved)
315 ops->unresolved_rules++;
316
252 if (last) 317 if (last)
253 list_add_rcu(&rule->list, &last->list); 318 list_add_rcu(&rule->list, &last->list);
254 else 319 else
255 list_add_rcu(&rule->list, ops->rules_list); 320 list_add_rcu(&rule->list, ops->rules_list);
256 321
257 notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); 322 notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
323 flush_route_cache(ops);
258 rules_ops_put(ops); 324 rules_ops_put(ops);
259 return 0; 325 return 0;
260 326
@@ -265,11 +331,11 @@ errout:
265 return err; 331 return err;
266} 332}
267 333
268int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 334static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
269{ 335{
270 struct fib_rule_hdr *frh = nlmsg_data(nlh); 336 struct fib_rule_hdr *frh = nlmsg_data(nlh);
271 struct fib_rules_ops *ops = NULL; 337 struct fib_rules_ops *ops = NULL;
272 struct fib_rule *rule; 338 struct fib_rule *rule, *tmp;
273 struct nlattr *tb[FRA_MAX+1]; 339 struct nlattr *tb[FRA_MAX+1];
274 int err = -EINVAL; 340 int err = -EINVAL;
275 341
@@ -322,10 +388,30 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
322 } 388 }
323 389
324 list_del_rcu(&rule->list); 390 list_del_rcu(&rule->list);
391
392 if (rule->action == FR_ACT_GOTO)
393 ops->nr_goto_rules--;
394
395 /*
396 * Check if this rule is a target to any of them. If so,
397 * disable them. As this operation is eventually very
398 * expensive, it is only performed if goto rules have
399 * actually been added.
400 */
401 if (ops->nr_goto_rules > 0) {
402 list_for_each_entry(tmp, ops->rules_list, list) {
403 if (tmp->ctarget == rule) {
404 rcu_assign_pointer(tmp->ctarget, NULL);
405 ops->unresolved_rules++;
406 }
407 }
408 }
409
325 synchronize_rcu(); 410 synchronize_rcu();
326 notify_rule_change(RTM_DELRULE, rule, ops, nlh, 411 notify_rule_change(RTM_DELRULE, rule, ops, nlh,
327 NETLINK_CB(skb).pid); 412 NETLINK_CB(skb).pid);
328 fib_rule_put(rule); 413 fib_rule_put(rule);
414 flush_route_cache(ops);
329 rules_ops_put(ops); 415 rules_ops_put(ops);
330 return 0; 416 return 0;
331 } 417 }
@@ -371,9 +457,16 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
371 frh->action = rule->action; 457 frh->action = rule->action;
372 frh->flags = rule->flags; 458 frh->flags = rule->flags;
373 459
374 if (rule->ifname[0]) 460 if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL)
461 frh->flags |= FIB_RULE_UNRESOLVED;
462
463 if (rule->ifname[0]) {
375 NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname); 464 NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname);
376 465
466 if (rule->ifindex == -1)
467 frh->flags |= FIB_RULE_DEV_DETACHED;
468 }
469
377 if (rule->pref) 470 if (rule->pref)
378 NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref); 471 NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref);
379 472
@@ -383,6 +476,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
383 if (rule->mark_mask || rule->mark) 476 if (rule->mark_mask || rule->mark)
384 NLA_PUT_U32(skb, FRA_FWMASK, rule->mark_mask); 477 NLA_PUT_U32(skb, FRA_FWMASK, rule->mark_mask);
385 478
479 if (rule->target)
480 NLA_PUT_U32(skb, FRA_GOTO, rule->target);
481
386 if (ops->fill(rule, skb, nlh, frh) < 0) 482 if (ops->fill(rule, skb, nlh, frh) < 0)
387 goto nla_put_failure; 483 goto nla_put_failure;
388 484
@@ -393,19 +489,14 @@ nla_put_failure:
393 return -EMSGSIZE; 489 return -EMSGSIZE;
394} 490}
395 491
396int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family) 492static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
493 struct fib_rules_ops *ops)
397{ 494{
398 int idx = 0; 495 int idx = 0;
399 struct fib_rule *rule; 496 struct fib_rule *rule;
400 struct fib_rules_ops *ops;
401
402 ops = lookup_rules_ops(family);
403 if (ops == NULL)
404 return -EAFNOSUPPORT;
405 497
406 rcu_read_lock(); 498 list_for_each_entry(rule, ops->rules_list, list) {
407 list_for_each_entry_rcu(rule, ops->rules_list, list) { 499 if (idx < cb->args[1])
408 if (idx < cb->args[0])
409 goto skip; 500 goto skip;
410 501
411 if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid, 502 if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid,
@@ -415,14 +506,44 @@ int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family)
415skip: 506skip:
416 idx++; 507 idx++;
417 } 508 }
418 rcu_read_unlock(); 509 cb->args[1] = idx;
419 cb->args[0] = idx;
420 rules_ops_put(ops); 510 rules_ops_put(ops);
421 511
422 return skb->len; 512 return skb->len;
423} 513}
424 514
425EXPORT_SYMBOL_GPL(fib_rules_dump); 515static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
516{
517 struct fib_rules_ops *ops;
518 int idx = 0, family;
519
520 family = rtnl_msg_family(cb->nlh);
521 if (family != AF_UNSPEC) {
522 /* Protocol specific dump request */
523 ops = lookup_rules_ops(family);
524 if (ops == NULL)
525 return -EAFNOSUPPORT;
526
527 return dump_rules(skb, cb, ops);
528 }
529
530 rcu_read_lock();
531 list_for_each_entry_rcu(ops, &rules_ops, list) {
532 if (idx < cb->args[0] || !try_module_get(ops->owner))
533 goto skip;
534
535 if (dump_rules(skb, cb, ops) < 0)
536 break;
537
538 cb->args[1] = 0;
539 skip:
540 idx++;
541 }
542 rcu_read_unlock();
543 cb->args[0] = idx;
544
545 return skb->len;
546}
426 547
427static void notify_rule_change(int event, struct fib_rule *rule, 548static void notify_rule_change(int event, struct fib_rule *rule,
428 struct fib_rules_ops *ops, struct nlmsghdr *nlh, 549 struct fib_rules_ops *ops, struct nlmsghdr *nlh,
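fib_nl_dumprule() replaces the exported per-family dump with an all-families walk driven by two netlink continuation cursors: cb->args[0] indexes the ops (family) and cb->args[1] the rule within it, reset to zero whenever a family completes. That is the standard way to make a multi-level netlink dump restartable across message buffers. A self-contained user-space sketch of the cursor discipline (names and budget hypothetical):

    #include <stdio.h>

    /* toy multi-family dump: emit at most `budget` records per call and
     * park the resume point in args[], like cb->args[0]/args[1] above */
    static int dump(const int lens[], int nfam, long args[2], int budget)
    {
        int emitted = 0;

        for (long f = args[0]; f < nfam; f++) {
            for (long i = args[1]; i < lens[f]; i++) {
                if (emitted == budget) {
                    args[0] = f;        /* outer cursor: family index */
                    args[1] = i;        /* inner cursor: rule index */
                    return emitted;
                }
                printf("family %ld rule %ld\n", f, i);
                emitted++;
            }
            args[1] = 0;                /* family finished: reset inner */
        }
        args[0] = nfam;                 /* fully drained */
        return emitted;
    }

    int main(void)
    {
        int lens[] = { 2, 3 };
        long args[2] = { 0, 0 };

        while (dump(lens, 2, args, 2) > 0)
            ;                           /* each call resumes where it stopped */
        return 0;
    }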
@@ -501,6 +622,10 @@ static struct notifier_block fib_rules_notifier = {
501 622
502static int __init fib_rules_init(void) 623static int __init fib_rules_init(void)
503{ 624{
625 rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL);
626 rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL);
627 rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule);
628
504 return register_netdevice_notifier(&fib_rules_notifier); 629 return register_netdevice_notifier(&fib_rules_notifier);
505} 630}
506 631
diff --git a/net/core/filter.c b/net/core/filter.c
index 8d185a089c53..bd903aaf7aa7 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -42,11 +42,11 @@ static void *__load_pointer(struct sk_buff *skb, int k)
42 u8 *ptr = NULL; 42 u8 *ptr = NULL;
43 43
44 if (k >= SKF_NET_OFF) 44 if (k >= SKF_NET_OFF)
45 ptr = skb->nh.raw + k - SKF_NET_OFF; 45 ptr = skb_network_header(skb) + k - SKF_NET_OFF;
46 else if (k >= SKF_LL_OFF) 46 else if (k >= SKF_LL_OFF)
47 ptr = skb->mac.raw + k - SKF_LL_OFF; 47 ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
48 48
49 if (ptr >= skb->head && ptr < skb->tail) 49 if (ptr >= skb->head && ptr < skb_tail_pointer(skb))
50 return ptr; 50 return ptr;
51 return NULL; 51 return NULL;
52} 52}
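The filter's ancillary loads now resolve through the header accessors: an SKF_NET_OFF-relative offset maps onto the network header, SKF_LL_OFF onto the MAC header, and the result is honoured only if it still lies inside the linear buffer [head, tail). A user-space analogue of that translate-then-bounds-check; the magic bases mirror the era's SKF_* constants but are illustrative here:

    #include <stdio.h>

    #define NET_OFF (-0x100000)         /* stands in for SKF_NET_OFF */
    #define LL_OFF  (-0x200000)         /* stands in for SKF_LL_OFF */

    struct buf { unsigned char *head, *mac, *net, *tail; };

    static unsigned char *load_ptr(const struct buf *b, int k)
    {
        unsigned char *p = NULL;

        if (k >= NET_OFF)
            p = b->net + k - NET_OFF;   /* network-header relative */
        else if (k >= LL_OFF)
            p = b->mac + k - LL_OFF;    /* link-layer relative */

        /* honour the pointer only inside the linear area */
        if (p && p >= b->head && p < b->tail)
            return p;
        return NULL;
    }

    int main(void)
    {
        unsigned char data[64] = "MAC.IPv4";
        struct buf b = { data, data, data + 4, data + 64 };

        /* in-bounds by construction, so the pointer is non-NULL */
        printf("%c\n", *load_ptr(&b, NET_OFF));     /* 'I' */
        return 0;
    }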
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 259473d0559d..bcc25591d8ac 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -61,7 +61,7 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
61 spin_lock_bh(lock); 61 spin_lock_bh(lock);
62 d->lock = lock; 62 d->lock = lock;
63 if (type) 63 if (type)
64 d->tail = (struct rtattr *) skb->tail; 64 d->tail = (struct rtattr *)skb_tail_pointer(skb);
65 d->skb = skb; 65 d->skb = skb;
66 d->compat_tc_stats = tc_stats_type; 66 d->compat_tc_stats = tc_stats_type;
67 d->compat_xstats = xstats_type; 67 d->compat_xstats = xstats_type;
@@ -212,7 +212,7 @@ int
212gnet_stats_finish_copy(struct gnet_dump *d) 212gnet_stats_finish_copy(struct gnet_dump *d)
213{ 213{
214 if (d->tail) 214 if (d->tail)
215 d->tail->rta_len = d->skb->tail - (u8 *) d->tail; 215 d->tail->rta_len = skb_tail_pointer(d->skb) - (u8 *)d->tail;
216 216
217 if (d->compat_tc_stats) 217 if (d->compat_tc_stats)
218 if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats, 218 if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats,
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 8b45c9d3b249..e3c26a9ccad6 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -79,7 +79,7 @@ static void rfc2863_policy(struct net_device *dev)
79 case IF_LINK_MODE_DEFAULT: 79 case IF_LINK_MODE_DEFAULT:
80 default: 80 default:
81 break; 81 break;
82 }; 82 }
83 83
84 dev->operstate = operstate; 84 dev->operstate = operstate;
85 85
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 841e3f32cab1..6f3bb73053c2 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1125,7 +1125,7 @@ int neigh_compat_output(struct sk_buff *skb)
1125{ 1125{
1126 struct net_device *dev = skb->dev; 1126 struct net_device *dev = skb->dev;
1127 1127
1128 __skb_pull(skb, skb->nh.raw - skb->data); 1128 __skb_pull(skb, skb_network_offset(skb));
1129 1129
1130 if (dev->hard_header && 1130 if (dev->hard_header &&
1131 dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, 1131 dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
@@ -1147,7 +1147,7 @@ int neigh_resolve_output(struct sk_buff *skb)
1147 if (!dst || !(neigh = dst->neighbour)) 1147 if (!dst || !(neigh = dst->neighbour))
1148 goto discard; 1148 goto discard;
1149 1149
1150 __skb_pull(skb, skb->nh.raw - skb->data); 1150 __skb_pull(skb, skb_network_offset(skb));
1151 1151
1152 if (!neigh_event_send(neigh, skb)) { 1152 if (!neigh_event_send(neigh, skb)) {
1153 int err; 1153 int err;
@@ -1190,7 +1190,7 @@ int neigh_connected_output(struct sk_buff *skb)
1190 struct neighbour *neigh = dst->neighbour; 1190 struct neighbour *neigh = dst->neighbour;
1191 struct net_device *dev = neigh->dev; 1191 struct net_device *dev = neigh->dev;
1192 1192
1193 __skb_pull(skb, skb->nh.raw - skb->data); 1193 __skb_pull(skb, skb_network_offset(skb));
1194 1194
1195 read_lock_bh(&neigh->lock); 1195 read_lock_bh(&neigh->lock);
1196 err = dev->hard_header(skb, dev, ntohs(skb->protocol), 1196 err = dev->hard_header(skb, dev, ntohs(skb->protocol),
@@ -1441,7 +1441,7 @@ int neigh_table_clear(struct neigh_table *tbl)
1441 return 0; 1441 return 0;
1442} 1442}
1443 1443
1444int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1444static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1445{ 1445{
1446 struct ndmsg *ndm; 1446 struct ndmsg *ndm;
1447 struct nlattr *dst_attr; 1447 struct nlattr *dst_attr;
@@ -1506,7 +1506,7 @@ out:
1506 return err; 1506 return err;
1507} 1507}
1508 1508
1509int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1509static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1510{ 1510{
1511 struct ndmsg *ndm; 1511 struct ndmsg *ndm;
1512 struct nlattr *tb[NDA_MAX+1]; 1512 struct nlattr *tb[NDA_MAX+1];
@@ -1786,7 +1786,7 @@ static struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] __read_mostly = {
1786 [NDTPA_LOCKTIME] = { .type = NLA_U64 }, 1786 [NDTPA_LOCKTIME] = { .type = NLA_U64 },
1787}; 1787};
1788 1788
1789int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1789static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1790{ 1790{
1791 struct neigh_table *tbl; 1791 struct neigh_table *tbl;
1792 struct ndtmsg *ndtmsg; 1792 struct ndtmsg *ndtmsg;
@@ -1910,7 +1910,7 @@ errout:
1910 return err; 1910 return err;
1911} 1911}
1912 1912
1913int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) 1913static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
1914{ 1914{
1915 int family, tidx, nidx = 0; 1915 int family, tidx, nidx = 0;
1916 int tbl_skip = cb->args[0]; 1916 int tbl_skip = cb->args[0];
@@ -2034,7 +2034,7 @@ out:
2034 return rc; 2034 return rc;
2035} 2035}
2036 2036
2037int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) 2037static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2038{ 2038{
2039 struct neigh_table *tbl; 2039 struct neigh_table *tbl;
2040 int t, family, s_t; 2040 int t, family, s_t;
@@ -2393,7 +2393,7 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2393 return 0; 2393 return 0;
2394} 2394}
2395 2395
2396static struct seq_operations neigh_stat_seq_ops = { 2396static const struct seq_operations neigh_stat_seq_ops = {
2397 .start = neigh_stat_seq_start, 2397 .start = neigh_stat_seq_start,
2398 .next = neigh_stat_seq_next, 2398 .next = neigh_stat_seq_next,
2399 .stop = neigh_stat_seq_stop, 2399 .stop = neigh_stat_seq_stop,
@@ -2746,14 +2746,26 @@ void neigh_sysctl_unregister(struct neigh_parms *p)
2746 2746
2747#endif /* CONFIG_SYSCTL */ 2747#endif /* CONFIG_SYSCTL */
2748 2748
2749static int __init neigh_init(void)
2750{
2751 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL);
2752 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL);
2753 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info);
2754
2755 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info);
2756 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL);
2757
2758 return 0;
2759}
2760
2761subsys_initcall(neigh_init);
2762
2749EXPORT_SYMBOL(__neigh_event_send); 2763EXPORT_SYMBOL(__neigh_event_send);
2750EXPORT_SYMBOL(neigh_changeaddr); 2764EXPORT_SYMBOL(neigh_changeaddr);
2751EXPORT_SYMBOL(neigh_compat_output); 2765EXPORT_SYMBOL(neigh_compat_output);
2752EXPORT_SYMBOL(neigh_connected_output); 2766EXPORT_SYMBOL(neigh_connected_output);
2753EXPORT_SYMBOL(neigh_create); 2767EXPORT_SYMBOL(neigh_create);
2754EXPORT_SYMBOL(neigh_delete);
2755EXPORT_SYMBOL(neigh_destroy); 2768EXPORT_SYMBOL(neigh_destroy);
2756EXPORT_SYMBOL(neigh_dump_info);
2757EXPORT_SYMBOL(neigh_event_ns); 2769EXPORT_SYMBOL(neigh_event_ns);
2758EXPORT_SYMBOL(neigh_ifdown); 2770EXPORT_SYMBOL(neigh_ifdown);
2759EXPORT_SYMBOL(neigh_lookup); 2771EXPORT_SYMBOL(neigh_lookup);
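Note: the handlers above become static because nothing outside net/core/neighbour.c needs their addresses any more — the new neigh_init() initcall wires them up through rtnl_register() (the API is introduced in net/core/rtnetlink.c further down) instead of the old shared rtnetlink_links[] dispatch table. The registration pattern, condensed from the hunk above:

	static int __init neigh_init(void)
	{
		/* doit callbacks handle requests, dumpit callbacks
		 * handle NLM_F_DUMP queries */
		rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL);
		rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL);
		rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info);
		return 0;
	}
	subsys_initcall(neigh_init);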
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 4cbb1290a6a3..221a64ab64f7 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -352,8 +352,8 @@ static ssize_t wireless_show(struct device *d, char *buf,
352 352
353 read_lock(&dev_base_lock); 353 read_lock(&dev_base_lock);
354 if (dev_isalive(dev)) { 354 if (dev_isalive(dev)) {
355 if(dev->wireless_handlers && 355 if (dev->wireless_handlers &&
356 dev->wireless_handlers->get_wireless_stats) 356 dev->wireless_handlers->get_wireless_stats)
357 iw = dev->wireless_handlers->get_wireless_stats(dev); 357 iw = dev->wireless_handlers->get_wireless_stats(dev);
358 if (iw != NULL) 358 if (iw != NULL)
359 ret = (*format)(iw, buf); 359 ret = (*format)(iw, buf);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 4581ece48bb2..b316435b0e2a 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -86,7 +86,7 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
86{ 86{
87 __wsum psum; 87 __wsum psum;
88 88
89 if (uh->check == 0 || skb->ip_summed == CHECKSUM_UNNECESSARY) 89 if (uh->check == 0 || skb_csum_unnecessary(skb))
90 return 0; 90 return 0;
91 91
92 psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); 92 psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
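Note: skb_csum_unnecessary() replaces the open-coded ip_summed test; at this point in the series the helper in include/linux/skbuff.h is essentially just:

	static inline int skb_csum_unnecessary(const struct sk_buff *skb)
	{
		return skb->ip_summed == CHECKSUM_UNNECESSARY;
	}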
@@ -293,10 +293,12 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
293 if (!skb) 293 if (!skb)
294 return; 294 return;
295 295
296 memcpy(skb->data, msg, len); 296 skb_copy_to_linear_data(skb, msg, len);
297 skb->len += len; 297 skb->len += len;
298 298
299 skb->h.uh = udph = (struct udphdr *) skb_push(skb, sizeof(*udph)); 299 skb_push(skb, sizeof(*udph));
300 skb_reset_transport_header(skb);
301 udph = udp_hdr(skb);
300 udph->source = htons(np->local_port); 302 udph->source = htons(np->local_port);
301 udph->dest = htons(np->remote_port); 303 udph->dest = htons(np->remote_port);
302 udph->len = htons(udp_len); 304 udph->len = htons(udp_len);
@@ -308,7 +310,9 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
308 if (udph->check == 0) 310 if (udph->check == 0)
309 udph->check = CSUM_MANGLED_0; 311 udph->check = CSUM_MANGLED_0;
310 312
311 skb->nh.iph = iph = (struct iphdr *)skb_push(skb, sizeof(*iph)); 313 skb_push(skb, sizeof(*iph));
314 skb_reset_network_header(skb);
315 iph = ip_hdr(skb);
312 316
313 /* iph->version = 4; iph->ihl = 5; */ 317 /* iph->version = 4; iph->ihl = 5; */
314 put_unaligned(0x45, (unsigned char *)iph); 318 put_unaligned(0x45, (unsigned char *)iph);
@@ -324,7 +328,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
324 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); 328 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
325 329
326 eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); 330 eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
327 skb->mac.raw = skb->data; 331 skb_reset_mac_header(skb);
328 skb->protocol = eth->h_proto = htons(ETH_P_IP); 332 skb->protocol = eth->h_proto = htons(ETH_P_IP);
329 memcpy(eth->h_source, np->local_mac, 6); 333 memcpy(eth->h_source, np->local_mac, 6);
330 memcpy(eth->h_dest, np->remote_mac, 6); 334 memcpy(eth->h_dest, np->remote_mac, 6);
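Note: the conversions above replace direct stores into the old header unions (skb->h.uh, skb->nh.iph, skb->mac.raw) with the new reset/accessor helpers. Building headers back to front now follows one idiom, condensed from the hunks above:

	skb_push(skb, sizeof(struct udphdr));
	skb_reset_transport_header(skb);	/* transport header = current data */
	udph = udp_hdr(skb);			/* typed accessor, no union */

	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	eth = (struct ethhdr *)skb_push(skb, ETH_HLEN);
	skb_reset_mac_header(skb);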
@@ -359,8 +363,9 @@ static void arp_reply(struct sk_buff *skb)
359 (2 * sizeof(u32))))) 363 (2 * sizeof(u32)))))
360 return; 364 return;
361 365
362 skb->h.raw = skb->nh.raw = skb->data; 366 skb_reset_network_header(skb);
363 arp = skb->nh.arph; 367 skb_reset_transport_header(skb);
368 arp = arp_hdr(skb);
364 369
365 if ((arp->ar_hrd != htons(ARPHRD_ETHER) && 370 if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
366 arp->ar_hrd != htons(ARPHRD_IEEE802)) || 371 arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
@@ -389,7 +394,7 @@ static void arp_reply(struct sk_buff *skb)
389 if (!send_skb) 394 if (!send_skb)
390 return; 395 return;
391 396
392 send_skb->nh.raw = send_skb->data; 397 skb_reset_network_header(send_skb);
393 arp = (struct arphdr *) skb_put(send_skb, size); 398 arp = (struct arphdr *) skb_put(send_skb, size);
394 send_skb->dev = skb->dev; 399 send_skb->dev = skb->dev;
395 send_skb->protocol = htons(ETH_P_ARP); 400 send_skb->protocol = htons(ETH_P_ARP);
@@ -443,7 +448,7 @@ int __netpoll_rx(struct sk_buff *skb)
443 goto out; 448 goto out;
444 449
445 /* check if netpoll clients need ARP */ 450 /* check if netpoll clients need ARP */
446 if (skb->protocol == __constant_htons(ETH_P_ARP) && 451 if (skb->protocol == htons(ETH_P_ARP) &&
447 atomic_read(&trapped)) { 452 atomic_read(&trapped)) {
448 skb_queue_tail(&npi->arp_tx, skb); 453 skb_queue_tail(&npi->arp_tx, skb);
449 return 1; 454 return 1;
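Note: __constant_htons() is dropped in favour of plain htons() because htons() with a compile-time constant argument folds to a constant anyway; the __constant_ variant only matters where a constant expression is syntactically required, such as a case label. The comparison above thus still costs nothing at runtime:

	/* folds to a constant compare at compile time */
	if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped))
		...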
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 4b01496dc33d..b92a322872a8 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -164,14 +164,11 @@
164 164
165#define VERSION "pktgen v2.68: Packet Generator for packet performance testing.\n" 165#define VERSION "pktgen v2.68: Packet Generator for packet performance testing.\n"
166 166
167/* #define PG_DEBUG(a) a */
168#define PG_DEBUG(a)
169
170/* The buckets are exponential in 'width' */ 167/* The buckets are exponential in 'width' */
171#define LAT_BUCKETS_MAX 32 168#define LAT_BUCKETS_MAX 32
172#define IP_NAME_SZ 32 169#define IP_NAME_SZ 32
173#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ 170#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
174#define MPLS_STACK_BOTTOM __constant_htonl(0x00000100) 171#define MPLS_STACK_BOTTOM htonl(0x00000100)
175 172
176/* Device flag bits */ 173/* Device flag bits */
177#define F_IPSRC_RND (1<<0) /* IP-Src Random */ 174#define F_IPSRC_RND (1<<0) /* IP-Src Random */
@@ -214,15 +211,11 @@ struct flow_state {
214}; 211};
215 212
216struct pktgen_dev { 213struct pktgen_dev {
217
218 /* 214 /*
219 * Try to keep frequent/infrequent used vars. separated. 215 * Try to keep frequent/infrequent used vars. separated.
220 */ 216 */
221 217 struct proc_dir_entry *entry; /* proc file */
222 char ifname[IFNAMSIZ]; 218 struct pktgen_thread *pg_thread;/* the owner */
223 char result[512];
224
225 struct pktgen_thread *pg_thread; /* the owner */
226 struct list_head list; /* Used for chaining in the thread's run-queue */ 219 struct list_head list; /* Used for chaining in the thread's run-queue */
227 220
228 int running; /* if this changes to false, the test will stop */ 221 int running; /* if this changes to false, the test will stop */
@@ -349,6 +342,8 @@ struct pktgen_dev {
349 unsigned cflows; /* Concurrent flows (config) */ 342 unsigned cflows; /* Concurrent flows (config) */
350 unsigned lflow; /* Flow length (config) */ 343 unsigned lflow; /* Flow length (config) */
351 unsigned nflows; /* accumulated flows (stats) */ 344 unsigned nflows; /* accumulated flows (stats) */
345
346 char result[512];
352}; 347};
353 348
354struct pktgen_hdr { 349struct pktgen_hdr {
@@ -468,17 +463,6 @@ static inline __u64 pg_div64(__u64 n, __u64 base)
468 return tmp; 463 return tmp;
469} 464}
470 465
471static inline u32 pktgen_random(void)
472{
473#if 0
474 __u32 n;
475 get_random_bytes(&n, 4);
476 return n;
477#else
478 return net_random();
479#endif
480}
481
482static inline __u64 getCurMs(void) 466static inline __u64 getCurMs(void)
483{ 467{
484 struct timeval tv; 468 struct timeval tv;
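Note: pktgen_random() was a thin wrapper that (ignoring its dead #if 0 branch) just called net_random(), itself an alias for random32(); the call sites below now use random32() directly. The three bounding idioms they settle on, condensed from those hunks:

	/* arbitrary bound: modulus */
	flow = random32() % pkt_dev->cflows;
	/* power-of-two bound: a mask is cheaper than a divide */
	pkt_dev->vlan_id = random32() & (4096 - 1);
	/* arbitrary range [imn, imx) */
	t = random32() % (imx - imn) + imn;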
@@ -512,7 +496,7 @@ static void pktgen_stop_all_threads_ifs(void);
512static int pktgen_stop_device(struct pktgen_dev *pkt_dev); 496static int pktgen_stop_device(struct pktgen_dev *pkt_dev);
513static void pktgen_stop(struct pktgen_thread *t); 497static void pktgen_stop(struct pktgen_thread *t);
514static void pktgen_clear_counters(struct pktgen_dev *pkt_dev); 498static void pktgen_clear_counters(struct pktgen_dev *pkt_dev);
515static int pktgen_mark_device(const char *ifname); 499
516static unsigned int scan_ip6(const char *s, char ip[16]); 500static unsigned int scan_ip6(const char *s, char ip[16]);
517static unsigned int fmt_ip6(char *s, const char ip[16]); 501static unsigned int fmt_ip6(char *s, const char ip[16]);
518 502
@@ -606,7 +590,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
606 " frags: %d delay: %u clone_skb: %d ifname: %s\n", 590 " frags: %d delay: %u clone_skb: %d ifname: %s\n",
607 pkt_dev->nfrags, 591 pkt_dev->nfrags,
608 1000 * pkt_dev->delay_us + pkt_dev->delay_ns, 592 1000 * pkt_dev->delay_us + pkt_dev->delay_ns,
609 pkt_dev->clone_skb, pkt_dev->ifname); 593 pkt_dev->clone_skb, pkt_dev->odev->name);
610 594
611 seq_printf(seq, " flows: %u flowlen: %u\n", pkt_dev->cflows, 595 seq_printf(seq, " flows: %u flowlen: %u\n", pkt_dev->cflows,
612 pkt_dev->lflow); 596 pkt_dev->lflow);
@@ -661,7 +645,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
661 if (pkt_dev->nr_labels) { 645 if (pkt_dev->nr_labels) {
662 unsigned i; 646 unsigned i;
663 seq_printf(seq, " mpls: "); 647 seq_printf(seq, " mpls: ");
664 for(i = 0; i < pkt_dev->nr_labels; i++) 648 for (i = 0; i < pkt_dev->nr_labels; i++)
665 seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]), 649 seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]),
666 i == pkt_dev->nr_labels-1 ? "\n" : ", "); 650 i == pkt_dev->nr_labels-1 ? "\n" : ", ");
667 } 651 }
@@ -766,7 +750,7 @@ static int hex32_arg(const char __user *user_buffer, unsigned long maxlen, __u32
766 int i = 0; 750 int i = 0;
767 *num = 0; 751 *num = 0;
768 752
769 for(; i < maxlen; i++) { 753 for (; i < maxlen; i++) {
770 char c; 754 char c;
771 *num <<= 4; 755 *num <<= 4;
772 if (get_user(c, &user_buffer[i])) 756 if (get_user(c, &user_buffer[i]))
@@ -802,7 +786,7 @@ static int count_trail_chars(const char __user * user_buffer,
802 break; 786 break;
803 default: 787 default:
804 goto done; 788 goto done;
805 }; 789 }
806 } 790 }
807done: 791done:
808 return i; 792 return i;
@@ -845,7 +829,7 @@ static int strn_len(const char __user * user_buffer, unsigned int maxlen)
845 break; 829 break;
846 default: 830 default:
847 break; 831 break;
848 }; 832 }
849 } 833 }
850done_str: 834done_str:
851 return i; 835 return i;
@@ -874,7 +858,7 @@ static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev)
874 n++; 858 n++;
875 if (n >= MAX_MPLS_LABELS) 859 if (n >= MAX_MPLS_LABELS)
876 return -E2BIG; 860 return -E2BIG;
877 } while(c == ','); 861 } while (c == ',');
878 862
879 pkt_dev->nr_labels = n; 863 pkt_dev->nr_labels = n;
880 return i; 864 return i;
@@ -1503,7 +1487,7 @@ static ssize_t pktgen_if_write(struct file *file,
1503 if (len < 0) { return len; } 1487 if (len < 0) { return len; }
1504 i += len; 1488 i += len;
1505 offset = sprintf(pg_result, "OK: mpls="); 1489 offset = sprintf(pg_result, "OK: mpls=");
1506 for(n = 0; n < pkt_dev->nr_labels; n++) 1490 for (n = 0; n < pkt_dev->nr_labels; n++)
1507 offset += sprintf(pg_result + offset, 1491 offset += sprintf(pg_result + offset,
1508 "%08x%s", ntohl(pkt_dev->labels[n]), 1492 "%08x%s", ntohl(pkt_dev->labels[n]),
1509 n == pkt_dev->nr_labels-1 ? "" : ","); 1493 n == pkt_dev->nr_labels-1 ? "" : ",");
@@ -1697,13 +1681,13 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
1697 if_lock(t); 1681 if_lock(t);
1698 list_for_each_entry(pkt_dev, &t->if_list, list) 1682 list_for_each_entry(pkt_dev, &t->if_list, list)
1699 if (pkt_dev->running) 1683 if (pkt_dev->running)
1700 seq_printf(seq, "%s ", pkt_dev->ifname); 1684 seq_printf(seq, "%s ", pkt_dev->odev->name);
1701 1685
1702 seq_printf(seq, "\nStopped: "); 1686 seq_printf(seq, "\nStopped: ");
1703 1687
1704 list_for_each_entry(pkt_dev, &t->if_list, list) 1688 list_for_each_entry(pkt_dev, &t->if_list, list)
1705 if (!pkt_dev->running) 1689 if (!pkt_dev->running)
1706 seq_printf(seq, "%s ", pkt_dev->ifname); 1690 seq_printf(seq, "%s ", pkt_dev->odev->name);
1707 1691
1708 if (t->result[0]) 1692 if (t->result[0])
1709 seq_printf(seq, "\nResult: %s\n", t->result); 1693 seq_printf(seq, "\nResult: %s\n", t->result);
@@ -1849,16 +1833,14 @@ static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove)
1849/* 1833/*
1850 * mark a device for removal 1834 * mark a device for removal
1851 */ 1835 */
1852static int pktgen_mark_device(const char *ifname) 1836static void pktgen_mark_device(const char *ifname)
1853{ 1837{
1854 struct pktgen_dev *pkt_dev = NULL; 1838 struct pktgen_dev *pkt_dev = NULL;
1855 const int max_tries = 10, msec_per_try = 125; 1839 const int max_tries = 10, msec_per_try = 125;
1856 int i = 0; 1840 int i = 0;
1857 int ret = 0;
1858 1841
1859 mutex_lock(&pktgen_thread_lock); 1842 mutex_lock(&pktgen_thread_lock);
1860 PG_DEBUG(printk("pktgen: pktgen_mark_device marking %s for removal\n", 1843 pr_debug("pktgen: pktgen_mark_device marking %s for removal\n", ifname);
1861 ifname));
1862 1844
1863 while (1) { 1845 while (1) {
1864 1846
@@ -1867,8 +1849,8 @@ static int pktgen_mark_device(const char *ifname)
1867 break; /* success */ 1849 break; /* success */
1868 1850
1869 mutex_unlock(&pktgen_thread_lock); 1851 mutex_unlock(&pktgen_thread_lock);
1870 PG_DEBUG(printk("pktgen: pktgen_mark_device waiting for %s " 1852 pr_debug("pktgen: pktgen_mark_device waiting for %s "
1871 "to disappear....\n", ifname)); 1853 "to disappear....\n", ifname);
1872 schedule_timeout_interruptible(msecs_to_jiffies(msec_per_try)); 1854 schedule_timeout_interruptible(msecs_to_jiffies(msec_per_try));
1873 mutex_lock(&pktgen_thread_lock); 1855 mutex_lock(&pktgen_thread_lock);
1874 1856
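Note: the private PG_DEBUG() macro is replaced by the generic pr_debug(), which compiles away unless the translation unit defines DEBUG. Roughly, from <linux/kernel.h> of this era:

	#ifdef DEBUG
	#define pr_debug(fmt, arg...)	printk(KERN_DEBUG fmt, ##arg)
	#else
	#define pr_debug(fmt, arg...)	do { } while (0)
	#endif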
@@ -1876,79 +1858,91 @@ static int pktgen_mark_device(const char *ifname)
1876 printk("pktgen_mark_device: timed out after waiting " 1858 printk("pktgen_mark_device: timed out after waiting "
1877 "%d msec for device %s to be removed\n", 1859 "%d msec for device %s to be removed\n",
1878 msec_per_try * i, ifname); 1860 msec_per_try * i, ifname);
1879 ret = 1;
1880 break; 1861 break;
1881 } 1862 }
1882 1863
1883 } 1864 }
1884 1865
1885 mutex_unlock(&pktgen_thread_lock); 1866 mutex_unlock(&pktgen_thread_lock);
1867}
1886 1868
1887 return ret; 1869static void pktgen_change_name(struct net_device *dev)
1870{
1871 struct pktgen_thread *t;
1872
1873 list_for_each_entry(t, &pktgen_threads, th_list) {
1874 struct pktgen_dev *pkt_dev;
1875
1876 list_for_each_entry(pkt_dev, &t->if_list, list) {
1877 if (pkt_dev->odev != dev)
1878 continue;
1879
1880 remove_proc_entry(pkt_dev->entry->name, pg_proc_dir);
1881
1882 pkt_dev->entry = create_proc_entry(dev->name, 0600,
1883 pg_proc_dir);
1884 if (!pkt_dev->entry)
1885 printk(KERN_ERR "pktgen: can't move proc "
1886 " entry for '%s'\n", dev->name);
1887 break;
1888 }
1889 }
1888} 1890}
1889 1891
1890static int pktgen_device_event(struct notifier_block *unused, 1892static int pktgen_device_event(struct notifier_block *unused,
1891 unsigned long event, void *ptr) 1893 unsigned long event, void *ptr)
1892{ 1894{
1893 struct net_device *dev = (struct net_device *)(ptr); 1895 struct net_device *dev = ptr;
1894 1896
1895 /* It is OK that we do not hold the group lock right now, 1897 /* It is OK that we do not hold the group lock right now,
1896 * as we run under the RTNL lock. 1898 * as we run under the RTNL lock.
1897 */ 1899 */
1898 1900
1899 switch (event) { 1901 switch (event) {
1900 case NETDEV_CHANGEADDR: 1902 case NETDEV_CHANGENAME:
1901 case NETDEV_GOING_DOWN: 1903 pktgen_change_name(dev);
1902 case NETDEV_DOWN:
1903 case NETDEV_UP:
1904 /* Ignore for now */
1905 break; 1904 break;
1906 1905
1907 case NETDEV_UNREGISTER: 1906 case NETDEV_UNREGISTER:
1908 pktgen_mark_device(dev->name); 1907 pktgen_mark_device(dev->name);
1909 break; 1908 break;
1910 }; 1909 }
1911 1910
1912 return NOTIFY_DONE; 1911 return NOTIFY_DONE;
1913} 1912}
1914 1913
1915/* Associate pktgen_dev with a device. */ 1914/* Associate pktgen_dev with a device. */
1916 1915
1917static struct net_device *pktgen_setup_dev(struct pktgen_dev *pkt_dev) 1916static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
1918{ 1917{
1919 struct net_device *odev; 1918 struct net_device *odev;
1919 int err;
1920 1920
1921 /* Clean old setups */ 1921 /* Clean old setups */
1922
1923 if (pkt_dev->odev) { 1922 if (pkt_dev->odev) {
1924 dev_put(pkt_dev->odev); 1923 dev_put(pkt_dev->odev);
1925 pkt_dev->odev = NULL; 1924 pkt_dev->odev = NULL;
1926 } 1925 }
1927 1926
1928 odev = dev_get_by_name(pkt_dev->ifname); 1927 odev = dev_get_by_name(ifname);
1929
1930 if (!odev) { 1928 if (!odev) {
1931 printk("pktgen: no such netdevice: \"%s\"\n", pkt_dev->ifname); 1929 printk("pktgen: no such netdevice: \"%s\"\n", ifname);
1932 goto out; 1930 return -ENODEV;
1933 } 1931 }
1932
1934 if (odev->type != ARPHRD_ETHER) { 1933 if (odev->type != ARPHRD_ETHER) {
1935 printk("pktgen: not an ethernet device: \"%s\"\n", 1934 printk("pktgen: not an ethernet device: \"%s\"\n", ifname);
1936 pkt_dev->ifname); 1935 err = -EINVAL;
1937 goto out_put; 1936 } else if (!netif_running(odev)) {
1938 } 1937 printk("pktgen: device is down: \"%s\"\n", ifname);
1939 if (!netif_running(odev)) { 1938 err = -ENETDOWN;
1940 printk("pktgen: device is down: \"%s\"\n", pkt_dev->ifname); 1939 } else {
1941 goto out_put; 1940 pkt_dev->odev = odev;
1941 return 0;
1942 } 1942 }
1943 pkt_dev->odev = odev;
1944 1943
1945 return pkt_dev->odev;
1946
1947out_put:
1948 dev_put(odev); 1944 dev_put(odev);
1949out: 1945 return err;
1950 return NULL;
1951
1952} 1946}
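Note: pktgen_setup_dev() now takes the interface name as an argument and returns 0 or a negative errno, instead of caching the name in pkt_dev and returning a pointer; callers can therefore report exactly why a device was rejected. Caller side, condensed from pktgen_add_device() below:

	err = pktgen_setup_dev(pkt_dev, ifname);
	if (err) {			/* -ENODEV, -EINVAL or -ENETDOWN */
		if (pkt_dev->flows)
			vfree(pkt_dev->flows);
		kfree(pkt_dev);
		return err;
	}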
1953 1947
1954/* Read pkt_dev from the interface and set up internal pktgen_dev 1948/* Read pkt_dev from the interface and set up internal pktgen_dev
@@ -1956,10 +1950,6 @@ out:
1956 */ 1950 */
1957static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) 1951static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
1958{ 1952{
1959 /* Try once more, just in case it works now. */
1960 if (!pkt_dev->odev)
1961 pktgen_setup_dev(pkt_dev);
1962
1963 if (!pkt_dev->odev) { 1953 if (!pkt_dev->odev) {
1964 printk("pktgen: ERROR: pkt_dev->odev == NULL in setup_inject.\n"); 1954 printk("pktgen: ERROR: pkt_dev->odev == NULL in setup_inject.\n");
1965 sprintf(pkt_dev->result, 1955 sprintf(pkt_dev->result,
@@ -2096,7 +2086,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2096 int flow = 0; 2086 int flow = 0;
2097 2087
2098 if (pkt_dev->cflows) { 2088 if (pkt_dev->cflows) {
2099 flow = pktgen_random() % pkt_dev->cflows; 2089 flow = random32() % pkt_dev->cflows;
2100 2090
2101 if (pkt_dev->flows[flow].count > pkt_dev->lflow) 2091 if (pkt_dev->flows[flow].count > pkt_dev->lflow)
2102 pkt_dev->flows[flow].count = 0; 2092 pkt_dev->flows[flow].count = 0;
@@ -2108,7 +2098,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2108 __u32 tmp; 2098 __u32 tmp;
2109 2099
2110 if (pkt_dev->flags & F_MACSRC_RND) 2100 if (pkt_dev->flags & F_MACSRC_RND)
2111 mc = pktgen_random() % (pkt_dev->src_mac_count); 2101 mc = random32() % pkt_dev->src_mac_count;
2112 else { 2102 else {
2113 mc = pkt_dev->cur_src_mac_offset++; 2103 mc = pkt_dev->cur_src_mac_offset++;
2114 if (pkt_dev->cur_src_mac_offset > 2104 if (pkt_dev->cur_src_mac_offset >
@@ -2134,7 +2124,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2134 __u32 tmp; 2124 __u32 tmp;
2135 2125
2136 if (pkt_dev->flags & F_MACDST_RND) 2126 if (pkt_dev->flags & F_MACDST_RND)
2137 mc = pktgen_random() % (pkt_dev->dst_mac_count); 2127 mc = random32() % pkt_dev->dst_mac_count;
2138 2128
2139 else { 2129 else {
2140 mc = pkt_dev->cur_dst_mac_offset++; 2130 mc = pkt_dev->cur_dst_mac_offset++;
@@ -2158,27 +2148,26 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2158 2148
2159 if (pkt_dev->flags & F_MPLS_RND) { 2149 if (pkt_dev->flags & F_MPLS_RND) {
2160 unsigned i; 2150 unsigned i;
2161 for(i = 0; i < pkt_dev->nr_labels; i++) 2151 for (i = 0; i < pkt_dev->nr_labels; i++)
2162 if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM) 2152 if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM)
2163 pkt_dev->labels[i] = MPLS_STACK_BOTTOM | 2153 pkt_dev->labels[i] = MPLS_STACK_BOTTOM |
2164 ((__force __be32)pktgen_random() & 2154 ((__force __be32)random32() &
2165 htonl(0x000fffff)); 2155 htonl(0x000fffff));
2166 } 2156 }
2167 2157
2168 if ((pkt_dev->flags & F_VID_RND) && (pkt_dev->vlan_id != 0xffff)) { 2158 if ((pkt_dev->flags & F_VID_RND) && (pkt_dev->vlan_id != 0xffff)) {
2169 pkt_dev->vlan_id = pktgen_random() % 4096; 2159 pkt_dev->vlan_id = random32() & (4096-1);
2170 } 2160 }
2171 2161
2172 if ((pkt_dev->flags & F_SVID_RND) && (pkt_dev->svlan_id != 0xffff)) { 2162 if ((pkt_dev->flags & F_SVID_RND) && (pkt_dev->svlan_id != 0xffff)) {
2173 pkt_dev->svlan_id = pktgen_random() % 4096; 2163 pkt_dev->svlan_id = random32() & (4096 - 1);
2174 } 2164 }
2175 2165
2176 if (pkt_dev->udp_src_min < pkt_dev->udp_src_max) { 2166 if (pkt_dev->udp_src_min < pkt_dev->udp_src_max) {
2177 if (pkt_dev->flags & F_UDPSRC_RND) 2167 if (pkt_dev->flags & F_UDPSRC_RND)
2178 pkt_dev->cur_udp_src = 2168 pkt_dev->cur_udp_src = random32() %
2179 ((pktgen_random() % 2169 (pkt_dev->udp_src_max - pkt_dev->udp_src_min)
2180 (pkt_dev->udp_src_max - pkt_dev->udp_src_min)) + 2170 + pkt_dev->udp_src_min;
2181 pkt_dev->udp_src_min);
2182 2171
2183 else { 2172 else {
2184 pkt_dev->cur_udp_src++; 2173 pkt_dev->cur_udp_src++;
@@ -2189,10 +2178,9 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2189 2178
2190 if (pkt_dev->udp_dst_min < pkt_dev->udp_dst_max) { 2179 if (pkt_dev->udp_dst_min < pkt_dev->udp_dst_max) {
2191 if (pkt_dev->flags & F_UDPDST_RND) { 2180 if (pkt_dev->flags & F_UDPDST_RND) {
2192 pkt_dev->cur_udp_dst = 2181 pkt_dev->cur_udp_dst = random32() %
2193 ((pktgen_random() % 2182 (pkt_dev->udp_dst_max - pkt_dev->udp_dst_min)
2194 (pkt_dev->udp_dst_max - pkt_dev->udp_dst_min)) + 2183 + pkt_dev->udp_dst_min;
2195 pkt_dev->udp_dst_min);
2196 } else { 2184 } else {
2197 pkt_dev->cur_udp_dst++; 2185 pkt_dev->cur_udp_dst++;
2198 if (pkt_dev->cur_udp_dst >= pkt_dev->udp_dst_max) 2186 if (pkt_dev->cur_udp_dst >= pkt_dev->udp_dst_max)
@@ -2207,7 +2195,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2207 saddr_max))) { 2195 saddr_max))) {
2208 __u32 t; 2196 __u32 t;
2209 if (pkt_dev->flags & F_IPSRC_RND) 2197 if (pkt_dev->flags & F_IPSRC_RND)
2210 t = ((pktgen_random() % (imx - imn)) + imn); 2198 t = random32() % (imx - imn) + imn;
2211 else { 2199 else {
2212 t = ntohl(pkt_dev->cur_saddr); 2200 t = ntohl(pkt_dev->cur_saddr);
2213 t++; 2201 t++;
@@ -2228,14 +2216,13 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2228 __be32 s; 2216 __be32 s;
2229 if (pkt_dev->flags & F_IPDST_RND) { 2217 if (pkt_dev->flags & F_IPDST_RND) {
2230 2218
2231 t = pktgen_random() % (imx - imn) + imn; 2219 t = random32() % (imx - imn) + imn;
2232 s = htonl(t); 2220 s = htonl(t);
2233 2221
2234 while (LOOPBACK(s) || MULTICAST(s) 2222 while (LOOPBACK(s) || MULTICAST(s)
2235 || BADCLASS(s) || ZERONET(s) 2223 || BADCLASS(s) || ZERONET(s)
2236 || LOCAL_MCAST(s)) { 2224 || LOCAL_MCAST(s)) {
2237 t = (pktgen_random() % 2225 t = random32() % (imx - imn) + imn;
2238 (imx - imn)) + imn;
2239 s = htonl(t); 2226 s = htonl(t);
2240 } 2227 }
2241 pkt_dev->cur_daddr = s; 2228 pkt_dev->cur_daddr = s;
@@ -2267,7 +2254,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2267 2254
2268 for (i = 0; i < 4; i++) { 2255 for (i = 0; i < 4; i++) {
2269 pkt_dev->cur_in6_daddr.s6_addr32[i] = 2256 pkt_dev->cur_in6_daddr.s6_addr32[i] =
2270 (((__force __be32)pktgen_random() | 2257 (((__force __be32)random32() |
2271 pkt_dev->min_in6_daddr.s6_addr32[i]) & 2258 pkt_dev->min_in6_daddr.s6_addr32[i]) &
2272 pkt_dev->max_in6_daddr.s6_addr32[i]); 2259 pkt_dev->max_in6_daddr.s6_addr32[i]);
2273 } 2260 }
@@ -2277,9 +2264,9 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2277 if (pkt_dev->min_pkt_size < pkt_dev->max_pkt_size) { 2264 if (pkt_dev->min_pkt_size < pkt_dev->max_pkt_size) {
2278 __u32 t; 2265 __u32 t;
2279 if (pkt_dev->flags & F_TXSIZE_RND) { 2266 if (pkt_dev->flags & F_TXSIZE_RND) {
2280 t = ((pktgen_random() % 2267 t = random32() %
2281 (pkt_dev->max_pkt_size - pkt_dev->min_pkt_size)) 2268 (pkt_dev->max_pkt_size - pkt_dev->min_pkt_size)
2282 + pkt_dev->min_pkt_size); 2269 + pkt_dev->min_pkt_size;
2283 } else { 2270 } else {
2284 t = pkt_dev->cur_pkt_size + 1; 2271 t = pkt_dev->cur_pkt_size + 1;
2285 if (t > pkt_dev->max_pkt_size) 2272 if (t > pkt_dev->max_pkt_size)
@@ -2294,7 +2281,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2294static void mpls_push(__be32 *mpls, struct pktgen_dev *pkt_dev) 2281static void mpls_push(__be32 *mpls, struct pktgen_dev *pkt_dev)
2295{ 2282{
2296 unsigned i; 2283 unsigned i;
2297 for(i = 0; i < pkt_dev->nr_labels; i++) { 2284 for (i = 0; i < pkt_dev->nr_labels; i++) {
2298 *mpls++ = pkt_dev->labels[i] & ~MPLS_STACK_BOTTOM; 2285 *mpls++ = pkt_dev->labels[i] & ~MPLS_STACK_BOTTOM;
2299 } 2286 }
2300 mpls--; 2287 mpls--;
@@ -2316,7 +2303,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2316 int datalen, iplen; 2303 int datalen, iplen;
2317 struct iphdr *iph; 2304 struct iphdr *iph;
2318 struct pktgen_hdr *pgh = NULL; 2305 struct pktgen_hdr *pgh = NULL;
2319 __be16 protocol = __constant_htons(ETH_P_IP); 2306 __be16 protocol = htons(ETH_P_IP);
2320 __be32 *mpls; 2307 __be32 *mpls;
2321 __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */ 2308 __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */
2322 __be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */ 2309 __be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */
@@ -2325,10 +2312,10 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2325 2312
2326 2313
2327 if (pkt_dev->nr_labels) 2314 if (pkt_dev->nr_labels)
2328 protocol = __constant_htons(ETH_P_MPLS_UC); 2315 protocol = htons(ETH_P_MPLS_UC);
2329 2316
2330 if (pkt_dev->vlan_id != 0xffff) 2317 if (pkt_dev->vlan_id != 0xffff)
2331 protocol = __constant_htons(ETH_P_8021Q); 2318 protocol = htons(ETH_P_8021Q);
2332 2319
2333 /* Update any of the values, used when we're incrementing various 2320 /* Update any of the values, used when we're incrementing various
2334 * fields. 2321 * fields.
@@ -2354,24 +2341,28 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2354 mpls_push(mpls, pkt_dev); 2341 mpls_push(mpls, pkt_dev);
2355 2342
2356 if (pkt_dev->vlan_id != 0xffff) { 2343 if (pkt_dev->vlan_id != 0xffff) {
2357 if(pkt_dev->svlan_id != 0xffff) { 2344 if (pkt_dev->svlan_id != 0xffff) {
2358 svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16)); 2345 svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
2359 *svlan_tci = build_tci(pkt_dev->svlan_id, 2346 *svlan_tci = build_tci(pkt_dev->svlan_id,
2360 pkt_dev->svlan_cfi, 2347 pkt_dev->svlan_cfi,
2361 pkt_dev->svlan_p); 2348 pkt_dev->svlan_p);
2362 svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16)); 2349 svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
2363 *svlan_encapsulated_proto = __constant_htons(ETH_P_8021Q); 2350 *svlan_encapsulated_proto = htons(ETH_P_8021Q);
2364 } 2351 }
2365 vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16)); 2352 vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
2366 *vlan_tci = build_tci(pkt_dev->vlan_id, 2353 *vlan_tci = build_tci(pkt_dev->vlan_id,
2367 pkt_dev->vlan_cfi, 2354 pkt_dev->vlan_cfi,
2368 pkt_dev->vlan_p); 2355 pkt_dev->vlan_p);
2369 vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16)); 2356 vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
2370 *vlan_encapsulated_proto = __constant_htons(ETH_P_IP); 2357 *vlan_encapsulated_proto = htons(ETH_P_IP);
2371 } 2358 }
2372 2359
2373 iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr)); 2360 skb->network_header = skb->tail;
2374 udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr)); 2361 skb->transport_header = skb->network_header + sizeof(struct iphdr);
2362 skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr));
2363
2364 iph = ip_hdr(skb);
2365 udph = udp_hdr(skb);
2375 2366
2376 memcpy(eth, pkt_dev->hh, 12); 2367 memcpy(eth, pkt_dev->hh, 12);
2377 *(__be16 *) & eth[12] = protocol; 2368 *(__be16 *) & eth[12] = protocol;
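Note: instead of taking pointers from two separate skb_put() calls, the packet builder now records the header offsets first and reserves both headers with a single skb_put(), then fills them through the typed accessors; this works whether network_header/transport_header are stored as pointers or as offsets. Condensed from the hunk above:

	skb->network_header   = skb->tail;
	skb->transport_header = skb->network_header + sizeof(struct iphdr);
	skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr));
	iph  = ip_hdr(skb);
	udph = udp_hdr(skb);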
@@ -2400,12 +2391,11 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2400 iph->check = 0; 2391 iph->check = 0;
2401 iph->check = ip_fast_csum((void *)iph, iph->ihl); 2392 iph->check = ip_fast_csum((void *)iph, iph->ihl);
2402 skb->protocol = protocol; 2393 skb->protocol = protocol;
2403 skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32) - 2394 skb->mac_header = (skb->network_header - ETH_HLEN -
2404 VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev); 2395 pkt_dev->nr_labels * sizeof(u32) -
2396 VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev));
2405 skb->dev = odev; 2397 skb->dev = odev;
2406 skb->pkt_type = PACKET_HOST; 2398 skb->pkt_type = PACKET_HOST;
2407 skb->nh.iph = iph;
2408 skb->h.uh = udph;
2409 2399
2410 if (pkt_dev->nfrags <= 0) 2400 if (pkt_dev->nfrags <= 0)
2411 pgh = (struct pktgen_hdr *)skb_put(skb, datalen); 2401 pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
@@ -2654,7 +2644,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2654 int datalen; 2644 int datalen;
2655 struct ipv6hdr *iph; 2645 struct ipv6hdr *iph;
2656 struct pktgen_hdr *pgh = NULL; 2646 struct pktgen_hdr *pgh = NULL;
2657 __be16 protocol = __constant_htons(ETH_P_IPV6); 2647 __be16 protocol = htons(ETH_P_IPV6);
2658 __be32 *mpls; 2648 __be32 *mpls;
2659 __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */ 2649 __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */
2660 __be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */ 2650 __be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */
@@ -2662,10 +2652,10 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2662 __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */ 2652 __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */
2663 2653
2664 if (pkt_dev->nr_labels) 2654 if (pkt_dev->nr_labels)
2665 protocol = __constant_htons(ETH_P_MPLS_UC); 2655 protocol = htons(ETH_P_MPLS_UC);
2666 2656
2667 if (pkt_dev->vlan_id != 0xffff) 2657 if (pkt_dev->vlan_id != 0xffff)
2668 protocol = __constant_htons(ETH_P_8021Q); 2658 protocol = htons(ETH_P_8021Q);
2669 2659
2670 /* Update any of the values, used when we're incrementing various 2660 /* Update any of the values, used when we're incrementing various
2671 * fields. 2661 * fields.
@@ -2690,24 +2680,28 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2690 mpls_push(mpls, pkt_dev); 2680 mpls_push(mpls, pkt_dev);
2691 2681
2692 if (pkt_dev->vlan_id != 0xffff) { 2682 if (pkt_dev->vlan_id != 0xffff) {
2693 if(pkt_dev->svlan_id != 0xffff) { 2683 if (pkt_dev->svlan_id != 0xffff) {
2694 svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16)); 2684 svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
2695 *svlan_tci = build_tci(pkt_dev->svlan_id, 2685 *svlan_tci = build_tci(pkt_dev->svlan_id,
2696 pkt_dev->svlan_cfi, 2686 pkt_dev->svlan_cfi,
2697 pkt_dev->svlan_p); 2687 pkt_dev->svlan_p);
2698 svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16)); 2688 svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
2699 *svlan_encapsulated_proto = __constant_htons(ETH_P_8021Q); 2689 *svlan_encapsulated_proto = htons(ETH_P_8021Q);
2700 } 2690 }
2701 vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16)); 2691 vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
2702 *vlan_tci = build_tci(pkt_dev->vlan_id, 2692 *vlan_tci = build_tci(pkt_dev->vlan_id,
2703 pkt_dev->vlan_cfi, 2693 pkt_dev->vlan_cfi,
2704 pkt_dev->vlan_p); 2694 pkt_dev->vlan_p);
2705 vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16)); 2695 vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
2706 *vlan_encapsulated_proto = __constant_htons(ETH_P_IPV6); 2696 *vlan_encapsulated_proto = htons(ETH_P_IPV6);
2707 } 2697 }
2708 2698
2709 iph = (struct ipv6hdr *)skb_put(skb, sizeof(struct ipv6hdr)); 2699 skb->network_header = skb->tail;
2710 udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr)); 2700 skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
2701 skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr));
2702
2703 iph = ipv6_hdr(skb);
2704 udph = udp_hdr(skb);
2711 2705
2712 memcpy(eth, pkt_dev->hh, 12); 2706 memcpy(eth, pkt_dev->hh, 12);
2713 *(__be16 *) & eth[12] = protocol; 2707 *(__be16 *) & eth[12] = protocol;
@@ -2729,7 +2723,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2729 udph->len = htons(datalen + sizeof(struct udphdr)); 2723 udph->len = htons(datalen + sizeof(struct udphdr));
2730 udph->check = 0; /* No checksum */ 2724 udph->check = 0; /* No checksum */
2731 2725
2732 *(__be32 *) iph = __constant_htonl(0x60000000); /* Version + flow */ 2726 *(__be32 *) iph = htonl(0x60000000); /* Version + flow */
2733 2727
2734 if (pkt_dev->traffic_class) { 2728 if (pkt_dev->traffic_class) {
2735 /* Version + traffic class + flow (0) */ 2729 /* Version + traffic class + flow (0) */
@@ -2744,13 +2738,12 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2744 ipv6_addr_copy(&iph->daddr, &pkt_dev->cur_in6_daddr); 2738 ipv6_addr_copy(&iph->daddr, &pkt_dev->cur_in6_daddr);
2745 ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr); 2739 ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr);
2746 2740
2747 skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32) - 2741 skb->mac_header = (skb->network_header - ETH_HLEN -
2748 VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev); 2742 pkt_dev->nr_labels * sizeof(u32) -
2743 VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev));
2749 skb->protocol = protocol; 2744 skb->protocol = protocol;
2750 skb->dev = odev; 2745 skb->dev = odev;
2751 skb->pkt_type = PACKET_HOST; 2746 skb->pkt_type = PACKET_HOST;
2752 skb->nh.ipv6h = iph;
2753 skb->h.uh = udph;
2754 2747
2755 if (pkt_dev->nfrags <= 0) 2748 if (pkt_dev->nfrags <= 0)
2756 pgh = (struct pktgen_hdr *)skb_put(skb, datalen); 2749 pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
@@ -2848,7 +2841,7 @@ static void pktgen_run(struct pktgen_thread *t)
2848 struct pktgen_dev *pkt_dev; 2841 struct pktgen_dev *pkt_dev;
2849 int started = 0; 2842 int started = 0;
2850 2843
2851 PG_DEBUG(printk("pktgen: entering pktgen_run. %p\n", t)); 2844 pr_debug("pktgen: entering pktgen_run. %p\n", t);
2852 2845
2853 if_lock(t); 2846 if_lock(t);
2854 list_for_each_entry(pkt_dev, &t->if_list, list) { 2847 list_for_each_entry(pkt_dev, &t->if_list, list) {
@@ -2880,7 +2873,7 @@ static void pktgen_stop_all_threads_ifs(void)
2880{ 2873{
2881 struct pktgen_thread *t; 2874 struct pktgen_thread *t;
2882 2875
2883 PG_DEBUG(printk("pktgen: entering pktgen_stop_all_threads_ifs.\n")); 2876 pr_debug("pktgen: entering pktgen_stop_all_threads_ifs.\n");
2884 2877
2885 mutex_lock(&pktgen_thread_lock); 2878 mutex_lock(&pktgen_thread_lock);
2886 2879
@@ -2948,7 +2941,7 @@ static void pktgen_run_all_threads(void)
2948{ 2941{
2949 struct pktgen_thread *t; 2942 struct pktgen_thread *t;
2950 2943
2951 PG_DEBUG(printk("pktgen: entering pktgen_run_all_threads.\n")); 2944 pr_debug("pktgen: entering pktgen_run_all_threads.\n");
2952 2945
2953 mutex_lock(&pktgen_thread_lock); 2946 mutex_lock(&pktgen_thread_lock);
2954 2947
@@ -3006,7 +2999,7 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
3006 2999
3007 if (!pkt_dev->running) { 3000 if (!pkt_dev->running) {
3008 printk("pktgen: interface: %s is already stopped\n", 3001 printk("pktgen: interface: %s is already stopped\n",
3009 pkt_dev->ifname); 3002 pkt_dev->odev->name);
3010 return -EINVAL; 3003 return -EINVAL;
3011 } 3004 }
3012 3005
@@ -3040,7 +3033,7 @@ static void pktgen_stop(struct pktgen_thread *t)
3040{ 3033{
3041 struct pktgen_dev *pkt_dev; 3034 struct pktgen_dev *pkt_dev;
3042 3035
3043 PG_DEBUG(printk("pktgen: entering pktgen_stop\n")); 3036 pr_debug("pktgen: entering pktgen_stop\n");
3044 3037
3045 if_lock(t); 3038 if_lock(t);
3046 3039
@@ -3064,7 +3057,7 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
3064 struct list_head *q, *n; 3057 struct list_head *q, *n;
3065 struct pktgen_dev *cur; 3058 struct pktgen_dev *cur;
3066 3059
3067 PG_DEBUG(printk("pktgen: entering pktgen_rem_one_if\n")); 3060 pr_debug("pktgen: entering pktgen_rem_one_if\n");
3068 3061
3069 if_lock(t); 3062 if_lock(t);
3070 3063
@@ -3093,7 +3086,7 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
3093 3086
3094 /* Remove all devices, free mem */ 3087 /* Remove all devices, free mem */
3095 3088
3096 PG_DEBUG(printk("pktgen: entering pktgen_rem_all_ifs\n")); 3089 pr_debug("pktgen: entering pktgen_rem_all_ifs\n");
3097 if_lock(t); 3090 if_lock(t);
3098 3091
3099 list_for_each_safe(q, n, &t->if_list) { 3092 list_for_each_safe(q, n, &t->if_list) {
@@ -3276,7 +3269,7 @@ static int pktgen_thread_worker(void *arg)
3276 3269
3277 t->pid = current->pid; 3270 t->pid = current->pid;
3278 3271
3279 PG_DEBUG(printk("pktgen: starting pktgen/%d: pid=%d\n", cpu, current->pid)); 3272 pr_debug("pktgen: starting pktgen/%d: pid=%d\n", cpu, current->pid);
3280 3273
3281 max_before_softirq = t->max_before_softirq; 3274 max_before_softirq = t->max_before_softirq;
3282 3275
@@ -3339,13 +3332,13 @@ static int pktgen_thread_worker(void *arg)
3339 set_current_state(TASK_INTERRUPTIBLE); 3332 set_current_state(TASK_INTERRUPTIBLE);
3340 } 3333 }
3341 3334
3342 PG_DEBUG(printk("pktgen: %s stopping all device\n", t->tsk->comm)); 3335 pr_debug("pktgen: %s stopping all device\n", t->tsk->comm);
3343 pktgen_stop(t); 3336 pktgen_stop(t);
3344 3337
3345 PG_DEBUG(printk("pktgen: %s removing all device\n", t->tsk->comm)); 3338 pr_debug("pktgen: %s removing all device\n", t->tsk->comm);
3346 pktgen_rem_all_ifs(t); 3339 pktgen_rem_all_ifs(t);
3347 3340
3348 PG_DEBUG(printk("pktgen: %s removing thread.\n", t->tsk->comm)); 3341 pr_debug("pktgen: %s removing thread.\n", t->tsk->comm);
3349 pktgen_rem_thread(t); 3342 pktgen_rem_thread(t);
3350 3343
3351 return 0; 3344 return 0;
@@ -3358,13 +3351,13 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
3358 if_lock(t); 3351 if_lock(t);
3359 3352
3360 list_for_each_entry(p, &t->if_list, list) 3353 list_for_each_entry(p, &t->if_list, list)
3361 if (strncmp(p->ifname, ifname, IFNAMSIZ) == 0) { 3354 if (strncmp(p->odev->name, ifname, IFNAMSIZ) == 0) {
3362 pkt_dev = p; 3355 pkt_dev = p;
3363 break; 3356 break;
3364 } 3357 }
3365 3358
3366 if_unlock(t); 3359 if_unlock(t);
3367 PG_DEBUG(printk("pktgen: find_dev(%s) returning %p\n", ifname, pkt_dev)); 3360 pr_debug("pktgen: find_dev(%s) returning %p\n", ifname, pkt_dev);
3368 return pkt_dev; 3361 return pkt_dev;
3369} 3362}
3370 3363
@@ -3399,7 +3392,7 @@ out:
3399static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) 3392static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
3400{ 3393{
3401 struct pktgen_dev *pkt_dev; 3394 struct pktgen_dev *pkt_dev;
3402 struct proc_dir_entry *pe; 3395 int err;
3403 3396
3404 /* We don't allow a device to be on several threads */ 3397 /* We don't allow a device to be on several threads */
3405 3398
@@ -3441,29 +3434,28 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
3441 pkt_dev->svlan_cfi = 0; 3434 pkt_dev->svlan_cfi = 0;
3442 pkt_dev->svlan_id = 0xffff; 3435 pkt_dev->svlan_id = 0xffff;
3443 3436
3444 strncpy(pkt_dev->ifname, ifname, IFNAMSIZ); 3437 err = pktgen_setup_dev(pkt_dev, ifname);
3438 if (err)
3439 goto out1;
3445 3440
3446 if (!pktgen_setup_dev(pkt_dev)) { 3441 pkt_dev->entry = create_proc_entry(ifname, 0600, pg_proc_dir);
3447 printk("pktgen: ERROR: pktgen_setup_dev failed.\n"); 3442 if (!pkt_dev->entry) {
3448 if (pkt_dev->flows)
3449 vfree(pkt_dev->flows);
3450 kfree(pkt_dev);
3451 return -ENODEV;
3452 }
3453
3454 pe = create_proc_entry(ifname, 0600, pg_proc_dir);
3455 if (!pe) {
3456 printk("pktgen: cannot create %s/%s procfs entry.\n", 3443 printk("pktgen: cannot create %s/%s procfs entry.\n",
3457 PG_PROC_DIR, ifname); 3444 PG_PROC_DIR, ifname);
3458 if (pkt_dev->flows) 3445 err = -EINVAL;
3459 vfree(pkt_dev->flows); 3446 goto out2;
3460 kfree(pkt_dev);
3461 return -EINVAL;
3462 } 3447 }
3463 pe->proc_fops = &pktgen_if_fops; 3448 pkt_dev->entry->proc_fops = &pktgen_if_fops;
3464 pe->data = pkt_dev; 3449 pkt_dev->entry->data = pkt_dev;
3465 3450
3466 return add_dev_to_thread(t, pkt_dev); 3451 return add_dev_to_thread(t, pkt_dev);
3452out2:
3453 dev_put(pkt_dev->odev);
3454out1:
3455 if (pkt_dev->flows)
3456 vfree(pkt_dev->flows);
3457 kfree(pkt_dev);
3458 return err;
3467} 3459}
3468 3460
3469static int __init pktgen_create_thread(int cpu) 3461static int __init pktgen_create_thread(int cpu)
@@ -3533,7 +3525,7 @@ static int pktgen_remove_device(struct pktgen_thread *t,
3533 struct pktgen_dev *pkt_dev) 3525 struct pktgen_dev *pkt_dev)
3534{ 3526{
3535 3527
3536 PG_DEBUG(printk("pktgen: remove_device pkt_dev=%p\n", pkt_dev)); 3528 pr_debug("pktgen: remove_device pkt_dev=%p\n", pkt_dev);
3537 3529
3538 if (pkt_dev->running) { 3530 if (pkt_dev->running) {
3539 printk("pktgen:WARNING: trying to remove a running interface, stopping it now.\n"); 3531 printk("pktgen:WARNING: trying to remove a running interface, stopping it now.\n");
@@ -3551,9 +3543,8 @@ static int pktgen_remove_device(struct pktgen_thread *t,
3551 3543
3552 _rem_dev_from_if_list(t, pkt_dev); 3544 _rem_dev_from_if_list(t, pkt_dev);
3553 3545
3554 /* Clean up proc file system */ 3546 if (pkt_dev->entry)
3555 3547 remove_proc_entry(pkt_dev->entry->name, pg_proc_dir);
3556 remove_proc_entry(pkt_dev->ifname, pg_proc_dir);
3557 3548
3558 if (pkt_dev->flows) 3549 if (pkt_dev->flows)
3559 vfree(pkt_dev->flows); 3550 vfree(pkt_dev->flows);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 33ea8eac7fe0..cec111109155 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -50,11 +50,13 @@
50#include <net/sock.h> 50#include <net/sock.h>
51#include <net/pkt_sched.h> 51#include <net/pkt_sched.h>
52#include <net/fib_rules.h> 52#include <net/fib_rules.h>
53#include <net/netlink.h> 53#include <net/rtnetlink.h>
54#ifdef CONFIG_NET_WIRELESS_RTNETLINK 54
55#include <linux/wireless.h> 55struct rtnl_link
56#include <net/iw_handler.h> 56{
57#endif /* CONFIG_NET_WIRELESS_RTNETLINK */ 57 rtnl_doit_func doit;
58 rtnl_dumpit_func dumpit;
59};
58 60
59static DEFINE_MUTEX(rtnl_mutex); 61static DEFINE_MUTEX(rtnl_mutex);
60static struct sock *rtnl; 62static struct sock *rtnl;
@@ -95,7 +97,151 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
95 return 0; 97 return 0;
96} 98}
97 99
98struct rtnetlink_link * rtnetlink_links[NPROTO]; 100static struct rtnl_link *rtnl_msg_handlers[NPROTO];
101
102static inline int rtm_msgindex(int msgtype)
103{
104 int msgindex = msgtype - RTM_BASE;
105
106 /*
107 * msgindex < 0 implies someone tried to register a netlink
108 * control code. msgindex >= RTM_NR_MSGTYPES may indicate that
109 * the message type has not been added to linux/rtnetlink.h
110 */
111 BUG_ON(msgindex < 0 || msgindex >= RTM_NR_MSGTYPES);
112
113 return msgindex;
114}
115
116static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
117{
118 struct rtnl_link *tab;
119
120 tab = rtnl_msg_handlers[protocol];
121 if (tab == NULL || tab[msgindex].doit == NULL)
122 tab = rtnl_msg_handlers[PF_UNSPEC];
123
124 return tab ? tab[msgindex].doit : NULL;
125}
126
127static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
128{
129 struct rtnl_link *tab;
130
131 tab = rtnl_msg_handlers[protocol];
132 if (tab == NULL || tab[msgindex].dumpit == NULL)
133 tab = rtnl_msg_handlers[PF_UNSPEC];
134
135 return tab ? tab[msgindex].dumpit : NULL;
136}
137
138/**
139 * __rtnl_register - Register a rtnetlink message type
140 * @protocol: Protocol family or PF_UNSPEC
141 * @msgtype: rtnetlink message type
142 * @doit: Function pointer called for each request message
143 * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
144 *
145 * Registers the specified function pointers (at least one of them has
146 * to be non-NULL) to be called whenever a request message for the
147 * specified protocol family and message type is received.
148 *
149 * The special protocol family PF_UNSPEC may be used to define fallback
150 * function pointers for the case when no entry for the specific protocol
151 * family exists.
152 *
153 * Returns 0 on success or a negative error code.
154 */
155int __rtnl_register(int protocol, int msgtype,
156 rtnl_doit_func doit, rtnl_dumpit_func dumpit)
157{
158 struct rtnl_link *tab;
159 int msgindex;
160
161 BUG_ON(protocol < 0 || protocol >= NPROTO);
162 msgindex = rtm_msgindex(msgtype);
163
164 tab = rtnl_msg_handlers[protocol];
165 if (tab == NULL) {
166 tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL);
167 if (tab == NULL)
168 return -ENOBUFS;
169
170 rtnl_msg_handlers[protocol] = tab;
171 }
172
173 if (doit)
174 tab[msgindex].doit = doit;
175
176 if (dumpit)
177 tab[msgindex].dumpit = dumpit;
178
179 return 0;
180}
181
182EXPORT_SYMBOL_GPL(__rtnl_register);
183
184/**
185 * rtnl_register - Register a rtnetlink message type
186 *
187 * Identical to __rtnl_register() but panics on failure. This is useful
 188 * as failure of this function is very unlikely; it can only happen due
189 * to lack of memory when allocating the chain to store all message
190 * handlers for a protocol. Meant for use in init functions where lack
 191 * of memory implies no sense in continuing.
192 */
193void rtnl_register(int protocol, int msgtype,
194 rtnl_doit_func doit, rtnl_dumpit_func dumpit)
195{
196 if (__rtnl_register(protocol, msgtype, doit, dumpit) < 0)
197 panic("Unable to register rtnetlink message handler, "
198 "protocol = %d, message type = %d\n",
199 protocol, msgtype);
200}
201
202EXPORT_SYMBOL_GPL(rtnl_register);
203
204/**
205 * rtnl_unregister - Unregister a rtnetlink message type
206 * @protocol: Protocol family or PF_UNSPEC
207 * @msgtype: rtnetlink message type
208 *
209 * Returns 0 on success or a negative error code.
210 */
211int rtnl_unregister(int protocol, int msgtype)
212{
213 int msgindex;
214
215 BUG_ON(protocol < 0 || protocol >= NPROTO);
216 msgindex = rtm_msgindex(msgtype);
217
218 if (rtnl_msg_handlers[protocol] == NULL)
219 return -ENOENT;
220
221 rtnl_msg_handlers[protocol][msgindex].doit = NULL;
222 rtnl_msg_handlers[protocol][msgindex].dumpit = NULL;
223
224 return 0;
225}
226
227EXPORT_SYMBOL_GPL(rtnl_unregister);
228
229/**
 230 * rtnl_unregister_all - Unregister all rtnetlink message types of a protocol
231 * @protocol : Protocol family or PF_UNSPEC
232 *
 233 * Identical to calling rtnl_unregister() for all registered message types
234 * of a certain protocol family.
235 */
236void rtnl_unregister_all(int protocol)
237{
238 BUG_ON(protocol < 0 || protocol >= NPROTO);
239
240 kfree(rtnl_msg_handlers[protocol]);
241 rtnl_msg_handlers[protocol] = NULL;
242}
243
244EXPORT_SYMBOL_GPL(rtnl_unregister_all);
99 245
100static const int rtm_min[RTM_NR_FAMILIES] = 246static const int rtm_min[RTM_NR_FAMILIES] =
101{ 247{
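Note: with this, the per-family dispatch tables are private to rtnetlink.c and protocols register their handlers explicitly. Usage sketch for a hypothetical protocol module (handler names are illustrative, not from the patch):

	static int __init foo_init(void)
	{
		/* rtnl_register() panics on allocation failure,
		 * so no error handling is needed in init paths */
		rtnl_register(PF_INET, RTM_NEWROUTE, foo_newroute, NULL);
		rtnl_register(PF_INET, RTM_GETROUTE, NULL, foo_dump_routes);
		return 0;
	}

	static void __exit foo_exit(void)
	{
		rtnl_unregister_all(PF_INET);	/* frees the whole table */
	}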
@@ -249,7 +395,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
249 operstate == IF_OPER_UNKNOWN) 395 operstate == IF_OPER_UNKNOWN)
250 operstate = IF_OPER_DORMANT; 396 operstate = IF_OPER_DORMANT;
251 break; 397 break;
252 }; 398 }
253 399
254 if (dev->operstate != operstate) { 400 if (dev->operstate != operstate) {
255 write_lock_bh(&dev_base_lock); 401 write_lock_bh(&dev_base_lock);
@@ -393,7 +539,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
393 int s_idx = cb->args[0]; 539 int s_idx = cb->args[0];
394 struct net_device *dev; 540 struct net_device *dev;
395 541
396 read_lock(&dev_base_lock);
397 for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { 542 for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
398 if (idx < s_idx) 543 if (idx < s_idx)
399 continue; 544 continue;
@@ -402,7 +547,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
402 cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0) 547 cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0)
403 break; 548 break;
404 } 549 }
405 read_unlock(&dev_base_lock);
406 cb->args[0] = idx; 550 cb->args[0] = idx;
407 551
408 return skb->len; 552 return skb->len;
@@ -536,17 +680,6 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
536 modified = 1; 680 modified = 1;
537 } 681 }
538 682
539#ifdef CONFIG_NET_WIRELESS_RTNETLINK
540 if (tb[IFLA_WIRELESS]) {
541 /* Call Wireless Extensions.
542 * Various stuff checked in there... */
543 err = wireless_rtnetlink_set(dev, nla_data(tb[IFLA_WIRELESS]),
544 nla_len(tb[IFLA_WIRELESS]));
545 if (err < 0)
546 goto errout_dev;
547 }
548#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
549
550 if (tb[IFLA_BROADCAST]) { 683 if (tb[IFLA_BROADCAST]) {
551 nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len); 684 nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len);
552 send_addr_notify = 1; 685 send_addr_notify = 1;
@@ -610,22 +743,6 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
610 } else 743 } else
611 return -EINVAL; 744 return -EINVAL;
612 745
613
614#ifdef CONFIG_NET_WIRELESS_RTNETLINK
615 if (tb[IFLA_WIRELESS]) {
616 /* Call Wireless Extensions. We need to know the size before
617 * we can alloc. Various stuff checked in there... */
618 err = wireless_rtnetlink_get(dev, nla_data(tb[IFLA_WIRELESS]),
619 nla_len(tb[IFLA_WIRELESS]),
620 &iw_buf, &iw_buf_len);
621 if (err < 0)
622 goto errout;
623
624 /* Payload is at an offset in buffer */
625 iw = iw_buf + IW_EV_POINT_OFF;
626 }
627#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
628
629 nskb = nlmsg_new(if_nlmsg_size(iw_buf_len), GFP_KERNEL); 746 nskb = nlmsg_new(if_nlmsg_size(iw_buf_len), GFP_KERNEL);
630 if (nskb == NULL) { 747 if (nskb == NULL) {
631 err = -ENOBUFS; 748 err = -ENOBUFS;
@@ -659,12 +776,12 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
659 int type = cb->nlh->nlmsg_type-RTM_BASE; 776 int type = cb->nlh->nlmsg_type-RTM_BASE;
660 if (idx < s_idx || idx == PF_PACKET) 777 if (idx < s_idx || idx == PF_PACKET)
661 continue; 778 continue;
662 if (rtnetlink_links[idx] == NULL || 779 if (rtnl_msg_handlers[idx] == NULL ||
663 rtnetlink_links[idx][type].dumpit == NULL) 780 rtnl_msg_handlers[idx][type].dumpit == NULL)
664 continue; 781 continue;
665 if (idx > s_idx) 782 if (idx > s_idx)
666 memset(&cb->args[0], 0, sizeof(cb->args)); 783 memset(&cb->args[0], 0, sizeof(cb->args));
667 if (rtnetlink_links[idx][type].dumpit(skb, cb)) 784 if (rtnl_msg_handlers[idx][type].dumpit(skb, cb))
668 break; 785 break;
669 } 786 }
670 cb->family = idx; 787 cb->family = idx;
@@ -700,30 +817,18 @@ static int rtattr_max;
700 817
701/* Process one rtnetlink message. */ 818/* Process one rtnetlink message. */
702 819
703static __inline__ int 820static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
704rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
705{ 821{
706 struct rtnetlink_link *link; 822 rtnl_doit_func doit;
707 struct rtnetlink_link *link_tab;
708 int sz_idx, kind; 823 int sz_idx, kind;
709 int min_len; 824 int min_len;
710 int family; 825 int family;
711 int type; 826 int type;
712 int err; 827 int err;
713 828
714 /* Only requests are handled by kernel now */
715 if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
716 return 0;
717
718 type = nlh->nlmsg_type; 829 type = nlh->nlmsg_type;
719
720 /* A control message: ignore them */
721 if (type < RTM_BASE)
722 return 0;
723
724 /* Unknown message: reply with EINVAL */
725 if (type > RTM_MAX) 830 if (type > RTM_MAX)
726 goto err_inval; 831 return -EOPNOTSUPP;
727 832
728 type -= RTM_BASE; 833 type -= RTM_BASE;
729 834
@@ -732,45 +837,33 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
732 return 0; 837 return 0;
733 838
734 family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family; 839 family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
735 if (family >= NPROTO) { 840 if (family >= NPROTO)
736 *errp = -EAFNOSUPPORT; 841 return -EAFNOSUPPORT;
737 return -1;
738 }
739
740 link_tab = rtnetlink_links[family];
741 if (link_tab == NULL)
742 link_tab = rtnetlink_links[PF_UNSPEC];
743 link = &link_tab[type];
744 842
745 sz_idx = type>>2; 843 sz_idx = type>>2;
746 kind = type&3; 844 kind = type&3;
747 845
748 if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN)) { 846 if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN))
749 *errp = -EPERM; 847 return -EPERM;
750 return -1;
751 }
752 848
753 if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { 849 if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
754 if (link->dumpit == NULL) 850 rtnl_dumpit_func dumpit;
755 link = &(rtnetlink_links[PF_UNSPEC][type]);
756
757 if (link->dumpit == NULL)
758 goto err_inval;
759 851
760 if ((*errp = netlink_dump_start(rtnl, skb, nlh, 852 dumpit = rtnl_get_dumpit(family, type);
761 link->dumpit, NULL)) != 0) { 853 if (dumpit == NULL)
762 return -1; 854 return -EOPNOTSUPP;
763 }
764 855
765 netlink_queue_skip(nlh, skb); 856 __rtnl_unlock();
766 return -1; 857 err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
858 rtnl_lock();
859 return err;
767 } 860 }
768 861
769 memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *))); 862 memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
770 863
771 min_len = rtm_min[sz_idx]; 864 min_len = rtm_min[sz_idx];
772 if (nlh->nlmsg_len < min_len) 865 if (nlh->nlmsg_len < min_len)
773 goto err_inval; 866 return -EINVAL;
774 867
775 if (nlh->nlmsg_len > min_len) { 868 if (nlh->nlmsg_len > min_len) {
776 int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); 869 int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
@@ -780,25 +873,18 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
780 unsigned flavor = attr->rta_type; 873 unsigned flavor = attr->rta_type;
781 if (flavor) { 874 if (flavor) {
782 if (flavor > rta_max[sz_idx]) 875 if (flavor > rta_max[sz_idx])
783 goto err_inval; 876 return -EINVAL;
784 rta_buf[flavor-1] = attr; 877 rta_buf[flavor-1] = attr;
785 } 878 }
786 attr = RTA_NEXT(attr, attrlen); 879 attr = RTA_NEXT(attr, attrlen);
787 } 880 }
788 } 881 }
789 882
790 if (link->doit == NULL) 883 doit = rtnl_get_doit(family, type);
791 link = &(rtnetlink_links[PF_UNSPEC][type]); 884 if (doit == NULL)
792 if (link->doit == NULL) 885 return -EOPNOTSUPP;
793 goto err_inval;
794 err = link->doit(skb, nlh, (void *)&rta_buf[0]);
795 886
796 *errp = err; 887 return doit(skb, nlh, (void *)&rta_buf[0]);
797 return err;
798
799err_inval:
800 *errp = -EINVAL;
801 return -1;
802} 888}
803 889
804static void rtnetlink_rcv(struct sock *sk, int len) 890static void rtnetlink_rcv(struct sock *sk, int len)
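Note: rtnetlink_rcv_msg() now returns a plain negative errno — the netlink core takes care of ACKing — instead of smuggling the error out through *errp and returning -1. The dump path also drops rtnl_mutex around netlink_dump_start() so dump callbacks can take it themselves. Condensed from the hunk above:

	if (kind == 2 && nlh->nlmsg_flags & NLM_F_DUMP) {
		rtnl_dumpit_func dumpit = rtnl_get_dumpit(family, type);
		if (dumpit == NULL)
			return -EOPNOTSUPP;
		__rtnl_unlock();	/* dump runs without rtnl_mutex held */
		err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
		rtnl_lock();
		return err;
	}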
@@ -814,25 +900,6 @@ static void rtnetlink_rcv(struct sock *sk, int len)
814 } while (qlen); 900 } while (qlen);
815} 901}
816 902
817static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
818{
819 [RTM_GETLINK - RTM_BASE] = { .doit = rtnl_getlink,
820 .dumpit = rtnl_dump_ifinfo },
821 [RTM_SETLINK - RTM_BASE] = { .doit = rtnl_setlink },
822 [RTM_GETADDR - RTM_BASE] = { .dumpit = rtnl_dump_all },
823 [RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnl_dump_all },
824 [RTM_NEWNEIGH - RTM_BASE] = { .doit = neigh_add },
825 [RTM_DELNEIGH - RTM_BASE] = { .doit = neigh_delete },
826 [RTM_GETNEIGH - RTM_BASE] = { .dumpit = neigh_dump_info },
827#ifdef CONFIG_FIB_RULES
828 [RTM_NEWRULE - RTM_BASE] = { .doit = fib_nl_newrule },
829 [RTM_DELRULE - RTM_BASE] = { .doit = fib_nl_delrule },
830#endif
831 [RTM_GETRULE - RTM_BASE] = { .dumpit = rtnl_dump_all },
832 [RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info },
833 [RTM_SETNEIGHTBL - RTM_BASE] = { .doit = neightbl_set },
834};
835
836static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) 903static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
837{ 904{
838 struct net_device *dev = ptr; 905 struct net_device *dev = ptr;
@@ -874,19 +941,22 @@ void __init rtnetlink_init(void)
874 panic("rtnetlink_init: cannot allocate rta_buf\n"); 941 panic("rtnetlink_init: cannot allocate rta_buf\n");
875 942
876 rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv, 943 rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv,
877 THIS_MODULE); 944 &rtnl_mutex, THIS_MODULE);
878 if (rtnl == NULL) 945 if (rtnl == NULL)
879 panic("rtnetlink_init: cannot initialize rtnetlink\n"); 946 panic("rtnetlink_init: cannot initialize rtnetlink\n");
880 netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); 947 netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
881 register_netdevice_notifier(&rtnetlink_dev_notifier); 948 register_netdevice_notifier(&rtnetlink_dev_notifier);
882 rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table; 949
883 rtnetlink_links[PF_PACKET] = link_rtnetlink_table; 950 rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo);
951 rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL);
952
953 rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all);
954 rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all);
884} 955}
885 956
886EXPORT_SYMBOL(__rta_fill); 957EXPORT_SYMBOL(__rta_fill);
887EXPORT_SYMBOL(rtattr_strlcpy); 958EXPORT_SYMBOL(rtattr_strlcpy);
888EXPORT_SYMBOL(rtattr_parse); 959EXPORT_SYMBOL(rtattr_parse);
889EXPORT_SYMBOL(rtnetlink_links);
890EXPORT_SYMBOL(rtnetlink_put_metrics); 960EXPORT_SYMBOL(rtnetlink_put_metrics);
891EXPORT_SYMBOL(rtnl_lock); 961EXPORT_SYMBOL(rtnl_lock);
892EXPORT_SYMBOL(rtnl_trylock); 962EXPORT_SYMBOL(rtnl_trylock);
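
The hunks above retire the static link_rtnetlink_table in favour of per-family registration through rtnl_register(), with rtnl_get_doit() doing the lookup (falling back to PF_UNSPEC, as the removed code did by hand). The registry itself is not part of this excerpt, so the following is only a sketch of what such a table plausibly looks like; the typedef names, the allocation strategy, and the RTM_BASE-relative indexing are assumptions, not the merged code:

    /* Sketch of a per-family handler registry; assumed, not from this diff. */
    typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, void *);
    typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *);

    struct rtnl_link {
    	rtnl_doit_func   doit;
    	rtnl_dumpit_func dumpit;
    };

    static struct rtnl_link *rtnl_msg_handlers[NPROTO];

    void rtnl_register(int protocol, int msgtype,
    		   rtnl_doit_func doit, rtnl_dumpit_func dumpit)
    {
    	struct rtnl_link *tab = rtnl_msg_handlers[protocol];
    	int msgindex = msgtype - RTM_BASE;

    	if (tab == NULL) {
    		tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL);
    		if (tab == NULL)
    			return;		/* sketch: real code must report this */
    		rtnl_msg_handlers[protocol] = tab;
    	}
    	if (doit)
    		tab[msgindex].doit = doit;
    	if (dumpit)
    		tab[msgindex].dumpit = dumpit;
    }

    rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
    {
    	struct rtnl_link *tab = rtnl_msg_handlers[protocol];

    	if (tab == NULL || tab[msgindex].doit == NULL)
    		tab = rtnl_msg_handlers[PF_UNSPEC];	/* same fallback as before */

    	return tab ? tab[msgindex].doit : NULL;
    }
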
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 336958fbbcb2..32f087b5233e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -55,6 +55,7 @@
55#include <linux/cache.h> 55#include <linux/cache.h>
56#include <linux/rtnetlink.h> 56#include <linux/rtnetlink.h>
57#include <linux/init.h> 57#include <linux/init.h>
58#include <linux/scatterlist.h>
58 59
59#include <net/protocol.h> 60#include <net/protocol.h>
60#include <net/dst.h> 61#include <net/dst.h>
@@ -87,8 +88,9 @@ static struct kmem_cache *skbuff_fclone_cache __read_mostly;
87void skb_over_panic(struct sk_buff *skb, int sz, void *here) 88void skb_over_panic(struct sk_buff *skb, int sz, void *here)
88{ 89{
89 printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p " 90 printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
90 "data:%p tail:%p end:%p dev:%s\n", 91 "data:%p tail:%#lx end:%#lx dev:%s\n",
91 here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end, 92 here, skb->len, sz, skb->head, skb->data,
93 (unsigned long)skb->tail, (unsigned long)skb->end,
92 skb->dev ? skb->dev->name : "<NULL>"); 94 skb->dev ? skb->dev->name : "<NULL>");
93 BUG(); 95 BUG();
94} 96}
@@ -105,8 +107,9 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here)
105void skb_under_panic(struct sk_buff *skb, int sz, void *here) 107void skb_under_panic(struct sk_buff *skb, int sz, void *here)
106{ 108{
107 printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p " 109 printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
108 "data:%p tail:%p end:%p dev:%s\n", 110 "data:%p tail:%#lx end:%#lx dev:%s\n",
109 here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end, 111 here, skb->len, sz, skb->head, skb->data,
112 (unsigned long)skb->tail, (unsigned long)skb->end,
110 skb->dev ? skb->dev->name : "<NULL>"); 113 skb->dev ? skb->dev->name : "<NULL>");
111 BUG(); 114 BUG();
112} 115}
@@ -155,20 +158,22 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
155 if (!skb) 158 if (!skb)
156 goto out; 159 goto out;
157 160
158 /* Get the DATA. Size must match skb_add_mtu(). */
159 size = SKB_DATA_ALIGN(size); 161 size = SKB_DATA_ALIGN(size);
160 data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), 162 data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
161 gfp_mask, node); 163 gfp_mask, node);
162 if (!data) 164 if (!data)
163 goto nodata; 165 goto nodata;
164 166
165 memset(skb, 0, offsetof(struct sk_buff, truesize)); 167 /*
168 * See comment in sk_buff definition, just before the 'tail' member
169 */
170 memset(skb, 0, offsetof(struct sk_buff, tail));
166 skb->truesize = size + sizeof(struct sk_buff); 171 skb->truesize = size + sizeof(struct sk_buff);
167 atomic_set(&skb->users, 1); 172 atomic_set(&skb->users, 1);
168 skb->head = data; 173 skb->head = data;
169 skb->data = data; 174 skb->data = data;
170 skb->tail = data; 175 skb_reset_tail_pointer(skb);
171 skb->end = data + size; 176 skb->end = skb->tail + size;
172 /* make sure we initialize shinfo sequentially */ 177 /* make sure we initialize shinfo sequentially */
173 shinfo = skb_shinfo(skb); 178 shinfo = skb_shinfo(skb);
174 atomic_set(&shinfo->dataref, 1); 179 atomic_set(&shinfo->dataref, 1);
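
Many of the changes below hinge on the new representation where, under NET_SKBUFF_DATA_USES_OFFSET, skb->tail and skb->end hold offsets from skb->head instead of raw pointers (hence the %#lx casts in the panic messages above, and skb->end = skb->tail + size here). A rough model of the accessors this code calls — the real definitions live in include/linux/skbuff.h, so treat this as illustration rather than the merged header:

    #ifdef NET_SKBUFF_DATA_USES_OFFSET
    typedef unsigned int sk_buff_data_t;		/* offset from skb->head */

    static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
    {
    	return skb->head + skb->tail;
    }

    static inline void skb_reset_tail_pointer(struct sk_buff *skb)
    {
    	skb->tail = skb->data - skb->head;
    }

    static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
    {
    	return skb->head + skb->end;
    }
    #else
    typedef unsigned char *sk_buff_data_t;		/* plain pointer */

    static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
    {
    	return skb->tail;
    }

    static inline void skb_reset_tail_pointer(struct sk_buff *skb)
    {
    	skb->tail = skb->data;
    }

    static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
    {
    	return skb->end;
    }
    #endif

On 64-bit kernels these fields shrink from eight bytes to four, which is the point of the exercise.
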
@@ -299,7 +304,7 @@ void kfree_skbmem(struct sk_buff *skb)
299 if (atomic_dec_and_test(fclone_ref)) 304 if (atomic_dec_and_test(fclone_ref))
300 kmem_cache_free(skbuff_fclone_cache, other); 305 kmem_cache_free(skbuff_fclone_cache, other);
301 break; 306 break;
302 }; 307 }
303} 308}
304 309
305/** 310/**
@@ -321,15 +326,13 @@ void __kfree_skb(struct sk_buff *skb)
321 WARN_ON(in_irq()); 326 WARN_ON(in_irq());
322 skb->destructor(skb); 327 skb->destructor(skb);
323 } 328 }
324#ifdef CONFIG_NETFILTER
325 nf_conntrack_put(skb->nfct);
326#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 329#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
330 nf_conntrack_put(skb->nfct);
327 nf_conntrack_put_reasm(skb->nfct_reasm); 331 nf_conntrack_put_reasm(skb->nfct_reasm);
328#endif 332#endif
329#ifdef CONFIG_BRIDGE_NETFILTER 333#ifdef CONFIG_BRIDGE_NETFILTER
330 nf_bridge_put(skb->nf_bridge); 334 nf_bridge_put(skb->nf_bridge);
331#endif 335#endif
332#endif
333/* XXX: IS this still necessary? - JHS */ 336/* XXX: IS this still necessary? - JHS */
334#ifdef CONFIG_NET_SCHED 337#ifdef CONFIG_NET_SCHED
335 skb->tc_index = 0; 338 skb->tc_index = 0;
@@ -396,9 +399,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
396 n->sk = NULL; 399 n->sk = NULL;
397 C(tstamp); 400 C(tstamp);
398 C(dev); 401 C(dev);
399 C(h); 402 C(transport_header);
400 C(nh); 403 C(network_header);
401 C(mac); 404 C(mac_header);
402 C(dst); 405 C(dst);
403 dst_clone(skb->dst); 406 dst_clone(skb->dst);
404 C(sp); 407 C(sp);
@@ -422,19 +425,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
422 C(protocol); 425 C(protocol);
423 n->destructor = NULL; 426 n->destructor = NULL;
424 C(mark); 427 C(mark);
425#ifdef CONFIG_NETFILTER 428 __nf_copy(n, skb);
426 C(nfct);
427 nf_conntrack_get(skb->nfct);
428 C(nfctinfo);
429#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
430 C(nfct_reasm);
431 nf_conntrack_get_reasm(skb->nfct_reasm);
432#endif
433#ifdef CONFIG_BRIDGE_NETFILTER
434 C(nf_bridge);
435 nf_bridge_get(skb->nf_bridge);
436#endif
437#endif /*CONFIG_NETFILTER*/
438#ifdef CONFIG_NET_SCHED 429#ifdef CONFIG_NET_SCHED
439 C(tc_index); 430 C(tc_index);
440#ifdef CONFIG_NET_CLS_ACT 431#ifdef CONFIG_NET_CLS_ACT
@@ -460,11 +451,12 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
460 451
461static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) 452static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
462{ 453{
454#ifndef NET_SKBUFF_DATA_USES_OFFSET
463 /* 455 /*
464 * Shift between the two data areas in bytes 456 * Shift between the two data areas in bytes
465 */ 457 */
466 unsigned long offset = new->data - old->data; 458 unsigned long offset = new->data - old->data;
467 459#endif
468 new->sk = NULL; 460 new->sk = NULL;
469 new->dev = old->dev; 461 new->dev = old->dev;
470 new->priority = old->priority; 462 new->priority = old->priority;
@@ -473,9 +465,15 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
473#ifdef CONFIG_INET 465#ifdef CONFIG_INET
474 new->sp = secpath_get(old->sp); 466 new->sp = secpath_get(old->sp);
475#endif 467#endif
476 new->h.raw = old->h.raw + offset; 468 new->transport_header = old->transport_header;
477 new->nh.raw = old->nh.raw + offset; 469 new->network_header = old->network_header;
478 new->mac.raw = old->mac.raw + offset; 470 new->mac_header = old->mac_header;
471#ifndef NET_SKBUFF_DATA_USES_OFFSET
472 /* {transport,network,mac}_header are relative to skb->head */
473 new->transport_header += offset;
474 new->network_header += offset;
475 new->mac_header += offset;
476#endif
479 memcpy(new->cb, old->cb, sizeof(old->cb)); 477 memcpy(new->cb, old->cb, sizeof(old->cb));
480 new->local_df = old->local_df; 478 new->local_df = old->local_df;
481 new->fclone = SKB_FCLONE_UNAVAILABLE; 479 new->fclone = SKB_FCLONE_UNAVAILABLE;
@@ -483,22 +481,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
483 new->tstamp = old->tstamp; 481 new->tstamp = old->tstamp;
484 new->destructor = NULL; 482 new->destructor = NULL;
485 new->mark = old->mark; 483 new->mark = old->mark;
486#ifdef CONFIG_NETFILTER 484 __nf_copy(new, old);
487 new->nfct = old->nfct;
488 nf_conntrack_get(old->nfct);
489 new->nfctinfo = old->nfctinfo;
490#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
491 new->nfct_reasm = old->nfct_reasm;
492 nf_conntrack_get_reasm(old->nfct_reasm);
493#endif
494#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) 485#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
495 new->ipvs_property = old->ipvs_property; 486 new->ipvs_property = old->ipvs_property;
496#endif 487#endif
497#ifdef CONFIG_BRIDGE_NETFILTER
498 new->nf_bridge = old->nf_bridge;
499 nf_bridge_get(old->nf_bridge);
500#endif
501#endif
502#ifdef CONFIG_NET_SCHED 488#ifdef CONFIG_NET_SCHED
503#ifdef CONFIG_NET_CLS_ACT 489#ifdef CONFIG_NET_CLS_ACT
504 new->tc_verd = old->tc_verd; 490 new->tc_verd = old->tc_verd;
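
skb_clone() and copy_skb_header() now funnel the netfilter state copying through a single __nf_copy() helper. Its body is not in this excerpt; the sketch below simply refolds the blocks deleted above (following the conntrack-guard style the new __kfree_skb() uses), so treat it as a reconstruction rather than the definitive header code:

    /* Reconstruction of __nf_copy() from the removed blocks; assumed. */
    static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
    {
    #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
    	dst->nfct = src->nfct;
    	nf_conntrack_get(src->nfct);
    	dst->nfctinfo = src->nfctinfo;
    	dst->nfct_reasm = src->nfct_reasm;
    	nf_conntrack_get_reasm(src->nfct_reasm);
    #endif
    #ifdef CONFIG_BRIDGE_NETFILTER
    	dst->nf_bridge = src->nf_bridge;
    	nf_bridge_get(src->nf_bridge);
    #endif
    }
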
@@ -535,8 +521,12 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
535 /* 521 /*
536 * Allocate the copy buffer 522 * Allocate the copy buffer
537 */ 523 */
538 struct sk_buff *n = alloc_skb(skb->end - skb->head + skb->data_len, 524 struct sk_buff *n;
539 gfp_mask); 525#ifdef NET_SKBUFF_DATA_USES_OFFSET
526 n = alloc_skb(skb->end + skb->data_len, gfp_mask);
527#else
528 n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
529#endif
540 if (!n) 530 if (!n)
541 return NULL; 531 return NULL;
542 532
@@ -573,8 +563,12 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
573 /* 563 /*
574 * Allocate the copy buffer 564 * Allocate the copy buffer
575 */ 565 */
576 struct sk_buff *n = alloc_skb(skb->end - skb->head, gfp_mask); 566 struct sk_buff *n;
577 567#ifdef NET_SKBUFF_DATA_USES_OFFSET
568 n = alloc_skb(skb->end, gfp_mask);
569#else
570 n = alloc_skb(skb->end - skb->head, gfp_mask);
571#endif
578 if (!n) 572 if (!n)
579 goto out; 573 goto out;
580 574
@@ -583,7 +577,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
583 /* Set the tail pointer and length */ 577 /* Set the tail pointer and length */
584 skb_put(n, skb_headlen(skb)); 578 skb_put(n, skb_headlen(skb));
585 /* Copy the bytes */ 579 /* Copy the bytes */
586 memcpy(n->data, skb->data, n->len); 580 skb_copy_from_linear_data(skb, n->data, n->len);
587 n->csum = skb->csum; 581 n->csum = skb->csum;
588 n->ip_summed = skb->ip_summed; 582 n->ip_summed = skb->ip_summed;
589 583
@@ -632,7 +626,11 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
632{ 626{
633 int i; 627 int i;
634 u8 *data; 628 u8 *data;
629#ifdef NET_SKBUFF_DATA_USES_OFFSET
630 int size = nhead + skb->end + ntail;
631#else
635 int size = nhead + (skb->end - skb->head) + ntail; 632 int size = nhead + (skb->end - skb->head) + ntail;
633#endif
636 long off; 634 long off;
637 635
638 if (skb_shared(skb)) 636 if (skb_shared(skb))
@@ -646,8 +644,14 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
646 644
647 /* Copy only real data... and, alas, header. This should be 645 /* Copy only real data... and, alas, header. This should be
648 * optimized for the cases when header is void. */ 646 * optimized for the cases when header is void. */
649 memcpy(data + nhead, skb->head, skb->tail - skb->head); 647 memcpy(data + nhead, skb->head,
650 memcpy(data + size, skb->end, sizeof(struct skb_shared_info)); 648#ifdef NET_SKBUFF_DATA_USES_OFFSET
649 skb->tail);
650#else
651 skb->tail - skb->head);
652#endif
653 memcpy(data + size, skb_end_pointer(skb),
654 sizeof(struct skb_shared_info));
651 655
652 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 656 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
653 get_page(skb_shinfo(skb)->frags[i].page); 657 get_page(skb_shinfo(skb)->frags[i].page);
@@ -660,12 +664,18 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
660 off = (data + nhead) - skb->head; 664 off = (data + nhead) - skb->head;
661 665
662 skb->head = data; 666 skb->head = data;
663 skb->end = data + size;
664 skb->data += off; 667 skb->data += off;
665 skb->tail += off; 668#ifdef NET_SKBUFF_DATA_USES_OFFSET
666 skb->mac.raw += off; 669 skb->end = size;
667 skb->h.raw += off; 670 off = nhead;
668 skb->nh.raw += off; 671#else
672 skb->end = skb->head + size;
673#endif
674 /* {transport,network,mac}_header and tail are relative to skb->head */
675 skb->tail += off;
676 skb->transport_header += off;
677 skb->network_header += off;
678 skb->mac_header += off;
669 skb->cloned = 0; 679 skb->cloned = 0;
670 skb->nohdr = 0; 680 skb->nohdr = 0;
671 atomic_set(&skb_shinfo(skb)->dataref, 1); 681 atomic_set(&skb_shinfo(skb)->dataref, 1);
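
The subtlety in this hunk: with offsets, {transport,network,mac}_header are already relative to skb->head, so after the buffer is re-allocated they only grow by the extra headroom — which is why the offset branch re-assigns off = nhead before the += off block. A worked example (illustrative numbers, not from the patch):

    /*
     * pskb_expand_head(skb, nhead = 32, ...):  old head H, new head H',
     * data copied to H' + 32; mac header originally 16 bytes into the data.
     *
     * Pointer mode:  mac.raw was H + 16 and must become H' + 48,
     *                so every header pointer moves by off = (H' + 32) - H.
     * Offset mode:   the stored value 16 is relative to head and stays
     *                meaningful; it only grows by the new headroom,
     *                off = nhead = 32, giving offset 48 from H'.
     */
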
@@ -726,7 +736,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
726 */ 736 */
727 struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom, 737 struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
728 gfp_mask); 738 gfp_mask);
739 int oldheadroom = skb_headroom(skb);
729 int head_copy_len, head_copy_off; 740 int head_copy_len, head_copy_off;
741 int off = 0;
730 742
731 if (!n) 743 if (!n)
732 return NULL; 744 return NULL;
@@ -736,7 +748,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
736 /* Set the tail pointer and length */ 748 /* Set the tail pointer and length */
737 skb_put(n, skb->len); 749 skb_put(n, skb->len);
738 750
739 head_copy_len = skb_headroom(skb); 751 head_copy_len = oldheadroom;
740 head_copy_off = 0; 752 head_copy_off = 0;
741 if (newheadroom <= head_copy_len) 753 if (newheadroom <= head_copy_len)
742 head_copy_len = newheadroom; 754 head_copy_len = newheadroom;
@@ -750,6 +762,13 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
750 762
751 copy_skb_header(n, skb); 763 copy_skb_header(n, skb);
752 764
765#ifdef NET_SKBUFF_DATA_USES_OFFSET
766 off = newheadroom - oldheadroom;
767#endif
768 n->transport_header += off;
769 n->network_header += off;
770 n->mac_header += off;
771
753 return n; 772 return n;
754} 773}
755 774
@@ -877,7 +896,7 @@ done:
877 } else { 896 } else {
878 skb->len = len; 897 skb->len = len;
879 skb->data_len = 0; 898 skb->data_len = 0;
880 skb->tail = skb->data + len; 899 skb_set_tail_pointer(skb, len);
881 } 900 }
882 901
883 return 0; 902 return 0;
@@ -922,7 +941,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
922 return NULL; 941 return NULL;
923 } 942 }
924 943
925 if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta)) 944 if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta))
926 BUG(); 945 BUG();
927 946
928 /* Optimization: no fragments, no reasons to preestimate 947 /* Optimization: no fragments, no reasons to preestimate
@@ -1018,7 +1037,7 @@ pull_pages:
1018 skb->tail += delta; 1037 skb->tail += delta;
1019 skb->data_len -= delta; 1038 skb->data_len -= delta;
1020 1039
1021 return skb->tail; 1040 return skb_tail_pointer(skb);
1022} 1041}
1023 1042
1024/* Copy some data bits from skb to kernel buffer. */ 1043/* Copy some data bits from skb to kernel buffer. */
@@ -1026,16 +1045,16 @@ pull_pages:
1026int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) 1045int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
1027{ 1046{
1028 int i, copy; 1047 int i, copy;
1029 int start = skb_headlen(skb); 1048 int end = skb_headlen(skb);
1030 1049
1031 if (offset > (int)skb->len - len) 1050 if (offset > (int)skb->len - len)
1032 goto fault; 1051 goto fault;
1033 1052
1034 /* Copy header. */ 1053 /* Copy header. */
1035 if ((copy = start - offset) > 0) { 1054 if ((copy = end - offset) > 0) {
1036 if (copy > len) 1055 if (copy > len)
1037 copy = len; 1056 copy = len;
1038 memcpy(to, skb->data + offset, copy); 1057 skb_copy_from_linear_data_offset(skb, offset, to, copy);
1039 if ((len -= copy) == 0) 1058 if ((len -= copy) == 0)
1040 return 0; 1059 return 0;
1041 offset += copy; 1060 offset += copy;
@@ -1043,11 +1062,9 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
1043 } 1062 }
1044 1063
1045 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1064 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1046 int end; 1065 BUG_TRAP(len >= 0);
1047 1066
1048 BUG_TRAP(start <= offset + len); 1067 end = offset + skb_shinfo(skb)->frags[i].size;
1049
1050 end = start + skb_shinfo(skb)->frags[i].size;
1051 if ((copy = end - offset) > 0) { 1068 if ((copy = end - offset) > 0) {
1052 u8 *vaddr; 1069 u8 *vaddr;
1053 1070
@@ -1056,8 +1073,8 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
1056 1073
1057 vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); 1074 vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
1058 memcpy(to, 1075 memcpy(to,
1059 vaddr + skb_shinfo(skb)->frags[i].page_offset+ 1076 vaddr + skb_shinfo(skb)->frags[i].page_offset,
1060 offset - start, copy); 1077 copy);
1061 kunmap_skb_frag(vaddr); 1078 kunmap_skb_frag(vaddr);
1062 1079
1063 if ((len -= copy) == 0) 1080 if ((len -= copy) == 0)
@@ -1065,30 +1082,25 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
1065 offset += copy; 1082 offset += copy;
1066 to += copy; 1083 to += copy;
1067 } 1084 }
1068 start = end;
1069 } 1085 }
1070 1086
1071 if (skb_shinfo(skb)->frag_list) { 1087 if (skb_shinfo(skb)->frag_list) {
1072 struct sk_buff *list = skb_shinfo(skb)->frag_list; 1088 struct sk_buff *list = skb_shinfo(skb)->frag_list;
1073 1089
1074 for (; list; list = list->next) { 1090 for (; list; list = list->next) {
1075 int end; 1091 BUG_TRAP(len >= 0);
1076
1077 BUG_TRAP(start <= offset + len);
1078 1092
1079 end = start + list->len; 1093 end = offset + list->len;
1080 if ((copy = end - offset) > 0) { 1094 if ((copy = end - offset) > 0) {
1081 if (copy > len) 1095 if (copy > len)
1082 copy = len; 1096 copy = len;
1083 if (skb_copy_bits(list, offset - start, 1097 if (skb_copy_bits(list, 0, to, copy))
1084 to, copy))
1085 goto fault; 1098 goto fault;
1086 if ((len -= copy) == 0) 1099 if ((len -= copy) == 0)
1087 return 0; 1100 return 0;
1088 offset += copy; 1101 offset += copy;
1089 to += copy; 1102 to += copy;
1090 } 1103 }
1091 start = end;
1092 } 1104 }
1093 } 1105 }
1094 if (!len) 1106 if (!len)
@@ -1110,18 +1122,18 @@ fault:
1110 * traversing fragment lists and such. 1122 * traversing fragment lists and such.
1111 */ 1123 */
1112 1124
1113int skb_store_bits(const struct sk_buff *skb, int offset, void *from, int len) 1125int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
1114{ 1126{
1115 int i, copy; 1127 int i, copy;
1116 int start = skb_headlen(skb); 1128 int end = skb_headlen(skb);
1117 1129
1118 if (offset > (int)skb->len - len) 1130 if (offset > (int)skb->len - len)
1119 goto fault; 1131 goto fault;
1120 1132
1121 if ((copy = start - offset) > 0) { 1133 if ((copy = end - offset) > 0) {
1122 if (copy > len) 1134 if (copy > len)
1123 copy = len; 1135 copy = len;
1124 memcpy(skb->data + offset, from, copy); 1136 skb_copy_to_linear_data_offset(skb, offset, from, copy);
1125 if ((len -= copy) == 0) 1137 if ((len -= copy) == 0)
1126 return 0; 1138 return 0;
1127 offset += copy; 1139 offset += copy;
@@ -1130,11 +1142,9 @@ int skb_store_bits(const struct sk_buff *skb, int offset, void *from, int len)
1130 1142
1131 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1143 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1132 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 1144 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1133 int end; 1145 BUG_TRAP(len >= 0);
1134 1146
1135 BUG_TRAP(start <= offset + len); 1147 end = offset + frag->size;
1136
1137 end = start + frag->size;
1138 if ((copy = end - offset) > 0) { 1148 if ((copy = end - offset) > 0) {
1139 u8 *vaddr; 1149 u8 *vaddr;
1140 1150
@@ -1142,8 +1152,7 @@ int skb_store_bits(const struct sk_buff *skb, int offset, void *from, int len)
1142 copy = len; 1152 copy = len;
1143 1153
1144 vaddr = kmap_skb_frag(frag); 1154 vaddr = kmap_skb_frag(frag);
1145 memcpy(vaddr + frag->page_offset + offset - start, 1155 memcpy(vaddr + frag->page_offset, from, copy);
1146 from, copy);
1147 kunmap_skb_frag(vaddr); 1156 kunmap_skb_frag(vaddr);
1148 1157
1149 if ((len -= copy) == 0) 1158 if ((len -= copy) == 0)
@@ -1151,30 +1160,25 @@ int skb_store_bits(const struct sk_buff *skb, int offset, void *from, int len)
1151 offset += copy; 1160 offset += copy;
1152 from += copy; 1161 from += copy;
1153 } 1162 }
1154 start = end;
1155 } 1163 }
1156 1164
1157 if (skb_shinfo(skb)->frag_list) { 1165 if (skb_shinfo(skb)->frag_list) {
1158 struct sk_buff *list = skb_shinfo(skb)->frag_list; 1166 struct sk_buff *list = skb_shinfo(skb)->frag_list;
1159 1167
1160 for (; list; list = list->next) { 1168 for (; list; list = list->next) {
1161 int end; 1169 BUG_TRAP(len >= 0);
1162
1163 BUG_TRAP(start <= offset + len);
1164 1170
1165 end = start + list->len; 1171 end = offset + list->len;
1166 if ((copy = end - offset) > 0) { 1172 if ((copy = end - offset) > 0) {
1167 if (copy > len) 1173 if (copy > len)
1168 copy = len; 1174 copy = len;
1169 if (skb_store_bits(list, offset - start, 1175 if (skb_store_bits(list, 0, from, copy))
1170 from, copy))
1171 goto fault; 1176 goto fault;
1172 if ((len -= copy) == 0) 1177 if ((len -= copy) == 0)
1173 return 0; 1178 return 0;
1174 offset += copy; 1179 offset += copy;
1175 from += copy; 1180 from += copy;
1176 } 1181 }
1177 start = end;
1178 } 1182 }
1179 } 1183 }
1180 if (!len) 1184 if (!len)
@@ -1191,8 +1195,8 @@ EXPORT_SYMBOL(skb_store_bits);
1191__wsum skb_checksum(const struct sk_buff *skb, int offset, 1195__wsum skb_checksum(const struct sk_buff *skb, int offset,
1192 int len, __wsum csum) 1196 int len, __wsum csum)
1193{ 1197{
1194 int start = skb_headlen(skb); 1198 int end = skb_headlen(skb);
1195 int i, copy = start - offset; 1199 int i, copy = end - offset;
1196 int pos = 0; 1200 int pos = 0;
1197 1201
1198 /* Checksum header. */ 1202 /* Checksum header. */
@@ -1207,11 +1211,9 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
1207 } 1211 }
1208 1212
1209 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1213 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1210 int end; 1214 BUG_TRAP(len >= 0);
1211 1215
1212 BUG_TRAP(start <= offset + len); 1216 end = offset + skb_shinfo(skb)->frags[i].size;
1213
1214 end = start + skb_shinfo(skb)->frags[i].size;
1215 if ((copy = end - offset) > 0) { 1217 if ((copy = end - offset) > 0) {
1216 __wsum csum2; 1218 __wsum csum2;
1217 u8 *vaddr; 1219 u8 *vaddr;
@@ -1220,8 +1222,8 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
1220 if (copy > len) 1222 if (copy > len)
1221 copy = len; 1223 copy = len;
1222 vaddr = kmap_skb_frag(frag); 1224 vaddr = kmap_skb_frag(frag);
1223 csum2 = csum_partial(vaddr + frag->page_offset + 1225 csum2 = csum_partial(vaddr + frag->page_offset,
1224 offset - start, copy, 0); 1226 copy, 0);
1225 kunmap_skb_frag(vaddr); 1227 kunmap_skb_frag(vaddr);
1226 csum = csum_block_add(csum, csum2, pos); 1228 csum = csum_block_add(csum, csum2, pos);
1227 if (!(len -= copy)) 1229 if (!(len -= copy))
@@ -1229,31 +1231,26 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
1229 offset += copy; 1231 offset += copy;
1230 pos += copy; 1232 pos += copy;
1231 } 1233 }
1232 start = end;
1233 } 1234 }
1234 1235
1235 if (skb_shinfo(skb)->frag_list) { 1236 if (skb_shinfo(skb)->frag_list) {
1236 struct sk_buff *list = skb_shinfo(skb)->frag_list; 1237 struct sk_buff *list = skb_shinfo(skb)->frag_list;
1237 1238
1238 for (; list; list = list->next) { 1239 for (; list; list = list->next) {
1239 int end; 1240 BUG_TRAP(len >= 0);
1240
1241 BUG_TRAP(start <= offset + len);
1242 1241
1243 end = start + list->len; 1242 end = offset + list->len;
1244 if ((copy = end - offset) > 0) { 1243 if ((copy = end - offset) > 0) {
1245 __wsum csum2; 1244 __wsum csum2;
1246 if (copy > len) 1245 if (copy > len)
1247 copy = len; 1246 copy = len;
1248 csum2 = skb_checksum(list, offset - start, 1247 csum2 = skb_checksum(list, 0, copy, 0);
1249 copy, 0);
1250 csum = csum_block_add(csum, csum2, pos); 1248 csum = csum_block_add(csum, csum2, pos);
1251 if ((len -= copy) == 0) 1249 if ((len -= copy) == 0)
1252 return csum; 1250 return csum;
1253 offset += copy; 1251 offset += copy;
1254 pos += copy; 1252 pos += copy;
1255 } 1253 }
1256 start = end;
1257 } 1254 }
1258 } 1255 }
1259 BUG_ON(len); 1256 BUG_ON(len);
@@ -1266,8 +1263,8 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
1266__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, 1263__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
1267 u8 *to, int len, __wsum csum) 1264 u8 *to, int len, __wsum csum)
1268{ 1265{
1269 int start = skb_headlen(skb); 1266 int end = skb_headlen(skb);
1270 int i, copy = start - offset; 1267 int i, copy = end - offset;
1271 int pos = 0; 1268 int pos = 0;
1272 1269
1273 /* Copy header. */ 1270 /* Copy header. */
@@ -1284,11 +1281,9 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
1284 } 1281 }
1285 1282
1286 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1283 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1287 int end; 1284 BUG_TRAP(len >= 0);
1288 1285
1289 BUG_TRAP(start <= offset + len); 1286 end = offset + skb_shinfo(skb)->frags[i].size;
1290
1291 end = start + skb_shinfo(skb)->frags[i].size;
1292 if ((copy = end - offset) > 0) { 1287 if ((copy = end - offset) > 0) {
1293 __wsum csum2; 1288 __wsum csum2;
1294 u8 *vaddr; 1289 u8 *vaddr;
@@ -1298,9 +1293,8 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
1298 copy = len; 1293 copy = len;
1299 vaddr = kmap_skb_frag(frag); 1294 vaddr = kmap_skb_frag(frag);
1300 csum2 = csum_partial_copy_nocheck(vaddr + 1295 csum2 = csum_partial_copy_nocheck(vaddr +
1301 frag->page_offset + 1296 frag->page_offset,
1302 offset - start, to, 1297 to, copy, 0);
1303 copy, 0);
1304 kunmap_skb_frag(vaddr); 1298 kunmap_skb_frag(vaddr);
1305 csum = csum_block_add(csum, csum2, pos); 1299 csum = csum_block_add(csum, csum2, pos);
1306 if (!(len -= copy)) 1300 if (!(len -= copy))
@@ -1309,7 +1303,6 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
1309 to += copy; 1303 to += copy;
1310 pos += copy; 1304 pos += copy;
1311 } 1305 }
1312 start = end;
1313 } 1306 }
1314 1307
1315 if (skb_shinfo(skb)->frag_list) { 1308 if (skb_shinfo(skb)->frag_list) {
@@ -1317,16 +1310,13 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
1317 1310
1318 for (; list; list = list->next) { 1311 for (; list; list = list->next) {
1319 __wsum csum2; 1312 __wsum csum2;
1320 int end; 1313 BUG_TRAP(len >= 0);
1321
1322 BUG_TRAP(start <= offset + len);
1323 1314
1324 end = start + list->len; 1315 end = offset + list->len;
1325 if ((copy = end - offset) > 0) { 1316 if ((copy = end - offset) > 0) {
1326 if (copy > len) 1317 if (copy > len)
1327 copy = len; 1318 copy = len;
1328 csum2 = skb_copy_and_csum_bits(list, 1319 csum2 = skb_copy_and_csum_bits(list, 0,
1329 offset - start,
1330 to, copy, 0); 1320 to, copy, 0);
1331 csum = csum_block_add(csum, csum2, pos); 1321 csum = csum_block_add(csum, csum2, pos);
1332 if ((len -= copy) == 0) 1322 if ((len -= copy) == 0)
@@ -1335,7 +1325,6 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
1335 to += copy; 1325 to += copy;
1336 pos += copy; 1326 pos += copy;
1337 } 1327 }
1338 start = end;
1339 } 1328 }
1340 } 1329 }
1341 BUG_ON(len); 1330 BUG_ON(len);
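
Each of these walkers folds per-fragment checksums into a running total with csum_block_add(), which must compensate when a fragment starts at an odd byte offset (the 16-bit one's-complement lanes are then byte-swapped relative to the running sum). A rough, portable model of that arithmetic — the in-tree version is in include/net/checksum.h, so take this as illustration:

    #include <stdint.h>

    /* End-around-carry add of two one's-complement partial sums. */
    static uint32_t csum_add32(uint32_t a, uint32_t b)
    {
    	uint32_t s = a + b;
    	return s + (s < a);
    }

    /* Model of csum_block_add(): if the second block began at an odd
     * offset, swap the bytes within each 16-bit lane before adding. */
    static uint32_t csum_block_add_model(uint32_t csum, uint32_t csum2, int offset)
    {
    	if (offset & 1)
    		csum2 = ((csum2 & 0x00FF00FFu) << 8) |
    			((csum2 >> 8) & 0x00FF00FFu);
    	return csum_add32(csum, csum2);
    }
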
@@ -1348,13 +1337,13 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
1348 long csstart; 1337 long csstart;
1349 1338
1350 if (skb->ip_summed == CHECKSUM_PARTIAL) 1339 if (skb->ip_summed == CHECKSUM_PARTIAL)
1351 csstart = skb->h.raw - skb->data; 1340 csstart = skb->csum_start - skb_headroom(skb);
1352 else 1341 else
1353 csstart = skb_headlen(skb); 1342 csstart = skb_headlen(skb);
1354 1343
1355 BUG_ON(csstart > skb_headlen(skb)); 1344 BUG_ON(csstart > skb_headlen(skb));
1356 1345
1357 memcpy(to, skb->data, csstart); 1346 skb_copy_from_linear_data(skb, to, csstart);
1358 1347
1359 csum = 0; 1348 csum = 0;
1360 if (csstart != skb->len) 1349 if (csstart != skb->len)
@@ -1522,27 +1511,14 @@ void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head
1522 spin_unlock_irqrestore(&list->lock, flags); 1511 spin_unlock_irqrestore(&list->lock, flags);
1523} 1512}
1524 1513
1525#if 0
1526/*
1527 * Tune the memory allocator for a new MTU size.
1528 */
1529void skb_add_mtu(int mtu)
1530{
1531 /* Must match allocation in alloc_skb */
1532 mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
1533
1534 kmem_add_cache_size(mtu);
1535}
1536#endif
1537
1538static inline void skb_split_inside_header(struct sk_buff *skb, 1514static inline void skb_split_inside_header(struct sk_buff *skb,
1539 struct sk_buff* skb1, 1515 struct sk_buff* skb1,
1540 const u32 len, const int pos) 1516 const u32 len, const int pos)
1541{ 1517{
1542 int i; 1518 int i;
1543 1519
1544 memcpy(skb_put(skb1, pos - len), skb->data + len, pos - len); 1520 skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len),
1545 1521 pos - len);
1546 /* And move data appendix as is. */ 1522 /* And move data appendix as is. */
1547 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 1523 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1548 skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i]; 1524 skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
@@ -1553,7 +1529,7 @@ static inline void skb_split_inside_header(struct sk_buff *skb,
1553 skb1->len += skb1->data_len; 1529 skb1->len += skb1->data_len;
1554 skb->data_len = 0; 1530 skb->data_len = 0;
1555 skb->len = len; 1531 skb->len = len;
1556 skb->tail = skb->data + len; 1532 skb_set_tail_pointer(skb, len);
1557} 1533}
1558 1534
1559static inline void skb_split_no_header(struct sk_buff *skb, 1535static inline void skb_split_no_header(struct sk_buff *skb,
@@ -1878,7 +1854,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
1878 struct sk_buff *segs = NULL; 1854 struct sk_buff *segs = NULL;
1879 struct sk_buff *tail = NULL; 1855 struct sk_buff *tail = NULL;
1880 unsigned int mss = skb_shinfo(skb)->gso_size; 1856 unsigned int mss = skb_shinfo(skb)->gso_size;
1881 unsigned int doffset = skb->data - skb->mac.raw; 1857 unsigned int doffset = skb->data - skb_mac_header(skb);
1882 unsigned int offset = doffset; 1858 unsigned int offset = doffset;
1883 unsigned int headroom; 1859 unsigned int headroom;
1884 unsigned int len; 1860 unsigned int len;
@@ -1928,11 +1904,12 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
1928 nskb->mac_len = skb->mac_len; 1904 nskb->mac_len = skb->mac_len;
1929 1905
1930 skb_reserve(nskb, headroom); 1906 skb_reserve(nskb, headroom);
1931 nskb->mac.raw = nskb->data; 1907 skb_reset_mac_header(nskb);
1932 nskb->nh.raw = nskb->data + skb->mac_len; 1908 skb_set_network_header(nskb, skb->mac_len);
1933 nskb->h.raw = nskb->nh.raw + (skb->h.raw - skb->nh.raw); 1909 nskb->transport_header = (nskb->network_header +
1934 memcpy(skb_put(nskb, doffset), skb->data, doffset); 1910 skb_network_header_len(skb));
1935 1911 skb_copy_from_linear_data(skb, skb_put(nskb, doffset),
1912 doffset);
1936 if (!sg) { 1913 if (!sg) {
1937 nskb->csum = skb_copy_and_csum_bits(skb, offset, 1914 nskb->csum = skb_copy_and_csum_bits(skb, offset,
1938 skb_put(nskb, len), 1915 skb_put(nskb, len),
@@ -1945,7 +1922,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
1945 1922
1946 nskb->ip_summed = CHECKSUM_PARTIAL; 1923 nskb->ip_summed = CHECKSUM_PARTIAL;
1947 nskb->csum = skb->csum; 1924 nskb->csum = skb->csum;
1948 memcpy(skb_put(nskb, hsize), skb->data + offset, hsize); 1925 skb_copy_from_linear_data_offset(skb, offset,
1926 skb_put(nskb, hsize), hsize);
1949 1927
1950 while (pos < offset + len) { 1928 while (pos < offset + len) {
1951 BUG_ON(i >= nfrags); 1929 BUG_ON(i >= nfrags);
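
skb_segment() now builds the new segment's headers with typed helpers instead of raw pointer arithmetic on mac.raw/nh.raw/h.raw. Rough models of the helpers used here, under the offset representation sketched earlier (again an illustration of the likely shape, not the merged header):

    static inline void skb_reset_mac_header(struct sk_buff *skb)
    {
    	skb->mac_header = skb->data - skb->head;
    }

    static inline void skb_reset_network_header(struct sk_buff *skb)
    {
    	skb->network_header = skb->data - skb->head;
    }

    static inline void skb_set_network_header(struct sk_buff *skb, int offset)
    {
    	skb_reset_network_header(skb);
    	skb->network_header += offset;
    }

    /* Network header length: transport start minus network start. */
    static inline unsigned int skb_network_header_len(const struct sk_buff *skb)
    {
    	return skb->transport_header - skb->network_header;
    }
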
@@ -2005,6 +1983,184 @@ void __init skb_init(void)
2005 NULL, NULL); 1983 NULL, NULL);
2006} 1984}
2007 1985
1986/**
1987 * skb_to_sgvec - Fill a scatter-gather list from a socket buffer
1988 * @skb: Socket buffer containing the buffers to be mapped
1989 * @sg: The scatter-gather list to map into
1990 * @offset: The offset into the buffer's contents to start mapping
1991 * @len: Length of buffer space to be mapped
1992 *
1993 * Fill the specified scatter-gather list with mappings/pointers into a
1994 * region of the buffer space attached to a socket buffer.
1995 */
1996int
1997skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
1998{
1999 int end = skb_headlen(skb);
2000 int i, copy = end - offset;
2001 int elt = 0;
2002
2003 if (copy > 0) {
2004 if (copy > len)
2005 copy = len;
2006 sg[elt].page = virt_to_page(skb->data + offset);
2007 sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
2008 sg[elt].length = copy;
2009 elt++;
2010 if ((len -= copy) == 0)
2011 return elt;
2012 offset += copy;
2013 }
2014
2015 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2016 BUG_TRAP(len >= 0);
2017
2018 end = offset + skb_shinfo(skb)->frags[i].size;
2019 if ((copy = end - offset) > 0) {
2020 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2021
2022 if (copy > len)
2023 copy = len;
2024 sg[elt].page = frag->page;
2025 sg[elt].offset = frag->page_offset;
2026 sg[elt].length = copy;
2027 elt++;
2028 if (!(len -= copy))
2029 return elt;
2030 offset += copy;
2031 }
2032 }
2033
2034 if (skb_shinfo(skb)->frag_list) {
2035 struct sk_buff *list = skb_shinfo(skb)->frag_list;
2036
2037 for (; list; list = list->next) {
2038 BUG_TRAP(len >= 0);
2039
2040 end = offset + list->len;
2041 if ((copy = end - offset) > 0) {
2042 if (copy > len)
2043 copy = len;
2044 elt += skb_to_sgvec(list, sg+elt, 0, copy);
2045 if ((len -= copy) == 0)
2046 return elt;
2047 offset += copy;
2048 }
2049 }
2050 }
2051 BUG_ON(len);
2052 return elt;
2053}
2054
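
A hypothetical caller of skb_to_sgvec(), mapping an skb's payload for a crypto operation. The sizing rule follows from the code above — one entry for the linear area plus one per page fragment, recursing into the frag list — but the function name and buffer sizing here are invented for illustration:

    /* Invented usage sketch; not from this patch. */
    static int map_skb_for_crypto(struct sk_buff *skb)
    {
    	/* Enough for the linear area plus every page frag of a simple
    	 * skb; a frag_list-bearing skb needs more (see skb_cow_data). */
    	struct scatterlist sg[MAX_SKB_FRAGS + 1];
    	int nelts;

    	nelts = skb_to_sgvec(skb, sg, 0, skb->len);
    	/* ... pass sg[0..nelts) to the crypto layer ... */
    	return nelts;
    }
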
2055/**
2056 * skb_cow_data - Check that a socket buffer's data buffers are writable
2057 * @skb: The socket buffer to check.
2058 * @tailbits: Amount of trailing space to be added
2059 * @trailer: Returned pointer to the skb where the @tailbits space begins
2060 *
2061 * Make sure that the data buffers attached to a socket buffer are
2062 * writable. If they are not, private copies are made of the data buffers
2063 * and the socket buffer is set to use these instead.
2064 *
2065 * If @tailbits is given, make sure that there is space to write @tailbits
2066 * bytes of data beyond current end of socket buffer. @trailer will be
2067 * set to point to the skb in which this space begins.
2068 *
2069 * The number of scatterlist elements required to completely map the
2070 * COW'd and extended socket buffer will be returned.
2071 */
2072int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2073{
2074 int copyflag;
2075 int elt;
2076 struct sk_buff *skb1, **skb_p;
2077
2078 /* If skb is cloned or its head is paged, reallocate
2079 * head pulling out all the pages (pages are considered not writable
2080 * at the moment even if they are anonymous).
2081 */
2082 if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
2083 __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
2084 return -ENOMEM;
2085
2086 /* Easy case. Most of packets will go this way. */
2087 if (!skb_shinfo(skb)->frag_list) {
2088 /* A little trouble: not enough space for the trailer.
2089 * This should not happen when the stack is tuned to generate
2090 * good frames. On a miss we reallocate and reserve even more
2091 * space; 128 bytes is fair. */
2092
2093 if (skb_tailroom(skb) < tailbits &&
2094 pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
2095 return -ENOMEM;
2096
2097 /* Voila! */
2098 *trailer = skb;
2099 return 1;
2100 }
2101
2102 /* Misery. We are in trouble, going to mince fragments... */
2103
2104 elt = 1;
2105 skb_p = &skb_shinfo(skb)->frag_list;
2106 copyflag = 0;
2107
2108 while ((skb1 = *skb_p) != NULL) {
2109 int ntail = 0;
2110
2111 /* The fragment is partially pulled by someone;
2112 * this can happen on input. Copy it and everything
2113 * after it. */
2114
2115 if (skb_shared(skb1))
2116 copyflag = 1;
2117
2118 /* If the skb is the last, worry about trailer. */
2119
2120 if (skb1->next == NULL && tailbits) {
2121 if (skb_shinfo(skb1)->nr_frags ||
2122 skb_shinfo(skb1)->frag_list ||
2123 skb_tailroom(skb1) < tailbits)
2124 ntail = tailbits + 128;
2125 }
2126
2127 if (copyflag ||
2128 skb_cloned(skb1) ||
2129 ntail ||
2130 skb_shinfo(skb1)->nr_frags ||
2131 skb_shinfo(skb1)->frag_list) {
2132 struct sk_buff *skb2;
2133
2134 /* Fuck, we are miserable poor guys... */
2135 if (ntail == 0)
2136 skb2 = skb_copy(skb1, GFP_ATOMIC);
2137 else
2138 skb2 = skb_copy_expand(skb1,
2139 skb_headroom(skb1),
2140 ntail,
2141 GFP_ATOMIC);
2142 if (unlikely(skb2 == NULL))
2143 return -ENOMEM;
2144
2145 if (skb1->sk)
2146 skb_set_owner_w(skb2, skb1->sk);
2147
2148 /* Looking around. Are we still alive?
2149 * OK, link new skb, drop old one */
2150
2151 skb2->next = skb1->next;
2152 *skb_p = skb2;
2153 kfree_skb(skb1);
2154 skb1 = skb2;
2155 }
2156 elt++;
2157 *trailer = skb1;
2158 skb_p = &skb1->next;
2159 }
2160
2161 return elt;
2162}
2163
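
The two GPL exports below are meant to be used as a pair, in the style of an IPsec transform: skb_cow_data() first makes every byte writable and reserves trailer room, returning how many scatterlist slots a subsequent skb_to_sgvec() will need. A hedged sketch of that call pattern — the function name and the pskb_put()-style helper are assumptions here, not part of this diff:

    /* Invented example of the intended pairing; assumes a pskb_put()-style
     * helper that extends the tail of the skb owning the trailer space. */
    static int transform_skb(struct sk_buff *skb, int trailer_len)
    {
    	struct sk_buff *trailer;
    	struct scatterlist *sg;
    	int nfrags;

    	nfrags = skb_cow_data(skb, trailer_len, &trailer);
    	if (nfrags < 0)
    		return nfrags;		/* -ENOMEM from the COW path */

    	sg = kmalloc(nfrags * sizeof(*sg), GFP_ATOMIC);
    	if (!sg)
    		return -ENOMEM;

    	pskb_put(skb, trailer, trailer_len);	/* assumed helper */
    	skb_to_sgvec(skb, sg, 0, skb->len);
    	/* ... run the transform over sg[0..nfrags) ... */
    	kfree(sg);
    	return 0;
    }
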
2008EXPORT_SYMBOL(___pskb_trim); 2164EXPORT_SYMBOL(___pskb_trim);
2009EXPORT_SYMBOL(__kfree_skb); 2165EXPORT_SYMBOL(__kfree_skb);
2010EXPORT_SYMBOL(kfree_skb); 2166EXPORT_SYMBOL(kfree_skb);
@@ -2039,3 +2195,6 @@ EXPORT_SYMBOL(skb_seq_read);
2039EXPORT_SYMBOL(skb_abort_seq_read); 2195EXPORT_SYMBOL(skb_abort_seq_read);
2040EXPORT_SYMBOL(skb_find_text); 2196EXPORT_SYMBOL(skb_find_text);
2041EXPORT_SYMBOL(skb_append_datato_frags); 2197EXPORT_SYMBOL(skb_append_datato_frags);
2198
2199EXPORT_SYMBOL_GPL(skb_to_sgvec);
2200EXPORT_SYMBOL_GPL(skb_cow_data);
diff --git a/net/core/sock.c b/net/core/sock.c
index 27c4f62382bd..22183c2ef284 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -154,7 +154,8 @@ static const char *af_family_key_strings[AF_MAX+1] = {
154 "sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" , 154 "sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
155 "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" , 155 "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
156 "sk_lock-27" , "sk_lock-28" , "sk_lock-29" , 156 "sk_lock-27" , "sk_lock-28" , "sk_lock-29" ,
157 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_MAX" 157 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
158 "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
158}; 159};
159static const char *af_family_slock_key_strings[AF_MAX+1] = { 160static const char *af_family_slock_key_strings[AF_MAX+1] = {
160 "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , 161 "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
@@ -167,7 +168,8 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = {
167 "slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" , 168 "slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" ,
168 "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" , 169 "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
169 "slock-27" , "slock-28" , "slock-29" , 170 "slock-27" , "slock-28" , "slock-29" ,
170 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_MAX" 171 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
172 "slock-AF_RXRPC" , "slock-AF_MAX"
171}; 173};
172#endif 174#endif
173 175
@@ -361,8 +363,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
361 } 363 }
362#endif 364#endif
363 365
364 if(optlen<sizeof(int)) 366 if (optlen < sizeof(int))
365 return(-EINVAL); 367 return -EINVAL;
366 368
367 if (get_user(val, (int __user *)optval)) 369 if (get_user(val, (int __user *)optval))
368 return -EFAULT; 370 return -EFAULT;
@@ -371,265 +373,270 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
371 373
372 lock_sock(sk); 374 lock_sock(sk);
373 375
374 switch(optname) 376 switch(optname) {
375 { 377 case SO_DEBUG:
376 case SO_DEBUG: 378 if (val && !capable(CAP_NET_ADMIN)) {
377 if(val && !capable(CAP_NET_ADMIN)) 379 ret = -EACCES;
378 { 380 }
379 ret = -EACCES; 381 else if (valbool)
380 } 382 sock_set_flag(sk, SOCK_DBG);
381 else if (valbool) 383 else
382 sock_set_flag(sk, SOCK_DBG); 384 sock_reset_flag(sk, SOCK_DBG);
383 else 385 break;
384 sock_reset_flag(sk, SOCK_DBG); 386 case SO_REUSEADDR:
385 break; 387 sk->sk_reuse = valbool;
386 case SO_REUSEADDR: 388 break;
387 sk->sk_reuse = valbool; 389 case SO_TYPE:
388 break; 390 case SO_ERROR:
389 case SO_TYPE: 391 ret = -ENOPROTOOPT;
390 case SO_ERROR: 392 break;
391 ret = -ENOPROTOOPT; 393 case SO_DONTROUTE:
392 break; 394 if (valbool)
393 case SO_DONTROUTE: 395 sock_set_flag(sk, SOCK_LOCALROUTE);
394 if (valbool) 396 else
395 sock_set_flag(sk, SOCK_LOCALROUTE); 397 sock_reset_flag(sk, SOCK_LOCALROUTE);
396 else 398 break;
397 sock_reset_flag(sk, SOCK_LOCALROUTE); 399 case SO_BROADCAST:
398 break; 400 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
399 case SO_BROADCAST: 401 break;
400 sock_valbool_flag(sk, SOCK_BROADCAST, valbool); 402 case SO_SNDBUF:
401 break; 403 /* Don't error on this BSD doesn't and if you think
402 case SO_SNDBUF: 404 about it this is right. Otherwise apps have to
403 /* Don't error on this BSD doesn't and if you think 405 play 'guess the biggest size' games. RCVBUF/SNDBUF
404 about it this is right. Otherwise apps have to 406 are treated in BSD as hints */
405 play 'guess the biggest size' games. RCVBUF/SNDBUF 407
406 are treated in BSD as hints */ 408 if (val > sysctl_wmem_max)
407 409 val = sysctl_wmem_max;
408 if (val > sysctl_wmem_max)
409 val = sysctl_wmem_max;
410set_sndbuf: 410set_sndbuf:
411 sk->sk_userlocks |= SOCK_SNDBUF_LOCK; 411 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
412 if ((val * 2) < SOCK_MIN_SNDBUF) 412 if ((val * 2) < SOCK_MIN_SNDBUF)
413 sk->sk_sndbuf = SOCK_MIN_SNDBUF; 413 sk->sk_sndbuf = SOCK_MIN_SNDBUF;
414 else 414 else
415 sk->sk_sndbuf = val * 2; 415 sk->sk_sndbuf = val * 2;
416 416
417 /* 417 /*
418 * Wake up sending tasks if we 418 * Wake up sending tasks if we
419 * upped the value. 419 * upped the value.
420 */ 420 */
421 sk->sk_write_space(sk); 421 sk->sk_write_space(sk);
422 break; 422 break;
423 423
424 case SO_SNDBUFFORCE: 424 case SO_SNDBUFFORCE:
425 if (!capable(CAP_NET_ADMIN)) { 425 if (!capable(CAP_NET_ADMIN)) {
426 ret = -EPERM; 426 ret = -EPERM;
427 break; 427 break;
428 } 428 }
429 goto set_sndbuf; 429 goto set_sndbuf;
430 430
431 case SO_RCVBUF: 431 case SO_RCVBUF:
432 /* Don't error on this BSD doesn't and if you think 432 /* Don't error on this BSD doesn't and if you think
433 about it this is right. Otherwise apps have to 433 about it this is right. Otherwise apps have to
434 play 'guess the biggest size' games. RCVBUF/SNDBUF 434 play 'guess the biggest size' games. RCVBUF/SNDBUF
435 are treated in BSD as hints */ 435 are treated in BSD as hints */
436 436
437 if (val > sysctl_rmem_max) 437 if (val > sysctl_rmem_max)
438 val = sysctl_rmem_max; 438 val = sysctl_rmem_max;
439set_rcvbuf: 439set_rcvbuf:
440 sk->sk_userlocks |= SOCK_RCVBUF_LOCK; 440 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
441 /* 441 /*
442 * We double it on the way in to account for 442 * We double it on the way in to account for
443 * "struct sk_buff" etc. overhead. Applications 443 * "struct sk_buff" etc. overhead. Applications
444 * assume that the SO_RCVBUF setting they make will 444 * assume that the SO_RCVBUF setting they make will
445 * allow that much actual data to be received on that 445 * allow that much actual data to be received on that
446 * socket. 446 * socket.
447 * 447 *
448 * Applications are unaware that "struct sk_buff" and 448 * Applications are unaware that "struct sk_buff" and
449 * other overheads allocate from the receive buffer 449 * other overheads allocate from the receive buffer
450 * during socket buffer allocation. 450 * during socket buffer allocation.
451 * 451 *
452 * And after considering the possible alternatives, 452 * And after considering the possible alternatives,
453 * returning the value we actually used in getsockopt 453 * returning the value we actually used in getsockopt
454 * is the most desirable behavior. 454 * is the most desirable behavior.
455 */ 455 */
456 if ((val * 2) < SOCK_MIN_RCVBUF) 456 if ((val * 2) < SOCK_MIN_RCVBUF)
457 sk->sk_rcvbuf = SOCK_MIN_RCVBUF; 457 sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
458 else 458 else
459 sk->sk_rcvbuf = val * 2; 459 sk->sk_rcvbuf = val * 2;
460 break;
461
462 case SO_RCVBUFFORCE:
463 if (!capable(CAP_NET_ADMIN)) {
464 ret = -EPERM;
460 break; 465 break;
466 }
467 goto set_rcvbuf;
461 468
462 case SO_RCVBUFFORCE: 469 case SO_KEEPALIVE:
463 if (!capable(CAP_NET_ADMIN)) {
464 ret = -EPERM;
465 break;
466 }
467 goto set_rcvbuf;
468
469 case SO_KEEPALIVE:
470#ifdef CONFIG_INET 470#ifdef CONFIG_INET
471 if (sk->sk_protocol == IPPROTO_TCP) 471 if (sk->sk_protocol == IPPROTO_TCP)
472 tcp_set_keepalive(sk, valbool); 472 tcp_set_keepalive(sk, valbool);
473#endif 473#endif
474 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool); 474 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
475 break; 475 break;
476 476
477 case SO_OOBINLINE: 477 case SO_OOBINLINE:
478 sock_valbool_flag(sk, SOCK_URGINLINE, valbool); 478 sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
479 break;
480
481 case SO_NO_CHECK:
482 sk->sk_no_check = valbool;
483 break;
484
485 case SO_PRIORITY:
486 if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
487 sk->sk_priority = val;
488 else
489 ret = -EPERM;
490 break;
491
492 case SO_LINGER:
493 if (optlen < sizeof(ling)) {
494 ret = -EINVAL; /* 1003.1g */
479 break; 495 break;
480 496 }
481 case SO_NO_CHECK: 497 if (copy_from_user(&ling,optval,sizeof(ling))) {
482 sk->sk_no_check = valbool; 498 ret = -EFAULT;
483 break;
484
485 case SO_PRIORITY:
486 if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
487 sk->sk_priority = val;
488 else
489 ret = -EPERM;
490 break; 499 break;
491 500 }
492 case SO_LINGER: 501 if (!ling.l_onoff)
493 if(optlen<sizeof(ling)) { 502 sock_reset_flag(sk, SOCK_LINGER);
494 ret = -EINVAL; /* 1003.1g */ 503 else {
495 break;
496 }
497 if (copy_from_user(&ling,optval,sizeof(ling))) {
498 ret = -EFAULT;
499 break;
500 }
501 if (!ling.l_onoff)
502 sock_reset_flag(sk, SOCK_LINGER);
503 else {
504#if (BITS_PER_LONG == 32) 504#if (BITS_PER_LONG == 32)
505 if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ) 505 if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
506 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT; 506 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
507 else 507 else
508#endif 508#endif
509 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ; 509 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
510 sock_set_flag(sk, SOCK_LINGER); 510 sock_set_flag(sk, SOCK_LINGER);
511 } 511 }
512 break; 512 break;
513 513
514 case SO_BSDCOMPAT: 514 case SO_BSDCOMPAT:
515 sock_warn_obsolete_bsdism("setsockopt"); 515 sock_warn_obsolete_bsdism("setsockopt");
516 break; 516 break;
517 517
518 case SO_PASSCRED: 518 case SO_PASSCRED:
519 if (valbool) 519 if (valbool)
520 set_bit(SOCK_PASSCRED, &sock->flags); 520 set_bit(SOCK_PASSCRED, &sock->flags);
521 else
522 clear_bit(SOCK_PASSCRED, &sock->flags);
523 break;
524
525 case SO_TIMESTAMP:
526 case SO_TIMESTAMPNS:
527 if (valbool) {
528 if (optname == SO_TIMESTAMP)
529 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
521 else 530 else
522 clear_bit(SOCK_PASSCRED, &sock->flags); 531 sock_set_flag(sk, SOCK_RCVTSTAMPNS);
523 break; 532 sock_set_flag(sk, SOCK_RCVTSTAMP);
533 sock_enable_timestamp(sk);
534 } else {
535 sock_reset_flag(sk, SOCK_RCVTSTAMP);
536 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
537 }
538 break;
524 539
525 case SO_TIMESTAMP: 540 case SO_RCVLOWAT:
526 if (valbool) { 541 if (val < 0)
527 sock_set_flag(sk, SOCK_RCVTSTAMP); 542 val = INT_MAX;
528 sock_enable_timestamp(sk); 543 sk->sk_rcvlowat = val ? : 1;
529 } else 544 break;
530 sock_reset_flag(sk, SOCK_RCVTSTAMP);
531 break;
532 545
533 case SO_RCVLOWAT: 546 case SO_RCVTIMEO:
534 if (val < 0) 547 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
535 val = INT_MAX; 548 break;
536 sk->sk_rcvlowat = val ? : 1;
537 break;
538 549
539 case SO_RCVTIMEO: 550 case SO_SNDTIMEO:
540 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen); 551 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
541 break; 552 break;
542 553
543 case SO_SNDTIMEO: 554#ifdef CONFIG_NETDEVICES
544 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen); 555 case SO_BINDTODEVICE:
556 {
557 char devname[IFNAMSIZ];
558
559 /* Sorry... */
560 if (!capable(CAP_NET_RAW)) {
561 ret = -EPERM;
545 break; 562 break;
563 }
546 564
547#ifdef CONFIG_NETDEVICES 565 /* Bind this socket to a particular device like "eth0",
548 case SO_BINDTODEVICE: 566 * as specified in the passed interface name. If the
549 { 567 * name is "" or the option length is zero the socket
550 char devname[IFNAMSIZ]; 568 * is not bound.
569 */
551 570
552 /* Sorry... */ 571 if (!valbool) {
553 if (!capable(CAP_NET_RAW)) { 572 sk->sk_bound_dev_if = 0;
554 ret = -EPERM; 573 } else {
574 if (optlen > IFNAMSIZ - 1)
575 optlen = IFNAMSIZ - 1;
576 memset(devname, 0, sizeof(devname));
577 if (copy_from_user(devname, optval, optlen)) {
578 ret = -EFAULT;
555 break; 579 break;
556 } 580 }
557 581
558 /* Bind this socket to a particular device like "eth0", 582 /* Remove any cached route for this socket. */
559 * as specified in the passed interface name. If the 583 sk_dst_reset(sk);
560 * name is "" or the option length is zero the socket
561 * is not bound.
562 */
563 584
564 if (!valbool) { 585 if (devname[0] == '\0') {
565 sk->sk_bound_dev_if = 0; 586 sk->sk_bound_dev_if = 0;
566 } else { 587 } else {
567 if (optlen > IFNAMSIZ - 1) 588 struct net_device *dev = dev_get_by_name(devname);
568 optlen = IFNAMSIZ - 1; 589 if (!dev) {
569 memset(devname, 0, sizeof(devname)); 590 ret = -ENODEV;
570 if (copy_from_user(devname, optval, optlen)) {
571 ret = -EFAULT;
572 break; 591 break;
573 } 592 }
574 593 sk->sk_bound_dev_if = dev->ifindex;
575 /* Remove any cached route for this socket. */ 594 dev_put(dev);
576 sk_dst_reset(sk);
577
578 if (devname[0] == '\0') {
579 sk->sk_bound_dev_if = 0;
580 } else {
581 struct net_device *dev = dev_get_by_name(devname);
582 if (!dev) {
583 ret = -ENODEV;
584 break;
585 }
586 sk->sk_bound_dev_if = dev->ifindex;
587 dev_put(dev);
588 }
589 } 595 }
590 break;
591 } 596 }
597 break;
598 }
592#endif 599#endif
593 600
594 601
595 case SO_ATTACH_FILTER: 602 case SO_ATTACH_FILTER:
596 ret = -EINVAL; 603 ret = -EINVAL;
597 if (optlen == sizeof(struct sock_fprog)) { 604 if (optlen == sizeof(struct sock_fprog)) {
598 struct sock_fprog fprog; 605 struct sock_fprog fprog;
599 606
600 ret = -EFAULT; 607 ret = -EFAULT;
601 if (copy_from_user(&fprog, optval, sizeof(fprog))) 608 if (copy_from_user(&fprog, optval, sizeof(fprog)))
602 break;
603
604 ret = sk_attach_filter(&fprog, sk);
605 }
606 break;
607
608 case SO_DETACH_FILTER:
609 rcu_read_lock_bh();
610 filter = rcu_dereference(sk->sk_filter);
611 if (filter) {
612 rcu_assign_pointer(sk->sk_filter, NULL);
613 sk_filter_release(sk, filter);
614 rcu_read_unlock_bh();
615 break; 609 break;
616 } 610
611 ret = sk_attach_filter(&fprog, sk);
612 }
613 break;
614
615 case SO_DETACH_FILTER:
616 rcu_read_lock_bh();
617 filter = rcu_dereference(sk->sk_filter);
618 if (filter) {
619 rcu_assign_pointer(sk->sk_filter, NULL);
620 sk_filter_release(sk, filter);
617 rcu_read_unlock_bh(); 621 rcu_read_unlock_bh();
618 ret = -ENONET;
619 break; 622 break;
623 }
624 rcu_read_unlock_bh();
625 ret = -ENONET;
626 break;
620 627
621 case SO_PASSSEC: 628 case SO_PASSSEC:
622 if (valbool) 629 if (valbool)
623 set_bit(SOCK_PASSSEC, &sock->flags); 630 set_bit(SOCK_PASSSEC, &sock->flags);
624 else 631 else
625 clear_bit(SOCK_PASSSEC, &sock->flags); 632 clear_bit(SOCK_PASSSEC, &sock->flags);
626 break; 633 break;
627 634
628 /* We implement the SO_SNDLOWAT etc to 635 /* We implement the SO_SNDLOWAT etc to
629 not be settable (1003.1g 5.3) */ 636 not be settable (1003.1g 5.3) */
630 default: 637 default:
631 ret = -ENOPROTOOPT; 638 ret = -ENOPROTOOPT;
632 break; 639 break;
633 } 640 }
634 release_sock(sk); 641 release_sock(sk);
635 return ret; 642 return ret;
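
The reworked SO_TIMESTAMP/SO_TIMESTAMPNS branch above makes the two options mutually exclusive — setting one clears the other's flag, and either one sets SOCK_RCVTSTAMP and enables timestamping. From userspace the nanosecond variant looks like this (a minimal sketch, assuming libc headers new enough to define SO_TIMESTAMPNS):

    #include <sys/socket.h>
    #include <stdio.h>

    /* Request nanosecond receive timestamps on a socket. */
    static int enable_ns_timestamps(int fd)
    {
    	int on = 1;

    	/* In-kernel this also clears any earlier SO_TIMESTAMP
    	 * (microsecond) setting, per the switch above. */
    	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS, &on, sizeof(on)) < 0) {
    		perror("setsockopt(SO_TIMESTAMPNS)");
    		return -1;
    	}
    	/* Timestamps then arrive as SCM_TIMESTAMPNS control messages
    	 * (struct timespec) on recvmsg(). */
    	return 0;
    }
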
@@ -641,8 +648,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
641{ 648{
642 struct sock *sk = sock->sk; 649 struct sock *sk = sock->sk;
643 650
644 union 651 union {
645 {
646 int val; 652 int val;
647 struct linger ling; 653 struct linger ling;
648 struct timeval tm; 654 struct timeval tm;
@@ -651,148 +657,153 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
651 unsigned int lv = sizeof(int); 657 unsigned int lv = sizeof(int);
652 int len; 658 int len;
653 659
654 if(get_user(len,optlen)) 660 if (get_user(len, optlen))
655 return -EFAULT; 661 return -EFAULT;
656 if(len < 0) 662 if (len < 0)
657 return -EINVAL; 663 return -EINVAL;
658 664
659 switch(optname) 665 switch(optname) {
660 { 666 case SO_DEBUG:
661 case SO_DEBUG: 667 v.val = sock_flag(sk, SOCK_DBG);
662 v.val = sock_flag(sk, SOCK_DBG); 668 break;
663 break; 669
664 670 case SO_DONTROUTE:
665 case SO_DONTROUTE: 671 v.val = sock_flag(sk, SOCK_LOCALROUTE);
666 v.val = sock_flag(sk, SOCK_LOCALROUTE); 672 break;
667 break; 673
668 674 case SO_BROADCAST:
669 case SO_BROADCAST: 675 v.val = !!sock_flag(sk, SOCK_BROADCAST);
670 v.val = !!sock_flag(sk, SOCK_BROADCAST); 676 break;
671 break; 677
672 678 case SO_SNDBUF:
673 case SO_SNDBUF: 679 v.val = sk->sk_sndbuf;
674 v.val = sk->sk_sndbuf; 680 break;
675 break; 681
676 682 case SO_RCVBUF:
677 case SO_RCVBUF: 683 v.val = sk->sk_rcvbuf;
678 v.val = sk->sk_rcvbuf; 684 break;
679 break; 685
680 686 case SO_REUSEADDR:
681 case SO_REUSEADDR: 687 v.val = sk->sk_reuse;
682 v.val = sk->sk_reuse; 688 break;
683 break; 689
684 690 case SO_KEEPALIVE:
685 case SO_KEEPALIVE: 691 v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
686 v.val = !!sock_flag(sk, SOCK_KEEPOPEN); 692 break;
687 break; 693
688 694 case SO_TYPE:
689 case SO_TYPE: 695 v.val = sk->sk_type;
690 v.val = sk->sk_type; 696 break;
691 break; 697
692 698 case SO_ERROR:
693 case SO_ERROR: 699 v.val = -sock_error(sk);
694 v.val = -sock_error(sk); 700 if (v.val==0)
695 if(v.val==0) 701 v.val = xchg(&sk->sk_err_soft, 0);
696 v.val = xchg(&sk->sk_err_soft, 0); 702 break;
697 break; 703
698 704 case SO_OOBINLINE:
699 case SO_OOBINLINE: 705 v.val = !!sock_flag(sk, SOCK_URGINLINE);
700 v.val = !!sock_flag(sk, SOCK_URGINLINE); 706 break;
701 break; 707
702 708 case SO_NO_CHECK:
703 case SO_NO_CHECK: 709 v.val = sk->sk_no_check;
704 v.val = sk->sk_no_check; 710 break;
705 break; 711
706 712 case SO_PRIORITY:
707 case SO_PRIORITY: 713 v.val = sk->sk_priority;
708 v.val = sk->sk_priority; 714 break;
709 break; 715
710 716 case SO_LINGER:
711 case SO_LINGER: 717 lv = sizeof(v.ling);
712 lv = sizeof(v.ling); 718 v.ling.l_onoff = !!sock_flag(sk, SOCK_LINGER);
713 v.ling.l_onoff = !!sock_flag(sk, SOCK_LINGER); 719 v.ling.l_linger = sk->sk_lingertime / HZ;
714 v.ling.l_linger = sk->sk_lingertime / HZ; 720 break;
715 break; 721
716 722 case SO_BSDCOMPAT:
717 case SO_BSDCOMPAT: 723 sock_warn_obsolete_bsdism("getsockopt");
718 sock_warn_obsolete_bsdism("getsockopt"); 724 break;
719 break; 725
720 726 case SO_TIMESTAMP:
721 case SO_TIMESTAMP: 727 v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
722 v.val = sock_flag(sk, SOCK_RCVTSTAMP); 728 !sock_flag(sk, SOCK_RCVTSTAMPNS);
723 break; 729 break;
730
731 case SO_TIMESTAMPNS:
732 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
733 break;
734
735 case SO_RCVTIMEO:
736 lv=sizeof(struct timeval);
737 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
738 v.tm.tv_sec = 0;
739 v.tm.tv_usec = 0;
740 } else {
741 v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
742 v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
743 }
744 break;
745
746 case SO_SNDTIMEO:
747 lv=sizeof(struct timeval);
748 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
749 v.tm.tv_sec = 0;
750 v.tm.tv_usec = 0;
751 } else {
752 v.tm.tv_sec = sk->sk_sndtimeo / HZ;
753 v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
754 }
755 break;
724 756
725 case SO_RCVTIMEO: 757 case SO_RCVLOWAT:
726 lv=sizeof(struct timeval); 758 v.val = sk->sk_rcvlowat;
727 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) { 759 break;
728 v.tm.tv_sec = 0;
729 v.tm.tv_usec = 0;
730 } else {
731 v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
732 v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
733 }
734 break;
735 760
736 case SO_SNDTIMEO: 761 case SO_SNDLOWAT:
737 lv=sizeof(struct timeval); 762 v.val=1;
738 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) { 763 break;
739 v.tm.tv_sec = 0;
740 v.tm.tv_usec = 0;
741 } else {
742 v.tm.tv_sec = sk->sk_sndtimeo / HZ;
743 v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
744 }
745 break;
746 764
747 case SO_RCVLOWAT: 765 case SO_PASSCRED:
748 v.val = sk->sk_rcvlowat; 766 v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
749 break; 767 break;
750 768
751 case SO_SNDLOWAT: 769 case SO_PEERCRED:
752 v.val=1; 770 if (len > sizeof(sk->sk_peercred))
753 break; 771 len = sizeof(sk->sk_peercred);
772 if (copy_to_user(optval, &sk->sk_peercred, len))
773 return -EFAULT;
774 goto lenout;
754 775
755 case SO_PASSCRED: 776 case SO_PEERNAME:
756 v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0; 777 {
757 break; 778 char address[128];
758 779
759 case SO_PEERCRED: 780 if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
760 if (len > sizeof(sk->sk_peercred)) 781 return -ENOTCONN;
761 len = sizeof(sk->sk_peercred); 782 if (lv < len)
762 if (copy_to_user(optval, &sk->sk_peercred, len)) 783 return -EINVAL;
763 return -EFAULT; 784 if (copy_to_user(optval, address, len))
764 goto lenout; 785 return -EFAULT;
765 786 goto lenout;
766 case SO_PEERNAME: 787 }
767 {
768 char address[128];
769
770 if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
771 return -ENOTCONN;
772 if (lv < len)
773 return -EINVAL;
774 if (copy_to_user(optval, address, len))
775 return -EFAULT;
776 goto lenout;
777 }
778 788
779 /* Dubious BSD thing... Probably nobody even uses it, but 789 /* Dubious BSD thing... Probably nobody even uses it, but
780 * the UNIX standard wants it for whatever reason... -DaveM 790 * the UNIX standard wants it for whatever reason... -DaveM
781 */ 791 */
782 case SO_ACCEPTCONN: 792 case SO_ACCEPTCONN:
783 v.val = sk->sk_state == TCP_LISTEN; 793 v.val = sk->sk_state == TCP_LISTEN;
784 break; 794 break;
785 795
786 case SO_PASSSEC: 796 case SO_PASSSEC:
787 v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0; 797 v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
788 break; 798 break;
789 799
790 case SO_PEERSEC: 800 case SO_PEERSEC:
791 return security_socket_getpeersec_stream(sock, optval, optlen, len); 801 return security_socket_getpeersec_stream(sock, optval, optlen, len);
792 802
793 default: 803 default:
794 return(-ENOPROTOOPT); 804 return -ENOPROTOOPT;
795 } 805 }
806
796 if (len > lv) 807 if (len > lv)
797 len = lv; 808 len = lv;
798 if (copy_to_user(optval, &v, len)) 809 if (copy_to_user(optval, &v, len))
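The setsockopt side of this series makes SO_TIMESTAMP and SO_TIMESTAMPNS mutually exclusive, which is why the SO_TIMESTAMP case above now reads back true only when SOCK_RCVTSTAMP is set without SOCK_RCVTSTAMPNS. A minimal user-space sketch of probing both options (assumes a kernel and libc that expose SO_TIMESTAMPNS; error handling omitted):

    #include <stdio.h>
    #include <sys/socket.h>

    int main(void)
    {
        int fd = socket(AF_INET, SOCK_DGRAM, 0);
        int on = 1, val;
        socklen_t len = sizeof(val);

        /* request nanosecond receive timestamps */
        setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS, &on, sizeof(on));

        len = sizeof(val);
        getsockopt(fd, SOL_SOCKET, SO_TIMESTAMP, &val, &len);
        printf("SO_TIMESTAMP = %d\n", val);    /* expected: 0 */

        len = sizeof(val);
        getsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS, &val, &len);
        printf("SO_TIMESTAMPNS = %d\n", val);  /* expected: 1 */
        return 0;
    }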
@@ -904,6 +915,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
904 sk_node_init(&newsk->sk_node); 915 sk_node_init(&newsk->sk_node);
905 sock_lock_init(newsk); 916 sock_lock_init(newsk);
906 bh_lock_sock(newsk); 917 bh_lock_sock(newsk);
918 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
907 919
908 atomic_set(&newsk->sk_rmem_alloc, 0); 920 atomic_set(&newsk->sk_rmem_alloc, 0);
909 atomic_set(&newsk->sk_wmem_alloc, 0); 921 atomic_set(&newsk->sk_wmem_alloc, 0);
@@ -923,7 +935,6 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
923 newsk->sk_wmem_queued = 0; 935 newsk->sk_wmem_queued = 0;
924 newsk->sk_forward_alloc = 0; 936 newsk->sk_forward_alloc = 0;
925 newsk->sk_send_head = NULL; 937 newsk->sk_send_head = NULL;
926 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
927 newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; 938 newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
928 939
929 sock_reset_flag(newsk, SOCK_DONE); 940 sock_reset_flag(newsk, SOCK_DONE);
@@ -970,6 +981,21 @@ out:
970 981
971EXPORT_SYMBOL_GPL(sk_clone); 982EXPORT_SYMBOL_GPL(sk_clone);
972 983
984void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
985{
986 __sk_dst_set(sk, dst);
987 sk->sk_route_caps = dst->dev->features;
988 if (sk->sk_route_caps & NETIF_F_GSO)
989 sk->sk_route_caps |= NETIF_F_GSO_MASK;
990 if (sk_can_gso(sk)) {
991 if (dst->header_len)
992 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
993 else
994 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
995 }
996}
997EXPORT_SYMBOL_GPL(sk_setup_caps);
998
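sk_setup_caps() folds together the per-route capability setup that protocols previously open-coded after a route lookup: a GSO-capable device advertises all GSO types, GSO is withdrawn again when the path adds header bytes (e.g. IPsec), and otherwise GSO implies scatter-gather and hardware checksumming. A hedged stand-alone rendering of that flag logic, with the NETIF_F_* bits replaced by illustrative constants and sk_can_gso() reduced to an any-GSO-bit test:

    #include <stdio.h>

    #define F_SG        0x01
    #define F_HW_CSUM   0x02
    #define F_GSO       0x04
    #define F_GSO_MASK  0xf0   /* stand-in for all GSO type bits */

    static unsigned int route_caps(unsigned int dev_features,
                                   unsigned int dst_header_len)
    {
        unsigned int caps = dev_features;

        if (caps & F_GSO)
            caps |= F_GSO_MASK;           /* advertise every GSO type */
        if (caps & F_GSO_MASK) {          /* simplified sk_can_gso()  */
            if (dst_header_len)
                caps &= ~F_GSO_MASK;      /* extra headers forbid GSO */
            else
                caps |= F_SG | F_HW_CSUM;
        }
        return caps;
    }

    int main(void)
    {
        printf("%#x\n", route_caps(F_GSO, 0));  /* 0xf7: GSO + SG + CSUM   */
        printf("%#x\n", route_caps(F_GSO, 8));  /* 0x4: GSO types stripped */
        return 0;
    }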
973void __init sk_init(void) 999void __init sk_init(void)
974{ 1000{
975 if (num_physpages <= 4096) { 1001 if (num_physpages <= 4096) {
@@ -1220,13 +1246,13 @@ static void __lock_sock(struct sock *sk)
1220{ 1246{
1221 DEFINE_WAIT(wait); 1247 DEFINE_WAIT(wait);
1222 1248
1223 for(;;) { 1249 for (;;) {
1224 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait, 1250 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
1225 TASK_UNINTERRUPTIBLE); 1251 TASK_UNINTERRUPTIBLE);
1226 spin_unlock_bh(&sk->sk_lock.slock); 1252 spin_unlock_bh(&sk->sk_lock.slock);
1227 schedule(); 1253 schedule();
1228 spin_lock_bh(&sk->sk_lock.slock); 1254 spin_lock_bh(&sk->sk_lock.slock);
1229 if(!sock_owned_by_user(sk)) 1255 if (!sock_owned_by_user(sk))
1230 break; 1256 break;
1231 } 1257 }
1232 finish_wait(&sk->sk_lock.wq, &wait); 1258 finish_wait(&sk->sk_lock.wq, &wait);
@@ -1258,7 +1284,7 @@ static void __release_sock(struct sock *sk)
1258 } while (skb != NULL); 1284 } while (skb != NULL);
1259 1285
1260 bh_lock_sock(sk); 1286 bh_lock_sock(sk);
1261 } while((skb = sk->sk_backlog.head) != NULL); 1287 } while ((skb = sk->sk_backlog.head) != NULL);
1262} 1288}
1263 1289
1264/** 1290/**
@@ -1420,7 +1446,7 @@ static void sock_def_write_space(struct sock *sk)
1420 /* Do not wake up a writer until he can make "significant" 1446 /* Do not wake up a writer until he can make "significant"
1421 * progress. --DaveM 1447 * progress. --DaveM
1422 */ 1448 */
1423 if((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { 1449 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1424 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 1450 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1425 wake_up_interruptible(sk->sk_sleep); 1451 wake_up_interruptible(sk->sk_sleep);
1426 1452
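The guard above wakes writers only once committed write memory has dropped to half of sk_sndbuf, rather than on every freed byte. A minimal sketch of the test in isolation:

    #include <stdio.h>

    /* wake writers only when wmem_alloc * 2 <= sndbuf */
    static int write_space(unsigned int wmem_alloc, unsigned int sndbuf)
    {
        return (wmem_alloc << 1) <= sndbuf;
    }

    int main(void)
    {
        printf("%d\n", write_space(60000, 65536));  /* 0: too little space */
        printf("%d\n", write_space(30000, 65536));  /* 1: wake the writer  */
        return 0;
    }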
@@ -1482,8 +1508,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
1482 1508
1483 sock_set_flag(sk, SOCK_ZAPPED); 1509 sock_set_flag(sk, SOCK_ZAPPED);
1484 1510
1485 if(sock) 1511 if (sock) {
1486 {
1487 sk->sk_type = sock->type; 1512 sk->sk_type = sock->type;
1488 sk->sk_sleep = &sock->wait; 1513 sk->sk_sleep = &sock->wait;
1489 sock->sk = sk; 1514 sock->sk = sk;
@@ -1512,8 +1537,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
1512 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; 1537 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
1513 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; 1538 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
1514 1539
1515 sk->sk_stamp.tv_sec = -1L; 1540 sk->sk_stamp = ktime_set(-1L, -1L);
1516 sk->sk_stamp.tv_usec = -1L;
1517 1541
1518 atomic_set(&sk->sk_refcnt, 1); 1542 atomic_set(&sk->sk_refcnt, 1);
1519} 1543}
@@ -1554,17 +1578,36 @@ EXPORT_SYMBOL(release_sock);
1554 1578
1555int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) 1579int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1556{ 1580{
1581 struct timeval tv;
1557 if (!sock_flag(sk, SOCK_TIMESTAMP)) 1582 if (!sock_flag(sk, SOCK_TIMESTAMP))
1558 sock_enable_timestamp(sk); 1583 sock_enable_timestamp(sk);
1559 if (sk->sk_stamp.tv_sec == -1) 1584 tv = ktime_to_timeval(sk->sk_stamp);
1585 if (tv.tv_sec == -1)
1560 return -ENOENT; 1586 return -ENOENT;
1561 if (sk->sk_stamp.tv_sec == 0) 1587 if (tv.tv_sec == 0) {
1562 do_gettimeofday(&sk->sk_stamp); 1588 sk->sk_stamp = ktime_get_real();
1563 return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ? 1589 tv = ktime_to_timeval(sk->sk_stamp);
1564 -EFAULT : 0; 1590 }
1591 return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
1565} 1592}
1566EXPORT_SYMBOL(sock_get_timestamp); 1593EXPORT_SYMBOL(sock_get_timestamp);
1567 1594
1595int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
1596{
1597 struct timespec ts;
1598 if (!sock_flag(sk, SOCK_TIMESTAMP))
1599 sock_enable_timestamp(sk);
1600 ts = ktime_to_timespec(sk->sk_stamp);
1601 if (ts.tv_sec == -1)
1602 return -ENOENT;
1603 if (ts.tv_sec == 0) {
1604 sk->sk_stamp = ktime_get_real();
1605 ts = ktime_to_timespec(sk->sk_stamp);
1606 }
1607 return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
1608}
1609EXPORT_SYMBOL(sock_get_timestampns);
1610
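sock_get_timestamp() and the new sock_get_timestampns() back the SIOCGSTAMP and SIOCGSTAMPNS ioctls, which report the receive time of the last packet delivered on a socket. A hedged user-space sketch:

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <sys/time.h>
    #include <time.h>
    #include <linux/sockios.h>   /* SIOCGSTAMP, SIOCGSTAMPNS */

    static void print_last_rx_time(int fd)
    {
        struct timeval tv;
        struct timespec ts;

        if (ioctl(fd, SIOCGSTAMP, &tv) == 0)     /* microsecond form */
            printf("last rx: %ld.%06ld\n",
                   (long)tv.tv_sec, (long)tv.tv_usec);
        if (ioctl(fd, SIOCGSTAMPNS, &ts) == 0)   /* nanosecond form */
            printf("last rx: %ld.%09ld\n",
                   (long)ts.tv_sec, (long)ts.tv_nsec);
    }

This is called on a socket that has already received at least one packet; before that, the ioctls fail with ENOENT, per the -1 sentinel checked above.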
1568void sock_enable_timestamp(struct sock *sk) 1611void sock_enable_timestamp(struct sock *sk)
1569{ 1612{
1570 if (!sock_flag(sk, SOCK_TIMESTAMP)) { 1613 if (!sock_flag(sk, SOCK_TIMESTAMP)) {
@@ -1899,7 +1942,7 @@ static int proto_seq_show(struct seq_file *seq, void *v)
1899 return 0; 1942 return 0;
1900} 1943}
1901 1944
1902static struct seq_operations proto_seq_ops = { 1945static const struct seq_operations proto_seq_ops = {
1903 .start = proto_seq_start, 1946 .start = proto_seq_start,
1904 .next = proto_seq_next, 1947 .next = proto_seq_next,
1905 .stop = proto_seq_stop, 1948 .stop = proto_seq_stop,
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 1e75b1585460..b29712033dd4 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -136,6 +136,14 @@ ctl_table core_table[] = {
136 .mode = 0644, 136 .mode = 0644,
137 .proc_handler = &proc_dointvec 137 .proc_handler = &proc_dointvec
138 }, 138 },
139 {
140 .ctl_name = NET_CORE_WARNINGS,
141 .procname = "warnings",
142 .data = &net_msg_warn,
143 .maxlen = sizeof(int),
144 .mode = 0644,
145 .proc_handler = &proc_dointvec
146 },
139 { .ctl_name = 0 } 147 { .ctl_name = 0 }
140}; 148};
141 149
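Together with the net_msg_warn flag added in net/core/utils.c below, this creates /proc/sys/net/core/warnings, default-enabled, so the rate-limited network warning printks can be silenced at run time. A hedged sketch of flipping it from C (equivalent to `echo 0 > /proc/sys/net/core/warnings`; assumes a kernel carrying this patch):

    #include <stdio.h>

    int main(void)
    {
        FILE *f = fopen("/proc/sys/net/core/warnings", "w");

        if (f == NULL)
            return 1;        /* sysctl not present on older kernels */
        fputs("0\n", f);     /* 0 = suppress, 1 = warn (default) */
        return fclose(f) ? 1 : 0;
    }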
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
index 0ad1cd57bc39..89241cdeea3f 100644
--- a/net/core/user_dma.c
+++ b/net/core/user_dma.c
@@ -49,8 +49,8 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
49 struct sk_buff *skb, int offset, struct iovec *to, 49 struct sk_buff *skb, int offset, struct iovec *to,
50 size_t len, struct dma_pinned_list *pinned_list) 50 size_t len, struct dma_pinned_list *pinned_list)
51{ 51{
52 int start = skb_headlen(skb); 52 int end = skb_headlen(skb);
53 int i, copy = start - offset; 53 int i, copy = end - offset;
54 dma_cookie_t cookie = 0; 54 dma_cookie_t cookie = 0;
55 55
56 /* Copy header. */ 56 /* Copy header. */
@@ -69,11 +69,9 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
69 69
70 /* Copy paged appendix. Hmm... why does this look so complicated? */ 70 /* Copy paged appendix. Hmm... why does this look so complicated? */
71 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 71 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
72 int end; 72 BUG_TRAP(len >= 0);
73 73
74 BUG_TRAP(start <= offset + len); 74 end = offset + skb_shinfo(skb)->frags[i].size;
75
76 end = start + skb_shinfo(skb)->frags[i].size;
77 copy = end - offset; 75 copy = end - offset;
78 if ((copy = end - offset) > 0) { 76 if ((copy = end - offset) > 0) {
79 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 77 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
@@ -82,8 +80,8 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
82 if (copy > len) 80 if (copy > len)
83 copy = len; 81 copy = len;
84 82
85 cookie = dma_memcpy_pg_to_iovec(chan, to, pinned_list, page, 83 cookie = dma_memcpy_pg_to_iovec(chan, to, pinned_list,
86 frag->page_offset + offset - start, copy); 84 page, frag->page_offset, copy);
87 if (cookie < 0) 85 if (cookie < 0)
88 goto fault; 86 goto fault;
89 len -= copy; 87 len -= copy;
@@ -91,25 +89,21 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
91 goto end; 89 goto end;
92 offset += copy; 90 offset += copy;
93 } 91 }
94 start = end;
95 } 92 }
96 93
97 if (skb_shinfo(skb)->frag_list) { 94 if (skb_shinfo(skb)->frag_list) {
98 struct sk_buff *list = skb_shinfo(skb)->frag_list; 95 struct sk_buff *list = skb_shinfo(skb)->frag_list;
99 96
100 for (; list; list = list->next) { 97 for (; list; list = list->next) {
101 int end; 98 BUG_TRAP(len >= 0);
102
103 BUG_TRAP(start <= offset + len);
104 99
105 end = start + list->len; 100 end = offset + list->len;
106 copy = end - offset; 101 copy = end - offset;
107 if (copy > 0) { 102 if (copy > 0) {
108 if (copy > len) 103 if (copy > len)
109 copy = len; 104 copy = len;
110 cookie = dma_skb_copy_datagram_iovec(chan, list, 105 cookie = dma_skb_copy_datagram_iovec(chan, list,
111 offset - start, to, copy, 106 0, to, copy, pinned_list);
112 pinned_list);
113 if (cookie < 0) 107 if (cookie < 0)
114 goto fault; 108 goto fault;
115 len -= copy; 109 len -= copy;
@@ -117,7 +111,6 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
117 goto end; 111 goto end;
118 offset += copy; 112 offset += copy;
119 } 113 }
120 start = end;
121 } 114 }
122 } 115 }
123 116
diff --git a/net/core/utils.c b/net/core/utils.c
index 07236c17fab9..adecfd281ae9 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -30,8 +30,10 @@
30#include <asm/system.h> 30#include <asm/system.h>
31#include <asm/uaccess.h> 31#include <asm/uaccess.h>
32 32
33int net_msg_cost = 5*HZ; 33int net_msg_cost __read_mostly = 5*HZ;
34int net_msg_burst = 10; 34int net_msg_burst __read_mostly = 10;
35int net_msg_warn __read_mostly = 1;
36EXPORT_SYMBOL(net_msg_warn);
35 37
36/* 38/*
37 * All net warning printk()s should be guarded by this function. 39 * All net warning printk()s should be guarded by this function.
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index a086c6312d3b..01030f346177 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -157,7 +157,7 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
157 157
158 if (av != NULL) { 158 if (av != NULL) {
159 av->dccpav_buf_head = DCCP_MAX_ACKVEC_LEN - 1; 159 av->dccpav_buf_head = DCCP_MAX_ACKVEC_LEN - 1;
160 av->dccpav_buf_ackno = DCCP_MAX_SEQNO + 1; 160 av->dccpav_buf_ackno = UINT48_MAX + 1;
161 av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; 161 av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0;
162 av->dccpav_time.tv_sec = 0; 162 av->dccpav_time.tv_sec = 0;
163 av->dccpav_time.tv_usec = 0; 163 av->dccpav_time.tv_usec = 0;
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 746f79d104b3..d7d9ce737244 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -33,7 +33,6 @@
33 * along with this program; if not, write to the Free Software 33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 */ 35 */
36
37#include "../ccid.h" 36#include "../ccid.h"
38#include "../dccp.h" 37#include "../dccp.h"
39#include "lib/packet_history.h" 38#include "lib/packet_history.h"
@@ -52,6 +51,9 @@ static struct dccp_tx_hist *ccid3_tx_hist;
52static struct dccp_rx_hist *ccid3_rx_hist; 51static struct dccp_rx_hist *ccid3_rx_hist;
53static struct dccp_li_hist *ccid3_li_hist; 52static struct dccp_li_hist *ccid3_li_hist;
54 53
54/*
55 * Transmitter Half-Connection Routines
56 */
55#ifdef CONFIG_IP_DCCP_CCID3_DEBUG 57#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
56static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) 58static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
57{ 59{
@@ -80,23 +82,37 @@ static void ccid3_hc_tx_set_state(struct sock *sk,
80} 82}
81 83
82/* 84/*
83 * Recalculate scheduled nominal send time t_nom, inter-packet interval 85 * Compute the initial sending rate X_init according to RFC 3390:
84 * t_ipi, and delta value. Should be called after each change to X. 86 * w_init = min(4 * MSS, max(2 * MSS, 4380 bytes))
87 * X_init = w_init / RTT
88 * For consistency with other parts of the code, X_init is scaled by 2^6.
85 */ 89 */
86static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx) 90static inline u64 rfc3390_initial_rate(struct sock *sk)
87{ 91{
88 timeval_sub_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); 92 const struct dccp_sock *dp = dccp_sk(sk);
93 const __u32 w_init = min(4 * dp->dccps_mss_cache,
94 max(2 * dp->dccps_mss_cache, 4380U));
89 95
90 /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */ 96 return scaled_div(w_init << 6, ccid3_hc_tx_sk(sk)->ccid3hctx_rtt);
91 hctx->ccid3hctx_t_ipi = scaled_div(hctx->ccid3hctx_s, 97}
92 hctx->ccid3hctx_x >> 6);
93 98
94 /* Update nominal send time with regard to the new t_ipi */ 99/*
95 timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); 100 * Recalculate t_ipi and delta (should be called whenever X changes)
101 */
102static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
103{
104 /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
105 hctx->ccid3hctx_t_ipi = scaled_div32(((u64)hctx->ccid3hctx_s) << 6,
106 hctx->ccid3hctx_x);
96 107
97 /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ 108 /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
98 hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, 109 hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2,
99 TFRC_OPSYS_HALF_TIME_GRAN); 110 TFRC_OPSYS_HALF_TIME_GRAN);
111
112 ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n",
113 hctx->ccid3hctx_t_ipi, hctx->ccid3hctx_delta,
114 hctx->ccid3hctx_s, (unsigned)(hctx->ccid3hctx_x >> 6));
115
100} 116}
101/* 117/*
102 * Update X by 118 * Update X by
@@ -112,19 +128,28 @@ static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx)
112 * fine-grained resolution of sending rates. This requires scaling by 2^6 128 * fine-grained resolution of sending rates. This requires scaling by 2^6
113 * throughout the code. Only X_calc is unscaled (in bytes/second). 129 * throughout the code. Only X_calc is unscaled (in bytes/second).
114 * 130 *
115 * If X has changed, we also update the scheduled send time t_now,
116 * the inter-packet interval t_ipi, and the delta value.
117 */ 131 */
118static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now) 132static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
119 133
120{ 134{
121 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); 135 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
136 __u64 min_rate = 2 * hctx->ccid3hctx_x_recv;
122 const __u64 old_x = hctx->ccid3hctx_x; 137 const __u64 old_x = hctx->ccid3hctx_x;
123 138
139 /*
140 * Handle IDLE periods: do not reduce below RFC3390 initial sending rate
141 * when idling [RFC 4342, 5.1]. See also draft-ietf-dccp-rfc3448bis.
142 * For consistency with X and X_recv, min_rate is also scaled by 2^6.
143 */
144 if (unlikely(hctx->ccid3hctx_idle)) {
145 min_rate = rfc3390_initial_rate(sk);
146 min_rate = max(min_rate, 2 * hctx->ccid3hctx_x_recv);
147 }
148
124 if (hctx->ccid3hctx_p > 0) { 149 if (hctx->ccid3hctx_p > 0) {
125 150
126 hctx->ccid3hctx_x = min(((__u64)hctx->ccid3hctx_x_calc) << 6, 151 hctx->ccid3hctx_x = min(((__u64)hctx->ccid3hctx_x_calc) << 6,
127 hctx->ccid3hctx_x_recv * 2); 152 min_rate);
128 hctx->ccid3hctx_x = max(hctx->ccid3hctx_x, 153 hctx->ccid3hctx_x = max(hctx->ccid3hctx_x,
129 (((__u64)hctx->ccid3hctx_s) << 6) / 154 (((__u64)hctx->ccid3hctx_s) << 6) /
130 TFRC_T_MBI); 155 TFRC_T_MBI);
@@ -133,14 +158,21 @@ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
133 (suseconds_t)hctx->ccid3hctx_rtt >= 0) { 158 (suseconds_t)hctx->ccid3hctx_rtt >= 0) {
134 159
135 hctx->ccid3hctx_x = 160 hctx->ccid3hctx_x =
136 max(2 * min(hctx->ccid3hctx_x, hctx->ccid3hctx_x_recv), 161 max(min(2 * hctx->ccid3hctx_x, min_rate),
137 scaled_div(((__u64)hctx->ccid3hctx_s) << 6, 162 scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
138 hctx->ccid3hctx_rtt)); 163 hctx->ccid3hctx_rtt));
139 hctx->ccid3hctx_t_ld = *now; 164 hctx->ccid3hctx_t_ld = *now;
140 } 165 }
141 166
142 if (hctx->ccid3hctx_x != old_x) 167 if (hctx->ccid3hctx_x != old_x) {
143 ccid3_update_send_time(hctx); 168 ccid3_pr_debug("X_prev=%u, X_now=%u, X_calc=%u, "
169 "X_recv=%u\n", (unsigned)(old_x >> 6),
170 (unsigned)(hctx->ccid3hctx_x >> 6),
171 hctx->ccid3hctx_x_calc,
172 (unsigned)(hctx->ccid3hctx_x_recv >> 6));
173
174 ccid3_update_send_interval(hctx);
175 }
144} 176}
145 177
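To make the RFC 3390 computation above concrete: with a typical MSS of 1460 bytes, w_init = min(4 * 1460, max(2 * 1460, 4380)) = 4380 bytes, so at an RTT of 100 ms the initial rate is 43800 bytes/s (kept scaled by 2^6 inside CCID 3). A hedged stand-alone sketch of the unscaled arithmetic:

    #include <stdio.h>

    /* RFC 3390: w_init = min(4 * MSS, max(2 * MSS, 4380 bytes)) */
    static unsigned long long rfc3390_w_init(unsigned int mss)
    {
        unsigned int lo = 2 * mss > 4380 ? 2 * mss : 4380;

        return 4 * mss < lo ? 4 * mss : lo;
    }

    int main(void)
    {
        unsigned int mss = 1460;       /* assumed Ethernet-derived MSS */
        unsigned int rtt_us = 100000;  /* assumed RTT: 100 ms */
        unsigned long long w = rfc3390_w_init(mss);
        unsigned long long x = w * 1000000ULL / rtt_us;

        printf("w_init = %llu bytes, X_init = %llu bytes/s\n", w, x);
        return 0;                      /* prints 4380 and 43800 */
    }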
146/* 178/*
@@ -149,17 +181,12 @@ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
149 */ 181 */
150static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len) 182static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len)
151{ 183{
152 if (unlikely(len == 0)) 184 const u16 old_s = hctx->ccid3hctx_s;
153 ccid3_pr_debug("Packet payload length is 0 - not updating\n"); 185
154 else 186 hctx->ccid3hctx_s = old_s == 0 ? len : (9 * old_s + len) / 10;
155 hctx->ccid3hctx_s = hctx->ccid3hctx_s == 0 ? len : 187
156 (9 * hctx->ccid3hctx_s + len) / 10; 188 if (hctx->ccid3hctx_s != old_s)
157 /* 189 ccid3_update_send_interval(hctx);
158 * Note: We could do a potential optimisation here - when `s' changes,
159 * recalculate sending rate and consequently t_ipi, t_delta, and
160 * t_now. This is however non-standard, and the benefits are not
161 * clear, so it is currently left out.
162 */
163} 190}
164 191
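The packet-size estimate s is kept as an integer moving average weighting the previous value 9:1, and (new in this hunk) a change in s immediately re-derives t_ipi. A hedged numeric illustration of the average:

    #include <stdio.h>

    /* s = old_s == 0 ? len : (9 * old_s + len) / 10 */
    int main(void)
    {
        unsigned int s = 0;
        unsigned int sizes[] = { 1000, 1000, 200 };

        for (unsigned int i = 0; i < 3; i++) {
            s = s == 0 ? sizes[i] : (9 * s + sizes[i]) / 10;
            printf("after %4u-byte packet: s = %u\n", sizes[i], s);
        }
        return 0;   /* s evolves: 1000, 1000, 920 */
    }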
165/* 192/*
@@ -193,6 +220,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
193{ 220{
194 struct sock *sk = (struct sock *)data; 221 struct sock *sk = (struct sock *)data;
195 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); 222 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
223 struct timeval now;
196 unsigned long t_nfb = USEC_PER_SEC / 5; 224 unsigned long t_nfb = USEC_PER_SEC / 5;
197 225
198 bh_lock_sock(sk); 226 bh_lock_sock(sk);
@@ -205,6 +233,8 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
205 ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk, 233 ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk,
206 ccid3_tx_state_name(hctx->ccid3hctx_state)); 234 ccid3_tx_state_name(hctx->ccid3hctx_state));
207 235
236 hctx->ccid3hctx_idle = 1;
237
208 switch (hctx->ccid3hctx_state) { 238 switch (hctx->ccid3hctx_state) {
209 case TFRC_SSTATE_NO_FBACK: 239 case TFRC_SSTATE_NO_FBACK:
210 /* RFC 3448, 4.4: Halve send rate directly */ 240 /* RFC 3448, 4.4: Halve send rate directly */
@@ -219,53 +249,37 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
219 /* The value of R is still undefined and so we cannot recompute 249 /* The value of R is still undefined and so we cannot recompute
220 * the timeout value. Keep initial value as per [RFC 4342, 5]. */ 250 * the timeout value. Keep initial value as per [RFC 4342, 5]. */
221 t_nfb = TFRC_INITIAL_TIMEOUT; 251 t_nfb = TFRC_INITIAL_TIMEOUT;
222 ccid3_update_send_time(hctx); 252 ccid3_update_send_interval(hctx);
223 break; 253 break;
224 case TFRC_SSTATE_FBACK: 254 case TFRC_SSTATE_FBACK:
225 /* 255 /*
226 * Check if IDLE since last timeout and recv rate is less than 256 * Modify the cached value of X_recv [RFC 3448, 4.4]
227 * 4 packets (in units of 64*bytes/sec) per RTT 257 *
258 * If (p == 0 || X_calc > 2 * X_recv)
259 * X_recv = max(X_recv / 2, s / (2 * t_mbi));
260 * Else
261 * X_recv = X_calc / 4;
262 *
263 * Note that X_recv is scaled by 2^6 while X_calc is not
228 */ 264 */
229 if (!hctx->ccid3hctx_idle || 265 BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
230 (hctx->ccid3hctx_x_recv >= 4 *
231 scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
232 hctx->ccid3hctx_rtt))) {
233 struct timeval now;
234 266
235 ccid3_pr_debug("%s(%p, state=%s), not idle\n", 267 if (hctx->ccid3hctx_p == 0 ||
236 dccp_role(sk), sk, 268 (hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5))) {
237 ccid3_tx_state_name(hctx->ccid3hctx_state));
238 269
239 /* 270 hctx->ccid3hctx_x_recv =
240 * Modify the cached value of X_recv [RFC 3448, 4.4] 271 max(hctx->ccid3hctx_x_recv / 2,
241 * 272 (((__u64)hctx->ccid3hctx_s) << 6) /
242 * If (p == 0 || X_calc > 2 * X_recv) 273 (2 * TFRC_T_MBI));
243 * X_recv = max(X_recv / 2, s / (2 * t_mbi)); 274
244 * Else 275 if (hctx->ccid3hctx_p == 0)
245 * X_recv = X_calc / 4; 276 dccp_timestamp(sk, &now);
246 * 277 } else {
247 * Note that X_recv is scaled by 2^6 while X_calc is not 278 hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc;
248 */ 279 hctx->ccid3hctx_x_recv <<= 4;
249 BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
250
251 if (hctx->ccid3hctx_p == 0 ||
252 (hctx->ccid3hctx_x_calc >
253 (hctx->ccid3hctx_x_recv >> 5))) {
254
255 hctx->ccid3hctx_x_recv =
256 max(hctx->ccid3hctx_x_recv / 2,
257 (((__u64)hctx->ccid3hctx_s) << 6) /
258 (2 * TFRC_T_MBI));
259
260 if (hctx->ccid3hctx_p == 0)
261 dccp_timestamp(sk, &now);
262 } else {
263 hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc;
264 hctx->ccid3hctx_x_recv <<= 4;
265 }
266 /* Now recalculate X [RFC 3448, 4.3, step (4)] */
267 ccid3_hc_tx_update_x(sk, &now);
268 } 280 }
281 /* Now recalculate X [RFC 3448, 4.3, step (4)] */
282 ccid3_hc_tx_update_x(sk, &now);
269 /* 283 /*
270 * Schedule no feedback timer to expire in 284 * Schedule no feedback timer to expire in
271 * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) 285 * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
@@ -280,8 +294,6 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
280 goto out; 294 goto out;
281 } 295 }
282 296
283 hctx->ccid3hctx_idle = 1;
284
285restart_timer: 297restart_timer:
286 sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, 298 sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
287 jiffies + usecs_to_jiffies(t_nfb)); 299 jiffies + usecs_to_jiffies(t_nfb));
@@ -322,24 +334,35 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
322 usecs_to_jiffies(TFRC_INITIAL_TIMEOUT))); 334 usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)));
323 hctx->ccid3hctx_last_win_count = 0; 335 hctx->ccid3hctx_last_win_count = 0;
324 hctx->ccid3hctx_t_last_win_count = now; 336 hctx->ccid3hctx_t_last_win_count = now;
325 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
326
327 /* Set initial sending rate X/s to 1pps (X is scaled by 2^6) */
328 ccid3_hc_tx_update_s(hctx, skb->len);
329 hctx->ccid3hctx_x = hctx->ccid3hctx_s;
330 hctx->ccid3hctx_x <<= 6;
331
332 /* First timeout, according to [RFC 3448, 4.2], is 1 second */
333 hctx->ccid3hctx_t_ipi = USEC_PER_SEC;
334 /* Initial delta: minimum of 0.5 sec and t_gran/2 */
335 hctx->ccid3hctx_delta = TFRC_OPSYS_HALF_TIME_GRAN;
336 337
337 /* Set t_0 for initial packet */ 338 /* Set t_0 for initial packet */
338 hctx->ccid3hctx_t_nom = now; 339 hctx->ccid3hctx_t_nom = now;
340
341 hctx->ccid3hctx_s = skb->len;
342
343 /*
344 * Use initial RTT sample when available: recommended by erratum
345 * to RFC 4342. This implements the initialisation procedure of
346 * draft rfc3448bis, section 4.2. Remember, X is scaled by 2^6.
347 */
348 if (dp->dccps_syn_rtt) {
349 ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt);
350 hctx->ccid3hctx_rtt = dp->dccps_syn_rtt;
351 hctx->ccid3hctx_x = rfc3390_initial_rate(sk);
352 hctx->ccid3hctx_t_ld = now;
353 } else {
354 /* Sender does not have RTT sample: X = MSS/second */
355 hctx->ccid3hctx_x = dp->dccps_mss_cache;
356 hctx->ccid3hctx_x <<= 6;
357 }
358 ccid3_update_send_interval(hctx);
359
360 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
339 break; 361 break;
340 case TFRC_SSTATE_NO_FBACK: 362 case TFRC_SSTATE_NO_FBACK:
341 case TFRC_SSTATE_FBACK: 363 case TFRC_SSTATE_FBACK:
342 delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now); 364 delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now);
365 ccid3_pr_debug("delay=%ld\n", (long)delay);
343 /* 366 /*
344 * Scheduling of packet transmissions [RFC 3448, 4.6] 367 * Scheduling of packet transmissions [RFC 3448, 4.6]
345 * 368 *
@@ -361,6 +384,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
361 /* prepare to send now (add options etc.) */ 384 /* prepare to send now (add options etc.) */
362 dp->dccps_hc_tx_insert_options = 1; 385 dp->dccps_hc_tx_insert_options = 1;
363 DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; 386 DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
387 hctx->ccid3hctx_idle = 0;
364 388
365 /* set the nominal send time for the next following packet */ 389 /* set the nominal send time for the next following packet */
366 timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); 390 timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
@@ -391,7 +415,6 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
391 packet->dccphtx_seqno = dccp_sk(sk)->dccps_gss; 415 packet->dccphtx_seqno = dccp_sk(sk)->dccps_gss;
392 packet->dccphtx_rtt = hctx->ccid3hctx_rtt; 416 packet->dccphtx_rtt = hctx->ccid3hctx_rtt;
393 packet->dccphtx_sent = 1; 417 packet->dccphtx_sent = 1;
394 hctx->ccid3hctx_idle = 0;
395} 418}
396 419
397static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) 420static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
@@ -402,8 +425,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
402 struct dccp_tx_hist_entry *packet; 425 struct dccp_tx_hist_entry *packet;
403 struct timeval now; 426 struct timeval now;
404 unsigned long t_nfb; 427 unsigned long t_nfb;
405 u32 pinv; 428 u32 pinv, r_sample;
406 suseconds_t r_sample, t_elapsed;
407 429
408 BUG_ON(hctx == NULL); 430 BUG_ON(hctx == NULL);
409 431
@@ -445,18 +467,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
445 * Calculate new round trip sample as per [RFC 3448, 4.3] by 467 * Calculate new round trip sample as per [RFC 3448, 4.3] by
446 * R_sample = (now - t_recvdata) - t_elapsed 468 * R_sample = (now - t_recvdata) - t_elapsed
447 */ 469 */
448 r_sample = timeval_delta(&now, &packet->dccphtx_tstamp); 470 r_sample = dccp_sample_rtt(sk, &now, &packet->dccphtx_tstamp);
449 t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10;
450
451 DCCP_BUG_ON(r_sample < 0);
452 if (unlikely(r_sample <= t_elapsed))
453 DCCP_WARN("WARNING: r_sample=%dus <= t_elapsed=%dus\n",
454 (int)r_sample, (int)t_elapsed);
455 else
456 r_sample -= t_elapsed;
457 CCID3_RTT_SANITY_CHECK(r_sample);
458 471
459 /* Update RTT estimate by 472 /*
473 * Update RTT estimate by
460 * If (No feedback recv) 474 * If (No feedback recv)
461 * R = R_sample; 475 * R = R_sample;
462 * Else 476 * Else
@@ -467,27 +481,23 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
467 if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { 481 if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
468 /* 482 /*
469 * Larger Initial Windows [RFC 4342, sec. 5] 483 * Larger Initial Windows [RFC 4342, sec. 5]
470 * We deviate in that we use `s' instead of `MSS'.
471 */ 484 */
472 __u64 w_init = min(4 * hctx->ccid3hctx_s,
473 max(2 * hctx->ccid3hctx_s, 4380));
474 hctx->ccid3hctx_rtt = r_sample; 485 hctx->ccid3hctx_rtt = r_sample;
475 hctx->ccid3hctx_x = scaled_div(w_init << 6, r_sample); 486 hctx->ccid3hctx_x = rfc3390_initial_rate(sk);
476 hctx->ccid3hctx_t_ld = now; 487 hctx->ccid3hctx_t_ld = now;
477 488
478 ccid3_update_send_time(hctx); 489 ccid3_update_send_interval(hctx);
479 490
480 ccid3_pr_debug("%s(%p), s=%u, w_init=%llu, " 491 ccid3_pr_debug("%s(%p), s=%u, MSS=%u, "
481 "R_sample=%dus, X=%u\n", dccp_role(sk), 492 "R_sample=%uus, X=%u\n", dccp_role(sk),
482 sk, hctx->ccid3hctx_s, 493 sk, hctx->ccid3hctx_s,
483 (unsigned long long)w_init, 494 dp->dccps_mss_cache, r_sample,
484 (int)r_sample,
485 (unsigned)(hctx->ccid3hctx_x >> 6)); 495 (unsigned)(hctx->ccid3hctx_x >> 6));
486 496
487 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); 497 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
488 } else { 498 } else {
489 hctx->ccid3hctx_rtt = (9 * hctx->ccid3hctx_rtt + 499 hctx->ccid3hctx_rtt = (9 * hctx->ccid3hctx_rtt +
490 (u32)r_sample) / 10; 500 r_sample) / 10;
491 501
492 /* Update sending rate (step 4 of [RFC 3448, 4.3]) */ 502 /* Update sending rate (step 4 of [RFC 3448, 4.3]) */
493 if (hctx->ccid3hctx_p > 0) 503 if (hctx->ccid3hctx_p > 0)
@@ -497,10 +507,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
497 hctx->ccid3hctx_p); 507 hctx->ccid3hctx_p);
498 ccid3_hc_tx_update_x(sk, &now); 508 ccid3_hc_tx_update_x(sk, &now);
499 509
500 ccid3_pr_debug("%s(%p), RTT=%uus (sample=%dus), s=%u, " 510 ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, "
501 "p=%u, X_calc=%u, X_recv=%u, X=%u\n", 511 "p=%u, X_calc=%u, X_recv=%u, X=%u\n",
502 dccp_role(sk), 512 dccp_role(sk),
503 sk, hctx->ccid3hctx_rtt, (int)r_sample, 513 sk, hctx->ccid3hctx_rtt, r_sample,
504 hctx->ccid3hctx_s, hctx->ccid3hctx_p, 514 hctx->ccid3hctx_s, hctx->ccid3hctx_p,
505 hctx->ccid3hctx_x_calc, 515 hctx->ccid3hctx_x_calc,
506 (unsigned)(hctx->ccid3hctx_x_recv >> 6), 516 (unsigned)(hctx->ccid3hctx_x_recv >> 6),
@@ -644,10 +654,50 @@ static void ccid3_hc_tx_exit(struct sock *sk)
644 dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist); 654 dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist);
645} 655}
646 656
657static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
658{
659 const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
660
661 /* Listening sockets don't have a private CCID block */
662 if (sk->sk_state == DCCP_LISTEN)
663 return;
664
665 BUG_ON(hctx == NULL);
666
667 info->tcpi_rto = hctx->ccid3hctx_t_rto;
668 info->tcpi_rtt = hctx->ccid3hctx_rtt;
669}
670
671static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
672 u32 __user *optval, int __user *optlen)
673{
674 const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
675 const void *val;
676
677 /* Listening sockets don't have a private CCID block */
678 if (sk->sk_state == DCCP_LISTEN)
679 return -EINVAL;
680
681 switch (optname) {
682 case DCCP_SOCKOPT_CCID_TX_INFO:
683 if (len < sizeof(hctx->ccid3hctx_tfrc))
684 return -EINVAL;
685 len = sizeof(hctx->ccid3hctx_tfrc);
686 val = &hctx->ccid3hctx_tfrc;
687 break;
688 default:
689 return -ENOPROTOOPT;
690 }
691
692 if (put_user(len, optlen) || copy_to_user(optval, val, len))
693 return -EFAULT;
694
695 return 0;
696}
697
647/* 698/*
648 * RX Half Connection methods 699 * Receiver Half-Connection Routines
649 */ 700 */
650
651#ifdef CONFIG_IP_DCCP_CCID3_DEBUG 701#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
652static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) 702static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
653{ 703{
@@ -977,8 +1027,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
977 const struct dccp_options_received *opt_recv; 1027 const struct dccp_options_received *opt_recv;
978 struct dccp_rx_hist_entry *packet; 1028 struct dccp_rx_hist_entry *packet;
979 struct timeval now; 1029 struct timeval now;
980 u32 p_prev, rtt_prev; 1030 u32 p_prev, r_sample, rtt_prev;
981 suseconds_t r_sample, t_elapsed;
982 int loss, payload_size; 1031 int loss, payload_size;
983 1032
984 BUG_ON(hcrx == NULL); 1033 BUG_ON(hcrx == NULL);
@@ -994,17 +1043,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
994 break; 1043 break;
995 rtt_prev = hcrx->ccid3hcrx_rtt; 1044 rtt_prev = hcrx->ccid3hcrx_rtt;
996 dccp_timestamp(sk, &now); 1045 dccp_timestamp(sk, &now);
997 timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10); 1046 r_sample = dccp_sample_rtt(sk, &now, NULL);
998 r_sample = timeval_usecs(&now);
999 t_elapsed = opt_recv->dccpor_elapsed_time * 10;
1000
1001 DCCP_BUG_ON(r_sample < 0);
1002 if (unlikely(r_sample <= t_elapsed))
1003 DCCP_WARN("r_sample=%ldus, t_elapsed=%ldus\n",
1004 (long)r_sample, (long)t_elapsed);
1005 else
1006 r_sample -= t_elapsed;
1007 CCID3_RTT_SANITY_CHECK(r_sample);
1008 1047
1009 if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) 1048 if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
1010 hcrx->ccid3hcrx_rtt = r_sample; 1049 hcrx->ccid3hcrx_rtt = r_sample;
@@ -1132,20 +1171,6 @@ static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
1132 info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt; 1171 info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt;
1133} 1172}
1134 1173
1135static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
1136{
1137 const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
1138
1139 /* Listen socks doesn't have a private CCID block */
1140 if (sk->sk_state == DCCP_LISTEN)
1141 return;
1142
1143 BUG_ON(hctx == NULL);
1144
1145 info->tcpi_rto = hctx->ccid3hctx_t_rto;
1146 info->tcpi_rtt = hctx->ccid3hctx_rtt;
1147}
1148
1149static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, 1174static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
1150 u32 __user *optval, int __user *optlen) 1175 u32 __user *optval, int __user *optlen)
1151{ 1176{
@@ -1173,33 +1198,6 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
1173 return 0; 1198 return 0;
1174} 1199}
1175 1200
1176static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
1177 u32 __user *optval, int __user *optlen)
1178{
1179 const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
1180 const void *val;
1181
1182 /* Listen socks doesn't have a private CCID block */
1183 if (sk->sk_state == DCCP_LISTEN)
1184 return -EINVAL;
1185
1186 switch (optname) {
1187 case DCCP_SOCKOPT_CCID_TX_INFO:
1188 if (len < sizeof(hctx->ccid3hctx_tfrc))
1189 return -EINVAL;
1190 len = sizeof(hctx->ccid3hctx_tfrc);
1191 val = &hctx->ccid3hctx_tfrc;
1192 break;
1193 default:
1194 return -ENOPROTOOPT;
1195 }
1196
1197 if (put_user(len, optlen) || copy_to_user(optval, val, len))
1198 return -EFAULT;
1199
1200 return 0;
1201}
1202
1203static struct ccid_operations ccid3 = { 1201static struct ccid_operations ccid3 = {
1204 .ccid_id = DCCPC_CCID3, 1202 .ccid_id = DCCPC_CCID3,
1205 .ccid_name = "ccid3", 1203 .ccid_name = "ccid3",
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 15776a88c090..8d31b389c19c 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -51,16 +51,6 @@
51/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ 51/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
52#define TFRC_T_MBI 64 52#define TFRC_T_MBI 64
53 53
54/* What we think is a reasonable upper limit on RTT values */
55#define CCID3_SANE_RTT_MAX ((suseconds_t)(4 * USEC_PER_SEC))
56
57#define CCID3_RTT_SANITY_CHECK(rtt) do { \
58 if (rtt > CCID3_SANE_RTT_MAX) { \
59 DCCP_CRIT("RTT (%d) too large, substituting %d", \
60 (int)rtt, (int)CCID3_SANE_RTT_MAX); \
61 rtt = CCID3_SANE_RTT_MAX; \
62 } } while (0)
63
64enum ccid3_options { 54enum ccid3_options {
65 TFRC_OPT_LOSS_EVENT_RATE = 192, 55 TFRC_OPT_LOSS_EVENT_RATE = 192,
66 TFRC_OPT_LOSS_INTERVALS = 193, 56 TFRC_OPT_LOSS_INTERVALS = 193,
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 0a0baef16b3e..372d7e75cdd8 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -91,7 +91,7 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list)
91 u32 w_tot = 0; 91 u32 w_tot = 0;
92 92
93 list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) { 93 list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) {
94 if (li_entry->dccplih_interval != ~0) { 94 if (li_entry->dccplih_interval != ~0U) {
95 i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i]; 95 i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i];
96 w_tot += dccp_li_hist_w[i]; 96 w_tot += dccp_li_hist_w[i];
97 if (i != 0) 97 if (i != 0)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index e33a9edb4036..d8ad27bfe01a 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -31,13 +31,9 @@
31 __stringify(cond)); \ 31 __stringify(cond)); \
32 } while (0) 32 } while (0)
33 33
34#ifdef MODULE
35#define DCCP_PRINTK(enable, fmt, args...) do { if (enable) \ 34#define DCCP_PRINTK(enable, fmt, args...) do { if (enable) \
36 printk(fmt, ##args); \ 35 printk(fmt, ##args); \
37 } while(0) 36 } while(0)
38#else
39#define DCCP_PRINTK(enable, fmt, args...) printk(fmt, ##args)
40#endif
41#define DCCP_PR_DEBUG(enable, fmt, a...) DCCP_PRINTK(enable, KERN_DEBUG \ 37#define DCCP_PR_DEBUG(enable, fmt, a...) DCCP_PRINTK(enable, KERN_DEBUG \
42 "%s: " fmt, __FUNCTION__, ##a) 38 "%s: " fmt, __FUNCTION__, ##a)
43 39
@@ -75,11 +71,15 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
75/* RFC 1122, 4.2.3.1 initial RTO value */ 71/* RFC 1122, 4.2.3.1 initial RTO value */
76#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ)) 72#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
77 73
74#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
75
76/* bounds for sampled RTT values from packet exchanges (in usec) */
77#define DCCP_SANE_RTT_MIN 100
78#define DCCP_SANE_RTT_MAX (4 * USEC_PER_SEC)
79
78/* Maximal interval between probes for local resources. */ 80/* Maximal interval between probes for local resources. */
79#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U)) 81#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))
80 82
81#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
82
83/* sysctl variables for DCCP */ 83/* sysctl variables for DCCP */
84extern int sysctl_dccp_request_retries; 84extern int sysctl_dccp_request_retries;
85extern int sysctl_dccp_retries1; 85extern int sysctl_dccp_retries1;
@@ -92,17 +92,43 @@ extern int sysctl_dccp_feat_send_ack_vector;
92extern int sysctl_dccp_feat_send_ndp_count; 92extern int sysctl_dccp_feat_send_ndp_count;
93extern int sysctl_dccp_tx_qlen; 93extern int sysctl_dccp_tx_qlen;
94 94
95/*
96 * 48-bit sequence number arithmetic (signed and unsigned)
97 */
98#define INT48_MIN 0x800000000000LL /* 2^47 */
99#define UINT48_MAX 0xFFFFFFFFFFFFLL /* 2^48 - 1 */
100#define COMPLEMENT48(x) (0x1000000000000LL - (x)) /* 2^48 - x */
101#define TO_SIGNED48(x) (((x) < INT48_MIN)? (x) : -COMPLEMENT48( (x)))
102#define TO_UNSIGNED48(x) (((x) >= 0)? (x) : COMPLEMENT48(-(x)))
103#define ADD48(a, b) (((a) + (b)) & UINT48_MAX)
104#define SUB48(a, b) ADD48((a), COMPLEMENT48(b))
105
106static inline void dccp_set_seqno(u64 *seqno, u64 value)
107{
108 *seqno = value & UINT48_MAX;
109}
110
111static inline void dccp_inc_seqno(u64 *seqno)
112{
113 *seqno = ADD48(*seqno, 1);
114}
115
116/* signed mod-2^48 distance: pos. if seqno1 < seqno2, neg. if seqno1 > seqno2 */
117static inline s64 dccp_delta_seqno(const u64 seqno1, const u64 seqno2)
118{
119 u64 delta = SUB48(seqno2, seqno1);
120
121 return TO_SIGNED48(delta);
122}
123
95/* is seq1 < seq2 ? */ 124/* is seq1 < seq2 ? */
96static inline int before48(const u64 seq1, const u64 seq2) 125static inline int before48(const u64 seq1, const u64 seq2)
97{ 126{
98 return (s64)((seq1 << 16) - (seq2 << 16)) < 0; 127 return (s64)((seq2 << 16) - (seq1 << 16)) > 0;
99} 128}
100 129
101/* is seq1 > seq2 ? */ 130/* is seq1 > seq2 ? */
102static inline int after48(const u64 seq1, const u64 seq2) 131#define after48(seq1, seq2) before48(seq2, seq1)
103{
104 return (s64)((seq2 << 16) - (seq1 << 16)) < 0;
105}
106 132
107/* is seq2 <= seq1 <= seq3 ? */ 133/* is seq2 <= seq1 <= seq3 ? */
108static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3) 134static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3)
@@ -118,9 +144,7 @@ static inline u64 max48(const u64 seq1, const u64 seq2)
118/* is seq1 next seqno after seq2 */ 144/* is seq1 next seqno after seq2 */
119static inline int follows48(const u64 seq1, const u64 seq2) 145static inline int follows48(const u64 seq1, const u64 seq2)
120{ 146{
121 int diff = (seq1 & 0xFFFF) - (seq2 & 0xFFFF); 147 return dccp_delta_seqno(seq2, seq1) == 1;
122
123 return diff==1;
124} 148}
125 149
126enum { 150enum {
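The sequence helpers are now expressed through explicit mod-2^48 primitives, and before48() was rewritten so that at the maximal distance of 2^47 neither number is reported as before the other (the old form reported both as before each other). A hedged stand-alone copy of the arithmetic, demonstrating signed wrap-around distances:

    #include <stdio.h>

    #define INT48_MIN        0x800000000000LL
    #define UINT48_MAX       0xFFFFFFFFFFFFLL
    #define COMPLEMENT48(x)  (0x1000000000000LL - (x))
    #define TO_SIGNED48(x)   (((x) < INT48_MIN) ? (x) : -COMPLEMENT48(x))
    #define ADD48(a, b)      (((a) + (b)) & UINT48_MAX)
    #define SUB48(a, b)      ADD48((a), COMPLEMENT48(b))

    /* signed mod-2^48 distance: positive iff s1 < s2 */
    static long long delta48(long long s1, long long s2)
    {
        return TO_SIGNED48(SUB48(s2, s1));
    }

    int main(void)
    {
        printf("%lld\n", delta48(UINT48_MAX, 0));  /* 1: 0 follows the wrap */
        printf("%lld\n", delta48(0, UINT48_MAX));  /* -1 */
        printf("%lld\n", delta48(100, 250));       /* 150 */
        return 0;
    }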
@@ -272,6 +296,8 @@ extern int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
272extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code); 296extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code);
273extern void dccp_send_close(struct sock *sk, const int active); 297extern void dccp_send_close(struct sock *sk, const int active);
274extern int dccp_invalid_packet(struct sk_buff *skb); 298extern int dccp_invalid_packet(struct sk_buff *skb);
299extern u32 dccp_sample_rtt(struct sock *sk, struct timeval *t_recv,
300 struct timeval *t_history);
275 301
276static inline int dccp_bad_service_code(const struct sock *sk, 302static inline int dccp_bad_service_code(const struct sock *sk,
277 const __be32 service) 303 const __be32 service)
@@ -313,26 +339,7 @@ static inline int dccp_packet_without_ack(const struct sk_buff *skb)
313 return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST; 339 return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST;
314} 340}
315 341
316#define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1) 342#define DCCP_PKT_WITHOUT_ACK_SEQ (UINT48_MAX << 2)
317#define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2)
318
319static inline void dccp_set_seqno(u64 *seqno, u64 value)
320{
321 if (value > DCCP_MAX_SEQNO)
322 value -= DCCP_MAX_SEQNO + 1;
323 *seqno = value;
324}
325
326static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2)
327{
328 return ((seqno2 << 16) - (seqno1 << 16)) >> 16;
329}
330
331static inline void dccp_inc_seqno(u64 *seqno)
332{
333 if (++*seqno > DCCP_MAX_SEQNO)
334 *seqno = 0;
335}
336 343
337static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss) 344static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss)
338{ 345{
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 78b043c458bf..da6ec185ed5b 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -86,7 +86,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
86 dh->dccph_type == DCCP_PKT_SYNCACK) { 86 dh->dccph_type == DCCP_PKT_SYNCACK) {
87 if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, 87 if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
88 dp->dccps_awl, dp->dccps_awh) && 88 dp->dccps_awl, dp->dccps_awh) &&
89 !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl)) 89 dccp_delta_seqno(dp->dccps_swl,
90 DCCP_SKB_CB(skb)->dccpd_seq) >= 0)
90 dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); 91 dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
91 else 92 else
92 return -1; 93 return -1;
@@ -203,7 +204,8 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
203 if (dp->dccps_role != DCCP_ROLE_CLIENT) 204 if (dp->dccps_role != DCCP_ROLE_CLIENT)
204 goto send_sync; 205 goto send_sync;
205check_seq: 206check_seq:
206 if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) { 207 if (dccp_delta_seqno(dp->dccps_osr,
208 DCCP_SKB_CB(skb)->dccpd_seq) >= 0) {
207send_sync: 209send_sync:
208 dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, 210 dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
209 DCCP_PKT_SYNC); 211 DCCP_PKT_SYNC);
@@ -298,6 +300,14 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
298 if (dccp_parse_options(sk, skb)) 300 if (dccp_parse_options(sk, skb))
299 goto out_invalid_packet; 301 goto out_invalid_packet;
300 302
303 /* Obtain RTT sample from SYN exchange (used by CCID 3) */
304 if (dp->dccps_options_received.dccpor_timestamp_echo) {
305 struct timeval now;
306
307 dccp_timestamp(sk, &now);
308 dp->dccps_syn_rtt = dccp_sample_rtt(sk, &now, NULL);
309 }
310
301 if (dccp_msk(sk)->dccpms_send_ack_vector && 311 if (dccp_msk(sk)->dccpms_send_ack_vector &&
302 dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, 312 dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
303 DCCP_SKB_CB(skb)->dccpd_seq, 313 DCCP_SKB_CB(skb)->dccpd_seq,
@@ -575,3 +585,43 @@ discard:
575} 585}
576 586
577EXPORT_SYMBOL_GPL(dccp_rcv_state_process); 587EXPORT_SYMBOL_GPL(dccp_rcv_state_process);
588
589/**
590 * dccp_sample_rtt - Sample RTT from packet exchange
591 *
592 * @sk: connected dccp_sock
593 * @t_recv: receive timestamp of packet with timestamp echo
594 * @t_hist: packet history timestamp or NULL
595 */
596u32 dccp_sample_rtt(struct sock *sk, struct timeval *t_recv,
597 struct timeval *t_hist)
598{
599 struct dccp_sock *dp = dccp_sk(sk);
600 struct dccp_options_received *or = &dp->dccps_options_received;
601 suseconds_t delta;
602
603 if (t_hist == NULL) {
604 if (!or->dccpor_timestamp_echo) {
605 DCCP_WARN("packet without timestamp echo\n");
606 return DCCP_SANE_RTT_MAX;
607 }
608 timeval_sub_usecs(t_recv, or->dccpor_timestamp_echo * 10);
609 delta = timeval_usecs(t_recv);
610 } else
611 delta = timeval_delta(t_recv, t_hist);
612
613 delta -= or->dccpor_elapsed_time * 10; /* either set or 0 */
614
615 if (unlikely(delta <= 0)) {
616 DCCP_WARN("unusable RTT sample %ld, using min\n", (long)delta);
617 return DCCP_SANE_RTT_MIN;
618 }
619 if (unlikely(delta - (suseconds_t)DCCP_SANE_RTT_MAX > 0)) {
620 DCCP_WARN("RTT sample %ld too large, using max\n", (long)delta);
621 return DCCP_SANE_RTT_MAX;
622 }
623
624 return delta;
625}
626
627EXPORT_SYMBOL_GPL(dccp_sample_rtt);
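dccp_sample_rtt() subtracts the peer-reported elapsed time and then forces every sample into [DCCP_SANE_RTT_MIN, DCCP_SANE_RTT_MAX] = [100 us, 4 s], replacing the per-CCID CCID3_RTT_SANITY_CHECK removed from ccid3.h above. The clamping in isolation, as a hedged sketch:

    #include <stdio.h>

    #define SANE_RTT_MIN 100L              /* microseconds */
    #define SANE_RTT_MAX (4L * 1000000L)

    static long sane_rtt(long delta_us)
    {
        if (delta_us <= 0)
            return SANE_RTT_MIN;   /* unusable sample: clock skew or
                                      overlong elapsed-time option */
        if (delta_us > SANE_RTT_MAX)
            return SANE_RTT_MAX;   /* cap runaway samples */
        return delta_us;
    }

    int main(void)
    {
        printf("%ld %ld %ld\n",
               sane_rtt(-5), sane_rtt(30000), sane_rtt(9000000));
        return 0;                  /* prints 100 30000 4000000 */
    }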
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 4a83978aa660..718f2fa923a1 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -207,8 +207,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
207 (iph->ihl << 2)); 207 (iph->ihl << 2));
208 struct dccp_sock *dp; 208 struct dccp_sock *dp;
209 struct inet_sock *inet; 209 struct inet_sock *inet;
210 const int type = skb->h.icmph->type; 210 const int type = icmp_hdr(skb)->type;
211 const int code = skb->h.icmph->code; 211 const int code = icmp_hdr(skb)->code;
212 struct sock *sk; 212 struct sock *sk;
213 __u64 seq; 213 __u64 seq;
214 int err; 214 int err;
@@ -363,8 +363,8 @@ EXPORT_SYMBOL_GPL(dccp_v4_send_check);
363 363
364static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb) 364static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb)
365{ 365{
366 return secure_dccp_sequence_number(skb->nh.iph->daddr, 366 return secure_dccp_sequence_number(ip_hdr(skb)->daddr,
367 skb->nh.iph->saddr, 367 ip_hdr(skb)->saddr,
368 dccp_hdr(skb)->dccph_dport, 368 dccp_hdr(skb)->dccph_dport,
369 dccp_hdr(skb)->dccph_sport); 369 dccp_hdr(skb)->dccph_sport);
370} 370}
@@ -405,7 +405,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
405 newinet->opt = ireq->opt; 405 newinet->opt = ireq->opt;
406 ireq->opt = NULL; 406 ireq->opt = NULL;
407 newinet->mc_index = inet_iif(skb); 407 newinet->mc_index = inet_iif(skb);
408 newinet->mc_ttl = skb->nh.iph->ttl; 408 newinet->mc_ttl = ip_hdr(skb)->ttl;
409 newinet->id = jiffies; 409 newinet->id = jiffies;
410 410
411 dccp_sync_mss(newsk, dst_mtu(dst)); 411 dccp_sync_mss(newsk, dst_mtu(dst));
@@ -428,7 +428,7 @@ EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock);
428static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) 428static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
429{ 429{
430 const struct dccp_hdr *dh = dccp_hdr(skb); 430 const struct dccp_hdr *dh = dccp_hdr(skb);
431 const struct iphdr *iph = skb->nh.iph; 431 const struct iphdr *iph = ip_hdr(skb);
432 struct sock *nsk; 432 struct sock *nsk;
433 struct request_sock **prev; 433 struct request_sock **prev;
434 /* Find possible connection requests. */ 434 /* Find possible connection requests. */
@@ -460,8 +460,8 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
460 struct rtable *rt; 460 struct rtable *rt;
461 struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif, 461 struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif,
462 .nl_u = { .ip4_u = 462 .nl_u = { .ip4_u =
463 { .daddr = skb->nh.iph->saddr, 463 { .daddr = ip_hdr(skb)->saddr,
464 .saddr = skb->nh.iph->daddr, 464 .saddr = ip_hdr(skb)->daddr,
465 .tos = RT_CONN_FLAGS(sk) } }, 465 .tos = RT_CONN_FLAGS(sk) } },
466 .proto = sk->sk_protocol, 466 .proto = sk->sk_protocol,
467 .uli_u = { .ports = 467 .uli_u = { .ports =
@@ -513,6 +513,7 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
513{ 513{
514 int err; 514 int err;
515 struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; 515 struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
516 const struct iphdr *rxiph;
516 const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) + 517 const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
517 sizeof(struct dccp_hdr_ext) + 518 sizeof(struct dccp_hdr_ext) +
518 sizeof(struct dccp_hdr_reset); 519 sizeof(struct dccp_hdr_reset);
@@ -559,13 +560,13 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
559 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); 560 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
560 561
561 dccp_csum_outgoing(skb); 562 dccp_csum_outgoing(skb);
562 dh->dccph_checksum = dccp_v4_csum_finish(skb, rxskb->nh.iph->saddr, 563 rxiph = ip_hdr(rxskb);
563 rxskb->nh.iph->daddr); 564 dh->dccph_checksum = dccp_v4_csum_finish(skb, rxiph->saddr,
565 rxiph->daddr);
564 566
565 bh_lock_sock(dccp_v4_ctl_socket->sk); 567 bh_lock_sock(dccp_v4_ctl_socket->sk);
566 err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk, 568 err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk,
567 rxskb->nh.iph->daddr, 569 rxiph->daddr, rxiph->saddr, NULL);
568 rxskb->nh.iph->saddr, NULL);
569 bh_unlock_sock(dccp_v4_ctl_socket->sk); 570 bh_unlock_sock(dccp_v4_ctl_socket->sk);
570 571
571 if (net_xmit_eval(err) == 0) { 572 if (net_xmit_eval(err) == 0) {
@@ -640,8 +641,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
640 goto drop_and_free; 641 goto drop_and_free;
641 642
642 ireq = inet_rsk(req); 643 ireq = inet_rsk(req);
643 ireq->loc_addr = skb->nh.iph->daddr; 644 ireq->loc_addr = ip_hdr(skb)->daddr;
644 ireq->rmt_addr = skb->nh.iph->saddr; 645 ireq->rmt_addr = ip_hdr(skb)->saddr;
645 ireq->opt = NULL; 646 ireq->opt = NULL;
646 647
647 /* 648 /*
@@ -809,6 +810,7 @@ EXPORT_SYMBOL_GPL(dccp_invalid_packet);
809static int dccp_v4_rcv(struct sk_buff *skb) 810static int dccp_v4_rcv(struct sk_buff *skb)
810{ 811{
811 const struct dccp_hdr *dh; 812 const struct dccp_hdr *dh;
813 const struct iphdr *iph;
812 struct sock *sk; 814 struct sock *sk;
813 int min_cov; 815 int min_cov;
814 816
@@ -817,8 +819,9 @@ static int dccp_v4_rcv(struct sk_buff *skb)
817 if (dccp_invalid_packet(skb)) 819 if (dccp_invalid_packet(skb))
818 goto discard_it; 820 goto discard_it;
819 821
822 iph = ip_hdr(skb);
820 /* Step 1: If header checksum is incorrect, drop packet and return */ 823 /* Step 1: If header checksum is incorrect, drop packet and return */
821 if (dccp_v4_csum_finish(skb, skb->nh.iph->saddr, skb->nh.iph->daddr)) { 824 if (dccp_v4_csum_finish(skb, iph->saddr, iph->daddr)) {
822 DCCP_WARN("dropped packet with invalid checksum\n"); 825 DCCP_WARN("dropped packet with invalid checksum\n");
823 goto discard_it; 826 goto discard_it;
824 } 827 }
@@ -832,8 +835,8 @@ static int dccp_v4_rcv(struct sk_buff *skb)
832 "src=%u.%u.%u.%u@%-5d " 835 "src=%u.%u.%u.%u@%-5d "
833 "dst=%u.%u.%u.%u@%-5d seq=%llu", 836 "dst=%u.%u.%u.%u@%-5d seq=%llu",
834 dccp_packet_name(dh->dccph_type), 837 dccp_packet_name(dh->dccph_type),
835 NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport), 838 NIPQUAD(iph->saddr), ntohs(dh->dccph_sport),
836 NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport), 839 NIPQUAD(iph->daddr), ntohs(dh->dccph_dport),
837 (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); 840 (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
838 841
839 if (dccp_packet_without_ack(skb)) { 842 if (dccp_packet_without_ack(skb)) {
@@ -848,10 +851,8 @@ static int dccp_v4_rcv(struct sk_buff *skb)
848 /* Step 2: 851 /* Step 2:
849 * Look up flow ID in table and get corresponding socket */ 852 * Look up flow ID in table and get corresponding socket */
850 sk = __inet_lookup(&dccp_hashinfo, 853 sk = __inet_lookup(&dccp_hashinfo,
851 skb->nh.iph->saddr, dh->dccph_sport, 854 iph->saddr, dh->dccph_sport,
852 skb->nh.iph->daddr, dh->dccph_dport, 855 iph->daddr, dh->dccph_dport, inet_iif(skb));
853 inet_iif(skb));
854
855 /* 856 /*
856 * Step 2: 857 * Step 2:
857 * If no socket ... 858 * If no socket ...
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 7f51e8db3967..64eac2515aa2 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -84,8 +84,8 @@ static inline __u32 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
84 84
85static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb) 85static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb)
86{ 86{
87 return secure_dccpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32, 87 return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
88 skb->nh.ipv6h->saddr.s6_addr32, 88 ipv6_hdr(skb)->saddr.s6_addr32,
89 dccp_hdr(skb)->dccph_dport, 89 dccp_hdr(skb)->dccph_dport,
90 dccp_hdr(skb)->dccph_sport ); 90 dccp_hdr(skb)->dccph_sport );
91 91
@@ -261,8 +261,8 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
261 261
262 if (rxopt->srcrt) 262 if (rxopt->srcrt)
263 opt = ipv6_invert_rthdr(sk, 263 opt = ipv6_invert_rthdr(sk,
264 (struct ipv6_rt_hdr *)(pktopts->nh.raw + 264 (struct ipv6_rt_hdr *)(skb_network_header(pktopts) +
265 rxopt->srcrt)); 265 rxopt->srcrt));
266 } 266 }
267 267
268 if (opt != NULL && opt->srcrt != NULL) { 268 if (opt != NULL && opt->srcrt != NULL) {
@@ -313,6 +313,7 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req)
 static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 {
 	struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
+	struct ipv6hdr *rxip6h;
 	const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
 				       sizeof(struct dccp_hdr_ext) +
 				       sizeof(struct dccp_hdr_reset);
@@ -352,12 +353,13 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
 
 	dccp_csum_outgoing(skb);
-	dh->dccph_checksum = dccp_v6_csum_finish(skb, &rxskb->nh.ipv6h->saddr,
-						 &rxskb->nh.ipv6h->daddr);
+	rxip6h = ipv6_hdr(rxskb);
+	dh->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr,
+						 &rxip6h->daddr);
 
 	memset(&fl, 0, sizeof(fl));
-	ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr);
-	ipv6_addr_copy(&fl.fl6_src, &rxskb->nh.ipv6h->daddr);
+	ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr);
+	ipv6_addr_copy(&fl.fl6_src, &rxip6h->daddr);
 
 	fl.proto = IPPROTO_DCCP;
 	fl.oif = inet6_iif(rxskb);
@@ -390,7 +392,7 @@ static struct request_sock_ops dccp6_request_sock_ops = {
 static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
 {
 	const struct dccp_hdr *dh = dccp_hdr(skb);
-	const struct ipv6hdr *iph = skb->nh.ipv6h;
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
 	struct sock *nsk;
 	struct request_sock **prev;
 	/* Find possible connection requests. */
@@ -460,8 +462,8 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 		goto drop_and_free;
 
 	ireq6 = inet6_rsk(req);
-	ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr);
-	ipv6_addr_copy(&ireq6->loc_addr, &skb->nh.ipv6h->daddr);
+	ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
+	ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
 	ireq6->pktopts = NULL;
 
 	if (ipv6_opt_accepted(sk, skb) ||
@@ -546,7 +548,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 		newnp->pktoptions = NULL;
 		newnp->opt	  = NULL;
 		newnp->mcast_oif  = inet6_iif(skb);
-		newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
+		newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
 
 		/*
 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
@@ -573,8 +575,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 		if (rxopt->srcrt)
 			opt = ipv6_invert_rthdr(sk,
-				(struct ipv6_rt_hdr *)(ireq6->pktopts->nh.raw +
-						       rxopt->srcrt));
+		   (struct ipv6_rt_hdr *)(skb_network_header(ireq6->pktopts) +
+					  rxopt->srcrt));
 	}
 
 	if (dst == NULL) {
@@ -653,7 +655,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 	}
 	newnp->opt	  = NULL;
 	newnp->mcast_oif  = inet6_iif(skb);
-	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
+	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
 
 	/*
 	 * Clone native IPv6 options from listening socket (if any)
@@ -826,8 +828,8 @@ static int dccp_v6_rcv(struct sk_buff **pskb)
 		goto discard_it;
 
 	/* Step 1: If header checksum is incorrect, drop packet and return. */
-	if (dccp_v6_csum_finish(skb, &skb->nh.ipv6h->saddr,
-				     &skb->nh.ipv6h->daddr)) {
+	if (dccp_v6_csum_finish(skb, &ipv6_hdr(skb)->saddr,
+				     &ipv6_hdr(skb)->daddr)) {
 		DCCP_WARN("dropped packet with invalid checksum\n");
 		goto discard_it;
 	}
@@ -844,9 +846,9 @@ static int dccp_v6_rcv(struct sk_buff **pskb)
 
 	/* Step 2:
 	 *	Look up flow ID in table and get corresponding socket */
-	sk = __inet6_lookup(&dccp_hashinfo, &skb->nh.ipv6h->saddr,
+	sk = __inet6_lookup(&dccp_hashinfo, &ipv6_hdr(skb)->saddr,
 			    dh->dccph_sport,
-			    &skb->nh.ipv6h->daddr, ntohs(dh->dccph_dport),
+			    &ipv6_hdr(skb)->daddr, ntohs(dh->dccph_dport),
 			    inet6_iif(skb));
 	/*
 	 * Step 2:
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 6d235b3013dd..e18e249ac49b 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -27,7 +27,7 @@
 struct inet_timewait_death_row dccp_death_row = {
 	.sysctl_max_tw_buckets = NR_FILE * 2,
 	.period		= DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
-	.death_lock	= SPIN_LOCK_UNLOCKED,
+	.death_lock	= __SPIN_LOCK_UNLOCKED(dccp_death_row.death_lock),
 	.hashinfo	= &dccp_hashinfo,
 	.tw_timer	= TIMER_INITIALIZER(inet_twdr_hangman, 0,
 					    (unsigned long)&dccp_death_row),
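
The initializer change matters for lockdep: SPIN_LOCK_UNLOCKED gives every lock initialised with it the same lock class, whereas __SPIN_LOCK_UNLOCKED(name) keys the class to the named lock. A minimal sketch of both static-initialisation styles (hypothetical names, assuming <linux/spinlock.h>):

/* Illustrative only; example_table and tbl are hypothetical. */
static DEFINE_SPINLOCK(standalone_lock);	/* preferred for plain locks */

struct example_table {
	spinlock_t lock;
};

static struct example_table tbl = {
	.lock = __SPIN_LOCK_UNLOCKED(tbl.lock),	/* unique lockdep class */
};
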
diff --git a/net/dccp/options.c b/net/dccp/options.c
index ca13f7731994..34d536d5f1a1 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -29,8 +29,6 @@ int sysctl_dccp_feat_ack_ratio = DCCPF_INITIAL_ACK_RATIO;
 int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
 int sysctl_dccp_feat_send_ndp_count  = DCCPF_INITIAL_SEND_NDP_COUNT;
 
-EXPORT_SYMBOL_GPL(sysctl_dccp_feat_sequence_window);
-
 void dccp_minisock_init(struct dccp_minisock *dmsk)
 {
 	dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
@@ -174,21 +172,25 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
 			opt_recv->dccpor_timestamp_echo = ntohl(*(__be32 *)value);
 
 			dccp_pr_debug("%s rx opt: TIMESTAMP_ECHO=%u, len=%d, "
-				      "ackno=%llu, ", dccp_role(sk),
+				      "ackno=%llu", dccp_role(sk),
 				      opt_recv->dccpor_timestamp_echo,
 				      len + 2,
 				      (unsigned long long)
 				      DCCP_SKB_CB(skb)->dccpd_ack_seq);
 
 
-			if (len == 4)
+			if (len == 4) {
+				dccp_pr_debug_cat("\n");
 				break;
+			}
 
 			if (len == 6)
 				elapsed_time = ntohs(*(__be16 *)(value + 4));
 			else
 				elapsed_time = ntohl(*(__be32 *)(value + 4));
 
+			dccp_pr_debug_cat(", ELAPSED_TIME=%d\n", elapsed_time);
+
 			/* Give precedence to the biggest ELAPSED_TIME */
 			if (elapsed_time > opt_recv->dccpor_elapsed_time)
 				opt_recv->dccpor_elapsed_time = elapsed_time;
@@ -565,6 +567,14 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 	    dccp_insert_options_feat(sk, skb))
 		return -1;
 
+	/*
+	 * Obtain RTT sample from Request/Response exchange.
+	 * This is currently used in CCID 3 initialisation.
+	 */
+	if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST &&
+	    dccp_insert_option_timestamp(sk, skb))
+		return -1;
+
 	/* XXX: insert other options when appropriate */
 
 	if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) {
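
The inserted block adds a TIMESTAMP option to every DCCP Request so that the Response's TIMESTAMP_ECHO (parsed in the options hunk further up) yields one RTT sample during connection setup. Schematically the sample is recovered as follows (a sketch under stated assumptions, not code from this patch; RFC 4340 timestamps count units of 10 microseconds):

/* Hypothetical helper: RTT from a timestamp/echo exchange. */
static u32 rtt_from_echo(u32 now, u32 t_echo, u32 elapsed)
{
	/* the peer echoes our timestamp and reports its holding time;
	 * subtracting both from the current time leaves the path RTT */
	return now - t_echo - elapsed;
}
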
diff --git a/net/dccp/output.c b/net/dccp/output.c
index aa21cc4de37f..c8d843e983fc 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -194,6 +194,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb)
 		rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
 		if (rc <= 0)
 			break;
+		dccp_pr_debug("delayed send by %d msec\n", rc);
 		delay = msecs_to_jiffies(rc);
 		sk->sk_write_pending++;
 		release_sock(sk);
@@ -255,7 +256,7 @@ void dccp_write_xmit(struct sock *sk, int block)
 				DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
 					 err);
 		} else {
-			dccp_pr_debug("packet discarded\n");
+			dccp_pr_debug("packet discarded due to err=%d\n", err);
 			kfree_skb(skb);
 		}
 	}
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 3b1f509f51dd..1f5e3ba62065 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -90,15 +90,18 @@ static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk,
 	if (port == 0 || ntohs(inet->dport) == port ||
 	    ntohs(inet->sport) == port) {
 		if (hctx)
-			printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %d\n",
-			       NIPQUAD(inet->saddr), ntohs(inet->sport),
-			       NIPQUAD(inet->daddr), ntohs(inet->dport), size,
-			       hctx->ccid3hctx_s, hctx->ccid3hctx_rtt,
-			       hctx->ccid3hctx_p, hctx->ccid3hctx_t_ipi);
+			printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %u "
+			       "%llu %llu %d\n",
+			       NIPQUAD(inet->saddr), ntohs(inet->sport),
+			       NIPQUAD(inet->daddr), ntohs(inet->dport), size,
+			       hctx->ccid3hctx_s, hctx->ccid3hctx_rtt,
+			       hctx->ccid3hctx_p, hctx->ccid3hctx_x_calc,
+			       hctx->ccid3hctx_x_recv >> 6,
+			       hctx->ccid3hctx_x >> 6, hctx->ccid3hctx_t_ipi);
 		else
 			printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d\n",
 			       NIPQUAD(inet->saddr), ntohs(inet->sport),
 			       NIPQUAD(inet->daddr), ntohs(inet->dport), size);
 	}
 
 	jprobe_return();
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index c6568d637e1a..a205eaa87f52 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -2413,6 +2413,7 @@ module_init(decnet_init);
 static void __exit decnet_exit(void)
 {
 	sock_unregister(AF_DECnet);
+	rtnl_unregister_all(PF_DECnet);
 	dev_remove_pack(&dn_dix_packet_type);
 
 	dn_unregister_sysctl();
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 060d725e2942..5c2a9951b638 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -799,7 +799,6 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 	skip_ndevs = cb->args[0];
 	skip_naddr = cb->args[1];
 
-	read_lock(&dev_base_lock);
 	for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
 		if (idx < skip_ndevs)
 			continue;
@@ -824,8 +823,6 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 		}
 	}
 done:
-	read_unlock(&dev_base_lock);
-
 	cb->args[0] = idx;
 	cb->args[1] = dn_idx;
 
@@ -913,7 +910,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 	pktlen = (__le16 *)skb_push(skb,2);
 	*pktlen = dn_htons(skb->len - 2);
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	dn_rt_finish_output(skb, dn_rt_all_rt_mcast, msg->id);
 }
@@ -1005,7 +1002,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 	pktlen = (__le16 *)skb_push(skb, 2);
 	*pktlen = dn_htons(skb->len - 2);
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	if (dn_am_i_a_router(dn, dn_db, ifa)) {
 		struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
@@ -1447,24 +1444,6 @@ static const struct file_operations dn_dev_seq_fops = {
 
 #endif /* CONFIG_PROC_FS */
 
-static struct rtnetlink_link dnet_rtnetlink_table[RTM_NR_MSGTYPES] =
-{
-	[RTM_NEWADDR  - RTM_BASE] = { .doit	= dn_nl_newaddr,	},
-	[RTM_DELADDR  - RTM_BASE] = { .doit	= dn_nl_deladdr,	},
-	[RTM_GETADDR  - RTM_BASE] = { .dumpit	= dn_nl_dump_ifaddr,	},
-#ifdef CONFIG_DECNET_ROUTER
-	[RTM_NEWROUTE - RTM_BASE] = { .doit	= dn_fib_rtm_newroute,	},
-	[RTM_DELROUTE - RTM_BASE] = { .doit	= dn_fib_rtm_delroute,	},
-	[RTM_GETROUTE - RTM_BASE] = { .doit	= dn_cache_getroute,
-				      .dumpit	= dn_fib_dump,		},
-	[RTM_GETRULE  - RTM_BASE] = { .dumpit	= dn_fib_dump_rules,	},
-#else
-	[RTM_GETROUTE - RTM_BASE] = { .doit	= dn_cache_getroute,
-				      .dumpit	= dn_cache_dump,	},
-#endif
-
-};
-
 static int __initdata addr[2];
 module_param_array(addr, int, NULL, 0444);
 MODULE_PARM_DESC(addr, "The DECnet address of this machine: area,node");
@@ -1485,7 +1464,9 @@ void __init dn_dev_init(void)
 
 	dn_dev_devices_on();
 
-	rtnetlink_links[PF_DECnet] = dnet_rtnetlink_table;
+	rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL);
+	rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL);
+	rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr);
 
 	proc_net_fops_create("decnet_dev", S_IRUGO, &dn_dev_seq_fops);
 
@@ -1500,8 +1481,6 @@ void __init dn_dev_init(void)
 
 void __exit dn_dev_cleanup(void)
 {
-	rtnetlink_links[PF_DECnet] = NULL;
-
 #ifdef CONFIG_SYSCTL
 	{
 		int i;
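
All the DECnet hunks that follow repeat this pattern: the static rtnetlink_links dispatch table is dropped in favour of per-message-type rtnl_register() calls made where the handlers live, which also lets the handlers become static. A minimal sketch of a hypothetical protocol using the interface exactly as it appears in this diff (PF_EXAMPLE, my_doit and my_dumpit are illustrative names):

static int my_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	return 0;			/* handle one RTM_NEWADDR request */
}

static int my_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	return skb->len;		/* dump entries into skb */
}

static int __init my_init(void)
{
	rtnl_register(PF_EXAMPLE, RTM_NEWADDR, my_doit, NULL);
	rtnl_register(PF_EXAMPLE, RTM_GETADDR, NULL, my_dumpit);
	return 0;
}

static void __exit my_exit(void)
{
	rtnl_unregister_all(PF_EXAMPLE);	/* mirrors decnet_exit() above */
}
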
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 82d58a977e6f..310a86268d2b 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -504,7 +504,7 @@ static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta)
 	return 0;
 }
 
-int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct dn_fib_table *tb;
 	struct rtattr **rta = arg;
@@ -520,7 +520,7 @@ int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	return -ESRCH;
 }
 
-int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct dn_fib_table *tb;
 	struct rtattr **rta = arg;
@@ -748,11 +748,13 @@ void __exit dn_fib_cleanup(void)
 
 void __init dn_fib_init(void)
 {
-
 	dn_fib_table_init();
 	dn_fib_rules_init();
 
 	register_dnaddr_notifier(&dn_fib_dnaddr_notifier);
+
+	rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL);
+	rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL);
 }
 
 
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index bf701cf5a386..4bf066c416e2 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -261,7 +261,7 @@ static int dn_long_output(struct sk_buff *skb)
 	lp->s_class	= 0;
 	lp->pt		= 0;
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet);
 }
@@ -300,7 +300,7 @@ static int dn_short_output(struct sk_buff *skb)
 	sp->srcnode	= cb->src;
 	sp->forward	= cb->hops & 0x3f;
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet);
 }
@@ -342,7 +342,7 @@ static int dn_phase3_output(struct sk_buff *skb)
 	sp->srcnode	= cb->src & dn_htons(0x03ff);
 	sp->forward	= cb->hops & 0x3f;
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet);
 }
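
These one-liners are mechanical: direct assignments to the old pointer fields become calls to the reset/read helpers introduced by the sk_buff layout rework. Conceptually the helpers behave as follows (a simplified model of the pointer-based build, not the exact kernel definitions; the offset-based build stores skb->data - skb->head instead):

static inline void model_reset_network_header(struct sk_buff *skb)
{
	skb->network_header = skb->data;  /* header starts at current data */
}

static inline unsigned char *model_network_header(const struct sk_buff *skb)
{
	return skb->network_header;	  /* symmetric read accessor */
}
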
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 9d20904f6f52..4074a6e5d0de 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -362,7 +362,8 @@ static void dn_nsp_conn_conf(struct sock *sk, struct sk_buff *skb)
 		u16 dlen = *skb->data;
 		if ((dlen <= 16) && (dlen <= skb->len)) {
 			scp->conndata_in.opt_optl = dn_htons(dlen);
-			memcpy(scp->conndata_in.opt_data, skb->data + 1, dlen);
+			skb_copy_from_linear_data_offset(skb, 1,
+							 scp->conndata_in.opt_data, dlen);
 		}
 	}
 	dn_nsp_send_link(sk, DN_NOCHANGE, 0);
@@ -406,7 +407,7 @@ static void dn_nsp_disc_init(struct sock *sk, struct sk_buff *skb)
 		u16 dlen = *skb->data;
 		if ((dlen <= 16) && (dlen <= skb->len)) {
 			scp->discdata_in.opt_optl = dn_htons(dlen);
-			memcpy(scp->discdata_in.opt_data, skb->data + 1, dlen);
+			skb_copy_from_linear_data_offset(skb, 1, scp->discdata_in.opt_data, dlen);
 		}
 	}
 
@@ -725,7 +726,7 @@ static int dn_nsp_rx_packet(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, 2))
 		goto free_out;
 
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	cb->nsp_flags = *ptr++;
 
 	if (decnet_debug_level & 2)
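
The memcpy()-to-helper conversions in this file are equally mechanical; for linear skbs the helper reduces to the memcpy it replaces (a simplified model, not the exact kernel definition):

/* Model of skb_copy_from_linear_data_offset(); the real helper exists
 * so callers survive the sk_buff field rework. */
static inline void model_copy_from_linear_data_offset(const struct sk_buff *skb,
						      int offset, void *to,
						      unsigned int len)
{
	memcpy(to, skb->data + offset, len);	/* linear area only */
}
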
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 2d2cda82c7db..7404653880b0 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -79,7 +79,7 @@ static void dn_nsp_send(struct sk_buff *skb)
 	struct dst_entry *dst;
 	struct flowi fl;
 
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	scp->stamp = jiffies;
 
 	dst = sk_dst_check(sk, 0);
@@ -681,8 +681,10 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
 	if (scp->peer.sdn_objnum)
 		type = 0;
 
-	skb_put(skb, dn_sockaddr2username(&scp->peer, skb->tail, type));
-	skb_put(skb, dn_sockaddr2username(&scp->addr, skb->tail, 2));
+	skb_put(skb, dn_sockaddr2username(&scp->peer,
+					  skb_tail_pointer(skb), type));
+	skb_put(skb, dn_sockaddr2username(&scp->addr,
+					  skb_tail_pointer(skb), 2));
 
 	menuver = DN_MENUVER_ACC | DN_MENUVER_USR;
 	if (scp->peer.sdn_flags & SDF_PROXY)
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index c1b5502f195b..5d7337bcf0fe 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -77,6 +77,7 @@
 #include <linux/rcupdate.h>
 #include <linux/times.h>
 #include <asm/errno.h>
+#include <net/netlink.h>
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
@@ -386,7 +387,7 @@ static int dn_return_short(struct sk_buff *skb)
 	__le16 tmp;
 
 	/* Add back headers */
-	skb_push(skb, skb->data - skb->nh.raw);
+	skb_push(skb, skb->data - skb_network_header(skb));
 
 	if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL)
 		return NET_RX_DROP;
@@ -425,7 +426,7 @@ static int dn_return_long(struct sk_buff *skb)
 	unsigned char tmp[ETH_ALEN];
 
 	/* Add back all headers */
-	skb_push(skb, skb->data - skb->nh.raw);
+	skb_push(skb, skb->data - skb_network_header(skb));
 
 	if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL)
 		return NET_RX_DROP;
@@ -504,7 +505,7 @@ static int dn_route_rx_long(struct sk_buff *skb)
 		goto drop_it;
 
 	skb_pull(skb, 20);
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 
 	/* Destination info */
 	ptr += 2;
@@ -542,7 +543,7 @@ static int dn_route_rx_short(struct sk_buff *skb)
 		goto drop_it;
 
 	skb_pull(skb, 5);
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 
 	cb->dst = *(__le16 *)ptr;
 	ptr += 2;
@@ -615,7 +616,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type
 		flags = *skb->data;
 	}
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	/*
 	 * Weed out future version DECnet
@@ -1468,7 +1469,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	struct dn_route *rt = (struct dn_route *)skb->dst;
 	struct rtmsg *r;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	long expires;
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
@@ -1509,19 +1510,19 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	if (rt->fl.iif)
 		RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
 /*
  * This is called by both endnodes and routers now.
  */
-int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
+static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct rtattr **rta = arg;
 	struct rtmsg *rtm = NLMSG_DATA(nlh);
@@ -1537,7 +1538,7 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (skb == NULL)
 		return -ENOBUFS;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	cb = DN_SKB_CB(skb);
 
 	if (rta[RTA_SRC-1])
@@ -1812,6 +1813,13 @@ void __init dn_route_init(void)
 	dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1);
 
 	proc_net_fops_create("decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops);
+
+#ifdef CONFIG_DECNET_ROUTER
+	rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, dn_fib_dump);
+#else
+	rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute,
+		      dn_cache_dump);
+#endif
 }
 
 void __exit dn_route_cleanup(void)
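
dn_rt_fill_info() shows the standard netlink fill pattern after the tail-pointer rework: remember where the message starts, compute nlmsg_len from skb_tail_pointer(), and roll the skb back with nlmsg_trim() on failure. A compressed sketch of the same pattern (illustrative only; attribute emission elided, example_fill() is a hypothetical name):

static int example_fill(struct sk_buff *skb, u32 pid, u32 seq)
{
	unsigned char *b = skb_tail_pointer(skb);	/* message start */
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, RTM_GETROUTE, sizeof(struct rtmsg), 0);
	if (nlh == NULL)
		goto failure;

	/* ... RTA_PUT()/nla_put() attributes would go here ... */

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;	/* final length */
	return skb->len;

failure:
	nlmsg_trim(skb, b);		/* undo the partial message */
	return -1;
}
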
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 5e86dd542302..17a1932216d6 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -31,6 +31,7 @@
 #include <net/dn_fib.h>
 #include <net/dn_neigh.h>
 #include <net/dn_dev.h>
+#include <net/dn_route.h>
 
 static struct fib_rules_ops dn_fib_rules_ops;
 
@@ -239,9 +240,9 @@ static u32 dn_fib_rule_default_pref(void)
 	return 0;
 }
 
-int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
+static void dn_fib_rule_flush_cache(void)
 {
-	return fib_rules_dump(skb, cb, AF_DECnet);
+	dn_rt_cache_flush(-1);
 }
 
 static struct fib_rules_ops dn_fib_rules_ops = {
@@ -254,6 +255,7 @@ static struct fib_rules_ops dn_fib_rules_ops = {
 	.compare	= dn_fib_rule_compare,
 	.fill		= dn_fib_rule_fill,
 	.default_pref	= dn_fib_rule_default_pref,
+	.flush_cache	= dn_fib_rule_flush_cache,
 	.nlgroup	= RTNLGRP_DECnet_RULE,
 	.policy		= dn_fib_rule_policy,
 	.rules_list	= &dn_fib_rules,
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 780a141f8342..d6615c9361e9 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -28,6 +28,7 @@
 #include <asm/uaccess.h>
 #include <linux/route.h> /* RTF_xxx */
 #include <net/neighbour.h>
+#include <net/netlink.h>
 #include <net/dst.h>
 #include <net/flow.h>
 #include <net/fib_rules.h>
@@ -295,7 +296,7 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 {
 	struct rtmsg *rtm;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
 	rtm = NLMSG_DATA(nlh);
@@ -337,19 +338,19 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 			nhp->rtnh_ifindex = nh->nh_oif;
 			if (nh->nh_gw)
 				RTA_PUT(skb, RTA_GATEWAY, 2, &nh->nh_gw);
-			nhp->rtnh_len = skb->tail - (unsigned char *)nhp;
+			nhp->rtnh_len = skb_tail_pointer(skb) - (unsigned char *)nhp;
 		} endfor_nexthops(fi);
 		mp_head->rta_type = RTA_MULTIPATH;
-		mp_head->rta_len = skb->tail - (u8*)mp_head;
+		mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
 	}
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 
 nlmsg_failure:
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -EMSGSIZE;
 }
 
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 0e62def05a58..696234688cf6 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -33,7 +33,7 @@ static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp)
 {
 	struct sk_buff *skb = NULL;
 	size_t size;
-	unsigned char *old_tail;
+	sk_buff_data_t old_tail;
 	struct nlmsghdr *nlh;
 	unsigned char *ptr;
 	struct nf_dn_rtmsg *rtm;
@@ -48,7 +48,7 @@ static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp)
 	rtm = (struct nf_dn_rtmsg *)NLMSG_DATA(nlh);
 	rtm->nfdn_ifindex = rt_skb->dev->ifindex;
 	ptr = NFDN_RTMSG(rtm);
-	memcpy(ptr, rt_skb->data, rt_skb->len);
+	skb_copy_from_linear_data(rt_skb, ptr, rt_skb->len);
 	nlh->nlmsg_len = skb->tail - old_tail;
 	return skb;
 
@@ -102,7 +102,7 @@ static unsigned int dnrmg_hook(unsigned int hook,
 
 static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
 {
-	struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
+	struct nlmsghdr *nlh = nlmsg_hdr(skb);
 
 	if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
 		return;
@@ -138,7 +138,7 @@ static int __init dn_rtmsg_init(void)
 	int rv = 0;
 
 	dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, DNRNG_NLGRP_MAX,
-	                              dnrmg_receive_user_sk, THIS_MODULE);
+	                              dnrmg_receive_user_sk, NULL, THIS_MODULE);
 	if (dnrmg == NULL) {
 		printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket");
 		return -ENOMEM;
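
The extra NULL in the netlink_kernel_create() call is the callback mutex added to its signature in this series; passing NULL keeps the netlink core's default callback locking. A sketch of kernel-side socket creation with the widened signature (NETLINK_EXAMPLE, example_input and example_nl are hypothetical names):

static void example_input(struct sock *sk, int len)
{
	/* drain sk->sk_receive_queue here */
}

static struct sock *example_nl;

static int __init example_init(void)
{
	example_nl = netlink_kernel_create(NETLINK_EXAMPLE, 0, example_input,
					   NULL /* cb_mutex */, THIS_MODULE);
	return example_nl ? 0 : -ENOMEM;
}
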
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index bc12e36263f0..b5524f32ac2d 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -162,7 +162,7 @@ static int econet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	err = memcpy_toiovec(msg->msg_iov, skb->data, copied);
 	if (err)
 		goto out_free;
-	skb_get_timestamp(skb, &sk->sk_stamp);
+	sk->sk_stamp = skb->tstamp;
 
 	if (msg->msg_name)
 		memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
@@ -345,7 +345,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 		goto out_unlock;
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	eb = (struct ec_cb *)&skb->cb;
 
@@ -366,7 +366,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 	fh->cb = cb;
 	fh->port = port;
 	if (sock->type != SOCK_DGRAM) {
-		skb->tail = skb->data;
+		skb_reset_tail_pointer(skb);
 		skb->len = 0;
 	} else if (res < 0)
 		goto out_free;
@@ -727,6 +727,9 @@ static int econet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg
 	case SIOCGSTAMP:
 		return sock_get_timestamp(sk, argp);
 
+	case SIOCGSTAMPNS:
+		return sock_get_timestampns(sk, argp);
+
 	case SIOCSIFADDR:
 	case SIOCGIFADDR:
 		return ec_dev_ioctl(sock, cmd, argp);
@@ -845,7 +848,7 @@ static void aun_send_response(__u32 addr, unsigned long seq, int code, int cb)
 
 static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len)
 {
-	struct iphdr *ip = skb->nh.iph;
+	struct iphdr *ip = ip_hdr(skb);
 	unsigned char stn = ntohl(ip->saddr) & 0xff;
 	struct sock *sk;
 	struct sk_buff *newskb;
@@ -940,10 +943,10 @@ static void aun_data_available(struct sock *sk, int slen)
 		printk(KERN_DEBUG "AUN: recvfrom() error %d\n", -err);
 	}
 
-	data = skb->h.raw + sizeof(struct udphdr);
+	data = skb_transport_header(skb) + sizeof(struct udphdr);
 	ah = (struct aunhdr *)data;
 	len = skb->len - sizeof(struct udphdr);
-	ip = skb->nh.iph;
+	ip = ip_hdr(skb);
 
 	switch (ah->code)
 	{
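
SIOCGSTAMPNS, wired up for econet and AF_INET in this series, is the nanosecond-resolution sibling of SIOCGSTAMP: it reports the last packet's receive time as a struct timespec rather than a struct timeval. Hypothetical userspace usage (assuming the ioctl number from <linux/sockios.h>):

#include <sys/ioctl.h>
#include <time.h>
#include <linux/sockios.h>

/* Illustrative wrapper; returns 0 on success, -1 with errno set. */
static int last_rx_time_ns(int sockfd, struct timespec *ts)
{
	return ioctl(sockfd, SIOCGSTAMPNS, ts);	/* timeval variant: SIOCGSTAMP */
}
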
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 7391f55904d1..0ac2524f3b68 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -156,7 +156,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 	struct ethhdr *eth;
 	unsigned char *rawp;
 
-	skb->mac.raw = skb->data;
+	skb->dev = dev;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, ETH_HLEN);
 	eth = eth_hdr(skb);
 
@@ -228,7 +229,7 @@ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh)
 	eth = (struct ethhdr *)
 	      (((u8 *) hh->hh_data) + (HH_DATA_OFF(sizeof(*eth))));
 
-	if (type == __constant_htons(ETH_P_802_3))
+	if (type == htons(ETH_P_802_3))
 		return -1;
 
 	eth->h_proto = type;
diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig
index 6ef766ef9618..1438adedbc83 100644
--- a/net/ieee80211/Kconfig
+++ b/net/ieee80211/Kconfig
@@ -56,7 +56,8 @@ config IEEE80211_CRYPT_CCMP
 
 config IEEE80211_CRYPT_TKIP
 	tristate "IEEE 802.11i TKIP encryption"
-	depends on IEEE80211 && NET_RADIO
+	depends on IEEE80211
+	select WIRELESS_EXT
 	select CRYPTO
 	select CRYPTO_MICHAEL_MIC
 	select CRYPTO_ECB
diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c
index ec6d8851a061..4eb35079e434 100644
--- a/net/ieee80211/ieee80211_crypt_wep.c
+++ b/net/ieee80211/ieee80211_crypt_wep.c
@@ -152,7 +152,7 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 		return -1;
 
 	/* Copy the IV into the first 3 bytes of the key */
-	memcpy(key, skb->data + hdr_len, 3);
+	skb_copy_from_linear_data_offset(skb, hdr_len, key, 3);
 
 	/* Copy rest of the WEP key (the secret part) */
 	memcpy(key + 3, wep->key, wep->key_len);
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 4084909f6f92..6ae036b1920f 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -42,7 +42,7 @@ static void ieee80211_monitor_rx(struct ieee80211_device *ieee,
 	u16 fc = le16_to_cpu(hdr->frame_ctl);
 
 	skb->dev = ieee->dev;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, ieee80211_get_hdrlen(fc));
 	skb->pkt_type = PACKET_OTHERHOST;
 	skb->protocol = __constant_htons(ETH_P_80211_RAW);
@@ -606,12 +606,12 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 		if (frag == 0) {
 			/* copy first fragment (including full headers) into
 			 * beginning of the fragment cache skb */
-			memcpy(skb_put(frag_skb, flen), skb->data, flen);
+			skb_copy_from_linear_data(skb, skb_put(frag_skb, flen), flen);
 		} else {
 			/* append frame payload to the end of the fragment
 			 * cache skb */
-			memcpy(skb_put(frag_skb, flen), skb->data + hdrlen,
-			       flen);
+			skb_copy_from_linear_data_offset(skb, hdrlen,
+							 skb_put(frag_skb, flen), flen);
 		}
 		dev_kfree_skb_any(skb);
 		skb = NULL;
@@ -759,8 +759,9 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 	    IEEE80211_FCTL_TODS) && skb->len >= ETH_HLEN + ETH_ALEN) {
 		/* Non-standard frame: get addr4 from its bogus location after
 		 * the payload */
-		memcpy(skb->data + ETH_ALEN,
-		       skb->data + skb->len - ETH_ALEN, ETH_ALEN);
+		skb_copy_to_linear_data_offset(skb, ETH_ALEN,
+					       skb->data + skb->len - ETH_ALEN,
+					       ETH_ALEN);
 		skb_trim(skb, skb->len - ETH_ALEN);
 	}
 #endif
@@ -789,10 +790,11 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 
 	if (skb2 != NULL) {
 		/* send to wireless media */
-		skb2->protocol = __constant_htons(ETH_P_802_3);
-		skb2->mac.raw = skb2->nh.raw = skb2->data;
-		/* skb2->nh.raw = skb2->data + ETH_HLEN; */
 		skb2->dev = dev;
+		skb2->protocol = __constant_htons(ETH_P_802_3);
+		skb_reset_mac_header(skb2);
+		skb_reset_network_header(skb2);
+		/* skb2->network_header += ETH_HLEN; */
 		dev_queue_xmit(skb2);
 	}
 #endif
@@ -800,7 +802,6 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 	if (skb) {
 		skb->protocol = eth_type_trans(skb, dev);
 		memset(skb->cb, 0, sizeof(skb->cb));
-		skb->dev = dev;
 		skb->ip_summed = CHECKSUM_NONE; /* 802.11 crc not sufficient */
 		if (netif_rx(skb) == NET_RX_DROP) {
 			/* netif_rx always succeeds, but it might drop
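
The removed skb->dev assignment pairs with the eth_type_trans() hunk earlier in this diff: the helper now sets skb->dev itself, so callers assigning it afterwards are redundant. Illustrative receive path after the change (example_rx() is a hypothetical driver function, not code from this patch):

static void example_rx(struct net_device *dev, struct sk_buff *skb)
{
	skb->protocol = eth_type_trans(skb, dev);	/* also sets skb->dev */
	netif_rx(skb);
}
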
diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c
index 0292d6348e12..a4c3c51140a3 100644
--- a/net/ieee80211/ieee80211_tx.c
+++ b/net/ieee80211/ieee80211_tx.c
@@ -225,10 +225,10 @@ static int ieee80211_classify(struct sk_buff *skb)
 	struct iphdr *ip;
 
 	eth = (struct ethhdr *)skb->data;
-	if (eth->h_proto != __constant_htons(ETH_P_IP))
+	if (eth->h_proto != htons(ETH_P_IP))
 		return 0;
 
-	ip = skb->nh.iph;
+	ip = ip_hdr(skb);
 	switch (ip->tos & 0xfc) {
 	case 0x20:
 		return 2;
@@ -309,8 +309,8 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	/* Save source and destination addresses */
-	memcpy(dest, skb->data, ETH_ALEN);
-	memcpy(src, skb->data + ETH_ALEN, ETH_ALEN);
+	skb_copy_from_linear_data(skb, dest, ETH_ALEN);
+	skb_copy_from_linear_data_offset(skb, ETH_ALEN, src, ETH_ALEN);
 
 	if (host_encrypt || host_build_iv)
 		fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA |
@@ -363,7 +363,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
 		snapped = 1;
 		ieee80211_copy_snap(skb_put(skb_new, SNAP_SIZE + sizeof(u16)),
 				    ether_type);
-		memcpy(skb_put(skb_new, skb->len), skb->data, skb->len);
+		skb_copy_from_linear_data(skb, skb_put(skb_new, skb->len), skb->len);
 		res = crypt->ops->encrypt_msdu(skb_new, hdr_len, crypt->priv);
 		if (res < 0) {
 			IEEE80211_ERROR("msdu encryption failed\n");
@@ -492,7 +492,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
 			bytes -= SNAP_SIZE + sizeof(u16);
 		}
 
-		memcpy(skb_put(skb_frag, bytes), skb->data, bytes);
+		skb_copy_from_linear_data(skb, skb_put(skb_frag, bytes), bytes);
 
 		/* Advance the SKB... */
 		skb_pull(skb, bytes);
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 9e8ef509c51d..e62aee0ec4c5 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -574,6 +574,33 @@ config TCP_CONG_VENO
 	  loss packets.
 	  See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf
 
+config TCP_CONG_YEAH
+	tristate "YeAH TCP"
+	depends on EXPERIMENTAL
+	default n
+	---help---
+	YeAH-TCP is a sender-side high-speed enabled TCP congestion control
+	algorithm, which uses a mixed loss/delay approach to compute the
+	congestion window. Its design goals target high efficiency,
+	internal, RTT and Reno fairness, and resilience to link loss while
+	keeping network element load as low as possible.
+
+	For further details look here:
+	  http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
+
+config TCP_CONG_ILLINOIS
+	tristate "TCP Illinois"
+	depends on EXPERIMENTAL
+	default n
+	---help---
+	TCP-Illinois is a sender-side modification of TCP Reno for
+	high speed long delay links. It uses round-trip-time to
+	adjust the alpha and beta parameters to achieve a higher average
+	throughput and maintain fairness.
+
+	For further details see:
+	  http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
+
 choice
 	prompt "Default TCP congestion control"
 	default DEFAULT_CUBIC
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 7a068626feea..4ff6c151d7f3 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -49,6 +49,8 @@ obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o
 obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
 obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
 obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
+obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
+obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
 obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index cf358c84c440..16aae8ef5555 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -87,6 +87,7 @@
 #include <linux/init.h>
 #include <linux/poll.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/random.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -217,6 +218,26 @@ out:
 	return err;
 }
 
+u32 inet_ehash_secret __read_mostly;
+EXPORT_SYMBOL(inet_ehash_secret);
+
+/*
+ * inet_ehash_secret must be set exactly once
+ * Instead of using a dedicated spinlock, we (ab)use inetsw_lock
+ */
+void build_ehash_secret(void)
+{
+	u32 rnd;
+	do {
+		get_random_bytes(&rnd, sizeof(rnd));
+	} while (rnd == 0);
+	spin_lock_bh(&inetsw_lock);
+	if (!inet_ehash_secret)
+		inet_ehash_secret = rnd;
+	spin_unlock_bh(&inetsw_lock);
+}
+EXPORT_SYMBOL(build_ehash_secret);
+
 /*
  *	Create an inet socket.
  */
@@ -233,6 +254,11 @@ static int inet_create(struct socket *sock, int protocol)
 	int try_loading_module = 0;
 	int err;
 
+	if (sock->type != SOCK_RAW &&
+	    sock->type != SOCK_DGRAM &&
+	    !inet_ehash_secret)
+		build_ehash_secret();
+
 	sock->state = SS_UNCONNECTED;
 
 	/* Look for the requested type/protocol pair. */
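
build_ehash_secret() is a race-tolerant one-time initialiser: the nonzero random word is generated outside the lock, then published under inetsw_lock only if no other CPU got there first. The same pattern in isolation (a standalone sketch with hypothetical names; the version in the hunk above is authoritative):

static u32 example_secret;
static DEFINE_SPINLOCK(example_lock);

static void example_build_secret(void)
{
	u32 rnd;

	do {
		get_random_bytes(&rnd, sizeof(rnd));	/* 0 means "unset" */
	} while (rnd == 0);

	spin_lock_bh(&example_lock);
	if (!example_secret)		/* first caller wins; later calls no-op */
		example_secret = rnd;
	spin_unlock_bh(&example_lock);
}
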
@@ -755,6 +781,9 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	case SIOCGSTAMP:
 		err = sock_get_timestamp(sk, (struct timeval __user *)arg);
 		break;
+	case SIOCGSTAMPNS:
+		err = sock_get_timestampns(sk, (struct timespec __user *)arg);
+		break;
 	case SIOCADDRT:
 	case SIOCDELRT:
 	case SIOCRTMSG:
@@ -1109,7 +1138,7 @@ static int inet_gso_send_check(struct sk_buff *skb)
 	if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
 		goto out;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	ihl = iph->ihl * 4;
 	if (ihl < sizeof(*iph))
 		goto out;
@@ -1117,8 +1146,9 @@ static int inet_gso_send_check(struct sk_buff *skb)
 	if (unlikely(!pskb_may_pull(skb, ihl)))
 		goto out;
 
-	skb->h.raw = __skb_pull(skb, ihl);
-	iph = skb->nh.iph;
+	__skb_pull(skb, ihl);
+	skb_reset_transport_header(skb);
+	iph = ip_hdr(skb);
 	proto = iph->protocol & (MAX_INET_PROTOS - 1);
 	err = -EPROTONOSUPPORT;
 
@@ -1152,7 +1182,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
 	if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
 		goto out;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	ihl = iph->ihl * 4;
 	if (ihl < sizeof(*iph))
 		goto out;
@@ -1160,8 +1190,9 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
 	if (unlikely(!pskb_may_pull(skb, ihl)))
 		goto out;
 
-	skb->h.raw = __skb_pull(skb, ihl);
-	iph = skb->nh.iph;
+	__skb_pull(skb, ihl);
+	skb_reset_transport_header(skb);
+	iph = ip_hdr(skb);
 	id = ntohs(iph->id);
 	proto = iph->protocol & (MAX_INET_PROTOS - 1);
 	segs = ERR_PTR(-EPROTONOSUPPORT);
@@ -1177,17 +1208,57 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
 
 	skb = segs;
 	do {
-		iph = skb->nh.iph;
+		iph = ip_hdr(skb);
 		iph->id = htons(id++);
 		iph->tot_len = htons(skb->len - skb->mac_len);
 		iph->check = 0;
-		iph->check = ip_fast_csum(skb->nh.raw, iph->ihl);
+		iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
 	} while ((skb = skb->next));
 
 out:
 	return segs;
 }
 
+unsigned long snmp_fold_field(void *mib[], int offt)
+{
+	unsigned long res = 0;
+	int i;
+
+	for_each_possible_cpu(i) {
+		res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
+		res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
+	}
+	return res;
+}
+EXPORT_SYMBOL_GPL(snmp_fold_field);
+
+int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
+{
+	BUG_ON(ptr == NULL);
+	ptr[0] = __alloc_percpu(mibsize);
+	if (!ptr[0])
+		goto err0;
+	ptr[1] = __alloc_percpu(mibsize);
+	if (!ptr[1])
+		goto err1;
+	return 0;
+err1:
+	free_percpu(ptr[0]);
+	ptr[0] = NULL;
+err0:
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(snmp_mib_init);
+
+void snmp_mib_free(void *ptr[2])
+{
+	BUG_ON(ptr == NULL);
+	free_percpu(ptr[0]);
+	free_percpu(ptr[1]);
+	ptr[0] = ptr[1] = NULL;
+}
+EXPORT_SYMBOL_GPL(snmp_mib_free);
+
 #ifdef CONFIG_IP_MULTICAST
 static struct net_protocol igmp_protocol = {
 	.handler	= igmp_rcv,
@@ -1214,28 +1285,47 @@ static struct net_protocol icmp_protocol = {
 
 static int __init init_ipv4_mibs(void)
 {
-	net_statistics[0] = alloc_percpu(struct linux_mib);
-	net_statistics[1] = alloc_percpu(struct linux_mib);
-	ip_statistics[0] = alloc_percpu(struct ipstats_mib);
-	ip_statistics[1] = alloc_percpu(struct ipstats_mib);
-	icmp_statistics[0] = alloc_percpu(struct icmp_mib);
-	icmp_statistics[1] = alloc_percpu(struct icmp_mib);
-	tcp_statistics[0] = alloc_percpu(struct tcp_mib);
-	tcp_statistics[1] = alloc_percpu(struct tcp_mib);
-	udp_statistics[0] = alloc_percpu(struct udp_mib);
-	udp_statistics[1] = alloc_percpu(struct udp_mib);
-	udplite_statistics[0] = alloc_percpu(struct udp_mib);
-	udplite_statistics[1] = alloc_percpu(struct udp_mib);
-	if (!
-	    (net_statistics[0] && net_statistics[1] && ip_statistics[0]
-	     && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1]
-	     && udp_statistics[0] && udp_statistics[1]
-	     && udplite_statistics[0] && udplite_statistics[1]))
-		return -ENOMEM;
-
-	(void) tcp_mib_init();
+	if (snmp_mib_init((void **)net_statistics,
+			  sizeof(struct linux_mib),
+			  __alignof__(struct linux_mib)) < 0)
+		goto err_net_mib;
+	if (snmp_mib_init((void **)ip_statistics,
+			  sizeof(struct ipstats_mib),
+			  __alignof__(struct ipstats_mib)) < 0)
+		goto err_ip_mib;
+	if (snmp_mib_init((void **)icmp_statistics,
+			  sizeof(struct icmp_mib),
+			  __alignof__(struct icmp_mib)) < 0)
+		goto err_icmp_mib;
+	if (snmp_mib_init((void **)tcp_statistics,
+			  sizeof(struct tcp_mib),
+			  __alignof__(struct tcp_mib)) < 0)
+		goto err_tcp_mib;
+	if (snmp_mib_init((void **)udp_statistics,
+			  sizeof(struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
+		goto err_udp_mib;
+	if (snmp_mib_init((void **)udplite_statistics,
+			  sizeof(struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
+		goto err_udplite_mib;
+
+	tcp_mib_init();
 
 	return 0;
+
+err_udplite_mib:
+	snmp_mib_free((void **)udp_statistics);
+err_udp_mib:
+	snmp_mib_free((void **)tcp_statistics);
+err_tcp_mib:
+	snmp_mib_free((void **)icmp_statistics);
+err_icmp_mib:
+	snmp_mib_free((void **)ip_statistics);
+err_ip_mib:
+	snmp_mib_free((void **)net_statistics);
+err_net_mib:
+	return -ENOMEM;
 }
 
 static int ipv4_proc_init(void);
@@ -1336,7 +1426,7 @@ static int __init inet_init(void)
 	 *	Initialise per-cpu ipv4 mibs
 	 */
 
-	if(init_ipv4_mibs())
+	if (init_ipv4_mibs())
 		printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ;
 
 	ipv4_proc_init();
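
The snmp_mib_* helpers consolidated above implement the kernel's two-copy per-CPU MIB scheme: one copy of each counter array is updated in softirq context and one in process context, so updates never need atomics. Reading a counter therefore folds both copies across all possible CPUs, which is exactly what snmp_fold_field() does. A hypothetical caller (my_mib and the field offset are illustrative):

static unsigned long read_counter(void *my_mib[2], int field_offset)
{
	return snmp_fold_field(my_mib, field_offset);	/* both copies, all CPUs */
}
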
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 7194eb40b6d0..6da8ff597ad3 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -65,7 +65,7 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
 		char		buf[60];
 	} tmp_iph;
 
-	top_iph = skb->nh.iph;
+	top_iph = ip_hdr(skb);
 	iph = &tmp_iph.iph;
 
 	iph->tos = top_iph->tos;
@@ -152,9 +152,9 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 	skb->ip_summed = CHECKSUM_NONE;
 
 	ah = (struct ip_auth_hdr*)skb->data;
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 
-	ihl = skb->data - skb->nh.raw;
+	ihl = skb->data - skb_network_header(skb);
 	memcpy(work_buf, iph, ihl);
 
 	iph->ttl = 0;
@@ -181,7 +181,9 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 		}
 	}
 	((struct iphdr*)work_buf)->protocol = ah->nexthdr;
-	skb->h.raw = memcpy(skb->nh.raw += ah_hlen, work_buf, ihl);
+	skb->network_header += ah_hlen;
+	memcpy(skb_network_header(skb), work_buf, ihl);
+	skb->transport_header = skb->network_header;
 	__skb_pull(skb, ah_hlen + ihl);
 
 	return 0;
@@ -196,8 +198,8 @@ static void ah4_err(struct sk_buff *skb, u32 info)
 	struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+(iph->ihl<<2));
 	struct xfrm_state *x;
 
-	if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
-	    skb->h.icmph->code != ICMP_FRAG_NEEDED)
+	if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
+	    icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
 		return;
 
 	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 1a3488a83f49..7110779a0244 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -342,13 +342,13 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
342 switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { 342 switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
343 default: 343 default:
344 case 0: /* By default announce any local IP */ 344 case 0: /* By default announce any local IP */
345 if (skb && inet_addr_type(skb->nh.iph->saddr) == RTN_LOCAL) 345 if (skb && inet_addr_type(ip_hdr(skb)->saddr) == RTN_LOCAL)
346 saddr = skb->nh.iph->saddr; 346 saddr = ip_hdr(skb)->saddr;
347 break; 347 break;
348 case 1: /* Restrict announcements of saddr in same subnet */ 348 case 1: /* Restrict announcements of saddr in same subnet */
349 if (!skb) 349 if (!skb)
350 break; 350 break;
351 saddr = skb->nh.iph->saddr; 351 saddr = ip_hdr(skb)->saddr;
352 if (inet_addr_type(saddr) == RTN_LOCAL) { 352 if (inet_addr_type(saddr) == RTN_LOCAL) {
353 /* saddr should be known to target */ 353 /* saddr should be known to target */
354 if (inet_addr_onlink(in_dev, target, saddr)) 354 if (inet_addr_onlink(in_dev, target, saddr))
@@ -578,7 +578,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
578 return NULL; 578 return NULL;
579 579
580 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 580 skb_reserve(skb, LL_RESERVED_SPACE(dev));
581 skb->nh.raw = skb->data; 581 skb_reset_network_header(skb);
582 arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4)); 582 arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4));
583 skb->dev = dev; 583 skb->dev = dev;
584 skb->protocol = htons(ETH_P_ARP); 584 skb->protocol = htons(ETH_P_ARP);
@@ -721,7 +721,7 @@ static int arp_process(struct sk_buff *skb)
721 if (in_dev == NULL) 721 if (in_dev == NULL)
722 goto out; 722 goto out;
723 723
724 arp = skb->nh.arph; 724 arp = arp_hdr(skb);
725 725
726 switch (dev_type) { 726 switch (dev_type) {
727 default: 727 default:
@@ -937,7 +937,7 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
937 (2 * sizeof(u32))))) 937 (2 * sizeof(u32)))))
938 goto freeskb; 938 goto freeskb;
939 939
940 arp = skb->nh.arph; 940 arp = arp_hdr(skb);
941 if (arp->ar_hln != dev->addr_len || 941 if (arp->ar_hln != dev->addr_len ||
942 dev->flags & IFF_NOARP || 942 dev->flags & IFF_NOARP ||
943 skb->pkt_type == PACKET_OTHERHOST || 943 skb->pkt_type == PACKET_OTHERHOST ||
@@ -1178,7 +1178,7 @@ int arp_ioctl(unsigned int cmd, void __user *arg)
1178 goto out; 1178 goto out;
1179 } 1179 }
1180 1180
1181 switch(cmd) { 1181 switch (cmd) {
1182 case SIOCDARP: 1182 case SIOCDARP:
1183 err = arp_req_delete(&r, dev); 1183 err = arp_req_delete(&r, dev);
1184 break; 1184 break;
@@ -1360,7 +1360,7 @@ static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
1360 1360
1361/* ------------------------------------------------------------------------ */ 1361/* ------------------------------------------------------------------------ */
1362 1362
1363static struct seq_operations arp_seq_ops = { 1363static const struct seq_operations arp_seq_ops = {
1364 .start = arp_seq_start, 1364 .start = arp_seq_start,
1365 .next = neigh_seq_next, 1365 .next = neigh_seq_next,
1366 .stop = neigh_seq_stop, 1366 .stop = neigh_seq_stop,
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 2ce5b693a8bd..11a3404d65af 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1174,7 +1174,7 @@ static int cipso_v4_map_cat_rng_ntoh(const struct cipso_v4_doi *doi_def,
1174 u16 cat_low; 1174 u16 cat_low;
1175 u16 cat_high; 1175 u16 cat_high;
1176 1176
1177 for(net_iter = 0; net_iter < net_cat_len; net_iter += 4) { 1177 for (net_iter = 0; net_iter < net_cat_len; net_iter += 4) {
1178 cat_high = ntohs(*((__be16 *)&net_cat[net_iter])); 1178 cat_high = ntohs(*((__be16 *)&net_cat[net_iter]));
1179 if ((net_iter + 4) <= net_cat_len) 1179 if ((net_iter + 4) <= net_cat_len)
1180 cat_low = ntohs(*((__be16 *)&net_cat[net_iter + 2])); 1180 cat_low = ntohs(*((__be16 *)&net_cat[net_iter + 2]));
@@ -1676,7 +1676,7 @@ validate_return:
1676 */ 1676 */
1677void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) 1677void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
1678{ 1678{
1679 if (skb->nh.iph->protocol == IPPROTO_ICMP || error != -EACCES) 1679 if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
1680 return; 1680 return;
1681 1681
1682 if (gateway) 1682 if (gateway)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 98a00d0edc76..088888db8b3d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -48,7 +48,6 @@
48#include <linux/netdevice.h> 48#include <linux/netdevice.h>
49#include <linux/etherdevice.h> 49#include <linux/etherdevice.h>
50#include <linux/skbuff.h> 50#include <linux/skbuff.h>
51#include <linux/rtnetlink.h>
52#include <linux/init.h> 51#include <linux/init.h>
53#include <linux/notifier.h> 52#include <linux/notifier.h>
54#include <linux/inetdevice.h> 53#include <linux/inetdevice.h>
@@ -62,7 +61,7 @@
62#include <net/ip.h> 61#include <net/ip.h>
63#include <net/route.h> 62#include <net/route.h>
64#include <net/ip_fib.h> 63#include <net/ip_fib.h>
65#include <net/netlink.h> 64#include <net/rtnetlink.h>
66 65
67struct ipv4_devconf ipv4_devconf = { 66struct ipv4_devconf ipv4_devconf = {
68 .accept_redirects = 1, 67 .accept_redirects = 1,
@@ -633,7 +632,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
633 dev_load(ifr.ifr_name); 632 dev_load(ifr.ifr_name);
634#endif 633#endif
635 634
636 switch(cmd) { 635 switch (cmd) {
637 case SIOCGIFADDR: /* Get interface address */ 636 case SIOCGIFADDR: /* Get interface address */
638 case SIOCGIFBRDADDR: /* Get the broadcast address */ 637 case SIOCGIFBRDADDR: /* Get the broadcast address */
639 case SIOCGIFDSTADDR: /* Get the destination address */ 638 case SIOCGIFDSTADDR: /* Get the destination address */
@@ -708,7 +707,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
708 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) 707 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
709 goto done; 708 goto done;
710 709
711 switch(cmd) { 710 switch (cmd) {
712 case SIOCGIFADDR: /* Get interface address */ 711 case SIOCGIFADDR: /* Get interface address */
713 sin->sin_addr.s_addr = ifa->ifa_local; 712 sin->sin_addr.s_addr = ifa->ifa_local;
714 goto rarok; 713 goto rarok;
@@ -1183,17 +1182,13 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1183 int s_ip_idx, s_idx = cb->args[0]; 1182 int s_ip_idx, s_idx = cb->args[0];
1184 1183
1185 s_ip_idx = ip_idx = cb->args[1]; 1184 s_ip_idx = ip_idx = cb->args[1];
1186 read_lock(&dev_base_lock);
1187 for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) { 1185 for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
1188 if (idx < s_idx) 1186 if (idx < s_idx)
1189 continue; 1187 continue;
1190 if (idx > s_idx) 1188 if (idx > s_idx)
1191 s_ip_idx = 0; 1189 s_ip_idx = 0;
1192 rcu_read_lock(); 1190 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1193 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
1194 rcu_read_unlock();
1195 continue; 1191 continue;
1196 }
1197 1192
1198 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; 1193 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1199 ifa = ifa->ifa_next, ip_idx++) { 1194 ifa = ifa->ifa_next, ip_idx++) {
@@ -1201,16 +1196,12 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1201 continue; 1196 continue;
1202 if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, 1197 if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1203 cb->nlh->nlmsg_seq, 1198 cb->nlh->nlmsg_seq,
1204 RTM_NEWADDR, NLM_F_MULTI) <= 0) { 1199 RTM_NEWADDR, NLM_F_MULTI) <= 0)
1205 rcu_read_unlock();
1206 goto done; 1200 goto done;
1207 }
1208 } 1201 }
1209 rcu_read_unlock();
1210 } 1202 }
1211 1203
1212done: 1204done:
1213 read_unlock(&dev_base_lock);
1214 cb->args[0] = idx; 1205 cb->args[0] = idx;
1215 cb->args[1] = ip_idx; 1206 cb->args[1] = ip_idx;
1216 1207
@@ -1241,19 +1232,6 @@ errout:
1241 rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err); 1232 rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
1242} 1233}
1243 1234
1244static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
1245 [RTM_NEWADDR - RTM_BASE] = { .doit = inet_rtm_newaddr, },
1246 [RTM_DELADDR - RTM_BASE] = { .doit = inet_rtm_deladdr, },
1247 [RTM_GETADDR - RTM_BASE] = { .dumpit = inet_dump_ifaddr, },
1248 [RTM_NEWROUTE - RTM_BASE] = { .doit = inet_rtm_newroute, },
1249 [RTM_DELROUTE - RTM_BASE] = { .doit = inet_rtm_delroute, },
1250 [RTM_GETROUTE - RTM_BASE] = { .doit = inet_rtm_getroute,
1251 .dumpit = inet_dump_fib, },
1252#ifdef CONFIG_IP_MULTIPLE_TABLES
1253 [RTM_GETRULE - RTM_BASE] = { .dumpit = fib4_rules_dump, },
1254#endif
1255};
1256
1257#ifdef CONFIG_SYSCTL 1235#ifdef CONFIG_SYSCTL
1258 1236
1259void inet_forward_change(void) 1237void inet_forward_change(void)
@@ -1636,7 +1614,10 @@ void __init devinet_init(void)
1636{ 1614{
1637 register_gifconf(PF_INET, inet_gifconf); 1615 register_gifconf(PF_INET, inet_gifconf);
1638 register_netdevice_notifier(&ip_netdev_notifier); 1616 register_netdevice_notifier(&ip_netdev_notifier);
1639 rtnetlink_links[PF_INET] = inet_rtnetlink_table; 1617
1618 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1619 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1620 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1640#ifdef CONFIG_SYSCTL 1621#ifdef CONFIG_SYSCTL
1641 devinet_sysctl.sysctl_header = 1622 devinet_sysctl.sysctl_header =
1642 register_sysctl_table(devinet_sysctl.devinet_root_dir); 1623 register_sysctl_table(devinet_sysctl.devinet_root_dir);
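
devinet.c stops exporting a static rtnetlink_link handler table and instead registers each message handler at init time; fib_frontend.c below does the same for the route messages, which in turn lets the handlers become static. A sketch of the difference as a tiny dispatch registry; rtnl_register_model() and the handler names are invented for illustration, not the kernel API:

    #include <stdio.h>
    #include <stddef.h>

    #define MSG_MAX 8

    typedef int (*doit_fn)(const char *arg);
    typedef int (*dumpit_fn)(void);

    static struct {
            doit_fn doit;
            dumpit_fn dumpit;
    } handlers[MSG_MAX];

    /* Protocols register their own handlers at init time instead of
     * exporting symbols for one central, statically sized table. */
    static void rtnl_register_model(int msgtype, doit_fn doit, dumpit_fn dumpit)
    {
            handlers[msgtype].doit = doit;
            handlers[msgtype].dumpit = dumpit;
    }

    static int newaddr_doit(const char *arg)
    {
            printf("NEWADDR: %s\n", arg);
            return 0;
    }

    int main(void)
    {
            enum { MSG_NEWADDR };   /* stand-in for RTM_NEWADDR - RTM_BASE */

            rtnl_register_model(MSG_NEWADDR, newaddr_doit, NULL);
            if (handlers[MSG_NEWADDR].doit)
                    return handlers[MSG_NEWADDR].doit("192.0.2.1/24");
            return 0;
    }
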
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 31041127eeb8..47c95e8ef045 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -21,13 +21,14 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
21 struct blkcipher_desc desc; 21 struct blkcipher_desc desc;
22 struct esp_data *esp; 22 struct esp_data *esp;
23 struct sk_buff *trailer; 23 struct sk_buff *trailer;
24 u8 *tail;
24 int blksize; 25 int blksize;
25 int clen; 26 int clen;
26 int alen; 27 int alen;
27 int nfrags; 28 int nfrags;
28 29
29 /* Strip IP+ESP header. */ 30 /* Strip IP+ESP header. */
30 __skb_pull(skb, skb->h.raw - skb->data); 31 __skb_pull(skb, skb_transport_offset(skb));
31 /* Now skb is pure payload to encrypt */ 32 /* Now skb is pure payload to encrypt */
32 33
33 err = -ENOMEM; 34 err = -ENOMEM;
@@ -49,19 +50,21 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
49 goto error; 50 goto error;
50 51
51 /* Fill padding... */ 52 /* Fill padding... */
53 tail = skb_tail_pointer(trailer);
52 do { 54 do {
53 int i; 55 int i;
54 for (i=0; i<clen-skb->len - 2; i++) 56 for (i=0; i<clen-skb->len - 2; i++)
55 *(u8*)(trailer->tail + i) = i+1; 57 tail[i] = i + 1;
56 } while (0); 58 } while (0);
57 *(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2; 59 tail[clen - skb->len - 2] = (clen - skb->len) - 2;
58 pskb_put(skb, trailer, clen - skb->len); 60 pskb_put(skb, trailer, clen - skb->len);
59 61
60 __skb_push(skb, skb->data - skb->nh.raw); 62 __skb_push(skb, skb->data - skb_network_header(skb));
61 top_iph = skb->nh.iph; 63 top_iph = ip_hdr(skb);
62 esph = (struct ip_esp_hdr *)(skb->nh.raw + top_iph->ihl*4); 64 esph = (struct ip_esp_hdr *)(skb_network_header(skb) +
65 top_iph->ihl * 4);
63 top_iph->tot_len = htons(skb->len + alen); 66 top_iph->tot_len = htons(skb->len + alen);
64 *(u8*)(trailer->tail - 1) = top_iph->protocol; 67 *(skb_tail_pointer(trailer) - 1) = top_iph->protocol;
65 68
66 /* this is non-NULL only with UDP Encapsulation */ 69 /* this is non-NULL only with UDP Encapsulation */
67 if (x->encap) { 70 if (x->encap) {
@@ -217,12 +220,12 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
217 220
218 /* ... check padding bits here. Silly. :-) */ 221 /* ... check padding bits here. Silly. :-) */
219 222
220 iph = skb->nh.iph; 223 iph = ip_hdr(skb);
221 ihl = iph->ihl * 4; 224 ihl = iph->ihl * 4;
222 225
223 if (x->encap) { 226 if (x->encap) {
224 struct xfrm_encap_tmpl *encap = x->encap; 227 struct xfrm_encap_tmpl *encap = x->encap;
225 struct udphdr *uh = (void *)(skb->nh.raw + ihl); 228 struct udphdr *uh = (void *)(skb_network_header(skb) + ihl);
226 229
227 /* 230 /*
228 * 1) if the NAT-T peer's IP or port changed then 231 * 1) if the NAT-T peer's IP or port changed then
@@ -260,7 +263,8 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
260 263
261 iph->protocol = nexthdr[1]; 264 iph->protocol = nexthdr[1];
262 pskb_trim(skb, skb->len - alen - padlen - 2); 265 pskb_trim(skb, skb->len - alen - padlen - 2);
263 skb->h.raw = __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen) - ihl; 266 __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen);
267 skb_set_transport_header(skb, -ihl);
264 268
265 return 0; 269 return 0;
266 270
@@ -268,32 +272,33 @@ out:
268 return -EINVAL; 272 return -EINVAL;
269} 273}
270 274
271static u32 esp4_get_max_size(struct xfrm_state *x, int mtu) 275static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
272{ 276{
273 struct esp_data *esp = x->data; 277 struct esp_data *esp = x->data;
274 u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); 278 u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
275 int enclen = 0; 279 u32 align = max_t(u32, blksize, esp->conf.padlen);
280 u32 rem;
281
282 mtu -= x->props.header_len + esp->auth.icv_trunc_len;
283 rem = mtu & (align - 1);
284 mtu &= ~(align - 1);
276 285
277 switch (x->props.mode) { 286 switch (x->props.mode) {
278 case XFRM_MODE_TUNNEL: 287 case XFRM_MODE_TUNNEL:
279 mtu = ALIGN(mtu +2, blksize);
280 break; 288 break;
281 default: 289 default:
282 case XFRM_MODE_TRANSPORT: 290 case XFRM_MODE_TRANSPORT:
283 /* The worst case */ 291 /* The worst case */
284 mtu = ALIGN(mtu + 2, 4) + blksize - 4; 292 mtu -= blksize - 4;
293 mtu += min_t(u32, blksize - 4, rem);
285 break; 294 break;
286 case XFRM_MODE_BEET: 295 case XFRM_MODE_BEET:
287 /* The worst case. */ 296 /* The worst case. */
288 enclen = IPV4_BEET_PHMAXLEN; 297 mtu += min_t(u32, IPV4_BEET_PHMAXLEN, rem);
289 mtu = ALIGN(mtu + enclen + 2, blksize);
290 break; 298 break;
291 } 299 }
292 300
293 if (esp->conf.padlen) 301 return mtu - 2;
294 mtu = ALIGN(mtu, esp->conf.padlen);
295
296 return mtu + x->props.header_len + esp->auth.icv_trunc_len - enclen;
297} 302}
298 303
299static void esp4_err(struct sk_buff *skb, u32 info) 304static void esp4_err(struct sk_buff *skb, u32 info)
@@ -302,8 +307,8 @@ static void esp4_err(struct sk_buff *skb, u32 info)
302 struct ip_esp_hdr *esph = (struct ip_esp_hdr*)(skb->data+(iph->ihl<<2)); 307 struct ip_esp_hdr *esph = (struct ip_esp_hdr*)(skb->data+(iph->ihl<<2));
303 struct xfrm_state *x; 308 struct xfrm_state *x;
304 309
305 if (skb->h.icmph->type != ICMP_DEST_UNREACH || 310 if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
306 skb->h.icmph->code != ICMP_FRAG_NEEDED) 311 icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
307 return; 312 return;
308 313
309 x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); 314 x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET);
@@ -336,6 +341,7 @@ static int esp_init_state(struct xfrm_state *x)
336{ 341{
337 struct esp_data *esp = NULL; 342 struct esp_data *esp = NULL;
338 struct crypto_blkcipher *tfm; 343 struct crypto_blkcipher *tfm;
344 u32 align;
339 345
340 /* null auth and encryption can have zero length keys */ 346 /* null auth and encryption can have zero length keys */
341 if (x->aalg) { 347 if (x->aalg) {
@@ -402,6 +408,8 @@ static int esp_init_state(struct xfrm_state *x)
402 x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; 408 x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen;
403 if (x->props.mode == XFRM_MODE_TUNNEL) 409 if (x->props.mode == XFRM_MODE_TUNNEL)
404 x->props.header_len += sizeof(struct iphdr); 410 x->props.header_len += sizeof(struct iphdr);
411 else if (x->props.mode == XFRM_MODE_BEET)
412 x->props.header_len += IPV4_BEET_PHMAXLEN;
405 if (x->encap) { 413 if (x->encap) {
406 struct xfrm_encap_tmpl *encap = x->encap; 414 struct xfrm_encap_tmpl *encap = x->encap;
407 415
@@ -417,7 +425,10 @@ static int esp_init_state(struct xfrm_state *x)
417 } 425 }
418 } 426 }
419 x->data = esp; 427 x->data = esp;
420 x->props.trailer_len = esp4_get_max_size(x, 0) - x->props.header_len; 428 align = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
429 if (esp->conf.padlen)
430 align = max_t(u32, align, esp->conf.padlen);
431 x->props.trailer_len = align + 1 + esp->auth.icv_trunc_len;
421 return 0; 432 return 0;
422 433
423error: 434error:
@@ -434,7 +445,7 @@ static struct xfrm_type esp_type =
434 .proto = IPPROTO_ESP, 445 .proto = IPPROTO_ESP,
435 .init_state = esp_init_state, 446 .init_state = esp_init_state,
436 .destructor = esp_destroy, 447 .destructor = esp_destroy,
437 .get_max_size = esp4_get_max_size, 448 .get_mtu = esp4_get_mtu,
438 .input = esp_input, 449 .input = esp_input,
439 .output = esp_output 450 .output = esp_output
440}; 451};
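
The esp4_get_max_size() to esp4_get_mtu() rewrite inverts the question: instead of growing a payload size up to a worst-case packet size, it shrinks a link MTU down to the usable payload. Re-running the new arithmetic with sample numbers (24-byte ESP header plus IV, 12-byte truncated ICV, 16-byte cipher block, conf.padlen taken as 0); a standalone arithmetic check, not kernel code:

    #include <stdio.h>

    enum esp_mode { MODE_TUNNEL, MODE_TRANSPORT };

    static unsigned int esp4_get_mtu_model(enum esp_mode mode, unsigned int mtu,
                                           unsigned int header_len,
                                           unsigned int icv_trunc_len,
                                           unsigned int blksize)
    {
            unsigned int align = blksize;           /* padlen assumed 0 */
            unsigned int rem;

            mtu -= header_len + icv_trunc_len;      /* strip fixed overhead */
            rem = mtu & (align - 1);        /* slack past the last full block */
            mtu &= ~(align - 1);            /* round down to a block boundary */

            if (mode == MODE_TRANSPORT) {
                    /* the worst case loses blksize-4 bytes; any real slack
                     * up to that amount is given back */
                    mtu -= blksize - 4;
                    mtu += rem < blksize - 4 ? rem : blksize - 4;
            }
            return mtu - 2;         /* pad-length byte + next-header byte */
    }

    int main(void)
    {
            /* 1500 - 24 - 12 = 1464 -> rem 8, rounded down to 1456 */
            printf("tunnel:    %u\n",       /* prints 1454 */
                   esp4_get_mtu_model(MODE_TUNNEL, 1500, 24, 12, 16));
            printf("transport: %u\n",       /* prints 1450 */
                   esp4_get_mtu_model(MODE_TRANSPORT, 1500, 24, 12, 16));
            return 0;
    }
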
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index cac06c43f004..837f2957fa83 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -34,7 +34,6 @@
34#include <linux/if_addr.h> 34#include <linux/if_addr.h>
35#include <linux/if_arp.h> 35#include <linux/if_arp.h>
36#include <linux/skbuff.h> 36#include <linux/skbuff.h>
37#include <linux/netlink.h>
38#include <linux/init.h> 37#include <linux/init.h>
39#include <linux/list.h> 38#include <linux/list.h>
40 39
@@ -46,6 +45,7 @@
46#include <net/icmp.h> 45#include <net/icmp.h>
47#include <net/arp.h> 46#include <net/arp.h>
48#include <net/ip_fib.h> 47#include <net/ip_fib.h>
48#include <net/rtnetlink.h>
49 49
50#define FFprint(a...) printk(KERN_DEBUG a) 50#define FFprint(a...) printk(KERN_DEBUG a)
51 51
@@ -540,7 +540,7 @@ errout:
540 return err; 540 return err;
541} 541}
542 542
543int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 543static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
544{ 544{
545 struct fib_config cfg; 545 struct fib_config cfg;
546 struct fib_table *tb; 546 struct fib_table *tb;
@@ -561,7 +561,7 @@ errout:
561 return err; 561 return err;
562} 562}
563 563
564int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 564static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
565{ 565{
566 struct fib_config cfg; 566 struct fib_config cfg;
567 struct fib_table *tb; 567 struct fib_table *tb;
@@ -582,7 +582,7 @@ errout:
582 return err; 582 return err;
583} 583}
584 584
585int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) 585static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
586{ 586{
587 unsigned int h, s_h; 587 unsigned int h, s_h;
588 unsigned int e = 0, s_e; 588 unsigned int e = 0, s_e;
@@ -777,6 +777,10 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
777 .tos = frn->fl_tos, 777 .tos = frn->fl_tos,
778 .scope = frn->fl_scope } } }; 778 .scope = frn->fl_scope } } };
779 779
780#ifdef CONFIG_IP_MULTIPLE_TABLES
781 res.r = NULL;
782#endif
783
780 frn->err = -ENOENT; 784 frn->err = -ENOENT;
781 if (tb) { 785 if (tb) {
782 local_bh_disable(); 786 local_bh_disable();
@@ -807,7 +811,7 @@ static void nl_fib_input(struct sock *sk, int len)
807 if (skb == NULL) 811 if (skb == NULL)
808 return; 812 return;
809 813
810 nlh = (struct nlmsghdr *)skb->data; 814 nlh = nlmsg_hdr(skb);
811 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || 815 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
812 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) { 816 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
813 kfree_skb(skb); 817 kfree_skb(skb);
@@ -827,7 +831,8 @@ static void nl_fib_input(struct sock *sk, int len)
827 831
828static void nl_fib_lookup_init(void) 832static void nl_fib_lookup_init(void)
829{ 833{
830 netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE); 834 netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL,
835 THIS_MODULE);
831} 836}
832 837
833static void fib_disable_ip(struct net_device *dev, int force) 838static void fib_disable_ip(struct net_device *dev, int force)
@@ -925,6 +930,10 @@ void __init ip_fib_init(void)
925 register_netdevice_notifier(&fib_netdev_notifier); 930 register_netdevice_notifier(&fib_netdev_notifier);
926 register_inetaddr_notifier(&fib_inetaddr_notifier); 931 register_inetaddr_notifier(&fib_inetaddr_notifier);
927 nl_fib_lookup_init(); 932 nl_fib_lookup_init();
933
934 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
935 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
936 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
928} 937}
929 938
930EXPORT_SYMBOL(inet_addr_type); 939EXPORT_SYMBOL(inet_addr_type);
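
The three lines added to nl_fib_lookup() fix a classic uninitialized-field bug: with CONFIG_IP_MULTIPLE_TABLES, fib_res_put(&res) on the lookup path would otherwise act on whatever stack garbage res.r happened to hold. A minimal model of the failure mode, with invented struct names:

    #include <stdio.h>

    struct rule { int refcnt; };
    struct result { struct rule *r; };

    /* Dereferences res->r when set; garbage here means a wild pointer. */
    static void res_put(struct result *res)
    {
            if (res->r)
                    res->r->refcnt--;
    }

    int main(void)
    {
            struct result res;      /* automatic storage: contents undefined */

            res.r = NULL;           /* the fix: make the put a safe no-op */
            /* ... the lookup may or may not assign res.r before we bail ... */
            res_put(&res);
            puts("ok");
            return 0;
    }
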
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index a4949f957ab5..9cfecf1215c9 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -1027,7 +1027,7 @@ out:
1027 return 0; 1027 return 0;
1028} 1028}
1029 1029
1030static struct seq_operations fib_seq_ops = { 1030static const struct seq_operations fib_seq_ops = {
1031 .start = fib_seq_start, 1031 .start = fib_seq_start,
1032 .next = fib_seq_next, 1032 .next = fib_seq_next,
1033 .stop = fib_seq_stop, 1033 .stop = fib_seq_stop,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index c660c074c76c..33083ad52e9f 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -274,11 +274,6 @@ nla_put_failure:
274 return -ENOBUFS; 274 return -ENOBUFS;
275} 275}
276 276
277int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
278{
279 return fib_rules_dump(skb, cb, AF_INET);
280}
281
282static u32 fib4_rule_default_pref(void) 277static u32 fib4_rule_default_pref(void)
283{ 278{
284 struct list_head *pos; 279 struct list_head *pos;
@@ -303,6 +298,11 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
303 + nla_total_size(4); /* flow */ 298 + nla_total_size(4); /* flow */
304} 299}
305 300
301static void fib4_rule_flush_cache(void)
302{
303 rt_cache_flush(-1);
304}
305
306static struct fib_rules_ops fib4_rules_ops = { 306static struct fib_rules_ops fib4_rules_ops = {
307 .family = AF_INET, 307 .family = AF_INET,
308 .rule_size = sizeof(struct fib4_rule), 308 .rule_size = sizeof(struct fib4_rule),
@@ -314,6 +314,7 @@ static struct fib_rules_ops fib4_rules_ops = {
314 .fill = fib4_rule_fill, 314 .fill = fib4_rule_fill,
315 .default_pref = fib4_rule_default_pref, 315 .default_pref = fib4_rule_default_pref,
316 .nlmsg_payload = fib4_rule_nlmsg_payload, 316 .nlmsg_payload = fib4_rule_nlmsg_payload,
317 .flush_cache = fib4_rule_flush_cache,
317 .nlgroup = RTNLGRP_IPV4_RULE, 318 .nlgroup = RTNLGRP_IPV4_RULE,
318 .policy = fib4_rule_policy, 319 .policy = fib4_rule_policy,
319 .rules_list = &fib4_rules, 320 .rules_list = &fib4_rules,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 3dad12ee76c3..406ea7050aed 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -927,7 +927,7 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp,
927 default: 927 default:
928 printk(KERN_DEBUG "impossible 102\n"); 928 printk(KERN_DEBUG "impossible 102\n");
929 return -EINVAL; 929 return -EINVAL;
930 }; 930 }
931 } 931 }
932 return err; 932 return err;
933 } 933 }
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 214c34732e84..9be7da7c3a8f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -50,7 +50,7 @@
50 * Patrick McHardy <kaber@trash.net> 50 * Patrick McHardy <kaber@trash.net>
51 */ 51 */
52 52
53#define VERSION "0.407" 53#define VERSION "0.408"
54 54
55#include <asm/uaccess.h> 55#include <asm/uaccess.h>
56#include <asm/system.h> 56#include <asm/system.h>
@@ -292,8 +292,8 @@ static inline void check_tnode(const struct tnode *tn)
292 292
293static int halve_threshold = 25; 293static int halve_threshold = 25;
294static int inflate_threshold = 50; 294static int inflate_threshold = 50;
295static int halve_threshold_root = 15; 295static int halve_threshold_root = 8;
296static int inflate_threshold_root = 25; 296static int inflate_threshold_root = 15;
297 297
298 298
299static void __alias_free_mem(struct rcu_head *head) 299static void __alias_free_mem(struct rcu_head *head)
@@ -350,11 +350,10 @@ static void __tnode_free_rcu(struct rcu_head *head)
350 350
351static inline void tnode_free(struct tnode *tn) 351static inline void tnode_free(struct tnode *tn)
352{ 352{
353 if(IS_LEAF(tn)) { 353 if (IS_LEAF(tn)) {
354 struct leaf *l = (struct leaf *) tn; 354 struct leaf *l = (struct leaf *) tn;
355 call_rcu_bh(&l->rcu, __leaf_free_rcu); 355 call_rcu_bh(&l->rcu, __leaf_free_rcu);
356 } 356 } else
357 else
358 call_rcu(&tn->rcu, __tnode_free_rcu); 357 call_rcu(&tn->rcu, __tnode_free_rcu);
359} 358}
360 359
@@ -459,6 +458,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
459 struct tnode *old_tn; 458 struct tnode *old_tn;
460 int inflate_threshold_use; 459 int inflate_threshold_use;
461 int halve_threshold_use; 460 int halve_threshold_use;
461 int max_resize;
462 462
463 if (!tn) 463 if (!tn)
464 return NULL; 464 return NULL;
@@ -553,13 +553,14 @@ static struct node *resize(struct trie *t, struct tnode *tn)
553 553
554 /* Keep root node larger */ 554 /* Keep root node larger */
555 555
556 if(!tn->parent) 556 if (!tn->parent)
557 inflate_threshold_use = inflate_threshold_root; 557 inflate_threshold_use = inflate_threshold_root;
558 else 558 else
559 inflate_threshold_use = inflate_threshold; 559 inflate_threshold_use = inflate_threshold;
560 560
561 err = 0; 561 err = 0;
562 while ((tn->full_children > 0 && 562 max_resize = 10;
563 while ((tn->full_children > 0 && max_resize-- &&
563 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= 564 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
564 inflate_threshold_use * tnode_child_length(tn))) { 565 inflate_threshold_use * tnode_child_length(tn))) {
565 566
@@ -574,6 +575,15 @@ static struct node *resize(struct trie *t, struct tnode *tn)
574 } 575 }
575 } 576 }
576 577
578 if (max_resize < 0) {
579 if (!tn->parent)
580 printk(KERN_WARNING "Fix inflate_threshold_root. Now=%d size=%d bits\n",
581 inflate_threshold_root, tn->bits);
582 else
583 printk(KERN_WARNING "Fix inflate_threshold. Now=%d size=%d bits\n",
584 inflate_threshold, tn->bits);
585 }
586
577 check_tnode(tn); 587 check_tnode(tn);
578 588
579 /* 589 /*
@@ -584,13 +594,14 @@ static struct node *resize(struct trie *t, struct tnode *tn)
584 594
585 /* Keep root node larger */ 595 /* Keep root node larger */
586 596
587 if(!tn->parent) 597 if (!tn->parent)
588 halve_threshold_use = halve_threshold_root; 598 halve_threshold_use = halve_threshold_root;
589 else 599 else
590 halve_threshold_use = halve_threshold; 600 halve_threshold_use = halve_threshold;
591 601
592 err = 0; 602 err = 0;
593 while (tn->bits > 1 && 603 max_resize = 10;
604 while (tn->bits > 1 && max_resize-- &&
594 100 * (tnode_child_length(tn) - tn->empty_children) < 605 100 * (tnode_child_length(tn) - tn->empty_children) <
595 halve_threshold_use * tnode_child_length(tn)) { 606 halve_threshold_use * tnode_child_length(tn)) {
596 607
@@ -605,6 +616,14 @@ static struct node *resize(struct trie *t, struct tnode *tn)
605 } 616 }
606 } 617 }
607 618
619 if (max_resize < 0) {
620 if (!tn->parent)
621 printk(KERN_WARNING "Fix halve_threshold_root. Now=%d size=%d bits\n",
622 halve_threshold_root, tn->bits);
623 else
624 printk(KERN_WARNING "Fix halve_threshold. Now=%d size=%d bits\n",
625 halve_threshold, tn->bits);
626 }
608 627
609 /* Only one child remains */ 628 /* Only one child remains */
610 if (tn->empty_children == tnode_child_length(tn) - 1) 629 if (tn->empty_children == tnode_child_length(tn) - 1)
@@ -2039,12 +2058,12 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2039{ 2058{
2040 struct node *n ; 2059 struct node *n ;
2041 2060
2042 if(!t) 2061 if (!t)
2043 return NULL; 2062 return NULL;
2044 2063
2045 n = rcu_dereference(t->trie); 2064 n = rcu_dereference(t->trie);
2046 2065
2047 if(!iter) 2066 if (!iter)
2048 return NULL; 2067 return NULL;
2049 2068
2050 if (n) { 2069 if (n) {
@@ -2084,7 +2103,7 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s)
2084 int i; 2103 int i;
2085 2104
2086 s->tnodes++; 2105 s->tnodes++;
2087 if(tn->bits < MAX_STAT_DEPTH) 2106 if (tn->bits < MAX_STAT_DEPTH)
2088 s->nodesizes[tn->bits]++; 2107 s->nodesizes[tn->bits]++;
2089 2108
2090 for (i = 0; i < (1<<tn->bits); i++) 2109 for (i = 0; i < (1<<tn->bits); i++)
@@ -2250,7 +2269,7 @@ static inline const char *rtn_scope(enum rt_scope_t s)
2250{ 2269{
2251 static char buf[32]; 2270 static char buf[32];
2252 2271
2253 switch(s) { 2272 switch (s) {
2254 case RT_SCOPE_UNIVERSE: return "universe"; 2273 case RT_SCOPE_UNIVERSE: return "universe";
2255 case RT_SCOPE_SITE: return "site"; 2274 case RT_SCOPE_SITE: return "site";
2256 case RT_SCOPE_LINK: return "link"; 2275 case RT_SCOPE_LINK: return "link";
@@ -2340,7 +2359,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
2340 return 0; 2359 return 0;
2341} 2360}
2342 2361
2343static struct seq_operations fib_trie_seq_ops = { 2362static const struct seq_operations fib_trie_seq_ops = {
2344 .start = fib_trie_seq_start, 2363 .start = fib_trie_seq_start,
2345 .next = fib_trie_seq_next, 2364 .next = fib_trie_seq_next,
2346 .stop = fib_trie_seq_stop, 2365 .stop = fib_trie_seq_stop,
@@ -2461,7 +2480,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
2461 return 0; 2480 return 0;
2462} 2481}
2463 2482
2464static struct seq_operations fib_route_seq_ops = { 2483static const struct seq_operations fib_route_seq_ops = {
2465 .start = fib_trie_seq_start, 2484 .start = fib_trie_seq_start,
2466 .next = fib_trie_seq_next, 2485 .next = fib_trie_seq_next,
2467 .stop = fib_trie_seq_stop, 2486 .stop = fib_trie_seq_stop,
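
The fib_trie resize() changes bound both the inflate and halve loops with a ten-pass budget and print a warning when it runs out, so a badly tuned threshold degrades trie shape instead of iterating without limit. The shape of that guard, reduced to a self-contained sketch where one halving stands in for one inflate()/halve() pass:

    #include <stdio.h>

    /* One resize pass, reduced to "halve the work left". */
    static int step(int *work)
    {
            *work /= 2;
            return *work > 0;
    }

    int main(void)
    {
            int work = 1 << 20;
            int max_resize = 10;    /* the budget the patch introduces */

            while (work > 0 && max_resize-- && step(&work))
                    ;

            if (max_resize < 0)
                    fprintf(stderr,
                            "warning: resize budget exhausted; retune threshold\n");
            else
                    printf("converged, %d tries left\n", max_resize);
            return 0;
    }
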
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4b7a0d946a0d..d38cbba92a4d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -355,7 +355,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
355 ipc, rt, MSG_DONTWAIT) < 0) 355 ipc, rt, MSG_DONTWAIT) < 0)
356 ip_flush_pending_frames(icmp_socket->sk); 356 ip_flush_pending_frames(icmp_socket->sk);
357 else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { 357 else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {
358 struct icmphdr *icmph = skb->h.icmph; 358 struct icmphdr *icmph = icmp_hdr(skb);
359 __wsum csum = 0; 359 __wsum csum = 0;
360 struct sk_buff *skb1; 360 struct sk_buff *skb1;
361 361
@@ -392,7 +392,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
392 icmp_param->data.icmph.checksum = 0; 392 icmp_param->data.icmph.checksum = 0;
393 icmp_out_count(icmp_param->data.icmph.type); 393 icmp_out_count(icmp_param->data.icmph.type);
394 394
395 inet->tos = skb->nh.iph->tos; 395 inet->tos = ip_hdr(skb)->tos;
396 daddr = ipc.addr = rt->rt_src; 396 daddr = ipc.addr = rt->rt_src;
397 ipc.opt = NULL; 397 ipc.opt = NULL;
398 if (icmp_param->replyopts.optlen) { 398 if (icmp_param->replyopts.optlen) {
@@ -404,7 +404,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
404 struct flowi fl = { .nl_u = { .ip4_u = 404 struct flowi fl = { .nl_u = { .ip4_u =
405 { .daddr = daddr, 405 { .daddr = daddr,
406 .saddr = rt->rt_spec_dst, 406 .saddr = rt->rt_spec_dst,
407 .tos = RT_TOS(skb->nh.iph->tos) } }, 407 .tos = RT_TOS(ip_hdr(skb)->tos) } },
408 .proto = IPPROTO_ICMP }; 408 .proto = IPPROTO_ICMP };
409 security_skb_classify_flow(skb, &fl); 409 security_skb_classify_flow(skb, &fl);
410 if (ip_route_output_key(&rt, &fl)) 410 if (ip_route_output_key(&rt, &fl))
@@ -448,9 +448,10 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
448 * Check this, icmp_send is called from the most obscure devices 448 * Check this, icmp_send is called from the most obscure devices
449 * sometimes. 449 * sometimes.
450 */ 450 */
451 iph = skb_in->nh.iph; 451 iph = ip_hdr(skb_in);
452 452
453 if ((u8 *)iph < skb_in->head || (u8 *)(iph + 1) > skb_in->tail) 453 if ((u8 *)iph < skb_in->head ||
454 (skb_in->network_header + sizeof(*iph)) > skb_in->tail)
454 goto out; 455 goto out;
455 456
456 /* 457 /*
@@ -484,7 +485,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
484 u8 _inner_type, *itp; 485 u8 _inner_type, *itp;
485 486
486 itp = skb_header_pointer(skb_in, 487 itp = skb_header_pointer(skb_in,
487 skb_in->nh.raw + 488 skb_network_header(skb_in) +
488 (iph->ihl << 2) + 489 (iph->ihl << 2) +
489 offsetof(struct icmphdr, 490 offsetof(struct icmphdr,
490 type) - 491 type) -
@@ -536,7 +537,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
536 icmp_param.data.icmph.un.gateway = info; 537 icmp_param.data.icmph.un.gateway = info;
537 icmp_param.data.icmph.checksum = 0; 538 icmp_param.data.icmph.checksum = 0;
538 icmp_param.skb = skb_in; 539 icmp_param.skb = skb_in;
539 icmp_param.offset = skb_in->nh.raw - skb_in->data; 540 icmp_param.offset = skb_network_offset(skb_in);
540 icmp_out_count(icmp_param.data.icmph.type); 541 icmp_out_count(icmp_param.data.icmph.type);
541 inet_sk(icmp_socket->sk)->tos = tos; 542 inet_sk(icmp_socket->sk)->tos = tos;
542 ipc.addr = iph->saddr; 543 ipc.addr = iph->saddr;
@@ -613,7 +614,7 @@ static void icmp_unreach(struct sk_buff *skb)
613 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 614 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
614 goto out_err; 615 goto out_err;
615 616
616 icmph = skb->h.icmph; 617 icmph = icmp_hdr(skb);
617 iph = (struct iphdr *)skb->data; 618 iph = (struct iphdr *)skb->data;
618 619
619 if (iph->ihl < 5) /* Mangled header, drop. */ 620 if (iph->ihl < 5) /* Mangled header, drop. */
@@ -676,7 +677,7 @@ static void icmp_unreach(struct sk_buff *skb)
676 printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP " 677 printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP "
677 "type %u, code %u " 678 "type %u, code %u "
678 "error to a broadcast: %u.%u.%u.%u on %s\n", 679 "error to a broadcast: %u.%u.%u.%u on %s\n",
679 NIPQUAD(skb->nh.iph->saddr), 680 NIPQUAD(ip_hdr(skb)->saddr),
680 icmph->type, icmph->code, 681 icmph->type, icmph->code,
681 NIPQUAD(iph->daddr), 682 NIPQUAD(iph->daddr),
682 skb->dev->name); 683 skb->dev->name);
@@ -743,7 +744,7 @@ static void icmp_redirect(struct sk_buff *skb)
743 744
744 iph = (struct iphdr *)skb->data; 745 iph = (struct iphdr *)skb->data;
745 746
746 switch (skb->h.icmph->code & 7) { 747 switch (icmp_hdr(skb)->code & 7) {
747 case ICMP_REDIR_NET: 748 case ICMP_REDIR_NET:
748 case ICMP_REDIR_NETTOS: 749 case ICMP_REDIR_NETTOS:
749 /* 750 /*
@@ -751,8 +752,8 @@ static void icmp_redirect(struct sk_buff *skb)
751 */ 752 */
752 case ICMP_REDIR_HOST: 753 case ICMP_REDIR_HOST:
753 case ICMP_REDIR_HOSTTOS: 754 case ICMP_REDIR_HOSTTOS:
754 ip_rt_redirect(skb->nh.iph->saddr, iph->daddr, 755 ip_rt_redirect(ip_hdr(skb)->saddr, iph->daddr,
755 skb->h.icmph->un.gateway, 756 icmp_hdr(skb)->un.gateway,
756 iph->saddr, skb->dev); 757 iph->saddr, skb->dev);
757 break; 758 break;
758 } 759 }
@@ -780,7 +781,7 @@ static void icmp_echo(struct sk_buff *skb)
780 if (!sysctl_icmp_echo_ignore_all) { 781 if (!sysctl_icmp_echo_ignore_all) {
781 struct icmp_bxm icmp_param; 782 struct icmp_bxm icmp_param;
782 783
783 icmp_param.data.icmph = *skb->h.icmph; 784 icmp_param.data.icmph = *icmp_hdr(skb);
784 icmp_param.data.icmph.type = ICMP_ECHOREPLY; 785 icmp_param.data.icmph.type = ICMP_ECHOREPLY;
785 icmp_param.skb = skb; 786 icmp_param.skb = skb;
786 icmp_param.offset = 0; 787 icmp_param.offset = 0;
@@ -816,7 +817,7 @@ static void icmp_timestamp(struct sk_buff *skb)
816 icmp_param.data.times[2] = icmp_param.data.times[1]; 817 icmp_param.data.times[2] = icmp_param.data.times[1];
817 if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4)) 818 if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4))
818 BUG(); 819 BUG();
819 icmp_param.data.icmph = *skb->h.icmph; 820 icmp_param.data.icmph = *icmp_hdr(skb);
820 icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY; 821 icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY;
821 icmp_param.data.icmph.code = 0; 822 icmp_param.data.icmph.code = 0;
822 icmp_param.skb = skb; 823 icmp_param.skb = skb;
@@ -943,7 +944,7 @@ int icmp_rcv(struct sk_buff *skb)
943 if (!pskb_pull(skb, sizeof(struct icmphdr))) 944 if (!pskb_pull(skb, sizeof(struct icmphdr)))
944 goto error; 945 goto error;
945 946
946 icmph = skb->h.icmph; 947 icmph = icmp_hdr(skb);
947 948
948 /* 949 /*
949 * 18 is the highest 'known' ICMP type. Anything else is a mystery 950 * 18 is the highest 'known' ICMP type. Anything else is a mystery
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 8cedb2a2c9df..2506021c2935 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -314,7 +314,9 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
314 314
315 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 315 skb_reserve(skb, LL_RESERVED_SPACE(dev));
316 316
317 skb->nh.iph = pip =(struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4); 317 skb_reset_network_header(skb);
318 pip = ip_hdr(skb);
319 skb_put(skb, sizeof(struct iphdr) + 4);
318 320
319 pip->version = 4; 321 pip->version = 4;
320 pip->ihl = (sizeof(struct iphdr)+4)>>2; 322 pip->ihl = (sizeof(struct iphdr)+4)>>2;
@@ -331,8 +333,9 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
331 ((u8*)&pip[1])[2] = 0; 333 ((u8*)&pip[1])[2] = 0;
332 ((u8*)&pip[1])[3] = 0; 334 ((u8*)&pip[1])[3] = 0;
333 335
334 pig =(struct igmpv3_report *)skb_put(skb, sizeof(*pig)); 336 skb->transport_header = skb->network_header + sizeof(struct iphdr) + 4;
335 skb->h.igmph = (struct igmphdr *)pig; 337 skb_put(skb, sizeof(*pig));
338 pig = igmpv3_report_hdr(skb);
336 pig->type = IGMPV3_HOST_MEMBERSHIP_REPORT; 339 pig->type = IGMPV3_HOST_MEMBERSHIP_REPORT;
337 pig->resv1 = 0; 340 pig->resv1 = 0;
338 pig->csum = 0; 341 pig->csum = 0;
@@ -343,16 +346,14 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
343 346
344static int igmpv3_sendpack(struct sk_buff *skb) 347static int igmpv3_sendpack(struct sk_buff *skb)
345{ 348{
346 struct iphdr *pip = skb->nh.iph; 349 struct iphdr *pip = ip_hdr(skb);
347 struct igmphdr *pig = skb->h.igmph; 350 struct igmphdr *pig = igmp_hdr(skb);
348 int iplen, igmplen; 351 const int iplen = skb->tail - skb->network_header;
352 const int igmplen = skb->tail - skb->transport_header;
349 353
350 iplen = skb->tail - (unsigned char *)skb->nh.iph;
351 pip->tot_len = htons(iplen); 354 pip->tot_len = htons(iplen);
352 ip_send_check(pip); 355 ip_send_check(pip);
353 356 pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen);
354 igmplen = skb->tail - (unsigned char *)skb->h.igmph;
355 pig->csum = ip_compute_csum((void *)skb->h.igmph, igmplen);
356 357
357 return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dev, 358 return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dev,
358 dst_output); 359 dst_output);
@@ -379,7 +380,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
379 pgr->grec_auxwords = 0; 380 pgr->grec_auxwords = 0;
380 pgr->grec_nsrcs = 0; 381 pgr->grec_nsrcs = 0;
381 pgr->grec_mca = pmc->multiaddr; 382 pgr->grec_mca = pmc->multiaddr;
382 pih = (struct igmpv3_report *)skb->h.igmph; 383 pih = igmpv3_report_hdr(skb);
383 pih->ngrec = htons(ntohs(pih->ngrec)+1); 384 pih->ngrec = htons(ntohs(pih->ngrec)+1);
384 *ppgr = pgr; 385 *ppgr = pgr;
385 return skb; 386 return skb;
@@ -412,7 +413,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
412 if (!*psf_list) 413 if (!*psf_list)
413 goto empty_source; 414 goto empty_source;
414 415
415 pih = skb ? (struct igmpv3_report *)skb->h.igmph : NULL; 416 pih = skb ? igmpv3_report_hdr(skb) : NULL;
416 417
417 /* EX and TO_EX get a fresh packet, if needed */ 418 /* EX and TO_EX get a fresh packet, if needed */
418 if (truncate) { 419 if (truncate) {
@@ -664,7 +665,9 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
664 665
665 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 666 skb_reserve(skb, LL_RESERVED_SPACE(dev));
666 667
667 skb->nh.iph = iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4); 668 skb_reset_network_header(skb);
669 iph = ip_hdr(skb);
670 skb_put(skb, sizeof(struct iphdr) + 4);
668 671
669 iph->version = 4; 672 iph->version = 4;
670 iph->ihl = (sizeof(struct iphdr)+4)>>2; 673 iph->ihl = (sizeof(struct iphdr)+4)>>2;
@@ -827,8 +830,8 @@ static void igmp_heard_report(struct in_device *in_dev, __be32 group)
827static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, 830static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
828 int len) 831 int len)
829{ 832{
830 struct igmphdr *ih = skb->h.igmph; 833 struct igmphdr *ih = igmp_hdr(skb);
831 struct igmpv3_query *ih3 = (struct igmpv3_query *)ih; 834 struct igmpv3_query *ih3 = igmpv3_query_hdr(skb);
832 struct ip_mc_list *im; 835 struct ip_mc_list *im;
833 __be32 group = ih->group; 836 __be32 group = ih->group;
834 int max_delay; 837 int max_delay;
@@ -861,12 +864,12 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
861 if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) 864 if (!pskb_may_pull(skb, sizeof(struct igmpv3_query)))
862 return; 865 return;
863 866
864 ih3 = (struct igmpv3_query *) skb->h.raw; 867 ih3 = igmpv3_query_hdr(skb);
865 if (ih3->nsrcs) { 868 if (ih3->nsrcs) {
866 if (!pskb_may_pull(skb, sizeof(struct igmpv3_query) 869 if (!pskb_may_pull(skb, sizeof(struct igmpv3_query)
867 + ntohs(ih3->nsrcs)*sizeof(__be32))) 870 + ntohs(ih3->nsrcs)*sizeof(__be32)))
868 return; 871 return;
869 ih3 = (struct igmpv3_query *) skb->h.raw; 872 ih3 = igmpv3_query_hdr(skb);
870 } 873 }
871 874
872 max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE); 875 max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE);
@@ -943,7 +946,7 @@ int igmp_rcv(struct sk_buff *skb)
943 goto drop; 946 goto drop;
944 } 947 }
945 948
946 ih = skb->h.igmph; 949 ih = igmp_hdr(skb);
947 switch (ih->type) { 950 switch (ih->type) {
948 case IGMP_HOST_MEMBERSHIP_QUERY: 951 case IGMP_HOST_MEMBERSHIP_QUERY:
949 igmp_heard_query(in_dev, skb, len); 952 igmp_heard_query(in_dev, skb, len);
@@ -2397,7 +2400,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
2397 return 0; 2400 return 0;
2398} 2401}
2399 2402
2400static struct seq_operations igmp_mc_seq_ops = { 2403static const struct seq_operations igmp_mc_seq_ops = {
2401 .start = igmp_mc_seq_start, 2404 .start = igmp_mc_seq_start,
2402 .next = igmp_mc_seq_next, 2405 .next = igmp_mc_seq_next,
2403 .stop = igmp_mc_seq_stop, 2406 .stop = igmp_mc_seq_stop,
@@ -2571,7 +2574,7 @@ static int igmp_mcf_seq_show(struct seq_file *seq, void *v)
2571 return 0; 2574 return 0;
2572} 2575}
2573 2576
2574static struct seq_operations igmp_mcf_seq_ops = { 2577static const struct seq_operations igmp_mcf_seq_ops = {
2575 .start = igmp_mcf_seq_start, 2578 .start = igmp_mcf_seq_start,
2576 .next = igmp_mcf_seq_next, 2579 .next = igmp_mcf_seq_next,
2577 .stop = igmp_mcf_seq_stop, 2580 .stop = igmp_mcf_seq_stop,
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 5df71cd08da8..dbeacd8b0f90 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -27,6 +27,7 @@
27#include <net/inet_hashtables.h> 27#include <net/inet_hashtables.h>
28#include <net/inet_timewait_sock.h> 28#include <net/inet_timewait_sock.h>
29#include <net/inet6_hashtables.h> 29#include <net/inet6_hashtables.h>
30#include <net/netlink.h>
30 31
31#include <linux/inet.h> 32#include <linux/inet.h>
32#include <linux/stddef.h> 33#include <linux/stddef.h>
@@ -60,7 +61,7 @@ static int inet_csk_diag_fill(struct sock *sk,
60 struct nlmsghdr *nlh; 61 struct nlmsghdr *nlh;
61 void *info = NULL; 62 void *info = NULL;
62 struct inet_diag_meminfo *minfo = NULL; 63 struct inet_diag_meminfo *minfo = NULL;
63 unsigned char *b = skb->tail; 64 unsigned char *b = skb_tail_pointer(skb);
64 const struct inet_diag_handler *handler; 65 const struct inet_diag_handler *handler;
65 66
66 handler = inet_diag_table[unlh->nlmsg_type]; 67 handler = inet_diag_table[unlh->nlmsg_type];
@@ -147,12 +148,12 @@ static int inet_csk_diag_fill(struct sock *sk,
147 icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) 148 icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info)
148 icsk->icsk_ca_ops->get_info(sk, ext, skb); 149 icsk->icsk_ca_ops->get_info(sk, ext, skb);
149 150
150 nlh->nlmsg_len = skb->tail - b; 151 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
151 return skb->len; 152 return skb->len;
152 153
153rtattr_failure: 154rtattr_failure:
154nlmsg_failure: 155nlmsg_failure:
155 skb_trim(skb, b - skb->data); 156 nlmsg_trim(skb, b);
156 return -EMSGSIZE; 157 return -EMSGSIZE;
157} 158}
158 159
@@ -163,7 +164,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
163{ 164{
164 long tmo; 165 long tmo;
165 struct inet_diag_msg *r; 166 struct inet_diag_msg *r;
166 const unsigned char *previous_tail = skb->tail; 167 const unsigned char *previous_tail = skb_tail_pointer(skb);
167 struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq, 168 struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq,
168 unlh->nlmsg_type, sizeof(*r)); 169 unlh->nlmsg_type, sizeof(*r));
169 170
@@ -205,10 +206,10 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
205 &tw6->tw_v6_daddr); 206 &tw6->tw_v6_daddr);
206 } 207 }
207#endif 208#endif
208 nlh->nlmsg_len = skb->tail - previous_tail; 209 nlh->nlmsg_len = skb_tail_pointer(skb) - previous_tail;
209 return skb->len; 210 return skb->len;
210nlmsg_failure: 211nlmsg_failure:
211 skb_trim(skb, previous_tail - skb->data); 212 nlmsg_trim(skb, previous_tail);
212 return -EMSGSIZE; 213 return -EMSGSIZE;
213} 214}
214 215
@@ -535,7 +536,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
535{ 536{
536 const struct inet_request_sock *ireq = inet_rsk(req); 537 const struct inet_request_sock *ireq = inet_rsk(req);
537 struct inet_sock *inet = inet_sk(sk); 538 struct inet_sock *inet = inet_sk(sk);
538 unsigned char *b = skb->tail; 539 unsigned char *b = skb_tail_pointer(skb);
539 struct inet_diag_msg *r; 540 struct inet_diag_msg *r;
540 struct nlmsghdr *nlh; 541 struct nlmsghdr *nlh;
541 long tmo; 542 long tmo;
@@ -574,12 +575,12 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
574 &inet6_rsk(req)->rmt_addr); 575 &inet6_rsk(req)->rmt_addr);
575 } 576 }
576#endif 577#endif
577 nlh->nlmsg_len = skb->tail - b; 578 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
578 579
579 return skb->len; 580 return skb->len;
580 581
581nlmsg_failure: 582nlmsg_failure:
582 skb_trim(skb, b - skb->data); 583 nlmsg_trim(skb, b);
583 return -1; 584 return -1;
584} 585}
585 586
@@ -805,68 +806,43 @@ done:
805 return skb->len; 806 return skb->len;
806} 807}
807 808
808static inline int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) 809static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
809{ 810{
810 if (!(nlh->nlmsg_flags&NLM_F_REQUEST)) 811 int hdrlen = sizeof(struct inet_diag_req);
811 return 0;
812 812
813 if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX) 813 if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX ||
814 goto err_inval; 814 nlmsg_len(nlh) < hdrlen)
815 return -EINVAL;
815 816
816 if (inet_diag_table[nlh->nlmsg_type] == NULL) 817 if (inet_diag_table[nlh->nlmsg_type] == NULL)
817 return -ENOENT; 818 return -ENOENT;
818 819
819 if (NLMSG_LENGTH(sizeof(struct inet_diag_req)) > skb->len) 820 if (nlh->nlmsg_flags & NLM_F_DUMP) {
820 goto err_inval; 821 if (nlmsg_attrlen(nlh, hdrlen)) {
821 822 struct nlattr *attr;
822 if (nlh->nlmsg_flags&NLM_F_DUMP) { 823
823 if (nlh->nlmsg_len > 824 attr = nlmsg_find_attr(nlh, hdrlen,
824 (4 + NLMSG_SPACE(sizeof(struct inet_diag_req)))) { 825 INET_DIAG_REQ_BYTECODE);
825 struct rtattr *rta = (void *)(NLMSG_DATA(nlh) + 826 if (attr == NULL ||
826 sizeof(struct inet_diag_req)); 827 nla_len(attr) < sizeof(struct inet_diag_bc_op) ||
827 if (rta->rta_type != INET_DIAG_REQ_BYTECODE || 828 inet_diag_bc_audit(nla_data(attr), nla_len(attr)))
828 rta->rta_len < 8 || 829 return -EINVAL;
829 rta->rta_len >
830 (nlh->nlmsg_len -
831 NLMSG_SPACE(sizeof(struct inet_diag_req))))
832 goto err_inval;
833 if (inet_diag_bc_audit(RTA_DATA(rta), RTA_PAYLOAD(rta)))
834 goto err_inval;
835 } 830 }
831
836 return netlink_dump_start(idiagnl, skb, nlh, 832 return netlink_dump_start(idiagnl, skb, nlh,
837 inet_diag_dump, NULL); 833 inet_diag_dump, NULL);
838 } else
839 return inet_diag_get_exact(skb, nlh);
840
841err_inval:
842 return -EINVAL;
843}
844
845
846static inline void inet_diag_rcv_skb(struct sk_buff *skb)
847{
848 if (skb->len >= NLMSG_SPACE(0)) {
849 int err;
850 struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
851
852 if (nlh->nlmsg_len < sizeof(*nlh) ||
853 skb->len < nlh->nlmsg_len)
854 return;
855 err = inet_diag_rcv_msg(skb, nlh);
856 if (err || nlh->nlmsg_flags & NLM_F_ACK)
857 netlink_ack(skb, nlh, err);
858 } 834 }
835
836 return inet_diag_get_exact(skb, nlh);
859} 837}
860 838
861static void inet_diag_rcv(struct sock *sk, int len) 839static void inet_diag_rcv(struct sock *sk, int len)
862{ 840{
863 struct sk_buff *skb; 841 unsigned int qlen = 0;
864 unsigned int qlen = skb_queue_len(&sk->sk_receive_queue);
865 842
866 while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) { 843 do {
867 inet_diag_rcv_skb(skb); 844 netlink_run_queue(sk, &qlen, &inet_diag_rcv_msg);
868 kfree_skb(skb); 845 } while (qlen);
869 }
870} 846}
871 847
872static DEFINE_SPINLOCK(inet_diag_register_lock); 848static DEFINE_SPINLOCK(inet_diag_register_lock);
@@ -917,7 +893,7 @@ static int __init inet_diag_init(void)
917 goto out; 893 goto out;
918 894
919 idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv, 895 idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv,
920 THIS_MODULE); 896 NULL, THIS_MODULE);
921 if (idiagnl == NULL) 897 if (idiagnl == NULL)
922 goto out_free_table; 898 goto out_free_table;
923 err = 0; 899 err = 0;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index db3ef96bdfd9..2f44e6128068 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -87,10 +87,12 @@ static DEFINE_RWLOCK(peer_pool_lock);
87 87
88static int peer_total; 88static int peer_total;
89/* Exported for sysctl_net_ipv4. */ 89/* Exported for sysctl_net_ipv4. */
90int inet_peer_threshold = 65536 + 128; /* start to throw entries more 90int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more
91 * aggressively at this stage */ 91 * aggressively at this stage */
92int inet_peer_minttl = 120 * HZ; /* TTL under high load: 120 sec */ 92int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */
93int inet_peer_maxttl = 10 * 60 * HZ; /* usual time to live: 10 min */ 93int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */
94int inet_peer_gc_mintime __read_mostly = 10 * HZ;
95int inet_peer_gc_maxtime __read_mostly = 120 * HZ;
94 96
95static struct inet_peer *inet_peer_unused_head; 97static struct inet_peer *inet_peer_unused_head;
96static struct inet_peer **inet_peer_unused_tailp = &inet_peer_unused_head; 98static struct inet_peer **inet_peer_unused_tailp = &inet_peer_unused_head;
@@ -99,9 +101,6 @@ static DEFINE_SPINLOCK(inet_peer_unused_lock);
99static void peer_check_expire(unsigned long dummy); 101static void peer_check_expire(unsigned long dummy);
100static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0); 102static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0);
101 103
102/* Exported for sysctl_net_ipv4. */
103int inet_peer_gc_mintime = 10 * HZ,
104 inet_peer_gc_maxtime = 120 * HZ;
105 104
106/* Called from ip_output.c:ip_init */ 105/* Called from ip_output.c:ip_init */
107void __init inet_initpeers(void) 106void __init inet_initpeers(void)
@@ -151,20 +150,27 @@ static void unlink_from_unused(struct inet_peer *p)
151 spin_unlock_bh(&inet_peer_unused_lock); 150 spin_unlock_bh(&inet_peer_unused_lock);
152} 151}
153 152
154/* Called with local BH disabled and the pool lock held. */ 153/*
155#define lookup(daddr) \ 154 * Called with local BH disabled and the pool lock held.
155 * _stack is known to be NULL or not at compile time,
156 * so compiler will optimize the if (_stack) tests.
157 */
158#define lookup(_daddr,_stack) \
156({ \ 159({ \
157 struct inet_peer *u, **v; \ 160 struct inet_peer *u, **v; \
158 stackptr = stack; \ 161 if (_stack) { \
159 *stackptr++ = &peer_root; \ 162 stackptr = _stack; \
163 *stackptr++ = &peer_root; \
164 } \
160 for (u = peer_root; u != peer_avl_empty; ) { \ 165 for (u = peer_root; u != peer_avl_empty; ) { \
161 if (daddr == u->v4daddr) \ 166 if (_daddr == u->v4daddr) \
162 break; \ 167 break; \
163 if ((__force __u32)daddr < (__force __u32)u->v4daddr) \ 168 if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \
164 v = &u->avl_left; \ 169 v = &u->avl_left; \
165 else \ 170 else \
166 v = &u->avl_right; \ 171 v = &u->avl_right; \
167 *stackptr++ = v; \ 172 if (_stack) \
173 *stackptr++ = v; \
168 u = *v; \ 174 u = *v; \
169 } \ 175 } \
170 u; \ 176 u; \
@@ -288,7 +294,7 @@ static void unlink_from_pool(struct inet_peer *p)
288 if (atomic_read(&p->refcnt) == 1) { 294 if (atomic_read(&p->refcnt) == 1) {
289 struct inet_peer **stack[PEER_MAXDEPTH]; 295 struct inet_peer **stack[PEER_MAXDEPTH];
290 struct inet_peer ***stackptr, ***delp; 296 struct inet_peer ***stackptr, ***delp;
291 if (lookup(p->v4daddr) != p) 297 if (lookup(p->v4daddr, stack) != p)
292 BUG(); 298 BUG();
293 delp = stackptr - 1; /* *delp[0] == p */ 299 delp = stackptr - 1; /* *delp[0] == p */
294 if (p->avl_left == peer_avl_empty) { 300 if (p->avl_left == peer_avl_empty) {
@@ -373,7 +379,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
373 379
374 /* Look up for the address quickly. */ 380 /* Look up for the address quickly. */
375 read_lock_bh(&peer_pool_lock); 381 read_lock_bh(&peer_pool_lock);
376 p = lookup(daddr); 382 p = lookup(daddr, NULL);
377 if (p != peer_avl_empty) 383 if (p != peer_avl_empty)
378 atomic_inc(&p->refcnt); 384 atomic_inc(&p->refcnt);
379 read_unlock_bh(&peer_pool_lock); 385 read_unlock_bh(&peer_pool_lock);
@@ -400,7 +406,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
400 406
401 write_lock_bh(&peer_pool_lock); 407 write_lock_bh(&peer_pool_lock);
402 /* Check if an entry has suddenly appeared. */ 408 /* Check if an entry has suddenly appeared. */
403 p = lookup(daddr); 409 p = lookup(daddr, stack);
404 if (p != peer_avl_empty) 410 if (p != peer_avl_empty)
405 goto out_free; 411 goto out_free;
406 412
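
The reworked lookup(_daddr, _stack) macro folds the read and write paths together: pure searches pass NULL, and because _stack is a compile-time constant at each expansion, the path-recording branches drop out of the fast path entirely. The same trick with an inline function, in miniature; toy two-node tree and invented names:

    #include <stdio.h>
    #include <stddef.h>

    struct node { int key; struct node *left, *right; };

    /* One routine for both paths: 'stack' is NULL or a real array at each
     * call site, so the inliner can delete the recording branches on the
     * read-only path. */
    static inline struct node *lookup(struct node **root, int key,
                                      struct node ***stack, int *depth)
    {
            struct node **v = root;
            struct node *u = *root;

            if (stack)
                    stack[(*depth)++] = v;
            while (u) {
                    if (key == u->key)
                            return u;
                    v = key < u->key ? &u->left : &u->right;
                    if (stack)
                            stack[(*depth)++] = v;
                    u = *v;
            }
            return NULL;
    }

    int main(void)
    {
            struct node b = { 2, NULL, NULL };
            struct node a = { 1, NULL, &b };
            struct node *root = &a;
            struct node **stack[8];
            int depth = 0;

            /* read path: no bookkeeping at all */
            printf("fast: key %d\n", lookup(&root, 2, NULL, NULL)->key);
            /* write path: records the child links walked, for rebalancing */
            printf("slow: key %d, depth %d\n",
                   lookup(&root, 2, stack, &depth)->key, depth);
            return 0;
    }
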
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 369e721c4bab..9cb04df0054b 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -67,14 +67,14 @@ int ip_forward(struct sk_buff *skb)
67 if (skb->pkt_type != PACKET_HOST) 67 if (skb->pkt_type != PACKET_HOST)
68 goto drop; 68 goto drop;
69 69
70 skb->ip_summed = CHECKSUM_NONE; 70 skb_forward_csum(skb);
71 71
72 /* 72 /*
73 * According to the RFC, we must first decrease the TTL field. If 73 * According to the RFC, we must first decrease the TTL field. If
74 * that reaches zero, we must reply an ICMP control message telling 74 * that reaches zero, we must reply an ICMP control message telling
75 * that the packet's lifetime expired. 75 * that the packet's lifetime expired.
76 */ 76 */
77 if (skb->nh.iph->ttl <= 1) 77 if (ip_hdr(skb)->ttl <= 1)
78 goto too_many_hops; 78 goto too_many_hops;
79 79
80 if (!xfrm4_route_forward(skb)) 80 if (!xfrm4_route_forward(skb))
@@ -85,10 +85,18 @@ int ip_forward(struct sk_buff *skb)
85 if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 85 if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
86 goto sr_failed; 86 goto sr_failed;
87 87
88 if (unlikely(skb->len > dst_mtu(&rt->u.dst) &&
89 (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
90 IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
91 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
92 htonl(dst_mtu(&rt->u.dst)));
93 goto drop;
94 }
95
88 /* We are about to mangle packet. Copy it! */ 96 /* We are about to mangle packet. Copy it! */
89 if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len)) 97 if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len))
90 goto drop; 98 goto drop;
91 iph = skb->nh.iph; 99 iph = ip_hdr(skb);
92 100
93 /* Decrease ttl after skb cow done */ 101 /* Decrease ttl after skb cow done */
94 ip_decrease_ttl(iph); 102 ip_decrease_ttl(iph);
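
ip_forward() now rejects oversized DF packets before the skb_cow() copy, so the ICMP_FRAG_NEEDED error is generated from the untouched packet and no work is wasted mangling a doomed one. The decision itself, isolated with simplified flags and types:

    #include <stdio.h>
    #include <stdbool.h>

    enum verdict { FORWARD = 0, SEND_FRAG_NEEDED = 1 };

    /* The test ip_forward() now makes before copying/mangling the skb. */
    static enum verdict forward_mtu_check(unsigned int pkt_len,
                                          unsigned int mtu,
                                          bool df_set, bool local_df)
    {
            if (pkt_len > mtu && df_set && !local_df)
                    return SEND_FRAG_NEEDED;  /* ICMP dest-unreach/frag-needed */
            return FORWARD;
    }

    int main(void)
    {
            printf("%d\n", forward_mtu_check(1600, 1500, true, false));   /* 1 */
            printf("%d\n", forward_mtu_check(1400, 1500, true, false));   /* 0 */
            printf("%d\n", forward_mtu_check(1600, 1500, false, false));  /* 0 */
            return 0;
    }
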
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b6f055380373..0231bdcb2ab7 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -92,7 +92,7 @@ struct ipq {
92 spinlock_t lock; 92 spinlock_t lock;
93 atomic_t refcnt; 93 atomic_t refcnt;
94 struct timer_list timer; /* when will this queue expire? */ 94 struct timer_list timer; /* when will this queue expire? */
95 struct timeval stamp; 95 ktime_t stamp;
96 int iif; 96 int iif;
97 unsigned int rid; 97 unsigned int rid;
98 struct inet_peer *peer; 98 struct inet_peer *peer;
@@ -184,7 +184,7 @@ static __inline__ struct ipq *frag_alloc_queue(void)
184{ 184{
185 struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC); 185 struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
186 186
187 if(!qp) 187 if (!qp)
188 return NULL; 188 return NULL;
189 atomic_add(sizeof(struct ipq), &ip_frag_mem); 189 atomic_add(sizeof(struct ipq), &ip_frag_mem);
190 return qp; 190 return qp;
@@ -321,11 +321,11 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
321 * promoted read lock to write lock. 321 * promoted read lock to write lock.
322 */ 322 */
323 hlist_for_each_entry(qp, n, &ipq_hash[hash], list) { 323 hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
324 if(qp->id == qp_in->id && 324 if (qp->id == qp_in->id &&
325 qp->saddr == qp_in->saddr && 325 qp->saddr == qp_in->saddr &&
326 qp->daddr == qp_in->daddr && 326 qp->daddr == qp_in->daddr &&
327 qp->protocol == qp_in->protocol && 327 qp->protocol == qp_in->protocol &&
328 qp->user == qp_in->user) { 328 qp->user == qp_in->user) {
329 atomic_inc(&qp->refcnt); 329 atomic_inc(&qp->refcnt);
330 write_unlock(&ipfrag_lock); 330 write_unlock(&ipfrag_lock);
331 qp_in->last_in |= COMPLETE; 331 qp_in->last_in |= COMPLETE;
@@ -398,11 +398,11 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
398 read_lock(&ipfrag_lock); 398 read_lock(&ipfrag_lock);
399 hash = ipqhashfn(id, saddr, daddr, protocol); 399 hash = ipqhashfn(id, saddr, daddr, protocol);
400 hlist_for_each_entry(qp, n, &ipq_hash[hash], list) { 400 hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
401 if(qp->id == id && 401 if (qp->id == id &&
402 qp->saddr == saddr && 402 qp->saddr == saddr &&
403 qp->daddr == daddr && 403 qp->daddr == daddr &&
404 qp->protocol == protocol && 404 qp->protocol == protocol &&
405 qp->user == user) { 405 qp->user == user) {
406 atomic_inc(&qp->refcnt); 406 atomic_inc(&qp->refcnt);
407 read_unlock(&ipfrag_lock); 407 read_unlock(&ipfrag_lock);
408 return qp; 408 return qp;
@@ -479,11 +479,11 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
479 goto err; 479 goto err;
480 } 480 }
481 481
482 offset = ntohs(skb->nh.iph->frag_off); 482 offset = ntohs(ip_hdr(skb)->frag_off);
483 flags = offset & ~IP_OFFSET; 483 flags = offset & ~IP_OFFSET;
484 offset &= IP_OFFSET; 484 offset &= IP_OFFSET;
485 offset <<= 3; /* offset is in 8-byte chunks */ 485 offset <<= 3; /* offset is in 8-byte chunks */
486 ihl = skb->nh.iph->ihl * 4; 486 ihl = ip_hdrlen(skb);
487 487
488 /* Determine the position of this fragment. */ 488 /* Determine the position of this fragment. */
489 end = offset + skb->len - ihl; 489 end = offset + skb->len - ihl;
@@ -524,7 +524,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
524 * this fragment, right? 524 * this fragment, right?
525 */ 525 */
526 prev = NULL; 526 prev = NULL;
527 for(next = qp->fragments; next != NULL; next = next->next) { 527 for (next = qp->fragments; next != NULL; next = next->next) {
528 if (FRAG_CB(next)->offset >= offset) 528 if (FRAG_CB(next)->offset >= offset)
529 break; /* bingo! */ 529 break; /* bingo! */
530 prev = next; 530 prev = next;
@@ -592,7 +592,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
592 if (skb->dev) 592 if (skb->dev)
593 qp->iif = skb->dev->ifindex; 593 qp->iif = skb->dev->ifindex;
594 skb->dev = NULL; 594 skb->dev = NULL;
595 skb_get_timestamp(skb, &qp->stamp); 595 qp->stamp = skb->tstamp;
596 qp->meat += skb->len; 596 qp->meat += skb->len;
597 atomic_add(skb->truesize, &ip_frag_mem); 597 atomic_add(skb->truesize, &ip_frag_mem);
598 if (offset == 0) 598 if (offset == 0)
@@ -624,10 +624,10 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
624 BUG_TRAP(FRAG_CB(head)->offset == 0); 624 BUG_TRAP(FRAG_CB(head)->offset == 0);
625 625
626 /* Allocate a new buffer for the datagram. */ 626 /* Allocate a new buffer for the datagram. */
627 ihlen = head->nh.iph->ihl*4; 627 ihlen = ip_hdrlen(head);
628 len = ihlen + qp->len; 628 len = ihlen + qp->len;
629 629
630 if(len > 65535) 630 if (len > 65535)
631 goto out_oversize; 631 goto out_oversize;
632 632
633 /* Head of list must not be cloned. */ 633 /* Head of list must not be cloned. */
@@ -658,7 +658,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
658 } 658 }
659 659
660 skb_shinfo(head)->frag_list = head->next; 660 skb_shinfo(head)->frag_list = head->next;
661 skb_push(head, head->data - head->nh.raw); 661 skb_push(head, head->data - skb_network_header(head));
662 atomic_sub(head->truesize, &ip_frag_mem); 662 atomic_sub(head->truesize, &ip_frag_mem);
663 663
664 for (fp=head->next; fp; fp = fp->next) { 664 for (fp=head->next; fp; fp = fp->next) {
@@ -674,9 +674,9 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
674 674
675 head->next = NULL; 675 head->next = NULL;
676 head->dev = dev; 676 head->dev = dev;
677 skb_set_timestamp(head, &qp->stamp); 677 head->tstamp = qp->stamp;
678 678
679 iph = head->nh.iph; 679 iph = ip_hdr(head);
680 iph->frag_off = 0; 680 iph->frag_off = 0;
681 iph->tot_len = htons(len); 681 iph->tot_len = htons(len);
682 IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS); 682 IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
@@ -700,7 +700,6 @@ out_fail:
700/* Process an incoming IP datagram fragment. */ 700/* Process an incoming IP datagram fragment. */
701struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user) 701struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
702{ 702{
703 struct iphdr *iph = skb->nh.iph;
704 struct ipq *qp; 703 struct ipq *qp;
705 struct net_device *dev; 704 struct net_device *dev;
706 705
@@ -713,7 +712,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
713 dev = skb->dev; 712 dev = skb->dev;
714 713
715 /* Lookup (or create) queue header */ 714 /* Lookup (or create) queue header */
716 if ((qp = ip_find(iph, user)) != NULL) { 715 if ((qp = ip_find(ip_hdr(skb), user)) != NULL) {
717 struct sk_buff *ret = NULL; 716 struct sk_buff *ret = NULL;
718 717
719 spin_lock(&qp->lock); 718 spin_lock(&qp->lock);
@@ -734,7 +733,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
734 return NULL; 733 return NULL;
735} 734}
736 735
737void ipfrag_init(void) 736void __init ipfrag_init(void)
738{ 737{
739 ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ 738 ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
740 (jiffies ^ (jiffies >> 6))); 739 (jiffies ^ (jiffies >> 6)));
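
Besides the whitespace cleanup and the ip_hdr()/ip_hdrlen() conversions, the reassembly queue's timestamp changes type: struct ipq now holds a ktime_t and copies skb->tstamp directly in both directions, where it previously round-tripped through struct timeval via skb_get_timestamp()/skb_set_timestamp(). ipfrag_init() is also marked __init so its text can be discarded after boot. A reduced stand-in for the timestamp change, with hypothetical types:

/* Hypothetical reduced types; the real change swaps struct timeval for
 * ktime_t and drops the conversion helpers. */
#include <stdint.h>

typedef int64_t ktime_ns;	/* stand-in for the kernel's ktime_t */

struct frag_queue { ktime_ns stamp; };
struct pkt        { ktime_ns tstamp; };

static void frag_queue_stamp(struct frag_queue *q, const struct pkt *p)
{
	q->stamp = p->tstamp;	/* was: skb_get_timestamp() into a timeval */
}
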
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 9151da642318..63282934725e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -191,11 +191,11 @@ static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be3
191 return NULL; 191 return NULL;
192} 192}
193 193
194static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t) 194static struct ip_tunnel **__ipgre_bucket(struct ip_tunnel_parm *parms)
195{ 195{
196 __be32 remote = t->parms.iph.daddr; 196 __be32 remote = parms->iph.daddr;
197 __be32 local = t->parms.iph.saddr; 197 __be32 local = parms->iph.saddr;
198 __be32 key = t->parms.i_key; 198 __be32 key = parms->i_key;
199 unsigned h = HASH(key); 199 unsigned h = HASH(key);
200 int prio = 0; 200 int prio = 0;
201 201
@@ -209,6 +209,11 @@ static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
209 return &tunnels[prio][h]; 209 return &tunnels[prio][h];
210} 210}
211 211
212static inline struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
213{
214 return __ipgre_bucket(&t->parms);
215}
216
212static void ipgre_tunnel_link(struct ip_tunnel *t) 217static void ipgre_tunnel_link(struct ip_tunnel *t)
213{ 218{
214 struct ip_tunnel **tp = ipgre_bucket(t); 219 struct ip_tunnel **tp = ipgre_bucket(t);
@@ -240,17 +245,9 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
240 __be32 key = parms->i_key; 245 __be32 key = parms->i_key;
241 struct ip_tunnel *t, **tp, *nt; 246 struct ip_tunnel *t, **tp, *nt;
242 struct net_device *dev; 247 struct net_device *dev;
243 unsigned h = HASH(key);
244 int prio = 0;
245 char name[IFNAMSIZ]; 248 char name[IFNAMSIZ];
246 249
247 if (local) 250 for (tp = __ipgre_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
248 prio |= 1;
249 if (remote && !MULTICAST(remote)) {
250 prio |= 2;
251 h ^= HASH(remote);
252 }
253 for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
254 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { 251 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
255 if (key == t->parms.i_key) 252 if (key == t->parms.i_key)
256 return t; 253 return t;
@@ -320,8 +317,8 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
320 struct iphdr *iph = (struct iphdr*)skb->data; 317 struct iphdr *iph = (struct iphdr*)skb->data;
321 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2)); 318 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
322 int grehlen = (iph->ihl<<2) + 4; 319 int grehlen = (iph->ihl<<2) + 4;
323 int type = skb->h.icmph->type; 320 const int type = icmp_hdr(skb)->type;
324 int code = skb->h.icmph->code; 321 const int code = icmp_hdr(skb)->code;
325 struct ip_tunnel *t; 322 struct ip_tunnel *t;
326 __be16 flags; 323 __be16 flags;
327 324
@@ -388,8 +385,8 @@ out:
388 struct iphdr *iph = (struct iphdr*)dp; 385 struct iphdr *iph = (struct iphdr*)dp;
389 struct iphdr *eiph; 386 struct iphdr *eiph;
390 __be16 *p = (__be16*)(dp+(iph->ihl<<2)); 387 __be16 *p = (__be16*)(dp+(iph->ihl<<2));
391 int type = skb->h.icmph->type; 388 const int type = icmp_hdr(skb)->type;
392 int code = skb->h.icmph->code; 389 const int code = icmp_hdr(skb)->code;
393 int rel_type = 0; 390 int rel_type = 0;
394 int rel_code = 0; 391 int rel_code = 0;
395 __be32 rel_info = 0; 392 __be32 rel_info = 0;
@@ -422,7 +419,7 @@ out:
422 default: 419 default:
423 return; 420 return;
424 case ICMP_PARAMETERPROB: 421 case ICMP_PARAMETERPROB:
425 n = ntohl(skb->h.icmph->un.gateway) >> 24; 422 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
426 if (n < (iph->ihl<<2)) 423 if (n < (iph->ihl<<2))
427 return; 424 return;
428 425
@@ -442,7 +439,7 @@ out:
442 return; 439 return;
443 case ICMP_FRAG_NEEDED: 440 case ICMP_FRAG_NEEDED:
444 /* And it is the only really necessary thing :-) */ 441 /* And it is the only really necessary thing :-) */
445 n = ntohs(skb->h.icmph->un.frag.mtu); 442 n = ntohs(icmp_hdr(skb)->un.frag.mtu);
446 if (n < grehlen+68) 443 if (n < grehlen+68)
447 return; 444 return;
448 n -= grehlen; 445 n -= grehlen;
@@ -474,7 +471,7 @@ out:
474 dst_release(skb2->dst); 471 dst_release(skb2->dst);
475 skb2->dst = NULL; 472 skb2->dst = NULL;
476 skb_pull(skb2, skb->data - (u8*)eiph); 473 skb_pull(skb2, skb->data - (u8*)eiph);
477 skb2->nh.raw = skb2->data; 474 skb_reset_network_header(skb2);
478 475
479 /* Try to guess incoming interface */ 476 /* Try to guess incoming interface */
480 memset(&fl, 0, sizeof(fl)); 477 memset(&fl, 0, sizeof(fl));
@@ -533,9 +530,9 @@ static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
533{ 530{
534 if (INET_ECN_is_ce(iph->tos)) { 531 if (INET_ECN_is_ce(iph->tos)) {
535 if (skb->protocol == htons(ETH_P_IP)) { 532 if (skb->protocol == htons(ETH_P_IP)) {
536 IP_ECN_set_ce(skb->nh.iph); 533 IP_ECN_set_ce(ip_hdr(skb));
537 } else if (skb->protocol == htons(ETH_P_IPV6)) { 534 } else if (skb->protocol == htons(ETH_P_IPV6)) {
538 IP6_ECN_set_ce(skb->nh.ipv6h); 535 IP6_ECN_set_ce(ipv6_hdr(skb));
539 } 536 }
540 } 537 }
541} 538}
@@ -565,7 +562,7 @@ static int ipgre_rcv(struct sk_buff *skb)
565 if (!pskb_may_pull(skb, 16)) 562 if (!pskb_may_pull(skb, 16))
566 goto drop_nolock; 563 goto drop_nolock;
567 564
568 iph = skb->nh.iph; 565 iph = ip_hdr(skb);
569 h = skb->data; 566 h = skb->data;
570 flags = *(__be16*)h; 567 flags = *(__be16*)h;
571 568
@@ -616,9 +613,10 @@ static int ipgre_rcv(struct sk_buff *skb)
616 offset += 4; 613 offset += 4;
617 } 614 }
618 615
619 skb->mac.raw = skb->nh.raw; 616 skb_reset_mac_header(skb);
620 skb->nh.raw = __pskb_pull(skb, offset); 617 __pskb_pull(skb, offset);
621 skb_postpull_rcsum(skb, skb->h.raw, offset); 618 skb_reset_network_header(skb);
619 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
622 skb->pkt_type = PACKET_HOST; 620 skb->pkt_type = PACKET_HOST;
623#ifdef CONFIG_NET_IPGRE_BROADCAST 621#ifdef CONFIG_NET_IPGRE_BROADCAST
624 if (MULTICAST(iph->daddr)) { 622 if (MULTICAST(iph->daddr)) {
@@ -669,7 +667,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
669{ 667{
670 struct ip_tunnel *tunnel = netdev_priv(dev); 668 struct ip_tunnel *tunnel = netdev_priv(dev);
671 struct net_device_stats *stats = &tunnel->stat; 669 struct net_device_stats *stats = &tunnel->stat;
672 struct iphdr *old_iph = skb->nh.iph; 670 struct iphdr *old_iph = ip_hdr(skb);
673 struct iphdr *tiph; 671 struct iphdr *tiph;
674 u8 tos; 672 u8 tos;
675 __be16 df; 673 __be16 df;
@@ -720,7 +718,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
720 addr_type = ipv6_addr_type(addr6); 718 addr_type = ipv6_addr_type(addr6);
721 719
722 if (addr_type == IPV6_ADDR_ANY) { 720 if (addr_type == IPV6_ADDR_ANY) {
723 addr6 = &skb->nh.ipv6h->daddr; 721 addr6 = &ipv6_hdr(skb)->daddr;
724 addr_type = ipv6_addr_type(addr6); 722 addr_type = ipv6_addr_type(addr6);
725 } 723 }
726 724
@@ -824,11 +822,12 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
824 skb_set_owner_w(new_skb, skb->sk); 822 skb_set_owner_w(new_skb, skb->sk);
825 dev_kfree_skb(skb); 823 dev_kfree_skb(skb);
826 skb = new_skb; 824 skb = new_skb;
827 old_iph = skb->nh.iph; 825 old_iph = ip_hdr(skb);
828 } 826 }
829 827
830 skb->h.raw = skb->nh.raw; 828 skb->transport_header = skb->network_header;
831 skb->nh.raw = skb_push(skb, gre_hlen); 829 skb_push(skb, gre_hlen);
830 skb_reset_network_header(skb);
832 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 831 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
833 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | 832 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
834 IPSKB_REROUTED); 833 IPSKB_REROUTED);
@@ -839,7 +838,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
839 * Push down and install the IPIP header. 838 * Push down and install the IPIP header.
840 */ 839 */
841 840
842 iph = skb->nh.iph; 841 iph = ip_hdr(skb);
843 iph->version = 4; 842 iph->version = 4;
844 iph->ihl = sizeof(struct iphdr) >> 2; 843 iph->ihl = sizeof(struct iphdr) >> 2;
845 iph->frag_off = df; 844 iph->frag_off = df;
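
The GRE hunks fold the duplicated bucket computation out of ipgre_tunnel_locate(): bucket selection depends only on the tunnel parameters, so __ipgre_bucket() takes a struct ip_tunnel_parm directly and ipgre_bucket() becomes a one-line wrapper around it. A reduced, self-contained sketch of the pattern (hash and types are stand-ins, not the GRE code):

#include <stdint.h>

struct parms  { uint32_t daddr, saddr, key; };
struct tunnel { struct parms parms; struct tunnel *next; };

#define HASH_BITS 4
static struct tunnel *buckets[1u << HASH_BITS];

/* Bucket choice depends only on the parameters... */
static struct tunnel **bucket_for(const struct parms *p)
{
	uint32_t h = (p->key ^ p->daddr ^ p->saddr) & ((1u << HASH_BITS) - 1);
	return &buckets[h];
}

/* ...so the per-tunnel variant is a thin wrapper, and the create path
 * (which has only parameters, no tunnel yet) calls bucket_for()
 * directly instead of duplicating the hashing. */
static struct tunnel **tunnel_bucket(struct tunnel *t)
{
	return bucket_for(&t->parms);
}
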
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index f38e97647ac0..324e7e0fdb2a 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -158,7 +158,7 @@ DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics) __read_mostly;
158int ip_call_ra_chain(struct sk_buff *skb) 158int ip_call_ra_chain(struct sk_buff *skb)
159{ 159{
160 struct ip_ra_chain *ra; 160 struct ip_ra_chain *ra;
161 u8 protocol = skb->nh.iph->protocol; 161 u8 protocol = ip_hdr(skb)->protocol;
162 struct sock *last = NULL; 162 struct sock *last = NULL;
163 163
164 read_lock(&ip_ra_lock); 164 read_lock(&ip_ra_lock);
@@ -171,7 +171,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
171 if (sk && inet_sk(sk)->num == protocol && 171 if (sk && inet_sk(sk)->num == protocol &&
172 (!sk->sk_bound_dev_if || 172 (!sk->sk_bound_dev_if ||
173 sk->sk_bound_dev_if == skb->dev->ifindex)) { 173 sk->sk_bound_dev_if == skb->dev->ifindex)) {
174 if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { 174 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
175 skb = ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN); 175 skb = ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN);
176 if (skb == NULL) { 176 if (skb == NULL) {
177 read_unlock(&ip_ra_lock); 177 read_unlock(&ip_ra_lock);
@@ -198,17 +198,15 @@ int ip_call_ra_chain(struct sk_buff *skb)
198 198
199static inline int ip_local_deliver_finish(struct sk_buff *skb) 199static inline int ip_local_deliver_finish(struct sk_buff *skb)
200{ 200{
201 int ihl = skb->nh.iph->ihl*4; 201 __skb_pull(skb, ip_hdrlen(skb));
202
203 __skb_pull(skb, ihl);
204 202
205 /* Point into the IP datagram, just past the header. */ 203 /* Point into the IP datagram, just past the header. */
206 skb->h.raw = skb->data; 204 skb_reset_transport_header(skb);
207 205
208 rcu_read_lock(); 206 rcu_read_lock();
209 { 207 {
210 /* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */ 208 /* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */
211 int protocol = skb->nh.iph->protocol; 209 int protocol = ip_hdr(skb)->protocol;
212 int hash; 210 int hash;
213 struct sock *raw_sk; 211 struct sock *raw_sk;
214 struct net_protocol *ipprot; 212 struct net_protocol *ipprot;
@@ -220,7 +218,7 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
220 /* If there maybe a raw socket we must check - if not we 218 /* If there maybe a raw socket we must check - if not we
221 * don't care less 219 * don't care less
222 */ 220 */
223 if (raw_sk && !raw_v4_input(skb, skb->nh.iph, hash)) 221 if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash))
224 raw_sk = NULL; 222 raw_sk = NULL;
225 223
226 if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) { 224 if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) {
@@ -266,7 +264,7 @@ int ip_local_deliver(struct sk_buff *skb)
266 * Reassemble IP fragments. 264 * Reassemble IP fragments.
267 */ 265 */
268 266
269 if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { 267 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
270 skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER); 268 skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER);
271 if (!skb) 269 if (!skb)
272 return 0; 270 return 0;
@@ -294,7 +292,7 @@ static inline int ip_rcv_options(struct sk_buff *skb)
294 goto drop; 292 goto drop;
295 } 293 }
296 294
297 iph = skb->nh.iph; 295 iph = ip_hdr(skb);
298 296
299 if (ip_options_compile(NULL, skb)) { 297 if (ip_options_compile(NULL, skb)) {
300 IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); 298 IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
@@ -330,7 +328,7 @@ drop:
330 328
331static inline int ip_rcv_finish(struct sk_buff *skb) 329static inline int ip_rcv_finish(struct sk_buff *skb)
332{ 330{
333 struct iphdr *iph = skb->nh.iph; 331 const struct iphdr *iph = ip_hdr(skb);
334 332
335 /* 333 /*
336 * Initialise the virtual path cache for the packet. It describes 334 * Initialise the virtual path cache for the packet. It describes
@@ -391,7 +389,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
391 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 389 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
392 goto inhdr_error; 390 goto inhdr_error;
393 391
394 iph = skb->nh.iph; 392 iph = ip_hdr(skb);
395 393
396 /* 394 /*
397 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum. 395 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
@@ -410,7 +408,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
410 if (!pskb_may_pull(skb, iph->ihl*4)) 408 if (!pskb_may_pull(skb, iph->ihl*4))
411 goto inhdr_error; 409 goto inhdr_error;
412 410
413 iph = skb->nh.iph; 411 iph = ip_hdr(skb);
414 412
415 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) 413 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
416 goto inhdr_error; 414 goto inhdr_error;
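
The bulk of this file's diff is the sk_buff accessor conversion that runs through the whole series: skb->nh.iph becomes ip_hdr(skb), skb->nh.iph->ihl*4 becomes ip_hdrlen(skb), and manual pointer stores become skb_reset_network_header()/skb_reset_transport_header(). The point of the helpers is that each header position is one stored location (an offset from skb->head on 64-bit builds) instead of a union of cached pointers that every mangling site must fix up by hand. A simplified, self-contained model of the new helpers (the real ones live in include/linux/skbuff.h and include/linux/ip.h; the _m suffix marks these as stand-ins):

#include <stddef.h>
#include <stdint.h>

struct iphdr_m { uint8_t ver_ihl, tos; uint16_t tot_len; /* truncated */ };

struct skb_m {
	uint8_t *head, *data;
	size_t   network_header;	/* offset from head, as on 64-bit */
	size_t   transport_header;
};

static uint8_t *skb_network_header_m(const struct skb_m *skb)
{
	return skb->head + skb->network_header;
}

static void skb_reset_network_header_m(struct skb_m *skb)
{
	skb->network_header = (size_t)(skb->data - skb->head);
}

static void skb_reset_transport_header_m(struct skb_m *skb)
{
	skb->transport_header = (size_t)(skb->data - skb->head);
}

static struct iphdr_m *ip_hdr_m(const struct skb_m *skb)
{
	return (struct iphdr_m *)skb_network_header_m(skb);
}

static size_t ip_hdrlen_m(const struct skb_m *skb)
{
	return (ip_hdr_m(skb)->ver_ihl & 0x0f) * 4;	/* ihl in 32-bit words */
}
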
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index f906a80d5a87..251346828cb4 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -40,7 +40,7 @@
40void ip_options_build(struct sk_buff * skb, struct ip_options * opt, 40void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
41 __be32 daddr, struct rtable *rt, int is_frag) 41 __be32 daddr, struct rtable *rt, int is_frag)
42{ 42{
43 unsigned char * iph = skb->nh.raw; 43 unsigned char *iph = skb_network_header(skb);
44 44
45 memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options)); 45 memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options));
46 memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen); 46 memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen);
@@ -104,13 +104,13 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
104 return 0; 104 return 0;
105 } 105 }
106 106
107 sptr = skb->nh.raw; 107 sptr = skb_network_header(skb);
108 dptr = dopt->__data; 108 dptr = dopt->__data;
109 109
110 if (skb->dst) 110 if (skb->dst)
111 daddr = ((struct rtable*)skb->dst)->rt_spec_dst; 111 daddr = ((struct rtable*)skb->dst)->rt_spec_dst;
112 else 112 else
113 daddr = skb->nh.iph->daddr; 113 daddr = ip_hdr(skb)->daddr;
114 114
115 if (sopt->rr) { 115 if (sopt->rr) {
116 optlen = sptr[sopt->rr+1]; 116 optlen = sptr[sopt->rr+1];
@@ -180,7 +180,8 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
180 /* 180 /*
181 * RFC1812 requires to fix illegal source routes. 181 * RFC1812 requires to fix illegal source routes.
182 */ 182 */
183 if (memcmp(&skb->nh.iph->saddr, &start[soffset+3], 4) == 0) 183 if (memcmp(&ip_hdr(skb)->saddr,
184 &start[soffset + 3], 4) == 0)
184 doffset -= 4; 185 doffset -= 4;
185 } 186 }
186 if (doffset > 3) { 187 if (doffset > 3) {
@@ -217,7 +218,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
217 218
218void ip_options_fragment(struct sk_buff * skb) 219void ip_options_fragment(struct sk_buff * skb)
219{ 220{
220 unsigned char * optptr = skb->nh.raw + sizeof(struct iphdr); 221 unsigned char *optptr = skb_network_header(skb) + sizeof(struct iphdr);
221 struct ip_options * opt = &(IPCB(skb)->opt); 222 struct ip_options * opt = &(IPCB(skb)->opt);
222 int l = opt->optlen; 223 int l = opt->optlen;
223 int optlen; 224 int optlen;
@@ -264,12 +265,13 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
264 265
265 if (!opt) { 266 if (!opt) {
266 opt = &(IPCB(skb)->opt); 267 opt = &(IPCB(skb)->opt);
267 iph = skb->nh.raw; 268 iph = skb_network_header(skb);
268 opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr); 269 opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr);
269 optptr = iph + sizeof(struct iphdr); 270 optptr = iph + sizeof(struct iphdr);
270 opt->is_data = 0; 271 opt->is_data = 0;
271 } else { 272 } else {
272 optptr = opt->is_data ? opt->__data : (unsigned char*)&(skb->nh.iph[1]); 273 optptr = opt->is_data ? opt->__data :
274 (unsigned char *)&(ip_hdr(skb)[1]);
273 iph = optptr - sizeof(struct iphdr); 275 iph = optptr - sizeof(struct iphdr);
274 } 276 }
275 277
@@ -563,7 +565,7 @@ void ip_forward_options(struct sk_buff *skb)
563 struct ip_options * opt = &(IPCB(skb)->opt); 565 struct ip_options * opt = &(IPCB(skb)->opt);
564 unsigned char * optptr; 566 unsigned char * optptr;
565 struct rtable *rt = (struct rtable*)skb->dst; 567 struct rtable *rt = (struct rtable*)skb->dst;
566 unsigned char *raw = skb->nh.raw; 568 unsigned char *raw = skb_network_header(skb);
567 569
568 if (opt->rr_needaddr) { 570 if (opt->rr_needaddr) {
569 optptr = (unsigned char *)raw + opt->rr; 571 optptr = (unsigned char *)raw + opt->rr;
@@ -587,7 +589,7 @@ void ip_forward_options(struct sk_buff *skb)
587 if (srrptr + 3 <= srrspace) { 589 if (srrptr + 3 <= srrspace) {
588 opt->is_changed = 1; 590 opt->is_changed = 1;
589 ip_rt_get_source(&optptr[srrptr-1], rt); 591 ip_rt_get_source(&optptr[srrptr-1], rt);
590 skb->nh.iph->daddr = rt->rt_dst; 592 ip_hdr(skb)->daddr = rt->rt_dst;
591 optptr[2] = srrptr+4; 593 optptr[2] = srrptr+4;
592 } else if (net_ratelimit()) 594 } else if (net_ratelimit())
593 printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); 595 printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n");
@@ -599,7 +601,7 @@ void ip_forward_options(struct sk_buff *skb)
599 } 601 }
600 if (opt->is_changed) { 602 if (opt->is_changed) {
601 opt->is_changed = 0; 603 opt->is_changed = 0;
602 ip_send_check(skb->nh.iph); 604 ip_send_check(ip_hdr(skb));
603 } 605 }
604} 606}
605 607
@@ -608,8 +610,8 @@ int ip_options_rcv_srr(struct sk_buff *skb)
608 struct ip_options *opt = &(IPCB(skb)->opt); 610 struct ip_options *opt = &(IPCB(skb)->opt);
609 int srrspace, srrptr; 611 int srrspace, srrptr;
610 __be32 nexthop; 612 __be32 nexthop;
611 struct iphdr *iph = skb->nh.iph; 613 struct iphdr *iph = ip_hdr(skb);
612 unsigned char * optptr = skb->nh.raw + opt->srr; 614 unsigned char *optptr = skb_network_header(skb) + opt->srr;
613 struct rtable *rt = (struct rtable*)skb->dst; 615 struct rtable *rt = (struct rtable*)skb->dst;
614 struct rtable *rt2; 616 struct rtable *rt2;
615 int err; 617 int err;
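
ip_options.c is the same accessor conversion, plus line wrapping to fit 80 columns. One idiom worth spelling out is &(ip_hdr(skb)[1]) in ip_options_compile(): because options begin immediately after the fixed 20-byte IPv4 header, ip_hdr(skb) + 1 and skb_network_header(skb) + sizeof(struct iphdr) name the same byte. A tiny self-contained check of that arithmetic, with a stand-in header type:

#include <assert.h>
#include <stdint.h>

struct iphdr20 { uint8_t bytes[20]; };	/* stand-in for struct iphdr */

int main(void)
{
	uint8_t pkt[60] = { 0 };	/* header + up to 40 option bytes */
	struct iphdr20 *iph = (struct iphdr20 *)pkt;

	/* options begin one whole header past the header pointer */
	assert((uint8_t *)(iph + 1) == pkt + sizeof(struct iphdr20));
	return 0;
}
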
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index d096332f6c6d..534650cad3a8 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -95,8 +95,8 @@ __inline__ void ip_send_check(struct iphdr *iph)
95/* dev_loopback_xmit for use with netfilter. */ 95/* dev_loopback_xmit for use with netfilter. */
96static int ip_dev_loopback_xmit(struct sk_buff *newskb) 96static int ip_dev_loopback_xmit(struct sk_buff *newskb)
97{ 97{
98 newskb->mac.raw = newskb->data; 98 skb_reset_mac_header(newskb);
99 __skb_pull(newskb, newskb->nh.raw - newskb->data); 99 __skb_pull(newskb, skb_network_offset(newskb));
100 newskb->pkt_type = PACKET_LOOPBACK; 100 newskb->pkt_type = PACKET_LOOPBACK;
101 newskb->ip_summed = CHECKSUM_UNNECESSARY; 101 newskb->ip_summed = CHECKSUM_UNNECESSARY;
102 BUG_TRAP(newskb->dst); 102 BUG_TRAP(newskb->dst);
@@ -125,11 +125,9 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
125 struct iphdr *iph; 125 struct iphdr *iph;
126 126
127 /* Build the IP header. */ 127 /* Build the IP header. */
128 if (opt) 128 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
129 iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr) + opt->optlen); 129 skb_reset_network_header(skb);
130 else 130 iph = ip_hdr(skb);
131 iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr));
132
133 iph->version = 4; 131 iph->version = 4;
134 iph->ihl = 5; 132 iph->ihl = 5;
135 iph->tos = inet->tos; 133 iph->tos = inet->tos;
@@ -143,7 +141,6 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
143 iph->protocol = sk->sk_protocol; 141 iph->protocol = sk->sk_protocol;
144 iph->tot_len = htons(skb->len); 142 iph->tot_len = htons(skb->len);
145 ip_select_ident(iph, &rt->u.dst, sk); 143 ip_select_ident(iph, &rt->u.dst, sk);
146 skb->nh.iph = iph;
147 144
148 if (opt && opt->optlen) { 145 if (opt && opt->optlen) {
149 iph->ihl += opt->optlen>>2; 146 iph->ihl += opt->optlen>>2;
@@ -192,6 +189,14 @@ static inline int ip_finish_output2(struct sk_buff *skb)
192 return -EINVAL; 189 return -EINVAL;
193} 190}
194 191
192static inline int ip_skb_dst_mtu(struct sk_buff *skb)
193{
194 struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL;
195
196 return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ?
197 skb->dst->dev->mtu : dst_mtu(skb->dst);
198}
199
195static inline int ip_finish_output(struct sk_buff *skb) 200static inline int ip_finish_output(struct sk_buff *skb)
196{ 201{
197#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) 202#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
@@ -201,7 +206,7 @@ static inline int ip_finish_output(struct sk_buff *skb)
201 return dst_output(skb); 206 return dst_output(skb);
202 } 207 }
203#endif 208#endif
204 if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) 209 if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
205 return ip_fragment(skb, ip_finish_output2); 210 return ip_fragment(skb, ip_finish_output2);
206 else 211 else
207 return ip_finish_output2(skb); 212 return ip_finish_output2(skb);
@@ -248,7 +253,7 @@ int ip_mc_output(struct sk_buff *skb)
248 253
249 /* Multicasts with ttl 0 must not go beyond the host */ 254 /* Multicasts with ttl 0 must not go beyond the host */
250 255
251 if (skb->nh.iph->ttl == 0) { 256 if (ip_hdr(skb)->ttl == 0) {
252 kfree_skb(skb); 257 kfree_skb(skb);
253 return 0; 258 return 0;
254 } 259 }
@@ -333,7 +338,9 @@ packet_routed:
333 goto no_route; 338 goto no_route;
334 339
335 /* OK, we know where to send it, allocate and build IP header. */ 340 /* OK, we know where to send it, allocate and build IP header. */
336 iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); 341 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
342 skb_reset_network_header(skb);
343 iph = ip_hdr(skb);
337 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); 344 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
338 iph->tot_len = htons(skb->len); 345 iph->tot_len = htons(skb->len);
339 if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok) 346 if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
@@ -344,7 +351,6 @@ packet_routed:
344 iph->protocol = sk->sk_protocol; 351 iph->protocol = sk->sk_protocol;
345 iph->saddr = rt->rt_src; 352 iph->saddr = rt->rt_src;
346 iph->daddr = rt->rt_dst; 353 iph->daddr = rt->rt_dst;
347 skb->nh.iph = iph;
348 /* Transport layer set skb->h.foo itself. */ 354 /* Transport layer set skb->h.foo itself. */
349 355
350 if (opt && opt->optlen) { 356 if (opt && opt->optlen) {
@@ -386,21 +392,10 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
386#ifdef CONFIG_NET_SCHED 392#ifdef CONFIG_NET_SCHED
387 to->tc_index = from->tc_index; 393 to->tc_index = from->tc_index;
388#endif 394#endif
389#ifdef CONFIG_NETFILTER 395 nf_copy(to, from);
390 /* Connection association is same as pre-frag packet */
391 nf_conntrack_put(to->nfct);
392 to->nfct = from->nfct;
393 nf_conntrack_get(to->nfct);
394 to->nfctinfo = from->nfctinfo;
395#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) 396#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
396 to->ipvs_property = from->ipvs_property; 397 to->ipvs_property = from->ipvs_property;
397#endif 398#endif
398#ifdef CONFIG_BRIDGE_NETFILTER
399 nf_bridge_put(to->nf_bridge);
400 to->nf_bridge = from->nf_bridge;
401 nf_bridge_get(to->nf_bridge);
402#endif
403#endif
404 skb_copy_secmark(to, from); 399 skb_copy_secmark(to, from);
405} 400}
406 401
@@ -430,12 +425,12 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
430 * Point into the IP datagram header. 425 * Point into the IP datagram header.
431 */ 426 */
432 427
433 iph = skb->nh.iph; 428 iph = ip_hdr(skb);
434 429
435 if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { 430 if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
436 IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); 431 IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
437 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, 432 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
438 htonl(dst_mtu(&rt->u.dst))); 433 htonl(ip_skb_dst_mtu(skb)));
439 kfree_skb(skb); 434 kfree_skb(skb);
440 return -EMSGSIZE; 435 return -EMSGSIZE;
441 } 436 }
@@ -502,10 +497,11 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
502 * before previous one went down. */ 497 * before previous one went down. */
503 if (frag) { 498 if (frag) {
504 frag->ip_summed = CHECKSUM_NONE; 499 frag->ip_summed = CHECKSUM_NONE;
505 frag->h.raw = frag->data; 500 skb_reset_transport_header(frag);
506 frag->nh.raw = __skb_push(frag, hlen); 501 __skb_push(frag, hlen);
507 memcpy(frag->nh.raw, iph, hlen); 502 skb_reset_network_header(frag);
508 iph = frag->nh.iph; 503 memcpy(skb_network_header(frag), iph, hlen);
504 iph = ip_hdr(frag);
509 iph->tot_len = htons(frag->len); 505 iph->tot_len = htons(frag->len);
510 ip_copy_metadata(frag, skb); 506 ip_copy_metadata(frag, skb);
511 if (offset == 0) 507 if (offset == 0)
@@ -566,7 +562,7 @@ slow_path:
566 * Keep copying data until we run out. 562 * Keep copying data until we run out.
567 */ 563 */
568 564
569 while(left > 0) { 565 while (left > 0) {
570 len = left; 566 len = left;
571 /* IF: it doesn't fit, use 'mtu' - the data space left */ 567 /* IF: it doesn't fit, use 'mtu' - the data space left */
572 if (len > mtu) 568 if (len > mtu)
@@ -593,8 +589,8 @@ slow_path:
593 ip_copy_metadata(skb2, skb); 589 ip_copy_metadata(skb2, skb);
594 skb_reserve(skb2, ll_rs); 590 skb_reserve(skb2, ll_rs);
595 skb_put(skb2, len + hlen); 591 skb_put(skb2, len + hlen);
596 skb2->nh.raw = skb2->data; 592 skb_reset_network_header(skb2);
597 skb2->h.raw = skb2->data + hlen; 593 skb2->transport_header = skb2->network_header + hlen;
598 594
599 /* 595 /*
600 * Charge the memory for the fragment to any owner 596 * Charge the memory for the fragment to any owner
@@ -608,19 +604,19 @@ slow_path:
608 * Copy the packet header into the new buffer. 604 * Copy the packet header into the new buffer.
609 */ 605 */
610 606
611 memcpy(skb2->nh.raw, skb->data, hlen); 607 skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
612 608
613 /* 609 /*
614 * Copy a block of the IP datagram. 610 * Copy a block of the IP datagram.
615 */ 611 */
616 if (skb_copy_bits(skb, ptr, skb2->h.raw, len)) 612 if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
617 BUG(); 613 BUG();
618 left -= len; 614 left -= len;
619 615
620 /* 616 /*
621 * Fill in the new header fields. 617 * Fill in the new header fields.
622 */ 618 */
623 iph = skb2->nh.iph; 619 iph = ip_hdr(skb2);
624 iph->frag_off = htons((offset >> 3)); 620 iph->frag_off = htons((offset >> 3));
625 621
626 /* ANK: dirty, but effective trick. Upgrade options only if 622 /* ANK: dirty, but effective trick. Upgrade options only if
@@ -722,10 +718,10 @@ static inline int ip_ufo_append_data(struct sock *sk,
722 skb_put(skb,fragheaderlen + transhdrlen); 718 skb_put(skb,fragheaderlen + transhdrlen);
723 719
724 /* initialize network header pointer */ 720 /* initialize network header pointer */
725 skb->nh.raw = skb->data; 721 skb_reset_network_header(skb);
726 722
727 /* initialize protocol header pointer */ 723 /* initialize protocol header pointer */
728 skb->h.raw = skb->data + fragheaderlen; 724 skb->transport_header = skb->network_header + fragheaderlen;
729 725
730 skb->ip_summed = CHECKSUM_PARTIAL; 726 skb->ip_summed = CHECKSUM_PARTIAL;
731 skb->csum = 0; 727 skb->csum = 0;
@@ -799,7 +795,9 @@ int ip_append_data(struct sock *sk,
799 inet->cork.addr = ipc->addr; 795 inet->cork.addr = ipc->addr;
800 } 796 }
801 dst_hold(&rt->u.dst); 797 dst_hold(&rt->u.dst);
802 inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path); 798 inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
799 rt->u.dst.dev->mtu :
800 dst_mtu(rt->u.dst.path);
803 inet->cork.rt = rt; 801 inet->cork.rt = rt;
804 inet->cork.length = 0; 802 inet->cork.length = 0;
805 sk->sk_sndmsg_page = NULL; 803 sk->sk_sndmsg_page = NULL;
@@ -929,9 +927,10 @@ alloc_new_skb:
929 * Find where to start putting bytes. 927 * Find where to start putting bytes.
930 */ 928 */
931 data = skb_put(skb, fraglen); 929 data = skb_put(skb, fraglen);
932 skb->nh.raw = data + exthdrlen; 930 skb_set_network_header(skb, exthdrlen);
931 skb->transport_header = (skb->network_header +
932 fragheaderlen);
933 data += fragheaderlen; 933 data += fragheaderlen;
934 skb->h.raw = data + exthdrlen;
935 934
936 if (fraggap) { 935 if (fraggap) {
937 skb->csum = skb_copy_and_csum_bits( 936 skb->csum = skb_copy_and_csum_bits(
@@ -1100,8 +1099,6 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1100 } 1099 }
1101 if (len <= 0) { 1100 if (len <= 0) {
1102 struct sk_buff *skb_prev; 1101 struct sk_buff *skb_prev;
1103 char *data;
1104 struct iphdr *iph;
1105 int alloclen; 1102 int alloclen;
1106 1103
1107 skb_prev = skb; 1104 skb_prev = skb;
@@ -1124,15 +1121,15 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1124 /* 1121 /*
1125 * Find where to start putting bytes. 1122 * Find where to start putting bytes.
1126 */ 1123 */
1127 data = skb_put(skb, fragheaderlen + fraggap); 1124 skb_put(skb, fragheaderlen + fraggap);
1128 skb->nh.iph = iph = (struct iphdr *)data; 1125 skb_reset_network_header(skb);
1129 data += fragheaderlen; 1126 skb->transport_header = (skb->network_header +
1130 skb->h.raw = data; 1127 fragheaderlen);
1131
1132 if (fraggap) { 1128 if (fraggap) {
1133 skb->csum = skb_copy_and_csum_bits( 1129 skb->csum = skb_copy_and_csum_bits(skb_prev,
1134 skb_prev, maxfraglen, 1130 maxfraglen,
1135 data, fraggap, 0); 1131 skb_transport_header(skb),
1132 fraggap, 0);
1136 skb_prev->csum = csum_sub(skb_prev->csum, 1133 skb_prev->csum = csum_sub(skb_prev->csum,
1137 skb->csum); 1134 skb->csum);
1138 pskb_trim_unique(skb_prev, maxfraglen); 1135 pskb_trim_unique(skb_prev, maxfraglen);
@@ -1198,10 +1195,10 @@ int ip_push_pending_frames(struct sock *sk)
1198 tail_skb = &(skb_shinfo(skb)->frag_list); 1195 tail_skb = &(skb_shinfo(skb)->frag_list);
1199 1196
1200 /* move skb->data to ip header from ext header */ 1197 /* move skb->data to ip header from ext header */
1201 if (skb->data < skb->nh.raw) 1198 if (skb->data < skb_network_header(skb))
1202 __skb_pull(skb, skb->nh.raw - skb->data); 1199 __skb_pull(skb, skb_network_offset(skb));
1203 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { 1200 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1204 __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw); 1201 __skb_pull(tmp_skb, skb_network_header_len(skb));
1205 *tail_skb = tmp_skb; 1202 *tail_skb = tmp_skb;
1206 tail_skb = &(tmp_skb->next); 1203 tail_skb = &(tmp_skb->next);
1207 skb->len += tmp_skb->len; 1204 skb->len += tmp_skb->len;
@@ -1216,13 +1213,13 @@ int ip_push_pending_frames(struct sock *sk)
1216 * to fragment the frame generated here. No matter, what transforms 1213 * to fragment the frame generated here. No matter, what transforms
1217 * how transforms change size of the packet, it will come out. 1214 * how transforms change size of the packet, it will come out.
1218 */ 1215 */
1219 if (inet->pmtudisc != IP_PMTUDISC_DO) 1216 if (inet->pmtudisc < IP_PMTUDISC_DO)
1220 skb->local_df = 1; 1217 skb->local_df = 1;
1221 1218
1222 /* DF bit is set when we want to see DF on outgoing frames. 1219 /* DF bit is set when we want to see DF on outgoing frames.
1223 * If local_df is set too, we still allow to fragment this frame 1220 * If local_df is set too, we still allow to fragment this frame
1224 * locally. */ 1221 * locally. */
1225 if (inet->pmtudisc == IP_PMTUDISC_DO || 1222 if (inet->pmtudisc >= IP_PMTUDISC_DO ||
1226 (skb->len <= dst_mtu(&rt->u.dst) && 1223 (skb->len <= dst_mtu(&rt->u.dst) &&
1227 ip_dont_fragment(sk, &rt->u.dst))) 1224 ip_dont_fragment(sk, &rt->u.dst)))
1228 df = htons(IP_DF); 1225 df = htons(IP_DF);
@@ -1352,11 +1349,11 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1352 struct flowi fl = { .nl_u = { .ip4_u = 1349 struct flowi fl = { .nl_u = { .ip4_u =
1353 { .daddr = daddr, 1350 { .daddr = daddr,
1354 .saddr = rt->rt_spec_dst, 1351 .saddr = rt->rt_spec_dst,
1355 .tos = RT_TOS(skb->nh.iph->tos) } }, 1352 .tos = RT_TOS(ip_hdr(skb)->tos) } },
1356 /* Not quite clean, but right. */ 1353 /* Not quite clean, but right. */
1357 .uli_u = { .ports = 1354 .uli_u = { .ports =
1358 { .sport = skb->h.th->dest, 1355 { .sport = tcp_hdr(skb)->dest,
1359 .dport = skb->h.th->source } }, 1356 .dport = tcp_hdr(skb)->source } },
1360 .proto = sk->sk_protocol }; 1357 .proto = sk->sk_protocol };
1361 security_skb_classify_flow(skb, &fl); 1358 security_skb_classify_flow(skb, &fl);
1362 if (ip_route_output_key(&rt, &fl)) 1359 if (ip_route_output_key(&rt, &fl))
@@ -1370,14 +1367,16 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1370 with locally disabled BH and that sk cannot be already spinlocked. 1367 with locally disabled BH and that sk cannot be already spinlocked.
1371 */ 1368 */
1372 bh_lock_sock(sk); 1369 bh_lock_sock(sk);
1373 inet->tos = skb->nh.iph->tos; 1370 inet->tos = ip_hdr(skb)->tos;
1374 sk->sk_priority = skb->priority; 1371 sk->sk_priority = skb->priority;
1375 sk->sk_protocol = skb->nh.iph->protocol; 1372 sk->sk_protocol = ip_hdr(skb)->protocol;
1376 ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0, 1373 ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
1377 &ipc, rt, MSG_DONTWAIT); 1374 &ipc, rt, MSG_DONTWAIT);
1378 if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { 1375 if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
1379 if (arg->csumoffset >= 0) 1376 if (arg->csumoffset >= 0)
1380 *((__sum16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum)); 1377 *((__sum16 *)skb_transport_header(skb) +
1378 arg->csumoffset) = csum_fold(csum_add(skb->csum,
1379 arg->csum));
1381 skb->ip_summed = CHECKSUM_NONE; 1380 skb->ip_summed = CHECKSUM_NONE;
1382 ip_push_pending_frames(sk); 1381 ip_push_pending_frames(sk);
1383 } 1382 }
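
ip_output.c introduces the plumbing for the new IP_PMTUDISC_PROBE mode: ip_skb_dst_mtu() returns the raw device MTU instead of the cached route MTU when the socket asks for probing, ip_append_data() sizes frames the same way, and the pmtudisc tests switch from equality to ordered comparisons (>= IP_PMTUDISC_DO) so the new, larger mode value inherits the DF behaviour. The matching setsockopt range check in ip_sockglue.c below is widened from 2 to 3 to admit the new value. From user space the feature looks like this (a hedged sketch; IP_PMTUDISC_PROBE is defined as 3 by this series, guarded here for older headers):

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>

#ifndef IP_PMTUDISC_PROBE
#define IP_PMTUDISC_PROBE 3	/* new in this series */
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int val = IP_PMTUDISC_PROBE;

	if (fd < 0 || setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER,
				 &val, sizeof(val)) < 0) {
		perror("IP_MTU_DISCOVER");
		return 1;
	}
	/* DF will be set on outgoing datagrams, but the kernel sizes
	 * them by the device MTU, ignoring the cached path-MTU value. */
	return 0;
}
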
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 23048d9f3584..4d544573f48a 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -59,7 +59,7 @@ static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
59 struct in_pktinfo info; 59 struct in_pktinfo info;
60 struct rtable *rt = (struct rtable *)skb->dst; 60 struct rtable *rt = (struct rtable *)skb->dst;
61 61
62 info.ipi_addr.s_addr = skb->nh.iph->daddr; 62 info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
63 if (rt) { 63 if (rt) {
64 info.ipi_ifindex = rt->rt_iif; 64 info.ipi_ifindex = rt->rt_iif;
65 info.ipi_spec_dst.s_addr = rt->rt_spec_dst; 65 info.ipi_spec_dst.s_addr = rt->rt_spec_dst;
@@ -73,13 +73,13 @@ static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
73 73
74static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb) 74static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb)
75{ 75{
76 int ttl = skb->nh.iph->ttl; 76 int ttl = ip_hdr(skb)->ttl;
77 put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl); 77 put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl);
78} 78}
79 79
80static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb) 80static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb)
81{ 81{
82 put_cmsg(msg, SOL_IP, IP_TOS, 1, &skb->nh.iph->tos); 82 put_cmsg(msg, SOL_IP, IP_TOS, 1, &ip_hdr(skb)->tos);
83} 83}
84 84
85static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb) 85static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
@@ -87,7 +87,8 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
87 if (IPCB(skb)->opt.optlen == 0) 87 if (IPCB(skb)->opt.optlen == 0)
88 return; 88 return;
89 89
90 put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen, skb->nh.iph+1); 90 put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen,
91 ip_hdr(skb) + 1);
91} 92}
92 93
93 94
@@ -268,18 +269,21 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
268 serr = SKB_EXT_ERR(skb); 269 serr = SKB_EXT_ERR(skb);
269 serr->ee.ee_errno = err; 270 serr->ee.ee_errno = err;
270 serr->ee.ee_origin = SO_EE_ORIGIN_ICMP; 271 serr->ee.ee_origin = SO_EE_ORIGIN_ICMP;
271 serr->ee.ee_type = skb->h.icmph->type; 272 serr->ee.ee_type = icmp_hdr(skb)->type;
272 serr->ee.ee_code = skb->h.icmph->code; 273 serr->ee.ee_code = icmp_hdr(skb)->code;
273 serr->ee.ee_pad = 0; 274 serr->ee.ee_pad = 0;
274 serr->ee.ee_info = info; 275 serr->ee.ee_info = info;
275 serr->ee.ee_data = 0; 276 serr->ee.ee_data = 0;
276 serr->addr_offset = (u8*)&(((struct iphdr*)(skb->h.icmph+1))->daddr) - skb->nh.raw; 277 serr->addr_offset = (u8 *)&(((struct iphdr *)(icmp_hdr(skb) + 1))->daddr) -
278 skb_network_header(skb);
277 serr->port = port; 279 serr->port = port;
278 280
279 skb->h.raw = payload; 281 if (skb_pull(skb, payload - skb->data) != NULL) {
280 if (!skb_pull(skb, payload - skb->data) || 282 skb_reset_transport_header(skb);
281 sock_queue_err_skb(sk, skb)) 283 if (sock_queue_err_skb(sk, skb) == 0)
282 kfree_skb(skb); 284 return;
285 }
286 kfree_skb(skb);
283} 287}
284 288
285void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info) 289void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info)
@@ -296,8 +300,9 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
296 if (!skb) 300 if (!skb)
297 return; 301 return;
298 302
299 iph = (struct iphdr*)skb_put(skb, sizeof(struct iphdr)); 303 skb_put(skb, sizeof(struct iphdr));
300 skb->nh.iph = iph; 304 skb_reset_network_header(skb);
305 iph = ip_hdr(skb);
301 iph->daddr = daddr; 306 iph->daddr = daddr;
302 307
303 serr = SKB_EXT_ERR(skb); 308 serr = SKB_EXT_ERR(skb);
@@ -308,11 +313,11 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
308 serr->ee.ee_pad = 0; 313 serr->ee.ee_pad = 0;
309 serr->ee.ee_info = info; 314 serr->ee.ee_info = info;
310 serr->ee.ee_data = 0; 315 serr->ee.ee_data = 0;
311 serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw; 316 serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
312 serr->port = port; 317 serr->port = port;
313 318
314 skb->h.raw = skb->tail; 319 __skb_pull(skb, skb_tail_pointer(skb) - skb->data);
315 __skb_pull(skb, skb->tail - skb->data); 320 skb_reset_transport_header(skb);
316 321
317 if (sock_queue_err_skb(sk, skb)) 322 if (sock_queue_err_skb(sk, skb))
318 kfree_skb(skb); 323 kfree_skb(skb);
@@ -354,7 +359,8 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
354 sin = (struct sockaddr_in *)msg->msg_name; 359 sin = (struct sockaddr_in *)msg->msg_name;
355 if (sin) { 360 if (sin) {
356 sin->sin_family = AF_INET; 361 sin->sin_family = AF_INET;
357 sin->sin_addr.s_addr = *(__be32*)(skb->nh.raw + serr->addr_offset); 362 sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
363 serr->addr_offset);
358 sin->sin_port = serr->port; 364 sin->sin_port = serr->port;
359 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); 365 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
360 } 366 }
@@ -366,7 +372,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
366 struct inet_sock *inet = inet_sk(sk); 372 struct inet_sock *inet = inet_sk(sk);
367 373
368 sin->sin_family = AF_INET; 374 sin->sin_family = AF_INET;
369 sin->sin_addr.s_addr = skb->nh.iph->saddr; 375 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
370 sin->sin_port = 0; 376 sin->sin_port = 0;
371 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); 377 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
372 if (inet->cmsg_flags) 378 if (inet->cmsg_flags)
@@ -403,20 +409,20 @@ out:
403 */ 409 */
404 410
405static int do_ip_setsockopt(struct sock *sk, int level, 411static int do_ip_setsockopt(struct sock *sk, int level,
406 int optname, char __user *optval, int optlen) 412 int optname, char __user *optval, int optlen)
407{ 413{
408 struct inet_sock *inet = inet_sk(sk); 414 struct inet_sock *inet = inet_sk(sk);
409 int val=0,err; 415 int val=0,err;
410 416
411 if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) | 417 if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) |
412 (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) | 418 (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) |
413 (1<<IP_RETOPTS) | (1<<IP_TOS) | 419 (1<<IP_RETOPTS) | (1<<IP_TOS) |
414 (1<<IP_TTL) | (1<<IP_HDRINCL) | 420 (1<<IP_TTL) | (1<<IP_HDRINCL) |
415 (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | 421 (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
416 (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | 422 (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
417 (1<<IP_PASSSEC))) || 423 (1<<IP_PASSSEC))) ||
418 optname == IP_MULTICAST_TTL || 424 optname == IP_MULTICAST_TTL ||
419 optname == IP_MULTICAST_LOOP) { 425 optname == IP_MULTICAST_LOOP) {
420 if (optlen >= sizeof(int)) { 426 if (optlen >= sizeof(int)) {
421 if (get_user(val, (int __user *) optval)) 427 if (get_user(val, (int __user *) optval))
422 return -EFAULT; 428 return -EFAULT;
@@ -440,444 +446,444 @@ static int do_ip_setsockopt(struct sock *sk, int level,
440 lock_sock(sk); 446 lock_sock(sk);
441 447
442 switch (optname) { 448 switch (optname) {
443 case IP_OPTIONS: 449 case IP_OPTIONS:
444 { 450 {
445 struct ip_options * opt = NULL; 451 struct ip_options * opt = NULL;
446 if (optlen > 40 || optlen < 0) 452 if (optlen > 40 || optlen < 0)
447 goto e_inval; 453 goto e_inval;
448 err = ip_options_get_from_user(&opt, optval, optlen); 454 err = ip_options_get_from_user(&opt, optval, optlen);
449 if (err) 455 if (err)
450 break; 456 break;
451 if (inet->is_icsk) { 457 if (inet->is_icsk) {
452 struct inet_connection_sock *icsk = inet_csk(sk); 458 struct inet_connection_sock *icsk = inet_csk(sk);
453#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 459#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
454 if (sk->sk_family == PF_INET || 460 if (sk->sk_family == PF_INET ||
455 (!((1 << sk->sk_state) & 461 (!((1 << sk->sk_state) &
456 (TCPF_LISTEN | TCPF_CLOSE)) && 462 (TCPF_LISTEN | TCPF_CLOSE)) &&
457 inet->daddr != LOOPBACK4_IPV6)) { 463 inet->daddr != LOOPBACK4_IPV6)) {
458#endif 464#endif
459 if (inet->opt) 465 if (inet->opt)
460 icsk->icsk_ext_hdr_len -= inet->opt->optlen; 466 icsk->icsk_ext_hdr_len -= inet->opt->optlen;
461 if (opt) 467 if (opt)
462 icsk->icsk_ext_hdr_len += opt->optlen; 468 icsk->icsk_ext_hdr_len += opt->optlen;
463 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); 469 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
464#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 470#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
465 }
466#endif
467 } 471 }
468 opt = xchg(&inet->opt, opt); 472#endif
469 kfree(opt);
470 break;
471 } 473 }
472 case IP_PKTINFO: 474 opt = xchg(&inet->opt, opt);
473 if (val) 475 kfree(opt);
474 inet->cmsg_flags |= IP_CMSG_PKTINFO; 476 break;
475 else 477 }
476 inet->cmsg_flags &= ~IP_CMSG_PKTINFO; 478 case IP_PKTINFO:
477 break; 479 if (val)
478 case IP_RECVTTL: 480 inet->cmsg_flags |= IP_CMSG_PKTINFO;
479 if (val) 481 else
480 inet->cmsg_flags |= IP_CMSG_TTL; 482 inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
481 else 483 break;
482 inet->cmsg_flags &= ~IP_CMSG_TTL; 484 case IP_RECVTTL:
483 break; 485 if (val)
484 case IP_RECVTOS: 486 inet->cmsg_flags |= IP_CMSG_TTL;
485 if (val) 487 else
486 inet->cmsg_flags |= IP_CMSG_TOS; 488 inet->cmsg_flags &= ~IP_CMSG_TTL;
487 else 489 break;
488 inet->cmsg_flags &= ~IP_CMSG_TOS; 490 case IP_RECVTOS:
489 break; 491 if (val)
490 case IP_RECVOPTS: 492 inet->cmsg_flags |= IP_CMSG_TOS;
491 if (val) 493 else
492 inet->cmsg_flags |= IP_CMSG_RECVOPTS; 494 inet->cmsg_flags &= ~IP_CMSG_TOS;
493 else 495 break;
494 inet->cmsg_flags &= ~IP_CMSG_RECVOPTS; 496 case IP_RECVOPTS:
495 break; 497 if (val)
496 case IP_RETOPTS: 498 inet->cmsg_flags |= IP_CMSG_RECVOPTS;
497 if (val) 499 else
498 inet->cmsg_flags |= IP_CMSG_RETOPTS; 500 inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
499 else 501 break;
500 inet->cmsg_flags &= ~IP_CMSG_RETOPTS; 502 case IP_RETOPTS:
503 if (val)
504 inet->cmsg_flags |= IP_CMSG_RETOPTS;
505 else
506 inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
507 break;
508 case IP_PASSSEC:
509 if (val)
510 inet->cmsg_flags |= IP_CMSG_PASSSEC;
511 else
512 inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
513 break;
514 case IP_TOS: /* This sets both TOS and Precedence */
515 if (sk->sk_type == SOCK_STREAM) {
516 val &= ~3;
517 val |= inet->tos & 3;
518 }
519 if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP &&
520 !capable(CAP_NET_ADMIN)) {
521 err = -EPERM;
501 break; 522 break;
502 case IP_PASSSEC: 523 }
503 if (val) 524 if (inet->tos != val) {
504 inet->cmsg_flags |= IP_CMSG_PASSSEC; 525 inet->tos = val;
505 else 526 sk->sk_priority = rt_tos2priority(val);
506 inet->cmsg_flags &= ~IP_CMSG_PASSSEC; 527 sk_dst_reset(sk);
528 }
529 break;
530 case IP_TTL:
531 if (optlen<1)
532 goto e_inval;
533 if (val != -1 && (val < 1 || val>255))
534 goto e_inval;
535 inet->uc_ttl = val;
536 break;
537 case IP_HDRINCL:
538 if (sk->sk_type != SOCK_RAW) {
539 err = -ENOPROTOOPT;
507 break; 540 break;
508 case IP_TOS: /* This sets both TOS and Precedence */ 541 }
509 if (sk->sk_type == SOCK_STREAM) { 542 inet->hdrincl = val ? 1 : 0;
510 val &= ~3; 543 break;
511 val |= inet->tos & 3; 544 case IP_MTU_DISCOVER:
512 } 545 if (val<0 || val>3)
513 if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP && 546 goto e_inval;
514 !capable(CAP_NET_ADMIN)) { 547 inet->pmtudisc = val;
515 err = -EPERM; 548 break;
549 case IP_RECVERR:
550 inet->recverr = !!val;
551 if (!val)
552 skb_queue_purge(&sk->sk_error_queue);
553 break;
554 case IP_MULTICAST_TTL:
555 if (sk->sk_type == SOCK_STREAM)
556 goto e_inval;
557 if (optlen<1)
558 goto e_inval;
559 if (val==-1)
560 val = 1;
561 if (val < 0 || val > 255)
562 goto e_inval;
563 inet->mc_ttl = val;
564 break;
565 case IP_MULTICAST_LOOP:
566 if (optlen<1)
567 goto e_inval;
568 inet->mc_loop = !!val;
569 break;
570 case IP_MULTICAST_IF:
571 {
572 struct ip_mreqn mreq;
573 struct net_device *dev = NULL;
574
575 if (sk->sk_type == SOCK_STREAM)
576 goto e_inval;
577 /*
578 * Check the arguments are allowable
579 */
580
581 err = -EFAULT;
582 if (optlen >= sizeof(struct ip_mreqn)) {
583 if (copy_from_user(&mreq,optval,sizeof(mreq)))
516 break; 584 break;
517 } 585 } else {
518 if (inet->tos != val) { 586 memset(&mreq, 0, sizeof(mreq));
519 inet->tos = val; 587 if (optlen >= sizeof(struct in_addr) &&
520 sk->sk_priority = rt_tos2priority(val); 588 copy_from_user(&mreq.imr_address,optval,sizeof(struct in_addr)))
521 sk_dst_reset(sk); 589 break;
522 } 590 }
523 break; 591
524 case IP_TTL: 592 if (!mreq.imr_ifindex) {
 		if (optlen<1)
 			goto e_inval;
 		if (val != -1 && (val < 1 || val>255))
 			goto e_inval;
 		inet->uc_ttl = val;
 		break;
 	case IP_HDRINCL:
 		if (sk->sk_type != SOCK_RAW) {
 			err = -ENOPROTOOPT;
 			break;
 		}
 		inet->hdrincl = val ? 1 : 0;
 		break;
 	case IP_MTU_DISCOVER:
 		if (val<0 || val>2)
 			goto e_inval;
 		inet->pmtudisc = val;
 		break;
 	case IP_RECVERR:
 		inet->recverr = !!val;
 		if (!val)
 			skb_queue_purge(&sk->sk_error_queue);
 		break;
 	case IP_MULTICAST_TTL:
 		if (sk->sk_type == SOCK_STREAM)
 			goto e_inval;
 		if (optlen<1)
 			goto e_inval;
 		if (val==-1)
 			val = 1;
 		if (val < 0 || val > 255)
 			goto e_inval;
 		inet->mc_ttl = val;
 		break;
 	case IP_MULTICAST_LOOP:
 		if (optlen<1)
 			goto e_inval;
 		inet->mc_loop = !!val;
 		break;
 	case IP_MULTICAST_IF:
 	{
 		struct ip_mreqn mreq;
 		struct net_device *dev = NULL;
 
 		if (sk->sk_type == SOCK_STREAM)
 			goto e_inval;
 		/*
 		 *	Check the arguments are allowable
 		 */
 
 		err = -EFAULT;
 		if (optlen >= sizeof(struct ip_mreqn)) {
 			if (copy_from_user(&mreq,optval,sizeof(mreq)))
 				break;
 		} else {
 			memset(&mreq, 0, sizeof(mreq));
 			if (optlen >= sizeof(struct in_addr) &&
 			    copy_from_user(&mreq.imr_address,optval,sizeof(struct in_addr)))
 				break;
 		}
 
 		if (!mreq.imr_ifindex) {
 			if (mreq.imr_address.s_addr == INADDR_ANY) {
 				inet->mc_index = 0;
 				inet->mc_addr = 0;
 				err = 0;
 				break;
 			}
 			dev = ip_dev_find(mreq.imr_address.s_addr);
 			if (dev) {
 				mreq.imr_ifindex = dev->ifindex;
 				dev_put(dev);
 			}
 		} else
 			dev = __dev_get_by_index(mreq.imr_ifindex);
 
 
 		err = -EADDRNOTAVAIL;
 		if (!dev)
 			break;
 
 		err = -EINVAL;
 		if (sk->sk_bound_dev_if &&
 		    mreq.imr_ifindex != sk->sk_bound_dev_if)
 			break;
 
 		inet->mc_index = mreq.imr_ifindex;
 		inet->mc_addr = mreq.imr_address.s_addr;
 		err = 0;
 		break;
 	}
 
 	case IP_ADD_MEMBERSHIP:
 	case IP_DROP_MEMBERSHIP:
 	{
 		struct ip_mreqn mreq;
 
 		if (optlen < sizeof(struct ip_mreq))
 			goto e_inval;
 		err = -EFAULT;
 		if (optlen >= sizeof(struct ip_mreqn)) {
-			if(copy_from_user(&mreq,optval,sizeof(mreq)))
+			if (copy_from_user(&mreq,optval,sizeof(mreq)))
 				break;
 		} else {
 			memset(&mreq, 0, sizeof(mreq));
 			if (copy_from_user(&mreq,optval,sizeof(struct ip_mreq)))
 				break;
 		}
 
 		if (optname == IP_ADD_MEMBERSHIP)
 			err = ip_mc_join_group(sk, &mreq);
 		else
 			err = ip_mc_leave_group(sk, &mreq);
 		break;
 	}
 	case IP_MSFILTER:
 	{
 		extern int sysctl_igmp_max_msf;
 		struct ip_msfilter *msf;
 
 		if (optlen < IP_MSFILTER_SIZE(0))
 			goto e_inval;
 		if (optlen > sysctl_optmem_max) {
 			err = -ENOBUFS;
 			break;
 		}
 		msf = kmalloc(optlen, GFP_KERNEL);
 		if (msf == 0) {
 			err = -ENOBUFS;
 			break;
 		}
 		err = -EFAULT;
 		if (copy_from_user(msf, optval, optlen)) {
 			kfree(msf);
 			break;
 		}
 		/* numsrc >= (1G-4) overflow in 32 bits */
 		if (msf->imsf_numsrc >= 0x3ffffffcU ||
 		    msf->imsf_numsrc > sysctl_igmp_max_msf) {
 			kfree(msf);
 			err = -ENOBUFS;
 			break;
 		}
 		if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) {
 			kfree(msf);
 			err = -EINVAL;
 			break;
 		}
 		err = ip_mc_msfilter(sk, msf, 0);
 		kfree(msf);
 		break;
 	}
 	case IP_BLOCK_SOURCE:
 	case IP_UNBLOCK_SOURCE:
 	case IP_ADD_SOURCE_MEMBERSHIP:
 	case IP_DROP_SOURCE_MEMBERSHIP:
 	{
 		struct ip_mreq_source mreqs;
 		int omode, add;
 
 		if (optlen != sizeof(struct ip_mreq_source))
 			goto e_inval;
 		if (copy_from_user(&mreqs, optval, sizeof(mreqs))) {
 			err = -EFAULT;
 			break;
 		}
 		if (optname == IP_BLOCK_SOURCE) {
 			omode = MCAST_EXCLUDE;
 			add = 1;
 		} else if (optname == IP_UNBLOCK_SOURCE) {
 			omode = MCAST_EXCLUDE;
 			add = 0;
 		} else if (optname == IP_ADD_SOURCE_MEMBERSHIP) {
 			struct ip_mreqn mreq;
 
 			mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
 			mreq.imr_address.s_addr = mreqs.imr_interface;
 			mreq.imr_ifindex = 0;
 			err = ip_mc_join_group(sk, &mreq);
 			if (err && err != -EADDRINUSE)
 				break;
 			omode = MCAST_INCLUDE;
 			add = 1;
 		} else /* IP_DROP_SOURCE_MEMBERSHIP */ {
 			omode = MCAST_INCLUDE;
 			add = 0;
 		}
 		err = ip_mc_source(add, omode, sk, &mreqs, 0);
 		break;
 	}
 	case MCAST_JOIN_GROUP:
 	case MCAST_LEAVE_GROUP:
 	{
 		struct group_req greq;
 		struct sockaddr_in *psin;
 		struct ip_mreqn mreq;
 
 		if (optlen < sizeof(struct group_req))
 			goto e_inval;
 		err = -EFAULT;
-		if(copy_from_user(&greq, optval, sizeof(greq)))
+		if (copy_from_user(&greq, optval, sizeof(greq)))
 			break;
 		psin = (struct sockaddr_in *)&greq.gr_group;
 		if (psin->sin_family != AF_INET)
 			goto e_inval;
 		memset(&mreq, 0, sizeof(mreq));
 		mreq.imr_multiaddr = psin->sin_addr;
 		mreq.imr_ifindex = greq.gr_interface;
 
 		if (optname == MCAST_JOIN_GROUP)
 			err = ip_mc_join_group(sk, &mreq);
 		else
 			err = ip_mc_leave_group(sk, &mreq);
 		break;
 	}
 	case MCAST_JOIN_SOURCE_GROUP:
 	case MCAST_LEAVE_SOURCE_GROUP:
 	case MCAST_BLOCK_SOURCE:
 	case MCAST_UNBLOCK_SOURCE:
 	{
 		struct group_source_req greqs;
 		struct ip_mreq_source mreqs;
 		struct sockaddr_in *psin;
 		int omode, add;
 
 		if (optlen != sizeof(struct group_source_req))
 			goto e_inval;
 		if (copy_from_user(&greqs, optval, sizeof(greqs))) {
 			err = -EFAULT;
 			break;
 		}
 		if (greqs.gsr_group.ss_family != AF_INET ||
 		    greqs.gsr_source.ss_family != AF_INET) {
 			err = -EADDRNOTAVAIL;
 			break;
 		}
 		psin = (struct sockaddr_in *)&greqs.gsr_group;
 		mreqs.imr_multiaddr = psin->sin_addr.s_addr;
 		psin = (struct sockaddr_in *)&greqs.gsr_source;
 		mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
 		mreqs.imr_interface = 0; /* use index for mc_source */
 
 		if (optname == MCAST_BLOCK_SOURCE) {
 			omode = MCAST_EXCLUDE;
 			add = 1;
 		} else if (optname == MCAST_UNBLOCK_SOURCE) {
 			omode = MCAST_EXCLUDE;
 			add = 0;
 		} else if (optname == MCAST_JOIN_SOURCE_GROUP) {
 			struct ip_mreqn mreq;
 
 			psin = (struct sockaddr_in *)&greqs.gsr_group;
 			mreq.imr_multiaddr = psin->sin_addr;
 			mreq.imr_address.s_addr = 0;
 			mreq.imr_ifindex = greqs.gsr_interface;
 			err = ip_mc_join_group(sk, &mreq);
 			if (err && err != -EADDRINUSE)
 				break;
 			greqs.gsr_interface = mreq.imr_ifindex;
 			omode = MCAST_INCLUDE;
 			add = 1;
 		} else /* MCAST_LEAVE_SOURCE_GROUP */ {
 			omode = MCAST_INCLUDE;
 			add = 0;
 		}
 		err = ip_mc_source(add, omode, sk, &mreqs,
 				   greqs.gsr_interface);
 		break;
 	}
 	case MCAST_MSFILTER:
 	{
 		extern int sysctl_igmp_max_msf;
 		struct sockaddr_in *psin;
 		struct ip_msfilter *msf = NULL;
 		struct group_filter *gsf = NULL;
 		int msize, i, ifindex;
 
 		if (optlen < GROUP_FILTER_SIZE(0))
 			goto e_inval;
 		if (optlen > sysctl_optmem_max) {
 			err = -ENOBUFS;
 			break;
 		}
 		gsf = kmalloc(optlen,GFP_KERNEL);
 		if (gsf == 0) {
 			err = -ENOBUFS;
 			break;
 		}
 		err = -EFAULT;
 		if (copy_from_user(gsf, optval, optlen)) {
 			goto mc_msf_out;
 		}
 		/* numsrc >= (4G-140)/128 overflow in 32 bits */
 		if (gsf->gf_numsrc >= 0x1ffffff ||
 		    gsf->gf_numsrc > sysctl_igmp_max_msf) {
 			err = -ENOBUFS;
 			goto mc_msf_out;
 		}
 		if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
 			err = -EINVAL;
 			goto mc_msf_out;
 		}
 		msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
 		msf = kmalloc(msize,GFP_KERNEL);
 		if (msf == 0) {
 			err = -ENOBUFS;
 			goto mc_msf_out;
 		}
 		ifindex = gsf->gf_interface;
 		psin = (struct sockaddr_in *)&gsf->gf_group;
 		if (psin->sin_family != AF_INET) {
 			err = -EADDRNOTAVAIL;
 			goto mc_msf_out;
 		}
 		msf->imsf_multiaddr = psin->sin_addr.s_addr;
 		msf->imsf_interface = 0;
 		msf->imsf_fmode = gsf->gf_fmode;
 		msf->imsf_numsrc = gsf->gf_numsrc;
 		err = -EADDRNOTAVAIL;
 		for (i=0; i<gsf->gf_numsrc; ++i) {
 			psin = (struct sockaddr_in *)&gsf->gf_slist[i];
 
 			if (psin->sin_family != AF_INET)
 				goto mc_msf_out;
 			msf->imsf_slist[i] = psin->sin_addr.s_addr;
 		}
 		kfree(gsf);
 		gsf = NULL;
 
 		err = ip_mc_msfilter(sk, msf, ifindex);
-mc_msf_out:
+	mc_msf_out:
 		kfree(msf);
 		kfree(gsf);
 		break;
 	}
 	case IP_ROUTER_ALERT:
 		err = ip_ra_control(sk, val ? 1 : 0, NULL);
 		break;
 
 	case IP_FREEBIND:
 		if (optlen<1)
 			goto e_inval;
 		inet->freebind = !!val;
 		break;
 
 	case IP_IPSEC_POLICY:
 	case IP_XFRM_POLICY:
 		err = -EPERM;
 		if (!capable(CAP_NET_ADMIN))
 			break;
 		err = xfrm_user_policy(sk, optname, optval, optlen);
 		break;
 
 	default:
 		err = -ENOPROTOOPT;
 		break;
 	}
 	release_sock(sk);
 	return err;
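
[Editor's note: the IP_ADD_MEMBERSHIP/IP_DROP_MEMBERSHIP path above accepts two userspace layouts for the same option -- the portable struct ip_mreq, and the Linux-specific struct ip_mreqn whose imr_ifindex names the interface directly -- distinguishing them purely by optlen before copy_from_user(). A minimal userspace sketch of both call shapes; the group addresses and the "eth0" interface name are illustrative only:

	#include <arpa/inet.h>
	#include <net/if.h>
	#include <netinet/in.h>
	#include <string.h>
	#include <sys/socket.h>

	int join_groups(int fd)
	{
		/* Portable layout: optlen == sizeof(struct ip_mreq); the
		 * kernel zero-fills the missing imr_ifindex field. */
		struct ip_mreq mr;
		memset(&mr, 0, sizeof(mr));
		inet_pton(AF_INET, "239.1.1.1", &mr.imr_multiaddr);
		mr.imr_interface.s_addr = htonl(INADDR_ANY);
		if (setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP,
			       &mr, sizeof(mr)) < 0)
			return -1;

		/* Linux layout: optlen >= sizeof(struct ip_mreqn), so the
		 * interface can be picked by index rather than address. */
		struct ip_mreqn mrn;
		memset(&mrn, 0, sizeof(mrn));
		inet_pton(AF_INET, "239.1.1.2", &mrn.imr_multiaddr);
		mrn.imr_ifindex = if_nametoindex("eth0");
		return setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP,
				  &mrn, sizeof(mrn));
	}
]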
@@ -948,214 +954,213 @@ EXPORT_SYMBOL(compat_ip_setsockopt);
  */
 
 static int do_ip_getsockopt(struct sock *sk, int level, int optname,
 			    char __user *optval, int __user *optlen)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	int val;
 	int len;
 
-	if(level!=SOL_IP)
+	if (level != SOL_IP)
 		return -EOPNOTSUPP;
 
 #ifdef CONFIG_IP_MROUTE
-	if(optname>=MRT_BASE && optname <=MRT_BASE+10)
-	{
+	if (optname >= MRT_BASE && optname <= MRT_BASE+10) {
 		return ip_mroute_getsockopt(sk,optname,optval,optlen);
 	}
 #endif
 
-	if(get_user(len,optlen))
+	if (get_user(len,optlen))
 		return -EFAULT;
-	if(len < 0)
+	if (len < 0)
 		return -EINVAL;
 
 	lock_sock(sk);
 
-	switch(optname) {
+	switch (optname) {
 	case IP_OPTIONS:
 	{
 		unsigned char optbuf[sizeof(struct ip_options)+40];
 		struct ip_options * opt = (struct ip_options*)optbuf;
 		opt->optlen = 0;
 		if (inet->opt)
 			memcpy(optbuf, inet->opt,
 			       sizeof(struct ip_options)+
 			       inet->opt->optlen);
 		release_sock(sk);
 
 		if (opt->optlen == 0)
 			return put_user(0, optlen);
 
 		ip_options_undo(opt);
 
 		len = min_t(unsigned int, len, opt->optlen);
-		if(put_user(len, optlen))
+		if (put_user(len, optlen))
 			return -EFAULT;
-		if(copy_to_user(optval, opt->__data, len))
+		if (copy_to_user(optval, opt->__data, len))
 			return -EFAULT;
 		return 0;
 	}
 	case IP_PKTINFO:
 		val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
 		break;
 	case IP_RECVTTL:
 		val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
 		break;
 	case IP_RECVTOS:
 		val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
 		break;
 	case IP_RECVOPTS:
 		val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
 		break;
 	case IP_RETOPTS:
 		val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
 		break;
 	case IP_PASSSEC:
 		val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
 		break;
 	case IP_TOS:
 		val = inet->tos;
 		break;
 	case IP_TTL:
 		val = (inet->uc_ttl == -1 ?
 		       sysctl_ip_default_ttl :
 		       inet->uc_ttl);
 		break;
 	case IP_HDRINCL:
 		val = inet->hdrincl;
 		break;
 	case IP_MTU_DISCOVER:
 		val = inet->pmtudisc;
 		break;
 	case IP_MTU:
 	{
 		struct dst_entry *dst;
 		val = 0;
 		dst = sk_dst_get(sk);
 		if (dst) {
 			val = dst_mtu(dst);
 			dst_release(dst);
 		}
 		if (!val) {
 			release_sock(sk);
 			return -ENOTCONN;
 		}
 		break;
 	}
 	case IP_RECVERR:
 		val = inet->recverr;
 		break;
 	case IP_MULTICAST_TTL:
 		val = inet->mc_ttl;
 		break;
 	case IP_MULTICAST_LOOP:
 		val = inet->mc_loop;
 		break;
 	case IP_MULTICAST_IF:
 	{
 		struct in_addr addr;
 		len = min_t(unsigned int, len, sizeof(struct in_addr));
 		addr.s_addr = inet->mc_addr;
 		release_sock(sk);
 
-		if(put_user(len, optlen))
+		if (put_user(len, optlen))
 			return -EFAULT;
-		if(copy_to_user(optval, &addr, len))
+		if (copy_to_user(optval, &addr, len))
 			return -EFAULT;
 		return 0;
 	}
 	case IP_MSFILTER:
 	{
 		struct ip_msfilter msf;
 		int err;
 
 		if (len < IP_MSFILTER_SIZE(0)) {
 			release_sock(sk);
 			return -EINVAL;
 		}
 		if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
 			release_sock(sk);
 			return -EFAULT;
 		}
 		err = ip_mc_msfget(sk, &msf,
 				   (struct ip_msfilter __user *)optval, optlen);
 		release_sock(sk);
 		return err;
 	}
 	case MCAST_MSFILTER:
 	{
 		struct group_filter gsf;
 		int err;
 
 		if (len < GROUP_FILTER_SIZE(0)) {
 			release_sock(sk);
 			return -EINVAL;
 		}
 		if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) {
 			release_sock(sk);
 			return -EFAULT;
 		}
 		err = ip_mc_gsfget(sk, &gsf,
 				   (struct group_filter __user *)optval, optlen);
 		release_sock(sk);
 		return err;
 	}
 	case IP_PKTOPTIONS:
 	{
 		struct msghdr msg;
 
 		release_sock(sk);
 
 		if (sk->sk_type != SOCK_STREAM)
 			return -ENOPROTOOPT;
 
 		msg.msg_control = optval;
 		msg.msg_controllen = len;
 		msg.msg_flags = 0;
 
 		if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
 			struct in_pktinfo info;
 
 			info.ipi_addr.s_addr = inet->rcv_saddr;
 			info.ipi_spec_dst.s_addr = inet->rcv_saddr;
 			info.ipi_ifindex = inet->mc_index;
 			put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
 		}
 		if (inet->cmsg_flags & IP_CMSG_TTL) {
 			int hlim = inet->mc_ttl;
 			put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
 		}
 		len -= msg.msg_controllen;
 		return put_user(len, optlen);
 	}
 	case IP_FREEBIND:
 		val = inet->freebind;
 		break;
 	default:
 		release_sock(sk);
 		return -ENOPROTOOPT;
 	}
 	release_sock(sk);
 
 	if (len < sizeof(int) && len > 0 && val>=0 && val<255) {
 		unsigned char ucval = (unsigned char)val;
 		len = 1;
-		if(put_user(len, optlen))
+		if (put_user(len, optlen))
 			return -EFAULT;
-		if(copy_to_user(optval,&ucval,1))
+		if (copy_to_user(optval,&ucval,1))
 			return -EFAULT;
 	} else {
 		len = min_t(unsigned int, sizeof(int), len);
-		if(put_user(len, optlen))
+		if (put_user(len, optlen))
 			return -EFAULT;
-		if(copy_to_user(optval,&val,len))
+		if (copy_to_user(optval,&val,len))
 			return -EFAULT;
 	}
 	return 0;
 }
 
 int ip_getsockopt(struct sock *sk, int level,
 		  int optname, char __user *optval, int __user *optlen)
 {
 	int err;
 
@@ -1169,7 +1174,7 @@ int ip_getsockopt(struct sock *sk, int level,
 	) {
 		int len;
 
-		if(get_user(len,optlen))
+		if (get_user(len,optlen))
 			return -EFAULT;
 
 		lock_sock(sk);
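
[Editor's note: the common exit path at the end of do_ip_getsockopt() above explains a quirk of this API: when the caller passes 0 < optlen < sizeof(int) and the value fits in 0..254, the kernel truncates the result to a single byte instead of failing. A minimal userspace sketch of that behaviour:

	#include <netinet/in.h>
	#include <sys/socket.h>

	int get_ttl_as_byte(int fd, unsigned char *ttl)
	{
		socklen_t len = 1;	/* deliberately smaller than sizeof(int) */

		return getsockopt(fd, IPPROTO_IP, IP_TTL, ttl, &len);
	}
]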
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index aa704b88f014..ab86137c71d2 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -43,21 +43,15 @@ static LIST_HEAD(ipcomp_tfms_list);
 
 static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
 {
-	int err, plen, dlen;
 	struct ipcomp_data *ipcd = x->data;
-	u8 *start, *scratch;
-	struct crypto_comp *tfm;
-	int cpu;
-
-	plen = skb->len;
-	dlen = IPCOMP_SCRATCH_SIZE;
-	start = skb->data;
-
-	cpu = get_cpu();
-	scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
-	tfm = *per_cpu_ptr(ipcd->tfms, cpu);
-
-	err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
+	const int plen = skb->len;
+	int dlen = IPCOMP_SCRATCH_SIZE;
+	const u8 *start = skb->data;
+	const int cpu = get_cpu();
+	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
+	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+	int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
+
 	if (err)
 		goto out;
 
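
[Editor's note: the rewrite above keeps the same decompression pattern -- a fixed per-CPU scratch buffer with an in/out length argument -- while folding the setup into initializers; get_cpu()/put_cpu() pin the task so the scratch buffer cannot be yanked away by CPU migration. A userspace analogue of the length convention using zlib's uncompress(), with the buffer size mirrored from IPCOMP_SCRATCH_SIZE as an assumption:

	#include <zlib.h>

	#define SCRATCH_SIZE 65400UL	/* mirrors IPCOMP_SCRATCH_SIZE */

	static unsigned char scratch[SCRATCH_SIZE];

	/* dlen is in/out, exactly like crypto_comp_decompress() above:
	 * it starts as the scratch capacity and returns the real size. */
	int decompress_payload(const Bytef *in, uLong plen, uLongf *dlen)
	{
		*dlen = SCRATCH_SIZE;
		return uncompress(scratch, dlen, in, plen) == Z_OK ? 0 : -1;
	}
]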
@@ -90,10 +84,10 @@ static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb)
 	skb->ip_summed = CHECKSUM_NONE;
 
 	/* Remove ipcomp header and decompress original payload */
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	ipch = (void *)skb->data;
 	iph->protocol = ipch->nexthdr;
-	skb->h.raw = skb->nh.raw + sizeof(*ipch);
+	skb->transport_header = skb->network_header + sizeof(*ipch);
 	__skb_pull(skb, sizeof(*ipch));
 	err = ipcomp_decompress(x, skb);
 
@@ -103,23 +97,16 @@ out:
 
 static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
 {
-	int err, plen, dlen, ihlen;
-	struct iphdr *iph = skb->nh.iph;
 	struct ipcomp_data *ipcd = x->data;
-	u8 *start, *scratch;
-	struct crypto_comp *tfm;
-	int cpu;
-
-	ihlen = iph->ihl * 4;
-	plen = skb->len - ihlen;
-	dlen = IPCOMP_SCRATCH_SIZE;
-	start = skb->data + ihlen;
-
-	cpu = get_cpu();
-	scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
-	tfm = *per_cpu_ptr(ipcd->tfms, cpu);
-
-	err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
+	const int ihlen = ip_hdrlen(skb);
+	const int plen = skb->len - ihlen;
+	int dlen = IPCOMP_SCRATCH_SIZE;
+	u8 *start = skb->data + ihlen;
+	const int cpu = get_cpu();
+	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
+	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+	int err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
+
 	if (err)
 		goto out;
 
@@ -142,12 +129,11 @@ out:
 static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err;
-	struct iphdr *iph;
 	struct ip_comp_hdr *ipch;
 	struct ipcomp_data *ipcd = x->data;
 	int hdr_len = 0;
+	struct iphdr *iph = ip_hdr(skb);
 
-	iph = skb->nh.iph;
 	iph->tot_len = htons(skb->len);
 	hdr_len = iph->ihl * 4;
 	if ((skb->len - hdr_len) < ipcd->threshold) {
@@ -159,7 +145,7 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb)
 		goto out_ok;
 
 	err = ipcomp_compress(x, skb);
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 
 	if (err) {
 		goto out_ok;
@@ -188,8 +174,8 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
 	struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
 	struct xfrm_state *x;
 
-	if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
-	    skb->h.icmph->code != ICMP_FRAG_NEEDED)
+	if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
+	    icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
 		return;
 
 	spi = htonl(ntohs(ipch->cpi));
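
[Editor's note: the icmp_hdr() conversions above belong to this merge's tree-wide move away from the old sk_buff header unions (skb->h, skb->nh) toward typed accessor helpers. A self-contained toy model of the pattern -- the struct names here are illustrative, not the kernel's:

	#include <stdint.h>

	struct toy_icmphdr {
		uint8_t	 type;
		uint8_t	 code;
		uint16_t checksum;
	};

	struct toy_skb {
		unsigned char *data;
		unsigned char *network_header;
		unsigned char *transport_header;
	};

	/* Set the header marks once... */
	static inline void toy_reset_network_header(struct toy_skb *skb)
	{
		skb->network_header = skb->data;
	}

	/* ...and read them back through typed accessors instead of
	 * open-coded unions like skb->h.icmph. */
	static inline struct toy_icmphdr *toy_icmp_hdr(const struct toy_skb *skb)
	{
		return (struct toy_icmphdr *)skb->transport_header;
	}
]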
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index cf49de1a4983..597c800b2fdc 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -432,7 +432,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 		goto drop;
 
 	/* Basic sanity checks can be done without the lock.  */
-	rarp = (struct arphdr *)skb->h.raw;
+	rarp = (struct arphdr *)skb_transport_header(skb);
 
 	/* If this test doesn't pass, it's not IP, or we should
 	 * ignore it anyway.
@@ -455,7 +455,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 		goto drop;
 
 	/* OK, it is all there and looks valid, process... */
-	rarp = (struct arphdr *)skb->h.raw;
+	rarp = (struct arphdr *)skb_transport_header(skb);
 	rarp_ptr = (unsigned char *) (rarp + 1);
 
 	/* One reply at a time, please. */
@@ -702,7 +702,8 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
 	memset(b, 0, sizeof(struct bootp_pkt));
 
 	/* Construct IP header */
-	skb->nh.iph = h = &b->iph;
+	skb_reset_network_header(skb);
+	h = ip_hdr(skb);
 	h->version = 4;
 	h->ihl = 5;
 	h->tot_len = htons(sizeof(struct bootp_pkt));
@@ -782,7 +783,7 @@ static void __init ic_do_bootp_ext(u8 *ext)
 	u8 *c;
 
 	printk("DHCP/BOOTP: Got extension %d:",*ext);
-	for(c=ext+2; c<ext+2+ext[1]; c++)
+	for (c=ext+2; c<ext+2+ext[1]; c++)
 		printk(" %02x", *c);
 	printk("\n");
 #endif
@@ -845,7 +846,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
 			     sizeof(struct udphdr)))
 		goto drop;
 
-	b = (struct bootp_pkt *) skb->nh.iph;
+	b = (struct bootp_pkt *)skb_network_header(skb);
 	h = &b->iph;
 
 	if (h->ihl != 5 || h->version != 4 || h->protocol != IPPROTO_UDP)
@@ -883,7 +884,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
 	if (!pskb_may_pull(skb, skb->len))
 		goto drop;
 
-	b = (struct bootp_pkt *) skb->nh.iph;
+	b = (struct bootp_pkt *)skb_network_header(skb);
 	h = &b->iph;
 
 	/* One reply at a time, please. */
@@ -938,7 +939,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
 			if (opt[1] >= 4)
 				memcpy(&server_id, opt + 2, 4);
 			break;
-		};
+		}
 	}
 
 #ifdef IPCONFIG_DEBUG
@@ -983,7 +984,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
 		ic_myaddr = NONE;
 		ic_servaddr = NONE;
 		goto drop_unlock;
-	};
+	}
 
 	ic_dhcp_msgtype = mt;
 
@@ -1094,7 +1095,7 @@ static int __init ic_dynamic(void)
 	retries = CONF_SEND_RETRIES;
 	get_random_bytes(&timeout, sizeof(timeout));
 	timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM);
-	for(;;) {
+	for (;;) {
 #ifdef IPCONFIG_BOOTP
 		if (do_bootp && (d->able & IC_BOOTP))
 			ic_bootp_send_if(d, jiffies - start_jiffies);
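
[Editor's note: the loop fixed for style in ic_do_bootp_ext() walks a BOOTP/DHCP option in its TLV encoding: ext[0] is the tag, ext[1] the value length, then ext[1] value bytes follow. The same walk as a standalone sketch:

	#include <stdio.h>

	static void print_bootp_ext(const unsigned char *ext)
	{
		const unsigned char *c;

		printf("DHCP/BOOTP: Got extension %d:", ext[0]);
		for (c = ext + 2; c < ext + 2 + ext[1]; c++)
			printf(" %02x", *c);
		printf("\n");
	}
]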
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 3ec5ce0f5498..ebd2f2d532f6 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -157,10 +157,10 @@ static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
 	return NULL;
 }
 
-static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
+static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
 {
-	__be32 remote = t->parms.iph.daddr;
-	__be32 local = t->parms.iph.saddr;
+	__be32 remote = parms->iph.daddr;
+	__be32 local = parms->iph.saddr;
 	unsigned h = 0;
 	int prio = 0;
 
@@ -175,6 +175,10 @@ static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
 	return &tunnels[prio][h];
 }
 
+static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
+{
+	return __ipip_bucket(&t->parms);
+}
 
 static void ipip_tunnel_unlink(struct ip_tunnel *t)
 {
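
[Editor's note: the refactor above splits bucket selection out of ipip_bucket() so that ipip_tunnel_locate() can hash a struct ip_tunnel_parm before any tunnel exists for it. The selection logic as a standalone sketch, with an illustrative stand-in for the driver's HASH() macro:

	#include <stdint.h>

	#define HASH_SIZE 16
	#define HASH(addr) ((addr) & (HASH_SIZE - 1))	/* stand-in only */

	/* prio selects one of four tables (neither/local/remote/both
	 * endpoints known); h hashes whichever endpoints are present. */
	static unsigned int bucket_index(uint32_t remote, uint32_t local,
					 int *prio)
	{
		unsigned int h = 0;

		*prio = 0;
		if (remote) {
			*prio |= 2;
			h ^= HASH(remote);
		}
		if (local) {
			*prio |= 1;
			h ^= HASH(local);
		}
		return h;
	}
]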
@@ -206,19 +210,9 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c
 	__be32 local = parms->iph.saddr;
 	struct ip_tunnel *t, **tp, *nt;
 	struct net_device *dev;
-	unsigned h = 0;
-	int prio = 0;
 	char name[IFNAMSIZ];
 
-	if (remote) {
-		prio |= 2;
-		h ^= HASH(remote);
-	}
-	if (local) {
-		prio |= 1;
-		h ^= HASH(local);
-	}
-	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+	for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 			return t;
 	}
@@ -280,8 +274,8 @@ static int ipip_err(struct sk_buff *skb, u32 info)
 	   ICMP in the real Internet is absolutely infeasible.
 	 */
 	struct iphdr *iph = (struct iphdr*)skb->data;
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	struct ip_tunnel *t;
 	int err;
 
@@ -336,8 +330,8 @@ out:
 	struct iphdr *iph = (struct iphdr*)dp;
 	int hlen = iph->ihl<<2;
 	struct iphdr *eiph;
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	int rel_type = 0;
 	int rel_code = 0;
 	__be32 rel_info = 0;
@@ -354,7 +348,7 @@ out:
 	default:
 		return 0;
 	case ICMP_PARAMETERPROB:
-		n = ntohl(skb->h.icmph->un.gateway) >> 24;
+		n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
 		if (n < hlen)
 			return 0;
 
@@ -373,7 +367,7 @@ out:
 		return 0;
 	case ICMP_FRAG_NEEDED:
 		/* And it is the only really necessary thing :-) */
-		n = ntohs(skb->h.icmph->un.frag.mtu);
+		n = ntohs(icmp_hdr(skb)->un.frag.mtu);
 		if (n < hlen+68)
 			return 0;
 		n -= hlen;
@@ -405,7 +399,7 @@ out:
 	dst_release(skb2->dst);
 	skb2->dst = NULL;
 	skb_pull(skb2, skb->data - (u8*)eiph);
-	skb2->nh.raw = skb2->data;
+	skb_reset_network_header(skb2);
 
 	/* Try to guess incoming interface */
 	memset(&fl, 0, sizeof(fl));
@@ -461,9 +455,10 @@ out:
 #endif
 }
 
-static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
+static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
+					struct sk_buff *skb)
 {
-	struct iphdr *inner_iph = skb->nh.iph;
+	struct iphdr *inner_iph = ip_hdr(skb);
 
 	if (INET_ECN_is_ce(outer_iph->tos))
 		IP_ECN_set_ce(inner_iph);
@@ -471,10 +466,8 @@ static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff
 
 static int ipip_rcv(struct sk_buff *skb)
 {
-	struct iphdr *iph;
 	struct ip_tunnel *tunnel;
-
-	iph = skb->nh.iph;
+	const struct iphdr *iph = ip_hdr(skb);
 
 	read_lock(&ipip_lock);
 	if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
@@ -486,8 +479,8 @@ static int ipip_rcv(struct sk_buff *skb)
 
 		secpath_reset(skb);
 
-		skb->mac.raw = skb->nh.raw;
-		skb->nh.raw = skb->data;
+		skb->mac_header = skb->network_header;
+		skb_reset_network_header(skb);
 		skb->protocol = htons(ETH_P_IP);
 		skb->pkt_type = PACKET_HOST;
 
@@ -521,7 +514,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	__be16 df = tiph->frag_off;
 	struct rtable *rt;			/* Route to the other host */
 	struct net_device *tdev;		/* Device to other host */
-	struct iphdr *old_iph = skb->nh.iph;
+	struct iphdr *old_iph = ip_hdr(skb);
 	struct iphdr *iph;			/* Our new IP header */
 	int max_headroom;			/* The extra header space needed */
 	__be32 dst = tiph->daddr;
@@ -615,11 +608,12 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 		skb_set_owner_w(new_skb, skb->sk);
 		dev_kfree_skb(skb);
 		skb = new_skb;
-		old_iph = skb->nh.iph;
+		old_iph = ip_hdr(skb);
 	}
 
-	skb->h.raw = skb->nh.raw;
-	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
+	skb->transport_header = skb->network_header;
+	skb_push(skb, sizeof(struct iphdr));
+	skb_reset_network_header(skb);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 			      IPSKB_REROUTED);
@@ -630,7 +624,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	 *	Push down and install the IPIP header.
 	 */
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	iph->version = 4;
 	iph->ihl = sizeof(struct iphdr)>>2;
 	iph->frag_off = df;
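
[Editor's note: the ipip_tunnel_xmit() hunks above preserve the classic encapsulation dance while switching to the new helpers: remember the inner IP header as the transport header, push room for the outer header, then reset the network header to the new front. A toy model of the sequence (struct pkt is illustrative, and headroom is assumed available):

	#include <string.h>

	struct pkt {
		unsigned char *data;		/* current packet front */
		unsigned char *network_header;
		unsigned char *transport_header;
	};

	static void encapsulate(struct pkt *p, const void *outer_hdr,
				size_t hlen)
	{
		/* Inner IP header becomes the payload... */
		p->transport_header = p->network_header;
		/* ...make room in front... */
		p->data -= hlen;
		/* ...and the outer header is now the network header. */
		p->network_header = p->data;
		memcpy(p->network_header, outer_hdr, hlen);
	}
]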
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 601e3df69258..0ebae413ae87 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -62,6 +62,7 @@
 #include <linux/netfilter_ipv4.h>
 #include <net/ipip.h>
 #include <net/checksum.h>
+#include <net/netlink.h>
 
 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
 #define CONFIG_IP_PIMSM	1
@@ -302,8 +303,8 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
 
 	atomic_dec(&cache_resolve_queue_len);
 
-	while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
-		if (skb->nh.iph->version == 0) {
+	while ((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
+		if (ip_hdr(skb)->version == 0) {
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 			nlh->nlmsg_type = NLMSG_ERROR;
 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
@@ -479,7 +480,7 @@ static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
 static struct mfc_cache *ipmr_cache_alloc(void)
 {
 	struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
-	if(c==NULL)
+	if (c==NULL)
 		return NULL;
 	c->mfc_un.res.minvif = MAXVIFS;
 	return c;
@@ -488,7 +489,7 @@ static struct mfc_cache *ipmr_cache_alloc(void)
 static struct mfc_cache *ipmr_cache_alloc_unres(void)
 {
 	struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
-	if(c==NULL)
+	if (c==NULL)
 		return NULL;
 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
 	c->mfc_un.unres.expires = jiffies + 10*HZ;
@@ -508,12 +509,13 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 	 *	Play the pending entries through our router
 	 */
 
-	while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
-		if (skb->nh.iph->version == 0) {
+	while ((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+		if (ip_hdr(skb)->version == 0) {
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 
 			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
-				nlh->nlmsg_len = skb->tail - (u8*)nlh;
+				nlh->nlmsg_len = (skb_tail_pointer(skb) -
+						  (u8 *)nlh);
 			} else {
 				nlh->nlmsg_type = NLMSG_ERROR;
 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
@@ -539,7 +541,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 {
 	struct sk_buff *skb;
-	int ihl = pkt->nh.iph->ihl<<2;
+	const int ihl = ip_hdrlen(pkt);
 	struct igmphdr *igmp;
 	struct igmpmsg *msg;
 	int ret;
@@ -551,7 +553,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 #endif
 	skb = alloc_skb(128, GFP_ATOMIC);
 
-	if(!skb)
+	if (!skb)
 		return -ENOBUFS;
 
 #ifdef CONFIG_IP_PIMSM
@@ -561,14 +563,17 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 	   And all this only to mangle msg->im_msgtype and
 	   to set msg->im_mbz to "mbz" :-)
 	 */
-	msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
-	skb->nh.raw = skb->h.raw = (u8*)msg;
-	memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
+	skb_push(skb, sizeof(struct iphdr));
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+	msg = (struct igmpmsg *)skb_network_header(skb);
+	memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
 	msg->im_msgtype = IGMPMSG_WHOLEPKT;
 	msg->im_mbz = 0;
 	msg->im_vif = reg_vif_num;
-	skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
-	skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
+	ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
+	ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
+				     sizeof(struct iphdr));
 	} else
 #endif
 	{
@@ -577,10 +582,11 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 		 *	Copy the IP header
 		 */
 
-		skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
-		memcpy(skb->data,pkt->data,ihl);
-		skb->nh.iph->protocol = 0;	/* Flag to the kernel this is a route add */
-		msg = (struct igmpmsg*)skb->nh.iph;
+		skb->network_header = skb->tail;
+		skb_put(skb, ihl);
+		skb_copy_to_linear_data(skb, pkt->data, ihl);
+		ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
+		msg = (struct igmpmsg *)skb_network_header(skb);
 		msg->im_vif = vifi;
 		skb->dst = dst_clone(pkt->dst);
 
@@ -592,8 +598,8 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 	igmp->type	=
 	msg->im_msgtype = assert;
 	igmp->code	= 0;
-	skb->nh.iph->tot_len=htons(skb->len);		/* Fix the length */
-	skb->h.raw = skb->nh.raw;
+	ip_hdr(skb)->tot_len = htons(skb->len);		/* Fix the length */
+	skb->transport_header = skb->network_header;
 	}
 
 	if (mroute_socket == NULL) {
@@ -622,11 +628,12 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
 {
 	int err;
 	struct mfc_cache *c;
+	const struct iphdr *iph = ip_hdr(skb);
 
 	spin_lock_bh(&mfc_unres_lock);
 	for (c=mfc_unres_queue; c; c=c->next) {
-		if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
-		    c->mfc_origin == skb->nh.iph->saddr)
+		if (c->mfc_mcastgrp == iph->daddr &&
+		    c->mfc_origin == iph->saddr)
 			break;
 	}
 
@@ -646,9 +653,9 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
 		/*
 		 *	Fill in the new cache entry
 		 */
-		c->mfc_parent=-1;
-		c->mfc_origin=skb->nh.iph->saddr;
-		c->mfc_mcastgrp=skb->nh.iph->daddr;
+		c->mfc_parent = -1;
+		c->mfc_origin = iph->saddr;
+		c->mfc_mcastgrp = iph->daddr;
 
 		/*
 		 *	Reflect first query at mrouted.
@@ -734,7 +741,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
 		return 0;
 	}
 
-	if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
+	if (!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
 		return -EINVAL;
 
 	c=ipmr_cache_alloc();
@@ -788,7 +795,7 @@ static void mroute_clean_tables(struct sock *sk)
 	/*
 	 *	Shut down all active vif entries
 	 */
-	for(i=0; i<maxvif; i++) {
+	for (i=0; i<maxvif; i++) {
 		if (!(vif_table[i].flags&VIFF_STATIC))
 			vif_delete(i);
 	}
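
[Editor's note: ipmr_cache_report() above builds the IGMPMSG upcall a multicast routing daemon reads from its raw IGMP socket; the kernel overlays struct igmpmsg on the copied IP header, which is why it zeroes ip->protocol (genuine packets carry IPPROTO_IGMP in that byte, so im_mbz == 0 marks a kernel upcall). A sketch of the user-side view, with the layout mirrored from <linux/mroute.h> as an assumption:

	#include <netinet/in.h>
	#include <unistd.h>

	struct igmpmsg_view {			/* mirrors struct igmpmsg */
		unsigned int  unused1, unused2;
		unsigned char im_msgtype;	/* e.g. IGMPMSG_NOCACHE */
		unsigned char im_mbz;		/* must be zero for upcalls */
		unsigned char im_vif;		/* arriving vif index */
		unsigned char unused3;
		struct in_addr im_src, im_dst;
	};

	static int read_upcall(int igmp_fd)
	{
		unsigned char buf[2048];
		const struct igmpmsg_view *msg = (const void *)buf;
		ssize_t n = read(igmp_fd, buf, sizeof(buf));

		if (n < (ssize_t)sizeof(*msg))
			return -1;
		return msg->im_mbz == 0 ? msg->im_msgtype : -1;
	}
]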
@@ -858,119 +865,117 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt
858 struct vifctl vif; 865 struct vifctl vif;
859 struct mfcctl mfc; 866 struct mfcctl mfc;
860 867
861 if(optname!=MRT_INIT) 868 if (optname != MRT_INIT) {
862 { 869 if (sk != mroute_socket && !capable(CAP_NET_ADMIN))
863 if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
864 return -EACCES; 870 return -EACCES;
865 } 871 }
866 872
867 switch(optname) 873 switch (optname) {
868 { 874 case MRT_INIT:
869 case MRT_INIT: 875 if (sk->sk_type != SOCK_RAW ||
870 if (sk->sk_type != SOCK_RAW || 876 inet_sk(sk)->num != IPPROTO_IGMP)
871 inet_sk(sk)->num != IPPROTO_IGMP) 877 return -EOPNOTSUPP;
872 return -EOPNOTSUPP; 878 if (optlen!=sizeof(int))
873 if(optlen!=sizeof(int)) 879 return -ENOPROTOOPT;
874 return -ENOPROTOOPT;
875
876 rtnl_lock();
877 if (mroute_socket) {
878 rtnl_unlock();
879 return -EADDRINUSE;
880 }
881
882 ret = ip_ra_control(sk, 1, mrtsock_destruct);
883 if (ret == 0) {
884 write_lock_bh(&mrt_lock);
885 mroute_socket=sk;
886 write_unlock_bh(&mrt_lock);
887 880
888 ipv4_devconf.mc_forwarding++; 881 rtnl_lock();
889 } 882 if (mroute_socket) {
890 rtnl_unlock(); 883 rtnl_unlock();
891 return ret; 884 return -EADDRINUSE;
892 case MRT_DONE: 885 }
893 if (sk!=mroute_socket) 886
894 return -EACCES; 887 ret = ip_ra_control(sk, 1, mrtsock_destruct);
895 return ip_ra_control(sk, 0, NULL); 888 if (ret == 0) {
896 case MRT_ADD_VIF: 889 write_lock_bh(&mrt_lock);
897 case MRT_DEL_VIF: 890 mroute_socket=sk;
898 if(optlen!=sizeof(vif)) 891 write_unlock_bh(&mrt_lock);
899 return -EINVAL; 892
900 if (copy_from_user(&vif,optval,sizeof(vif))) 893 ipv4_devconf.mc_forwarding++;
901 return -EFAULT; 894 }
902 if(vif.vifc_vifi >= MAXVIFS) 895 rtnl_unlock();
903 return -ENFILE; 896 return ret;
904 rtnl_lock(); 897 case MRT_DONE:
905 if (optname==MRT_ADD_VIF) { 898 if (sk!=mroute_socket)
906 ret = vif_add(&vif, sk==mroute_socket); 899 return -EACCES;
907 } else { 900 return ip_ra_control(sk, 0, NULL);
908 ret = vif_delete(vif.vifc_vifi); 901 case MRT_ADD_VIF:
909 } 902 case MRT_DEL_VIF:
910 rtnl_unlock(); 903 if (optlen!=sizeof(vif))
911 return ret; 904 return -EINVAL;
905 if (copy_from_user(&vif,optval,sizeof(vif)))
906 return -EFAULT;
907 if (vif.vifc_vifi >= MAXVIFS)
908 return -ENFILE;
909 rtnl_lock();
910 if (optname==MRT_ADD_VIF) {
911 ret = vif_add(&vif, sk==mroute_socket);
912 } else {
913 ret = vif_delete(vif.vifc_vifi);
914 }
915 rtnl_unlock();
916 return ret;
912 917
913 /* 918 /*
914 * Manipulate the forwarding caches. These live 919 * Manipulate the forwarding caches. These live
915 * in a sort of kernel/user symbiosis. 920 * in a sort of kernel/user symbiosis.
916 */ 921 */
917 case MRT_ADD_MFC: 922 case MRT_ADD_MFC:
918 case MRT_DEL_MFC: 923 case MRT_DEL_MFC:
919 if(optlen!=sizeof(mfc)) 924 if (optlen!=sizeof(mfc))
920 return -EINVAL; 925 return -EINVAL;
921 if (copy_from_user(&mfc,optval, sizeof(mfc))) 926 if (copy_from_user(&mfc,optval, sizeof(mfc)))
922 return -EFAULT; 927 return -EFAULT;
923 rtnl_lock(); 928 rtnl_lock();
924 if (optname==MRT_DEL_MFC) 929 if (optname==MRT_DEL_MFC)
925 ret = ipmr_mfc_delete(&mfc); 930 ret = ipmr_mfc_delete(&mfc);
926 else 931 else
927 ret = ipmr_mfc_add(&mfc, sk==mroute_socket); 932 ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
928 rtnl_unlock(); 933 rtnl_unlock();
929 return ret; 934 return ret;
930 /* 935 /*
931 * Control PIM assert. 936 * Control PIM assert.
932 */ 937 */
933 case MRT_ASSERT: 938 case MRT_ASSERT:
934 { 939 {
935 int v; 940 int v;
936 if(get_user(v,(int __user *)optval)) 941 if (get_user(v,(int __user *)optval))
937 return -EFAULT; 942 return -EFAULT;
938 mroute_do_assert=(v)?1:0; 943 mroute_do_assert=(v)?1:0;
939 return 0; 944 return 0;
940 } 945 }
941#ifdef CONFIG_IP_PIMSM 946#ifdef CONFIG_IP_PIMSM
942 case MRT_PIM: 947 case MRT_PIM:
943 { 948 {
944 int v, ret; 949 int v, ret;
945 if(get_user(v,(int __user *)optval)) 950 if (get_user(v,(int __user *)optval))
946 return -EFAULT; 951 return -EFAULT;
947 v = (v)?1:0; 952 v = (v)?1:0;
948 rtnl_lock(); 953 rtnl_lock();
949 ret = 0; 954 ret = 0;
950 if (v != mroute_do_pim) { 955 if (v != mroute_do_pim) {
951 mroute_do_pim = v; 956 mroute_do_pim = v;
952 mroute_do_assert = v; 957 mroute_do_assert = v;
953#ifdef CONFIG_IP_PIMSM_V2 958#ifdef CONFIG_IP_PIMSM_V2
954 if (mroute_do_pim) 959 if (mroute_do_pim)
955 ret = inet_add_protocol(&pim_protocol, 960 ret = inet_add_protocol(&pim_protocol,
956 IPPROTO_PIM); 961 IPPROTO_PIM);
957 else 962 else
958 ret = inet_del_protocol(&pim_protocol, 963 ret = inet_del_protocol(&pim_protocol,
959 IPPROTO_PIM); 964 IPPROTO_PIM);
960 if (ret < 0) 965 if (ret < 0)
961 ret = -EAGAIN; 966 ret = -EAGAIN;
962#endif 967#endif
963 }
964 rtnl_unlock();
965 return ret;
966 } 968 }
969 rtnl_unlock();
970 return ret;
971 }
967#endif 972#endif
968 /* 973 /*
969 * Spurious command, or MRT_VERSION which you cannot 974 * Spurious command, or MRT_VERSION which you cannot
970 * set. 975 * set.
971 */ 976 */
972 default: 977 default:
973 return -ENOPROTOOPT; 978 return -ENOPROTOOPT;
974 } 979 }
975} 980}
976 981
@@ -983,7 +988,7 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __u
983 int olr; 988 int olr;
984 int val; 989 int val;
985 990
986 if(optname!=MRT_VERSION && 991 if (optname!=MRT_VERSION &&
987#ifdef CONFIG_IP_PIMSM 992#ifdef CONFIG_IP_PIMSM
988 optname!=MRT_PIM && 993 optname!=MRT_PIM &&
989#endif 994#endif
@@ -997,17 +1002,17 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __u
997 if (olr < 0) 1002 if (olr < 0)
998 return -EINVAL; 1003 return -EINVAL;
999 1004
1000 if(put_user(olr,optlen)) 1005 if (put_user(olr,optlen))
1001 return -EFAULT; 1006 return -EFAULT;
1002 if(optname==MRT_VERSION) 1007 if (optname==MRT_VERSION)
1003 val=0x0305; 1008 val=0x0305;
1004#ifdef CONFIG_IP_PIMSM 1009#ifdef CONFIG_IP_PIMSM
1005 else if(optname==MRT_PIM) 1010 else if (optname==MRT_PIM)
1006 val=mroute_do_pim; 1011 val=mroute_do_pim;
1007#endif 1012#endif
1008 else 1013 else
1009 val=mroute_do_assert; 1014 val=mroute_do_assert;
1010 if(copy_to_user(optval,&val,olr)) 1015 if (copy_to_user(optval,&val,olr))
1011 return -EFAULT; 1016 return -EFAULT;
1012 return 0; 1017 return 0;
1013} 1018}
@@ -1023,48 +1028,47 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1023 struct vif_device *vif; 1028 struct vif_device *vif;
1024 struct mfc_cache *c; 1029 struct mfc_cache *c;
1025 1030
1026 switch(cmd) 1031 switch (cmd) {
1027 { 1032 case SIOCGETVIFCNT:
1028 case SIOCGETVIFCNT: 1033 if (copy_from_user(&vr,arg,sizeof(vr)))
1029 if (copy_from_user(&vr,arg,sizeof(vr))) 1034 return -EFAULT;
1030 return -EFAULT; 1035 if (vr.vifi>=maxvif)
1031 if(vr.vifi>=maxvif) 1036 return -EINVAL;
1032 return -EINVAL; 1037 read_lock(&mrt_lock);
1033 read_lock(&mrt_lock); 1038 vif=&vif_table[vr.vifi];
1034 vif=&vif_table[vr.vifi]; 1039 if (VIF_EXISTS(vr.vifi)) {
1035 if(VIF_EXISTS(vr.vifi)) { 1040 vr.icount=vif->pkt_in;
1036 vr.icount=vif->pkt_in; 1041 vr.ocount=vif->pkt_out;
1037 vr.ocount=vif->pkt_out; 1042 vr.ibytes=vif->bytes_in;
1038 vr.ibytes=vif->bytes_in; 1043 vr.obytes=vif->bytes_out;
1039 vr.obytes=vif->bytes_out;
1040 read_unlock(&mrt_lock);
1041
1042 if (copy_to_user(arg,&vr,sizeof(vr)))
1043 return -EFAULT;
1044 return 0;
1045 }
1046 read_unlock(&mrt_lock); 1044 read_unlock(&mrt_lock);
1047 return -EADDRNOTAVAIL;
1048 case SIOCGETSGCNT:
1049 if (copy_from_user(&sr,arg,sizeof(sr)))
1050 return -EFAULT;
1051 1045
1052 read_lock(&mrt_lock); 1046 if (copy_to_user(arg,&vr,sizeof(vr)))
1053 c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr); 1047 return -EFAULT;
1054 if (c) { 1048 return 0;
1055 sr.pktcnt = c->mfc_un.res.pkt; 1049 }
1056 sr.bytecnt = c->mfc_un.res.bytes; 1050 read_unlock(&mrt_lock);
1057 sr.wrong_if = c->mfc_un.res.wrong_if; 1051 return -EADDRNOTAVAIL;
1058 read_unlock(&mrt_lock); 1052 case SIOCGETSGCNT:
1059 1053 if (copy_from_user(&sr,arg,sizeof(sr)))
1060 if (copy_to_user(arg,&sr,sizeof(sr))) 1054 return -EFAULT;
1061 return -EFAULT; 1055
1062 return 0; 1056 read_lock(&mrt_lock);
1063 } 1057 c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
1058 if (c) {
1059 sr.pktcnt = c->mfc_un.res.pkt;
1060 sr.bytecnt = c->mfc_un.res.bytes;
1061 sr.wrong_if = c->mfc_un.res.wrong_if;
1064 read_unlock(&mrt_lock); 1062 read_unlock(&mrt_lock);
1065 return -EADDRNOTAVAIL; 1063
1066 default: 1064 if (copy_to_user(arg,&sr,sizeof(sr)))
1067 return -ENOIOCTLCMD; 1065 return -EFAULT;
1066 return 0;
1067 }
1068 read_unlock(&mrt_lock);
1069 return -EADDRNOTAVAIL;
1070 default:
1071 return -ENOIOCTLCMD;
1068 } 1072 }
1069} 1073}
1070 1074
@@ -1076,7 +1080,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
1076 if (event != NETDEV_UNREGISTER) 1080 if (event != NETDEV_UNREGISTER)
1077 return NOTIFY_DONE; 1081 return NOTIFY_DONE;
1078 v=&vif_table[0]; 1082 v=&vif_table[0];
1079 for(ct=0;ct<maxvif;ct++,v++) { 1083 for (ct=0;ct<maxvif;ct++,v++) {
1080 if (v->dev==ptr) 1084 if (v->dev==ptr)
1081 vif_delete(ct); 1085 vif_delete(ct);
1082 } 1086 }
@@ -1096,11 +1100,17 @@ static struct notifier_block ip_mr_notifier={
1096 1100
1097static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) 1101static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1098{ 1102{
1099 struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr)); 1103 struct iphdr *iph;
1104 struct iphdr *old_iph = ip_hdr(skb);
1105
1106 skb_push(skb, sizeof(struct iphdr));
1107 skb->transport_header = skb->network_header;
1108 skb_reset_network_header(skb);
1109 iph = ip_hdr(skb);
1100 1110
1101 iph->version = 4; 1111 iph->version = 4;
1102 iph->tos = skb->nh.iph->tos; 1112 iph->tos = old_iph->tos;
1103 iph->ttl = skb->nh.iph->ttl; 1113 iph->ttl = old_iph->ttl;
1104 iph->frag_off = 0; 1114 iph->frag_off = 0;
1105 iph->daddr = daddr; 1115 iph->daddr = daddr;
1106 iph->saddr = saddr; 1116 iph->saddr = saddr;
@@ -1110,8 +1120,6 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1110 ip_select_ident(iph, skb->dst, NULL); 1120 ip_select_ident(iph, skb->dst, NULL);
1111 ip_send_check(iph); 1121 ip_send_check(iph);
1112 1122
1113 skb->h.ipiph = skb->nh.iph;
1114 skb->nh.iph = iph;
1115 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 1123 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1116 nf_reset(skb); 1124 nf_reset(skb);
1117} 1125}
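
The ip_encap() hunk above shows the pattern this whole merge applies to the old skb->nh/skb->h unions: header positions are moved with skb_push()/skb_reset_network_header() and read back through ip_hdr(), instead of poking the unions directly. A stripped-down sketch of the same encapsulation step (the function name is illustrative, and the caller is assumed to have reserved headroom for one struct iphdr):

/* Sketch: prepend an outer IPv4 header; the inner header's position
 * is remembered as the transport header, mirroring ip_encap(). */
static void encap_outer_header(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
        struct iphdr *old_iph = ip_hdr(skb);   /* the inner header */
        struct iphdr *iph;

        skb_push(skb, sizeof(struct iphdr));
        skb->transport_header = skb->network_header;
        skb_reset_network_header(skb);         /* network header = skb->data */
        iph = ip_hdr(skb);                     /* the new outer header */

        iph->version  = 4;
        iph->ihl      = sizeof(struct iphdr) >> 2;
        iph->tos      = old_iph->tos;          /* inherit TOS and TTL */
        iph->ttl      = old_iph->ttl;
        iph->frag_off = 0;
        iph->saddr    = saddr;
        iph->daddr    = daddr;
        iph->tot_len  = htons(skb->len);
        ip_send_check(iph);                    /* recompute header checksum */
}
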
@@ -1134,7 +1142,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
1134 1142
1135static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) 1143static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1136{ 1144{
1137 struct iphdr *iph = skb->nh.iph; 1145 const struct iphdr *iph = ip_hdr(skb);
1138 struct vif_device *vif = &vif_table[vifi]; 1146 struct vif_device *vif = &vif_table[vifi];
1139 struct net_device *dev; 1147 struct net_device *dev;
1140 struct rtable *rt; 1148 struct rtable *rt;
@@ -1200,8 +1208,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1200 1208
1201 dst_release(skb->dst); 1209 dst_release(skb->dst);
1202 skb->dst = &rt->u.dst; 1210 skb->dst = &rt->u.dst;
1203 iph = skb->nh.iph; 1211 ip_decrease_ttl(ip_hdr(skb));
1204 ip_decrease_ttl(iph);
1205 1212
1206 /* FIXME: forward and output firewalls used to be called here. 1213 /* FIXME: forward and output firewalls used to be called here.
1207 * What do we do with netfilter? -- RR */ 1214 * What do we do with netfilter? -- RR */
@@ -1301,7 +1308,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
1301 * Forward the frame 1308 * Forward the frame
1302 */ 1309 */
1303 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) { 1310 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1304 if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) { 1311 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1305 if (psend != -1) { 1312 if (psend != -1) {
1306 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1313 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1307 if (skb2) 1314 if (skb2)
@@ -1347,7 +1354,7 @@ int ip_mr_input(struct sk_buff *skb)
1347 if (IPCB(skb)->opt.router_alert) { 1354 if (IPCB(skb)->opt.router_alert) {
1348 if (ip_call_ra_chain(skb)) 1355 if (ip_call_ra_chain(skb))
1349 return 0; 1356 return 0;
1350 } else if (skb->nh.iph->protocol == IPPROTO_IGMP){ 1357 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1351 /* IGMPv1 (and broken IGMPv2 implementations sort of 1358 /* IGMPv1 (and broken IGMPv2 implementations sort of
1352 Cisco IOS <= 11.2(8)) do not put router alert 1359 Cisco IOS <= 11.2(8)) do not put router alert
1353 option to IGMP packets destined to routable 1360 option to IGMP packets destined to routable
@@ -1366,7 +1373,7 @@ int ip_mr_input(struct sk_buff *skb)
1366 } 1373 }
1367 1374
1368 read_lock(&mrt_lock); 1375 read_lock(&mrt_lock);
1369 cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr); 1376 cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1370 1377
1371 /* 1378 /*
1372 * No usable cache entry 1379 * No usable cache entry
@@ -1426,14 +1433,15 @@ int pim_rcv_v1(struct sk_buff * skb)
1426 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) 1433 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
1427 goto drop; 1434 goto drop;
1428 1435
1429 pim = (struct igmphdr*)skb->h.raw; 1436 pim = igmp_hdr(skb);
1430 1437
1431 if (!mroute_do_pim || 1438 if (!mroute_do_pim ||
1432 skb->len < sizeof(*pim) + sizeof(*encap) || 1439 skb->len < sizeof(*pim) + sizeof(*encap) ||
1433 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 1440 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1434 goto drop; 1441 goto drop;
1435 1442
1436 encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr)); 1443 encap = (struct iphdr *)(skb_transport_header(skb) +
1444 sizeof(struct igmphdr));
1437 /* 1445 /*
1438 Check that: 1446 Check that:
1439 a. packet is really destined to a multicast group 1447 a. packet is really destined to a multicast group

@@ -1455,9 +1463,9 @@ int pim_rcv_v1(struct sk_buff * skb)
1455 if (reg_dev == NULL) 1463 if (reg_dev == NULL)
1456 goto drop; 1464 goto drop;
1457 1465
1458 skb->mac.raw = skb->nh.raw; 1466 skb->mac_header = skb->network_header;
1459 skb_pull(skb, (u8*)encap - skb->data); 1467 skb_pull(skb, (u8*)encap - skb->data);
1460 skb->nh.iph = (struct iphdr *)skb->data; 1468 skb_reset_network_header(skb);
1461 skb->dev = reg_dev; 1469 skb->dev = reg_dev;
1462 skb->protocol = htons(ETH_P_IP); 1470 skb->protocol = htons(ETH_P_IP);
1463 skb->ip_summed = 0; 1471 skb->ip_summed = 0;
@@ -1486,7 +1494,7 @@ static int pim_rcv(struct sk_buff * skb)
1486 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) 1494 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
1487 goto drop; 1495 goto drop;
1488 1496
1489 pim = (struct pimreghdr*)skb->h.raw; 1497 pim = (struct pimreghdr *)skb_transport_header(skb);
1490 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) || 1498 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1491 (pim->flags&PIM_NULL_REGISTER) || 1499 (pim->flags&PIM_NULL_REGISTER) ||
1492 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && 1500 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
@@ -1494,7 +1502,8 @@ static int pim_rcv(struct sk_buff * skb)
1494 goto drop; 1502 goto drop;
1495 1503
1496 /* check if the inner packet is destined to mcast group */ 1504 /* check if the inner packet is destined to mcast group */
1497 encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr)); 1505 encap = (struct iphdr *)(skb_transport_header(skb) +
1506 sizeof(struct pimreghdr));
1498 if (!MULTICAST(encap->daddr) || 1507 if (!MULTICAST(encap->daddr) ||
1499 encap->tot_len == 0 || 1508 encap->tot_len == 0 ||
1500 ntohs(encap->tot_len) + sizeof(*pim) > skb->len) 1509 ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
@@ -1510,9 +1519,9 @@ static int pim_rcv(struct sk_buff * skb)
1510 if (reg_dev == NULL) 1519 if (reg_dev == NULL)
1511 goto drop; 1520 goto drop;
1512 1521
1513 skb->mac.raw = skb->nh.raw; 1522 skb->mac_header = skb->network_header;
1514 skb_pull(skb, (u8*)encap - skb->data); 1523 skb_pull(skb, (u8*)encap - skb->data);
1515 skb->nh.iph = (struct iphdr *)skb->data; 1524 skb_reset_network_header(skb);
1516 skb->dev = reg_dev; 1525 skb->dev = reg_dev;
1517 skb->protocol = htons(ETH_P_IP); 1526 skb->protocol = htons(ETH_P_IP);
1518 skb->ip_summed = 0; 1527 skb->ip_summed = 0;
@@ -1537,7 +1546,7 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1537 int ct; 1546 int ct;
1538 struct rtnexthop *nhp; 1547 struct rtnexthop *nhp;
1539 struct net_device *dev = vif_table[c->mfc_parent].dev; 1548 struct net_device *dev = vif_table[c->mfc_parent].dev;
1540 u8 *b = skb->tail; 1549 u8 *b = skb_tail_pointer(skb);
1541 struct rtattr *mp_head; 1550 struct rtattr *mp_head;
1542 1551
1543 if (dev) 1552 if (dev)
@@ -1557,12 +1566,12 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1557 } 1566 }
1558 } 1567 }
1559 mp_head->rta_type = RTA_MULTIPATH; 1568 mp_head->rta_type = RTA_MULTIPATH;
1560 mp_head->rta_len = skb->tail - (u8*)mp_head; 1569 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1561 rtm->rtm_type = RTN_MULTICAST; 1570 rtm->rtm_type = RTN_MULTICAST;
1562 return 1; 1571 return 1;
1563 1572
1564rtattr_failure: 1573rtattr_failure:
1565 skb_trim(skb, b - skb->data); 1574 nlmsg_trim(skb, b);
1566 return -EMSGSIZE; 1575 return -EMSGSIZE;
1567} 1576}
1568 1577
@@ -1577,6 +1586,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1577 1586
1578 if (cache==NULL) { 1587 if (cache==NULL) {
1579 struct sk_buff *skb2; 1588 struct sk_buff *skb2;
1589 struct iphdr *iph;
1580 struct net_device *dev; 1590 struct net_device *dev;
1581 int vif; 1591 int vif;
1582 1592
@@ -1596,11 +1606,13 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1596 return -ENOMEM; 1606 return -ENOMEM;
1597 } 1607 }
1598 1608
1599 skb2->nh.raw = skb_push(skb2, sizeof(struct iphdr)); 1609 skb_push(skb2, sizeof(struct iphdr));
1600 skb2->nh.iph->ihl = sizeof(struct iphdr)>>2; 1610 skb_reset_network_header(skb2);
1601 skb2->nh.iph->saddr = rt->rt_src; 1611 iph = ip_hdr(skb2);
1602 skb2->nh.iph->daddr = rt->rt_dst; 1612 iph->ihl = sizeof(struct iphdr) >> 2;
1603 skb2->nh.iph->version = 0; 1613 iph->saddr = rt->rt_src;
1614 iph->daddr = rt->rt_dst;
1615 iph->version = 0;
1604 err = ipmr_cache_unresolved(vif, skb2); 1616 err = ipmr_cache_unresolved(vif, skb2);
1605 read_unlock(&mrt_lock); 1617 read_unlock(&mrt_lock);
1606 return err; 1618 return err;
@@ -1625,7 +1637,7 @@ static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
1625 loff_t pos) 1637 loff_t pos)
1626{ 1638{
1627 for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) { 1639 for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
1628 if(!VIF_EXISTS(iter->ct)) 1640 if (!VIF_EXISTS(iter->ct))
1629 continue; 1641 continue;
1630 if (pos-- == 0) 1642 if (pos-- == 0)
1631 return &vif_table[iter->ct]; 1643 return &vif_table[iter->ct];
@@ -1649,7 +1661,7 @@ static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1649 return ipmr_vif_seq_idx(iter, 0); 1661 return ipmr_vif_seq_idx(iter, 0);
1650 1662
1651 while (++iter->ct < maxvif) { 1663 while (++iter->ct < maxvif) {
1652 if(!VIF_EXISTS(iter->ct)) 1664 if (!VIF_EXISTS(iter->ct))
1653 continue; 1665 continue;
1654 return &vif_table[iter->ct]; 1666 return &vif_table[iter->ct];
1655 } 1667 }
@@ -1680,7 +1692,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1680 return 0; 1692 return 0;
1681} 1693}
1682 1694
1683static struct seq_operations ipmr_vif_seq_ops = { 1695static const struct seq_operations ipmr_vif_seq_ops = {
1684 .start = ipmr_vif_seq_start, 1696 .start = ipmr_vif_seq_start,
1685 .next = ipmr_vif_seq_next, 1697 .next = ipmr_vif_seq_next,
1686 .stop = ipmr_vif_seq_stop, 1698 .stop = ipmr_vif_seq_stop,
@@ -1732,14 +1744,14 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
1732 it->cache = mfc_cache_array; 1744 it->cache = mfc_cache_array;
1733 read_lock(&mrt_lock); 1745 read_lock(&mrt_lock);
1734 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) 1746 for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1735 for(mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next) 1747 for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
1736 if (pos-- == 0) 1748 if (pos-- == 0)
1737 return mfc; 1749 return mfc;
1738 read_unlock(&mrt_lock); 1750 read_unlock(&mrt_lock);
1739 1751
1740 it->cache = &mfc_unres_queue; 1752 it->cache = &mfc_unres_queue;
1741 spin_lock_bh(&mfc_unres_lock); 1753 spin_lock_bh(&mfc_unres_lock);
1742 for(mfc = mfc_unres_queue; mfc; mfc = mfc->next) 1754 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1743 if (pos-- == 0) 1755 if (pos-- == 0)
1744 return mfc; 1756 return mfc;
1745 spin_unlock_bh(&mfc_unres_lock); 1757 spin_unlock_bh(&mfc_unres_lock);
@@ -1829,9 +1841,9 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1829 mfc->mfc_un.res.wrong_if); 1841 mfc->mfc_un.res.wrong_if);
1830 1842
1831 if (it->cache != &mfc_unres_queue) { 1843 if (it->cache != &mfc_unres_queue) {
1832 for(n = mfc->mfc_un.res.minvif; 1844 for (n = mfc->mfc_un.res.minvif;
1833 n < mfc->mfc_un.res.maxvif; n++ ) { 1845 n < mfc->mfc_un.res.maxvif; n++ ) {
1834 if(VIF_EXISTS(n) 1846 if (VIF_EXISTS(n)
1835 && mfc->mfc_un.res.ttls[n] < 255) 1847 && mfc->mfc_un.res.ttls[n] < 255)
1836 seq_printf(seq, 1848 seq_printf(seq,
1837 " %2d:%-3d", 1849 " %2d:%-3d",
@@ -1843,7 +1855,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1843 return 0; 1855 return 0;
1844} 1856}
1845 1857
1846static struct seq_operations ipmr_mfc_seq_ops = { 1858static const struct seq_operations ipmr_mfc_seq_ops = {
1847 .start = ipmr_mfc_seq_start, 1859 .start = ipmr_mfc_seq_start,
1848 .next = ipmr_mfc_seq_next, 1860 .next = ipmr_mfc_seq_next,
1849 .stop = ipmr_mfc_seq_stop, 1861 .stop = ipmr_mfc_seq_stop,
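
Two smaller substitutions in ipmr_fill_mroute() also recur across this merge: skb->tail becomes skb_tail_pointer(skb), which yields a real pointer whether struct sk_buff stores the tail as a pointer or as an offset (sk_buff_data_t), and the error-path skb_trim(skb, b - skb->data) becomes nlmsg_trim(skb, b), trimming straight back to a saved pointer. A hedged sketch of the nested-attribute pattern that function implements (names simplified; rtnexthop appending elided):

/* Sketch: open an RTA_MULTIPATH nest, append to it, and fix up its
 * length from the tail pointer; roll everything back on overflow. */
static int fill_multipath(struct sk_buff *skb)
{
        u8 *b = skb_tail_pointer(skb);         /* rollback point */
        struct rtattr *mp_head;

        if (skb_tailroom(skb) < RTA_LENGTH(0))
                goto rtattr_failure;
        mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

        /* ... append struct rtnexthop entries for each outgoing vif ... */

        mp_head->rta_type = RTA_MULTIPATH;
        mp_head->rta_len  = skb_tail_pointer(skb) - (u8 *)mp_head;
        return 1;

rtattr_failure:
        nlmsg_trim(skb, b);                    /* drop the partial nest */
        return -EMSGSIZE;
}
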
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index 22e104c6a493..15ad5dd2d984 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -331,14 +331,14 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb,
331 struct ip_vs_app *app) 331 struct ip_vs_app *app)
332{ 332{
333 int diff; 333 int diff;
334 unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4; 334 const unsigned int tcp_offset = ip_hdrlen(*pskb);
335 struct tcphdr *th; 335 struct tcphdr *th;
336 __u32 seq; 336 __u32 seq;
337 337
338 if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th))) 338 if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
339 return 0; 339 return 0;
340 340
341 th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset); 341 th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset);
342 342
343 /* 343 /*
344 * Remember seq number in case this pkt gets resized 344 * Remember seq number in case this pkt gets resized
@@ -406,14 +406,14 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb,
406 struct ip_vs_app *app) 406 struct ip_vs_app *app)
407{ 407{
408 int diff; 408 int diff;
409 unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4; 409 const unsigned int tcp_offset = ip_hdrlen(*pskb);
410 struct tcphdr *th; 410 struct tcphdr *th;
411 __u32 seq; 411 __u32 seq;
412 412
413 if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th))) 413 if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
414 return 0; 414 return 0;
415 415
416 th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset); 416 th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset);
417 417
418 /* 418 /*
419 * Remember seq number in case this pkt gets resized 419 * Remember seq number in case this pkt gets resized
@@ -577,7 +577,6 @@ static const struct file_operations ip_vs_app_fops = {
577int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, 577int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
578 char *o_buf, int o_len, char *n_buf, int n_len) 578 char *o_buf, int o_len, char *n_buf, int n_len)
579{ 579{
580 struct iphdr *iph;
581 int diff; 580 int diff;
582 int o_offset; 581 int o_offset;
583 int o_left; 582 int o_left;
@@ -603,12 +602,11 @@ int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
603 skb_put(skb, diff); 602 skb_put(skb, diff);
604 memmove(skb->data + o_offset + n_len, 603 memmove(skb->data + o_offset + n_len,
605 skb->data + o_offset + o_len, o_left); 604 skb->data + o_offset + o_len, o_left);
606 memcpy(skb->data + o_offset, n_buf, n_len); 605 skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len);
607 } 606 }
608 607
609 /* must update the iph total length here */ 608 /* must update the iph total length here */
610 iph = skb->nh.iph; 609 ip_hdr(skb)->tot_len = htons(skb->len);
611 iph->tot_len = htons(skb->len);
612 610
613 LeaveFunction(9); 611 LeaveFunction(9);
614 return 0; 612 return 0;
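
The repeated (*pskb)->nh.iph->ihl*4 computations in these hunks collapse into ip_hdrlen(), a companion accessor to ip_hdr(). Its definition in <net/ip.h> of this era is essentially:

/* IPv4 header length in bytes: the 4-bit IHL field counts 32-bit words. */
static inline unsigned int ip_hdrlen(const struct sk_buff *skb)
{
        return ip_hdr(skb)->ihl * 4;
}

With that, the TCP header is located as (struct tcphdr *)(skb_network_header(skb) + ip_hdrlen(skb)), which is exactly the form the converted app_tcp_pkt_out() and app_tcp_pkt_in() use.
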
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 24d7b66eb6d2..f005a2f929f4 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -212,7 +212,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
212 __be16 ports[2]) 212 __be16 ports[2])
213{ 213{
214 struct ip_vs_conn *cp = NULL; 214 struct ip_vs_conn *cp = NULL;
215 struct iphdr *iph = skb->nh.iph; 215 struct iphdr *iph = ip_hdr(skb);
216 struct ip_vs_dest *dest; 216 struct ip_vs_dest *dest;
217 struct ip_vs_conn *ct; 217 struct ip_vs_conn *ct;
218 __be16 dport; /* destination port to forward */ 218 __be16 dport; /* destination port to forward */
@@ -381,7 +381,7 @@ struct ip_vs_conn *
381ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) 381ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
382{ 382{
383 struct ip_vs_conn *cp = NULL; 383 struct ip_vs_conn *cp = NULL;
384 struct iphdr *iph = skb->nh.iph; 384 struct iphdr *iph = ip_hdr(skb);
385 struct ip_vs_dest *dest; 385 struct ip_vs_dest *dest;
386 __be16 _ports[2], *pptr; 386 __be16 _ports[2], *pptr;
387 387
@@ -447,7 +447,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
447 struct ip_vs_protocol *pp) 447 struct ip_vs_protocol *pp)
448{ 448{
449 __be16 _ports[2], *pptr; 449 __be16 _ports[2], *pptr;
450 struct iphdr *iph = skb->nh.iph; 450 struct iphdr *iph = ip_hdr(skb);
451 451
452 pptr = skb_header_pointer(skb, iph->ihl*4, 452 pptr = skb_header_pointer(skb, iph->ihl*4,
453 sizeof(_ports), _ports); 453 sizeof(_ports), _ports);
@@ -546,7 +546,7 @@ ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
546{ 546{
547 skb = ip_defrag(skb, user); 547 skb = ip_defrag(skb, user);
548 if (skb) 548 if (skb)
549 ip_send_check(skb->nh.iph); 549 ip_send_check(ip_hdr(skb));
550 return skb; 550 return skb;
551} 551}
552 552
@@ -557,9 +557,10 @@ ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
557void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, 557void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
558 struct ip_vs_conn *cp, int inout) 558 struct ip_vs_conn *cp, int inout)
559{ 559{
560 struct iphdr *iph = skb->nh.iph; 560 struct iphdr *iph = ip_hdr(skb);
561 unsigned int icmp_offset = iph->ihl*4; 561 unsigned int icmp_offset = iph->ihl*4;
562 struct icmphdr *icmph = (struct icmphdr *)(skb->nh.raw + icmp_offset); 562 struct icmphdr *icmph = (struct icmphdr *)(skb_network_header(skb) +
563 icmp_offset);
563 struct iphdr *ciph = (struct iphdr *)(icmph + 1); 564 struct iphdr *ciph = (struct iphdr *)(icmph + 1);
564 565
565 if (inout) { 566 if (inout) {
@@ -617,14 +618,14 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
617 *related = 1; 618 *related = 1;
618 619
619 /* reassemble IP fragments */ 620 /* reassemble IP fragments */
620 if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) { 621 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
621 skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT); 622 skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
622 if (!skb) 623 if (!skb)
623 return NF_STOLEN; 624 return NF_STOLEN;
624 *pskb = skb; 625 *pskb = skb;
625 } 626 }
626 627
627 iph = skb->nh.iph; 628 iph = ip_hdr(skb);
628 offset = ihl = iph->ihl * 4; 629 offset = ihl = iph->ihl * 4;
629 ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); 630 ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
630 if (ic == NULL) 631 if (ic == NULL)
@@ -659,7 +660,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
659 return NF_ACCEPT; 660 return NF_ACCEPT;
660 661
661 /* Is the embedded protocol header present? */ 662 /* Is the embedded protocol header present? */
662 if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) && 663 if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
663 pp->dont_defrag)) 664 pp->dont_defrag))
664 return NF_ACCEPT; 665 return NF_ACCEPT;
665 666
@@ -680,8 +681,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
680 } 681 }
681 682
682 /* Ensure the checksum is correct */ 683 /* Ensure the checksum is correct */
683 if (skb->ip_summed != CHECKSUM_UNNECESSARY && 684 if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
684 ip_vs_checksum_complete(skb, ihl)) {
685 /* Failed checksum! */ 685 /* Failed checksum! */
686 IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n", 686 IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
687 NIPQUAD(iph->saddr)); 687 NIPQUAD(iph->saddr));
@@ -712,8 +712,7 @@ static inline int is_tcp_reset(const struct sk_buff *skb)
712{ 712{
713 struct tcphdr _tcph, *th; 713 struct tcphdr _tcph, *th;
714 714
715 th = skb_header_pointer(skb, skb->nh.iph->ihl * 4, 715 th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
716 sizeof(_tcph), &_tcph);
717 if (th == NULL) 716 if (th == NULL)
718 return 0; 717 return 0;
719 return th->rst; 718 return th->rst;
@@ -740,14 +739,14 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
740 if (skb->ipvs_property) 739 if (skb->ipvs_property)
741 return NF_ACCEPT; 740 return NF_ACCEPT;
742 741
743 iph = skb->nh.iph; 742 iph = ip_hdr(skb);
744 if (unlikely(iph->protocol == IPPROTO_ICMP)) { 743 if (unlikely(iph->protocol == IPPROTO_ICMP)) {
745 int related, verdict = ip_vs_out_icmp(pskb, &related); 744 int related, verdict = ip_vs_out_icmp(pskb, &related);
746 745
747 if (related) 746 if (related)
748 return verdict; 747 return verdict;
749 skb = *pskb; 748 skb = *pskb;
750 iph = skb->nh.iph; 749 iph = ip_hdr(skb);
751 } 750 }
752 751
753 pp = ip_vs_proto_get(iph->protocol); 752 pp = ip_vs_proto_get(iph->protocol);
@@ -755,12 +754,12 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
755 return NF_ACCEPT; 754 return NF_ACCEPT;
756 755
757 /* reassemble IP fragments */ 756 /* reassemble IP fragments */
758 if (unlikely(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET) && 757 if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) &&
759 !pp->dont_defrag)) { 758 !pp->dont_defrag)) {
760 skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT); 759 skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
761 if (!skb) 760 if (!skb)
762 return NF_STOLEN; 761 return NF_STOLEN;
763 iph = skb->nh.iph; 762 iph = ip_hdr(skb);
764 *pskb = skb; 763 *pskb = skb;
765 } 764 }
766 765
@@ -810,8 +809,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
810 if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp)) 809 if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp))
811 goto drop; 810 goto drop;
812 skb = *pskb; 811 skb = *pskb;
813 skb->nh.iph->saddr = cp->vaddr; 812 ip_hdr(skb)->saddr = cp->vaddr;
814 ip_send_check(skb->nh.iph); 813 ip_send_check(ip_hdr(skb));
815 814
816 /* For policy routing, packets originating from this 815 /* For policy routing, packets originating from this
817 * machine itself may be routed differently to packets 816 * machine itself may be routed differently to packets
@@ -861,7 +860,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
861 *related = 1; 860 *related = 1;
862 861
863 /* reassemble IP fragments */ 862 /* reassemble IP fragments */
864 if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) { 863 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
865 skb = ip_vs_gather_frags(skb, 864 skb = ip_vs_gather_frags(skb,
866 hooknum == NF_IP_LOCAL_IN ? 865 hooknum == NF_IP_LOCAL_IN ?
867 IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD); 866 IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD);
@@ -870,7 +869,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
870 *pskb = skb; 869 *pskb = skb;
871 } 870 }
872 871
873 iph = skb->nh.iph; 872 iph = ip_hdr(skb);
874 offset = ihl = iph->ihl * 4; 873 offset = ihl = iph->ihl * 4;
875 ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); 874 ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
876 if (ic == NULL) 875 if (ic == NULL)
@@ -905,7 +904,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
905 return NF_ACCEPT; 904 return NF_ACCEPT;
906 905
907 /* Is the embedded protocol header present? */ 906 /* Is the embedded protocol header present? */
908 if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) && 907 if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
909 pp->dont_defrag)) 908 pp->dont_defrag))
910 return NF_ACCEPT; 909 return NF_ACCEPT;
911 910
@@ -921,8 +920,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
921 verdict = NF_DROP; 920 verdict = NF_DROP;
922 921
923 /* Ensure the checksum is correct */ 922 /* Ensure the checksum is correct */
924 if (skb->ip_summed != CHECKSUM_UNNECESSARY && 923 if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
925 ip_vs_checksum_complete(skb, ihl)) {
926 /* Failed checksum! */ 924 /* Failed checksum! */
927 IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n", 925 IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n",
928 NIPQUAD(iph->saddr)); 926 NIPQUAD(iph->saddr));
@@ -966,19 +964,19 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
966 || skb->dev == &loopback_dev || skb->sk)) { 964 || skb->dev == &loopback_dev || skb->sk)) {
967 IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n", 965 IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
968 skb->pkt_type, 966 skb->pkt_type,
969 skb->nh.iph->protocol, 967 ip_hdr(skb)->protocol,
970 NIPQUAD(skb->nh.iph->daddr)); 968 NIPQUAD(ip_hdr(skb)->daddr));
971 return NF_ACCEPT; 969 return NF_ACCEPT;
972 } 970 }
973 971
974 iph = skb->nh.iph; 972 iph = ip_hdr(skb);
975 if (unlikely(iph->protocol == IPPROTO_ICMP)) { 973 if (unlikely(iph->protocol == IPPROTO_ICMP)) {
976 int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum); 974 int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum);
977 975
978 if (related) 976 if (related)
979 return verdict; 977 return verdict;
980 skb = *pskb; 978 skb = *pskb;
981 iph = skb->nh.iph; 979 iph = ip_hdr(skb);
982 } 980 }
983 981
984 /* Protocol supported? */ 982 /* Protocol supported? */
@@ -1064,7 +1062,7 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb,
1064{ 1062{
1065 int r; 1063 int r;
1066 1064
1067 if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP) 1065 if (ip_hdr(*pskb)->protocol != IPPROTO_ICMP)
1068 return NF_ACCEPT; 1066 return NF_ACCEPT;
1069 1067
1070 return ip_vs_in_icmp(pskb, &r, hooknum); 1068 return ip_vs_in_icmp(pskb, &r, hooknum);
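
Two cleanups repeat throughout the ip_vs_core.c hunks. First, __constant_htons() becomes plain htons(): gcc folds htons() to a constant whenever its argument is constant, so the __constant_ form is only needed where a constant expression is syntactically required (case labels, static initializers). Second, the open-coded skb->ip_summed != CHECKSUM_UNNECESSARY test becomes skb_csum_unnecessary(). A small sketch of the resulting fragment test, assuming an already-validated IPv4 header:

/* Sketch: does this packet carry fragment bits?  htons(IP_MF|IP_OFFSET)
 * is folded at compile time, so there is no per-packet byte swap. */
static inline int ipv4_is_fragment(const struct sk_buff *skb)
{
        return (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) ? 1 : 0;
}
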
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c
index 502111fba872..dcf5d46aaa5e 100644
--- a/net/ipv4/ipvs/ip_vs_dh.c
+++ b/net/ipv4/ipvs/ip_vs_dh.c
@@ -204,7 +204,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
204{ 204{
205 struct ip_vs_dest *dest; 205 struct ip_vs_dest *dest;
206 struct ip_vs_dh_bucket *tbl; 206 struct ip_vs_dh_bucket *tbl;
207 struct iphdr *iph = skb->nh.iph; 207 struct iphdr *iph = ip_hdr(skb);
208 208
209 IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n"); 209 IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n");
210 210
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c
index 847c47af040c..344ddbbdc756 100644
--- a/net/ipv4/ipvs/ip_vs_ftp.c
+++ b/net/ipv4/ipvs/ip_vs_ftp.c
@@ -159,10 +159,10 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
159 return 0; 159 return 0;
160 160
161 if (cp->app_data == &ip_vs_ftp_pasv) { 161 if (cp->app_data == &ip_vs_ftp_pasv) {
162 iph = (*pskb)->nh.iph; 162 iph = ip_hdr(*pskb);
163 th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); 163 th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
164 data = (char *)th + (th->doff << 2); 164 data = (char *)th + (th->doff << 2);
165 data_limit = (*pskb)->tail; 165 data_limit = skb_tail_pointer(*pskb);
166 166
167 if (ip_vs_ftp_get_addrport(data, data_limit, 167 if (ip_vs_ftp_get_addrport(data, data_limit,
168 SERVER_STRING, 168 SERVER_STRING,
@@ -262,14 +262,14 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
262 /* 262 /*
263 * Detecting whether it is passive 263 * Detecting whether it is passive
264 */ 264 */
265 iph = (*pskb)->nh.iph; 265 iph = ip_hdr(*pskb);
266 th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); 266 th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
267 267
268 /* Since there may be OPTIONS in the TCP packet and the HLEN is 268 /* Since there may be OPTIONS in the TCP packet and the HLEN is
269 the length of the header in 32-bit multiples, it is accurate 269 the length of the header in 32-bit multiples, it is accurate
270 to calculate data address by th+HLEN*4 */ 270 to calculate data address by th+HLEN*4 */
271 data = data_start = (char *)th + (th->doff << 2); 271 data = data_start = (char *)th + (th->doff << 2);
272 data_limit = (*pskb)->tail; 272 data_limit = skb_tail_pointer(*pskb);
273 273
274 while (data <= data_limit - 6) { 274 while (data <= data_limit - 6) {
275 if (strnicmp(data, "PASV\r\n", 6) == 0) { 275 if (strnicmp(data, "PASV\r\n", 6) == 0) {
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index c801273cb881..052f4ed59174 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -521,7 +521,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
521 struct ip_vs_dest *dest; 521 struct ip_vs_dest *dest;
522 struct ip_vs_lblc_table *tbl; 522 struct ip_vs_lblc_table *tbl;
523 struct ip_vs_lblc_entry *en; 523 struct ip_vs_lblc_entry *en;
524 struct iphdr *iph = skb->nh.iph; 524 struct iphdr *iph = ip_hdr(skb);
525 525
526 IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n"); 526 IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");
527 527
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index 23f9b9e73c85..6225acac7a3b 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -775,7 +775,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
775 struct ip_vs_dest *dest; 775 struct ip_vs_dest *dest;
776 struct ip_vs_lblcr_table *tbl; 776 struct ip_vs_lblcr_table *tbl;
777 struct ip_vs_lblcr_entry *en; 777 struct ip_vs_lblcr_entry *en;
778 struct iphdr *iph = skb->nh.iph; 778 struct iphdr *iph = ip_hdr(skb);
779 779
780 IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n"); 780 IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n");
781 781
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c
index 8b0505b09317..a842676e1c69 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah.c
@@ -52,15 +52,15 @@ ah_conn_in_get(const struct sk_buff *skb,
52 if (likely(!inverse)) { 52 if (likely(!inverse)) {
53 cp = ip_vs_conn_in_get(IPPROTO_UDP, 53 cp = ip_vs_conn_in_get(IPPROTO_UDP,
54 iph->saddr, 54 iph->saddr,
55 __constant_htons(PORT_ISAKMP), 55 htons(PORT_ISAKMP),
56 iph->daddr, 56 iph->daddr,
57 __constant_htons(PORT_ISAKMP)); 57 htons(PORT_ISAKMP));
58 } else { 58 } else {
59 cp = ip_vs_conn_in_get(IPPROTO_UDP, 59 cp = ip_vs_conn_in_get(IPPROTO_UDP,
60 iph->daddr, 60 iph->daddr,
61 __constant_htons(PORT_ISAKMP), 61 htons(PORT_ISAKMP),
62 iph->saddr, 62 iph->saddr,
63 __constant_htons(PORT_ISAKMP)); 63 htons(PORT_ISAKMP));
64 } 64 }
65 65
66 if (!cp) { 66 if (!cp) {
@@ -89,15 +89,15 @@ ah_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
89 if (likely(!inverse)) { 89 if (likely(!inverse)) {
90 cp = ip_vs_conn_out_get(IPPROTO_UDP, 90 cp = ip_vs_conn_out_get(IPPROTO_UDP,
91 iph->saddr, 91 iph->saddr,
92 __constant_htons(PORT_ISAKMP), 92 htons(PORT_ISAKMP),
93 iph->daddr, 93 iph->daddr,
94 __constant_htons(PORT_ISAKMP)); 94 htons(PORT_ISAKMP));
95 } else { 95 } else {
96 cp = ip_vs_conn_out_get(IPPROTO_UDP, 96 cp = ip_vs_conn_out_get(IPPROTO_UDP,
97 iph->daddr, 97 iph->daddr,
98 __constant_htons(PORT_ISAKMP), 98 htons(PORT_ISAKMP),
99 iph->saddr, 99 iph->saddr,
100 __constant_htons(PORT_ISAKMP)); 100 htons(PORT_ISAKMP));
101 } 101 }
102 102
103 if (!cp) { 103 if (!cp) {
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index 16a9ebee2fe6..e65577a77006 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -76,16 +76,15 @@ tcp_conn_schedule(struct sk_buff *skb,
76 struct ip_vs_service *svc; 76 struct ip_vs_service *svc;
77 struct tcphdr _tcph, *th; 77 struct tcphdr _tcph, *th;
78 78
79 th = skb_header_pointer(skb, skb->nh.iph->ihl*4, 79 th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
80 sizeof(_tcph), &_tcph);
81 if (th == NULL) { 80 if (th == NULL) {
82 *verdict = NF_DROP; 81 *verdict = NF_DROP;
83 return 0; 82 return 0;
84 } 83 }
85 84
86 if (th->syn && 85 if (th->syn &&
87 (svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol, 86 (svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
88 skb->nh.iph->daddr, th->dest))) { 87 ip_hdr(skb)->daddr, th->dest))) {
89 if (ip_vs_todrop()) { 88 if (ip_vs_todrop()) {
90 /* 89 /*
91 * It seems that we are very loaded. 90 * It seems that we are very loaded.
@@ -127,7 +126,7 @@ tcp_snat_handler(struct sk_buff **pskb,
127 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 126 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
128{ 127{
129 struct tcphdr *tcph; 128 struct tcphdr *tcph;
130 unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4; 129 const unsigned int tcphoff = ip_hdrlen(*pskb);
131 130
132 /* csum_check requires unshared skb */ 131 /* csum_check requires unshared skb */
133 if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph))) 132 if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
@@ -143,7 +142,7 @@ tcp_snat_handler(struct sk_buff **pskb,
143 return 0; 142 return 0;
144 } 143 }
145 144
146 tcph = (void *)(*pskb)->nh.iph + tcphoff; 145 tcph = (void *)ip_hdr(*pskb) + tcphoff;
147 tcph->source = cp->vport; 146 tcph->source = cp->vport;
148 147
149 /* Adjust TCP checksums */ 148 /* Adjust TCP checksums */
@@ -175,7 +174,7 @@ tcp_dnat_handler(struct sk_buff **pskb,
175 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 174 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
176{ 175{
177 struct tcphdr *tcph; 176 struct tcphdr *tcph;
178 unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4; 177 const unsigned int tcphoff = ip_hdrlen(*pskb);
179 178
180 /* csum_check requires unshared skb */ 179 /* csum_check requires unshared skb */
181 if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph))) 180 if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
@@ -194,7 +193,7 @@ tcp_dnat_handler(struct sk_buff **pskb,
194 return 0; 193 return 0;
195 } 194 }
196 195
197 tcph = (void *)(*pskb)->nh.iph + tcphoff; 196 tcph = (void *)ip_hdr(*pskb) + tcphoff;
198 tcph->dest = cp->dport; 197 tcph->dest = cp->dport;
199 198
200 /* 199 /*
@@ -224,15 +223,15 @@ tcp_dnat_handler(struct sk_buff **pskb,
224static int 223static int
225tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) 224tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
226{ 225{
227 unsigned int tcphoff = skb->nh.iph->ihl*4; 226 const unsigned int tcphoff = ip_hdrlen(skb);
228 227
229 switch (skb->ip_summed) { 228 switch (skb->ip_summed) {
230 case CHECKSUM_NONE: 229 case CHECKSUM_NONE:
231 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); 230 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
232 case CHECKSUM_COMPLETE: 231 case CHECKSUM_COMPLETE:
233 if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, 232 if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
234 skb->len - tcphoff, 233 skb->len - tcphoff,
235 skb->nh.iph->protocol, skb->csum)) { 234 ip_hdr(skb)->protocol, skb->csum)) {
236 IP_VS_DBG_RL_PKT(0, pp, skb, 0, 235 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
237 "Failed checksum for"); 236 "Failed checksum for");
238 return 0; 237 return 0;
@@ -467,8 +466,7 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
467{ 466{
468 struct tcphdr _tcph, *th; 467 struct tcphdr _tcph, *th;
469 468
470 th = skb_header_pointer(skb, skb->nh.iph->ihl*4, 469 th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
471 sizeof(_tcph), &_tcph);
472 if (th == NULL) 470 if (th == NULL)
473 return 0; 471 return 0;
474 472
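
tcp_csum_check() above keeps a deliberate case fallthrough: for CHECKSUM_NONE the software checksum over the whole segment is computed into skb->csum first, and control then falls into CHECKSUM_COMPLETE, where csum_tcpudp_magic() folds in the pseudo-header (addresses, length, protocol); a non-zero result means a corrupt packet. The same shape appears in udp_csum_check() in the next file. Spelled out as a sketch:

/* Sketch: verify a TCP checksum the way tcp_csum_check() does.
 * Returns 0 if the checksum is bad, 1 otherwise. */
static int verify_tcp_csum(struct sk_buff *skb)
{
        const unsigned int tcphoff = ip_hdrlen(skb);

        switch (skb->ip_summed) {
        case CHECKSUM_NONE:
                /* no hardware sum: compute it over the segment ... */
                skb->csum = skb_checksum(skb, tcphoff,
                                         skb->len - tcphoff, 0);
                /* fall through */
        case CHECKSUM_COMPLETE:
                /* ... then fold in the pseudo-header; non-zero is bad */
                if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
                                      skb->len - tcphoff,
                                      ip_hdr(skb)->protocol, skb->csum))
                        return 0;
                break;
        default:
                /* CHECKSUM_UNNECESSARY: already verified by hardware */
                break;
        }
        return 1;
}
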
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 03f0a414cfa4..8ee5fe6a101d 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -22,7 +22,7 @@
22#include <linux/udp.h> 22#include <linux/udp.h>
23 23
24#include <net/ip_vs.h> 24#include <net/ip_vs.h>
25 25#include <net/ip.h>
26 26
27static struct ip_vs_conn * 27static struct ip_vs_conn *
28udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, 28udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
@@ -56,7 +56,7 @@ udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
56 struct ip_vs_conn *cp; 56 struct ip_vs_conn *cp;
57 __be16 _ports[2], *pptr; 57 __be16 _ports[2], *pptr;
58 58
59 pptr = skb_header_pointer(skb, skb->nh.iph->ihl*4, 59 pptr = skb_header_pointer(skb, ip_hdrlen(skb),
60 sizeof(_ports), _ports); 60 sizeof(_ports), _ports);
61 if (pptr == NULL) 61 if (pptr == NULL)
62 return NULL; 62 return NULL;
@@ -82,15 +82,15 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
82 struct ip_vs_service *svc; 82 struct ip_vs_service *svc;
83 struct udphdr _udph, *uh; 83 struct udphdr _udph, *uh;
84 84
85 uh = skb_header_pointer(skb, skb->nh.iph->ihl*4, 85 uh = skb_header_pointer(skb, ip_hdrlen(skb),
86 sizeof(_udph), &_udph); 86 sizeof(_udph), &_udph);
87 if (uh == NULL) { 87 if (uh == NULL) {
88 *verdict = NF_DROP; 88 *verdict = NF_DROP;
89 return 0; 89 return 0;
90 } 90 }
91 91
92 if ((svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol, 92 if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
93 skb->nh.iph->daddr, uh->dest))) { 93 ip_hdr(skb)->daddr, uh->dest))) {
94 if (ip_vs_todrop()) { 94 if (ip_vs_todrop()) {
95 /* 95 /*
96 * It seems that we are very loaded. 96 * It seems that we are very loaded.
@@ -133,7 +133,7 @@ udp_snat_handler(struct sk_buff **pskb,
133 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 133 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
134{ 134{
135 struct udphdr *udph; 135 struct udphdr *udph;
136 unsigned int udphoff = (*pskb)->nh.iph->ihl * 4; 136 const unsigned int udphoff = ip_hdrlen(*pskb);
137 137
138 /* csum_check requires unshared skb */ 138 /* csum_check requires unshared skb */
139 if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph))) 139 if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
@@ -151,7 +151,7 @@ udp_snat_handler(struct sk_buff **pskb,
151 return 0; 151 return 0;
152 } 152 }
153 153
154 udph = (void *)(*pskb)->nh.iph + udphoff; 154 udph = (void *)ip_hdr(*pskb) + udphoff;
155 udph->source = cp->vport; 155 udph->source = cp->vport;
156 156
157 /* 157 /*
@@ -187,7 +187,7 @@ udp_dnat_handler(struct sk_buff **pskb,
187 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 187 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
188{ 188{
189 struct udphdr *udph; 189 struct udphdr *udph;
190 unsigned int udphoff = (*pskb)->nh.iph->ihl * 4; 190 unsigned int udphoff = ip_hdrlen(*pskb);
191 191
192 /* csum_check requires unshared skb */ 192 /* csum_check requires unshared skb */
193 if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph))) 193 if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
@@ -206,7 +206,7 @@ udp_dnat_handler(struct sk_buff **pskb,
206 return 0; 206 return 0;
207 } 207 }
208 208
209 udph = (void *)(*pskb)->nh.iph + udphoff; 209 udph = (void *)ip_hdr(*pskb) + udphoff;
210 udph->dest = cp->dport; 210 udph->dest = cp->dport;
211 211
212 /* 212 /*
@@ -239,7 +239,7 @@ static int
239udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) 239udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
240{ 240{
241 struct udphdr _udph, *uh; 241 struct udphdr _udph, *uh;
242 unsigned int udphoff = skb->nh.iph->ihl*4; 242 const unsigned int udphoff = ip_hdrlen(skb);
243 243
244 uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph); 244 uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
245 if (uh == NULL) 245 if (uh == NULL)
@@ -251,10 +251,10 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
251 skb->csum = skb_checksum(skb, udphoff, 251 skb->csum = skb_checksum(skb, udphoff,
252 skb->len - udphoff, 0); 252 skb->len - udphoff, 0);
253 case CHECKSUM_COMPLETE: 253 case CHECKSUM_COMPLETE:
254 if (csum_tcpudp_magic(skb->nh.iph->saddr, 254 if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
255 skb->nh.iph->daddr, 255 ip_hdr(skb)->daddr,
256 skb->len - udphoff, 256 skb->len - udphoff,
257 skb->nh.iph->protocol, 257 ip_hdr(skb)->protocol,
258 skb->csum)) { 258 skb->csum)) {
259 IP_VS_DBG_RL_PKT(0, pp, skb, 0, 259 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
260 "Failed checksum for"); 260 "Failed checksum for");
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c
index 338668f88fe2..1b25b00ef1e1 100644
--- a/net/ipv4/ipvs/ip_vs_sh.c
+++ b/net/ipv4/ipvs/ip_vs_sh.c
@@ -201,7 +201,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
201{ 201{
202 struct ip_vs_dest *dest; 202 struct ip_vs_dest *dest;
203 struct ip_vs_sh_bucket *tbl; 203 struct ip_vs_sh_bucket *tbl;
204 struct iphdr *iph = skb->nh.iph; 204 struct iphdr *iph = ip_hdr(skb);
205 205
206 IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); 206 IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
207 207
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index e1f77bd7c9a5..900ce29db382 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -156,7 +156,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
156 struct ip_vs_protocol *pp) 156 struct ip_vs_protocol *pp)
157{ 157{
158 struct rtable *rt; /* Route to the other host */ 158 struct rtable *rt; /* Route to the other host */
159 struct iphdr *iph = skb->nh.iph; 159 struct iphdr *iph = ip_hdr(skb);
160 u8 tos = iph->tos; 160 u8 tos = iph->tos;
161 int mtu; 161 int mtu;
162 struct flowi fl = { 162 struct flowi fl = {
@@ -178,7 +178,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
178 178
179 /* MTU checking */ 179 /* MTU checking */
180 mtu = dst_mtu(&rt->u.dst); 180 mtu = dst_mtu(&rt->u.dst);
181 if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) { 181 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
182 ip_rt_put(rt); 182 ip_rt_put(rt);
183 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 183 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
184 IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n"); 184 IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n");
@@ -193,7 +193,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
193 ip_rt_put(rt); 193 ip_rt_put(rt);
194 return NF_STOLEN; 194 return NF_STOLEN;
195 } 195 }
196 ip_send_check(skb->nh.iph); 196 ip_send_check(ip_hdr(skb));
197 197
198 /* drop old route */ 198 /* drop old route */
199 dst_release(skb->dst); 199 dst_release(skb->dst);
@@ -226,7 +226,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
226{ 226{
227 struct rtable *rt; /* Route to the other host */ 227 struct rtable *rt; /* Route to the other host */
228 int mtu; 228 int mtu;
229 struct iphdr *iph = skb->nh.iph; 229 struct iphdr *iph = ip_hdr(skb);
230 230
231 EnterFunction(10); 231 EnterFunction(10);
232 232
@@ -245,7 +245,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
245 245
246 /* MTU checking */ 246 /* MTU checking */
247 mtu = dst_mtu(&rt->u.dst); 247 mtu = dst_mtu(&rt->u.dst);
248 if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) { 248 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
249 ip_rt_put(rt); 249 ip_rt_put(rt);
250 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 250 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
251 IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for"); 251 IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
@@ -266,8 +266,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
266 /* mangle the packet */ 266 /* mangle the packet */
267 if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp)) 267 if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp))
268 goto tx_error; 268 goto tx_error;
269 skb->nh.iph->daddr = cp->daddr; 269 ip_hdr(skb)->daddr = cp->daddr;
270 ip_send_check(skb->nh.iph); 270 ip_send_check(ip_hdr(skb));
271 271
272 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); 272 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
273 273
@@ -320,19 +320,20 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
320{ 320{
321 struct rtable *rt; /* Route to the other host */ 321 struct rtable *rt; /* Route to the other host */
322 struct net_device *tdev; /* Device to other host */ 322 struct net_device *tdev; /* Device to other host */
323 struct iphdr *old_iph = skb->nh.iph; 323 struct iphdr *old_iph = ip_hdr(skb);
324 u8 tos = old_iph->tos; 324 u8 tos = old_iph->tos;
325 __be16 df = old_iph->frag_off; 325 __be16 df = old_iph->frag_off;
326 sk_buff_data_t old_transport_header = skb->transport_header;
326 struct iphdr *iph; /* Our new IP header */ 327 struct iphdr *iph; /* Our new IP header */
327 int max_headroom; /* The extra header space needed */ 328 int max_headroom; /* The extra header space needed */
328 int mtu; 329 int mtu;
329 330
330 EnterFunction(10); 331 EnterFunction(10);
331 332
332 if (skb->protocol != __constant_htons(ETH_P_IP)) { 333 if (skb->protocol != htons(ETH_P_IP)) {
333 IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, " 334 IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, "
334 "ETH_P_IP: %d, skb protocol: %d\n", 335 "ETH_P_IP: %d, skb protocol: %d\n",
335 __constant_htons(ETH_P_IP), skb->protocol); 336 htons(ETH_P_IP), skb->protocol);
336 goto tx_error; 337 goto tx_error;
337 } 338 }
338 339
@@ -350,9 +351,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
350 if (skb->dst) 351 if (skb->dst)
351 skb->dst->ops->update_pmtu(skb->dst, mtu); 352 skb->dst->ops->update_pmtu(skb->dst, mtu);
352 353
353 df |= (old_iph->frag_off&__constant_htons(IP_DF)); 354 df |= (old_iph->frag_off & htons(IP_DF));
354 355
355 if ((old_iph->frag_off&__constant_htons(IP_DF)) 356 if ((old_iph->frag_off & htons(IP_DF))
356 && mtu < ntohs(old_iph->tot_len)) { 357 && mtu < ntohs(old_iph->tot_len)) {
357 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 358 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
358 ip_rt_put(rt); 359 ip_rt_put(rt);
@@ -377,15 +378,16 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
377 } 378 }
378 kfree_skb(skb); 379 kfree_skb(skb);
379 skb = new_skb; 380 skb = new_skb;
380 old_iph = skb->nh.iph; 381 old_iph = ip_hdr(skb);
381 } 382 }
382 383
383 skb->h.raw = (void *) old_iph; 384 skb->transport_header = old_transport_header;
384 385
385 /* fix old IP header checksum */ 386 /* fix old IP header checksum */
386 ip_send_check(old_iph); 387 ip_send_check(old_iph);
387 388
388 skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); 389 skb_push(skb, sizeof(struct iphdr));
390 skb_reset_network_header(skb);
389 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 391 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
390 392
391 /* drop old route */ 393 /* drop old route */
@@ -395,7 +397,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
395 /* 397 /*
396 * Push down and install the IPIP header. 398 * Push down and install the IPIP header.
397 */ 399 */
398 iph = skb->nh.iph; 400 iph = ip_hdr(skb);
399 iph->version = 4; 401 iph->version = 4;
400 iph->ihl = sizeof(struct iphdr)>>2; 402 iph->ihl = sizeof(struct iphdr)>>2;
401 iph->frag_off = df; 403 iph->frag_off = df;
@@ -435,7 +437,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
435 struct ip_vs_protocol *pp) 437 struct ip_vs_protocol *pp)
436{ 438{
437 struct rtable *rt; /* Route to the other host */ 439 struct rtable *rt; /* Route to the other host */
438 struct iphdr *iph = skb->nh.iph; 440 struct iphdr *iph = ip_hdr(skb);
439 int mtu; 441 int mtu;
440 442
441 EnterFunction(10); 443 EnterFunction(10);
@@ -445,7 +447,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
445 447
446 /* MTU checking */ 448 /* MTU checking */
447 mtu = dst_mtu(&rt->u.dst); 449 mtu = dst_mtu(&rt->u.dst);
448 if ((iph->frag_off&__constant_htons(IP_DF)) && skb->len > mtu) { 450 if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
449 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 451 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
450 ip_rt_put(rt); 452 ip_rt_put(rt);
451 IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n"); 453 IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n");
@@ -460,7 +462,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
460 ip_rt_put(rt); 462 ip_rt_put(rt);
461 return NF_STOLEN; 463 return NF_STOLEN;
462 } 464 }
463 ip_send_check(skb->nh.iph); 465 ip_send_check(ip_hdr(skb));
464 466
465 /* drop old route */ 467 /* drop old route */
466 dst_release(skb->dst); 468 dst_release(skb->dst);
@@ -514,12 +516,12 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
514 * mangle and send the packet here (only for VS/NAT) 516 * mangle and send the packet here (only for VS/NAT)
515 */ 517 */
516 518
517 if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(skb->nh.iph->tos)))) 519 if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos))))
518 goto tx_error_icmp; 520 goto tx_error_icmp;
519 521
520 /* MTU checking */ 522 /* MTU checking */
521 mtu = dst_mtu(&rt->u.dst); 523 mtu = dst_mtu(&rt->u.dst);
522 if ((skb->len > mtu) && (skb->nh.iph->frag_off&__constant_htons(IP_DF))) { 524 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
523 ip_rt_put(rt); 525 ip_rt_put(rt);
524 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 526 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
525 IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n"); 527 IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
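
ip_vs_tunnel_xmit() is the most involved conversion in this batch: before prepending the IPIP header it must remember where the inner IP header sits. Under the sk_buff_data_t scheme that position is saved as the transport_header value, restored after any headroom reallocation, and only then is the outer header pushed. A sketch of the headroom dance, with an illustrative helper name and error paths elided:

/* Sketch (illustrative helper): guarantee headroom for an outer IPv4
 * header before IPIP encapsulation, the way ip_vs_tunnel_xmit() does. */
static struct sk_buff *ipip_make_room(struct sk_buff *skb,
                                      struct net_device *tdev)
{
        sk_buff_data_t old_th = skb->transport_header; /* inner IP position */
        int max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);

        if (skb_headroom(skb) < max_headroom ||
            skb_cloned(skb) || skb_shared(skb)) {
                struct sk_buff *new_skb =
                        skb_realloc_headroom(skb, max_headroom);
                if (!new_skb)
                        return NULL;
                kfree_skb(skb);
                skb = new_skb;
        }

        skb->transport_header = old_th;      /* inner header = transport */
        skb_push(skb, sizeof(struct iphdr)); /* room for the outer header */
        skb_reset_network_header(skb);
        /* caller fills ip_hdr(skb): version, ihl, frag_off, saddr, ... */
        return skb;
}
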
diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c
index 574c735836fc..b03c5ca2c823 100644
--- a/net/ipv4/multipath_drr.c
+++ b/net/ipv4/multipath_drr.c
@@ -100,7 +100,7 @@ static int drr_dev_event(struct notifier_block *this,
100 100
101 spin_unlock_bh(&state_lock); 101 spin_unlock_bh(&state_lock);
102 break; 102 break;
103 }; 103 }
104 104
105 return NOTIFY_DONE; 105 return NOTIFY_DONE;
106} 106}
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 6069a11514f6..b44192924f95 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -10,7 +10,7 @@
10/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ 10/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
11int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) 11int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
12{ 12{
13 struct iphdr *iph = (*pskb)->nh.iph; 13 const struct iphdr *iph = ip_hdr(*pskb);
14 struct rtable *rt; 14 struct rtable *rt;
15 struct flowi fl = {}; 15 struct flowi fl = {};
16 struct dst_entry *odst; 16 struct dst_entry *odst;
@@ -142,7 +142,7 @@ static void nf_ip_saveroute(const struct sk_buff *skb, struct nf_info *info)
142 struct ip_rt_info *rt_info = nf_info_reroute(info); 142 struct ip_rt_info *rt_info = nf_info_reroute(info);
143 143
144 if (info->hook == NF_IP_LOCAL_OUT) { 144 if (info->hook == NF_IP_LOCAL_OUT) {
145 const struct iphdr *iph = skb->nh.iph; 145 const struct iphdr *iph = ip_hdr(skb);
146 146
147 rt_info->tos = iph->tos; 147 rt_info->tos = iph->tos;
148 rt_info->daddr = iph->daddr; 148 rt_info->daddr = iph->daddr;
@@ -155,7 +155,7 @@ static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info)
155 const struct ip_rt_info *rt_info = nf_info_reroute(info); 155 const struct ip_rt_info *rt_info = nf_info_reroute(info);
156 156
157 if (info->hook == NF_IP_LOCAL_OUT) { 157 if (info->hook == NF_IP_LOCAL_OUT) {
158 struct iphdr *iph = (*pskb)->nh.iph; 158 const struct iphdr *iph = ip_hdr(*pskb);
159 159
160 if (!(iph->tos == rt_info->tos 160 if (!(iph->tos == rt_info->tos
161 && iph->daddr == rt_info->daddr 161 && iph->daddr == rt_info->daddr
@@ -168,7 +168,7 @@ static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info)
168__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, 168__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
169 unsigned int dataoff, u_int8_t protocol) 169 unsigned int dataoff, u_int8_t protocol)
170{ 170{
171 struct iphdr *iph = skb->nh.iph; 171 const struct iphdr *iph = ip_hdr(skb);
172 __sum16 csum = 0; 172 __sum16 csum = 0;
173 173
174 switch (skb->ip_summed) { 174 switch (skb->ip_summed) {
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 601808c796ec..46509fae9fd8 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -30,188 +30,6 @@ config NF_CONNTRACK_PROC_COMPAT
30 30
31 If unsure, say Y. 31 If unsure, say Y.
32 32
33# connection tracking, helpers and protocols
34config IP_NF_CT_ACCT
35 bool "Connection tracking flow accounting"
36 depends on IP_NF_CONNTRACK
37 help
38 If this option is enabled, the connection tracking code will
39 keep per-flow packet and byte counters.
40
41 Those counters can be used for flow-based accounting or the
42 `connbytes' match.
43
44 If unsure, say `N'.
45
46config IP_NF_CONNTRACK_MARK
47 bool 'Connection mark tracking support'
48 depends on IP_NF_CONNTRACK
49 help
50 This option enables support for connection marks, used by the
51 `CONNMARK' target and `connmark' match. Similar to the mark value
52 of packets, but this mark value is kept in the conntrack session
53 instead of the individual packets.
54
55config IP_NF_CONNTRACK_SECMARK
56 bool 'Connection tracking security mark support'
57 depends on IP_NF_CONNTRACK && NETWORK_SECMARK
58 help
59 This option enables security markings to be applied to
60 connections. Typically they are copied to connections from
61 packets using the CONNSECMARK target and copied back from
62 connections to packets with the same target, with the packets
63 being originally labeled via SECMARK.
64
65 If unsure, say 'N'.
66
67config IP_NF_CONNTRACK_EVENTS
68 bool "Connection tracking events (EXPERIMENTAL)"
69 depends on EXPERIMENTAL && IP_NF_CONNTRACK
70 help
71 If this option is enabled, the connection tracking code will
72 provide a notifier chain that can be used by other kernel code
73 to get notified about changes in the connection tracking state.
74
75 If unsure, say `N'.
76
77config IP_NF_CONNTRACK_NETLINK
78 tristate 'Connection tracking netlink interface (EXPERIMENTAL)'
79 depends on EXPERIMENTAL && IP_NF_CONNTRACK && NETFILTER_NETLINK
80 depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
81 depends on IP_NF_NAT=n || IP_NF_NAT
82 help
83 This option enables support for a netlink-based userspace interface
84
85
86config IP_NF_CT_PROTO_SCTP
87 tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
88 depends on IP_NF_CONNTRACK && EXPERIMENTAL
89 help
90 With this option enabled, the connection tracking code will
91 be able to do state tracking on SCTP connections.
92
93 If you want to compile it as a module, say M here and read
94 <file:Documentation/modules.txt>. If unsure, say `N'.
95
96config IP_NF_FTP
97 tristate "FTP protocol support"
98 depends on IP_NF_CONNTRACK
99 help
100 Tracking FTP connections is problematic: special helpers are
101 required for tracking them, and doing masquerading and other forms
102 of Network Address Translation on them.
103
104 To compile it as a module, choose M here. If unsure, say Y.
105
106config IP_NF_IRC
107 tristate "IRC protocol support"
108 depends on IP_NF_CONNTRACK
109 ---help---
110 There is a commonly-used extension to IRC called
111 Direct Client-to-Client Protocol (DCC). This enables users to send
112 files to each other, and also chat to each other without the need
113 of a server. DCC Sending is used anywhere you send files over IRC,
114 and DCC Chat is most commonly used by Eggdrop bots. If you are
115 using NAT, this extension will enable you to send files and initiate
116 chats. Note that you do NOT need this extension to get files or
117 have others initiate chats, or everything else in IRC.
118
119 To compile it as a module, choose M here. If unsure, say Y.
120
121config IP_NF_NETBIOS_NS
122 tristate "NetBIOS name service protocol support (EXPERIMENTAL)"
123 depends on IP_NF_CONNTRACK && EXPERIMENTAL
124 help
125 NetBIOS name service requests are sent as broadcast messages from an
126 unprivileged port and responded to with unicast messages to the
127	  same port. This makes them hard to firewall properly because connection
128 tracking doesn't deal with broadcasts. This helper tracks locally
129 originating NetBIOS name service requests and the corresponding
130 responses. It relies on correct IP address configuration, specifically
131 netmask and broadcast address. When properly configured, the output
132 of "ip address show" should look similar to this:
133
134 $ ip -4 address show eth0
135 4: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc pfifo_fast qlen 1000
136 inet 172.16.2.252/24 brd 172.16.2.255 scope global eth0
137
138 To compile it as a module, choose M here. If unsure, say N.
139
140config IP_NF_TFTP
141 tristate "TFTP protocol support"
142 depends on IP_NF_CONNTRACK
143 help
144	  TFTP connection tracking helper; whether it is required depends
145	  on how restrictive your ruleset is.
146	  If you are using a tftp client behind -j SNAT or -j MASQUERADE,
147	  you will need this.
148
149 To compile it as a module, choose M here. If unsure, say Y.
150
151config IP_NF_AMANDA
152 tristate "Amanda backup protocol support"
153 depends on IP_NF_CONNTRACK
154 select TEXTSEARCH
155 select TEXTSEARCH_KMP
156 help
157 If you are running the Amanda backup package <http://www.amanda.org/>
158 on this machine or machines that will be MASQUERADED through this
159	  machine, then you may want to enable this feature. This lets the
160	  connection tracking and NAT code open the sub-channels that
161	  Amanda requires for communicating the backup data, messages and
162	  index.
163
164 To compile it as a module, choose M here. If unsure, say Y.
165
166config IP_NF_PPTP
167 tristate 'PPTP protocol support'
168 depends on IP_NF_CONNTRACK
169 help
170	  This module adds support for PPTP (Point-to-Point Tunnelling
171	  Protocol, RFC 2637) connection tracking and NAT.
172
173 If you are running PPTP sessions over a stateful firewall or NAT
174 box, you may want to enable this feature.
175
176 Please note that not all PPTP modes of operation are supported yet.
177	  For more information, read the top of the file
178	  net/ipv4/netfilter/ip_conntrack_pptp.c
179
180 If you want to compile it as a module, say M here and read
181 Documentation/modules.txt. If unsure, say `N'.
182
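	  The GRE dependency exists because PPTP splits into a TCP
	  control connection on port 1723 and a data tunnel carried in
	  enhanced GRE, demultiplexed by a call ID (RFC 2637). A
	  simplified sketch of the header fields the tracker keys on,
	  with illustrative field names:

		/* enhanced GRE header as used by PPTP (RFC 2637), simplified */
		struct pptp_gre_header {
			__be16	flags_ver;	/* K (key) bit set, GRE version 1 */
			__be16	protocol;	/* 0x880B: PPP */
			__be16	payload_len;
			__be16	call_id;	/* distinguishes one tunnel from another */
		};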
183config IP_NF_H323
184 tristate 'H.323 protocol support (EXPERIMENTAL)'
185 depends on IP_NF_CONNTRACK && EXPERIMENTAL
186 help
187 H.323 is a VoIP signalling protocol from ITU-T. As one of the most
188 important VoIP protocols, it is widely used by voice hardware and
189 software including voice gateways, IP phones, Netmeeting, OpenPhone,
190 Gnomemeeting, etc.
191
192 With this module you can support H.323 on a connection tracking/NAT
193 firewall.
194
195 This module supports RAS, Fast Start, H.245 Tunnelling, Call
196 Forwarding, RTP/RTCP and T.120 based audio, video, fax, chat,
197 whiteboard, file transfer, etc. For more information, please
198 visit http://nath323.sourceforge.net/.
199
200 If you want to compile it as a module, say 'M' here and read
201 Documentation/modules.txt. If unsure, say 'N'.
202
203config IP_NF_SIP
204 tristate "SIP protocol support (EXPERIMENTAL)"
205 depends on IP_NF_CONNTRACK && EXPERIMENTAL
206 help
207 SIP is an application-layer control protocol that can establish,
208 modify, and terminate multimedia sessions (conferences) such as
209 Internet telephony calls. With the ip_conntrack_sip and
210 the ip_nat_sip modules you can support the protocol on a connection
211 tracking/NATing firewall.
212
213 To compile it as a module, choose M here. If unsure, say Y.
214
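	  Like FTP, SIP is NAT-hostile because the media endpoints
	  travel in-band, in the SDP body of the signalling messages;
	  for example the connection line (address illustrative)

		c=IN IP4 10.0.0.1

	  names the private address that RTP should flow to, and fields
	  of this kind are what ip_nat_sip has to rewrite.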
215config IP_NF_QUEUE 33config IP_NF_QUEUE
216 tristate "IP Userspace queueing via NETLINK (OBSOLETE)" 34 tristate "IP Userspace queueing via NETLINK (OBSOLETE)"
217 help 35 help
@@ -361,17 +179,6 @@ config IP_NF_TARGET_ULOG
361 179
362 To compile it as a module, choose M here. If unsure, say N. 180 To compile it as a module, choose M here. If unsure, say N.
363 181
364# NAT + specific targets: ip_conntrack
365config IP_NF_NAT
366 tristate "Full NAT"
367 depends on IP_NF_IPTABLES && IP_NF_CONNTRACK
368 help
369 The Full NAT option allows masquerading, port forwarding and other
370 forms of full Network Address Port Translation. It is controlled by
371 the `nat' table in iptables: see the man page for iptables(8).
372
373 To compile it as a module, choose M here. If unsure, say N.
374
375# NAT + specific targets: nf_conntrack 182# NAT + specific targets: nf_conntrack
376config NF_NAT 183config NF_NAT
377 tristate "Full NAT" 184 tristate "Full NAT"
@@ -383,11 +190,6 @@ config NF_NAT
383 190
384 To compile it as a module, choose M here. If unsure, say N. 191 To compile it as a module, choose M here. If unsure, say N.
385 192
386config IP_NF_NAT_NEEDED
387 bool
388 depends on IP_NF_NAT
389 default y
390
391config NF_NAT_NEEDED 193config NF_NAT_NEEDED
392 bool 194 bool
393 depends on NF_NAT 195 depends on NF_NAT
@@ -395,7 +197,7 @@ config NF_NAT_NEEDED
395 197
396config IP_NF_TARGET_MASQUERADE 198config IP_NF_TARGET_MASQUERADE
397 tristate "MASQUERADE target support" 199 tristate "MASQUERADE target support"
398 depends on (NF_NAT || IP_NF_NAT) 200 depends on NF_NAT
399 help 201 help
400 Masquerading is a special case of NAT: all outgoing connections are 202 Masquerading is a special case of NAT: all outgoing connections are
401 changed to seem to come from a particular interface's address, and 203 changed to seem to come from a particular interface's address, and
@@ -407,7 +209,7 @@ config IP_NF_TARGET_MASQUERADE
407 209
408config IP_NF_TARGET_REDIRECT 210config IP_NF_TARGET_REDIRECT
409 tristate "REDIRECT target support" 211 tristate "REDIRECT target support"
410 depends on (NF_NAT || IP_NF_NAT) 212 depends on NF_NAT
411 help 213 help
412 REDIRECT is a special case of NAT: all incoming connections are 214 REDIRECT is a special case of NAT: all incoming connections are
413 mapped onto the incoming interface's address, causing the packets to 215 mapped onto the incoming interface's address, causing the packets to
@@ -418,7 +220,7 @@ config IP_NF_TARGET_REDIRECT
418 220
419config IP_NF_TARGET_NETMAP 221config IP_NF_TARGET_NETMAP
420 tristate "NETMAP target support" 222 tristate "NETMAP target support"
421 depends on (NF_NAT || IP_NF_NAT) 223 depends on NF_NAT
422 help 224 help
423 NETMAP is an implementation of static 1:1 NAT mapping of network 225 NETMAP is an implementation of static 1:1 NAT mapping of network
424 addresses. It maps the network address part, while keeping the host 226 addresses. It maps the network address part, while keeping the host
@@ -429,28 +231,13 @@ config IP_NF_TARGET_NETMAP
429 231
430config IP_NF_TARGET_SAME 232config IP_NF_TARGET_SAME
431 tristate "SAME target support" 233 tristate "SAME target support"
432 depends on (NF_NAT || IP_NF_NAT) 234 depends on NF_NAT
433 help 235 help
434 This option adds a `SAME' target, which works like the standard SNAT 236 This option adds a `SAME' target, which works like the standard SNAT
435 target, but attempts to give clients the same IP for all connections. 237 target, but attempts to give clients the same IP for all connections.
436 238
437 To compile it as a module, choose M here. If unsure, say N. 239 To compile it as a module, choose M here. If unsure, say N.
438 240
439config IP_NF_NAT_SNMP_BASIC
440 tristate "Basic SNMP-ALG support (EXPERIMENTAL)"
441 depends on EXPERIMENTAL && IP_NF_NAT
442 ---help---
443
444 This module implements an Application Layer Gateway (ALG) for
445 SNMP payloads. In conjunction with NAT, it allows a network
446 management system to access multiple private networks with
447 conflicting addresses. It works by modifying IP addresses
448 inside SNMP payloads to match IP-layer NAT mapping.
449
450	  This is the "basic" form of SNMP-ALG, as described in RFC 2962.
451
452 To compile it as a module, choose M here. If unsure, say N.
453
454config NF_NAT_SNMP_BASIC 241config NF_NAT_SNMP_BASIC
455 tristate "Basic SNMP-ALG support (EXPERIMENTAL)" 242 tristate "Basic SNMP-ALG support (EXPERIMENTAL)"
456 depends on EXPERIMENTAL && NF_NAT 243 depends on EXPERIMENTAL && NF_NAT
@@ -477,78 +264,37 @@ config NF_NAT_PROTO_GRE
477 tristate 264 tristate
478 depends on NF_NAT && NF_CT_PROTO_GRE 265 depends on NF_NAT && NF_CT_PROTO_GRE
479 266
480config IP_NF_NAT_FTP
481 tristate
482 depends on IP_NF_IPTABLES && IP_NF_CONNTRACK && IP_NF_NAT
483 default IP_NF_NAT && IP_NF_FTP
484
485config NF_NAT_FTP 267config NF_NAT_FTP
486 tristate 268 tristate
487 depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT 269 depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
488 default NF_NAT && NF_CONNTRACK_FTP 270 default NF_NAT && NF_CONNTRACK_FTP
489 271
490config IP_NF_NAT_IRC
491 tristate
492 depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
493 default IP_NF_NAT if IP_NF_IRC=y
494 default m if IP_NF_IRC=m
495
496config NF_NAT_IRC 272config NF_NAT_IRC
497 tristate 273 tristate
498 depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT 274 depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
499 default NF_NAT && NF_CONNTRACK_IRC 275 default NF_NAT && NF_CONNTRACK_IRC
500 276
501config IP_NF_NAT_TFTP
502 tristate
503 depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
504 default IP_NF_NAT if IP_NF_TFTP=y
505 default m if IP_NF_TFTP=m
506
507config NF_NAT_TFTP 277config NF_NAT_TFTP
508 tristate 278 tristate
509 depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT 279 depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
510 default NF_NAT && NF_CONNTRACK_TFTP 280 default NF_NAT && NF_CONNTRACK_TFTP
511 281
512config IP_NF_NAT_AMANDA
513 tristate
514 depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
515 default IP_NF_NAT if IP_NF_AMANDA=y
516 default m if IP_NF_AMANDA=m
517
518config NF_NAT_AMANDA 282config NF_NAT_AMANDA
519 tristate 283 tristate
520 depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT 284 depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
521 default NF_NAT && NF_CONNTRACK_AMANDA 285 default NF_NAT && NF_CONNTRACK_AMANDA
522 286
523config IP_NF_NAT_PPTP
524 tristate
525 depends on IP_NF_NAT!=n && IP_NF_PPTP!=n
526 default IP_NF_NAT if IP_NF_PPTP=y
527 default m if IP_NF_PPTP=m
528
529config NF_NAT_PPTP 287config NF_NAT_PPTP
530 tristate 288 tristate
531 depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT 289 depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
532 default NF_NAT && NF_CONNTRACK_PPTP 290 default NF_NAT && NF_CONNTRACK_PPTP
533 select NF_NAT_PROTO_GRE 291 select NF_NAT_PROTO_GRE
534 292
535config IP_NF_NAT_H323
536 tristate
537 depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
538 default IP_NF_NAT if IP_NF_H323=y
539 default m if IP_NF_H323=m
540
541config NF_NAT_H323 293config NF_NAT_H323
542 tristate 294 tristate
543 depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT 295 depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
544 default NF_NAT && NF_CONNTRACK_H323 296 default NF_NAT && NF_CONNTRACK_H323
545 297
546config IP_NF_NAT_SIP
547 tristate
548 depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
549 default IP_NF_NAT if IP_NF_SIP=y
550 default m if IP_NF_SIP=m
551
552config NF_NAT_SIP 298config NF_NAT_SIP
553 tristate 299 tristate
554 depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT 300 depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
@@ -606,9 +352,8 @@ config IP_NF_TARGET_TTL
606config IP_NF_TARGET_CLUSTERIP 352config IP_NF_TARGET_CLUSTERIP
607 tristate "CLUSTERIP target support (EXPERIMENTAL)" 353 tristate "CLUSTERIP target support (EXPERIMENTAL)"
608 depends on IP_NF_MANGLE && EXPERIMENTAL 354 depends on IP_NF_MANGLE && EXPERIMENTAL
609 depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4 355 depends on NF_CONNTRACK_IPV4
610 select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK 356 select NF_CONNTRACK_MARK
611 select NF_CONNTRACK_MARK if NF_CONNTRACK_IPV4
612 help 357 help
613 The CLUSTERIP target allows you to build load-balancing clusters of 358 The CLUSTERIP target allows you to build load-balancing clusters of
614 network servers without having a dedicated load-balancing 359 network servers without having a dedicated load-balancing
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 6625ec68180c..409d273f6f82 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -2,8 +2,6 @@
2# Makefile for the netfilter modules on top of IPv4. 2# Makefile for the netfilter modules on top of IPv4.
3# 3#
4 4
5# objects for the standalone - connection tracking / NAT
6ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o
7# objects for l3 independent conntrack 5# objects for l3 independent conntrack
8nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o 6nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
9ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y) 7ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y)
@@ -12,53 +10,14 @@ nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o
12endif 10endif
13endif 11endif
14 12
15ip_nat-objs := ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o 13nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
16nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
17ifneq ($(CONFIG_NF_NAT),)
18iptable_nat-objs := nf_nat_rule.o nf_nat_standalone.o 14iptable_nat-objs := nf_nat_rule.o nf_nat_standalone.o
19else
20iptable_nat-objs := ip_nat_rule.o ip_nat_standalone.o
21endif
22
23ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o
24ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o
25
26ip_conntrack_h323-objs := ip_conntrack_helper_h323.o ../../netfilter/nf_conntrack_h323_asn1.o
27ip_nat_h323-objs := ip_nat_helper_h323.o
28 15
29# connection tracking 16# connection tracking
30obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
31obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o 17obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
32 18
33obj-$(CONFIG_IP_NF_NAT) += ip_nat.o
34obj-$(CONFIG_NF_NAT) += nf_nat.o 19obj-$(CONFIG_NF_NAT) += nf_nat.o
35 20
36# conntrack netlink interface
37obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o
38
39
40# SCTP protocol connection tracking
41obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o
42
43# connection tracking helpers
44obj-$(CONFIG_IP_NF_H323) += ip_conntrack_h323.o
45obj-$(CONFIG_IP_NF_PPTP) += ip_conntrack_pptp.o
46obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o
47obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o
48obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o
49obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o
50obj-$(CONFIG_IP_NF_SIP) += ip_conntrack_sip.o
51obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o
52
53# NAT helpers (ip_conntrack)
54obj-$(CONFIG_IP_NF_NAT_H323) += ip_nat_h323.o
55obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o
56obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o
57obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o
58obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o
59obj-$(CONFIG_IP_NF_NAT_IRC) += ip_nat_irc.o
60obj-$(CONFIG_IP_NF_NAT_SIP) += ip_nat_sip.o
61
62# NAT helpers (nf_conntrack) 21# NAT helpers (nf_conntrack)
63obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o 22obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
64obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o 23obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o
@@ -78,7 +37,6 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
78# the three instances of ip_tables 37# the three instances of ip_tables
79obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o 38obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
80obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o 39obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
81obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
82obj-$(CONFIG_NF_NAT) += iptable_nat.o 40obj-$(CONFIG_NF_NAT) += iptable_nat.o
83obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o 41obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
84 42
@@ -100,7 +58,6 @@ obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
100obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o 58obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
101obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o 59obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
102obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o 60obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o
103obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip_nat_snmp_basic.o
104obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o 61obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
105obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o 62obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
106obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o 63obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 57b0221f9e24..cae41215e3c7 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -245,7 +245,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
245 e = get_entry(table_base, private->hook_entry[hook]); 245 e = get_entry(table_base, private->hook_entry[hook]);
246 back = get_entry(table_base, private->underflow[hook]); 246 back = get_entry(table_base, private->underflow[hook]);
247 247
248 arp = (*pskb)->nh.arph; 248 arp = arp_hdr(*pskb);
249 do { 249 do {
250 if (arp_packet_match(arp, (*pskb)->dev, indev, outdev, &e->arp)) { 250 if (arp_packet_match(arp, (*pskb)->dev, indev, outdev, &e->arp)) {
251 struct arpt_entry_target *t; 251 struct arpt_entry_target *t;
@@ -297,7 +297,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
297 t->data); 297 t->data);
298 298
299 /* Target might have changed stuff. */ 299 /* Target might have changed stuff. */
300 arp = (*pskb)->nh.arph; 300 arp = arp_hdr(*pskb);
301 301
302 if (verdict == ARPT_CONTINUE) 302 if (verdict == ARPT_CONTINUE)
303 e = (void *)e + e->next_offset; 303 e = (void *)e + e->next_offset;
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index 709db4d3f48f..6298d404e7c7 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -30,35 +30,35 @@ target(struct sk_buff **pskb,
30 *pskb = nskb; 30 *pskb = nskb;
31 } 31 }
32 32
33 arp = (*pskb)->nh.arph; 33 arp = arp_hdr(*pskb);
34 arpptr = (*pskb)->nh.raw + sizeof(*arp); 34 arpptr = skb_network_header(*pskb) + sizeof(*arp);
35 pln = arp->ar_pln; 35 pln = arp->ar_pln;
36 hln = arp->ar_hln; 36 hln = arp->ar_hln;
37 /* We assume that pln and hln were checked in the match */ 37 /* We assume that pln and hln were checked in the match */
38 if (mangle->flags & ARPT_MANGLE_SDEV) { 38 if (mangle->flags & ARPT_MANGLE_SDEV) {
39 if (ARPT_DEV_ADDR_LEN_MAX < hln || 39 if (ARPT_DEV_ADDR_LEN_MAX < hln ||
40 (arpptr + hln > (**pskb).tail)) 40 (arpptr + hln > skb_tail_pointer(*pskb)))
41 return NF_DROP; 41 return NF_DROP;
42 memcpy(arpptr, mangle->src_devaddr, hln); 42 memcpy(arpptr, mangle->src_devaddr, hln);
43 } 43 }
44 arpptr += hln; 44 arpptr += hln;
45 if (mangle->flags & ARPT_MANGLE_SIP) { 45 if (mangle->flags & ARPT_MANGLE_SIP) {
46 if (ARPT_MANGLE_ADDR_LEN_MAX < pln || 46 if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
47 (arpptr + pln > (**pskb).tail)) 47 (arpptr + pln > skb_tail_pointer(*pskb)))
48 return NF_DROP; 48 return NF_DROP;
49 memcpy(arpptr, &mangle->u_s.src_ip, pln); 49 memcpy(arpptr, &mangle->u_s.src_ip, pln);
50 } 50 }
51 arpptr += pln; 51 arpptr += pln;
52 if (mangle->flags & ARPT_MANGLE_TDEV) { 52 if (mangle->flags & ARPT_MANGLE_TDEV) {
53 if (ARPT_DEV_ADDR_LEN_MAX < hln || 53 if (ARPT_DEV_ADDR_LEN_MAX < hln ||
54 (arpptr + hln > (**pskb).tail)) 54 (arpptr + hln > skb_tail_pointer(*pskb)))
55 return NF_DROP; 55 return NF_DROP;
56 memcpy(arpptr, mangle->tgt_devaddr, hln); 56 memcpy(arpptr, mangle->tgt_devaddr, hln);
57 } 57 }
58 arpptr += hln; 58 arpptr += hln;
59 if (mangle->flags & ARPT_MANGLE_TIP) { 59 if (mangle->flags & ARPT_MANGLE_TIP) {
60 if (ARPT_MANGLE_ADDR_LEN_MAX < pln || 60 if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
61 (arpptr + pln > (**pskb).tail)) 61 (arpptr + pln > skb_tail_pointer(*pskb)))
62 return NF_DROP; 62 return NF_DROP;
63 memcpy(arpptr, &mangle->u_t.tgt_ip, pln); 63 memcpy(arpptr, &mangle->u_t.tgt_ip, pln);
64 } 64 }
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
deleted file mode 100644
index 4f561f52c83a..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ /dev/null
@@ -1,229 +0,0 @@
1/* Amanda extension for IP connection tracking, Version 0.2
2 * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
3 * based on HW's ip_conntrack_irc.c as well as other modules
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 *
10 * Module load syntax:
11 * insmod ip_conntrack_amanda.o [master_timeout=n]
12 *
13 * Where master_timeout is the timeout (in seconds) of the master
14 * connection (port 10080). This defaults to 5 minutes but if
15 * your clients take longer than 5 minutes to do their work
16 * before getting back to the Amanda server, you can increase
17 * this value.
18 *
19 */
20#include <linux/kernel.h>
21#include <linux/module.h>
22#include <linux/moduleparam.h>
23#include <linux/textsearch.h>
24#include <linux/skbuff.h>
25#include <linux/in.h>
26#include <linux/ip.h>
27#include <linux/udp.h>
28
29#include <linux/netfilter.h>
30#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
31#include <linux/netfilter_ipv4/ip_conntrack_amanda.h>
32
33static unsigned int master_timeout = 300;
34static char *ts_algo = "kmp";
35
36MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
37MODULE_DESCRIPTION("Amanda connection tracking module");
38MODULE_LICENSE("GPL");
39module_param(master_timeout, uint, 0600);
40MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
41module_param(ts_algo, charp, 0400);
42MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)");
43
44unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb,
45 enum ip_conntrack_info ctinfo,
46 unsigned int matchoff,
47 unsigned int matchlen,
48 struct ip_conntrack_expect *exp);
49EXPORT_SYMBOL_GPL(ip_nat_amanda_hook);
50
51enum amanda_strings {
52 SEARCH_CONNECT,
53 SEARCH_NEWLINE,
54 SEARCH_DATA,
55 SEARCH_MESG,
56 SEARCH_INDEX,
57};
58
59static struct {
60 char *string;
61 size_t len;
62 struct ts_config *ts;
63} search[] = {
64 [SEARCH_CONNECT] = {
65 .string = "CONNECT ",
66 .len = 8,
67 },
68 [SEARCH_NEWLINE] = {
69 .string = "\n",
70 .len = 1,
71 },
72 [SEARCH_DATA] = {
73 .string = "DATA ",
74 .len = 5,
75 },
76 [SEARCH_MESG] = {
77 .string = "MESG ",
78 .len = 5,
79 },
80 [SEARCH_INDEX] = {
81 .string = "INDEX ",
82 .len = 6,
83 },
84};
85
86static int help(struct sk_buff **pskb,
87 struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
88{
89 struct ts_state ts;
90 struct ip_conntrack_expect *exp;
91 unsigned int dataoff, start, stop, off, i;
92 char pbuf[sizeof("65535")], *tmp;
93 u_int16_t port, len;
94 int ret = NF_ACCEPT;
95 typeof(ip_nat_amanda_hook) ip_nat_amanda;
96
97 /* Only look at packets from the Amanda server */
98 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
99 return NF_ACCEPT;
100
101 /* increase the UDP timeout of the master connection as replies from
102 * Amanda clients to the server can be quite delayed */
103 ip_ct_refresh(ct, *pskb, master_timeout * HZ);
104
105 /* No data? */
106 dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
107 if (dataoff >= (*pskb)->len) {
108 if (net_ratelimit())
109 printk("amanda_help: skblen = %u\n", (*pskb)->len);
110 return NF_ACCEPT;
111 }
112
113 memset(&ts, 0, sizeof(ts));
114 start = skb_find_text(*pskb, dataoff, (*pskb)->len,
115 search[SEARCH_CONNECT].ts, &ts);
116 if (start == UINT_MAX)
117 goto out;
118 start += dataoff + search[SEARCH_CONNECT].len;
119
120 memset(&ts, 0, sizeof(ts));
121 stop = skb_find_text(*pskb, start, (*pskb)->len,
122 search[SEARCH_NEWLINE].ts, &ts);
123 if (stop == UINT_MAX)
124 goto out;
125 stop += start;
126
127 for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) {
128 memset(&ts, 0, sizeof(ts));
129 off = skb_find_text(*pskb, start, stop, search[i].ts, &ts);
130 if (off == UINT_MAX)
131 continue;
132 off += start + search[i].len;
133
134 len = min_t(unsigned int, sizeof(pbuf) - 1, stop - off);
135 if (skb_copy_bits(*pskb, off, pbuf, len))
136 break;
137 pbuf[len] = '\0';
138
139 port = simple_strtoul(pbuf, &tmp, 10);
140 len = tmp - pbuf;
141 if (port == 0 || len > 5)
142 break;
143
144 exp = ip_conntrack_expect_alloc(ct);
145 if (exp == NULL) {
146 ret = NF_DROP;
147 goto out;
148 }
149
150 exp->expectfn = NULL;
151 exp->flags = 0;
152
153 exp->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
154 exp->tuple.src.u.tcp.port = 0;
155 exp->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
156 exp->tuple.dst.protonum = IPPROTO_TCP;
157 exp->tuple.dst.u.tcp.port = htons(port);
158
159 exp->mask.src.ip = htonl(0xFFFFFFFF);
160 exp->mask.src.u.tcp.port = 0;
161 exp->mask.dst.ip = htonl(0xFFFFFFFF);
162 exp->mask.dst.protonum = 0xFF;
163 exp->mask.dst.u.tcp.port = htons(0xFFFF);
164
165 /* RCU read locked by nf_hook_slow */
166 ip_nat_amanda = rcu_dereference(ip_nat_amanda_hook);
167 if (ip_nat_amanda)
168 ret = ip_nat_amanda(pskb, ctinfo, off - dataoff,
169 len, exp);
170 else if (ip_conntrack_expect_related(exp) != 0)
171 ret = NF_DROP;
172 ip_conntrack_expect_put(exp);
173 }
174
175out:
176 return ret;
177}
178
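   For reference, the loop above recovers the sub-channel ports from an
   Amanda control reply of the form (ports illustrative):

	CONNECT DATA 50001 MESG 50002 INDEX 50003

   One TCP expectation is created per DATA/MESG/INDEX port found, so the
   client's later sub-connections to those ports are picked up as RELATED.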
179static struct ip_conntrack_helper amanda_helper = {
180 .max_expected = 3,
181 .timeout = 180,
182 .me = THIS_MODULE,
183 .help = help,
184 .name = "amanda",
185
186 .tuple = { .src = { .u = { .udp = {.port = __constant_htons(10080) } } },
187 .dst = { .protonum = IPPROTO_UDP },
188 },
189 .mask = { .src = { .u = { 0xFFFF } },
190 .dst = { .protonum = 0xFF },
191 },
192};
193
194static void __exit ip_conntrack_amanda_fini(void)
195{
196 int i;
197
198 ip_conntrack_helper_unregister(&amanda_helper);
199 for (i = 0; i < ARRAY_SIZE(search); i++)
200 textsearch_destroy(search[i].ts);
201}
202
203static int __init ip_conntrack_amanda_init(void)
204{
205 int ret, i;
206
207 ret = -ENOMEM;
208 for (i = 0; i < ARRAY_SIZE(search); i++) {
209 search[i].ts = textsearch_prepare(ts_algo, search[i].string,
210 search[i].len,
211 GFP_KERNEL, TS_AUTOLOAD);
212 if (search[i].ts == NULL)
213 goto err;
214 }
215 ret = ip_conntrack_helper_register(&amanda_helper);
216 if (ret < 0)
217 goto err;
218 return 0;
219
220err:
221	/* i equals ARRAY_SIZE(search) when helper registration failed,
222	 * so only walk back over the patterns actually prepared */
223	while (--i >= 0)
224		textsearch_destroy(search[i].ts);
225 return ret;
226}
227
228module_init(ip_conntrack_amanda_init);
229module_exit(ip_conntrack_amanda_fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
deleted file mode 100644
index 23b99ae2cc37..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ /dev/null
@@ -1,1550 +0,0 @@
1/* Connection state tracking for netfilter. This is separated from,
2 but required by, the NAT layer; it can also be used by an iptables
3 extension. */
4
5/* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 *
12 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
13 * - new API and handling of conntrack/nat helpers
14 * - now capable of multiple expectations for one master
15 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
16 * - add usage/reference counts to ip_conntrack_expect
17 * - export ip_conntrack[_expect]_{find_get,put} functions
18 * */
19
20#include <linux/types.h>
21#include <linux/icmp.h>
22#include <linux/ip.h>
23#include <linux/netfilter.h>
24#include <linux/netfilter_ipv4.h>
25#include <linux/module.h>
26#include <linux/skbuff.h>
27#include <linux/proc_fs.h>
28#include <linux/vmalloc.h>
29#include <net/checksum.h>
30#include <net/ip.h>
31#include <linux/stddef.h>
32#include <linux/sysctl.h>
33#include <linux/slab.h>
34#include <linux/random.h>
35#include <linux/jhash.h>
36#include <linux/err.h>
37#include <linux/percpu.h>
38#include <linux/moduleparam.h>
39#include <linux/notifier.h>
40
41/* ip_conntrack_lock protects the main hash table, protocol/helper/expected
42   registrations, conntrack timers */
43#include <linux/netfilter_ipv4/ip_conntrack.h>
44#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
45#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
46#include <linux/netfilter_ipv4/ip_conntrack_core.h>
47
48#define IP_CONNTRACK_VERSION "2.4"
49
50#if 0
51#define DEBUGP printk
52#else
53#define DEBUGP(format, args...)
54#endif
55
56DEFINE_RWLOCK(ip_conntrack_lock);
57
58/* ip_conntrack_standalone needs this */
59atomic_t ip_conntrack_count = ATOMIC_INIT(0);
60
61void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
62LIST_HEAD(ip_conntrack_expect_list);
63struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO] __read_mostly;
64static LIST_HEAD(helpers);
65unsigned int ip_conntrack_htable_size __read_mostly = 0;
66int ip_conntrack_max __read_mostly;
67struct list_head *ip_conntrack_hash __read_mostly;
68static struct kmem_cache *ip_conntrack_cachep __read_mostly;
69static struct kmem_cache *ip_conntrack_expect_cachep __read_mostly;
70struct ip_conntrack ip_conntrack_untracked;
71unsigned int ip_ct_log_invalid __read_mostly;
72static LIST_HEAD(unconfirmed);
73static int ip_conntrack_vmalloc __read_mostly;
74
75static unsigned int ip_conntrack_next_id;
76static unsigned int ip_conntrack_expect_next_id;
77#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
78ATOMIC_NOTIFIER_HEAD(ip_conntrack_chain);
79ATOMIC_NOTIFIER_HEAD(ip_conntrack_expect_chain);
80
81DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
82
83/* deliver cached events and clear cache entry - must be called with locally
84 * disabled softirqs */
85static inline void
86__ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache)
87{
88 DEBUGP("ecache: delivering events for %p\n", ecache->ct);
89 if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events)
90 atomic_notifier_call_chain(&ip_conntrack_chain, ecache->events,
91 ecache->ct);
92 ecache->events = 0;
93 ip_conntrack_put(ecache->ct);
94 ecache->ct = NULL;
95}
96
97/* Deliver all cached events for a particular conntrack. This is called
98 * by code prior to async packet handling or freeing the skb */
99void ip_ct_deliver_cached_events(const struct ip_conntrack *ct)
100{
101 struct ip_conntrack_ecache *ecache;
102
103 local_bh_disable();
104 ecache = &__get_cpu_var(ip_conntrack_ecache);
105 if (ecache->ct == ct)
106 __ip_ct_deliver_cached_events(ecache);
107 local_bh_enable();
108}
109
110void __ip_ct_event_cache_init(struct ip_conntrack *ct)
111{
112 struct ip_conntrack_ecache *ecache;
113
114 /* take care of delivering potentially old events */
115 ecache = &__get_cpu_var(ip_conntrack_ecache);
116 BUG_ON(ecache->ct == ct);
117 if (ecache->ct)
118 __ip_ct_deliver_cached_events(ecache);
119 /* initialize for this conntrack/packet */
120 ecache->ct = ct;
121 nf_conntrack_get(&ct->ct_general);
122}
123
124/* flush the event cache - touches other CPUs' data and must not be called
125 * while packets are still passing through the code */
126static void ip_ct_event_cache_flush(void)
127{
128 struct ip_conntrack_ecache *ecache;
129 int cpu;
130
131 for_each_possible_cpu(cpu) {
132 ecache = &per_cpu(ip_conntrack_ecache, cpu);
133 if (ecache->ct)
134 ip_conntrack_put(ecache->ct);
135 }
136}
137#else
138static inline void ip_ct_event_cache_flush(void) {}
139#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
140
141DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
142
143static int ip_conntrack_hash_rnd_initted;
144static unsigned int ip_conntrack_hash_rnd;
145
146static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple,
147 unsigned int size, unsigned int rnd)
148{
149 return (jhash_3words((__force u32)tuple->src.ip,
150 ((__force u32)tuple->dst.ip ^ tuple->dst.protonum),
151 (tuple->src.u.all | (tuple->dst.u.all << 16)),
152 rnd) % size);
153}
154
155static u_int32_t
156hash_conntrack(const struct ip_conntrack_tuple *tuple)
157{
158 return __hash_conntrack(tuple, ip_conntrack_htable_size,
159 ip_conntrack_hash_rnd);
160}
161
162int
163ip_ct_get_tuple(const struct iphdr *iph,
164 const struct sk_buff *skb,
165 unsigned int dataoff,
166 struct ip_conntrack_tuple *tuple,
167 const struct ip_conntrack_protocol *protocol)
168{
169	/* Never happens: conntrack runs after defragmentation */
170 if (iph->frag_off & htons(IP_OFFSET)) {
171 printk("ip_conntrack_core: Frag of proto %u.\n",
172 iph->protocol);
173 return 0;
174 }
175
176 tuple->src.ip = iph->saddr;
177 tuple->dst.ip = iph->daddr;
178 tuple->dst.protonum = iph->protocol;
179 tuple->dst.dir = IP_CT_DIR_ORIGINAL;
180
181 return protocol->pkt_to_tuple(skb, dataoff, tuple);
182}
183
184int
185ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
186 const struct ip_conntrack_tuple *orig,
187 const struct ip_conntrack_protocol *protocol)
188{
189 inverse->src.ip = orig->dst.ip;
190 inverse->dst.ip = orig->src.ip;
191 inverse->dst.protonum = orig->dst.protonum;
192 inverse->dst.dir = !orig->dst.dir;
193
194 return protocol->invert_tuple(inverse, orig);
195}
196
197
198/* ip_conntrack_expect helper functions */
199void ip_ct_unlink_expect(struct ip_conntrack_expect *exp)
200{
201 IP_NF_ASSERT(!timer_pending(&exp->timeout));
202 list_del(&exp->list);
203 CONNTRACK_STAT_INC(expect_delete);
204 exp->master->expecting--;
205 ip_conntrack_expect_put(exp);
206}
207
208static void expectation_timed_out(unsigned long ul_expect)
209{
210 struct ip_conntrack_expect *exp = (void *)ul_expect;
211
212 write_lock_bh(&ip_conntrack_lock);
213 ip_ct_unlink_expect(exp);
214 write_unlock_bh(&ip_conntrack_lock);
215 ip_conntrack_expect_put(exp);
216}
217
218struct ip_conntrack_expect *
219__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
220{
221 struct ip_conntrack_expect *i;
222
223 list_for_each_entry(i, &ip_conntrack_expect_list, list) {
224 if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
225 return i;
226 }
227 return NULL;
228}
229
230/* Just find a expectation corresponding to a tuple. */
231struct ip_conntrack_expect *
232ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
233{
234 struct ip_conntrack_expect *i;
235
236 read_lock_bh(&ip_conntrack_lock);
237 i = __ip_conntrack_expect_find(tuple);
238 if (i)
239 atomic_inc(&i->use);
240 read_unlock_bh(&ip_conntrack_lock);
241
242 return i;
243}
244
245/* If an expectation for this connection is found, it gets deleted from
246 * the global list, then returned. */
247static struct ip_conntrack_expect *
248find_expectation(const struct ip_conntrack_tuple *tuple)
249{
250 struct ip_conntrack_expect *i;
251
252 list_for_each_entry(i, &ip_conntrack_expect_list, list) {
253 /* If master is not in hash table yet (ie. packet hasn't left
254 this machine yet), how can other end know about expected?
255 Hence these are not the droids you are looking for (if
256 master ct never got confirmed, we'd hold a reference to it
257 and weird things would happen to future packets). */
258 if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
259 && is_confirmed(i->master)) {
260 if (i->flags & IP_CT_EXPECT_PERMANENT) {
261 atomic_inc(&i->use);
262 return i;
263 } else if (del_timer(&i->timeout)) {
264 ip_ct_unlink_expect(i);
265 return i;
266 }
267 }
268 }
269 return NULL;
270}
271
272/* delete all expectations for this conntrack */
273void ip_ct_remove_expectations(struct ip_conntrack *ct)
274{
275 struct ip_conntrack_expect *i, *tmp;
276
277	/* Optimization: most connections never expect any others. */
278 if (ct->expecting == 0)
279 return;
280
281 list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
282 if (i->master == ct && del_timer(&i->timeout)) {
283 ip_ct_unlink_expect(i);
284 ip_conntrack_expect_put(i);
285 }
286 }
287}
288
289static void
290clean_from_lists(struct ip_conntrack *ct)
291{
292 DEBUGP("clean_from_lists(%p)\n", ct);
293 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
294 list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);
295
296 /* Destroy all pending expectations */
297 ip_ct_remove_expectations(ct);
298}
299
300static void
301destroy_conntrack(struct nf_conntrack *nfct)
302{
303 struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
304 struct ip_conntrack_protocol *proto;
305 struct ip_conntrack_helper *helper;
306 typeof(ip_conntrack_destroyed) destroyed;
307
308 DEBUGP("destroy_conntrack(%p)\n", ct);
309 IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
310 IP_NF_ASSERT(!timer_pending(&ct->timeout));
311
312 ip_conntrack_event(IPCT_DESTROY, ct);
313 set_bit(IPS_DYING_BIT, &ct->status);
314
315 helper = ct->helper;
316 if (helper && helper->destroy)
317 helper->destroy(ct);
318
319 /* To make sure we don't get any weird locking issues here:
320 * destroy_conntrack() MUST NOT be called with a write lock
321 * to ip_conntrack_lock!!! -HW */
322 rcu_read_lock();
323 proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
324 if (proto && proto->destroy)
325 proto->destroy(ct);
326
327 destroyed = rcu_dereference(ip_conntrack_destroyed);
328 if (destroyed)
329 destroyed(ct);
330
331 rcu_read_unlock();
332
333 write_lock_bh(&ip_conntrack_lock);
334 /* Expectations will have been removed in clean_from_lists,
335 * except TFTP can create an expectation on the first packet,
336 * before connection is in the list, so we need to clean here,
337 * too. */
338 ip_ct_remove_expectations(ct);
339
340 /* We overload first tuple to link into unconfirmed list. */
341 if (!is_confirmed(ct)) {
342 BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
343 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
344 }
345
346 CONNTRACK_STAT_INC(delete);
347 write_unlock_bh(&ip_conntrack_lock);
348
349 if (ct->master)
350 ip_conntrack_put(ct->master);
351
352 DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
353 ip_conntrack_free(ct);
354}
355
356static void death_by_timeout(unsigned long ul_conntrack)
357{
358 struct ip_conntrack *ct = (void *)ul_conntrack;
359
360 write_lock_bh(&ip_conntrack_lock);
361 /* Inside lock so preempt is disabled on module removal path.
362 * Otherwise we can get spurious warnings. */
363 CONNTRACK_STAT_INC(delete_list);
364 clean_from_lists(ct);
365 write_unlock_bh(&ip_conntrack_lock);
366 ip_conntrack_put(ct);
367}
368
369struct ip_conntrack_tuple_hash *
370__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
371 const struct ip_conntrack *ignored_conntrack)
372{
373 struct ip_conntrack_tuple_hash *h;
374 unsigned int hash = hash_conntrack(tuple);
375
376 list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
377 if (tuplehash_to_ctrack(h) != ignored_conntrack &&
378 ip_ct_tuple_equal(tuple, &h->tuple)) {
379 CONNTRACK_STAT_INC(found);
380 return h;
381 }
382 CONNTRACK_STAT_INC(searched);
383 }
384
385 return NULL;
386}
387
388/* Find a connection corresponding to a tuple. */
389struct ip_conntrack_tuple_hash *
390ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
391 const struct ip_conntrack *ignored_conntrack)
392{
393 struct ip_conntrack_tuple_hash *h;
394
395 read_lock_bh(&ip_conntrack_lock);
396 h = __ip_conntrack_find(tuple, ignored_conntrack);
397 if (h)
398 atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
399 read_unlock_bh(&ip_conntrack_lock);
400
401 return h;
402}
403
404static void __ip_conntrack_hash_insert(struct ip_conntrack *ct,
405 unsigned int hash,
406 unsigned int repl_hash)
407{
408 ct->id = ++ip_conntrack_next_id;
409 list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
410 &ip_conntrack_hash[hash]);
411 list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
412 &ip_conntrack_hash[repl_hash]);
413}
414
415void ip_conntrack_hash_insert(struct ip_conntrack *ct)
416{
417 unsigned int hash, repl_hash;
418
419 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
420 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
421
422 write_lock_bh(&ip_conntrack_lock);
423 __ip_conntrack_hash_insert(ct, hash, repl_hash);
424 write_unlock_bh(&ip_conntrack_lock);
425}
426
427/* Confirm a connection given skb; places it in hash table */
428int
429__ip_conntrack_confirm(struct sk_buff **pskb)
430{
431 unsigned int hash, repl_hash;
432 struct ip_conntrack_tuple_hash *h;
433 struct ip_conntrack *ct;
434 enum ip_conntrack_info ctinfo;
435
436 ct = ip_conntrack_get(*pskb, &ctinfo);
437
438 /* ipt_REJECT uses ip_conntrack_attach to attach related
439 ICMP/TCP RST packets in other direction. Actual packet
440 which created connection will be IP_CT_NEW or for an
441 expected connection, IP_CT_RELATED. */
442 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
443 return NF_ACCEPT;
444
445 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
446 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
447
448 /* We're not in hash table, and we refuse to set up related
449 connections for unconfirmed conns. But packet copies and
450 REJECT will give spurious warnings here. */
451 /* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
452
453	/* No external references means no one else could have
454	   confirmed us. */
455 IP_NF_ASSERT(!is_confirmed(ct));
456 DEBUGP("Confirming conntrack %p\n", ct);
457
458 write_lock_bh(&ip_conntrack_lock);
459
460 /* See if there's one in the list already, including reverse:
461 NAT could have grabbed it without realizing, since we're
462 not in the hash. If there is, we lost race. */
463 list_for_each_entry(h, &ip_conntrack_hash[hash], list)
464 if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
465 &h->tuple))
466 goto out;
467 list_for_each_entry(h, &ip_conntrack_hash[repl_hash], list)
468 if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
469 &h->tuple))
470 goto out;
471
472 /* Remove from unconfirmed list */
473 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
474
475 __ip_conntrack_hash_insert(ct, hash, repl_hash);
476 /* Timer relative to confirmation time, not original
477 setting time, otherwise we'd get timer wrap in
478 weird delay cases. */
479 ct->timeout.expires += jiffies;
480 add_timer(&ct->timeout);
481 atomic_inc(&ct->ct_general.use);
482 set_bit(IPS_CONFIRMED_BIT, &ct->status);
483 CONNTRACK_STAT_INC(insert);
484 write_unlock_bh(&ip_conntrack_lock);
485 if (ct->helper)
486 ip_conntrack_event_cache(IPCT_HELPER, *pskb);
487#ifdef CONFIG_IP_NF_NAT_NEEDED
488 if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
489 test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
490 ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
491#endif
492 ip_conntrack_event_cache(master_ct(ct) ?
493 IPCT_RELATED : IPCT_NEW, *pskb);
494
495 return NF_ACCEPT;
496
497out:
498 CONNTRACK_STAT_INC(insert_failed);
499 write_unlock_bh(&ip_conntrack_lock);
500 return NF_DROP;
501}
502
503/* Returns true if a connection corresponds to the tuple (required
504   for NAT). */
505int
506ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
507 const struct ip_conntrack *ignored_conntrack)
508{
509 struct ip_conntrack_tuple_hash *h;
510
511 read_lock_bh(&ip_conntrack_lock);
512 h = __ip_conntrack_find(tuple, ignored_conntrack);
513 read_unlock_bh(&ip_conntrack_lock);
514
515 return h != NULL;
516}
517
518/* There's a small race here where we may free a just-assured
519 connection. Too bad: we're in trouble anyway. */
520static int early_drop(struct list_head *chain)
521{
522 /* Traverse backwards: gives us oldest, which is roughly LRU */
523 struct ip_conntrack_tuple_hash *h;
524 struct ip_conntrack *ct = NULL, *tmp;
525 int dropped = 0;
526
527 read_lock_bh(&ip_conntrack_lock);
528 list_for_each_entry_reverse(h, chain, list) {
529 tmp = tuplehash_to_ctrack(h);
530 if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) {
531 ct = tmp;
532 atomic_inc(&ct->ct_general.use);
533 break;
534 }
535 }
536 read_unlock_bh(&ip_conntrack_lock);
537
538 if (!ct)
539 return dropped;
540
541 if (del_timer(&ct->timeout)) {
542 death_by_timeout((unsigned long)ct);
543 dropped = 1;
544 CONNTRACK_STAT_INC_ATOMIC(early_drop);
545 }
546 ip_conntrack_put(ct);
547 return dropped;
548}
549
550static struct ip_conntrack_helper *
551__ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple)
552{
553 struct ip_conntrack_helper *h;
554
555 list_for_each_entry(h, &helpers, list) {
556 if (ip_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask))
557 return h;
558 }
559 return NULL;
560}
561
562struct ip_conntrack_helper *
563ip_conntrack_helper_find_get( const struct ip_conntrack_tuple *tuple)
564{
565 struct ip_conntrack_helper *helper;
566
567 /* need ip_conntrack_lock to assure that helper exists until
568 * try_module_get() is called */
569 read_lock_bh(&ip_conntrack_lock);
570
571 helper = __ip_conntrack_helper_find(tuple);
572 if (helper) {
573 /* need to increase module usage count to assure helper will
574 * not go away while the caller is e.g. busy putting a
575 * conntrack in the hash that uses the helper */
576 if (!try_module_get(helper->me))
577 helper = NULL;
578 }
579
580 read_unlock_bh(&ip_conntrack_lock);
581
582 return helper;
583}
584
585void ip_conntrack_helper_put(struct ip_conntrack_helper *helper)
586{
587 module_put(helper->me);
588}
589
590struct ip_conntrack_protocol *
591__ip_conntrack_proto_find(u_int8_t protocol)
592{
593 return ip_ct_protos[protocol];
594}
595
596/* this is guaranteed to always return a valid protocol helper, since
597 * it falls back to generic_protocol */
598struct ip_conntrack_protocol *
599ip_conntrack_proto_find_get(u_int8_t protocol)
600{
601 struct ip_conntrack_protocol *p;
602
603 rcu_read_lock();
604 p = __ip_conntrack_proto_find(protocol);
605 if (p) {
606 if (!try_module_get(p->me))
607 p = &ip_conntrack_generic_protocol;
608 }
609 rcu_read_unlock();
610
611 return p;
612}
613
614void ip_conntrack_proto_put(struct ip_conntrack_protocol *p)
615{
616 module_put(p->me);
617}
618
619struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
620 struct ip_conntrack_tuple *repl)
621{
622 struct ip_conntrack *conntrack;
623
624 if (!ip_conntrack_hash_rnd_initted) {
625 get_random_bytes(&ip_conntrack_hash_rnd, 4);
626 ip_conntrack_hash_rnd_initted = 1;
627 }
628
629 /* We don't want any race condition at early drop stage */
630 atomic_inc(&ip_conntrack_count);
631
632 if (ip_conntrack_max
633 && atomic_read(&ip_conntrack_count) > ip_conntrack_max) {
634 unsigned int hash = hash_conntrack(orig);
635 /* Try dropping from this hash chain. */
636 if (!early_drop(&ip_conntrack_hash[hash])) {
637 atomic_dec(&ip_conntrack_count);
638 if (net_ratelimit())
639 printk(KERN_WARNING
640 "ip_conntrack: table full, dropping"
641 " packet.\n");
642 return ERR_PTR(-ENOMEM);
643 }
644 }
645
646 conntrack = kmem_cache_zalloc(ip_conntrack_cachep, GFP_ATOMIC);
647 if (!conntrack) {
648 DEBUGP("Can't allocate conntrack.\n");
649 atomic_dec(&ip_conntrack_count);
650 return ERR_PTR(-ENOMEM);
651 }
652
653 atomic_set(&conntrack->ct_general.use, 1);
654 conntrack->ct_general.destroy = destroy_conntrack;
655 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
656 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
657 /* Don't set timer yet: wait for confirmation */
658 init_timer(&conntrack->timeout);
659 conntrack->timeout.data = (unsigned long)conntrack;
660 conntrack->timeout.function = death_by_timeout;
661
662 return conntrack;
663}
664
665void
666ip_conntrack_free(struct ip_conntrack *conntrack)
667{
668 atomic_dec(&ip_conntrack_count);
669 kmem_cache_free(ip_conntrack_cachep, conntrack);
670}
671
672/* Allocate a new conntrack: we return -ENOMEM if classification
673 * failed due to stress. Otherwise it really is unclassifiable */
674static struct ip_conntrack_tuple_hash *
675init_conntrack(struct ip_conntrack_tuple *tuple,
676 struct ip_conntrack_protocol *protocol,
677 struct sk_buff *skb)
678{
679 struct ip_conntrack *conntrack;
680 struct ip_conntrack_tuple repl_tuple;
681 struct ip_conntrack_expect *exp;
682
683 if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
684 DEBUGP("Can't invert tuple.\n");
685 return NULL;
686 }
687
688 conntrack = ip_conntrack_alloc(tuple, &repl_tuple);
689 if (conntrack == NULL || IS_ERR(conntrack))
690 return (struct ip_conntrack_tuple_hash *)conntrack;
691
692 if (!protocol->new(conntrack, skb)) {
693 ip_conntrack_free(conntrack);
694 return NULL;
695 }
696
697 write_lock_bh(&ip_conntrack_lock);
698 exp = find_expectation(tuple);
699
700 if (exp) {
701 DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
702 conntrack, exp);
703 /* Welcome, Mr. Bond. We've been expecting you... */
704 __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
705 conntrack->master = exp->master;
706#ifdef CONFIG_IP_NF_CONNTRACK_MARK
707 conntrack->mark = exp->master->mark;
708#endif
709#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
710 defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
711 /* this is ugly, but there is no other place where to put it */
712 conntrack->nat.masq_index = exp->master->nat.masq_index;
713#endif
714#ifdef CONFIG_IP_NF_CONNTRACK_SECMARK
715 conntrack->secmark = exp->master->secmark;
716#endif
717 nf_conntrack_get(&conntrack->master->ct_general);
718 CONNTRACK_STAT_INC(expect_new);
719 } else {
720 conntrack->helper = __ip_conntrack_helper_find(&repl_tuple);
721
722 CONNTRACK_STAT_INC(new);
723 }
724
725 /* Overload tuple linked list to put us in unconfirmed list. */
726 list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
727
728 write_unlock_bh(&ip_conntrack_lock);
729
730 if (exp) {
731 if (exp->expectfn)
732 exp->expectfn(conntrack, exp);
733 ip_conntrack_expect_put(exp);
734 }
735
736 return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
737}
738
739/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
740static inline struct ip_conntrack *
741resolve_normal_ct(struct sk_buff *skb,
742 struct ip_conntrack_protocol *proto,
743 int *set_reply,
744 unsigned int hooknum,
745 enum ip_conntrack_info *ctinfo)
746{
747 struct ip_conntrack_tuple tuple;
748 struct ip_conntrack_tuple_hash *h;
749 struct ip_conntrack *ct;
750
751 IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
752
753 if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4,
754 &tuple,proto))
755 return NULL;
756
757 /* look for tuple match */
758 h = ip_conntrack_find_get(&tuple, NULL);
759 if (!h) {
760 h = init_conntrack(&tuple, proto, skb);
761 if (!h)
762 return NULL;
763 if (IS_ERR(h))
764 return (void *)h;
765 }
766 ct = tuplehash_to_ctrack(h);
767
768 /* It exists; we have (non-exclusive) reference. */
769 if (DIRECTION(h) == IP_CT_DIR_REPLY) {
770 *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
771 /* Please set reply bit if this packet OK */
772 *set_reply = 1;
773 } else {
774 /* Once we've had two way comms, always ESTABLISHED. */
775 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
776 DEBUGP("ip_conntrack_in: normal packet for %p\n",
777 ct);
778 *ctinfo = IP_CT_ESTABLISHED;
779 } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
780 DEBUGP("ip_conntrack_in: related packet for %p\n",
781 ct);
782 *ctinfo = IP_CT_RELATED;
783 } else {
784 DEBUGP("ip_conntrack_in: new packet for %p\n",
785 ct);
786 *ctinfo = IP_CT_NEW;
787 }
788 *set_reply = 0;
789 }
790 skb->nfct = &ct->ct_general;
791 skb->nfctinfo = *ctinfo;
792 return ct;
793}
794
795/* Netfilter hook itself. */
796unsigned int ip_conntrack_in(unsigned int hooknum,
797 struct sk_buff **pskb,
798 const struct net_device *in,
799 const struct net_device *out,
800 int (*okfn)(struct sk_buff *))
801{
802 struct ip_conntrack *ct;
803 enum ip_conntrack_info ctinfo;
804 struct ip_conntrack_protocol *proto;
805 int set_reply = 0;
806 int ret;
807
808 /* Previously seen (loopback or untracked)? Ignore. */
809 if ((*pskb)->nfct) {
810 CONNTRACK_STAT_INC_ATOMIC(ignore);
811 return NF_ACCEPT;
812 }
813
814	/* Never happens: conntrack runs after defragmentation */
815 if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
816 if (net_ratelimit()) {
817 printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
818 (*pskb)->nh.iph->protocol, hooknum);
819 }
820 return NF_DROP;
821 }
822
823/* Doesn't cover locally-generated broadcast, so not worth it. */
824#if 0
825 /* Ignore broadcast: no `connection'. */
826 if ((*pskb)->pkt_type == PACKET_BROADCAST) {
827 printk("Broadcast packet!\n");
828 return NF_ACCEPT;
829 } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
830 == htonl(0x000000FF)) {
831 printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
832 NIPQUAD((*pskb)->nh.iph->saddr),
833 NIPQUAD((*pskb)->nh.iph->daddr),
834 (*pskb)->sk, (*pskb)->pkt_type);
835 }
836#endif
837
838 /* rcu_read_lock()ed by nf_hook_slow */
839 proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol);
840
841	/* It may be a special packet: error, unclean...
842	 * the inverse of the return code tells the netfilter
843	 * core what to do with the packet. */
844 if (proto->error != NULL
845 && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
846 CONNTRACK_STAT_INC_ATOMIC(error);
847 CONNTRACK_STAT_INC_ATOMIC(invalid);
848 return -ret;
849 }
850
851 if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) {
852 /* Not valid part of a connection */
853 CONNTRACK_STAT_INC_ATOMIC(invalid);
854 return NF_ACCEPT;
855 }
856
857 if (IS_ERR(ct)) {
858 /* Too stressed to deal. */
859 CONNTRACK_STAT_INC_ATOMIC(drop);
860 return NF_DROP;
861 }
862
863 IP_NF_ASSERT((*pskb)->nfct);
864
865 ret = proto->packet(ct, *pskb, ctinfo);
866 if (ret < 0) {
867		/* Invalid: the inverse of the return code tells
868		 * the netfilter core what to do */
869 nf_conntrack_put((*pskb)->nfct);
870 (*pskb)->nfct = NULL;
871 CONNTRACK_STAT_INC_ATOMIC(invalid);
872 return -ret;
873 }
874
875 if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
876 ip_conntrack_event_cache(IPCT_STATUS, *pskb);
877
878 return ret;
879}
880
881int invert_tuplepr(struct ip_conntrack_tuple *inverse,
882 const struct ip_conntrack_tuple *orig)
883{
884 struct ip_conntrack_protocol *proto;
885 int ret;
886
887 rcu_read_lock();
888 proto = __ip_conntrack_proto_find(orig->dst.protonum);
889 ret = ip_ct_invert_tuple(inverse, orig, proto);
890 rcu_read_unlock();
891
892 return ret;
893}
894
895/* Would two expected things clash? */
896static inline int expect_clash(const struct ip_conntrack_expect *a,
897 const struct ip_conntrack_expect *b)
898{
899 /* Part covered by intersection of masks must be unequal,
900 otherwise they clash */
901 struct ip_conntrack_tuple intersect_mask
902 = { { a->mask.src.ip & b->mask.src.ip,
903 { a->mask.src.u.all & b->mask.src.u.all } },
904 { a->mask.dst.ip & b->mask.dst.ip,
905 { a->mask.dst.u.all & b->mask.dst.u.all },
906 a->mask.dst.protonum & b->mask.dst.protonum } };
907
908 return ip_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
909}
910
911static inline int expect_matches(const struct ip_conntrack_expect *a,
912 const struct ip_conntrack_expect *b)
913{
914 return a->master == b->master
915 && ip_ct_tuple_equal(&a->tuple, &b->tuple)
916 && ip_ct_tuple_equal(&a->mask, &b->mask);
917}
918
919/* Generally a bad idea to call this: could have matched already. */
920void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
921{
922 struct ip_conntrack_expect *i;
923
924 write_lock_bh(&ip_conntrack_lock);
925	/* choose the oldest expectation to evict */
926 list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
927 if (expect_matches(i, exp) && del_timer(&i->timeout)) {
928 ip_ct_unlink_expect(i);
929 write_unlock_bh(&ip_conntrack_lock);
930 ip_conntrack_expect_put(i);
931 return;
932 }
933 }
934 write_unlock_bh(&ip_conntrack_lock);
935}
936
937/* We don't increase the master conntrack refcount for non-fulfilled
938 * conntracks. During the conntrack destruction, the expectations are
939 * always killed before the conntrack itself */
940struct ip_conntrack_expect *ip_conntrack_expect_alloc(struct ip_conntrack *me)
941{
942 struct ip_conntrack_expect *new;
943
944 new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC);
945 if (!new) {
946 DEBUGP("expect_related: OOM allocating expect\n");
947 return NULL;
948 }
949 new->master = me;
950 atomic_set(&new->use, 1);
951 return new;
952}
953
954void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
955{
956 if (atomic_dec_and_test(&exp->use))
957 kmem_cache_free(ip_conntrack_expect_cachep, exp);
958}
959
960static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
961{
962 atomic_inc(&exp->use);
963 exp->master->expecting++;
964 list_add(&exp->list, &ip_conntrack_expect_list);
965
966 init_timer(&exp->timeout);
967 exp->timeout.data = (unsigned long)exp;
968 exp->timeout.function = expectation_timed_out;
969 exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
970 add_timer(&exp->timeout);
971
972 exp->id = ++ip_conntrack_expect_next_id;
973 atomic_inc(&exp->use);
974 CONNTRACK_STAT_INC(expect_create);
975}
976
977/* Race with expectations being used means we could have none to find; OK. */
978static void evict_oldest_expect(struct ip_conntrack *master)
979{
980 struct ip_conntrack_expect *i;
981
982 list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
983 if (i->master == master) {
984 if (del_timer(&i->timeout)) {
985 ip_ct_unlink_expect(i);
986 ip_conntrack_expect_put(i);
987 }
988 break;
989 }
990 }
991}
992
993static inline int refresh_timer(struct ip_conntrack_expect *i)
994{
995 if (!del_timer(&i->timeout))
996 return 0;
997
998 i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
999 add_timer(&i->timeout);
1000 return 1;
1001}
1002
1003int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
1004{
1005 struct ip_conntrack_expect *i;
1006 int ret;
1007
1008	DEBUGP("ip_conntrack_expect_related %p\n", expect);
1009 DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
1010 DEBUGP("mask: "); DUMP_TUPLE(&expect->mask);
1011
1012 write_lock_bh(&ip_conntrack_lock);
1013 list_for_each_entry(i, &ip_conntrack_expect_list, list) {
1014 if (expect_matches(i, expect)) {
1015 /* Refresh timer: if it's dying, ignore.. */
1016 if (refresh_timer(i)) {
1017 ret = 0;
1018 goto out;
1019 }
1020 } else if (expect_clash(i, expect)) {
1021 ret = -EBUSY;
1022 goto out;
1023 }
1024 }
1025
1026 /* Will be over limit? */
1027 if (expect->master->helper->max_expected &&
1028 expect->master->expecting >= expect->master->helper->max_expected)
1029 evict_oldest_expect(expect->master);
1030
1031 ip_conntrack_expect_insert(expect);
1032 ip_conntrack_expect_event(IPEXP_NEW, expect);
1033 ret = 0;
1034out:
1035 write_unlock_bh(&ip_conntrack_lock);
1036 return ret;
1037}
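/* Usage sketch (hypothetical helper code; the FTP helper later in this
 * patch follows the same pattern):
 *
 *	exp = ip_conntrack_expect_alloc(ct);
 *	if (exp == NULL)
 *		return NF_DROP;
 *	... fill in exp->tuple, exp->mask, exp->expectfn, exp->flags ...
 *	if (ip_conntrack_expect_related(exp) != 0)
 *		ret = NF_DROP;
 *	ip_conntrack_expect_put(exp);	 (drops the allocation reference)
 */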
1038
1039/* Alter reply tuple (maybe alter helper). This is for NAT, and is
1040 implicitly racy: see __ip_conntrack_confirm */
1041void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
1042 const struct ip_conntrack_tuple *newreply)
1043{
1044 write_lock_bh(&ip_conntrack_lock);
1045 /* Should be unconfirmed, so not in hash table yet */
1046 IP_NF_ASSERT(!is_confirmed(conntrack));
1047
1048 DEBUGP("Altering reply tuple of %p to ", conntrack);
1049 DUMP_TUPLE(newreply);
1050
1051 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
1052 if (!conntrack->master && conntrack->expecting == 0)
1053 conntrack->helper = __ip_conntrack_helper_find(newreply);
1054 write_unlock_bh(&ip_conntrack_lock);
1055}
1056
1057int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
1058{
1059 BUG_ON(me->timeout == 0);
1060 write_lock_bh(&ip_conntrack_lock);
1061 list_add(&me->list, &helpers);
1062 write_unlock_bh(&ip_conntrack_lock);
1063
1064 return 0;
1065}
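/* Usage sketch (hypothetical names): a helper module registers a
 * statically initialized descriptor at init time and unregisters it on
 * exit; see ip_conntrack_ftp_init() later in this patch for a real
 * caller.
 *
 *	static struct ip_conntrack_helper my_helper = {
 *		.name         = "my-proto",
 *		.me           = THIS_MODULE,
 *		.max_expected = 1,
 *		.timeout      = 5 * 60,
 *		.help         = my_help,
 *	};
 *	ret = ip_conntrack_helper_register(&my_helper);
 */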
1066
1067struct ip_conntrack_helper *
1068__ip_conntrack_helper_find_byname(const char *name)
1069{
1070 struct ip_conntrack_helper *h;
1071
1072 list_for_each_entry(h, &helpers, list) {
1073 if (!strcmp(h->name, name))
1074 return h;
1075 }
1076
1077 return NULL;
1078}
1079
1080static inline void unhelp(struct ip_conntrack_tuple_hash *i,
1081 const struct ip_conntrack_helper *me)
1082{
1083 if (tuplehash_to_ctrack(i)->helper == me) {
1084 ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
1085 tuplehash_to_ctrack(i)->helper = NULL;
1086 }
1087}
1088
1089void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
1090{
1091 unsigned int i;
1092 struct ip_conntrack_tuple_hash *h;
1093 struct ip_conntrack_expect *exp, *tmp;
1094
1095 /* Need write lock here, to delete helper. */
1096 write_lock_bh(&ip_conntrack_lock);
1097 list_del(&me->list);
1098
1099 /* Get rid of expectations */
1100 list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
1101 if (exp->master->helper == me && del_timer(&exp->timeout)) {
1102 ip_ct_unlink_expect(exp);
1103 ip_conntrack_expect_put(exp);
1104 }
1105 }
1106	/* Walk the remaining conntracks, set their helper pointers to NULL. */
1107 list_for_each_entry(h, &unconfirmed, list)
1108 unhelp(h, me);
1109 for (i = 0; i < ip_conntrack_htable_size; i++) {
1110 list_for_each_entry(h, &ip_conntrack_hash[i], list)
1111 unhelp(h, me);
1112 }
1113 write_unlock_bh(&ip_conntrack_lock);
1114
1115 /* Someone could be still looking at the helper in a bh. */
1116 synchronize_net();
1117}
1118
1119/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
1120void __ip_ct_refresh_acct(struct ip_conntrack *ct,
1121 enum ip_conntrack_info ctinfo,
1122 const struct sk_buff *skb,
1123 unsigned long extra_jiffies,
1124 int do_acct)
1125{
1126 int event = 0;
1127
1128 IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
1129 IP_NF_ASSERT(skb);
1130
1131 write_lock_bh(&ip_conntrack_lock);
1132
1133 /* Only update if this is not a fixed timeout */
1134 if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
1135 write_unlock_bh(&ip_conntrack_lock);
1136 return;
1137 }
1138
1139 /* If not in hash table, timer will not be active yet */
1140 if (!is_confirmed(ct)) {
1141 ct->timeout.expires = extra_jiffies;
1142 event = IPCT_REFRESH;
1143 } else {
1144 /* Need del_timer for race avoidance (may already be dying). */
1145 if (del_timer(&ct->timeout)) {
1146 ct->timeout.expires = jiffies + extra_jiffies;
1147 add_timer(&ct->timeout);
1148 event = IPCT_REFRESH;
1149 }
1150 }
1151
1152#ifdef CONFIG_IP_NF_CT_ACCT
1153 if (do_acct) {
1154 ct->counters[CTINFO2DIR(ctinfo)].packets++;
1155 ct->counters[CTINFO2DIR(ctinfo)].bytes +=
1156 ntohs(skb->nh.iph->tot_len);
1157 if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
1158 || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
1159 event |= IPCT_COUNTER_FILLING;
1160 }
1161#endif
1162
1163 write_unlock_bh(&ip_conntrack_lock);
1164
1165 /* must be unlocked when calling event cache */
1166 if (event)
1167 ip_conntrack_event_cache(event, skb);
1168}
1169
1170#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
1171 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
1172/* Generic function for tcp/udp/sctp/dccp and the like. This needs to be
1173 * in ip_conntrack_core, since we don't want the protocols to autoload
1174 * or depend on ctnetlink */
1175int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb,
1176 const struct ip_conntrack_tuple *tuple)
1177{
1178 NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(__be16),
1179 &tuple->src.u.tcp.port);
1180 NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(__be16),
1181 &tuple->dst.u.tcp.port);
1182 return 0;
1183
1184nfattr_failure:
1185 return -1;
1186}
1187
1188int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[],
1189 struct ip_conntrack_tuple *t)
1190{
1191 if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
1192 return -EINVAL;
1193
1194 t->src.u.tcp.port =
1195 *(__be16 *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
1196 t->dst.u.tcp.port =
1197 *(__be16 *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);
1198
1199 return 0;
1200}
1201#endif
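/* Note on the union trick used above: src.u and dst.u are unions whose
 * tcp, udp, sctp and dccp members all alias the same __be16, so reading
 * tuple->src.u.tcp.port is valid for every port-based protocol and one
 * pair of CTA_PROTO_*_PORT attributes suffices.
 */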
1202
1203/* Returns new sk_buff, or NULL */
1204struct sk_buff *
1205ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
1206{
1207 skb_orphan(skb);
1208
1209 local_bh_disable();
1210 skb = ip_defrag(skb, user);
1211 local_bh_enable();
1212
1213 if (skb)
1214 ip_send_check(skb->nh.iph);
1215 return skb;
1216}
1217
1218/* Used by ipt_REJECT. */
1219static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
1220{
1221 struct ip_conntrack *ct;
1222 enum ip_conntrack_info ctinfo;
1223
1224 /* This ICMP is in reverse direction to the packet which caused it */
1225 ct = ip_conntrack_get(skb, &ctinfo);
1226
1227 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
1228 ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
1229 else
1230 ctinfo = IP_CT_RELATED;
1231
1232 /* Attach to new skbuff, and increment count */
1233 nskb->nfct = &ct->ct_general;
1234 nskb->nfctinfo = ctinfo;
1235 nf_conntrack_get(nskb->nfct);
1236}
1237
1238/* Bring out ya dead! */
1239static struct ip_conntrack *
1240get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
1241 void *data, unsigned int *bucket)
1242{
1243 struct ip_conntrack_tuple_hash *h;
1244 struct ip_conntrack *ct;
1245
1246 write_lock_bh(&ip_conntrack_lock);
1247 for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
1248 list_for_each_entry(h, &ip_conntrack_hash[*bucket], list) {
1249 ct = tuplehash_to_ctrack(h);
1250 if (iter(ct, data))
1251 goto found;
1252 }
1253 }
1254 list_for_each_entry(h, &unconfirmed, list) {
1255 ct = tuplehash_to_ctrack(h);
1256 if (iter(ct, data))
1257 set_bit(IPS_DYING_BIT, &ct->status);
1258 }
1259 write_unlock_bh(&ip_conntrack_lock);
1260 return NULL;
1261
1262found:
1263 atomic_inc(&ct->ct_general.use);
1264 write_unlock_bh(&ip_conntrack_lock);
1265 return ct;
1266}
1267
1268void
1269ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
1270{
1271 struct ip_conntrack *ct;
1272 unsigned int bucket = 0;
1273
1274 while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
1275		/* Time to push up daisies... */
1276 if (del_timer(&ct->timeout))
1277 death_by_timeout((unsigned long)ct);
1278 /* ... else the timer will get him soon. */
1279
1280 ip_conntrack_put(ct);
1281 }
1282}
1283
1284/* Fast function for those who don't want to parse /proc (and I don't
1285 blame them). */
1286/* Reversing the socket's dst/src point of view gives us the reply
1287 mapping. */
1288static int
1289getorigdst(struct sock *sk, int optval, void __user *user, int *len)
1290{
1291 struct inet_sock *inet = inet_sk(sk);
1292 struct ip_conntrack_tuple_hash *h;
1293 struct ip_conntrack_tuple tuple;
1294
1295 IP_CT_TUPLE_U_BLANK(&tuple);
1296 tuple.src.ip = inet->rcv_saddr;
1297 tuple.src.u.tcp.port = inet->sport;
1298 tuple.dst.ip = inet->daddr;
1299 tuple.dst.u.tcp.port = inet->dport;
1300 tuple.dst.protonum = IPPROTO_TCP;
1301
1302 /* We only do TCP at the moment: is there a better way? */
1303 if (strcmp(sk->sk_prot->name, "TCP")) {
1304 DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
1305 return -ENOPROTOOPT;
1306 }
1307
1308 if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
1309		DEBUGP("SO_ORIGINAL_DST: len %u not %Zu\n",
1310 *len, sizeof(struct sockaddr_in));
1311 return -EINVAL;
1312 }
1313
1314 h = ip_conntrack_find_get(&tuple, NULL);
1315 if (h) {
1316 struct sockaddr_in sin;
1317 struct ip_conntrack *ct = tuplehash_to_ctrack(h);
1318
1319 sin.sin_family = AF_INET;
1320 sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
1321 .tuple.dst.u.tcp.port;
1322 sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
1323 .tuple.dst.ip;
1324 memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
1325
1326 DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
1327 NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
1328 ip_conntrack_put(ct);
1329 if (copy_to_user(user, &sin, sizeof(sin)) != 0)
1330 return -EFAULT;
1331 else
1332 return 0;
1333 }
1334 DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
1335 NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
1336 NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
1337 return -ENOENT;
1338}
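/* Userspace sketch (hypothetical proxy code): a transparent proxy that
 * accepted a REDIRECTed TCP connection recovers the pre-NAT destination
 * through this sockopt:
 *
 *	struct sockaddr_in dst;
 *	socklen_t len = sizeof(dst);
 *	if (getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, &dst, &len) == 0)
 *		printf("original dst %s:%u\n", inet_ntoa(dst.sin_addr),
 *		       ntohs(dst.sin_port));
 */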
1339
1340static struct nf_sockopt_ops so_getorigdst = {
1341 .pf = PF_INET,
1342 .get_optmin = SO_ORIGINAL_DST,
1343 .get_optmax = SO_ORIGINAL_DST+1,
1344 .get = &getorigdst,
1345};
1346
1347static int kill_all(struct ip_conntrack *i, void *data)
1348{
1349 return 1;
1350}
1351
1352void ip_conntrack_flush(void)
1353{
1354 ip_ct_iterate_cleanup(kill_all, NULL);
1355}
1356
1357static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
1358{
1359 if (vmalloced)
1360 vfree(hash);
1361 else
1362 free_pages((unsigned long)hash,
1363 get_order(sizeof(struct list_head) * size));
1364}
1365
1366/* Mishearing the voices in his head, our hero wonders how he's
1367 supposed to kill the mall. */
1368void ip_conntrack_cleanup(void)
1369{
1370 rcu_assign_pointer(ip_ct_attach, NULL);
1371
1372 /* This makes sure all current packets have passed through
1373 netfilter framework. Roll on, two-stage module
1374 delete... */
1375 synchronize_net();
1376
1377 ip_ct_event_cache_flush();
1378 i_see_dead_people:
1379 ip_conntrack_flush();
1380 if (atomic_read(&ip_conntrack_count) != 0) {
1381 schedule();
1382 goto i_see_dead_people;
1383 }
1384 /* wait until all references to ip_conntrack_untracked are dropped */
1385 while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
1386 schedule();
1387
1388 kmem_cache_destroy(ip_conntrack_cachep);
1389 kmem_cache_destroy(ip_conntrack_expect_cachep);
1390 free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
1391 ip_conntrack_htable_size);
1392 nf_unregister_sockopt(&so_getorigdst);
1393}
1394
1395static struct list_head *alloc_hashtable(int size, int *vmalloced)
1396{
1397 struct list_head *hash;
1398 unsigned int i;
1399
1400 *vmalloced = 0;
1401 hash = (void*)__get_free_pages(GFP_KERNEL,
1402 get_order(sizeof(struct list_head)
1403 * size));
1404 if (!hash) {
1405 *vmalloced = 1;
1406		printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
1407 hash = vmalloc(sizeof(struct list_head) * size);
1408 }
1409
1410 if (hash)
1411 for (i = 0; i < size; i++)
1412 INIT_LIST_HEAD(&hash[i]);
1413
1414 return hash;
1415}
1416
1417static int set_hashsize(const char *val, struct kernel_param *kp)
1418{
1419 int i, bucket, hashsize, vmalloced;
1420 int old_vmalloced, old_size;
1421 int rnd;
1422 struct list_head *hash, *old_hash;
1423 struct ip_conntrack_tuple_hash *h;
1424
1425 /* On boot, we can set this without any fancy locking. */
1426 if (!ip_conntrack_htable_size)
1427 return param_set_int(val, kp);
1428
1429 hashsize = simple_strtol(val, NULL, 0);
1430 if (!hashsize)
1431 return -EINVAL;
1432
1433 hash = alloc_hashtable(hashsize, &vmalloced);
1434 if (!hash)
1435 return -ENOMEM;
1436
1437 /* We have to rehash for the new table anyway, so we also can
1438 * use a new random seed */
1439 get_random_bytes(&rnd, 4);
1440
1441 write_lock_bh(&ip_conntrack_lock);
1442 for (i = 0; i < ip_conntrack_htable_size; i++) {
1443 while (!list_empty(&ip_conntrack_hash[i])) {
1444 h = list_entry(ip_conntrack_hash[i].next,
1445 struct ip_conntrack_tuple_hash, list);
1446 list_del(&h->list);
1447 bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
1448 list_add_tail(&h->list, &hash[bucket]);
1449 }
1450 }
1451 old_size = ip_conntrack_htable_size;
1452 old_vmalloced = ip_conntrack_vmalloc;
1453 old_hash = ip_conntrack_hash;
1454
1455 ip_conntrack_htable_size = hashsize;
1456 ip_conntrack_vmalloc = vmalloced;
1457 ip_conntrack_hash = hash;
1458 ip_conntrack_hash_rnd = rnd;
1459 write_unlock_bh(&ip_conntrack_lock);
1460
1461 free_conntrack_hash(old_hash, old_vmalloced, old_size);
1462 return 0;
1463}
1464
1465module_param_call(hashsize, set_hashsize, param_get_uint,
1466 &ip_conntrack_htable_size, 0600);
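/* Runtime usage (assuming the module is named ip_conntrack and sysfs is
 * mounted): the writable parameter wired up above lets an admin grow
 * the table on the fly, e.g.
 *
 *	echo 16384 > /sys/module/ip_conntrack/parameters/hashsize
 *
 * set_hashsize() then rehashes every entry into the new table under
 * ip_conntrack_lock before freeing the old one.
 */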
1467
1468int __init ip_conntrack_init(void)
1469{
1470 unsigned int i;
1471 int ret;
1472
1473 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
1474 * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
1475 if (!ip_conntrack_htable_size) {
1476 ip_conntrack_htable_size
1477 = (((num_physpages << PAGE_SHIFT) / 16384)
1478 / sizeof(struct list_head));
1479 if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
1480 ip_conntrack_htable_size = 8192;
1481 if (ip_conntrack_htable_size < 16)
1482 ip_conntrack_htable_size = 16;
1483 }
1484 ip_conntrack_max = 8 * ip_conntrack_htable_size;
1485
1486 printk("ip_conntrack version %s (%u buckets, %d max)"
1487 " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
1488 ip_conntrack_htable_size, ip_conntrack_max,
1489 sizeof(struct ip_conntrack));
1490
1491 ret = nf_register_sockopt(&so_getorigdst);
1492 if (ret != 0) {
1493 printk(KERN_ERR "Unable to register netfilter socket option\n");
1494 return ret;
1495 }
1496
1497 ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
1498 &ip_conntrack_vmalloc);
1499 if (!ip_conntrack_hash) {
1500 printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
1501 goto err_unreg_sockopt;
1502 }
1503
1504 ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
1505 sizeof(struct ip_conntrack), 0,
1506 0, NULL, NULL);
1507 if (!ip_conntrack_cachep) {
1508 printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
1509 goto err_free_hash;
1510 }
1511
1512 ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
1513 sizeof(struct ip_conntrack_expect),
1514 0, 0, NULL, NULL);
1515 if (!ip_conntrack_expect_cachep) {
1516 printk(KERN_ERR "Unable to create ip_expect slab cache\n");
1517 goto err_free_conntrack_slab;
1518 }
1519
1520 /* Don't NEED lock here, but good form anyway. */
1521 write_lock_bh(&ip_conntrack_lock);
1522 for (i = 0; i < MAX_IP_CT_PROTO; i++)
1523 rcu_assign_pointer(ip_ct_protos[i], &ip_conntrack_generic_protocol);
1524 /* Sew in builtin protocols. */
1525 rcu_assign_pointer(ip_ct_protos[IPPROTO_TCP], &ip_conntrack_protocol_tcp);
1526 rcu_assign_pointer(ip_ct_protos[IPPROTO_UDP], &ip_conntrack_protocol_udp);
1527 rcu_assign_pointer(ip_ct_protos[IPPROTO_ICMP], &ip_conntrack_protocol_icmp);
1528 write_unlock_bh(&ip_conntrack_lock);
1529
1530 /* For use by ipt_REJECT */
1531 rcu_assign_pointer(ip_ct_attach, ip_conntrack_attach);
1532
1533 /* Set up fake conntrack:
1534 - to never be deleted, not in any hashes */
1535 atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
1536	/* - and to make it look like a confirmed connection */
1537 set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
1538
1539 return ret;
1540
1541err_free_conntrack_slab:
1542 kmem_cache_destroy(ip_conntrack_cachep);
1543err_free_hash:
1544 free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
1545 ip_conntrack_htable_size);
1546err_unreg_sockopt:
1547 nf_unregister_sockopt(&so_getorigdst);
1548
1549 return -ENOMEM;
1550}
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
deleted file mode 100644
index 1faa68ab9432..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ /dev/null
@@ -1,520 +0,0 @@
1/* FTP extension for IP connection tracking. */
2
3/* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/netfilter.h>
13#include <linux/ip.h>
14#include <linux/ctype.h>
15#include <net/checksum.h>
16#include <net/tcp.h>
17
18#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
19#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
20#include <linux/moduleparam.h>
21
22MODULE_LICENSE("GPL");
23MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
24MODULE_DESCRIPTION("ftp connection tracking helper");
25
26/* This is slow, but it's simple. --RR */
27static char *ftp_buffer;
28static DEFINE_SPINLOCK(ip_ftp_lock);
29
30#define MAX_PORTS 8
31static unsigned short ports[MAX_PORTS];
32static int ports_c;
33module_param_array(ports, ushort, &ports_c, 0400);
34
35static int loose;
36module_param(loose, bool, 0600);
37
38unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
39 enum ip_conntrack_info ctinfo,
40 enum ip_ct_ftp_type type,
41 unsigned int matchoff,
42 unsigned int matchlen,
43 struct ip_conntrack_expect *exp,
44 u32 *seq);
45EXPORT_SYMBOL_GPL(ip_nat_ftp_hook);
46
47#if 0
48#define DEBUGP printk
49#else
50#define DEBUGP(format, args...)
51#endif
52
53static int try_rfc959(const char *, size_t, u_int32_t [], char);
54static int try_eprt(const char *, size_t, u_int32_t [], char);
55static int try_epsv_response(const char *, size_t, u_int32_t [], char);
56
57static const struct ftp_search {
58 const char *pattern;
59 size_t plen;
60 char skip;
61 char term;
62 enum ip_ct_ftp_type ftptype;
63 int (*getnum)(const char *, size_t, u_int32_t[], char);
64} search[IP_CT_DIR_MAX][2] = {
65 [IP_CT_DIR_ORIGINAL] = {
66 {
67 .pattern = "PORT",
68 .plen = sizeof("PORT") - 1,
69 .skip = ' ',
70 .term = '\r',
71 .ftptype = IP_CT_FTP_PORT,
72 .getnum = try_rfc959,
73 },
74 {
75 .pattern = "EPRT",
76 .plen = sizeof("EPRT") - 1,
77 .skip = ' ',
78 .term = '\r',
79 .ftptype = IP_CT_FTP_EPRT,
80 .getnum = try_eprt,
81 },
82 },
83 [IP_CT_DIR_REPLY] = {
84 {
85 .pattern = "227 ",
86 .plen = sizeof("227 ") - 1,
87 .skip = '(',
88 .term = ')',
89 .ftptype = IP_CT_FTP_PASV,
90 .getnum = try_rfc959,
91 },
92 {
93 .pattern = "229 ",
94 .plen = sizeof("229 ") - 1,
95 .skip = '(',
96 .term = ')',
97 .ftptype = IP_CT_FTP_EPSV,
98 .getnum = try_epsv_response,
99 },
100 },
101};
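/* Example lines matched by the table above (addresses hypothetical;
 * formats per RFC 959 and RFC 2428):
 *
 *	PORT 192,168,1,2,31,64\r			-> try_rfc959
 *	EPRT |1|192.168.1.2|8000|\r			-> try_eprt
 *	227 Entering Passive Mode (192,168,1,2,31,64)	-> try_rfc959
 *	229 Entering Extended Passive Mode (|||8000|)	-> try_epsv_response
 *
 * In the comma form the last two numbers are the port split into high
 * and low bytes: 31 * 256 + 64 = 8000.
 */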
102
103static int try_number(const char *data, size_t dlen, u_int32_t array[],
104 int array_size, char sep, char term)
105{
106 u_int32_t i, len;
107
108 memset(array, 0, sizeof(array[0])*array_size);
109
110 /* Keep data pointing at next char. */
111 for (i = 0, len = 0; len < dlen && i < array_size; len++, data++) {
112 if (*data >= '0' && *data <= '9') {
113 array[i] = array[i]*10 + *data - '0';
114 }
115 else if (*data == sep)
116 i++;
117 else {
118 /* Unexpected character; true if it's the
119 terminator and we're finished. */
120 if (*data == term && i == array_size - 1)
121 return len;
122
123 DEBUGP("Char %u (got %u nums) `%u' unexpected\n",
124 len, i, *data);
125 return 0;
126 }
127 }
128 DEBUGP("Failed to fill %u numbers separated by %c\n", array_size, sep);
129
130 return 0;
131}
132
133/* Returns 0, or length of numbers: 192,168,1,1,5,6 */
134static int try_rfc959(const char *data, size_t dlen, u_int32_t array[6],
135 char term)
136{
137 return try_number(data, dlen, array, 6, ',', term);
138}
139
140/* Grab port: number up to delimiter */
141static int get_port(const char *data, int start, size_t dlen, char delim,
142 u_int32_t array[2])
143{
144 u_int16_t port = 0;
145 int i;
146
147 for (i = start; i < dlen; i++) {
148 /* Finished? */
149 if (data[i] == delim) {
150 if (port == 0)
151 break;
152 array[0] = port >> 8;
153 array[1] = port;
154 return i + 1;
155 }
156 else if (data[i] >= '0' && data[i] <= '9')
157 port = port*10 + data[i] - '0';
158 else /* Some other crap */
159 break;
160 }
161 return 0;
162}
163
164/* Returns 0, or length of numbers: |1|132.235.1.2|6275| */
165static int try_eprt(const char *data, size_t dlen, u_int32_t array[6],
166 char term)
167{
168 char delim;
169 int length;
170
171 /* First character is delimiter, then "1" for IPv4, then
172 delimiter again. */
173 if (dlen <= 3) return 0;
174 delim = data[0];
175 if (isdigit(delim) || delim < 33 || delim > 126
176 || data[1] != '1' || data[2] != delim)
177 return 0;
178
179 DEBUGP("EPRT: Got |1|!\n");
180 /* Now we have IP address. */
181 length = try_number(data + 3, dlen - 3, array, 4, '.', delim);
182 if (length == 0)
183 return 0;
184
185 DEBUGP("EPRT: Got IP address!\n");
186 /* Start offset includes initial "|1|", and trailing delimiter */
187 return get_port(data, 3 + length + 1, dlen, delim, array+4);
188}
189
190/* Returns 0, or length of numbers: |||6446| */
191static int try_epsv_response(const char *data, size_t dlen, u_int32_t array[6],
192 char term)
193{
194 char delim;
195
196 /* Three delimiters. */
197 if (dlen <= 3) return 0;
198 delim = data[0];
199 if (isdigit(delim) || delim < 33 || delim > 126
200 || data[1] != delim || data[2] != delim)
201 return 0;
202
203 return get_port(data, 3, dlen, delim, array+4);
204}
205
206/* Return 1 for match, 0 for no match, -1 for partial match. */
207static int find_pattern(const char *data, size_t dlen,
208 const char *pattern, size_t plen,
209 char skip, char term,
210 unsigned int *numoff,
211 unsigned int *numlen,
212 u_int32_t array[6],
213 int (*getnum)(const char *, size_t, u_int32_t[], char))
214{
215 size_t i;
216
217 DEBUGP("find_pattern `%s': dlen = %u\n", pattern, dlen);
218 if (dlen == 0)
219 return 0;
220
221 if (dlen <= plen) {
222 /* Short packet: try for partial? */
223 if (strnicmp(data, pattern, dlen) == 0)
224 return -1;
225 else return 0;
226 }
227
228 if (strnicmp(data, pattern, plen) != 0) {
229#if 0
230 size_t i;
231
232 DEBUGP("ftp: string mismatch\n");
233 for (i = 0; i < plen; i++) {
234 DEBUGP("ftp:char %u `%c'(%u) vs `%c'(%u)\n",
235 i, data[i], data[i],
236 pattern[i], pattern[i]);
237 }
238#endif
239 return 0;
240 }
241
242 DEBUGP("Pattern matches!\n");
243 /* Now we've found the constant string, try to skip
244 to the 'skip' character */
245 for (i = plen; data[i] != skip; i++)
246 if (i == dlen - 1) return -1;
247
248 /* Skip over the last character */
249 i++;
250
251 DEBUGP("Skipped up to `%c'!\n", skip);
252
253 *numoff = i;
254 *numlen = getnum(data + i, dlen - i, array, term);
255 if (!*numlen)
256 return -1;
257
258 DEBUGP("Match succeeded!\n");
259 return 1;
260}
261
262/* Look up to see if we're just after a \n. */
263static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir)
264{
265 unsigned int i;
266
267 for (i = 0; i < info->seq_aft_nl_num[dir]; i++)
268 if (info->seq_aft_nl[dir][i] == seq)
269 return 1;
270 return 0;
271}
272
273/* We don't update if it's older than what we have. */
274static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir,
275 struct sk_buff *skb)
276{
277 unsigned int i, oldest = NUM_SEQ_TO_REMEMBER;
278
279 /* Look for oldest: if we find exact match, we're done. */
280 for (i = 0; i < info->seq_aft_nl_num[dir]; i++) {
281 if (info->seq_aft_nl[dir][i] == nl_seq)
282 return;
283
284		if (oldest == NUM_SEQ_TO_REMEMBER ||
285		    before(info->seq_aft_nl[dir][i], info->seq_aft_nl[dir][oldest]))
286			oldest = i;
287 }
288
289 if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
290 info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
291 ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
292 } else if (oldest != NUM_SEQ_TO_REMEMBER) {
293 info->seq_aft_nl[dir][oldest] = nl_seq;
294 ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
295 }
296}
297
298static int help(struct sk_buff **pskb,
299 struct ip_conntrack *ct,
300 enum ip_conntrack_info ctinfo)
301{
302 unsigned int dataoff, datalen;
303 struct tcphdr _tcph, *th;
304 char *fb_ptr;
305 int ret;
306 u32 seq, array[6] = { 0 };
307 int dir = CTINFO2DIR(ctinfo);
308 unsigned int matchlen, matchoff;
309 struct ip_ct_ftp_master *ct_ftp_info = &ct->help.ct_ftp_info;
310 struct ip_conntrack_expect *exp;
311 unsigned int i;
312 int found = 0, ends_in_nl;
313 typeof(ip_nat_ftp_hook) ip_nat_ftp;
314
315 /* Until there's been traffic both ways, don't look in packets. */
316 if (ctinfo != IP_CT_ESTABLISHED
317 && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
318 DEBUGP("ftp: Conntrackinfo = %u\n", ctinfo);
319 return NF_ACCEPT;
320 }
321
322 th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
323 sizeof(_tcph), &_tcph);
324 if (th == NULL)
325 return NF_ACCEPT;
326
327 dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4;
328 /* No data? */
329 if (dataoff >= (*pskb)->len) {
330 DEBUGP("ftp: pskblen = %u\n", (*pskb)->len);
331 return NF_ACCEPT;
332 }
333 datalen = (*pskb)->len - dataoff;
334
335 spin_lock_bh(&ip_ftp_lock);
336 fb_ptr = skb_header_pointer(*pskb, dataoff,
337 (*pskb)->len - dataoff, ftp_buffer);
338 BUG_ON(fb_ptr == NULL);
339
340 ends_in_nl = (fb_ptr[datalen - 1] == '\n');
341 seq = ntohl(th->seq) + datalen;
342
343 /* Look up to see if we're just after a \n. */
344 if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) {
345 /* Now if this ends in \n, update ftp info. */
346		DEBUGP("ip_conntrack_ftp_help: wrong seq pos %u "
347		       "(datalen %u)\n",
348		       ntohl(th->seq), datalen);
349 ret = NF_ACCEPT;
350 goto out_update_nl;
351 }
352
353 /* Initialize IP array to expected address (it's not mentioned
354 in EPSV responses) */
355 array[0] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 24) & 0xFF;
356 array[1] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 16) & 0xFF;
357 array[2] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 8) & 0xFF;
358 array[3] = ntohl(ct->tuplehash[dir].tuple.src.ip) & 0xFF;
359
360 for (i = 0; i < ARRAY_SIZE(search[dir]); i++) {
361 found = find_pattern(fb_ptr, (*pskb)->len - dataoff,
362 search[dir][i].pattern,
363 search[dir][i].plen,
364 search[dir][i].skip,
365 search[dir][i].term,
366 &matchoff, &matchlen,
367 array,
368 search[dir][i].getnum);
369 if (found) break;
370 }
371 if (found == -1) {
372 /* We don't usually drop packets. After all, this is
373 connection tracking, not packet filtering.
374 However, it is necessary for accurate tracking in
375 this case. */
376 if (net_ratelimit())
377 printk("conntrack_ftp: partial %s %u+%u\n",
378 search[dir][i].pattern,
379 ntohl(th->seq), datalen);
380 ret = NF_DROP;
381 goto out;
382 } else if (found == 0) { /* No match */
383 ret = NF_ACCEPT;
384 goto out_update_nl;
385 }
386
387 DEBUGP("conntrack_ftp: match `%s' (%u bytes at %u)\n",
388 fb_ptr + matchoff, matchlen, ntohl(th->seq) + matchoff);
389
390 /* Allocate expectation which will be inserted */
391 exp = ip_conntrack_expect_alloc(ct);
392 if (exp == NULL) {
393 ret = NF_DROP;
394 goto out;
395 }
396
397 /* We refer to the reverse direction ("!dir") tuples here,
398 * because we're expecting something in the other direction.
399 * Doesn't matter unless NAT is happening. */
400 exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
401
402 if (htonl((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3])
403 != ct->tuplehash[dir].tuple.src.ip) {
404 /* Enrico Scholz's passive FTP to partially RNAT'd ftp
405 server: it really wants us to connect to a
406 different IP address. Simply don't record it for
407 NAT. */
408 DEBUGP("conntrack_ftp: NOT RECORDING: %u,%u,%u,%u != %u.%u.%u.%u\n",
409 array[0], array[1], array[2], array[3],
410 NIPQUAD(ct->tuplehash[dir].tuple.src.ip));
411
412 /* Thanks to Cristiano Lincoln Mattos
413 <lincoln@cesar.org.br> for reporting this potential
414 problem (DMZ machines opening holes to internal
415 networks, or the packet filter itself). */
416 if (!loose) {
417 ret = NF_ACCEPT;
418 goto out_put_expect;
419 }
420 exp->tuple.dst.ip = htonl((array[0] << 24) | (array[1] << 16)
421 | (array[2] << 8) | array[3]);
422 }
423
424 exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
425 exp->tuple.dst.u.tcp.port = htons(array[4] << 8 | array[5]);
426 exp->tuple.src.u.tcp.port = 0; /* Don't care. */
427 exp->tuple.dst.protonum = IPPROTO_TCP;
428 exp->mask = ((struct ip_conntrack_tuple)
429 { { htonl(0xFFFFFFFF), { 0 } },
430 { htonl(0xFFFFFFFF), { .tcp = { htons(0xFFFF) } }, 0xFF }});
431
432 exp->expectfn = NULL;
433 exp->flags = 0;
434
435 /* Now, NAT might want to mangle the packet, and register the
436 * (possibly changed) expectation itself. */
437 ip_nat_ftp = rcu_dereference(ip_nat_ftp_hook);
438 if (ip_nat_ftp)
439 ret = ip_nat_ftp(pskb, ctinfo, search[dir][i].ftptype,
440 matchoff, matchlen, exp, &seq);
441 else {
442 /* Can't expect this? Best to drop packet now. */
443 if (ip_conntrack_expect_related(exp) != 0)
444 ret = NF_DROP;
445 else
446 ret = NF_ACCEPT;
447 }
448
449out_put_expect:
450 ip_conntrack_expect_put(exp);
451
452out_update_nl:
453 /* Now if this ends in \n, update ftp info. Seq may have been
454 * adjusted by NAT code. */
455 if (ends_in_nl)
456		update_nl_seq(seq, ct_ftp_info, dir, *pskb);
457 out:
458 spin_unlock_bh(&ip_ftp_lock);
459 return ret;
460}
461
462static struct ip_conntrack_helper ftp[MAX_PORTS];
463static char ftp_names[MAX_PORTS][sizeof("ftp-65535")];
464
465/* Not __exit: called from init() */
466static void ip_conntrack_ftp_fini(void)
467{
468 int i;
469 for (i = 0; i < ports_c; i++) {
470 DEBUGP("ip_ct_ftp: unregistering helper for port %d\n",
471 ports[i]);
472 ip_conntrack_helper_unregister(&ftp[i]);
473 }
474
475 kfree(ftp_buffer);
476}
477
478static int __init ip_conntrack_ftp_init(void)
479{
480 int i, ret;
481 char *tmpname;
482
483 ftp_buffer = kmalloc(65536, GFP_KERNEL);
484 if (!ftp_buffer)
485 return -ENOMEM;
486
487 if (ports_c == 0)
488 ports[ports_c++] = FTP_PORT;
489
490 for (i = 0; i < ports_c; i++) {
491 ftp[i].tuple.src.u.tcp.port = htons(ports[i]);
492 ftp[i].tuple.dst.protonum = IPPROTO_TCP;
493 ftp[i].mask.src.u.tcp.port = htons(0xFFFF);
494 ftp[i].mask.dst.protonum = 0xFF;
495 ftp[i].max_expected = 1;
496 ftp[i].timeout = 5 * 60; /* 5 minutes */
497 ftp[i].me = THIS_MODULE;
498 ftp[i].help = help;
499
500 tmpname = &ftp_names[i][0];
501 if (ports[i] == FTP_PORT)
502 sprintf(tmpname, "ftp");
503 else
504 sprintf(tmpname, "ftp-%d", ports[i]);
505 ftp[i].name = tmpname;
506
507 DEBUGP("ip_ct_ftp: registering helper for port %d\n",
508 ports[i]);
509 ret = ip_conntrack_helper_register(&ftp[i]);
510
511 if (ret) {
512 ip_conntrack_ftp_fini();
513 return ret;
514 }
515 }
516 return 0;
517}
518
519module_init(ip_conntrack_ftp_init);
520module_exit(ip_conntrack_ftp_fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323.c b/net/ipv4/netfilter/ip_conntrack_helper_h323.c
deleted file mode 100644
index 53eb365ccc7e..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_helper_h323.c
+++ /dev/null
@@ -1,1841 +0,0 @@
1/*
2 * H.323 connection tracking helper
3 *
4 * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
5 *
6 * This source code is licensed under General Public License version 2.
7 *
8 * Based on the 'brute force' H.323 connection tracking module by
9 * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
10 *
11 * For more information, please see http://nath323.sourceforge.net/
12 */
13
14#include <linux/module.h>
15#include <linux/netfilter.h>
16#include <linux/ip.h>
17#include <net/tcp.h>
18#include <linux/netfilter_ipv4/ip_conntrack.h>
19#include <linux/netfilter_ipv4/ip_conntrack_core.h>
20#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
21#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
22#include <linux/netfilter_ipv4/ip_conntrack_h323.h>
23#include <linux/moduleparam.h>
24#include <linux/ctype.h>
25#include <linux/inet.h>
26
27#if 0
28#define DEBUGP printk
29#else
30#define DEBUGP(format, args...)
31#endif
32
33/* Parameters */
34static unsigned int default_rrq_ttl = 300;
35module_param(default_rrq_ttl, uint, 0600);
36MODULE_PARM_DESC(default_rrq_ttl, "use this TTL if it's missing in RRQ");
37
38static int gkrouted_only = 1;
39module_param(gkrouted_only, int, 0600);
40MODULE_PARM_DESC(gkrouted_only, "only accept calls from gatekeeper");
41
42static int callforward_filter = 1;
43module_param(callforward_filter, bool, 0600);
44MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations "
45 "if both endpoints are on different sides "
46 "(determined by routing information)");
47
48/* Hooks for NAT */
49int (*set_h245_addr_hook) (struct sk_buff ** pskb,
50 unsigned char **data, int dataoff,
51 H245_TransportAddress * addr,
52 __be32 ip, u_int16_t port);
53int (*set_h225_addr_hook) (struct sk_buff ** pskb,
54 unsigned char **data, int dataoff,
55 TransportAddress * addr,
56 __be32 ip, u_int16_t port);
57int (*set_sig_addr_hook) (struct sk_buff ** pskb,
58 struct ip_conntrack * ct,
59 enum ip_conntrack_info ctinfo,
60 unsigned char **data,
61 TransportAddress * addr, int count);
62int (*set_ras_addr_hook) (struct sk_buff ** pskb,
63 struct ip_conntrack * ct,
64 enum ip_conntrack_info ctinfo,
65 unsigned char **data,
66 TransportAddress * addr, int count);
67int (*nat_rtp_rtcp_hook) (struct sk_buff ** pskb,
68 struct ip_conntrack * ct,
69 enum ip_conntrack_info ctinfo,
70 unsigned char **data, int dataoff,
71 H245_TransportAddress * addr,
72 u_int16_t port, u_int16_t rtp_port,
73 struct ip_conntrack_expect * rtp_exp,
74 struct ip_conntrack_expect * rtcp_exp);
75int (*nat_t120_hook) (struct sk_buff ** pskb,
76 struct ip_conntrack * ct,
77 enum ip_conntrack_info ctinfo,
78 unsigned char **data, int dataoff,
79 H245_TransportAddress * addr, u_int16_t port,
80 struct ip_conntrack_expect * exp);
81int (*nat_h245_hook) (struct sk_buff ** pskb,
82 struct ip_conntrack * ct,
83 enum ip_conntrack_info ctinfo,
84 unsigned char **data, int dataoff,
85 TransportAddress * addr, u_int16_t port,
86 struct ip_conntrack_expect * exp);
87int (*nat_callforwarding_hook) (struct sk_buff ** pskb,
88 struct ip_conntrack * ct,
89 enum ip_conntrack_info ctinfo,
90 unsigned char **data, int dataoff,
91 TransportAddress * addr, u_int16_t port,
92 struct ip_conntrack_expect * exp);
93int (*nat_q931_hook) (struct sk_buff ** pskb,
94 struct ip_conntrack * ct,
95 enum ip_conntrack_info ctinfo,
96 unsigned char **data, TransportAddress * addr, int idx,
97 u_int16_t port, struct ip_conntrack_expect * exp);
98
99
100static DEFINE_SPINLOCK(ip_h323_lock);
101static char *h323_buffer;
102
103/****************************************************************************/
104static int get_tpkt_data(struct sk_buff **pskb, struct ip_conntrack *ct,
105 enum ip_conntrack_info ctinfo,
106 unsigned char **data, int *datalen, int *dataoff)
107{
108 struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
109 int dir = CTINFO2DIR(ctinfo);
110 struct tcphdr _tcph, *th;
111 int tcpdatalen;
112 int tcpdataoff;
113 unsigned char *tpkt;
114 int tpktlen;
115 int tpktoff;
116
117 /* Get TCP header */
118 th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4,
119 sizeof(_tcph), &_tcph);
120 if (th == NULL)
121 return 0;
122
123 /* Get TCP data offset */
124 tcpdataoff = (*pskb)->nh.iph->ihl * 4 + th->doff * 4;
125
126 /* Get TCP data length */
127 tcpdatalen = (*pskb)->len - tcpdataoff;
128 if (tcpdatalen <= 0) /* No TCP data */
129 goto clear_out;
130
131 if (*data == NULL) { /* first TPKT */
132 /* Get first TPKT pointer */
133 tpkt = skb_header_pointer(*pskb, tcpdataoff, tcpdatalen,
134 h323_buffer);
135 BUG_ON(tpkt == NULL);
136
137 /* Validate TPKT identifier */
138 if (tcpdatalen < 4 || tpkt[0] != 0x03 || tpkt[1] != 0) {
139 /* Netmeeting sends TPKT header and data separately */
140 if (info->tpkt_len[dir] > 0) {
141 DEBUGP("ip_ct_h323: previous packet "
142 "indicated separate TPKT data of %hu "
143 "bytes\n", info->tpkt_len[dir]);
144 if (info->tpkt_len[dir] <= tcpdatalen) {
145 /* Yes, there was a TPKT header
146 * received */
147 *data = tpkt;
148 *datalen = info->tpkt_len[dir];
149 *dataoff = 0;
150 goto out;
151 }
152
153 /* Fragmented TPKT */
154 if (net_ratelimit())
155 printk("ip_ct_h323: "
156 "fragmented TPKT\n");
157 goto clear_out;
158 }
159
160 /* It is not even a TPKT */
161 return 0;
162 }
163 tpktoff = 0;
164 } else { /* Next TPKT */
165 tpktoff = *dataoff + *datalen;
166 tcpdatalen -= tpktoff;
167 if (tcpdatalen <= 4) /* No more TPKT */
168 goto clear_out;
169 tpkt = *data + *datalen;
170
171 /* Validate TPKT identifier */
172 if (tpkt[0] != 0x03 || tpkt[1] != 0)
173 goto clear_out;
174 }
175
176 /* Validate TPKT length */
177 tpktlen = tpkt[2] * 256 + tpkt[3];
178 if (tpktlen < 4)
179 goto clear_out;
180 if (tpktlen > tcpdatalen) {
181 if (tcpdatalen == 4) { /* Separate TPKT header */
182 /* Netmeeting sends TPKT header and data separately */
183 DEBUGP("ip_ct_h323: separate TPKT header indicates "
184 "there will be TPKT data of %hu bytes\n",
185 tpktlen - 4);
186 info->tpkt_len[dir] = tpktlen - 4;
187 return 0;
188 }
189
190 if (net_ratelimit())
191 printk("ip_ct_h323: incomplete TPKT (fragmented?)\n");
192 goto clear_out;
193 }
194
195 /* This is the encapsulated data */
196 *data = tpkt + 4;
197 *datalen = tpktlen - 4;
198 *dataoff = tpktoff + 4;
199
200 out:
201 /* Clear TPKT length */
202 info->tpkt_len[dir] = 0;
203 return 1;
204
205 clear_out:
206 info->tpkt_len[dir] = 0;
207 return 0;
208}
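/* Illustrative TPKT framing (RFC 1006): every record starts with
 * { 0x03, 0x00, len_hi, len_lo }, the length covering the 4-byte header
 * itself.  A 100-byte H.225/H.245 payload thus arrives as
 * 03 00 00 68 <payload>, giving tpktlen = 0x68 = 104, with the
 * encapsulated data being tpktlen - 4 bytes starting at tpkt + 4.
 */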
209
210/****************************************************************************/
211static int get_h245_addr(unsigned char *data, H245_TransportAddress * addr,
212 __be32 * ip, u_int16_t * port)
213{
214 unsigned char *p;
215
216 if (addr->choice != eH245_TransportAddress_unicastAddress ||
217 addr->unicastAddress.choice != eUnicastAddress_iPAddress)
218 return 0;
219
220 p = data + addr->unicastAddress.iPAddress.network;
221 *ip = htonl((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | (p[3]));
222 *port = (p[4] << 8) | (p[5]);
223
224 return 1;
225}
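/* Illustrative decoding (example bytes): the six octets at p are a raw
 * IPv4 address followed by a big-endian port, so
 * { 0xC0, 0xA8, 0x01, 0x02, 0x1F, 0x40 } yields ip = 192.168.1.2 and
 * port = 0x1F40 = 8000.
 */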
226
227/****************************************************************************/
228static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct,
229 enum ip_conntrack_info ctinfo,
230 unsigned char **data, int dataoff,
231 H245_TransportAddress * addr)
232{
233 int dir = CTINFO2DIR(ctinfo);
234 int ret = 0;
235 __be32 ip;
236 u_int16_t port;
237 u_int16_t rtp_port;
238 struct ip_conntrack_expect *rtp_exp;
239 struct ip_conntrack_expect *rtcp_exp;
240 typeof(nat_rtp_rtcp_hook) nat_rtp_rtcp;
241
242 /* Read RTP or RTCP address */
243 if (!get_h245_addr(*data, addr, &ip, &port) ||
244 ip != ct->tuplehash[dir].tuple.src.ip || port == 0)
245 return 0;
246
247 /* RTP port is even */
248 rtp_port = port & (~1);
249
250 /* Create expect for RTP */
251 if ((rtp_exp = ip_conntrack_expect_alloc(ct)) == NULL)
252 return -1;
253 rtp_exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
254 rtp_exp->tuple.src.u.udp.port = 0;
255 rtp_exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
256 rtp_exp->tuple.dst.u.udp.port = htons(rtp_port);
257 rtp_exp->tuple.dst.protonum = IPPROTO_UDP;
258 rtp_exp->mask.src.ip = htonl(0xFFFFFFFF);
259 rtp_exp->mask.src.u.udp.port = 0;
260 rtp_exp->mask.dst.ip = htonl(0xFFFFFFFF);
261 rtp_exp->mask.dst.u.udp.port = htons(0xFFFF);
262 rtp_exp->mask.dst.protonum = 0xFF;
263 rtp_exp->flags = 0;
264
265 /* Create expect for RTCP */
266 if ((rtcp_exp = ip_conntrack_expect_alloc(ct)) == NULL) {
267 ip_conntrack_expect_put(rtp_exp);
268 return -1;
269 }
270 rtcp_exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
271 rtcp_exp->tuple.src.u.udp.port = 0;
272 rtcp_exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
273 rtcp_exp->tuple.dst.u.udp.port = htons(rtp_port + 1);
274 rtcp_exp->tuple.dst.protonum = IPPROTO_UDP;
275 rtcp_exp->mask.src.ip = htonl(0xFFFFFFFF);
276 rtcp_exp->mask.src.u.udp.port = 0;
277 rtcp_exp->mask.dst.ip = htonl(0xFFFFFFFF);
278 rtcp_exp->mask.dst.u.udp.port = htons(0xFFFF);
279 rtcp_exp->mask.dst.protonum = 0xFF;
280 rtcp_exp->flags = 0;
281
282 if (ct->tuplehash[dir].tuple.src.ip !=
283 ct->tuplehash[!dir].tuple.dst.ip &&
284 (nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook))) {
285 /* NAT needed */
286 ret = nat_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
287 addr, port, rtp_port, rtp_exp, rtcp_exp);
288 } else { /* Conntrack only */
289 rtp_exp->expectfn = NULL;
290 rtcp_exp->expectfn = NULL;
291
292 if (ip_conntrack_expect_related(rtp_exp) == 0) {
293 if (ip_conntrack_expect_related(rtcp_exp) == 0) {
294 DEBUGP("ip_ct_h323: expect RTP "
295 "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
296 NIPQUAD(rtp_exp->tuple.src.ip),
297 ntohs(rtp_exp->tuple.src.u.udp.port),
298 NIPQUAD(rtp_exp->tuple.dst.ip),
299 ntohs(rtp_exp->tuple.dst.u.udp.port));
300 DEBUGP("ip_ct_h323: expect RTCP "
301 "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
302 NIPQUAD(rtcp_exp->tuple.src.ip),
303 ntohs(rtcp_exp->tuple.src.u.udp.port),
304 NIPQUAD(rtcp_exp->tuple.dst.ip),
305 ntohs(rtcp_exp->tuple.dst.u.udp.port));
306 } else {
307 ip_conntrack_unexpect_related(rtp_exp);
308 ret = -1;
309 }
310 } else
311 ret = -1;
312 }
313
314 ip_conntrack_expect_put(rtp_exp);
315 ip_conntrack_expect_put(rtcp_exp);
316
317 return ret;
318}
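/* Illustrative pairing (example values): RTP uses the even port of a
 * pair and RTCP the next odd one, hence rtp_port = port & ~1 above.  An
 * advertised media port of 5005 produces an RTP expectation on 5004 and
 * an RTCP expectation on 5005.
 */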
319
320/****************************************************************************/
321static int expect_t120(struct sk_buff **pskb,
322 struct ip_conntrack *ct,
323 enum ip_conntrack_info ctinfo,
324 unsigned char **data, int dataoff,
325 H245_TransportAddress * addr)
326{
327 int dir = CTINFO2DIR(ctinfo);
328 int ret = 0;
329 __be32 ip;
330 u_int16_t port;
331 struct ip_conntrack_expect *exp = NULL;
332 typeof(nat_t120_hook) nat_t120;
333
334 /* Read T.120 address */
335 if (!get_h245_addr(*data, addr, &ip, &port) ||
336 ip != ct->tuplehash[dir].tuple.src.ip || port == 0)
337 return 0;
338
339 /* Create expect for T.120 connections */
340 if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
341 return -1;
342 exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
343 exp->tuple.src.u.tcp.port = 0;
344 exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
345 exp->tuple.dst.u.tcp.port = htons(port);
346 exp->tuple.dst.protonum = IPPROTO_TCP;
347 exp->mask.src.ip = htonl(0xFFFFFFFF);
348 exp->mask.src.u.tcp.port = 0;
349 exp->mask.dst.ip = htonl(0xFFFFFFFF);
350 exp->mask.dst.u.tcp.port = htons(0xFFFF);
351 exp->mask.dst.protonum = 0xFF;
352 exp->flags = IP_CT_EXPECT_PERMANENT; /* Accept multiple channels */
353
354 if (ct->tuplehash[dir].tuple.src.ip !=
355 ct->tuplehash[!dir].tuple.dst.ip &&
356 (nat_t120 = rcu_dereference(nat_t120_hook))) {
357 /* NAT needed */
358 ret = nat_t120(pskb, ct, ctinfo, data, dataoff, addr,
359 port, exp);
360 } else { /* Conntrack only */
361 exp->expectfn = NULL;
362 if (ip_conntrack_expect_related(exp) == 0) {
363 DEBUGP("ip_ct_h323: expect T.120 "
364 "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
365 NIPQUAD(exp->tuple.src.ip),
366 ntohs(exp->tuple.src.u.tcp.port),
367 NIPQUAD(exp->tuple.dst.ip),
368 ntohs(exp->tuple.dst.u.tcp.port));
369 } else
370 ret = -1;
371 }
372
373 ip_conntrack_expect_put(exp);
374
375 return ret;
376}
377
378/****************************************************************************/
379static int process_h245_channel(struct sk_buff **pskb,
380 struct ip_conntrack *ct,
381 enum ip_conntrack_info ctinfo,
382 unsigned char **data, int dataoff,
383 H2250LogicalChannelParameters * channel)
384{
385 int ret;
386
387 if (channel->options & eH2250LogicalChannelParameters_mediaChannel) {
388 /* RTP */
389 ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
390 &channel->mediaChannel);
391 if (ret < 0)
392 return -1;
393 }
394
395	if (channel->options &
396	    eH2250LogicalChannelParameters_mediaControlChannel) {
397 /* RTCP */
398 ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
399 &channel->mediaControlChannel);
400 if (ret < 0)
401 return -1;
402 }
403
404 return 0;
405}
406
407/****************************************************************************/
408static int process_olc(struct sk_buff **pskb, struct ip_conntrack *ct,
409 enum ip_conntrack_info ctinfo,
410 unsigned char **data, int dataoff,
411 OpenLogicalChannel * olc)
412{
413 int ret;
414
415 DEBUGP("ip_ct_h323: OpenLogicalChannel\n");
416
417 if (olc->forwardLogicalChannelParameters.multiplexParameters.choice ==
418 eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)
419 {
420 ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff,
421 &olc->
422 forwardLogicalChannelParameters.
423 multiplexParameters.
424 h2250LogicalChannelParameters);
425 if (ret < 0)
426 return -1;
427 }
428
429 if ((olc->options &
430 eOpenLogicalChannel_reverseLogicalChannelParameters) &&
431 (olc->reverseLogicalChannelParameters.options &
432 eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters)
433 && (olc->reverseLogicalChannelParameters.multiplexParameters.
434 choice ==
435 eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
436 {
437 ret =
438 process_h245_channel(pskb, ct, ctinfo, data, dataoff,
439 &olc->
440 reverseLogicalChannelParameters.
441 multiplexParameters.
442 h2250LogicalChannelParameters);
443 if (ret < 0)
444 return -1;
445 }
446
447 if ((olc->options & eOpenLogicalChannel_separateStack) &&
448 olc->forwardLogicalChannelParameters.dataType.choice ==
449 eDataType_data &&
450 olc->forwardLogicalChannelParameters.dataType.data.application.
451 choice == eDataApplicationCapability_application_t120 &&
452 olc->forwardLogicalChannelParameters.dataType.data.application.
453 t120.choice == eDataProtocolCapability_separateLANStack &&
454 olc->separateStack.networkAddress.choice ==
455 eNetworkAccessParameters_networkAddress_localAreaAddress) {
456 ret = expect_t120(pskb, ct, ctinfo, data, dataoff,
457 &olc->separateStack.networkAddress.
458 localAreaAddress);
459 if (ret < 0)
460 return -1;
461 }
462
463 return 0;
464}
465
466/****************************************************************************/
467static int process_olca(struct sk_buff **pskb, struct ip_conntrack *ct,
468 enum ip_conntrack_info ctinfo,
469 unsigned char **data, int dataoff,
470 OpenLogicalChannelAck * olca)
471{
472 H2250LogicalChannelAckParameters *ack;
473 int ret;
474
475 DEBUGP("ip_ct_h323: OpenLogicalChannelAck\n");
476
477 if ((olca->options &
478 eOpenLogicalChannelAck_reverseLogicalChannelParameters) &&
479 (olca->reverseLogicalChannelParameters.options &
480 eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters)
481 && (olca->reverseLogicalChannelParameters.multiplexParameters.
482 choice ==
483 eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
484 {
485 ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff,
486 &olca->
487 reverseLogicalChannelParameters.
488 multiplexParameters.
489 h2250LogicalChannelParameters);
490 if (ret < 0)
491 return -1;
492 }
493
494 if ((olca->options &
495 eOpenLogicalChannelAck_forwardMultiplexAckParameters) &&
496 (olca->forwardMultiplexAckParameters.choice ==
497 eOpenLogicalChannelAck_forwardMultiplexAckParameters_h2250LogicalChannelAckParameters))
498 {
499 ack = &olca->forwardMultiplexAckParameters.
500 h2250LogicalChannelAckParameters;
501 if (ack->options &
502 eH2250LogicalChannelAckParameters_mediaChannel) {
503 /* RTP */
504 ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
505 &ack->mediaChannel);
506 if (ret < 0)
507 return -1;
508 }
509
510 if (ack->options &
511 eH2250LogicalChannelAckParameters_mediaControlChannel) {
512 /* RTCP */
513 ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
514 &ack->mediaControlChannel);
515 if (ret < 0)
516 return -1;
517 }
518 }
519
520 return 0;
521}
522
523/****************************************************************************/
524static int process_h245(struct sk_buff **pskb, struct ip_conntrack *ct,
525 enum ip_conntrack_info ctinfo,
526 unsigned char **data, int dataoff,
527 MultimediaSystemControlMessage * mscm)
528{
529 switch (mscm->choice) {
530 case eMultimediaSystemControlMessage_request:
531 if (mscm->request.choice ==
532 eRequestMessage_openLogicalChannel) {
533 return process_olc(pskb, ct, ctinfo, data, dataoff,
534 &mscm->request.openLogicalChannel);
535 }
536 DEBUGP("ip_ct_h323: H.245 Request %d\n",
537 mscm->request.choice);
538 break;
539 case eMultimediaSystemControlMessage_response:
540 if (mscm->response.choice ==
541 eResponseMessage_openLogicalChannelAck) {
542 return process_olca(pskb, ct, ctinfo, data, dataoff,
543 &mscm->response.
544 openLogicalChannelAck);
545 }
546 DEBUGP("ip_ct_h323: H.245 Response %d\n",
547 mscm->response.choice);
548 break;
549 default:
550 DEBUGP("ip_ct_h323: H.245 signal %d\n", mscm->choice);
551 break;
552 }
553
554 return 0;
555}
556
557/****************************************************************************/
558static int h245_help(struct sk_buff **pskb, struct ip_conntrack *ct,
559 enum ip_conntrack_info ctinfo)
560{
561 static MultimediaSystemControlMessage mscm;
562 unsigned char *data = NULL;
563 int datalen;
564 int dataoff;
565 int ret;
566
567 /* Until there's been traffic both ways, don't look in packets. */
568 if (ctinfo != IP_CT_ESTABLISHED
569 && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
570 return NF_ACCEPT;
571 }
572 DEBUGP("ip_ct_h245: skblen = %u\n", (*pskb)->len);
573
574 spin_lock_bh(&ip_h323_lock);
575
576 /* Process each TPKT */
577 while (get_tpkt_data(pskb, ct, ctinfo, &data, &datalen, &dataoff)) {
578 DEBUGP("ip_ct_h245: TPKT %u.%u.%u.%u->%u.%u.%u.%u, len=%d\n",
579 NIPQUAD((*pskb)->nh.iph->saddr),
580 NIPQUAD((*pskb)->nh.iph->daddr), datalen);
581
582 /* Decode H.245 signal */
583 ret = DecodeMultimediaSystemControlMessage(data, datalen,
584 &mscm);
585 if (ret < 0) {
586 if (net_ratelimit())
587 printk("ip_ct_h245: decoding error: %s\n",
588 ret == H323_ERROR_BOUND ?
589 "out of bound" : "out of range");
590 /* We don't drop when decoding error */
591 break;
592 }
593
594 /* Process H.245 signal */
595 if (process_h245(pskb, ct, ctinfo, &data, dataoff, &mscm) < 0)
596 goto drop;
597 }
598
599 spin_unlock_bh(&ip_h323_lock);
600 return NF_ACCEPT;
601
602 drop:
603 spin_unlock_bh(&ip_h323_lock);
604 if (net_ratelimit())
605 printk("ip_ct_h245: packet dropped\n");
606 return NF_DROP;
607}
608
609/****************************************************************************/
610static struct ip_conntrack_helper ip_conntrack_helper_h245 = {
611 .name = "H.245",
612 .me = THIS_MODULE,
613 .max_expected = H323_RTP_CHANNEL_MAX * 4 + 2 /* T.120 */ ,
614 .timeout = 240,
615 .tuple = {.dst = {.protonum = IPPROTO_TCP}},
616 .mask = {.src = {.u = {0xFFFF}},
617 .dst = {.protonum = 0xFF}},
618 .help = h245_help
619};
620
621/****************************************************************************/
622void ip_conntrack_h245_expect(struct ip_conntrack *new,
623 struct ip_conntrack_expect *this)
624{
625 write_lock_bh(&ip_conntrack_lock);
626 new->helper = &ip_conntrack_helper_h245;
627 write_unlock_bh(&ip_conntrack_lock);
628}
629
630/****************************************************************************/
631int get_h225_addr(unsigned char *data, TransportAddress * addr,
632 __be32 * ip, u_int16_t * port)
633{
634 unsigned char *p;
635
636 if (addr->choice != eTransportAddress_ipAddress)
637 return 0;
638
639 p = data + addr->ipAddress.ip;
640 *ip = htonl((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | (p[3]));
641 *port = (p[4] << 8) | (p[5]);
642
643 return 1;
644}
645
646/****************************************************************************/
647static int expect_h245(struct sk_buff **pskb, struct ip_conntrack *ct,
648 enum ip_conntrack_info ctinfo,
649 unsigned char **data, int dataoff,
650 TransportAddress * addr)
651{
652 int dir = CTINFO2DIR(ctinfo);
653 int ret = 0;
654 __be32 ip;
655 u_int16_t port;
656 struct ip_conntrack_expect *exp = NULL;
657 typeof(nat_h245_hook) nat_h245;
658
659 /* Read h245Address */
660 if (!get_h225_addr(*data, addr, &ip, &port) ||
661 ip != ct->tuplehash[dir].tuple.src.ip || port == 0)
662 return 0;
663
664 /* Create expect for h245 connection */
665 if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
666 return -1;
667 exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
668 exp->tuple.src.u.tcp.port = 0;
669 exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
670 exp->tuple.dst.u.tcp.port = htons(port);
671 exp->tuple.dst.protonum = IPPROTO_TCP;
672 exp->mask.src.ip = htonl(0xFFFFFFFF);
673 exp->mask.src.u.tcp.port = 0;
674 exp->mask.dst.ip = htonl(0xFFFFFFFF);
675 exp->mask.dst.u.tcp.port = htons(0xFFFF);
676 exp->mask.dst.protonum = 0xFF;
677 exp->flags = 0;
678
679 if (ct->tuplehash[dir].tuple.src.ip !=
680 ct->tuplehash[!dir].tuple.dst.ip &&
681 (nat_h245 = rcu_dereference(nat_h245_hook))) {
682 /* NAT needed */
683 ret = nat_h245(pskb, ct, ctinfo, data, dataoff, addr,
684 port, exp);
685 } else { /* Conntrack only */
686 exp->expectfn = ip_conntrack_h245_expect;
687
688 if (ip_conntrack_expect_related(exp) == 0) {
689 DEBUGP("ip_ct_q931: expect H.245 "
690 "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
691 NIPQUAD(exp->tuple.src.ip),
692 ntohs(exp->tuple.src.u.tcp.port),
693 NIPQUAD(exp->tuple.dst.ip),
694 ntohs(exp->tuple.dst.u.tcp.port));
695 } else
696 ret = -1;
697 }
698
699 ip_conntrack_expect_put(exp);
700
701 return ret;
702}
703
704/* Forward declaration */
705void ip_conntrack_q931_expect(struct ip_conntrack *new,
706 struct ip_conntrack_expect *this);
707
708/****************************************************************************/
709static int expect_callforwarding(struct sk_buff **pskb,
710 struct ip_conntrack *ct,
711 enum ip_conntrack_info ctinfo,
712 unsigned char **data, int dataoff,
713 TransportAddress * addr)
714{
715 int dir = CTINFO2DIR(ctinfo);
716 int ret = 0;
717 __be32 ip;
718 u_int16_t port;
719 struct ip_conntrack_expect *exp = NULL;
720 typeof(nat_callforwarding_hook) nat_callforwarding;
721
722 /* Read alternativeAddress */
723 if (!get_h225_addr(*data, addr, &ip, &port) || port == 0)
724 return 0;
725
726	/* If the calling party is on the same side as the forward-to party,
727	 * we don't need to track the second call */
728 if (callforward_filter) {
729 struct rtable *rt1, *rt2;
730 struct flowi fl1 = {
731 .fl4_dst = ip,
732 };
733 struct flowi fl2 = {
734 .fl4_dst = ct->tuplehash[!dir].tuple.src.ip,
735 };
736
737 if (ip_route_output_key(&rt1, &fl1) == 0) {
738 if (ip_route_output_key(&rt2, &fl2) == 0) {
739 if (rt1->rt_gateway == rt2->rt_gateway &&
740 rt1->u.dst.dev == rt2->u.dst.dev)
741 ret = 1;
742 dst_release(&rt2->u.dst);
743 }
744 dst_release(&rt1->u.dst);
745 }
746 if (ret) {
747 DEBUGP("ip_ct_q931: Call Forwarding not tracked\n");
748 return 0;
749 }
750 }
751
752 /* Create expect for the second call leg */
753 if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
754 return -1;
755 exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
756 exp->tuple.src.u.tcp.port = 0;
757 exp->tuple.dst.ip = ip;
758 exp->tuple.dst.u.tcp.port = htons(port);
759 exp->tuple.dst.protonum = IPPROTO_TCP;
760 exp->mask.src.ip = htonl(0xFFFFFFFF);
761 exp->mask.src.u.tcp.port = 0;
762 exp->mask.dst.ip = htonl(0xFFFFFFFF);
763 exp->mask.dst.u.tcp.port = htons(0xFFFF);
764 exp->mask.dst.protonum = 0xFF;
765 exp->flags = 0;
766
767 if (ct->tuplehash[dir].tuple.src.ip !=
768 ct->tuplehash[!dir].tuple.dst.ip &&
769 (nat_callforwarding = rcu_dereference(nat_callforwarding_hook))) {
770 /* Need NAT */
771 ret = nat_callforwarding(pskb, ct, ctinfo, data, dataoff,
772 addr, port, exp);
773 } else { /* Conntrack only */
774 exp->expectfn = ip_conntrack_q931_expect;
775
776 if (ip_conntrack_expect_related(exp) == 0) {
777 DEBUGP("ip_ct_q931: expect Call Forwarding "
778 "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
779 NIPQUAD(exp->tuple.src.ip),
780 ntohs(exp->tuple.src.u.tcp.port),
781 NIPQUAD(exp->tuple.dst.ip),
782 ntohs(exp->tuple.dst.u.tcp.port));
783 } else
784 ret = -1;
785 }
786
787 ip_conntrack_expect_put(exp);
788
789 return ret;
790}
791
792/****************************************************************************/
793static int process_setup(struct sk_buff **pskb, struct ip_conntrack *ct,
794 enum ip_conntrack_info ctinfo,
795 unsigned char **data, int dataoff,
796 Setup_UUIE * setup)
797{
798 int dir = CTINFO2DIR(ctinfo);
799 int ret;
800 int i;
801 __be32 ip;
802 u_int16_t port;
803 typeof(set_h225_addr_hook) set_h225_addr;
804
805 DEBUGP("ip_ct_q931: Setup\n");
806
807 if (setup->options & eSetup_UUIE_h245Address) {
808 ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
809 &setup->h245Address);
810 if (ret < 0)
811 return -1;
812 }
813
814 set_h225_addr = rcu_dereference(set_h225_addr_hook);
815
816 if ((setup->options & eSetup_UUIE_destCallSignalAddress) &&
817 (set_h225_addr) &&
818 get_h225_addr(*data, &setup->destCallSignalAddress, &ip, &port) &&
819 ip != ct->tuplehash[!dir].tuple.src.ip) {
820 DEBUGP("ip_ct_q931: set destCallSignalAddress "
821 "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
822 NIPQUAD(ip), port,
823 NIPQUAD(ct->tuplehash[!dir].tuple.src.ip),
824 ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port));
825 ret = set_h225_addr(pskb, data, dataoff,
826 &setup->destCallSignalAddress,
827 ct->tuplehash[!dir].tuple.src.ip,
828 ntohs(ct->tuplehash[!dir].tuple.src.
829 u.tcp.port));
830 if (ret < 0)
831 return -1;
832 }
833
834 if ((setup->options & eSetup_UUIE_sourceCallSignalAddress) &&
835 (set_h225_addr) &&
836 get_h225_addr(*data, &setup->sourceCallSignalAddress, &ip, &port)
837 && ip != ct->tuplehash[!dir].tuple.dst.ip) {
838 DEBUGP("ip_ct_q931: set sourceCallSignalAddress "
839 "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
840 NIPQUAD(ip), port,
841 NIPQUAD(ct->tuplehash[!dir].tuple.dst.ip),
842 ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port));
843 ret = set_h225_addr(pskb, data, dataoff,
844 &setup->sourceCallSignalAddress,
845 ct->tuplehash[!dir].tuple.dst.ip,
846 ntohs(ct->tuplehash[!dir].tuple.dst.
847 u.tcp.port));
848 if (ret < 0)
849 return -1;
850 }
851
852 if (setup->options & eSetup_UUIE_fastStart) {
853 for (i = 0; i < setup->fastStart.count; i++) {
854 ret = process_olc(pskb, ct, ctinfo, data, dataoff,
855 &setup->fastStart.item[i]);
856 if (ret < 0)
857 return -1;
858 }
859 }
860
861 return 0;
862}
863
864/****************************************************************************/
865static int process_callproceeding(struct sk_buff **pskb,
866 struct ip_conntrack *ct,
867 enum ip_conntrack_info ctinfo,
868 unsigned char **data, int dataoff,
869 CallProceeding_UUIE * callproc)
870{
871 int ret;
872 int i;
873
874 DEBUGP("ip_ct_q931: CallProceeding\n");
875
876 if (callproc->options & eCallProceeding_UUIE_h245Address) {
877 ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
878 &callproc->h245Address);
879 if (ret < 0)
880 return -1;
881 }
882
883 if (callproc->options & eCallProceeding_UUIE_fastStart) {
884 for (i = 0; i < callproc->fastStart.count; i++) {
885 ret = process_olc(pskb, ct, ctinfo, data, dataoff,
886 &callproc->fastStart.item[i]);
887 if (ret < 0)
888 return -1;
889 }
890 }
891
892 return 0;
893}
894
895/****************************************************************************/
896static int process_connect(struct sk_buff **pskb, struct ip_conntrack *ct,
897 enum ip_conntrack_info ctinfo,
898 unsigned char **data, int dataoff,
899 Connect_UUIE * connect)
900{
901 int ret;
902 int i;
903
904 DEBUGP("ip_ct_q931: Connect\n");
905
906 if (connect->options & eConnect_UUIE_h245Address) {
907 ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
908 &connect->h245Address);
909 if (ret < 0)
910 return -1;
911 }
912
913 if (connect->options & eConnect_UUIE_fastStart) {
914 for (i = 0; i < connect->fastStart.count; i++) {
915 ret = process_olc(pskb, ct, ctinfo, data, dataoff,
916 &connect->fastStart.item[i]);
917 if (ret < 0)
918 return -1;
919 }
920 }
921
922 return 0;
923}
924
925/****************************************************************************/
926static int process_alerting(struct sk_buff **pskb, struct ip_conntrack *ct,
927 enum ip_conntrack_info ctinfo,
928 unsigned char **data, int dataoff,
929 Alerting_UUIE * alert)
930{
931 int ret;
932 int i;
933
934 DEBUGP("ip_ct_q931: Alerting\n");
935
936 if (alert->options & eAlerting_UUIE_h245Address) {
937 ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
938 &alert->h245Address);
939 if (ret < 0)
940 return -1;
941 }
942
943 if (alert->options & eAlerting_UUIE_fastStart) {
944 for (i = 0; i < alert->fastStart.count; i++) {
945 ret = process_olc(pskb, ct, ctinfo, data, dataoff,
946 &alert->fastStart.item[i]);
947 if (ret < 0)
948 return -1;
949 }
950 }
951
952 return 0;
953}
954
955/****************************************************************************/
956static int process_information(struct sk_buff **pskb,
957 struct ip_conntrack *ct,
958 enum ip_conntrack_info ctinfo,
959 unsigned char **data, int dataoff,
960 Information_UUIE * info)
961{
962 int ret;
963 int i;
964
965 DEBUGP("ip_ct_q931: Information\n");
966
967 if (info->options & eInformation_UUIE_fastStart) {
968 for (i = 0; i < info->fastStart.count; i++) {
969 ret = process_olc(pskb, ct, ctinfo, data, dataoff,
970 &info->fastStart.item[i]);
971 if (ret < 0)
972 return -1;
973 }
974 }
975
976 return 0;
977}
978
979/****************************************************************************/
980static int process_facility(struct sk_buff **pskb, struct ip_conntrack *ct,
981 enum ip_conntrack_info ctinfo,
982 unsigned char **data, int dataoff,
983 Facility_UUIE * facility)
984{
985 int ret;
986 int i;
987
988 DEBUGP("ip_ct_q931: Facility\n");
989
990 if (facility->reason.choice == eFacilityReason_callForwarded) {
991 if (facility->options & eFacility_UUIE_alternativeAddress)
992 return expect_callforwarding(pskb, ct, ctinfo, data,
993 dataoff,
994 &facility->
995 alternativeAddress);
996 return 0;
997 }
998
999 if (facility->options & eFacility_UUIE_h245Address) {
1000 ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
1001 &facility->h245Address);
1002 if (ret < 0)
1003 return -1;
1004 }
1005
1006 if (facility->options & eFacility_UUIE_fastStart) {
1007 for (i = 0; i < facility->fastStart.count; i++) {
1008 ret = process_olc(pskb, ct, ctinfo, data, dataoff,
1009 &facility->fastStart.item[i]);
1010 if (ret < 0)
1011 return -1;
1012 }
1013 }
1014
1015 return 0;
1016}
1017
1018/****************************************************************************/
1019static int process_progress(struct sk_buff **pskb, struct ip_conntrack *ct,
1020 enum ip_conntrack_info ctinfo,
1021 unsigned char **data, int dataoff,
1022 Progress_UUIE * progress)
1023{
1024 int ret;
1025 int i;
1026
1027 DEBUGP("ip_ct_q931: Progress\n");
1028
1029 if (progress->options & eProgress_UUIE_h245Address) {
1030 ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
1031 &progress->h245Address);
1032 if (ret < 0)
1033 return -1;
1034 }
1035
1036 if (progress->options & eProgress_UUIE_fastStart) {
1037 for (i = 0; i < progress->fastStart.count; i++) {
1038 ret = process_olc(pskb, ct, ctinfo, data, dataoff,
1039 &progress->fastStart.item[i]);
1040 if (ret < 0)
1041 return -1;
1042 }
1043 }
1044
1045 return 0;
1046}
1047
1048/****************************************************************************/
1049static int process_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
1050 enum ip_conntrack_info ctinfo,
1051 unsigned char **data, int dataoff, Q931 * q931)
1052{
1053 H323_UU_PDU *pdu = &q931->UUIE.h323_uu_pdu;
1054 int i;
1055 int ret = 0;
1056
1057 switch (pdu->h323_message_body.choice) {
1058 case eH323_UU_PDU_h323_message_body_setup:
1059 ret = process_setup(pskb, ct, ctinfo, data, dataoff,
1060 &pdu->h323_message_body.setup);
1061 break;
1062 case eH323_UU_PDU_h323_message_body_callProceeding:
1063 ret = process_callproceeding(pskb, ct, ctinfo, data, dataoff,
1064 &pdu->h323_message_body.
1065 callProceeding);
1066 break;
1067 case eH323_UU_PDU_h323_message_body_connect:
1068 ret = process_connect(pskb, ct, ctinfo, data, dataoff,
1069 &pdu->h323_message_body.connect);
1070 break;
1071 case eH323_UU_PDU_h323_message_body_alerting:
1072 ret = process_alerting(pskb, ct, ctinfo, data, dataoff,
1073 &pdu->h323_message_body.alerting);
1074 break;
1075 case eH323_UU_PDU_h323_message_body_information:
1076 ret = process_information(pskb, ct, ctinfo, data, dataoff,
1077 &pdu->h323_message_body.
1078 information);
1079 break;
1080 case eH323_UU_PDU_h323_message_body_facility:
1081 ret = process_facility(pskb, ct, ctinfo, data, dataoff,
1082 &pdu->h323_message_body.facility);
1083 break;
1084 case eH323_UU_PDU_h323_message_body_progress:
1085 ret = process_progress(pskb, ct, ctinfo, data, dataoff,
1086 &pdu->h323_message_body.progress);
1087 break;
1088 default:
1089 DEBUGP("ip_ct_q931: Q.931 signal %d\n",
1090 pdu->h323_message_body.choice);
1091 break;
1092 }
1093
1094 if (ret < 0)
1095 return -1;
1096
1097 if (pdu->options & eH323_UU_PDU_h245Control) {
1098 for (i = 0; i < pdu->h245Control.count; i++) {
1099 ret = process_h245(pskb, ct, ctinfo, data, dataoff,
1100 &pdu->h245Control.item[i]);
1101 if (ret < 0)
1102 return -1;
1103 }
1104 }
1105
1106 return 0;
1107}
1108
1109/****************************************************************************/
1110static int q931_help(struct sk_buff **pskb, struct ip_conntrack *ct,
1111 enum ip_conntrack_info ctinfo)
1112{
1113 static Q931 q931;
1114 unsigned char *data = NULL;
1115 int datalen;
1116 int dataoff;
1117 int ret;
1118
1119 /* Until there's been traffic both ways, don't look in packets. */
1120 if (ctinfo != IP_CT_ESTABLISHED
1121 && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
1122 return NF_ACCEPT;
1123 }
1124 DEBUGP("ip_ct_q931: skblen = %u\n", (*pskb)->len);
1125
1126 spin_lock_bh(&ip_h323_lock);
1127
1128 /* Process each TPKT */
1129 while (get_tpkt_data(pskb, ct, ctinfo, &data, &datalen, &dataoff)) {
1130 DEBUGP("ip_ct_q931: TPKT %u.%u.%u.%u->%u.%u.%u.%u, len=%d\n",
1131 NIPQUAD((*pskb)->nh.iph->saddr),
1132 NIPQUAD((*pskb)->nh.iph->daddr), datalen);
1133
1134 /* Decode Q.931 signal */
1135 ret = DecodeQ931(data, datalen, &q931);
1136 if (ret < 0) {
1137 if (net_ratelimit())
1138 printk("ip_ct_q931: decoding error: %s\n",
1139 ret == H323_ERROR_BOUND ?
1140 "out of bound" : "out of range");
1141 /* We don't drop when decoding error */
1142 break;
1143 }
1144
1145 /* Process Q.931 signal */
1146 if (process_q931(pskb, ct, ctinfo, &data, dataoff, &q931) < 0)
1147 goto drop;
1148 }
1149
1150 spin_unlock_bh(&ip_h323_lock);
1151 return NF_ACCEPT;
1152
1153 drop:
1154 spin_unlock_bh(&ip_h323_lock);
1155 if (net_ratelimit())
1156 printk("ip_ct_q931: packet dropped\n");
1157 return NF_DROP;
1158}
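/* Illustrative sketch (hypothetical userspace parser): the TPKT framing
 * (RFC 1006) that get_tpkt_data() walks in the loop above. Each Q.931
 * message on the TCP stream is preceded by a 4-byte header: version (3),
 * a reserved byte, and a 16-bit big-endian total length that includes
 * the header itself. */
#include <stddef.h>

#define TPKT_VERSION 3
#define TPKT_HDR_LEN 4

/* Returns the payload length, or -1 if the buffer does not begin with a
 * complete, well-formed TPKT. */
static int tpkt_payload_len(const unsigned char *buf, size_t len)
{
	unsigned int tlen;

	if (len < TPKT_HDR_LEN || buf[0] != TPKT_VERSION)
		return -1;
	tlen = (buf[2] << 8) | buf[3];
	if (tlen < TPKT_HDR_LEN || tlen > len)
		return -1;
	return tlen - TPKT_HDR_LEN;
}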
1159
1160/****************************************************************************/
1161static struct ip_conntrack_helper ip_conntrack_helper_q931 = {
1162 .name = "Q.931",
1163 .me = THIS_MODULE,
1164 .max_expected = H323_RTP_CHANNEL_MAX * 4 + 4, /* T.120 and H.245 */
1165 .timeout = 240,
1166 .tuple = {.src = {.u = {.tcp = {.port = __constant_htons(Q931_PORT)}}},
1167 .dst = {.protonum = IPPROTO_TCP}},
1168 .mask = {.src = {.u = {0xFFFF}},
1169 .dst = {.protonum = 0xFF}},
1170 .help = q931_help
1171};
1172
1173/****************************************************************************/
1174void ip_conntrack_q931_expect(struct ip_conntrack *new,
1175 struct ip_conntrack_expect *this)
1176{
1177 write_lock_bh(&ip_conntrack_lock);
1178 new->helper = &ip_conntrack_helper_q931;
1179 write_unlock_bh(&ip_conntrack_lock);
1180}
1181
1182/****************************************************************************/
1183static unsigned char *get_udp_data(struct sk_buff **pskb, int *datalen)
1184{
1185 struct udphdr _uh, *uh;
1186 int dataoff;
1187
1188 uh = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4, sizeof(_uh),
1189 &_uh);
1190 if (uh == NULL)
1191 return NULL;
1192 dataoff = (*pskb)->nh.iph->ihl * 4 + sizeof(_uh);
1193 if (dataoff >= (*pskb)->len)
1194 return NULL;
1195 *datalen = (*pskb)->len - dataoff;
1196 return skb_header_pointer(*pskb, dataoff, *datalen, h323_buffer);
1197}
1198
1199/****************************************************************************/
1200static struct ip_conntrack_expect *find_expect(struct ip_conntrack *ct,
1201 __be32 ip, u_int16_t port)
1202{
1203 struct ip_conntrack_expect *exp;
1204 struct ip_conntrack_tuple tuple;
1205
1206 tuple.src.ip = 0;
1207 tuple.src.u.tcp.port = 0;
1208 tuple.dst.ip = ip;
1209 tuple.dst.u.tcp.port = htons(port);
1210 tuple.dst.protonum = IPPROTO_TCP;
1211
1212 exp = __ip_conntrack_expect_find(&tuple);
1213 if (exp && exp->master == ct)
1214 return exp;
1215 return NULL;
1216}
1217
1218/****************************************************************************/
1219static int set_expect_timeout(struct ip_conntrack_expect *exp,
1220 unsigned timeout)
1221{
1222 if (!exp || !del_timer(&exp->timeout))
1223 return 0;
1224
1225 exp->timeout.expires = jiffies + timeout * HZ;
1226 add_timer(&exp->timeout);
1227
1228 return 1;
1229}
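/* Note: del_timer() returns 0 when the timer is no longer pending, in
 * which case the expectation has already fired or is being torn down and
 * must not be re-armed; the function above therefore reports failure
 * instead of touching it. */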
1230
1231/****************************************************************************/
1232static int expect_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
1233 enum ip_conntrack_info ctinfo,
1234 unsigned char **data,
1235 TransportAddress * addr, int count)
1236{
1237 struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
1238 int dir = CTINFO2DIR(ctinfo);
1239 int ret = 0;
1240 int i;
1241 __be32 ip;
1242 u_int16_t port;
1243 struct ip_conntrack_expect *exp;
1244 typeof(nat_q931_hook) nat_q931;
1245
1246 /* Look for the first related address */
1247 for (i = 0; i < count; i++) {
1248 if (get_h225_addr(*data, &addr[i], &ip, &port) &&
1249 ip == ct->tuplehash[dir].tuple.src.ip && port != 0)
1250 break;
1251 }
1252
1253 if (i >= count) /* Not found */
1254 return 0;
1255
1256 /* Create expect for Q.931 */
1257 if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
1258 return -1;
1259 exp->tuple.src.ip = gkrouted_only ? /* only accept calls from GK? */
1260 ct->tuplehash[!dir].tuple.src.ip : 0;
1261 exp->tuple.src.u.tcp.port = 0;
1262 exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
1263 exp->tuple.dst.u.tcp.port = htons(port);
1264 exp->tuple.dst.protonum = IPPROTO_TCP;
1265 exp->mask.src.ip = gkrouted_only ? htonl(0xFFFFFFFF) : 0;
1266 exp->mask.src.u.tcp.port = 0;
1267 exp->mask.dst.ip = htonl(0xFFFFFFFF);
1268 exp->mask.dst.u.tcp.port = htons(0xFFFF);
1269 exp->mask.dst.protonum = 0xFF;
1270 exp->flags = IP_CT_EXPECT_PERMANENT; /* Accept multiple calls */
1271
1272 nat_q931 = rcu_dereference(nat_q931_hook);
1273 if (nat_q931) { /* Need NAT */
1274 ret = nat_q931(pskb, ct, ctinfo, data, addr, i, port, exp);
1275 } else { /* Conntrack only */
1276 exp->expectfn = ip_conntrack_q931_expect;
1277
1278 if (ip_conntrack_expect_related(exp) == 0) {
1279 DEBUGP("ip_ct_ras: expect Q.931 "
1280 "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
1281 NIPQUAD(exp->tuple.src.ip),
1282 ntohs(exp->tuple.src.u.tcp.port),
1283 NIPQUAD(exp->tuple.dst.ip),
1284 ntohs(exp->tuple.dst.u.tcp.port));
1285
1286 /* Save port for looking up expect in processing RCF */
1287 info->sig_port[dir] = port;
1288 } else
1289 ret = -1;
1290 }
1291
1292 ip_conntrack_expect_put(exp);
1293
1294 return ret;
1295}
1296
1297/****************************************************************************/
1298static int process_grq(struct sk_buff **pskb, struct ip_conntrack *ct,
1299 enum ip_conntrack_info ctinfo,
1300 unsigned char **data, GatekeeperRequest * grq)
1301{
1302 typeof(set_ras_addr_hook) set_ras_addr;
1303
1304 DEBUGP("ip_ct_ras: GRQ\n");
1305
1306 set_ras_addr = rcu_dereference(set_ras_addr_hook);
1307 if (set_ras_addr) /* NATed */
1308 return set_ras_addr(pskb, ct, ctinfo, data,
1309 &grq->rasAddress, 1);
1310 return 0;
1311}
1312
1313/* Declare before using */
1314static void ip_conntrack_ras_expect(struct ip_conntrack *new,
1315 struct ip_conntrack_expect *this);
1316
1317/****************************************************************************/
1318static int process_gcf(struct sk_buff **pskb, struct ip_conntrack *ct,
1319 enum ip_conntrack_info ctinfo,
1320 unsigned char **data, GatekeeperConfirm * gcf)
1321{
1322 int dir = CTINFO2DIR(ctinfo);
1323 int ret = 0;
1324 __be32 ip;
1325 u_int16_t port;
1326 struct ip_conntrack_expect *exp;
1327
1328 DEBUGP("ip_ct_ras: GCF\n");
1329
1330 if (!get_h225_addr(*data, &gcf->rasAddress, &ip, &port))
1331 return 0;
1332
1333 /* Registration port is the same as discovery port */
1334 if (ip == ct->tuplehash[dir].tuple.src.ip &&
1335 port == ntohs(ct->tuplehash[dir].tuple.src.u.udp.port))
1336 return 0;
1337
1338 /* Avoid RAS expectation loops. A GCF is never expected. */
1339 if (test_bit(IPS_EXPECTED_BIT, &ct->status))
1340 return 0;
1341
1342 /* Need new expect */
1343 if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
1344 return -1;
1345 exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
1346 exp->tuple.src.u.tcp.port = 0;
1347 exp->tuple.dst.ip = ip;
1348 exp->tuple.dst.u.tcp.port = htons(port);
1349 exp->tuple.dst.protonum = IPPROTO_UDP;
1350 exp->mask.src.ip = htonl(0xFFFFFFFF);
1351 exp->mask.src.u.tcp.port = 0;
1352 exp->mask.dst.ip = htonl(0xFFFFFFFF);
1353 exp->mask.dst.u.tcp.port = htons(0xFFFF);
1354 exp->mask.dst.protonum = 0xFF;
1355 exp->flags = 0;
1356 exp->expectfn = ip_conntrack_ras_expect;
1357 if (ip_conntrack_expect_related(exp) == 0) {
1358 DEBUGP("ip_ct_ras: expect RAS "
1359 "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
1360 NIPQUAD(exp->tuple.src.ip),
1361 ntohs(exp->tuple.src.u.tcp.port),
1362 NIPQUAD(exp->tuple.dst.ip),
1363 ntohs(exp->tuple.dst.u.tcp.port));
1364 } else
1365 ret = -1;
1366
1367 ip_conntrack_expect_put(exp);
1368
1369 return ret;
1370}
1371
1372/****************************************************************************/
1373static int process_rrq(struct sk_buff **pskb, struct ip_conntrack *ct,
1374 enum ip_conntrack_info ctinfo,
1375 unsigned char **data, RegistrationRequest * rrq)
1376{
1377 struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
1378 int ret;
1379 typeof(set_ras_addr_hook) set_ras_addr;
1380
1381 DEBUGP("ip_ct_ras: RRQ\n");
1382
1383 ret = expect_q931(pskb, ct, ctinfo, data,
1384 rrq->callSignalAddress.item,
1385 rrq->callSignalAddress.count);
1386 if (ret < 0)
1387 return -1;
1388
1389 set_ras_addr = rcu_dereference(set_ras_addr_hook);
1390 if (set_ras_addr) {
1391 ret = set_ras_addr(pskb, ct, ctinfo, data,
1392 rrq->rasAddress.item,
1393 rrq->rasAddress.count);
1394 if (ret < 0)
1395 return -1;
1396 }
1397
1398 if (rrq->options & eRegistrationRequest_timeToLive) {
1399 DEBUGP("ip_ct_ras: RRQ TTL = %u seconds\n", rrq->timeToLive);
1400 info->timeout = rrq->timeToLive;
1401 } else
1402 info->timeout = default_rrq_ttl;
1403
1404 return 0;
1405}
1406
1407/****************************************************************************/
1408static int process_rcf(struct sk_buff **pskb, struct ip_conntrack *ct,
1409 enum ip_conntrack_info ctinfo,
1410 unsigned char **data, RegistrationConfirm * rcf)
1411{
1412 struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
1413 int dir = CTINFO2DIR(ctinfo);
1414 int ret;
1415 struct ip_conntrack_expect *exp;
1416 typeof(set_sig_addr_hook) set_sig_addr;
1417
1418 DEBUGP("ip_ct_ras: RCF\n");
1419
1420 set_sig_addr = rcu_dereference(set_sig_addr_hook);
1421 if (set_sig_addr) {
1422 ret = set_sig_addr(pskb, ct, ctinfo, data,
1423 rcf->callSignalAddress.item,
1424 rcf->callSignalAddress.count);
1425 if (ret < 0)
1426 return -1;
1427 }
1428
1429 if (rcf->options & eRegistrationConfirm_timeToLive) {
1430 DEBUGP("ip_ct_ras: RCF TTL = %u seconds\n", rcf->timeToLive);
1431 info->timeout = rcf->timeToLive;
1432 }
1433
1434 if (info->timeout > 0) {
1435 DEBUGP("ip_ct_ras: set RAS connection timeout "
1436 "to %u seconds\n",
1437 info->timeout);
1438 ip_ct_refresh(ct, *pskb, info->timeout * HZ);
1439
1440 /* Set expect timeout */
1441 read_lock_bh(&ip_conntrack_lock);
1442 exp = find_expect(ct, ct->tuplehash[dir].tuple.dst.ip,
1443 info->sig_port[!dir]);
1444 if (exp) {
1445 DEBUGP("ip_ct_ras: set Q.931 expect "
1446 "(%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu) "
1447 "timeout to %u seconds\n",
1448 NIPQUAD(exp->tuple.src.ip),
1449 ntohs(exp->tuple.src.u.tcp.port),
1450 NIPQUAD(exp->tuple.dst.ip),
1451 ntohs(exp->tuple.dst.u.tcp.port),
1452 info->timeout);
1453 set_expect_timeout(exp, info->timeout);
1454 }
1455 read_unlock_bh(&ip_conntrack_lock);
1456 }
1457
1458 return 0;
1459}
1460
1461/****************************************************************************/
1462static int process_urq(struct sk_buff **pskb, struct ip_conntrack *ct,
1463 enum ip_conntrack_info ctinfo,
1464 unsigned char **data, UnregistrationRequest * urq)
1465{
1466 struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
1467 int dir = CTINFO2DIR(ctinfo);
1468 int ret;
1469 typeof(set_sig_addr_hook) set_sig_addr;
1470
1471 DEBUGP("ip_ct_ras: URQ\n");
1472
1473 set_sig_addr = rcu_dereference(set_sig_addr_hook);
1474 if (set_sig_addr) {
1475 ret = set_sig_addr(pskb, ct, ctinfo, data,
1476 urq->callSignalAddress.item,
1477 urq->callSignalAddress.count);
1478 if (ret < 0)
1479 return -1;
1480 }
1481
1482 /* Clear old expect */
1483 ip_ct_remove_expectations(ct);
1484 info->sig_port[dir] = 0;
1485 info->sig_port[!dir] = 0;
1486
1487 /* Give it 30 seconds for UCF or URJ */
1488 ip_ct_refresh(ct, *pskb, 30 * HZ);
1489
1490 return 0;
1491}
1492
1493/****************************************************************************/
1494static int process_arq(struct sk_buff **pskb, struct ip_conntrack *ct,
1495 enum ip_conntrack_info ctinfo,
1496 unsigned char **data, AdmissionRequest * arq)
1497{
1498 struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
1499 int dir = CTINFO2DIR(ctinfo);
1500 __be32 ip;
1501 u_int16_t port;
1502 typeof(set_h225_addr_hook) set_h225_addr;
1503
1504 DEBUGP("ip_ct_ras: ARQ\n");
1505
1506 set_h225_addr = rcu_dereference(set_h225_addr_hook);
1507 if ((arq->options & eAdmissionRequest_destCallSignalAddress) &&
1508 get_h225_addr(*data, &arq->destCallSignalAddress, &ip, &port) &&
1509 ip == ct->tuplehash[dir].tuple.src.ip &&
1510 port == info->sig_port[dir] && set_h225_addr) {
1511 /* Answering ARQ */
1512 return set_h225_addr(pskb, data, 0,
1513 &arq->destCallSignalAddress,
1514 ct->tuplehash[!dir].tuple.dst.ip,
1515 info->sig_port[!dir]);
1516 }
1517
1518 if ((arq->options & eAdmissionRequest_srcCallSignalAddress) &&
1519 get_h225_addr(*data, &arq->srcCallSignalAddress, &ip, &port) &&
1520 ip == ct->tuplehash[dir].tuple.src.ip && set_h225_addr) {
1521 /* Calling ARQ */
1522 return set_h225_addr(pskb, data, 0,
1523 &arq->srcCallSignalAddress,
1524 ct->tuplehash[!dir].tuple.dst.ip,
1525 port);
1526 }
1527
1528 return 0;
1529}
1530
1531/****************************************************************************/
1532static int process_acf(struct sk_buff **pskb, struct ip_conntrack *ct,
1533 enum ip_conntrack_info ctinfo,
1534 unsigned char **data, AdmissionConfirm * acf)
1535{
1536 int dir = CTINFO2DIR(ctinfo);
1537 int ret = 0;
1538 __be32 ip;
1539 u_int16_t port;
1540 struct ip_conntrack_expect *exp;
1541 typeof(set_sig_addr_hook) set_sig_addr;
1542
1543 DEBUGP("ip_ct_ras: ACF\n");
1544
1545 if (!get_h225_addr(*data, &acf->destCallSignalAddress, &ip, &port))
1546 return 0;
1547
1548 if (ip == ct->tuplehash[dir].tuple.dst.ip) { /* Answering ACF */
1549 set_sig_addr = rcu_dereference(set_sig_addr_hook);
1550 if (set_sig_addr)
1551 return set_sig_addr(pskb, ct, ctinfo, data,
1552 &acf->destCallSignalAddress, 1);
1553 return 0;
1554 }
1555
1556 /* Need new expect */
1557 if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
1558 return -1;
1559 exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
1560 exp->tuple.src.u.tcp.port = 0;
1561 exp->tuple.dst.ip = ip;
1562 exp->tuple.dst.u.tcp.port = htons(port);
1563 exp->tuple.dst.protonum = IPPROTO_TCP;
1564 exp->mask.src.ip = htonl(0xFFFFFFFF);
1565 exp->mask.src.u.tcp.port = 0;
1566 exp->mask.dst.ip = htonl(0xFFFFFFFF);
1567 exp->mask.dst.u.tcp.port = htons(0xFFFF);
1568 exp->mask.dst.protonum = 0xFF;
1569 exp->flags = IP_CT_EXPECT_PERMANENT;
1570 exp->expectfn = ip_conntrack_q931_expect;
1571
1572 if (ip_conntrack_expect_related(exp) == 0) {
1573 DEBUGP("ip_ct_ras: expect Q.931 "
1574 "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
1575 NIPQUAD(exp->tuple.src.ip),
1576 ntohs(exp->tuple.src.u.tcp.port),
1577 NIPQUAD(exp->tuple.dst.ip),
1578 ntohs(exp->tuple.dst.u.tcp.port));
1579 } else
1580 ret = -1;
1581
1582 ip_conntrack_expect_put(exp);
1583
1584 return ret;
1585}
1586
1587/****************************************************************************/
1588static int process_lrq(struct sk_buff **pskb, struct ip_conntrack *ct,
1589 enum ip_conntrack_info ctinfo,
1590 unsigned char **data, LocationRequest * lrq)
1591{
1592 typeof(set_ras_addr_hook) set_ras_addr;
1593
1594 DEBUGP("ip_ct_ras: LRQ\n");
1595
1596 set_ras_addr = rcu_dereference(set_ras_addr_hook);
1597 if (set_ras_addr)
1598 return set_ras_addr(pskb, ct, ctinfo, data,
1599 &lrq->replyAddress, 1);
1600 return 0;
1601}
1602
1603/****************************************************************************/
1604static int process_lcf(struct sk_buff **pskb, struct ip_conntrack *ct,
1605 enum ip_conntrack_info ctinfo,
1606 unsigned char **data, LocationConfirm * lcf)
1607{
1608 int dir = CTINFO2DIR(ctinfo);
1609 int ret = 0;
1610 __be32 ip;
1611 u_int16_t port;
1612 struct ip_conntrack_expect *exp = NULL;
1613
1614 DEBUGP("ip_ct_ras: LCF\n");
1615
1616 if (!get_h225_addr(*data, &lcf->callSignalAddress, &ip, &port))
1617 return 0;
1618
1619 /* Need new expect for call signal */
1620 if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
1621 return -1;
1622 exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
1623 exp->tuple.src.u.tcp.port = 0;
1624 exp->tuple.dst.ip = ip;
1625 exp->tuple.dst.u.tcp.port = htons(port);
1626 exp->tuple.dst.protonum = IPPROTO_TCP;
1627 exp->mask.src.ip = htonl(0xFFFFFFFF);
1628 exp->mask.src.u.tcp.port = 0;
1629 exp->mask.dst.ip = htonl(0xFFFFFFFF);
1630 exp->mask.dst.u.tcp.port = htons(0xFFFF);
1631 exp->mask.dst.protonum = 0xFF;
1632 exp->flags = IP_CT_EXPECT_PERMANENT;
1633 exp->expectfn = ip_conntrack_q931_expect;
1634
1635 if (ip_conntrack_expect_related(exp) == 0) {
1636 DEBUGP("ip_ct_ras: expect Q.931 "
1637 "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
1638 NIPQUAD(exp->tuple.src.ip),
1639 ntohs(exp->tuple.src.u.tcp.port),
1640 NIPQUAD(exp->tuple.dst.ip),
1641 ntohs(exp->tuple.dst.u.tcp.port));
1642 } else
1643 ret = -1;
1644
1645 ip_conntrack_expect_put(exp);
1646
1647 /* Ignore rasAddress */
1648
1649 return ret;
1650}
1651
1652/****************************************************************************/
1653static int process_irr(struct sk_buff **pskb, struct ip_conntrack *ct,
1654 enum ip_conntrack_info ctinfo,
1655 unsigned char **data, InfoRequestResponse * irr)
1656{
1657 int ret;
1658 typeof(set_ras_addr_hook) set_ras_addr;
1659 typeof(set_sig_addr_hook) set_sig_addr;
1660
1661 DEBUGP("ip_ct_ras: IRR\n");
1662
1663 set_ras_addr = rcu_dereference(set_ras_addr_hook);
1664 if (set_ras_addr) {
1665 ret = set_ras_addr(pskb, ct, ctinfo, data,
1666 &irr->rasAddress, 1);
1667 if (ret < 0)
1668 return -1;
1669 }
1670
1671 set_sig_addr = rcu_dereference(set_sig_addr_hook);
1672 if (set_sig_addr) {
1673 ret = set_sig_addr(pskb, ct, ctinfo, data,
1674 irr->callSignalAddress.item,
1675 irr->callSignalAddress.count);
1676 if (ret < 0)
1677 return -1;
1678 }
1679
1680 return 0;
1681}
1682
1683/****************************************************************************/
1684static int process_ras(struct sk_buff **pskb, struct ip_conntrack *ct,
1685 enum ip_conntrack_info ctinfo,
1686 unsigned char **data, RasMessage * ras)
1687{
1688 switch (ras->choice) {
1689 case eRasMessage_gatekeeperRequest:
1690 return process_grq(pskb, ct, ctinfo, data,
1691 &ras->gatekeeperRequest);
1692 case eRasMessage_gatekeeperConfirm:
1693 return process_gcf(pskb, ct, ctinfo, data,
1694 &ras->gatekeeperConfirm);
1695 case eRasMessage_registrationRequest:
1696 return process_rrq(pskb, ct, ctinfo, data,
1697 &ras->registrationRequest);
1698 case eRasMessage_registrationConfirm:
1699 return process_rcf(pskb, ct, ctinfo, data,
1700 &ras->registrationConfirm);
1701 case eRasMessage_unregistrationRequest:
1702 return process_urq(pskb, ct, ctinfo, data,
1703 &ras->unregistrationRequest);
1704 case eRasMessage_admissionRequest:
1705 return process_arq(pskb, ct, ctinfo, data,
1706 &ras->admissionRequest);
1707 case eRasMessage_admissionConfirm:
1708 return process_acf(pskb, ct, ctinfo, data,
1709 &ras->admissionConfirm);
1710 case eRasMessage_locationRequest:
1711 return process_lrq(pskb, ct, ctinfo, data,
1712 &ras->locationRequest);
1713 case eRasMessage_locationConfirm:
1714 return process_lcf(pskb, ct, ctinfo, data,
1715 &ras->locationConfirm);
1716 case eRasMessage_infoRequestResponse:
1717 return process_irr(pskb, ct, ctinfo, data,
1718 &ras->infoRequestResponse);
1719 default:
1720 DEBUGP("ip_ct_ras: RAS message %d\n", ras->choice);
1721 break;
1722 }
1723
1724 return 0;
1725}
1726
1727/****************************************************************************/
1728static int ras_help(struct sk_buff **pskb, struct ip_conntrack *ct,
1729 enum ip_conntrack_info ctinfo)
1730{
1731 static RasMessage ras;
1732 unsigned char *data;
1733 int datalen = 0;
1734 int ret;
1735
1736 DEBUGP("ip_ct_ras: skblen = %u\n", (*pskb)->len);
1737
1738 spin_lock_bh(&ip_h323_lock);
1739
1740 /* Get UDP data */
1741 data = get_udp_data(pskb, &datalen);
1742 if (data == NULL)
1743 goto accept;
1744 DEBUGP("ip_ct_ras: RAS message %u.%u.%u.%u->%u.%u.%u.%u, len=%d\n",
1745 NIPQUAD((*pskb)->nh.iph->saddr),
1746 NIPQUAD((*pskb)->nh.iph->daddr), datalen);
1747
1748 /* Decode RAS message */
1749 ret = DecodeRasMessage(data, datalen, &ras);
1750 if (ret < 0) {
1751 if (net_ratelimit())
1752 printk("ip_ct_ras: decoding error: %s\n",
1753 ret == H323_ERROR_BOUND ?
1754 "out of bound" : "out of range");
1755 goto accept;
1756 }
1757
1758 /* Process RAS message */
1759 if (process_ras(pskb, ct, ctinfo, &data, &ras) < 0)
1760 goto drop;
1761
1762 accept:
1763 spin_unlock_bh(&ip_h323_lock);
1764 return NF_ACCEPT;
1765
1766 drop:
1767 spin_unlock_bh(&ip_h323_lock);
1768 if (net_ratelimit())
1769 printk("ip_ct_ras: packet dropped\n");
1770 return NF_DROP;
1771}
1772
1773/****************************************************************************/
1774static struct ip_conntrack_helper ip_conntrack_helper_ras = {
1775 .name = "RAS",
1776 .me = THIS_MODULE,
1777 .max_expected = 32,
1778 .timeout = 240,
1779 .tuple = {.src = {.u = {.tcp = {.port = __constant_htons(RAS_PORT)}}},
1780 .dst = {.protonum = IPPROTO_UDP}},
1781 .mask = {.src = {.u = {0xFFFE}},
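	/* 0xFFFE: presumably intended to also match the gatekeeper
	 * discovery port 1718 (RAS_PORT - 1) */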
1782 .dst = {.protonum = 0xFF}},
1783 .help = ras_help,
1784};
1785
1786/****************************************************************************/
1787static void ip_conntrack_ras_expect(struct ip_conntrack *new,
1788 struct ip_conntrack_expect *this)
1789{
1790 write_lock_bh(&ip_conntrack_lock);
1791 new->helper = &ip_conntrack_helper_ras;
1792 write_unlock_bh(&ip_conntrack_lock);
1793}
1794
1795/****************************************************************************/
1796/* Not __exit - called from init() */
1797static void fini(void)
1798{
1799 ip_conntrack_helper_unregister(&ip_conntrack_helper_ras);
1800 ip_conntrack_helper_unregister(&ip_conntrack_helper_q931);
1801 kfree(h323_buffer);
1802 DEBUGP("ip_ct_h323: fini\n");
1803}
1804
1805/****************************************************************************/
1806static int __init init(void)
1807{
1808 int ret;
1809
1810 h323_buffer = kmalloc(65536, GFP_KERNEL);
1811 if (!h323_buffer)
1812 return -ENOMEM;
1813 if ((ret = ip_conntrack_helper_register(&ip_conntrack_helper_q931)) ||
1814 (ret = ip_conntrack_helper_register(&ip_conntrack_helper_ras))) {
1815 fini();
1816 return ret;
1817 }
1818 DEBUGP("ip_ct_h323: init success\n");
1819 return 0;
1820}
1821
1822/****************************************************************************/
1823module_init(init);
1824module_exit(fini);
1825
1826EXPORT_SYMBOL_GPL(get_h225_addr);
1827EXPORT_SYMBOL_GPL(ip_conntrack_h245_expect);
1828EXPORT_SYMBOL_GPL(ip_conntrack_q931_expect);
1829EXPORT_SYMBOL_GPL(set_h245_addr_hook);
1830EXPORT_SYMBOL_GPL(set_h225_addr_hook);
1831EXPORT_SYMBOL_GPL(set_sig_addr_hook);
1832EXPORT_SYMBOL_GPL(set_ras_addr_hook);
1833EXPORT_SYMBOL_GPL(nat_rtp_rtcp_hook);
1834EXPORT_SYMBOL_GPL(nat_t120_hook);
1835EXPORT_SYMBOL_GPL(nat_h245_hook);
1836EXPORT_SYMBOL_GPL(nat_callforwarding_hook);
1837EXPORT_SYMBOL_GPL(nat_q931_hook);
1838
1839MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
1840MODULE_DESCRIPTION("H.323 connection tracking helper");
1841MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
deleted file mode 100644
index 2b760c5cf709..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ /dev/null
@@ -1,684 +0,0 @@
1/*
2 * ip_conntrack_pptp.c - Version 3.0
3 *
4 * Connection tracking support for PPTP (Point to Point Tunneling Protocol).
5 * PPTP is a protocol for creating virtual private networks.
6 * It is a specification defined by Microsoft and some vendors
7 * working with Microsoft. PPTP is built on top of a modified
8 * version of the Internet Generic Routing Encapsulation Protocol.
9 * GRE is defined in RFC 1701 and RFC 1702. Documentation of
10 * PPTP can be found in RFC 2637.
11 *
12 * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
13 *
14 * Development of this code funded by Astaro AG (http://www.astaro.com/)
15 *
16 * Limitations:
17 * - We blindly assume that control connections are always
18 * established in the PNS->PAC direction. This is a violation
19 * of RFC 2637
20 * - We can only support a single call within each session
21 *
22 * TODO:
23 * - testing of incoming PPTP calls
24 *
25 * Changes:
26 * 2002-02-05 - Version 1.3
27 * - Call ip_conntrack_unexpect_related() from
28 * pptp_destroy_siblings() to destroy expectations in case
29 * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen
30 * (Philip Craig <philipc@snapgear.com>)
31 * - Add Version information at module loadtime
32 * 2002-02-10 - Version 1.6
33 * - move to C99 style initializers
34 * - remove second expectation if first arrives
35 * 2004-10-22 - Version 2.0
36 * - merge Mandrake's 2.6.x port with recent 2.6.x API changes
37 * - fix lots of linear skb assumptions from Mandrake's port
38 * 2005-06-10 - Version 2.1
39 * - use ip_conntrack_expect_free() instead of kfree() on the
40 * expect's (which are from the slab for quite some time)
41 * 2005-06-10 - Version 3.0
42 * - port helper to post-2.6.11 API changes,
43 * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
44 * 2005-07-30 - Version 3.1
45 * - port helper to 2.6.13 API changes
46 *
47 */
48
49#include <linux/module.h>
50#include <linux/netfilter.h>
51#include <linux/ip.h>
52#include <net/checksum.h>
53#include <net/tcp.h>
54
55#include <linux/netfilter_ipv4/ip_conntrack.h>
56#include <linux/netfilter_ipv4/ip_conntrack_core.h>
57#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
58#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
59#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
60
61#define IP_CT_PPTP_VERSION "3.1"
62
63MODULE_LICENSE("GPL");
64MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
65MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP");
66
67static DEFINE_SPINLOCK(ip_pptp_lock);
68
69int
70(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb,
71 struct ip_conntrack *ct,
72 enum ip_conntrack_info ctinfo,
73 struct PptpControlHeader *ctlh,
74 union pptp_ctrl_union *pptpReq);
75
76int
77(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb,
78 struct ip_conntrack *ct,
79 enum ip_conntrack_info ctinfo,
80 struct PptpControlHeader *ctlh,
81 union pptp_ctrl_union *pptpReq);
82
83void
84(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig,
85 struct ip_conntrack_expect *expect_reply);
86
87void
88(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct,
89 struct ip_conntrack_expect *exp);
90
91#if 0
92/* PptpControlMessageType names */
93const char *pptp_msg_name[] = {
94 "UNKNOWN_MESSAGE",
95 "START_SESSION_REQUEST",
96 "START_SESSION_REPLY",
97 "STOP_SESSION_REQUEST",
98 "STOP_SESSION_REPLY",
99 "ECHO_REQUEST",
100 "ECHO_REPLY",
101 "OUT_CALL_REQUEST",
102 "OUT_CALL_REPLY",
103 "IN_CALL_REQUEST",
104 "IN_CALL_REPLY",
105 "IN_CALL_CONNECT",
106 "CALL_CLEAR_REQUEST",
107 "CALL_DISCONNECT_NOTIFY",
108 "WAN_ERROR_NOTIFY",
109 "SET_LINK_INFO"
110};
111EXPORT_SYMBOL(pptp_msg_name);
112#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
113#else
114#define DEBUGP(format, args...)
115#endif
116
117#define SECS *HZ
118#define MINS * 60 SECS
119#define HOURS * 60 MINS
120
121#define PPTP_GRE_TIMEOUT (10 MINS)
122#define PPTP_GRE_STREAM_TIMEOUT (5 HOURS)
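/* For example, PPTP_GRE_TIMEOUT expands to (10 * 60 * HZ) and
 * PPTP_GRE_STREAM_TIMEOUT to (5 * 60 * 60 * HZ), i.e. jiffies. */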
123
124static void pptp_expectfn(struct ip_conntrack *ct,
125 struct ip_conntrack_expect *exp)
126{
127 typeof(ip_nat_pptp_hook_expectfn) ip_nat_pptp_expectfn;
128
129 DEBUGP("increasing timeouts\n");
130
131 /* increase timeout of GRE data channel conntrack entry */
132 ct->proto.gre.timeout = PPTP_GRE_TIMEOUT;
133 ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT;
134
135 /* Can you see how rusty this code is, compared with the pre-2.6.11
136 * one? That's what happened to my shiny newnat of 2002 ;( -HW */
137
138 rcu_read_lock();
139 ip_nat_pptp_expectfn = rcu_dereference(ip_nat_pptp_hook_expectfn);
140 if (!ip_nat_pptp_expectfn) {
141 struct ip_conntrack_tuple inv_t;
142 struct ip_conntrack_expect *exp_other;
143
144 /* obviously this tuple inversion only works until you do NAT */
145 invert_tuplepr(&inv_t, &exp->tuple);
146 DEBUGP("trying to unexpect other dir: ");
147 DUMP_TUPLE(&inv_t);
148
149 exp_other = ip_conntrack_expect_find_get(&inv_t);
150 if (exp_other) {
151 /* delete other expectation. */
152 DEBUGP("found\n");
153 ip_conntrack_unexpect_related(exp_other);
154 ip_conntrack_expect_put(exp_other);
155 } else {
156 DEBUGP("not found\n");
157 }
158 } else {
159 /* we need more than simple inversion */
160 ip_nat_pptp_expectfn(ct, exp);
161 }
162 rcu_read_unlock();
163}
164
165static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t)
166{
167 struct ip_conntrack_tuple_hash *h;
168 struct ip_conntrack_expect *exp;
169
170 DEBUGP("trying to timeout ct or exp for tuple ");
171 DUMP_TUPLE(t);
172
173 h = ip_conntrack_find_get(t, NULL);
174 if (h) {
175 struct ip_conntrack *sibling = tuplehash_to_ctrack(h);
176 DEBUGP("setting timeout of conntrack %p to 0\n", sibling);
177 sibling->proto.gre.timeout = 0;
178 sibling->proto.gre.stream_timeout = 0;
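		/* if the timer was still pending, run its handler by hand so
		 * the conntrack entry is destroyed immediately instead of at
		 * its original expiry */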
179 if (del_timer(&sibling->timeout))
180 sibling->timeout.function((unsigned long)sibling);
181 ip_conntrack_put(sibling);
182 return 1;
183 } else {
184 exp = ip_conntrack_expect_find_get(t);
185 if (exp) {
186 DEBUGP("unexpect_related of expect %p\n", exp);
187 ip_conntrack_unexpect_related(exp);
188 ip_conntrack_expect_put(exp);
189 return 1;
190 }
191 }
192
193 return 0;
194}
195
196
197/* timeout GRE data connections */
198static void pptp_destroy_siblings(struct ip_conntrack *ct)
199{
200 struct ip_conntrack_tuple t;
201
202 ip_ct_gre_keymap_destroy(ct);
203 /* Since ct->sibling_list has literally rusted away in 2.6.11,
204 * we now need another way to find out about our sibling
205 * conntrack and expects... -HW */
206
207 /* try original (pns->pac) tuple */
208 memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t));
209 t.dst.protonum = IPPROTO_GRE;
210 t.src.u.gre.key = ct->help.ct_pptp_info.pns_call_id;
211 t.dst.u.gre.key = ct->help.ct_pptp_info.pac_call_id;
212
213 if (!destroy_sibling_or_exp(&t))
214 DEBUGP("failed to timeout original pns->pac ct/exp\n");
215
216 /* try reply (pac->pns) tuple */
217 memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t));
218 t.dst.protonum = IPPROTO_GRE;
219 t.src.u.gre.key = ct->help.ct_pptp_info.pac_call_id;
220 t.dst.u.gre.key = ct->help.ct_pptp_info.pns_call_id;
221
222 if (!destroy_sibling_or_exp(&t))
223 DEBUGP("failed to timeout reply pac->pns ct/exp\n");
224}
225
226/* expect GRE connections (PNS->PAC and PAC->PNS direction) */
227static inline int
228exp_gre(struct ip_conntrack *ct,
229 __be16 callid,
230 __be16 peer_callid)
231{
232 struct ip_conntrack_expect *exp_orig, *exp_reply;
233 int ret = 1;
234 typeof(ip_nat_pptp_hook_exp_gre) ip_nat_pptp_exp_gre;
235
236 exp_orig = ip_conntrack_expect_alloc(ct);
237 if (exp_orig == NULL)
238 goto out;
239
240 exp_reply = ip_conntrack_expect_alloc(ct);
241 if (exp_reply == NULL)
242 goto out_put_orig;
243
244 /* original direction, PNS->PAC */
245 exp_orig->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
246 exp_orig->tuple.src.u.gre.key = peer_callid;
247 exp_orig->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
248 exp_orig->tuple.dst.u.gre.key = callid;
249 exp_orig->tuple.dst.protonum = IPPROTO_GRE;
250
251 exp_orig->mask.src.ip = htonl(0xffffffff);
252 exp_orig->mask.src.u.all = 0;
253 exp_orig->mask.dst.u.gre.key = htons(0xffff);
254 exp_orig->mask.dst.ip = htonl(0xffffffff);
255 exp_orig->mask.dst.protonum = 0xff;
256
257 exp_orig->master = ct;
258 exp_orig->expectfn = pptp_expectfn;
259 exp_orig->flags = 0;
260
261 /* both expectations are identical apart from tuple */
262 memcpy(exp_reply, exp_orig, sizeof(*exp_reply));
263
264 /* reply direction, PAC->PNS */
265 exp_reply->tuple.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
266 exp_reply->tuple.src.u.gre.key = callid;
267 exp_reply->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
268 exp_reply->tuple.dst.u.gre.key = peer_callid;
269 exp_reply->tuple.dst.protonum = IPPROTO_GRE;
270
271 ip_nat_pptp_exp_gre = rcu_dereference(ip_nat_pptp_hook_exp_gre);
272 if (ip_nat_pptp_exp_gre)
273 ip_nat_pptp_exp_gre(exp_orig, exp_reply);
274 if (ip_conntrack_expect_related(exp_orig) != 0)
275 goto out_put_both;
276 if (ip_conntrack_expect_related(exp_reply) != 0)
277 goto out_unexpect_orig;
278
279 /* Add GRE keymap entries */
280 if (ip_ct_gre_keymap_add(ct, &exp_orig->tuple, 0) != 0)
281 goto out_unexpect_both;
282 if (ip_ct_gre_keymap_add(ct, &exp_reply->tuple, 1) != 0) {
283 ip_ct_gre_keymap_destroy(ct);
284 goto out_unexpect_both;
285 }
286 ret = 0;
287
288out_put_both:
289 ip_conntrack_expect_put(exp_reply);
290out_put_orig:
291 ip_conntrack_expect_put(exp_orig);
292out:
293 return ret;
294
295out_unexpect_both:
296 ip_conntrack_unexpect_related(exp_reply);
297out_unexpect_orig:
298 ip_conntrack_unexpect_related(exp_orig);
299 goto out_put_both;
300}
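/* Illustrative note (hypothetical userspace view, field names assumed):
 * PPTP data flows in enhanced GRE (RFC 2637, IP protocol 47), where the
 * key field carries a payload length and the *receiver's* call ID. That
 * is why exp_orig above keys PNS->PAC frames on peer_callid while
 * exp_reply keys PAC->PNS frames on callid. */
#include <stdint.h>

struct pptp_gre_header_sketch {
	uint8_t  flags;		/* K bit set: key field present */
	uint8_t  version;	/* low bits: version 1 (enhanced GRE) */
	uint16_t protocol;	/* 0x880B = PPP */
	uint16_t payload_len;	/* upper half of the key field */
	uint16_t call_id;	/* lower half: the receiver's call ID */
	/* optional sequence/ack numbers follow, depending on flags */
};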
301
302static inline int
303pptp_inbound_pkt(struct sk_buff **pskb,
304 struct PptpControlHeader *ctlh,
305 union pptp_ctrl_union *pptpReq,
306 unsigned int reqlen,
307 struct ip_conntrack *ct,
308 enum ip_conntrack_info ctinfo)
309{
310 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
311 u_int16_t msg;
312 __be16 cid = 0, pcid = 0;
313 typeof(ip_nat_pptp_hook_inbound) ip_nat_pptp_inbound;
314
315 msg = ntohs(ctlh->messageType);
316 DEBUGP("inbound control message %s\n", pptp_msg_name[msg]);
317
318 switch (msg) {
319 case PPTP_START_SESSION_REPLY:
320 /* server confirms new control session */
321 if (info->sstate < PPTP_SESSION_REQUESTED)
322 goto invalid;
323 if (pptpReq->srep.resultCode == PPTP_START_OK)
324 info->sstate = PPTP_SESSION_CONFIRMED;
325 else
326 info->sstate = PPTP_SESSION_ERROR;
327 break;
328
329 case PPTP_STOP_SESSION_REPLY:
330 /* server confirms end of control session */
331 if (info->sstate > PPTP_SESSION_STOPREQ)
332 goto invalid;
333 if (pptpReq->strep.resultCode == PPTP_STOP_OK)
334 info->sstate = PPTP_SESSION_NONE;
335 else
336 info->sstate = PPTP_SESSION_ERROR;
337 break;
338
339 case PPTP_OUT_CALL_REPLY:
340 /* server accepted call, we now expect GRE frames */
341 if (info->sstate != PPTP_SESSION_CONFIRMED)
342 goto invalid;
343 if (info->cstate != PPTP_CALL_OUT_REQ &&
344 info->cstate != PPTP_CALL_OUT_CONF)
345 goto invalid;
346
347 cid = pptpReq->ocack.callID;
348 pcid = pptpReq->ocack.peersCallID;
349 if (info->pns_call_id != pcid)
350 goto invalid;
351 DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
352 ntohs(cid), ntohs(pcid));
353
354 if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) {
355 info->cstate = PPTP_CALL_OUT_CONF;
356 info->pac_call_id = cid;
357 exp_gre(ct, cid, pcid);
358 } else
359 info->cstate = PPTP_CALL_NONE;
360 break;
361
362 case PPTP_IN_CALL_REQUEST:
363 /* server tells us about incoming call request */
364 if (info->sstate != PPTP_SESSION_CONFIRMED)
365 goto invalid;
366
367 cid = pptpReq->icreq.callID;
368 DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
369 info->cstate = PPTP_CALL_IN_REQ;
370 info->pac_call_id = cid;
371 break;
372
373 case PPTP_IN_CALL_CONNECT:
374 /* server tells us an incoming call was established */
375 if (info->sstate != PPTP_SESSION_CONFIRMED)
376 goto invalid;
377 if (info->cstate != PPTP_CALL_IN_REP &&
378 info->cstate != PPTP_CALL_IN_CONF)
379 goto invalid;
380
381 pcid = pptpReq->iccon.peersCallID;
382 cid = info->pac_call_id;
383
384 if (info->pns_call_id != pcid)
385 goto invalid;
386
387 DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid));
388 info->cstate = PPTP_CALL_IN_CONF;
389
390 /* we expect a GRE connection from PAC to PNS */
391 exp_gre(ct, cid, pcid);
392 break;
393
394 case PPTP_CALL_DISCONNECT_NOTIFY:
395 /* server confirms disconnect */
396 cid = pptpReq->disc.callID;
397 DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
398 info->cstate = PPTP_CALL_NONE;
399
400 /* untrack this call id, unexpect GRE packets */
401 pptp_destroy_siblings(ct);
402 break;
403
404 case PPTP_WAN_ERROR_NOTIFY:
405 case PPTP_ECHO_REQUEST:
406 case PPTP_ECHO_REPLY:
407 /* I don't have to explain these ;) */
408 break;
409 default:
410 goto invalid;
411 }
412
413 ip_nat_pptp_inbound = rcu_dereference(ip_nat_pptp_hook_inbound);
414 if (ip_nat_pptp_inbound)
415 return ip_nat_pptp_inbound(pskb, ct, ctinfo, ctlh, pptpReq);
416 return NF_ACCEPT;
417
418invalid:
419 DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
420 "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
421 msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
422 msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
423 ntohs(info->pns_call_id), ntohs(info->pac_call_id));
424 return NF_ACCEPT;
425}
426
427static inline int
428pptp_outbound_pkt(struct sk_buff **pskb,
429 struct PptpControlHeader *ctlh,
430 union pptp_ctrl_union *pptpReq,
431 unsigned int reqlen,
432 struct ip_conntrack *ct,
433 enum ip_conntrack_info ctinfo)
434{
435 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
436 u_int16_t msg;
437 __be16 cid = 0, pcid = 0;
438 typeof(ip_nat_pptp_hook_outbound) ip_nat_pptp_outbound;
439
440 msg = ntohs(ctlh->messageType);
441 DEBUGP("outbound control message %s\n", pptp_msg_name[msg]);
442
443 switch (msg) {
444 case PPTP_START_SESSION_REQUEST:
445 /* client requests a new control session */
446 if (info->sstate != PPTP_SESSION_NONE)
447 goto invalid;
448 info->sstate = PPTP_SESSION_REQUESTED;
449 break;
450 case PPTP_STOP_SESSION_REQUEST:
451 /* client requests end of control session */
452 info->sstate = PPTP_SESSION_STOPREQ;
453 break;
454
455 case PPTP_OUT_CALL_REQUEST:
456 /* client initiating connection to server */
457 if (info->sstate != PPTP_SESSION_CONFIRMED)
458 goto invalid;
459 info->cstate = PPTP_CALL_OUT_REQ;
460 /* track PNS call id */
461 cid = pptpReq->ocreq.callID;
462 DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
463 info->pns_call_id = cid;
464 break;
465 case PPTP_IN_CALL_REPLY:
466 /* client answers incoming call */
467 if (info->cstate != PPTP_CALL_IN_REQ &&
468 info->cstate != PPTP_CALL_IN_REP)
469 goto invalid;
470
471 cid = pptpReq->icack.callID;
472 pcid = pptpReq->icack.peersCallID;
473 if (info->pac_call_id != pcid)
474 goto invalid;
475 DEBUGP("%s, CID=%X PCID=%X\n", pptp_msg_name[msg],
476 ntohs(cid), ntohs(pcid));
477
478 if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) {
479 /* part two of the three-way handshake */
480 info->cstate = PPTP_CALL_IN_REP;
481 info->pns_call_id = cid;
482 } else
483 info->cstate = PPTP_CALL_NONE;
484 break;
485
486 case PPTP_CALL_CLEAR_REQUEST:
487 /* client requests hangup of call */
488 if (info->sstate != PPTP_SESSION_CONFIRMED)
489 goto invalid;
490 /* FUTURE: iterate over all calls and check if
491 * call ID is valid. We don't do this without newnat,
492 * because we only know about the last call */
493 info->cstate = PPTP_CALL_CLEAR_REQ;
494 break;
495 case PPTP_SET_LINK_INFO:
496 case PPTP_ECHO_REQUEST:
497 case PPTP_ECHO_REPLY:
498 /* I don't have to explain these ;) */
499 break;
500 default:
501 goto invalid;
502 }
503
504 ip_nat_pptp_outbound = rcu_dereference(ip_nat_pptp_hook_outbound);
505 if (ip_nat_pptp_outbound)
506 return ip_nat_pptp_outbound(pskb, ct, ctinfo, ctlh, pptpReq);
507 return NF_ACCEPT;
508
509invalid:
510 DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
511 "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
512 msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
513 msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
514 ntohs(info->pns_call_id), ntohs(info->pac_call_id));
515 return NF_ACCEPT;
516}
517
518static const unsigned int pptp_msg_size[] = {
519 [PPTP_START_SESSION_REQUEST] = sizeof(struct PptpStartSessionRequest),
520 [PPTP_START_SESSION_REPLY] = sizeof(struct PptpStartSessionReply),
521 [PPTP_STOP_SESSION_REQUEST] = sizeof(struct PptpStopSessionRequest),
522 [PPTP_STOP_SESSION_REPLY] = sizeof(struct PptpStopSessionReply),
523 [PPTP_OUT_CALL_REQUEST] = sizeof(struct PptpOutCallRequest),
524 [PPTP_OUT_CALL_REPLY] = sizeof(struct PptpOutCallReply),
525 [PPTP_IN_CALL_REQUEST] = sizeof(struct PptpInCallRequest),
526 [PPTP_IN_CALL_REPLY] = sizeof(struct PptpInCallReply),
527 [PPTP_IN_CALL_CONNECT] = sizeof(struct PptpInCallConnected),
528 [PPTP_CALL_CLEAR_REQUEST] = sizeof(struct PptpClearCallRequest),
529 [PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify),
530 [PPTP_WAN_ERROR_NOTIFY] = sizeof(struct PptpWanErrorNotify),
531 [PPTP_SET_LINK_INFO] = sizeof(struct PptpSetLinkInfo),
532};
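/* The table above gives the size of the fixed part of each control
 * message body; conntrack_pptp_help() below uses it to ignore control
 * messages that are too short for their declared type. */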
533
534/* track caller id inside control connection, call expect_related */
535static int
536conntrack_pptp_help(struct sk_buff **pskb,
537 struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
538
539{
540 int dir = CTINFO2DIR(ctinfo);
541 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
542 struct tcphdr _tcph, *tcph;
543 struct pptp_pkt_hdr _pptph, *pptph;
544 struct PptpControlHeader _ctlh, *ctlh;
545 union pptp_ctrl_union _pptpReq, *pptpReq;
546 unsigned int tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
547 unsigned int datalen, reqlen, nexthdr_off;
548 int oldsstate, oldcstate;
549 int ret;
550 u_int16_t msg;
551
552 /* don't do any tracking before tcp handshake complete */
553 if (ctinfo != IP_CT_ESTABLISHED
554 && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
555 DEBUGP("ctinfo = %u, skipping\n", ctinfo);
556 return NF_ACCEPT;
557 }
558
559 nexthdr_off = (*pskb)->nh.iph->ihl*4;
560 tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
561 BUG_ON(!tcph);
562 nexthdr_off += tcph->doff * 4;
563 datalen = tcplen - tcph->doff * 4;
564
565 pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
566 if (!pptph) {
567 DEBUGP("no full PPTP header, can't track\n");
568 return NF_ACCEPT;
569 }
570 nexthdr_off += sizeof(_pptph);
571 datalen -= sizeof(_pptph);
572
573 /* if it's not a control message we can't do anything with it */
574 if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL ||
575 ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) {
576 DEBUGP("not a control packet\n");
577 return NF_ACCEPT;
578 }
579
580 ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
581 if (!ctlh)
582 return NF_ACCEPT;
583 nexthdr_off += sizeof(_ctlh);
584 datalen -= sizeof(_ctlh);
585
586 reqlen = datalen;
587 msg = ntohs(ctlh->messageType);
588 if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg])
589 return NF_ACCEPT;
590 if (reqlen > sizeof(*pptpReq))
591 reqlen = sizeof(*pptpReq);
592
593 pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
594 if (!pptpReq)
595 return NF_ACCEPT;
596
597 oldsstate = info->sstate;
598 oldcstate = info->cstate;
599
600 spin_lock_bh(&ip_pptp_lock);
601
602 /* FIXME: We just blindly assume that the control connection is always
603 * established from PNS->PAC. However, the RFC makes no such guarantee */
604 if (dir == IP_CT_DIR_ORIGINAL)
605 /* client -> server (PNS -> PAC) */
606 ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
607 ctinfo);
608 else
609 /* server -> client (PAC -> PNS) */
610 ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
611 ctinfo);
612 DEBUGP("sstate: %d->%d, cstate: %d->%d\n",
613 oldsstate, info->sstate, oldcstate, info->cstate);
614 spin_unlock_bh(&ip_pptp_lock);
615
616 return ret;
617}
618
619/* control protocol helper */
620static struct ip_conntrack_helper pptp = {
621 .list = { NULL, NULL },
622 .name = "pptp",
623 .me = THIS_MODULE,
624 .max_expected = 2,
625 .timeout = 5 * 60,
626 .tuple = { .src = { .ip = 0,
627 .u = { .tcp = { .port =
628 __constant_htons(PPTP_CONTROL_PORT) } }
629 },
630 .dst = { .ip = 0,
631 .u = { .all = 0 },
632 .protonum = IPPROTO_TCP
633 }
634 },
635 .mask = { .src = { .ip = 0,
636 .u = { .tcp = { .port = __constant_htons(0xffff) } }
637 },
638 .dst = { .ip = 0,
639 .u = { .all = 0 },
640 .protonum = 0xff
641 }
642 },
643 .help = conntrack_pptp_help,
644 .destroy = pptp_destroy_siblings,
645};
646
647extern void ip_ct_proto_gre_fini(void);
648extern int __init ip_ct_proto_gre_init(void);
649
650/* ip_conntrack_pptp initialization */
651static int __init ip_conntrack_helper_pptp_init(void)
652{
653 int retcode;
654
655 retcode = ip_ct_proto_gre_init();
656 if (retcode < 0)
657 return retcode;
658
659 DEBUGP(" registering helper\n");
660 if ((retcode = ip_conntrack_helper_register(&pptp))) {
661 printk(KERN_ERR "Unable to register conntrack application "
662 "helper for pptp: %d\n", retcode);
663 ip_ct_proto_gre_fini();
664 return retcode;
665 }
666
667 printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION);
668 return 0;
669}
670
671static void __exit ip_conntrack_helper_pptp_fini(void)
672{
673 ip_conntrack_helper_unregister(&pptp);
674 ip_ct_proto_gre_fini();
675 printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION);
676}
677
678module_init(ip_conntrack_helper_pptp_init);
679module_exit(ip_conntrack_helper_pptp_fini);
680
681EXPORT_SYMBOL(ip_nat_pptp_hook_outbound);
682EXPORT_SYMBOL(ip_nat_pptp_hook_inbound);
683EXPORT_SYMBOL(ip_nat_pptp_hook_exp_gre);
684EXPORT_SYMBOL(ip_nat_pptp_hook_expectfn);
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
deleted file mode 100644
index 053e591f407a..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ /dev/null
@@ -1,314 +0,0 @@
1/* IRC extension for IP connection tracking, Version 1.21
2 * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org>
3 * based on RR's ip_conntrack_ftp.c
4 *
5 * ip_conntrack_irc.c,v 1.21 2002/02/05 14:49:26 laforge Exp
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * Module load syntax:
13 * insmod ip_conntrack_irc.o ports=port1,port2,...port<MAX_PORTS>
14 * max_dcc_channels=n dcc_timeout=secs
15 *
16 * Please give the ports of all IRC servers you wish to connect to.
17 * If you don't specify ports, the default will be port 6667.
18 * With max_dcc_channels you can define the maximum number of
19 * unanswered DCC channels per IRC session (default 8).
20 * With dcc_timeout you can specify how long the system waits for
21 * an expected DCC channel (default 300 seconds).
22 *
23 */
24
25#include <linux/module.h>
26#include <linux/netfilter.h>
27#include <linux/ip.h>
28#include <net/checksum.h>
29#include <net/tcp.h>
30
31#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
32#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
33#include <linux/moduleparam.h>
34
35#define MAX_PORTS 8
36static unsigned short ports[MAX_PORTS];
37static int ports_c;
38static unsigned int max_dcc_channels = 8;
39static unsigned int dcc_timeout = 300;
40/* This is slow, but it's simple. --RR */
41static char *irc_buffer;
42static DEFINE_SPINLOCK(irc_buffer_lock);
43
44unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
45 enum ip_conntrack_info ctinfo,
46 unsigned int matchoff,
47 unsigned int matchlen,
48 struct ip_conntrack_expect *exp);
49EXPORT_SYMBOL_GPL(ip_nat_irc_hook);
50
51MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
52MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
53MODULE_LICENSE("GPL");
54module_param_array(ports, ushort, &ports_c, 0400);
55MODULE_PARM_DESC(ports, "port numbers of IRC servers");
56module_param(max_dcc_channels, uint, 0400);
57MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session");
58module_param(dcc_timeout, uint, 0400);
59MODULE_PARM_DESC(dcc_timeout, "timeout for unestablished DCC channels");
60
61static const char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " };
62#define MINMATCHLEN 5
63
64#if 0
65#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s:" format, \
66 __FILE__, __FUNCTION__ , ## args)
67#else
68#define DEBUGP(format, args...)
69#endif
70
71static int parse_dcc(char *data, char *data_end, u_int32_t *ip,
72 u_int16_t *port, char **ad_beg_p, char **ad_end_p)
73/* tries to get the ip_addr and port out of a dcc command
74 return value: -1 on failure, 0 on success
75 data pointer to first byte of DCC command data
76 data_end pointer to last byte of dcc command data
77 ip returns parsed ip of dcc command
78 port returns parsed port of dcc command
79 ad_beg_p returns pointer to first byte of addr data
80 ad_end_p returns pointer to last byte of addr data */
81{
82
83 /* at least 12: "AAAAAAAA P\1\n" */
84 while (*data++ != ' ')
85 if (data > data_end - 12)
86 return -1;
87
88 *ad_beg_p = data;
89 *ip = simple_strtoul(data, &data, 10);
90
91 /* skip blanks between ip and port */
92 while (*data == ' ') {
93 if (data >= data_end)
94 return -1;
95 data++;
96 }
97
98 *port = simple_strtoul(data, &data, 10);
99 *ad_end_p = data;
100
101 return 0;
102}
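
parse_dcc() above documents its contract, but it is easiest to see on a concrete payload. Below is a standalone sketch of the same extraction; the payload string is a made-up example, and the kernel uses simple_strtoul() where this sketch uses strtoul():

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* Payload following "\1DCC SEND " in a CTCP message (made up). */
	char payload[] = "somefile 3232235777 6667\1";
	char *p = payload;
	unsigned long ip, port;

	while (*p && *p != ' ')		/* skip the filename token */
		p++;
	ip = strtoul(p, &p, 10);	/* strtoul skips the blanks itself */
	port = strtoul(p, &p, 10);

	printf("ip=%lu.%lu.%lu.%lu port=%lu\n",
	       (ip >> 24) & 0xff, (ip >> 16) & 0xff,
	       (ip >> 8) & 0xff, ip & 0xff, port);
	return 0;
}

This prints ip=192.168.1.1 port=6667: DCC transmits the IPv4 address as a single decimal 32-bit integer, which is exactly why the helper can rewrite it in place for NAT.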
103
104static int help(struct sk_buff **pskb,
105 struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
106{
107 unsigned int dataoff;
108 struct tcphdr _tcph, *th;
109 char *data, *data_limit, *ib_ptr;
110 int dir = CTINFO2DIR(ctinfo);
111 struct ip_conntrack_expect *exp;
112 u32 seq;
113 u_int32_t dcc_ip;
114 u_int16_t dcc_port;
115 int i, ret = NF_ACCEPT;
116 char *addr_beg_p, *addr_end_p;
117 typeof(ip_nat_irc_hook) ip_nat_irc;
118
119 DEBUGP("entered\n");
120
121 /* If packet is coming from IRC server */
122 if (dir == IP_CT_DIR_REPLY)
123 return NF_ACCEPT;
124
125 /* Until there's been traffic both ways, don't look in packets. */
126 if (ctinfo != IP_CT_ESTABLISHED
127 && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
128 DEBUGP("Conntrackinfo = %u\n", ctinfo);
129 return NF_ACCEPT;
130 }
131
132 /* Not a full tcp header? */
133 th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
134 sizeof(_tcph), &_tcph);
135 if (th == NULL)
136 return NF_ACCEPT;
137
138 /* No data? */
139 dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4;
140 if (dataoff >= (*pskb)->len)
141 return NF_ACCEPT;
142
143 spin_lock_bh(&irc_buffer_lock);
144 ib_ptr = skb_header_pointer(*pskb, dataoff,
145 (*pskb)->len - dataoff, irc_buffer);
146 BUG_ON(ib_ptr == NULL);
147
148 data = ib_ptr;
149 data_limit = ib_ptr + (*pskb)->len - dataoff;
150
151 /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24
152 * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */
153 while (data < (data_limit - (19 + MINMATCHLEN))) {
154 if (memcmp(data, "\1DCC ", 5)) {
155 data++;
156 continue;
157 }
158
159 data += 5;
160 /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */
161
162		DEBUGP("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u...\n",
163		       NIPQUAD((*pskb)->nh.iph->saddr), ntohs(th->source),
164		       NIPQUAD((*pskb)->nh.iph->daddr), ntohs(th->dest));
165
166 for (i = 0; i < ARRAY_SIZE(dccprotos); i++) {
167 if (memcmp(data, dccprotos[i], strlen(dccprotos[i]))) {
168 /* no match */
169 continue;
170 }
171
172 DEBUGP("DCC %s detected\n", dccprotos[i]);
173 data += strlen(dccprotos[i]);
174 /* we have at least
175 * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid
176 * data left (== 14/13 bytes) */
177 if (parse_dcc((char *)data, data_limit, &dcc_ip,
178 &dcc_port, &addr_beg_p, &addr_end_p)) {
179 /* unable to parse */
180 DEBUGP("unable to parse dcc command\n");
181 continue;
182 }
183 DEBUGP("DCC bound ip/port: %u.%u.%u.%u:%u\n",
184 HIPQUAD(dcc_ip), dcc_port);
185
186 /* dcc_ip can be the internal OR external (NAT'ed) IP
187 * Tiago Sousa <mirage@kaotik.org> */
188 if (ct->tuplehash[dir].tuple.src.ip != htonl(dcc_ip)
189 && ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip != htonl(dcc_ip)) {
190 if (net_ratelimit())
191 printk(KERN_WARNING
192 "Forged DCC command from "
193 "%u.%u.%u.%u: %u.%u.%u.%u:%u\n",
194 NIPQUAD(ct->tuplehash[dir].tuple.src.ip),
195 HIPQUAD(dcc_ip), dcc_port);
196
197 continue;
198 }
199
200 exp = ip_conntrack_expect_alloc(ct);
201 if (exp == NULL) {
202 ret = NF_DROP;
203 goto out;
204 }
205
206 /* save position of address in dcc string,
207 * necessary for NAT */
208 DEBUGP("tcph->seq = %u\n", th->seq);
209 seq = ntohl(th->seq) + (addr_beg_p - ib_ptr);
210
211 /* We refer to the reverse direction ("!dir")
212 * tuples here, because we're expecting
213			 * something in the other direction.
214 * Doesn't matter unless NAT is happening. */
215 exp->tuple = ((struct ip_conntrack_tuple)
216 { { 0, { 0 } },
217 { ct->tuplehash[!dir].tuple.dst.ip,
218 { .tcp = { htons(dcc_port) } },
219 IPPROTO_TCP }});
220 exp->mask = ((struct ip_conntrack_tuple)
221 { { 0, { 0 } },
222 { htonl(0xFFFFFFFF),
223 { .tcp = { htons(0xFFFF) } }, 0xFF }});
224 exp->expectfn = NULL;
225 exp->flags = 0;
226 ip_nat_irc = rcu_dereference(ip_nat_irc_hook);
227 if (ip_nat_irc)
228 ret = ip_nat_irc(pskb, ctinfo,
229 addr_beg_p - ib_ptr,
230 addr_end_p - addr_beg_p,
231 exp);
232 else if (ip_conntrack_expect_related(exp) != 0)
233 ret = NF_DROP;
234 ip_conntrack_expect_put(exp);
235 goto out;
236 } /* for .. NUM_DCCPROTO */
237 } /* while data < ... */
238
239 out:
240 spin_unlock_bh(&irc_buffer_lock);
241 return ret;
242}
243
244static struct ip_conntrack_helper irc_helpers[MAX_PORTS];
245static char irc_names[MAX_PORTS][sizeof("irc-65535")];
246
247static void ip_conntrack_irc_fini(void);
248
249static int __init ip_conntrack_irc_init(void)
250{
251 int i, ret;
252 struct ip_conntrack_helper *hlpr;
253 char *tmpname;
254
255 if (max_dcc_channels < 1) {
256 printk("ip_conntrack_irc: max_dcc_channels must be a positive integer\n");
257 return -EBUSY;
258 }
259
260 irc_buffer = kmalloc(65536, GFP_KERNEL);
261 if (!irc_buffer)
262 return -ENOMEM;
263
264 /* If no port given, default to standard irc port */
265 if (ports_c == 0)
266 ports[ports_c++] = IRC_PORT;
267
268 for (i = 0; i < ports_c; i++) {
269 hlpr = &irc_helpers[i];
270 hlpr->tuple.src.u.tcp.port = htons(ports[i]);
271 hlpr->tuple.dst.protonum = IPPROTO_TCP;
272 hlpr->mask.src.u.tcp.port = htons(0xFFFF);
273 hlpr->mask.dst.protonum = 0xFF;
274 hlpr->max_expected = max_dcc_channels;
275 hlpr->timeout = dcc_timeout;
276 hlpr->me = THIS_MODULE;
277 hlpr->help = help;
278
279 tmpname = &irc_names[i][0];
280 if (ports[i] == IRC_PORT)
281 sprintf(tmpname, "irc");
282 else
283			sprintf(tmpname, "irc-%d", ports[i]);
284 hlpr->name = tmpname;
285
286 DEBUGP("port #%d: %d\n", i, ports[i]);
287
288 ret = ip_conntrack_helper_register(hlpr);
289
290 if (ret) {
291 printk("ip_conntrack_irc: ERROR registering port %d\n",
292 ports[i]);
293 ip_conntrack_irc_fini();
294 return -EBUSY;
295 }
296 }
297 return 0;
298}
299
300/* This function is intentionally _NOT_ defined as __exit, because
301 * it is needed by the init function */
302static void ip_conntrack_irc_fini(void)
303{
304 int i;
305 for (i = 0; i < ports_c; i++) {
306 DEBUGP("unregistering port %d\n",
307 ports[i]);
308 ip_conntrack_helper_unregister(&irc_helpers[i]);
309 }
310 kfree(irc_buffer);
311}
312
313module_init(ip_conntrack_irc_init);
314module_exit(ip_conntrack_irc_fini);
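
Both helpers in this file lean on skb_header_pointer(): return a pointer straight into the packet when the requested bytes are contiguous, otherwise gather them into a caller-supplied buffer (the 64k irc_buffer above). A freestanding approximation of that behavior, assuming a toy skb with one linear area and a single fragment rather than the kernel's data structures:

#include <stdio.h>
#include <stddef.h>

/* Toy skb: one contiguous area plus one fragment (hypothetical). */
struct fake_skb {
	const unsigned char *linear;
	size_t linear_len;
	const unsigned char *frag;
	size_t frag_len;
};

static const void *header_pointer(const struct fake_skb *skb,
				  size_t off, size_t len, void *buf)
{
	unsigned char *out = buf;
	size_t i;

	if (off + len <= skb->linear_len)
		return skb->linear + off;	/* contiguous: no copy */

	for (i = 0; i < len; i++) {		/* gather into buf */
		size_t pos = off + i;

		if (pos < skb->linear_len)
			out[i] = skb->linear[pos];
		else if (pos - skb->linear_len < skb->frag_len)
			out[i] = skb->frag[pos - skb->linear_len];
		else
			return NULL;		/* region out of range */
	}
	return buf;
}

int main(void)
{
	const unsigned char lin[4] = { 1, 2, 3, 4 };
	const unsigned char frg[4] = { 5, 6, 7, 8 };
	struct fake_skb skb = { lin, 4, frg, 4 };
	unsigned char tmp[4];
	const unsigned char *p = header_pointer(&skb, 2, 4, tmp);

	printf("%u %u %u %u\n", p[0], p[1], p[2], p[3]);	/* 3 4 5 6 */
	return 0;
}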
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
deleted file mode 100644
index cc6dd49c9da0..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ /dev/null
@@ -1,143 +0,0 @@
1/*
2 * NetBIOS name service broadcast connection tracking helper
3 *
4 * (c) 2005 Patrick McHardy <kaber@trash.net>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11/*
12 * This helper tracks locally originating NetBIOS name service
13 * requests by issuing permanent expectations (valid until
14 * timing out) matching all reply connections from the
15 * destination network. The only NetBIOS specific thing is
16 * actually the port number.
17 */
18#include <linux/kernel.h>
19#include <linux/module.h>
20#include <linux/init.h>
21#include <linux/skbuff.h>
22#include <linux/netdevice.h>
23#include <linux/inetdevice.h>
24#include <linux/if_addr.h>
25#include <linux/in.h>
26#include <linux/ip.h>
27#include <net/route.h>
28
29#include <linux/netfilter.h>
30#include <linux/netfilter_ipv4.h>
31#include <linux/netfilter_ipv4/ip_conntrack.h>
32#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
33
34#define NMBD_PORT 137
35
36MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
37MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper");
38MODULE_LICENSE("GPL");
39
40static unsigned int timeout = 3;
41module_param(timeout, uint, 0400);
42MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
43
44static int help(struct sk_buff **pskb,
45 struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
46{
47 struct ip_conntrack_expect *exp;
48 struct iphdr *iph = (*pskb)->nh.iph;
49 struct rtable *rt = (struct rtable *)(*pskb)->dst;
50 struct in_device *in_dev;
51 __be32 mask = 0;
52
53 /* we're only interested in locally generated packets */
54 if ((*pskb)->sk == NULL)
55 goto out;
56 if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
57 goto out;
58 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
59 goto out;
60
61 rcu_read_lock();
62 in_dev = __in_dev_get_rcu(rt->u.dst.dev);
63 if (in_dev != NULL) {
64 for_primary_ifa(in_dev) {
65 if (ifa->ifa_broadcast == iph->daddr) {
66 mask = ifa->ifa_mask;
67 break;
68 }
69 } endfor_ifa(in_dev);
70 }
71 rcu_read_unlock();
72
73 if (mask == 0)
74 goto out;
75
76 exp = ip_conntrack_expect_alloc(ct);
77 if (exp == NULL)
78 goto out;
79
80 exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
81 exp->tuple.src.u.udp.port = htons(NMBD_PORT);
82
83 exp->mask.src.ip = mask;
84 exp->mask.src.u.udp.port = htons(0xFFFF);
85 exp->mask.dst.ip = htonl(0xFFFFFFFF);
86 exp->mask.dst.u.udp.port = htons(0xFFFF);
87 exp->mask.dst.protonum = 0xFF;
88
89 exp->expectfn = NULL;
90 exp->flags = IP_CT_EXPECT_PERMANENT;
91
92 ip_conntrack_expect_related(exp);
93 ip_conntrack_expect_put(exp);
94
95 ip_ct_refresh(ct, *pskb, timeout * HZ);
96out:
97 return NF_ACCEPT;
98}
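
The mask computation above keys off the interface whose configured broadcast address matches the packet's destination; that interface's netmask then becomes the source mask of the expectation, so any host on that network may answer. A small sketch of the directed-broadcast test on host-order values (the kernel compares against ifa_broadcast, which an administrator could have configured to something other than the usual addr/mask derivation used here):

#include <stdint.h>
#include <stdio.h>

static int is_directed_broadcast(uint32_t daddr, uint32_t ifa_addr,
				 uint32_t ifa_mask)
{
	return daddr == ((ifa_addr & ifa_mask) | ~ifa_mask);
}

int main(void)
{
	uint32_t ifa_addr = 0xc0a80102;		/* 192.168.1.2 */
	uint32_t ifa_mask = 0xffffff00;		/* /24 */

	/* 192.168.1.255 matches, so replies from the whole /24 would be
	 * covered by an expectation whose source mask is ifa_mask. */
	printf("%d\n", is_directed_broadcast(0xc0a801ff, ifa_addr, ifa_mask));
	printf("%d\n", is_directed_broadcast(0xc0a80205, ifa_addr, ifa_mask));
	return 0;
}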
99
100static struct ip_conntrack_helper helper = {
101 .name = "netbios-ns",
102 .tuple = {
103 .src = {
104 .u = {
105 .udp = {
106 .port = __constant_htons(NMBD_PORT),
107 }
108 }
109 },
110 .dst = {
111 .protonum = IPPROTO_UDP,
112 },
113 },
114 .mask = {
115 .src = {
116 .u = {
117 .udp = {
118 .port = __constant_htons(0xFFFF),
119 }
120 }
121 },
122 .dst = {
123 .protonum = 0xFF,
124 },
125 },
126 .max_expected = 1,
127 .me = THIS_MODULE,
128 .help = help,
129};
130
131static int __init ip_conntrack_netbios_ns_init(void)
132{
133 helper.timeout = timeout;
134 return ip_conntrack_helper_register(&helper);
135}
136
137static void __exit ip_conntrack_netbios_ns_fini(void)
138{
139 ip_conntrack_helper_unregister(&helper);
140}
141
142module_init(ip_conntrack_netbios_ns_init);
143module_exit(ip_conntrack_netbios_ns_fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
deleted file mode 100644
index 9228b76ccd9a..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ /dev/null
@@ -1,1577 +0,0 @@
1/* Connection tracking via netlink socket. Allows for user space
2 * protocol helpers and general trouble making from userspace.
3 *
4 * (C) 2001 by Jay Schulist <jschlst@samba.org>
5 * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org>
6 * (C) 2003 by Patrick McHardy <kaber@trash.net>
7 * (C) 2005-2006 by Pablo Neira Ayuso <pablo@eurodev.net>
8 *
9 * I've reworked this stuff to use attributes instead of conntrack
10 * structures. 5.44 am. I need more tea. --pablo 05/07/11.
11 *
12 * Initial connection tracking via netlink development funded and
13 * generally made possible by Network Robots, Inc. (www.networkrobots.com)
14 *
15 * Further development of this code funded by Astaro AG (http://www.astaro.com)
16 *
17 * This software may be used and distributed according to the terms
18 * of the GNU General Public License, incorporated herein by reference.
19 */
20
21#include <linux/init.h>
22#include <linux/module.h>
23#include <linux/kernel.h>
24#include <linux/types.h>
25#include <linux/timer.h>
26#include <linux/skbuff.h>
27#include <linux/errno.h>
28#include <linux/netlink.h>
29#include <linux/spinlock.h>
30#include <linux/interrupt.h>
31#include <linux/notifier.h>
32
33#include <linux/netfilter.h>
34#include <linux/netfilter_ipv4/ip_conntrack.h>
35#include <linux/netfilter_ipv4/ip_conntrack_core.h>
36#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
37#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
38#include <linux/netfilter_ipv4/ip_nat_protocol.h>
39
40#include <linux/netfilter/nfnetlink.h>
41#include <linux/netfilter/nfnetlink_conntrack.h>
42
43MODULE_LICENSE("GPL");
44
45static char __initdata version[] = "0.90";
46
47static inline int
48ctnetlink_dump_tuples_proto(struct sk_buff *skb,
49 const struct ip_conntrack_tuple *tuple,
50 struct ip_conntrack_protocol *proto)
51{
52 int ret = 0;
53 struct nfattr *nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO);
54
55 NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum);
56
57 if (likely(proto->tuple_to_nfattr))
58 ret = proto->tuple_to_nfattr(skb, tuple);
59
60 NFA_NEST_END(skb, nest_parms);
61
62 return ret;
63
64nfattr_failure:
65 return -1;
66}
67
68static inline int
69ctnetlink_dump_tuples_ip(struct sk_buff *skb,
70 const struct ip_conntrack_tuple *tuple)
71{
72 struct nfattr *nest_parms = NFA_NEST(skb, CTA_TUPLE_IP);
73
74 NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(__be32), &tuple->src.ip);
75 NFA_PUT(skb, CTA_IP_V4_DST, sizeof(__be32), &tuple->dst.ip);
76
77 NFA_NEST_END(skb, nest_parms);
78
79 return 0;
80
81nfattr_failure:
82 return -1;
83}
84
85static inline int
86ctnetlink_dump_tuples(struct sk_buff *skb,
87 const struct ip_conntrack_tuple *tuple)
88{
89 int ret;
90 struct ip_conntrack_protocol *proto;
91
92 ret = ctnetlink_dump_tuples_ip(skb, tuple);
93 if (unlikely(ret < 0))
94 return ret;
95
96 proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
97 ret = ctnetlink_dump_tuples_proto(skb, tuple, proto);
98 ip_conntrack_proto_put(proto);
99
100 return ret;
101}
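
The NFA_NEST()/NFA_PUT()/NFA_NEST_END() pattern used throughout this file builds netlink TLV attributes: open a nest with a placeholder length, append 4-byte-aligned children, then patch the nest's length on close. A freestanding sketch of that mechanism, with a simplified header layout, hypothetical helper names, and no overflow checking:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define ALIGN4(n) (((n) + 3u) & ~3u)

struct attr_hdr { uint16_t len; uint16_t type; };

static unsigned char buf[256];
static unsigned int used;

/* Open a nested attribute; its length is patched by nest_end(). */
static struct attr_hdr *nest_begin(uint16_t type)
{
	struct attr_hdr *a = (struct attr_hdr *)(buf + used);

	a->type = type;
	a->len = 0;
	used += sizeof(*a);
	return a;
}

/* Append one child attribute, keeping the stream 4-byte aligned. */
static void put_attr(uint16_t type, const void *data, uint16_t dlen)
{
	struct attr_hdr *a = (struct attr_hdr *)(buf + used);

	a->type = type;
	a->len = sizeof(*a) + dlen;
	memcpy(a + 1, data, dlen);
	used += ALIGN4(a->len);
}

static void nest_end(struct attr_hdr *nest)
{
	nest->len = (uint16_t)((buf + used) - (unsigned char *)nest);
}

int main(void)
{
	uint32_t src = 0x0a000001, dst = 0x0a000002;
	struct attr_hdr *ip = nest_begin(1);	/* e.g. CTA_TUPLE_IP */

	put_attr(1, &src, sizeof(src));		/* e.g. CTA_IP_V4_SRC */
	put_attr(2, &dst, sizeof(dst));		/* e.g. CTA_IP_V4_DST */
	nest_end(ip);

	printf("nest length: %u\n", (unsigned)ip->len);	/* 20 */
	return 0;
}

The goto nfattr_failure labels in the real code are the other half of the story: when the buffer runs out mid-message, the partially written attributes are trimmed away and the dump is retried later.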
102
103static inline int
104ctnetlink_dump_status(struct sk_buff *skb, const struct ip_conntrack *ct)
105{
106 __be32 status = htonl((u_int32_t) ct->status);
107 NFA_PUT(skb, CTA_STATUS, sizeof(status), &status);
108 return 0;
109
110nfattr_failure:
111 return -1;
112}
113
114static inline int
115ctnetlink_dump_timeout(struct sk_buff *skb, const struct ip_conntrack *ct)
116{
117 long timeout_l = ct->timeout.expires - jiffies;
118 __be32 timeout;
119
120 if (timeout_l < 0)
121 timeout = 0;
122 else
123 timeout = htonl(timeout_l / HZ);
124
125 NFA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout);
126 return 0;
127
128nfattr_failure:
129 return -1;
130}
131
132static inline int
133ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
134{
135 struct ip_conntrack_protocol *proto = ip_conntrack_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
136
137 struct nfattr *nest_proto;
138 int ret;
139
140 if (!proto->to_nfattr) {
141 ip_conntrack_proto_put(proto);
142 return 0;
143 }
144
145 nest_proto = NFA_NEST(skb, CTA_PROTOINFO);
146
147 ret = proto->to_nfattr(skb, nest_proto, ct);
148
149 ip_conntrack_proto_put(proto);
150
151 NFA_NEST_END(skb, nest_proto);
152
153 return ret;
154
155nfattr_failure:
156 ip_conntrack_proto_put(proto);
157 return -1;
158}
159
160static inline int
161ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
162{
163 struct nfattr *nest_helper;
164
165 if (!ct->helper)
166 return 0;
167
168 nest_helper = NFA_NEST(skb, CTA_HELP);
169 NFA_PUT(skb, CTA_HELP_NAME, strlen(ct->helper->name), ct->helper->name);
170
171 if (ct->helper->to_nfattr)
172 ct->helper->to_nfattr(skb, ct);
173
174 NFA_NEST_END(skb, nest_helper);
175
176 return 0;
177
178nfattr_failure:
179 return -1;
180}
181
182#ifdef CONFIG_IP_NF_CT_ACCT
183static inline int
184ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct,
185 enum ip_conntrack_dir dir)
186{
187 enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
188 struct nfattr *nest_count = NFA_NEST(skb, type);
189 __be32 tmp;
190
191 tmp = htonl(ct->counters[dir].packets);
192 NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(__be32), &tmp);
193
194 tmp = htonl(ct->counters[dir].bytes);
195 NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(__be32), &tmp);
196
197 NFA_NEST_END(skb, nest_count);
198
199 return 0;
200
201nfattr_failure:
202 return -1;
203}
204#else
205#define ctnetlink_dump_counters(a, b, c) (0)
206#endif
207
208#ifdef CONFIG_IP_NF_CONNTRACK_MARK
209static inline int
210ctnetlink_dump_mark(struct sk_buff *skb, const struct ip_conntrack *ct)
211{
212 __be32 mark = htonl(ct->mark);
213
214 NFA_PUT(skb, CTA_MARK, sizeof(__be32), &mark);
215 return 0;
216
217nfattr_failure:
218 return -1;
219}
220#else
221#define ctnetlink_dump_mark(a, b) (0)
222#endif
223
224static inline int
225ctnetlink_dump_id(struct sk_buff *skb, const struct ip_conntrack *ct)
226{
227 __be32 id = htonl(ct->id);
228 NFA_PUT(skb, CTA_ID, sizeof(__be32), &id);
229 return 0;
230
231nfattr_failure:
232 return -1;
233}
234
235static inline int
236ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct)
237{
238 __be32 use = htonl(atomic_read(&ct->ct_general.use));
239
240 NFA_PUT(skb, CTA_USE, sizeof(__be32), &use);
241 return 0;
242
243nfattr_failure:
244 return -1;
245}
246
247#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple)
248
249static int
250ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
251 int event, int nowait,
252 const struct ip_conntrack *ct)
253{
254 struct nlmsghdr *nlh;
255 struct nfgenmsg *nfmsg;
256 struct nfattr *nest_parms;
257 unsigned char *b;
258
259 b = skb->tail;
260
261 event |= NFNL_SUBSYS_CTNETLINK << 8;
262 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
263 nfmsg = NLMSG_DATA(nlh);
264
265 nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
266 nfmsg->nfgen_family = AF_INET;
267 nfmsg->version = NFNETLINK_V0;
268 nfmsg->res_id = 0;
269
270 nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
271 if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
272 goto nfattr_failure;
273 NFA_NEST_END(skb, nest_parms);
274
275 nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
276 if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
277 goto nfattr_failure;
278 NFA_NEST_END(skb, nest_parms);
279
280 if (ctnetlink_dump_status(skb, ct) < 0 ||
281 ctnetlink_dump_timeout(skb, ct) < 0 ||
282 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
283 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
284 ctnetlink_dump_protoinfo(skb, ct) < 0 ||
285 ctnetlink_dump_helpinfo(skb, ct) < 0 ||
286 ctnetlink_dump_mark(skb, ct) < 0 ||
287 ctnetlink_dump_id(skb, ct) < 0 ||
288 ctnetlink_dump_use(skb, ct) < 0)
289 goto nfattr_failure;
290
291 nlh->nlmsg_len = skb->tail - b;
292 return skb->len;
293
294nlmsg_failure:
295nfattr_failure:
296 skb_trim(skb, b - skb->data);
297 return -1;
298}
299
300#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
301static int ctnetlink_conntrack_event(struct notifier_block *this,
302 unsigned long events, void *ptr)
303{
304 struct nlmsghdr *nlh;
305 struct nfgenmsg *nfmsg;
306 struct nfattr *nest_parms;
307 struct ip_conntrack *ct = (struct ip_conntrack *)ptr;
308 struct sk_buff *skb;
309 unsigned int type;
310 unsigned char *b;
311 unsigned int flags = 0, group;
312
313 /* ignore our fake conntrack entry */
314 if (ct == &ip_conntrack_untracked)
315 return NOTIFY_DONE;
316
317 if (events & IPCT_DESTROY) {
318 type = IPCTNL_MSG_CT_DELETE;
319 group = NFNLGRP_CONNTRACK_DESTROY;
320 } else if (events & (IPCT_NEW | IPCT_RELATED)) {
321 type = IPCTNL_MSG_CT_NEW;
322 flags = NLM_F_CREATE|NLM_F_EXCL;
323 group = NFNLGRP_CONNTRACK_NEW;
324 } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) {
325 type = IPCTNL_MSG_CT_NEW;
326 group = NFNLGRP_CONNTRACK_UPDATE;
327 } else
328 return NOTIFY_DONE;
329
330 if (!nfnetlink_has_listeners(group))
331 return NOTIFY_DONE;
332
333 skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
334 if (!skb)
335 return NOTIFY_DONE;
336
337 b = skb->tail;
338
339 type |= NFNL_SUBSYS_CTNETLINK << 8;
340 nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
341 nfmsg = NLMSG_DATA(nlh);
342
343 nlh->nlmsg_flags = flags;
344 nfmsg->nfgen_family = AF_INET;
345 nfmsg->version = NFNETLINK_V0;
346 nfmsg->res_id = 0;
347
348 nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
349 if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
350 goto nfattr_failure;
351 NFA_NEST_END(skb, nest_parms);
352
353 nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
354 if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
355 goto nfattr_failure;
356 NFA_NEST_END(skb, nest_parms);
357
358 if (events & IPCT_DESTROY) {
359 if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
360 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
361 goto nfattr_failure;
362 } else {
363 if (ctnetlink_dump_status(skb, ct) < 0)
364 goto nfattr_failure;
365
366 if (ctnetlink_dump_timeout(skb, ct) < 0)
367 goto nfattr_failure;
368
369 if (events & IPCT_PROTOINFO
370 && ctnetlink_dump_protoinfo(skb, ct) < 0)
371 goto nfattr_failure;
372
373 if ((events & IPCT_HELPER || ct->helper)
374 && ctnetlink_dump_helpinfo(skb, ct) < 0)
375 goto nfattr_failure;
376
377#ifdef CONFIG_IP_NF_CONNTRACK_MARK
378 if ((events & IPCT_MARK || ct->mark)
379 && ctnetlink_dump_mark(skb, ct) < 0)
380 goto nfattr_failure;
381#endif
382
383 if (events & IPCT_COUNTER_FILLING &&
384 (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
385 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0))
386 goto nfattr_failure;
387 }
388
389 nlh->nlmsg_len = skb->tail - b;
390 nfnetlink_send(skb, 0, group, 0);
391 return NOTIFY_DONE;
392
393nlmsg_failure:
394nfattr_failure:
395 kfree_skb(skb);
396 return NOTIFY_DONE;
397}
398#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
399
400static int ctnetlink_done(struct netlink_callback *cb)
401{
402 if (cb->args[1])
403 ip_conntrack_put((struct ip_conntrack *)cb->args[1]);
404 return 0;
405}
406
407static int
408ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
409{
410 struct ip_conntrack *ct, *last;
411 struct ip_conntrack_tuple_hash *h;
412 struct list_head *i;
413
414 read_lock_bh(&ip_conntrack_lock);
415 last = (struct ip_conntrack *)cb->args[1];
416 for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++) {
417restart:
418 list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
419 h = (struct ip_conntrack_tuple_hash *) i;
420 if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
421 continue;
422 ct = tuplehash_to_ctrack(h);
423 if (cb->args[1]) {
424 if (ct != last)
425 continue;
426 cb->args[1] = 0;
427 }
428 if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
429 cb->nlh->nlmsg_seq,
430 IPCTNL_MSG_CT_NEW,
431 1, ct) < 0) {
432 nf_conntrack_get(&ct->ct_general);
433 cb->args[1] = (unsigned long)ct;
434 goto out;
435 }
436#ifdef CONFIG_NF_CT_ACCT
437 if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
438 IPCTNL_MSG_CT_GET_CTRZERO)
439 memset(&ct->counters, 0, sizeof(ct->counters));
440#endif
441 }
442 if (cb->args[1]) {
443 cb->args[1] = 0;
444 goto restart;
445 }
446 }
447out:
448 read_unlock_bh(&ip_conntrack_lock);
449 if (last)
450 ip_conntrack_put(last);
451
452 return skb->len;
453}
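
ctnetlink_dump_table() is restartable: cb->args[0] remembers the hash bucket and cb->args[1] the last conntrack emitted, so a dump that overflows one skb can resume exactly where it stopped on the next callback invocation. The same pattern in miniature, with a fixed per-call budget standing in for the skb filling up (all names are hypothetical):

#include <stdio.h>

#define NBUCKETS 4
#define BUDGET   3	/* entries per "message", standing in for skb space */

static const int table[NBUCKETS][4] = {
	{ 1, 2, 3 }, { 4 }, { 5, 6, 7, 8 }, { 9 },
};
static const unsigned counts[NBUCKETS] = { 3, 1, 4, 1 };

/* args[0] = bucket, args[1] = index in bucket, like cb->args[]. */
static int dump(unsigned long args[2])
{
	int emitted = 0;

	for (; args[0] < NBUCKETS; args[0]++, args[1] = 0) {
		for (; args[1] < counts[args[0]]; args[1]++) {
			if (emitted == BUDGET)
				return 1;	/* full: resume here later */
			printf("%d ", table[args[0]][args[1]]);
			emitted++;
		}
	}
	return 0;			/* table fully dumped */
}

int main(void)
{
	unsigned long args[2] = { 0, 0 };

	while (dump(args))
		printf("| ");	/* boundary between two dump calls */
	printf("\n");
	return 0;
}

The kernel version additionally takes a reference on the last conntrack (nf_conntrack_get) so that the resume point cannot be freed between callbacks; entries can still be added or removed in the meantime, which is why the dump is best-effort rather than a consistent snapshot.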
454
455static const size_t cta_min_ip[CTA_IP_MAX] = {
456 [CTA_IP_V4_SRC-1] = sizeof(__be32),
457 [CTA_IP_V4_DST-1] = sizeof(__be32),
458};
459
460static inline int
461ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple)
462{
463 struct nfattr *tb[CTA_IP_MAX];
464
465 nfattr_parse_nested(tb, CTA_IP_MAX, attr);
466
467 if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
468 return -EINVAL;
469
470 if (!tb[CTA_IP_V4_SRC-1])
471 return -EINVAL;
472 tuple->src.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_SRC-1]);
473
474 if (!tb[CTA_IP_V4_DST-1])
475 return -EINVAL;
476 tuple->dst.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_DST-1]);
477
478 return 0;
479}
480
481static const size_t cta_min_proto[CTA_PROTO_MAX] = {
482 [CTA_PROTO_NUM-1] = sizeof(u_int8_t),
483 [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t),
484 [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t),
485 [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t),
486 [CTA_PROTO_ICMP_CODE-1] = sizeof(u_int8_t),
487 [CTA_PROTO_ICMP_ID-1] = sizeof(u_int16_t),
488};
489
490static inline int
491ctnetlink_parse_tuple_proto(struct nfattr *attr,
492 struct ip_conntrack_tuple *tuple)
493{
494 struct nfattr *tb[CTA_PROTO_MAX];
495 struct ip_conntrack_protocol *proto;
496 int ret = 0;
497
498 nfattr_parse_nested(tb, CTA_PROTO_MAX, attr);
499
500 if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
501 return -EINVAL;
502
503 if (!tb[CTA_PROTO_NUM-1])
504 return -EINVAL;
505 tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
506
507 proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
508
509 if (likely(proto->nfattr_to_tuple))
510 ret = proto->nfattr_to_tuple(tb, tuple);
511
512 ip_conntrack_proto_put(proto);
513
514 return ret;
515}
516
517static inline int
518ctnetlink_parse_tuple(struct nfattr *cda[], struct ip_conntrack_tuple *tuple,
519 enum ctattr_tuple type)
520{
521 struct nfattr *tb[CTA_TUPLE_MAX];
522 int err;
523
524 memset(tuple, 0, sizeof(*tuple));
525
526 nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]);
527
528 if (!tb[CTA_TUPLE_IP-1])
529 return -EINVAL;
530
531 err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP-1], tuple);
532 if (err < 0)
533 return err;
534
535 if (!tb[CTA_TUPLE_PROTO-1])
536 return -EINVAL;
537
538 err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO-1], tuple);
539 if (err < 0)
540 return err;
541
542 /* orig and expect tuples get DIR_ORIGINAL */
543 if (type == CTA_TUPLE_REPLY)
544 tuple->dst.dir = IP_CT_DIR_REPLY;
545 else
546 tuple->dst.dir = IP_CT_DIR_ORIGINAL;
547
548 return 0;
549}
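
Going the other way, ctnetlink_parse_tuple() and its callees unpack the same TLV stream with nfattr_parse_nested(), which fills a table indexed by attribute type; note the pervasive tb[TYPE-1] convention. A freestanding sketch of that walk, with hypothetical names and simplified validation:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ALIGN4(n) (((n) + 3u) & ~3u)

struct attr_hdr { uint16_t len; uint16_t type; };

/* Walk a run of attributes, recording each payload in tb[type-1]. */
static void parse_attrs(const unsigned char *p, unsigned int remaining,
			const unsigned char *tb[], unsigned int maxtype)
{
	while (remaining >= sizeof(struct attr_hdr)) {
		struct attr_hdr h;
		unsigned int step;

		memcpy(&h, p, sizeof(h));
		if (h.len < sizeof(h) || h.len > remaining)
			break;				/* malformed: stop */
		if (h.type >= 1 && h.type <= maxtype)
			tb[h.type - 1] = p + sizeof(h);	/* payload start */
		step = ALIGN4(h.len);
		if (step >= remaining)
			break;				/* last attribute */
		p += step;
		remaining -= step;
	}
}

int main(void)
{
	/* Two u32 attributes, types 1 and 2, laid out as the nesting
	 * sketch earlier would have produced them. */
	unsigned char buf[16];
	const unsigned char *tb[2] = { 0, 0 };
	struct attr_hdr h = { 8, 1 };
	uint32_t v1 = 0x0a000001, v2 = 0x0a000002, out;

	memcpy(buf, &h, 4);      memcpy(buf + 4, &v1, 4);
	h.type = 2;
	memcpy(buf + 8, &h, 4);  memcpy(buf + 12, &v2, 4);

	parse_attrs(buf, sizeof(buf), tb, 2);
	memcpy(&out, tb[1], 4);
	printf("attr 2 = 0x%x\n", out);		/* 0xa000002 */
	return 0;
}

The cta_min_* arrays above plug the remaining hole: after parsing, each present attribute is checked against a minimum payload size before its data is dereferenced.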
550
551#ifdef CONFIG_IP_NF_NAT_NEEDED
552static const size_t cta_min_protonat[CTA_PROTONAT_MAX] = {
553 [CTA_PROTONAT_PORT_MIN-1] = sizeof(u_int16_t),
554 [CTA_PROTONAT_PORT_MAX-1] = sizeof(u_int16_t),
555};
556
557static int ctnetlink_parse_nat_proto(struct nfattr *attr,
558 const struct ip_conntrack *ct,
559 struct ip_nat_range *range)
560{
561 struct nfattr *tb[CTA_PROTONAT_MAX];
562 struct ip_nat_protocol *npt;
563
564 nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr);
565
566 if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat))
567 return -EINVAL;
568
569 npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
570
571 if (!npt->nfattr_to_range) {
572 ip_nat_proto_put(npt);
573 return 0;
574 }
575
576 /* nfattr_to_range returns 1 if it parsed, 0 if not, neg. on error */
577 if (npt->nfattr_to_range(tb, range) > 0)
578 range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
579
580 ip_nat_proto_put(npt);
581
582 return 0;
583}
584
585static const size_t cta_min_nat[CTA_NAT_MAX] = {
586 [CTA_NAT_MINIP-1] = sizeof(__be32),
587 [CTA_NAT_MAXIP-1] = sizeof(__be32),
588};
589
590static inline int
591ctnetlink_parse_nat(struct nfattr *nat,
592 const struct ip_conntrack *ct, struct ip_nat_range *range)
593{
594 struct nfattr *tb[CTA_NAT_MAX];
595 int err;
596
597 memset(range, 0, sizeof(*range));
598
599 nfattr_parse_nested(tb, CTA_NAT_MAX, nat);
600
601 if (nfattr_bad_size(tb, CTA_NAT_MAX, cta_min_nat))
602 return -EINVAL;
603
604 if (tb[CTA_NAT_MINIP-1])
605 range->min_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MINIP-1]);
606
607 if (!tb[CTA_NAT_MAXIP-1])
608 range->max_ip = range->min_ip;
609 else
610 range->max_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MAXIP-1]);
611
612 if (range->min_ip)
613 range->flags |= IP_NAT_RANGE_MAP_IPS;
614
615 if (!tb[CTA_NAT_PROTO-1])
616 return 0;
617
618 err = ctnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range);
619 if (err < 0)
620 return err;
621
622 return 0;
623}
624#endif
625
626static inline int
627ctnetlink_parse_help(struct nfattr *attr, char **helper_name)
628{
629 struct nfattr *tb[CTA_HELP_MAX];
630
631 nfattr_parse_nested(tb, CTA_HELP_MAX, attr);
632
633 if (!tb[CTA_HELP_NAME-1])
634 return -EINVAL;
635
636 *helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]);
637
638 return 0;
639}
640
641static const size_t cta_min[CTA_MAX] = {
642 [CTA_STATUS-1] = sizeof(__be32),
643 [CTA_TIMEOUT-1] = sizeof(__be32),
644 [CTA_MARK-1] = sizeof(__be32),
645 [CTA_USE-1] = sizeof(__be32),
646 [CTA_ID-1] = sizeof(__be32)
647};
648
649static int
650ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
651 struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
652{
653 struct ip_conntrack_tuple_hash *h;
654 struct ip_conntrack_tuple tuple;
655 struct ip_conntrack *ct;
656 int err = 0;
657
658 if (nfattr_bad_size(cda, CTA_MAX, cta_min))
659 return -EINVAL;
660
661 if (cda[CTA_TUPLE_ORIG-1])
662 err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
663 else if (cda[CTA_TUPLE_REPLY-1])
664 err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY);
665 else {
666 /* Flush the whole table */
667 ip_conntrack_flush();
668 return 0;
669 }
670
671 if (err < 0)
672 return err;
673
674 h = ip_conntrack_find_get(&tuple, NULL);
675 if (!h)
676 return -ENOENT;
677
678 ct = tuplehash_to_ctrack(h);
679
680 if (cda[CTA_ID-1]) {
681 u_int32_t id = ntohl(*(__be32 *)NFA_DATA(cda[CTA_ID-1]));
682 if (ct->id != id) {
683 ip_conntrack_put(ct);
684 return -ENOENT;
685 }
686 }
687 if (del_timer(&ct->timeout))
688 ct->timeout.function((unsigned long)ct);
689
690 ip_conntrack_put(ct);
691
692 return 0;
693}
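
The del_timer() idiom at the end of ctnetlink_del_conntrack() guarantees the entry is torn down exactly once: only the caller that actually deactivates the still-pending timer gets to run the timeout handler itself. A toy model of that ownership transfer (not the kernel timer API; the kernel's del_timer() is atomic where this sketch is not):

#include <stdio.h>

struct entry {
	int timer_pending;
};

/* Stand-in for del_timer(): reports whether the timer was still
 * pending and deactivates it. */
static int fake_del_timer(struct entry *e)
{
	int was_pending = e->timer_pending;

	e->timer_pending = 0;
	return was_pending;
}

static void timeout_fn(struct entry *e)
{
	printf("tearing down entry %p\n", (void *)e);
}

int main(void)
{
	struct entry e = { 1 };

	if (fake_del_timer(&e))	/* we own the teardown: run it now */
		timeout_fn(&e);
	if (fake_del_timer(&e))	/* a second delete does nothing */
		timeout_fn(&e);
	return 0;
}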
694
695static int
696ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
697 struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
698{
699 struct ip_conntrack_tuple_hash *h;
700 struct ip_conntrack_tuple tuple;
701 struct ip_conntrack *ct;
702 struct sk_buff *skb2 = NULL;
703 int err = 0;
704
705 if (nlh->nlmsg_flags & NLM_F_DUMP) {
706 struct nfgenmsg *msg = NLMSG_DATA(nlh);
707 u32 rlen;
708
709 if (msg->nfgen_family != AF_INET)
710 return -EAFNOSUPPORT;
711
712#ifndef CONFIG_IP_NF_CT_ACCT
713 if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO)
714 return -ENOTSUPP;
715#endif
716 if ((*errp = netlink_dump_start(ctnl, skb, nlh,
717 ctnetlink_dump_table,
718 ctnetlink_done)) != 0)
719 return -EINVAL;
720
721 rlen = NLMSG_ALIGN(nlh->nlmsg_len);
722 if (rlen > skb->len)
723 rlen = skb->len;
724 skb_pull(skb, rlen);
725 return 0;
726 }
727
728 if (nfattr_bad_size(cda, CTA_MAX, cta_min))
729 return -EINVAL;
730
731 if (cda[CTA_TUPLE_ORIG-1])
732 err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
733 else if (cda[CTA_TUPLE_REPLY-1])
734 err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY);
735 else
736 return -EINVAL;
737
738 if (err < 0)
739 return err;
740
741 h = ip_conntrack_find_get(&tuple, NULL);
742 if (!h)
743 return -ENOENT;
744
745 ct = tuplehash_to_ctrack(h);
746
747 err = -ENOMEM;
748 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
749 if (!skb2) {
750 ip_conntrack_put(ct);
751 return -ENOMEM;
752 }
753
754 err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq,
755 IPCTNL_MSG_CT_NEW, 1, ct);
756 ip_conntrack_put(ct);
757 if (err <= 0)
758 goto free;
759
760 err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
761 if (err < 0)
762 goto out;
763
764 return 0;
765
766free:
767 kfree_skb(skb2);
768out:
769 return err;
770}
771
772static inline int
773ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[])
774{
775 unsigned long d;
776 unsigned status = ntohl(*(__be32 *)NFA_DATA(cda[CTA_STATUS-1]));
777 d = ct->status ^ status;
778
779 if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
780 /* unchangeable */
781 return -EINVAL;
782
783 if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY))
784 /* SEEN_REPLY bit can only be set */
785 return -EINVAL;
786
787
788 if (d & IPS_ASSURED && !(status & IPS_ASSURED))
789 /* ASSURED bit can only be set */
790 return -EINVAL;
791
792 if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) {
793#ifndef CONFIG_IP_NF_NAT_NEEDED
794 return -EINVAL;
795#else
796 struct ip_nat_range range;
797
798 if (cda[CTA_NAT_DST-1]) {
799 if (ctnetlink_parse_nat(cda[CTA_NAT_DST-1], ct,
800 &range) < 0)
801 return -EINVAL;
802 if (ip_nat_initialized(ct,
803 HOOK2MANIP(NF_IP_PRE_ROUTING)))
804 return -EEXIST;
805 ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
806 }
807 if (cda[CTA_NAT_SRC-1]) {
808 if (ctnetlink_parse_nat(cda[CTA_NAT_SRC-1], ct,
809 &range) < 0)
810 return -EINVAL;
811 if (ip_nat_initialized(ct,
812 HOOK2MANIP(NF_IP_POST_ROUTING)))
813 return -EEXIST;
814 ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
815 }
816#endif
817 }
818
819 /* Be careful here, modifying NAT bits can screw up things,
820 * so don't let users modify them directly if they don't pass
821 * ip_nat_range. */
822 ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
823 return 0;
824}
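
ctnetlink_change_status() uses an XOR delta to find the bits a request would flip, then rejects any request that would clear a set-only bit such as IPS_SEEN_REPLY or IPS_ASSURED. The check in isolation (the SEEN_REPLY value below is a stand-in, not the real IPS_* constant):

#include <stdio.h>

#define SEEN_REPLY 0x02	/* stand-in bit, not the real IPS_SEEN_REPLY */

/* Reject a status update that would clear a set-only bit. */
static int check_status_change(unsigned long cur, unsigned long req)
{
	unsigned long d = cur ^ req;	/* bits that would flip */

	if ((d & SEEN_REPLY) && !(req & SEEN_REPLY))
		return -1;		/* clearing it is invalid */
	return 0;
}

int main(void)
{
	printf("%d\n", check_status_change(SEEN_REPLY, 0));	/* -1 */
	printf("%d\n", check_status_change(0, SEEN_REPLY));	/*  0 */
	return 0;
}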
825
826
827static inline int
828ctnetlink_change_helper(struct ip_conntrack *ct, struct nfattr *cda[])
829{
830 struct ip_conntrack_helper *helper;
831 char *helpname;
832 int err;
833
834 /* don't change helper of sibling connections */
835 if (ct->master)
836 return -EINVAL;
837
838 err = ctnetlink_parse_help(cda[CTA_HELP-1], &helpname);
839 if (err < 0)
840 return err;
841
842 helper = __ip_conntrack_helper_find_byname(helpname);
843 if (!helper) {
844 if (!strcmp(helpname, ""))
845 helper = NULL;
846 else
847 return -EINVAL;
848 }
849
850 if (ct->helper) {
851 if (!helper) {
852 /* we had a helper before ... */
853 ip_ct_remove_expectations(ct);
854 ct->helper = NULL;
855 } else {
856 /* need to zero data of old helper */
857 memset(&ct->help, 0, sizeof(ct->help));
858 }
859 }
860
861 ct->helper = helper;
862
863 return 0;
864}
865
866static inline int
867ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[])
868{
869 u_int32_t timeout = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1]));
870
871 if (!del_timer(&ct->timeout))
872 return -ETIME;
873
874 ct->timeout.expires = jiffies + timeout * HZ;
875 add_timer(&ct->timeout);
876
877 return 0;
878}
879
880static inline int
881ctnetlink_change_protoinfo(struct ip_conntrack *ct, struct nfattr *cda[])
882{
883 struct nfattr *tb[CTA_PROTOINFO_MAX], *attr = cda[CTA_PROTOINFO-1];
884 struct ip_conntrack_protocol *proto;
885 u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
886 int err = 0;
887
888 nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr);
889
890 proto = ip_conntrack_proto_find_get(npt);
891
892 if (proto->from_nfattr)
893 err = proto->from_nfattr(tb, ct);
894 ip_conntrack_proto_put(proto);
895
896 return err;
897}
898
899static int
900ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[])
901{
902 int err;
903
904 if (cda[CTA_HELP-1]) {
905 err = ctnetlink_change_helper(ct, cda);
906 if (err < 0)
907 return err;
908 }
909
910 if (cda[CTA_TIMEOUT-1]) {
911 err = ctnetlink_change_timeout(ct, cda);
912 if (err < 0)
913 return err;
914 }
915
916 if (cda[CTA_STATUS-1]) {
917 err = ctnetlink_change_status(ct, cda);
918 if (err < 0)
919 return err;
920 }
921
922 if (cda[CTA_PROTOINFO-1]) {
923 err = ctnetlink_change_protoinfo(ct, cda);
924 if (err < 0)
925 return err;
926 }
927
928#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
929 if (cda[CTA_MARK-1])
930 ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1]));
931#endif
932
933 return 0;
934}
935
936static int
937ctnetlink_create_conntrack(struct nfattr *cda[],
938 struct ip_conntrack_tuple *otuple,
939 struct ip_conntrack_tuple *rtuple)
940{
941 struct ip_conntrack *ct;
942 int err = -EINVAL;
943
944 ct = ip_conntrack_alloc(otuple, rtuple);
945 if (ct == NULL || IS_ERR(ct))
946 return -ENOMEM;
947
948 if (!cda[CTA_TIMEOUT-1])
949 goto err;
950 ct->timeout.expires = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1]));
951
952 ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
953 ct->status |= IPS_CONFIRMED;
954
955 if (cda[CTA_STATUS-1]) {
956 err = ctnetlink_change_status(ct, cda);
957 if (err < 0)
958 goto err;
959 }
960
961 if (cda[CTA_PROTOINFO-1]) {
962 err = ctnetlink_change_protoinfo(ct, cda);
963 if (err < 0)
964 goto err;
965 }
966
967#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
968 if (cda[CTA_MARK-1])
969 ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1]));
970#endif
971
972 ct->helper = ip_conntrack_helper_find_get(rtuple);
973
974 add_timer(&ct->timeout);
975 ip_conntrack_hash_insert(ct);
976
977 if (ct->helper)
978 ip_conntrack_helper_put(ct->helper);
979
980 return 0;
981
982err:
983 ip_conntrack_free(ct);
984 return err;
985}
986
987static int
988ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
989 struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
990{
991 struct ip_conntrack_tuple otuple, rtuple;
992 struct ip_conntrack_tuple_hash *h = NULL;
993 int err = 0;
994
995 if (nfattr_bad_size(cda, CTA_MAX, cta_min))
996 return -EINVAL;
997
998 if (cda[CTA_TUPLE_ORIG-1]) {
999 err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG);
1000 if (err < 0)
1001 return err;
1002 }
1003
1004 if (cda[CTA_TUPLE_REPLY-1]) {
1005 err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY);
1006 if (err < 0)
1007 return err;
1008 }
1009
1010 write_lock_bh(&ip_conntrack_lock);
1011 if (cda[CTA_TUPLE_ORIG-1])
1012 h = __ip_conntrack_find(&otuple, NULL);
1013 else if (cda[CTA_TUPLE_REPLY-1])
1014 h = __ip_conntrack_find(&rtuple, NULL);
1015
1016 if (h == NULL) {
1017 write_unlock_bh(&ip_conntrack_lock);
1018 err = -ENOENT;
1019 if (nlh->nlmsg_flags & NLM_F_CREATE)
1020 err = ctnetlink_create_conntrack(cda, &otuple, &rtuple);
1021 return err;
1022 }
1023 /* implicit 'else' */
1024
1025 /* we only allow nat config for new conntracks */
1026 if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) {
1027 err = -EINVAL;
1028 goto out_unlock;
1029 }
1030
1031 /* We manipulate the conntrack inside the global conntrack table lock,
1032 * so there's no need to increase the refcount */
1033 err = -EEXIST;
1034 if (!(nlh->nlmsg_flags & NLM_F_EXCL))
1035 err = ctnetlink_change_conntrack(tuplehash_to_ctrack(h), cda);
1036
1037out_unlock:
1038 write_unlock_bh(&ip_conntrack_lock);
1039 return err;
1040}
1041
1042/***********************************************************************
1043 * EXPECT
1044 ***********************************************************************/
1045
1046static inline int
1047ctnetlink_exp_dump_tuple(struct sk_buff *skb,
1048 const struct ip_conntrack_tuple *tuple,
1049 enum ctattr_expect type)
1050{
1051 struct nfattr *nest_parms = NFA_NEST(skb, type);
1052
1053 if (ctnetlink_dump_tuples(skb, tuple) < 0)
1054 goto nfattr_failure;
1055
1056 NFA_NEST_END(skb, nest_parms);
1057
1058 return 0;
1059
1060nfattr_failure:
1061 return -1;
1062}
1063
1064static inline int
1065ctnetlink_exp_dump_mask(struct sk_buff *skb,
1066 const struct ip_conntrack_tuple *tuple,
1067 const struct ip_conntrack_tuple *mask)
1068{
1069 int ret;
1070 struct ip_conntrack_protocol *proto;
1071 struct nfattr *nest_parms = NFA_NEST(skb, CTA_EXPECT_MASK);
1072
1073 ret = ctnetlink_dump_tuples_ip(skb, mask);
1074 if (unlikely(ret < 0))
1075 goto nfattr_failure;
1076
1077 proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
1078 ret = ctnetlink_dump_tuples_proto(skb, mask, proto);
1079 ip_conntrack_proto_put(proto);
1080 if (unlikely(ret < 0))
1081 goto nfattr_failure;
1082
1083 NFA_NEST_END(skb, nest_parms);
1084
1085 return 0;
1086
1087nfattr_failure:
1088 return -1;
1089}
1090
1091static inline int
1092ctnetlink_exp_dump_expect(struct sk_buff *skb,
1093 const struct ip_conntrack_expect *exp)
1094{
1095 struct ip_conntrack *master = exp->master;
1096 __be32 timeout = htonl((exp->timeout.expires - jiffies) / HZ);
1097 __be32 id = htonl(exp->id);
1098
1099 if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0)
1100 goto nfattr_failure;
1101 if (ctnetlink_exp_dump_mask(skb, &exp->tuple, &exp->mask) < 0)
1102 goto nfattr_failure;
1103 if (ctnetlink_exp_dump_tuple(skb,
1104 &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
1105 CTA_EXPECT_MASTER) < 0)
1106 goto nfattr_failure;
1107
1108 NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(__be32), &timeout);
1109 NFA_PUT(skb, CTA_EXPECT_ID, sizeof(__be32), &id);
1110
1111 return 0;
1112
1113nfattr_failure:
1114 return -1;
1115}
1116
1117static int
1118ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
1119 int event,
1120 int nowait,
1121 const struct ip_conntrack_expect *exp)
1122{
1123 struct nlmsghdr *nlh;
1124 struct nfgenmsg *nfmsg;
1125 unsigned char *b;
1126
1127 b = skb->tail;
1128
1129 event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
1130 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
1131 nfmsg = NLMSG_DATA(nlh);
1132
1133 nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
1134 nfmsg->nfgen_family = AF_INET;
1135 nfmsg->version = NFNETLINK_V0;
1136 nfmsg->res_id = 0;
1137
1138 if (ctnetlink_exp_dump_expect(skb, exp) < 0)
1139 goto nfattr_failure;
1140
1141 nlh->nlmsg_len = skb->tail - b;
1142 return skb->len;
1143
1144nlmsg_failure:
1145nfattr_failure:
1146 skb_trim(skb, b - skb->data);
1147 return -1;
1148}
1149
1150#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
1151static int ctnetlink_expect_event(struct notifier_block *this,
1152 unsigned long events, void *ptr)
1153{
1154 struct nlmsghdr *nlh;
1155 struct nfgenmsg *nfmsg;
1156 struct ip_conntrack_expect *exp = (struct ip_conntrack_expect *)ptr;
1157 struct sk_buff *skb;
1158 unsigned int type;
1159 unsigned char *b;
1160 int flags = 0;
1161
1162 if (events & IPEXP_NEW) {
1163 type = IPCTNL_MSG_EXP_NEW;
1164 flags = NLM_F_CREATE|NLM_F_EXCL;
1165 } else
1166 return NOTIFY_DONE;
1167
1168 if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
1169 return NOTIFY_DONE;
1170
1171 skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
1172 if (!skb)
1173 return NOTIFY_DONE;
1174
1175 b = skb->tail;
1176
1177 type |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
1178 nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
1179 nfmsg = NLMSG_DATA(nlh);
1180
1181 nlh->nlmsg_flags = flags;
1182 nfmsg->nfgen_family = AF_INET;
1183 nfmsg->version = NFNETLINK_V0;
1184 nfmsg->res_id = 0;
1185
1186 if (ctnetlink_exp_dump_expect(skb, exp) < 0)
1187 goto nfattr_failure;
1188
1189 nlh->nlmsg_len = skb->tail - b;
1190 nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0);
1191 return NOTIFY_DONE;
1192
1193nlmsg_failure:
1194nfattr_failure:
1195 kfree_skb(skb);
1196 return NOTIFY_DONE;
1197}
1198#endif
1199
1200static int
1201ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
1202{
1203 struct ip_conntrack_expect *exp = NULL;
1204 struct list_head *i;
1205 u_int32_t *id = (u_int32_t *) &cb->args[0];
1206
1207 read_lock_bh(&ip_conntrack_lock);
1208 list_for_each_prev(i, &ip_conntrack_expect_list) {
1209 exp = (struct ip_conntrack_expect *) i;
1210 if (exp->id <= *id)
1211 continue;
1212 if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid,
1213 cb->nlh->nlmsg_seq,
1214 IPCTNL_MSG_EXP_NEW,
1215 1, exp) < 0)
1216 goto out;
1217 *id = exp->id;
1218 }
1219out:
1220 read_unlock_bh(&ip_conntrack_lock);
1221
1222 return skb->len;
1223}
1224
1225static const size_t cta_min_exp[CTA_EXPECT_MAX] = {
1226 [CTA_EXPECT_TIMEOUT-1] = sizeof(__be32),
1227 [CTA_EXPECT_ID-1] = sizeof(__be32)
1228};
1229
1230static int
1231ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
1232 struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
1233{
1234 struct ip_conntrack_tuple tuple;
1235 struct ip_conntrack_expect *exp;
1236 struct sk_buff *skb2;
1237 int err = 0;
1238
1239 if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
1240 return -EINVAL;
1241
1242 if (nlh->nlmsg_flags & NLM_F_DUMP) {
1243 struct nfgenmsg *msg = NLMSG_DATA(nlh);
1244 u32 rlen;
1245
1246 if (msg->nfgen_family != AF_INET)
1247 return -EAFNOSUPPORT;
1248
1249 if ((*errp = netlink_dump_start(ctnl, skb, nlh,
1250 ctnetlink_exp_dump_table,
1251 ctnetlink_done)) != 0)
1252 return -EINVAL;
1253 rlen = NLMSG_ALIGN(nlh->nlmsg_len);
1254 if (rlen > skb->len)
1255 rlen = skb->len;
1256 skb_pull(skb, rlen);
1257 return 0;
1258 }
1259
1260 if (cda[CTA_EXPECT_MASTER-1])
1261 err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER);
1262 else
1263 return -EINVAL;
1264
1265 if (err < 0)
1266 return err;
1267
1268 exp = ip_conntrack_expect_find_get(&tuple);
1269 if (!exp)
1270 return -ENOENT;
1271
1272 if (cda[CTA_EXPECT_ID-1]) {
1273 __be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
1274 if (exp->id != ntohl(id)) {
1275 ip_conntrack_expect_put(exp);
1276 return -ENOENT;
1277 }
1278 }
1279
1280 err = -ENOMEM;
1281 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1282 if (!skb2)
1283 goto out;
1284
1285 err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid,
1286 nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
1287 1, exp);
1288 if (err <= 0)
1289 goto free;
1290
1291 ip_conntrack_expect_put(exp);
1292
1293 return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1294
1295free:
1296 kfree_skb(skb2);
1297out:
1298 ip_conntrack_expect_put(exp);
1299 return err;
1300}
1301
1302static int
1303ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
1304 struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
1305{
1306 struct ip_conntrack_expect *exp, *tmp;
1307 struct ip_conntrack_tuple tuple;
1308 struct ip_conntrack_helper *h;
1309 int err;
1310
1311 if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
1312 return -EINVAL;
1313
1314 if (cda[CTA_EXPECT_TUPLE-1]) {
1315 /* delete a single expect by tuple */
1316 err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
1317 if (err < 0)
1318 return err;
1319
1320 /* bump usage count to 2 */
1321 exp = ip_conntrack_expect_find_get(&tuple);
1322 if (!exp)
1323 return -ENOENT;
1324
1325 if (cda[CTA_EXPECT_ID-1]) {
1326 __be32 id =
1327 *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
1328 if (exp->id != ntohl(id)) {
1329 ip_conntrack_expect_put(exp);
1330 return -ENOENT;
1331 }
1332 }
1333
1334 /* after list removal, usage count == 1 */
1335 ip_conntrack_unexpect_related(exp);
1336 /* have to put what we 'get' above.
1337 * after this line usage count == 0 */
1338 ip_conntrack_expect_put(exp);
1339 } else if (cda[CTA_EXPECT_HELP_NAME-1]) {
1340 char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]);
1341
1342 /* delete all expectations for this helper */
1343 write_lock_bh(&ip_conntrack_lock);
1344 h = __ip_conntrack_helper_find_byname(name);
1345 if (!h) {
1346 write_unlock_bh(&ip_conntrack_lock);
1347 return -EINVAL;
1348 }
1349 list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
1350 list) {
1351 if (exp->master->helper == h
1352 && del_timer(&exp->timeout)) {
1353 ip_ct_unlink_expect(exp);
1354 ip_conntrack_expect_put(exp);
1355 }
1356 }
1357 write_unlock_bh(&ip_conntrack_lock);
1358 } else {
1359 /* This basically means we have to flush everything*/
1360 write_lock_bh(&ip_conntrack_lock);
1361 list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
1362 list) {
1363 if (del_timer(&exp->timeout)) {
1364 ip_ct_unlink_expect(exp);
1365 ip_conntrack_expect_put(exp);
1366 }
1367 }
1368 write_unlock_bh(&ip_conntrack_lock);
1369 }
1370
1371 return 0;
1372}
1373static int
1374ctnetlink_change_expect(struct ip_conntrack_expect *x, struct nfattr *cda[])
1375{
1376 return -EOPNOTSUPP;
1377}
1378
1379static int
1380ctnetlink_create_expect(struct nfattr *cda[])
1381{
1382 struct ip_conntrack_tuple tuple, mask, master_tuple;
1383 struct ip_conntrack_tuple_hash *h = NULL;
1384 struct ip_conntrack_expect *exp;
1385 struct ip_conntrack *ct;
1386 int err = 0;
1387
1388 /* caller guarantees that those three CTA_EXPECT_* exist */
1389 err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
1390 if (err < 0)
1391 return err;
1392 err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK);
1393 if (err < 0)
1394 return err;
1395 err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER);
1396 if (err < 0)
1397 return err;
1398
1399 /* Look for master conntrack of this expectation */
1400 h = ip_conntrack_find_get(&master_tuple, NULL);
1401 if (!h)
1402 return -ENOENT;
1403 ct = tuplehash_to_ctrack(h);
1404
1405 if (!ct->helper) {
1406		/* this conntrack has no helper, abort */
1407 err = -EINVAL;
1408 goto out;
1409 }
1410
1411 exp = ip_conntrack_expect_alloc(ct);
1412 if (!exp) {
1413 err = -ENOMEM;
1414 goto out;
1415 }
1416
1417 exp->expectfn = NULL;
1418 exp->flags = 0;
1419 exp->master = ct;
1420 memcpy(&exp->tuple, &tuple, sizeof(struct ip_conntrack_tuple));
1421 memcpy(&exp->mask, &mask, sizeof(struct ip_conntrack_tuple));
1422
1423 err = ip_conntrack_expect_related(exp);
1424 ip_conntrack_expect_put(exp);
1425
1426out:
1427 ip_conntrack_put(tuplehash_to_ctrack(h));
1428 return err;
1429}
1430
1431static int
1432ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
1433 struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
1434{
1435 struct ip_conntrack_tuple tuple;
1436 struct ip_conntrack_expect *exp;
1437 int err = 0;
1438
1439 if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
1440 return -EINVAL;
1441
1442 if (!cda[CTA_EXPECT_TUPLE-1]
1443 || !cda[CTA_EXPECT_MASK-1]
1444 || !cda[CTA_EXPECT_MASTER-1])
1445 return -EINVAL;
1446
1447 err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
1448 if (err < 0)
1449 return err;
1450
1451 write_lock_bh(&ip_conntrack_lock);
1452 exp = __ip_conntrack_expect_find(&tuple);
1453
1454 if (!exp) {
1455 write_unlock_bh(&ip_conntrack_lock);
1456 err = -ENOENT;
1457 if (nlh->nlmsg_flags & NLM_F_CREATE)
1458 err = ctnetlink_create_expect(cda);
1459 return err;
1460 }
1461
1462 err = -EEXIST;
1463 if (!(nlh->nlmsg_flags & NLM_F_EXCL))
1464 err = ctnetlink_change_expect(exp, cda);
1465 write_unlock_bh(&ip_conntrack_lock);
1466
1467 return err;
1468}
1469
1470#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
1471static struct notifier_block ctnl_notifier = {
1472 .notifier_call = ctnetlink_conntrack_event,
1473};
1474
1475static struct notifier_block ctnl_notifier_exp = {
1476 .notifier_call = ctnetlink_expect_event,
1477};
1478#endif
1479
1480static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
1481 [IPCTNL_MSG_CT_NEW] = { .call = ctnetlink_new_conntrack,
1482 .attr_count = CTA_MAX, },
1483 [IPCTNL_MSG_CT_GET] = { .call = ctnetlink_get_conntrack,
1484 .attr_count = CTA_MAX, },
1485 [IPCTNL_MSG_CT_DELETE] = { .call = ctnetlink_del_conntrack,
1486 .attr_count = CTA_MAX, },
1487 [IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack,
1488 .attr_count = CTA_MAX, },
1489};
1490
1491static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
1492 [IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect,
1493 .attr_count = CTA_EXPECT_MAX, },
1494 [IPCTNL_MSG_EXP_NEW] = { .call = ctnetlink_new_expect,
1495 .attr_count = CTA_EXPECT_MAX, },
1496 [IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect,
1497 .attr_count = CTA_EXPECT_MAX, },
1498};
1499
1500static struct nfnetlink_subsystem ctnl_subsys = {
1501 .name = "conntrack",
1502 .subsys_id = NFNL_SUBSYS_CTNETLINK,
1503 .cb_count = IPCTNL_MSG_MAX,
1504 .cb = ctnl_cb,
1505};
1506
1507static struct nfnetlink_subsystem ctnl_exp_subsys = {
1508 .name = "conntrack_expect",
1509 .subsys_id = NFNL_SUBSYS_CTNETLINK_EXP,
1510 .cb_count = IPCTNL_MSG_EXP_MAX,
1511 .cb = ctnl_exp_cb,
1512};
1513
1514MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK);
1515MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP);
1516
1517static int __init ctnetlink_init(void)
1518{
1519 int ret;
1520
1521 printk("ctnetlink v%s: registering with nfnetlink.\n", version);
1522 ret = nfnetlink_subsys_register(&ctnl_subsys);
1523 if (ret < 0) {
1524 printk("ctnetlink_init: cannot register with nfnetlink.\n");
1525 goto err_out;
1526 }
1527
1528 ret = nfnetlink_subsys_register(&ctnl_exp_subsys);
1529 if (ret < 0) {
1530 printk("ctnetlink_init: cannot register exp with nfnetlink.\n");
1531 goto err_unreg_subsys;
1532 }
1533
1534#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
1535 ret = ip_conntrack_register_notifier(&ctnl_notifier);
1536 if (ret < 0) {
1537 printk("ctnetlink_init: cannot register notifier.\n");
1538 goto err_unreg_exp_subsys;
1539 }
1540
1541 ret = ip_conntrack_expect_register_notifier(&ctnl_notifier_exp);
1542 if (ret < 0) {
1543		printk("ctnetlink_init: cannot register expect notifier.\n");
1544 goto err_unreg_notifier;
1545 }
1546#endif
1547
1548 return 0;
1549
1550#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
1551err_unreg_notifier:
1552 ip_conntrack_unregister_notifier(&ctnl_notifier);
1553err_unreg_exp_subsys:
1554 nfnetlink_subsys_unregister(&ctnl_exp_subsys);
1555#endif
1556err_unreg_subsys:
1557 nfnetlink_subsys_unregister(&ctnl_subsys);
1558err_out:
1559 return ret;
1560}
1561
1562static void __exit ctnetlink_exit(void)
1563{
1564 printk("ctnetlink: unregistering from nfnetlink.\n");
1565
1566#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
1567 ip_conntrack_expect_unregister_notifier(&ctnl_notifier_exp);
1568 ip_conntrack_unregister_notifier(&ctnl_notifier);
1569#endif
1570
1571 nfnetlink_subsys_unregister(&ctnl_exp_subsys);
1572 nfnetlink_subsys_unregister(&ctnl_subsys);
1573 return;
1574}
1575
1576module_init(ctnetlink_init);
1577module_exit(ctnetlink_exit);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
deleted file mode 100644
index 88af82e98658..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c
+++ /dev/null
@@ -1,74 +0,0 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/types.h>
10#include <linux/timer.h>
11#include <linux/netfilter.h>
12#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
13
14unsigned int ip_ct_generic_timeout __read_mostly = 600*HZ;
15
16static int generic_pkt_to_tuple(const struct sk_buff *skb,
17 unsigned int dataoff,
18 struct ip_conntrack_tuple *tuple)
19{
20 tuple->src.u.all = 0;
21 tuple->dst.u.all = 0;
22
23 return 1;
24}
25
26static int generic_invert_tuple(struct ip_conntrack_tuple *tuple,
27 const struct ip_conntrack_tuple *orig)
28{
29 tuple->src.u.all = 0;
30 tuple->dst.u.all = 0;
31
32 return 1;
33}
34
35/* Print out the per-protocol part of the tuple. */
36static int generic_print_tuple(struct seq_file *s,
37 const struct ip_conntrack_tuple *tuple)
38{
39 return 0;
40}
41
42/* Print out the private part of the conntrack. */
43static int generic_print_conntrack(struct seq_file *s,
44 const struct ip_conntrack *state)
45{
46 return 0;
47}
48
49/* Returns verdict for packet, or -1 for invalid. */
50static int packet(struct ip_conntrack *conntrack,
51 const struct sk_buff *skb,
52 enum ip_conntrack_info ctinfo)
53{
54 ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout);
55 return NF_ACCEPT;
56}
57
58/* Called when a new connection for this protocol found. */
59static int new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
60{
61 return 1;
62}
63
64struct ip_conntrack_protocol ip_conntrack_generic_protocol =
65{
66 .proto = 0,
67 .name = "unknown",
68 .pkt_to_tuple = generic_pkt_to_tuple,
69 .invert_tuple = generic_invert_tuple,
70 .print_tuple = generic_print_tuple,
71 .print_conntrack = generic_print_conntrack,
72 .packet = packet,
73 .new = new,
74};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
deleted file mode 100644
index ac1c49ef36a9..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ /dev/null
@@ -1,328 +0,0 @@
1/*
2 * ip_conntrack_proto_gre.c - Version 3.0
3 *
4 * Connection tracking protocol helper module for GRE.
5 *
6 * GRE is a generic encapsulation protocol, which is generally not well
7 * suited for NAT, as it has no protocol-specific parts such as port numbers.
8 *
9 * It has an optional key field, which may help us distinguish two
10 * connections between the same two hosts.
11 *
12 * GRE is defined in RFC 1701 and RFC 1702, as well as in RFC 2784.
13 *
14 * PPTP is built on top of a modified version of GRE, and has a mandatory
15 * field called "CallID", which serves the same purpose as the key field
16 * in plain GRE.
17 *
18 * Documentation about PPTP can be found in RFC 2637
19 *
20 * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
21 *
22 * Development of this code funded by Astaro AG (http://www.astaro.com/)
23 *
24 */
25
26#include <linux/module.h>
27#include <linux/types.h>
28#include <linux/timer.h>
29#include <linux/netfilter.h>
30#include <linux/ip.h>
31#include <linux/in.h>
32#include <linux/list.h>
33#include <linux/seq_file.h>
34#include <linux/interrupt.h>
35
36static DEFINE_RWLOCK(ip_ct_gre_lock);
37
38#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
39#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
40#include <linux/netfilter_ipv4/ip_conntrack_core.h>
41
42#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
43#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
44
45MODULE_LICENSE("GPL");
46MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
47MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE");
48
49/* shamelessly stolen from ip_conntrack_proto_udp.c */
50#define GRE_TIMEOUT (30*HZ)
51#define GRE_STREAM_TIMEOUT (180*HZ)
52
53#if 0
54#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
55#define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x\n", \
56 NIPQUAD((x)->src.ip), ntohs((x)->src.u.gre.key), \
57 NIPQUAD((x)->dst.ip), ntohs((x)->dst.u.gre.key))
58#else
59#define DEBUGP(x, args...)
60#define DUMP_TUPLE_GRE(x)
61#endif
62
63/* GRE KEYMAP HANDLING FUNCTIONS */
64static LIST_HEAD(gre_keymap_list);
65
66static inline int gre_key_cmpfn(const struct ip_ct_gre_keymap *km,
67 const struct ip_conntrack_tuple *t)
68{
69 return ((km->tuple.src.ip == t->src.ip) &&
70 (km->tuple.dst.ip == t->dst.ip) &&
71 (km->tuple.dst.protonum == t->dst.protonum) &&
72 (km->tuple.dst.u.all == t->dst.u.all));
73}
74
75/* look up the source key for a given tuple */
76static __be16 gre_keymap_lookup(struct ip_conntrack_tuple *t)
77{
78 struct ip_ct_gre_keymap *km;
79 __be16 key = 0;
80
81 read_lock_bh(&ip_ct_gre_lock);
82 list_for_each_entry(km, &gre_keymap_list, list) {
83 if (gre_key_cmpfn(km, t)) {
84 key = km->tuple.src.u.gre.key;
85 break;
86 }
87 }
88 read_unlock_bh(&ip_ct_gre_lock);
89
90 DEBUGP("lookup src key 0x%x for ", key);
91 DUMP_TUPLE_GRE(t);
92
93 return key;
94}
95
96/* add a single keymap entry, associate with specified master ct */
97int
98ip_ct_gre_keymap_add(struct ip_conntrack *ct,
99 struct ip_conntrack_tuple *t, int reply)
100{
101 struct ip_ct_gre_keymap **exist_km, *km;
102
103 if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
104 DEBUGP("refusing to add GRE keymap to non-pptp session\n");
105 return -1;
106 }
107
108 if (!reply)
109 exist_km = &ct->help.ct_pptp_info.keymap_orig;
110 else
111 exist_km = &ct->help.ct_pptp_info.keymap_reply;
112
113 if (*exist_km) {
114 /* check whether it's a retransmission */
115 list_for_each_entry(km, &gre_keymap_list, list) {
116 if (gre_key_cmpfn(km, t) && km == *exist_km)
117 return 0;
118 }
119 DEBUGP("trying to override keymap_%s for ct %p\n",
120 reply? "reply":"orig", ct);
121 return -EEXIST;
122 }
123
124 km = kmalloc(sizeof(*km), GFP_ATOMIC);
125 if (!km)
126 return -ENOMEM;
127
128 memcpy(&km->tuple, t, sizeof(*t));
129 *exist_km = km;
130
131 DEBUGP("adding new entry %p: ", km);
132 DUMP_TUPLE_GRE(&km->tuple);
133
134 write_lock_bh(&ip_ct_gre_lock);
135 list_add_tail(&km->list, &gre_keymap_list);
136 write_unlock_bh(&ip_ct_gre_lock);
137
138 return 0;
139}
140
141/* destroy the keymap entries associated with specified master ct */
142void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct)
143{
144 DEBUGP("entering for ct %p\n", ct);
145
146 if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
147 DEBUGP("refusing to destroy GRE keymap for non-pptp session\n");
148 return;
149 }
150
151 write_lock_bh(&ip_ct_gre_lock);
152 if (ct->help.ct_pptp_info.keymap_orig) {
153 DEBUGP("removing %p from list\n",
154 ct->help.ct_pptp_info.keymap_orig);
155 list_del(&ct->help.ct_pptp_info.keymap_orig->list);
156 kfree(ct->help.ct_pptp_info.keymap_orig);
157 ct->help.ct_pptp_info.keymap_orig = NULL;
158 }
159 if (ct->help.ct_pptp_info.keymap_reply) {
160 DEBUGP("removing %p from list\n",
161 ct->help.ct_pptp_info.keymap_reply);
162 list_del(&ct->help.ct_pptp_info.keymap_reply->list);
163 kfree(ct->help.ct_pptp_info.keymap_reply);
164 ct->help.ct_pptp_info.keymap_reply = NULL;
165 }
166 write_unlock_bh(&ip_ct_gre_lock);
167}
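
A hedged sketch, not part of this patch, of how a helper such as the PPTP helper might drive these two calls when setting up a session; the function and its tuple arguments are hypothetical:

/* Associate the original- and reply-direction GRE keys with the PPTP
 * master conntrack so that gre_keymap_lookup() can find them later. */
static int example_pptp_keymap_setup(struct ip_conntrack *master,
				     struct ip_conntrack_tuple *orig,
				     struct ip_conntrack_tuple *reply)
{
	int ret;

	ret = ip_ct_gre_keymap_add(master, orig, 0);	/* original dir */
	if (ret < 0)
		return ret;

	ret = ip_ct_gre_keymap_add(master, reply, 1);	/* reply dir */
	if (ret < 0)
		ip_ct_gre_keymap_destroy(master);	/* undo both */
	return ret;
}
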
168
169
170/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
171
172/* invert gre part of tuple */
173static int gre_invert_tuple(struct ip_conntrack_tuple *tuple,
174 const struct ip_conntrack_tuple *orig)
175{
176 tuple->dst.u.gre.key = orig->src.u.gre.key;
177 tuple->src.u.gre.key = orig->dst.u.gre.key;
178
179 return 1;
180}
181
182/* gre hdr info to tuple */
183static int gre_pkt_to_tuple(const struct sk_buff *skb,
184 unsigned int dataoff,
185 struct ip_conntrack_tuple *tuple)
186{
187 struct gre_hdr_pptp _pgrehdr, *pgrehdr;
188 __be16 srckey;
189 struct gre_hdr _grehdr, *grehdr;
190
191 /* first only delinearize old RFC1701 GRE header */
192 grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
193 if (!grehdr || grehdr->version != GRE_VERSION_PPTP) {
194 /* try to behave like "ip_conntrack_proto_generic" */
195 tuple->src.u.all = 0;
196 tuple->dst.u.all = 0;
197 return 1;
198 }
199
200 /* PPTP header is variable length, only need up to the call_id field */
201 pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr);
202 if (!pgrehdr)
203 return 1;
204
205 if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) {
206 DEBUGP("GRE_VERSION_PPTP but unknown proto\n");
207 return 0;
208 }
209
210 tuple->dst.u.gre.key = pgrehdr->call_id;
211 srckey = gre_keymap_lookup(tuple);
212 tuple->src.u.gre.key = srckey;
213
214 return 1;
215}
216
217/* print gre part of tuple */
218static int gre_print_tuple(struct seq_file *s,
219 const struct ip_conntrack_tuple *tuple)
220{
221 return seq_printf(s, "srckey=0x%x dstkey=0x%x ",
222 ntohs(tuple->src.u.gre.key),
223 ntohs(tuple->dst.u.gre.key));
224}
225
226/* print private data for conntrack */
227static int gre_print_conntrack(struct seq_file *s,
228 const struct ip_conntrack *ct)
229{
230 return seq_printf(s, "timeout=%u, stream_timeout=%u ",
231 (ct->proto.gre.timeout / HZ),
232 (ct->proto.gre.stream_timeout / HZ));
233}
234
235/* Returns verdict for packet, and may modify conntrack */
236static int gre_packet(struct ip_conntrack *ct,
237 const struct sk_buff *skb,
238 enum ip_conntrack_info conntrackinfo)
239{
240 /* If we've seen traffic both ways, this is a GRE connection.
241 * Extend timeout. */
242 if (ct->status & IPS_SEEN_REPLY) {
243 ip_ct_refresh_acct(ct, conntrackinfo, skb,
244 ct->proto.gre.stream_timeout);
245 /* Also, more likely to be important, and not a probe. */
246 set_bit(IPS_ASSURED_BIT, &ct->status);
247 ip_conntrack_event_cache(IPCT_STATUS, skb);
248 } else
249 ip_ct_refresh_acct(ct, conntrackinfo, skb,
250 ct->proto.gre.timeout);
251
252 return NF_ACCEPT;
253}
254
255/* Called when a new connection for this protocol found. */
256static int gre_new(struct ip_conntrack *ct,
257 const struct sk_buff *skb)
258{
259 DEBUGP(": ");
260 DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
261
262 /* Initialize to sane values. Ideally a conntrack helper
263 * (e.g. the PPTP helper) will increase them. */
264 ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT;
265 ct->proto.gre.timeout = GRE_TIMEOUT;
266
267 return 1;
268}
269
270/* Called when a conntrack entry has already been removed from the hashes
271 * and is about to be deleted from memory */
272static void gre_destroy(struct ip_conntrack *ct)
273{
274 struct ip_conntrack *master = ct->master;
275 DEBUGP(" entering\n");
276
277 if (!master)
278 DEBUGP("no master !?!\n");
279 else
280 ip_ct_gre_keymap_destroy(master);
281}
282
283/* protocol helper struct */
284static struct ip_conntrack_protocol gre = {
285 .proto = IPPROTO_GRE,
286 .name = "gre",
287 .pkt_to_tuple = gre_pkt_to_tuple,
288 .invert_tuple = gre_invert_tuple,
289 .print_tuple = gre_print_tuple,
290 .print_conntrack = gre_print_conntrack,
291 .packet = gre_packet,
292 .new = gre_new,
293 .destroy = gre_destroy,
294 .me = THIS_MODULE,
295#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
296 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
297 .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
298 .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
299#endif
300};
301
302/* ip_conntrack_proto_gre initialization */
303int __init ip_ct_proto_gre_init(void)
304{
305 return ip_conntrack_protocol_register(&gre);
306}
307
308/* This cannot be __exit, as it is invoked from ip_conntrack_helper_pptp.c's
309 * init() code on errors.
310 */
311void ip_ct_proto_gre_fini(void)
312{
313 struct list_head *pos, *n;
314
315 /* delete all keymap entries */
316 write_lock_bh(&ip_ct_gre_lock);
317 list_for_each_safe(pos, n, &gre_keymap_list) {
318 DEBUGP("deleting keymap %p at module unload time\n", pos);
319 list_del(pos);
320 kfree(pos);
321 }
322 write_unlock_bh(&ip_ct_gre_lock);
323
324 ip_conntrack_protocol_unregister(&gre);
325}
326
327EXPORT_SYMBOL(ip_ct_gre_keymap_add);
328EXPORT_SYMBOL(ip_ct_gre_keymap_destroy);
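
For reference, the PPTP flavour of the GRE header that gre_pkt_to_tuple() parses is declared in ip_conntrack_proto_gre.h along roughly these lines; the 8-byte skb_header_pointer() read above covers exactly the fields up to and including call_id (1 + 1 + 2 + 2 + 2 = 8 bytes):

struct gre_hdr_pptp {
	__u8   flags;		/* bitfield */
	__u8   version;		/* should be GRE_VERSION_PPTP */
	__be16 protocol;	/* should be GRE_PROTOCOL_PPTP */
	__be16 payload_len;	/* PPP payload size, excluding this header */
	__be16 call_id;		/* peer's call_id for this session */
	__be32 seq;		/* sequence number, present if S == 1 */
	__be32 ack;		/* highest received sequence, present if A == 1 */
};
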
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
deleted file mode 100644
index ad70c81a21e0..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ /dev/null
@@ -1,315 +0,0 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/types.h>
10#include <linux/timer.h>
11#include <linux/netfilter.h>
12#include <linux/in.h>
13#include <linux/icmp.h>
14#include <linux/seq_file.h>
15#include <linux/skbuff.h>
16#include <net/ip.h>
17#include <net/checksum.h>
18#include <linux/netfilter_ipv4.h>
19#include <linux/netfilter_ipv4/ip_conntrack.h>
20#include <linux/netfilter_ipv4/ip_conntrack_core.h>
21#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
22
23unsigned int ip_ct_icmp_timeout __read_mostly = 30*HZ;
24
25#if 0
26#define DEBUGP printk
27#else
28#define DEBUGP(format, args...)
29#endif
30
31static int icmp_pkt_to_tuple(const struct sk_buff *skb,
32 unsigned int dataoff,
33 struct ip_conntrack_tuple *tuple)
34{
35 struct icmphdr _hdr, *hp;
36
37 hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
38 if (hp == NULL)
39 return 0;
40
41 tuple->dst.u.icmp.type = hp->type;
42 tuple->src.u.icmp.id = hp->un.echo.id;
43 tuple->dst.u.icmp.code = hp->code;
44
45 return 1;
46}
47
48/* Inverse ICMP type, stored as type + 1; a zero entry means no mapping. */
49static const u_int8_t invmap[] = {
50 [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
51 [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
52 [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
53 [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
54 [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
55 [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
56 [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
57 [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
58};
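
The +1 offset exists because ICMP_ECHOREPLY is type 0: without it, a zero entry could not distinguish "maps to type 0" from "no inverse defined". A small sanity-check sketch (the function name is hypothetical):

static void invmap_demo(void)
{
	u_int8_t type = ICMP_ECHO;

	/* A zero entry means "no inverse"; otherwise subtract the 1 the
	 * table added, which yields ICMP_ECHOREPLY (type 0) here. */
	if (type < sizeof(invmap) && invmap[type])
		printk("inverse of type %u is %u\n", type, invmap[type] - 1);
}
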
59
60static int icmp_invert_tuple(struct ip_conntrack_tuple *tuple,
61 const struct ip_conntrack_tuple *orig)
62{
63 if (orig->dst.u.icmp.type >= sizeof(invmap)
64 || !invmap[orig->dst.u.icmp.type])
65 return 0;
66
67 tuple->src.u.icmp.id = orig->src.u.icmp.id;
68 tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
69 tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
70 return 1;
71}
72
73/* Print out the per-protocol part of the tuple. */
74static int icmp_print_tuple(struct seq_file *s,
75 const struct ip_conntrack_tuple *tuple)
76{
77 return seq_printf(s, "type=%u code=%u id=%u ",
78 tuple->dst.u.icmp.type,
79 tuple->dst.u.icmp.code,
80 ntohs(tuple->src.u.icmp.id));
81}
82
83/* Print out the private part of the conntrack. */
84static int icmp_print_conntrack(struct seq_file *s,
85 const struct ip_conntrack *conntrack)
86{
87 return 0;
88}
89
90/* Returns verdict for packet, or -1 for invalid. */
91static int icmp_packet(struct ip_conntrack *ct,
92 const struct sk_buff *skb,
93 enum ip_conntrack_info ctinfo)
94{
95 /* Try to delete connection immediately after all replies:
96 won't actually vanish as we still have skb, and del_timer
97 means this will only run once even if count hits zero twice
98 (theoretically possible with SMP) */
99 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
100 if (atomic_dec_and_test(&ct->proto.icmp.count)
101 && del_timer(&ct->timeout))
102 ct->timeout.function((unsigned long)ct);
103 } else {
104 atomic_inc(&ct->proto.icmp.count);
105 ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
106 ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout);
107 }
108
109 return NF_ACCEPT;
110}
111
112/* Called when a new connection for this protocol found. */
113static int icmp_new(struct ip_conntrack *conntrack,
114 const struct sk_buff *skb)
115{
116 static const u_int8_t valid_new[] = {
117 [ICMP_ECHO] = 1,
118 [ICMP_TIMESTAMP] = 1,
119 [ICMP_INFO_REQUEST] = 1,
120 [ICMP_ADDRESS] = 1
121 };
122
123 if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
124 || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
125 /* Can't create a new ICMP `conn' with this. */
126 DEBUGP("icmp: can't create new conn with type %u\n",
127 conntrack->tuplehash[0].tuple.dst.u.icmp.type);
128 DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
129 return 0;
130 }
131 atomic_set(&conntrack->proto.icmp.count, 0);
132 return 1;
133}
134
135static int
136icmp_error_message(struct sk_buff *skb,
137 enum ip_conntrack_info *ctinfo,
138 unsigned int hooknum)
139{
140 struct ip_conntrack_tuple innertuple, origtuple;
141 struct {
142 struct icmphdr icmp;
143 struct iphdr ip;
144 } _in, *inside;
145 struct ip_conntrack_protocol *innerproto;
146 struct ip_conntrack_tuple_hash *h;
147 int dataoff;
148
149 IP_NF_ASSERT(skb->nfct == NULL);
150
151 /* Not enough header? */
152 inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in);
153 if (inside == NULL)
154 return -NF_ACCEPT;
155
156 /* Ignore ICMPs containing fragments (shouldn't happen) */
157 if (inside->ip.frag_off & htons(IP_OFFSET)) {
158 DEBUGP("icmp_error_track: fragment of proto %u\n",
159 inside->ip.protocol);
160 return -NF_ACCEPT;
161 }
162
163 innerproto = ip_conntrack_proto_find_get(inside->ip.protocol);
164 dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp) + inside->ip.ihl*4;
165 /* Are they talking about one of our connections? */
166 if (!ip_ct_get_tuple(&inside->ip, skb, dataoff, &origtuple, innerproto)) {
167 DEBUGP("icmp_error: ! get_tuple p=%u", inside->ip.protocol);
168 ip_conntrack_proto_put(innerproto);
169 return -NF_ACCEPT;
170 }
171
172 /* Ordinarily, we'd expect the inverted tupleproto, but it's
173 been preserved inside the ICMP. */
174 if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
175 DEBUGP("icmp_error_track: Can't invert tuple\n");
176 ip_conntrack_proto_put(innerproto);
177 return -NF_ACCEPT;
178 }
179 ip_conntrack_proto_put(innerproto);
180
181 *ctinfo = IP_CT_RELATED;
182
183 h = ip_conntrack_find_get(&innertuple, NULL);
184 if (!h) {
185 /* Locally generated ICMPs will match inverted if they
186 haven't been SNAT'ed yet */
187 /* FIXME: NAT code has to handle half-done double NAT --RR */
188 if (hooknum == NF_IP_LOCAL_OUT)
189 h = ip_conntrack_find_get(&origtuple, NULL);
190
191 if (!h) {
192 DEBUGP("icmp_error_track: no match\n");
193 return -NF_ACCEPT;
194 }
195 /* Reverse direction from that found */
196 if (DIRECTION(h) != IP_CT_DIR_REPLY)
197 *ctinfo += IP_CT_IS_REPLY;
198 } else {
199 if (DIRECTION(h) == IP_CT_DIR_REPLY)
200 *ctinfo += IP_CT_IS_REPLY;
201 }
202
203 /* Update skb to refer to this connection */
204 skb->nfct = &tuplehash_to_ctrack(h)->ct_general;
205 skb->nfctinfo = *ctinfo;
206 return -NF_ACCEPT;
207}
208
209/* Small and modified version of icmp_rcv */
210static int
211icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
212 unsigned int hooknum)
213{
214 struct icmphdr _ih, *icmph;
215
216 /* Not enough header? */
217 icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih);
218 if (icmph == NULL) {
219 if (LOG_INVALID(IPPROTO_ICMP))
220 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
221 "ip_ct_icmp: short packet ");
222 return -NF_ACCEPT;
223 }
224
225 /* See ip_conntrack_proto_tcp.c */
226 if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
227 nf_ip_checksum(skb, hooknum, skb->nh.iph->ihl * 4, 0)) {
228 if (LOG_INVALID(IPPROTO_ICMP))
229 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
230 "ip_ct_icmp: bad ICMP checksum ");
231 return -NF_ACCEPT;
232 }
233
234 /*
235 * 18 is the highest 'known' ICMP type. Anything else is a mystery
236 *
237 * RFC 1122: 3.2.2 Unknown ICMP message types MUST be silently
238 * discarded.
239 */
240 if (icmph->type > NR_ICMP_TYPES) {
241 if (LOG_INVALID(IPPROTO_ICMP))
242 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
243 "ip_ct_icmp: invalid ICMP type ");
244 return -NF_ACCEPT;
245 }
246
247 /* Need to track icmp error message? */
248 if (icmph->type != ICMP_DEST_UNREACH
249 && icmph->type != ICMP_SOURCE_QUENCH
250 && icmph->type != ICMP_TIME_EXCEEDED
251 && icmph->type != ICMP_PARAMETERPROB
252 && icmph->type != ICMP_REDIRECT)
253 return NF_ACCEPT;
254
255 return icmp_error_message(skb, ctinfo, hooknum);
256}
257
258#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
259 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
260static int icmp_tuple_to_nfattr(struct sk_buff *skb,
261 const struct ip_conntrack_tuple *t)
262{
263 NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(__be16),
264 &t->src.u.icmp.id);
265 NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t),
266 &t->dst.u.icmp.type);
267 NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t),
268 &t->dst.u.icmp.code);
269
270 return 0;
271
272nfattr_failure:
273 return -1;
274}
275
276static int icmp_nfattr_to_tuple(struct nfattr *tb[],
277 struct ip_conntrack_tuple *tuple)
278{
279 if (!tb[CTA_PROTO_ICMP_TYPE-1]
280 || !tb[CTA_PROTO_ICMP_CODE-1]
281 || !tb[CTA_PROTO_ICMP_ID-1])
282 return -EINVAL;
283
284 tuple->dst.u.icmp.type =
285 *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]);
286 tuple->dst.u.icmp.code =
287 *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]);
288 tuple->src.u.icmp.id =
289 *(__be16 *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
290
291 if (tuple->dst.u.icmp.type >= sizeof(invmap)
292 || !invmap[tuple->dst.u.icmp.type])
293 return -EINVAL;
294
295 return 0;
296}
297#endif
298
299struct ip_conntrack_protocol ip_conntrack_protocol_icmp =
300{
301 .proto = IPPROTO_ICMP,
302 .name = "icmp",
303 .pkt_to_tuple = icmp_pkt_to_tuple,
304 .invert_tuple = icmp_invert_tuple,
305 .print_tuple = icmp_print_tuple,
306 .print_conntrack = icmp_print_conntrack,
307 .packet = icmp_packet,
308 .new = icmp_new,
309 .error = icmp_error,
310#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
311 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
312 .tuple_to_nfattr = icmp_tuple_to_nfattr,
313 .nfattr_to_tuple = icmp_nfattr_to_tuple,
314#endif
315};
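
The dataoff computation in icmp_error_message() above is easiest to check with concrete numbers; the sketch below assumes minimal 20-byte IP headers on both the outer and the embedded packet (the helper name is hypothetical):

static unsigned int example_inner_dataoff(void)
{
	unsigned int outer_ihl = 5, inner_ihl = 5;	/* 20-byte headers */

	/* outer IP (20) + ICMP header (8) + inner IP (20) = 48: the offset
	 * of the embedded transport header that ip_ct_get_tuple() parses. */
	return outer_ihl * 4 + sizeof(struct icmphdr) + inner_ihl * 4;
}
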
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
deleted file mode 100644
index e6942992b2f6..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ /dev/null
@@ -1,659 +0,0 @@
1/*
2 * Connection tracking protocol helper module for SCTP.
3 *
4 * SCTP is defined in RFC 2960. References to various sections in this code
5 * are to this RFC.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12/*
13 * Added support for proc manipulation of timeouts.
14 */
15
16#include <linux/types.h>
17#include <linux/timer.h>
18#include <linux/interrupt.h>
19#include <linux/netfilter.h>
20#include <linux/module.h>
21#include <linux/in.h>
22#include <linux/ip.h>
23#include <linux/sctp.h>
24#include <linux/string.h>
25#include <linux/seq_file.h>
26
27#include <linux/netfilter_ipv4/ip_conntrack.h>
28#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
29
30#if 0
31#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__)
32#else
33#define DEBUGP(format, args...)
34#endif
35
36/* Protects conntrack->proto.sctp */
37static DEFINE_RWLOCK(sctp_lock);
38
39/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
40 closely. They're more complex. --RR
41
42 And so for me for SCTP :D -Kiran */
43
44static const char *sctp_conntrack_names[] = {
45 "NONE",
46 "CLOSED",
47 "COOKIE_WAIT",
48 "COOKIE_ECHOED",
49 "ESTABLISHED",
50 "SHUTDOWN_SENT",
51 "SHUTDOWN_RECD",
52 "SHUTDOWN_ACK_SENT",
53};
54
55#define SECS * HZ
56#define MINS * 60 SECS
57#define HOURS * 60 MINS
58#define DAYS * 24 HOURS
59
60static unsigned int ip_ct_sctp_timeout_closed __read_mostly = 10 SECS;
61static unsigned int ip_ct_sctp_timeout_cookie_wait __read_mostly = 3 SECS;
62static unsigned int ip_ct_sctp_timeout_cookie_echoed __read_mostly = 3 SECS;
63static unsigned int ip_ct_sctp_timeout_established __read_mostly = 5 DAYS;
64static unsigned int ip_ct_sctp_timeout_shutdown_sent __read_mostly = 300 SECS / 1000;
65static unsigned int ip_ct_sctp_timeout_shutdown_recd __read_mostly = 300 SECS / 1000;
66static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS;
67
68static const unsigned int *sctp_timeouts[] = {
69 NULL, /* SCTP_CONNTRACK_NONE */
70 &ip_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */
71 &ip_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */
72 &ip_ct_sctp_timeout_cookie_echoed, /* SCTP_CONNTRACK_COOKIE_ECHOED */
73 &ip_ct_sctp_timeout_established, /* SCTP_CONNTRACK_ESTABLISHED */
74 &ip_ct_sctp_timeout_shutdown_sent, /* SCTP_CONNTRACK_SHUTDOWN_SENT */
75 &ip_ct_sctp_timeout_shutdown_recd, /* SCTP_CONNTRACK_SHUTDOWN_RECD */
76 &ip_ct_sctp_timeout_shutdown_ack_sent /* SCTP_CONNTRACK_SHUTDOWN_ACK_SENT */
77 };
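
The SECS/MINS/HOURS/DAYS macros above splice a multiplication into the token stream, so "10 SECS" expands to "10 * HZ". That is also why the two shutdown timeouts read "300 SECS / 1000": they expand to 300 * HZ / 1000, i.e. 0.3 seconds' worth of jiffies. A quick expansion check:

/* Both lines below are what the preprocessor produces from the macros. */
static unsigned int example_closed_jiffies = 10 * HZ;		/* 10 SECS */
static unsigned int example_shutdown_jiffies = 300 * HZ / 1000;	/* 0.3 s */
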
78
79#define sNO SCTP_CONNTRACK_NONE
80#define sCL SCTP_CONNTRACK_CLOSED
81#define sCW SCTP_CONNTRACK_COOKIE_WAIT
82#define sCE SCTP_CONNTRACK_COOKIE_ECHOED
83#define sES SCTP_CONNTRACK_ESTABLISHED
84#define sSS SCTP_CONNTRACK_SHUTDOWN_SENT
85#define sSR SCTP_CONNTRACK_SHUTDOWN_RECD
86#define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
87#define sIV SCTP_CONNTRACK_MAX
88
89/*
90 These are the descriptions of the states:
91
92NOTE: These state names are tantalizingly similar to the states of an
93SCTP endpoint. But the interpretation of the states is a little different,
94considering that these are the states of the connection and not of an end
95point. Please note the subtleties. -Kiran
96
97NONE - Nothing so far.
98COOKIE WAIT - We have seen an INIT chunk in the original direction, or also
99 an INIT_ACK chunk in the reply direction.
100COOKIE ECHOED - We have seen a COOKIE_ECHO chunk in the original direction.
101ESTABLISHED - We have seen a COOKIE_ACK in the reply direction.
102SHUTDOWN_SENT - We have seen a SHUTDOWN chunk in the original direction.
103SHUTDOWN_RECD - We have seen a SHUTDOWN chunk in the reply direction.
104SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
105 to that of the SHUTDOWN chunk.
106CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
107 the SHUTDOWN chunk. Connection is closed.
108*/
109
110/* TODO
111 - I have assumed that the first INIT is in the original direction.
112 This messes things up when an INIT comes in the reply direction in CLOSED
113 state.
114 - Check the error type in the reply dir before transitioning from
115cookie echoed to closed.
116 - Sec 5.2.4 of RFC 2960
117 - Multi Homing support.
118*/
119
120/* SCTP conntrack state transitions */
121static const enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
122 {
123/* ORIGINAL */
124/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
125/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA},
126/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},
127/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
128/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA},
129/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA},
130/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't have stale cookie */
131/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */
132/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in orig dir */
133/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL}
134 },
135 {
136/* REPLY */
137/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
138/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */
139/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},
140/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
141/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA},
142/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA},
143/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA},
144/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in reply dir */
145/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA},
146/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL}
147 }
148};
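
As a worked example of how a normal four-way association setup walks this table (the function name is hypothetical; the row indices follow new_state(), defined later in this file):

static enum sctp_conntrack example_handshake_walk(void)
{
	enum sctp_conntrack s = SCTP_CONNTRACK_NONE;

	/* Row indices: 0 = INIT, 1 = INIT_ACK, 6 = COOKIE_ECHO,
	 * 7 = COOKIE_ACK. */
	s = sctp_conntracks[IP_CT_DIR_ORIGINAL][0][s];	/* INIT        -> sCW */
	s = sctp_conntracks[IP_CT_DIR_REPLY][1][s];	/* INIT_ACK    -> sCW */
	s = sctp_conntracks[IP_CT_DIR_ORIGINAL][6][s];	/* COOKIE_ECHO -> sCE */
	s = sctp_conntracks[IP_CT_DIR_REPLY][7][s];	/* COOKIE_ACK  -> sES */

	return s;	/* SCTP_CONNTRACK_ESTABLISHED */
}
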
149
150static int sctp_pkt_to_tuple(const struct sk_buff *skb,
151 unsigned int dataoff,
152 struct ip_conntrack_tuple *tuple)
153{
154 sctp_sctphdr_t _hdr, *hp;
155
156 DEBUGP(__FUNCTION__);
157 DEBUGP("\n");
158
159 /* Actually only need first 8 bytes. */
160 hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
161 if (hp == NULL)
162 return 0;
163
164 tuple->src.u.sctp.port = hp->source;
165 tuple->dst.u.sctp.port = hp->dest;
166 return 1;
167}
168
169static int sctp_invert_tuple(struct ip_conntrack_tuple *tuple,
170 const struct ip_conntrack_tuple *orig)
171{
172 DEBUGP(__FUNCTION__);
173 DEBUGP("\n");
174
175 tuple->src.u.sctp.port = orig->dst.u.sctp.port;
176 tuple->dst.u.sctp.port = orig->src.u.sctp.port;
177 return 1;
178}
179
180/* Print out the per-protocol part of the tuple. */
181static int sctp_print_tuple(struct seq_file *s,
182 const struct ip_conntrack_tuple *tuple)
183{
184 DEBUGP(__FUNCTION__);
185 DEBUGP("\n");
186
187 return seq_printf(s, "sport=%hu dport=%hu ",
188 ntohs(tuple->src.u.sctp.port),
189 ntohs(tuple->dst.u.sctp.port));
190}
191
192/* Print out the private part of the conntrack. */
193static int sctp_print_conntrack(struct seq_file *s,
194 const struct ip_conntrack *conntrack)
195{
196 enum sctp_conntrack state;
197
198 DEBUGP(__FUNCTION__);
199 DEBUGP("\n");
200
201 read_lock_bh(&sctp_lock);
202 state = conntrack->proto.sctp.state;
203 read_unlock_bh(&sctp_lock);
204
205 return seq_printf(s, "%s ", sctp_conntrack_names[state]);
206}
207
208#define for_each_sctp_chunk(skb, sch, _sch, offset, count) \
209for (offset = skb->nh.iph->ihl * 4 + sizeof(sctp_sctphdr_t), count = 0; \
210 offset < skb->len && \
211 (sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch)); \
212 offset += (ntohs(sch->length) + 3) & ~3, count++)
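
The iterator above advances by the chunk length rounded up to a 4-byte boundary, since SCTP pads chunks on the wire; a tiny sketch of the rounding (the function name is hypothetical):

static unsigned int example_chunk_step(u_int16_t host_order_len)
{
	/* (7 + 3) & ~3 == 8: a 7-byte chunk steps 8 bytes. */
	return (host_order_len + 3) & ~3;
}
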
213
214/* Some validity checks to make sure the chunks are fine */
215static int do_basic_checks(struct ip_conntrack *conntrack,
216 const struct sk_buff *skb,
217 char *map)
218{
219 u_int32_t offset, count;
220 sctp_chunkhdr_t _sch, *sch;
221 int flag;
222
223 DEBUGP(__FUNCTION__);
224 DEBUGP("\n");
225
226 flag = 0;
227
228 for_each_sctp_chunk (skb, sch, _sch, offset, count) {
229 DEBUGP("Chunk Num: %d Type: %d\n", count, sch->type);
230
231 if (sch->type == SCTP_CID_INIT
232 || sch->type == SCTP_CID_INIT_ACK
233 || sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
234 flag = 1;
235 }
236
237 /*
238 * Fail the basic checks if a Cookie Ack/Echo chunk is not the
239 * first chunk, if an Init/Init Ack/Shutdown Complete chunk is not
240 * the only chunk, or if any chunk has zero length.
241 */
242 if (((sch->type == SCTP_CID_COOKIE_ACK
243 || sch->type == SCTP_CID_COOKIE_ECHO
244 || flag)
245 && count !=0) || !sch->length) {
246 DEBUGP("Basic checks failed\n");
247 return 1;
248 }
249
250 if (map) {
251 set_bit(sch->type, (void *)map);
252 }
253 }
254
255 DEBUGP("Basic checks passed\n");
256 return count == 0;
257}
258
259static int new_state(enum ip_conntrack_dir dir,
260 enum sctp_conntrack cur_state,
261 int chunk_type)
262{
263 int i;
264
265 DEBUGP(__FUNCTION__);
266 DEBUGP("\n");
267
268 DEBUGP("Chunk type: %d\n", chunk_type);
269
270 switch (chunk_type) {
271 case SCTP_CID_INIT:
272 DEBUGP("SCTP_CID_INIT\n");
273 i = 0; break;
274 case SCTP_CID_INIT_ACK:
275 DEBUGP("SCTP_CID_INIT_ACK\n");
276 i = 1; break;
277 case SCTP_CID_ABORT:
278 DEBUGP("SCTP_CID_ABORT\n");
279 i = 2; break;
280 case SCTP_CID_SHUTDOWN:
281 DEBUGP("SCTP_CID_SHUTDOWN\n");
282 i = 3; break;
283 case SCTP_CID_SHUTDOWN_ACK:
284 DEBUGP("SCTP_CID_SHUTDOWN_ACK\n");
285 i = 4; break;
286 case SCTP_CID_ERROR:
287 DEBUGP("SCTP_CID_ERROR\n");
288 i = 5; break;
289 case SCTP_CID_COOKIE_ECHO:
290 DEBUGP("SCTP_CID_COOKIE_ECHO\n");
291 i = 6; break;
292 case SCTP_CID_COOKIE_ACK:
293 DEBUGP("SCTP_CID_COOKIE_ACK\n");
294 i = 7; break;
295 case SCTP_CID_SHUTDOWN_COMPLETE:
296 DEBUGP("SCTP_CID_SHUTDOWN_COMPLETE\n");
297 i = 8; break;
298 default:
299 /* Other chunks like DATA, SACK, HEARTBEAT and
300 its ACK do not cause a change in state */
301 DEBUGP("Unknown chunk type, Will stay in %s\n",
302 sctp_conntrack_names[cur_state]);
303 return cur_state;
304 }
305
306 DEBUGP("dir: %d cur_state: %s chunk_type: %d new_state: %s\n",
307 dir, sctp_conntrack_names[cur_state], chunk_type,
308 sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]);
309
310 return sctp_conntracks[dir][i][cur_state];
311}
312
313/* Returns verdict for packet, or -1 for invalid. */
314static int sctp_packet(struct ip_conntrack *conntrack,
315 const struct sk_buff *skb,
316 enum ip_conntrack_info ctinfo)
317{
318 enum sctp_conntrack newconntrack, oldsctpstate;
319 struct iphdr *iph = skb->nh.iph;
320 sctp_sctphdr_t _sctph, *sh;
321 sctp_chunkhdr_t _sch, *sch;
322 u_int32_t offset, count;
323 char map[256 / sizeof (char)] = {0};
324
325 DEBUGP(__FUNCTION__);
326 DEBUGP("\n");
327
328 sh = skb_header_pointer(skb, iph->ihl * 4, sizeof(_sctph), &_sctph);
329 if (sh == NULL)
330 return -1;
331
332 if (do_basic_checks(conntrack, skb, map) != 0)
333 return -1;
334
335 /* Check the verification tag (Sec 8.5) */
336 if (!test_bit(SCTP_CID_INIT, (void *)map)
337 && !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map)
338 && !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map)
339 && !test_bit(SCTP_CID_ABORT, (void *)map)
340 && !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map)
341 && (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
342 DEBUGP("Verification tag check failed\n");
343 return -1;
344 }
345
346 oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX;
347 for_each_sctp_chunk (skb, sch, _sch, offset, count) {
348 write_lock_bh(&sctp_lock);
349
350 /* Special cases of Verification tag check (Sec 8.5.1) */
351 if (sch->type == SCTP_CID_INIT) {
352 /* Sec 8.5.1 (A) */
353 if (sh->vtag != 0) {
354 write_unlock_bh(&sctp_lock);
355 return -1;
356 }
357 } else if (sch->type == SCTP_CID_ABORT) {
358 /* Sec 8.5.1 (B) */
359 if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
360 && !(sh->vtag == conntrack->proto.sctp.vtag
361 [1 - CTINFO2DIR(ctinfo)])) {
362 write_unlock_bh(&sctp_lock);
363 return -1;
364 }
365 } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
366 /* Sec 8.5.1 (C) */
367 if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
368 && !(sh->vtag == conntrack->proto.sctp.vtag
369 [1 - CTINFO2DIR(ctinfo)]
370 && (sch->flags & 1))) {
371 write_unlock_bh(&sctp_lock);
372 return -1;
373 }
374 } else if (sch->type == SCTP_CID_COOKIE_ECHO) {
375 /* Sec 8.5.1 (D) */
376 if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
377 write_unlock_bh(&sctp_lock);
378 return -1;
379 }
380 }
381
382 oldsctpstate = conntrack->proto.sctp.state;
383 newconntrack = new_state(CTINFO2DIR(ctinfo), oldsctpstate, sch->type);
384
385 /* Invalid */
386 if (newconntrack == SCTP_CONNTRACK_MAX) {
387 DEBUGP("ip_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n",
388 CTINFO2DIR(ctinfo), sch->type, oldsctpstate);
389 write_unlock_bh(&sctp_lock);
390 return -1;
391 }
392
393 /* If it is an INIT or an INIT ACK note down the vtag */
394 if (sch->type == SCTP_CID_INIT
395 || sch->type == SCTP_CID_INIT_ACK) {
396 sctp_inithdr_t _inithdr, *ih;
397
398 ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
399 sizeof(_inithdr), &_inithdr);
400 if (ih == NULL) {
401 write_unlock_bh(&sctp_lock);
402 return -1;
403 }
404 DEBUGP("Setting vtag %x for dir %d\n",
405 ih->init_tag, !CTINFO2DIR(ctinfo));
406 conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag;
407 }
408
409 conntrack->proto.sctp.state = newconntrack;
410 if (oldsctpstate != newconntrack)
411 ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
412 write_unlock_bh(&sctp_lock);
413 }
414
415 ip_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]);
416
417 if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED
418 && CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY
419 && newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
420 DEBUGP("Setting assured bit\n");
421 set_bit(IPS_ASSURED_BIT, &conntrack->status);
422 ip_conntrack_event_cache(IPCT_STATUS, skb);
423 }
424
425 return NF_ACCEPT;
426}
427
428/* Called when a new connection for this protocol found. */
429static int sctp_new(struct ip_conntrack *conntrack,
430 const struct sk_buff *skb)
431{
432 enum sctp_conntrack newconntrack;
433 struct iphdr *iph = skb->nh.iph;
434 sctp_sctphdr_t _sctph, *sh;
435 sctp_chunkhdr_t _sch, *sch;
436 u_int32_t offset, count;
437 char map[256 / sizeof (char)] = {0};
438
439 DEBUGP(__FUNCTION__);
440 DEBUGP("\n");
441
442 sh = skb_header_pointer(skb, iph->ihl * 4, sizeof(_sctph), &_sctph);
443 if (sh == NULL)
444 return 0;
445
446 if (do_basic_checks(conntrack, skb, map) != 0)
447 return 0;
448
449 /* If an OOTB packet has any of these chunks discard (Sec 8.4) */
450 if ((test_bit (SCTP_CID_ABORT, (void *)map))
451 || (test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map))
452 || (test_bit (SCTP_CID_COOKIE_ACK, (void *)map))) {
453 return 0;
454 }
455
456 newconntrack = SCTP_CONNTRACK_MAX;
457 for_each_sctp_chunk (skb, sch, _sch, offset, count) {
458 /* Don't need lock here: this conntrack not in circulation yet */
459 newconntrack = new_state (IP_CT_DIR_ORIGINAL,
460 SCTP_CONNTRACK_NONE, sch->type);
461
462 /* Invalid: delete conntrack */
463 if (newconntrack == SCTP_CONNTRACK_MAX) {
464 DEBUGP("ip_conntrack_sctp: invalid new conntrack, deleting.\n");
465 return 0;
466 }
467
468 /* Copy the vtag into the state info */
469 if (sch->type == SCTP_CID_INIT) {
470 if (sh->vtag == 0) {
471 sctp_inithdr_t _inithdr, *ih;
472
473 ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
474 sizeof(_inithdr), &_inithdr);
475 if (ih == NULL)
476 return 0;
477
478 DEBUGP("Setting vtag %x for new conn\n",
479 ih->init_tag);
480
481 conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] =
482 ih->init_tag;
483 } else {
484 /* Sec 8.5.1 (A) */
485 return 0;
486 }
487 }
488 /* If it is a shutdown ack OOTB packet, we expect a return
489 shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
490 else {
491 DEBUGP("Setting vtag %x for new conn OOTB\n",
492 sh->vtag);
493 conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
494 }
495
496 conntrack->proto.sctp.state = newconntrack;
497 }
498
499 return 1;
500}
501
502static struct ip_conntrack_protocol ip_conntrack_protocol_sctp = {
503 .proto = IPPROTO_SCTP,
504 .name = "sctp",
505 .pkt_to_tuple = sctp_pkt_to_tuple,
506 .invert_tuple = sctp_invert_tuple,
507 .print_tuple = sctp_print_tuple,
508 .print_conntrack = sctp_print_conntrack,
509 .packet = sctp_packet,
510 .new = sctp_new,
511 .destroy = NULL,
512 .me = THIS_MODULE,
513#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
514 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
515 .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
516 .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
517#endif
518};
519
520#ifdef CONFIG_SYSCTL
521static ctl_table ip_ct_sysctl_table[] = {
522 {
523 .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED,
524 .procname = "ip_conntrack_sctp_timeout_closed",
525 .data = &ip_ct_sctp_timeout_closed,
526 .maxlen = sizeof(unsigned int),
527 .mode = 0644,
528 .proc_handler = &proc_dointvec_jiffies,
529 },
530 {
531 .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT,
532 .procname = "ip_conntrack_sctp_timeout_cookie_wait",
533 .data = &ip_ct_sctp_timeout_cookie_wait,
534 .maxlen = sizeof(unsigned int),
535 .mode = 0644,
536 .proc_handler = &proc_dointvec_jiffies,
537 },
538 {
539 .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED,
540 .procname = "ip_conntrack_sctp_timeout_cookie_echoed",
541 .data = &ip_ct_sctp_timeout_cookie_echoed,
542 .maxlen = sizeof(unsigned int),
543 .mode = 0644,
544 .proc_handler = &proc_dointvec_jiffies,
545 },
546 {
547 .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED,
548 .procname = "ip_conntrack_sctp_timeout_established",
549 .data = &ip_ct_sctp_timeout_established,
550 .maxlen = sizeof(unsigned int),
551 .mode = 0644,
552 .proc_handler = &proc_dointvec_jiffies,
553 },
554 {
555 .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT,
556 .procname = "ip_conntrack_sctp_timeout_shutdown_sent",
557 .data = &ip_ct_sctp_timeout_shutdown_sent,
558 .maxlen = sizeof(unsigned int),
559 .mode = 0644,
560 .proc_handler = &proc_dointvec_jiffies,
561 },
562 {
563 .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD,
564 .procname = "ip_conntrack_sctp_timeout_shutdown_recd",
565 .data = &ip_ct_sctp_timeout_shutdown_recd,
566 .maxlen = sizeof(unsigned int),
567 .mode = 0644,
568 .proc_handler = &proc_dointvec_jiffies,
569 },
570 {
571 .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT,
572 .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent",
573 .data = &ip_ct_sctp_timeout_shutdown_ack_sent,
574 .maxlen = sizeof(unsigned int),
575 .mode = 0644,
576 .proc_handler = &proc_dointvec_jiffies,
577 },
578 { .ctl_name = 0 }
579};
580
581static ctl_table ip_ct_netfilter_table[] = {
582 {
583 .ctl_name = NET_IPV4_NETFILTER,
584 .procname = "netfilter",
585 .mode = 0555,
586 .child = ip_ct_sysctl_table,
587 },
588 { .ctl_name = 0 }
589};
590
591static ctl_table ip_ct_ipv4_table[] = {
592 {
593 .ctl_name = NET_IPV4,
594 .procname = "ipv4",
595 .mode = 0555,
596 .child = ip_ct_netfilter_table,
597 },
598 { .ctl_name = 0 }
599};
600
601static ctl_table ip_ct_net_table[] = {
602 {
603 .ctl_name = CTL_NET,
604 .procname = "net",
605 .mode = 0555,
606 .child = ip_ct_ipv4_table,
607 },
608 { .ctl_name = 0 }
609};
610
611static struct ctl_table_header *ip_ct_sysctl_header;
612#endif
613
614static int __init ip_conntrack_proto_sctp_init(void)
615{
616 int ret;
617
618 ret = ip_conntrack_protocol_register(&ip_conntrack_protocol_sctp);
619 if (ret) {
620 printk("ip_conntrack_proto_sctp: protocol register failed\n");
621 goto out;
622 }
623
624#ifdef CONFIG_SYSCTL
625 ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table);
626 if (ip_ct_sysctl_header == NULL) {
627 ret = -ENOMEM;
628 printk("ip_conntrack_proto_sctp: can't register sysctl table.\n");
629 goto cleanup;
630 }
631#endif
632
633 return ret;
634
635#ifdef CONFIG_SYSCTL
636 cleanup:
637 ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp);
638#endif
639 out:
640 DEBUGP("SCTP conntrack module loading %s\n",
641 ret ? "failed": "succeeded");
642 return ret;
643}
644
645static void __exit ip_conntrack_proto_sctp_fini(void)
646{
647 ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp);
648#ifdef CONFIG_SYSCTL
649 unregister_sysctl_table(ip_ct_sysctl_header);
650#endif
651 DEBUGP("SCTP conntrack module unloaded\n");
652}
653
654module_init(ip_conntrack_proto_sctp_init);
655module_exit(ip_conntrack_proto_sctp_fini);
656
657MODULE_LICENSE("GPL");
658MODULE_AUTHOR("Kiran Kumar Immidi");
659MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP");
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
deleted file mode 100644
index 0a72eab14620..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ /dev/null
@@ -1,1164 +0,0 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>:
9 * - Real stateful connection tracking
10 * - Modified state transitions table
11 * - Window scaling support added
12 * - SACK support added
13 *
14 * Willy Tarreau:
15 * - State table bugfixes
16 * - More robust state changes
17 * - Tuning timer parameters
18 *
19 * version 2.2
20 */
21
22#include <linux/types.h>
23#include <linux/timer.h>
24#include <linux/netfilter.h>
25#include <linux/module.h>
26#include <linux/in.h>
27#include <linux/ip.h>
28#include <linux/tcp.h>
29#include <linux/spinlock.h>
30
31#include <net/tcp.h>
32
33#include <linux/netfilter_ipv4.h>
34#include <linux/netfilter_ipv4/ip_conntrack.h>
35#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
36
37#if 0
38#define DEBUGP printk
39#define DEBUGP_VARS
40#else
41#define DEBUGP(format, args...)
42#endif
43
44/* Protects conntrack->proto.tcp */
45static DEFINE_RWLOCK(tcp_lock);
46
47/* "Be conservative in what you do,
48 be liberal in what you accept from others."
49 If it's non-zero, we mark only out of window RST segments as INVALID. */
50int ip_ct_tcp_be_liberal __read_mostly = 0;
51
52/* If it is set to zero, we disable picking up already established
53 connections. */
54int ip_ct_tcp_loose __read_mostly = 1;
55
56/* Max number of the retransmitted packets without receiving an (acceptable)
57 ACK from the destination. If this number is reached, a shorter timer
58 will be started. */
59int ip_ct_tcp_max_retrans __read_mostly = 3;
60
61 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
62 closely. They're more complex. --RR */
63
64static const char *tcp_conntrack_names[] = {
65 "NONE",
66 "SYN_SENT",
67 "SYN_RECV",
68 "ESTABLISHED",
69 "FIN_WAIT",
70 "CLOSE_WAIT",
71 "LAST_ACK",
72 "TIME_WAIT",
73 "CLOSE",
74 "LISTEN"
75};
76
77#define SECS * HZ
78#define MINS * 60 SECS
79#define HOURS * 60 MINS
80#define DAYS * 24 HOURS
81
82unsigned int ip_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS;
83unsigned int ip_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS;
84unsigned int ip_ct_tcp_timeout_established __read_mostly = 5 DAYS;
85unsigned int ip_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS;
86unsigned int ip_ct_tcp_timeout_close_wait __read_mostly = 60 SECS;
87unsigned int ip_ct_tcp_timeout_last_ack __read_mostly = 30 SECS;
88unsigned int ip_ct_tcp_timeout_time_wait __read_mostly = 2 MINS;
89unsigned int ip_ct_tcp_timeout_close __read_mostly = 10 SECS;
90
91/* RFC1122 says the R2 limit should be at least 100 seconds.
92 Linux uses 15 packets as the limit, which corresponds
93 to ~13-30min depending on RTO. */
94unsigned int ip_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS;
95
96static const unsigned int *tcp_timeouts[] = {
97 NULL, /* TCP_CONNTRACK_NONE */
98 &ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */
99 &ip_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */
100 &ip_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */
101 &ip_ct_tcp_timeout_fin_wait, /* TCP_CONNTRACK_FIN_WAIT, */
102 &ip_ct_tcp_timeout_close_wait, /* TCP_CONNTRACK_CLOSE_WAIT, */
103 &ip_ct_tcp_timeout_last_ack, /* TCP_CONNTRACK_LAST_ACK, */
104 &ip_ct_tcp_timeout_time_wait, /* TCP_CONNTRACK_TIME_WAIT, */
105 &ip_ct_tcp_timeout_close, /* TCP_CONNTRACK_CLOSE, */
106 NULL, /* TCP_CONNTRACK_LISTEN */
107 };
108
109#define sNO TCP_CONNTRACK_NONE
110#define sSS TCP_CONNTRACK_SYN_SENT
111#define sSR TCP_CONNTRACK_SYN_RECV
112#define sES TCP_CONNTRACK_ESTABLISHED
113#define sFW TCP_CONNTRACK_FIN_WAIT
114#define sCW TCP_CONNTRACK_CLOSE_WAIT
115#define sLA TCP_CONNTRACK_LAST_ACK
116#define sTW TCP_CONNTRACK_TIME_WAIT
117#define sCL TCP_CONNTRACK_CLOSE
118#define sLI TCP_CONNTRACK_LISTEN
119#define sIV TCP_CONNTRACK_MAX
120#define sIG TCP_CONNTRACK_IGNORE
121
122/* What TCP flags are set from RST/SYN/FIN/ACK. */
123enum tcp_bit_set {
124 TCP_SYN_SET,
125 TCP_SYNACK_SET,
126 TCP_FIN_SET,
127 TCP_ACK_SET,
128 TCP_RST_SET,
129 TCP_NONE_SET,
130};
131
132/*
133 * The TCP state transition table needs a few words...
134 *
135 * We are the man in the middle. All the packets go through us
136 * but might get lost in transit to the destination.
137 * It is assumed that the destinations can't receive segments
138 * we haven't seen.
139 *
140 * The checked segment is in window, but our windows are *not*
141 * equivalent with the ones of the sender/receiver. We always
142 * try to guess the state of the current sender.
143 *
144 * The meanings of the states are:
145 *
146 * NONE: initial state
147 * SYN_SENT: SYN-only packet seen
148 * SYN_RECV: SYN-ACK packet seen
149 * ESTABLISHED: ACK packet seen
150 * FIN_WAIT: FIN packet seen
151 * CLOSE_WAIT: ACK seen (after FIN)
152 * LAST_ACK: FIN seen (after FIN)
153 * TIME_WAIT: last ACK seen
154 * CLOSE: closed connection
155 *
156 * LISTEN state is not used.
157 *
158 * Packets are marked as IGNORED (sIG)
159 * when they may be either valid or invalid
160 * and the receiver may send back a connection-closing
161 * RST or a SYN/ACK.
162 *
163 * Packets are marked as INVALID (sIV)
164 * when they are invalid
165 * or we do not support the request (e.g. simultaneous open).
166 */
167static const enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
168 {
169/* ORIGINAL */
170/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
171/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
172/*
173 * sNO -> sSS Initialize a new connection
174 * sSS -> sSS Retransmitted SYN
175 * sSR -> sIG Late retransmitted SYN?
176 * sES -> sIG Error: SYNs in window outside the SYN_SENT state
177 * are errors. Receiver will reply with RST
178 * and close the connection.
179 * Or we are not in sync and hold a dead connection.
180 * sFW -> sIG
181 * sCW -> sIG
182 * sLA -> sIG
183 * sTW -> sSS Reopened connection (RFC 1122).
184 * sCL -> sSS
185 */
186/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
187/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
188/*
189 * A SYN/ACK from the client is always invalid:
190 * - either it tries to set up a simultaneous open, which is
191 * not supported;
192 * - or the firewall has just been inserted between the two hosts
193 * during the session set-up. The SYN will be retransmitted
194 * by the true client (or it'll time out).
195 */
196/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
197/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
198/*
199 * sNO -> sIV Too late and no reason to do anything...
200 * sSS -> sIV Client might not send FIN in this state:
201 * we enforce waiting for a SYN/ACK reply first.
202 * sSR -> sFW Close started.
203 * sES -> sFW
204 * sFW -> sLA FIN seen in both directions, waiting for
205 * the last ACK.
206 * Might be a retransmitted FIN as well...
207 * sCW -> sLA
208 * sLA -> sLA Retransmitted FIN. Remain in the same state.
209 * sTW -> sTW
210 * sCL -> sCL
211 */
212/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
213/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
214/*
215 * sNO -> sES Assumed.
216 * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
217 * sSR -> sES Established state is reached.
218 * sES -> sES :-)
219 * sFW -> sCW Normal close request answered by ACK.
220 * sCW -> sCW
221 * sLA -> sTW Last ACK detected.
222 * sTW -> sTW Retransmitted last ACK. Remain in the same state.
223 * sCL -> sCL
224 */
225/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
226/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
227/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
228 },
229 {
230/* REPLY */
231/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
232/*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
233/*
234 * sNO -> sIV Never reached.
235 * sSS -> sIV Simultaneous open, not supported
236 * sSR -> sIV Simultaneous open, not supported.
237 * sES -> sIV Server may not initiate a connection.
238 * sFW -> sIV
239 * sCW -> sIV
240 * sLA -> sIV
241 * sTW -> sIV Reopened connection, but server may not do it.
242 * sCL -> sIV
243 */
244/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
245/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
246/*
247 * sSS -> sSR Standard open.
248 * sSR -> sSR Retransmitted SYN/ACK.
249 * sES -> sIG Late retransmitted SYN/ACK?
250 * sFW -> sIG Might be SYN/ACK answering ignored SYN
251 * sCW -> sIG
252 * sLA -> sIG
253 * sTW -> sIG
254 * sCL -> sIG
255 */
256/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
257/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
258/*
259 * sSS -> sIV Server might not send FIN in this state.
260 * sSR -> sFW Close started.
261 * sES -> sFW
262 * sFW -> sLA FIN seen in both directions.
263 * sCW -> sLA
264 * sLA -> sLA Retransmitted FIN.
265 * sTW -> sTW
266 * sCL -> sCL
267 */
268/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
269/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
270/*
271 * sSS -> sIG Might be a half-open connection.
272 * sSR -> sSR Might answer late resent SYN.
273 * sES -> sES :-)
274 * sFW -> sCW Normal close request answered by ACK.
275 * sCW -> sCW
276 * sLA -> sTW Last ACK detected.
277 * sTW -> sTW Retransmitted last ACK.
278 * sCL -> sCL
279 */
280/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
281/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
282/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
283 }
284};
285
286static int tcp_pkt_to_tuple(const struct sk_buff *skb,
287 unsigned int dataoff,
288 struct ip_conntrack_tuple *tuple)
289{
290 struct tcphdr _hdr, *hp;
291
292 /* Actually only need first 8 bytes. */
293 hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
294 if (hp == NULL)
295 return 0;
296
297 tuple->src.u.tcp.port = hp->source;
298 tuple->dst.u.tcp.port = hp->dest;
299
300 return 1;
301}
302
303static int tcp_invert_tuple(struct ip_conntrack_tuple *tuple,
304 const struct ip_conntrack_tuple *orig)
305{
306 tuple->src.u.tcp.port = orig->dst.u.tcp.port;
307 tuple->dst.u.tcp.port = orig->src.u.tcp.port;
308 return 1;
309}
310
311/* Print out the per-protocol part of the tuple. */
312static int tcp_print_tuple(struct seq_file *s,
313 const struct ip_conntrack_tuple *tuple)
314{
315 return seq_printf(s, "sport=%hu dport=%hu ",
316 ntohs(tuple->src.u.tcp.port),
317 ntohs(tuple->dst.u.tcp.port));
318}
319
320/* Print out the private part of the conntrack. */
321static int tcp_print_conntrack(struct seq_file *s,
322 const struct ip_conntrack *conntrack)
323{
324 enum tcp_conntrack state;
325
326 read_lock_bh(&tcp_lock);
327 state = conntrack->proto.tcp.state;
328 read_unlock_bh(&tcp_lock);
329
330 return seq_printf(s, "%s ", tcp_conntrack_names[state]);
331}
332
333#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
334 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
335static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
336 const struct ip_conntrack *ct)
337{
338 struct nfattr *nest_parms;
339
340 read_lock_bh(&tcp_lock);
341 nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
342 NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
343 &ct->proto.tcp.state);
344 read_unlock_bh(&tcp_lock);
345
346 NFA_NEST_END(skb, nest_parms);
347
348 return 0;
349
350nfattr_failure:
351 read_unlock_bh(&tcp_lock);
352 return -1;
353}
354
355static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
356 [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t),
357};
358
359static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct)
360{
361 struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1];
362 struct nfattr *tb[CTA_PROTOINFO_TCP_MAX];
363
364 /* updates could not contain anything about the private
365 * protocol info, in that case skip the parsing */
366 if (!attr)
367 return 0;
368
369 nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr);
370
371 if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp))
372 return -EINVAL;
373
374 if (!tb[CTA_PROTOINFO_TCP_STATE-1])
375 return -EINVAL;
376
377 write_lock_bh(&tcp_lock);
378 ct->proto.tcp.state =
379 *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
380 write_unlock_bh(&tcp_lock);
381
382 return 0;
383}
384#endif
385
386static unsigned int get_conntrack_index(const struct tcphdr *tcph)
387{
388 if (tcph->rst) return TCP_RST_SET;
389 else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
390 else if (tcph->fin) return TCP_FIN_SET;
391 else if (tcph->ack) return TCP_ACK_SET;
392 else return TCP_NONE_SET;
393}
394
395/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
396 in IP Filter' by Guido van Rooij.
397
398 http://www.nluug.nl/events/sane2000/papers.html
399 http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
400
401 The boundaries and the conditions are changed according to RFC793:
402 the packet must intersect the window (i.e. segments may be
403 after the right or before the left edge) and thus receivers may ACK
404 segments after the right edge of the window.
405
406 td_maxend = max(sack + max(win,1)) seen in reply packets
407 td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
408 td_maxwin += seq + len - sender.td_maxend
409 if seq + len > sender.td_maxend
410 td_end = max(seq + len) seen in sent packets
411
412 I. Upper bound for valid data: seq <= sender.td_maxend
413 II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin
414 III. Upper bound for valid ack: sack <= receiver.td_end
415 IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW
416
417 where sack is the highest right edge of sack block found in the packet.
418
419 The upper bound limit for a valid ack is not ignored -
420 we don't have to deal with fragments.
421*/
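
Concrete numbers make the four bounds above easier to verify; the values below are invented for illustration and the helper name is hypothetical:

/* Suppose sender.td_end = 1000, sender.td_maxend = 1500,
 * receiver.td_maxwin = 300, receiver.td_end = 2000, and the segment
 * carries seq = 1400, len = 100, ack = 1900.  Then:
 *   I.   1400       <= 1500		(seq within the right edge)
 *   II.  1400 + 100 >= 1000 - 300	(segment reaches the left edge)
 *   III. 1900       <= 2000		(ack not beyond receiver's data)
 *   IV.  1900       >= 2000 - MAXACKWINDOW(receiver)
 * so the segment would be accepted as in-window. */
static int example_in_window(void)
{
	return 1400 <= 1500 && 1400 + 100 >= 1000 - 300 &&
	       1900 <= 2000 && 1900 >= 2000 - 66000;	/* MAXACKWINCONST */
}
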
422
423static inline __u32 segment_seq_plus_len(__u32 seq,
424 size_t len,
425 struct iphdr *iph,
426 struct tcphdr *tcph)
427{
428 return (seq + len - (iph->ihl + tcph->doff)*4
429 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
430}
431
432/* Fixme: what about big packets? */
433#define MAXACKWINCONST 66000
434#define MAXACKWINDOW(sender) \
435 ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \
436 : MAXACKWINCONST)
437
438/*
439 * Simplified tcp_parse_options routine from tcp_input.c
440 */
441static void tcp_options(const struct sk_buff *skb,
442 struct iphdr *iph,
443 struct tcphdr *tcph,
444 struct ip_ct_tcp_state *state)
445{
446 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
447 unsigned char *ptr;
448 int length = (tcph->doff*4) - sizeof(struct tcphdr);
449
450 if (!length)
451 return;
452
453 ptr = skb_header_pointer(skb,
454 (iph->ihl * 4) + sizeof(struct tcphdr),
455 length, buff);
456 BUG_ON(ptr == NULL);
457
458 state->td_scale =
459 state->flags = 0;
460
461 while (length > 0) {
462 int opcode=*ptr++;
463 int opsize;
464
465 switch (opcode) {
466 case TCPOPT_EOL:
467 return;
468 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
469 length--;
470 continue;
471 default:
472 opsize=*ptr++;
473 if (opsize < 2) /* "silly options" */
474 return;
475 if (opsize > length)
476 break; /* don't parse partial options */
477
478 if (opcode == TCPOPT_SACK_PERM
479 && opsize == TCPOLEN_SACK_PERM)
480 state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
481 else if (opcode == TCPOPT_WINDOW
482 && opsize == TCPOLEN_WINDOW) {
483 state->td_scale = *(u_int8_t *)ptr;
484
485 if (state->td_scale > 14) {
486 /* See RFC1323 */
487 state->td_scale = 14;
488 }
489 state->flags |=
490 IP_CT_TCP_FLAG_WINDOW_SCALE;
491 }
492 ptr += opsize - 2;
493 length -= opsize;
494 }
495 }
496}
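/* Illustrative example with hypothetical option bytes: a SYN carrying
 * 04 02 01 03 03 07 parses as SACK-permitted (kind 4, length 2), a NOP
 * (kind 1) and window scale (kind 3, length 3, shift 7), leaving
 * state->flags == IP_CT_TCP_FLAG_SACK_PERM | IP_CT_TCP_FLAG_WINDOW_SCALE
 * and state->td_scale == 7.
 */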
497
498static void tcp_sack(const struct sk_buff *skb,
499 struct iphdr *iph,
500 struct tcphdr *tcph,
501 __u32 *sack)
502{
503 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
504 unsigned char *ptr;
505 int length = (tcph->doff*4) - sizeof(struct tcphdr);
506 __u32 tmp;
507
508 if (!length)
509 return;
510
511 ptr = skb_header_pointer(skb,
512 (iph->ihl * 4) + sizeof(struct tcphdr),
513 length, buff);
514 BUG_ON(ptr == NULL);
515
516 /* Fast path for timestamp-only option */
517 if (length == TCPOLEN_TSTAMP_ALIGNED*4
518 && *(__be32 *)ptr ==
519 __constant_htonl((TCPOPT_NOP << 24)
520 | (TCPOPT_NOP << 16)
521 | (TCPOPT_TIMESTAMP << 8)
522 | TCPOLEN_TIMESTAMP))
523 return;
524
525 while (length > 0) {
526 int opcode=*ptr++;
527 int opsize, i;
528
529 switch (opcode) {
530 case TCPOPT_EOL:
531 return;
532 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
533 length--;
534 continue;
535 default:
536 opsize=*ptr++;
537 if (opsize < 2) /* "silly options" */
538 return;
539 if (opsize > length)
540 break; /* don't parse partial options */
541
542 if (opcode == TCPOPT_SACK
543 && opsize >= (TCPOLEN_SACK_BASE
544 + TCPOLEN_SACK_PERBLOCK)
545 && !((opsize - TCPOLEN_SACK_BASE)
546 % TCPOLEN_SACK_PERBLOCK)) {
547 for (i = 0;
548 i < (opsize - TCPOLEN_SACK_BASE);
549 i += TCPOLEN_SACK_PERBLOCK) {
550 tmp = ntohl(*((__be32 *)(ptr+i)+1));
551
552 if (after(tmp, *sack))
553 *sack = tmp;
554 }
555 return;
556 }
557 ptr += opsize - 2;
558 length -= opsize;
559 }
560 }
561}
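/* Illustrative example with a hypothetical SACK option: kind 5, length 10
 * and one block with left edge 3000 and right edge 4000. The loop reads
 * the right edge of each block and, since after(4000, *sack) holds for an
 * initial ack below 4000, leaves *sack == 4000.
 */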
562
563static int tcp_in_window(struct ip_ct_tcp *state,
564 enum ip_conntrack_dir dir,
565 unsigned int index,
566 const struct sk_buff *skb,
567 struct iphdr *iph,
568 struct tcphdr *tcph)
569{
570 struct ip_ct_tcp_state *sender = &state->seen[dir];
571 struct ip_ct_tcp_state *receiver = &state->seen[!dir];
572 __u32 seq, ack, sack, end, win, swin;
573 int res;
574
575 /*
576 * Get the required data from the packet.
577 */
578 seq = ntohl(tcph->seq);
579 ack = sack = ntohl(tcph->ack_seq);
580 win = ntohs(tcph->window);
581 end = segment_seq_plus_len(seq, skb->len, iph, tcph);
582
583 if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
584 tcp_sack(skb, iph, tcph, &sack);
585
586 DEBUGP("tcp_in_window: START\n");
587 DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
588 "seq=%u ack=%u sack=%u win=%u end=%u\n",
589 NIPQUAD(iph->saddr), ntohs(tcph->source),
590 NIPQUAD(iph->daddr), ntohs(tcph->dest),
591 seq, ack, sack, win, end);
592 DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
593 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
594 sender->td_end, sender->td_maxend, sender->td_maxwin,
595 sender->td_scale,
596 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
597 receiver->td_scale);
598
599 if (sender->td_end == 0) {
600 /*
601 * Initialize sender data.
602 */
603 if (tcph->syn && tcph->ack) {
604 /*
605 * Outgoing SYN-ACK in reply to a SYN.
606 */
607 sender->td_end =
608 sender->td_maxend = end;
609 sender->td_maxwin = (win == 0 ? 1 : win);
610
611 tcp_options(skb, iph, tcph, sender);
612 /*
613 * RFC 1323:
614 * Both sides must send the Window Scale option
615 * to enable window scaling in either direction.
616 */
617 if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
618 && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
619 sender->td_scale =
620 receiver->td_scale = 0;
621 } else {
622 /*
623			 * We are in the middle of a connection
624			 * whose history is lost to us.
625 * Let's try to use the data from the packet.
626 */
627 sender->td_end = end;
628 sender->td_maxwin = (win == 0 ? 1 : win);
629 sender->td_maxend = end + sender->td_maxwin;
630 }
631 } else if (((state->state == TCP_CONNTRACK_SYN_SENT
632 && dir == IP_CT_DIR_ORIGINAL)
633 || (state->state == TCP_CONNTRACK_SYN_RECV
634 && dir == IP_CT_DIR_REPLY))
635 && after(end, sender->td_end)) {
636 /*
637 * RFC 793: "if a TCP is reinitialized ... then it need
638 * not wait at all; it must only be sure to use sequence
639 * numbers larger than those recently used."
640 */
641 sender->td_end =
642 sender->td_maxend = end;
643 sender->td_maxwin = (win == 0 ? 1 : win);
644
645 tcp_options(skb, iph, tcph, sender);
646 }
647
648 if (!(tcph->ack)) {
649 /*
650 * If there is no ACK, just pretend it was set and OK.
651 */
652 ack = sack = receiver->td_end;
653 } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
654 (TCP_FLAG_ACK|TCP_FLAG_RST))
655 && (ack == 0)) {
656 /*
657		 * Broken TCP stacks that set ACK in RST packets
658		 * with a zero ack value as well.
659 */
660 ack = sack = receiver->td_end;
661 }
662
663 if (seq == end
664 && (!tcph->rst
665 || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
666 /*
667		 * Packet contains no data: we assume it is valid
668		 * and check the ack value only.
669		 * However, RST segments are always validated by their
670		 * SEQ number, except when seq == 0 (a reset sent in
671		 * answer to a SYN).
672 */
673 seq = end = sender->td_end;
674
675 DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
676 "seq=%u ack=%u sack =%u win=%u end=%u\n",
677 NIPQUAD(iph->saddr), ntohs(tcph->source),
678 NIPQUAD(iph->daddr), ntohs(tcph->dest),
679 seq, ack, sack, win, end);
680 DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
681 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
682 sender->td_end, sender->td_maxend, sender->td_maxwin,
683 sender->td_scale,
684 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
685 receiver->td_scale);
686
687 DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
688 before(seq, sender->td_maxend + 1),
689 after(end, sender->td_end - receiver->td_maxwin - 1),
690 before(sack, receiver->td_end + 1),
691 after(ack, receiver->td_end - MAXACKWINDOW(sender)));
692
693 if (before(seq, sender->td_maxend + 1) &&
694 after(end, sender->td_end - receiver->td_maxwin - 1) &&
695 before(sack, receiver->td_end + 1) &&
696 after(ack, receiver->td_end - MAXACKWINDOW(sender))) {
697 /*
698 * Take into account window scaling (RFC 1323).
699 */
700 if (!tcph->syn)
701 win <<= sender->td_scale;
702
703 /*
704 * Update sender data.
705 */
706 swin = win + (sack - ack);
707 if (sender->td_maxwin < swin)
708 sender->td_maxwin = swin;
709 if (after(end, sender->td_end))
710 sender->td_end = end;
711 /*
712 * Update receiver data.
713 */
714 if (after(end, sender->td_maxend))
715 receiver->td_maxwin += end - sender->td_maxend;
716 if (after(sack + win, receiver->td_maxend - 1)) {
717 receiver->td_maxend = sack + win;
718 if (win == 0)
719 receiver->td_maxend++;
720 }
721
722 /*
723 * Check retransmissions.
724 */
725 if (index == TCP_ACK_SET) {
726 if (state->last_dir == dir
727 && state->last_seq == seq
728 && state->last_ack == ack
729 && state->last_end == end
730 && state->last_win == win)
731 state->retrans++;
732 else {
733 state->last_dir = dir;
734 state->last_seq = seq;
735 state->last_ack = ack;
736 state->last_end = end;
737 state->last_win = win;
738 state->retrans = 0;
739 }
740 }
741 res = 1;
742 } else {
743 res = 0;
744 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
745 ip_ct_tcp_be_liberal)
746 res = 1;
747 if (!res && LOG_INVALID(IPPROTO_TCP))
748 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
749 "ip_ct_tcp: %s ",
750 before(seq, sender->td_maxend + 1) ?
751 after(end, sender->td_end - receiver->td_maxwin - 1) ?
752 before(sack, receiver->td_end + 1) ?
753 after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
754 : "ACK is under the lower bound (possible overly delayed ACK)"
755 : "ACK is over the upper bound (ACKed data not seen yet)"
756 : "SEQ is under the lower bound (already ACKed data retransmitted)"
757 : "SEQ is over the upper bound (over the window of the receiver)");
758 }
759
760 DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
761 "receiver end=%u maxend=%u maxwin=%u\n",
762 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
763 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
764
765 return res;
766}
767
768#ifdef CONFIG_IP_NF_NAT_NEEDED
769/* Update sender->td_end after NAT successfully mangled the packet */
770void ip_conntrack_tcp_update(struct sk_buff *skb,
771 struct ip_conntrack *conntrack,
772 enum ip_conntrack_dir dir)
773{
774 struct iphdr *iph = skb->nh.iph;
775 struct tcphdr *tcph = (void *)skb->nh.iph + skb->nh.iph->ihl*4;
776 __u32 end;
777#ifdef DEBUGP_VARS
778 struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir];
779 struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir];
780#endif
781
782 end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, iph, tcph);
783
784 write_lock_bh(&tcp_lock);
785 /*
786 * We have to worry for the ack in the reply packet only...
787 */
788 if (after(end, conntrack->proto.tcp.seen[dir].td_end))
789 conntrack->proto.tcp.seen[dir].td_end = end;
790 conntrack->proto.tcp.last_end = end;
791 write_unlock_bh(&tcp_lock);
792 DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
793 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
794 sender->td_end, sender->td_maxend, sender->td_maxwin,
795 sender->td_scale,
796 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
797 receiver->td_scale);
798}
799
800#endif
801
802#define TH_FIN 0x01
803#define TH_SYN 0x02
804#define TH_RST 0x04
805#define TH_PUSH 0x08
806#define TH_ACK 0x10
807#define TH_URG 0x20
808#define TH_ECE 0x40
809#define TH_CWR 0x80
810
811/* table of valid flag combinations - ECE and CWR are always valid */
812static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
813{
814 [TH_SYN] = 1,
815 [TH_SYN|TH_PUSH] = 1,
816 [TH_SYN|TH_URG] = 1,
817 [TH_SYN|TH_PUSH|TH_URG] = 1,
818 [TH_SYN|TH_ACK] = 1,
819 [TH_SYN|TH_ACK|TH_PUSH] = 1,
820 [TH_RST] = 1,
821 [TH_RST|TH_ACK] = 1,
822 [TH_RST|TH_ACK|TH_PUSH] = 1,
823 [TH_FIN|TH_ACK] = 1,
824 [TH_ACK] = 1,
825 [TH_ACK|TH_PUSH] = 1,
826 [TH_ACK|TH_URG] = 1,
827 [TH_ACK|TH_URG|TH_PUSH] = 1,
828 [TH_FIN|TH_ACK|TH_PUSH] = 1,
829 [TH_FIN|TH_ACK|TH_URG] = 1,
830 [TH_FIN|TH_ACK|TH_URG|TH_PUSH] = 1,
831};
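/* Illustrative example: a SYN/ACK segment has byte 13 == 0x12, so
 * tcpflags == TH_SYN|TH_ACK and the table marks it valid; a SYN/FIN
 * segment (0x03) has no entry and is rejected as an invalid combination.
 */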
832
833/* Protect conntrack against broken packets. Code taken from ipt_unclean.c. */
834static int tcp_error(struct sk_buff *skb,
835 enum ip_conntrack_info *ctinfo,
836 unsigned int hooknum)
837{
838 struct iphdr *iph = skb->nh.iph;
839 struct tcphdr _tcph, *th;
840 unsigned int tcplen = skb->len - iph->ihl * 4;
841 u_int8_t tcpflags;
842
843	/* Smaller than the minimal TCP header? */
844 th = skb_header_pointer(skb, iph->ihl * 4,
845 sizeof(_tcph), &_tcph);
846 if (th == NULL) {
847 if (LOG_INVALID(IPPROTO_TCP))
848 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
849 "ip_ct_tcp: short packet ");
850 return -NF_ACCEPT;
851 }
852
853 /* Not whole TCP header or malformed packet */
854 if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
855 if (LOG_INVALID(IPPROTO_TCP))
856 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
857 "ip_ct_tcp: truncated/malformed packet ");
858 return -NF_ACCEPT;
859 }
860
861 /* Checksum invalid? Ignore.
862 * We skip checking packets on the outgoing path
863	 * because the checksum is assumed to be correct.
864 */
865 /* FIXME: Source route IP option packets --RR */
866 if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
867 nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_TCP)) {
868 if (LOG_INVALID(IPPROTO_TCP))
869 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
870 "ip_ct_tcp: bad TCP checksum ");
871 return -NF_ACCEPT;
872 }
873
874 /* Check TCP flags. */
875 tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR));
876 if (!tcp_valid_flags[tcpflags]) {
877 if (LOG_INVALID(IPPROTO_TCP))
878 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
879 "ip_ct_tcp: invalid TCP flag combination ");
880 return -NF_ACCEPT;
881 }
882
883 return NF_ACCEPT;
884}
885
886/* Returns verdict for packet, or -1 for invalid. */
887static int tcp_packet(struct ip_conntrack *conntrack,
888 const struct sk_buff *skb,
889 enum ip_conntrack_info ctinfo)
890{
891 enum tcp_conntrack new_state, old_state;
892 enum ip_conntrack_dir dir;
893 struct iphdr *iph = skb->nh.iph;
894 struct tcphdr *th, _tcph;
895 unsigned long timeout;
896 unsigned int index;
897
898 th = skb_header_pointer(skb, iph->ihl * 4,
899 sizeof(_tcph), &_tcph);
900 BUG_ON(th == NULL);
901
902 write_lock_bh(&tcp_lock);
903 old_state = conntrack->proto.tcp.state;
904 dir = CTINFO2DIR(ctinfo);
905 index = get_conntrack_index(th);
906 new_state = tcp_conntracks[dir][index][old_state];
907
908 switch (new_state) {
909 case TCP_CONNTRACK_IGNORE:
910 /* Ignored packets:
911 *
912 * a) SYN in ORIGINAL
913 * b) SYN/ACK in REPLY
914 * c) ACK in reply direction after initial SYN in original.
915 */
916 if (index == TCP_SYNACK_SET
917 && conntrack->proto.tcp.last_index == TCP_SYN_SET
918 && conntrack->proto.tcp.last_dir != dir
919 && ntohl(th->ack_seq) ==
920 conntrack->proto.tcp.last_end) {
921 /* This SYN/ACK acknowledges a SYN that we earlier
922 * ignored as invalid. This means that the client and
923 * the server are both in sync, while the firewall is
924 * not. We kill this session and block the SYN/ACK so
925 * that the client cannot but retransmit its SYN and
926 * thus initiate a clean new session.
927 */
928 write_unlock_bh(&tcp_lock);
929 if (LOG_INVALID(IPPROTO_TCP))
930 nf_log_packet(PF_INET, 0, skb, NULL, NULL,
931 NULL, "ip_ct_tcp: "
932 "killing out of sync session ");
933 if (del_timer(&conntrack->timeout))
934 conntrack->timeout.function((unsigned long)
935 conntrack);
936 return -NF_DROP;
937 }
938 conntrack->proto.tcp.last_index = index;
939 conntrack->proto.tcp.last_dir = dir;
940 conntrack->proto.tcp.last_seq = ntohl(th->seq);
941 conntrack->proto.tcp.last_end =
942 segment_seq_plus_len(ntohl(th->seq), skb->len, iph, th);
943
944 write_unlock_bh(&tcp_lock);
945 if (LOG_INVALID(IPPROTO_TCP))
946 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
947 "ip_ct_tcp: invalid packet ignored ");
948 return NF_ACCEPT;
949 case TCP_CONNTRACK_MAX:
950 /* Invalid packet */
951 DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
952 dir, get_conntrack_index(th),
953 old_state);
954 write_unlock_bh(&tcp_lock);
955 if (LOG_INVALID(IPPROTO_TCP))
956 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
957 "ip_ct_tcp: invalid state ");
958 return -NF_ACCEPT;
959 case TCP_CONNTRACK_SYN_SENT:
960 if (old_state < TCP_CONNTRACK_TIME_WAIT)
961 break;
962 if ((conntrack->proto.tcp.seen[dir].flags &
963 IP_CT_TCP_FLAG_CLOSE_INIT)
964 || after(ntohl(th->seq),
965 conntrack->proto.tcp.seen[dir].td_end)) {
966 /* Attempt to reopen a closed connection.
967 * Delete this connection and look up again. */
968 write_unlock_bh(&tcp_lock);
969 if (del_timer(&conntrack->timeout))
970 conntrack->timeout.function((unsigned long)
971 conntrack);
972 return -NF_REPEAT;
973 } else {
974 write_unlock_bh(&tcp_lock);
975 if (LOG_INVALID(IPPROTO_TCP))
976 nf_log_packet(PF_INET, 0, skb, NULL, NULL,
977 NULL, "ip_ct_tcp: invalid SYN");
978 return -NF_ACCEPT;
979 }
980 case TCP_CONNTRACK_CLOSE:
981 if (index == TCP_RST_SET
982 && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
983 && conntrack->proto.tcp.last_index == TCP_SYN_SET)
984 || (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
985 && conntrack->proto.tcp.last_index == TCP_ACK_SET))
986 && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
987			/* RST sent to an invalid SYN or ACK we had let through
988			 * at a) and c) above:
989			 *
990			 * a) the SYN was in the window then;
991			 * c) we hold a half-open connection.
992			 *
993			 * Delete our connection entry.
994			 * We skip window checking, because the packet might ACK
995			 * segments we ignored. */
996 goto in_window;
997 }
998 /* Just fall through */
999 default:
1000 /* Keep compilers happy. */
1001 break;
1002 }
1003
1004 if (!tcp_in_window(&conntrack->proto.tcp, dir, index,
1005 skb, iph, th)) {
1006 write_unlock_bh(&tcp_lock);
1007 return -NF_ACCEPT;
1008 }
1009 in_window:
1010 /* From now on we have got in-window packets */
1011 conntrack->proto.tcp.last_index = index;
1012
1013 DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
1014 "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1015 NIPQUAD(iph->saddr), ntohs(th->source),
1016 NIPQUAD(iph->daddr), ntohs(th->dest),
1017 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1018 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1019 old_state, new_state);
1020
1021 conntrack->proto.tcp.state = new_state;
1022 if (old_state != new_state
1023 && (new_state == TCP_CONNTRACK_FIN_WAIT
1024 || new_state == TCP_CONNTRACK_CLOSE))
1025 conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
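	/* Pick the per-state timeout, but cap it at
	 * ip_ct_tcp_timeout_max_retrans once the retransmission count
	 * has reached ip_ct_tcp_max_retrans. */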
1026 timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
1027 && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
1028 ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
1029 write_unlock_bh(&tcp_lock);
1030
1031 ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
1032 if (new_state != old_state)
1033 ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
1034
1035 if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
1036		/* If the only reply is a RST, we can consider ourselves not
1037		   to have an established connection: this is a fairly common
1038		   problem case, so we can delete the conntrack
1039		   immediately. --RR */
1040 if (th->rst) {
1041 if (del_timer(&conntrack->timeout))
1042 conntrack->timeout.function((unsigned long)
1043 conntrack);
1044 return NF_ACCEPT;
1045 }
1046 } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
1047 && (old_state == TCP_CONNTRACK_SYN_RECV
1048 || old_state == TCP_CONNTRACK_ESTABLISHED)
1049 && new_state == TCP_CONNTRACK_ESTABLISHED) {
1050		/* Set ASSURED if we see a valid ack in ESTABLISHED
1051		   after SYN_RECV, or a valid answer for a picked-up
1052		   connection. */
1053 set_bit(IPS_ASSURED_BIT, &conntrack->status);
1054 ip_conntrack_event_cache(IPCT_STATUS, skb);
1055 }
1056 ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
1057
1058 return NF_ACCEPT;
1059}
1060
1061/* Called when a new connection for this protocol found. */
1062static int tcp_new(struct ip_conntrack *conntrack,
1063 const struct sk_buff *skb)
1064{
1065 enum tcp_conntrack new_state;
1066 struct iphdr *iph = skb->nh.iph;
1067 struct tcphdr *th, _tcph;
1068#ifdef DEBUGP_VARS
1069 struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0];
1070 struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1];
1071#endif
1072
1073 th = skb_header_pointer(skb, iph->ihl * 4,
1074 sizeof(_tcph), &_tcph);
1075 BUG_ON(th == NULL);
1076
1077	/* Don't need lock here: this conntrack is not in circulation yet */
1078 new_state
1079 = tcp_conntracks[0][get_conntrack_index(th)]
1080 [TCP_CONNTRACK_NONE];
1081
1082 /* Invalid: delete conntrack */
1083 if (new_state >= TCP_CONNTRACK_MAX) {
1084		DEBUGP("ip_ct_tcp: invalid new packet, deleting.\n");
1085 return 0;
1086 }
1087
1088 if (new_state == TCP_CONNTRACK_SYN_SENT) {
1089 /* SYN packet */
1090 conntrack->proto.tcp.seen[0].td_end =
1091 segment_seq_plus_len(ntohl(th->seq), skb->len,
1092 iph, th);
1093 conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1094 if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
1095 conntrack->proto.tcp.seen[0].td_maxwin = 1;
1096 conntrack->proto.tcp.seen[0].td_maxend =
1097 conntrack->proto.tcp.seen[0].td_end;
1098
1099 tcp_options(skb, iph, th, &conntrack->proto.tcp.seen[0]);
1100 conntrack->proto.tcp.seen[1].flags = 0;
1101 } else if (ip_ct_tcp_loose == 0) {
1102 /* Don't try to pick up connections. */
1103 return 0;
1104 } else {
1105 /*
1106	 * We are in the middle of a connection
1107	 * whose history is lost to us.
1108 * Let's try to use the data from the packet.
1109 */
1110 conntrack->proto.tcp.seen[0].td_end =
1111 segment_seq_plus_len(ntohl(th->seq), skb->len,
1112 iph, th);
1113 conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1114 if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
1115 conntrack->proto.tcp.seen[0].td_maxwin = 1;
1116 conntrack->proto.tcp.seen[0].td_maxend =
1117 conntrack->proto.tcp.seen[0].td_end +
1118 conntrack->proto.tcp.seen[0].td_maxwin;
1119 conntrack->proto.tcp.seen[0].td_scale = 0;
1120
1121 /* We assume SACK and liberal window checking to handle
1122 * window scaling */
1123 conntrack->proto.tcp.seen[0].flags =
1124 conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1125 IP_CT_TCP_FLAG_BE_LIBERAL;
1126 }
1127
1128 conntrack->proto.tcp.seen[1].td_end = 0;
1129 conntrack->proto.tcp.seen[1].td_maxend = 0;
1130 conntrack->proto.tcp.seen[1].td_maxwin = 1;
1131 conntrack->proto.tcp.seen[1].td_scale = 0;
1132
1133 /* tcp_packet will set them */
1134 conntrack->proto.tcp.state = TCP_CONNTRACK_NONE;
1135 conntrack->proto.tcp.last_index = TCP_NONE_SET;
1136
1137 DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1138 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1139 sender->td_end, sender->td_maxend, sender->td_maxwin,
1140 sender->td_scale,
1141 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1142 receiver->td_scale);
1143 return 1;
1144}
1145
1146struct ip_conntrack_protocol ip_conntrack_protocol_tcp =
1147{
1148 .proto = IPPROTO_TCP,
1149 .name = "tcp",
1150 .pkt_to_tuple = tcp_pkt_to_tuple,
1151 .invert_tuple = tcp_invert_tuple,
1152 .print_tuple = tcp_print_tuple,
1153 .print_conntrack = tcp_print_conntrack,
1154 .packet = tcp_packet,
1155 .new = tcp_new,
1156 .error = tcp_error,
1157#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
1158 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
1159 .to_nfattr = tcp_to_nfattr,
1160 .from_nfattr = nfattr_to_tcp,
1161 .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
1162 .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
1163#endif
1164};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
deleted file mode 100644
index 14c30c646c7f..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ /dev/null
@@ -1,148 +0,0 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/types.h>
10#include <linux/timer.h>
11#include <linux/netfilter.h>
12#include <linux/in.h>
13#include <linux/ip.h>
14#include <linux/udp.h>
15#include <linux/seq_file.h>
16#include <net/checksum.h>
17#include <linux/netfilter_ipv4.h>
18#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
19
20unsigned int ip_ct_udp_timeout __read_mostly = 30*HZ;
21unsigned int ip_ct_udp_timeout_stream __read_mostly = 180*HZ;
22
23static int udp_pkt_to_tuple(const struct sk_buff *skb,
24 unsigned int dataoff,
25 struct ip_conntrack_tuple *tuple)
26{
27 struct udphdr _hdr, *hp;
28
29 /* Actually only need first 8 bytes. */
30 hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
31 if (hp == NULL)
32 return 0;
33
34 tuple->src.u.udp.port = hp->source;
35 tuple->dst.u.udp.port = hp->dest;
36
37 return 1;
38}
39
40static int udp_invert_tuple(struct ip_conntrack_tuple *tuple,
41 const struct ip_conntrack_tuple *orig)
42{
43 tuple->src.u.udp.port = orig->dst.u.udp.port;
44 tuple->dst.u.udp.port = orig->src.u.udp.port;
45 return 1;
46}
47
48/* Print out the per-protocol part of the tuple. */
49static int udp_print_tuple(struct seq_file *s,
50 const struct ip_conntrack_tuple *tuple)
51{
52 return seq_printf(s, "sport=%hu dport=%hu ",
53 ntohs(tuple->src.u.udp.port),
54 ntohs(tuple->dst.u.udp.port));
55}
56
57/* Print out the private part of the conntrack. */
58static int udp_print_conntrack(struct seq_file *s,
59 const struct ip_conntrack *conntrack)
60{
61 return 0;
62}
63
64/* Returns verdict for packet, and may modify the conntrack type. */
65static int udp_packet(struct ip_conntrack *conntrack,
66 const struct sk_buff *skb,
67 enum ip_conntrack_info ctinfo)
68{
69 /* If we've seen traffic both ways, this is some kind of UDP
70 stream. Extend timeout. */
71 if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
72 ip_ct_refresh_acct(conntrack, ctinfo, skb,
73 ip_ct_udp_timeout_stream);
74 /* Also, more likely to be important, and not a probe */
75 if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status))
76 ip_conntrack_event_cache(IPCT_STATUS, skb);
77 } else
78 ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout);
79
80 return NF_ACCEPT;
81}
82
83/* Called when a new connection for this protocol found. */
84static int udp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
85{
86 return 1;
87}
88
89static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
90 unsigned int hooknum)
91{
92 struct iphdr *iph = skb->nh.iph;
93 unsigned int udplen = skb->len - iph->ihl * 4;
94 struct udphdr _hdr, *hdr;
95
96 /* Header is too small? */
97 hdr = skb_header_pointer(skb, iph->ihl*4, sizeof(_hdr), &_hdr);
98 if (hdr == NULL) {
99 if (LOG_INVALID(IPPROTO_UDP))
100 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
101 "ip_ct_udp: short packet ");
102 return -NF_ACCEPT;
103 }
104
105 /* Truncated/malformed packets */
106 if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
107 if (LOG_INVALID(IPPROTO_UDP))
108 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
109 "ip_ct_udp: truncated/malformed packet ");
110 return -NF_ACCEPT;
111 }
112
113 /* Packet with no checksum */
114 if (!hdr->check)
115 return NF_ACCEPT;
116
117 /* Checksum invalid? Ignore.
118 * We skip checking packets on the outgoing path
119 * because the checksum is assumed to be correct.
120 * FIXME: Source route IP option packets --RR */
121 if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
122 nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_UDP)) {
123 if (LOG_INVALID(IPPROTO_UDP))
124 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
125 "ip_ct_udp: bad UDP checksum ");
126 return -NF_ACCEPT;
127 }
128
129 return NF_ACCEPT;
130}
131
132struct ip_conntrack_protocol ip_conntrack_protocol_udp =
133{
134 .proto = IPPROTO_UDP,
135 .name = "udp",
136 .pkt_to_tuple = udp_pkt_to_tuple,
137 .invert_tuple = udp_invert_tuple,
138 .print_tuple = udp_print_tuple,
139 .print_conntrack = udp_print_conntrack,
140 .packet = udp_packet,
141 .new = udp_new,
142 .error = udp_error,
143#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
144 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
145 .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
146 .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
147#endif
148};
diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c
deleted file mode 100644
index c59a962c1f61..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_sip.c
+++ /dev/null
@@ -1,520 +0,0 @@
1/* SIP extension for IP connection tracking.
2 *
3 * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
4 * based on RR's ip_conntrack_ftp.c and other modules.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/ctype.h>
13#include <linux/skbuff.h>
14#include <linux/in.h>
15#include <linux/ip.h>
16#include <linux/udp.h>
17
18#include <linux/netfilter.h>
19#include <linux/netfilter_ipv4.h>
20#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
21#include <linux/netfilter_ipv4/ip_conntrack_sip.h>
22
23#if 0
24#define DEBUGP printk
25#else
26#define DEBUGP(format, args...)
27#endif
28
29MODULE_LICENSE("GPL");
30MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
31MODULE_DESCRIPTION("SIP connection tracking helper");
32
33#define MAX_PORTS 8
34static unsigned short ports[MAX_PORTS];
35static int ports_c;
36module_param_array(ports, ushort, &ports_c, 0400);
37MODULE_PARM_DESC(ports, "port numbers of sip servers");
38
39static unsigned int sip_timeout = SIP_TIMEOUT;
40module_param(sip_timeout, uint, 0600);
41MODULE_PARM_DESC(sip_timeout, "timeout for the master SIP session");
42
43unsigned int (*ip_nat_sip_hook)(struct sk_buff **pskb,
44 enum ip_conntrack_info ctinfo,
45 struct ip_conntrack *ct,
46 const char **dptr);
47EXPORT_SYMBOL_GPL(ip_nat_sip_hook);
48
49unsigned int (*ip_nat_sdp_hook)(struct sk_buff **pskb,
50 enum ip_conntrack_info ctinfo,
51 struct ip_conntrack_expect *exp,
52 const char *dptr);
53EXPORT_SYMBOL_GPL(ip_nat_sdp_hook);
54
55static int digits_len(const char *dptr, const char *limit, int *shift);
56static int epaddr_len(const char *dptr, const char *limit, int *shift);
57static int skp_digits_len(const char *dptr, const char *limit, int *shift);
58static int skp_epaddr_len(const char *dptr, const char *limit, int *shift);
59
60struct sip_header_nfo {
61 const char *lname;
62 const char *sname;
63 const char *ln_str;
64 size_t lnlen;
65 size_t snlen;
66 size_t ln_strlen;
67 int case_sensitive;
68 int (*match_len)(const char *, const char *, int *);
69};
70
71static struct sip_header_nfo ct_sip_hdrs[] = {
72 [POS_REG_REQ_URI] = { /* SIP REGISTER request URI */
73 .lname = "sip:",
74 .lnlen = sizeof("sip:") - 1,
75 .ln_str = ":",
76 .ln_strlen = sizeof(":") - 1,
77 .match_len = epaddr_len
78 },
79 [POS_REQ_URI] = { /* SIP request URI */
80 .lname = "sip:",
81 .lnlen = sizeof("sip:") - 1,
82 .ln_str = "@",
83 .ln_strlen = sizeof("@") - 1,
84 .match_len = epaddr_len
85 },
86 [POS_FROM] = { /* SIP From header */
87 .lname = "From:",
88 .lnlen = sizeof("From:") - 1,
89 .sname = "\r\nf:",
90 .snlen = sizeof("\r\nf:") - 1,
91 .ln_str = "sip:",
92 .ln_strlen = sizeof("sip:") - 1,
93 .match_len = skp_epaddr_len,
94 },
95 [POS_TO] = { /* SIP To header */
96 .lname = "To:",
97 .lnlen = sizeof("To:") - 1,
98 .sname = "\r\nt:",
99 .snlen = sizeof("\r\nt:") - 1,
100 .ln_str = "sip:",
101 .ln_strlen = sizeof("sip:") - 1,
102 .match_len = skp_epaddr_len,
103 },
104 [POS_VIA] = { /* SIP Via header */
105 .lname = "Via:",
106 .lnlen = sizeof("Via:") - 1,
107 .sname = "\r\nv:",
108 .snlen = sizeof("\r\nv:") - 1, /* rfc3261 "\r\n" */
109 .ln_str = "UDP ",
110 .ln_strlen = sizeof("UDP ") - 1,
111 .match_len = epaddr_len,
112 },
113 [POS_CONTACT] = { /* SIP Contact header */
114 .lname = "Contact:",
115 .lnlen = sizeof("Contact:") - 1,
116 .sname = "\r\nm:",
117 .snlen = sizeof("\r\nm:") - 1,
118 .ln_str = "sip:",
119 .ln_strlen = sizeof("sip:") - 1,
120 .match_len = skp_epaddr_len
121 },
122 [POS_CONTENT] = { /* SIP Content length header */
123 .lname = "Content-Length:",
124 .lnlen = sizeof("Content-Length:") - 1,
125 .sname = "\r\nl:",
126 .snlen = sizeof("\r\nl:") - 1,
127 .ln_str = ":",
128 .ln_strlen = sizeof(":") - 1,
129 .match_len = skp_digits_len
130 },
131 [POS_MEDIA] = { /* SDP media info */
132 .case_sensitive = 1,
133 .lname = "\nm=",
134 .lnlen = sizeof("\nm=") - 1,
135 .sname = "\rm=",
136 .snlen = sizeof("\rm=") - 1,
137 .ln_str = "audio ",
138 .ln_strlen = sizeof("audio ") - 1,
139 .match_len = digits_len
140 },
141 [POS_OWNER] = { /* SDP owner address*/
142 .case_sensitive = 1,
143 .lname = "\no=",
144 .lnlen = sizeof("\no=") - 1,
145 .sname = "\ro=",
146 .snlen = sizeof("\ro=") - 1,
147 .ln_str = "IN IP4 ",
148 .ln_strlen = sizeof("IN IP4 ") - 1,
149 .match_len = epaddr_len
150 },
151 [POS_CONNECTION] = { /* SDP connection info */
152 .case_sensitive = 1,
153 .lname = "\nc=",
154 .lnlen = sizeof("\nc=") - 1,
155 .sname = "\rc=",
156 .snlen = sizeof("\rc=") - 1,
157 .ln_str = "IN IP4 ",
158 .ln_strlen = sizeof("IN IP4 ") - 1,
159 .match_len = epaddr_len
160 },
161 [POS_SDP_HEADER] = { /* SDP version header */
162 .case_sensitive = 1,
163 .lname = "\nv=",
164 .lnlen = sizeof("\nv=") - 1,
165 .sname = "\rv=",
166 .snlen = sizeof("\rv=") - 1,
167 .ln_str = "=",
168 .ln_strlen = sizeof("=") - 1,
169 .match_len = digits_len
170 }
171};
172
173/* Get the line length until the first CR or LF is seen. */
174int ct_sip_lnlen(const char *line, const char *limit)
175{
176 const char *k = line;
177
178 while ((line <= limit) && (*line == '\r' || *line == '\n'))
179 line++;
180
181 while (line <= limit) {
182 if (*line == '\r' || *line == '\n')
183 break;
184 line++;
185 }
186 return line - k;
187}
188EXPORT_SYMBOL_GPL(ct_sip_lnlen);
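/* Illustrative example with a hypothetical buffer: for
 * "Via: SIP/2.0/UDP 1.2.3.4\r\nTo: ...", ct_sip_lnlen() returns the
 * number of bytes before the first CR or LF, here 24.
 */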
189
190/* Linear string search; case sensitivity is selected by the flag. */
191const char *ct_sip_search(const char *needle, const char *haystack,
192 size_t needle_len, size_t haystack_len,
193 int case_sensitive)
194{
195 const char *limit = haystack + (haystack_len - needle_len);
196
197 while (haystack <= limit) {
198 if (case_sensitive) {
199 if (strncmp(haystack, needle, needle_len) == 0)
200 return haystack;
201 } else {
202 if (strnicmp(haystack, needle, needle_len) == 0)
203 return haystack;
204 }
205 haystack++;
206 }
207 return NULL;
208}
209EXPORT_SYMBOL_GPL(ct_sip_search);
210
211static int digits_len(const char *dptr, const char *limit, int *shift)
212{
213 int len = 0;
214 while (dptr <= limit && isdigit(*dptr)) {
215 dptr++;
216 len++;
217 }
218 return len;
219}
220
221/* Get the digits length, skipping blank spaces. */
222static int skp_digits_len(const char *dptr, const char *limit, int *shift)
223{
224 for (; dptr <= limit && *dptr == ' '; dptr++)
225 (*shift)++;
226
227 return digits_len(dptr, limit, shift);
228}
229
230/* Simple IP address parser. */
231static int parse_ipaddr(const char *cp, const char **endp,
232 __be32 *ipaddr, const char *limit)
233{
234 unsigned long int val;
235 int i, digit = 0;
236
237 for (i = 0, *ipaddr = 0; cp <= limit && i < 4; i++) {
238 digit = 0;
239 if (!isdigit(*cp))
240 break;
241
242 val = simple_strtoul(cp, (char **)&cp, 10);
243 if (val > 0xFF)
244 return -1;
245
246 ((u_int8_t *)ipaddr)[i] = val;
247 digit = 1;
248
249 if (*cp != '.')
250 break;
251 cp++;
252 }
253 if (!digit)
254 return -1;
255
256 if (endp)
257 *endp = cp;
258
259 return 0;
260}
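/* Illustrative example with hypothetical input: for "192.168.0.1 ..."
 * parse_ipaddr() stores the four octets into *ipaddr in the order they
 * appear, leaves *endp just past the final "1" and returns 0; an octet
 * above 255, as in "300.1.1.1", makes it return -1.
 */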
261
262/* Skip the IP address. Returns its length. */
263static int epaddr_len(const char *dptr, const char *limit, int *shift)
264{
265 const char *aux = dptr;
266 __be32 ip;
267
268 if (parse_ipaddr(dptr, &dptr, &ip, limit) < 0) {
269		DEBUGP("ip: %s parse failed!\n", dptr);
270 return 0;
271 }
272
273 /* Port number */
274 if (*dptr == ':') {
275 dptr++;
276 dptr += digits_len(dptr, limit, shift);
277 }
278 return dptr - aux;
279}
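/* Illustrative example with hypothetical input: for "1.2.3.4:5060;..."
 * epaddr_len() consumes the address plus the ":5060" port suffix and
 * returns 12, the number of bytes skipped.
 */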
280
281/* Get the address length, skipping user info. */
282static int skp_epaddr_len(const char *dptr, const char *limit, int *shift)
283{
284 int s = *shift;
285
286 /* Search for @, but stop at the end of the line.
287 * We are inside a sip: URI, so we don't need to worry about
288 * continuation lines. */
289 while (dptr <= limit &&
290 *dptr != '@' && *dptr != '\r' && *dptr != '\n') {
291 (*shift)++;
292 dptr++;
293 }
294
295 if (dptr <= limit && *dptr == '@') {
296 dptr++;
297 (*shift)++;
298 } else
299 *shift = s;
300
301 return epaddr_len(dptr, limit, shift);
302}
303
304/* Returns 0 if not found, -1 error parsing. */
305int ct_sip_get_info(const char *dptr, size_t dlen,
306 unsigned int *matchoff,
307 unsigned int *matchlen,
308 enum sip_header_pos pos)
309{
310 struct sip_header_nfo *hnfo = &ct_sip_hdrs[pos];
311 const char *limit, *aux, *k = dptr;
312 int shift = 0;
313
314 limit = dptr + (dlen - hnfo->lnlen);
315
316 while (dptr <= limit) {
317 if ((strncmp(dptr, hnfo->lname, hnfo->lnlen) != 0) &&
318 (hnfo->sname == NULL ||
319 strncmp(dptr, hnfo->sname, hnfo->snlen) != 0)) {
320 dptr++;
321 continue;
322 }
323 aux = ct_sip_search(hnfo->ln_str, dptr, hnfo->ln_strlen,
324 ct_sip_lnlen(dptr, limit),
325 hnfo->case_sensitive);
326 if (!aux) {
327 DEBUGP("'%s' not found in '%s'.\n", hnfo->ln_str,
328 hnfo->lname);
329 return -1;
330 }
331 aux += hnfo->ln_strlen;
332
333 *matchlen = hnfo->match_len(aux, limit, &shift);
334 if (!*matchlen)
335 return -1;
336
337 *matchoff = (aux - k) + shift;
338
339 DEBUGP("%s match succeeded! - len: %u\n", hnfo->lname,
340 *matchlen);
341 return 1;
342 }
343 DEBUGP("%s header not found.\n", hnfo->lname);
344 return 0;
345}
346EXPORT_SYMBOL_GPL(ct_sip_get_info);
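/* Illustrative usage on a hypothetical message: calling
 * ct_sip_get_info(dptr, dlen, &matchoff, &matchlen, POS_CONNECTION) on
 * an SDP body containing "\nc=IN IP4 10.0.0.5\r" returns 1 with matchoff
 * and matchlen delimiting "10.0.0.5", ready to hand to parse_ipaddr()
 * as sip_help() below does.
 */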
347
348static int set_expected_rtp(struct sk_buff **pskb,
349 struct ip_conntrack *ct,
350 enum ip_conntrack_info ctinfo,
351 __be32 ipaddr, u_int16_t port,
352 const char *dptr)
353{
354 struct ip_conntrack_expect *exp;
355 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
356 int ret;
357 typeof(ip_nat_sdp_hook) ip_nat_sdp;
358
359 exp = ip_conntrack_expect_alloc(ct);
360 if (exp == NULL)
361 return NF_DROP;
362
363 exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
364 exp->tuple.src.u.udp.port = 0;
365 exp->tuple.dst.ip = ipaddr;
366 exp->tuple.dst.u.udp.port = htons(port);
367 exp->tuple.dst.protonum = IPPROTO_UDP;
368
369 exp->mask.src.ip = htonl(0xFFFFFFFF);
370 exp->mask.src.u.udp.port = 0;
371 exp->mask.dst.ip = htonl(0xFFFFFFFF);
372 exp->mask.dst.u.udp.port = htons(0xFFFF);
373 exp->mask.dst.protonum = 0xFF;
374
375 exp->expectfn = NULL;
376 exp->flags = 0;
377
378 ip_nat_sdp = rcu_dereference(ip_nat_sdp_hook);
379 if (ip_nat_sdp)
380 ret = ip_nat_sdp(pskb, ctinfo, exp, dptr);
381 else {
382 if (ip_conntrack_expect_related(exp) != 0)
383 ret = NF_DROP;
384 else
385 ret = NF_ACCEPT;
386 }
387 ip_conntrack_expect_put(exp);
388
389 return ret;
390}
391
392static int sip_help(struct sk_buff **pskb,
393 struct ip_conntrack *ct,
394 enum ip_conntrack_info ctinfo)
395{
396 unsigned int dataoff, datalen;
397 const char *dptr;
398 int ret = NF_ACCEPT;
399 int matchoff, matchlen;
400 __be32 ipaddr;
401 u_int16_t port;
402 typeof(ip_nat_sip_hook) ip_nat_sip;
403
404	/* No data? */
405 dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
406 if (dataoff >= (*pskb)->len) {
407 DEBUGP("skb->len = %u\n", (*pskb)->len);
408 return NF_ACCEPT;
409 }
410
411 ip_ct_refresh(ct, *pskb, sip_timeout * HZ);
412
413 if (!skb_is_nonlinear(*pskb))
414 dptr = (*pskb)->data + dataoff;
415 else {
416 DEBUGP("Copy of skbuff not supported yet.\n");
417 goto out;
418 }
419
420 ip_nat_sip = rcu_dereference(ip_nat_sip_hook);
421 if (ip_nat_sip) {
422 if (!ip_nat_sip(pskb, ctinfo, ct, &dptr)) {
423 ret = NF_DROP;
424 goto out;
425 }
426 }
427
428	/* After this point NAT could have mangled the skb,
429	   so we need to recalculate the payload length. */
430 datalen = (*pskb)->len - dataoff;
431
432 if (datalen < (sizeof("SIP/2.0 200") - 1))
433 goto out;
434
435 /* RTP info only in some SDP pkts */
436 if (memcmp(dptr, "INVITE", sizeof("INVITE") - 1) != 0 &&
437 memcmp(dptr, "SIP/2.0 200", sizeof("SIP/2.0 200") - 1) != 0) {
438 goto out;
439 }
440 /* Get ip and port address from SDP packet. */
441 if (ct_sip_get_info(dptr, datalen, &matchoff, &matchlen,
442 POS_CONNECTION) > 0) {
443
444 /* We'll drop only if there are parse problems. */
445 if (parse_ipaddr(dptr + matchoff, NULL, &ipaddr,
446 dptr + datalen) < 0) {
447 ret = NF_DROP;
448 goto out;
449 }
450 if (ct_sip_get_info(dptr, datalen, &matchoff, &matchlen,
451 POS_MEDIA) > 0) {
452
453 port = simple_strtoul(dptr + matchoff, NULL, 10);
454 if (port < 1024) {
455 ret = NF_DROP;
456 goto out;
457 }
458 ret = set_expected_rtp(pskb, ct, ctinfo,
459 ipaddr, port, dptr);
460 }
461 }
462out:
463 return ret;
464}
465
466static struct ip_conntrack_helper sip[MAX_PORTS];
467static char sip_names[MAX_PORTS][10];
468
469static void fini(void)
470{
471 int i;
472 for (i = 0; i < ports_c; i++) {
473 DEBUGP("unregistering helper for port %d\n", ports[i]);
474 ip_conntrack_helper_unregister(&sip[i]);
475 }
476}
477
478static int __init init(void)
479{
480 int i, ret;
481 char *tmpname;
482
483 if (ports_c == 0)
484 ports[ports_c++] = SIP_PORT;
485
486 for (i = 0; i < ports_c; i++) {
487 /* Create helper structure */
488 memset(&sip[i], 0, sizeof(struct ip_conntrack_helper));
489
490 sip[i].tuple.dst.protonum = IPPROTO_UDP;
491 sip[i].tuple.src.u.udp.port = htons(ports[i]);
492 sip[i].mask.src.u.udp.port = htons(0xFFFF);
493 sip[i].mask.dst.protonum = 0xFF;
494 sip[i].max_expected = 2;
495 sip[i].timeout = 3 * 60; /* 3 minutes */
496 sip[i].me = THIS_MODULE;
497 sip[i].help = sip_help;
498
499 tmpname = &sip_names[i][0];
500 if (ports[i] == SIP_PORT)
501 sprintf(tmpname, "sip");
502 else
503 sprintf(tmpname, "sip-%d", i);
504 sip[i].name = tmpname;
505
506 DEBUGP("port #%d: %d\n", i, ports[i]);
507
508 ret = ip_conntrack_helper_register(&sip[i]);
509 if (ret) {
510 printk("ERROR registering helper for port %d\n",
511 ports[i]);
512 fini();
513 return ret;
514 }
515 }
516 return 0;
517}
518
519module_init(init);
520module_exit(fini);
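/* Illustrative usage (hypothetical invocation): loading the module with
 * "modprobe ip_conntrack_sip ports=5060,5061" registers one helper per
 * listed UDP port; with no ports given, SIP_PORT alone is used.
 */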
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
deleted file mode 100644
index 56b2f7546d1e..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ /dev/null
@@ -1,962 +0,0 @@
1/* This file contains all the functions required for the standalone
2 ip_conntrack module.
3
4 These are not required by the compatibility layer.
5*/
6
7/* (C) 1999-2001 Paul `Rusty' Russell
8 * (C) 2002-2005 Netfilter Core Team <coreteam@netfilter.org>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 */
14
15#include <linux/types.h>
16#include <linux/ip.h>
17#include <linux/netfilter.h>
18#include <linux/netfilter_ipv4.h>
19#include <linux/module.h>
20#include <linux/skbuff.h>
21#include <linux/proc_fs.h>
22#include <linux/seq_file.h>
23#include <linux/percpu.h>
24#ifdef CONFIG_SYSCTL
25#include <linux/sysctl.h>
26#endif
27#include <net/checksum.h>
28#include <net/ip.h>
29#include <net/route.h>
30
31#include <linux/netfilter_ipv4/ip_conntrack.h>
32#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
33#include <linux/netfilter_ipv4/ip_conntrack_core.h>
34#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
35
36#if 0
37#define DEBUGP printk
38#else
39#define DEBUGP(format, args...)
40#endif
41
42MODULE_LICENSE("GPL");
43
44extern atomic_t ip_conntrack_count;
45DECLARE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
46
47static int kill_proto(struct ip_conntrack *i, void *data)
48{
49 return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum ==
50 *((u_int8_t *) data));
51}
52
53#ifdef CONFIG_PROC_FS
54static int
55print_tuple(struct seq_file *s, const struct ip_conntrack_tuple *tuple,
56 struct ip_conntrack_protocol *proto)
57{
58 seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
59 NIPQUAD(tuple->src.ip), NIPQUAD(tuple->dst.ip));
60 return proto->print_tuple(s, tuple);
61}
62
63#ifdef CONFIG_IP_NF_CT_ACCT
64static unsigned int
65seq_print_counters(struct seq_file *s,
66 const struct ip_conntrack_counter *counter)
67{
68 return seq_printf(s, "packets=%llu bytes=%llu ",
69 (unsigned long long)counter->packets,
70 (unsigned long long)counter->bytes);
71}
72#else
73#define seq_print_counters(x, y) 0
74#endif
75
76struct ct_iter_state {
77 unsigned int bucket;
78};
79
80static struct list_head *ct_get_first(struct seq_file *seq)
81{
82 struct ct_iter_state *st = seq->private;
83
84 for (st->bucket = 0;
85 st->bucket < ip_conntrack_htable_size;
86 st->bucket++) {
87 if (!list_empty(&ip_conntrack_hash[st->bucket]))
88 return ip_conntrack_hash[st->bucket].next;
89 }
90 return NULL;
91}
92
93static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head)
94{
95 struct ct_iter_state *st = seq->private;
96
97 head = head->next;
98 while (head == &ip_conntrack_hash[st->bucket]) {
99 if (++st->bucket >= ip_conntrack_htable_size)
100 return NULL;
101 head = ip_conntrack_hash[st->bucket].next;
102 }
103 return head;
104}
105
106static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos)
107{
108 struct list_head *head = ct_get_first(seq);
109
110 if (head)
111 while (pos && (head = ct_get_next(seq, head)))
112 pos--;
113 return pos ? NULL : head;
114}
115
116static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
117{
118 read_lock_bh(&ip_conntrack_lock);
119 return ct_get_idx(seq, *pos);
120}
121
122static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
123{
124 (*pos)++;
125 return ct_get_next(s, v);
126}
127
128static void ct_seq_stop(struct seq_file *s, void *v)
129{
130 read_unlock_bh(&ip_conntrack_lock);
131}
132
133static int ct_seq_show(struct seq_file *s, void *v)
134{
135 const struct ip_conntrack_tuple_hash *hash = v;
136 const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash);
137 struct ip_conntrack_protocol *proto;
138
139 IP_NF_ASSERT(conntrack);
140
141 /* we only want to print DIR_ORIGINAL */
142 if (DIRECTION(hash))
143 return 0;
144
145 proto = __ip_conntrack_proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
146 IP_NF_ASSERT(proto);
147
148 if (seq_printf(s, "%-8s %u %ld ",
149 proto->name,
150 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
151 timer_pending(&conntrack->timeout)
152 ? (long)(conntrack->timeout.expires - jiffies)/HZ
153 : 0) != 0)
154 return -ENOSPC;
155
156 if (proto->print_conntrack(s, conntrack))
157 return -ENOSPC;
158
159 if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
160 proto))
161 return -ENOSPC;
162
163 if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL]))
164 return -ENOSPC;
165
166 if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)))
167 if (seq_printf(s, "[UNREPLIED] "))
168 return -ENOSPC;
169
170 if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
171 proto))
172 return -ENOSPC;
173
174 if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY]))
175 return -ENOSPC;
176
177 if (test_bit(IPS_ASSURED_BIT, &conntrack->status))
178 if (seq_printf(s, "[ASSURED] "))
179 return -ENOSPC;
180
181#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
182 if (seq_printf(s, "mark=%u ", conntrack->mark))
183 return -ENOSPC;
184#endif
185
186#ifdef CONFIG_IP_NF_CONNTRACK_SECMARK
187 if (seq_printf(s, "secmark=%u ", conntrack->secmark))
188 return -ENOSPC;
189#endif
190
191 if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use)))
192 return -ENOSPC;
193
194 return 0;
195}
196
197static struct seq_operations ct_seq_ops = {
198 .start = ct_seq_start,
199 .next = ct_seq_next,
200 .stop = ct_seq_stop,
201 .show = ct_seq_show
202};
203
204static int ct_open(struct inode *inode, struct file *file)
205{
206 struct seq_file *seq;
207 struct ct_iter_state *st;
208 int ret;
209
210 st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL);
211 if (st == NULL)
212 return -ENOMEM;
213 ret = seq_open(file, &ct_seq_ops);
214 if (ret)
215 goto out_free;
216 seq = file->private_data;
217 seq->private = st;
218 memset(st, 0, sizeof(struct ct_iter_state));
219 return ret;
220out_free:
221 kfree(st);
222 return ret;
223}
224
225static const struct file_operations ct_file_ops = {
226 .owner = THIS_MODULE,
227 .open = ct_open,
228 .read = seq_read,
229 .llseek = seq_lseek,
230 .release = seq_release_private,
231};
232
233/* expects */
234static void *exp_seq_start(struct seq_file *s, loff_t *pos)
235{
236 struct list_head *e = &ip_conntrack_expect_list;
237 loff_t i;
238
239	/* The seq_file API strangely calls stop even if we fail,
240	 * thus we need to grab the lock here since stop unlocks. */
241 read_lock_bh(&ip_conntrack_lock);
242
243 if (list_empty(e))
244 return NULL;
245
246 for (i = 0; i <= *pos; i++) {
247 e = e->next;
248 if (e == &ip_conntrack_expect_list)
249 return NULL;
250 }
251 return e;
252}
253
254static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
255{
256 struct list_head *e = v;
257
258 ++*pos;
259 e = e->next;
260
261 if (e == &ip_conntrack_expect_list)
262 return NULL;
263
264 return e;
265}
266
267static void exp_seq_stop(struct seq_file *s, void *v)
268{
269 read_unlock_bh(&ip_conntrack_lock);
270}
271
272static int exp_seq_show(struct seq_file *s, void *v)
273{
274 struct ip_conntrack_expect *expect = v;
275
276 if (expect->timeout.function)
277 seq_printf(s, "%ld ", timer_pending(&expect->timeout)
278 ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
279 else
280 seq_printf(s, "- ");
281
282 seq_printf(s, "proto=%u ", expect->tuple.dst.protonum);
283
284 print_tuple(s, &expect->tuple,
285 __ip_conntrack_proto_find(expect->tuple.dst.protonum));
286 return seq_putc(s, '\n');
287}
288
289static struct seq_operations exp_seq_ops = {
290 .start = exp_seq_start,
291 .next = exp_seq_next,
292 .stop = exp_seq_stop,
293 .show = exp_seq_show
294};
295
296static int exp_open(struct inode *inode, struct file *file)
297{
298 return seq_open(file, &exp_seq_ops);
299}
300
301static const struct file_operations exp_file_ops = {
302 .owner = THIS_MODULE,
303 .open = exp_open,
304 .read = seq_read,
305 .llseek = seq_lseek,
306 .release = seq_release
307};
308
309static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
310{
311 int cpu;
312
313 if (*pos == 0)
314 return SEQ_START_TOKEN;
315
316 for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
317 if (!cpu_possible(cpu))
318 continue;
319 *pos = cpu+1;
320 return &per_cpu(ip_conntrack_stat, cpu);
321 }
322
323 return NULL;
324}
325
326static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
327{
328 int cpu;
329
330 for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
331 if (!cpu_possible(cpu))
332 continue;
333 *pos = cpu+1;
334 return &per_cpu(ip_conntrack_stat, cpu);
335 }
336
337 return NULL;
338}
339
340static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
341{
342}
343
344static int ct_cpu_seq_show(struct seq_file *seq, void *v)
345{
346 unsigned int nr_conntracks = atomic_read(&ip_conntrack_count);
347 struct ip_conntrack_stat *st = v;
348
349 if (v == SEQ_START_TOKEN) {
350 seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n");
351 return 0;
352 }
353
354 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
355 "%08x %08x %08x %08x %08x %08x %08x %08x \n",
356 nr_conntracks,
357 st->searched,
358 st->found,
359 st->new,
360 st->invalid,
361 st->ignore,
362 st->delete,
363 st->delete_list,
364 st->insert,
365 st->insert_failed,
366 st->drop,
367 st->early_drop,
368 st->error,
369
370 st->expect_new,
371 st->expect_create,
372 st->expect_delete
373 );
374 return 0;
375}
376
377static struct seq_operations ct_cpu_seq_ops = {
378 .start = ct_cpu_seq_start,
379 .next = ct_cpu_seq_next,
380 .stop = ct_cpu_seq_stop,
381 .show = ct_cpu_seq_show,
382};
383
384static int ct_cpu_seq_open(struct inode *inode, struct file *file)
385{
386 return seq_open(file, &ct_cpu_seq_ops);
387}
388
389static const struct file_operations ct_cpu_seq_fops = {
390 .owner = THIS_MODULE,
391 .open = ct_cpu_seq_open,
392 .read = seq_read,
393 .llseek = seq_lseek,
394 .release = seq_release_private,
395};
396#endif
397
398static unsigned int ip_confirm(unsigned int hooknum,
399 struct sk_buff **pskb,
400 const struct net_device *in,
401 const struct net_device *out,
402 int (*okfn)(struct sk_buff *))
403{
404 /* We've seen it coming out the other side: confirm it */
405 return ip_conntrack_confirm(pskb);
406}
407
408static unsigned int ip_conntrack_help(unsigned int hooknum,
409 struct sk_buff **pskb,
410 const struct net_device *in,
411 const struct net_device *out,
412 int (*okfn)(struct sk_buff *))
413{
414 struct ip_conntrack *ct;
415 enum ip_conntrack_info ctinfo;
416
417 /* This is where we call the helper: as the packet goes out. */
418 ct = ip_conntrack_get(*pskb, &ctinfo);
419 if (ct && ct->helper && ctinfo != IP_CT_RELATED + IP_CT_IS_REPLY) {
420 unsigned int ret;
421 ret = ct->helper->help(pskb, ct, ctinfo);
422 if (ret != NF_ACCEPT)
423 return ret;
424 }
425 return NF_ACCEPT;
426}
427
428static unsigned int ip_conntrack_defrag(unsigned int hooknum,
429 struct sk_buff **pskb,
430 const struct net_device *in,
431 const struct net_device *out,
432 int (*okfn)(struct sk_buff *))
433{
434#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE)
435 /* Previously seen (loopback)? Ignore. Do this before
436 fragment check. */
437 if ((*pskb)->nfct)
438 return NF_ACCEPT;
439#endif
440
441 /* Gather fragments. */
442 if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
443 *pskb = ip_ct_gather_frags(*pskb,
444 hooknum == NF_IP_PRE_ROUTING ?
445 IP_DEFRAG_CONNTRACK_IN :
446 IP_DEFRAG_CONNTRACK_OUT);
447 if (!*pskb)
448 return NF_STOLEN;
449 }
450 return NF_ACCEPT;
451}
452
453static unsigned int ip_conntrack_local(unsigned int hooknum,
454 struct sk_buff **pskb,
455 const struct net_device *in,
456 const struct net_device *out,
457 int (*okfn)(struct sk_buff *))
458{
459 /* root is playing with raw sockets. */
460 if ((*pskb)->len < sizeof(struct iphdr)
461 || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
462 if (net_ratelimit())
463 printk("ipt_hook: happy cracking.\n");
464 return NF_ACCEPT;
465 }
466 return ip_conntrack_in(hooknum, pskb, in, out, okfn);
467}
468
469/* Connection tracking may drop packets, but never alters them, so
470 make it the first hook. */
471static struct nf_hook_ops ip_conntrack_ops[] = {
472 {
473 .hook = ip_conntrack_defrag,
474 .owner = THIS_MODULE,
475 .pf = PF_INET,
476 .hooknum = NF_IP_PRE_ROUTING,
477 .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
478 },
479 {
480 .hook = ip_conntrack_in,
481 .owner = THIS_MODULE,
482 .pf = PF_INET,
483 .hooknum = NF_IP_PRE_ROUTING,
484 .priority = NF_IP_PRI_CONNTRACK,
485 },
486 {
487 .hook = ip_conntrack_defrag,
488 .owner = THIS_MODULE,
489 .pf = PF_INET,
490 .hooknum = NF_IP_LOCAL_OUT,
491 .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
492 },
493 {
494 .hook = ip_conntrack_local,
495 .owner = THIS_MODULE,
496 .pf = PF_INET,
497 .hooknum = NF_IP_LOCAL_OUT,
498 .priority = NF_IP_PRI_CONNTRACK,
499 },
500 {
501 .hook = ip_conntrack_help,
502 .owner = THIS_MODULE,
503 .pf = PF_INET,
504 .hooknum = NF_IP_POST_ROUTING,
505 .priority = NF_IP_PRI_CONNTRACK_HELPER,
506 },
507 {
508 .hook = ip_conntrack_help,
509 .owner = THIS_MODULE,
510 .pf = PF_INET,
511 .hooknum = NF_IP_LOCAL_IN,
512 .priority = NF_IP_PRI_CONNTRACK_HELPER,
513 },
514 {
515 .hook = ip_confirm,
516 .owner = THIS_MODULE,
517 .pf = PF_INET,
518 .hooknum = NF_IP_POST_ROUTING,
519 .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
520 },
521 {
522 .hook = ip_confirm,
523 .owner = THIS_MODULE,
524 .pf = PF_INET,
525 .hooknum = NF_IP_LOCAL_IN,
526 .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
527 },
528};
529
530/* Sysctl support */
531
532int ip_conntrack_checksum __read_mostly = 1;
533
534#ifdef CONFIG_SYSCTL
535
536/* From ip_conntrack_core.c */
537extern int ip_conntrack_max;
538extern unsigned int ip_conntrack_htable_size;
539
540/* From ip_conntrack_proto_tcp.c */
541extern unsigned int ip_ct_tcp_timeout_syn_sent;
542extern unsigned int ip_ct_tcp_timeout_syn_recv;
543extern unsigned int ip_ct_tcp_timeout_established;
544extern unsigned int ip_ct_tcp_timeout_fin_wait;
545extern unsigned int ip_ct_tcp_timeout_close_wait;
546extern unsigned int ip_ct_tcp_timeout_last_ack;
547extern unsigned int ip_ct_tcp_timeout_time_wait;
548extern unsigned int ip_ct_tcp_timeout_close;
549extern unsigned int ip_ct_tcp_timeout_max_retrans;
550extern int ip_ct_tcp_loose;
551extern int ip_ct_tcp_be_liberal;
552extern int ip_ct_tcp_max_retrans;
553
554/* From ip_conntrack_proto_udp.c */
555extern unsigned int ip_ct_udp_timeout;
556extern unsigned int ip_ct_udp_timeout_stream;
557
558/* From ip_conntrack_proto_icmp.c */
559extern unsigned int ip_ct_icmp_timeout;
560
561/* From ip_conntrack_proto_generic.c */
562extern unsigned int ip_ct_generic_timeout;
563
564/* Log invalid packets of a given protocol */
565static int log_invalid_proto_min = 0;
566static int log_invalid_proto_max = 255;
567
568static struct ctl_table_header *ip_ct_sysctl_header;
569
570static ctl_table ip_ct_sysctl_table[] = {
571 {
572 .ctl_name = NET_IPV4_NF_CONNTRACK_MAX,
573 .procname = "ip_conntrack_max",
574 .data = &ip_conntrack_max,
575 .maxlen = sizeof(int),
576 .mode = 0644,
577 .proc_handler = &proc_dointvec,
578 },
579 {
580 .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT,
581 .procname = "ip_conntrack_count",
582 .data = &ip_conntrack_count,
583 .maxlen = sizeof(int),
584 .mode = 0444,
585 .proc_handler = &proc_dointvec,
586 },
587 {
588 .ctl_name = NET_IPV4_NF_CONNTRACK_BUCKETS,
589 .procname = "ip_conntrack_buckets",
590 .data = &ip_conntrack_htable_size,
591 .maxlen = sizeof(unsigned int),
592 .mode = 0444,
593 .proc_handler = &proc_dointvec,
594 },
595 {
596 .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM,
597 .procname = "ip_conntrack_checksum",
598 .data = &ip_conntrack_checksum,
599 .maxlen = sizeof(int),
600 .mode = 0644,
601 .proc_handler = &proc_dointvec,
602 },
603 {
604 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT,
605 .procname = "ip_conntrack_tcp_timeout_syn_sent",
606 .data = &ip_ct_tcp_timeout_syn_sent,
607 .maxlen = sizeof(unsigned int),
608 .mode = 0644,
609 .proc_handler = &proc_dointvec_jiffies,
610 },
611 {
612 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV,
613 .procname = "ip_conntrack_tcp_timeout_syn_recv",
614 .data = &ip_ct_tcp_timeout_syn_recv,
615 .maxlen = sizeof(unsigned int),
616 .mode = 0644,
617 .proc_handler = &proc_dointvec_jiffies,
618 },
619 {
620 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED,
621 .procname = "ip_conntrack_tcp_timeout_established",
622 .data = &ip_ct_tcp_timeout_established,
623 .maxlen = sizeof(unsigned int),
624 .mode = 0644,
625 .proc_handler = &proc_dointvec_jiffies,
626 },
627 {
628 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT,
629 .procname = "ip_conntrack_tcp_timeout_fin_wait",
630 .data = &ip_ct_tcp_timeout_fin_wait,
631 .maxlen = sizeof(unsigned int),
632 .mode = 0644,
633 .proc_handler = &proc_dointvec_jiffies,
634 },
635 {
636 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT,
637 .procname = "ip_conntrack_tcp_timeout_close_wait",
638 .data = &ip_ct_tcp_timeout_close_wait,
639 .maxlen = sizeof(unsigned int),
640 .mode = 0644,
641 .proc_handler = &proc_dointvec_jiffies,
642 },
643 {
644 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK,
645 .procname = "ip_conntrack_tcp_timeout_last_ack",
646 .data = &ip_ct_tcp_timeout_last_ack,
647 .maxlen = sizeof(unsigned int),
648 .mode = 0644,
649 .proc_handler = &proc_dointvec_jiffies,
650 },
651 {
652 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT,
653 .procname = "ip_conntrack_tcp_timeout_time_wait",
654 .data = &ip_ct_tcp_timeout_time_wait,
655 .maxlen = sizeof(unsigned int),
656 .mode = 0644,
657 .proc_handler = &proc_dointvec_jiffies,
658 },
659 {
660 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE,
661 .procname = "ip_conntrack_tcp_timeout_close",
662 .data = &ip_ct_tcp_timeout_close,
663 .maxlen = sizeof(unsigned int),
664 .mode = 0644,
665 .proc_handler = &proc_dointvec_jiffies,
666 },
667 {
668 .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT,
669 .procname = "ip_conntrack_udp_timeout",
670 .data = &ip_ct_udp_timeout,
671 .maxlen = sizeof(unsigned int),
672 .mode = 0644,
673 .proc_handler = &proc_dointvec_jiffies,
674 },
675 {
676 .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM,
677 .procname = "ip_conntrack_udp_timeout_stream",
678 .data = &ip_ct_udp_timeout_stream,
679 .maxlen = sizeof(unsigned int),
680 .mode = 0644,
681 .proc_handler = &proc_dointvec_jiffies,
682 },
683 {
684 .ctl_name = NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT,
685 .procname = "ip_conntrack_icmp_timeout",
686 .data = &ip_ct_icmp_timeout,
687 .maxlen = sizeof(unsigned int),
688 .mode = 0644,
689 .proc_handler = &proc_dointvec_jiffies,
690 },
691 {
692 .ctl_name = NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT,
693 .procname = "ip_conntrack_generic_timeout",
694 .data = &ip_ct_generic_timeout,
695 .maxlen = sizeof(unsigned int),
696 .mode = 0644,
697 .proc_handler = &proc_dointvec_jiffies,
698 },
699 {
700 .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID,
701 .procname = "ip_conntrack_log_invalid",
702 .data = &ip_ct_log_invalid,
703 .maxlen = sizeof(unsigned int),
704 .mode = 0644,
705 .proc_handler = &proc_dointvec_minmax,
706 .strategy = &sysctl_intvec,
707 .extra1 = &log_invalid_proto_min,
708 .extra2 = &log_invalid_proto_max,
709 },
710 {
711 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
712 .procname = "ip_conntrack_tcp_timeout_max_retrans",
713 .data = &ip_ct_tcp_timeout_max_retrans,
714 .maxlen = sizeof(unsigned int),
715 .mode = 0644,
716 .proc_handler = &proc_dointvec_jiffies,
717 },
718 {
719 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_LOOSE,
720 .procname = "ip_conntrack_tcp_loose",
721 .data = &ip_ct_tcp_loose,
722 .maxlen = sizeof(unsigned int),
723 .mode = 0644,
724 .proc_handler = &proc_dointvec,
725 },
726 {
727 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL,
728 .procname = "ip_conntrack_tcp_be_liberal",
729 .data = &ip_ct_tcp_be_liberal,
730 .maxlen = sizeof(unsigned int),
731 .mode = 0644,
732 .proc_handler = &proc_dointvec,
733 },
734 {
735 .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS,
736 .procname = "ip_conntrack_tcp_max_retrans",
737 .data = &ip_ct_tcp_max_retrans,
738 .maxlen = sizeof(unsigned int),
739 .mode = 0644,
740 .proc_handler = &proc_dointvec,
741 },
742 { .ctl_name = 0 }
743};
744
745#define NET_IP_CONNTRACK_MAX 2089
746
747static ctl_table ip_ct_netfilter_table[] = {
748 {
749 .ctl_name = NET_IPV4_NETFILTER,
750 .procname = "netfilter",
751 .mode = 0555,
752 .child = ip_ct_sysctl_table,
753 },
754 {
755 .ctl_name = NET_IP_CONNTRACK_MAX,
756 .procname = "ip_conntrack_max",
757 .data = &ip_conntrack_max,
758 .maxlen = sizeof(int),
759 .mode = 0644,
760 .proc_handler = &proc_dointvec
761 },
762 { .ctl_name = 0 }
763};
764
765static ctl_table ip_ct_ipv4_table[] = {
766 {
767 .ctl_name = NET_IPV4,
768 .procname = "ipv4",
769 .mode = 0555,
770 .child = ip_ct_netfilter_table,
771 },
772 { .ctl_name = 0 }
773};
774
775static ctl_table ip_ct_net_table[] = {
776 {
777 .ctl_name = CTL_NET,
778 .procname = "net",
779 .mode = 0555,
780 .child = ip_ct_ipv4_table,
781 },
782 { .ctl_name = 0 }
783};
784
785EXPORT_SYMBOL(ip_ct_log_invalid);
786#endif /* CONFIG_SYSCTL */
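
The four ctl_table arrays above chain child tables together (net -> ipv4 -> netfilter -> the conntrack entries), so register_sysctl_table() exposes each knob under /proc/sys/net/ipv4/netfilter/; the extra NET_IP_CONNTRACK_MAX entry additionally aliases ip_conntrack_max directly under ipv4. A minimal userspace sketch of poking one of these knobs through procfs (assumes the module is loaded and root privileges):

#include <stdio.h>

int main(void)
{
	const char *path = "/proc/sys/net/ipv4/netfilter/ip_conntrack_max";
	FILE *f = fopen(path, "r+");
	int max;

	if (!f)
		return 1;
	if (fscanf(f, "%d", &max) == 1) {
		printf("ip_conntrack_max = %d\n", max);
		rewind(f);
		fprintf(f, "%d\n", max * 2);	/* mode 0644: root may write */
	}
	fclose(f);
	return 0;
}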
787
788/* FIXME: Allow NULL functions and sub in pointers to generic for
789 them. --RR */
790int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto)
791{
792 int ret = 0;
793
794 write_lock_bh(&ip_conntrack_lock);
795 if (ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) {
796 ret = -EBUSY;
797 goto out;
798 }
799 rcu_assign_pointer(ip_ct_protos[proto->proto], proto);
800 out:
801 write_unlock_bh(&ip_conntrack_lock);
802 return ret;
803}
804
805void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
806{
807 write_lock_bh(&ip_conntrack_lock);
808 rcu_assign_pointer(ip_ct_protos[proto->proto],
809 &ip_conntrack_generic_protocol);
810 write_unlock_bh(&ip_conntrack_lock);
811 synchronize_rcu();
812
813	/* Remove all conntrack entries for this protocol */
814 ip_ct_iterate_cleanup(kill_proto, &proto->proto);
815}
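
Both register and unregister follow the usual RCU publication discipline: the pointer is swapped under the write lock, and the unregister path calls synchronize_rcu() before cleaning up, so no reader can still be running through the old protocol when it is torn down. A sketch of the same pattern with a hypothetical single-slot registry (only the RCU and spinlock calls are real kernel API):

#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct my_ops { void (*handle)(int arg); };

static void default_handle(int arg) { }
static struct my_ops default_ops = { .handle = default_handle };
static struct my_ops *active_ops = &default_ops;
static DEFINE_SPINLOCK(ops_lock);

/* Reader: sample the pointer once inside the read-side section. */
void my_ops_call(int arg)
{
	struct my_ops *ops;

	rcu_read_lock();
	ops = rcu_dereference(active_ops);
	ops->handle(arg);
	rcu_read_unlock();
}

/* Unregister: unpublish first, wait out readers, then clean up. */
void my_ops_unregister(struct my_ops *ops)
{
	spin_lock_bh(&ops_lock);
	rcu_assign_pointer(active_ops, &default_ops);
	spin_unlock_bh(&ops_lock);
	synchronize_rcu();
	/* no CPU can still be executing through *ops here */
}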
816
817static int __init ip_conntrack_standalone_init(void)
818{
819#ifdef CONFIG_PROC_FS
820 struct proc_dir_entry *proc, *proc_exp, *proc_stat;
821#endif
822 int ret = 0;
823
824 ret = ip_conntrack_init();
825 if (ret < 0)
826 return ret;
827
828#ifdef CONFIG_PROC_FS
829 ret = -ENOMEM;
830 proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops);
831 if (!proc) goto cleanup_init;
832
833 proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440,
834 &exp_file_ops);
835 if (!proc_exp) goto cleanup_proc;
836
837 proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
838 if (!proc_stat)
839 goto cleanup_proc_exp;
840
841 proc_stat->proc_fops = &ct_cpu_seq_fops;
842 proc_stat->owner = THIS_MODULE;
843#endif
844
845 ret = nf_register_hooks(ip_conntrack_ops, ARRAY_SIZE(ip_conntrack_ops));
846 if (ret < 0) {
847 printk("ip_conntrack: can't register hooks.\n");
848 goto cleanup_proc_stat;
849 }
850#ifdef CONFIG_SYSCTL
851 ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table);
852 if (ip_ct_sysctl_header == NULL) {
853 printk("ip_conntrack: can't register to sysctl.\n");
854 ret = -ENOMEM;
855 goto cleanup_hooks;
856 }
857#endif
858 return ret;
859
860#ifdef CONFIG_SYSCTL
861 cleanup_hooks:
862 nf_unregister_hooks(ip_conntrack_ops, ARRAY_SIZE(ip_conntrack_ops));
863#endif
864 cleanup_proc_stat:
865#ifdef CONFIG_PROC_FS
866 remove_proc_entry("ip_conntrack", proc_net_stat);
867 cleanup_proc_exp:
868 proc_net_remove("ip_conntrack_expect");
869 cleanup_proc:
870 proc_net_remove("ip_conntrack");
871 cleanup_init:
872#endif /* CONFIG_PROC_FS */
873 ip_conntrack_cleanup();
874 return ret;
875}
876
877static void __exit ip_conntrack_standalone_fini(void)
878{
879 synchronize_net();
880#ifdef CONFIG_SYSCTL
881 unregister_sysctl_table(ip_ct_sysctl_header);
882#endif
883 nf_unregister_hooks(ip_conntrack_ops, ARRAY_SIZE(ip_conntrack_ops));
884#ifdef CONFIG_PROC_FS
885 remove_proc_entry("ip_conntrack", proc_net_stat);
886 proc_net_remove("ip_conntrack_expect");
887 proc_net_remove("ip_conntrack");
888#endif /* CONFIG_PROC_FS */
889 ip_conntrack_cleanup();
890}
891
892module_init(ip_conntrack_standalone_init);
893module_exit(ip_conntrack_standalone_fini);
894
895/* Some modules need us, but don't depend directly on any symbol.
896 They should call this. */
897void need_conntrack(void)
898{
899}
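
A hypothetical dependent module would simply call it from its init path; the call does nothing at runtime, but the symbol reference is what makes modprobe load ip_conntrack first. Sketch (module name and the explicit extern are illustrative):

#include <linux/module.h>

extern void need_conntrack(void);	/* exported above */

static int __init my_helper_init(void)
{
	need_conntrack();	/* runtime no-op; the reference is the point */
	return 0;
}
module_init(my_helper_init);
MODULE_LICENSE("GPL");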
900
901#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
902EXPORT_SYMBOL_GPL(ip_conntrack_chain);
903EXPORT_SYMBOL_GPL(ip_conntrack_expect_chain);
904EXPORT_SYMBOL_GPL(ip_conntrack_register_notifier);
905EXPORT_SYMBOL_GPL(ip_conntrack_unregister_notifier);
906EXPORT_SYMBOL_GPL(__ip_ct_event_cache_init);
907EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache);
908#endif
909EXPORT_SYMBOL(ip_conntrack_protocol_register);
910EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
911EXPORT_SYMBOL(ip_ct_get_tuple);
912EXPORT_SYMBOL(invert_tuplepr);
913EXPORT_SYMBOL(ip_conntrack_alter_reply);
914EXPORT_SYMBOL(ip_conntrack_destroyed);
915EXPORT_SYMBOL(need_conntrack);
916EXPORT_SYMBOL(ip_conntrack_helper_register);
917EXPORT_SYMBOL(ip_conntrack_helper_unregister);
918EXPORT_SYMBOL(ip_ct_iterate_cleanup);
919EXPORT_SYMBOL(__ip_ct_refresh_acct);
920
921EXPORT_SYMBOL(ip_conntrack_expect_alloc);
922EXPORT_SYMBOL(ip_conntrack_expect_put);
923EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find);
924EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get);
925EXPORT_SYMBOL(ip_conntrack_expect_related);
926EXPORT_SYMBOL(ip_conntrack_unexpect_related);
927EXPORT_SYMBOL_GPL(ip_conntrack_expect_list);
928EXPORT_SYMBOL_GPL(ip_ct_unlink_expect);
929
930EXPORT_SYMBOL(ip_conntrack_tuple_taken);
931EXPORT_SYMBOL(ip_ct_gather_frags);
932EXPORT_SYMBOL(ip_conntrack_htable_size);
933EXPORT_SYMBOL(ip_conntrack_lock);
934EXPORT_SYMBOL(ip_conntrack_hash);
935EXPORT_SYMBOL(ip_conntrack_untracked);
936EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
937#ifdef CONFIG_IP_NF_NAT_NEEDED
938EXPORT_SYMBOL(ip_conntrack_tcp_update);
939#endif
940
941EXPORT_SYMBOL_GPL(ip_conntrack_flush);
942EXPORT_SYMBOL_GPL(__ip_conntrack_find);
943
944EXPORT_SYMBOL_GPL(ip_conntrack_alloc);
945EXPORT_SYMBOL_GPL(ip_conntrack_free);
946EXPORT_SYMBOL_GPL(ip_conntrack_hash_insert);
947
948EXPORT_SYMBOL_GPL(ip_ct_remove_expectations);
949
950EXPORT_SYMBOL_GPL(ip_conntrack_helper_find_get);
951EXPORT_SYMBOL_GPL(ip_conntrack_helper_put);
952EXPORT_SYMBOL_GPL(__ip_conntrack_helper_find_byname);
953
954EXPORT_SYMBOL_GPL(ip_conntrack_proto_find_get);
955EXPORT_SYMBOL_GPL(ip_conntrack_proto_put);
956EXPORT_SYMBOL_GPL(__ip_conntrack_proto_find);
957EXPORT_SYMBOL_GPL(ip_conntrack_checksum);
958#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
959 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
960EXPORT_SYMBOL_GPL(ip_ct_port_tuple_to_nfattr);
961EXPORT_SYMBOL_GPL(ip_ct_port_nfattr_to_tuple);
962#endif
diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c
deleted file mode 100644
index 76e175e7a972..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_tftp.c
+++ /dev/null
@@ -1,161 +0,0 @@
1/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 *
7 * Version: 0.0.7
8 *
9 * Thu 21 Mar 2002 Harald Welte <laforge@gnumonks.org>
10 * - port to newnat API
11 *
12 */
13
14#include <linux/module.h>
15#include <linux/ip.h>
16#include <linux/udp.h>
17
18#include <linux/netfilter.h>
19#include <linux/netfilter_ipv4/ip_tables.h>
20#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
21#include <linux/netfilter_ipv4/ip_conntrack_tftp.h>
22#include <linux/moduleparam.h>
23
24MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
25MODULE_DESCRIPTION("tftp connection tracking helper");
26MODULE_LICENSE("GPL");
27
28#define MAX_PORTS 8
29static unsigned short ports[MAX_PORTS];
30static int ports_c;
31module_param_array(ports, ushort, &ports_c, 0400);
32MODULE_PARM_DESC(ports, "port numbers of tftp servers");
33
34#if 0
35#define DEBUGP(format, args...) printk("%s:%s:" format, \
36 __FILE__, __FUNCTION__ , ## args)
37#else
38#define DEBUGP(format, args...)
39#endif
40
41unsigned int (*ip_nat_tftp_hook)(struct sk_buff **pskb,
42 enum ip_conntrack_info ctinfo,
43 struct ip_conntrack_expect *exp);
44EXPORT_SYMBOL_GPL(ip_nat_tftp_hook);
45
46static int tftp_help(struct sk_buff **pskb,
47 struct ip_conntrack *ct,
48 enum ip_conntrack_info ctinfo)
49{
50 struct tftphdr _tftph, *tfh;
51 struct ip_conntrack_expect *exp;
52 unsigned int ret = NF_ACCEPT;
53 typeof(ip_nat_tftp_hook) ip_nat_tftp;
54
55 tfh = skb_header_pointer(*pskb,
56 (*pskb)->nh.iph->ihl*4+sizeof(struct udphdr),
57 sizeof(_tftph), &_tftph);
58 if (tfh == NULL)
59 return NF_ACCEPT;
60
61 switch (ntohs(tfh->opcode)) {
62		/* RRQ and WRQ work the same way */
63 case TFTP_OPCODE_READ:
64 case TFTP_OPCODE_WRITE:
65 DEBUGP("");
66 DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
67 DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
68
69 exp = ip_conntrack_expect_alloc(ct);
70 if (exp == NULL)
71 return NF_DROP;
72
73 exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
74 exp->mask.src.ip = htonl(0xffffffff);
75 exp->mask.src.u.udp.port = 0;
76 exp->mask.dst.ip = htonl(0xffffffff);
77 exp->mask.dst.u.udp.port = htons(0xffff);
78 exp->mask.dst.protonum = 0xff;
79 exp->expectfn = NULL;
80 exp->flags = 0;
81
82 DEBUGP("expect: ");
83 DUMP_TUPLE(&exp->tuple);
84 DUMP_TUPLE(&exp->mask);
85 ip_nat_tftp = rcu_dereference(ip_nat_tftp_hook);
86 if (ip_nat_tftp)
87 ret = ip_nat_tftp(pskb, ctinfo, exp);
88 else if (ip_conntrack_expect_related(exp) != 0)
89 ret = NF_DROP;
90 ip_conntrack_expect_put(exp);
91 break;
92 case TFTP_OPCODE_DATA:
93 case TFTP_OPCODE_ACK:
94 DEBUGP("Data/ACK opcode\n");
95 break;
96 case TFTP_OPCODE_ERROR:
97 DEBUGP("Error opcode\n");
98 break;
99 default:
100 DEBUGP("Unknown opcode\n");
101 }
102	return ret;
103}
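
The mask built above wildcards exactly one field: the server's source port. The reply tuple pins the server IP, client IP and client port, while mask.src.u.udp.port = 0 lets the TFTP server answer from any ephemeral port, which is how the protocol behaves. A standalone sketch of masked tuple matching in the same spirit (struct layout simplified, not the kernel's ip_conntrack_tuple):

#include <stdint.h>
#include <stdio.h>

struct tuple { uint32_t src_ip, dst_ip; uint16_t sport, dport; };

/* A field takes part in the comparison only where mask bits are set. */
static int masked_match(const struct tuple *pkt,
			const struct tuple *exp,
			const struct tuple *mask)
{
	return ((pkt->src_ip ^ exp->src_ip) & mask->src_ip) == 0 &&
	       ((pkt->dst_ip ^ exp->dst_ip) & mask->dst_ip) == 0 &&
	       ((pkt->sport ^ exp->sport) & mask->sport) == 0 &&
	       ((pkt->dport ^ exp->dport) & mask->dport) == 0;
}

int main(void)
{
	/* TFTP-style expectation: any server source port is accepted. */
	struct tuple exp  = { 0x0a000001, 0x0a000002, 69, 32768 };
	struct tuple mask = { 0xffffffff, 0xffffffff, 0, 0xffff };
	struct tuple pkt  = { 0x0a000001, 0x0a000002, 49152, 32768 };

	printf("%s\n", masked_match(&pkt, &exp, &mask) ? "match" : "no match");
	return 0;
}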
104
105static struct ip_conntrack_helper tftp[MAX_PORTS];
106static char tftp_names[MAX_PORTS][sizeof("tftp-65535")];
107
108static void ip_conntrack_tftp_fini(void)
109{
110 int i;
111
112 for (i = 0 ; i < ports_c; i++) {
113 DEBUGP("unregistering helper for port %d\n",
114 ports[i]);
115 ip_conntrack_helper_unregister(&tftp[i]);
116 }
117}
118
119static int __init ip_conntrack_tftp_init(void)
120{
121 int i, ret;
122 char *tmpname;
123
124 if (ports_c == 0)
125 ports[ports_c++] = TFTP_PORT;
126
127 for (i = 0; i < ports_c; i++) {
128 /* Create helper structure */
129 memset(&tftp[i], 0, sizeof(struct ip_conntrack_helper));
130
131 tftp[i].tuple.dst.protonum = IPPROTO_UDP;
132 tftp[i].tuple.src.u.udp.port = htons(ports[i]);
133 tftp[i].mask.dst.protonum = 0xFF;
134 tftp[i].mask.src.u.udp.port = htons(0xFFFF);
135 tftp[i].max_expected = 1;
136 tftp[i].timeout = 5 * 60; /* 5 minutes */
137 tftp[i].me = THIS_MODULE;
138 tftp[i].help = tftp_help;
139
140 tmpname = &tftp_names[i][0];
141 if (ports[i] == TFTP_PORT)
142 sprintf(tmpname, "tftp");
143 else
144			sprintf(tmpname, "tftp-%d", ports[i]);
145 tftp[i].name = tmpname;
146
147 DEBUGP("port #%d: %d\n", i, ports[i]);
148
149 ret=ip_conntrack_helper_register(&tftp[i]);
150 if (ret) {
151 printk("ERROR registering helper for port %d\n",
152 ports[i]);
153 ip_conntrack_tftp_fini();
154 return(ret);
155 }
156 }
157 return(0);
158}
159
160module_init(ip_conntrack_tftp_init);
161module_exit(ip_conntrack_tftp_fini);
diff --git a/net/ipv4/netfilter/ip_nat_amanda.c b/net/ipv4/netfilter/ip_nat_amanda.c
deleted file mode 100644
index 85df1a9aed33..000000000000
--- a/net/ipv4/netfilter/ip_nat_amanda.c
+++ /dev/null
@@ -1,85 +0,0 @@
1/* Amanda extension for TCP NAT alteration.
2 * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
3 * based on a copy of HW's ip_nat_irc.c as well as other modules
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 *
10 * Module load syntax:
11 * insmod ip_nat_amanda.o
12 */
13
14#include <linux/kernel.h>
15#include <linux/module.h>
16#include <linux/netfilter.h>
17#include <linux/skbuff.h>
18#include <linux/ip.h>
19#include <linux/udp.h>
20#include <net/tcp.h>
21#include <net/udp.h>
22
23#include <linux/netfilter_ipv4.h>
24#include <linux/netfilter_ipv4/ip_nat.h>
25#include <linux/netfilter_ipv4/ip_nat_helper.h>
26#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
27#include <linux/netfilter_ipv4/ip_conntrack_amanda.h>
28
29
30MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
31MODULE_DESCRIPTION("Amanda NAT helper");
32MODULE_LICENSE("GPL");
33
34static unsigned int help(struct sk_buff **pskb,
35 enum ip_conntrack_info ctinfo,
36 unsigned int matchoff,
37 unsigned int matchlen,
38 struct ip_conntrack_expect *exp)
39{
40 char buffer[sizeof("65535")];
41 u_int16_t port;
42 unsigned int ret;
43
44 /* Connection comes from client. */
45 exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
46 exp->dir = IP_CT_DIR_ORIGINAL;
47
48	/* When you see the packet, we need to NAT it the same as this
49	 * one (i.e. same IP: it will be TCP and the master is UDP). */
50 exp->expectfn = ip_nat_follow_master;
51
52 /* Try to get same port: if not, try to change it. */
53 for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
54 exp->tuple.dst.u.tcp.port = htons(port);
55 if (ip_conntrack_expect_related(exp) == 0)
56 break;
57 }
58
59 if (port == 0)
60 return NF_DROP;
61
62 sprintf(buffer, "%u", port);
63 ret = ip_nat_mangle_udp_packet(pskb, exp->master, ctinfo,
64 matchoff, matchlen,
65 buffer, strlen(buffer));
66 if (ret != NF_ACCEPT)
67 ip_conntrack_unexpect_related(exp);
68 return ret;
69}
70
71static void __exit ip_nat_amanda_fini(void)
72{
73 rcu_assign_pointer(ip_nat_amanda_hook, NULL);
74 synchronize_rcu();
75}
76
77static int __init ip_nat_amanda_init(void)
78{
79 BUG_ON(rcu_dereference(ip_nat_amanda_hook));
80 rcu_assign_pointer(ip_nat_amanda_hook, help);
81 return 0;
82}
83
84module_init(ip_nat_amanda_init);
85module_exit(ip_nat_amanda_fini);
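
The ip_nat_amanda_hook indirection is what keeps conntrack usable without NAT: the conntrack helper rcu_dereference()s the pointer and falls back when it is NULL, and this module merely publishes and retracts its implementation. A sketch of the optional-hook pattern with hypothetical names:

#include <linux/rcupdate.h>

typedef int (*frob_hook_t)(int arg);

/* Hypothetical optional hook, NULL until a helper module loads. */
frob_hook_t frob_hook;

int core_process(int arg)
{
	frob_hook_t frob;
	int ret = 0;

	rcu_read_lock();
	frob = rcu_dereference(frob_hook);
	if (frob)
		ret = frob(arg);	/* optional module is loaded */
	rcu_read_unlock();
	return ret;
}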
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
deleted file mode 100644
index 40737fdbe9a7..000000000000
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ /dev/null
@@ -1,634 +0,0 @@
1/* NAT for netfilter; shared with compatibility layer. */
2
3/* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/types.h>
13#include <linux/timer.h>
14#include <linux/skbuff.h>
15#include <linux/netfilter_ipv4.h>
16#include <linux/vmalloc.h>
17#include <net/checksum.h>
18#include <net/icmp.h>
19#include <net/ip.h>
20#include <net/tcp.h> /* For tcp_prot in getorigdst */
21#include <linux/icmp.h>
22#include <linux/udp.h>
23#include <linux/jhash.h>
24
25#include <linux/netfilter_ipv4/ip_conntrack.h>
26#include <linux/netfilter_ipv4/ip_conntrack_core.h>
27#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
28#include <linux/netfilter_ipv4/ip_nat.h>
29#include <linux/netfilter_ipv4/ip_nat_protocol.h>
30#include <linux/netfilter_ipv4/ip_nat_core.h>
31#include <linux/netfilter_ipv4/ip_nat_helper.h>
32#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
33
34#if 0
35#define DEBUGP printk
36#else
37#define DEBUGP(format, args...)
38#endif
39
40DEFINE_RWLOCK(ip_nat_lock);
41
42/* Calculated at init based on memory size */
43static unsigned int ip_nat_htable_size;
44
45static struct list_head *bysource;
46
47#define MAX_IP_NAT_PROTO 256
48static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
49
50static inline struct ip_nat_protocol *
51__ip_nat_proto_find(u_int8_t protonum)
52{
53 return rcu_dereference(ip_nat_protos[protonum]);
54}
55
56struct ip_nat_protocol *
57ip_nat_proto_find_get(u_int8_t protonum)
58{
59 struct ip_nat_protocol *p;
60
61 rcu_read_lock();
62 p = __ip_nat_proto_find(protonum);
63 if (!try_module_get(p->me))
64 p = &ip_nat_unknown_protocol;
65 rcu_read_unlock();
66
67 return p;
68}
69EXPORT_SYMBOL_GPL(ip_nat_proto_find_get);
70
71void
72ip_nat_proto_put(struct ip_nat_protocol *p)
73{
74 module_put(p->me);
75}
76EXPORT_SYMBOL_GPL(ip_nat_proto_put);
77
78/* We keep an extra hash for each conntrack, for fast searching. */
79static inline unsigned int
80hash_by_src(const struct ip_conntrack_tuple *tuple)
81{
82 /* Original src, to ensure we map it consistently if poss. */
83 return jhash_3words((__force u32)tuple->src.ip, tuple->src.u.all,
84 tuple->dst.protonum, 0) % ip_nat_htable_size;
85}
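
hash_by_src() deliberately mixes only the original source IP, source port and protocol, never the destination, so every connection from one source falls into one chain and can be given a consistent mapping. A standalone illustration with a toy mixer standing in for jhash_3words():

#include <stdint.h>
#include <stdio.h>

/* Toy stand-in for jhash_3words(); the kernel uses Bob Jenkins' hash,
 * but any decent 32-bit mixer serves for the illustration. */
static uint32_t mix3(uint32_t a, uint32_t b, uint32_t c)
{
	uint32_t h = a * 0x9e3779b1u;

	h ^= b + (h << 6) + (h >> 2);
	h ^= c + (h << 6) + (h >> 2);
	return h;
}

int main(void)
{
	uint32_t src_ip = 0xc0a80001;	/* 192.168.0.1 */
	uint16_t sport = 40000;
	uint8_t proto = 6;		/* TCP */
	unsigned int htable_size = 512;

	/* Destination fields deliberately excluded: every connection
	 * from this source hashes to the same bucket. */
	printf("bucket = %u\n", mix3(src_ip, sport, proto) % htable_size);
	return 0;
}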
86
87/* No one is using the conntrack by the time this is called. */
88static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
89{
90 if (!(conn->status & IPS_NAT_DONE_MASK))
91 return;
92
93 write_lock_bh(&ip_nat_lock);
94 list_del(&conn->nat.info.bysource);
95 write_unlock_bh(&ip_nat_lock);
96}
97
98/* Is this tuple already taken? (not by us) */
99int
100ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
101 const struct ip_conntrack *ignored_conntrack)
102{
103	/* Connection tracking doesn't keep track of outgoing tuples; only
104 incoming ones. NAT means they don't have a fixed mapping,
105 so we invert the tuple and look for the incoming reply.
106
107 We could keep a separate hash if this proves too slow. */
108 struct ip_conntrack_tuple reply;
109
110 invert_tuplepr(&reply, tuple);
111 return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
112}
113EXPORT_SYMBOL(ip_nat_used_tuple);
114
115/* If we source-map this tuple so the reply looks like reply_tuple,
116 * will that meet the constraints of the range? */
117static int
118in_range(const struct ip_conntrack_tuple *tuple,
119 const struct ip_nat_range *range)
120{
121 struct ip_nat_protocol *proto;
122 int ret = 0;
123
124 /* If we are supposed to map IPs, then we must be in the
125 range specified, otherwise let this drag us onto a new src IP. */
126 if (range->flags & IP_NAT_RANGE_MAP_IPS) {
127 if (ntohl(tuple->src.ip) < ntohl(range->min_ip)
128 || ntohl(tuple->src.ip) > ntohl(range->max_ip))
129 return 0;
130 }
131
132 rcu_read_lock();
133 proto = __ip_nat_proto_find(tuple->dst.protonum);
134 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
135 || proto->in_range(tuple, IP_NAT_MANIP_SRC,
136 &range->min, &range->max))
137 ret = 1;
138 rcu_read_unlock();
139
140 return ret;
141}
142
143static inline int
144same_src(const struct ip_conntrack *ct,
145 const struct ip_conntrack_tuple *tuple)
146{
147 return (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum
148 == tuple->dst.protonum
149 && ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip
150 == tuple->src.ip
151 && ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all
152 == tuple->src.u.all);
153}
154
155/* Only called for SRC manip */
156static int
157find_appropriate_src(const struct ip_conntrack_tuple *tuple,
158 struct ip_conntrack_tuple *result,
159 const struct ip_nat_range *range)
160{
161 unsigned int h = hash_by_src(tuple);
162 struct ip_conntrack *ct;
163
164 read_lock_bh(&ip_nat_lock);
165 list_for_each_entry(ct, &bysource[h], nat.info.bysource) {
166 if (same_src(ct, tuple)) {
167 /* Copy source part from reply tuple. */
168 invert_tuplepr(result,
169 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
170 result->dst = tuple->dst;
171
172 if (in_range(result, range)) {
173 read_unlock_bh(&ip_nat_lock);
174 return 1;
175 }
176 }
177 }
178 read_unlock_bh(&ip_nat_lock);
179 return 0;
180}
181
182/* For [FUTURE] fragmentation handling, we want the least-used
183 src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus
184 if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
185 1-65535, we don't do pro-rata allocation based on ports; we choose
186 the ip with the lowest src-ip/dst-ip/proto usage.
187*/
188static void
189find_best_ips_proto(struct ip_conntrack_tuple *tuple,
190 const struct ip_nat_range *range,
191 const struct ip_conntrack *conntrack,
192 enum ip_nat_manip_type maniptype)
193{
194 __be32 *var_ipp;
195 /* Host order */
196 u_int32_t minip, maxip, j;
197
198 /* No IP mapping? Do nothing. */
199 if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
200 return;
201
202 if (maniptype == IP_NAT_MANIP_SRC)
203 var_ipp = &tuple->src.ip;
204 else
205 var_ipp = &tuple->dst.ip;
206
207 /* Fast path: only one choice. */
208 if (range->min_ip == range->max_ip) {
209 *var_ipp = range->min_ip;
210 return;
211 }
212
213 /* Hashing source and destination IPs gives a fairly even
214 * spread in practice (if there are a small number of IPs
215 * involved, there usually aren't that many connections
216 * anyway). The consistency means that servers see the same
217 * client coming from the same IP (some Internet Banking sites
218 * like this), even across reboots. */
219 minip = ntohl(range->min_ip);
220 maxip = ntohl(range->max_ip);
221 j = jhash_2words((__force u32)tuple->src.ip, (__force u32)tuple->dst.ip, 0);
222 *var_ipp = htonl(minip + j % (maxip - minip + 1));
223}
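
Worked through with concrete numbers: for the range 10.0.0.1-10.0.0.4 the code takes j = jhash_2words(src, dst) and picks minip + j % 4, so a given client/server pair always lands on the same external address, across flows and reboots. A sketch with a toy mixer in place of jhash_2words():

#include <stdint.h>
#include <stdio.h>

static uint32_t mix2(uint32_t a, uint32_t b)
{
	uint32_t h = a * 0x9e3779b1u;

	return h ^ (b + (h << 6) + (h >> 2));
}

int main(void)
{
	uint32_t minip = 0x0a000001, maxip = 0x0a000004; /* 10.0.0.1-4 */
	uint32_t src = 0xc0a80005, dst = 0x08080808;
	uint32_t j = mix2(src, dst);
	uint32_t chosen = minip + j % (maxip - minip + 1);

	/* Same (src, dst) pair -> same j -> same external IP. */
	printf("chosen = %u.%u.%u.%u\n", chosen >> 24,
	       (chosen >> 16) & 0xff, (chosen >> 8) & 0xff, chosen & 0xff);
	return 0;
}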
224
225/* Manipulate the tuple into the range given. For NF_IP_POST_ROUTING,
226 * we change the source to map into the range. For NF_IP_PRE_ROUTING
227 * and NF_IP_LOCAL_OUT, we change the destination to map into the
228 * range. It might not be possible to get a unique tuple, but we try.
229 * At worst (or if we race), we will end up with a final duplicate in
230 * __ip_conntrack_confirm and drop the packet. */
231static void
232get_unique_tuple(struct ip_conntrack_tuple *tuple,
233 const struct ip_conntrack_tuple *orig_tuple,
234 const struct ip_nat_range *range,
235 struct ip_conntrack *conntrack,
236 enum ip_nat_manip_type maniptype)
237{
238 struct ip_nat_protocol *proto;
239
240 /* 1) If this srcip/proto/src-proto-part is currently mapped,
241 and that same mapping gives a unique tuple within the given
242 range, use that.
243
244 This is only required for source (ie. NAT/masq) mappings.
245 So far, we don't do local source mappings, so multiple
246	   manips are not an issue. */
247 if (maniptype == IP_NAT_MANIP_SRC) {
248 if (find_appropriate_src(orig_tuple, tuple, range)) {
249 DEBUGP("get_unique_tuple: Found current src map\n");
250 if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
251 if (!ip_nat_used_tuple(tuple, conntrack))
252 return;
253 }
254 }
255
256 /* 2) Select the least-used IP/proto combination in the given
257 range. */
258 *tuple = *orig_tuple;
259 find_best_ips_proto(tuple, range, conntrack, maniptype);
260
261 /* 3) The per-protocol part of the manip is made to map into
262 the range to make a unique tuple. */
263
264 rcu_read_lock();
265 proto = __ip_nat_proto_find(orig_tuple->dst.protonum);
266
267 /* Change protocol info to have some randomization */
268 if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
269 proto->unique_tuple(tuple, range, maniptype, conntrack);
270 goto out;
271 }
272
273 /* Only bother mapping if it's not already in range and unique */
274 if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
275 || proto->in_range(tuple, maniptype, &range->min, &range->max))
276 && !ip_nat_used_tuple(tuple, conntrack))
277 goto out;
278
279	/* Last chance: get protocol to try to obtain unique tuple. */
280 proto->unique_tuple(tuple, range, maniptype, conntrack);
281out:
282 rcu_read_unlock();
283}
284
285unsigned int
286ip_nat_setup_info(struct ip_conntrack *conntrack,
287 const struct ip_nat_range *range,
288 unsigned int hooknum)
289{
290 struct ip_conntrack_tuple curr_tuple, new_tuple;
291 struct ip_nat_info *info = &conntrack->nat.info;
292 int have_to_hash = !(conntrack->status & IPS_NAT_DONE_MASK);
293 enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
294
295 IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
296 || hooknum == NF_IP_POST_ROUTING
297 || hooknum == NF_IP_LOCAL_IN
298 || hooknum == NF_IP_LOCAL_OUT);
299 BUG_ON(ip_nat_initialized(conntrack, maniptype));
300
301 /* What we've got will look like inverse of reply. Normally
302 this is what is in the conntrack, except for prior
303 manipulations (future optimization: if num_manips == 0,
304 orig_tp =
305 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
306 invert_tuplepr(&curr_tuple,
307 &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple);
308
309 get_unique_tuple(&new_tuple, &curr_tuple, range, conntrack, maniptype);
310
311 if (!ip_ct_tuple_equal(&new_tuple, &curr_tuple)) {
312 struct ip_conntrack_tuple reply;
313
314 /* Alter conntrack table so will recognize replies. */
315 invert_tuplepr(&reply, &new_tuple);
316 ip_conntrack_alter_reply(conntrack, &reply);
317
318 /* Non-atomic: we own this at the moment. */
319 if (maniptype == IP_NAT_MANIP_SRC)
320 conntrack->status |= IPS_SRC_NAT;
321 else
322 conntrack->status |= IPS_DST_NAT;
323 }
324
325 /* Place in source hash if this is the first time. */
326 if (have_to_hash) {
327 unsigned int srchash
328 = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
329 .tuple);
330 write_lock_bh(&ip_nat_lock);
331 list_add(&info->bysource, &bysource[srchash]);
332 write_unlock_bh(&ip_nat_lock);
333 }
334
335 /* It's done. */
336 if (maniptype == IP_NAT_MANIP_DST)
337 set_bit(IPS_DST_NAT_DONE_BIT, &conntrack->status);
338 else
339 set_bit(IPS_SRC_NAT_DONE_BIT, &conntrack->status);
340
341 return NF_ACCEPT;
342}
343EXPORT_SYMBOL(ip_nat_setup_info);
344
345/* Returns true if succeeded. */
346static int
347manip_pkt(u_int16_t proto,
348 struct sk_buff **pskb,
349 unsigned int iphdroff,
350 const struct ip_conntrack_tuple *target,
351 enum ip_nat_manip_type maniptype)
352{
353 struct iphdr *iph;
354 struct ip_nat_protocol *p;
355
356 if (!skb_make_writable(pskb, iphdroff + sizeof(*iph)))
357 return 0;
358
359 iph = (void *)(*pskb)->data + iphdroff;
360
361	/* Manipulate the protocol part. */
362
363 /* rcu_read_lock()ed by nf_hook_slow */
364 p = __ip_nat_proto_find(proto);
365 if (!p->manip_pkt(pskb, iphdroff, target, maniptype))
366 return 0;
367
368 iph = (void *)(*pskb)->data + iphdroff;
369
370 if (maniptype == IP_NAT_MANIP_SRC) {
371 nf_csum_replace4(&iph->check, iph->saddr, target->src.ip);
372 iph->saddr = target->src.ip;
373 } else {
374 nf_csum_replace4(&iph->check, iph->daddr, target->dst.ip);
375 iph->daddr = target->dst.ip;
376 }
377 return 1;
378}
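
nf_csum_replace4() avoids recomputing the IP header checksum from scratch: per RFC 1624 the one's-complement sum is patched by removing the old address words and adding the new ones. A minimal standalone version of that update for a 32-bit field (the sample checksum value is arbitrary):

#include <stdint.h>
#include <stdio.h>

/* Incremental update in the spirit of nf_csum_replace4()
 * (RFC 1624, eqn. 3): ~C' = ~C + ~m + m'. */
static uint16_t csum_update4(uint16_t check, uint32_t from, uint32_t to)
{
	uint32_t sum = (uint16_t)~check;

	sum += (uint16_t)~(from >> 16) + (uint16_t)~(from & 0xffff);
	sum += (to >> 16) + (to & 0xffff);
	while (sum >> 16)			/* fold carries */
		sum = (sum & 0xffff) + (sum >> 16);
	return ~sum;
}

int main(void)
{
	/* Rewrite saddr 192.168.0.1 -> 10.0.0.1 under a sample checksum. */
	printf("new check = 0x%04x\n",
	       csum_update4(0xb1e6, 0xc0a80001, 0x0a000001));
	return 0;
}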
379
380/* Do packet manipulations according to ip_nat_setup_info. */
381unsigned int ip_nat_packet(struct ip_conntrack *ct,
382 enum ip_conntrack_info ctinfo,
383 unsigned int hooknum,
384 struct sk_buff **pskb)
385{
386 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
387 unsigned long statusbit;
388 enum ip_nat_manip_type mtype = HOOK2MANIP(hooknum);
389
390 if (mtype == IP_NAT_MANIP_SRC)
391 statusbit = IPS_SRC_NAT;
392 else
393 statusbit = IPS_DST_NAT;
394
395 /* Invert if this is reply dir. */
396 if (dir == IP_CT_DIR_REPLY)
397 statusbit ^= IPS_NAT_MASK;
398
399 /* Non-atomic: these bits don't change. */
400 if (ct->status & statusbit) {
401 struct ip_conntrack_tuple target;
402
403 /* We are aiming to look like inverse of other direction. */
404 invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
405
406 if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype))
407 return NF_DROP;
408 }
409 return NF_ACCEPT;
410}
411EXPORT_SYMBOL_GPL(ip_nat_packet);
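
The statusbit XOR above works because IPS_SRC_NAT and IPS_DST_NAT are adjacent bits: a reply packet arriving at a source-manip hook must be rewritten when the connection was destination-NATed in the original direction, and one XOR with the pair converts the test. A tiny demonstration (bit positions are illustrative local definitions, not copied from the kernel header):

#include <stdio.h>

enum { SRC_NAT = 1 << 4, DST_NAT = 1 << 5, NAT_MASK = SRC_NAT | DST_NAT };

int main(void)
{
	/* A source-manip hook (e.g. POST_ROUTING) tests the SRC bit... */
	unsigned long statusbit = SRC_NAT;

	/* ...but for a reply-direction packet the test flips: replies
	 * of a DNATed connection need their source rewritten back. */
	statusbit ^= NAT_MASK;
	printf("reply-direction test: %s\n",
	       statusbit == DST_NAT ? "DST_NAT" : "SRC_NAT");
	return 0;
}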
412
413/* Dir is direction ICMP is coming from (opposite to packet it contains) */
414int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
415 enum ip_conntrack_info ctinfo,
416 unsigned int hooknum,
417 struct sk_buff **pskb)
418{
419 struct {
420 struct icmphdr icmp;
421 struct iphdr ip;
422 } *inside;
423 struct ip_conntrack_protocol *proto;
424 struct ip_conntrack_tuple inner, target;
425 int hdrlen = (*pskb)->nh.iph->ihl * 4;
426 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
427 unsigned long statusbit;
428 enum ip_nat_manip_type manip = HOOK2MANIP(hooknum);
429
430 if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
431 return 0;
432
433 inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
434
435 /* We're actually going to mangle it beyond trivial checksum
436 adjustment, so make sure the current checksum is correct. */
437 if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
438 return 0;
439
440 /* Must be RELATED */
441 IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
442 (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);
443
444 /* Redirects on non-null nats must be dropped, else they'll
445 start talking to each other without our translation, and be
446 confused... --RR */
447 if (inside->icmp.type == ICMP_REDIRECT) {
448 /* If NAT isn't finished, assume it and drop. */
449 if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
450 return 0;
451
452 if (ct->status & IPS_NAT_MASK)
453 return 0;
454 }
455
456 DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
457 *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
458
459 /* rcu_read_lock()ed by nf_hook_slow */
460 proto = __ip_conntrack_proto_find(inside->ip.protocol);
461 if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 +
462 sizeof(struct icmphdr) + inside->ip.ihl*4,
463 &inner, proto))
464 return 0;
465
466 /* Change inner back to look like incoming packet. We do the
467 opposite manip on this hook to normal, because it might not
468 pass all hooks (locally-generated ICMP). Consider incoming
469 packet: PREROUTING (DST manip), routing produces ICMP, goes
470 through POSTROUTING (which must correct the DST manip). */
471 if (!manip_pkt(inside->ip.protocol, pskb,
472 (*pskb)->nh.iph->ihl*4
473 + sizeof(inside->icmp),
474 &ct->tuplehash[!dir].tuple,
475 !manip))
476 return 0;
477
478 if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
479		/* Reload "inside": manip_pkt may have reallocated skb data. */
480 inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
481 inside->icmp.checksum = 0;
482 inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
483 (*pskb)->len - hdrlen,
484 0));
485 }
486
487	/* Change the outer packet to look like the reply to an incoming
488	 * packet (proto 0 means don't invert the per-proto part). */
489 if (manip == IP_NAT_MANIP_SRC)
490 statusbit = IPS_SRC_NAT;
491 else
492 statusbit = IPS_DST_NAT;
493
494 /* Invert if this is reply dir. */
495 if (dir == IP_CT_DIR_REPLY)
496 statusbit ^= IPS_NAT_MASK;
497
498 if (ct->status & statusbit) {
499 invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
500 if (!manip_pkt(0, pskb, 0, &target, manip))
501 return 0;
502 }
503
504 return 1;
505}
506EXPORT_SYMBOL_GPL(ip_nat_icmp_reply_translation);
507
508/* Protocol registration. */
509int ip_nat_protocol_register(struct ip_nat_protocol *proto)
510{
511 int ret = 0;
512
513 write_lock_bh(&ip_nat_lock);
514 if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
515 ret = -EBUSY;
516 goto out;
517 }
518 rcu_assign_pointer(ip_nat_protos[proto->protonum], proto);
519 out:
520 write_unlock_bh(&ip_nat_lock);
521 return ret;
522}
523EXPORT_SYMBOL(ip_nat_protocol_register);
524
525/* No one stores the protocol anywhere; simply delete it. */
526void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
527{
528 write_lock_bh(&ip_nat_lock);
529 rcu_assign_pointer(ip_nat_protos[proto->protonum],
530 &ip_nat_unknown_protocol);
531 write_unlock_bh(&ip_nat_lock);
532 synchronize_rcu();
533}
534EXPORT_SYMBOL(ip_nat_protocol_unregister);
535
536#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
537 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
538int
539ip_nat_port_range_to_nfattr(struct sk_buff *skb,
540 const struct ip_nat_range *range)
541{
542 NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16),
543 &range->min.tcp.port);
544 NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16),
545 &range->max.tcp.port);
546
547 return 0;
548
549nfattr_failure:
550 return -1;
551}
552
553int
554ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range)
555{
556 int ret = 0;
557
558 /* we have to return whether we actually parsed something or not */
559
560 if (tb[CTA_PROTONAT_PORT_MIN-1]) {
561 ret = 1;
562 range->min.tcp.port =
563 *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]);
564 }
565
566 if (!tb[CTA_PROTONAT_PORT_MAX-1]) {
567 if (ret)
568 range->max.tcp.port = range->min.tcp.port;
569 } else {
570 ret = 1;
571 range->max.tcp.port =
572 *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]);
573 }
574
575 return ret;
576}
577EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range);
578EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr);
579#endif
580
581static int __init ip_nat_init(void)
582{
583 size_t i;
584
585 /* Leave them the same for the moment. */
586 ip_nat_htable_size = ip_conntrack_htable_size;
587
588	/* One vmalloc for the bysource hash table */
589 bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size);
590 if (!bysource)
591 return -ENOMEM;
592
593 /* Sew in builtin protocols. */
594 write_lock_bh(&ip_nat_lock);
595 for (i = 0; i < MAX_IP_NAT_PROTO; i++)
596 rcu_assign_pointer(ip_nat_protos[i], &ip_nat_unknown_protocol);
597 rcu_assign_pointer(ip_nat_protos[IPPROTO_TCP], &ip_nat_protocol_tcp);
598 rcu_assign_pointer(ip_nat_protos[IPPROTO_UDP], &ip_nat_protocol_udp);
599 rcu_assign_pointer(ip_nat_protos[IPPROTO_ICMP], &ip_nat_protocol_icmp);
600 write_unlock_bh(&ip_nat_lock);
601
602 for (i = 0; i < ip_nat_htable_size; i++) {
603 INIT_LIST_HEAD(&bysource[i]);
604 }
605
606 /* FIXME: Man, this is a hack. <SIGH> */
607 IP_NF_ASSERT(rcu_dereference(ip_conntrack_destroyed) == NULL);
608 rcu_assign_pointer(ip_conntrack_destroyed, ip_nat_cleanup_conntrack);
609
610 /* Initialize fake conntrack so that NAT will skip it */
611 ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
612 return 0;
613}
614
615/* Clear NAT section of all conntracks, in case we're loaded again. */
616static int clean_nat(struct ip_conntrack *i, void *data)
617{
618 memset(&i->nat, 0, sizeof(i->nat));
619 i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
620 return 0;
621}
622
623static void __exit ip_nat_cleanup(void)
624{
625 ip_ct_iterate_cleanup(&clean_nat, NULL);
626 rcu_assign_pointer(ip_conntrack_destroyed, NULL);
627 synchronize_rcu();
628 vfree(bysource);
629}
630
631MODULE_LICENSE("GPL");
632
633module_init(ip_nat_init);
634module_exit(ip_nat_cleanup);
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
deleted file mode 100644
index 32e01d8dffcb..000000000000
--- a/net/ipv4/netfilter/ip_nat_ftp.c
+++ /dev/null
@@ -1,180 +0,0 @@
1/* FTP extension for TCP NAT alteration. */
2
3/* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/netfilter_ipv4.h>
13#include <linux/ip.h>
14#include <linux/tcp.h>
15#include <linux/moduleparam.h>
16#include <net/tcp.h>
17#include <linux/netfilter_ipv4/ip_nat.h>
18#include <linux/netfilter_ipv4/ip_nat_helper.h>
19#include <linux/netfilter_ipv4/ip_nat_rule.h>
20#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
21#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
22
23MODULE_LICENSE("GPL");
24MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
25MODULE_DESCRIPTION("ftp NAT helper");
26
27#if 0
28#define DEBUGP printk
29#else
30#define DEBUGP(format, args...)
31#endif
32
33/* FIXME: Time out? --RR */
34
35static int
36mangle_rfc959_packet(struct sk_buff **pskb,
37 __be32 newip,
38 u_int16_t port,
39 unsigned int matchoff,
40 unsigned int matchlen,
41 struct ip_conntrack *ct,
42 enum ip_conntrack_info ctinfo,
43 u32 *seq)
44{
45 char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")];
46
47 sprintf(buffer, "%u,%u,%u,%u,%u,%u",
48 NIPQUAD(newip), port>>8, port&0xFF);
49
50 DEBUGP("calling ip_nat_mangle_tcp_packet\n");
51
52 *seq += strlen(buffer) - matchlen;
53 return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
54 matchlen, buffer, strlen(buffer));
55}
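
RFC 959 addresses are six decimal byte values: the four IP octets followed by the port split big-endian into high and low bytes, which is exactly what the NIPQUAD(newip), port>>8, port&0xFF arguments produce. A quick standalone check:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* 10.0.0.1:6275 -> "10,0,0,1,24,131" (6275 = 24*256 + 131) */
	uint32_t ip = 0x0a000001;	/* host order for the example */
	uint16_t port = 6275;
	char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")];

	snprintf(buffer, sizeof(buffer), "%u,%u,%u,%u,%u,%u",
		 ip >> 24, (ip >> 16) & 0xff, (ip >> 8) & 0xff, ip & 0xff,
		 port >> 8, port & 0xff);
	puts(buffer);
	return 0;
}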
56
57/* |1|132.235.1.2|6275| */
58static int
59mangle_eprt_packet(struct sk_buff **pskb,
60 __be32 newip,
61 u_int16_t port,
62 unsigned int matchoff,
63 unsigned int matchlen,
64 struct ip_conntrack *ct,
65 enum ip_conntrack_info ctinfo,
66 u32 *seq)
67{
68 char buffer[sizeof("|1|255.255.255.255|65535|")];
69
70 sprintf(buffer, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port);
71
72 DEBUGP("calling ip_nat_mangle_tcp_packet\n");
73
74 *seq += strlen(buffer) - matchlen;
75 return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
76 matchlen, buffer, strlen(buffer));
77}
78
79/* |1|132.235.1.2|6275| */
80static int
81mangle_epsv_packet(struct sk_buff **pskb,
82 __be32 newip,
83 u_int16_t port,
84 unsigned int matchoff,
85 unsigned int matchlen,
86 struct ip_conntrack *ct,
87 enum ip_conntrack_info ctinfo,
88 u32 *seq)
89{
90 char buffer[sizeof("|||65535|")];
91
92 sprintf(buffer, "|||%u|", port);
93
94 DEBUGP("calling ip_nat_mangle_tcp_packet\n");
95
96 *seq += strlen(buffer) - matchlen;
97 return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
98 matchlen, buffer, strlen(buffer));
99}
100
101static int (*mangle[])(struct sk_buff **, __be32, u_int16_t,
102 unsigned int,
103 unsigned int,
104 struct ip_conntrack *,
105 enum ip_conntrack_info,
106 u32 *seq)
107= { [IP_CT_FTP_PORT] = mangle_rfc959_packet,
108 [IP_CT_FTP_PASV] = mangle_rfc959_packet,
109 [IP_CT_FTP_EPRT] = mangle_eprt_packet,
110 [IP_CT_FTP_EPSV] = mangle_epsv_packet
111};
112
113/* So, this packet has hit the connection tracking matching code.
114 Mangle it, and change the expectation to match the new version. */
115static unsigned int ip_nat_ftp(struct sk_buff **pskb,
116 enum ip_conntrack_info ctinfo,
117 enum ip_ct_ftp_type type,
118 unsigned int matchoff,
119 unsigned int matchlen,
120 struct ip_conntrack_expect *exp,
121 u32 *seq)
122{
123 __be32 newip;
124 u_int16_t port;
125 int dir = CTINFO2DIR(ctinfo);
126 struct ip_conntrack *ct = exp->master;
127
128 DEBUGP("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen);
129
130 /* Connection will come from wherever this packet goes, hence !dir */
131 newip = ct->tuplehash[!dir].tuple.dst.ip;
132 exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
133 exp->dir = !dir;
134
135	/* When you see the packet, we need to NAT it the same as this
136	 * one. */
137 exp->expectfn = ip_nat_follow_master;
138
139 /* Try to get same port: if not, try to change it. */
140 for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
141 exp->tuple.dst.u.tcp.port = htons(port);
142 if (ip_conntrack_expect_related(exp) == 0)
143 break;
144 }
145
146 if (port == 0)
147 return NF_DROP;
148
149 if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo,
150 seq)) {
151 ip_conntrack_unexpect_related(exp);
152 return NF_DROP;
153 }
154 return NF_ACCEPT;
155}
156
157static void __exit ip_nat_ftp_fini(void)
158{
159 rcu_assign_pointer(ip_nat_ftp_hook, NULL);
160 synchronize_rcu();
161}
162
163static int __init ip_nat_ftp_init(void)
164{
165 BUG_ON(rcu_dereference(ip_nat_ftp_hook));
166 rcu_assign_pointer(ip_nat_ftp_hook, ip_nat_ftp);
167 return 0;
168}
169
170/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
171static int warn_set(const char *val, struct kernel_param *kp)
172{
173 printk(KERN_INFO KBUILD_MODNAME
174 ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
175 return 0;
176}
177module_param_call(ports, warn_set, NULL, NULL, 0);
178
179module_init(ip_nat_ftp_init);
180module_exit(ip_nat_ftp_fini);
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
deleted file mode 100644
index dc778cfef58b..000000000000
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ /dev/null
@@ -1,436 +0,0 @@
1/* ip_nat_helper.c - generic support functions for NAT helpers
2 *
3 * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
4 * (C) 2003-2004 Netfilter Core Team <coreteam@netfilter.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * 14 Jan 2002 Harald Welte <laforge@gnumonks.org>:
11 * - add support for SACK adjustment
12 * 14 Mar 2002 Harald Welte <laforge@gnumonks.org>:
13 * - merge SACK support into newnat API
14 * 16 Aug 2002 Brian J. Murrell <netfilter@interlinx.bc.ca>:
15 * - make ip_nat_resize_packet more generic (TCP and UDP)
16 * - add ip_nat_mangle_udp_packet
17 */
18#include <linux/module.h>
19#include <linux/kmod.h>
20#include <linux/types.h>
21#include <linux/timer.h>
22#include <linux/skbuff.h>
23#include <linux/netfilter_ipv4.h>
24#include <net/checksum.h>
25#include <net/icmp.h>
26#include <net/ip.h>
27#include <net/tcp.h>
28#include <net/udp.h>
29
30#include <linux/netfilter_ipv4/ip_conntrack.h>
31#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
32#include <linux/netfilter_ipv4/ip_nat.h>
33#include <linux/netfilter_ipv4/ip_nat_protocol.h>
34#include <linux/netfilter_ipv4/ip_nat_core.h>
35#include <linux/netfilter_ipv4/ip_nat_helper.h>
36
37#if 0
38#define DEBUGP printk
39#define DUMP_OFFSET(x) printk("offset_before=%d, offset_after=%d, correction_pos=%u\n", x->offset_before, x->offset_after, x->correction_pos);
40#else
41#define DEBUGP(format, args...)
42#define DUMP_OFFSET(x)
43#endif
44
45static DEFINE_SPINLOCK(ip_nat_seqofs_lock);
46
47/* Setup TCP sequence correction given this change at this sequence */
48static inline void
49adjust_tcp_sequence(u32 seq,
50 int sizediff,
51 struct ip_conntrack *ct,
52 enum ip_conntrack_info ctinfo)
53{
54 int dir;
55 struct ip_nat_seq *this_way, *other_way;
56
57 DEBUGP("ip_nat_resize_packet: old_size = %u, new_size = %u\n",
58 (*skb)->len, new_size);
59
60 dir = CTINFO2DIR(ctinfo);
61
62 this_way = &ct->nat.info.seq[dir];
63 other_way = &ct->nat.info.seq[!dir];
64
65 DEBUGP("ip_nat_resize_packet: Seq_offset before: ");
66 DUMP_OFFSET(this_way);
67
68 spin_lock_bh(&ip_nat_seqofs_lock);
69
70	/* SYN adjust. If it's uninitialized, or this is after the last
71	 * correction, record it: we don't handle more than one
72	 * adjustment in the window, but do deal with the common case
73	 * of a retransmit. */
74 if (this_way->offset_before == this_way->offset_after
75 || before(this_way->correction_pos, seq)) {
76 this_way->correction_pos = seq;
77 this_way->offset_before = this_way->offset_after;
78 this_way->offset_after += sizediff;
79 }
80 spin_unlock_bh(&ip_nat_seqofs_lock);
81
82	DEBUGP("adjust_tcp_sequence: seq offset after: ");
83 DUMP_OFFSET(this_way);
84}
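
Concretely: if a mangle at sequence 1000 grows a segment by 2 bytes, the record becomes correction_pos = 1000, offset_before = 0, offset_after = 2; later segments get +2, while a retransmit of data at or before the correction point gets the old offset. A sketch of how ip_nat_seq_adjust() consumes the record (the kernel's wrap-safe after() is simplified to '>'):

#include <stdint.h>
#include <stdio.h>

/* Mirror of the per-direction offset record kept above. */
struct seq_rec { uint32_t correction_pos; int offset_before, offset_after; };

static uint32_t adjust(uint32_t seq, const struct seq_rec *r)
{
	return seq + (seq > r->correction_pos ? r->offset_after
					      : r->offset_before);
}

int main(void)
{
	/* A mangle at seq 1000 grew the segment by 2 bytes. */
	struct seq_rec r = { .correction_pos = 1000,
			     .offset_before = 0, .offset_after = 2 };

	printf("%u %u\n", adjust(900, &r), adjust(1500, &r)); /* 900 1502 */
	return 0;
}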
85
86/* Frobs data inside this packet, which is linear. */
87static void mangle_contents(struct sk_buff *skb,
88 unsigned int dataoff,
89 unsigned int match_offset,
90 unsigned int match_len,
91 const char *rep_buffer,
92 unsigned int rep_len)
93{
94 unsigned char *data;
95
96 BUG_ON(skb_is_nonlinear(skb));
97 data = (unsigned char *)skb->nh.iph + dataoff;
98
99 /* move post-replacement */
100 memmove(data + match_offset + rep_len,
101 data + match_offset + match_len,
102 skb->tail - (data + match_offset + match_len));
103
104 /* insert data from buffer */
105 memcpy(data + match_offset, rep_buffer, rep_len);
106
107 /* update skb info */
108 if (rep_len > match_len) {
109 DEBUGP("ip_nat_mangle_packet: Extending packet by "
110 "%u from %u bytes\n", rep_len - match_len,
111 skb->len);
112 skb_put(skb, rep_len - match_len);
113 } else {
114 DEBUGP("ip_nat_mangle_packet: Shrinking packet from "
115 "%u from %u bytes\n", match_len - rep_len,
116 skb->len);
117 __skb_trim(skb, skb->len + rep_len - match_len);
118 }
119
120 /* fix IP hdr checksum information */
121 skb->nh.iph->tot_len = htons(skb->len);
122 ip_send_check(skb->nh.iph);
123}
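
mangle_contents() is a plain in-buffer splice: shift the tail to open or close the gap, then copy the replacement over the match. The same operation on a flat buffer, with the caller guaranteeing capacity as enlarge_skb() does above:

#include <stdio.h>
#include <string.h>

/* Splice rep over buf[off..off+match_len); returns the new length. */
static size_t splice_buf(char *buf, size_t len, size_t off, size_t match_len,
			 const char *rep, size_t rep_len)
{
	memmove(buf + off + rep_len, buf + off + match_len,
		len - off - match_len);
	memcpy(buf + off, rep, rep_len);
	return len + rep_len - match_len;
}

int main(void)
{
	char pkt[64] = "PORT 10,0,0,1,24,131\r\n";
	size_t n = splice_buf(pkt, strlen(pkt), 5, 15, "10,0,0,2,4,210", 14);

	pkt[n] = '\0';
	fputs(pkt, stdout);	/* PORT 10,0,0,2,4,210 */
	return 0;
}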
124
125/* Unusual, but possible case. */
126static int enlarge_skb(struct sk_buff **pskb, unsigned int extra)
127{
128 struct sk_buff *nskb;
129
130 if ((*pskb)->len + extra > 65535)
131 return 0;
132
133 nskb = skb_copy_expand(*pskb, skb_headroom(*pskb), extra, GFP_ATOMIC);
134 if (!nskb)
135 return 0;
136
137 /* Transfer socket to new skb. */
138 if ((*pskb)->sk)
139 skb_set_owner_w(nskb, (*pskb)->sk);
140 kfree_skb(*pskb);
141 *pskb = nskb;
142 return 1;
143}
144
145/* Generic function for mangling variable-length address changes inside
146 * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
147 * command in FTP).
148 *
149 * Takes care of all the nasty sequence number changes, checksumming,
150 * skb enlargement, ...
151 *
152 */
153int
154ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
155 struct ip_conntrack *ct,
156 enum ip_conntrack_info ctinfo,
157 unsigned int match_offset,
158 unsigned int match_len,
159 const char *rep_buffer,
160 unsigned int rep_len)
161{
162 struct iphdr *iph;
163 struct tcphdr *tcph;
164 int oldlen, datalen;
165
166 if (!skb_make_writable(pskb, (*pskb)->len))
167 return 0;
168
169 if (rep_len > match_len
170 && rep_len - match_len > skb_tailroom(*pskb)
171 && !enlarge_skb(pskb, rep_len - match_len))
172 return 0;
173
174 SKB_LINEAR_ASSERT(*pskb);
175
176 iph = (*pskb)->nh.iph;
177 tcph = (void *)iph + iph->ihl*4;
178
179 oldlen = (*pskb)->len - iph->ihl*4;
180 mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
181 match_offset, match_len, rep_buffer, rep_len);
182
183 datalen = (*pskb)->len - iph->ihl*4;
184 if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
185 tcph->check = 0;
186 tcph->check = tcp_v4_check(datalen,
187 iph->saddr, iph->daddr,
188 csum_partial((char *)tcph,
189 datalen, 0));
190 } else
191 nf_proto_csum_replace2(&tcph->check, *pskb,
192 htons(oldlen), htons(datalen), 1);
193
194 if (rep_len != match_len) {
195 set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
196 adjust_tcp_sequence(ntohl(tcph->seq),
197 (int)rep_len - (int)match_len,
198 ct, ctinfo);
199 /* Tell TCP window tracking about seq change */
200 ip_conntrack_tcp_update(*pskb, ct, CTINFO2DIR(ctinfo));
201 }
202 return 1;
203}
204EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
205
206/* Generic function for mangling variable-length address changes inside
207 * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
208 * command in the Amanda protocol)
209 *
210 * Takes care of all the nasty sequence number changes, checksumming,
211 * skb enlargement, ...
212 *
213 * XXX - This function could be merged with ip_nat_mangle_tcp_packet which
214 * should be fairly easy to do.
215 */
216int
217ip_nat_mangle_udp_packet(struct sk_buff **pskb,
218 struct ip_conntrack *ct,
219 enum ip_conntrack_info ctinfo,
220 unsigned int match_offset,
221 unsigned int match_len,
222 const char *rep_buffer,
223 unsigned int rep_len)
224{
225 struct iphdr *iph;
226 struct udphdr *udph;
227 int datalen, oldlen;
228
229 /* UDP helpers might accidentally mangle the wrong packet */
230 iph = (*pskb)->nh.iph;
231 if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) +
232 match_offset + match_len)
233 return 0;
234
235 if (!skb_make_writable(pskb, (*pskb)->len))
236 return 0;
237
238 if (rep_len > match_len
239 && rep_len - match_len > skb_tailroom(*pskb)
240 && !enlarge_skb(pskb, rep_len - match_len))
241 return 0;
242
243 iph = (*pskb)->nh.iph;
244 udph = (void *)iph + iph->ihl*4;
245
246 oldlen = (*pskb)->len - iph->ihl*4;
247 mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
248 match_offset, match_len, rep_buffer, rep_len);
249
250 /* update the length of the UDP packet */
251 datalen = (*pskb)->len - iph->ihl*4;
252 udph->len = htons(datalen);
253
254 /* fix udp checksum if udp checksum was previously calculated */
255 if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL)
256 return 1;
257
258 if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
259 udph->check = 0;
260 udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
261 datalen, IPPROTO_UDP,
262 csum_partial((char *)udph,
263 datalen, 0));
264 if (!udph->check)
265 udph->check = CSUM_MANGLED_0;
266 } else
267 nf_proto_csum_replace2(&udph->check, *pskb,
268 htons(oldlen), htons(datalen), 1);
269 return 1;
270}
271EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
272
273/* Adjust one found SACK option including checksum correction */
274static void
275sack_adjust(struct sk_buff *skb,
276 struct tcphdr *tcph,
277 unsigned int sackoff,
278 unsigned int sackend,
279 struct ip_nat_seq *natseq)
280{
281 while (sackoff < sackend) {
282 struct tcp_sack_block_wire *sack;
283 __be32 new_start_seq, new_end_seq;
284
285 sack = (void *)skb->data + sackoff;
286 if (after(ntohl(sack->start_seq) - natseq->offset_before,
287 natseq->correction_pos))
288 new_start_seq = htonl(ntohl(sack->start_seq)
289 - natseq->offset_after);
290 else
291 new_start_seq = htonl(ntohl(sack->start_seq)
292 - natseq->offset_before);
293
294 if (after(ntohl(sack->end_seq) - natseq->offset_before,
295 natseq->correction_pos))
296 new_end_seq = htonl(ntohl(sack->end_seq)
297 - natseq->offset_after);
298 else
299 new_end_seq = htonl(ntohl(sack->end_seq)
300 - natseq->offset_before);
301
302 DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
303 ntohl(sack->start_seq), new_start_seq,
304 ntohl(sack->end_seq), new_end_seq);
305
306 nf_proto_csum_replace4(&tcph->check, skb,
307 sack->start_seq, new_start_seq, 0);
308 nf_proto_csum_replace4(&tcph->check, skb,
309 sack->end_seq, new_end_seq, 0);
310 sack->start_seq = new_start_seq;
311 sack->end_seq = new_end_seq;
312 sackoff += sizeof(*sack);
313 }
314}
315
316/* TCP SACK sequence number adjustment */
317static inline unsigned int
318ip_nat_sack_adjust(struct sk_buff **pskb,
319 struct tcphdr *tcph,
320 struct ip_conntrack *ct,
321 enum ip_conntrack_info ctinfo)
322{
323 unsigned int dir, optoff, optend;
324
325 optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr);
326 optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4;
327
328 if (!skb_make_writable(pskb, optend))
329 return 0;
330
331 dir = CTINFO2DIR(ctinfo);
332
333 while (optoff < optend) {
334 /* Usually: option, length. */
335 unsigned char *op = (*pskb)->data + optoff;
336
337 switch (op[0]) {
338 case TCPOPT_EOL:
339 return 1;
340 case TCPOPT_NOP:
341 optoff++;
342 continue;
343 default:
344 /* no partial options */
345 if (optoff + 1 == optend
346 || optoff + op[1] > optend
347 || op[1] < 2)
348 return 0;
349 if (op[0] == TCPOPT_SACK
350 && op[1] >= 2+TCPOLEN_SACK_PERBLOCK
351 && ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
352 sack_adjust(*pskb, tcph, optoff+2,
353 optoff+op[1],
354 &ct->nat.info.seq[!dir]);
355 optoff += op[1];
356 }
357 }
358 return 1;
359}
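
The loop above is the canonical TCP kind/length option walk: EOL terminates the list, NOP advances one byte, and everything else is kind, length, payload, with the three sanity checks rejecting truncated or bogus lengths before anything is dereferenced. A standalone version of the walk:

#include <stdint.h>
#include <stdio.h>

#define TCPOPT_EOL	0
#define TCPOPT_NOP	1
#define TCPOPT_SACK	5

/* Walk TCP options as ip_nat_sack_adjust() does;
 * returns 0 on malformed input, 1 otherwise. */
static int walk_options(const uint8_t *op, unsigned int optend)
{
	unsigned int off = 0;

	while (off < optend) {
		if (op[off] == TCPOPT_EOL)
			return 1;
		if (op[off] == TCPOPT_NOP) {
			off++;
			continue;
		}
		if (off + 1 == optend || op[off + 1] < 2 ||
		    off + op[off + 1] > optend)
			return 0;	/* truncated or bogus length */
		if (op[off] == TCPOPT_SACK)
			printf("SACK option, %u bytes\n", op[off + 1]);
		off += op[off + 1];
	}
	return 1;
}

int main(void)
{
	/* NOP, NOP, SACK with one 8-byte block (len 10), EOL padding. */
	uint8_t opts[] = { 1, 1, 5, 10, 0, 0, 0, 1, 0, 0, 0, 9, 0 };

	return !walk_options(opts, sizeof(opts));
}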
360
361/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */
362int
363ip_nat_seq_adjust(struct sk_buff **pskb,
364 struct ip_conntrack *ct,
365 enum ip_conntrack_info ctinfo)
366{
367 struct tcphdr *tcph;
368 int dir;
369 __be32 newseq, newack;
370 struct ip_nat_seq *this_way, *other_way;
371
372 dir = CTINFO2DIR(ctinfo);
373
374 this_way = &ct->nat.info.seq[dir];
375 other_way = &ct->nat.info.seq[!dir];
376
377 if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
378 return 0;
379
380 tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
381 if (after(ntohl(tcph->seq), this_way->correction_pos))
382 newseq = htonl(ntohl(tcph->seq) + this_way->offset_after);
383 else
384 newseq = htonl(ntohl(tcph->seq) + this_way->offset_before);
385
386 if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
387 other_way->correction_pos))
388 newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_after);
389 else
390 newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before);
391
392 nf_proto_csum_replace4(&tcph->check, *pskb, tcph->seq, newseq, 0);
393 nf_proto_csum_replace4(&tcph->check, *pskb, tcph->ack_seq, newack, 0);
394
395 DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
396 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
397 ntohl(newack));
398
399 tcph->seq = newseq;
400 tcph->ack_seq = newack;
401
402 if (!ip_nat_sack_adjust(pskb, tcph, ct, ctinfo))
403 return 0;
404
405 ip_conntrack_tcp_update(*pskb, ct, dir);
406
407 return 1;
408}
409EXPORT_SYMBOL(ip_nat_seq_adjust);
410
411/* Setup NAT on this expected conntrack so it follows master. */
412/* If we fail to get a free NAT slot, we'll get dropped on confirm */
413void ip_nat_follow_master(struct ip_conntrack *ct,
414 struct ip_conntrack_expect *exp)
415{
416 struct ip_nat_range range;
417
418 /* This must be a fresh one. */
419 BUG_ON(ct->status & IPS_NAT_DONE_MASK);
420
421 /* Change src to where master sends to */
422 range.flags = IP_NAT_RANGE_MAP_IPS;
423 range.min_ip = range.max_ip
424 = ct->master->tuplehash[!exp->dir].tuple.dst.ip;
425 /* hook doesn't matter, but it has to do source manip */
426 ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
427
428 /* For DST manip, map port here to where it's expected. */
429 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
430 range.min = range.max = exp->saved_proto;
431 range.min_ip = range.max_ip
432 = ct->master->tuplehash[!exp->dir].tuple.src.ip;
433 /* hook doesn't matter, but it has to do destination manip */
434 ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
435}
436EXPORT_SYMBOL(ip_nat_follow_master);
diff --git a/net/ipv4/netfilter/ip_nat_helper_h323.c b/net/ipv4/netfilter/ip_nat_helper_h323.c
deleted file mode 100644
index bdc99ef6159e..000000000000
--- a/net/ipv4/netfilter/ip_nat_helper_h323.c
+++ /dev/null
@@ -1,611 +0,0 @@
1/*
2 * H.323 extension for NAT alteration.
3 *
4 * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
5 *
6 * This source code is licensed under General Public License version 2.
7 *
8 * Based on the 'brute force' H.323 NAT module by
9 * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
10 */
11
12#include <linux/module.h>
13#include <linux/netfilter_ipv4.h>
14#include <linux/netfilter.h>
15#include <linux/ip.h>
16#include <linux/tcp.h>
17#include <linux/moduleparam.h>
18#include <net/tcp.h>
19#include <linux/netfilter_ipv4/ip_nat.h>
20#include <linux/netfilter_ipv4/ip_nat_helper.h>
21#include <linux/netfilter_ipv4/ip_nat_rule.h>
22#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
23#include <linux/netfilter_ipv4/ip_conntrack_h323.h>
24#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
25
26#if 0
27#define DEBUGP printk
28#else
29#define DEBUGP(format, args...)
30#endif
31
32/****************************************************************************/
33static int set_addr(struct sk_buff **pskb,
34 unsigned char **data, int dataoff,
35 unsigned int addroff, __be32 ip, u_int16_t port)
36{
37 enum ip_conntrack_info ctinfo;
38 struct ip_conntrack *ct = ip_conntrack_get(*pskb, &ctinfo);
39 struct {
40 __be32 ip;
41 __be16 port;
42 } __attribute__ ((__packed__)) buf;
43 struct tcphdr _tcph, *th;
44
45 buf.ip = ip;
46 buf.port = htons(port);
47 addroff += dataoff;
48
49 if ((*pskb)->nh.iph->protocol == IPPROTO_TCP) {
50 if (!ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
51 addroff, sizeof(buf),
52 (char *) &buf, sizeof(buf))) {
53 if (net_ratelimit())
54 printk("ip_nat_h323: ip_nat_mangle_tcp_packet"
55 " error\n");
56 return -1;
57 }
58
59 /* Relocate data pointer */
60 th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4,
61 sizeof(_tcph), &_tcph);
62 if (th == NULL)
63 return -1;
64 *data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
65 th->doff * 4 + dataoff;
66 } else {
67 if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
68 addroff, sizeof(buf),
69 (char *) &buf, sizeof(buf))) {
70 if (net_ratelimit())
71 printk("ip_nat_h323: ip_nat_mangle_udp_packet"
72 " error\n");
73 return -1;
74 }
75 /* ip_nat_mangle_udp_packet uses skb_make_writable() to copy
76 * or pull everything in a linear buffer, so we can safely
77 * use the skb pointers now */
78 *data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
79 sizeof(struct udphdr);
80 }
81
82 return 0;
83}
84
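/*
 * Sketch (illustration only) of why set_addr() above marks its temporary
 * buffer __packed: the wire format is a 4-byte address immediately
 * followed by a 2-byte port, and without the attribute the compiler may
 * pad the struct, so sizeof() would cover extra bytes when passed as the
 * rewrite length.
 */
#include <stdint.h>
#include <stdio.h>

struct addr_port_padded {
        uint32_t ip;
        uint16_t port;
};                                      /* typically padded to 8 bytes */

struct addr_port_packed {
        uint32_t ip;
        uint16_t port;
} __attribute__((__packed__));          /* exactly the 6 wire bytes */

int main(void)
{
        printf("padded=%zu packed=%zu\n",
               sizeof(struct addr_port_padded),
               sizeof(struct addr_port_packed));
        return 0;
}
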
85/****************************************************************************/
86static int set_h225_addr(struct sk_buff **pskb,
87 unsigned char **data, int dataoff,
88 TransportAddress * addr,
89 __be32 ip, u_int16_t port)
90{
91 return set_addr(pskb, data, dataoff, addr->ipAddress.ip, ip, port);
92}
93
94/****************************************************************************/
95static int set_h245_addr(struct sk_buff **pskb,
96 unsigned char **data, int dataoff,
97 H245_TransportAddress * addr,
98 __be32 ip, u_int16_t port)
99{
100 return set_addr(pskb, data, dataoff,
101 addr->unicastAddress.iPAddress.network, ip, port);
102}
103
104/****************************************************************************/
105static int set_sig_addr(struct sk_buff **pskb, struct ip_conntrack *ct,
106 enum ip_conntrack_info ctinfo,
107 unsigned char **data,
108 TransportAddress * addr, int count)
109{
110 struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
111 int dir = CTINFO2DIR(ctinfo);
112 int i;
113 __be32 ip;
114 u_int16_t port;
115
116 for (i = 0; i < count; i++) {
117 if (get_h225_addr(*data, &addr[i], &ip, &port)) {
118 if (ip == ct->tuplehash[dir].tuple.src.ip &&
119 port == info->sig_port[dir]) {
120 /* GW->GK */
121
122 /* Fix for Gnomemeeting */
123 if (i > 0 &&
124 get_h225_addr(*data, &addr[0],
125 &ip, &port) &&
126 (ntohl(ip) & 0xff000000) == 0x7f000000)
127 i = 0;
128
129 DEBUGP("ip_nat_ras: set signal address "
130 "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
131 NIPQUAD(ip),
132 port,
133 NIPQUAD(ct->tuplehash[!dir].tuple.dst.ip),
134 info->sig_port[!dir]);
135 return set_h225_addr(pskb, data, 0,
136 &addr[i],
137 ct->tuplehash[!dir].tuple.dst.ip,
138 info->sig_port[!dir]);
139 } else if (ip == ct->tuplehash[dir].tuple.dst.ip &&
140 port == info->sig_port[dir]) {
141 /* GK->GW */
142 DEBUGP("ip_nat_ras: set signal address "
143 "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
144 NIPQUAD(ip),
145 port,
146 NIPQUAD(ct->tuplehash[!dir].tuple.src.ip),
147 info->sig_port[!dir]);
148 return set_h225_addr(pskb, data, 0,
149 &addr[i],
150 ct->tuplehash[!dir].tuple.src.ip,
151 info->sig_port[!dir]);
152 }
153 }
154 }
155
156 return 0;
157}
158
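/*
 * User-space sketch (illustration only) of the NIPQUAD idiom used in the
 * DEBUGP calls above: a network-order 32-bit address expands to four
 * byte-sized arguments matching a "%u.%u.%u.%u" format string.
 */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

#define NIPQUAD(addr)                           \
        ((const unsigned char *)&(addr))[0],    \
        ((const unsigned char *)&(addr))[1],    \
        ((const unsigned char *)&(addr))[2],    \
        ((const unsigned char *)&(addr))[3]

int main(void)
{
        uint32_t ip = htonl(0x7f000001);        /* 127.0.0.1 */

        printf("%u.%u.%u.%u\n", NIPQUAD(ip));
        return 0;
}
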
159/****************************************************************************/
160static int set_ras_addr(struct sk_buff **pskb, struct ip_conntrack *ct,
161 enum ip_conntrack_info ctinfo,
162 unsigned char **data,
163 TransportAddress * addr, int count)
164{
165 int dir = CTINFO2DIR(ctinfo);
166 int i;
167 __be32 ip;
168 u_int16_t port;
169
170 for (i = 0; i < count; i++) {
171 if (get_h225_addr(*data, &addr[i], &ip, &port) &&
172 ip == ct->tuplehash[dir].tuple.src.ip &&
173 port == ntohs(ct->tuplehash[dir].tuple.src.u.udp.port)) {
174 DEBUGP("ip_nat_ras: set rasAddress "
175 "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
176 NIPQUAD(ip),
177 port,
178 NIPQUAD(ct->tuplehash[!dir].tuple.dst.ip),
179 ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port));
180 return set_h225_addr(pskb, data, 0,
181 &addr[i],
182 ct->tuplehash[!dir].tuple.dst.ip,
183 ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port));
184 }
185 }
186
187 return 0;
188}
189
190/****************************************************************************/
191static int nat_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct,
192 enum ip_conntrack_info ctinfo,
193 unsigned char **data, int dataoff,
194 H245_TransportAddress * addr,
195 u_int16_t port, u_int16_t rtp_port,
196 struct ip_conntrack_expect *rtp_exp,
197 struct ip_conntrack_expect *rtcp_exp)
198{
199 struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
200 int dir = CTINFO2DIR(ctinfo);
201 int i;
202 u_int16_t nated_port;
203
204 /* Set expectations for NAT */
205 rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port;
206 rtp_exp->expectfn = ip_nat_follow_master;
207 rtp_exp->dir = !dir;
208 rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port;
209 rtcp_exp->expectfn = ip_nat_follow_master;
210 rtcp_exp->dir = !dir;
211
212 /* Lookup existing expects */
213 for (i = 0; i < H323_RTP_CHANNEL_MAX; i++) {
214 if (info->rtp_port[i][dir] == rtp_port) {
215 /* Expected */
216
217 /* Use allocated ports first. This will refresh
218 * the expects */
219 rtp_exp->tuple.dst.u.udp.port =
220 htons(info->rtp_port[i][dir]);
221 rtcp_exp->tuple.dst.u.udp.port =
222 htons(info->rtp_port[i][dir] + 1);
223 break;
224 } else if (info->rtp_port[i][dir] == 0) {
225 /* Not expected */
226 break;
227 }
228 }
229
230 /* Run out of expectations */
231 if (i >= H323_RTP_CHANNEL_MAX) {
232 if (net_ratelimit())
233 printk("ip_nat_h323: out of expectations\n");
234 return 0;
235 }
236
237 /* Try to get a pair of ports. */
238 for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port);
239 nated_port != 0; nated_port += 2) {
240 rtp_exp->tuple.dst.u.udp.port = htons(nated_port);
241 if (ip_conntrack_expect_related(rtp_exp) == 0) {
242 rtcp_exp->tuple.dst.u.udp.port =
243 htons(nated_port + 1);
244 if (ip_conntrack_expect_related(rtcp_exp) == 0)
245 break;
246 ip_conntrack_unexpect_related(rtp_exp);
247 }
248 }
249
250 if (nated_port == 0) { /* No port available */
251 if (net_ratelimit())
252 printk("ip_nat_h323: out of RTP ports\n");
253 return 0;
254 }
255
256 /* Modify signal */
257 if (set_h245_addr(pskb, data, dataoff, addr,
258 ct->tuplehash[!dir].tuple.dst.ip,
259 (port & 1) ? nated_port + 1 : nated_port) == 0) {
260 /* Save ports */
261 info->rtp_port[i][dir] = rtp_port;
262 info->rtp_port[i][!dir] = nated_port;
263 } else {
264 ip_conntrack_unexpect_related(rtp_exp);
265 ip_conntrack_unexpect_related(rtcp_exp);
266 return -1;
267 }
268
269 /* Success */
270 DEBUGP("ip_nat_h323: expect RTP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
271 NIPQUAD(rtp_exp->tuple.src.ip),
272 ntohs(rtp_exp->tuple.src.u.udp.port),
273 NIPQUAD(rtp_exp->tuple.dst.ip),
274 ntohs(rtp_exp->tuple.dst.u.udp.port));
275 DEBUGP("ip_nat_h323: expect RTCP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
276 NIPQUAD(rtcp_exp->tuple.src.ip),
277 ntohs(rtcp_exp->tuple.src.u.udp.port),
278 NIPQUAD(rtcp_exp->tuple.dst.ip),
279 ntohs(rtcp_exp->tuple.dst.u.udp.port));
280
281 return 0;
282}
283
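/*
 * Toy sketch (illustration only) of the paired-port search nat_rtp_rtcp()
 * runs above: candidates advance two at a time so RTCP lands on RTP+1,
 * and a successful RTP reservation is backed out when the matching RTCP
 * reservation fails.  reserve_port()/release_port() are invented
 * stand-ins for ip_conntrack_expect_related()/_unexpect_related().
 */
#include <stdio.h>

static unsigned char used[65536 / 8];           /* toy reservation bitmap */

static int reserve_port(unsigned int port)      /* 0 on success */
{
        if (port == 0 || port > 65535 ||
            (used[port / 8] & (1 << (port % 8))))
                return -1;
        used[port / 8] |= 1 << (port % 8);
        return 0;
}

static void release_port(unsigned int port)
{
        used[port / 8] &= ~(1 << (port % 8));
}

static unsigned int alloc_rtp_pair(unsigned int start)
{
        unsigned int port;

        for (port = start; port != 0 && port < 65536; port += 2) {
                if (reserve_port(port) != 0)
                        continue;               /* RTP slot taken */
                if (reserve_port(port + 1) == 0)
                        return port;            /* got both RTP and RTCP */
                release_port(port);             /* back out the RTP slot */
        }
        return 0;                               /* no pair available */
}

int main(void)
{
        reserve_port(5000);                     /* pretend 5000 is busy */
        printf("pair starts at %u\n", alloc_rtp_pair(5000));
        return 0;
}
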
284/****************************************************************************/
285static int nat_t120(struct sk_buff **pskb, struct ip_conntrack *ct,
286 enum ip_conntrack_info ctinfo,
287 unsigned char **data, int dataoff,
288 H245_TransportAddress * addr, u_int16_t port,
289 struct ip_conntrack_expect *exp)
290{
291 int dir = CTINFO2DIR(ctinfo);
292 u_int16_t nated_port = port;
293
294 /* Set expectations for NAT */
295 exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
296 exp->expectfn = ip_nat_follow_master;
297 exp->dir = !dir;
298
299 /* Try to get same port: if not, try to change it. */
300 for (; nated_port != 0; nated_port++) {
301 exp->tuple.dst.u.tcp.port = htons(nated_port);
302 if (ip_conntrack_expect_related(exp) == 0)
303 break;
304 }
305
306 if (nated_port == 0) { /* No port available */
307 if (net_ratelimit())
308 printk("ip_nat_h323: out of TCP ports\n");
309 return 0;
310 }
311
312 /* Modify signal */
313 if (set_h245_addr(pskb, data, dataoff, addr,
314 ct->tuplehash[!dir].tuple.dst.ip, nated_port) < 0) {
315 ip_conntrack_unexpect_related(exp);
316 return -1;
317 }
318
319 DEBUGP("ip_nat_h323: expect T.120 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
320 NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
321 NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
322
323 return 0;
324}
325
326/****************************************************************************
327 * This conntrack expect function replaces ip_conntrack_h245_expect()
328 * which was set by ip_conntrack_helper_h323.c. It calls both
329 * ip_nat_follow_master() and ip_conntrack_h245_expect()
330 ****************************************************************************/
331static void ip_nat_h245_expect(struct ip_conntrack *new,
332 struct ip_conntrack_expect *this)
333{
334 ip_nat_follow_master(new, this);
335 ip_conntrack_h245_expect(new, this);
336}
337
338/****************************************************************************/
339static int nat_h245(struct sk_buff **pskb, struct ip_conntrack *ct,
340 enum ip_conntrack_info ctinfo,
341 unsigned char **data, int dataoff,
342 TransportAddress * addr, u_int16_t port,
343 struct ip_conntrack_expect *exp)
344{
345 struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
346 int dir = CTINFO2DIR(ctinfo);
347 u_int16_t nated_port = port;
348
349 /* Set expectations for NAT */
350 exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
351 exp->expectfn = ip_nat_h245_expect;
352 exp->dir = !dir;
353
354 /* Check existing expects */
355 if (info->sig_port[dir] == port)
356 nated_port = info->sig_port[!dir];
357
358 /* Try to get same port: if not, try to change it. */
359 for (; nated_port != 0; nated_port++) {
360 exp->tuple.dst.u.tcp.port = htons(nated_port);
361 if (ip_conntrack_expect_related(exp) == 0)
362 break;
363 }
364
365 if (nated_port == 0) { /* No port available */
366 if (net_ratelimit())
367 printk("ip_nat_q931: out of TCP ports\n");
368 return 0;
369 }
370
371 /* Modify signal */
372 if (set_h225_addr(pskb, data, dataoff, addr,
373 ct->tuplehash[!dir].tuple.dst.ip,
374 nated_port) == 0) {
375 /* Save ports */
376 info->sig_port[dir] = port;
377 info->sig_port[!dir] = nated_port;
378 } else {
379 ip_conntrack_unexpect_related(exp);
380 return -1;
381 }
382
383 DEBUGP("ip_nat_q931: expect H.245 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
384 NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
385 NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
386
387 return 0;
388}
389
390/****************************************************************************
391 * This conntrack expect function replaces ip_conntrack_q931_expect()
392 * which was set by ip_conntrack_helper_h323.c.
393 ****************************************************************************/
394static void ip_nat_q931_expect(struct ip_conntrack *new,
395 struct ip_conntrack_expect *this)
396{
397 struct ip_nat_range range;
398
399 if (this->tuple.src.ip != 0) { /* Only accept calls from GK */
400 ip_nat_follow_master(new, this);
401 goto out;
402 }
403
404 /* This must be a fresh one. */
405 BUG_ON(new->status & IPS_NAT_DONE_MASK);
406
407 /* Change src to where master sends to */
408 range.flags = IP_NAT_RANGE_MAP_IPS;
409 range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.ip;
410
411 /* hook doesn't matter, but it has to do source manip */
412 ip_nat_setup_info(new, &range, NF_IP_POST_ROUTING);
413
414 /* For DST manip, map port here to where it's expected. */
415 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
416 range.min = range.max = this->saved_proto;
417 range.min_ip = range.max_ip =
418 new->master->tuplehash[!this->dir].tuple.src.ip;
419
420 /* hook doesn't matter, but it has to do destination manip */
421 ip_nat_setup_info(new, &range, NF_IP_PRE_ROUTING);
422
423 out:
424 ip_conntrack_q931_expect(new, this);
425}
426
427/****************************************************************************/
428static int nat_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
429 enum ip_conntrack_info ctinfo,
430 unsigned char **data, TransportAddress * addr, int idx,
431 u_int16_t port, struct ip_conntrack_expect *exp)
432{
433 struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
434 int dir = CTINFO2DIR(ctinfo);
435 u_int16_t nated_port = port;
436 __be32 ip;
437
438 /* Set expectations for NAT */
439 exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
440 exp->expectfn = ip_nat_q931_expect;
441 exp->dir = !dir;
442
443 /* Check existing expects */
444 if (info->sig_port[dir] == port)
445 nated_port = info->sig_port[!dir];
446
447 /* Try to get same port: if not, try to change it. */
448 for (; nated_port != 0; nated_port++) {
449 exp->tuple.dst.u.tcp.port = htons(nated_port);
450 if (ip_conntrack_expect_related(exp) == 0)
451 break;
452 }
453
454 if (nated_port == 0) { /* No port available */
455 if (net_ratelimit())
456 printk("ip_nat_ras: out of TCP ports\n");
457 return 0;
458 }
459
460 /* Modify signal */
461 if (set_h225_addr(pskb, data, 0, &addr[idx],
462 ct->tuplehash[!dir].tuple.dst.ip,
463 nated_port) == 0) {
464 /* Save ports */
465 info->sig_port[dir] = port;
466 info->sig_port[!dir] = nated_port;
467
468 /* Fix for Gnomemeeting */
469 if (idx > 0 &&
470 get_h225_addr(*data, &addr[0], &ip, &port) &&
471 (ntohl(ip) & 0xff000000) == 0x7f000000) {
472 set_h225_addr_hook(pskb, data, 0, &addr[0],
473 ct->tuplehash[!dir].tuple.dst.ip,
474 info->sig_port[!dir]);
475 }
476 } else {
477 ip_conntrack_unexpect_related(exp);
478 return -1;
479 }
480
481 /* Success */
482 DEBUGP("ip_nat_ras: expect Q.931 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
483 NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
484 NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
485
486 return 0;
487}
488
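/*
 * Sketch (illustration only) of the loopback test behind the
 * Gnomemeeting workaround above: masking the top octet of the host-order
 * address matches the whole 127.0.0.0/8 block, not just 127.0.0.1.
 */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

static int is_loopback(uint32_t be_ip)
{
        return (ntohl(be_ip) & 0xff000000) == 0x7f000000;
}

int main(void)
{
        printf("%d %d\n",
               is_loopback(htonl(0x7f000001)),  /* 127.0.0.1   -> 1 */
               is_loopback(htonl(0xc0a80001))); /* 192.168.0.1 -> 0 */
        return 0;
}
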
489/****************************************************************************/
490static void ip_nat_callforwarding_expect(struct ip_conntrack *new,
491 struct ip_conntrack_expect *this)
492{
493 struct ip_nat_range range;
494
495 /* This must be a fresh one. */
496 BUG_ON(new->status & IPS_NAT_DONE_MASK);
497
498 /* Change src to where master sends to */
499 range.flags = IP_NAT_RANGE_MAP_IPS;
500 range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.ip;
501
502 /* hook doesn't matter, but it has to do source manip */
503 ip_nat_setup_info(new, &range, NF_IP_POST_ROUTING);
504
505 /* For DST manip, map port here to where it's expected. */
506 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
507 range.min = range.max = this->saved_proto;
508 range.min_ip = range.max_ip = this->saved_ip;
509
510 /* hook doesn't matter, but it has to do destination manip */
511 ip_nat_setup_info(new, &range, NF_IP_PRE_ROUTING);
512
513 ip_conntrack_q931_expect(new, this);
514}
515
516/****************************************************************************/
517static int nat_callforwarding(struct sk_buff **pskb, struct ip_conntrack *ct,
518 enum ip_conntrack_info ctinfo,
519 unsigned char **data, int dataoff,
520 TransportAddress * addr, u_int16_t port,
521 struct ip_conntrack_expect *exp)
522{
523 int dir = CTINFO2DIR(ctinfo);
524 u_int16_t nated_port;
525
526 /* Set expectations for NAT */
527 exp->saved_ip = exp->tuple.dst.ip;
528 exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
529 exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
530 exp->expectfn = ip_nat_callforwarding_expect;
531 exp->dir = !dir;
532
533 /* Try to get same port: if not, try to change it. */
534 for (nated_port = port; nated_port != 0; nated_port++) {
535 exp->tuple.dst.u.tcp.port = htons(nated_port);
536 if (ip_conntrack_expect_related(exp) == 0)
537 break;
538 }
539
540 if (nated_port == 0) { /* No port available */
541 if (net_ratelimit())
542 printk("ip_nat_q931: out of TCP ports\n");
543 return 0;
544 }
545
546 /* Modify signal */
547 if (set_h225_addr(pskb, data, dataoff, addr,
548 ct->tuplehash[!dir].tuple.dst.ip,
549 nated_port) != 0) {
550 ip_conntrack_unexpect_related(exp);
551 return -1;
552 }
553
554 /* Success */
555 DEBUGP("ip_nat_q931: expect Call Forwarding "
556 "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
557 NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
558 NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
559
560 return 0;
561}
562
563/****************************************************************************/
564static int __init init(void)
565{
566 BUG_ON(rcu_dereference(set_h245_addr_hook) != NULL);
567 BUG_ON(rcu_dereference(set_h225_addr_hook) != NULL);
568 BUG_ON(rcu_dereference(set_sig_addr_hook) != NULL);
569 BUG_ON(rcu_dereference(set_ras_addr_hook) != NULL);
570 BUG_ON(rcu_dereference(nat_rtp_rtcp_hook) != NULL);
571 BUG_ON(rcu_dereference(nat_t120_hook) != NULL);
572 BUG_ON(rcu_dereference(nat_h245_hook) != NULL);
573 BUG_ON(rcu_dereference(nat_callforwarding_hook) != NULL);
574 BUG_ON(rcu_dereference(nat_q931_hook) != NULL);
575
576 rcu_assign_pointer(set_h245_addr_hook, set_h245_addr);
577 rcu_assign_pointer(set_h225_addr_hook, set_h225_addr);
578 rcu_assign_pointer(set_sig_addr_hook, set_sig_addr);
579 rcu_assign_pointer(set_ras_addr_hook, set_ras_addr);
580 rcu_assign_pointer(nat_rtp_rtcp_hook, nat_rtp_rtcp);
581 rcu_assign_pointer(nat_t120_hook, nat_t120);
582 rcu_assign_pointer(nat_h245_hook, nat_h245);
583 rcu_assign_pointer(nat_callforwarding_hook, nat_callforwarding);
584 rcu_assign_pointer(nat_q931_hook, nat_q931);
585
586 DEBUGP("ip_nat_h323: init success\n");
587 return 0;
588}
589
590/****************************************************************************/
591static void __exit fini(void)
592{
593 rcu_assign_pointer(set_h245_addr_hook, NULL);
594 rcu_assign_pointer(set_h225_addr_hook, NULL);
595 rcu_assign_pointer(set_sig_addr_hook, NULL);
596 rcu_assign_pointer(set_ras_addr_hook, NULL);
597 rcu_assign_pointer(nat_rtp_rtcp_hook, NULL);
598 rcu_assign_pointer(nat_t120_hook, NULL);
599 rcu_assign_pointer(nat_h245_hook, NULL);
600 rcu_assign_pointer(nat_callforwarding_hook, NULL);
601 rcu_assign_pointer(nat_q931_hook, NULL);
602 synchronize_rcu();
603}
604
605/****************************************************************************/
606module_init(init);
607module_exit(fini);
608
609MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
610MODULE_DESCRIPTION("H.323 NAT helper");
611MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
deleted file mode 100644
index 24ce4a5023d7..000000000000
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ /dev/null
@@ -1,350 +0,0 @@
1/*
2 * ip_nat_pptp.c - Version 3.0
3 *
4 * NAT support for PPTP (Point to Point Tunneling Protocol).
5 * PPTP is a protocol for creating virtual private networks.
6 * It is a specification defined by Microsoft and some vendors
7 * working with Microsoft. PPTP is built on top of a modified
8 * version of the Internet Generic Routing Encapsulation Protocol.
9 * GRE is defined in RFC 1701 and RFC 1702. Documentation of
10 * PPTP can be found in RFC 2637
11 *
12 * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
13 *
14 * Development of this code funded by Astaro AG (http://www.astaro.com/)
15 *
16 * TODO: - NAT to a unique tuple, not to TCP source port
17 * (needs netfilter tuple reservation)
18 *
19 * Changes:
20 * 2002-02-10 - Version 1.3
21 * - Use ip_nat_mangle_tcp_packet() because of cloned skb's
22 * in local connections (Philip Craig <philipc@snapgear.com>)
23 * - add checks for magicCookie and pptp version
24 * - make argument list of pptp_{out,in}bound_packet() shorter
25 * - move to C99 style initializers
26 * - print version number at module loadtime
27 * 2003-09-22 - Version 1.5
28 * - use SNATed tcp sourceport as callid, since we get called before
29 * TCP header is mangled (Philip Craig <philipc@snapgear.com>)
30 * 2004-10-22 - Version 2.0
31 * - kernel 2.6.x version
32 * 2005-06-10 - Version 3.0
33 * - kernel >= 2.6.11 version,
34 * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
35 *
36 */
37
38#include <linux/module.h>
39#include <linux/ip.h>
40#include <linux/tcp.h>
41#include <net/tcp.h>
42
43#include <linux/netfilter_ipv4/ip_nat.h>
44#include <linux/netfilter_ipv4/ip_nat_rule.h>
45#include <linux/netfilter_ipv4/ip_nat_helper.h>
46#include <linux/netfilter_ipv4/ip_nat_pptp.h>
47#include <linux/netfilter_ipv4/ip_conntrack_core.h>
48#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
49#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
50#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
51
52#define IP_NAT_PPTP_VERSION "3.0"
53
54#define REQ_CID(req, off) (*(__be16 *)((char *)(req) + (off)))
55
56MODULE_LICENSE("GPL");
57MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
58MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP");
59
60
61#if 0
62extern const char *pptp_msg_name[];
63#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
64 __FUNCTION__, ## args)
65#else
66#define DEBUGP(format, args...)
67#endif
68
69static void pptp_nat_expected(struct ip_conntrack *ct,
70 struct ip_conntrack_expect *exp)
71{
72 struct ip_conntrack *master = ct->master;
73 struct ip_conntrack_expect *other_exp;
74 struct ip_conntrack_tuple t;
75 struct ip_ct_pptp_master *ct_pptp_info;
76 struct ip_nat_pptp *nat_pptp_info;
77 struct ip_nat_range range;
78
79 ct_pptp_info = &master->help.ct_pptp_info;
80 nat_pptp_info = &master->nat.help.nat_pptp_info;
81
82 /* And here goes the grand finale of corrosion... */
83
84 if (exp->dir == IP_CT_DIR_ORIGINAL) {
85 DEBUGP("we are PNS->PAC\n");
86 /* therefore, build tuple for PAC->PNS */
87 t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
88 t.src.u.gre.key = master->help.ct_pptp_info.pac_call_id;
89 t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
90 t.dst.u.gre.key = master->help.ct_pptp_info.pns_call_id;
91 t.dst.protonum = IPPROTO_GRE;
92 } else {
93 DEBUGP("we are PAC->PNS\n");
94 /* build tuple for PNS->PAC */
95 t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
96 t.src.u.gre.key = master->nat.help.nat_pptp_info.pns_call_id;
97 t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
98 t.dst.u.gre.key = master->nat.help.nat_pptp_info.pac_call_id;
99 t.dst.protonum = IPPROTO_GRE;
100 }
101
102 DEBUGP("trying to unexpect other dir: ");
103 DUMP_TUPLE(&t);
104 other_exp = ip_conntrack_expect_find_get(&t);
105 if (other_exp) {
106 ip_conntrack_unexpect_related(other_exp);
107 ip_conntrack_expect_put(other_exp);
108 DEBUGP("success\n");
109 } else {
110 DEBUGP("not found!\n");
111 }
112
113 /* This must be a fresh one. */
114 BUG_ON(ct->status & IPS_NAT_DONE_MASK);
115
116 /* Change src to where master sends to */
117 range.flags = IP_NAT_RANGE_MAP_IPS;
118 range.min_ip = range.max_ip
119 = ct->master->tuplehash[!exp->dir].tuple.dst.ip;
120 if (exp->dir == IP_CT_DIR_ORIGINAL) {
121 range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
122 range.min = range.max = exp->saved_proto;
123 }
124 /* hook doesn't matter, but it has to do source manip */
125 ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
126
127 /* For DST manip, map port here to where it's expected. */
128 range.flags = IP_NAT_RANGE_MAP_IPS;
129 range.min_ip = range.max_ip
130 = ct->master->tuplehash[!exp->dir].tuple.src.ip;
131 if (exp->dir == IP_CT_DIR_REPLY) {
132 range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
133 range.min = range.max = exp->saved_proto;
134 }
135 /* hook doesn't matter, but it has to do destination manip */
136 ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
137}
138
139/* outbound packets == from PNS to PAC */
140static int
141pptp_outbound_pkt(struct sk_buff **pskb,
142 struct ip_conntrack *ct,
143 enum ip_conntrack_info ctinfo,
144 struct PptpControlHeader *ctlh,
145 union pptp_ctrl_union *pptpReq)
146
147{
148 struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
149 struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
150 u_int16_t msg;
151 __be16 new_callid;
152 unsigned int cid_off;
153
154 new_callid = ct_pptp_info->pns_call_id;
155
156 switch (msg = ntohs(ctlh->messageType)) {
157 case PPTP_OUT_CALL_REQUEST:
158 cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
159 /* FIXME: ideally we would want to reserve a call ID
160 * here. The current netfilter NAT core is not able to do
161 * this :( For now we use TCP source port. This breaks
162 * multiple calls within one control session */
163
164 /* save original call ID in nat_info */
165 nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
166
167 /* don't use tcph->source since we are at a DSTmanip
168 * hook (e.g. PREROUTING) and pkt is not mangled yet */
169 new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
170
171 /* save new call ID in ct info */
172 ct_pptp_info->pns_call_id = new_callid;
173 break;
174 case PPTP_IN_CALL_REPLY:
175 cid_off = offsetof(union pptp_ctrl_union, icack.callID);
176 break;
177 case PPTP_CALL_CLEAR_REQUEST:
178 cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
179 break;
180 default:
181 DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
182 (msg <= PPTP_MSG_MAX)?
183 pptp_msg_name[msg]:pptp_msg_name[0]);
184 /* fall through */
185
186 case PPTP_SET_LINK_INFO:
187 /* only need to NAT in case PAC is behind NAT box */
188 case PPTP_START_SESSION_REQUEST:
189 case PPTP_START_SESSION_REPLY:
190 case PPTP_STOP_SESSION_REQUEST:
191 case PPTP_STOP_SESSION_REPLY:
192 case PPTP_ECHO_REQUEST:
193 case PPTP_ECHO_REPLY:
194 /* no need to alter packet */
195 return NF_ACCEPT;
196 }
197
198 /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
199 * down to here */
200 DEBUGP("altering call id from 0x%04x to 0x%04x\n",
201 ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid));
202
203 /* mangle packet */
204 if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
205 cid_off + sizeof(struct pptp_pkt_hdr) +
206 sizeof(struct PptpControlHeader),
207 sizeof(new_callid), (char *)&new_callid,
208 sizeof(new_callid)) == 0)
209 return NF_DROP;
210
211 return NF_ACCEPT;
212}
213
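/*
 * Reduced sketch of the offsetof()-based access pptp_outbound_pkt() uses
 * above: each control message keeps its call ID at a different offset, so
 * the switch records the offset per message type and a REQ_CID-style cast
 * reads a 16-bit value there.  The message layouts below are invented for
 * illustration and do not match the real PPTP structures.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct out_call_request { uint16_t call_id; uint16_t serial; };
struct call_clear_request { uint16_t reserved; uint16_t call_id; };

union ctrl_union {
        struct out_call_request ocreq;
        struct call_clear_request clrreq;
};

#define REQ_CID(req, off) (*(uint16_t *)((char *)(req) + (off)))

int main(void)
{
        union ctrl_union msg = { .clrreq = { 0, 42 } };
        size_t off = offsetof(union ctrl_union, clrreq.call_id);

        printf("call id %u at offset %zu\n", REQ_CID(&msg, off), off);
        return 0;
}
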
214static void
215pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
216 struct ip_conntrack_expect *expect_reply)
217{
218 struct ip_conntrack *ct = expect_orig->master;
219 struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
220 struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
221
222 /* save original PAC call ID in nat_info */
223 nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id;
224
225 /* alter expectation for PNS->PAC direction */
226 expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id;
227 expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id;
228 expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id;
229 expect_orig->dir = IP_CT_DIR_ORIGINAL;
230
231 /* alter expectation for PAC->PNS direction */
232 expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id;
233 expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id;
234 expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id;
235 expect_reply->dir = IP_CT_DIR_REPLY;
236}
237
238/* inbound packets == from PAC to PNS */
239static int
240pptp_inbound_pkt(struct sk_buff **pskb,
241 struct ip_conntrack *ct,
242 enum ip_conntrack_info ctinfo,
243 struct PptpControlHeader *ctlh,
244 union pptp_ctrl_union *pptpReq)
245{
246 struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
247 u_int16_t msg;
248 __be16 new_pcid;
249 unsigned int pcid_off;
250
251 new_pcid = nat_pptp_info->pns_call_id;
252
253 switch (msg = ntohs(ctlh->messageType)) {
254 case PPTP_OUT_CALL_REPLY:
255 pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID);
256 break;
257 case PPTP_IN_CALL_CONNECT:
258 pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID);
259 break;
260 case PPTP_IN_CALL_REQUEST:
261 /* only need to nat in case PAC is behind NAT box */
262 return NF_ACCEPT;
263 case PPTP_WAN_ERROR_NOTIFY:
264 pcid_off = offsetof(union pptp_ctrl_union, wanerr.peersCallID);
265 break;
266 case PPTP_CALL_DISCONNECT_NOTIFY:
267 pcid_off = offsetof(union pptp_ctrl_union, disc.callID);
268 break;
269 case PPTP_SET_LINK_INFO:
270 pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID);
271 break;
272
273 default:
274 DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)?
275 pptp_msg_name[msg]:pptp_msg_name[0]);
276 /* fall through */
277
278 case PPTP_START_SESSION_REQUEST:
279 case PPTP_START_SESSION_REPLY:
280 case PPTP_STOP_SESSION_REQUEST:
281 case PPTP_STOP_SESSION_REPLY:
282 case PPTP_ECHO_REQUEST:
283 case PPTP_ECHO_REPLY:
284 /* no need to alter packet */
285 return NF_ACCEPT;
286 }
287
288 /* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST,
289 * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */
290
291 /* mangle packet */
292 DEBUGP("altering peer call id from 0x%04x to 0x%04x\n",
293 ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid));
294
295 if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
296 pcid_off + sizeof(struct pptp_pkt_hdr) +
297 sizeof(struct PptpControlHeader),
298 sizeof(new_pcid), (char *)&new_pcid,
299 sizeof(new_pcid)) == 0)
300 return NF_DROP;
301 return NF_ACCEPT;
302}
303
304
305extern int __init ip_nat_proto_gre_init(void);
306extern void __exit ip_nat_proto_gre_fini(void);
307
308static int __init ip_nat_helper_pptp_init(void)
309{
310 int ret;
311
312 DEBUGP("%s: registering NAT helper\n", __FILE__);
313
314 ret = ip_nat_proto_gre_init();
315 if (ret < 0)
316 return ret;
317
318 BUG_ON(rcu_dereference(ip_nat_pptp_hook_outbound));
319 rcu_assign_pointer(ip_nat_pptp_hook_outbound, pptp_outbound_pkt);
320
321 BUG_ON(rcu_dereference(ip_nat_pptp_hook_inbound));
322 rcu_assign_pointer(ip_nat_pptp_hook_inbound, pptp_inbound_pkt);
323
324 BUG_ON(rcu_dereference(ip_nat_pptp_hook_exp_gre));
325 rcu_assign_pointer(ip_nat_pptp_hook_exp_gre, pptp_exp_gre);
326
327 BUG_ON(rcu_dereference(ip_nat_pptp_hook_expectfn));
328 rcu_assign_pointer(ip_nat_pptp_hook_expectfn, pptp_nat_expected);
329
330 printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION);
331 return 0;
332}
333
334static void __exit ip_nat_helper_pptp_fini(void)
335{
336 DEBUGP("cleanup_module\n" );
337
338 rcu_assign_pointer(ip_nat_pptp_hook_expectfn, NULL);
339 rcu_assign_pointer(ip_nat_pptp_hook_exp_gre, NULL);
340 rcu_assign_pointer(ip_nat_pptp_hook_inbound, NULL);
341 rcu_assign_pointer(ip_nat_pptp_hook_outbound, NULL);
342 synchronize_rcu();
343
344 ip_nat_proto_gre_fini();
345
346 printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION);
347}
348
349module_init(ip_nat_helper_pptp_init);
350module_exit(ip_nat_helper_pptp_fini);
diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c
deleted file mode 100644
index cfaeea38314f..000000000000
--- a/net/ipv4/netfilter/ip_nat_irc.c
+++ /dev/null
@@ -1,122 +0,0 @@
1/* IRC extension for TCP NAT alteration.
2 * (C) 2000-2001 by Harald Welte <laforge@gnumonks.org>
3 * (C) 2004 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
4 * based on a copy of RR's ip_nat_ftp.c
5 *
6 * ip_nat_irc.c,v 1.16 2001/12/06 07:42:10 laforge Exp
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#include <linux/module.h>
15#include <linux/netfilter_ipv4.h>
16#include <linux/ip.h>
17#include <linux/tcp.h>
18#include <linux/kernel.h>
19#include <net/tcp.h>
20#include <linux/netfilter_ipv4/ip_nat.h>
21#include <linux/netfilter_ipv4/ip_nat_helper.h>
22#include <linux/netfilter_ipv4/ip_nat_rule.h>
23#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
24#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
25#include <linux/moduleparam.h>
26
27#if 0
28#define DEBUGP printk
29#else
30#define DEBUGP(format, args...)
31#endif
32
33MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
34MODULE_DESCRIPTION("IRC (DCC) NAT helper");
35MODULE_LICENSE("GPL");
36
37static unsigned int help(struct sk_buff **pskb,
38 enum ip_conntrack_info ctinfo,
39 unsigned int matchoff,
40 unsigned int matchlen,
41 struct ip_conntrack_expect *exp)
42{
43 u_int16_t port;
44 unsigned int ret;
45
46 /* "4294967296 65635 " */
47 char buffer[18];
48
49 DEBUGP("IRC_NAT: info (seq %u + %u) in %u\n",
50 expect->seq, exp_irc_info->len,
51 ntohl(tcph->seq));
52
53 /* Reply comes from server. */
54 exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
55 exp->dir = IP_CT_DIR_REPLY;
56
57 /* When you see the packet, we need to NAT it the same way as
58 * this one. */
59 exp->expectfn = ip_nat_follow_master;
60
61 /* Try to get same port: if not, try to change it. */
62 for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
63 exp->tuple.dst.u.tcp.port = htons(port);
64 if (ip_conntrack_expect_related(exp) == 0)
65 break;
66 }
67
68 if (port == 0)
69 return NF_DROP;
70
71 /* strlen("\1DCC CHAT chat AAAAAAAA P\1\n")=27
72 * strlen("\1DCC SCHAT chat AAAAAAAA P\1\n")=28
73 * strlen("\1DCC SEND F AAAAAAAA P S\1\n")=26
74 * strlen("\1DCC MOVE F AAAAAAAA P S\1\n")=26
75 * strlen("\1DCC TSEND F AAAAAAAA P S\1\n")=27
76 * AAAAAAAAA: bound addr (1.0.0.0==16777216, min 8 digits,
77 * 255.255.255.255==4294967295, 10 digits)
78 * P: bound port (min 1 d, max 5d (65535))
79 * F: filename (min 1 d )
80 * S: size (min 1 d )
81 * 0x01, \n: terminators
82 */
83
84 /* AAA = "us", i.e. where the server normally talks to. */
85 sprintf(buffer, "%u %u",
86 ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip),
87 port);
88 DEBUGP("ip_nat_irc: Inserting '%s' == %u.%u.%u.%u, port %u\n",
89 buffer, NIPQUAD(exp->tuple.src.ip), port);
90
91 ret = ip_nat_mangle_tcp_packet(pskb, exp->master, ctinfo,
92 matchoff, matchlen, buffer,
93 strlen(buffer));
94 if (ret != NF_ACCEPT)
95 ip_conntrack_unexpect_related(exp);
96 return ret;
97}
98
99static void __exit ip_nat_irc_fini(void)
100{
101 rcu_assign_pointer(ip_nat_irc_hook, NULL);
102 synchronize_rcu();
103}
104
105static int __init ip_nat_irc_init(void)
106{
107 BUG_ON(rcu_dereference(ip_nat_irc_hook));
108 rcu_assign_pointer(ip_nat_irc_hook, help);
109 return 0;
110}
111
112/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
113static int warn_set(const char *val, struct kernel_param *kp)
114{
115 printk(KERN_INFO KBUILD_MODNAME
116 ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
117 return 0;
118}
119module_param_call(ports, warn_set, NULL, NULL, 0);
120
121module_init(ip_nat_irc_init);
122module_exit(ip_nat_irc_fini);
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
deleted file mode 100644
index 95810202d849..000000000000
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ /dev/null
@@ -1,174 +0,0 @@
1/*
2 * ip_nat_proto_gre.c - Version 2.0
3 *
4 * NAT protocol helper module for GRE.
5 *
6 * GRE is a generic encapsulation protocol, which is generally not well
7 * suited for NAT, as it has no protocol-specific part such as port numbers.
8 *
9 * It has an optional key field, which may help us distinguish two
10 * connections between the same two hosts.
11 *
12 * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
13 *
14 * PPTP is built on top of a modified version of GRE, and has a mandatory
15 * field called "CallID", which serves the same purpose for us as the key
16 * field in plain GRE.
17 *
18 * Documentation about PPTP can be found in RFC 2637
19 *
20 * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
21 *
22 * Development of this code funded by Astaro AG (http://www.astaro.com/)
23 *
24 */
25
26#include <linux/module.h>
27#include <linux/ip.h>
28#include <linux/netfilter_ipv4/ip_nat.h>
29#include <linux/netfilter_ipv4/ip_nat_rule.h>
30#include <linux/netfilter_ipv4/ip_nat_protocol.h>
31#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
32
33MODULE_LICENSE("GPL");
34MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
35MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
36
37#if 0
38#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
39 __FUNCTION__, ## args)
40#else
41#define DEBUGP(x, args...)
42#endif
43
44/* is key in given range between min and max */
45static int
46gre_in_range(const struct ip_conntrack_tuple *tuple,
47 enum ip_nat_manip_type maniptype,
48 const union ip_conntrack_manip_proto *min,
49 const union ip_conntrack_manip_proto *max)
50{
51 __be16 key;
52
53 if (maniptype == IP_NAT_MANIP_SRC)
54 key = tuple->src.u.gre.key;
55 else
56 key = tuple->dst.u.gre.key;
57
58 return ntohs(key) >= ntohs(min->gre.key)
59 && ntohs(key) <= ntohs(max->gre.key);
60}
61
62/* generate unique tuple ... */
63static int
64gre_unique_tuple(struct ip_conntrack_tuple *tuple,
65 const struct ip_nat_range *range,
66 enum ip_nat_manip_type maniptype,
67 const struct ip_conntrack *conntrack)
68{
69 static u_int16_t key;
70 __be16 *keyptr;
71 unsigned int min, i, range_size;
72
73 if (maniptype == IP_NAT_MANIP_SRC)
74 keyptr = &tuple->src.u.gre.key;
75 else
76 keyptr = &tuple->dst.u.gre.key;
77
78 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
79 DEBUGP("%p: NATing GRE PPTP\n", conntrack);
80 min = 1;
81 range_size = 0xffff;
82 } else {
83 min = ntohs(range->min.gre.key);
84 range_size = ntohs(range->max.gre.key) - min + 1;
85 }
86
87 DEBUGP("min = %u, range_size = %u\n", min, range_size);
88
89 for (i = 0; i < range_size; i++, key++) {
90 *keyptr = htons(min + key % range_size);
91 if (!ip_nat_used_tuple(tuple, conntrack))
92 return 1;
93 }
94
95 DEBUGP("%p: no NAT mapping\n", conntrack);
96
97 return 0;
98}
99
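/*
 * Toy version (illustration only) of the search in gre_unique_tuple()
 * above: a static counter carries over between calls, each probe maps it
 * into [min, min + range_size) with a modulo, and the loop gives up after
 * one full pass.  in_use() is an invented stand-in for
 * ip_nat_used_tuple().
 */
#include <stdio.h>

static int in_use(unsigned int key)
{
        return key < 1003;              /* pretend the low keys are taken */
}

static int pick_key(unsigned int min, unsigned int range_size,
                    unsigned int *out)
{
        static unsigned int key;        /* rotates across calls */
        unsigned int i;

        for (i = 0; i < range_size; i++, key++) {
                unsigned int candidate = min + key % range_size;

                if (!in_use(candidate)) {
                        *out = candidate;
                        return 0;
                }
        }
        return -1;                      /* range exhausted */
}

int main(void)
{
        unsigned int key;

        if (pick_key(1000, 16, &key) == 0)
                printf("picked %u\n", key);
        return 0;
}
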
100/* manipulate a GRE packet according to maniptype */
101static int
102gre_manip_pkt(struct sk_buff **pskb,
103 unsigned int iphdroff,
104 const struct ip_conntrack_tuple *tuple,
105 enum ip_nat_manip_type maniptype)
106{
107 struct gre_hdr *greh;
108 struct gre_hdr_pptp *pgreh;
109 struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
110 unsigned int hdroff = iphdroff + iph->ihl*4;
111
112 /* pgreh includes two optional 32bit fields which are not required
113 * to be there. That's where the magic '8' comes from */
114 if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh)-8))
115 return 0;
116
117 greh = (void *)(*pskb)->data + hdroff;
118 pgreh = (struct gre_hdr_pptp *) greh;
119
120 /* we only do destination manipulation of a packet, since the
121 * 'source key' is not present in the packet itself */
122 if (maniptype == IP_NAT_MANIP_DST) {
123 /* key manipulation is always dest */
124 switch (greh->version) {
125 case 0:
126 if (!greh->key) {
127 DEBUGP("can't nat GRE w/o key\n");
128 break;
129 }
130 if (greh->csum) {
131 /* FIXME: Never tested this code... */
132 nf_proto_csum_replace4(gre_csum(greh), *pskb,
133 *(gre_key(greh)),
134 tuple->dst.u.gre.key, 0);
135 }
136 *(gre_key(greh)) = tuple->dst.u.gre.key;
137 break;
138 case GRE_VERSION_PPTP:
139 DEBUGP("call_id -> 0x%04x\n",
140 ntohs(tuple->dst.u.gre.key));
141 pgreh->call_id = tuple->dst.u.gre.key;
142 break;
143 default:
144 DEBUGP("can't nat unknown GRE version\n");
145 return 0;
146 break;
147 }
148 }
149 return 1;
150}
151
152/* nat helper struct */
153static struct ip_nat_protocol gre = {
154 .name = "GRE",
155 .protonum = IPPROTO_GRE,
156 .manip_pkt = gre_manip_pkt,
157 .in_range = gre_in_range,
158 .unique_tuple = gre_unique_tuple,
159#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
160 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
161 .range_to_nfattr = ip_nat_port_range_to_nfattr,
162 .nfattr_to_range = ip_nat_port_nfattr_to_range,
163#endif
164};
165
166int __init ip_nat_proto_gre_init(void)
167{
168 return ip_nat_protocol_register(&gre);
169}
170
171void __exit ip_nat_proto_gre_fini(void)
172{
173 ip_nat_protocol_unregister(&gre);
174}
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
deleted file mode 100644
index 22a528ae0380..000000000000
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c
+++ /dev/null
@@ -1,87 +0,0 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/types.h>
10#include <linux/init.h>
11#include <linux/netfilter.h>
12#include <linux/ip.h>
13#include <linux/icmp.h>
14#include <linux/if.h>
15
16#include <linux/netfilter_ipv4/ip_nat.h>
17#include <linux/netfilter_ipv4/ip_nat_core.h>
18#include <linux/netfilter_ipv4/ip_nat_rule.h>
19#include <linux/netfilter_ipv4/ip_nat_protocol.h>
20
21static int
22icmp_in_range(const struct ip_conntrack_tuple *tuple,
23 enum ip_nat_manip_type maniptype,
24 const union ip_conntrack_manip_proto *min,
25 const union ip_conntrack_manip_proto *max)
26{
27 return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
28 ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
29}
30
31static int
32icmp_unique_tuple(struct ip_conntrack_tuple *tuple,
33 const struct ip_nat_range *range,
34 enum ip_nat_manip_type maniptype,
35 const struct ip_conntrack *conntrack)
36{
37 static u_int16_t id;
38 unsigned int range_size;
39 unsigned int i;
40
41 range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1;
42 /* If no range specified... */
43 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED))
44 range_size = 0xFFFF;
45
46 for (i = 0; i < range_size; i++, id++) {
47 tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) +
48 (id % range_size));
49 if (!ip_nat_used_tuple(tuple, conntrack))
50 return 1;
51 }
52 return 0;
53}
54
55static int
56icmp_manip_pkt(struct sk_buff **pskb,
57 unsigned int iphdroff,
58 const struct ip_conntrack_tuple *tuple,
59 enum ip_nat_manip_type maniptype)
60{
61 struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
62 struct icmphdr *hdr;
63 unsigned int hdroff = iphdroff + iph->ihl*4;
64
65 if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
66 return 0;
67
68 hdr = (struct icmphdr *)((*pskb)->data + hdroff);
69 nf_proto_csum_replace2(&hdr->checksum, *pskb,
70 hdr->un.echo.id, tuple->src.u.icmp.id, 0);
71 hdr->un.echo.id = tuple->src.u.icmp.id;
72 return 1;
73}
74
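/*
 * User-space sketch (illustration only) of the incremental update that
 * nf_proto_csum_replace2() performs above.  Per RFC 1624, replacing the
 * 16-bit field m with m' turns checksum HC into HC' = ~(~HC + ~m + m'),
 * so only the changed word is folded in, never the whole packet.
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t csum_replace2(uint16_t check, uint16_t old_val,
                              uint16_t new_val)
{
        uint32_t sum = (uint16_t)~check;

        sum += (uint16_t)~old_val;
        sum += new_val;
        sum = (sum & 0xffff) + (sum >> 16);     /* fold the carries */
        sum = (sum & 0xffff) + (sum >> 16);
        return ~sum;
}

int main(void)
{
        /* echo id changes from 0x1234 to 0xabcd under checksum 0x5678 */
        printf("0x%04x\n", csum_replace2(0x5678, 0x1234, 0xabcd));
        return 0;
}
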
75struct ip_nat_protocol ip_nat_protocol_icmp = {
76 .name = "ICMP",
77 .protonum = IPPROTO_ICMP,
78 .me = THIS_MODULE,
79 .manip_pkt = icmp_manip_pkt,
80 .in_range = icmp_in_range,
81 .unique_tuple = icmp_unique_tuple,
82#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
83 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
84 .range_to_nfattr = ip_nat_port_range_to_nfattr,
85 .nfattr_to_range = ip_nat_port_nfattr_to_range,
86#endif
87};
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
deleted file mode 100644
index 14ff24f53a7a..000000000000
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ /dev/null
@@ -1,154 +0,0 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/types.h>
10#include <linux/init.h>
11#include <linux/random.h>
12#include <linux/netfilter.h>
13#include <linux/ip.h>
14#include <linux/tcp.h>
15#include <linux/if.h>
16#include <linux/netfilter/nfnetlink_conntrack.h>
17#include <linux/netfilter_ipv4/ip_nat.h>
18#include <linux/netfilter_ipv4/ip_nat_rule.h>
19#include <linux/netfilter_ipv4/ip_nat_protocol.h>
20#include <linux/netfilter_ipv4/ip_nat_core.h>
21
22static int
23tcp_in_range(const struct ip_conntrack_tuple *tuple,
24 enum ip_nat_manip_type maniptype,
25 const union ip_conntrack_manip_proto *min,
26 const union ip_conntrack_manip_proto *max)
27{
28 __be16 port;
29
30 if (maniptype == IP_NAT_MANIP_SRC)
31 port = tuple->src.u.tcp.port;
32 else
33 port = tuple->dst.u.tcp.port;
34
35 return ntohs(port) >= ntohs(min->tcp.port)
36 && ntohs(port) <= ntohs(max->tcp.port);
37}
38
39static int
40tcp_unique_tuple(struct ip_conntrack_tuple *tuple,
41 const struct ip_nat_range *range,
42 enum ip_nat_manip_type maniptype,
43 const struct ip_conntrack *conntrack)
44{
45 static u_int16_t port;
46 __be16 *portptr;
47 unsigned int range_size, min, i;
48
49 if (maniptype == IP_NAT_MANIP_SRC)
50 portptr = &tuple->src.u.tcp.port;
51 else
52 portptr = &tuple->dst.u.tcp.port;
53
54 /* If no range specified... */
55 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
56 /* If it's dst rewrite, can't change port */
57 if (maniptype == IP_NAT_MANIP_DST)
58 return 0;
59
60 /* Map privileged onto privileged. */
61 if (ntohs(*portptr) < 1024) {
62 /* Loose convention: >> 512 is credential passing */
63 if (ntohs(*portptr)<512) {
64 min = 1;
65 range_size = 511 - min + 1;
66 } else {
67 min = 600;
68 range_size = 1023 - min + 1;
69 }
70 } else {
71 min = 1024;
72 range_size = 65535 - 1024 + 1;
73 }
74 } else {
75 min = ntohs(range->min.tcp.port);
76 range_size = ntohs(range->max.tcp.port) - min + 1;
77 }
78
79 /* Start from random port to avoid prediction */
80 if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
81 port = net_random();
82
83 for (i = 0; i < range_size; i++, port++) {
84 *portptr = htons(min + port % range_size);
85 if (!ip_nat_used_tuple(tuple, conntrack)) {
86 return 1;
87 }
88 }
89 return 0;
90}
91
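/*
 * Sketch (illustration only) of the port-class selection in
 * tcp_unique_tuple() above when no explicit range is given: a source
 * port stays inside its privilege class -- below 512 maps into 1..511,
 * 512..1023 into 600..1023, and everything else into 1024..65535.
 */
#include <stdio.h>

static void port_class(unsigned int port, unsigned int *min,
                       unsigned int *range_size)
{
        if (port < 512) {
                *min = 1;
                *range_size = 511;
        } else if (port < 1024) {
                *min = 600;             /* loose rresvport-style convention */
                *range_size = 1023 - 600 + 1;
        } else {
                *min = 1024;
                *range_size = 65535 - 1024 + 1;
        }
}

int main(void)
{
        unsigned int min, size;

        port_class(22, &min, &size);
        printf("port 22 remaps into [%u, %u]\n", min, min + size - 1);
        return 0;
}
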
92static int
93tcp_manip_pkt(struct sk_buff **pskb,
94 unsigned int iphdroff,
95 const struct ip_conntrack_tuple *tuple,
96 enum ip_nat_manip_type maniptype)
97{
98 struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
99 struct tcphdr *hdr;
100 unsigned int hdroff = iphdroff + iph->ihl*4;
101 __be32 oldip, newip;
102 __be16 *portptr, newport, oldport;
103 int hdrsize = 8; /* TCP connection tracking guarantees this much */
104
105 /* this could be an inner header returned in an ICMP packet; in such
106 cases we cannot update the checksum field since it is outside of
107 the 8 bytes of transport layer headers we are guaranteed */
108 if ((*pskb)->len >= hdroff + sizeof(struct tcphdr))
109 hdrsize = sizeof(struct tcphdr);
110
111 if (!skb_make_writable(pskb, hdroff + hdrsize))
112 return 0;
113
114 iph = (struct iphdr *)((*pskb)->data + iphdroff);
115 hdr = (struct tcphdr *)((*pskb)->data + hdroff);
116
117 if (maniptype == IP_NAT_MANIP_SRC) {
118 /* Get rid of src ip and src pt */
119 oldip = iph->saddr;
120 newip = tuple->src.ip;
121 newport = tuple->src.u.tcp.port;
122 portptr = &hdr->source;
123 } else {
124 /* Get rid of dst ip and dst pt */
125 oldip = iph->daddr;
126 newip = tuple->dst.ip;
127 newport = tuple->dst.u.tcp.port;
128 portptr = &hdr->dest;
129 }
130
131 oldport = *portptr;
132 *portptr = newport;
133
134 if (hdrsize < sizeof(*hdr))
135 return 1;
136
137 nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
138 nf_proto_csum_replace2(&hdr->check, *pskb, oldport, newport, 0);
139 return 1;
140}
141
142struct ip_nat_protocol ip_nat_protocol_tcp = {
143 .name = "TCP",
144 .protonum = IPPROTO_TCP,
145 .me = THIS_MODULE,
146 .manip_pkt = tcp_manip_pkt,
147 .in_range = tcp_in_range,
148 .unique_tuple = tcp_unique_tuple,
149#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
150 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
151 .range_to_nfattr = ip_nat_port_range_to_nfattr,
152 .nfattr_to_range = ip_nat_port_nfattr_to_range,
153#endif
154};
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
deleted file mode 100644
index dfd521672891..000000000000
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ /dev/null
@@ -1,144 +0,0 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/types.h>
10#include <linux/init.h>
11#include <linux/random.h>
12#include <linux/netfilter.h>
13#include <linux/ip.h>
14#include <linux/udp.h>
15#include <linux/if.h>
16
17#include <linux/netfilter_ipv4/ip_nat.h>
18#include <linux/netfilter_ipv4/ip_nat_core.h>
19#include <linux/netfilter_ipv4/ip_nat_rule.h>
20#include <linux/netfilter_ipv4/ip_nat_protocol.h>
21
22static int
23udp_in_range(const struct ip_conntrack_tuple *tuple,
24 enum ip_nat_manip_type maniptype,
25 const union ip_conntrack_manip_proto *min,
26 const union ip_conntrack_manip_proto *max)
27{
28 __be16 port;
29
30 if (maniptype == IP_NAT_MANIP_SRC)
31 port = tuple->src.u.udp.port;
32 else
33 port = tuple->dst.u.udp.port;
34
35 return ntohs(port) >= ntohs(min->udp.port)
36 && ntohs(port) <= ntohs(max->udp.port);
37}
38
39static int
40udp_unique_tuple(struct ip_conntrack_tuple *tuple,
41 const struct ip_nat_range *range,
42 enum ip_nat_manip_type maniptype,
43 const struct ip_conntrack *conntrack)
44{
45 static u_int16_t port;
46 __be16 *portptr;
47 unsigned int range_size, min, i;
48
49 if (maniptype == IP_NAT_MANIP_SRC)
50 portptr = &tuple->src.u.udp.port;
51 else
52 portptr = &tuple->dst.u.udp.port;
53
54 /* If no range specified... */
55 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
56 /* If it's dst rewrite, can't change port */
57 if (maniptype == IP_NAT_MANIP_DST)
58 return 0;
59
60 if (ntohs(*portptr) < 1024) {
61 /* Loose convention: >> 512 is credential passing */
62 if (ntohs(*portptr)<512) {
63 min = 1;
64 range_size = 511 - min + 1;
65 } else {
66 min = 600;
67 range_size = 1023 - min + 1;
68 }
69 } else {
70 min = 1024;
71 range_size = 65535 - 1024 + 1;
72 }
73 } else {
74 min = ntohs(range->min.udp.port);
75 range_size = ntohs(range->max.udp.port) - min + 1;
76 }
77
78 /* Start from random port to avoid prediction */
79 if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
80 port = net_random();
81
82 for (i = 0; i < range_size; i++, port++) {
83 *portptr = htons(min + port % range_size);
84 if (!ip_nat_used_tuple(tuple, conntrack))
85 return 1;
86 }
87 return 0;
88}
89
90static int
91udp_manip_pkt(struct sk_buff **pskb,
92 unsigned int iphdroff,
93 const struct ip_conntrack_tuple *tuple,
94 enum ip_nat_manip_type maniptype)
95{
96 struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
97 struct udphdr *hdr;
98 unsigned int hdroff = iphdroff + iph->ihl*4;
99 __be32 oldip, newip;
100 __be16 *portptr, newport;
101
102 if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
103 return 0;
104
105 iph = (struct iphdr *)((*pskb)->data + iphdroff);
106 hdr = (struct udphdr *)((*pskb)->data + hdroff);
107
108 if (maniptype == IP_NAT_MANIP_SRC) {
109 /* Get rid of src ip and src pt */
110 oldip = iph->saddr;
111 newip = tuple->src.ip;
112 newport = tuple->src.u.udp.port;
113 portptr = &hdr->source;
114 } else {
115 /* Get rid of dst ip and dst pt */
116 oldip = iph->daddr;
117 newip = tuple->dst.ip;
118 newport = tuple->dst.u.udp.port;
119 portptr = &hdr->dest;
120 }
121
122 if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) {
123 nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
124 nf_proto_csum_replace2(&hdr->check, *pskb, *portptr, newport, 0);
125 if (!hdr->check)
126 hdr->check = CSUM_MANGLED_0;
127 }
128 *portptr = newport;
129 return 1;
130}
131
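/*
 * Sketch (illustration only) of the zero-checksum rule udp_manip_pkt()
 * honours above: a zero UDP checksum means "not computed" (RFC 768), so
 * when an existing checksum folds to zero after the update it must be
 * stored as 0xffff (the kernel's CSUM_MANGLED_0) rather than as zero.
 */
#include <stdint.h>
#include <stdio.h>

#define CSUM_MANGLED_0 ((uint16_t)0xffff)

static uint16_t fixup_udp_csum(uint16_t updated)
{
        return updated ? updated : CSUM_MANGLED_0;
}

int main(void)
{
        printf("0x%04x 0x%04x\n", fixup_udp_csum(0x1a2b), fixup_udp_csum(0));
        return 0;
}
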
132struct ip_nat_protocol ip_nat_protocol_udp = {
133 .name = "UDP",
134 .protonum = IPPROTO_UDP,
135 .me = THIS_MODULE,
136 .manip_pkt = udp_manip_pkt,
137 .in_range = udp_in_range,
138 .unique_tuple = udp_unique_tuple,
139#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
140 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
141 .range_to_nfattr = ip_nat_port_range_to_nfattr,
142 .nfattr_to_range = ip_nat_port_nfattr_to_range,
143#endif
144};
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
deleted file mode 100644
index 3bf049517246..000000000000
--- a/net/ipv4/netfilter/ip_nat_proto_unknown.c
+++ /dev/null
@@ -1,55 +0,0 @@
1/* The "unknown" protocol. This is what is used for protocols we
2 * don't understand. It's returned by ip_ct_find_proto().
3 */
4
5/* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/types.h>
14#include <linux/init.h>
15#include <linux/netfilter.h>
16#include <linux/if.h>
17
18#include <linux/netfilter_ipv4/ip_nat.h>
19#include <linux/netfilter_ipv4/ip_nat_rule.h>
20#include <linux/netfilter_ipv4/ip_nat_protocol.h>
21
22static int unknown_in_range(const struct ip_conntrack_tuple *tuple,
23 enum ip_nat_manip_type manip_type,
24 const union ip_conntrack_manip_proto *min,
25 const union ip_conntrack_manip_proto *max)
26{
27 return 1;
28}
29
30static int unknown_unique_tuple(struct ip_conntrack_tuple *tuple,
31 const struct ip_nat_range *range,
32 enum ip_nat_manip_type maniptype,
33 const struct ip_conntrack *conntrack)
34{
35 /* Sorry: we can't help you; if it's not unique, we can't frob
36 anything. */
37 return 0;
38}
39
40static int
41unknown_manip_pkt(struct sk_buff **pskb,
42 unsigned int iphdroff,
43 const struct ip_conntrack_tuple *tuple,
44 enum ip_nat_manip_type maniptype)
45{
46 return 1;
47}
48
49struct ip_nat_protocol ip_nat_unknown_protocol = {
50 .name = "unknown",
51 /* .me isn't set: getting a ref to this cannot fail. */
52 .manip_pkt = unknown_manip_pkt,
53 .in_range = unknown_in_range,
54 .unique_tuple = unknown_unique_tuple,
55};
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
deleted file mode 100644
index 080eb1d92200..000000000000
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ /dev/null
@@ -1,314 +0,0 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9/* Everything about the rules for NAT. */
10#include <linux/types.h>
11#include <linux/ip.h>
12#include <linux/netfilter.h>
13#include <linux/netfilter_ipv4.h>
14#include <linux/module.h>
15#include <linux/kmod.h>
16#include <linux/skbuff.h>
17#include <linux/proc_fs.h>
18#include <net/checksum.h>
19#include <net/route.h>
20#include <linux/bitops.h>
21
22#include <linux/netfilter_ipv4/ip_tables.h>
23#include <linux/netfilter_ipv4/ip_nat.h>
24#include <linux/netfilter_ipv4/ip_nat_core.h>
25#include <linux/netfilter_ipv4/ip_nat_rule.h>
26
27#if 0
28#define DEBUGP printk
29#else
30#define DEBUGP(format, args...)
31#endif
32
33#define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT))
34
35static struct
36{
37 struct ipt_replace repl;
38 struct ipt_standard entries[3];
39 struct ipt_error term;
40} nat_initial_table __initdata
41= { { "nat", NAT_VALID_HOOKS, 4,
42 sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
43 { [NF_IP_PRE_ROUTING] = 0,
44 [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
45 [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
46 { [NF_IP_PRE_ROUTING] = 0,
47 [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
48 [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
49 0, NULL, { } },
50 {
51 /* PRE_ROUTING */
52 { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
53 0,
54 sizeof(struct ipt_entry),
55 sizeof(struct ipt_standard),
56 0, { 0, 0 }, { } },
57 { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
58 -NF_ACCEPT - 1 } },
59 /* POST_ROUTING */
60 { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
61 0,
62 sizeof(struct ipt_entry),
63 sizeof(struct ipt_standard),
64 0, { 0, 0 }, { } },
65 { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
66 -NF_ACCEPT - 1 } },
67 /* LOCAL_OUT */
68 { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
69 0,
70 sizeof(struct ipt_entry),
71 sizeof(struct ipt_standard),
72 0, { 0, 0 }, { } },
73 { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
74 -NF_ACCEPT - 1 } }
75 },
76 /* ERROR */
77 { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
78 0,
79 sizeof(struct ipt_entry),
80 sizeof(struct ipt_error),
81 0, { 0, 0 }, { } },
82 { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } },
83 { } },
84 "ERROR"
85 }
86 }
87};
88
89static struct xt_table nat_table = {
90 .name = "nat",
91 .valid_hooks = NAT_VALID_HOOKS,
92 .lock = RW_LOCK_UNLOCKED,
93 .me = THIS_MODULE,
94 .af = AF_INET,
95};
96
97/* Source NAT */
98static unsigned int ipt_snat_target(struct sk_buff **pskb,
99 const struct net_device *in,
100 const struct net_device *out,
101 unsigned int hooknum,
102 const struct xt_target *target,
103 const void *targinfo)
104{
105 struct ip_conntrack *ct;
106 enum ip_conntrack_info ctinfo;
107 const struct ip_nat_multi_range_compat *mr = targinfo;
108
109 IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
110
111 ct = ip_conntrack_get(*pskb, &ctinfo);
112
113 /* Connection must be valid and new. */
114 IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
115 || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
116 IP_NF_ASSERT(out);
117
118 return ip_nat_setup_info(ct, &mr->range[0], hooknum);
119}
120
121/* Before 2.6.11 we did implicit source NAT if required. Warn about change. */
122static void warn_if_extra_mangle(__be32 dstip, __be32 srcip)
123{
124 static int warned = 0;
125 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
126 struct rtable *rt;
127
128 if (ip_route_output_key(&rt, &fl) != 0)
129 return;
130
131 if (rt->rt_src != srcip && !warned) {
132		printk("NAT: implicit source local NAT is no longer supported\n");
133 printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n",
134 NIPQUAD(srcip), NIPQUAD(dstip));
135 warned = 1;
136 }
137 ip_rt_put(rt);
138}
139
140static unsigned int ipt_dnat_target(struct sk_buff **pskb,
141 const struct net_device *in,
142 const struct net_device *out,
143 unsigned int hooknum,
144 const struct xt_target *target,
145 const void *targinfo)
146{
147 struct ip_conntrack *ct;
148 enum ip_conntrack_info ctinfo;
149 const struct ip_nat_multi_range_compat *mr = targinfo;
150
151 IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
152 || hooknum == NF_IP_LOCAL_OUT);
153
154 ct = ip_conntrack_get(*pskb, &ctinfo);
155
156 /* Connection must be valid and new. */
157 IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
158
159 if (hooknum == NF_IP_LOCAL_OUT
160 && mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
161 warn_if_extra_mangle((*pskb)->nh.iph->daddr,
162 mr->range[0].min_ip);
163
164 return ip_nat_setup_info(ct, &mr->range[0], hooknum);
165}
166
167static int ipt_snat_checkentry(const char *tablename,
168 const void *entry,
169 const struct xt_target *target,
170 void *targinfo,
171 unsigned int hook_mask)
172{
173 struct ip_nat_multi_range_compat *mr = targinfo;
174
175 /* Must be a valid range */
176 if (mr->rangesize != 1) {
177 printk("SNAT: multiple ranges no longer supported\n");
178 return 0;
179 }
180 return 1;
181}
182
183static int ipt_dnat_checkentry(const char *tablename,
184 const void *entry,
185 const struct xt_target *target,
186 void *targinfo,
187 unsigned int hook_mask)
188{
189 struct ip_nat_multi_range_compat *mr = targinfo;
190
191 /* Must be a valid range */
192 if (mr->rangesize != 1) {
193 printk("DNAT: multiple ranges no longer supported\n");
194 return 0;
195 }
196 if (mr->range[0].flags & IP_NAT_RANGE_PROTO_RANDOM) {
197 printk("DNAT: port randomization not supported\n");
198 return 0;
199 }
200 return 1;
201}
202
203inline unsigned int
204alloc_null_binding(struct ip_conntrack *conntrack,
205 struct ip_nat_info *info,
206 unsigned int hooknum)
207{
208 /* Force range to this IP; let proto decide mapping for
209 per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
210 Use reply in case it's already been mangled (eg local packet).
211 */
212 __be32 ip
213 = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
214 ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip
215 : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
216 struct ip_nat_range range
217 = { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } };
218
219 DEBUGP("Allocating NULL binding for %p (%u.%u.%u.%u)\n", conntrack,
220 NIPQUAD(ip));
221 return ip_nat_setup_info(conntrack, &range, hooknum);
222}
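/* Annotation (an assumption inferred from the uses of HOOK2MANIP() in
 * this file, not a quote of its definition): the macro maps a hook to
 * the half of the tuple NAT may rewrite there, roughly
 *
 *	HOOK2MANIP(NF_IP_POST_ROUTING) == IP_NAT_MANIP_SRC	(SNAT)
 *	HOOK2MANIP(NF_IP_PRE_ROUTING)  == IP_NAT_MANIP_DST	(DNAT)
 *
 * which is why alloc_null_binding() above picks the REPLY tuple's dst
 * for a source manip and its src for a destination manip: the reply
 * tuple already reflects any mangling done so far.
 */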
223
224unsigned int
225alloc_null_binding_confirmed(struct ip_conntrack *conntrack,
226 struct ip_nat_info *info,
227 unsigned int hooknum)
228{
229 __be32 ip
230 = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
231 ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip
232 : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
233 u_int16_t all
234 = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
235 ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.all
236 : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.all);
237 struct ip_nat_range range
238 = { IP_NAT_RANGE_MAP_IPS, ip, ip, { all }, { all } };
239
240 DEBUGP("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n",
241 conntrack, NIPQUAD(ip));
242 return ip_nat_setup_info(conntrack, &range, hooknum);
243}
244
245int ip_nat_rule_find(struct sk_buff **pskb,
246 unsigned int hooknum,
247 const struct net_device *in,
248 const struct net_device *out,
249 struct ip_conntrack *ct,
250 struct ip_nat_info *info)
251{
252 int ret;
253
254 ret = ipt_do_table(pskb, hooknum, in, out, &nat_table);
255
256 if (ret == NF_ACCEPT) {
257 if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
258			/* NULL mapping */
259 ret = alloc_null_binding(ct, info, hooknum);
260 }
261 return ret;
262}
263
264static struct xt_target ipt_snat_reg = {
265 .name = "SNAT",
266 .family = AF_INET,
267 .target = ipt_snat_target,
268 .targetsize = sizeof(struct ip_nat_multi_range_compat),
269 .table = "nat",
270 .hooks = 1 << NF_IP_POST_ROUTING,
271 .checkentry = ipt_snat_checkentry,
272};
273
274static struct xt_target ipt_dnat_reg = {
275 .name = "DNAT",
276 .family = AF_INET,
277 .target = ipt_dnat_target,
278 .targetsize = sizeof(struct ip_nat_multi_range_compat),
279 .table = "nat",
280 .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT),
281 .checkentry = ipt_dnat_checkentry,
282};
283
284int __init ip_nat_rule_init(void)
285{
286 int ret;
287
288 ret = ipt_register_table(&nat_table, &nat_initial_table.repl);
289 if (ret != 0)
290 return ret;
291 ret = xt_register_target(&ipt_snat_reg);
292 if (ret != 0)
293 goto unregister_table;
294
295 ret = xt_register_target(&ipt_dnat_reg);
296 if (ret != 0)
297 goto unregister_snat;
298
299 return ret;
300
301 unregister_snat:
302 xt_unregister_target(&ipt_snat_reg);
303 unregister_table:
304 xt_unregister_table(&nat_table);
305
306 return ret;
307}
308
309void ip_nat_rule_cleanup(void)
310{
311 xt_unregister_target(&ipt_dnat_reg);
312 xt_unregister_target(&ipt_snat_reg);
313 ipt_unregister_table(&nat_table);
314}
diff --git a/net/ipv4/netfilter/ip_nat_sip.c b/net/ipv4/netfilter/ip_nat_sip.c
deleted file mode 100644
index 325c5a9dc2ef..000000000000
--- a/net/ipv4/netfilter/ip_nat_sip.c
+++ /dev/null
@@ -1,282 +0,0 @@
1/* SIP extension for UDP NAT alteration.
2 *
3 * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
4 * based on RR's ip_nat_ftp.c and other modules.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <linux/ip.h>
14#include <linux/udp.h>
15
16#include <linux/netfilter_ipv4.h>
17#include <linux/netfilter_ipv4/ip_nat.h>
18#include <linux/netfilter_ipv4/ip_nat_helper.h>
19#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
20#include <linux/netfilter_ipv4/ip_conntrack_sip.h>
21
22MODULE_LICENSE("GPL");
23MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
24MODULE_DESCRIPTION("SIP NAT helper");
25
26#if 0
27#define DEBUGP printk
28#else
29#define DEBUGP(format, args...)
30#endif
31
32struct addr_map {
33 struct {
34 char src[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
35 char dst[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
36 unsigned int srclen, srciplen;
37 unsigned int dstlen, dstiplen;
38 } addr[IP_CT_DIR_MAX];
39};
40
41static void addr_map_init(struct ip_conntrack *ct, struct addr_map *map)
42{
43 struct ip_conntrack_tuple *t;
44 enum ip_conntrack_dir dir;
45 unsigned int n;
46
47 for (dir = 0; dir < IP_CT_DIR_MAX; dir++) {
48 t = &ct->tuplehash[dir].tuple;
49
50 n = sprintf(map->addr[dir].src, "%u.%u.%u.%u",
51 NIPQUAD(t->src.ip));
52 map->addr[dir].srciplen = n;
53 n += sprintf(map->addr[dir].src + n, ":%u",
54 ntohs(t->src.u.udp.port));
55 map->addr[dir].srclen = n;
56
57 n = sprintf(map->addr[dir].dst, "%u.%u.%u.%u",
58 NIPQUAD(t->dst.ip));
59 map->addr[dir].dstiplen = n;
60 n += sprintf(map->addr[dir].dst + n, ":%u",
61 ntohs(t->dst.u.udp.port));
62 map->addr[dir].dstlen = n;
63 }
64}
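/* Worked example with invented values: for a dialogue tracked as
 * 192.168.1.2:5060 -> 10.0.0.9:5060, addr_map_init() fills the
 * ORIGINAL direction with
 *
 *	src = "192.168.1.2:5060"	srciplen = 11, srclen = 16
 *	dst = "10.0.0.9:5060"		dstiplen = 8,  dstlen = 13
 *
 * The *iplen fields let map_sip_addr() below match either the bare IP
 * or the full IP:port form of the same endpoint.
 */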
65
66static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo,
67 struct ip_conntrack *ct, const char **dptr, size_t dlen,
68 enum sip_header_pos pos, struct addr_map *map)
69{
70 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
71 unsigned int matchlen, matchoff, addrlen;
72 char *addr;
73
74 if (ct_sip_get_info(*dptr, dlen, &matchoff, &matchlen, pos) <= 0)
75 return 1;
76
77 if ((matchlen == map->addr[dir].srciplen ||
78 matchlen == map->addr[dir].srclen) &&
79 memcmp(*dptr + matchoff, map->addr[dir].src, matchlen) == 0) {
80 addr = map->addr[!dir].dst;
81 addrlen = map->addr[!dir].dstlen;
82 } else if ((matchlen == map->addr[dir].dstiplen ||
83 matchlen == map->addr[dir].dstlen) &&
84 memcmp(*dptr + matchoff, map->addr[dir].dst, matchlen) == 0) {
85 addr = map->addr[!dir].src;
86 addrlen = map->addr[!dir].srclen;
87 } else
88 return 1;
89
90 if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
91 matchoff, matchlen, addr, addrlen))
92 return 0;
93 *dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
94 return 1;
95
96}
97
98static unsigned int ip_nat_sip(struct sk_buff **pskb,
99 enum ip_conntrack_info ctinfo,
100 struct ip_conntrack *ct,
101 const char **dptr)
102{
103 enum sip_header_pos pos;
104 struct addr_map map;
105 int dataoff, datalen;
106
107 dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
108 datalen = (*pskb)->len - dataoff;
109 if (datalen < sizeof("SIP/2.0") - 1)
110 return NF_DROP;
111
112 addr_map_init(ct, &map);
113
114 /* Basic rules: requests and responses. */
115 if (strncmp(*dptr, "SIP/2.0", sizeof("SIP/2.0") - 1) != 0) {
116 /* 10.2: Constructing the REGISTER Request:
117 *
118 * The "userinfo" and "@" components of the SIP URI MUST NOT
119 * be present.
120 */
121 if (datalen >= sizeof("REGISTER") - 1 &&
122 strncmp(*dptr, "REGISTER", sizeof("REGISTER") - 1) == 0)
123 pos = POS_REG_REQ_URI;
124 else
125 pos = POS_REQ_URI;
126
127 if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, pos, &map))
128 return NF_DROP;
129 }
130
131 if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_FROM, &map) ||
132 !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_TO, &map) ||
133 !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_VIA, &map) ||
134 !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map))
135 return NF_DROP;
136 return NF_ACCEPT;
137}
138
139static unsigned int mangle_sip_packet(struct sk_buff **pskb,
140 enum ip_conntrack_info ctinfo,
141 struct ip_conntrack *ct,
142 const char **dptr, size_t dlen,
143 char *buffer, int bufflen,
144 enum sip_header_pos pos)
145{
146 unsigned int matchlen, matchoff;
147
148 if (ct_sip_get_info(*dptr, dlen, &matchoff, &matchlen, pos) <= 0)
149 return 0;
150
151 if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
152 matchoff, matchlen, buffer, bufflen))
153 return 0;
154
155 /* We need to reload this. Thanks Patrick. */
156 *dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
157 return 1;
158}
159
160static int mangle_content_len(struct sk_buff **pskb,
161 enum ip_conntrack_info ctinfo,
162 struct ip_conntrack *ct,
163 const char *dptr)
164{
165 unsigned int dataoff, matchoff, matchlen;
166 char buffer[sizeof("65536")];
167 int bufflen;
168
169 dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
170
171	/* Get actual SDP length */
172 if (ct_sip_get_info(dptr, (*pskb)->len - dataoff, &matchoff,
173 &matchlen, POS_SDP_HEADER) > 0) {
174
175		/* since ct_sip_get_info() gives us a pointer just past 'v=',
176		   we need to add 2 bytes to this count. */
177 int c_len = (*pskb)->len - dataoff - matchoff + 2;
178
179		/* Now, update SDP length */
180 if (ct_sip_get_info(dptr, (*pskb)->len - dataoff, &matchoff,
181 &matchlen, POS_CONTENT) > 0) {
182
183 bufflen = sprintf(buffer, "%u", c_len);
184
185 return ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
186 matchoff, matchlen,
187 buffer, bufflen);
188 }
189 }
190 return 0;
191}
192
193static unsigned int mangle_sdp(struct sk_buff **pskb,
194 enum ip_conntrack_info ctinfo,
195 struct ip_conntrack *ct,
196 __be32 newip, u_int16_t port,
197 const char *dptr)
198{
199 char buffer[sizeof("nnn.nnn.nnn.nnn")];
200 unsigned int dataoff, bufflen;
201
202 dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
203
204 /* Mangle owner and contact info. */
205 bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip));
206 if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
207 buffer, bufflen, POS_OWNER))
208 return 0;
209
210 if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
211 buffer, bufflen, POS_CONNECTION))
212 return 0;
213
214 /* Mangle media port. */
215 bufflen = sprintf(buffer, "%u", port);
216 if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
217 buffer, bufflen, POS_MEDIA))
218 return 0;
219
220 return mangle_content_len(pskb, ctinfo, ct, dptr);
221}
222
223/* So, this packet has hit the connection tracking matching code.
224 Mangle it, and change the expectation to match the new version. */
225static unsigned int ip_nat_sdp(struct sk_buff **pskb,
226 enum ip_conntrack_info ctinfo,
227 struct ip_conntrack_expect *exp,
228 const char *dptr)
229{
230 struct ip_conntrack *ct = exp->master;
231 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
232 __be32 newip;
233 u_int16_t port;
234
235 DEBUGP("ip_nat_sdp():\n");
236
237 /* Connection will come from reply */
238 newip = ct->tuplehash[!dir].tuple.dst.ip;
239
240 exp->tuple.dst.ip = newip;
241 exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port;
242 exp->dir = !dir;
243
244	/* When you see the packet, we need to NAT it the same as
245	   this one. */
246 exp->expectfn = ip_nat_follow_master;
247
248 /* Try to get same port: if not, try to change it. */
249 for (port = ntohs(exp->saved_proto.udp.port); port != 0; port++) {
250 exp->tuple.dst.u.udp.port = htons(port);
251 if (ip_conntrack_expect_related(exp) == 0)
252 break;
253 }
254
255 if (port == 0)
256 return NF_DROP;
257
258 if (!mangle_sdp(pskb, ctinfo, ct, newip, port, dptr)) {
259 ip_conntrack_unexpect_related(exp);
260 return NF_DROP;
261 }
262 return NF_ACCEPT;
263}
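/* A note on the port search above: port is a u_int16_t, so the loop
 * starts at the port the peer asked for and walks upward until
 * ip_conntrack_expect_related() accepts one; the increment wraps
 * 65535 -> 0, which terminates the loop and is reported as NF_DROP.
 * The same pattern in miniature (try_reserve() is a hypothetical
 * stand-in, not a real kernel call):
 *
 *	u_int16_t port;
 *	for (port = wanted; port != 0; port++)
 *		if (try_reserve(port) == 0)
 *			break;
 *	if (port == 0)
 *		return -EBUSY;	(everything from wanted upward was taken)
 */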
264
265static void __exit fini(void)
266{
267 rcu_assign_pointer(ip_nat_sip_hook, NULL);
268 rcu_assign_pointer(ip_nat_sdp_hook, NULL);
269 synchronize_rcu();
270}
271
272static int __init init(void)
273{
274 BUG_ON(rcu_dereference(ip_nat_sip_hook));
275 BUG_ON(rcu_dereference(ip_nat_sdp_hook));
276 rcu_assign_pointer(ip_nat_sip_hook, ip_nat_sip);
277 rcu_assign_pointer(ip_nat_sdp_hook, ip_nat_sdp);
278 return 0;
279}
280
281module_init(init);
282module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
deleted file mode 100644
index e41d0efae515..000000000000
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ /dev/null
@@ -1,1333 +0,0 @@
1/*
2 * ip_nat_snmp_basic.c
3 *
4 * Basic SNMP Application Layer Gateway
5 *
6 * This IP NAT module is intended for use with SNMP network
7 * discovery and monitoring applications where target networks use
8 * conflicting private address realms.
9 *
10 * Static NAT is used to remap the networks from the view of the network
11 * management system at the IP layer, and this module remaps some application
12 * layer addresses to match.
13 *
14 * The simplest form of ALG is performed, where only tagged IP addresses
15 * are modified. The module does not need to be MIB aware and only scans
16 * messages at the ASN.1/BER level.
17 *
18 * Currently, only SNMPv1 and SNMPv2 are supported.
19 *
20 * More information on ALG and associated issues can be found in
21 * RFC 2962.
22 *
23 * The ASN.1/BER parsing code is derived from the gxsnmp package by Gregory
24 * McLean & Jochen Friedrich, stripped down for use in the kernel.
25 *
26 * Copyright (c) 2000 RP Internet (www.rpi.net.au).
27 *
28 * This program is free software; you can redistribute it and/or modify
29 * it under the terms of the GNU General Public License as published by
30 * the Free Software Foundation; either version 2 of the License, or
31 * (at your option) any later version.
32 * This program is distributed in the hope that it will be useful,
33 * but WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35 * GNU General Public License for more details.
36 * You should have received a copy of the GNU General Public License
37 * along with this program; if not, write to the Free Software
38 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
39 *
40 * Author: James Morris <jmorris@intercode.com.au>
41 *
42 * Updates:
43 * 2000-08-06: Convert to new helper API (Harald Welte).
44 *
45 */
46#include <linux/in.h>
47#include <linux/module.h>
48#include <linux/types.h>
49#include <linux/kernel.h>
50#include <linux/moduleparam.h>
51#include <linux/netfilter_ipv4.h>
52#include <linux/netfilter_ipv4/ip_nat.h>
53#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
54#include <linux/netfilter_ipv4/ip_nat_helper.h>
55#include <linux/ip.h>
56#include <linux/udp.h>
57#include <net/checksum.h>
58#include <net/udp.h>
59#include <asm/uaccess.h>
60
61MODULE_LICENSE("GPL");
62MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
63MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway");
64
65#define SNMP_PORT 161
66#define SNMP_TRAP_PORT 162
67#define NOCT1(n) (*(u8 *)n)
68
69static int debug;
70static DEFINE_SPINLOCK(snmp_lock);
71
72/*
73 * Application layer address mapping mimics the NAT mapping, but
74 * only for the first octet in this case (a more flexible system
75 * can be implemented if needed).
76 */
77struct oct1_map
78{
79 u_int8_t from;
80 u_int8_t to;
81};
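/* Illustration with invented values: given map = { .from = 10, .to = 192 },
 * an address 10.1.2.3 carried in the SNMP payload is rewritten to
 * 192.1.2.3 by mangle_address() below. Only the first octet is
 * remapped, mirroring a NAT between two conflicting /8-style private
 * realms.
 */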
82
83
84/*****************************************************************************
85 *
86 * Basic ASN.1 decoding routines (gxsnmp author Dirk Wisse)
87 *
88 *****************************************************************************/
89
90/* Class */
91#define ASN1_UNI 0 /* Universal */
92#define ASN1_APL 1 /* Application */
93#define ASN1_CTX 2 /* Context */
94#define ASN1_PRV 3 /* Private */
95
96/* Tag */
97#define ASN1_EOC 0 /* End Of Contents */
98#define ASN1_BOL 1 /* Boolean */
99#define ASN1_INT 2 /* Integer */
100#define ASN1_BTS 3 /* Bit String */
101#define ASN1_OTS 4 /* Octet String */
102#define ASN1_NUL 5 /* Null */
103#define ASN1_OJI 6 /* Object Identifier */
104#define ASN1_OJD 7 /* Object Description */
105#define ASN1_EXT 8 /* External */
106#define ASN1_SEQ 16 /* Sequence */
107#define ASN1_SET 17 /* Set */
108#define ASN1_NUMSTR 18 /* Numerical String */
109#define ASN1_PRNSTR 19 /* Printable String */
110#define ASN1_TEXSTR 20	/* Teletex String */
111#define ASN1_VIDSTR 21 /* Video String */
112#define ASN1_IA5STR 22 /* IA5 String */
113#define ASN1_UNITIM 23 /* Universal Time */
114#define ASN1_GENTIM 24 /* General Time */
115#define ASN1_GRASTR 25 /* Graphical String */
116#define ASN1_VISSTR 26 /* Visible String */
117#define ASN1_GENSTR 27 /* General String */
118
119/* Primitive / Constructed methods */
120#define ASN1_PRI 0 /* Primitive */
121#define ASN1_CON 1 /* Constructed */
122
123/*
124 * Error codes.
125 */
126#define ASN1_ERR_NOERROR 0
127#define ASN1_ERR_DEC_EMPTY 2
128#define ASN1_ERR_DEC_EOC_MISMATCH 3
129#define ASN1_ERR_DEC_LENGTH_MISMATCH 4
130#define ASN1_ERR_DEC_BADVALUE 5
131
132/*
133 * ASN.1 context.
134 */
135struct asn1_ctx
136{
137 int error; /* Error condition */
138	unsigned char *pointer;	/* Next octet to be decoded */
139 unsigned char *begin; /* First octet */
140 unsigned char *end; /* Octet after last octet */
141};
142
143/*
144 * Octet string (not null terminated)
145 */
146struct asn1_octstr
147{
148 unsigned char *data;
149 unsigned int len;
150};
151
152static void asn1_open(struct asn1_ctx *ctx,
153 unsigned char *buf,
154 unsigned int len)
155{
156 ctx->begin = buf;
157 ctx->end = buf + len;
158 ctx->pointer = buf;
159 ctx->error = ASN1_ERR_NOERROR;
160}
161
162static unsigned char asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch)
163{
164 if (ctx->pointer >= ctx->end) {
165 ctx->error = ASN1_ERR_DEC_EMPTY;
166 return 0;
167 }
168 *ch = *(ctx->pointer)++;
169 return 1;
170}
171
172static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag)
173{
174 unsigned char ch;
175
176 *tag = 0;
177
178 do
179 {
180 if (!asn1_octet_decode(ctx, &ch))
181 return 0;
182 *tag <<= 7;
183 *tag |= ch & 0x7F;
184 } while ((ch & 0x80) == 0x80);
185 return 1;
186}
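/* Worked example, following the loop above: the high-tag-number form
 * is base 128, most significant group first, with bit 7 of each octet
 * as the continuation flag. Decoding the octets 0x81 0x7f:
 *
 *	ch = 0x81: tag = (0 << 7) | 0x01 = 1,   bit 7 set -> continue
 *	ch = 0x7f: tag = (1 << 7) | 0x7f = 255, bit 7 clear -> done
 */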
187
188static unsigned char asn1_id_decode(struct asn1_ctx *ctx,
189 unsigned int *cls,
190 unsigned int *con,
191 unsigned int *tag)
192{
193 unsigned char ch;
194
195 if (!asn1_octet_decode(ctx, &ch))
196 return 0;
197
198 *cls = (ch & 0xC0) >> 6;
199 *con = (ch & 0x20) >> 5;
200 *tag = (ch & 0x1F);
201
202 if (*tag == 0x1F) {
203 if (!asn1_tag_decode(ctx, tag))
204 return 0;
205 }
206 return 1;
207}
208
209static unsigned char asn1_length_decode(struct asn1_ctx *ctx,
210 unsigned int *def,
211 unsigned int *len)
212{
213 unsigned char ch, cnt;
214
215 if (!asn1_octet_decode(ctx, &ch))
216 return 0;
217
218 if (ch == 0x80)
219 *def = 0;
220 else {
221 *def = 1;
222
223 if (ch < 0x80)
224 *len = ch;
225 else {
226 cnt = (unsigned char) (ch & 0x7F);
227 *len = 0;
228
229 while (cnt > 0) {
230 if (!asn1_octet_decode(ctx, &ch))
231 return 0;
232 *len <<= 8;
233 *len |= ch;
234 cnt--;
235 }
236 }
237 }
238 return 1;
239}
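/* Worked examples, derived from the code above:
 *
 *	0x26		-> def = 1, len = 0x26 = 38	(short form)
 *	0x82 0x01 0x00	-> def = 1, len = 0x0100 = 256	(long form)
 *	0x80		-> def = 0			(indefinite form,
 *							 closed by an EOC)
 */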
240
241static unsigned char asn1_header_decode(struct asn1_ctx *ctx,
242 unsigned char **eoc,
243 unsigned int *cls,
244 unsigned int *con,
245 unsigned int *tag)
246{
247 unsigned int def, len;
248
249 if (!asn1_id_decode(ctx, cls, con, tag))
250 return 0;
251
252 def = len = 0;
253 if (!asn1_length_decode(ctx, &def, &len))
254 return 0;
255
256 if (def)
257 *eoc = ctx->pointer + len;
258 else
259 *eoc = NULL;
260 return 1;
261}
262
263static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc)
264{
265 unsigned char ch;
266
267	if (eoc == NULL) {
268 if (!asn1_octet_decode(ctx, &ch))
269 return 0;
270
271 if (ch != 0x00) {
272 ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
273 return 0;
274 }
275
276 if (!asn1_octet_decode(ctx, &ch))
277 return 0;
278
279 if (ch != 0x00) {
280 ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
281 return 0;
282 }
283 return 1;
284 } else {
285 if (ctx->pointer != eoc) {
286 ctx->error = ASN1_ERR_DEC_LENGTH_MISMATCH;
287 return 0;
288 }
289 return 1;
290 }
291}
292
293static unsigned char asn1_null_decode(struct asn1_ctx *ctx, unsigned char *eoc)
294{
295 ctx->pointer = eoc;
296 return 1;
297}
298
299static unsigned char asn1_long_decode(struct asn1_ctx *ctx,
300 unsigned char *eoc,
301 long *integer)
302{
303 unsigned char ch;
304 unsigned int len;
305
306 if (!asn1_octet_decode(ctx, &ch))
307 return 0;
308
309 *integer = (signed char) ch;
310 len = 1;
311
312 while (ctx->pointer < eoc) {
313 if (++len > sizeof (long)) {
314 ctx->error = ASN1_ERR_DEC_BADVALUE;
315 return 0;
316 }
317
318 if (!asn1_octet_decode(ctx, &ch))
319 return 0;
320
321 *integer <<= 8;
322 *integer |= ch;
323 }
324 return 1;
325}
326
327static unsigned char asn1_uint_decode(struct asn1_ctx *ctx,
328 unsigned char *eoc,
329 unsigned int *integer)
330{
331 unsigned char ch;
332 unsigned int len;
333
334 if (!asn1_octet_decode(ctx, &ch))
335 return 0;
336
337 *integer = ch;
338 if (ch == 0) len = 0;
339 else len = 1;
340
341 while (ctx->pointer < eoc) {
342 if (++len > sizeof (unsigned int)) {
343 ctx->error = ASN1_ERR_DEC_BADVALUE;
344 return 0;
345 }
346
347 if (!asn1_octet_decode(ctx, &ch))
348 return 0;
349
350 *integer <<= 8;
351 *integer |= ch;
352 }
353 return 1;
354}
355
356static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx,
357 unsigned char *eoc,
358 unsigned long *integer)
359{
360 unsigned char ch;
361 unsigned int len;
362
363 if (!asn1_octet_decode(ctx, &ch))
364 return 0;
365
366 *integer = ch;
367 if (ch == 0) len = 0;
368 else len = 1;
369
370 while (ctx->pointer < eoc) {
371 if (++len > sizeof (unsigned long)) {
372 ctx->error = ASN1_ERR_DEC_BADVALUE;
373 return 0;
374 }
375
376 if (!asn1_octet_decode(ctx, &ch))
377 return 0;
378
379 *integer <<= 8;
380 *integer |= ch;
381 }
382 return 1;
383}
384
385static unsigned char asn1_octets_decode(struct asn1_ctx *ctx,
386 unsigned char *eoc,
387 unsigned char **octets,
388 unsigned int *len)
389{
390 unsigned char *ptr;
391
392 *len = 0;
393
394 *octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC);
395 if (*octets == NULL) {
396 if (net_ratelimit())
397 printk("OOM in bsalg (%d)\n", __LINE__);
398 return 0;
399 }
400
401 ptr = *octets;
402 while (ctx->pointer < eoc) {
403 if (!asn1_octet_decode(ctx, (unsigned char *)ptr++)) {
404 kfree(*octets);
405 *octets = NULL;
406 return 0;
407 }
408 (*len)++;
409 }
410 return 1;
411}
412
413static unsigned char asn1_subid_decode(struct asn1_ctx *ctx,
414 unsigned long *subid)
415{
416 unsigned char ch;
417
418 *subid = 0;
419
420 do {
421 if (!asn1_octet_decode(ctx, &ch))
422 return 0;
423
424 *subid <<= 7;
425 *subid |= ch & 0x7F;
426 } while ((ch & 0x80) == 0x80);
427 return 1;
428}
429
430static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
431 unsigned char *eoc,
432 unsigned long **oid,
433 unsigned int *len)
434{
435 unsigned long subid;
436 unsigned int size;
437 unsigned long *optr;
438
439 size = eoc - ctx->pointer + 1;
440 *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
441 if (*oid == NULL) {
442 if (net_ratelimit())
443 printk("OOM in bsalg (%d)\n", __LINE__);
444 return 0;
445 }
446
447 optr = *oid;
448
449 if (!asn1_subid_decode(ctx, &subid)) {
450 kfree(*oid);
451 *oid = NULL;
452 return 0;
453 }
454
455 if (subid < 40) {
456 optr [0] = 0;
457 optr [1] = subid;
458 } else if (subid < 80) {
459 optr [0] = 1;
460 optr [1] = subid - 40;
461 } else {
462 optr [0] = 2;
463 optr [1] = subid - 80;
464 }
465
466 *len = 2;
467 optr += 2;
468
469 while (ctx->pointer < eoc) {
470 if (++(*len) > size) {
471 ctx->error = ASN1_ERR_DEC_BADVALUE;
472 kfree(*oid);
473 *oid = NULL;
474 return 0;
475 }
476
477 if (!asn1_subid_decode(ctx, optr++)) {
478 kfree(*oid);
479 *oid = NULL;
480 return 0;
481 }
482 }
483 return 1;
484}
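/* Worked example, matching the < 40 / < 80 split above: BER packs the
 * first two OID components X.Y into one subidentifier, 40 * X + Y. A
 * leading subidentifier of 43 (0x2b) therefore decodes as
 *
 *	43 in [40, 80) -> optr[0] = 1, optr[1] = 43 - 40 = 3
 *
 * so the OID begins "1.3", iso(1) identified-organization(3), as in
 * 1.3.6.1 for the internet subtree.
 */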
485
486/*****************************************************************************
487 *
488 * SNMP decoding routines (gxsnmp author Dirk Wisse)
489 *
490 *****************************************************************************/
491
492/* SNMP Versions */
493#define SNMP_V1 0
494#define SNMP_V2C 1
495#define SNMP_V2 2
496#define SNMP_V3 3
497
498/* Default Sizes */
499#define SNMP_SIZE_COMM 256
500#define SNMP_SIZE_OBJECTID 128
501#define SNMP_SIZE_BUFCHR 256
502#define SNMP_SIZE_BUFINT 128
503#define SNMP_SIZE_SMALLOBJECTID 16
504
505/* Requests */
506#define SNMP_PDU_GET 0
507#define SNMP_PDU_NEXT 1
508#define SNMP_PDU_RESPONSE 2
509#define SNMP_PDU_SET 3
510#define SNMP_PDU_TRAP1 4
511#define SNMP_PDU_BULK 5
512#define SNMP_PDU_INFORM 6
513#define SNMP_PDU_TRAP2 7
514
515/* Errors */
516#define SNMP_NOERROR 0
517#define SNMP_TOOBIG 1
518#define SNMP_NOSUCHNAME 2
519#define SNMP_BADVALUE 3
520#define SNMP_READONLY 4
521#define SNMP_GENERROR 5
522#define SNMP_NOACCESS 6
523#define SNMP_WRONGTYPE 7
524#define SNMP_WRONGLENGTH 8
525#define SNMP_WRONGENCODING 9
526#define SNMP_WRONGVALUE 10
527#define SNMP_NOCREATION 11
528#define SNMP_INCONSISTENTVALUE 12
529#define SNMP_RESOURCEUNAVAILABLE 13
530#define SNMP_COMMITFAILED 14
531#define SNMP_UNDOFAILED 15
532#define SNMP_AUTHORIZATIONERROR 16
533#define SNMP_NOTWRITABLE 17
534#define SNMP_INCONSISTENTNAME 18
535
536/* General SNMP V1 Traps */
537#define SNMP_TRAP_COLDSTART 0
538#define SNMP_TRAP_WARMSTART 1
539#define SNMP_TRAP_LINKDOWN 2
540#define SNMP_TRAP_LINKUP 3
541#define SNMP_TRAP_AUTFAILURE 4
542#define SNMP_TRAP_EQPNEIGHBORLOSS 5
543#define SNMP_TRAP_ENTSPECIFIC 6
544
545/* SNMPv1 Types */
546#define SNMP_NULL 0
547#define SNMP_INTEGER 1 /* l */
548#define SNMP_OCTETSTR 2 /* c */
549#define SNMP_DISPLAYSTR 2 /* c */
550#define SNMP_OBJECTID 3 /* ul */
551#define SNMP_IPADDR 4 /* uc */
552#define SNMP_COUNTER 5 /* ul */
553#define SNMP_GAUGE 6 /* ul */
554#define SNMP_TIMETICKS 7 /* ul */
555#define SNMP_OPAQUE 8 /* c */
556
557/* Additional SNMPv2 Types */
558#define SNMP_UINTEGER 5 /* ul */
559#define SNMP_BITSTR 9 /* uc */
560#define SNMP_NSAP 10 /* uc */
561#define SNMP_COUNTER64 11 /* ul */
562#define SNMP_NOSUCHOBJECT 12
563#define SNMP_NOSUCHINSTANCE 13
564#define SNMP_ENDOFMIBVIEW 14
565
566union snmp_syntax
567{
568 unsigned char uc[0]; /* 8 bit unsigned */
569 char c[0]; /* 8 bit signed */
570 unsigned long ul[0]; /* 32 bit unsigned */
571 long l[0]; /* 32 bit signed */
572};
573
574struct snmp_object
575{
576 unsigned long *id;
577 unsigned int id_len;
578 unsigned short type;
579 unsigned int syntax_len;
580 union snmp_syntax syntax;
581};
582
583struct snmp_request
584{
585 unsigned long id;
586 unsigned int error_status;
587 unsigned int error_index;
588};
589
590struct snmp_v1_trap
591{
592 unsigned long *id;
593 unsigned int id_len;
594 unsigned long ip_address; /* pointer */
595 unsigned int general;
596 unsigned int specific;
597 unsigned long time;
598};
599
600/* SNMP types */
601#define SNMP_IPA 0
602#define SNMP_CNT 1
603#define SNMP_GGE 2
604#define SNMP_TIT 3
605#define SNMP_OPQ 4
606#define SNMP_C64 6
607
608/* SNMP errors */
609#define SERR_NSO 0
610#define SERR_NSI 1
611#define SERR_EOM 2
612
613static inline void mangle_address(unsigned char *begin,
614 unsigned char *addr,
615 const struct oct1_map *map,
616 __sum16 *check);
617struct snmp_cnv
618{
619 unsigned int class;
620 unsigned int tag;
621 int syntax;
622};
623
624static struct snmp_cnv snmp_conv [] =
625{
626 {ASN1_UNI, ASN1_NUL, SNMP_NULL},
627 {ASN1_UNI, ASN1_INT, SNMP_INTEGER},
628 {ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR},
629 {ASN1_UNI, ASN1_OTS, SNMP_DISPLAYSTR},
630 {ASN1_UNI, ASN1_OJI, SNMP_OBJECTID},
631 {ASN1_APL, SNMP_IPA, SNMP_IPADDR},
632 {ASN1_APL, SNMP_CNT, SNMP_COUNTER}, /* Counter32 */
633 {ASN1_APL, SNMP_GGE, SNMP_GAUGE}, /* Gauge32 == Unsigned32 */
634 {ASN1_APL, SNMP_TIT, SNMP_TIMETICKS},
635 {ASN1_APL, SNMP_OPQ, SNMP_OPAQUE},
636
637 /* SNMPv2 data types and errors */
638 {ASN1_UNI, ASN1_BTS, SNMP_BITSTR},
639 {ASN1_APL, SNMP_C64, SNMP_COUNTER64},
640 {ASN1_CTX, SERR_NSO, SNMP_NOSUCHOBJECT},
641 {ASN1_CTX, SERR_NSI, SNMP_NOSUCHINSTANCE},
642 {ASN1_CTX, SERR_EOM, SNMP_ENDOFMIBVIEW},
643 {0, 0, -1}
644};
645
646static unsigned char snmp_tag_cls2syntax(unsigned int tag,
647 unsigned int cls,
648 unsigned short *syntax)
649{
650 struct snmp_cnv *cnv;
651
652 cnv = snmp_conv;
653
654 while (cnv->syntax != -1) {
655 if (cnv->tag == tag && cnv->class == cls) {
656 *syntax = cnv->syntax;
657 return 1;
658 }
659 cnv++;
660 }
661 return 0;
662}
663
664static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
665 struct snmp_object **obj)
666{
667 unsigned int cls, con, tag, len, idlen;
668 unsigned short type;
669 unsigned char *eoc, *end, *p;
670 unsigned long *lp, *id;
671 unsigned long ul;
672 long l;
673
674 *obj = NULL;
675 id = NULL;
676
677 if (!asn1_header_decode(ctx, &eoc, &cls, &con, &tag))
678 return 0;
679
680 if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
681 return 0;
682
683 if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
684 return 0;
685
686 if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
687 return 0;
688
689 if (!asn1_oid_decode(ctx, end, &id, &idlen))
690 return 0;
691
692 if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) {
693 kfree(id);
694 return 0;
695 }
696
697 if (con != ASN1_PRI) {
698 kfree(id);
699 return 0;
700 }
701
702 type = 0;
703 if (!snmp_tag_cls2syntax(tag, cls, &type)) {
704 kfree(id);
705 return 0;
706 }
707
708 l = 0;
709 switch (type) {
710 case SNMP_INTEGER:
711 len = sizeof(long);
712 if (!asn1_long_decode(ctx, end, &l)) {
713 kfree(id);
714 return 0;
715 }
716 *obj = kmalloc(sizeof(struct snmp_object) + len,
717 GFP_ATOMIC);
718 if (*obj == NULL) {
719 kfree(id);
720 if (net_ratelimit())
721 printk("OOM in bsalg (%d)\n", __LINE__);
722 return 0;
723 }
724 (*obj)->syntax.l[0] = l;
725 break;
726 case SNMP_OCTETSTR:
727 case SNMP_OPAQUE:
728 if (!asn1_octets_decode(ctx, end, &p, &len)) {
729 kfree(id);
730 return 0;
731 }
732 *obj = kmalloc(sizeof(struct snmp_object) + len,
733 GFP_ATOMIC);
734 if (*obj == NULL) {
735 kfree(id);
736 if (net_ratelimit())
737 printk("OOM in bsalg (%d)\n", __LINE__);
738 return 0;
739 }
740 memcpy((*obj)->syntax.c, p, len);
741 kfree(p);
742 break;
743 case SNMP_NULL:
744 case SNMP_NOSUCHOBJECT:
745 case SNMP_NOSUCHINSTANCE:
746 case SNMP_ENDOFMIBVIEW:
747 len = 0;
748 *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
749 if (*obj == NULL) {
750 kfree(id);
751 if (net_ratelimit())
752 printk("OOM in bsalg (%d)\n", __LINE__);
753 return 0;
754 }
755 if (!asn1_null_decode(ctx, end)) {
756 kfree(id);
757 kfree(*obj);
758 *obj = NULL;
759 return 0;
760 }
761 break;
762 case SNMP_OBJECTID:
763 if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) {
764 kfree(id);
765 return 0;
766 }
767 len *= sizeof(unsigned long);
768 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
769 if (*obj == NULL) {
770 kfree(lp);
771 kfree(id);
772 if (net_ratelimit())
773 printk("OOM in bsalg (%d)\n", __LINE__);
774 return 0;
775 }
776 memcpy((*obj)->syntax.ul, lp, len);
777 kfree(lp);
778 break;
779 case SNMP_IPADDR:
780 if (!asn1_octets_decode(ctx, end, &p, &len)) {
781 kfree(id);
782 return 0;
783 }
784 if (len != 4) {
785 kfree(p);
786 kfree(id);
787 return 0;
788 }
789 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
790 if (*obj == NULL) {
791 kfree(p);
792 kfree(id);
793 if (net_ratelimit())
794 printk("OOM in bsalg (%d)\n", __LINE__);
795 return 0;
796 }
797 memcpy((*obj)->syntax.uc, p, len);
798 kfree(p);
799 break;
800 case SNMP_COUNTER:
801 case SNMP_GAUGE:
802 case SNMP_TIMETICKS:
803 len = sizeof(unsigned long);
804 if (!asn1_ulong_decode(ctx, end, &ul)) {
805 kfree(id);
806 return 0;
807 }
808 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
809 if (*obj == NULL) {
810 kfree(id);
811 if (net_ratelimit())
812 printk("OOM in bsalg (%d)\n", __LINE__);
813 return 0;
814 }
815 (*obj)->syntax.ul[0] = ul;
816 break;
817 default:
818 kfree(id);
819 return 0;
820 }
821
822 (*obj)->syntax_len = len;
823 (*obj)->type = type;
824 (*obj)->id = id;
825 (*obj)->id_len = idlen;
826
827 if (!asn1_eoc_decode(ctx, eoc)) {
828 kfree(id);
829 kfree(*obj);
830 *obj = NULL;
831 return 0;
832 }
833 return 1;
834}
835
836static unsigned char snmp_request_decode(struct asn1_ctx *ctx,
837 struct snmp_request *request)
838{
839 unsigned int cls, con, tag;
840 unsigned char *end;
841
842 if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
843 return 0;
844
845 if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
846 return 0;
847
848 if (!asn1_ulong_decode(ctx, end, &request->id))
849 return 0;
850
851 if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
852 return 0;
853
854 if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
855 return 0;
856
857 if (!asn1_uint_decode(ctx, end, &request->error_status))
858 return 0;
859
860 if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
861 return 0;
862
863 if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
864 return 0;
865
866 if (!asn1_uint_decode(ctx, end, &request->error_index))
867 return 0;
868
869 return 1;
870}
871
872/*
873 * Fast checksum update for possibly oddly-aligned UDP byte, from the
874 * code example in the draft.
875 */
876static void fast_csum(__sum16 *csum,
877 const unsigned char *optr,
878 const unsigned char *nptr,
879 int offset)
880{
881 unsigned char s[4];
882
883	if (offset & 1) {
884		s[0] = ~0; s[2] = 0;	/* pad the complemented byte with ~0 */
885		s[1] = ~*optr;
886		s[3] = *nptr;
887	} else {
888		s[1] = ~0; s[3] = 0;	/* pad the complemented byte with ~0 */
889		s[0] = ~*optr;
890		s[2] = *nptr;
891	}
892
893 *csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum)));
894}
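/* Worked example (assuming, as the comment above hints, the RFC 1624
 * style incremental update HC' = ~(~HC + ~m + m')): replacing byte
 * 0x0a with 0xc0 at an even offset, with an untouched partner byte of
 * 0x63, stages s[] = { 0xf5, 0xff, 0xc0, 0x00 }, i.e. the 16-bit words
 * 0xf5ff = ~0x0a00 and 0xc000. Folding both into the running sum turns
 * the old word 0x0a63 into the new one:
 *
 *	0x0a63 + 0xf5ff = 0x10062 -> 0x0063	(end-around carry)
 *	0x0063 + 0xc000 = 0xc063
 *
 * The ~0 pad next to the complemented byte is what makes that pair a
 * whole one's-complement word.
 */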
895
896/*
897 * Mangle IP address.
898 * - begin points to the start of the snmp message
899 * - addr points to the start of the address
900 */
901static inline void mangle_address(unsigned char *begin,
902 unsigned char *addr,
903 const struct oct1_map *map,
904 __sum16 *check)
905{
906 if (map->from == NOCT1(addr)) {
907 u_int32_t old;
908
909 if (debug)
910 memcpy(&old, (unsigned char *)addr, sizeof(old));
911
912 *addr = map->to;
913
914 /* Update UDP checksum if being used */
915 if (*check) {
916 fast_csum(check,
917 &map->from, &map->to, addr - begin);
918 }
919
920 if (debug)
921 printk(KERN_DEBUG "bsalg: mapped %u.%u.%u.%u to "
922 "%u.%u.%u.%u\n", NIPQUAD(old), NIPQUAD(*addr));
923 }
924}
925
926static unsigned char snmp_trap_decode(struct asn1_ctx *ctx,
927 struct snmp_v1_trap *trap,
928 const struct oct1_map *map,
929 __sum16 *check)
930{
931 unsigned int cls, con, tag, len;
932 unsigned char *end;
933
934 if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
935 return 0;
936
937 if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
938 return 0;
939
940 if (!asn1_oid_decode(ctx, end, &trap->id, &trap->id_len))
941 return 0;
942
943 if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
944 goto err_id_free;
945
946 if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_IPA) ||
947 (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_OTS)))
948 goto err_id_free;
949
950 if (!asn1_octets_decode(ctx, end, (unsigned char **)&trap->ip_address, &len))
951 goto err_id_free;
952
953 /* IPv4 only */
954 if (len != 4)
955 goto err_addr_free;
956
957 mangle_address(ctx->begin, ctx->pointer - 4, map, check);
958
959 if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
960 goto err_addr_free;
961
962 if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
963 goto err_addr_free;
964
965 if (!asn1_uint_decode(ctx, end, &trap->general))
966 goto err_addr_free;
967
968 if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
969 goto err_addr_free;
970
971 if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
972 goto err_addr_free;
973
974 if (!asn1_uint_decode(ctx, end, &trap->specific))
975 goto err_addr_free;
976
977 if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
978 goto err_addr_free;
979
980 if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_TIT) ||
981 (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_INT)))
982 goto err_addr_free;
983
984 if (!asn1_ulong_decode(ctx, end, &trap->time))
985 goto err_addr_free;
986
987 return 1;
988
989err_addr_free:
990 kfree((unsigned long *)trap->ip_address);
991
992err_id_free:
993 kfree(trap->id);
994
995 return 0;
996}
997
998/*****************************************************************************
999 *
1000 * Misc. routines
1001 *
1002 *****************************************************************************/
1003
1004static void hex_dump(unsigned char *buf, size_t len)
1005{
1006 size_t i;
1007
1008 for (i = 0; i < len; i++) {
1009 if (i && !(i % 16))
1010 printk("\n");
1011 printk("%02x ", *(buf + i));
1012 }
1013 printk("\n");
1014}
1015
1016/*
1017 * Parse and mangle SNMP message according to mapping.
1018 * (And this is the fucking 'basic' method).
1019 */
1020static int snmp_parse_mangle(unsigned char *msg,
1021 u_int16_t len,
1022 const struct oct1_map *map,
1023 __sum16 *check)
1024{
1025 unsigned char *eoc, *end;
1026 unsigned int cls, con, tag, vers, pdutype;
1027 struct asn1_ctx ctx;
1028 struct asn1_octstr comm;
1029 struct snmp_object **obj;
1030
1031 if (debug > 1)
1032 hex_dump(msg, len);
1033
1034 asn1_open(&ctx, msg, len);
1035
1036 /*
1037 * Start of SNMP message.
1038 */
1039 if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
1040 return 0;
1041 if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
1042 return 0;
1043
1044 /*
1045 * Version 1 or 2 handled.
1046 */
1047 if (!asn1_header_decode(&ctx, &end, &cls, &con, &tag))
1048 return 0;
1049 if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
1050 return 0;
1051 if (!asn1_uint_decode (&ctx, end, &vers))
1052 return 0;
1053 if (debug > 1)
1054 printk(KERN_DEBUG "bsalg: snmp version: %u\n", vers + 1);
1055 if (vers > 1)
1056 return 1;
1057
1058 /*
1059 * Community.
1060 */
1061 if (!asn1_header_decode (&ctx, &end, &cls, &con, &tag))
1062 return 0;
1063 if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OTS)
1064 return 0;
1065 if (!asn1_octets_decode(&ctx, end, &comm.data, &comm.len))
1066 return 0;
1067 if (debug > 1) {
1068 unsigned int i;
1069
1070 printk(KERN_DEBUG "bsalg: community: ");
1071 for (i = 0; i < comm.len; i++)
1072 printk("%c", comm.data[i]);
1073 printk("\n");
1074 }
1075 kfree(comm.data);
1076
1077 /*
1078 * PDU type
1079 */
1080 if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &pdutype))
1081 return 0;
1082 if (cls != ASN1_CTX || con != ASN1_CON)
1083 return 0;
1084 if (debug > 1) {
1085			static const char *pdus[] = {
1086 [SNMP_PDU_GET] = "get",
1087 [SNMP_PDU_NEXT] = "get-next",
1088 [SNMP_PDU_RESPONSE] = "response",
1089 [SNMP_PDU_SET] = "set",
1090 [SNMP_PDU_TRAP1] = "trapv1",
1091 [SNMP_PDU_BULK] = "bulk",
1092 [SNMP_PDU_INFORM] = "inform",
1093 [SNMP_PDU_TRAP2] = "trapv2"
1094 };
1095
1096 if (pdutype > SNMP_PDU_TRAP2)
1097 printk(KERN_DEBUG "bsalg: bad pdu type %u\n", pdutype);
1098 else
1099 printk(KERN_DEBUG "bsalg: pdu: %s\n", pdus[pdutype]);
1100 }
1101 if (pdutype != SNMP_PDU_RESPONSE &&
1102 pdutype != SNMP_PDU_TRAP1 && pdutype != SNMP_PDU_TRAP2)
1103 return 1;
1104
1105 /*
1106 * Request header or v1 trap
1107 */
1108 if (pdutype == SNMP_PDU_TRAP1) {
1109 struct snmp_v1_trap trap;
1110 unsigned char ret = snmp_trap_decode(&ctx, &trap, map, check);
1111
1112 if (ret) {
1113 kfree(trap.id);
1114 kfree((unsigned long *)trap.ip_address);
1115 } else
1116 return ret;
1117
1118 } else {
1119 struct snmp_request req;
1120
1121 if (!snmp_request_decode(&ctx, &req))
1122 return 0;
1123
1124 if (debug > 1)
1125 printk(KERN_DEBUG "bsalg: request: id=0x%lx error_status=%u "
1126 "error_index=%u\n", req.id, req.error_status,
1127 req.error_index);
1128 }
1129
1130 /*
1131 * Loop through objects, look for IP addresses to mangle.
1132 */
1133 if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
1134 return 0;
1135
1136 if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
1137 return 0;
1138
1139 obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
1140 if (obj == NULL) {
1141 if (net_ratelimit())
1142 printk(KERN_WARNING "OOM in bsalg(%d)\n", __LINE__);
1143 return 0;
1144 }
1145
1146 while (!asn1_eoc_decode(&ctx, eoc)) {
1147 unsigned int i;
1148
1149 if (!snmp_object_decode(&ctx, obj)) {
1150 if (*obj) {
1151 kfree((*obj)->id);
1152 kfree(*obj);
1153 }
1154 kfree(obj);
1155 return 0;
1156 }
1157
1158 if (debug > 1) {
1159 printk(KERN_DEBUG "bsalg: object: ");
1160 for (i = 0; i < (*obj)->id_len; i++) {
1161 if (i > 0)
1162 printk(".");
1163 printk("%lu", (*obj)->id[i]);
1164 }
1165 printk(": type=%u\n", (*obj)->type);
1166
1167 }
1168
1169 if ((*obj)->type == SNMP_IPADDR)
1170			mangle_address(ctx.begin, ctx.pointer - 4, map, check);
1171
1172 kfree((*obj)->id);
1173 kfree(*obj);
1174 }
1175 kfree(obj);
1176
1177 if (!asn1_eoc_decode(&ctx, eoc))
1178 return 0;
1179
1180 return 1;
1181}
1182
1183/*****************************************************************************
1184 *
1185 * NAT routines.
1186 *
1187 *****************************************************************************/
1188
1189/*
1190 * SNMP translation routine.
1191 */
1192static int snmp_translate(struct ip_conntrack *ct,
1193 enum ip_conntrack_info ctinfo,
1194 struct sk_buff **pskb)
1195{
1196 struct iphdr *iph = (*pskb)->nh.iph;
1197 struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
1198 u_int16_t udplen = ntohs(udph->len);
1199 u_int16_t paylen = udplen - sizeof(struct udphdr);
1200 int dir = CTINFO2DIR(ctinfo);
1201 struct oct1_map map;
1202
1203 /*
1204	 * Determine mapping for application layer addresses based
1205 * on NAT manipulations for the packet.
1206 */
1207 if (dir == IP_CT_DIR_ORIGINAL) {
1208 /* SNAT traps */
1209 map.from = NOCT1(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip);
1210 map.to = NOCT1(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip);
1211 } else {
1212 /* DNAT replies */
1213 map.from = NOCT1(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
1214 map.to = NOCT1(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip);
1215 }
1216
1217 if (map.from == map.to)
1218 return NF_ACCEPT;
1219
1220 if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr),
1221 paylen, &map, &udph->check)) {
1222 if (net_ratelimit())
1223 printk(KERN_WARNING "bsalg: parser failed\n");
1224 return NF_DROP;
1225 }
1226 return NF_ACCEPT;
1227}
1228
1229/* We don't actually set up expectations, just adjust internal IP
1230 * addresses if this is being NATted */
1231static int help(struct sk_buff **pskb,
1232 struct ip_conntrack *ct,
1233 enum ip_conntrack_info ctinfo)
1234{
1235 int dir = CTINFO2DIR(ctinfo);
1236 unsigned int ret;
1237 struct iphdr *iph = (*pskb)->nh.iph;
1238 struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
1239
1240 /* SNMP replies and originating SNMP traps get mangled */
1241 if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
1242 return NF_ACCEPT;
1243 if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
1244 return NF_ACCEPT;
1245
1246 /* No NAT? */
1247 if (!(ct->status & IPS_NAT_MASK))
1248 return NF_ACCEPT;
1249
1250 /*
1251 * Make sure the packet length is ok. So far, we were only guaranteed
1252 * to have a valid length IP header plus 8 bytes, which means we have
1253 * enough room for a UDP header. Just verify the UDP length field so we
1254 * can mess around with the payload.
1255 */
1256 if (ntohs(udph->len) != (*pskb)->len - (iph->ihl << 2)) {
1257 if (net_ratelimit())
1258 printk(KERN_WARNING "SNMP: dropping malformed packet "
1259 "src=%u.%u.%u.%u dst=%u.%u.%u.%u\n",
1260 NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
1261 return NF_DROP;
1262 }
1263
1264 if (!skb_make_writable(pskb, (*pskb)->len))
1265 return NF_DROP;
1266
1267 spin_lock_bh(&snmp_lock);
1268 ret = snmp_translate(ct, ctinfo, pskb);
1269 spin_unlock_bh(&snmp_lock);
1270 return ret;
1271}
1272
1273static struct ip_conntrack_helper snmp_helper = {
1274 .max_expected = 0,
1275 .timeout = 180,
1276 .me = THIS_MODULE,
1277 .help = help,
1278 .name = "snmp",
1279
1280 .tuple = {.src = {.u = {.udp = {.port = __constant_htons(SNMP_PORT)}}},
1281 .dst = {.protonum = IPPROTO_UDP},
1282 },
1283 .mask = {.src = {.u = {0xFFFF}},
1284 .dst = {.protonum = 0xFF},
1285 },
1286};
1287
1288static struct ip_conntrack_helper snmp_trap_helper = {
1289 .max_expected = 0,
1290 .timeout = 180,
1291 .me = THIS_MODULE,
1292 .help = help,
1293 .name = "snmp_trap",
1294
1295 .tuple = {.src = {.u = {.udp = {.port = __constant_htons(SNMP_TRAP_PORT)}}},
1296 .dst = {.protonum = IPPROTO_UDP},
1297 },
1298 .mask = {.src = {.u = {0xFFFF}},
1299 .dst = {.protonum = 0xFF},
1300 },
1301};
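/* Annotation: the two registrations differ only in the port they bind
 * to, 161 for query/response traffic and 162 for traps. Combined with
 * the early returns at the top of help(), only agent replies (source
 * port 161, REPLY direction) and originating traps (destination port
 * 162, ORIGINAL direction) are actually passed to snmp_translate().
 */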
1302
1303/*****************************************************************************
1304 *
1305 * Module stuff.
1306 *
1307 *****************************************************************************/
1308
1309static int __init ip_nat_snmp_basic_init(void)
1310{
1311 int ret = 0;
1312
1313 ret = ip_conntrack_helper_register(&snmp_helper);
1314 if (ret < 0)
1315 return ret;
1316 ret = ip_conntrack_helper_register(&snmp_trap_helper);
1317 if (ret < 0) {
1318 ip_conntrack_helper_unregister(&snmp_helper);
1319 return ret;
1320 }
1321 return ret;
1322}
1323
1324static void __exit ip_nat_snmp_basic_fini(void)
1325{
1326 ip_conntrack_helper_unregister(&snmp_helper);
1327 ip_conntrack_helper_unregister(&snmp_trap_helper);
1328}
1329
1330module_init(ip_nat_snmp_basic_init);
1331module_exit(ip_nat_snmp_basic_fini);
1332
1333module_param(debug, int, 0600);
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
deleted file mode 100644
index 6bcfdf6dfcc9..000000000000
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ /dev/null
@@ -1,388 +0,0 @@
1/* This file contains all the functions required for the standalone
2 ip_nat module.
3
4 These are not required by the compatibility layer.
5*/
6
7/* (C) 1999-2001 Paul `Rusty' Russell
8 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 */
14
15/*
16 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
17 * - new API and handling of conntrack/nat helpers
18 * - now capable of multiple expectations for one master
19 * */
20
21#include <linux/types.h>
22#include <linux/icmp.h>
23#include <linux/ip.h>
24#include <linux/netfilter.h>
25#include <linux/netfilter_ipv4.h>
26#include <linux/module.h>
27#include <linux/skbuff.h>
28#include <linux/proc_fs.h>
29#include <net/ip.h>
30#include <net/checksum.h>
31#include <linux/spinlock.h>
32
33#include <linux/netfilter_ipv4/ip_nat.h>
34#include <linux/netfilter_ipv4/ip_nat_rule.h>
35#include <linux/netfilter_ipv4/ip_nat_protocol.h>
36#include <linux/netfilter_ipv4/ip_nat_core.h>
37#include <linux/netfilter_ipv4/ip_nat_helper.h>
38#include <linux/netfilter_ipv4/ip_tables.h>
39#include <linux/netfilter_ipv4/ip_conntrack_core.h>
40
41#if 0
42#define DEBUGP printk
43#else
44#define DEBUGP(format, args...)
45#endif
46
47#ifdef CONFIG_XFRM
48static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
49{
50 struct ip_conntrack *ct;
51 struct ip_conntrack_tuple *t;
52 enum ip_conntrack_info ctinfo;
53 enum ip_conntrack_dir dir;
54 unsigned long statusbit;
55
56 ct = ip_conntrack_get(skb, &ctinfo);
57 if (ct == NULL)
58 return;
59 dir = CTINFO2DIR(ctinfo);
60 t = &ct->tuplehash[dir].tuple;
61
62 if (dir == IP_CT_DIR_ORIGINAL)
63 statusbit = IPS_DST_NAT;
64 else
65 statusbit = IPS_SRC_NAT;
66
67 if (ct->status & statusbit) {
68 fl->fl4_dst = t->dst.ip;
69 if (t->dst.protonum == IPPROTO_TCP ||
70 t->dst.protonum == IPPROTO_UDP)
71 fl->fl_ip_dport = t->dst.u.tcp.port;
72 }
73
74 statusbit ^= IPS_NAT_MASK;
75
76 if (ct->status & statusbit) {
77 fl->fl4_src = t->src.ip;
78 if (t->dst.protonum == IPPROTO_TCP ||
79 t->dst.protonum == IPPROTO_UDP)
80 fl->fl_ip_sport = t->src.u.tcp.port;
81 }
82}
83#endif
84
85static unsigned int
86ip_nat_fn(unsigned int hooknum,
87 struct sk_buff **pskb,
88 const struct net_device *in,
89 const struct net_device *out,
90 int (*okfn)(struct sk_buff *))
91{
92 struct ip_conntrack *ct;
93 enum ip_conntrack_info ctinfo;
94 struct ip_nat_info *info;
95 /* maniptype == SRC for postrouting. */
96 enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
97
98 /* We never see fragments: conntrack defrags on pre-routing
99 and local-out, and ip_nat_out protects post-routing. */
100 IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
101 & htons(IP_MF|IP_OFFSET)));
102
103 ct = ip_conntrack_get(*pskb, &ctinfo);
104 /* Can't track? It's not due to stress, or conntrack would
105	   have dropped it. Hence it's the user's responsibility to
106 packet filter it out, or implement conntrack/NAT for that
107 protocol. 8) --RR */
108 if (!ct) {
109 /* Exception: ICMP redirect to new connection (not in
110 hash table yet). We must not let this through, in
111 case we're doing NAT to the same network. */
112 if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
113 struct icmphdr _hdr, *hp;
114
115 hp = skb_header_pointer(*pskb,
116 (*pskb)->nh.iph->ihl*4,
117 sizeof(_hdr), &_hdr);
118 if (hp != NULL &&
119 hp->type == ICMP_REDIRECT)
120 return NF_DROP;
121 }
122 return NF_ACCEPT;
123 }
124
125 /* Don't try to NAT if this packet is not conntracked */
126 if (ct == &ip_conntrack_untracked)
127 return NF_ACCEPT;
128
129 switch (ctinfo) {
130 case IP_CT_RELATED:
131 case IP_CT_RELATED+IP_CT_IS_REPLY:
132 if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
133 if (!ip_nat_icmp_reply_translation(ct, ctinfo,
134 hooknum, pskb))
135 return NF_DROP;
136 else
137 return NF_ACCEPT;
138 }
139 /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
140 case IP_CT_NEW:
141 info = &ct->nat.info;
142
143 /* Seen it before? This can happen for loopback, retrans,
144	   or local packets. */
145 if (!ip_nat_initialized(ct, maniptype)) {
146 unsigned int ret;
147
148 if (unlikely(is_confirmed(ct)))
149 /* NAT module was loaded late */
150 ret = alloc_null_binding_confirmed(ct, info,
151 hooknum);
152 else if (hooknum == NF_IP_LOCAL_IN)
153 /* LOCAL_IN hook doesn't have a chain! */
154 ret = alloc_null_binding(ct, info, hooknum);
155 else
156 ret = ip_nat_rule_find(pskb, hooknum,
157 in, out, ct,
158 info);
159
160 if (ret != NF_ACCEPT) {
161 return ret;
162 }
163 } else
164 DEBUGP("Already setup manip %s for ct %p\n",
165 maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
166 ct);
167 break;
168
169 default:
170 /* ESTABLISHED */
171 IP_NF_ASSERT(ctinfo == IP_CT_ESTABLISHED
172 || ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
173 info = &ct->nat.info;
174 }
175
176 IP_NF_ASSERT(info);
177 return ip_nat_packet(ct, ctinfo, hooknum, pskb);
178}
179
180static unsigned int
181ip_nat_in(unsigned int hooknum,
182 struct sk_buff **pskb,
183 const struct net_device *in,
184 const struct net_device *out,
185 int (*okfn)(struct sk_buff *))
186{
187 unsigned int ret;
188 __be32 daddr = (*pskb)->nh.iph->daddr;
189
190 ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
191 if (ret != NF_DROP && ret != NF_STOLEN
192 && daddr != (*pskb)->nh.iph->daddr) {
193 dst_release((*pskb)->dst);
194 (*pskb)->dst = NULL;
195 }
196 return ret;
197}
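/* Annotation: the dst_release() above handles the DNAT case. If
 * ip_nat_fn() rewrote the destination address, any dst entry already
 * cached on the skb refers to the old address, so it is dropped here
 * and the packet is re-routed later against the translated
 * destination.
 */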
198
199static unsigned int
200ip_nat_out(unsigned int hooknum,
201 struct sk_buff **pskb,
202 const struct net_device *in,
203 const struct net_device *out,
204 int (*okfn)(struct sk_buff *))
205{
206#ifdef CONFIG_XFRM
207 struct ip_conntrack *ct;
208 enum ip_conntrack_info ctinfo;
209#endif
210 unsigned int ret;
211
212 /* root is playing with raw sockets. */
213 if ((*pskb)->len < sizeof(struct iphdr)
214 || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
215 return NF_ACCEPT;
216
217 ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
218#ifdef CONFIG_XFRM
219 if (ret != NF_DROP && ret != NF_STOLEN
220 && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
221 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
222
223 if (ct->tuplehash[dir].tuple.src.ip !=
224 ct->tuplehash[!dir].tuple.dst.ip
225 || ct->tuplehash[dir].tuple.src.u.all !=
226 ct->tuplehash[!dir].tuple.dst.u.all
227 )
228 return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP;
229 }
230#endif
231 return ret;
232}
233
234static unsigned int
235ip_nat_local_fn(unsigned int hooknum,
236 struct sk_buff **pskb,
237 const struct net_device *in,
238 const struct net_device *out,
239 int (*okfn)(struct sk_buff *))
240{
241 struct ip_conntrack *ct;
242 enum ip_conntrack_info ctinfo;
243 unsigned int ret;
244
245 /* root is playing with raw sockets. */
246 if ((*pskb)->len < sizeof(struct iphdr)
247 || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
248 return NF_ACCEPT;
249
250 ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
251 if (ret != NF_DROP && ret != NF_STOLEN
252 && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
253 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
254
255 if (ct->tuplehash[dir].tuple.dst.ip !=
256 ct->tuplehash[!dir].tuple.src.ip) {
257 if (ip_route_me_harder(pskb, RTN_UNSPEC))
258 ret = NF_DROP;
259 }
260#ifdef CONFIG_XFRM
261 else if (ct->tuplehash[dir].tuple.dst.u.all !=
262 ct->tuplehash[!dir].tuple.src.u.all)
263 if (ip_xfrm_me_harder(pskb))
264 ret = NF_DROP;
265#endif
266
267 }
268 return ret;
269}
270
271static unsigned int
272ip_nat_adjust(unsigned int hooknum,
273 struct sk_buff **pskb,
274 const struct net_device *in,
275 const struct net_device *out,
276 int (*okfn)(struct sk_buff *))
277{
278 struct ip_conntrack *ct;
279 enum ip_conntrack_info ctinfo;
280
281 ct = ip_conntrack_get(*pskb, &ctinfo);
282 if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
283 DEBUGP("ip_nat_standalone: adjusting sequence number\n");
284 if (!ip_nat_seq_adjust(pskb, ct, ctinfo))
285 return NF_DROP;
286 }
287 return NF_ACCEPT;
288}
289
290/* We must be after connection tracking and before packet filtering. */
291
292static struct nf_hook_ops ip_nat_ops[] = {
293 /* Before packet filtering, change destination */
294 {
295 .hook = ip_nat_in,
296 .owner = THIS_MODULE,
297 .pf = PF_INET,
298 .hooknum = NF_IP_PRE_ROUTING,
299 .priority = NF_IP_PRI_NAT_DST,
300 },
301 /* After packet filtering, change source */
302 {
303 .hook = ip_nat_out,
304 .owner = THIS_MODULE,
305 .pf = PF_INET,
306 .hooknum = NF_IP_POST_ROUTING,
307 .priority = NF_IP_PRI_NAT_SRC,
308 },
309 /* After conntrack, adjust sequence number */
310 {
311 .hook = ip_nat_adjust,
312 .owner = THIS_MODULE,
313 .pf = PF_INET,
314 .hooknum = NF_IP_POST_ROUTING,
315 .priority = NF_IP_PRI_NAT_SEQ_ADJUST,
316 },
317 /* Before packet filtering, change destination */
318 {
319 .hook = ip_nat_local_fn,
320 .owner = THIS_MODULE,
321 .pf = PF_INET,
322 .hooknum = NF_IP_LOCAL_OUT,
323 .priority = NF_IP_PRI_NAT_DST,
324 },
325 /* After packet filtering, change source */
326 {
327 .hook = ip_nat_fn,
328 .owner = THIS_MODULE,
329 .pf = PF_INET,
330 .hooknum = NF_IP_LOCAL_IN,
331 .priority = NF_IP_PRI_NAT_SRC,
332 },
333 /* After conntrack, adjust sequence number */
334 {
335 .hook = ip_nat_adjust,
336 .owner = THIS_MODULE,
337 .pf = PF_INET,
338 .hooknum = NF_IP_LOCAL_IN,
339 .priority = NF_IP_PRI_NAT_SEQ_ADJUST,
340 },
341};
342
343static int __init ip_nat_standalone_init(void)
344{
345 int ret = 0;
346
347 need_conntrack();
348
349#ifdef CONFIG_XFRM
350 BUG_ON(ip_nat_decode_session != NULL);
351 ip_nat_decode_session = nat_decode_session;
352#endif
353 ret = ip_nat_rule_init();
354 if (ret < 0) {
355 printk("ip_nat_init: can't setup rules.\n");
356 goto cleanup_decode_session;
357 }
358 ret = nf_register_hooks(ip_nat_ops, ARRAY_SIZE(ip_nat_ops));
359 if (ret < 0) {
360 printk("ip_nat_init: can't register hooks.\n");
361 goto cleanup_rule_init;
362 }
363 return ret;
364
365 cleanup_rule_init:
366 ip_nat_rule_cleanup();
367 cleanup_decode_session:
368#ifdef CONFIG_XFRM
369 ip_nat_decode_session = NULL;
370 synchronize_net();
371#endif
372 return ret;
373}
374
375static void __exit ip_nat_standalone_fini(void)
376{
377 nf_unregister_hooks(ip_nat_ops, ARRAY_SIZE(ip_nat_ops));
378 ip_nat_rule_cleanup();
379#ifdef CONFIG_XFRM
380 ip_nat_decode_session = NULL;
381 synchronize_net();
382#endif
383}
384
385module_init(ip_nat_standalone_init);
386module_exit(ip_nat_standalone_fini);
387
388MODULE_LICENSE("GPL");
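
The init path above registers in stages and unwinds in reverse on failure (rule table first, then the hook array, with goto labels undoing only what already succeeded). A minimal sketch of the same idiom; the step names here are illustrative, not from this file:

static int __init example_init(void)
{
        int ret;

        ret = example_rule_init();      /* hypothetical first stage */
        if (ret < 0)
                return ret;

        ret = nf_register_hooks(example_ops, ARRAY_SIZE(example_ops));
        if (ret < 0)
                example_rule_cleanup(); /* undo the stage that succeeded */
        return ret;
}
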
diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c
deleted file mode 100644
index 604793536fc1..000000000000
--- a/net/ipv4/netfilter/ip_nat_tftp.c
+++ /dev/null
@@ -1,70 +0,0 @@
1/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 *
7 * Version: 0.0.7
8 *
9 * Thu 21 Mar 2002 Harald Welte <laforge@gnumonks.org>
10 * - Port to newnat API
11 *
12 * This module currently supports DNAT:
13 * iptables -t nat -A PREROUTING -d x.x.x.x -j DNAT --to-dest x.x.x.y
14 *
15 * and SNAT:
16 * iptables -t nat -A POSTROUTING { -j MASQUERADE , -j SNAT --to-source x.x.x.x }
17 *
18 * It has not been tested with
19 * -j SNAT --to-source x.x.x.x-x.x.x.y since I only have one external ip
20 * If you do test this please let me know if it works or not.
21 *
22 */
23
24#include <linux/module.h>
25#include <linux/netfilter_ipv4.h>
26#include <linux/ip.h>
27#include <linux/udp.h>
28
29#include <linux/netfilter.h>
30#include <linux/netfilter_ipv4/ip_tables.h>
31#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
32#include <linux/netfilter_ipv4/ip_conntrack_tftp.h>
33#include <linux/netfilter_ipv4/ip_nat_helper.h>
34#include <linux/netfilter_ipv4/ip_nat_rule.h>
35#include <linux/moduleparam.h>
36
37MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
38MODULE_DESCRIPTION("tftp NAT helper");
39MODULE_LICENSE("GPL");
40
41static unsigned int help(struct sk_buff **pskb,
42 enum ip_conntrack_info ctinfo,
43 struct ip_conntrack_expect *exp)
44{
45 struct ip_conntrack *ct = exp->master;
46
47 exp->saved_proto.udp.port
48 = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
49 exp->dir = IP_CT_DIR_REPLY;
50 exp->expectfn = ip_nat_follow_master;
51 if (ip_conntrack_expect_related(exp) != 0)
52 return NF_DROP;
53 return NF_ACCEPT;
54}
55
56static void __exit ip_nat_tftp_fini(void)
57{
58 rcu_assign_pointer(ip_nat_tftp_hook, NULL);
59 synchronize_rcu();
60}
61
62static int __init ip_nat_tftp_init(void)
63{
64 BUG_ON(rcu_dereference(ip_nat_tftp_hook));
65 rcu_assign_pointer(ip_nat_tftp_hook, help);
66 return 0;
67}
68
69module_init(ip_nat_tftp_init);
70module_exit(ip_nat_tftp_fini);
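
The TFTP helper registers itself through an RCU-protected function pointer rather than a fixed symbol, so the conntrack core can call it without holding a lock while the module can still unload safely. The shape of the pattern, as a sketch with a hypothetical hook name:

static unsigned int (*example_hook)(struct sk_buff **pskb) __read_mostly;

static int __init example_init(void)
{
        BUG_ON(rcu_dereference(example_hook));  /* refuse double registration */
        rcu_assign_pointer(example_hook, help);
        return 0;
}

static void __exit example_fini(void)
{
        rcu_assign_pointer(example_hook, NULL);
        synchronize_rcu();      /* no caller still runs help() after this */
}
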
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index a14798a850d7..702d94db19b9 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -8,18 +8,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 2000-03-27: Simplified code (thanks to Andi Kleen for clues).
- * 2000-05-20: Fixed notifier problems (following Miguel Freitas' report).
- * 2000-06-19: Fixed so nfmark is copied to metadata (reported by Sebastian
- *             Zander).
- * 2000-08-01: Added Nick Williams' MAC support.
- * 2002-06-25: Code cleanup.
- * 2005-01-10: Added /proc counter for dropped packets; fixed so
- *             packets aren't delivered to user space if they're going
- *             to be dropped.
- * 2005-05-26: local_bh_{disable,enable} around nf_reinject (Harald Welte)
- *
  */
 #include <linux/module.h>
 #include <linux/skbuff.h>
@@ -191,12 +179,13 @@ ipq_flush(int verdict)
 static struct sk_buff *
 ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
 {
-        unsigned char *old_tail;
+        sk_buff_data_t old_tail;
         size_t size = 0;
         size_t data_len = 0;
         struct sk_buff *skb;
         struct ipq_packet_msg *pmsg;
         struct nlmsghdr *nlh;
+        struct timeval tv;
 
         read_lock_bh(&queue_lock);
 
@@ -234,15 +223,16 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
         if (!skb)
                 goto nlmsg_failure;
 
-        old_tail= skb->tail;
+        old_tail = skb->tail;
         nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
         pmsg = NLMSG_DATA(nlh);
         memset(pmsg, 0, sizeof(*pmsg));
 
         pmsg->packet_id = (unsigned long )entry;
         pmsg->data_len = data_len;
-        pmsg->timestamp_sec = entry->skb->tstamp.off_sec;
-        pmsg->timestamp_usec = entry->skb->tstamp.off_usec;
+        tv = ktime_to_timeval(entry->skb->tstamp);
+        pmsg->timestamp_sec = tv.tv_sec;
+        pmsg->timestamp_usec = tv.tv_usec;
         pmsg->mark = entry->skb->mark;
         pmsg->hook = entry->info->hook;
         pmsg->hw_protocol = entry->skb->protocol;
@@ -378,7 +368,7 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
         }
         if (!skb_make_writable(&e->skb, v->data_len))
                 return -ENOMEM;
-        memcpy(e->skb->data, v->payload, v->data_len);
+        skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
         e->skb->ip_summed = CHECKSUM_NONE;
 
         return 0;
@@ -495,7 +485,7 @@ ipq_rcv_skb(struct sk_buff *skb)
         if (skblen < sizeof(*nlh))
                 return;
 
-        nlh = (struct nlmsghdr *)skb->data;
+        nlh = nlmsg_hdr(skb);
         nlmsglen = nlh->nlmsg_len;
         if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
                 return;
@@ -678,7 +668,7 @@ static int __init ip_queue_init(void)
 
         netlink_register_notifier(&ipq_nl_notifier);
         ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk,
-                                      THIS_MODULE);
+                                      NULL, THIS_MODULE);
         if (ipqnl == NULL) {
                 printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
                 goto cleanup_netlink_notifier;
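
The timestamp hunks follow from skb->tstamp becoming an opaque ktime_t instead of the old { off_sec, off_usec } pair; exporting it to userspace now goes through ktime_to_timeval(). The conversion in isolation, as a sketch:

        struct timeval tv = ktime_to_timeval(entry->skb->tstamp);

        pmsg->timestamp_sec  = tv.tv_sec;       /* whole seconds */
        pmsg->timestamp_usec = tv.tv_usec;      /* microseconds, 0..999999 */
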
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 50cc4b92e284..e3f83bf160d9 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -7,12 +7,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
- * 	- increase module usage count as soon as we have rules inside
- * 	  a table
- * 08 Oct 2005 Harald Welte <lafore@netfilter.org>
- * 	- Generalize into "x_tables" layer and "{ip,ip6,arp}_tables"
  */
 #include <linux/cache.h>
 #include <linux/capability.h>
@@ -198,7 +192,7 @@ int do_match(struct ipt_entry_match *m,
 {
         /* Stop iteration if it doesn't match */
         if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data,
-                                      offset, skb->nh.iph->ihl*4, hotdrop))
+                                      offset, ip_hdrlen(skb), hotdrop))
                 return 1;
         else
                 return 0;
@@ -231,7 +225,7 @@ ipt_do_table(struct sk_buff **pskb,
         struct xt_table_info *private;
 
         /* Initialization */
-        ip = (*pskb)->nh.iph;
+        ip = ip_hdr(*pskb);
         datalen = (*pskb)->len - ip->ihl * 4;
         indev = in ? in->name : nulldevname;
         outdev = out ? out->name : nulldevname;
@@ -320,7 +314,7 @@ ipt_do_table(struct sk_buff **pskb,
                         = 0x57acc001;
 #endif
                 /* Target might have changed stuff. */
-                ip = (*pskb)->nh.iph;
+                ip = ip_hdr(*pskb);
                 datalen = (*pskb)->len - ip->ihl * 4;
 
                 if (verdict == IPT_CONTINUE)
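
These hunks are part of the tree-wide move away from the removed skb->nh union: ip_hdr(skb) yields the IP header pointer and ip_hdrlen(skb) its length in bytes (ihl counts 32-bit words). The equivalences, as a sketch:

        const struct iphdr *iph = ip_hdr(skb);          /* was skb->nh.iph */
        unsigned int hlen = ip_hdrlen(skb);             /* was skb->nh.iph->ihl * 4 */
        /* the transport header then starts at skb_network_header(skb) + hlen */
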
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 42b08029e867..40e273421398 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -21,15 +21,12 @@
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-
-#include <net/checksum.h>
-
 #include <linux/netfilter_arp.h>
-
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
-#include <net/netfilter/nf_conntrack_compat.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/checksum.h>
 
 #define CLUSTERIP_VERSION "0.8"
 
@@ -240,7 +237,7 @@ clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
 static inline u_int32_t
 clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config)
 {
-        struct iphdr *iph = skb->nh.iph;
+        struct iphdr *iph = ip_hdr(skb);
         unsigned long hashval;
         u_int16_t sport, dport;
         u_int16_t *ports;
@@ -310,15 +307,16 @@ target(struct sk_buff **pskb,
        const void *targinfo)
 {
         const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
+        struct nf_conn *ct;
         enum ip_conntrack_info ctinfo;
-        u_int32_t *mark, hash;
+        u_int32_t hash;
 
         /* don't need to clusterip_config_get() here, since refcount
          * is only decremented by destroy() - and ip_tables guarantees
          * that the ->target() function isn't called after ->destroy() */
 
-        mark = nf_ct_get_mark((*pskb), &ctinfo);
-        if (mark == NULL) {
+        ct = nf_ct_get(*pskb, &ctinfo);
+        if (ct == NULL) {
                 printk(KERN_ERR "CLUSTERIP: no conntrack!\n");
                 /* FIXME: need to drop invalid ones, since replies
                  * to outgoing connections of other nodes will be
@@ -328,7 +326,7 @@ target(struct sk_buff **pskb,
 
         /* special case: ICMP error handling. conntrack distinguishes between
          * error messages (RELATED) and information requests (see below) */
-        if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
+        if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP
             && (ctinfo == IP_CT_RELATED
                 || ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY))
                 return XT_CONTINUE;
@@ -341,7 +339,7 @@ target(struct sk_buff **pskb,
 
         switch (ctinfo) {
         case IP_CT_NEW:
-                *mark = hash;
+                ct->mark = hash;
                 break;
         case IP_CT_RELATED:
         case IP_CT_RELATED+IP_CT_IS_REPLY:
@@ -358,7 +356,7 @@ target(struct sk_buff **pskb,
 #ifdef DEBUG_CLUSTERP
         DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 #endif
-        DEBUGP("hash=%u ct_hash=%u ", hash, *mark);
+        DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark);
         if (!clusterip_responsible(cipinfo->config, hash)) {
                 DEBUGP("not responsible\n");
                 return NF_DROP;
@@ -521,7 +519,7 @@ arp_mangle(unsigned int hook,
            const struct net_device *out,
            int (*okfn)(struct sk_buff *))
 {
-        struct arphdr *arp = (*pskb)->nh.arph;
+        struct arphdr *arp = arp_hdr(*pskb);
         struct arp_payload *payload;
         struct clusterip_config *c;
 
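
Under nf_conntrack the connection mark is an ordinary field of struct nf_conn, so the target now fetches the conntrack entry itself instead of a pointer to the mark. The new access pattern, as a sketch:

        enum ip_conntrack_info ctinfo;
        struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

        if (ct != NULL && ctinfo == IP_CT_NEW)
                ct->mark = hash;        /* hash as computed by clusterip_hashfn() */
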
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 4f565633631d..918ca92e534a 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -5,14 +5,13 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * ipt_ECN.c,v 1.5 2002/08/18 19:36:51 laforge Exp
 */
 
 #include <linux/in.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
+#include <net/ip.h>
 #include <linux/tcp.h>
 #include <net/checksum.h>
 
@@ -29,13 +28,13 @@ MODULE_DESCRIPTION("iptables ECN modification module");
 static inline int
 set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 {
-        struct iphdr *iph = (*pskb)->nh.iph;
+        struct iphdr *iph = ip_hdr(*pskb);
 
         if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
                 __u8 oldtos;
                 if (!skb_make_writable(pskb, sizeof(struct iphdr)))
                         return 0;
-                iph = (*pskb)->nh.iph;
+                iph = ip_hdr(*pskb);
                 oldtos = iph->tos;
                 iph->tos &= ~IPT_ECN_IP_MASK;
                 iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
@@ -52,7 +51,7 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
         __be16 oldval;
 
         /* Not enought header? */
-        tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
+        tcph = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
                                   sizeof(_tcph), &_tcph);
         if (!tcph)
                 return 0;
@@ -63,9 +62,9 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
               tcph->cwr == einfo->proto.tcp.cwr)))
                 return 1;
 
-        if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
+        if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
                 return 0;
-        tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4;
+        tcph = (void *)ip_hdr(*pskb) + ip_hdrlen(*pskb);
 
         oldval = ((__be16 *)tcph)[6];
         if (einfo->operation & IPT_ECN_OP_SET_ECE)
@@ -93,7 +92,7 @@ target(struct sk_buff **pskb,
                         return NF_DROP;
 
         if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR)
-            && (*pskb)->nh.iph->protocol == IPPROTO_TCP)
+            && ip_hdr(*pskb)->protocol == IPPROTO_TCP)
                 if (!set_ect_tcp(pskb, einfo))
                         return NF_DROP;
 
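
A detail worth noting in set_ect_ip() and set_ect_tcp(): skb_make_writable() may copy or reallocate the packet data, so any header pointer taken beforehand is re-derived afterwards. The idiom in isolation, as a sketch:

        struct iphdr *iph = ip_hdr(*pskb);

        if (!skb_make_writable(pskb, sizeof(struct iphdr)))
                return 0;
        iph = ip_hdr(*pskb);    /* re-read: the old pointer may now be stale */
        iph->tos &= ~IPT_ECN_IP_MASK;
        iph->tos |= einfo->ip_ect & IPT_ECN_IP_MASK;
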
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index d9c37fd94228..a42c5cd968b1 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -399,9 +399,9 @@ ipt_log_packet(unsigned int pf,
                 /* MAC logging for input chain only. */
                 printk("MAC=");
                 if (skb->dev && skb->dev->hard_header_len
-                    && skb->mac.raw != (void*)skb->nh.iph) {
+                    && skb->mac_header != skb->network_header) {
                         int i;
-                        unsigned char *p = skb->mac.raw;
+                        const unsigned char *p = skb_mac_header(skb);
                         for (i = 0; i < skb->dev->hard_header_len; i++,p++)
                                 printk("%02x%c", *p,
                                        i==skb->dev->hard_header_len - 1
@@ -477,14 +477,10 @@ static int __init ipt_log_init(void)
         ret = xt_register_target(&ipt_log_reg);
         if (ret < 0)
                 return ret;
-        if (nf_log_register(PF_INET, &ipt_log_logger) < 0) {
-                printk(KERN_WARNING "ipt_LOG: not logging via system console "
-                       "since somebody else already registered for PF_INET\n");
-                /* we cannot make module load fail here, since otherwise
-                 * iptables userspace would abort */
-        }
-
-        return 0;
+        ret = nf_log_register(PF_INET, &ipt_log_logger);
+        if (ret < 0 && ret != -EEXIST)
+                xt_unregister_target(&ipt_log_reg);
+        return ret;
 }
 
 static void __exit ipt_log_fini(void)
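
The MAC-dump branch now compares sk_buff header offsets instead of raw pointers, and reads the bytes through skb_mac_header(). Restated as a sketch:

        if (skb->dev && skb->dev->hard_header_len
            && skb->mac_header != skb->network_header) {
                const unsigned char *p = skb_mac_header(skb);
                int i;

                for (i = 0; i < skb->dev->hard_header_len; i++)
                        printk("%02x%c", p[i],
                               i == skb->dev->hard_header_len - 1 ? ' ' : ':');
        }
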
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index b5955f3a3f8f..d4f2d7775330 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -19,12 +19,8 @@
 #include <net/ip.h>
 #include <net/checksum.h>
 #include <net/route.h>
-#include <linux/netfilter_ipv4.h>
-#ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_rule.h>
-#else
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#endif
+#include <linux/netfilter_ipv4.h>
 #include <linux/netfilter/x_tables.h>
 
 MODULE_LICENSE("GPL");
@@ -48,7 +44,7 @@ masquerade_check(const char *tablename,
                  void *targinfo,
                  unsigned int hook_mask)
 {
-        const struct ip_nat_multi_range_compat *mr = targinfo;
+        const struct nf_nat_multi_range_compat *mr = targinfo;
 
         if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
                 DEBUGP("masquerade_check: bad MAP_IPS.\n");
@@ -69,33 +65,26 @@ masquerade_target(struct sk_buff **pskb,
                   const struct xt_target *target,
                   const void *targinfo)
 {
-#ifdef CONFIG_NF_NAT_NEEDED
+        struct nf_conn *ct;
         struct nf_conn_nat *nat;
-#endif
-        struct ip_conntrack *ct;
         enum ip_conntrack_info ctinfo;
-        struct ip_nat_range newrange;
-        const struct ip_nat_multi_range_compat *mr;
+        struct nf_nat_range newrange;
+        const struct nf_nat_multi_range_compat *mr;
         struct rtable *rt;
         __be32 newsrc;
 
-        IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
+        NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING);
 
-        ct = ip_conntrack_get(*pskb, &ctinfo);
-#ifdef CONFIG_NF_NAT_NEEDED
+        ct = nf_ct_get(*pskb, &ctinfo);
         nat = nfct_nat(ct);
-#endif
-        IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
+
+        NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
                             || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
 
         /* Source address is 0.0.0.0 - locally generated packet that is
          * probably not supposed to be masqueraded.
          */
-#ifdef CONFIG_NF_NAT_NEEDED
         if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
-#else
-        if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip == 0)
-#endif
                 return NF_ACCEPT;
 
         mr = targinfo;
@@ -107,40 +96,30 @@ masquerade_target(struct sk_buff **pskb,
         }
 
         write_lock_bh(&masq_lock);
-#ifdef CONFIG_NF_NAT_NEEDED
         nat->masq_index = out->ifindex;
-#else
-        ct->nat.masq_index = out->ifindex;
-#endif
         write_unlock_bh(&masq_lock);
 
         /* Transfer from original range. */
-        newrange = ((struct ip_nat_range)
+        newrange = ((struct nf_nat_range)
                     { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
                       newsrc, newsrc,
                       mr->range[0].min, mr->range[0].max });
 
         /* Hand modified range to generic setup. */
-        return ip_nat_setup_info(ct, &newrange, hooknum);
+        return nf_nat_setup_info(ct, &newrange, hooknum);
 }
 
 static inline int
-device_cmp(struct ip_conntrack *i, void *ifindex)
+device_cmp(struct nf_conn *i, void *ifindex)
 {
-        int ret;
-#ifdef CONFIG_NF_NAT_NEEDED
         struct nf_conn_nat *nat = nfct_nat(i);
+        int ret;
 
         if (!nat)
                 return 0;
-#endif
 
         read_lock_bh(&masq_lock);
-#ifdef CONFIG_NF_NAT_NEEDED
         ret = (nat->masq_index == (int)(long)ifindex);
-#else
-        ret = (i->nat.masq_index == (int)(long)ifindex);
-#endif
         read_unlock_bh(&masq_lock);
 
         return ret;
@@ -156,9 +135,9 @@ static int masq_device_event(struct notifier_block *this,
                 /* Device was downed.  Search entire table for
                    conntracks which were associated with that device,
                    and forget them. */
-                IP_NF_ASSERT(dev->ifindex != 0);
+                NF_CT_ASSERT(dev->ifindex != 0);
 
-                ip_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
+                nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
         }
 
         return NOTIFY_DONE;
@@ -174,9 +153,9 @@ static int masq_inet_event(struct notifier_block *this,
                 /* IP address was deleted.  Search entire table for
                    conntracks which were associated with that device,
                    and forget them. */
-                IP_NF_ASSERT(dev->ifindex != 0);
+                NF_CT_ASSERT(dev->ifindex != 0);
 
-                ip_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
+                nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
         }
 
         return NOTIFY_DONE;
@@ -194,7 +173,7 @@ static struct xt_target masquerade = {
         .name           = "MASQUERADE",
         .family         = AF_INET,
         .target         = masquerade_target,
-        .targetsize     = sizeof(struct ip_nat_multi_range_compat),
+        .targetsize     = sizeof(struct nf_nat_multi_range_compat),
         .table          = "nat",
         .hooks          = 1 << NF_IP_POST_ROUTING,
         .checkentry     = masquerade_check,
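
The target rewrites the userspace-supplied range into an nf_nat_range pinned to the single address it picked before handing it to the generic NAT setup. The same construction restated with designated initializers for readability, as a sketch:

        struct nf_nat_range newrange = {
                .flags  = mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
                .min_ip = newsrc,               /* pin source to the one address */
                .max_ip = newsrc,
                .min    = mr->range[0].min,     /* keep the configured port range */
                .max    = mr->range[0].max,
        };

        return nf_nat_setup_info(ct, &newrange, hooknum);
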
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index fd7aaa347cd8..068c69bce30e 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -16,11 +16,7 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter/x_tables.h>
-#ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_rule.h>
-#else
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#endif
 
 #define MODULENAME "NETMAP"
 MODULE_LICENSE("GPL");
@@ -40,7 +36,7 @@ check(const char *tablename,
       void *targinfo,
       unsigned int hook_mask)
 {
-        const struct ip_nat_multi_range_compat *mr = targinfo;
+        const struct nf_nat_multi_range_compat *mr = targinfo;
 
         if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) {
                 DEBUGP(MODULENAME":check: bad MAP_IPS.\n");
@@ -61,39 +57,39 @@ target(struct sk_buff **pskb,
        const struct xt_target *target,
        const void *targinfo)
 {
-        struct ip_conntrack *ct;
+        struct nf_conn *ct;
         enum ip_conntrack_info ctinfo;
         __be32 new_ip, netmask;
-        const struct ip_nat_multi_range_compat *mr = targinfo;
-        struct ip_nat_range newrange;
+        const struct nf_nat_multi_range_compat *mr = targinfo;
+        struct nf_nat_range newrange;
 
-        IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
+        NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING
                      || hooknum == NF_IP_POST_ROUTING
                      || hooknum == NF_IP_LOCAL_OUT);
-        ct = ip_conntrack_get(*pskb, &ctinfo);
+        ct = nf_ct_get(*pskb, &ctinfo);
 
         netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
 
         if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT)
-                new_ip = (*pskb)->nh.iph->daddr & ~netmask;
+                new_ip = ip_hdr(*pskb)->daddr & ~netmask;
         else
-                new_ip = (*pskb)->nh.iph->saddr & ~netmask;
+                new_ip = ip_hdr(*pskb)->saddr & ~netmask;
         new_ip |= mr->range[0].min_ip & netmask;
 
-        newrange = ((struct ip_nat_range)
+        newrange = ((struct nf_nat_range)
                     { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
                       new_ip, new_ip,
                       mr->range[0].min, mr->range[0].max });
 
         /* Hand modified range to generic setup. */
-        return ip_nat_setup_info(ct, &newrange, hooknum);
+        return nf_nat_setup_info(ct, &newrange, hooknum);
 }
 
 static struct xt_target target_module = {
         .name           = MODULENAME,
         .family         = AF_INET,
         .target         = target,
-        .targetsize     = sizeof(struct ip_nat_multi_range_compat),
+        .targetsize     = sizeof(struct nf_nat_multi_range_compat),
         .table          = "nat",
         .hooks          = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_POST_ROUTING) |
                           (1 << NF_IP_LOCAL_OUT),
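
NETMAP keeps the host part of the address and substitutes the network part. A worked example of the arithmetic above, assuming a 10.0.0.0/24 target range:

        /* min_ip = 10.0.0.0, max_ip = 10.0.0.255
         * netmask = ~(min_ip ^ max_ip) = 255.255.255.0 */
        __be32 netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
        __be32 new_ip;

        /* daddr 192.168.5.7: host bits (0.0.0.7) | network bits (10.0.0.0)
         * gives 10.0.0.7 */
        new_ip  = ip_hdr(*pskb)->daddr & ~netmask;
        new_ip |= mr->range[0].min_ip & netmask;
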
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index c2b6b80670f8..68cc76a198eb 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -19,11 +19,7 @@
 #include <net/checksum.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter/x_tables.h>
-#ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_rule.h>
-#else
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#endif
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -43,7 +39,7 @@ redirect_check(const char *tablename,
                void *targinfo,
                unsigned int hook_mask)
 {
-        const struct ip_nat_multi_range_compat *mr = targinfo;
+        const struct nf_nat_multi_range_compat *mr = targinfo;
 
         if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
                 DEBUGP("redirect_check: bad MAP_IPS.\n");
@@ -64,17 +60,17 @@ redirect_target(struct sk_buff **pskb,
                 const struct xt_target *target,
                 const void *targinfo)
 {
-        struct ip_conntrack *ct;
+        struct nf_conn *ct;
         enum ip_conntrack_info ctinfo;
         __be32 newdst;
-        const struct ip_nat_multi_range_compat *mr = targinfo;
-        struct ip_nat_range newrange;
+        const struct nf_nat_multi_range_compat *mr = targinfo;
+        struct nf_nat_range newrange;
 
-        IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
+        NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING
                      || hooknum == NF_IP_LOCAL_OUT);
 
-        ct = ip_conntrack_get(*pskb, &ctinfo);
-        IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+        ct = nf_ct_get(*pskb, &ctinfo);
+        NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
 
         /* Local packets: make them go to loopback */
         if (hooknum == NF_IP_LOCAL_OUT)
@@ -96,20 +92,20 @@ redirect_target(struct sk_buff **pskb,
         }
 
         /* Transfer from original range. */
-        newrange = ((struct ip_nat_range)
+        newrange = ((struct nf_nat_range)
                     { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
                       newdst, newdst,
                       mr->range[0].min, mr->range[0].max });
 
         /* Hand modified range to generic setup. */
-        return ip_nat_setup_info(ct, &newrange, hooknum);
+        return nf_nat_setup_info(ct, &newrange, hooknum);
 }
 
 static struct xt_target redirect_reg = {
         .name           = "REDIRECT",
         .family         = AF_INET,
         .target         = redirect_target,
-        .targetsize     = sizeof(struct ip_nat_multi_range_compat),
+        .targetsize     = sizeof(struct nf_nat_multi_range_compat),
         .table          = "nat",
         .hooks          = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT),
         .checkentry     = redirect_check,
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 80f739e21824..9041e0741f6f 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -1,7 +1,5 @@
 /*
  * This is a module which is used for rejecting packets.
- * Added support for customized reject packets (Jozsef Kadlecsik).
- * Added support for ICMP type-3-code-13 (Maciej Soltysiak). [RFC 1812]
  */
 
 /* (C) 1999-2001 Paul `Rusty' Russell
@@ -43,7 +41,7 @@ MODULE_DESCRIPTION("iptables REJECT target module");
 static void send_reset(struct sk_buff *oldskb, int hook)
 {
         struct sk_buff *nskb;
-        struct iphdr *iph = oldskb->nh.iph;
+        struct iphdr *niph;
         struct tcphdr _otcph, *oth, *tcph;
         __be16 tmp_port;
         __be32 tmp_addr;
@@ -51,10 +49,10 @@ static void send_reset(struct sk_buff *oldskb, int hook)
         unsigned int addr_type;
 
         /* IP header checks: fragment. */
-        if (oldskb->nh.iph->frag_off & htons(IP_OFFSET))
+        if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
                 return;
 
-        oth = skb_header_pointer(oldskb, oldskb->nh.iph->ihl * 4,
+        oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
                                  sizeof(_otcph), &_otcph);
         if (oth == NULL)
                 return;
@@ -64,7 +62,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
                 return;
 
         /* Check checksum */
-        if (nf_ip_checksum(oldskb, hook, iph->ihl * 4, IPPROTO_TCP))
+        if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
                 return;
 
         /* We need a linear, writeable skb.  We also need to expand
@@ -84,20 +82,21 @@ static void send_reset(struct sk_buff *oldskb, int hook)
         skb_shinfo(nskb)->gso_segs = 0;
         skb_shinfo(nskb)->gso_type = 0;
 
-        tcph = (struct tcphdr *)((u_int32_t*)nskb->nh.iph + nskb->nh.iph->ihl);
+        tcph = (struct tcphdr *)(skb_network_header(nskb) + ip_hdrlen(nskb));
 
         /* Swap source and dest */
-        tmp_addr = nskb->nh.iph->saddr;
-        nskb->nh.iph->saddr = nskb->nh.iph->daddr;
-        nskb->nh.iph->daddr = tmp_addr;
+        niph = ip_hdr(nskb);
+        tmp_addr = niph->saddr;
+        niph->saddr = niph->daddr;
+        niph->daddr = tmp_addr;
         tmp_port = tcph->source;
         tcph->source = tcph->dest;
         tcph->dest = tmp_port;
 
         /* Truncate to length (no data) */
         tcph->doff = sizeof(struct tcphdr)/4;
-        skb_trim(nskb, nskb->nh.iph->ihl*4 + sizeof(struct tcphdr));
-        nskb->nh.iph->tot_len = htons(nskb->len);
+        skb_trim(nskb, ip_hdrlen(nskb) + sizeof(struct tcphdr));
+        niph->tot_len = htons(nskb->len);
 
         if (tcph->ack) {
                 needs_ack = 0;
@@ -105,9 +104,9 @@ static void send_reset(struct sk_buff *oldskb, int hook)
                 tcph->ack_seq = 0;
         } else {
                 needs_ack = 1;
-                tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin
-                                      + oldskb->len - oldskb->nh.iph->ihl*4
-                                      - (oth->doff<<2));
+                tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
+                                      oldskb->len - ip_hdrlen(oldskb) -
+                                      (oth->doff << 2));
                 tcph->seq = 0;
         }
 
@@ -122,14 +121,13 @@ static void send_reset(struct sk_buff *oldskb, int hook)
         /* Adjust TCP checksum */
         tcph->check = 0;
         tcph->check = tcp_v4_check(sizeof(struct tcphdr),
-                                   nskb->nh.iph->saddr,
-                                   nskb->nh.iph->daddr,
+                                   niph->saddr, niph->daddr,
                                    csum_partial((char *)tcph,
                                                 sizeof(struct tcphdr), 0));
 
         /* Set DF, id = 0 */
-        nskb->nh.iph->frag_off = htons(IP_DF);
-        nskb->nh.iph->id = 0;
+        niph->frag_off = htons(IP_DF);
+        niph->id = 0;
 
         addr_type = RTN_UNSPEC;
         if (hook != NF_IP_FORWARD
@@ -145,12 +143,11 @@ static void send_reset(struct sk_buff *oldskb, int hook)
         nskb->ip_summed = CHECKSUM_NONE;
 
         /* Adjust IP TTL */
-        nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
+        niph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
 
         /* Adjust IP checksum */
-        nskb->nh.iph->check = 0;
-        nskb->nh.iph->check = ip_fast_csum((unsigned char *)nskb->nh.iph,
-                                           nskb->nh.iph->ihl);
+        niph->check = 0;
+        niph->check = ip_fast_csum(skb_network_header(nskb), niph->ihl);
 
         /* "Never happens" */
         if (nskb->len > dst_mtu(nskb->dst))
@@ -182,7 +179,7 @@ static unsigned int reject(struct sk_buff **pskb,
 
         /* Our naive response construction doesn't deal with IP
            options, and probably shouldn't try. */
-        if ((*pskb)->nh.iph->ihl<<2 != sizeof(struct iphdr))
+        if (ip_hdrlen(*pskb) != sizeof(struct iphdr))
                 return NF_DROP;
 
         /* WARNING: This code causes reentry within iptables.
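
The RST's acknowledgement number has to cover everything the peer sent: the payload bytes (total length minus IP header minus TCP header, where doff counts 32-bit words) plus one sequence number each for SYN and FIN. The arithmetic from the hunk above, spelled out as a sketch:

        /* payload = total len - IP header len - TCP header len */
        tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
                              oldskb->len - ip_hdrlen(oldskb) -
                              (oth->doff << 2));
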
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index bd4404e5c688..511e5ff84938 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -7,21 +7,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 010320 Martin Josefsson <gandalf@wlug.westbo.se>
- * 	* copied ipt_BALANCE.c to ipt_SAME.c and changed a few things.
- * 010728 Martin Josefsson <gandalf@wlug.westbo.se>
- * 	* added --nodst to not include destination-ip in new source
- * 	  calculations.
- * 	* added some more sanity-checks.
- * 010729 Martin Josefsson <gandalf@wlug.westbo.se>
- * 	* fixed a buggy if-statement in same_check(), should have
- * 	  used ntohl() but didn't.
- * 	* added support for multiple ranges. IPT_SAME_MAX_RANGE is
- * 	  defined in linux/include/linux/netfilter_ipv4/ipt_SAME.h
- * 	  and is currently set to 10.
- * 	* added support for 1-address range, nice to have now that
- * 	  we have multiple ranges.
 */
 #include <linux/types.h>
 #include <linux/ip.h>
@@ -35,11 +20,7 @@
 #include <net/checksum.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter/x_tables.h>
-#ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_rule.h>
-#else
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#endif
 #include <linux/netfilter_ipv4/ipt_SAME.h>
 
 MODULE_LICENSE("GPL");
@@ -138,17 +119,17 @@ same_target(struct sk_buff **pskb,
             const struct xt_target *target,
             const void *targinfo)
 {
-        struct ip_conntrack *ct;
+        struct nf_conn *ct;
         enum ip_conntrack_info ctinfo;
         u_int32_t tmpip, aindex;
         __be32 new_ip;
         const struct ipt_same_info *same = targinfo;
-        struct ip_nat_range newrange;
-        const struct ip_conntrack_tuple *t;
+        struct nf_nat_range newrange;
+        const struct nf_conntrack_tuple *t;
 
-        IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
+        NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
                      hooknum == NF_IP_POST_ROUTING);
-        ct = ip_conntrack_get(*pskb, &ctinfo);
+        ct = nf_ct_get(*pskb, &ctinfo);
 
         t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
 
@@ -157,17 +138,10 @@ same_target(struct sk_buff **pskb,
            Here we calculate the index in same->iparray which
            holds the ipaddress we should use */
 
-#ifdef CONFIG_NF_NAT_NEEDED
         tmpip = ntohl(t->src.u3.ip);
 
         if (!(same->info & IPT_SAME_NODST))
                 tmpip += ntohl(t->dst.u3.ip);
-#else
-        tmpip = ntohl(t->src.ip);
-
-        if (!(same->info & IPT_SAME_NODST))
-                tmpip += ntohl(t->dst.ip);
-#endif
         aindex = tmpip % same->ipnum;
 
         new_ip = htonl(same->iparray[aindex]);
@@ -178,13 +152,13 @@ same_target(struct sk_buff **pskb,
                 NIPQUAD(new_ip));
 
         /* Transfer from original range. */
-        newrange = ((struct ip_nat_range)
+        newrange = ((struct nf_nat_range)
                     { same->range[0].flags, new_ip, new_ip,
                       /* FIXME: Use ports from correct range! */
                       same->range[0].min, same->range[0].max });
 
         /* Hand modified range to generic setup. */
-        return ip_nat_setup_info(ct, &newrange, hooknum);
+        return nf_nat_setup_info(ct, &newrange, hooknum);
 }
 
 static struct xt_target same_reg = {
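
SAME picks a deterministic address per flow: it sums the host-order source address and, unless --nodst was given, the destination address, then indexes the configured array with the sum modulo the array size. The selection in isolation, as a sketch:

        u_int32_t tmpip = ntohl(t->src.u3.ip);

        if (!(same->info & IPT_SAME_NODST))
                tmpip += ntohl(t->dst.u3.ip);
        new_ip = htonl(same->iparray[tmpip % same->ipnum]);
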
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index cedf9f7d9d6e..0ad02f249837 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -29,13 +29,13 @@ target(struct sk_buff **pskb,
        const void *targinfo)
 {
         const struct ipt_tos_target_info *tosinfo = targinfo;
-        struct iphdr *iph = (*pskb)->nh.iph;
+        struct iphdr *iph = ip_hdr(*pskb);
 
         if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
                 __u8 oldtos;
                 if (!skb_make_writable(pskb, sizeof(struct iphdr)))
                         return NF_DROP;
-                iph = (*pskb)->nh.iph;
+                iph = ip_hdr(*pskb);
                 oldtos = iph->tos;
                 iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
                 nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index 64be31c22ba9..a991ec7bd4e7 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -32,7 +32,7 @@ ipt_ttl_target(struct sk_buff **pskb,
         if (!skb_make_writable(pskb, (*pskb)->len))
                 return NF_DROP;
 
-        iph = (*pskb)->nh.iph;
+        iph = ip_hdr(*pskb);
 
         switch (info->mode) {
         case IPT_TTL_SET:
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 9acc018766f2..23b607b33b32 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -2,20 +2,6 @@
  * netfilter module for userspace packet logging daemons
  *
  * (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
- *
- * 2000/09/22 ulog-cprange feature added
- * 2001/01/04 in-kernel queue as proposed by Sebastian Zander
- *            <zander@fokus.gmd.de>
- * 2001/01/30 per-rule nlgroup conflicts with global queue.
- *            nlgroup now global (sysctl)
- * 2001/04/19 ulog-queue reworked, now fixed buffer size specified at
- *            module loadtime -HW
- * 2002/07/07 remove broken nflog_rcv() function -HW
- * 2002/08/29 fix shifted/unshifted nlgroup bug -HW
- * 2002/10/30 fix uninitialized mac_len field - <Anders K. Pedersen>
- * 2004/10/25 fix erroneous calculation of 'len' parameter to NLMSG_PUT
- *            resulting in bogus 'error during NLMSG_PUT' messages.
- *
  * (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
  *
@@ -42,8 +28,6 @@
  * flushtimeout:
  *    Specify, after how many hundredths of a second the queue should be
  *    flushed even if it is not full yet.
- *
- * ipt_ULOG.c,v 1.22 2002/10/30 09:07:31 laforge Exp
  */
 
 #include <linux/module.h>
@@ -187,6 +171,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
         ulog_packet_msg_t *pm;
         size_t size, copy_len;
         struct nlmsghdr *nlh;
+        struct timeval tv;
 
         /* ffs == find first bit set, necessary because userspace
          * is already shifting groupnumber, but we need unshifted.
@@ -232,13 +217,14 @@
         pm = NLMSG_DATA(nlh);
 
         /* We might not have a timestamp, get one */
-        if (skb->tstamp.off_sec == 0)
+        if (skb->tstamp.tv64 == 0)
                 __net_timestamp((struct sk_buff *)skb);
 
         /* copy hook, prefix, timestamp, payload, etc. */
         pm->data_len = copy_len;
-        put_unaligned(skb->tstamp.off_sec, &pm->timestamp_sec);
-        put_unaligned(skb->tstamp.off_usec, &pm->timestamp_usec);
+        tv = ktime_to_timeval(skb->tstamp);
+        put_unaligned(tv.tv_sec, &pm->timestamp_sec);
+        put_unaligned(tv.tv_usec, &pm->timestamp_usec);
         put_unaligned(skb->mark, &pm->mark);
         pm->hook = hooknum;
         if (prefix != NULL)
@@ -249,9 +235,9 @@
         *(pm->prefix) = '\0';
 
         if (in && in->hard_header_len > 0
-            && skb->mac.raw != (void *) skb->nh.iph
+            && skb->mac_header != skb->network_header
             && in->hard_header_len <= ULOG_MAC_LEN) {
-                memcpy(pm->mac, skb->mac.raw, in->hard_header_len);
+                memcpy(pm->mac, skb_mac_header(skb), in->hard_header_len);
                 pm->mac_len = in->hard_header_len;
         } else
                 pm->mac_len = 0;
@@ -363,12 +349,52 @@ static int ipt_ulog_checkentry(const char *tablename,
         return 1;
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_ipt_ulog_info {
+        compat_uint_t   nl_group;
+        compat_size_t   copy_range;
+        compat_size_t   qthreshold;
+        char            prefix[ULOG_PREFIX_LEN];
+};
+
+static void compat_from_user(void *dst, void *src)
+{
+        struct compat_ipt_ulog_info *cl = src;
+        struct ipt_ulog_info l = {
+                .nl_group       = cl->nl_group,
+                .copy_range     = cl->copy_range,
+                .qthreshold     = cl->qthreshold,
+        };
+
+        memcpy(l.prefix, cl->prefix, sizeof(l.prefix));
+        memcpy(dst, &l, sizeof(l));
+}
+
+static int compat_to_user(void __user *dst, void *src)
+{
+        struct ipt_ulog_info *l = src;
+        struct compat_ipt_ulog_info cl = {
+                .nl_group       = l->nl_group,
+                .copy_range     = l->copy_range,
+                .qthreshold     = l->qthreshold,
+        };
+
+        memcpy(cl.prefix, l->prefix, sizeof(cl.prefix));
+        return copy_to_user(dst, &cl, sizeof(cl)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_COMPAT */
+
 static struct xt_target ipt_ulog_reg = {
         .name           = "ULOG",
         .family         = AF_INET,
         .target         = ipt_ulog_target,
         .targetsize     = sizeof(struct ipt_ulog_info),
         .checkentry     = ipt_ulog_checkentry,
+#ifdef CONFIG_COMPAT
+        .compatsize     = sizeof(struct compat_ipt_ulog_info),
+        .compat_from_user = compat_from_user,
+        .compat_to_user = compat_to_user,
+#endif
         .me             = THIS_MODULE,
 };
 
@@ -390,14 +416,11 @@ static int __init ipt_ulog_init(void)
         }
 
         /* initialize ulog_buffers */
-        for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
-                init_timer(&ulog_buffers[i].timer);
-                ulog_buffers[i].timer.function = ulog_timer;
-                ulog_buffers[i].timer.data = i;
-        }
+        for (i = 0; i < ULOG_MAXNLGROUPS; i++)
+                setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
 
         nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL,
-                                        THIS_MODULE);
+                                        NULL, THIS_MODULE);
         if (!nflognl)
                 return -ENOMEM;
 
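
setup_timer() folds the three-field initialisation of a struct timer_list into one call; the loop above therefore collapses from five lines to two. As a sketch:

        for (i = 0; i < ULOG_MAXNLGROUPS; i++)
                setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
        /* equivalent to init_timer() plus assigning .function and .data */
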
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index cfa0472617f6..a652a1451552 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -33,7 +33,7 @@ static int match(const struct sk_buff *skb,
                  int offset, unsigned int protoff, int *hotdrop)
 {
         const struct ipt_addrtype_info *info = matchinfo;
-        const struct iphdr *iph = skb->nh.iph;
+        const struct iphdr *iph = ip_hdr(skb);
         int ret = 1;
 
         if (info->source)
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index 37508b2cfea6..26218122f865 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -1,7 +1,5 @@
 /* IP tables module for matching the value of the IPv4 and TCP ECN bits
  *
- * ipt_ecn.c,v 1.3 2002/05/29 15:09:00 laforge Exp
- *
  * (C) 2002 by Harald Welte <laforge@gnumonks.org>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -11,6 +9,7 @@
 
 #include <linux/in.h>
 #include <linux/ip.h>
+#include <net/ip.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/tcp.h>
@@ -26,7 +25,7 @@ MODULE_LICENSE("GPL");
 static inline int match_ip(const struct sk_buff *skb,
 			   const struct ipt_ecn_info *einfo)
 {
-	return ((skb->nh.iph->tos&IPT_ECN_IP_MASK) == einfo->ip_ect);
+	return (ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect;
 }
 
 static inline int match_tcp(const struct sk_buff *skb,
@@ -38,8 +37,7 @@ static inline int match_tcp(const struct sk_buff *skb,
 	/* In practice, TCP match does this, so can't fail. But let's
 	 * be good citizens.
 	 */
-	th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
-				sizeof(_tcph), &_tcph);
+	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
 	if (th == NULL) {
 		*hotdrop = 0;
 		return 0;
@@ -80,7 +78,7 @@ static int match(const struct sk_buff *skb,
 		return 0;
 
 	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
-		if (skb->nh.iph->protocol != IPPROTO_TCP)
+		if (ip_hdr(skb)->protocol != IPPROTO_TCP)
 			return 0;
 		if (!match_tcp(skb, info, hotdrop))
 			return 0;
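The substitution repeated across these match modules is mechanical: the skb->nh union is gone, replaced by header accessors from <net/ip.h>. A rough mapping (illustrative, not part of the commit), assuming a linear skb whose network header offset has been set:

	iph    = ip_hdr(skb);              /* was: skb->nh.iph */
	hdrlen = ip_hdrlen(skb);           /* was: skb->nh.iph->ihl * 4 */
	off    = skb_network_offset(skb);  /* was: skb->nh.raw - skb->data */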
diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c
index bc5d5e6091e4..33af9e940887 100644
--- a/net/ipv4/netfilter/ipt_iprange.c
+++ b/net/ipv4/netfilter/ipt_iprange.c
@@ -32,7 +32,7 @@ match(const struct sk_buff *skb,
       int offset, unsigned int protoff, int *hotdrop)
 {
 	const struct ipt_iprange_info *info = matchinfo;
-	const struct iphdr *iph = skb->nh.iph;
+	const struct iphdr *iph = ip_hdr(skb);
 
 	if (info->flags & IPRANGE_SRC) {
 		if (((ntohl(iph->saddr) < ntohl(info->src.min_ip))
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index aecb9c48e152..15a9e8bbb7cc 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -183,11 +183,11 @@ ipt_recent_match(const struct sk_buff *skb,
 	int ret = info->invert;
 
 	if (info->side == IPT_RECENT_DEST)
-		addr = skb->nh.iph->daddr;
+		addr = ip_hdr(skb)->daddr;
 	else
-		addr = skb->nh.iph->saddr;
+		addr = ip_hdr(skb)->saddr;
 
-	ttl = skb->nh.iph->ttl;
+	ttl = ip_hdr(skb)->ttl;
 	/* use TTL as seen before forwarding */
 	if (out && !skb->sk)
 		ttl++;
diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c
index 5d33b51d49d8..d314844af12b 100644
--- a/net/ipv4/netfilter/ipt_tos.c
+++ b/net/ipv4/netfilter/ipt_tos.c
@@ -30,7 +30,7 @@ match(const struct sk_buff *skb,
 {
 	const struct ipt_tos_info *info = matchinfo;
 
-	return (skb->nh.iph->tos == info->tos) ^ info->invert;
+	return (ip_hdr(skb)->tos == info->tos) ^ info->invert;
 }
 
 static struct xt_match tos_match = {
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
index 1eca9f400374..ab02d9e3139c 100644
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ b/net/ipv4/netfilter/ipt_ttl.c
@@ -1,7 +1,5 @@
 /* IP tables module for matching the value of the TTL
  *
- * ipt_ttl.c,v 1.5 2000/11/13 11:16:08 laforge Exp
- *
  * (C) 2000,2001 by Harald Welte <laforge@netfilter.org>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -26,19 +24,20 @@ static int match(const struct sk_buff *skb,
 		 int offset, unsigned int protoff, int *hotdrop)
 {
 	const struct ipt_ttl_info *info = matchinfo;
+	const u8 ttl = ip_hdr(skb)->ttl;
 
 	switch (info->mode) {
 		case IPT_TTL_EQ:
-			return (skb->nh.iph->ttl == info->ttl);
+			return (ttl == info->ttl);
 			break;
 		case IPT_TTL_NE:
-			return (!(skb->nh.iph->ttl == info->ttl));
+			return (!(ttl == info->ttl));
 			break;
 		case IPT_TTL_LT:
-			return (skb->nh.iph->ttl < info->ttl);
+			return (ttl < info->ttl);
 			break;
 		case IPT_TTL_GT:
-			return (skb->nh.iph->ttl > info->ttl);
+			return (ttl > info->ttl);
 			break;
 		default:
 			printk(KERN_WARNING "ipt_ttl: unknown mode %d\n",
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index d1d61e97b976..42728909eba0 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
+#include <net/ip.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -102,7 +103,7 @@ ipt_local_out_hook(unsigned int hook,
 {
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
-	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("ipt_hook: happy cracking.\n");
 		return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 98b66ef0c714..9278802f2742 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -7,8 +7,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * Extended to all five netfilter hooks by Brad Chapman & Harald Welte
  */
 #include <linux/module.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
@@ -17,6 +15,7 @@
 #include <net/sock.h>
 #include <net/route.h>
 #include <linux/ip.h>
+#include <net/ip.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -130,13 +129,14 @@ ipt_local_hook(unsigned int hook,
 		   int (*okfn)(struct sk_buff *))
 {
 	unsigned int ret;
+	const struct iphdr *iph;
 	u_int8_t tos;
 	__be32 saddr, daddr;
 	u_int32_t mark;
 
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
-	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("ipt_hook: happy cracking.\n");
 		return NF_ACCEPT;
@@ -144,19 +144,23 @@ ipt_local_hook(unsigned int hook,
 
 	/* Save things which could affect route */
 	mark = (*pskb)->mark;
-	saddr = (*pskb)->nh.iph->saddr;
-	daddr = (*pskb)->nh.iph->daddr;
-	tos = (*pskb)->nh.iph->tos;
+	iph = ip_hdr(*pskb);
+	saddr = iph->saddr;
+	daddr = iph->daddr;
+	tos = iph->tos;
 
 	ret = ipt_do_table(pskb, hook, in, out, &packet_mangler);
 	/* Reroute for ANY change. */
-	if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE
-	    && ((*pskb)->nh.iph->saddr != saddr
-		|| (*pskb)->nh.iph->daddr != daddr
-		|| (*pskb)->mark != mark
-		|| (*pskb)->nh.iph->tos != tos))
-		if (ip_route_me_harder(pskb, RTN_UNSPEC))
-			ret = NF_DROP;
+	if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
+		iph = ip_hdr(*pskb);
+
+		if (iph->saddr != saddr ||
+		    iph->daddr != daddr ||
+		    (*pskb)->mark != mark ||
+		    iph->tos != tos)
+			if (ip_route_me_harder(pskb, RTN_UNSPEC))
+				ret = NF_DROP;
+	}
 
 	return ret;
 }
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 8f3e92d20df8..0654eaae70c9 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -4,14 +4,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- move L3 protocol dependent part to this file.
- * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- add get_features() to support various size of conntrack
- *	  structures.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c
  */
 
 #include <linux/types.h>
@@ -87,7 +79,7 @@ nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
 	local_bh_enable();
 
 	if (skb)
-		ip_send_check(skb->nh.iph);
+		ip_send_check(ip_hdr(skb));
 
 	return skb;
 }
@@ -97,16 +89,16 @@ ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
 	     u_int8_t *protonum)
 {
 	/* Never happen */
-	if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
+	if (ip_hdr(*pskb)->frag_off & htons(IP_OFFSET)) {
 		if (net_ratelimit()) {
 			printk(KERN_ERR "ipv4_prepare: Frag of proto %u (hook=%u)\n",
-			       (*pskb)->nh.iph->protocol, hooknum);
+			       ip_hdr(*pskb)->protocol, hooknum);
 		}
 		return -NF_DROP;
 	}
 
-	*dataoff = (*pskb)->nh.raw - (*pskb)->data + (*pskb)->nh.iph->ihl*4;
-	*protonum = (*pskb)->nh.iph->protocol;
+	*dataoff = skb_network_offset(*pskb) + ip_hdrlen(*pskb);
+	*protonum = ip_hdr(*pskb)->protocol;
 
 	return NF_ACCEPT;
 }
@@ -152,9 +144,8 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum,
 		return NF_ACCEPT;
 
 	return help->helper->help(pskb,
-				  (*pskb)->nh.raw - (*pskb)->data
-				  + (*pskb)->nh.iph->ihl*4,
-				  ct, ctinfo);
+				  skb_network_offset(*pskb) + ip_hdrlen(*pskb),
+				  ct, ctinfo);
 }
 
 static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
@@ -171,7 +162,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
 #endif
 
 	/* Gather fragments. */
-	if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+	if (ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)) {
 		*pskb = nf_ct_ipv4_gather_frags(*pskb,
 						hooknum == NF_IP_PRE_ROUTING ?
 						IP_DEFRAG_CONNTRACK_IN :
@@ -199,7 +190,7 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum,
 {
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
-	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("ipt_hook: happy cracking.\n");
 		return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 5fd1e5363c1a..f4fc657c1983 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -4,11 +4,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- enable working with Layer 3 protocol independent connection tracking.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_proto_icmp.c
  */
 
 #include <linux/types.h>
@@ -158,7 +153,7 @@ icmp_error_message(struct sk_buff *skb,
 	NF_CT_ASSERT(skb->nfct == NULL);
 
 	/* Not enough header? */
-	inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in);
+	inside = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_in), &_in);
 	if (inside == NULL)
 		return -NF_ACCEPT;
 
@@ -172,7 +167,7 @@ icmp_error_message(struct sk_buff *skb,
 	/* rcu_read_lock()ed by nf_hook_slow */
 	innerproto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
 
-	dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp);
+	dataoff = ip_hdrlen(skb) + sizeof(inside->icmp);
 	/* Are they talking about one of our connections? */
 	if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET,
 			     inside->ip.protocol, &origtuple,
@@ -227,7 +222,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff,
 	struct icmphdr _ih, *icmph;
 
 	/* Not enough header? */
-	icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih);
+	icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
 	if (icmph == NULL) {
 		if (LOG_INVALID(IPPROTO_ICMP))
 			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 452e9d326684..ea02f00d2dac 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -431,7 +431,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 	} *inside;
 	struct nf_conntrack_l4proto *l4proto;
 	struct nf_conntrack_tuple inner, target;
-	int hdrlen = (*pskb)->nh.iph->ihl * 4;
+	int hdrlen = ip_hdrlen(*pskb);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	unsigned long statusbit;
 	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
@@ -439,7 +439,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 	if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
 		return 0;
 
-	inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+	inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
 
 	/* We're actually going to mangle it beyond trivial checksum
 	   adjustment, so make sure the current checksum is correct. */
@@ -469,9 +469,9 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 	l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
 
 	if (!nf_ct_get_tuple(*pskb,
-			     (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr),
-			     (*pskb)->nh.iph->ihl*4 +
-			     sizeof(struct icmphdr) + inside->ip.ihl*4,
+			     ip_hdrlen(*pskb) + sizeof(struct icmphdr),
+			     (ip_hdrlen(*pskb) +
+			      sizeof(struct icmphdr) + inside->ip.ihl * 4),
 			     (u_int16_t)AF_INET,
 			     inside->ip.protocol,
 			     &inner, l3proto, l4proto))
@@ -483,14 +483,14 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 	   packet: PREROUTING (DST manip), routing produces ICMP, goes
 	   through POSTROUTING (which must correct the DST manip). */
 	if (!manip_pkt(inside->ip.protocol, pskb,
-		       (*pskb)->nh.iph->ihl*4 + sizeof(inside->icmp),
+		       ip_hdrlen(*pskb) + sizeof(inside->icmp),
 		       &ct->tuplehash[!dir].tuple,
 		       !manip))
 		return 0;
 
 	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
 		/* Reloading "inside" here since manip_pkt inner. */
-		inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+		inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
 		inside->icmp.checksum = 0;
 		inside->icmp.checksum =
 			csum_fold(skb_checksum(*pskb, hdrlen,
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 9cbf3f9be13b..fcebc968d37f 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -33,7 +33,7 @@ static int set_addr(struct sk_buff **pskb,
 		    unsigned int addroff, __be32 ip, __be16 port)
 {
 	enum ip_conntrack_info ctinfo;
-	struct nf_conn *ct = ip_conntrack_get(*pskb, &ctinfo);
+	struct nf_conn *ct = nf_ct_get(*pskb, &ctinfo);
 	struct {
 		__be32 ip;
 		__be16 port;
@@ -44,7 +44,7 @@ static int set_addr(struct sk_buff **pskb,
 	buf.port = port;
 	addroff += dataoff;
 
-	if ((*pskb)->nh.iph->protocol == IPPROTO_TCP) {
+	if (ip_hdr(*pskb)->protocol == IPPROTO_TCP) {
 		if (!nf_nat_mangle_tcp_packet(pskb, ct, ctinfo,
 					      addroff, sizeof(buf),
 					      (char *) &buf, sizeof(buf))) {
@@ -55,11 +55,11 @@ static int set_addr(struct sk_buff **pskb,
 		}
 
 		/* Relocate data pointer */
-		th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4,
+		th = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
 					sizeof(_tcph), &_tcph);
 		if (th == NULL)
 			return -1;
-		*data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
+		*data = (*pskb)->data + ip_hdrlen(*pskb) +
 			th->doff * 4 + dataoff;
 	} else {
 		if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
@@ -73,8 +73,8 @@ static int set_addr(struct sk_buff **pskb,
 		/* nf_nat_mangle_udp_packet uses skb_make_writable() to copy
 		 * or pull everything in a linear buffer, so we can safely
 		 * use the skb pointers now */
-		*data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
-			sizeof(struct udphdr);
+		*data = ((*pskb)->data + ip_hdrlen(*pskb) +
+			 sizeof(struct udphdr));
 	}
 
 	return 0;
@@ -383,7 +383,7 @@ static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct,
 static void ip_nat_q931_expect(struct nf_conn *new,
 			       struct nf_conntrack_expect *this)
 {
-	struct ip_nat_range range;
+	struct nf_nat_range range;
 
 	if (this->tuple.src.u3.ip != 0) {	/* Only accept calls from GK */
 		nf_nat_follow_master(new, this);
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 49a90c39ffce..15b6e5ce3a04 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -87,12 +87,13 @@ static void mangle_contents(struct sk_buff *skb,
 	unsigned char *data;
 
 	BUG_ON(skb_is_nonlinear(skb));
-	data = (unsigned char *)skb->nh.iph + dataoff;
+	data = skb_network_header(skb) + dataoff;
 
 	/* move post-replacement */
 	memmove(data + match_offset + rep_len,
 		data + match_offset + match_len,
-		skb->tail - (data + match_offset + match_len));
+		skb->tail - (skb->network_header + dataoff +
+			     match_offset + match_len));
 
 	/* insert data from buffer */
 	memcpy(data + match_offset, rep_buffer, rep_len);
@@ -111,8 +112,8 @@ static void mangle_contents(struct sk_buff *skb,
 	}
 
 	/* fix IP hdr checksum information */
-	skb->nh.iph->tot_len = htons(skb->len);
-	ip_send_check(skb->nh.iph);
+	ip_hdr(skb)->tot_len = htons(skb->len);
+	ip_send_check(ip_hdr(skb));
 }
 
 /* Unusual, but possible case. */
@@ -152,6 +153,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
 			 const char *rep_buffer,
 			 unsigned int rep_len)
 {
+	struct rtable *rt = (struct rtable *)(*pskb)->dst;
 	struct iphdr *iph;
 	struct tcphdr *tcph;
 	int oldlen, datalen;
@@ -166,7 +168,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
 
 	SKB_LINEAR_ASSERT(*pskb);
 
-	iph = (*pskb)->nh.iph;
+	iph = ip_hdr(*pskb);
 	tcph = (void *)iph + iph->ihl*4;
 
 	oldlen = (*pskb)->len - iph->ihl*4;
@@ -175,11 +177,22 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
 
 	datalen = (*pskb)->len - iph->ihl*4;
 	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
-		tcph->check = 0;
-		tcph->check = tcp_v4_check(datalen,
-					   iph->saddr, iph->daddr,
-					   csum_partial((char *)tcph,
-							datalen, 0));
+		if (!(rt->rt_flags & RTCF_LOCAL) &&
+		    (*pskb)->dev->features & NETIF_F_ALL_CSUM) {
+			(*pskb)->ip_summed = CHECKSUM_PARTIAL;
+			(*pskb)->csum_start = skb_headroom(*pskb) +
+					      skb_network_offset(*pskb) +
+					      iph->ihl * 4;
+			(*pskb)->csum_offset = offsetof(struct tcphdr, check);
+			tcph->check = ~tcp_v4_check(datalen,
+						    iph->saddr, iph->daddr, 0);
+		} else {
+			tcph->check = 0;
+			tcph->check = tcp_v4_check(datalen,
+						   iph->saddr, iph->daddr,
+						   csum_partial((char *)tcph,
+								datalen, 0));
+		}
 	} else
 		nf_proto_csum_replace2(&tcph->check, *pskb,
 				       htons(oldlen), htons(datalen), 1);
@@ -190,7 +203,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
 				    (int)rep_len - (int)match_len,
 				    ct, ctinfo);
 		/* Tell TCP window tracking about seq change */
-		nf_conntrack_tcp_update(*pskb, (*pskb)->nh.iph->ihl*4,
+		nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb),
 					ct, CTINFO2DIR(ctinfo));
 	}
 	return 1;
@@ -216,12 +229,13 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb,
 			 const char *rep_buffer,
 			 unsigned int rep_len)
 {
+	struct rtable *rt = (struct rtable *)(*pskb)->dst;
 	struct iphdr *iph;
 	struct udphdr *udph;
 	int datalen, oldlen;
 
 	/* UDP helpers might accidentally mangle the wrong packet */
-	iph = (*pskb)->nh.iph;
+	iph = ip_hdr(*pskb);
 	if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) +
 			   match_offset + match_len)
 		return 0;
@@ -234,7 +248,7 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb,
 	    !enlarge_skb(pskb, rep_len - match_len))
 		return 0;
 
-	iph = (*pskb)->nh.iph;
+	iph = ip_hdr(*pskb);
 	udph = (void *)iph + iph->ihl*4;
 
 	oldlen = (*pskb)->len - iph->ihl*4;
@@ -250,13 +264,25 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb,
 		return 1;
 
 	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
-		udph->check = 0;
-		udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
-						datalen, IPPROTO_UDP,
-						csum_partial((char *)udph,
-							     datalen, 0));
-		if (!udph->check)
-			udph->check = CSUM_MANGLED_0;
+		if (!(rt->rt_flags & RTCF_LOCAL) &&
+		    (*pskb)->dev->features & NETIF_F_ALL_CSUM) {
+			(*pskb)->ip_summed = CHECKSUM_PARTIAL;
+			(*pskb)->csum_start = skb_headroom(*pskb) +
+					      skb_network_offset(*pskb) +
+					      iph->ihl * 4;
+			(*pskb)->csum_offset = offsetof(struct udphdr, check);
+			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+							 datalen, IPPROTO_UDP,
+							 0);
+		} else {
+			udph->check = 0;
+			udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
+							datalen, IPPROTO_UDP,
+							csum_partial((char *)udph,
+								     datalen, 0));
+			if (!udph->check)
+				udph->check = CSUM_MANGLED_0;
+		}
 	} else
 		nf_proto_csum_replace2(&udph->check, *pskb,
 				       htons(oldlen), htons(datalen), 1);
@@ -318,8 +344,8 @@ nf_nat_sack_adjust(struct sk_buff **pskb,
 	unsigned int dir, optoff, optend;
 	struct nf_conn_nat *nat = nfct_nat(ct);
 
-	optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr);
-	optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4;
+	optoff = ip_hdrlen(*pskb) + sizeof(struct tcphdr);
+	optend = ip_hdrlen(*pskb) + tcph->doff * 4;
 
 	if (!skb_make_writable(pskb, optend))
 		return 0;
@@ -371,10 +397,10 @@ nf_nat_seq_adjust(struct sk_buff **pskb,
 	this_way = &nat->info.seq[dir];
 	other_way = &nat->info.seq[!dir];
 
-	if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
+	if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
 		return 0;
 
-	tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+	tcph = (void *)(*pskb)->data + ip_hdrlen(*pskb);
 	if (after(ntohl(tcph->seq), this_way->correction_pos))
 		newseq = htonl(ntohl(tcph->seq) + this_way->offset_after);
 	else
@@ -399,7 +425,7 @@ nf_nat_seq_adjust(struct sk_buff **pskb,
 	if (!nf_nat_sack_adjust(pskb, tcph, ct, ctinfo))
 		return 0;
 
-	nf_conntrack_tcp_update(*pskb, (*pskb)->nh.iph->ihl*4, ct, dir);
+	nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb), ct, dir);
 
 	return 1;
 }
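The new branches in nf_nat_mangle_tcp_packet() and nf_nat_mangle_udp_packet() above let capable hardware finish the checksum after NAT mangling: mark the skb CHECKSUM_PARTIAL, record where the L4 checksum field lives via csum_start/csum_offset, and seed that field with only the complemented pseudo-header sum so the device adds the payload portion on transmit. A condensed sketch of the TCP case (illustrative, same logic as the hunk):

	if (!(rt->rt_flags & RTCF_LOCAL) &&
	    skb->dev->features & NETIF_F_ALL_CSUM) {
		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum_start = skb_headroom(skb) +
				  skb_network_offset(skb) + iph->ihl * 4;
		skb->csum_offset = offsetof(struct tcphdr, check);
		/* seed with the pseudo-header checksum only */
		tcph->check = ~tcp_v4_check(datalen, iph->saddr,
					    iph->daddr, 0);
	} else {
		/* no offload: full software checksum */
		tcph->check = 0;
		tcph->check = tcp_v4_check(datalen, iph->saddr, iph->daddr,
					   csum_partial((char *)tcph,
							datalen, 0));
	}

Locally delivered packets (RTCF_LOCAL) are excluded because no transmit path would ever fill the checksum in.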
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 7ba341c22eaa..a66888749ceb 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -53,7 +53,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
 	struct nf_conntrack_tuple t;
 	struct nf_ct_pptp_master *ct_pptp_info;
 	struct nf_nat_pptp *nat_pptp_info;
-	struct ip_nat_range range;
+	struct nf_nat_range range;
 
 	ct_pptp_info = &nfct_help(master)->help.ct_pptp_info;
 	nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info;
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 147a4370cf03..2a283397a8b6 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -191,7 +191,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb,
 
 	if (hooknum == NF_IP_LOCAL_OUT &&
 	    mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
-		warn_if_extra_mangle((*pskb)->nh.iph->daddr,
+		warn_if_extra_mangle(ip_hdr(*pskb)->daddr,
 				     mr->range[0].min_ip);
 
 	return nf_nat_setup_info(ct, &mr->range[0], hooknum);
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index b12cd7c314ca..bfd88e4e0685 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
+#include <net/ip.h>
 #include <linux/udp.h>
 
 #include <net/netfilter/nf_nat.h>
@@ -92,7 +93,7 @@ static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo,
 	if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
 				      matchoff, matchlen, addr, addrlen))
 		return 0;
-	*dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	*dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr);
 	return 1;
 
 }
@@ -106,7 +107,7 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb,
 	struct addr_map map;
 	int dataoff, datalen;
 
-	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
 	datalen = (*pskb)->len - dataoff;
 	if (datalen < sizeof("SIP/2.0") - 1)
 		return NF_DROP;
@@ -155,7 +156,7 @@ static unsigned int mangle_sip_packet(struct sk_buff **pskb,
 		return 0;
 
 	/* We need to reload this. Thanks Patrick. */
-	*dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	*dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr);
 	return 1;
 }
 
@@ -168,7 +169,7 @@ static int mangle_content_len(struct sk_buff **pskb,
 	char buffer[sizeof("65536")];
 	int bufflen;
 
-	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
 
 	/* Get actual SDP lenght */
 	if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff,
@@ -200,7 +201,7 @@ static unsigned int mangle_sdp(struct sk_buff **pskb,
 	char buffer[sizeof("nnn.nnn.nnn.nnn")];
 	unsigned int dataoff, bufflen;
 
-	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
 
 	/* Mangle owner and contact info. */
 	bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip));
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index ce5c4939a6ee..6e88505d6162 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -38,10 +38,6 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  *
  * Author: James Morris <jmorris@intercode.com.au>
- *
- * Updates:
- * 2000-08-06: Convert to new helper API (Harald Welte).
- *
  */
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -1194,7 +1190,7 @@ static int snmp_translate(struct nf_conn *ct,
 			  enum ip_conntrack_info ctinfo,
 			  struct sk_buff **pskb)
 {
-	struct iphdr *iph = (*pskb)->nh.iph;
+	struct iphdr *iph = ip_hdr(*pskb);
 	struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
 	u_int16_t udplen = ntohs(udph->len);
 	u_int16_t paylen = udplen - sizeof(struct udphdr);
@@ -1235,7 +1231,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
 {
 	int dir = CTINFO2DIR(ctinfo);
 	unsigned int ret;
-	struct iphdr *iph = (*pskb)->nh.iph;
+	struct iphdr *iph = ip_hdr(*pskb);
 	struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
 
 	/* SNMP replies and originating SNMP traps get mangled */
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 15aa3db8cb33..64bbed2ba780 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -86,8 +86,7 @@ nf_nat_fn(unsigned int hooknum,
 
 	/* We never see fragments: conntrack defrags on pre-routing
 	   and local-out, and nf_nat_out protects post-routing. */
-	NF_CT_ASSERT(!((*pskb)->nh.iph->frag_off
-		       & htons(IP_MF|IP_OFFSET)));
+	NF_CT_ASSERT(!(ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)));
 
 	ct = nf_ct_get(*pskb, &ctinfo);
 	/* Can't track? It's not due to stress, or conntrack would
@@ -98,11 +97,10 @@ nf_nat_fn(unsigned int hooknum,
 		/* Exception: ICMP redirect to new connection (not in
 		   hash table yet). We must not let this through, in
 		   case we're doing NAT to the same network. */
-		if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
+		if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
 			struct icmphdr _hdr, *hp;
 
-			hp = skb_header_pointer(*pskb,
-						(*pskb)->nh.iph->ihl*4,
+			hp = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
 						sizeof(_hdr), &_hdr);
 			if (hp != NULL &&
 			    hp->type == ICMP_REDIRECT)
@@ -122,7 +120,7 @@ nf_nat_fn(unsigned int hooknum,
 	switch (ctinfo) {
 	case IP_CT_RELATED:
 	case IP_CT_RELATED+IP_CT_IS_REPLY:
-		if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
+		if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
 			if (!nf_nat_icmp_reply_translation(ct, ctinfo,
 							   hooknum, pskb))
 				return NF_DROP;
@@ -177,11 +175,11 @@ nf_nat_in(unsigned int hooknum,
 	  int (*okfn)(struct sk_buff *))
 {
 	unsigned int ret;
-	__be32 daddr = (*pskb)->nh.iph->daddr;
+	__be32 daddr = ip_hdr(*pskb)->daddr;
 
 	ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
 	if (ret != NF_DROP && ret != NF_STOLEN &&
-	    daddr != (*pskb)->nh.iph->daddr) {
+	    daddr != ip_hdr(*pskb)->daddr) {
 		dst_release((*pskb)->dst);
 		(*pskb)->dst = NULL;
 	}
@@ -203,7 +201,7 @@ nf_nat_out(unsigned int hooknum,
 
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr) ||
-	    (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
+	    ip_hdrlen(*pskb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
 	ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
@@ -236,7 +234,7 @@ nf_nat_local_fn(unsigned int hooknum,
 
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr) ||
-	    (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
+	    ip_hdrlen(*pskb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
 	ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ae68a691e8cd..37ab5802ca08 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -87,19 +87,6 @@ static const struct file_operations sockstat_seq_fops = {
 	.release = single_release,
 };
 
-static unsigned long
-fold_field(void *mib[], int offt)
-{
-	unsigned long res = 0;
-	int i;
-
-	for_each_possible_cpu(i) {
-		res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
-		res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
-	}
-	return res;
-}
-
 /* snmp items */
 static const struct snmp_mib snmp4_ipstats_list[] = {
 	SNMP_MIB_ITEM("InReceives", IPSTATS_MIB_INRECEIVES),
@@ -266,8 +253,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 
 	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) ip_statistics,
-				      snmp4_ipstats_list[i].entry));
+			   snmp_fold_field((void **)ip_statistics,
+					   snmp4_ipstats_list[i].entry));
 
 	seq_puts(seq, "\nIcmp:");
 	for (i = 0; snmp4_icmp_list[i].name != NULL; i++)
@@ -276,8 +263,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nIcmp:");
 	for (i = 0; snmp4_icmp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) icmp_statistics,
-				      snmp4_icmp_list[i].entry));
+			   snmp_fold_field((void **)icmp_statistics,
+					   snmp4_icmp_list[i].entry));
 
 	seq_puts(seq, "\nTcp:");
 	for (i = 0; snmp4_tcp_list[i].name != NULL; i++)
@@ -288,12 +275,12 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 		/* MaxConn field is signed, RFC 2012 */
 		if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
 			seq_printf(seq, " %ld",
-				   fold_field((void **) tcp_statistics,
-					      snmp4_tcp_list[i].entry));
+				   snmp_fold_field((void **)tcp_statistics,
+						   snmp4_tcp_list[i].entry));
 		else
 			seq_printf(seq, " %lu",
-				   fold_field((void **) tcp_statistics,
-					      snmp4_tcp_list[i].entry));
+				   snmp_fold_field((void **)tcp_statistics,
+						   snmp4_tcp_list[i].entry));
 	}
 
 	seq_puts(seq, "\nUdp:");
@@ -303,8 +290,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nUdp:");
 	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) udp_statistics,
-				      snmp4_udp_list[i].entry));
+			   snmp_fold_field((void **)udp_statistics,
+					   snmp4_udp_list[i].entry));
 
 	/* the UDP and UDP-Lite MIBs are the same */
 	seq_puts(seq, "\nUdpLite:");
@@ -314,8 +301,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nUdpLite:");
 	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) udplite_statistics,
-				      snmp4_udp_list[i].entry) );
+			   snmp_fold_field((void **)udplite_statistics,
+					   snmp4_udp_list[i].entry));
 
 	seq_putc(seq, '\n');
 	return 0;
@@ -348,8 +335,8 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nTcpExt:");
 	for (i = 0; snmp4_net_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) net_statistics,
-				      snmp4_net_list[i].entry));
+			   snmp_fold_field((void **)net_statistics,
+					   snmp4_net_list[i].entry));
 
 	seq_putc(seq, '\n');
 	return 0;
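The deleted fold_field() duplicated an already-exported helper; snmp_fold_field() sums one counter offset across both per-CPU halves of a MIB on every possible CPU, with essentially the same body as the removed code:

	unsigned long snmp_fold_field(void *mib[], int offt)
	{
		unsigned long res = 0;
		int i;

		/* each MIB keeps two per-CPU arrays (BH and non-BH);
		   fold the counter at byte offset 'offt' from both */
		for_each_possible_cpu(i) {
			res += *(((unsigned long *)per_cpu_ptr(mib[0], i)) + offt);
			res += *(((unsigned long *)per_cpu_ptr(mib[1], i)) + offt);
		}
		return res;
	}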
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index da70fef82c93..971ab9356e51 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -45,7 +45,7 @@
 #include <net/ipip.h>
 #include <linux/igmp.h>
 
-struct net_protocol *inet_protos[MAX_INET_PROTOS];
+struct net_protocol *inet_protos[MAX_INET_PROTOS] ____cacheline_aligned_in_smp;
 static DEFINE_SPINLOCK(inet_proto_lock);
 
 /*
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 87e9c1618100..24d7c9f31918 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -132,7 +132,7 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
 	if (!pskb_may_pull(skb, sizeof(struct icmphdr)))
 		return 1;
 
-	type = skb->h.icmph->type;
+	type = icmp_hdr(skb)->type;
 	if (type < 32) {
 		__u32 data = raw_sk(sk)->filter.data;
 
@@ -184,8 +184,8 @@ out:
 void raw_err (struct sock *sk, struct sk_buff *skb, u32 info)
 {
 	struct inet_sock *inet = inet_sk(sk);
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	int err = 0;
 	int harderr = 0;
 
@@ -256,7 +256,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
 	}
 	nf_reset(skb);
 
-	skb_push(skb, skb->data - skb->nh.raw);
+	skb_push(skb, skb->data - skb_network_header(skb));
 
 	raw_rcv_skb(sk, skb);
 	return 0;
@@ -291,11 +291,13 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 	skb->priority = sk->sk_priority;
 	skb->dst = dst_clone(&rt->u.dst);
 
-	skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);
+	skb_reset_network_header(skb);
+	iph = ip_hdr(skb);
+	skb_put(skb, length);
 
 	skb->ip_summed = CHECKSUM_NONE;
 
-	skb->h.raw = skb->nh.raw;
+	skb->transport_header = skb->network_header;
 	err = memcpy_fromiovecend((void *)iph, from, 0, length);
 	if (err)
 		goto error_fault;
@@ -613,7 +615,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	/* Copy the address. */
 	if (sin) {
 		sin->sin_family = AF_INET;
-		sin->sin_addr.s_addr = skb->nh.iph->saddr;
+		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
 		sin->sin_port = 0;
 		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
 	}
@@ -887,7 +889,7 @@ static int raw_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations raw_seq_ops = {
+static const struct seq_operations raw_seq_ops = {
 	.start = raw_seq_start,
 	.next  = raw_seq_next,
 	.stop  = raw_seq_stop,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 37e0d4d5cf94..cb76e3c725a0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -82,7 +82,6 @@
 #include <linux/proc_fs.h>
 #include <linux/init.h>
 #include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
 #include <linux/inetdevice.h>
 #include <linux/igmp.h>
 #include <linux/pkt_sched.h>
@@ -104,6 +103,7 @@
 #include <net/xfrm.h>
 #include <net/ip_mp_alg.h>
 #include <net/netevent.h>
+#include <net/rtnetlink.h>
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
 #endif
@@ -364,7 +364,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations rt_cache_seq_ops = {
+static const struct seq_operations rt_cache_seq_ops = {
 	.start  = rt_cache_seq_start,
 	.next   = rt_cache_seq_next,
 	.stop   = rt_cache_seq_stop,
@@ -470,7 +470,7 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations rt_cpu_seq_ops = {
+static const struct seq_operations rt_cpu_seq_ops = {
 	.start  = rt_cpu_seq_start,
 	.next   = rt_cpu_seq_next,
 	.stop   = rt_cpu_seq_stop,
@@ -1519,7 +1519,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
 static int ip_rt_bug(struct sk_buff *skb)
 {
 	printk(KERN_DEBUG "ip_rt_bug: %u.%u.%u.%u -> %u.%u.%u.%u, %s\n",
-		NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr),
+		NIPQUAD(ip_hdr(skb)->saddr), NIPQUAD(ip_hdr(skb)->daddr),
 		skb->dev ? skb->dev->name : "?");
 	kfree_skb(skb);
 	return 0;
@@ -1698,9 +1698,9 @@ static void ip_handle_martian_source(struct net_device *dev,
 		printk(KERN_WARNING "martian source %u.%u.%u.%u from "
 			"%u.%u.%u.%u, on dev %s\n",
 			NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
-		if (dev->hard_header_len && skb->mac.raw) {
+		if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
 			int i;
-			unsigned char *p = skb->mac.raw;
+			const unsigned char *p = skb_mac_header(skb);
 			printk(KERN_WARNING "ll header: ");
 			for (i = 0; i < dev->hard_header_len; i++, p++) {
 				printk("%02x", *p);
@@ -2134,7 +2134,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rcu_read_lock();
 	if ((in_dev = __in_dev_get_rcu(dev)) != NULL) {
 		int our = ip_check_mc(in_dev, daddr, saddr,
-				      skb->nh.iph->protocol);
+				      ip_hdr(skb)->protocol);
 		if (our
 #ifdef CONFIG_IP_MROUTE
 		    || (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
@@ -2396,7 +2396,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 
 		/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
 		dev_out = ip_dev_find(oldflp->fl4_src);
-		if (dev_out == NULL)
+		if ((dev_out == NULL) && !(sysctl_ip_nonlocal_bind))
 			goto out;
 
 		/* I removed check for oif == dev_out->oif here.
@@ -2407,7 +2407,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 		   of another iface. --ANK
 		 */
 
-		if (oldflp->oif == 0
+		if (dev_out && oldflp->oif == 0
 		    && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
 			/* Special hack: user can direct multicasts
 			   and limited broadcast via necessary interface
@@ -2683,7 +2683,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 		id = rt->peer->ip_id_count;
 		if (rt->peer->tcp_ts_stamp) {
 			ts = rt->peer->tcp_ts;
-			tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp;
+			tsage = get_seconds() - rt->peer->tcp_ts_stamp;
 		}
 	}
 
@@ -2721,7 +2721,7 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
+static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct rtmsg *rtm;
 	struct nlattr *tb[RTA_MAX+1];
@@ -2747,10 +2747,11 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 	/* Reserve room for dummy headers, this skb can pass
 	   through good chunk of routing engine.
 	 */
-	skb->mac.raw = skb->nh.raw = skb->data;
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
 
 	/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
-	skb->nh.iph->protocol = IPPROTO_ICMP;
+	ip_hdr(skb)->protocol = IPPROTO_ICMP;
 	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
 
 	src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
@@ -3193,6 +3194,8 @@ int __init ip_rt_init(void)
 	xfrm_init();
 	xfrm4_init();
 #endif
+	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL);
+
 	return rc;
 }
 
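inet_rtm_getroute() can become static because it is now registered with the rtnetlink dispatcher rather than referenced from net/core. The registration takes the protocol family, the message type, a doit() handler, and an optional dumpit() handler (a sketch of the call from ip_rt_init(), as in the hunk above):

	/* no dump handler for RTM_GETROUTE, hence NULL */
	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL);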
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 33016cc90f0b..2da1be0589a9 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -125,10 +125,11 @@ static __u16 const msstab[] = {
125__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) 125__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
126{ 126{
127 struct tcp_sock *tp = tcp_sk(sk); 127 struct tcp_sock *tp = tcp_sk(sk);
128 const struct iphdr *iph = ip_hdr(skb);
129 const struct tcphdr *th = tcp_hdr(skb);
128 int mssind; 130 int mssind;
129 const __u16 mss = *mssp; 131 const __u16 mss = *mssp;
130 132
131
132 tp->last_synq_overflow = jiffies; 133 tp->last_synq_overflow = jiffies;
133 134
134 /* XXX sort msstab[] by probability? Binary search? */ 135 /* XXX sort msstab[] by probability? Binary search? */
@@ -138,9 +139,8 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
138 139
139 NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT); 140 NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT);
140 141
141 return secure_tcp_syn_cookie(skb->nh.iph->saddr, skb->nh.iph->daddr, 142 return secure_tcp_syn_cookie(iph->saddr, iph->daddr,
142 skb->h.th->source, skb->h.th->dest, 143 th->source, th->dest, ntohl(th->seq),
143 ntohl(skb->h.th->seq),
144 jiffies / (HZ * 60), mssind); 144 jiffies / (HZ * 60), mssind);
145} 145}
146 146
@@ -157,14 +157,13 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
157 */ 157 */
158static inline int cookie_check(struct sk_buff *skb, __u32 cookie) 158static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
159{ 159{
160 __u32 seq; 160 const struct iphdr *iph = ip_hdr(skb);
161 __u32 mssind; 161 const struct tcphdr *th = tcp_hdr(skb);
162 162 __u32 seq = ntohl(th->seq) - 1;
163 seq = ntohl(skb->h.th->seq)-1; 163 __u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr,
164 mssind = check_tcp_syn_cookie(cookie, 164 th->source, th->dest, seq,
165 skb->nh.iph->saddr, skb->nh.iph->daddr, 165 jiffies / (HZ * 60),
166 skb->h.th->source, skb->h.th->dest, 166 COUNTER_TRIES);
167 seq, jiffies / (HZ * 60), COUNTER_TRIES);
168 167
169 return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; 168 return mssind < NUM_MSS ? msstab[mssind] + 1 : 0;
170} 169}
@@ -191,14 +190,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
191 struct inet_request_sock *ireq; 190 struct inet_request_sock *ireq;
192 struct tcp_request_sock *treq; 191 struct tcp_request_sock *treq;
193 struct tcp_sock *tp = tcp_sk(sk); 192 struct tcp_sock *tp = tcp_sk(sk);
194 __u32 cookie = ntohl(skb->h.th->ack_seq) - 1; 193 const struct tcphdr *th = tcp_hdr(skb);
194 __u32 cookie = ntohl(th->ack_seq) - 1;
195 struct sock *ret = sk; 195 struct sock *ret = sk;
196 struct request_sock *req; 196 struct request_sock *req;
197 int mss; 197 int mss;
198 struct rtable *rt; 198 struct rtable *rt;
199 __u8 rcv_wscale; 199 __u8 rcv_wscale;
200 200
201 if (!sysctl_tcp_syncookies || !skb->h.th->ack) 201 if (!sysctl_tcp_syncookies || !th->ack)
202 goto out; 202 goto out;
203 203
204 if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) || 204 if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) ||
@@ -220,12 +220,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
220 } 220 }
221 ireq = inet_rsk(req); 221 ireq = inet_rsk(req);
222 treq = tcp_rsk(req); 222 treq = tcp_rsk(req);
223 treq->rcv_isn = ntohl(skb->h.th->seq) - 1; 223 treq->rcv_isn = ntohl(th->seq) - 1;
224 treq->snt_isn = cookie; 224 treq->snt_isn = cookie;
225 req->mss = mss; 225 req->mss = mss;
226 ireq->rmt_port = skb->h.th->source; 226 ireq->rmt_port = th->source;
227 ireq->loc_addr = skb->nh.iph->daddr; 227 ireq->loc_addr = ip_hdr(skb)->daddr;
228 ireq->rmt_addr = skb->nh.iph->saddr; 228 ireq->rmt_addr = ip_hdr(skb)->saddr;
229 ireq->opt = NULL; 229 ireq->opt = NULL;
230 230
 231 /* We threw the options of the initial SYN away, so we hope 231 /* We threw the options of the initial SYN away, so we hope
@@ -261,8 +261,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
261 .tos = RT_CONN_FLAGS(sk) } }, 261 .tos = RT_CONN_FLAGS(sk) } },
262 .proto = IPPROTO_TCP, 262 .proto = IPPROTO_TCP,
263 .uli_u = { .ports = 263 .uli_u = { .ports =
264 { .sport = skb->h.th->dest, 264 { .sport = th->dest,
265 .dport = skb->h.th->source } } }; 265 .dport = th->source } } };
266 security_req_classify_flow(req, &fl); 266 security_req_classify_flow(req, &fl);
267 if (ip_route_output_key(&rt, &fl)) { 267 if (ip_route_output_key(&rt, &fl)) {
268 reqsk_free(req); 268 reqsk_free(req);
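
The syncookies hunks above show the conversion pattern used throughout
this series: the old skb->nh.iph / skb->h.th union accesses become the
typed ip_hdr()/tcp_hdr() accessors, and when a header is touched more
than once the pointer is hoisted into a const local. A sketch of the
idiom (example_flow_hash is a hypothetical consumer):

	static u32 example_flow_hash(const struct sk_buff *skb)
	{
		const struct iphdr *iph = ip_hdr(skb);	/* was skb->nh.iph */
		const struct tcphdr *th = tcp_hdr(skb);	/* was skb->h.th */

		return iph->saddr ^ iph->daddr ^ (th->source << 16) ^ th->dest;
	}
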
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 0aa304711a96..6817d6485df5 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -647,6 +647,14 @@ ctl_table ipv4_table[] = {
647 .proc_handler = &proc_dointvec 647 .proc_handler = &proc_dointvec
648 }, 648 },
649 { 649 {
650 .ctl_name = NET_TCP_FRTO_RESPONSE,
651 .procname = "tcp_frto_response",
652 .data = &sysctl_tcp_frto_response,
653 .maxlen = sizeof(int),
654 .mode = 0644,
655 .proc_handler = &proc_dointvec
656 },
657 {
650 .ctl_name = NET_TCP_LOW_LATENCY, 658 .ctl_name = NET_TCP_LOW_LATENCY,
651 .procname = "tcp_low_latency", 659 .procname = "tcp_low_latency",
652 .data = &sysctl_tcp_low_latency, 660 .data = &sysctl_tcp_low_latency,
@@ -803,6 +811,14 @@ ctl_table ipv4_table[] = {
803 .proc_handler = &proc_allowed_congestion_control, 811 .proc_handler = &proc_allowed_congestion_control,
804 .strategy = &strategy_allowed_congestion_control, 812 .strategy = &strategy_allowed_congestion_control,
805 }, 813 },
814 {
815 .ctl_name = NET_TCP_MAX_SSTHRESH,
816 .procname = "tcp_max_ssthresh",
817 .data = &sysctl_tcp_max_ssthresh,
818 .maxlen = sizeof(int),
819 .mode = 0644,
820 .proc_handler = &proc_dointvec,
821 },
806 { .ctl_name = 0 } 822 { .ctl_name = 0 }
807}; 823};
808 824
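
Both additions follow the stock ctl_table recipe: an int backing
variable exported from the TCP code (sysctl_tcp_frto_response and
sysctl_tcp_max_ssthresh appear in the tcp_input.c and tcp_cong.c hunks
below), mode 0644, parsed by proc_dointvec, so the knobs surface as
/proc/sys/net/ipv4/tcp_frto_response and tcp_max_ssthresh. The same
recipe for a hypothetical knob would read:

	int sysctl_tcp_example __read_mostly;	/* hypothetical variable */

	static ctl_table tcp_example_entry = {
		.ctl_name	= NET_TCP_EXAMPLE,	/* hypothetical id */
		.procname	= "tcp_example",
		.data		= &sysctl_tcp_example,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	};
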
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3834b10b5115..2cf9a898ce50 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -297,7 +297,7 @@ EXPORT_SYMBOL(tcp_sockets_allocated);
297 * All the sk_stream_mem_schedule() is of this nature: accounting 297 * All the sk_stream_mem_schedule() is of this nature: accounting
298 * is strict, actions are advisory and have some latency. 298 * is strict, actions are advisory and have some latency.
299 */ 299 */
300int tcp_memory_pressure; 300int tcp_memory_pressure __read_mostly;
301 301
302EXPORT_SYMBOL(tcp_memory_pressure); 302EXPORT_SYMBOL(tcp_memory_pressure);
303 303
@@ -425,7 +425,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
425 /* Subtract 1, if FIN is in queue. */ 425 /* Subtract 1, if FIN is in queue. */
426 if (answ && !skb_queue_empty(&sk->sk_receive_queue)) 426 if (answ && !skb_queue_empty(&sk->sk_receive_queue))
427 answ -= 427 answ -=
428 ((struct sk_buff *)sk->sk_receive_queue.prev)->h.th->fin; 428 tcp_hdr((struct sk_buff *)sk->sk_receive_queue.prev)->fin;
429 } else 429 } else
430 answ = tp->urg_seq - tp->copied_seq; 430 answ = tp->urg_seq - tp->copied_seq;
431 release_sock(sk); 431 release_sock(sk);
@@ -444,7 +444,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
444 break; 444 break;
445 default: 445 default:
446 return -ENOIOCTLCMD; 446 return -ENOIOCTLCMD;
447 }; 447 }
448 448
449 return put_user(answ, (int __user *)arg); 449 return put_user(answ, (int __user *)arg);
450} 450}
@@ -460,9 +460,9 @@ static inline int forced_push(struct tcp_sock *tp)
460 return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1)); 460 return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
461} 461}
462 462
463static inline void skb_entail(struct sock *sk, struct tcp_sock *tp, 463static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
464 struct sk_buff *skb)
465{ 464{
465 struct tcp_sock *tp = tcp_sk(sk);
466 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); 466 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
467 467
468 skb->csum = 0; 468 skb->csum = 0;
@@ -470,10 +470,8 @@ static inline void skb_entail(struct sock *sk, struct tcp_sock *tp,
470 tcb->flags = TCPCB_FLAG_ACK; 470 tcb->flags = TCPCB_FLAG_ACK;
471 tcb->sacked = 0; 471 tcb->sacked = 0;
472 skb_header_release(skb); 472 skb_header_release(skb);
473 __skb_queue_tail(&sk->sk_write_queue, skb); 473 tcp_add_write_queue_tail(sk, skb);
474 sk_charge_skb(sk, skb); 474 sk_charge_skb(sk, skb);
475 if (!sk->sk_send_head)
476 sk->sk_send_head = skb;
477 if (tp->nonagle & TCP_NAGLE_PUSH) 475 if (tp->nonagle & TCP_NAGLE_PUSH)
478 tp->nonagle &= ~TCP_NAGLE_PUSH; 476 tp->nonagle &= ~TCP_NAGLE_PUSH;
479} 477}
@@ -488,15 +486,17 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
488 } 486 }
489} 487}
490 488
491static inline void tcp_push(struct sock *sk, struct tcp_sock *tp, int flags, 489static inline void tcp_push(struct sock *sk, int flags, int mss_now,
492 int mss_now, int nonagle) 490 int nonagle)
493{ 491{
494 if (sk->sk_send_head) { 492 struct tcp_sock *tp = tcp_sk(sk);
495 struct sk_buff *skb = sk->sk_write_queue.prev; 493
494 if (tcp_send_head(sk)) {
495 struct sk_buff *skb = tcp_write_queue_tail(sk);
496 if (!(flags & MSG_MORE) || forced_push(tp)) 496 if (!(flags & MSG_MORE) || forced_push(tp))
497 tcp_mark_push(tp, skb); 497 tcp_mark_push(tp, skb);
498 tcp_mark_urg(tp, flags, skb); 498 tcp_mark_urg(tp, flags, skb);
499 __tcp_push_pending_frames(sk, tp, mss_now, 499 __tcp_push_pending_frames(sk, mss_now,
500 (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle); 500 (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
501 } 501 }
502} 502}
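
The skb_entail() and tcp_push() hunks above are the visible half of a
wider cleanup: callers stop poking sk->sk_send_head and
sk->sk_write_queue directly and go through named helpers instead. The
helpers themselves are not part of this excerpt; judging from the
open-coded logic they replace (note the removed send_head assignment in
skb_entail), they are likely shaped roughly like this sketch:

	static inline struct sk_buff *tcp_send_head(struct sock *sk)
	{
		return sk->sk_send_head;	/* first not-yet-sent skb */
	}

	static inline struct sk_buff *tcp_write_queue_tail(struct sock *sk)
	{
		return skb_peek_tail(&sk->sk_write_queue);
	}

	static inline void tcp_add_write_queue_tail(struct sock *sk,
						    struct sk_buff *skb)
	{
		__skb_queue_tail(&sk->sk_write_queue, skb);
		if (!sk->sk_send_head)	/* everything was sent already */
			sk->sk_send_head = skb;
	}
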
@@ -526,13 +526,13 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
526 goto do_error; 526 goto do_error;
527 527
528 while (psize > 0) { 528 while (psize > 0) {
529 struct sk_buff *skb = sk->sk_write_queue.prev; 529 struct sk_buff *skb = tcp_write_queue_tail(sk);
530 struct page *page = pages[poffset / PAGE_SIZE]; 530 struct page *page = pages[poffset / PAGE_SIZE];
531 int copy, i, can_coalesce; 531 int copy, i, can_coalesce;
532 int offset = poffset % PAGE_SIZE; 532 int offset = poffset % PAGE_SIZE;
533 int size = min_t(size_t, psize, PAGE_SIZE - offset); 533 int size = min_t(size_t, psize, PAGE_SIZE - offset);
534 534
535 if (!sk->sk_send_head || (copy = size_goal - skb->len) <= 0) { 535 if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) {
536new_segment: 536new_segment:
537 if (!sk_stream_memory_free(sk)) 537 if (!sk_stream_memory_free(sk))
538 goto wait_for_sndbuf; 538 goto wait_for_sndbuf;
@@ -542,7 +542,7 @@ new_segment:
542 if (!skb) 542 if (!skb)
543 goto wait_for_memory; 543 goto wait_for_memory;
544 544
545 skb_entail(sk, tp, skb); 545 skb_entail(sk, skb);
546 copy = size_goal; 546 copy = size_goal;
547 } 547 }
548 548
@@ -588,8 +588,8 @@ new_segment:
588 588
589 if (forced_push(tp)) { 589 if (forced_push(tp)) {
590 tcp_mark_push(tp, skb); 590 tcp_mark_push(tp, skb);
591 __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH); 591 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
592 } else if (skb == sk->sk_send_head) 592 } else if (skb == tcp_send_head(sk))
593 tcp_push_one(sk, mss_now); 593 tcp_push_one(sk, mss_now);
594 continue; 594 continue;
595 595
@@ -597,7 +597,7 @@ wait_for_sndbuf:
597 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 597 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
598wait_for_memory: 598wait_for_memory:
599 if (copied) 599 if (copied)
600 tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); 600 tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
601 601
602 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) 602 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
603 goto do_error; 603 goto do_error;
@@ -608,7 +608,7 @@ wait_for_memory:
608 608
609out: 609out:
610 if (copied) 610 if (copied)
611 tcp_push(sk, tp, flags, mss_now, tp->nonagle); 611 tcp_push(sk, flags, mss_now, tp->nonagle);
612 return copied; 612 return copied;
613 613
614do_error: 614do_error:
@@ -639,8 +639,9 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
639#define TCP_PAGE(sk) (sk->sk_sndmsg_page) 639#define TCP_PAGE(sk) (sk->sk_sndmsg_page)
640#define TCP_OFF(sk) (sk->sk_sndmsg_off) 640#define TCP_OFF(sk) (sk->sk_sndmsg_off)
641 641
642static inline int select_size(struct sock *sk, struct tcp_sock *tp) 642static inline int select_size(struct sock *sk)
643{ 643{
644 struct tcp_sock *tp = tcp_sk(sk);
644 int tmp = tp->mss_cache; 645 int tmp = tp->mss_cache;
645 646
646 if (sk->sk_route_caps & NETIF_F_SG) { 647 if (sk->sk_route_caps & NETIF_F_SG) {
@@ -704,9 +705,9 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
704 while (seglen > 0) { 705 while (seglen > 0) {
705 int copy; 706 int copy;
706 707
707 skb = sk->sk_write_queue.prev; 708 skb = tcp_write_queue_tail(sk);
708 709
709 if (!sk->sk_send_head || 710 if (!tcp_send_head(sk) ||
710 (copy = size_goal - skb->len) <= 0) { 711 (copy = size_goal - skb->len) <= 0) {
711 712
712new_segment: 713new_segment:
@@ -716,7 +717,7 @@ new_segment:
716 if (!sk_stream_memory_free(sk)) 717 if (!sk_stream_memory_free(sk))
717 goto wait_for_sndbuf; 718 goto wait_for_sndbuf;
718 719
719 skb = sk_stream_alloc_pskb(sk, select_size(sk, tp), 720 skb = sk_stream_alloc_pskb(sk, select_size(sk),
720 0, sk->sk_allocation); 721 0, sk->sk_allocation);
721 if (!skb) 722 if (!skb)
722 goto wait_for_memory; 723 goto wait_for_memory;
@@ -727,7 +728,7 @@ new_segment:
727 if (sk->sk_route_caps & NETIF_F_ALL_CSUM) 728 if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
728 skb->ip_summed = CHECKSUM_PARTIAL; 729 skb->ip_summed = CHECKSUM_PARTIAL;
729 730
730 skb_entail(sk, tp, skb); 731 skb_entail(sk, skb);
731 copy = size_goal; 732 copy = size_goal;
732 } 733 }
733 734
@@ -832,8 +833,8 @@ new_segment:
832 833
833 if (forced_push(tp)) { 834 if (forced_push(tp)) {
834 tcp_mark_push(tp, skb); 835 tcp_mark_push(tp, skb);
835 __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH); 836 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
836 } else if (skb == sk->sk_send_head) 837 } else if (skb == tcp_send_head(sk))
837 tcp_push_one(sk, mss_now); 838 tcp_push_one(sk, mss_now);
838 continue; 839 continue;
839 840
@@ -841,7 +842,7 @@ wait_for_sndbuf:
841 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 842 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
842wait_for_memory: 843wait_for_memory:
843 if (copied) 844 if (copied)
844 tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); 845 tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
845 846
846 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) 847 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
847 goto do_error; 848 goto do_error;
@@ -853,16 +854,18 @@ wait_for_memory:
853 854
854out: 855out:
855 if (copied) 856 if (copied)
856 tcp_push(sk, tp, flags, mss_now, tp->nonagle); 857 tcp_push(sk, flags, mss_now, tp->nonagle);
857 TCP_CHECK_TIMER(sk); 858 TCP_CHECK_TIMER(sk);
858 release_sock(sk); 859 release_sock(sk);
859 return copied; 860 return copied;
860 861
861do_fault: 862do_fault:
862 if (!skb->len) { 863 if (!skb->len) {
863 if (sk->sk_send_head == skb) 864 tcp_unlink_write_queue(skb, sk);
864 sk->sk_send_head = NULL; 865 /* It is the one place in all of TCP, except connection
865 __skb_unlink(skb, &sk->sk_write_queue); 866 * reset, where we can be unlinking the send_head.
867 */
868 tcp_check_send_head(sk, skb);
866 sk_stream_free_skb(sk, skb); 869 sk_stream_free_skb(sk, skb);
867 } 870 }
868 871
@@ -1016,9 +1019,9 @@ static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
1016 1019
1017 skb_queue_walk(&sk->sk_receive_queue, skb) { 1020 skb_queue_walk(&sk->sk_receive_queue, skb) {
1018 offset = seq - TCP_SKB_CB(skb)->seq; 1021 offset = seq - TCP_SKB_CB(skb)->seq;
1019 if (skb->h.th->syn) 1022 if (tcp_hdr(skb)->syn)
1020 offset--; 1023 offset--;
1021 if (offset < skb->len || skb->h.th->fin) { 1024 if (offset < skb->len || tcp_hdr(skb)->fin) {
1022 *off = offset; 1025 *off = offset;
1023 return skb; 1026 return skb;
1024 } 1027 }
@@ -1070,7 +1073,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
1070 if (offset != skb->len) 1073 if (offset != skb->len)
1071 break; 1074 break;
1072 } 1075 }
1073 if (skb->h.th->fin) { 1076 if (tcp_hdr(skb)->fin) {
1074 sk_eat_skb(sk, skb, 0); 1077 sk_eat_skb(sk, skb, 0);
1075 ++seq; 1078 ++seq;
1076 break; 1079 break;
@@ -1174,11 +1177,11 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1174 break; 1177 break;
1175 } 1178 }
1176 offset = *seq - TCP_SKB_CB(skb)->seq; 1179 offset = *seq - TCP_SKB_CB(skb)->seq;
1177 if (skb->h.th->syn) 1180 if (tcp_hdr(skb)->syn)
1178 offset--; 1181 offset--;
1179 if (offset < skb->len) 1182 if (offset < skb->len)
1180 goto found_ok_skb; 1183 goto found_ok_skb;
1181 if (skb->h.th->fin) 1184 if (tcp_hdr(skb)->fin)
1182 goto found_fin_ok; 1185 goto found_fin_ok;
1183 BUG_TRAP(flags & MSG_PEEK); 1186 BUG_TRAP(flags & MSG_PEEK);
1184 skb = skb->next; 1187 skb = skb->next;
@@ -1389,12 +1392,12 @@ do_prequeue:
1389skip_copy: 1392skip_copy:
1390 if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) { 1393 if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {
1391 tp->urg_data = 0; 1394 tp->urg_data = 0;
1392 tcp_fast_path_check(sk, tp); 1395 tcp_fast_path_check(sk);
1393 } 1396 }
1394 if (used + offset < skb->len) 1397 if (used + offset < skb->len)
1395 continue; 1398 continue;
1396 1399
1397 if (skb->h.th->fin) 1400 if (tcp_hdr(skb)->fin)
1398 goto found_fin_ok; 1401 goto found_fin_ok;
1399 if (!(flags & MSG_PEEK)) { 1402 if (!(flags & MSG_PEEK)) {
1400 sk_eat_skb(sk, skb, copied_early); 1403 sk_eat_skb(sk, skb, copied_early);
@@ -1563,7 +1566,7 @@ void tcp_close(struct sock *sk, long timeout)
1563 */ 1566 */
1564 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { 1567 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
1565 u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq - 1568 u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
1566 skb->h.th->fin; 1569 tcp_hdr(skb)->fin;
1567 data_was_unread += len; 1570 data_was_unread += len;
1568 __kfree_skb(skb); 1571 __kfree_skb(skb);
1569 } 1572 }
@@ -1732,7 +1735,7 @@ int tcp_disconnect(struct sock *sk, int flags)
1732 1735
1733 tcp_clear_xmit_timers(sk); 1736 tcp_clear_xmit_timers(sk);
1734 __skb_queue_purge(&sk->sk_receive_queue); 1737 __skb_queue_purge(&sk->sk_receive_queue);
1735 sk_stream_writequeue_purge(sk); 1738 tcp_write_queue_purge(sk);
1736 __skb_queue_purge(&tp->out_of_order_queue); 1739 __skb_queue_purge(&tp->out_of_order_queue);
1737#ifdef CONFIG_NET_DMA 1740#ifdef CONFIG_NET_DMA
1738 __skb_queue_purge(&sk->sk_async_wait_queue); 1741 __skb_queue_purge(&sk->sk_async_wait_queue);
@@ -1758,7 +1761,7 @@ int tcp_disconnect(struct sock *sk, int flags)
1758 tcp_set_ca_state(sk, TCP_CA_Open); 1761 tcp_set_ca_state(sk, TCP_CA_Open);
1759 tcp_clear_retrans(tp); 1762 tcp_clear_retrans(tp);
1760 inet_csk_delack_init(sk); 1763 inet_csk_delack_init(sk);
1761 sk->sk_send_head = NULL; 1764 tcp_init_send_head(sk);
1762 tp->rx_opt.saw_tstamp = 0; 1765 tp->rx_opt.saw_tstamp = 0;
1763 tcp_sack_reset(&tp->rx_opt); 1766 tcp_sack_reset(&tp->rx_opt);
1764 __sk_dst_reset(sk); 1767 __sk_dst_reset(sk);
@@ -1830,7 +1833,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
1830 * for currently queued segments. 1833 * for currently queued segments.
1831 */ 1834 */
1832 tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH; 1835 tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
1833 tcp_push_pending_frames(sk, tp); 1836 tcp_push_pending_frames(sk);
1834 } else { 1837 } else {
1835 tp->nonagle &= ~TCP_NAGLE_OFF; 1838 tp->nonagle &= ~TCP_NAGLE_OFF;
1836 } 1839 }
@@ -1854,7 +1857,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
1854 tp->nonagle &= ~TCP_NAGLE_CORK; 1857 tp->nonagle &= ~TCP_NAGLE_CORK;
1855 if (tp->nonagle&TCP_NAGLE_OFF) 1858 if (tp->nonagle&TCP_NAGLE_OFF)
1856 tp->nonagle |= TCP_NAGLE_PUSH; 1859 tp->nonagle |= TCP_NAGLE_PUSH;
1857 tcp_push_pending_frames(sk, tp); 1860 tcp_push_pending_frames(sk);
1858 } 1861 }
1859 break; 1862 break;
1860 1863
@@ -1954,7 +1957,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
1954 default: 1957 default:
1955 err = -ENOPROTOOPT; 1958 err = -ENOPROTOOPT;
1956 break; 1959 break;
1957 }; 1960 }
1961
1958 release_sock(sk); 1962 release_sock(sk);
1959 return err; 1963 return err;
1960} 1964}
@@ -2124,7 +2128,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2124 return 0; 2128 return 0;
2125 default: 2129 default:
2126 return -ENOPROTOOPT; 2130 return -ENOPROTOOPT;
2127 }; 2131 }
2128 2132
2129 if (put_user(len, optlen)) 2133 if (put_user(len, optlen))
2130 return -EFAULT; 2134 return -EFAULT;
@@ -2170,7 +2174,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
2170 if (!pskb_may_pull(skb, sizeof(*th))) 2174 if (!pskb_may_pull(skb, sizeof(*th)))
2171 goto out; 2175 goto out;
2172 2176
2173 th = skb->h.th; 2177 th = tcp_hdr(skb);
2174 thlen = th->doff * 4; 2178 thlen = th->doff * 4;
2175 if (thlen < sizeof(*th)) 2179 if (thlen < sizeof(*th))
2176 goto out; 2180 goto out;
@@ -2210,7 +2214,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
2210 delta = htonl(oldlen + (thlen + len)); 2214 delta = htonl(oldlen + (thlen + len));
2211 2215
2212 skb = segs; 2216 skb = segs;
2213 th = skb->h.th; 2217 th = tcp_hdr(skb);
2214 seq = ntohl(th->seq); 2218 seq = ntohl(th->seq);
2215 2219
2216 do { 2220 do {
@@ -2219,23 +2223,25 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
2219 th->check = ~csum_fold((__force __wsum)((__force u32)th->check + 2223 th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
2220 (__force u32)delta)); 2224 (__force u32)delta));
2221 if (skb->ip_summed != CHECKSUM_PARTIAL) 2225 if (skb->ip_summed != CHECKSUM_PARTIAL)
2222 th->check = csum_fold(csum_partial(skb->h.raw, thlen, 2226 th->check =
2223 skb->csum)); 2227 csum_fold(csum_partial(skb_transport_header(skb),
2228 thlen, skb->csum));
2224 2229
2225 seq += len; 2230 seq += len;
2226 skb = skb->next; 2231 skb = skb->next;
2227 th = skb->h.th; 2232 th = tcp_hdr(skb);
2228 2233
2229 th->seq = htonl(seq); 2234 th->seq = htonl(seq);
2230 th->cwr = 0; 2235 th->cwr = 0;
2231 } while (skb->next); 2236 } while (skb->next);
2232 2237
2233 delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len); 2238 delta = htonl(oldlen + (skb->tail - skb->transport_header) +
2239 skb->data_len);
2234 th->check = ~csum_fold((__force __wsum)((__force u32)th->check + 2240 th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
2235 (__force u32)delta)); 2241 (__force u32)delta));
2236 if (skb->ip_summed != CHECKSUM_PARTIAL) 2242 if (skb->ip_summed != CHECKSUM_PARTIAL)
2237 th->check = csum_fold(csum_partial(skb->h.raw, thlen, 2243 th->check = csum_fold(csum_partial(skb_transport_header(skb),
2238 skb->csum)); 2244 thlen, skb->csum));
2239 2245
2240out: 2246out:
2241 return segs; 2247 return segs;
@@ -2372,6 +2378,23 @@ void __tcp_put_md5sig_pool(void)
2372EXPORT_SYMBOL(__tcp_put_md5sig_pool); 2378EXPORT_SYMBOL(__tcp_put_md5sig_pool);
2373#endif 2379#endif
2374 2380
2381void tcp_done(struct sock *sk)
2382{
 2383 if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
2384 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
2385
2386 tcp_set_state(sk, TCP_CLOSE);
2387 tcp_clear_xmit_timers(sk);
2388
2389 sk->sk_shutdown = SHUTDOWN_MASK;
2390
2391 if (!sock_flag(sk, SOCK_DEAD))
2392 sk->sk_state_change(sk);
2393 else
2394 inet_csk_destroy_sock(sk);
2395}
2396EXPORT_SYMBOL_GPL(tcp_done);
2397
2375extern void __skb_cb_too_small_for_tcp(int, int); 2398extern void __skb_cb_too_small_for_tcp(int, int);
2376extern struct tcp_congestion_ops tcp_reno; 2399extern struct tcp_congestion_ops tcp_reno;
2377 2400
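
The new tcp_done() centralizes teardown that call sites otherwise
duplicate: MIB accounting for failed connection attempts, the
transition to TCP_CLOSE, timer teardown, and the wake-up-or-destroy
decision based on SOCK_DEAD. A hedged illustration of a caller
(example_abort is hypothetical; real callers would be paths such as RST
processing):

	static void example_abort(struct sock *sk, int err)
	{
		sk->sk_err = err;			/* e.g. ECONNRESET */
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);
		tcp_done(sk);	/* close, stop timers, signal or destroy */
	}
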
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 5730333cd0ac..281c9f913257 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -206,7 +206,7 @@ static void bictcp_state(struct sock *sk, u8 new_state)
206/* Track delayed acknowledgment ratio using sliding window 206/* Track delayed acknowledgment ratio using sliding window
207 * ratio = (15*ratio + sample) / 16 207 * ratio = (15*ratio + sample) / 16
208 */ 208 */
209static void bictcp_acked(struct sock *sk, u32 cnt) 209static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last)
210{ 210{
211 const struct inet_connection_sock *icsk = inet_csk(sk); 211 const struct inet_connection_sock *icsk = inet_csk(sk);
212 212
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 34ae3f13483a..86b26539e54b 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -12,6 +12,8 @@
12#include <linux/list.h> 12#include <linux/list.h>
13#include <net/tcp.h> 13#include <net/tcp.h>
14 14
15int sysctl_tcp_max_ssthresh = 0;
16
15static DEFINE_SPINLOCK(tcp_cong_list_lock); 17static DEFINE_SPINLOCK(tcp_cong_list_lock);
16static LIST_HEAD(tcp_cong_list); 18static LIST_HEAD(tcp_cong_list);
17 19
@@ -124,7 +126,7 @@ int tcp_set_default_congestion_control(const char *name)
124#endif 126#endif
125 127
126 if (ca) { 128 if (ca) {
127 ca->non_restricted = 1; /* default is always allowed */ 129 ca->flags |= TCP_CONG_NON_RESTRICTED; /* default is always allowed */
128 list_move(&ca->list, &tcp_cong_list); 130 list_move(&ca->list, &tcp_cong_list);
129 ret = 0; 131 ret = 0;
130 } 132 }
@@ -179,7 +181,7 @@ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen)
179 *buf = '\0'; 181 *buf = '\0';
180 rcu_read_lock(); 182 rcu_read_lock();
181 list_for_each_entry_rcu(ca, &tcp_cong_list, list) { 183 list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
182 if (!ca->non_restricted) 184 if (!(ca->flags & TCP_CONG_NON_RESTRICTED))
183 continue; 185 continue;
184 offs += snprintf(buf + offs, maxlen - offs, 186 offs += snprintf(buf + offs, maxlen - offs,
185 "%s%s", 187 "%s%s",
@@ -210,16 +212,16 @@ int tcp_set_allowed_congestion_control(char *val)
210 } 212 }
211 } 213 }
212 214
213 /* pass 2 clear */ 215 /* pass 2 clear old values */
214 list_for_each_entry_rcu(ca, &tcp_cong_list, list) 216 list_for_each_entry_rcu(ca, &tcp_cong_list, list)
215 ca->non_restricted = 0; 217 ca->flags &= ~TCP_CONG_NON_RESTRICTED;
216 218
217 /* pass 3 mark as allowed */ 219 /* pass 3 mark as allowed */
218 while ((name = strsep(&val, " ")) && *name) { 220 while ((name = strsep(&val, " ")) && *name) {
219 ca = tcp_ca_find(name); 221 ca = tcp_ca_find(name);
220 WARN_ON(!ca); 222 WARN_ON(!ca);
221 if (ca) 223 if (ca)
222 ca->non_restricted = 1; 224 ca->flags |= TCP_CONG_NON_RESTRICTED;
223 } 225 }
224out: 226out:
225 spin_unlock(&tcp_cong_list_lock); 227 spin_unlock(&tcp_cong_list_lock);
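
Folding the old int non_restricted field into a flags word costs
nothing at runtime and leaves room for further per-algorithm attributes
(TCP_CONG_RTT_STAMP shows up in the new tcp_illinois.c below). The
resulting test idiom, as a one-liner sketch:

	static int ca_is_unrestricted(const struct tcp_congestion_ops *ca)
	{
		return (ca->flags & TCP_CONG_NON_RESTRICTED) != 0;
	}
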
@@ -254,7 +256,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
254 if (!ca) 256 if (!ca)
255 err = -ENOENT; 257 err = -ENOENT;
256 258
257 else if (!(ca->non_restricted || capable(CAP_NET_ADMIN))) 259 else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || capable(CAP_NET_ADMIN)))
258 err = -EPERM; 260 err = -EPERM;
259 261
260 else if (!try_module_get(ca->owner)) 262 else if (!try_module_get(ca->owner))
@@ -274,10 +276,13 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
274 276
275 277
276/* 278/*
277 * Linear increase during slow start 279 * Slow start (exponential increase) with
280 * RFC3742 Limited Slow Start (fast linear increase) support.
278 */ 281 */
279void tcp_slow_start(struct tcp_sock *tp) 282void tcp_slow_start(struct tcp_sock *tp)
280{ 283{
284 int cnt = 0;
285
281 if (sysctl_tcp_abc) { 286 if (sysctl_tcp_abc) {
282 /* RFC3465: Slow Start 287 /* RFC3465: Slow Start
283 * TCP sender SHOULD increase cwnd by the number of 288 * TCP sender SHOULD increase cwnd by the number of
@@ -286,17 +291,25 @@ void tcp_slow_start(struct tcp_sock *tp)
286 */ 291 */
287 if (tp->bytes_acked < tp->mss_cache) 292 if (tp->bytes_acked < tp->mss_cache)
288 return; 293 return;
289
290 /* We MAY increase by 2 if discovered delayed ack */
291 if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) {
292 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
293 tp->snd_cwnd++;
294 }
295 } 294 }
295
296 if (sysctl_tcp_max_ssthresh > 0 &&
297 tp->snd_cwnd > sysctl_tcp_max_ssthresh)
298 cnt += sysctl_tcp_max_ssthresh>>1;
299 else
300 cnt += tp->snd_cwnd;
301
302 /* RFC3465: We MAY increase by 2 if discovered delayed ack */
303 if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache)
304 cnt <<= 1;
296 tp->bytes_acked = 0; 305 tp->bytes_acked = 0;
297 306
298 if (tp->snd_cwnd < tp->snd_cwnd_clamp) 307 tp->snd_cwnd_cnt += cnt;
299 tp->snd_cwnd++; 308 while (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
309 tp->snd_cwnd_cnt -= tp->snd_cwnd;
310 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
311 tp->snd_cwnd++;
312 }
300} 313}
301EXPORT_SYMBOL_GPL(tcp_slow_start); 314EXPORT_SYMBOL_GPL(tcp_slow_start);
302 315
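
The rewritten tcp_slow_start() keeps the classic exponential behaviour
(cnt = cwnd grows the window by one per ACK, i.e. doubling per RTT)
until cwnd crosses tcp_max_ssthresh, after which growth is capped at
max_ssthresh/2 segments per RTT as RFC3742 prescribes. A stand-alone
sketch of the same arithmetic, assuming a struct that mirrors the
tcp_sock fields used above (the ABC handling is dropped here):

	struct cwnd_state {
		unsigned int snd_cwnd, snd_cwnd_cnt, snd_cwnd_clamp;
	};

	/* called once per ACK while in slow start */
	static void limited_slow_start(struct cwnd_state *tp,
				       unsigned int max_ssthresh)
	{
		unsigned int cnt;

		if (max_ssthresh > 0 && tp->snd_cwnd > max_ssthresh)
			cnt = max_ssthresh >> 1;	/* linear region */
		else
			cnt = tp->snd_cwnd;		/* exponential region */

		tp->snd_cwnd_cnt += cnt;
		while (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
			tp->snd_cwnd_cnt -= tp->snd_cwnd;
			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
				tp->snd_cwnd++;
		}
	}

With max_ssthresh = 100 and cwnd = 400, for instance, each ACK banks 50
credits, so roughly every eighth ACK grows cwnd by one: about 50
segments per RTT instead of 400.
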
@@ -358,8 +371,8 @@ u32 tcp_reno_min_cwnd(const struct sock *sk)
358EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd); 371EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);
359 372
360struct tcp_congestion_ops tcp_reno = { 373struct tcp_congestion_ops tcp_reno = {
374 .flags = TCP_CONG_NON_RESTRICTED,
361 .name = "reno", 375 .name = "reno",
362 .non_restricted = 1,
363 .owner = THIS_MODULE, 376 .owner = THIS_MODULE,
364 .ssthresh = tcp_reno_ssthresh, 377 .ssthresh = tcp_reno_ssthresh,
365 .cong_avoid = tcp_reno_cong_avoid, 378 .cong_avoid = tcp_reno_cong_avoid,
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 9a582fb4ef9f..14224487b16b 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * TCP CUBIC: Binary Increase Congestion control for TCP v2.0 2 * TCP CUBIC: Binary Increase Congestion control for TCP v2.1
3 * 3 *
4 * This is from the implementation of CUBIC TCP in 4 * This is from the implementation of CUBIC TCP in
5 * Injong Rhee, Lisong Xu. 5 * Injong Rhee, Lisong Xu.
@@ -51,8 +51,6 @@ MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_
51module_param(tcp_friendliness, int, 0644); 51module_param(tcp_friendliness, int, 0644);
52MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); 52MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
53 53
54#include <asm/div64.h>
55
56/* BIC TCP Parameters */ 54/* BIC TCP Parameters */
57struct bictcp { 55struct bictcp {
58 u32 cnt; /* increase cwnd by 1 after ACKs */ 56 u32 cnt; /* increase cwnd by 1 after ACKs */
@@ -93,50 +91,51 @@ static void bictcp_init(struct sock *sk)
93 tcp_sk(sk)->snd_ssthresh = initial_ssthresh; 91 tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
94} 92}
95 93
96/* 64bit divisor, dividend and result. dynamic precision */ 94/* calculate the cubic root of x using a table lookup followed by one
97static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor) 95 * Newton-Raphson iteration.
98{ 96 * Avg err ~= 0.195%
99 u_int32_t d = divisor;
100
101 if (divisor > 0xffffffffULL) {
102 unsigned int shift = fls(divisor >> 32);
103
104 d = divisor >> shift;
105 dividend >>= shift;
106 }
107
108 /* avoid 64 bit division if possible */
109 if (dividend >> 32)
110 do_div(dividend, d);
111 else
112 dividend = (uint32_t) dividend / d;
113
114 return dividend;
115}
116
117/*
118 * calculate the cubic root of x using Newton-Raphson
119 */ 97 */
120static u32 cubic_root(u64 a) 98static u32 cubic_root(u64 a)
121{ 99{
122 u32 x, x1; 100 u32 x, b, shift;
123 101 /*
124 /* Initial estimate is based on: 102 * cbrt(x) MSB values for x MSB values in [0..63].
125 * cbrt(x) = exp(log(x) / 3) 103 * Precomputed then refined by hand - Willy Tarreau
104 *
105 * For x in [0..63],
106 * v = cbrt(x << 18) - 1
107 * cbrt(x) = (v[x] + 10) >> 6
126 */ 108 */
127 x = 1u << (fls64(a)/3); 109 static const u8 v[] = {
110 /* 0x00 */ 0, 54, 54, 54, 118, 118, 118, 118,
111 /* 0x08 */ 123, 129, 134, 138, 143, 147, 151, 156,
112 /* 0x10 */ 157, 161, 164, 168, 170, 173, 176, 179,
113 /* 0x18 */ 181, 185, 187, 190, 192, 194, 197, 199,
114 /* 0x20 */ 200, 202, 204, 206, 209, 211, 213, 215,
115 /* 0x28 */ 217, 219, 221, 222, 224, 225, 227, 229,
116 /* 0x30 */ 231, 232, 234, 236, 237, 239, 240, 242,
117 /* 0x38 */ 244, 245, 246, 248, 250, 251, 252, 254,
118 };
119
120 b = fls64(a);
121 if (b < 7) {
122 /* a in [0..63] */
123 return ((u32)v[(u32)a] + 35) >> 6;
124 }
125
126 b = ((b * 84) >> 8) - 1;
127 shift = (a >> (b * 3));
128
129 x = ((u32)(((u32)v[shift] + 10) << b)) >> 6;
128 130
129 /* 131 /*
130 * Iteration based on: 132 * Newton-Raphson iteration
131 * 2 133 * 2
132 * x = ( 2 * x + a / x ) / 3 134 * x = ( 2 * x + a / x ) / 3
133 * k+1 k k 135 * k+1 k k
134 */ 136 */
135 do { 137 x = (2 * x + (u32)div64_64(a, (u64)x * (u64)(x - 1)));
136 x1 = x; 138 x = ((x * 341) >> 10);
137 x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3;
138 } while (abs(x1 - x) > 1);
139
140 return x; 139 return x;
141} 140}
142 141
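
The new cubic_root() trades the old iterate-until-converged loop for a
table seed good to a few percent plus a single Newton step
(x = (2*x + a/x^2) / 3, where the multiply by 341/1024 is the divide by
three and x*(x-1) stands in for x^2 in integer math). A quick
user-space check of the same code path, with plain 64-bit division
standing in for the kernel's div64_64():

	#include <stdio.h>
	#include <stdint.h>

	static int fls64_(uint64_t x)
	{
		int b = 0;
		while (x) { b++; x >>= 1; }
		return b;
	}

	static uint32_t cubic_root(uint64_t a)
	{
		static const uint8_t v[] = {	/* table from the hunk above */
			  0,  54,  54,  54, 118, 118, 118, 118,
			123, 129, 134, 138, 143, 147, 151, 156,
			157, 161, 164, 168, 170, 173, 176, 179,
			181, 185, 187, 190, 192, 194, 197, 199,
			200, 202, 204, 206, 209, 211, 213, 215,
			217, 219, 221, 222, 224, 225, 227, 229,
			231, 232, 234, 236, 237, 239, 240, 242,
			244, 245, 246, 248, 250, 251, 252, 254,
		};
		uint32_t x, b, shift;

		b = fls64_(a);
		if (b < 7)			/* a in [0..63] */
			return ((uint32_t)v[(uint32_t)a] + 35) >> 6;

		b = ((b * 84) >> 8) - 1;
		shift = (uint32_t)(a >> (b * 3));
		x = ((uint32_t)(((uint32_t)v[shift] + 10) << b)) >> 6;

		/* one Newton-Raphson refinement step */
		x = 2 * x + (uint32_t)(a / ((uint64_t)x * (x - 1)));
		x = (x * 341) >> 10;
		return x;
	}

	int main(void)
	{
		printf("%u %u %u\n", cubic_root(27),
		       cubic_root(1000), cubic_root(1000000));
		/* prints: 3 10 100 */
		return 0;
	}
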
@@ -215,7 +214,9 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
215 if (ca->delay_min > 0) { 214 if (ca->delay_min > 0) {
216 /* max increment = Smax * rtt / 0.1 */ 215 /* max increment = Smax * rtt / 0.1 */
217 min_cnt = (cwnd * HZ * 8)/(10 * max_increment * ca->delay_min); 216 min_cnt = (cwnd * HZ * 8)/(10 * max_increment * ca->delay_min);
218 if (ca->cnt < min_cnt) 217
218 /* use concave growth when the target is above the origin */
219 if (ca->cnt < min_cnt && t >= ca->bic_K)
219 ca->cnt = min_cnt; 220 ca->cnt = min_cnt;
220 } 221 }
221 222
@@ -333,7 +334,7 @@ static void bictcp_state(struct sock *sk, u8 new_state)
333/* Track delayed acknowledgment ratio using sliding window 334/* Track delayed acknowledgment ratio using sliding window
334 * ratio = (15*ratio + sample) / 16 335 * ratio = (15*ratio + sample) / 16
335 */ 336 */
336static void bictcp_acked(struct sock *sk, u32 cnt) 337static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last)
337{ 338{
338 const struct inet_connection_sock *icsk = inet_csk(sk); 339 const struct inet_connection_sock *icsk = inet_csk(sk);
339 340
@@ -401,4 +402,4 @@ module_exit(cubictcp_unregister);
401MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); 402MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
402MODULE_LICENSE("GPL"); 403MODULE_LICENSE("GPL");
403MODULE_DESCRIPTION("CUBIC TCP"); 404MODULE_DESCRIPTION("CUBIC TCP");
404MODULE_VERSION("2.0"); 405MODULE_VERSION("2.1");
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 1020eb48d8d1..4ba4a7ae0a85 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -98,7 +98,7 @@ static inline void measure_rtt(struct sock *sk)
98 } 98 }
99} 99}
100 100
101static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked) 101static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, ktime_t last)
102{ 102{
103 const struct inet_connection_sock *icsk = inet_csk(sk); 103 const struct inet_connection_sock *icsk = inet_csk(sk);
104 const struct tcp_sock *tp = tcp_sk(sk); 104 const struct tcp_sock *tp = tcp_sk(sk);
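
The new ktime_t argument threaded through the pkts_acked hooks above
(bictcp_acked, measure_achieved_throughput) carries a timestamp from
which a module can derive its own RTT sample; judging from how the new
tcp_illinois.c below consumes it, it is the transmit time of the most
recently acked segment. The essential step, as a sketch
(example_pkts_acked is hypothetical):

	static void example_pkts_acked(struct sock *sk, u32 pkts_acked,
				       ktime_t last)
	{
		/* microseconds elapsed between 'last' and now */
		u32 rtt_us = ktime_to_us(net_timedelta(last));

		/* feed rtt_us (and pkts_acked) into the estimator here */
	}
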
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 59e691d26f64..e5be35117223 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -144,7 +144,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
144 ca->snd_cwnd_cents += odd; 144 ca->snd_cwnd_cents += odd;
145 145
 146 /* check when fraction goes >=128 and increase cwnd by 1. */ 146 /* check when fraction goes >=128 and increase cwnd by 1. */
147 while(ca->snd_cwnd_cents >= 128) { 147 while (ca->snd_cwnd_cents >= 128) {
148 tp->snd_cwnd++; 148 tp->snd_cwnd++;
149 ca->snd_cwnd_cents -= 128; 149 ca->snd_cwnd_cents -= 128;
150 tp->snd_cwnd_cnt = 0; 150 tp->snd_cwnd_cnt = 0;
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
new file mode 100644
index 000000000000..4adc47c55351
--- /dev/null
+++ b/net/ipv4/tcp_illinois.c
@@ -0,0 +1,356 @@
1/*
2 * TCP Illinois congestion control.
3 * Home page:
4 * http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
5 *
6 * The algorithm is described in:
7 * "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm
8 * for High-Speed Networks"
9 * http://www.ews.uiuc.edu/~shaoliu/papersandslides/liubassri06perf.pdf
10 *
11 * Implemented from description in paper and ns-2 simulation.
12 * Copyright (C) 2007 Stephen Hemminger <shemminger@linux-foundation.org>
13 */
14
15#include <linux/module.h>
16#include <linux/skbuff.h>
17#include <linux/inet_diag.h>
18#include <asm/div64.h>
19#include <net/tcp.h>
20
21#define ALPHA_SHIFT 7
22#define ALPHA_SCALE (1u<<ALPHA_SHIFT)
23#define ALPHA_MIN ((3*ALPHA_SCALE)/10) /* ~0.3 */
24#define ALPHA_MAX (10*ALPHA_SCALE) /* 10.0 */
25#define ALPHA_BASE ALPHA_SCALE /* 1.0 */
26#define U32_MAX ((u32)~0U)
27#define RTT_MAX (U32_MAX / ALPHA_MAX) /* 3.3 secs */
28
29#define BETA_SHIFT 6
30#define BETA_SCALE (1u<<BETA_SHIFT)
31#define BETA_MIN (BETA_SCALE/8) /* 0.125 */
32#define BETA_MAX (BETA_SCALE/2) /* 0.5 */
33#define BETA_BASE BETA_MAX
34
35static int win_thresh __read_mostly = 15;
36module_param(win_thresh, int, 0);
37MODULE_PARM_DESC(win_thresh, "Window threshold for starting adaptive sizing");
38
39static int theta __read_mostly = 5;
40module_param(theta, int, 0);
 41MODULE_PARM_DESC(theta, "# of fast RTTs before full growth");
42
43/* TCP Illinois Parameters */
44struct illinois {
 45 u64 sum_rtt; /* sum of rtts measured within last rtt */
46 u16 cnt_rtt; /* # of rtts measured within last rtt */
47 u32 base_rtt; /* min of all rtt in usec */
48 u32 max_rtt; /* max of all rtt in usec */
49 u32 end_seq; /* right edge of current RTT */
50 u32 alpha; /* Additive increase */
 51 u32 beta; /* Multiplicative decrease */
 52 u16 acked; /* # packets acked by current ACK */
 53 u8 rtt_above; /* average rtt has gone above threshold */
 54 u8 rtt_low; /* # of rtt measurements below threshold */
55};
56
57static void rtt_reset(struct sock *sk)
58{
59 struct tcp_sock *tp = tcp_sk(sk);
60 struct illinois *ca = inet_csk_ca(sk);
61
62 ca->end_seq = tp->snd_nxt;
63 ca->cnt_rtt = 0;
64 ca->sum_rtt = 0;
65
66 /* TODO: age max_rtt? */
67}
68
69static void tcp_illinois_init(struct sock *sk)
70{
71 struct illinois *ca = inet_csk_ca(sk);
72
73 ca->alpha = ALPHA_MAX;
74 ca->beta = BETA_BASE;
75 ca->base_rtt = 0x7fffffff;
76 ca->max_rtt = 0;
77
78 ca->acked = 0;
79 ca->rtt_low = 0;
80 ca->rtt_above = 0;
81
82 rtt_reset(sk);
83}
84
85/* Measure RTT for each ack. */
86static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, ktime_t last)
87{
88 struct illinois *ca = inet_csk_ca(sk);
89 u32 rtt;
90
91 ca->acked = pkts_acked;
92
93 rtt = ktime_to_us(net_timedelta(last));
94
95 /* ignore bogus values, this prevents wraparound in alpha math */
96 if (rtt > RTT_MAX)
97 rtt = RTT_MAX;
98
99 /* keep track of minimum RTT seen so far */
100 if (ca->base_rtt > rtt)
101 ca->base_rtt = rtt;
102
103 /* and max */
104 if (ca->max_rtt < rtt)
105 ca->max_rtt = rtt;
106
107 ++ca->cnt_rtt;
108 ca->sum_rtt += rtt;
109}
110
111/* Maximum queuing delay */
112static inline u32 max_delay(const struct illinois *ca)
113{
114 return ca->max_rtt - ca->base_rtt;
115}
116
117/* Average queuing delay */
118static inline u32 avg_delay(const struct illinois *ca)
119{
120 u64 t = ca->sum_rtt;
121
122 do_div(t, ca->cnt_rtt);
123 return t - ca->base_rtt;
124}
125
126/*
127 * Compute value of alpha used for additive increase.
128 * If small window then use 1.0, equivalent to Reno.
129 *
130 * For larger windows, adjust based on average delay.
131 * A. If average delay is at minimum (we are uncongested),
132 * then use large alpha (10.0) to increase faster.
133 * B. If average delay is at maximum (getting congested)
134 * then use small alpha (0.3)
135 *
136 * The result is a convex window growth curve.
137 */
138static u32 alpha(struct illinois *ca, u32 da, u32 dm)
139{
140 u32 d1 = dm / 100; /* Low threshold */
141
142 if (da <= d1) {
143 /* If never got out of low delay zone, then use max */
144 if (!ca->rtt_above)
145 return ALPHA_MAX;
146
 147 /* Wait for 5 good RTTs before allowing alpha to go to alpha max.
 148 * This prevents one good RTT from causing a sudden window increase.
149 */
150 if (++ca->rtt_low < theta)
151 return ca->alpha;
152
153 ca->rtt_low = 0;
154 ca->rtt_above = 0;
155 return ALPHA_MAX;
156 }
157
158 ca->rtt_above = 1;
159
160 /*
161 * Based on:
162 *
163 * (dm - d1) amin amax
164 * k1 = -------------------
165 * amax - amin
166 *
167 * (dm - d1) amin
168 * k2 = ---------------- - d1
169 * amax - amin
170 *
171 * k1
172 * alpha = ----------
173 * k2 + da
174 */
175
176 dm -= d1;
177 da -= d1;
178 return (dm * ALPHA_MAX) /
179 (dm + (da * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN);
180}
181
182/*
183 * Beta used for multiplicative decrease.
184 * For small window sizes returns same value as Reno (0.5)
185 *
186 * If delay is small (10% of max) then beta = 1/8
187 * If delay is up to 80% of max then beta = 1/2
188 * In between is a linear function
189 */
190static u32 beta(u32 da, u32 dm)
191{
192 u32 d2, d3;
193
194 d2 = dm / 10;
195 if (da <= d2)
196 return BETA_MIN;
197
198 d3 = (8 * dm) / 10;
199 if (da >= d3 || d3 <= d2)
200 return BETA_MAX;
201
202 /*
203 * Based on:
204 *
205 * bmin d3 - bmax d2
206 * k3 = -------------------
207 * d3 - d2
208 *
209 * bmax - bmin
210 * k4 = -------------
211 * d3 - d2
212 *
213 * b = k3 + k4 da
214 */
215 return (BETA_MIN * d3 - BETA_MAX * d2 + (BETA_MAX - BETA_MIN) * da)
216 / (d3 - d2);
217}
218
219/* Update alpha and beta values once per RTT */
220static void update_params(struct sock *sk)
221{
222 struct tcp_sock *tp = tcp_sk(sk);
223 struct illinois *ca = inet_csk_ca(sk);
224
225 if (tp->snd_cwnd < win_thresh) {
226 ca->alpha = ALPHA_BASE;
227 ca->beta = BETA_BASE;
228 } else if (ca->cnt_rtt > 0) {
229 u32 dm = max_delay(ca);
230 u32 da = avg_delay(ca);
231
232 ca->alpha = alpha(ca, da, dm);
233 ca->beta = beta(da, dm);
234 }
235
236 rtt_reset(sk);
237}
238
239/*
240 * In case of loss, reset to default values
241 */
242static void tcp_illinois_state(struct sock *sk, u8 new_state)
243{
244 struct illinois *ca = inet_csk_ca(sk);
245
246 if (new_state == TCP_CA_Loss) {
247 ca->alpha = ALPHA_BASE;
248 ca->beta = BETA_BASE;
249 ca->rtt_low = 0;
250 ca->rtt_above = 0;
251 rtt_reset(sk);
252 }
253}
254
255/*
256 * Increase window in response to successful acknowledgment.
257 */
258static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
259 u32 in_flight, int flag)
260{
261 struct tcp_sock *tp = tcp_sk(sk);
262 struct illinois *ca = inet_csk_ca(sk);
263
264 if (after(ack, ca->end_seq))
265 update_params(sk);
266
 267 /* RFC2861: only increase cwnd if fully utilized */
268 if (!tcp_is_cwnd_limited(sk, in_flight))
269 return;
270
271 /* In slow start */
272 if (tp->snd_cwnd <= tp->snd_ssthresh)
273 tcp_slow_start(tp);
274
275 else {
276 u32 delta;
277
278 /* snd_cwnd_cnt is # of packets since last cwnd increment */
279 tp->snd_cwnd_cnt += ca->acked;
280 ca->acked = 1;
281
 282 /* This is a close approximation of:
283 * tp->snd_cwnd += alpha/tp->snd_cwnd
284 */
285 delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT;
286 if (delta >= tp->snd_cwnd) {
287 tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd,
288 (u32) tp->snd_cwnd_clamp);
289 tp->snd_cwnd_cnt = 0;
290 }
291 }
292}
293
294static u32 tcp_illinois_ssthresh(struct sock *sk)
295{
296 struct tcp_sock *tp = tcp_sk(sk);
297 struct illinois *ca = inet_csk_ca(sk);
298
299 /* Multiplicative decrease */
300 return max((tp->snd_cwnd * ca->beta) >> BETA_SHIFT, 2U);
301}
302
303
304/* Extract info for Tcp socket info provided via netlink. */
305static void tcp_illinois_info(struct sock *sk, u32 ext,
306 struct sk_buff *skb)
307{
308 const struct illinois *ca = inet_csk_ca(sk);
309
310 if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
311 struct tcpvegas_info info = {
312 .tcpv_enabled = 1,
313 .tcpv_rttcnt = ca->cnt_rtt,
314 .tcpv_minrtt = ca->base_rtt,
315 };
316 u64 t = ca->sum_rtt;
317
318 do_div(t, ca->cnt_rtt);
319 info.tcpv_rtt = t;
320
321 nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
322 }
323}
324
325static struct tcp_congestion_ops tcp_illinois = {
326 .flags = TCP_CONG_RTT_STAMP,
327 .init = tcp_illinois_init,
328 .ssthresh = tcp_illinois_ssthresh,
329 .min_cwnd = tcp_reno_min_cwnd,
330 .cong_avoid = tcp_illinois_cong_avoid,
331 .set_state = tcp_illinois_state,
332 .get_info = tcp_illinois_info,
333 .pkts_acked = tcp_illinois_acked,
334
335 .owner = THIS_MODULE,
336 .name = "illinois",
337};
338
339static int __init tcp_illinois_register(void)
340{
341 BUILD_BUG_ON(sizeof(struct illinois) > ICSK_CA_PRIV_SIZE);
342 return tcp_register_congestion_control(&tcp_illinois);
343}
344
345static void __exit tcp_illinois_unregister(void)
346{
347 tcp_unregister_congestion_control(&tcp_illinois);
348}
349
350module_init(tcp_illinois_register);
351module_exit(tcp_illinois_unregister);
352
353MODULE_AUTHOR("Stephen Hemminger, Shao Liu");
354MODULE_LICENSE("GPL");
355MODULE_DESCRIPTION("TCP Illinois");
356MODULE_VERSION("1.0");
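
To get a feel for the alpha()/beta() curves above, the arithmetic can
be lifted straight into user space (this sketch drops the rtt_above /
rtt_low hysteresis that the kernel version layers on top):

	#include <stdio.h>
	#include <stdint.h>

	#define ALPHA_SCALE	(1u << 7)
	#define ALPHA_MIN	((3 * ALPHA_SCALE) / 10)	/* ~0.3 */
	#define ALPHA_MAX	(10 * ALPHA_SCALE)		/* 10.0 */
	#define BETA_SCALE	(1u << 6)
	#define BETA_MIN	(BETA_SCALE / 8)		/* 0.125 */
	#define BETA_MAX	(BETA_SCALE / 2)		/* 0.5 */

	static uint32_t alpha_of(uint32_t da, uint32_t dm)
	{
		uint32_t d1 = dm / 100;

		if (da <= d1)
			return ALPHA_MAX;	/* uncongested: grow fast */
		dm -= d1;
		da -= d1;
		return (dm * ALPHA_MAX) /
		       (dm + (da * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN);
	}

	static uint32_t beta_of(uint32_t da, uint32_t dm)
	{
		uint32_t d2 = dm / 10, d3 = (8 * dm) / 10;

		if (da <= d2)
			return BETA_MIN;
		if (da >= d3 || d3 <= d2)
			return BETA_MAX;
		return (BETA_MIN * d3 - BETA_MAX * d2 +
			(BETA_MAX - BETA_MIN) * da) / (d3 - d2);
	}

	int main(void)
	{
		uint32_t dm = 100, da;	/* delays in arbitrary usec units */

		for (da = 0; da <= dm; da += 20)
			printf("da=%3u alpha=%5.2f beta=%.3f\n", da,
			       alpha_of(da, dm) / (double)ALPHA_SCALE,
			       beta_of(da, dm) / (double)BETA_SCALE);
		return 0;
	}

With dm = 100 and da = 50, for instance, alpha comes out near 0.58 and
beta at 21/64, about 0.33: halfway into the delay range the window
still grows, but slower than Reno, and a loss costs a third of the
window rather than half.
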
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1a14191687ac..051f0f815f17 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -86,6 +86,7 @@ int sysctl_tcp_stdurg __read_mostly;
86int sysctl_tcp_rfc1337 __read_mostly; 86int sysctl_tcp_rfc1337 __read_mostly;
87int sysctl_tcp_max_orphans __read_mostly = NR_FILE; 87int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
88int sysctl_tcp_frto __read_mostly; 88int sysctl_tcp_frto __read_mostly;
89int sysctl_tcp_frto_response __read_mostly;
89int sysctl_tcp_nometrics_save __read_mostly; 90int sysctl_tcp_nometrics_save __read_mostly;
90 91
91int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; 92int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
@@ -100,6 +101,7 @@ int sysctl_tcp_abc __read_mostly;
100#define FLAG_ECE 0x40 /* ECE in this ACK */ 101#define FLAG_ECE 0x40 /* ECE in this ACK */
101#define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */ 102#define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */
102#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ 103#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/
104#define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */
103 105
104#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) 106#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
105#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) 107#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -110,6 +112,8 @@ int sysctl_tcp_abc __read_mostly;
110#define IsFack(tp) ((tp)->rx_opt.sack_ok & 2) 112#define IsFack(tp) ((tp)->rx_opt.sack_ok & 2)
111#define IsDSack(tp) ((tp)->rx_opt.sack_ok & 4) 113#define IsDSack(tp) ((tp)->rx_opt.sack_ok & 4)
112 114
115#define IsSackFrto() (sysctl_tcp_frto == 0x2)
116
113#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) 117#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
114 118
115/* Adapt the MSS value used to make delayed ack decision to the 119/* Adapt the MSS value used to make delayed ack decision to the
@@ -136,7 +140,7 @@ static void tcp_measure_rcv_mss(struct sock *sk,
136 * 140 *
137 * "len" is invariant segment length, including TCP header. 141 * "len" is invariant segment length, including TCP header.
138 */ 142 */
139 len += skb->data - skb->h.raw; 143 len += skb->data - skb_transport_header(skb);
140 if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) || 144 if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) ||
141 /* If PSH is not set, packet should be 145 /* If PSH is not set, packet should be
142 * full sized, provided peer TCP is not badly broken. 146 * full sized, provided peer TCP is not badly broken.
@@ -144,7 +148,7 @@ static void tcp_measure_rcv_mss(struct sock *sk,
144 * to handle super-low mtu links fairly. 148 * to handle super-low mtu links fairly.
145 */ 149 */
146 (len >= TCP_MIN_MSS + sizeof(struct tcphdr) && 150 (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
147 !(tcp_flag_word(skb->h.th)&TCP_REMNANT))) { 151 !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
148 /* Subtract also invariant (if peer is RFC compliant), 152 /* Subtract also invariant (if peer is RFC compliant),
149 * tcp header plus fixed timestamp option length. 153 * tcp header plus fixed timestamp option length.
150 * Resulting "len" is MSS free of SACK jitter. 154 * Resulting "len" is MSS free of SACK jitter.
@@ -231,9 +235,9 @@ static void tcp_fixup_sndbuf(struct sock *sk)
231 */ 235 */
232 236
233/* Slow part of check#2. */ 237/* Slow part of check#2. */
234static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp, 238static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
235 const struct sk_buff *skb)
236{ 239{
240 struct tcp_sock *tp = tcp_sk(sk);
237 /* Optimize this! */ 241 /* Optimize this! */
238 int truesize = tcp_win_from_space(skb->truesize)/2; 242 int truesize = tcp_win_from_space(skb->truesize)/2;
239 int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2; 243 int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2;
@@ -248,9 +252,11 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
248 return 0; 252 return 0;
249} 253}
250 254
251static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp, 255static void tcp_grow_window(struct sock *sk,
252 struct sk_buff *skb) 256 struct sk_buff *skb)
253{ 257{
258 struct tcp_sock *tp = tcp_sk(sk);
259
254 /* Check #1 */ 260 /* Check #1 */
255 if (tp->rcv_ssthresh < tp->window_clamp && 261 if (tp->rcv_ssthresh < tp->window_clamp &&
256 (int)tp->rcv_ssthresh < tcp_space(sk) && 262 (int)tp->rcv_ssthresh < tcp_space(sk) &&
@@ -263,7 +269,7 @@ static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
263 if (tcp_win_from_space(skb->truesize) <= skb->len) 269 if (tcp_win_from_space(skb->truesize) <= skb->len)
264 incr = 2*tp->advmss; 270 incr = 2*tp->advmss;
265 else 271 else
266 incr = __tcp_grow_window(sk, tp, skb); 272 incr = __tcp_grow_window(sk, skb);
267 273
268 if (incr) { 274 if (incr) {
269 tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp); 275 tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp);
@@ -326,8 +332,9 @@ static void tcp_init_buffer_space(struct sock *sk)
326} 332}
327 333
328/* 5. Recalculate window clamp after socket hit its memory bounds. */ 334/* 5. Recalculate window clamp after socket hit its memory bounds. */
329static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) 335static void tcp_clamp_window(struct sock *sk)
330{ 336{
337 struct tcp_sock *tp = tcp_sk(sk);
331 struct inet_connection_sock *icsk = inet_csk(sk); 338 struct inet_connection_sock *icsk = inet_csk(sk);
332 339
333 icsk->icsk_ack.quick = 0; 340 icsk->icsk_ack.quick = 0;
@@ -499,8 +506,9 @@ new_measure:
499 * each ACK we send, he increments snd_cwnd and transmits more of his 506 * each ACK we send, he increments snd_cwnd and transmits more of his
500 * queue. -DaveM 507 * queue. -DaveM
501 */ 508 */
502static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) 509static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
503{ 510{
511 struct tcp_sock *tp = tcp_sk(sk);
504 struct inet_connection_sock *icsk = inet_csk(sk); 512 struct inet_connection_sock *icsk = inet_csk(sk);
505 u32 now; 513 u32 now;
506 514
@@ -541,7 +549,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
541 TCP_ECN_check_ce(tp, skb); 549 TCP_ECN_check_ce(tp, skb);
542 550
543 if (skb->len >= 128) 551 if (skb->len >= 128)
544 tcp_grow_window(sk, tp, skb); 552 tcp_grow_window(sk, skb);
545} 553}
546 554
547/* Called to compute a smoothed rtt estimate. The data fed to this 555/* Called to compute a smoothed rtt estimate. The data fed to this
@@ -574,7 +582,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
574 * does not matter how to _calculate_ it. Seems, it was trap 582 * does not matter how to _calculate_ it. Seems, it was trap
575 * that VJ failed to avoid. 8) 583 * that VJ failed to avoid. 8)
576 */ 584 */
577 if(m == 0) 585 if (m == 0)
578 m = 1; 586 m = 1;
579 if (tp->srtt != 0) { 587 if (tp->srtt != 0) {
580 m -= (tp->srtt >> 3); /* m is now error in rtt est */ 588 m -= (tp->srtt >> 3); /* m is now error in rtt est */
@@ -759,15 +767,17 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
759} 767}
760 768
761/* Set slow start threshold and cwnd not falling to slow start */ 769/* Set slow start threshold and cwnd not falling to slow start */
762void tcp_enter_cwr(struct sock *sk) 770void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
763{ 771{
764 struct tcp_sock *tp = tcp_sk(sk); 772 struct tcp_sock *tp = tcp_sk(sk);
773 const struct inet_connection_sock *icsk = inet_csk(sk);
765 774
766 tp->prior_ssthresh = 0; 775 tp->prior_ssthresh = 0;
767 tp->bytes_acked = 0; 776 tp->bytes_acked = 0;
768 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { 777 if (icsk->icsk_ca_state < TCP_CA_CWR) {
769 tp->undo_marker = 0; 778 tp->undo_marker = 0;
770 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); 779 if (set_ssthresh)
780 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
771 tp->snd_cwnd = min(tp->snd_cwnd, 781 tp->snd_cwnd = min(tp->snd_cwnd,
772 tcp_packets_in_flight(tp) + 1U); 782 tcp_packets_in_flight(tp) + 1U);
773 tp->snd_cwnd_cnt = 0; 783 tp->snd_cwnd_cnt = 0;
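
tcp_enter_cwr() growing a set_ssthresh flag fits the F-RTO work in this
series: a caller can now drop into CWR-style window reduction while
leaving the existing ssthresh untouched, presumably what the more
conservative tcp_frto_response modes want after a spurious RTO. The two
call styles, as a sketch:

	tcp_enter_cwr(sk, 1);	/* usual path: also lower ssthresh */
	tcp_enter_cwr(sk, 0);	/* spurious-RTO response: keep ssthresh */
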
@@ -934,7 +944,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
934{ 944{
935 const struct inet_connection_sock *icsk = inet_csk(sk); 945 const struct inet_connection_sock *icsk = inet_csk(sk);
936 struct tcp_sock *tp = tcp_sk(sk); 946 struct tcp_sock *tp = tcp_sk(sk);
937 unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; 947 unsigned char *ptr = (skb_transport_header(ack_skb) +
948 TCP_SKB_CB(ack_skb)->sacked);
938 struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2); 949 struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2);
939 struct sk_buff *cached_skb; 950 struct sk_buff *cached_skb;
940 int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; 951 int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
@@ -1038,7 +1049,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1038 cached_skb = tp->fastpath_skb_hint; 1049 cached_skb = tp->fastpath_skb_hint;
1039 cached_fack_count = tp->fastpath_cnt_hint; 1050 cached_fack_count = tp->fastpath_cnt_hint;
1040 if (!cached_skb) { 1051 if (!cached_skb) {
1041 cached_skb = sk->sk_write_queue.next; 1052 cached_skb = tcp_write_queue_head(sk);
1042 cached_fack_count = 0; 1053 cached_fack_count = 0;
1043 } 1054 }
1044 1055
@@ -1055,10 +1066,13 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1055 if (after(end_seq, tp->high_seq)) 1066 if (after(end_seq, tp->high_seq))
1056 flag |= FLAG_DATA_LOST; 1067 flag |= FLAG_DATA_LOST;
1057 1068
1058 sk_stream_for_retrans_queue_from(skb, sk) { 1069 tcp_for_write_queue_from(skb, sk) {
1059 int in_sack, pcount; 1070 int in_sack, pcount;
1060 u8 sacked; 1071 u8 sacked;
1061 1072
1073 if (skb == tcp_send_head(sk))
1074 break;
1075
1062 cached_skb = skb; 1076 cached_skb = skb;
1063 cached_fack_count = fack_count; 1077 cached_fack_count = fack_count;
1064 if (i == first_sack_index) { 1078 if (i == first_sack_index) {
@@ -1159,6 +1173,18 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1159 /* clear lost hint */ 1173 /* clear lost hint */
1160 tp->retransmit_skb_hint = NULL; 1174 tp->retransmit_skb_hint = NULL;
1161 } 1175 }
1176 /* SACK enhanced F-RTO detection.
1177 * Set flag if and only if non-rexmitted
1178 * segments below frto_highmark are
1179 * SACKed (RFC4138; Appendix B).
1180 * Clearing correct due to in-order walk
1181 */
1182 if (after(end_seq, tp->frto_highmark)) {
1183 flag &= ~FLAG_ONLY_ORIG_SACKED;
1184 } else {
1185 if (!(sacked & TCPCB_RETRANS))
1186 flag |= FLAG_ONLY_ORIG_SACKED;
1187 }
1162 } 1188 }
1163 1189
1164 TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED; 1190 TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
@@ -1195,7 +1221,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1195 if (lost_retrans && icsk->icsk_ca_state == TCP_CA_Recovery) { 1221 if (lost_retrans && icsk->icsk_ca_state == TCP_CA_Recovery) {
1196 struct sk_buff *skb; 1222 struct sk_buff *skb;
1197 1223
1198 sk_stream_for_retrans_queue(skb, sk) { 1224 tcp_for_write_queue(skb, sk) {
1225 if (skb == tcp_send_head(sk))
1226 break;
1199 if (after(TCP_SKB_CB(skb)->seq, lost_retrans)) 1227 if (after(TCP_SKB_CB(skb)->seq, lost_retrans))
1200 break; 1228 break;
1201 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) 1229 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
@@ -1224,7 +1252,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1224 1252
1225 tp->left_out = tp->sacked_out + tp->lost_out; 1253 tp->left_out = tp->sacked_out + tp->lost_out;
1226 1254
1227 if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss) 1255 if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss &&
1256 (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
1228 tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0); 1257 tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0);
1229 1258
1230#if FASTRETRANS_DEBUG > 0 1259#if FASTRETRANS_DEBUG > 0
@@ -1236,9 +1265,54 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1236 return flag; 1265 return flag;
1237} 1266}
1238 1267
1239/* RTO occurred, but do not yet enter loss state. Instead, transmit two new 1268/* F-RTO can only be used if these conditions are satisfied:
1240 * segments to see from the next ACKs whether any data was really missing. 1269 * - there must be some unsent new data
1241 * If the RTO was spurious, new ACKs should arrive. 1270 * - the advertised window should allow sending it
1271 * - TCP has never retransmitted anything other than head (SACK enhanced
1272 * variant from Appendix B of RFC4138 is more robust here)
1273 */
1274int tcp_use_frto(struct sock *sk)
1275{
1276 const struct tcp_sock *tp = tcp_sk(sk);
1277 struct sk_buff *skb;
1278
1279 if (!sysctl_tcp_frto || !tcp_send_head(sk) ||
1280 after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
1281 tp->snd_una + tp->snd_wnd))
1282 return 0;
1283
1284 if (IsSackFrto())
1285 return 1;
1286
1287 /* Avoid expensive walking of rexmit queue if possible */
1288 if (tp->retrans_out > 1)
1289 return 0;
1290
1291 skb = tcp_write_queue_head(sk);
1292 skb = tcp_write_queue_next(sk, skb); /* Skips head */
1293 tcp_for_write_queue_from(skb, sk) {
1294 if (skb == tcp_send_head(sk))
1295 break;
1296 if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
1297 return 0;
1298 /* Short-circuit when first non-SACKed skb has been checked */
1299 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED))
1300 break;
1301 }
1302 return 1;
1303}
1304
1305/* RTO occurred, but do not yet enter Loss state. Instead, defer RTO
1306 * recovery a bit and use heuristics in tcp_process_frto() to detect if
1307 * the RTO was spurious. Only clear SACKED_RETRANS of the head here to
1308 * keep retrans_out counting accurate (with SACK F-RTO, other than head
1309 * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS
1310 * bits are handled if the Loss state is really to be entered (in
1311 * tcp_enter_frto_loss).
1312 *
1313 * Do like tcp_enter_loss() would; when RTO expires the second time it
1314 * does:
1315 * "Reduce ssthresh if it has not yet been made inside this window."
1242 */ 1316 */
1243void tcp_enter_frto(struct sock *sk) 1317void tcp_enter_frto(struct sock *sk)
1244{ 1318{
@@ -1246,39 +1320,69 @@ void tcp_enter_frto(struct sock *sk)
1246 struct tcp_sock *tp = tcp_sk(sk); 1320 struct tcp_sock *tp = tcp_sk(sk);
1247 struct sk_buff *skb; 1321 struct sk_buff *skb;
1248 1322
1249 tp->frto_counter = 1; 1323 if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
1250
1251 if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
1252 tp->snd_una == tp->high_seq || 1324 tp->snd_una == tp->high_seq ||
1253 (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { 1325 ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
1326 !icsk->icsk_retransmits)) {
1254 tp->prior_ssthresh = tcp_current_ssthresh(sk); 1327 tp->prior_ssthresh = tcp_current_ssthresh(sk);
1255 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); 1328 /* Our state is too optimistic in ssthresh() call because cwnd
1329 * is not reduced until tcp_enter_frto_loss() when previous FRTO
1330 * recovery has not yet completed. Pattern would be this: RTO,
1331 * Cumulative ACK, RTO (2xRTO for the same segment does not end
1332 * up here twice).
1333 * RFC4138 should be more specific on what to do, even though
1334 * RTO is quite unlikely to occur after the first Cumulative ACK
1335 * due to back-off and complexity of triggering events ...
1336 */
1337 if (tp->frto_counter) {
1338 u32 stored_cwnd;
1339 stored_cwnd = tp->snd_cwnd;
1340 tp->snd_cwnd = 2;
1341 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
1342 tp->snd_cwnd = stored_cwnd;
1343 } else {
1344 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
1345 }
1346 /* ... in theory, cong.control module could do "any tricks" in
1347 * ssthresh(), which means that ca_state, lost bits and lost_out
1348 * counter would have to be faked before the call occurs. We
1349 * consider that too expensive, unlikely and hacky, so modules
 1350 * using these in ssthresh() must deal with these incompatibility
 1351 * issues if they receive CA_EVENT_FRTO and frto_counter != 0
1352 */
1256 tcp_ca_event(sk, CA_EVENT_FRTO); 1353 tcp_ca_event(sk, CA_EVENT_FRTO);
1257 } 1354 }
1258 1355
1259 /* Have to clear retransmission markers here to keep the bookkeeping
1260 * in shape, even though we are not yet in Loss state.
1261 * If something was really lost, it is eventually caught up
1262 * in tcp_enter_frto_loss.
1263 */
1264 tp->retrans_out = 0;
1265 tp->undo_marker = tp->snd_una; 1356 tp->undo_marker = tp->snd_una;
1266 tp->undo_retrans = 0; 1357 tp->undo_retrans = 0;
1267 1358
1268 sk_stream_for_retrans_queue(skb, sk) { 1359 skb = tcp_write_queue_head(sk);
1269 TCP_SKB_CB(skb)->sacked &= ~TCPCB_RETRANS; 1360 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
1361 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1362 tp->retrans_out -= tcp_skb_pcount(skb);
1270 } 1363 }
1271 tcp_sync_left_out(tp); 1364 tcp_sync_left_out(tp);
1272 1365
1273 tcp_set_ca_state(sk, TCP_CA_Open); 1366 /* Earlier loss recovery underway (see RFC4138; Appendix B).
 1274 tp->frto_highmark = tp->snd_nxt; 1367 * The last condition is necessary at least in the tp->frto_counter case.
1368 */
1369 if (IsSackFrto() && (tp->frto_counter ||
1370 ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
1371 after(tp->high_seq, tp->snd_una)) {
1372 tp->frto_highmark = tp->high_seq;
1373 } else {
1374 tp->frto_highmark = tp->snd_nxt;
1375 }
1376 tcp_set_ca_state(sk, TCP_CA_Disorder);
1377 tp->high_seq = tp->snd_nxt;
1378 tp->frto_counter = 1;
1275} 1379}
1276 1380
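The save/restore trick in the frto_counter branch above can be isolated as follows; a sketch with hypothetical types, not the kernel's icsk/ca_ops layout. cwnd is pinned to 2 only for the duration of the ssthresh() callback, so the congestion module sees a pessimistic in-flight view while cwnd itself stays untouched until tcp_enter_frto_loss():

	#include <stdint.h>

	struct conn;
	struct cc_ops { uint32_t (*ssthresh)(struct conn *); };
	struct conn   { uint32_t snd_cwnd; const struct cc_ops *ops; };

	static uint32_t ssthresh_for_repeated_frto(struct conn *c)
	{
		uint32_t stored_cwnd = c->snd_cwnd;
		uint32_t ssthresh;

		c->snd_cwnd = 2;		/* pessimistic view for the callback */
		ssthresh = c->ops->ssthresh(c);
		c->snd_cwnd = stored_cwnd;	/* cwnd itself is not reduced here */
		return ssthresh;
	}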
1277/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO, 1381/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO,
1278 * which indicates that we should follow the traditional RTO recovery, 1382 * which indicates that we should follow the traditional RTO recovery,
1279 * i.e. mark everything lost and do go-back-N retransmission. 1383 * i.e. mark everything lost and do go-back-N retransmission.
1280 */ 1384 */
1281static void tcp_enter_frto_loss(struct sock *sk) 1385static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
1282{ 1386{
1283 struct tcp_sock *tp = tcp_sk(sk); 1387 struct tcp_sock *tp = tcp_sk(sk);
1284 struct sk_buff *skb; 1388 struct sk_buff *skb;
@@ -1287,10 +1391,23 @@ static void tcp_enter_frto_loss(struct sock *sk)
1287 tp->sacked_out = 0; 1391 tp->sacked_out = 0;
1288 tp->lost_out = 0; 1392 tp->lost_out = 0;
1289 tp->fackets_out = 0; 1393 tp->fackets_out = 0;
1394 tp->retrans_out = 0;
1290 1395
1291 sk_stream_for_retrans_queue(skb, sk) { 1396 tcp_for_write_queue(skb, sk) {
1397 if (skb == tcp_send_head(sk))
1398 break;
1292 cnt += tcp_skb_pcount(skb); 1399 cnt += tcp_skb_pcount(skb);
1293 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; 1400 /*
 1401 * Count the retransmission made on RTO correctly (only when
 1402 * still waiting for the first ACK without having received it)...
1403 */
1404 if ((tp->frto_counter == 1) && !(flag&FLAG_DATA_ACKED)) {
1405 tp->retrans_out += tcp_skb_pcount(skb);
1406 /* ...enter this if branch just for the first segment */
1407 flag |= FLAG_DATA_ACKED;
1408 } else {
1409 TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1410 }
1294 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { 1411 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
1295 1412
1296 /* Do not mark those segments lost that were 1413 /* Do not mark those segments lost that were
@@ -1308,7 +1425,7 @@ static void tcp_enter_frto_loss(struct sock *sk)
1308 } 1425 }
1309 tcp_sync_left_out(tp); 1426 tcp_sync_left_out(tp);
1310 1427
1311 tp->snd_cwnd = tp->frto_counter + tcp_packets_in_flight(tp)+1; 1428 tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
1312 tp->snd_cwnd_cnt = 0; 1429 tp->snd_cwnd_cnt = 0;
1313 tp->snd_cwnd_stamp = tcp_time_stamp; 1430 tp->snd_cwnd_stamp = tcp_time_stamp;
1314 tp->undo_marker = 0; 1431 tp->undo_marker = 0;
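The new allowed_segments parameter feeds directly into the standard in-flight arithmetic; a sketch of that bookkeeping (the formula is the usual Linux tcp_packets_in_flight() identity, the helper name here is illustrative):

	#include <stdint.h>

	/* in_flight = packets_out - (sacked_out + lost_out) + retrans_out */
	static uint32_t packets_in_flight(uint32_t packets_out, uint32_t sacked_out,
					  uint32_t lost_out, uint32_t retrans_out)
	{
		return packets_out - (sacked_out + lost_out) + retrans_out;
	}

	/* cwnd = packets_in_flight(...) + allowed_segments then permits exactly
	 * the go-back-N burst the caller asked for, and nothing more. */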
@@ -1366,7 +1483,9 @@ void tcp_enter_loss(struct sock *sk, int how)
1366 if (!how) 1483 if (!how)
1367 tp->undo_marker = tp->snd_una; 1484 tp->undo_marker = tp->snd_una;
1368 1485
1369 sk_stream_for_retrans_queue(skb, sk) { 1486 tcp_for_write_queue(skb, sk) {
1487 if (skb == tcp_send_head(sk))
1488 break;
1370 cnt += tcp_skb_pcount(skb); 1489 cnt += tcp_skb_pcount(skb);
1371 if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS) 1490 if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
1372 tp->undo_marker = 0; 1491 tp->undo_marker = 0;
@@ -1401,14 +1520,14 @@ static int tcp_check_sack_reneging(struct sock *sk)
1401 * receiver _host_ is heavily congested (or buggy). 1520 * receiver _host_ is heavily congested (or buggy).
1402 * Do processing similar to RTO timeout. 1521 * Do processing similar to RTO timeout.
1403 */ 1522 */
1404 if ((skb = skb_peek(&sk->sk_write_queue)) != NULL && 1523 if ((skb = tcp_write_queue_head(sk)) != NULL &&
1405 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { 1524 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
1406 struct inet_connection_sock *icsk = inet_csk(sk); 1525 struct inet_connection_sock *icsk = inet_csk(sk);
1407 NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING); 1526 NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING);
1408 1527
1409 tcp_enter_loss(sk, 1); 1528 tcp_enter_loss(sk, 1);
1410 icsk->icsk_retransmits++; 1529 icsk->icsk_retransmits++;
1411 tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)); 1530 tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
1412 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 1531 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
1413 icsk->icsk_rto, TCP_RTO_MAX); 1532 icsk->icsk_rto, TCP_RTO_MAX);
1414 return 1; 1533 return 1;
@@ -1426,10 +1545,12 @@ static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
1426 return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto); 1545 return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto);
1427} 1546}
1428 1547
1429static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp) 1548static inline int tcp_head_timedout(struct sock *sk)
1430{ 1549{
1550 struct tcp_sock *tp = tcp_sk(sk);
1551
1431 return tp->packets_out && 1552 return tp->packets_out &&
1432 tcp_skb_timedout(sk, skb_peek(&sk->sk_write_queue)); 1553 tcp_skb_timedout(sk, tcp_write_queue_head(sk));
1433} 1554}
1434 1555
1435/* Linux NewReno/SACK/FACK/ECN state machine. 1556/* Linux NewReno/SACK/FACK/ECN state machine.
@@ -1525,10 +1646,15 @@ static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp)
1525 * Main question: may we further continue forward transmission 1646 * Main question: may we further continue forward transmission
1526 * with the same cwnd? 1647 * with the same cwnd?
1527 */ 1648 */
1528static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp) 1649static int tcp_time_to_recover(struct sock *sk)
1529{ 1650{
1651 struct tcp_sock *tp = tcp_sk(sk);
1530 __u32 packets_out; 1652 __u32 packets_out;
1531 1653
1654 /* Do not perform any recovery during FRTO algorithm */
1655 if (tp->frto_counter)
1656 return 0;
1657
1532 /* Trick#1: The loss is proven. */ 1658 /* Trick#1: The loss is proven. */
1533 if (tp->lost_out) 1659 if (tp->lost_out)
1534 return 1; 1660 return 1;
@@ -1540,7 +1666,7 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
1540 /* Trick#3 : when we use RFC2988 timer restart, fast 1666 /* Trick#3 : when we use RFC2988 timer restart, fast
1541 * retransmit can be triggered by timeout of queue head. 1667 * retransmit can be triggered by timeout of queue head.
1542 */ 1668 */
1543 if (tcp_head_timedout(sk, tp)) 1669 if (tcp_head_timedout(sk))
1544 return 1; 1670 return 1;
1545 1671
1546 /* Trick#4: It is still not OK... But will it be useful to delay 1672 /* Trick#4: It is still not OK... But will it be useful to delay
@@ -1549,7 +1675,7 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
1549 packets_out = tp->packets_out; 1675 packets_out = tp->packets_out;
1550 if (packets_out <= tp->reordering && 1676 if (packets_out <= tp->reordering &&
1551 tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) && 1677 tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
1552 !tcp_may_send_now(sk, tp)) { 1678 !tcp_may_send_now(sk)) {
1553 /* We have nothing to send. This connection is limited 1679 /* We have nothing to send. This connection is limited
1554 * either by receiver window or by application. 1680 * either by receiver window or by application.
1555 */ 1681 */
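A compact sketch of the decision ladder that tcp_time_to_recover() now implements, including the new F-RTO guard; fields are simplified stand-ins and Trick#4 (the window/application-limited case) is omitted:

	#include <stdbool.h>
	#include <stdint.h>

	struct recover_view {
		uint32_t frto_counter;	/* non-zero while F-RTO is in progress */
		uint32_t lost_out;
		uint32_t fackets_out;
		uint32_t reordering;
		bool	 head_timedout;	/* tcp_head_timedout() result */
	};

	static bool time_to_recover(const struct recover_view *v)
	{
		if (v->frto_counter)			/* no recovery during F-RTO */
			return false;
		if (v->lost_out)			/* Trick#1: loss is proven */
			return true;
		if (v->fackets_out > v->reordering)	/* Trick#2: FACK dupacks */
			return true;
		if (v->head_timedout)			/* Trick#3: head timed out */
			return true;
		/* Trick#4 (nothing to send, limited connection) omitted. */
		return false;
	}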
@@ -1589,8 +1715,10 @@ static void tcp_add_reno_sack(struct sock *sk)
1589 1715
1590/* Account for ACK, ACKing some data in Reno Recovery phase. */ 1716/* Account for ACK, ACKing some data in Reno Recovery phase. */
1591 1717
1592static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_sock *tp, int acked) 1718static void tcp_remove_reno_sacks(struct sock *sk, int acked)
1593{ 1719{
1720 struct tcp_sock *tp = tcp_sk(sk);
1721
1594 if (acked > 0) { 1722 if (acked > 0) {
1595 /* One ACK acked hole. The rest eat duplicate ACKs. */ 1723 /* One ACK acked hole. The rest eat duplicate ACKs. */
1596 if (acked-1 >= tp->sacked_out) 1724 if (acked-1 >= tp->sacked_out)
@@ -1609,9 +1737,10 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
1609} 1737}
1610 1738
1611/* Mark head of queue up as lost. */ 1739/* Mark head of queue up as lost. */
1612static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp, 1740static void tcp_mark_head_lost(struct sock *sk,
1613 int packets, u32 high_seq) 1741 int packets, u32 high_seq)
1614{ 1742{
1743 struct tcp_sock *tp = tcp_sk(sk);
1615 struct sk_buff *skb; 1744 struct sk_buff *skb;
1616 int cnt; 1745 int cnt;
1617 1746
@@ -1620,11 +1749,13 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
1620 skb = tp->lost_skb_hint; 1749 skb = tp->lost_skb_hint;
1621 cnt = tp->lost_cnt_hint; 1750 cnt = tp->lost_cnt_hint;
1622 } else { 1751 } else {
1623 skb = sk->sk_write_queue.next; 1752 skb = tcp_write_queue_head(sk);
1624 cnt = 0; 1753 cnt = 0;
1625 } 1754 }
1626 1755
1627 sk_stream_for_retrans_queue_from(skb, sk) { 1756 tcp_for_write_queue_from(skb, sk) {
1757 if (skb == tcp_send_head(sk))
1758 break;
1628 /* TODO: do this better */ 1759 /* TODO: do this better */
1629 /* this is not the most efficient way to do this... */ 1760 /* this is not the most efficient way to do this... */
1630 tp->lost_skb_hint = skb; 1761 tp->lost_skb_hint = skb;
@@ -1638,12 +1769,11 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
1638 1769
1639 /* clear xmit_retransmit_queue hints 1770 /* clear xmit_retransmit_queue hints
1640 * if this is beyond hint */ 1771 * if this is beyond hint */
1641 if(tp->retransmit_skb_hint != NULL && 1772 if (tp->retransmit_skb_hint != NULL &&
1642 before(TCP_SKB_CB(skb)->seq, 1773 before(TCP_SKB_CB(skb)->seq,
1643 TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) { 1774 TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
1644
1645 tp->retransmit_skb_hint = NULL; 1775 tp->retransmit_skb_hint = NULL;
1646 } 1776
1647 } 1777 }
1648 } 1778 }
1649 tcp_sync_left_out(tp); 1779 tcp_sync_left_out(tp);
@@ -1651,15 +1781,17 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
1651 1781
1652/* Account newly detected lost packet(s) */ 1782/* Account newly detected lost packet(s) */
1653 1783
1654static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp) 1784static void tcp_update_scoreboard(struct sock *sk)
1655{ 1785{
1786 struct tcp_sock *tp = tcp_sk(sk);
1787
1656 if (IsFack(tp)) { 1788 if (IsFack(tp)) {
1657 int lost = tp->fackets_out - tp->reordering; 1789 int lost = tp->fackets_out - tp->reordering;
1658 if (lost <= 0) 1790 if (lost <= 0)
1659 lost = 1; 1791 lost = 1;
1660 tcp_mark_head_lost(sk, tp, lost, tp->high_seq); 1792 tcp_mark_head_lost(sk, lost, tp->high_seq);
1661 } else { 1793 } else {
1662 tcp_mark_head_lost(sk, tp, 1, tp->high_seq); 1794 tcp_mark_head_lost(sk, 1, tp->high_seq);
1663 } 1795 }
1664 1796
1665 /* New heuristics: it is possible only after we switched 1797 /* New heuristics: it is possible only after we switched
@@ -1667,13 +1799,15 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
1667 * Hence, we can detect timed out packets during fast 1799 * Hence, we can detect timed out packets during fast
1668 * retransmit without falling to slow start. 1800 * retransmit without falling to slow start.
1669 */ 1801 */
1670 if (!IsReno(tp) && tcp_head_timedout(sk, tp)) { 1802 if (!IsReno(tp) && tcp_head_timedout(sk)) {
1671 struct sk_buff *skb; 1803 struct sk_buff *skb;
1672 1804
1673 skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint 1805 skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
1674 : sk->sk_write_queue.next; 1806 : tcp_write_queue_head(sk);
1675 1807
1676 sk_stream_for_retrans_queue_from(skb, sk) { 1808 tcp_for_write_queue_from(skb, sk) {
1809 if (skb == tcp_send_head(sk))
1810 break;
1677 if (!tcp_skb_timedout(sk, skb)) 1811 if (!tcp_skb_timedout(sk, skb))
1678 break; 1812 break;
1679 1813
@@ -1745,9 +1879,11 @@ static inline int tcp_packet_delayed(struct tcp_sock *tp)
1745/* Undo procedures. */ 1879/* Undo procedures. */
1746 1880
1747#if FASTRETRANS_DEBUG > 1 1881#if FASTRETRANS_DEBUG > 1
1748static void DBGUNDO(struct sock *sk, struct tcp_sock *tp, const char *msg) 1882static void DBGUNDO(struct sock *sk, const char *msg)
1749{ 1883{
1884 struct tcp_sock *tp = tcp_sk(sk);
1750 struct inet_sock *inet = inet_sk(sk); 1885 struct inet_sock *inet = inet_sk(sk);
1886
1751 printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n", 1887 printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n",
1752 msg, 1888 msg,
1753 NIPQUAD(inet->daddr), ntohs(inet->dport), 1889 NIPQUAD(inet->daddr), ntohs(inet->dport),
@@ -1793,13 +1929,15 @@ static inline int tcp_may_undo(struct tcp_sock *tp)
1793} 1929}
1794 1930
1795/* People celebrate: "We love our President!" */ 1931/* People celebrate: "We love our President!" */
1796static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp) 1932static int tcp_try_undo_recovery(struct sock *sk)
1797{ 1933{
1934 struct tcp_sock *tp = tcp_sk(sk);
1935
1798 if (tcp_may_undo(tp)) { 1936 if (tcp_may_undo(tp)) {
1799 /* Happy end! We did not retransmit anything 1937 /* Happy end! We did not retransmit anything
1800 * or our original transmission succeeded. 1938 * or our original transmission succeeded.
1801 */ 1939 */
1802 DBGUNDO(sk, tp, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); 1940 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
1803 tcp_undo_cwr(sk, 1); 1941 tcp_undo_cwr(sk, 1);
1804 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) 1942 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
1805 NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); 1943 NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
@@ -1819,10 +1957,12 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp)
1819} 1957}
1820 1958
1821/* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */ 1959/* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */
1822static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp) 1960static void tcp_try_undo_dsack(struct sock *sk)
1823{ 1961{
1962 struct tcp_sock *tp = tcp_sk(sk);
1963
1824 if (tp->undo_marker && !tp->undo_retrans) { 1964 if (tp->undo_marker && !tp->undo_retrans) {
1825 DBGUNDO(sk, tp, "D-SACK"); 1965 DBGUNDO(sk, "D-SACK");
1826 tcp_undo_cwr(sk, 1); 1966 tcp_undo_cwr(sk, 1);
1827 tp->undo_marker = 0; 1967 tp->undo_marker = 0;
1828 NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO); 1968 NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO);
@@ -1831,9 +1971,9 @@ static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp)
1831 1971
1832/* Undo during fast recovery after partial ACK. */ 1972/* Undo during fast recovery after partial ACK. */
1833 1973
1834static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp, 1974static int tcp_try_undo_partial(struct sock *sk, int acked)
1835 int acked)
1836{ 1975{
1976 struct tcp_sock *tp = tcp_sk(sk);
1837 /* Partial ACK arrived. Force Hoe's retransmit. */ 1977 /* Partial ACK arrived. Force Hoe's retransmit. */
1838 int failed = IsReno(tp) || tp->fackets_out>tp->reordering; 1978 int failed = IsReno(tp) || tp->fackets_out>tp->reordering;
1839 1979
@@ -1846,7 +1986,7 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,
1846 1986
1847 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); 1987 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
1848 1988
1849 DBGUNDO(sk, tp, "Hoe"); 1989 DBGUNDO(sk, "Hoe");
1850 tcp_undo_cwr(sk, 0); 1990 tcp_undo_cwr(sk, 0);
1851 NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO); 1991 NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO);
1852 1992
@@ -1860,17 +2000,21 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,
1860} 2000}
1861 2001
1862/* Undo during loss recovery after partial ACK. */ 2002/* Undo during loss recovery after partial ACK. */
1863static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp) 2003static int tcp_try_undo_loss(struct sock *sk)
1864{ 2004{
2005 struct tcp_sock *tp = tcp_sk(sk);
2006
1865 if (tcp_may_undo(tp)) { 2007 if (tcp_may_undo(tp)) {
1866 struct sk_buff *skb; 2008 struct sk_buff *skb;
1867 sk_stream_for_retrans_queue(skb, sk) { 2009 tcp_for_write_queue(skb, sk) {
2010 if (skb == tcp_send_head(sk))
2011 break;
1868 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; 2012 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
1869 } 2013 }
1870 2014
1871 clear_all_retrans_hints(tp); 2015 clear_all_retrans_hints(tp);
1872 2016
1873 DBGUNDO(sk, tp, "partial loss"); 2017 DBGUNDO(sk, "partial loss");
1874 tp->lost_out = 0; 2018 tp->lost_out = 0;
1875 tp->left_out = tp->sacked_out; 2019 tp->left_out = tp->sacked_out;
1876 tcp_undo_cwr(sk, 1); 2020 tcp_undo_cwr(sk, 1);
@@ -1892,15 +2036,17 @@ static inline void tcp_complete_cwr(struct sock *sk)
1892 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); 2036 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
1893} 2037}
1894 2038
1895static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag) 2039static void tcp_try_to_open(struct sock *sk, int flag)
1896{ 2040{
2041 struct tcp_sock *tp = tcp_sk(sk);
2042
1897 tp->left_out = tp->sacked_out; 2043 tp->left_out = tp->sacked_out;
1898 2044
1899 if (tp->retrans_out == 0) 2045 if (tp->retrans_out == 0)
1900 tp->retrans_stamp = 0; 2046 tp->retrans_stamp = 0;
1901 2047
1902 if (flag&FLAG_ECE) 2048 if (flag&FLAG_ECE)
1903 tcp_enter_cwr(sk); 2049 tcp_enter_cwr(sk, 1);
1904 2050
1905 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { 2051 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
1906 int state = TCP_CA_Open; 2052 int state = TCP_CA_Open;
@@ -1987,7 +2133,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
1987 before(tp->snd_una, tp->high_seq) && 2133 before(tp->snd_una, tp->high_seq) &&
1988 icsk->icsk_ca_state != TCP_CA_Open && 2134 icsk->icsk_ca_state != TCP_CA_Open &&
1989 tp->fackets_out > tp->reordering) { 2135 tp->fackets_out > tp->reordering) {
1990 tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq); 2136 tcp_mark_head_lost(sk, tp->fackets_out-tp->reordering, tp->high_seq);
1991 NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); 2137 NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
1992 } 2138 }
1993 2139
@@ -1997,14 +2143,13 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
1997 /* E. Check state exit conditions. State can be terminated 2143 /* E. Check state exit conditions. State can be terminated
1998 * when high_seq is ACKed. */ 2144 * when high_seq is ACKed. */
1999 if (icsk->icsk_ca_state == TCP_CA_Open) { 2145 if (icsk->icsk_ca_state == TCP_CA_Open) {
2000 if (!sysctl_tcp_frto) 2146 BUG_TRAP(tp->retrans_out == 0);
2001 BUG_TRAP(tp->retrans_out == 0);
2002 tp->retrans_stamp = 0; 2147 tp->retrans_stamp = 0;
2003 } else if (!before(tp->snd_una, tp->high_seq)) { 2148 } else if (!before(tp->snd_una, tp->high_seq)) {
2004 switch (icsk->icsk_ca_state) { 2149 switch (icsk->icsk_ca_state) {
2005 case TCP_CA_Loss: 2150 case TCP_CA_Loss:
2006 icsk->icsk_retransmits = 0; 2151 icsk->icsk_retransmits = 0;
2007 if (tcp_try_undo_recovery(sk, tp)) 2152 if (tcp_try_undo_recovery(sk))
2008 return; 2153 return;
2009 break; 2154 break;
2010 2155
@@ -2018,7 +2163,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
2018 break; 2163 break;
2019 2164
2020 case TCP_CA_Disorder: 2165 case TCP_CA_Disorder:
2021 tcp_try_undo_dsack(sk, tp); 2166 tcp_try_undo_dsack(sk);
2022 if (!tp->undo_marker || 2167 if (!tp->undo_marker ||
2023 /* For SACK case do not Open to allow to undo 2168 /* For SACK case do not Open to allow to undo
2024 * catching for all duplicate ACKs. */ 2169 * catching for all duplicate ACKs. */
@@ -2031,7 +2176,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
2031 case TCP_CA_Recovery: 2176 case TCP_CA_Recovery:
2032 if (IsReno(tp)) 2177 if (IsReno(tp))
2033 tcp_reset_reno_sack(tp); 2178 tcp_reset_reno_sack(tp);
2034 if (tcp_try_undo_recovery(sk, tp)) 2179 if (tcp_try_undo_recovery(sk))
2035 return; 2180 return;
2036 tcp_complete_cwr(sk); 2181 tcp_complete_cwr(sk);
2037 break; 2182 break;
@@ -2047,14 +2192,14 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
2047 } else { 2192 } else {
2048 int acked = prior_packets - tp->packets_out; 2193 int acked = prior_packets - tp->packets_out;
2049 if (IsReno(tp)) 2194 if (IsReno(tp))
2050 tcp_remove_reno_sacks(sk, tp, acked); 2195 tcp_remove_reno_sacks(sk, acked);
2051 is_dupack = tcp_try_undo_partial(sk, tp, acked); 2196 is_dupack = tcp_try_undo_partial(sk, acked);
2052 } 2197 }
2053 break; 2198 break;
2054 case TCP_CA_Loss: 2199 case TCP_CA_Loss:
2055 if (flag&FLAG_DATA_ACKED) 2200 if (flag&FLAG_DATA_ACKED)
2056 icsk->icsk_retransmits = 0; 2201 icsk->icsk_retransmits = 0;
2057 if (!tcp_try_undo_loss(sk, tp)) { 2202 if (!tcp_try_undo_loss(sk)) {
2058 tcp_moderate_cwnd(tp); 2203 tcp_moderate_cwnd(tp);
2059 tcp_xmit_retransmit_queue(sk); 2204 tcp_xmit_retransmit_queue(sk);
2060 return; 2205 return;
@@ -2071,10 +2216,10 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
2071 } 2216 }
2072 2217
2073 if (icsk->icsk_ca_state == TCP_CA_Disorder) 2218 if (icsk->icsk_ca_state == TCP_CA_Disorder)
2074 tcp_try_undo_dsack(sk, tp); 2219 tcp_try_undo_dsack(sk);
2075 2220
2076 if (!tcp_time_to_recover(sk, tp)) { 2221 if (!tcp_time_to_recover(sk)) {
2077 tcp_try_to_open(sk, tp, flag); 2222 tcp_try_to_open(sk, flag);
2078 return; 2223 return;
2079 } 2224 }
2080 2225
@@ -2113,8 +2258,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
2113 tcp_set_ca_state(sk, TCP_CA_Recovery); 2258 tcp_set_ca_state(sk, TCP_CA_Recovery);
2114 } 2259 }
2115 2260
2116 if (is_dupack || tcp_head_timedout(sk, tp)) 2261 if (is_dupack || tcp_head_timedout(sk))
2117 tcp_update_scoreboard(sk, tp); 2262 tcp_update_scoreboard(sk);
2118 tcp_cwnd_down(sk); 2263 tcp_cwnd_down(sk);
2119 tcp_xmit_retransmit_queue(sk); 2264 tcp_xmit_retransmit_queue(sk);
2120} 2265}
@@ -2190,8 +2335,10 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
2190 * RFC2988 recommends to restart timer to now+rto. 2335 * RFC2988 recommends to restart timer to now+rto.
2191 */ 2336 */
2192 2337
2193static void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) 2338static void tcp_ack_packets_out(struct sock *sk)
2194{ 2339{
2340 struct tcp_sock *tp = tcp_sk(sk);
2341
2195 if (!tp->packets_out) { 2342 if (!tp->packets_out) {
2196 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); 2343 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
2197 } else { 2344 } else {
@@ -2255,14 +2402,6 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
2255 return acked; 2402 return acked;
2256} 2403}
2257 2404
2258static u32 tcp_usrtt(struct timeval *tv)
2259{
2260 struct timeval now;
2261
2262 do_gettimeofday(&now);
2263 return (now.tv_sec - tv->tv_sec) * 1000000 + (now.tv_usec - tv->tv_usec);
2264}
2265
2266/* Remove acknowledged frames from the retransmission queue. */ 2405/* Remove acknowledged frames from the retransmission queue. */
2267static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) 2406static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
2268{ 2407{
@@ -2273,12 +2412,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
2273 int acked = 0; 2412 int acked = 0;
2274 __s32 seq_rtt = -1; 2413 __s32 seq_rtt = -1;
2275 u32 pkts_acked = 0; 2414 u32 pkts_acked = 0;
2276 void (*rtt_sample)(struct sock *sk, u32 usrtt) 2415 ktime_t last_ackt = ktime_set(0,0);
2277 = icsk->icsk_ca_ops->rtt_sample;
2278 struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
2279 2416
2280 while ((skb = skb_peek(&sk->sk_write_queue)) && 2417 while ((skb = tcp_write_queue_head(sk)) &&
2281 skb != sk->sk_send_head) { 2418 skb != tcp_send_head(sk)) {
2282 struct tcp_skb_cb *scb = TCP_SKB_CB(skb); 2419 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
2283 __u8 sacked = scb->sacked; 2420 __u8 sacked = scb->sacked;
2284 2421
@@ -2318,13 +2455,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
2318 2455
2319 if (sacked) { 2456 if (sacked) {
2320 if (sacked & TCPCB_RETRANS) { 2457 if (sacked & TCPCB_RETRANS) {
2321 if(sacked & TCPCB_SACKED_RETRANS) 2458 if (sacked & TCPCB_SACKED_RETRANS)
2322 tp->retrans_out -= tcp_skb_pcount(skb); 2459 tp->retrans_out -= tcp_skb_pcount(skb);
2323 acked |= FLAG_RETRANS_DATA_ACKED; 2460 acked |= FLAG_RETRANS_DATA_ACKED;
2324 seq_rtt = -1; 2461 seq_rtt = -1;
2325 } else if (seq_rtt < 0) { 2462 } else if (seq_rtt < 0) {
2326 seq_rtt = now - scb->when; 2463 seq_rtt = now - scb->when;
2327 skb_get_timestamp(skb, &tv); 2464 last_ackt = skb->tstamp;
2328 } 2465 }
2329 if (sacked & TCPCB_SACKED_ACKED) 2466 if (sacked & TCPCB_SACKED_ACKED)
2330 tp->sacked_out -= tcp_skb_pcount(skb); 2467 tp->sacked_out -= tcp_skb_pcount(skb);
@@ -2337,23 +2474,24 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
2337 } 2474 }
2338 } else if (seq_rtt < 0) { 2475 } else if (seq_rtt < 0) {
2339 seq_rtt = now - scb->when; 2476 seq_rtt = now - scb->when;
2340 skb_get_timestamp(skb, &tv); 2477 last_ackt = skb->tstamp;
2341 } 2478 }
2342 tcp_dec_pcount_approx(&tp->fackets_out, skb); 2479 tcp_dec_pcount_approx(&tp->fackets_out, skb);
2343 tcp_packets_out_dec(tp, skb); 2480 tcp_packets_out_dec(tp, skb);
2344 __skb_unlink(skb, &sk->sk_write_queue); 2481 tcp_unlink_write_queue(skb, sk);
2345 sk_stream_free_skb(sk, skb); 2482 sk_stream_free_skb(sk, skb);
2346 clear_all_retrans_hints(tp); 2483 clear_all_retrans_hints(tp);
2347 } 2484 }
2348 2485
2349 if (acked&FLAG_ACKED) { 2486 if (acked&FLAG_ACKED) {
2487 const struct tcp_congestion_ops *ca_ops
2488 = inet_csk(sk)->icsk_ca_ops;
2489
2350 tcp_ack_update_rtt(sk, acked, seq_rtt); 2490 tcp_ack_update_rtt(sk, acked, seq_rtt);
2351 tcp_ack_packets_out(sk, tp); 2491 tcp_ack_packets_out(sk);
2352 if (rtt_sample && !(acked & FLAG_RETRANS_DATA_ACKED))
2353 (*rtt_sample)(sk, tcp_usrtt(&tv));
2354 2492
2355 if (icsk->icsk_ca_ops->pkts_acked) 2493 if (ca_ops->pkts_acked)
2356 icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked); 2494 ca_ops->pkts_acked(sk, pkts_acked, last_ackt);
2357 } 2495 }
2358 2496
2359#if FASTRETRANS_DEBUG > 0 2497#if FASTRETRANS_DEBUG > 0
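The hunk above drops the timeval-based tcp_usrtt() in favour of handing the skb's ktime_t timestamp to pkts_acked(). The payoff is arithmetic; a sketch contrasting the two deltas, using a plain int64 nanosecond count as a stand-in for ktime_t:

	#include <stdint.h>
	#include <sys/time.h>

	/* Old style: two-field carry arithmetic on struct timeval. */
	static long usrtt_timeval(const struct timeval *then, const struct timeval *now)
	{
		return (now->tv_sec - then->tv_sec) * 1000000L +
		       (now->tv_usec - then->tv_usec);
	}

	/* New style: one subtraction on a scalar nanosecond count. */
	typedef int64_t ktime_ns;	/* stand-in, not the kernel ktime API */

	static int64_t usrtt_ktime(ktime_ns then, ktime_ns now)
	{
		return (now - then) / 1000;	/* ns -> us */
	}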
@@ -2390,7 +2528,7 @@ static void tcp_ack_probe(struct sock *sk)
2390 2528
2391 /* Was it a usable window open? */ 2529 /* Was it a usable window open? */
2392 2530
2393 if (!after(TCP_SKB_CB(sk->sk_send_head)->end_seq, 2531 if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
2394 tp->snd_una + tp->snd_wnd)) { 2532 tp->snd_una + tp->snd_wnd)) {
2395 icsk->icsk_backoff = 0; 2533 icsk->icsk_backoff = 0;
2396 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0); 2534 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
@@ -2433,13 +2571,14 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp, const u32 ack
 2433 * The window update algorithm described in RFC793/RFC1122 (used in linux-2.2 2571 * The window update algorithm described in RFC793/RFC1122 (used in linux-2.2
 2434 * and in FreeBSD; NetBSD's is even worse) is wrong. 2572 * and in FreeBSD; NetBSD's is even worse) is wrong.
2435 */ 2573 */
2436static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp, 2574static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack,
2437 struct sk_buff *skb, u32 ack, u32 ack_seq) 2575 u32 ack_seq)
2438{ 2576{
2577 struct tcp_sock *tp = tcp_sk(sk);
2439 int flag = 0; 2578 int flag = 0;
2440 u32 nwin = ntohs(skb->h.th->window); 2579 u32 nwin = ntohs(tcp_hdr(skb)->window);
2441 2580
2442 if (likely(!skb->h.th->syn)) 2581 if (likely(!tcp_hdr(skb)->syn))
2443 nwin <<= tp->rx_opt.snd_wscale; 2582 nwin <<= tp->rx_opt.snd_wscale;
2444 2583
2445 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) { 2584 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
@@ -2453,7 +2592,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
2453 * fast path is recovered for sending TCP. 2592 * fast path is recovered for sending TCP.
2454 */ 2593 */
2455 tp->pred_flags = 0; 2594 tp->pred_flags = 0;
2456 tcp_fast_path_check(sk, tp); 2595 tcp_fast_path_check(sk);
2457 2596
2458 if (nwin > tp->max_window) { 2597 if (nwin > tp->max_window) {
2459 tp->max_window = nwin; 2598 tp->max_window = nwin;
@@ -2467,39 +2606,128 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
2467 return flag; 2606 return flag;
2468} 2607}
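A sketch of the advertised-window computation above; RFC 1323 forbids applying the window scale to SYN segments, which is what the syn test guards (host-order input assumed, simplified):

	#include <stdint.h>

	static uint32_t advertised_window(uint16_t raw_win, int syn, int snd_wscale)
	{
		uint32_t nwin = raw_win;

		if (!syn)			/* scale everything but SYNs */
			nwin <<= snd_wscale;
		return nwin;
	}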
2469 2608
2470static void tcp_process_frto(struct sock *sk, u32 prior_snd_una) 2609/* A very conservative spurious RTO response algorithm: reduce cwnd and
2610 * continue in congestion avoidance.
2611 */
2612static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
2613{
2614 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
2615 tp->snd_cwnd_cnt = 0;
2616 tcp_moderate_cwnd(tp);
2617}
2618
2619/* A conservative spurious RTO response algorithm: reduce cwnd using
2620 * rate halving and continue in congestion avoidance.
2621 */
2622static void tcp_ratehalving_spur_to_response(struct sock *sk)
2623{
2624 tcp_enter_cwr(sk, 0);
2625}
2626
2627static void tcp_undo_spur_to_response(struct sock *sk, int flag)
2628{
2629 if (flag&FLAG_ECE)
2630 tcp_ratehalving_spur_to_response(sk);
2631 else
2632 tcp_undo_cwr(sk, 1);
2633}
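The conservative response boils down to two clamps; a sketch on plain integers, where the burst allowance of 3 mirrors what tcp_moderate_cwnd() uses but is stated here as an assumption rather than a quoted constant:

	#include <stdint.h>

	static void conservative_spur_to_response(uint32_t *snd_cwnd,
						  uint32_t snd_ssthresh,
						  uint32_t in_flight)
	{
		if (*snd_cwnd > snd_ssthresh)		/* back to avoidance */
			*snd_cwnd = snd_ssthresh;
		if (*snd_cwnd > in_flight + 3)		/* rough tcp_moderate_cwnd() */
			*snd_cwnd = in_flight + 3;
	}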
2634
2635/* F-RTO spurious RTO detection algorithm (RFC4138)
2636 *
 2637 * F-RTO spans the two new ACKs following an RTO (well, almost; see inline
 2638 * comments). State (ACK number) is kept in frto_counter. When an ACK advances
2639 * window (but not to or beyond highest sequence sent before RTO):
2640 * On First ACK, send two new segments out.
2641 * On Second ACK, RTO was likely spurious. Do spurious response (response
2642 * algorithm is not part of the F-RTO detection algorithm
2643 * given in RFC4138 but can be selected separately).
2644 * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss
2645 * and TCP falls back to conventional RTO recovery.
2646 *
2647 * Rationale: if the RTO was spurious, new ACKs should arrive from the
2648 * original window even after we transmit two new data segments.
2649 *
2650 * SACK version:
 2651 * in the first step, wait until the first cumulative ACK arrives, then move
 2652 * to the second step. In the second step, the next ACK decides.
2653 *
2654 * F-RTO is implemented (mainly) in four functions:
 2655 * - tcp_use_frto() is used to determine if TCP can use F-RTO
 2656 * - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used; it is
2657 * called when tcp_use_frto() showed green light
2658 * - tcp_process_frto() handles incoming ACKs during F-RTO algorithm
2659 * - tcp_enter_frto_loss() is called if there is not enough evidence
2660 * to prove that the RTO is indeed spurious. It transfers the control
2661 * from F-RTO to the conventional RTO recovery
2662 */
2663static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
2471{ 2664{
2472 struct tcp_sock *tp = tcp_sk(sk); 2665 struct tcp_sock *tp = tcp_sk(sk);
2473 2666
2474 tcp_sync_left_out(tp); 2667 tcp_sync_left_out(tp);
2475 2668
2476 if (tp->snd_una == prior_snd_una || 2669 /* Duplicate the behavior from Loss state (fastretrans_alert) */
2477 !before(tp->snd_una, tp->frto_highmark)) { 2670 if (flag&FLAG_DATA_ACKED)
2478 /* RTO was caused by loss, start retransmitting in 2671 inet_csk(sk)->icsk_retransmits = 0;
2479 * go-back-N slow start 2672
2480 */ 2673 if (!before(tp->snd_una, tp->frto_highmark)) {
2481 tcp_enter_frto_loss(sk); 2674 tcp_enter_frto_loss(sk, tp->frto_counter + 1, flag);
2482 return; 2675 return 1;
2483 } 2676 }
2484 2677
2485 if (tp->frto_counter == 1) { 2678 if (!IsSackFrto() || IsReno(tp)) {
2486 /* First ACK after RTO advances the window: allow two new 2679 /* RFC4138 shortcoming in step 2; should also have case c):
 2487 * segments out. 2680 * ACK is neither a duplicate nor a window advance, e.g., opposite dir
2681 * data, winupdate
2488 */ 2682 */
2489 tp->snd_cwnd = tcp_packets_in_flight(tp) + 2; 2683 if ((tp->snd_una == prior_snd_una) && (flag&FLAG_NOT_DUP) &&
2684 !(flag&FLAG_FORWARD_PROGRESS))
2685 return 1;
2686
2687 if (!(flag&FLAG_DATA_ACKED)) {
2688 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
2689 flag);
2690 return 1;
2691 }
2490 } else { 2692 } else {
2491 /* Also the second ACK after RTO advances the window. 2693 if (!(flag&FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
2492 * The RTO was likely spurious. Reduce cwnd and continue 2694 /* Prevent sending of new data. */
2493 * in congestion avoidance 2695 tp->snd_cwnd = min(tp->snd_cwnd,
2494 */ 2696 tcp_packets_in_flight(tp));
2495 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); 2697 return 1;
2496 tcp_moderate_cwnd(tp); 2698 }
2699
2700 if ((tp->frto_counter == 2) &&
2701 (!(flag&FLAG_FORWARD_PROGRESS) ||
2702 ((flag&FLAG_DATA_SACKED) && !(flag&FLAG_ONLY_ORIG_SACKED)))) {
2703 /* RFC4138 shortcoming (see comment above) */
2704 if (!(flag&FLAG_FORWARD_PROGRESS) && (flag&FLAG_NOT_DUP))
2705 return 1;
2706
2707 tcp_enter_frto_loss(sk, 3, flag);
2708 return 1;
2709 }
2497 } 2710 }
2498 2711
2499 /* F-RTO affects on two new ACKs following RTO. 2712 if (tp->frto_counter == 1) {
2500 * At latest on third ACK the TCP behavior is back to normal. 2713 tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
2501 */ 2714 tp->frto_counter = 2;
2502 tp->frto_counter = (tp->frto_counter + 1) % 3; 2715 return 1;
2716 } else /* frto_counter == 2 */ {
2717 switch (sysctl_tcp_frto_response) {
2718 case 2:
2719 tcp_undo_spur_to_response(sk, flag);
2720 break;
2721 case 1:
2722 tcp_conservative_spur_to_response(tp);
2723 break;
2724 default:
2725 tcp_ratehalving_spur_to_response(sk);
2726 break;
2727 }
2728 tp->frto_counter = 0;
2729 }
2730 return 0;
2503} 2731}
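Stripped of the window checks and the SACK variant, the non-SACK ACK processing above is a two-state machine on frto_counter; a hedged sketch (the enum and names are illustrative, not the kernel's):

	enum frto_verdict { FRTO_CONTINUE, FRTO_SPURIOUS, FRTO_LOSS };

	static enum frto_verdict frto_step(int *frto_counter, int data_acked)
	{
		if (!data_acked)		/* duplicate ACK: real loss, */
			return FRTO_LOSS;	/* fall back to go-back-N */
		if (*frto_counter == 1) {
			*frto_counter = 2;	/* first new ACK: send 2 new segs */
			return FRTO_CONTINUE;
		}
		*frto_counter = 0;		/* second new ACK: RTO was spurious */
		return FRTO_SPURIOUS;		/* response picked via the sysctl */
	}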
2504 2732
2505/* This routine deals with incoming acks, but not outgoing ones. */ 2733/* This routine deals with incoming acks, but not outgoing ones. */
@@ -2513,6 +2741,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
2513 u32 prior_in_flight; 2741 u32 prior_in_flight;
2514 s32 seq_rtt; 2742 s32 seq_rtt;
2515 int prior_packets; 2743 int prior_packets;
2744 int frto_cwnd = 0;
2516 2745
2517 /* If the ack is newer than sent or older than previous acks 2746 /* If the ack is newer than sent or older than previous acks
2518 * then we can probably ignore it. 2747 * then we can probably ignore it.
@@ -2549,12 +2778,12 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
2549 else 2778 else
2550 NET_INC_STATS_BH(LINUX_MIB_TCPPUREACKS); 2779 NET_INC_STATS_BH(LINUX_MIB_TCPPUREACKS);
2551 2780
2552 flag |= tcp_ack_update_window(sk, tp, skb, ack, ack_seq); 2781 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
2553 2782
2554 if (TCP_SKB_CB(skb)->sacked) 2783 if (TCP_SKB_CB(skb)->sacked)
2555 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); 2784 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
2556 2785
2557 if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th)) 2786 if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
2558 flag |= FLAG_ECE; 2787 flag |= FLAG_ECE;
2559 2788
2560 tcp_ca_event(sk, CA_EVENT_SLOW_ACK); 2789 tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
@@ -2575,15 +2804,16 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
2575 flag |= tcp_clean_rtx_queue(sk, &seq_rtt); 2804 flag |= tcp_clean_rtx_queue(sk, &seq_rtt);
2576 2805
2577 if (tp->frto_counter) 2806 if (tp->frto_counter)
2578 tcp_process_frto(sk, prior_snd_una); 2807 frto_cwnd = tcp_process_frto(sk, prior_snd_una, flag);
2579 2808
2580 if (tcp_ack_is_dubious(sk, flag)) { 2809 if (tcp_ack_is_dubious(sk, flag)) {
2581 /* Advance CWND, if state allows this. */ 2810 /* Advance CWND, if state allows this. */
2582 if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) 2811 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
2812 tcp_may_raise_cwnd(sk, flag))
2583 tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0); 2813 tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0);
2584 tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); 2814 tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
2585 } else { 2815 } else {
2586 if ((flag & FLAG_DATA_ACKED)) 2816 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
2587 tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 1); 2817 tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 1);
2588 } 2818 }
2589 2819
@@ -2599,7 +2829,7 @@ no_queue:
2599 * being used to time the probes, and is probably far higher than 2829 * being used to time the probes, and is probably far higher than
2600 * it needs to be for normal retransmission. 2830 * it needs to be for normal retransmission.
2601 */ 2831 */
2602 if (sk->sk_send_head) 2832 if (tcp_send_head(sk))
2603 tcp_ack_probe(sk); 2833 tcp_ack_probe(sk);
2604 return 1; 2834 return 1;
2605 2835
@@ -2620,13 +2850,13 @@ uninteresting_ack:
2620void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab) 2850void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab)
2621{ 2851{
2622 unsigned char *ptr; 2852 unsigned char *ptr;
2623 struct tcphdr *th = skb->h.th; 2853 struct tcphdr *th = tcp_hdr(skb);
2624 int length=(th->doff*4)-sizeof(struct tcphdr); 2854 int length=(th->doff*4)-sizeof(struct tcphdr);
2625 2855
2626 ptr = (unsigned char *)(th + 1); 2856 ptr = (unsigned char *)(th + 1);
2627 opt_rx->saw_tstamp = 0; 2857 opt_rx->saw_tstamp = 0;
2628 2858
2629 while(length>0) { 2859 while (length > 0) {
2630 int opcode=*ptr++; 2860 int opcode=*ptr++;
2631 int opsize; 2861 int opsize;
2632 2862
@@ -2642,9 +2872,9 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
2642 return; 2872 return;
2643 if (opsize > length) 2873 if (opsize > length)
2644 return; /* don't parse partial options */ 2874 return; /* don't parse partial options */
2645 switch(opcode) { 2875 switch (opcode) {
2646 case TCPOPT_MSS: 2876 case TCPOPT_MSS:
2647 if(opsize==TCPOLEN_MSS && th->syn && !estab) { 2877 if (opsize==TCPOLEN_MSS && th->syn && !estab) {
2648 u16 in_mss = ntohs(get_unaligned((__be16 *)ptr)); 2878 u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
2649 if (in_mss) { 2879 if (in_mss) {
2650 if (opt_rx->user_mss && opt_rx->user_mss < in_mss) 2880 if (opt_rx->user_mss && opt_rx->user_mss < in_mss)
@@ -2654,12 +2884,12 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
2654 } 2884 }
2655 break; 2885 break;
2656 case TCPOPT_WINDOW: 2886 case TCPOPT_WINDOW:
2657 if(opsize==TCPOLEN_WINDOW && th->syn && !estab) 2887 if (opsize==TCPOLEN_WINDOW && th->syn && !estab)
2658 if (sysctl_tcp_window_scaling) { 2888 if (sysctl_tcp_window_scaling) {
2659 __u8 snd_wscale = *(__u8 *) ptr; 2889 __u8 snd_wscale = *(__u8 *) ptr;
2660 opt_rx->wscale_ok = 1; 2890 opt_rx->wscale_ok = 1;
2661 if (snd_wscale > 14) { 2891 if (snd_wscale > 14) {
2662 if(net_ratelimit()) 2892 if (net_ratelimit())
2663 printk(KERN_INFO "tcp_parse_options: Illegal window " 2893 printk(KERN_INFO "tcp_parse_options: Illegal window "
2664 "scaling value %d >14 received.\n", 2894 "scaling value %d >14 received.\n",
2665 snd_wscale); 2895 snd_wscale);
@@ -2669,7 +2899,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
2669 } 2899 }
2670 break; 2900 break;
2671 case TCPOPT_TIMESTAMP: 2901 case TCPOPT_TIMESTAMP:
2672 if(opsize==TCPOLEN_TIMESTAMP) { 2902 if (opsize==TCPOLEN_TIMESTAMP) {
2673 if ((estab && opt_rx->tstamp_ok) || 2903 if ((estab && opt_rx->tstamp_ok) ||
2674 (!estab && sysctl_tcp_timestamps)) { 2904 (!estab && sysctl_tcp_timestamps)) {
2675 opt_rx->saw_tstamp = 1; 2905 opt_rx->saw_tstamp = 1;
@@ -2679,7 +2909,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
2679 } 2909 }
2680 break; 2910 break;
2681 case TCPOPT_SACK_PERM: 2911 case TCPOPT_SACK_PERM:
2682 if(opsize==TCPOLEN_SACK_PERM && th->syn && !estab) { 2912 if (opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
2683 if (sysctl_tcp_sack) { 2913 if (sysctl_tcp_sack) {
2684 opt_rx->sack_ok = 1; 2914 opt_rx->sack_ok = 1;
2685 tcp_sack_reset(opt_rx); 2915 tcp_sack_reset(opt_rx);
@@ -2688,7 +2918,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
2688 break; 2918 break;
2689 2919
2690 case TCPOPT_SACK: 2920 case TCPOPT_SACK:
2691 if((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) && 2921 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
2692 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) && 2922 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
2693 opt_rx->sack_ok) { 2923 opt_rx->sack_ok) {
2694 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th; 2924 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
@@ -2701,10 +2931,11 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
2701 */ 2931 */
2702 break; 2932 break;
2703#endif 2933#endif
2704 }; 2934 }
2935
2705 ptr+=opsize-2; 2936 ptr+=opsize-2;
2706 length-=opsize; 2937 length-=opsize;
2707 }; 2938 }
2708 } 2939 }
2709} 2940}
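The option walk being whitespace-cleaned above follows the classic kind/length/value layout; a self-contained userspace sketch of the same loop shape, with the per-option dispatch elided:

	#include <stdint.h>

	static void parse_options(const uint8_t *ptr, int length)
	{
		while (length > 0) {
			int opcode = *ptr++;
			int opsize;

			if (opcode == 0)	/* TCPOPT_EOL: end of options */
				return;
			if (opcode == 1) {	/* TCPOPT_NOP: one byte, no length */
				length--;
				continue;
			}
			opsize = *ptr++;
			if (opsize < 2 || opsize > length)
				return;		/* silly or truncated option */
			/* ... dispatch on opcode over opsize - 2 payload bytes ... */
			ptr += opsize - 2;
			length -= opsize;
		}
	}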
2710 2941
@@ -2737,7 +2968,7 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
2737static inline void tcp_store_ts_recent(struct tcp_sock *tp) 2968static inline void tcp_store_ts_recent(struct tcp_sock *tp)
2738{ 2969{
2739 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; 2970 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
2740 tp->rx_opt.ts_recent_stamp = xtime.tv_sec; 2971 tp->rx_opt.ts_recent_stamp = get_seconds();
2741} 2972}
2742 2973
2743static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) 2974static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
@@ -2750,8 +2981,8 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
2750 * Not only, also it occurs for expired timestamps. 2981 * Not only, also it occurs for expired timestamps.
2751 */ 2982 */
2752 2983
2753 if((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 || 2984 if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 ||
2754 xtime.tv_sec >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS) 2985 get_seconds() >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS)
2755 tcp_store_ts_recent(tp); 2986 tcp_store_ts_recent(tp);
2756 } 2987 }
2757} 2988}
@@ -2782,7 +3013,7 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
2782static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) 3013static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
2783{ 3014{
2784 struct tcp_sock *tp = tcp_sk(sk); 3015 struct tcp_sock *tp = tcp_sk(sk);
2785 struct tcphdr *th = skb->h.th; 3016 struct tcphdr *th = tcp_hdr(skb);
2786 u32 seq = TCP_SKB_CB(skb)->seq; 3017 u32 seq = TCP_SKB_CB(skb)->seq;
2787 u32 ack = TCP_SKB_CB(skb)->ack_seq; 3018 u32 ack = TCP_SKB_CB(skb)->ack_seq;
2788 3019
@@ -2803,7 +3034,7 @@ static inline int tcp_paws_discard(const struct sock *sk, const struct sk_buff *
2803{ 3034{
2804 const struct tcp_sock *tp = tcp_sk(sk); 3035 const struct tcp_sock *tp = tcp_sk(sk);
2805 return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW && 3036 return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW &&
2806 xtime.tv_sec < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS && 3037 get_seconds() < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS &&
2807 !tcp_disordered_ack(sk, skb)); 3038 !tcp_disordered_ack(sk, skb));
2808} 3039}
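A sketch of the PAWS predicate above, with get_seconds() abstracted to a parameter; the two constants are the kernel's (a TCP_PAWS_WINDOW tick of tolerance, a ~24-day staleness escape), everything else is simplified:

	#include <stdbool.h>
	#include <stdint.h>

	#define TCP_PAWS_WINDOW	1
	#define TCP_PAWS_24DAYS	(60 * 60 * 24 * 24)

	static bool paws_discard(uint32_t ts_recent, uint32_t rcv_tsval,
				 long now_sec, long ts_recent_stamp)
	{
		/* Reject if the echoed timestamp regressed, unless ts_recent
		 * itself is so old that it can no longer be trusted. */
		return (int32_t)(ts_recent - rcv_tsval) > TCP_PAWS_WINDOW &&
		       now_sec < ts_recent_stamp + TCP_PAWS_24DAYS;
	}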
2809 3040
@@ -2910,7 +3141,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
2910 printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n", 3141 printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
2911 __FUNCTION__, sk->sk_state); 3142 __FUNCTION__, sk->sk_state);
2912 break; 3143 break;
2913 }; 3144 }
2914 3145
2915 /* It _is_ possible, that we have something out-of-order _after_ FIN. 3146 /* It _is_ possible, that we have something out-of-order _after_ FIN.
2916 * Probably, we should reset in this case. For now drop them. 3147 * Probably, we should reset in this case. For now drop them.
@@ -3009,7 +3240,7 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
3009 */ 3240 */
3010 tp->rx_opt.num_sacks--; 3241 tp->rx_opt.num_sacks--;
3011 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok); 3242 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
3012 for(i=this_sack; i < tp->rx_opt.num_sacks; i++) 3243 for (i=this_sack; i < tp->rx_opt.num_sacks; i++)
3013 sp[i] = sp[i+1]; 3244 sp[i] = sp[i+1];
3014 continue; 3245 continue;
3015 } 3246 }
@@ -3062,7 +3293,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
3062 tp->rx_opt.num_sacks--; 3293 tp->rx_opt.num_sacks--;
3063 sp--; 3294 sp--;
3064 } 3295 }
3065 for(; this_sack > 0; this_sack--, sp--) 3296 for (; this_sack > 0; this_sack--, sp--)
3066 *sp = *(sp-1); 3297 *sp = *(sp-1);
3067 3298
3068new_sack: 3299new_sack:
@@ -3088,7 +3319,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
3088 return; 3319 return;
3089 } 3320 }
3090 3321
3091 for(this_sack = 0; this_sack < num_sacks; ) { 3322 for (this_sack = 0; this_sack < num_sacks; ) {
3092 /* Check if the start of the sack is covered by RCV.NXT. */ 3323 /* Check if the start of the sack is covered by RCV.NXT. */
3093 if (!before(tp->rcv_nxt, sp->start_seq)) { 3324 if (!before(tp->rcv_nxt, sp->start_seq)) {
3094 int i; 3325 int i;
@@ -3144,8 +3375,8 @@ static void tcp_ofo_queue(struct sock *sk)
3144 __skb_unlink(skb, &tp->out_of_order_queue); 3375 __skb_unlink(skb, &tp->out_of_order_queue);
3145 __skb_queue_tail(&sk->sk_receive_queue, skb); 3376 __skb_queue_tail(&sk->sk_receive_queue, skb);
3146 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 3377 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
3147 if(skb->h.th->fin) 3378 if (tcp_hdr(skb)->fin)
3148 tcp_fin(skb, sk, skb->h.th); 3379 tcp_fin(skb, sk, tcp_hdr(skb));
3149 } 3380 }
3150} 3381}
3151 3382
@@ -3153,7 +3384,7 @@ static int tcp_prune_queue(struct sock *sk);
3153 3384
3154static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) 3385static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
3155{ 3386{
3156 struct tcphdr *th = skb->h.th; 3387 struct tcphdr *th = tcp_hdr(skb);
3157 struct tcp_sock *tp = tcp_sk(sk); 3388 struct tcp_sock *tp = tcp_sk(sk);
3158 int eaten = -1; 3389 int eaten = -1;
3159 3390
@@ -3210,9 +3441,9 @@ queue_and_out:
3210 __skb_queue_tail(&sk->sk_receive_queue, skb); 3441 __skb_queue_tail(&sk->sk_receive_queue, skb);
3211 } 3442 }
3212 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 3443 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
3213 if(skb->len) 3444 if (skb->len)
3214 tcp_event_data_recv(sk, tp, skb); 3445 tcp_event_data_recv(sk, skb);
3215 if(th->fin) 3446 if (th->fin)
3216 tcp_fin(skb, sk, th); 3447 tcp_fin(skb, sk, th);
3217 3448
3218 if (!skb_queue_empty(&tp->out_of_order_queue)) { 3449 if (!skb_queue_empty(&tp->out_of_order_queue)) {
@@ -3228,7 +3459,7 @@ queue_and_out:
3228 if (tp->rx_opt.num_sacks) 3459 if (tp->rx_opt.num_sacks)
3229 tcp_sack_remove(tp); 3460 tcp_sack_remove(tp);
3230 3461
3231 tcp_fast_path_check(sk, tp); 3462 tcp_fast_path_check(sk);
3232 3463
3233 if (eaten > 0) 3464 if (eaten > 0)
3234 __kfree_skb(skb); 3465 __kfree_skb(skb);
@@ -3392,7 +3623,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
3392 * - bloated or contains data before "start" or 3623 * - bloated or contains data before "start" or
3393 * overlaps to the next one. 3624 * overlaps to the next one.
3394 */ 3625 */
3395 if (!skb->h.th->syn && !skb->h.th->fin && 3626 if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
3396 (tcp_win_from_space(skb->truesize) > skb->len || 3627 (tcp_win_from_space(skb->truesize) > skb->len ||
3397 before(TCP_SKB_CB(skb)->seq, start) || 3628 before(TCP_SKB_CB(skb)->seq, start) ||
3398 (skb->next != tail && 3629 (skb->next != tail &&
@@ -3403,7 +3634,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
3403 start = TCP_SKB_CB(skb)->end_seq; 3634 start = TCP_SKB_CB(skb)->end_seq;
3404 skb = skb->next; 3635 skb = skb->next;
3405 } 3636 }
3406 if (skb == tail || skb->h.th->syn || skb->h.th->fin) 3637 if (skb == tail || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
3407 return; 3638 return;
3408 3639
3409 while (before(start, end)) { 3640 while (before(start, end)) {
@@ -3419,11 +3650,14 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
3419 nskb = alloc_skb(copy+header, GFP_ATOMIC); 3650 nskb = alloc_skb(copy+header, GFP_ATOMIC);
3420 if (!nskb) 3651 if (!nskb)
3421 return; 3652 return;
3653
3654 skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
3655 skb_set_network_header(nskb, (skb_network_header(skb) -
3656 skb->head));
3657 skb_set_transport_header(nskb, (skb_transport_header(skb) -
3658 skb->head));
3422 skb_reserve(nskb, header); 3659 skb_reserve(nskb, header);
3423 memcpy(nskb->head, skb->head, header); 3660 memcpy(nskb->head, skb->head, header);
3424 nskb->nh.raw = nskb->head + (skb->nh.raw-skb->head);
3425 nskb->h.raw = nskb->head + (skb->h.raw-skb->head);
3426 nskb->mac.raw = nskb->head + (skb->mac.raw-skb->head);
3427 memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); 3661 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
3428 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; 3662 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
3429 __skb_insert(nskb, skb->prev, skb, list); 3663 __skb_insert(nskb, skb->prev, skb, list);
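The three skb_set_*_header() calls above store header positions as offsets from skb->head instead of raw pointers; a toy illustration of why offsets survive the copy into a freshly allocated buffer (hypothetical struct, not the kernel's sk_buff):

	#include <stddef.h>

	struct toybuf {
		unsigned char *head;
		size_t mac_off, net_off, th_off;	/* offsets from head */
	};

	static void copy_header_offsets(struct toybuf *dst, const struct toybuf *src)
	{
		/* dst->head != src->head, yet the offsets remain valid;
		 * copying raw header pointers would alias the old buffer. */
		dst->mac_off = src->mac_off;
		dst->net_off = src->net_off;
		dst->th_off = src->th_off;
	}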
@@ -3449,7 +3683,9 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
3449 __kfree_skb(skb); 3683 __kfree_skb(skb);
3450 NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED); 3684 NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
3451 skb = next; 3685 skb = next;
3452 if (skb == tail || skb->h.th->syn || skb->h.th->fin) 3686 if (skb == tail ||
3687 tcp_hdr(skb)->syn ||
3688 tcp_hdr(skb)->fin)
3453 return; 3689 return;
3454 } 3690 }
3455 } 3691 }
@@ -3514,7 +3750,7 @@ static int tcp_prune_queue(struct sock *sk)
3514 NET_INC_STATS_BH(LINUX_MIB_PRUNECALLED); 3750 NET_INC_STATS_BH(LINUX_MIB_PRUNECALLED);
3515 3751
3516 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) 3752 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
3517 tcp_clamp_window(sk, tp); 3753 tcp_clamp_window(sk);
3518 else if (tcp_memory_pressure) 3754 else if (tcp_memory_pressure)
3519 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); 3755 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
3520 3756
@@ -3583,8 +3819,10 @@ void tcp_cwnd_application_limited(struct sock *sk)
3583 tp->snd_cwnd_stamp = tcp_time_stamp; 3819 tp->snd_cwnd_stamp = tcp_time_stamp;
3584} 3820}
3585 3821
3586static int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp) 3822static int tcp_should_expand_sndbuf(struct sock *sk)
3587{ 3823{
3824 struct tcp_sock *tp = tcp_sk(sk);
3825
3588 /* If the user specified a specific send buffer setting, do 3826 /* If the user specified a specific send buffer setting, do
3589 * not modify it. 3827 * not modify it.
3590 */ 3828 */
@@ -3616,7 +3854,7 @@ static void tcp_new_space(struct sock *sk)
3616{ 3854{
3617 struct tcp_sock *tp = tcp_sk(sk); 3855 struct tcp_sock *tp = tcp_sk(sk);
3618 3856
3619 if (tcp_should_expand_sndbuf(sk, tp)) { 3857 if (tcp_should_expand_sndbuf(sk)) {
3620 int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + 3858 int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
3621 MAX_TCP_HEADER + 16 + sizeof(struct sk_buff), 3859 MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
3622 demanded = max_t(unsigned int, tp->snd_cwnd, 3860 demanded = max_t(unsigned int, tp->snd_cwnd,
@@ -3640,9 +3878,9 @@ static void tcp_check_space(struct sock *sk)
3640 } 3878 }
3641} 3879}
3642 3880
3643static inline void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp) 3881static inline void tcp_data_snd_check(struct sock *sk)
3644{ 3882{
3645 tcp_push_pending_frames(sk, tp); 3883 tcp_push_pending_frames(sk);
3646 tcp_check_space(sk); 3884 tcp_check_space(sk);
3647} 3885}
3648 3886
@@ -3790,7 +4028,7 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
3790 int err; 4028 int err;
3791 4029
3792 local_bh_enable(); 4030 local_bh_enable();
3793 if (skb->ip_summed==CHECKSUM_UNNECESSARY) 4031 if (skb_csum_unnecessary(skb))
3794 err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk); 4032 err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk);
3795 else 4033 else
3796 err = skb_copy_and_csum_datagram_iovec(skb, hlen, 4034 err = skb_copy_and_csum_datagram_iovec(skb, hlen,
@@ -3822,7 +4060,7 @@ static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb
3822 4060
3823static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) 4061static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
3824{ 4062{
3825 return skb->ip_summed != CHECKSUM_UNNECESSARY && 4063 return !skb_csum_unnecessary(skb) &&
3826 __tcp_checksum_complete_user(sk, skb); 4064 __tcp_checksum_complete_user(sk, skb);
3827} 4065}
3828 4066
@@ -3840,7 +4078,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen
3840 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) 4078 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
3841 tp->ucopy.dma_chan = get_softnet_dma(); 4079 tp->ucopy.dma_chan = get_softnet_dma();
3842 4080
3843 if (tp->ucopy.dma_chan && skb->ip_summed == CHECKSUM_UNNECESSARY) { 4081 if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
3844 4082
3845 dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan, 4083 dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
3846 skb, hlen, tp->ucopy.iov, chunk, tp->ucopy.pinned_list); 4084 skb, hlen, tp->ucopy.iov, chunk, tp->ucopy.pinned_list);
@@ -3856,7 +4094,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen
3856 tcp_rcv_space_adjust(sk); 4094 tcp_rcv_space_adjust(sk);
3857 4095
3858 if ((tp->ucopy.len == 0) || 4096 if ((tp->ucopy.len == 0) ||
3859 (tcp_flag_word(skb->h.th) & TCP_FLAG_PSH) || 4097 (tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) ||
3860 (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) { 4098 (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
3861 tp->ucopy.wakeup = 1; 4099 tp->ucopy.wakeup = 1;
3862 sk->sk_data_ready(sk, 0); 4100 sk->sk_data_ready(sk, 0);
@@ -3976,7 +4214,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
3976 */ 4214 */
3977 tcp_ack(sk, skb, 0); 4215 tcp_ack(sk, skb, 0);
3978 __kfree_skb(skb); 4216 __kfree_skb(skb);
3979 tcp_data_snd_check(sk, tp); 4217 tcp_data_snd_check(sk);
3980 return 0; 4218 return 0;
3981 } else { /* Header too small */ 4219 } else { /* Header too small */
3982 TCP_INC_STATS_BH(TCP_MIB_INERRS); 4220 TCP_INC_STATS_BH(TCP_MIB_INERRS);
@@ -4047,12 +4285,12 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
4047 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 4285 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4048 } 4286 }
4049 4287
4050 tcp_event_data_recv(sk, tp, skb); 4288 tcp_event_data_recv(sk, skb);
4051 4289
4052 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) { 4290 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
4053 /* Well, only one small jumplet in fast path... */ 4291 /* Well, only one small jumplet in fast path... */
4054 tcp_ack(sk, skb, FLAG_DATA); 4292 tcp_ack(sk, skb, FLAG_DATA);
4055 tcp_data_snd_check(sk, tp); 4293 tcp_data_snd_check(sk);
4056 if (!inet_csk_ack_scheduled(sk)) 4294 if (!inet_csk_ack_scheduled(sk))
4057 goto no_ack; 4295 goto no_ack;
4058 } 4296 }
@@ -4109,7 +4347,7 @@ slow_path:
4109 goto discard; 4347 goto discard;
4110 } 4348 }
4111 4349
4112 if(th->rst) { 4350 if (th->rst) {
4113 tcp_reset(sk); 4351 tcp_reset(sk);
4114 goto discard; 4352 goto discard;
4115 } 4353 }
@@ -4124,7 +4362,7 @@ slow_path:
4124 } 4362 }
4125 4363
4126step5: 4364step5:
4127 if(th->ack) 4365 if (th->ack)
4128 tcp_ack(sk, skb, FLAG_SLOWPATH); 4366 tcp_ack(sk, skb, FLAG_SLOWPATH);
4129 4367
4130 tcp_rcv_rtt_measure_ts(sk, skb); 4368 tcp_rcv_rtt_measure_ts(sk, skb);
@@ -4135,7 +4373,7 @@ step5:
4135 /* step 7: process the segment text */ 4373 /* step 7: process the segment text */
4136 tcp_data_queue(sk, skb); 4374 tcp_data_queue(sk, skb);
4137 4375
4138 tcp_data_snd_check(sk, tp); 4376 tcp_data_snd_check(sk);
4139 tcp_ack_snd_check(sk); 4377 tcp_ack_snd_check(sk);
4140 return 0; 4378 return 0;
4141 4379
@@ -4412,13 +4650,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
4412 goto discard; 4650 goto discard;
4413 4651
4414 case TCP_LISTEN: 4652 case TCP_LISTEN:
4415 if(th->ack) 4653 if (th->ack)
4416 return 1; 4654 return 1;
4417 4655
4418 if(th->rst) 4656 if (th->rst)
4419 goto discard; 4657 goto discard;
4420 4658
4421 if(th->syn) { 4659 if (th->syn) {
4422 if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) 4660 if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
4423 return 1; 4661 return 1;
4424 4662
@@ -4452,7 +4690,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
4452 /* Do step6 onward by hand. */ 4690 /* Do step6 onward by hand. */
4453 tcp_urg(sk, skb, th); 4691 tcp_urg(sk, skb, th);
4454 __kfree_skb(skb); 4692 __kfree_skb(skb);
4455 tcp_data_snd_check(sk, tp); 4693 tcp_data_snd_check(sk);
4456 return 0; 4694 return 0;
4457 } 4695 }
4458 4696
@@ -4474,7 +4712,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
4474 } 4712 }
4475 4713
4476 /* step 2: check RST bit */ 4714 /* step 2: check RST bit */
4477 if(th->rst) { 4715 if (th->rst) {
4478 tcp_reset(sk); 4716 tcp_reset(sk);
4479 goto discard; 4717 goto discard;
4480 } 4718 }
@@ -4497,7 +4735,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
4497 if (th->ack) { 4735 if (th->ack) {
4498 int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH); 4736 int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH);
4499 4737
4500 switch(sk->sk_state) { 4738 switch (sk->sk_state) {
4501 case TCP_SYN_RECV: 4739 case TCP_SYN_RECV:
4502 if (acceptable) { 4740 if (acceptable) {
4503 tp->copied_seq = tp->rcv_nxt; 4741 tp->copied_seq = tp->rcv_nxt;
@@ -4644,7 +4882,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
4644 4882
4645 /* tcp_data could move socket to TIME-WAIT */ 4883 /* tcp_data could move socket to TIME-WAIT */
4646 if (sk->sk_state != TCP_CLOSE) { 4884 if (sk->sk_state != TCP_CLOSE) {
4647 tcp_data_snd_check(sk, tp); 4885 tcp_data_snd_check(sk);
4648 tcp_ack_snd_check(sk); 4886 tcp_ack_snd_check(sk);
4649 } 4887 }
4650 4888
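The tcp_input.c hunks above all follow one refactoring pattern: helpers such as tcp_data_snd_check() and tcp_should_expand_sndbuf() stop taking a redundant struct tcp_sock * alongside the socket and derive it internally, so the two arguments can never fall out of sync. A minimal sketch of the shape of the change (example_helper is a hypothetical name, not from the patch):

    /* before: every caller had to pass a matching (sk, tp) pair */
    static void example_helper(struct sock *sk, struct tcp_sock *tp);

    /* after: tp is recomputed from sk, which is always consistent */
    static void example_helper(struct sock *sk)
    {
        struct tcp_sock *tp = tcp_sk(sk);

        /* ... body uses tp exactly as before ... */
    }
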
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0ba74bbe7d30..5a3e7f839fc5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -88,7 +88,7 @@ int sysctl_tcp_low_latency __read_mostly;
88#define ICMP_MIN_LENGTH 8 88#define ICMP_MIN_LENGTH 8
89 89
90/* Socket used for sending RSTs */ 90/* Socket used for sending RSTs */
91static struct socket *tcp_socket; 91static struct socket *tcp_socket __read_mostly;
92 92
93void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); 93void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
94 94
@@ -125,10 +125,10 @@ void tcp_unhash(struct sock *sk)
125 125
126static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) 126static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
127{ 127{
128 return secure_tcp_sequence_number(skb->nh.iph->daddr, 128 return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
129 skb->nh.iph->saddr, 129 ip_hdr(skb)->saddr,
130 skb->h.th->dest, 130 tcp_hdr(skb)->dest,
131 skb->h.th->source); 131 tcp_hdr(skb)->source);
132} 132}
133 133
134int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) 134int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@@ -149,7 +149,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
149 */ 149 */
150 if (tcptw->tw_ts_recent_stamp && 150 if (tcptw->tw_ts_recent_stamp &&
151 (twp == NULL || (sysctl_tcp_tw_reuse && 151 (twp == NULL || (sysctl_tcp_tw_reuse &&
152 xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) { 152 get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
153 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; 153 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
154 if (tp->write_seq == 0) 154 if (tp->write_seq == 0)
155 tp->write_seq = 1; 155 tp->write_seq = 1;
@@ -224,7 +224,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
224 * when trying new connection. 224 * when trying new connection.
225 */ 225 */
226 if (peer != NULL && 226 if (peer != NULL &&
227 peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) { 227 peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
228 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; 228 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
229 tp->rx_opt.ts_recent = peer->tcp_ts; 229 tp->rx_opt.ts_recent = peer->tcp_ts;
230 } 230 }
@@ -354,8 +354,8 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
354 struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); 354 struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
355 struct tcp_sock *tp; 355 struct tcp_sock *tp;
356 struct inet_sock *inet; 356 struct inet_sock *inet;
357 int type = skb->h.icmph->type; 357 const int type = icmp_hdr(skb)->type;
358 int code = skb->h.icmph->code; 358 const int code = icmp_hdr(skb)->code;
359 struct sock *sk; 359 struct sock *sk;
360 __u32 seq; 360 __u32 seq;
361 int err; 361 int err;
@@ -499,11 +499,12 @@ out:
499void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) 499void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
500{ 500{
501 struct inet_sock *inet = inet_sk(sk); 501 struct inet_sock *inet = inet_sk(sk);
502 struct tcphdr *th = skb->h.th; 502 struct tcphdr *th = tcp_hdr(skb);
503 503
504 if (skb->ip_summed == CHECKSUM_PARTIAL) { 504 if (skb->ip_summed == CHECKSUM_PARTIAL) {
505 th->check = ~tcp_v4_check(len, inet->saddr, 505 th->check = ~tcp_v4_check(len, inet->saddr,
506 inet->daddr, 0); 506 inet->daddr, 0);
507 skb->csum_start = skb_transport_header(skb) - skb->head;
507 skb->csum_offset = offsetof(struct tcphdr, check); 508 skb->csum_offset = offsetof(struct tcphdr, check);
508 } else { 509 } else {
509 th->check = tcp_v4_check(len, inet->saddr, inet->daddr, 510 th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
@@ -515,17 +516,18 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
515 516
516int tcp_v4_gso_send_check(struct sk_buff *skb) 517int tcp_v4_gso_send_check(struct sk_buff *skb)
517{ 518{
518 struct iphdr *iph; 519 const struct iphdr *iph;
519 struct tcphdr *th; 520 struct tcphdr *th;
520 521
521 if (!pskb_may_pull(skb, sizeof(*th))) 522 if (!pskb_may_pull(skb, sizeof(*th)))
522 return -EINVAL; 523 return -EINVAL;
523 524
524 iph = skb->nh.iph; 525 iph = ip_hdr(skb);
525 th = skb->h.th; 526 th = tcp_hdr(skb);
526 527
527 th->check = 0; 528 th->check = 0;
528 th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0); 529 th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
530 skb->csum_start = skb_transport_header(skb) - skb->head;
529 skb->csum_offset = offsetof(struct tcphdr, check); 531 skb->csum_offset = offsetof(struct tcphdr, check);
530 skb->ip_summed = CHECKSUM_PARTIAL; 532 skb->ip_summed = CHECKSUM_PARTIAL;
531 return 0; 533 return 0;
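Both checksum hunks above pair the existing csum_offset with a newly set csum_start: with CHECKSUM_PARTIAL, the device (or the software fallback) needs to know where checksumming begins as well as where to fold the result. A sketch of the idiom as tcp_v4_send_check() now uses it; th, len and inet follow that function:

    if (skb->ip_summed == CHECKSUM_PARTIAL) {
        /* seed the pseudo-header checksum */
        th->check = ~tcp_v4_check(len, inet->saddr, inet->daddr, 0);
        /* checksumming starts at the transport header ... */
        skb->csum_start = skb_transport_header(skb) - skb->head;
        /* ... and the result is stored at tcphdr.check */
        skb->csum_offset = offsetof(struct tcphdr, check);
    }
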
@@ -546,7 +548,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
546 548
547static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) 549static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
548{ 550{
549 struct tcphdr *th = skb->h.th; 551 struct tcphdr *th = tcp_hdr(skb);
550 struct { 552 struct {
551 struct tcphdr th; 553 struct tcphdr th;
552#ifdef CONFIG_TCP_MD5SIG 554#ifdef CONFIG_TCP_MD5SIG
@@ -585,7 +587,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
585 arg.iov[0].iov_len = sizeof(rep.th); 587 arg.iov[0].iov_len = sizeof(rep.th);
586 588
587#ifdef CONFIG_TCP_MD5SIG 589#ifdef CONFIG_TCP_MD5SIG
588 key = sk ? tcp_v4_md5_do_lookup(sk, skb->nh.iph->daddr) : NULL; 590 key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
589 if (key) { 591 if (key) {
590 rep.opt[0] = htonl((TCPOPT_NOP << 24) | 592 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
591 (TCPOPT_NOP << 16) | 593 (TCPOPT_NOP << 16) |
@@ -597,14 +599,14 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
597 599
598 tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1], 600 tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1],
599 key, 601 key,
600 skb->nh.iph->daddr, 602 ip_hdr(skb)->daddr,
601 skb->nh.iph->saddr, 603 ip_hdr(skb)->saddr,
602 &rep.th, IPPROTO_TCP, 604 &rep.th, IPPROTO_TCP,
603 arg.iov[0].iov_len); 605 arg.iov[0].iov_len);
604 } 606 }
605#endif 607#endif
606 arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr, 608 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
607 skb->nh.iph->saddr, /* XXX */ 609 ip_hdr(skb)->saddr, /* XXX */
608 sizeof(struct tcphdr), IPPROTO_TCP, 0); 610 sizeof(struct tcphdr), IPPROTO_TCP, 0);
609 arg.csumoffset = offsetof(struct tcphdr, check) / 2; 611 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
610 612
@@ -622,7 +624,7 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
622 struct sk_buff *skb, u32 seq, u32 ack, 624 struct sk_buff *skb, u32 seq, u32 ack,
623 u32 win, u32 ts) 625 u32 win, u32 ts)
624{ 626{
625 struct tcphdr *th = skb->h.th; 627 struct tcphdr *th = tcp_hdr(skb);
626 struct { 628 struct {
627 struct tcphdr th; 629 struct tcphdr th;
628 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) 630 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
@@ -670,7 +672,7 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
670 * skb->sk) holds true, but we program defensively. 672 * skb->sk) holds true, but we program defensively.
671 */ 673 */
672 if (!twsk && skb->sk) { 674 if (!twsk && skb->sk) {
673 key = tcp_v4_md5_do_lookup(skb->sk, skb->nh.iph->daddr); 675 key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr);
674 } else if (twsk && twsk->tw_md5_keylen) { 676 } else if (twsk && twsk->tw_md5_keylen) {
675 tw_key.key = twsk->tw_md5_key; 677 tw_key.key = twsk->tw_md5_key;
676 tw_key.keylen = twsk->tw_md5_keylen; 678 tw_key.keylen = twsk->tw_md5_keylen;
@@ -690,14 +692,14 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
690 692
691 tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset], 693 tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset],
692 key, 694 key,
693 skb->nh.iph->daddr, 695 ip_hdr(skb)->daddr,
694 skb->nh.iph->saddr, 696 ip_hdr(skb)->saddr,
695 &rep.th, IPPROTO_TCP, 697 &rep.th, IPPROTO_TCP,
696 arg.iov[0].iov_len); 698 arg.iov[0].iov_len);
697 } 699 }
698#endif 700#endif
699 arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr, 701 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
700 skb->nh.iph->saddr, /* XXX */ 702 ip_hdr(skb)->saddr, /* XXX */
701 arg.iov[0].iov_len, IPPROTO_TCP, 0); 703 arg.iov[0].iov_len, IPPROTO_TCP, 0);
702 arg.csumoffset = offsetof(struct tcphdr, check) / 2; 704 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
703 705
@@ -745,7 +747,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
745 skb = tcp_make_synack(sk, dst, req); 747 skb = tcp_make_synack(sk, dst, req);
746 748
747 if (skb) { 749 if (skb) {
748 struct tcphdr *th = skb->h.th; 750 struct tcphdr *th = tcp_hdr(skb);
749 751
750 th->check = tcp_v4_check(skb->len, 752 th->check = tcp_v4_check(skb->len,
751 ireq->loc_addr, 753 ireq->loc_addr,
@@ -781,7 +783,7 @@ static void syn_flood_warning(struct sk_buff *skb)
781 warntime = jiffies; 783 warntime = jiffies;
782 printk(KERN_INFO 784 printk(KERN_INFO
783 "possible SYN flooding on port %d. Sending cookies.\n", 785 "possible SYN flooding on port %d. Sending cookies.\n",
784 ntohs(skb->h.th->dest)); 786 ntohs(tcp_hdr(skb)->dest));
785 } 787 }
786} 788}
787#endif 789#endif
@@ -1133,8 +1135,8 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
1133 */ 1135 */
1134 __u8 *hash_location = NULL; 1136 __u8 *hash_location = NULL;
1135 struct tcp_md5sig_key *hash_expected; 1137 struct tcp_md5sig_key *hash_expected;
1136 struct iphdr *iph = skb->nh.iph; 1138 const struct iphdr *iph = ip_hdr(skb);
1137 struct tcphdr *th = skb->h.th; 1139 struct tcphdr *th = tcp_hdr(skb);
1138 int length = (th->doff << 2) - sizeof(struct tcphdr); 1140 int length = (th->doff << 2) - sizeof(struct tcphdr);
1139 int genhash; 1141 int genhash;
1140 unsigned char *ptr; 1142 unsigned char *ptr;
@@ -1251,8 +1253,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1251 struct inet_request_sock *ireq; 1253 struct inet_request_sock *ireq;
1252 struct tcp_options_received tmp_opt; 1254 struct tcp_options_received tmp_opt;
1253 struct request_sock *req; 1255 struct request_sock *req;
1254 __be32 saddr = skb->nh.iph->saddr; 1256 __be32 saddr = ip_hdr(skb)->saddr;
1255 __be32 daddr = skb->nh.iph->daddr; 1257 __be32 daddr = ip_hdr(skb)->daddr;
1256 __u32 isn = TCP_SKB_CB(skb)->when; 1258 __u32 isn = TCP_SKB_CB(skb)->when;
1257 struct dst_entry *dst = NULL; 1259 struct dst_entry *dst = NULL;
1258#ifdef CONFIG_SYN_COOKIES 1260#ifdef CONFIG_SYN_COOKIES
@@ -1327,7 +1329,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1327 ireq->rmt_addr = saddr; 1329 ireq->rmt_addr = saddr;
1328 ireq->opt = tcp_v4_save_options(sk, skb); 1330 ireq->opt = tcp_v4_save_options(sk, skb);
1329 if (!want_cookie) 1331 if (!want_cookie)
1330 TCP_ECN_create_request(req, skb->h.th); 1332 TCP_ECN_create_request(req, tcp_hdr(skb));
1331 1333
1332 if (want_cookie) { 1334 if (want_cookie) {
1333#ifdef CONFIG_SYN_COOKIES 1335#ifdef CONFIG_SYN_COOKIES
@@ -1351,7 +1353,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1351 (dst = inet_csk_route_req(sk, req)) != NULL && 1353 (dst = inet_csk_route_req(sk, req)) != NULL &&
1352 (peer = rt_get_peer((struct rtable *)dst)) != NULL && 1354 (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1353 peer->v4daddr == saddr) { 1355 peer->v4daddr == saddr) {
1354 if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL && 1356 if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
1355 (s32)(peer->tcp_ts - req->ts_recent) > 1357 (s32)(peer->tcp_ts - req->ts_recent) >
1356 TCP_PAWS_WINDOW) { 1358 TCP_PAWS_WINDOW) {
1357 NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED); 1359 NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
@@ -1375,7 +1377,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1375 LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open " 1377 LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
1376 "request from %u.%u.%u.%u/%u\n", 1378 "request from %u.%u.%u.%u/%u\n",
1377 NIPQUAD(saddr), 1379 NIPQUAD(saddr),
1378 ntohs(skb->h.th->source)); 1380 ntohs(tcp_hdr(skb)->source));
1379 dst_release(dst); 1381 dst_release(dst);
1380 goto drop_and_free; 1382 goto drop_and_free;
1381 } 1383 }
@@ -1439,7 +1441,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1439 newinet->opt = ireq->opt; 1441 newinet->opt = ireq->opt;
1440 ireq->opt = NULL; 1442 ireq->opt = NULL;
1441 newinet->mc_index = inet_iif(skb); 1443 newinet->mc_index = inet_iif(skb);
1442 newinet->mc_ttl = skb->nh.iph->ttl; 1444 newinet->mc_ttl = ip_hdr(skb)->ttl;
1443 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1445 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1444 if (newinet->opt) 1446 if (newinet->opt)
1445 inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; 1447 inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
@@ -1481,8 +1483,8 @@ exit:
1481 1483
1482static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) 1484static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1483{ 1485{
1484 struct tcphdr *th = skb->h.th; 1486 struct tcphdr *th = tcp_hdr(skb);
1485 struct iphdr *iph = skb->nh.iph; 1487 const struct iphdr *iph = ip_hdr(skb);
1486 struct sock *nsk; 1488 struct sock *nsk;
1487 struct request_sock **prev; 1489 struct request_sock **prev;
1488 /* Find possible connection requests. */ 1490 /* Find possible connection requests. */
@@ -1491,9 +1493,8 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1491 if (req) 1493 if (req)
1492 return tcp_check_req(sk, skb, req, prev); 1494 return tcp_check_req(sk, skb, req, prev);
1493 1495
1494 nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, 1496 nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source,
1495 th->source, skb->nh.iph->daddr, 1497 iph->daddr, th->dest, inet_iif(skb));
1496 th->dest, inet_iif(skb));
1497 1498
1498 if (nsk) { 1499 if (nsk) {
1499 if (nsk->sk_state != TCP_TIME_WAIT) { 1500 if (nsk->sk_state != TCP_TIME_WAIT) {
@@ -1513,15 +1514,17 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1513 1514
1514static __sum16 tcp_v4_checksum_init(struct sk_buff *skb) 1515static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1515{ 1516{
1517 const struct iphdr *iph = ip_hdr(skb);
1518
1516 if (skb->ip_summed == CHECKSUM_COMPLETE) { 1519 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1517 if (!tcp_v4_check(skb->len, skb->nh.iph->saddr, 1520 if (!tcp_v4_check(skb->len, iph->saddr,
1518 skb->nh.iph->daddr, skb->csum)) { 1521 iph->daddr, skb->csum)) {
1519 skb->ip_summed = CHECKSUM_UNNECESSARY; 1522 skb->ip_summed = CHECKSUM_UNNECESSARY;
1520 return 0; 1523 return 0;
1521 } 1524 }
1522 } 1525 }
1523 1526
1524 skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr, 1527 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1525 skb->len, IPPROTO_TCP, 0); 1528 skb->len, IPPROTO_TCP, 0);
1526 1529
1527 if (skb->len <= 76) { 1530 if (skb->len <= 76) {
@@ -1555,7 +1558,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1555 1558
1556 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1559 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1557 TCP_CHECK_TIMER(sk); 1560 TCP_CHECK_TIMER(sk);
1558 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) { 1561 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1559 rsk = sk; 1562 rsk = sk;
1560 goto reset; 1563 goto reset;
1561 } 1564 }
@@ -1563,7 +1566,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1563 return 0; 1566 return 0;
1564 } 1567 }
1565 1568
1566 if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb)) 1569 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1567 goto csum_err; 1570 goto csum_err;
1568 1571
1569 if (sk->sk_state == TCP_LISTEN) { 1572 if (sk->sk_state == TCP_LISTEN) {
@@ -1581,7 +1584,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1581 } 1584 }
1582 1585
1583 TCP_CHECK_TIMER(sk); 1586 TCP_CHECK_TIMER(sk);
1584 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) { 1587 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1585 rsk = sk; 1588 rsk = sk;
1586 goto reset; 1589 goto reset;
1587 } 1590 }
@@ -1610,6 +1613,7 @@ csum_err:
1610 1613
1611int tcp_v4_rcv(struct sk_buff *skb) 1614int tcp_v4_rcv(struct sk_buff *skb)
1612{ 1615{
1616 const struct iphdr *iph;
1613 struct tcphdr *th; 1617 struct tcphdr *th;
1614 struct sock *sk; 1618 struct sock *sk;
1615 int ret; 1619 int ret;
@@ -1623,7 +1627,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
1623 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 1627 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1624 goto discard_it; 1628 goto discard_it;
1625 1629
1626 th = skb->h.th; 1630 th = tcp_hdr(skb);
1627 1631
1628 if (th->doff < sizeof(struct tcphdr) / 4) 1632 if (th->doff < sizeof(struct tcphdr) / 4)
1629 goto bad_packet; 1633 goto bad_packet;
@@ -1634,23 +1638,21 @@ int tcp_v4_rcv(struct sk_buff *skb)
1634 * Packet length and doff are validated by header prediction, 1638 * Packet length and doff are validated by header prediction,
1635 * provided case of th->doff==0 is eliminated. 1639 * provided case of th->doff==0 is eliminated.
1636 * So, we defer the checks. */ 1640 * So, we defer the checks. */
1637 if ((skb->ip_summed != CHECKSUM_UNNECESSARY && 1641 if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1638 tcp_v4_checksum_init(skb)))
1639 goto bad_packet; 1642 goto bad_packet;
1640 1643
1641 th = skb->h.th; 1644 th = tcp_hdr(skb);
1645 iph = ip_hdr(skb);
1642 TCP_SKB_CB(skb)->seq = ntohl(th->seq); 1646 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1643 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 1647 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1644 skb->len - th->doff * 4); 1648 skb->len - th->doff * 4);
1645 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1649 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1646 TCP_SKB_CB(skb)->when = 0; 1650 TCP_SKB_CB(skb)->when = 0;
1647 TCP_SKB_CB(skb)->flags = skb->nh.iph->tos; 1651 TCP_SKB_CB(skb)->flags = iph->tos;
1648 TCP_SKB_CB(skb)->sacked = 0; 1652 TCP_SKB_CB(skb)->sacked = 0;
1649 1653
1650 sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source, 1654 sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source,
1651 skb->nh.iph->daddr, th->dest, 1655 iph->daddr, th->dest, inet_iif(skb));
1652 inet_iif(skb));
1653
1654 if (!sk) 1656 if (!sk)
1655 goto no_tcp_socket; 1657 goto no_tcp_socket;
1656 1658
@@ -1724,8 +1726,7 @@ do_time_wait:
1724 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 1726 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1725 case TCP_TW_SYN: { 1727 case TCP_TW_SYN: {
1726 struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, 1728 struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
1727 skb->nh.iph->daddr, 1729 iph->daddr, th->dest,
1728 th->dest,
1729 inet_iif(skb)); 1730 inet_iif(skb));
1730 if (sk2) { 1731 if (sk2) {
1731 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); 1732 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
@@ -1770,7 +1771,7 @@ int tcp_v4_remember_stamp(struct sock *sk)
1770 1771
1771 if (peer) { 1772 if (peer) {
1772 if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || 1773 if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
1773 (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec && 1774 (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
1774 peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) { 1775 peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
1775 peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp; 1776 peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
1776 peer->tcp_ts = tp->rx_opt.ts_recent; 1777 peer->tcp_ts = tp->rx_opt.ts_recent;
@@ -1791,7 +1792,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1791 const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); 1792 const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1792 1793
1793 if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || 1794 if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
1794 (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec && 1795 (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
1795 peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) { 1796 peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
1796 peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp; 1797 peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
1797 peer->tcp_ts = tcptw->tw_ts_recent; 1798 peer->tcp_ts = tcptw->tw_ts_recent;
@@ -1890,7 +1891,7 @@ int tcp_v4_destroy_sock(struct sock *sk)
1890 tcp_cleanup_congestion_control(sk); 1891 tcp_cleanup_congestion_control(sk);
1891 1892
1892 /* Cleanup up the write buffer. */ 1893 /* Cleanup up the write buffer. */
1893 sk_stream_writequeue_purge(sk); 1894 tcp_write_queue_purge(sk);
1894 1895
1895 /* Cleans up our, hopefully empty, out_of_order_queue. */ 1896 /* Cleans up our, hopefully empty, out_of_order_queue. */
1896 __skb_queue_purge(&tp->out_of_order_queue); 1897 __skb_queue_purge(&tp->out_of_order_queue);
@@ -2293,13 +2294,13 @@ static void get_openreq4(struct sock *sk, struct request_sock *req,
2293 req); 2294 req);
2294} 2295}
2295 2296
2296static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) 2297static void get_tcp4_sock(struct sock *sk, char *tmpbuf, int i)
2297{ 2298{
2298 int timer_active; 2299 int timer_active;
2299 unsigned long timer_expires; 2300 unsigned long timer_expires;
2300 struct tcp_sock *tp = tcp_sk(sp); 2301 struct tcp_sock *tp = tcp_sk(sk);
2301 const struct inet_connection_sock *icsk = inet_csk(sp); 2302 const struct inet_connection_sock *icsk = inet_csk(sk);
2302 struct inet_sock *inet = inet_sk(sp); 2303 struct inet_sock *inet = inet_sk(sk);
2303 __be32 dest = inet->daddr; 2304 __be32 dest = inet->daddr;
2304 __be32 src = inet->rcv_saddr; 2305 __be32 src = inet->rcv_saddr;
2305 __u16 destp = ntohs(inet->dport); 2306 __u16 destp = ntohs(inet->dport);
@@ -2311,9 +2312,9 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
2311 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 2312 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2312 timer_active = 4; 2313 timer_active = 4;
2313 timer_expires = icsk->icsk_timeout; 2314 timer_expires = icsk->icsk_timeout;
2314 } else if (timer_pending(&sp->sk_timer)) { 2315 } else if (timer_pending(&sk->sk_timer)) {
2315 timer_active = 2; 2316 timer_active = 2;
2316 timer_expires = sp->sk_timer.expires; 2317 timer_expires = sk->sk_timer.expires;
2317 } else { 2318 } else {
2318 timer_active = 0; 2319 timer_active = 0;
2319 timer_expires = jiffies; 2320 timer_expires = jiffies;
@@ -2321,17 +2322,17 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
2321 2322
2322 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " 2323 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2323 "%08X %5d %8d %lu %d %p %u %u %u %u %d", 2324 "%08X %5d %8d %lu %d %p %u %u %u %u %d",
2324 i, src, srcp, dest, destp, sp->sk_state, 2325 i, src, srcp, dest, destp, sk->sk_state,
2325 tp->write_seq - tp->snd_una, 2326 tp->write_seq - tp->snd_una,
2326 sp->sk_state == TCP_LISTEN ? sp->sk_ack_backlog : 2327 sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
2327 (tp->rcv_nxt - tp->copied_seq), 2328 (tp->rcv_nxt - tp->copied_seq),
2328 timer_active, 2329 timer_active,
2329 jiffies_to_clock_t(timer_expires - jiffies), 2330 jiffies_to_clock_t(timer_expires - jiffies),
2330 icsk->icsk_retransmits, 2331 icsk->icsk_retransmits,
2331 sock_i_uid(sp), 2332 sock_i_uid(sk),
2332 icsk->icsk_probes_out, 2333 icsk->icsk_probes_out,
2333 sock_i_ino(sp), 2334 sock_i_ino(sk),
2334 atomic_read(&sp->sk_refcnt), sp, 2335 atomic_read(&sk->sk_refcnt), sk,
2335 icsk->icsk_rto, 2336 icsk->icsk_rto,
2336 icsk->icsk_ack.ato, 2337 icsk->icsk_ack.ato,
2337 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, 2338 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
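The bulk of the tcp_ipv4.c churn replaces the old header unions (skb->nh.iph, skb->h.th, skb->h.icmph) with the ip_hdr(), tcp_hdr() and icmp_hdr() accessors, plus skb_csum_unnecessary() and tcp_hdrlen() for the two derived tests. Roughly, the accessors reduce to casts over the new offset-based header pointers; a sketch, assuming the 2.6.21-era skb_network_header() and skb_transport_header() helpers:

    static inline struct iphdr *ip_hdr(const struct sk_buff *skb)
    {
        return (struct iphdr *)skb_network_header(skb);
    }

    static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb)
    {
        return (struct tcphdr *)skb_transport_header(skb);
    }

    static inline unsigned int tcp_hdrlen(const struct sk_buff *skb)
    {
        return tcp_hdr(skb)->doff * 4;   /* doff counts 32-bit words */
    }

One visible consequence in tcp_v4_rcv(): iph and th are refreshed once after the pull and checksum steps, then reused for the control-block setup and the socket lookup instead of dereferencing skb->nh/skb->h each time.
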
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index f0ebaf0e21cb..43294ad9f63e 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -218,7 +218,7 @@ static u32 tcp_lp_owd_calculator(struct sock *sk)
218 * 3. calc smoothed OWD (SOWD). 218 * 3. calc smoothed OWD (SOWD).
219 * Most ideas come from the original TCP-LP implementation. 219 * Most ideas come from the original TCP-LP implementation.
220 */ 220 */
221static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt) 221static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt)
222{ 222{
223 struct lp *lp = inet_csk_ca(sk); 223 struct lp *lp = inet_csk_ca(sk);
224 s64 mowd = tcp_lp_owd_calculator(sk); 224 s64 mowd = tcp_lp_owd_calculator(sk);
@@ -261,11 +261,13 @@ static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt)
261 * newReno in increase case. 261 * newReno in increase case.
262 * We work it out by following the idea from TCP-LP's paper directly 262 * We work it out by following the idea from TCP-LP's paper directly
263 */ 263 */
264static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked) 264static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, ktime_t last)
265{ 265{
266 struct tcp_sock *tp = tcp_sk(sk); 266 struct tcp_sock *tp = tcp_sk(sk);
267 struct lp *lp = inet_csk_ca(sk); 267 struct lp *lp = inet_csk_ca(sk);
268 268
269 tcp_lp_rtt_sample(sk, ktime_to_us(net_timedelta(last)));
270
269 /* calc inference */ 271 /* calc inference */
270 if (tcp_time_stamp > tp->rx_opt.rcv_tsecr) 272 if (tcp_time_stamp > tp->rx_opt.rcv_tsecr)
271 lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr); 273 lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr);
@@ -312,11 +314,11 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked)
312} 314}
313 315
314static struct tcp_congestion_ops tcp_lp = { 316static struct tcp_congestion_ops tcp_lp = {
317 .flags = TCP_CONG_RTT_STAMP,
315 .init = tcp_lp_init, 318 .init = tcp_lp_init,
316 .ssthresh = tcp_reno_ssthresh, 319 .ssthresh = tcp_reno_ssthresh,
317 .cong_avoid = tcp_lp_cong_avoid, 320 .cong_avoid = tcp_lp_cong_avoid,
318 .min_cwnd = tcp_reno_min_cwnd, 321 .min_cwnd = tcp_reno_min_cwnd,
319 .rtt_sample = tcp_lp_rtt_sample,
320 .pkts_acked = tcp_lp_pkts_acked, 322 .pkts_acked = tcp_lp_pkts_acked,
321 323
322 .owner = THIS_MODULE, 324 .owner = THIS_MODULE,
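tcp_lp is one of the congestion modules converted from the removed rtt_sample hook to the extended pkts_acked() signature: the core now hands over the send timestamp of the last acked skb, and modules that want timestamps set TCP_CONG_RTT_STAMP so tcp_transmit_skb() stamps outgoing packets at all. A sketch of the resulting module pattern (the foo_* names are illustrative, not from the patch):

    static void foo_pkts_acked(struct sock *sk, u32 num_acked, ktime_t last)
    {
        /* RTT sample in microseconds, mirroring the tcp_lp hunk above */
        u32 sample_us = ktime_to_us(net_timedelta(last));

        /* ... feed sample_us into the module's estimator ... */
    }

    static struct tcp_congestion_ops foo = {
        .flags      = TCP_CONG_RTT_STAMP,   /* request skb timestamps */
        .pkts_acked = foo_pkts_acked,
        /* .init, .ssthresh, .cong_avoid, ... as before */
    };
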
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 6b5c64f3c925..a12b08fca5ad 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -149,7 +149,7 @@ kill_with_rst:
149 tw->tw_substate = TCP_TIME_WAIT; 149 tw->tw_substate = TCP_TIME_WAIT;
150 tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq; 150 tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
151 if (tmp_opt.saw_tstamp) { 151 if (tmp_opt.saw_tstamp) {
152 tcptw->tw_ts_recent_stamp = xtime.tv_sec; 152 tcptw->tw_ts_recent_stamp = get_seconds();
153 tcptw->tw_ts_recent = tmp_opt.rcv_tsval; 153 tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
154 } 154 }
155 155
@@ -208,7 +208,7 @@ kill:
208 208
209 if (tmp_opt.saw_tstamp) { 209 if (tmp_opt.saw_tstamp) {
210 tcptw->tw_ts_recent = tmp_opt.rcv_tsval; 210 tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
211 tcptw->tw_ts_recent_stamp = xtime.tv_sec; 211 tcptw->tw_ts_recent_stamp = get_seconds();
212 } 212 }
213 213
214 inet_twsk_put(tw); 214 inet_twsk_put(tw);
@@ -246,7 +246,7 @@ kill:
246 if (paws_reject) 246 if (paws_reject)
247 NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); 247 NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
248 248
249 if(!th->rst) { 249 if (!th->rst) {
250 /* In this case we must reset the TIMEWAIT timer. 250 /* In this case we must reset the TIMEWAIT timer.
251 * 251 *
252 * If it is ACKless SYN it may be both old duplicate 252 * If it is ACKless SYN it may be both old duplicate
@@ -324,7 +324,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
324 if (tcp_alloc_md5sig_pool() == NULL) 324 if (tcp_alloc_md5sig_pool() == NULL)
325 BUG(); 325 BUG();
326 } 326 }
327 } while(0); 327 } while (0);
328#endif 328#endif
329 329
330 /* Linkage updates. */ 330 /* Linkage updates. */
@@ -387,8 +387,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
387 /* Now setup tcp_sock */ 387 /* Now setup tcp_sock */
388 newtp = tcp_sk(newsk); 388 newtp = tcp_sk(newsk);
389 newtp->pred_flags = 0; 389 newtp->pred_flags = 0;
390 newtp->rcv_nxt = treq->rcv_isn + 1; 390 newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1;
391 newtp->snd_nxt = newtp->snd_una = newtp->snd_sml = treq->snt_isn + 1; 391 newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1;
392 392
393 tcp_prequeue_init(newtp); 393 tcp_prequeue_init(newtp);
394 394
@@ -422,10 +422,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
422 tcp_set_ca_state(newsk, TCP_CA_Open); 422 tcp_set_ca_state(newsk, TCP_CA_Open);
423 tcp_init_xmit_timers(newsk); 423 tcp_init_xmit_timers(newsk);
424 skb_queue_head_init(&newtp->out_of_order_queue); 424 skb_queue_head_init(&newtp->out_of_order_queue);
425 newtp->rcv_wup = treq->rcv_isn + 1;
426 newtp->write_seq = treq->snt_isn + 1; 425 newtp->write_seq = treq->snt_isn + 1;
427 newtp->pushed_seq = newtp->write_seq; 426 newtp->pushed_seq = newtp->write_seq;
428 newtp->copied_seq = treq->rcv_isn + 1;
429 427
430 newtp->rx_opt.saw_tstamp = 0; 428 newtp->rx_opt.saw_tstamp = 0;
431 429
@@ -440,7 +438,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
440 keepalive_time_when(newtp)); 438 keepalive_time_when(newtp));
441 439
442 newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; 440 newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
443 if((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) { 441 if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
444 if (sysctl_tcp_fack) 442 if (sysctl_tcp_fack)
445 newtp->rx_opt.sack_ok |= 2; 443 newtp->rx_opt.sack_ok |= 2;
446 } 444 }
@@ -455,12 +453,13 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
455 newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0; 453 newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
456 newtp->window_clamp = min(newtp->window_clamp, 65535U); 454 newtp->window_clamp = min(newtp->window_clamp, 65535U);
457 } 455 }
458 newtp->snd_wnd = ntohs(skb->h.th->window) << newtp->rx_opt.snd_wscale; 456 newtp->snd_wnd = (ntohs(tcp_hdr(skb)->window) <<
457 newtp->rx_opt.snd_wscale);
459 newtp->max_window = newtp->snd_wnd; 458 newtp->max_window = newtp->snd_wnd;
460 459
461 if (newtp->rx_opt.tstamp_ok) { 460 if (newtp->rx_opt.tstamp_ok) {
462 newtp->rx_opt.ts_recent = req->ts_recent; 461 newtp->rx_opt.ts_recent = req->ts_recent;
463 newtp->rx_opt.ts_recent_stamp = xtime.tv_sec; 462 newtp->rx_opt.ts_recent_stamp = get_seconds();
464 newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; 463 newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
465 } else { 464 } else {
466 newtp->rx_opt.ts_recent_stamp = 0; 465 newtp->rx_opt.ts_recent_stamp = 0;
@@ -490,7 +489,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
490 struct request_sock *req, 489 struct request_sock *req,
491 struct request_sock **prev) 490 struct request_sock **prev)
492{ 491{
493 struct tcphdr *th = skb->h.th; 492 const struct tcphdr *th = tcp_hdr(skb);
494 __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); 493 __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
495 int paws_reject = 0; 494 int paws_reject = 0;
496 struct tcp_options_received tmp_opt; 495 struct tcp_options_received tmp_opt;
@@ -506,7 +505,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
506 * it can be estimated (approximately) 505 * it can be estimated (approximately)
507 * from another data. 506 * from another data.
508 */ 507 */
509 tmp_opt.ts_recent_stamp = xtime.tv_sec - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans); 508 tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
510 paws_reject = tcp_paws_check(&tmp_opt, th->rst); 509 paws_reject = tcp_paws_check(&tmp_opt, th->rst);
511 } 510 }
512 } 511 }
@@ -712,8 +711,8 @@ int tcp_child_process(struct sock *parent, struct sock *child,
712 int state = child->sk_state; 711 int state = child->sk_state;
713 712
714 if (!sock_owned_by_user(child)) { 713 if (!sock_owned_by_user(child)) {
715 ret = tcp_rcv_state_process(child, skb, skb->h.th, skb->len); 714 ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb),
716 715 skb->len);
717 /* Wakeup parent, send SIGIO */ 716 /* Wakeup parent, send SIGIO */
718 if (state == TCP_SYN_RECV && child->sk_state != state) 717 if (state == TCP_SYN_RECV && child->sk_state != state)
719 parent->sk_data_ready(parent, 0); 718 parent->sk_data_ready(parent, 0);
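The timestamp bookkeeping in tcp_minisocks.c (and in tcp_ipv4.c above) moves from reading the global xtime.tv_sec directly to get_seconds(), which returns the same wall-clock seconds as an unsigned long without touching timekeeping internals. The conversion is mechanical:

    /* before */
    tcptw->tw_ts_recent_stamp = xtime.tv_sec;

    /* after: identical value, proper accessor */
    tcptw->tw_ts_recent_stamp = get_seconds();
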
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3c24881f2a65..e70a6840cb64 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -62,14 +62,13 @@ int sysctl_tcp_base_mss __read_mostly = 512;
62/* By default, RFC2861 behavior. */ 62/* By default, RFC2861 behavior. */
63int sysctl_tcp_slow_start_after_idle __read_mostly = 1; 63int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
64 64
65static void update_send_head(struct sock *sk, struct tcp_sock *tp, 65static void update_send_head(struct sock *sk, struct sk_buff *skb)
66 struct sk_buff *skb)
67{ 66{
68 sk->sk_send_head = skb->next; 67 struct tcp_sock *tp = tcp_sk(sk);
69 if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) 68
70 sk->sk_send_head = NULL; 69 tcp_advance_send_head(sk, skb);
71 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; 70 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
72 tcp_packets_out_inc(sk, tp, skb); 71 tcp_packets_out_inc(sk, skb);
73} 72}
74 73
75/* SND.NXT, if window was not shrunk. 74/* SND.NXT, if window was not shrunk.
@@ -78,8 +77,10 @@ static void update_send_head(struct sock *sk, struct tcp_sock *tp,
78 * Anything in between SND.UNA...SND.UNA+SND.WND also can be already 77 * Anything in between SND.UNA...SND.UNA+SND.WND also can be already
79 * invalid. OK, let's make this for now: 78 * invalid. OK, let's make this for now:
80 */ 79 */
81static inline __u32 tcp_acceptable_seq(struct sock *sk, struct tcp_sock *tp) 80static inline __u32 tcp_acceptable_seq(struct sock *sk)
82{ 81{
82 struct tcp_sock *tp = tcp_sk(sk);
83
83 if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt)) 84 if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt))
84 return tp->snd_nxt; 85 return tp->snd_nxt;
85 else 86 else
@@ -238,7 +239,7 @@ static u16 tcp_select_window(struct sock *sk)
238 u32 new_win = __tcp_select_window(sk); 239 u32 new_win = __tcp_select_window(sk);
239 240
240 /* Never shrink the offered window */ 241 /* Never shrink the offered window */
241 if(new_win < cur_win) { 242 if (new_win < cur_win) {
242 /* Danger Will Robinson! 243 /* Danger Will Robinson!
243 * Don't update rcv_wup/rcv_wnd here or else 244 * Don't update rcv_wup/rcv_wnd here or else
244 * we will not be able to advertise a zero 245 * we will not be able to advertise a zero
@@ -289,10 +290,12 @@ static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
289 (TCPOPT_SACK << 8) | 290 (TCPOPT_SACK << 8) |
290 (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks * 291 (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks *
291 TCPOLEN_SACK_PERBLOCK))); 292 TCPOLEN_SACK_PERBLOCK)));
292 for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) { 293
294 for (this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
293 *ptr++ = htonl(sp[this_sack].start_seq); 295 *ptr++ = htonl(sp[this_sack].start_seq);
294 *ptr++ = htonl(sp[this_sack].end_seq); 296 *ptr++ = htonl(sp[this_sack].end_seq);
295 } 297 }
298
296 if (tp->rx_opt.dsack) { 299 if (tp->rx_opt.dsack) {
297 tp->rx_opt.dsack = 0; 300 tp->rx_opt.dsack = 0;
298 tp->rx_opt.eff_sacks--; 301 tp->rx_opt.eff_sacks--;
@@ -337,7 +340,7 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
337 */ 340 */
338 *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); 341 *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
339 if (ts) { 342 if (ts) {
340 if(sack) 343 if (sack)
341 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | 344 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
342 (TCPOLEN_SACK_PERM << 16) | 345 (TCPOLEN_SACK_PERM << 16) |
343 (TCPOPT_TIMESTAMP << 8) | 346 (TCPOPT_TIMESTAMP << 8) |
@@ -349,7 +352,7 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
349 TCPOLEN_TIMESTAMP); 352 TCPOLEN_TIMESTAMP);
350 *ptr++ = htonl(tstamp); /* TSVAL */ 353 *ptr++ = htonl(tstamp); /* TSVAL */
351 *ptr++ = htonl(ts_recent); /* TSECR */ 354 *ptr++ = htonl(ts_recent); /* TSECR */
352 } else if(sack) 355 } else if (sack)
353 *ptr++ = htonl((TCPOPT_NOP << 24) | 356 *ptr++ = htonl((TCPOPT_NOP << 24) |
354 (TCPOPT_NOP << 16) | 357 (TCPOPT_NOP << 16) |
355 (TCPOPT_SACK_PERM << 8) | 358 (TCPOPT_SACK_PERM << 8) |
@@ -406,7 +409,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
406 /* If congestion control is doing timestamping, we must 409 /* If congestion control is doing timestamping, we must
407 * take such a timestamp before we potentially clone/copy. 410 * take such a timestamp before we potentially clone/copy.
408 */ 411 */
409 if (icsk->icsk_ca_ops->rtt_sample) 412 if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
410 __net_timestamp(skb); 413 __net_timestamp(skb);
411 414
412 if (likely(clone_it)) { 415 if (likely(clone_it)) {
@@ -430,7 +433,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
430 sysctl_flags = 0; 433 sysctl_flags = 0;
431 if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { 434 if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
432 tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS; 435 tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
433 if(sysctl_tcp_timestamps) { 436 if (sysctl_tcp_timestamps) {
434 tcp_header_size += TCPOLEN_TSTAMP_ALIGNED; 437 tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
435 sysctl_flags |= SYSCTL_FLAG_TSTAMPS; 438 sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
436 } 439 }
@@ -465,11 +468,12 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
465 tcp_header_size += TCPOLEN_MD5SIG_ALIGNED; 468 tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
466#endif 469#endif
467 470
468 th = (struct tcphdr *) skb_push(skb, tcp_header_size); 471 skb_push(skb, tcp_header_size);
469 skb->h.th = th; 472 skb_reset_transport_header(skb);
470 skb_set_owner_w(skb, sk); 473 skb_set_owner_w(skb, sk);
471 474
472 /* Build TCP header and checksum it. */ 475 /* Build TCP header and checksum it. */
476 th = tcp_hdr(skb);
473 th->source = inet->sport; 477 th->source = inet->sport;
474 th->dest = inet->dport; 478 th->dest = inet->dport;
475 th->seq = htonl(tcb->seq); 479 th->seq = htonl(tcb->seq);
@@ -515,7 +519,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
515 md5 ? &md5_hash_location : 519 md5 ? &md5_hash_location :
516#endif 520#endif
517 NULL); 521 NULL);
518 TCP_ECN_send(sk, tp, skb, tcp_header_size); 522 TCP_ECN_send(sk, skb, tcp_header_size);
519 } 523 }
520 524
521#ifdef CONFIG_TCP_MD5SIG 525#ifdef CONFIG_TCP_MD5SIG
@@ -524,7 +528,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
524 tp->af_specific->calc_md5_hash(md5_hash_location, 528 tp->af_specific->calc_md5_hash(md5_hash_location,
525 md5, 529 md5,
526 sk, NULL, NULL, 530 sk, NULL, NULL,
527 skb->h.th, 531 tcp_hdr(skb),
528 sk->sk_protocol, 532 sk->sk_protocol,
529 skb->len); 533 skb->len);
530 } 534 }
@@ -545,7 +549,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
545 if (likely(err <= 0)) 549 if (likely(err <= 0))
546 return err; 550 return err;
547 551
548 tcp_enter_cwr(sk); 552 tcp_enter_cwr(sk, 1);
549 553
550 return net_xmit_eval(err); 554 return net_xmit_eval(err);
551 555
@@ -567,12 +571,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
567 /* Advance write_seq and place onto the write_queue. */ 571 /* Advance write_seq and place onto the write_queue. */
568 tp->write_seq = TCP_SKB_CB(skb)->end_seq; 572 tp->write_seq = TCP_SKB_CB(skb)->end_seq;
569 skb_header_release(skb); 573 skb_header_release(skb);
570 __skb_queue_tail(&sk->sk_write_queue, skb); 574 tcp_add_write_queue_tail(sk, skb);
571 sk_charge_skb(sk, skb); 575 sk_charge_skb(sk, skb);
572
573 /* Queue it, remembering where we must start sending. */
574 if (sk->sk_send_head == NULL)
575 sk->sk_send_head = skb;
576} 576}
577 577
578static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) 578static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
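tcp_queue_skb() above shows the core of the tcp_output.c conversion: the open-coded __skb_queue_tail() plus the conditional sk_send_head assignment collapse into tcp_add_write_queue_tail(), and all other direct sk_write_queue/sk_send_head manipulation goes through equivalent helpers (tcp_send_head, tcp_insert_write_queue_after/before, tcp_unlink_write_queue, tcp_advance_send_head). A sketch of roughly how the two most common helpers behave, assuming the definitions that accompany this series in include/net/tcp.h:

    static inline struct sk_buff *tcp_send_head(struct sock *sk)
    {
        return sk->sk_send_head;
    }

    static inline void tcp_add_write_queue_tail(struct sock *sk,
                                                struct sk_buff *skb)
    {
        __skb_queue_tail(&sk->sk_write_queue, skb);

        /* queue was empty: this skb is the next one to transmit */
        if (sk->sk_send_head == NULL)
            sk->sk_send_head = skb;
    }
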
@@ -705,7 +705,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
705 705
706 /* Link BUFF into the send queue. */ 706 /* Link BUFF into the send queue. */
707 skb_header_release(buff); 707 skb_header_release(buff);
708 __skb_append(skb, buff, &sk->sk_write_queue); 708 tcp_insert_write_queue_after(skb, buff, sk);
709 709
710 return 0; 710 return 0;
711} 711}
@@ -736,7 +736,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
736 } 736 }
737 skb_shinfo(skb)->nr_frags = k; 737 skb_shinfo(skb)->nr_frags = k;
738 738
739 skb->tail = skb->data; 739 skb_reset_tail_pointer(skb);
740 skb->data_len -= len; 740 skb->data_len -= len;
741 skb->len = skb->data_len; 741 skb->len = skb->data_len;
742} 742}
@@ -930,8 +930,9 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
930 930
931/* Congestion window validation. (RFC2861) */ 931/* Congestion window validation. (RFC2861) */
932 932
933static void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) 933static void tcp_cwnd_validate(struct sock *sk)
934{ 934{
935 struct tcp_sock *tp = tcp_sk(sk);
935 __u32 packets_out = tp->packets_out; 936 __u32 packets_out = tp->packets_out;
936 937
937 if (packets_out >= tp->snd_cwnd) { 938 if (packets_out >= tp->snd_cwnd) {
@@ -1056,7 +1057,7 @@ static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, uns
1056 return !after(end_seq, tp->snd_una + tp->snd_wnd); 1057 return !after(end_seq, tp->snd_una + tp->snd_wnd);
1057} 1058}
1058 1059
1059/* This checks if the data bearing packet SKB (usually sk->sk_send_head) 1060/* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
1060 * should be put on the wire right now. If so, it returns the number of 1061 * should be put on the wire right now. If so, it returns the number of
1061 * packets allowed by the congestion window. 1062 * packets allowed by the congestion window.
1062 */ 1063 */
@@ -1079,15 +1080,10 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
1079 return cwnd_quota; 1080 return cwnd_quota;
1080} 1081}
1081 1082
1082static inline int tcp_skb_is_last(const struct sock *sk, 1083int tcp_may_send_now(struct sock *sk)
1083 const struct sk_buff *skb)
1084{
1085 return skb->next == (struct sk_buff *)&sk->sk_write_queue;
1086}
1087
1088int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
1089{ 1084{
1090 struct sk_buff *skb = sk->sk_send_head; 1085 struct tcp_sock *tp = tcp_sk(sk);
1086 struct sk_buff *skb = tcp_send_head(sk);
1091 1087
1092 return (skb && 1088 return (skb &&
1093 tcp_snd_test(sk, skb, tcp_current_mss(sk, 1), 1089 tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
@@ -1143,7 +1139,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1143 1139
1144 /* Link BUFF into the send queue. */ 1140 /* Link BUFF into the send queue. */
1145 skb_header_release(buff); 1141 skb_header_release(buff);
1146 __skb_append(skb, buff, &sk->sk_write_queue); 1142 tcp_insert_write_queue_after(skb, buff, sk);
1147 1143
1148 return 0; 1144 return 0;
1149} 1145}
@@ -1153,8 +1149,9 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1153 * 1149 *
1154 * This algorithm is from John Heffner. 1150 * This algorithm is from John Heffner.
1155 */ 1151 */
1156static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) 1152static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1157{ 1153{
1154 struct tcp_sock *tp = tcp_sk(sk);
1158 const struct inet_connection_sock *icsk = inet_csk(sk); 1155 const struct inet_connection_sock *icsk = inet_csk(sk);
1159 u32 send_win, cong_win, limit, in_flight; 1156 u32 send_win, cong_win, limit, in_flight;
1160 1157
@@ -1249,10 +1246,10 @@ static int tcp_mtu_probe(struct sock *sk)
1249 1246
1250 /* Have enough data in the send queue to probe? */ 1247 /* Have enough data in the send queue to probe? */
1251 len = 0; 1248 len = 0;
1252 if ((skb = sk->sk_send_head) == NULL) 1249 if ((skb = tcp_send_head(sk)) == NULL)
1253 return -1; 1250 return -1;
1254 while ((len += skb->len) < probe_size && !tcp_skb_is_last(sk, skb)) 1251 while ((len += skb->len) < probe_size && !tcp_skb_is_last(sk, skb))
1255 skb = skb->next; 1252 skb = tcp_write_queue_next(sk, skb);
1256 if (len < probe_size) 1253 if (len < probe_size)
1257 return -1; 1254 return -1;
1258 1255
@@ -1279,9 +1276,9 @@ static int tcp_mtu_probe(struct sock *sk)
1279 return -1; 1276 return -1;
1280 sk_charge_skb(sk, nskb); 1277 sk_charge_skb(sk, nskb);
1281 1278
1282 skb = sk->sk_send_head; 1279 skb = tcp_send_head(sk);
1283 __skb_insert(nskb, skb->prev, skb, &sk->sk_write_queue); 1280 tcp_insert_write_queue_before(nskb, skb, sk);
1284 sk->sk_send_head = nskb; 1281 tcp_advance_send_head(sk, skb);
1285 1282
1286 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; 1283 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
1287 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; 1284 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
@@ -1292,7 +1289,7 @@ static int tcp_mtu_probe(struct sock *sk)
1292 1289
1293 len = 0; 1290 len = 0;
1294 while (len < probe_size) { 1291 while (len < probe_size) {
1295 next = skb->next; 1292 next = tcp_write_queue_next(sk, skb);
1296 1293
1297 copy = min_t(int, skb->len, probe_size - len); 1294 copy = min_t(int, skb->len, probe_size - len);
1298 if (nskb->ip_summed) 1295 if (nskb->ip_summed)
@@ -1305,7 +1302,7 @@ static int tcp_mtu_probe(struct sock *sk)
1305 /* We've eaten all the data from this skb. 1302 /* We've eaten all the data from this skb.
1306 * Throw it away. */ 1303 * Throw it away. */
1307 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags; 1304 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags;
1308 __skb_unlink(skb, &sk->sk_write_queue); 1305 tcp_unlink_write_queue(skb, sk);
1309 sk_stream_free_skb(sk, skb); 1306 sk_stream_free_skb(sk, skb);
1310 } else { 1307 } else {
1311 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags & 1308 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags &
@@ -1333,7 +1330,7 @@ static int tcp_mtu_probe(struct sock *sk)
1333 /* Decrement cwnd here because we are sending 1330 /* Decrement cwnd here because we are sending
1334 * effectively two packets. */ 1331 * effectively two packets. */
1335 tp->snd_cwnd--; 1332 tp->snd_cwnd--;
1336 update_send_head(sk, tp, nskb); 1333 update_send_head(sk, nskb);
1337 1334
1338 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len); 1335 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
1339 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq; 1336 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
@@ -1377,7 +1374,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
1377 sent_pkts = 1; 1374 sent_pkts = 1;
1378 } 1375 }
1379 1376
1380 while ((skb = sk->sk_send_head)) { 1377 while ((skb = tcp_send_head(sk))) {
1381 unsigned int limit; 1378 unsigned int limit;
1382 1379
1383 tso_segs = tcp_init_tso_segs(sk, skb, mss_now); 1380 tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
@@ -1396,7 +1393,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
1396 nonagle : TCP_NAGLE_PUSH)))) 1393 nonagle : TCP_NAGLE_PUSH))))
1397 break; 1394 break;
1398 } else { 1395 } else {
1399 if (tcp_tso_should_defer(sk, tp, skb)) 1396 if (tcp_tso_should_defer(sk, skb))
1400 break; 1397 break;
1401 } 1398 }
1402 1399
@@ -1425,31 +1422,31 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
1425 /* Advance the send_head. This one is sent out. 1422 /* Advance the send_head. This one is sent out.
1426 * This call will increment packets_out. 1423 * This call will increment packets_out.
1427 */ 1424 */
1428 update_send_head(sk, tp, skb); 1425 update_send_head(sk, skb);
1429 1426
1430 tcp_minshall_update(tp, mss_now, skb); 1427 tcp_minshall_update(tp, mss_now, skb);
1431 sent_pkts++; 1428 sent_pkts++;
1432 } 1429 }
1433 1430
1434 if (likely(sent_pkts)) { 1431 if (likely(sent_pkts)) {
1435 tcp_cwnd_validate(sk, tp); 1432 tcp_cwnd_validate(sk);
1436 return 0; 1433 return 0;
1437 } 1434 }
1438 return !tp->packets_out && sk->sk_send_head; 1435 return !tp->packets_out && tcp_send_head(sk);
1439} 1436}
1440 1437
1441/* Push out any pending frames which were held back due to 1438/* Push out any pending frames which were held back due to
1442 * TCP_CORK or attempt at coalescing tiny packets. 1439 * TCP_CORK or attempt at coalescing tiny packets.
1443 * The socket must be locked by the caller. 1440 * The socket must be locked by the caller.
1444 */ 1441 */
1445void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp, 1442void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
1446 unsigned int cur_mss, int nonagle) 1443 int nonagle)
1447{ 1444{
1448 struct sk_buff *skb = sk->sk_send_head; 1445 struct sk_buff *skb = tcp_send_head(sk);
1449 1446
1450 if (skb) { 1447 if (skb) {
1451 if (tcp_write_xmit(sk, cur_mss, nonagle)) 1448 if (tcp_write_xmit(sk, cur_mss, nonagle))
1452 tcp_check_probe_timer(sk, tp); 1449 tcp_check_probe_timer(sk);
1453 } 1450 }
1454} 1451}
1455 1452
@@ -1459,7 +1456,7 @@ void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
1459void tcp_push_one(struct sock *sk, unsigned int mss_now) 1456void tcp_push_one(struct sock *sk, unsigned int mss_now)
1460{ 1457{
1461 struct tcp_sock *tp = tcp_sk(sk); 1458 struct tcp_sock *tp = tcp_sk(sk);
1462 struct sk_buff *skb = sk->sk_send_head; 1459 struct sk_buff *skb = tcp_send_head(sk);
1463 unsigned int tso_segs, cwnd_quota; 1460 unsigned int tso_segs, cwnd_quota;
1464 1461
1465 BUG_ON(!skb || skb->len < mss_now); 1462 BUG_ON(!skb || skb->len < mss_now);
@@ -1493,8 +1490,8 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
1493 TCP_SKB_CB(skb)->when = tcp_time_stamp; 1490 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1494 1491
1495 if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) { 1492 if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) {
1496 update_send_head(sk, tp, skb); 1493 update_send_head(sk, skb);
1497 tcp_cwnd_validate(sk, tp); 1494 tcp_cwnd_validate(sk);
1498 return; 1495 return;
1499 } 1496 }
1500 } 1497 }
@@ -1620,7 +1617,7 @@ u32 __tcp_select_window(struct sock *sk)
1620static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now) 1617static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now)
1621{ 1618{
1622 struct tcp_sock *tp = tcp_sk(sk); 1619 struct tcp_sock *tp = tcp_sk(sk);
1623 struct sk_buff *next_skb = skb->next; 1620 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
1624 1621
1625 /* The first test we must make is that neither of these two 1622 /* The first test we must make is that neither of these two
1626 * SKB's are still referenced by someone else. 1623 * SKB's are still referenced by someone else.
@@ -1630,7 +1627,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
1630 u16 flags = TCP_SKB_CB(skb)->flags; 1627 u16 flags = TCP_SKB_CB(skb)->flags;
1631 1628
1632 /* Also punt if next skb has been SACK'd. */ 1629 /* Also punt if next skb has been SACK'd. */
1633 if(TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED) 1630 if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
1634 return; 1631 return;
1635 1632
1636 /* Next skb is out of window. */ 1633 /* Next skb is out of window. */
@@ -1652,9 +1649,11 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
1652 clear_all_retrans_hints(tp); 1649 clear_all_retrans_hints(tp);
1653 1650
1654 /* Ok. We will be able to collapse the packet. */ 1651 /* Ok. We will be able to collapse the packet. */
1655 __skb_unlink(next_skb, &sk->sk_write_queue); 1652 tcp_unlink_write_queue(next_skb, sk);
1656 1653
1657 memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); 1654 skb_copy_from_linear_data(next_skb,
1655 skb_put(skb, next_skb_size),
1656 next_skb_size);
1658 1657
1659 if (next_skb->ip_summed == CHECKSUM_PARTIAL) 1658 if (next_skb->ip_summed == CHECKSUM_PARTIAL)
1660 skb->ip_summed = CHECKSUM_PARTIAL; 1659 skb->ip_summed = CHECKSUM_PARTIAL;
@@ -1706,7 +1705,9 @@ void tcp_simple_retransmit(struct sock *sk)
1706 unsigned int mss = tcp_current_mss(sk, 0); 1705 unsigned int mss = tcp_current_mss(sk, 0);
1707 int lost = 0; 1706 int lost = 0;
1708 1707
1709 sk_stream_for_retrans_queue(skb, sk) { 1708 tcp_for_write_queue(skb, sk) {
1709 if (skb == tcp_send_head(sk))
1710 break;
1710 if (skb->len > mss && 1711 if (skb->len > mss &&
1711 !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { 1712 !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
1712 if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { 1713 if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
@@ -1788,13 +1789,13 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1788 } 1789 }
1789 1790
1790 /* Collapse two adjacent packets if worthwhile and we can. */ 1791 /* Collapse two adjacent packets if worthwhile and we can. */
1791 if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && 1792 if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
1792 (skb->len < (cur_mss >> 1)) && 1793 (skb->len < (cur_mss >> 1)) &&
1793 (skb->next != sk->sk_send_head) && 1794 (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) &&
1794 (skb->next != (struct sk_buff *)&sk->sk_write_queue) && 1795 (!tcp_skb_is_last(sk, skb)) &&
1795 (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(skb->next)->nr_frags == 0) && 1796 (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
1796 (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(skb->next) == 1) && 1797 (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
1797 (sysctl_tcp_retrans_collapse != 0)) 1798 (sysctl_tcp_retrans_collapse != 0))
1798 tcp_retrans_try_collapse(sk, skb, cur_mss); 1799 tcp_retrans_try_collapse(sk, skb, cur_mss);
1799 1800
1800 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) 1801 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
@@ -1804,9 +1805,9 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1804 * retransmit when old data is attached. So strip it off 1805 * retransmit when old data is attached. So strip it off
1805 * since it is cheap to do so and saves bytes on the network. 1806 * since it is cheap to do so and saves bytes on the network.
1806 */ 1807 */
1807 if(skb->len > 0 && 1808 if (skb->len > 0 &&
1808 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && 1809 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
1809 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { 1810 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
1810 if (!pskb_trim(skb, 0)) { 1811 if (!pskb_trim(skb, 0)) {
1811 TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1; 1812 TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
1812 skb_shinfo(skb)->gso_segs = 1; 1813 skb_shinfo(skb)->gso_segs = 1;
@@ -1872,15 +1873,17 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1872 skb = tp->retransmit_skb_hint; 1873 skb = tp->retransmit_skb_hint;
1873 packet_cnt = tp->retransmit_cnt_hint; 1874 packet_cnt = tp->retransmit_cnt_hint;
1874 }else{ 1875 }else{
1875 skb = sk->sk_write_queue.next; 1876 skb = tcp_write_queue_head(sk);
1876 packet_cnt = 0; 1877 packet_cnt = 0;
1877 } 1878 }
1878 1879
1879 /* First pass: retransmit lost packets. */ 1880 /* First pass: retransmit lost packets. */
1880 if (tp->lost_out) { 1881 if (tp->lost_out) {
1881 sk_stream_for_retrans_queue_from(skb, sk) { 1882 tcp_for_write_queue_from(skb, sk) {
1882 __u8 sacked = TCP_SKB_CB(skb)->sacked; 1883 __u8 sacked = TCP_SKB_CB(skb)->sacked;
1883 1884
1885 if (skb == tcp_send_head(sk))
1886 break;
1884 /* we could do better than to assign each time */ 1887 /* we could do better than to assign each time */
1885 tp->retransmit_skb_hint = skb; 1888 tp->retransmit_skb_hint = skb;
1886 tp->retransmit_cnt_hint = packet_cnt; 1889 tp->retransmit_cnt_hint = packet_cnt;
@@ -1906,8 +1909,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1906 else 1909 else
1907 NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS); 1910 NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS);
1908 1911
1909 if (skb == 1912 if (skb == tcp_write_queue_head(sk))
1910 skb_peek(&sk->sk_write_queue))
1911 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 1913 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
1912 inet_csk(sk)->icsk_rto, 1914 inet_csk(sk)->icsk_rto,
1913 TCP_RTO_MAX); 1915 TCP_RTO_MAX);
@@ -1937,18 +1939,20 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1937 * segments to send. 1939 * segments to send.
1938 */ 1940 */
1939 1941
1940 if (tcp_may_send_now(sk, tp)) 1942 if (tcp_may_send_now(sk))
1941 return; 1943 return;
1942 1944
1943 if (tp->forward_skb_hint) { 1945 if (tp->forward_skb_hint) {
1944 skb = tp->forward_skb_hint; 1946 skb = tp->forward_skb_hint;
1945 packet_cnt = tp->forward_cnt_hint; 1947 packet_cnt = tp->forward_cnt_hint;
1946 } else{ 1948 } else{
1947 skb = sk->sk_write_queue.next; 1949 skb = tcp_write_queue_head(sk);
1948 packet_cnt = 0; 1950 packet_cnt = 0;
1949 } 1951 }
1950 1952
1951 sk_stream_for_retrans_queue_from(skb, sk) { 1953 tcp_for_write_queue_from(skb, sk) {
1954 if (skb == tcp_send_head(sk))
1955 break;
1952 tp->forward_cnt_hint = packet_cnt; 1956 tp->forward_cnt_hint = packet_cnt;
1953 tp->forward_skb_hint = skb; 1957 tp->forward_skb_hint = skb;
1954 1958
@@ -1973,7 +1977,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1973 break; 1977 break;
1974 } 1978 }
1975 1979
1976 if (skb == skb_peek(&sk->sk_write_queue)) 1980 if (skb == tcp_write_queue_head(sk))
1977 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 1981 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
1978 inet_csk(sk)->icsk_rto, 1982 inet_csk(sk)->icsk_rto,
1979 TCP_RTO_MAX); 1983 TCP_RTO_MAX);
@@ -1989,7 +1993,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1989void tcp_send_fin(struct sock *sk) 1993void tcp_send_fin(struct sock *sk)
1990{ 1994{
1991 struct tcp_sock *tp = tcp_sk(sk); 1995 struct tcp_sock *tp = tcp_sk(sk);
1992 struct sk_buff *skb = skb_peek_tail(&sk->sk_write_queue); 1996 struct sk_buff *skb = tcp_write_queue_tail(sk);
1993 int mss_now; 1997 int mss_now;
1994 1998
1995 /* Optimization, tack on the FIN if we have a queue of 1999 /* Optimization, tack on the FIN if we have a queue of
@@ -1998,7 +2002,7 @@ void tcp_send_fin(struct sock *sk)
1998 */ 2002 */
1999 mss_now = tcp_current_mss(sk, 1); 2003 mss_now = tcp_current_mss(sk, 1);
2000 2004
2001 if (sk->sk_send_head != NULL) { 2005 if (tcp_send_head(sk) != NULL) {
2002 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; 2006 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
2003 TCP_SKB_CB(skb)->end_seq++; 2007 TCP_SKB_CB(skb)->end_seq++;
2004 tp->write_seq++; 2008 tp->write_seq++;
@@ -2025,7 +2029,7 @@ void tcp_send_fin(struct sock *sk)
2025 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; 2029 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
2026 tcp_queue_skb(sk, skb); 2030 tcp_queue_skb(sk, skb);
2027 } 2031 }
2028 __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_OFF); 2032 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
2029} 2033}
2030 2034
2031/* We get here when a process closes a file descriptor (either due to 2035/* We get here when a process closes a file descriptor (either due to
@@ -2035,7 +2039,6 @@ void tcp_send_fin(struct sock *sk)
2035 */ 2039 */
2036void tcp_send_active_reset(struct sock *sk, gfp_t priority) 2040void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2037{ 2041{
2038 struct tcp_sock *tp = tcp_sk(sk);
2039 struct sk_buff *skb; 2042 struct sk_buff *skb;
2040 2043
2041 /* NOTE: No TCP options attached and we never retransmit this. */ 2044 /* NOTE: No TCP options attached and we never retransmit this. */
@@ -2055,7 +2058,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2055 skb_shinfo(skb)->gso_type = 0; 2058 skb_shinfo(skb)->gso_type = 0;
2056 2059
2057 /* Send it off. */ 2060 /* Send it off. */
2058 TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp); 2061 TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk);
2059 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq; 2062 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
2060 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2063 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2061 if (tcp_transmit_skb(sk, skb, 0, priority)) 2064 if (tcp_transmit_skb(sk, skb, 0, priority))
@@ -2071,7 +2074,7 @@ int tcp_send_synack(struct sock *sk)
2071{ 2074{
2072 struct sk_buff* skb; 2075 struct sk_buff* skb;
2073 2076
2074 skb = skb_peek(&sk->sk_write_queue); 2077 skb = tcp_write_queue_head(sk);
2075 if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) { 2078 if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) {
2076 printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); 2079 printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
2077 return -EFAULT; 2080 return -EFAULT;
@@ -2081,9 +2084,9 @@ int tcp_send_synack(struct sock *sk)
2081 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); 2084 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
2082 if (nskb == NULL) 2085 if (nskb == NULL)
2083 return -ENOMEM; 2086 return -ENOMEM;
2084 __skb_unlink(skb, &sk->sk_write_queue); 2087 tcp_unlink_write_queue(skb, sk);
2085 skb_header_release(nskb); 2088 skb_header_release(nskb);
2086 __skb_queue_head(&sk->sk_write_queue, nskb); 2089 __tcp_add_write_queue_head(sk, nskb);
2087 sk_stream_free_skb(sk, skb); 2090 sk_stream_free_skb(sk, skb);
2088 sk_charge_skb(sk, nskb); 2091 sk_charge_skb(sk, nskb);
2089 skb = nskb; 2092 skb = nskb;
@@ -2133,8 +2136,10 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2133 if (md5) 2136 if (md5)
2134 tcp_header_size += TCPOLEN_MD5SIG_ALIGNED; 2137 tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
2135#endif 2138#endif
2136 skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size); 2139 skb_push(skb, tcp_header_size);
2140 skb_reset_transport_header(skb);
2137 2141
2142 th = tcp_hdr(skb);
2138 memset(th, 0, sizeof(struct tcphdr)); 2143 memset(th, 0, sizeof(struct tcphdr));
2139 th->syn = 1; 2144 th->syn = 1;
2140 th->ack = 1; 2145 th->ack = 1;
@@ -2188,7 +2193,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2188 tp->af_specific->calc_md5_hash(md5_hash_location, 2193 tp->af_specific->calc_md5_hash(md5_hash_location,
2189 md5, 2194 md5,
2190 NULL, dst, req, 2195 NULL, dst, req,
2191 skb->h.th, sk->sk_protocol, 2196 tcp_hdr(skb), sk->sk_protocol,
2192 skb->len); 2197 skb->len);
2193 } 2198 }
2194#endif 2199#endif
@@ -2271,7 +2276,7 @@ int tcp_connect(struct sock *sk)
2271 skb_reserve(buff, MAX_TCP_HEADER); 2276 skb_reserve(buff, MAX_TCP_HEADER);
2272 2277
2273 TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN; 2278 TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
2274 TCP_ECN_send_syn(sk, tp, buff); 2279 TCP_ECN_send_syn(sk, buff);
2275 TCP_SKB_CB(buff)->sacked = 0; 2280 TCP_SKB_CB(buff)->sacked = 0;
2276 skb_shinfo(buff)->gso_segs = 1; 2281 skb_shinfo(buff)->gso_segs = 1;
2277 skb_shinfo(buff)->gso_size = 0; 2282 skb_shinfo(buff)->gso_size = 0;
@@ -2285,7 +2290,7 @@ int tcp_connect(struct sock *sk)
2285 TCP_SKB_CB(buff)->when = tcp_time_stamp; 2290 TCP_SKB_CB(buff)->when = tcp_time_stamp;
2286 tp->retrans_stamp = TCP_SKB_CB(buff)->when; 2291 tp->retrans_stamp = TCP_SKB_CB(buff)->when;
2287 skb_header_release(buff); 2292 skb_header_release(buff);
2288 __skb_queue_tail(&sk->sk_write_queue, buff); 2293 __tcp_add_write_queue_tail(sk, buff);
2289 sk_charge_skb(sk, buff); 2294 sk_charge_skb(sk, buff);
2290 tp->packets_out += tcp_skb_pcount(buff); 2295 tp->packets_out += tcp_skb_pcount(buff);
2291 tcp_transmit_skb(sk, buff, 1, GFP_KERNEL); 2296 tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
@@ -2363,7 +2368,6 @@ void tcp_send_ack(struct sock *sk)
2363{ 2368{
2364 /* If we have been reset, we may not send again. */ 2369 /* If we have been reset, we may not send again. */
2365 if (sk->sk_state != TCP_CLOSE) { 2370 if (sk->sk_state != TCP_CLOSE) {
2366 struct tcp_sock *tp = tcp_sk(sk);
2367 struct sk_buff *buff; 2371 struct sk_buff *buff;
2368 2372
2369 /* We are not putting this on the write queue, so 2373 /* We are not putting this on the write queue, so
@@ -2389,7 +2393,7 @@ void tcp_send_ack(struct sock *sk)
2389 skb_shinfo(buff)->gso_type = 0; 2393 skb_shinfo(buff)->gso_type = 0;
2390 2394
2391 /* Send it off, this clears delayed acks for us. */ 2395 /* Send it off, this clears delayed acks for us. */
2392 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp); 2396 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk);
2393 TCP_SKB_CB(buff)->when = tcp_time_stamp; 2397 TCP_SKB_CB(buff)->when = tcp_time_stamp;
2394 tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC); 2398 tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
2395 } 2399 }
@@ -2441,7 +2445,7 @@ int tcp_write_wakeup(struct sock *sk)
2441 struct tcp_sock *tp = tcp_sk(sk); 2445 struct tcp_sock *tp = tcp_sk(sk);
2442 struct sk_buff *skb; 2446 struct sk_buff *skb;
2443 2447
2444 if ((skb = sk->sk_send_head) != NULL && 2448 if ((skb = tcp_send_head(sk)) != NULL &&
2445 before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) { 2449 before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) {
2446 int err; 2450 int err;
2447 unsigned int mss = tcp_current_mss(sk, 0); 2451 unsigned int mss = tcp_current_mss(sk, 0);
@@ -2467,7 +2471,7 @@ int tcp_write_wakeup(struct sock *sk)
2467 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2471 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2468 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 2472 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2469 if (!err) { 2473 if (!err) {
2470 update_send_head(sk, tp, skb); 2474 update_send_head(sk, skb);
2471 } 2475 }
2472 return err; 2476 return err;
2473 } else { 2477 } else {
@@ -2491,7 +2495,7 @@ void tcp_send_probe0(struct sock *sk)
2491 2495
2492 err = tcp_write_wakeup(sk); 2496 err = tcp_write_wakeup(sk);
2493 2497
2494 if (tp->packets_out || !sk->sk_send_head) { 2498 if (tp->packets_out || !tcp_send_head(sk)) {
2495 /* Cancel probe timer, if it is not required. */ 2499 /* Cancel probe timer, if it is not required. */
2496 icsk->icsk_probes_out = 0; 2500 icsk->icsk_probes_out = 0;
2497 icsk->icsk_backoff = 0; 2501 icsk->icsk_backoff = 0;
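The tcp_output.c and tcp_timer.c hunks above are one mechanical conversion: every direct poke at sk->sk_send_head and sk->sk_write_queue goes through a named accessor instead, so callers stop assuming the write queue is a bare sk_buff_head. A minimal sketch of the helpers the call sites imply (the real definitions live in include/net/tcp.h; these bodies are inferred from usage, not quoted from the patch):

    static inline struct sk_buff *tcp_send_head(struct sock *sk)
    {
            return sk->sk_send_head;
    }

    static inline struct sk_buff *tcp_write_queue_head(struct sock *sk)
    {
            return skb_peek(&sk->sk_write_queue);
    }

    static inline struct sk_buff *tcp_write_queue_next(struct sock *sk,
                                                       struct sk_buff *skb)
    {
            return skb->next;
    }

    static inline int tcp_skb_is_last(const struct sock *sk,
                                      const struct sk_buff *skb)
    {
            return skb->next == (struct sk_buff *)&sk->sk_write_queue;
    }

    static inline int tcp_write_queue_empty(struct sock *sk)
    {
            return skb_queue_empty(&sk->sk_write_queue);
    }

    /* Walks the whole queue, sent and unsent alike, which is why the
     * loops converted from sk_stream_for_retrans_queue() above gain an
     * explicit "if (skb == tcp_send_head(sk)) break;" to stop at the
     * first not-yet-sent segment.
     */
    #define tcp_for_write_queue(skb, sk)                                \
            for (skb = tcp_write_queue_head(sk);                        \
                 skb != (struct sk_buff *)&(sk)->sk_write_queue;        \
                 skb = skb->next)

The design point is that once no caller touches the representation directly, the queue's layout can change later without revisiting every call site.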
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 61f406f27294..3938d5dbdf20 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -26,6 +26,8 @@
26#include <linux/proc_fs.h> 26#include <linux/proc_fs.h>
27#include <linux/module.h> 27#include <linux/module.h>
28#include <linux/kfifo.h> 28#include <linux/kfifo.h>
29#include <linux/ktime.h>
30#include <linux/time.h>
29#include <linux/vmalloc.h> 31#include <linux/vmalloc.h>
30 32
31#include <net/tcp.h> 33#include <net/tcp.h>
@@ -34,43 +36,45 @@ MODULE_AUTHOR("Stephen Hemminger <shemminger@linux-foundation.org>");
34MODULE_DESCRIPTION("TCP cwnd snooper"); 36MODULE_DESCRIPTION("TCP cwnd snooper");
35MODULE_LICENSE("GPL"); 37MODULE_LICENSE("GPL");
36 38
37static int port = 0; 39static int port __read_mostly = 0;
38MODULE_PARM_DESC(port, "Port to match (0=all)"); 40MODULE_PARM_DESC(port, "Port to match (0=all)");
39module_param(port, int, 0); 41module_param(port, int, 0);
40 42
41static int bufsize = 64*1024; 43static int bufsize __read_mostly = 64*1024;
42MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); 44MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
43module_param(bufsize, int, 0); 45module_param(bufsize, int, 0);
44 46
47static int full __read_mostly;
48MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)");
49module_param(full, int, 0);
50
45static const char procname[] = "tcpprobe"; 51static const char procname[] = "tcpprobe";
46 52
47struct { 53struct {
48 struct kfifo *fifo; 54 struct kfifo *fifo;
49 spinlock_t lock; 55 spinlock_t lock;
50 wait_queue_head_t wait; 56 wait_queue_head_t wait;
51 struct timeval tstart; 57 ktime_t start;
58 u32 lastcwnd;
52} tcpw; 59} tcpw;
53 60
61/*
62 * Print to log with timestamps.
63 * FIXME: causes an extra copy
64 */
54static void printl(const char *fmt, ...) 65static void printl(const char *fmt, ...)
55{ 66{
56 va_list args; 67 va_list args;
57 int len; 68 int len;
58 struct timeval now; 69 struct timespec tv;
59 char tbuf[256]; 70 char tbuf[256];
60 71
61 va_start(args, fmt); 72 va_start(args, fmt);
62 do_gettimeofday(&now); 73 /* want monotonic time since start of tcp_probe */
74 tv = ktime_to_timespec(ktime_sub(ktime_get(), tcpw.start));
63 75
64 now.tv_sec -= tcpw.tstart.tv_sec; 76 len = sprintf(tbuf, "%lu.%09lu ",
65 now.tv_usec -= tcpw.tstart.tv_usec; 77 (unsigned long) tv.tv_sec, (unsigned long) tv.tv_nsec);
66 if (now.tv_usec < 0) {
67 --now.tv_sec;
68 now.tv_usec += 1000000;
69 }
70
71 len = sprintf(tbuf, "%lu.%06lu ",
72 (unsigned long) now.tv_sec,
73 (unsigned long) now.tv_usec);
74 len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); 78 len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
75 va_end(args); 79 va_end(args);
76 80
@@ -78,38 +82,44 @@ static void printl(const char *fmt, ...)
78 wake_up(&tcpw.wait); 82 wake_up(&tcpw.wait);
79} 83}
80 84
81static int jtcp_sendmsg(struct kiocb *iocb, struct sock *sk, 85/*
82 struct msghdr *msg, size_t size) 86 * Hook inserted to be called before each receive packet.
87 * Note: arguments must match tcp_rcv_established()!
88 */
89static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
90 struct tcphdr *th, unsigned len)
83{ 91{
84 const struct tcp_sock *tp = tcp_sk(sk); 92 const struct tcp_sock *tp = tcp_sk(sk);
85 const struct inet_sock *inet = inet_sk(sk); 93 const struct inet_sock *inet = inet_sk(sk);
86 94
87 if (port == 0 || ntohs(inet->dport) == port || 95 /* Only update if port matches */
88 ntohs(inet->sport) == port) { 96 if ((port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port)
97 && (full || tp->snd_cwnd != tcpw.lastcwnd)) {
89 printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %#x %#x %u %u %u\n", 98 printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %#x %#x %u %u %u\n",
90 NIPQUAD(inet->saddr), ntohs(inet->sport), 99 NIPQUAD(inet->saddr), ntohs(inet->sport),
91 NIPQUAD(inet->daddr), ntohs(inet->dport), 100 NIPQUAD(inet->daddr), ntohs(inet->dport),
92 size, tp->snd_nxt, tp->snd_una, 101 skb->len, tp->snd_nxt, tp->snd_una,
93 tp->snd_cwnd, tcp_current_ssthresh(sk), 102 tp->snd_cwnd, tcp_current_ssthresh(sk),
94 tp->snd_wnd); 103 tp->snd_wnd, tp->srtt >> 3);
104 tcpw.lastcwnd = tp->snd_cwnd;
95 } 105 }
96 106
97 jprobe_return(); 107 jprobe_return();
98 return 0; 108 return 0;
99} 109}
100 110
101static struct jprobe tcp_send_probe = { 111static struct jprobe tcp_probe = {
102 .kp = { 112 .kp = {
103 .symbol_name = "tcp_sendmsg", 113 .symbol_name = "tcp_rcv_established",
104 }, 114 },
105 .entry = JPROBE_ENTRY(jtcp_sendmsg), 115 .entry = JPROBE_ENTRY(jtcp_rcv_established),
106}; 116};
107 117
108 118
109static int tcpprobe_open(struct inode * inode, struct file * file) 119static int tcpprobe_open(struct inode * inode, struct file * file)
110{ 120{
111 kfifo_reset(tcpw.fifo); 121 kfifo_reset(tcpw.fifo);
112 do_gettimeofday(&tcpw.tstart); 122 tcpw.start = ktime_get();
113 return 0; 123 return 0;
114} 124}
115 125
@@ -162,7 +172,7 @@ static __init int tcpprobe_init(void)
162 if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops)) 172 if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops))
163 goto err0; 173 goto err0;
164 174
165 ret = register_jprobe(&tcp_send_probe); 175 ret = register_jprobe(&tcp_probe);
166 if (ret) 176 if (ret)
167 goto err1; 177 goto err1;
168 178
@@ -180,7 +190,7 @@ static __exit void tcpprobe_exit(void)
180{ 190{
181 kfifo_free(tcpw.fifo); 191 kfifo_free(tcpw.fifo);
182 proc_net_remove(procname); 192 proc_net_remove(procname);
183 unregister_jprobe(&tcp_send_probe); 193 unregister_jprobe(&tcp_probe);
184 194
185} 195}
186module_exit(tcpprobe_exit); 196module_exit(tcpprobe_exit);
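Two mechanics in the tcp_probe rewrite above deserve a note. First, a jprobe handler must copy the probed function's signature exactly and must exit through jprobe_return(); that is why jtcp_rcv_established() mirrors tcp_rcv_established() argument for argument. Second, timestamps now come from the monotonic clock (ktime_get()) rather than do_gettimeofday(), so logged deltas cannot jump when wall time is stepped. A stripped-down skeleton of the pattern (illustrative names, not part of the patch):

    #include <linux/kprobes.h>
    #include <net/tcp.h>

    static int jtcp_example(struct sock *sk, struct sk_buff *skb,
                            struct tcphdr *th, unsigned len)
    {
            /* peek at the arguments here; never return normally */
            jprobe_return();
            return 0;       /* not reached */
    }

    static struct jprobe example_probe = {
            .kp     = { .symbol_name = "tcp_rcv_established", },
            .entry  = JPROBE_ENTRY(jtcp_example),
    };

    /* register_jprobe(&example_probe) at module init and
     * unregister_jprobe(&example_probe) at exit, as the module does. */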
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index a9243cfc1bea..2ca97b20929d 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -233,7 +233,7 @@ static void tcp_probe_timer(struct sock *sk)
233 struct tcp_sock *tp = tcp_sk(sk); 233 struct tcp_sock *tp = tcp_sk(sk);
234 int max_probes; 234 int max_probes;
235 235
236 if (tp->packets_out || !sk->sk_send_head) { 236 if (tp->packets_out || !tcp_send_head(sk)) {
237 icsk->icsk_probes_out = 0; 237 icsk->icsk_probes_out = 0;
238 return; 238 return;
239 } 239 }
@@ -284,7 +284,7 @@ static void tcp_retransmit_timer(struct sock *sk)
284 if (!tp->packets_out) 284 if (!tp->packets_out)
285 goto out; 285 goto out;
286 286
287 BUG_TRAP(!skb_queue_empty(&sk->sk_write_queue)); 287 BUG_TRAP(!tcp_write_queue_empty(sk));
288 288
289 if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) && 289 if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
290 !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) { 290 !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
@@ -306,7 +306,7 @@ static void tcp_retransmit_timer(struct sock *sk)
306 goto out; 306 goto out;
307 } 307 }
308 tcp_enter_loss(sk, 0); 308 tcp_enter_loss(sk, 0);
309 tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)); 309 tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
310 __sk_dst_reset(sk); 310 __sk_dst_reset(sk);
311 goto out_reset_timer; 311 goto out_reset_timer;
312 } 312 }
@@ -341,7 +341,7 @@ static void tcp_retransmit_timer(struct sock *sk)
341 tcp_enter_loss(sk, 0); 341 tcp_enter_loss(sk, 0);
342 } 342 }
343 343
344 if (tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)) > 0) { 344 if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) {
345 /* Retransmission failed because of local congestion, 345 /* Retransmission failed because of local congestion,
346 * do not backoff. 346 * do not backoff.
347 */ 347 */
@@ -482,7 +482,7 @@ static void tcp_keepalive_timer (unsigned long data)
482 elapsed = keepalive_time_when(tp); 482 elapsed = keepalive_time_when(tp);
483 483
484 /* It is alive without keepalive 8) */ 484 /* It is alive without keepalive 8) */
485 if (tp->packets_out || sk->sk_send_head) 485 if (tp->packets_out || tcp_send_head(sk))
486 goto resched; 486 goto resched;
487 487
488 elapsed = tcp_time_stamp - tp->rcv_tstamp; 488 elapsed = tcp_time_stamp - tp->rcv_tstamp;
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 5c484dceb967..73e19cf7df21 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -38,6 +38,8 @@
38 38
39#include <net/tcp.h> 39#include <net/tcp.h>
40 40
41#include "tcp_vegas.h"
42
41/* Default values of the Vegas variables, in fixed-point representation 43/* Default values of the Vegas variables, in fixed-point representation
42 * with V_PARAM_SHIFT bits to the right of the binary point. 44 * with V_PARAM_SHIFT bits to the right of the binary point.
43 */ 45 */
@@ -54,17 +56,6 @@ module_param(gamma, int, 0644);
54MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)"); 56MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)");
55 57
56 58
57/* Vegas variables */
58struct vegas {
59 u32 beg_snd_nxt; /* right edge during last RTT */
60 u32 beg_snd_una; /* left edge during last RTT */
61 u32 beg_snd_cwnd; /* saves the size of the cwnd */
62 u8 doing_vegas_now;/* if true, do vegas for this RTT */
63 u16 cntRTT; /* # of RTTs measured within last RTT */
64 u32 minRTT; /* min of RTTs measured within last RTT (in usec) */
65 u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */
66};
67
68/* There are several situations when we must "re-start" Vegas: 59/* There are several situations when we must "re-start" Vegas:
69 * 60 *
70 * o when a connection is established 61 * o when a connection is established
@@ -81,7 +72,7 @@ struct vegas {
81 * Instead we must wait until the completion of an RTT during 72 * Instead we must wait until the completion of an RTT during
82 * which we actually receive ACKs. 73 * which we actually receive ACKs.
83 */ 74 */
84static inline void vegas_enable(struct sock *sk) 75static void vegas_enable(struct sock *sk)
85{ 76{
86 const struct tcp_sock *tp = tcp_sk(sk); 77 const struct tcp_sock *tp = tcp_sk(sk);
87 struct vegas *vegas = inet_csk_ca(sk); 78 struct vegas *vegas = inet_csk_ca(sk);
@@ -104,13 +95,14 @@ static inline void vegas_disable(struct sock *sk)
104 vegas->doing_vegas_now = 0; 95 vegas->doing_vegas_now = 0;
105} 96}
106 97
107static void tcp_vegas_init(struct sock *sk) 98void tcp_vegas_init(struct sock *sk)
108{ 99{
109 struct vegas *vegas = inet_csk_ca(sk); 100 struct vegas *vegas = inet_csk_ca(sk);
110 101
111 vegas->baseRTT = 0x7fffffff; 102 vegas->baseRTT = 0x7fffffff;
112 vegas_enable(sk); 103 vegas_enable(sk);
113} 104}
105EXPORT_SYMBOL_GPL(tcp_vegas_init);
114 106
115/* Do RTT sampling needed for Vegas. 107/* Do RTT sampling needed for Vegas.
116 * Basically we: 108 * Basically we:
@@ -120,10 +112,13 @@ static void tcp_vegas_init(struct sock *sk)
120 * o min-filter RTT samples from a much longer window (forever for now) 112 * o min-filter RTT samples from a much longer window (forever for now)
121 * to find the propagation delay (baseRTT) 113 * to find the propagation delay (baseRTT)
122 */ 114 */
123static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt) 115void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
124{ 116{
125 struct vegas *vegas = inet_csk_ca(sk); 117 struct vegas *vegas = inet_csk_ca(sk);
126 u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */ 118 u32 vrtt;
119
120 /* Never allow zero rtt or baseRTT */
121 vrtt = ktime_to_us(net_timedelta(last)) + 1;
127 122
128 /* Filter to find propagation delay: */ 123 /* Filter to find propagation delay: */
129 if (vrtt < vegas->baseRTT) 124 if (vrtt < vegas->baseRTT)
@@ -135,8 +130,9 @@ static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt)
135 vegas->minRTT = min(vegas->minRTT, vrtt); 130 vegas->minRTT = min(vegas->minRTT, vrtt);
136 vegas->cntRTT++; 131 vegas->cntRTT++;
137} 132}
133EXPORT_SYMBOL_GPL(tcp_vegas_pkts_acked);
138 134
139static void tcp_vegas_state(struct sock *sk, u8 ca_state) 135void tcp_vegas_state(struct sock *sk, u8 ca_state)
140{ 136{
141 137
142 if (ca_state == TCP_CA_Open) 138 if (ca_state == TCP_CA_Open)
@@ -144,6 +140,7 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state)
144 else 140 else
145 vegas_disable(sk); 141 vegas_disable(sk);
146} 142}
143EXPORT_SYMBOL_GPL(tcp_vegas_state);
147 144
148/* 145/*
149 * If the connection is idle and we are restarting, 146 * If the connection is idle and we are restarting,
@@ -154,12 +151,13 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state)
154 * packets, _then_ we can make Vegas calculations 151 * packets, _then_ we can make Vegas calculations
155 * again. 152 * again.
156 */ 153 */
157static void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) 154void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
158{ 155{
159 if (event == CA_EVENT_CWND_RESTART || 156 if (event == CA_EVENT_CWND_RESTART ||
160 event == CA_EVENT_TX_START) 157 event == CA_EVENT_TX_START)
161 tcp_vegas_init(sk); 158 tcp_vegas_init(sk);
162} 159}
160EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
163 161
164static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, 162static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
165 u32 seq_rtt, u32 in_flight, int flag) 163 u32 seq_rtt, u32 in_flight, int flag)
@@ -336,30 +334,29 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
336} 334}
337 335
338/* Extract info for Tcp socket info provided via netlink. */ 336/* Extract info for Tcp socket info provided via netlink. */
339static void tcp_vegas_get_info(struct sock *sk, u32 ext, 337void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
340 struct sk_buff *skb)
341{ 338{
342 const struct vegas *ca = inet_csk_ca(sk); 339 const struct vegas *ca = inet_csk_ca(sk);
343 if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { 340 if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
344 struct tcpvegas_info *info; 341 struct tcpvegas_info info = {
345 342 .tcpv_enabled = ca->doing_vegas_now,
346 info = RTA_DATA(__RTA_PUT(skb, INET_DIAG_VEGASINFO, 343 .tcpv_rttcnt = ca->cntRTT,
347 sizeof(*info))); 344 .tcpv_rtt = ca->baseRTT,
348 345 .tcpv_minrtt = ca->minRTT,
349 info->tcpv_enabled = ca->doing_vegas_now; 346 };
350 info->tcpv_rttcnt = ca->cntRTT; 347
351 info->tcpv_rtt = ca->baseRTT; 348 nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
352 info->tcpv_minrtt = ca->minRTT;
353 rtattr_failure: ;
354 } 349 }
355} 350}
351EXPORT_SYMBOL_GPL(tcp_vegas_get_info);
356 352
357static struct tcp_congestion_ops tcp_vegas = { 353static struct tcp_congestion_ops tcp_vegas = {
354 .flags = TCP_CONG_RTT_STAMP,
358 .init = tcp_vegas_init, 355 .init = tcp_vegas_init,
359 .ssthresh = tcp_reno_ssthresh, 356 .ssthresh = tcp_reno_ssthresh,
360 .cong_avoid = tcp_vegas_cong_avoid, 357 .cong_avoid = tcp_vegas_cong_avoid,
361 .min_cwnd = tcp_reno_min_cwnd, 358 .min_cwnd = tcp_reno_min_cwnd,
362 .rtt_sample = tcp_vegas_rtt_calc, 359 .pkts_acked = tcp_vegas_pkts_acked,
363 .set_state = tcp_vegas_state, 360 .set_state = tcp_vegas_state,
364 .cwnd_event = tcp_vegas_cwnd_event, 361 .cwnd_event = tcp_vegas_cwnd_event,
365 .get_info = tcp_vegas_get_info, 362 .get_info = tcp_vegas_get_info,
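The rtt_sample to pkts_acked change above shifts responsibility: the module no longer receives a precomputed microsecond RTT, it receives the send timestamp (ktime_t) of the newest acknowledged segment and derives the RTT itself, opting in with the new TCP_CONG_RTT_STAMP flag. The conversion Vegas performs (and Veno repeats below), as a stand-alone sketch:

    static u32 example_rtt_sample_us(ktime_t last_tx)
    {
            /* net_timedelta() subtracts the stamp from the current
             * time; the +1 keeps the sample strictly positive, since
             * Vegas treats a zero RTT or baseRTT as invalid.
             */
            return ktime_to_us(net_timedelta(last_tx)) + 1;
    }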
diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h
new file mode 100644
index 000000000000..502fa8183634
--- /dev/null
+++ b/net/ipv4/tcp_vegas.h
@@ -0,0 +1,24 @@
1/*
2 * TCP Vegas congestion control interface
3 */
4#ifndef __TCP_VEGAS_H
5#define __TCP_VEGAS_H 1
6
7/* Vegas variables */
8struct vegas {
9 u32 beg_snd_nxt; /* right edge during last RTT */
10 u32 beg_snd_una; /* left edge during last RTT */
11 u32 beg_snd_cwnd; /* saves the size of the cwnd */
12 u8 doing_vegas_now;/* if true, do vegas for this RTT */
13 u16 cntRTT; /* # of RTTs measured within last RTT */
14 u32 minRTT; /* min of RTTs measured within last RTT (in usec) */
15 u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */
16};
17
18extern void tcp_vegas_init(struct sock *sk);
19extern void tcp_vegas_state(struct sock *sk, u8 ca_state);
20extern void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last);
21extern void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event);
22extern void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb);
23
24#endif /* __TCP_VEGAS_H */
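Exporting these hooks serves one consumer pattern: a derived congestion-control module embeds struct vegas as the first member of its own private state, so the single inet_csk_ca(sk) area is validly viewed as either type. tcp_yeah.c below does exactly this ("must be first"); the layout requirement, sketched:

    struct derived_ca {
            struct vegas vegas;     /* must be first: the tcp_vegas_*
                                     * hooks cast inet_csk_ca(sk) to
                                     * struct vegas *, while the module
                                     * casts the same pointer to its
                                     * own type.
                                     */
            u32 private_state;      /* module-specific fields follow */
    };

The combined structure must still fit in ICSK_CA_PRIV_SIZE, which tcp_yeah_register() below asserts with BUG_ON().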
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index ce57bf302f6c..9edb340f2f95 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -69,10 +69,13 @@ static void tcp_veno_init(struct sock *sk)
69} 69}
70 70
71/* Do rtt sampling needed for Veno. */ 71/* Do rtt sampling needed for Veno. */
72static void tcp_veno_rtt_calc(struct sock *sk, u32 usrtt) 72static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
73{ 73{
74 struct veno *veno = inet_csk_ca(sk); 74 struct veno *veno = inet_csk_ca(sk);
75 u32 vrtt = usrtt + 1; /* Never allow zero rtt or basertt */ 75 u32 vrtt;
76
77 /* Never allow zero rtt or baseRTT */
78 vrtt = ktime_to_us(net_timedelta(last)) + 1;
76 79
77 /* Filter to find propagation delay: */ 80 /* Filter to find propagation delay: */
78 if (vrtt < veno->basertt) 81 if (vrtt < veno->basertt)
@@ -199,10 +202,11 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
199} 202}
200 203
201static struct tcp_congestion_ops tcp_veno = { 204static struct tcp_congestion_ops tcp_veno = {
205 .flags = TCP_CONG_RTT_STAMP,
202 .init = tcp_veno_init, 206 .init = tcp_veno_init,
203 .ssthresh = tcp_veno_ssthresh, 207 .ssthresh = tcp_veno_ssthresh,
204 .cong_avoid = tcp_veno_cong_avoid, 208 .cong_avoid = tcp_veno_cong_avoid,
205 .rtt_sample = tcp_veno_rtt_calc, 209 .pkts_acked = tcp_veno_pkts_acked,
206 .set_state = tcp_veno_state, 210 .set_state = tcp_veno_state,
207 .cwnd_event = tcp_veno_cwnd_event, 211 .cwnd_event = tcp_veno_cwnd_event,
208 212
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 4e1b61032a9c..e61e09dd513e 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -100,7 +100,7 @@ static void westwood_filter(struct westwood *w, u32 delta)
100 * Called after processing group of packets. 100 * Called after processing group of packets.
101 * but all westwood needs is the last sample of srtt. 101 * but all westwood needs is the last sample of srtt.
102 */ 102 */
103static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt) 103static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
104{ 104{
105 struct westwood *w = inet_csk_ca(sk); 105 struct westwood *w = inet_csk_ca(sk);
106 if (cnt > 0) 106 if (cnt > 0)
@@ -226,7 +226,7 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
226 struct tcp_sock *tp = tcp_sk(sk); 226 struct tcp_sock *tp = tcp_sk(sk);
227 struct westwood *w = inet_csk_ca(sk); 227 struct westwood *w = inet_csk_ca(sk);
228 228
229 switch(event) { 229 switch (event) {
230 case CA_EVENT_FAST_ACK: 230 case CA_EVENT_FAST_ACK:
231 westwood_fast_bw(sk); 231 westwood_fast_bw(sk);
232 break; 232 break;
@@ -260,16 +260,13 @@ static void tcp_westwood_info(struct sock *sk, u32 ext,
260{ 260{
261 const struct westwood *ca = inet_csk_ca(sk); 261 const struct westwood *ca = inet_csk_ca(sk);
262 if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { 262 if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
263 struct rtattr *rta; 263 struct tcpvegas_info info = {
264 struct tcpvegas_info *info; 264 .tcpv_enabled = 1,
265 265 .tcpv_rtt = jiffies_to_usecs(ca->rtt),
266 rta = __RTA_PUT(skb, INET_DIAG_VEGASINFO, sizeof(*info)); 266 .tcpv_minrtt = jiffies_to_usecs(ca->rtt_min),
267 info = RTA_DATA(rta); 267 };
268 info->tcpv_enabled = 1; 268
269 info->tcpv_rttcnt = 0; 269 nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
270 info->tcpv_rtt = jiffies_to_usecs(ca->rtt);
271 info->tcpv_minrtt = jiffies_to_usecs(ca->rtt_min);
272 rtattr_failure: ;
273 } 270 }
274} 271}
275 272
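Both get_info() conversions above (Vegas and Westwood) retire the __RTA_PUT()/RTA_DATA() sequence and its rtattr_failure label in favor of a stack-built struct emitted by a single nla_put(). A side effect visible in the Westwood hunk: designated initializers zero every unnamed member, so the explicit tcpv_rttcnt = 0 assignment simply disappears. The pattern, schematically:

    if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
            struct tcpvegas_info info = {
                    .tcpv_enabled = 1,
                    /* members not named here are zeroed */
            };

            nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
    }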
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
new file mode 100644
index 000000000000..545ed237ab53
--- /dev/null
+++ b/net/ipv4/tcp_yeah.c
@@ -0,0 +1,268 @@
1/*
2 *
3 * YeAH TCP
4 *
5 * For further details look at:
6 * http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
7 *
8 */
9#include <linux/mm.h>
10#include <linux/module.h>
11#include <linux/skbuff.h>
12#include <linux/inet_diag.h>
13
14#include <net/tcp.h>
15
16#include "tcp_vegas.h"
17
18#define TCP_YEAH_ALPHA 80 //lin number of packets queued at the bottleneck
19#define TCP_YEAH_GAMMA 1 //lin fraction of queue to be removed per rtt
20#define TCP_YEAH_DELTA 3 //log minimum fraction of cwnd to be removed on loss
21#define TCP_YEAH_EPSILON 1 //log maximum fraction to be removed on early decongestion
22#define TCP_YEAH_PHY 8 //lin maximum delta from base
23#define TCP_YEAH_RHO 16 //lin minimum number of consecutive rtt to consider competition on loss
24#define TCP_YEAH_ZETA 50 //lin minimum number of state switches to reset reno_count
25
26#define TCP_SCALABLE_AI_CNT 100U
27
28/* YeAH variables */
29struct yeah {
30 struct vegas vegas; /* must be first */
31
32 /* YeAH */
33 u32 lastQ;
34 u32 doing_reno_now;
35
36 u32 reno_count;
37 u32 fast_count;
38
39 u32 pkts_acked;
40};
41
42static void tcp_yeah_init(struct sock *sk)
43{
44 struct tcp_sock *tp = tcp_sk(sk);
45 struct yeah *yeah = inet_csk_ca(sk);
46
47 tcp_vegas_init(sk);
48
49 yeah->doing_reno_now = 0;
50 yeah->lastQ = 0;
51
52 yeah->reno_count = 2;
53
54 /* Ensure the MD arithmetic works. This is somewhat pedantic,
55 * since I don't think we will see a cwnd this large. :) */
56 tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
57
58}
59
60
61static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, ktime_t last)
62{
63 const struct inet_connection_sock *icsk = inet_csk(sk);
64 struct yeah *yeah = inet_csk_ca(sk);
65
66 if (icsk->icsk_ca_state == TCP_CA_Open)
67 yeah->pkts_acked = pkts_acked;
68
69 tcp_vegas_pkts_acked(sk, pkts_acked, last);
70}
71
72static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
73 u32 seq_rtt, u32 in_flight, int flag)
74{
75 struct tcp_sock *tp = tcp_sk(sk);
76 struct yeah *yeah = inet_csk_ca(sk);
77
78 if (!tcp_is_cwnd_limited(sk, in_flight))
79 return;
80
81 if (tp->snd_cwnd <= tp->snd_ssthresh)
82 tcp_slow_start(tp);
83
84 else if (!yeah->doing_reno_now) {
85 /* Scalable */
86
87 tp->snd_cwnd_cnt+=yeah->pkts_acked;
88 if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){
89 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
90 tp->snd_cwnd++;
91 tp->snd_cwnd_cnt = 0;
92 }
93
94 yeah->pkts_acked = 1;
95
96 } else {
97 /* Reno */
98
99 if (tp->snd_cwnd_cnt < tp->snd_cwnd)
100 tp->snd_cwnd_cnt++;
101
102 if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
103 tp->snd_cwnd++;
104 tp->snd_cwnd_cnt = 0;
105 }
106 }
107
108 /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt.
109 *
110 * These are so named because they represent the approximate values
111 * of snd_una and snd_nxt at the beginning of the current RTT. More
112 * precisely, they represent the amount of data sent during the RTT.
113 * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt,
114 * we will calculate that (v_beg_snd_nxt - v_vegas.beg_snd_una) outstanding
115 * bytes of data have been ACKed during the course of the RTT, giving
116 * an "actual" rate of:
117 *
118 * (v_beg_snd_nxt - v_vegas.beg_snd_una) / (rtt duration)
119 *
120 * Unfortunately, v_vegas.beg_snd_una is not exactly equal to snd_una,
121 * because delayed ACKs can cover more than one segment, so they
122 * don't line up nicely with the boundaries of RTTs.
123 *
124 * Another unfortunate fact of life is that delayed ACKs delay the
125 * advance of the left edge of our send window, so that the number
126 * of bytes we send in an RTT is often less than our cwnd will allow.
127 * So we keep track of our cwnd separately, in v_beg_snd_cwnd.
128 */
129
130 if (after(ack, yeah->vegas.beg_snd_nxt)) {
131
132 /* We do the Vegas calculations only if we got enough RTT
133 * samples that we can be reasonably sure that we got
134 * at least one RTT sample that wasn't from a delayed ACK.
135 * If we only had 2 samples total,
136 * then that means we're getting only 1 ACK per RTT, which
137 * means they're almost certainly delayed ACKs.
138 * If we have 3 samples, we should be OK.
139 */
140
141 if (yeah->vegas.cntRTT > 2) {
142 u32 rtt, queue;
143 u64 bw;
144
145 /* We have enough RTT samples, so, using the Vegas
146 * algorithm, we determine if we should increase or
147 * decrease cwnd, and by how much.
148 */
149
150 /* Pluck out the RTT we are using for the Vegas
151 * calculations. This is the min RTT seen during the
152 * last RTT. Taking the min filters out the effects
153 * of delayed ACKs, at the cost of noticing congestion
154 * a bit later.
155 */
156 rtt = yeah->vegas.minRTT;
157
158 /* Compute excess number of packets above bandwidth
159 * Avoid doing full 64 bit divide.
160 */
161 bw = tp->snd_cwnd;
162 bw *= rtt - yeah->vegas.baseRTT;
163 do_div(bw, rtt);
164 queue = bw;
165
166 if (queue > TCP_YEAH_ALPHA ||
167 rtt - yeah->vegas.baseRTT > (yeah->vegas.baseRTT / TCP_YEAH_PHY)) {
168 if (queue > TCP_YEAH_ALPHA
169 && tp->snd_cwnd > yeah->reno_count) {
170 u32 reduction = min(queue / TCP_YEAH_GAMMA ,
171 tp->snd_cwnd >> TCP_YEAH_EPSILON);
172
173 tp->snd_cwnd -= reduction;
174
175 tp->snd_cwnd = max(tp->snd_cwnd,
176 yeah->reno_count);
177
178 tp->snd_ssthresh = tp->snd_cwnd;
179 }
180
181 if (yeah->reno_count <= 2)
182 yeah->reno_count = max(tp->snd_cwnd>>1, 2U);
183 else
184 yeah->reno_count++;
185
186 yeah->doing_reno_now = min(yeah->doing_reno_now + 1,
187 0xffffffU);
188 } else {
189 yeah->fast_count++;
190
191 if (yeah->fast_count > TCP_YEAH_ZETA) {
192 yeah->reno_count = 2;
193 yeah->fast_count = 0;
194 }
195
196 yeah->doing_reno_now = 0;
197 }
198
199 yeah->lastQ = queue;
200
201 }
202
203 /* Save the extent of the current window so we can use this
204 * at the end of the next RTT.
205 */
206 yeah->vegas.beg_snd_una = yeah->vegas.beg_snd_nxt;
207 yeah->vegas.beg_snd_nxt = tp->snd_nxt;
208 yeah->vegas.beg_snd_cwnd = tp->snd_cwnd;
209
210 /* Wipe the slate clean for the next RTT. */
211 yeah->vegas.cntRTT = 0;
212 yeah->vegas.minRTT = 0x7fffffff;
213 }
214}
215
216static u32 tcp_yeah_ssthresh(struct sock *sk) {
217 const struct tcp_sock *tp = tcp_sk(sk);
218 struct yeah *yeah = inet_csk_ca(sk);
219 u32 reduction;
220
221 if (yeah->doing_reno_now < TCP_YEAH_RHO) {
222 reduction = yeah->lastQ;
223
224 reduction = min( reduction, max(tp->snd_cwnd>>1, 2U) );
225
226 reduction = max( reduction, tp->snd_cwnd >> TCP_YEAH_DELTA);
227 } else
228 reduction = max(tp->snd_cwnd>>1,2U);
229
230 yeah->fast_count = 0;
231 yeah->reno_count = max(yeah->reno_count>>1, 2U);
232
233 return tp->snd_cwnd - reduction;
234}
235
236static struct tcp_congestion_ops tcp_yeah = {
237 .flags = TCP_CONG_RTT_STAMP,
238 .init = tcp_yeah_init,
239 .ssthresh = tcp_yeah_ssthresh,
240 .cong_avoid = tcp_yeah_cong_avoid,
241 .min_cwnd = tcp_reno_min_cwnd,
242 .set_state = tcp_vegas_state,
243 .cwnd_event = tcp_vegas_cwnd_event,
244 .get_info = tcp_vegas_get_info,
245 .pkts_acked = tcp_yeah_pkts_acked,
246
247 .owner = THIS_MODULE,
248 .name = "yeah",
249};
250
251static int __init tcp_yeah_register(void)
252{
253 BUG_ON(sizeof(struct yeah) > ICSK_CA_PRIV_SIZE);
254 tcp_register_congestion_control(&tcp_yeah);
255 return 0;
256}
257
258static void __exit tcp_yeah_unregister(void)
259{
260 tcp_unregister_congestion_control(&tcp_yeah);
261}
262
263module_init(tcp_yeah_register);
264module_exit(tcp_yeah_unregister);
265
266MODULE_AUTHOR("Angelo P. Castellani");
267MODULE_LICENSE("GPL");
268MODULE_DESCRIPTION("YeAH TCP");
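The heart of tcp_yeah_cong_avoid() above is the Vegas-style backlog estimate, queue = cwnd * (rtt - baseRTT) / rtt, computed in 64 bits so the multiply cannot overflow. A worked pass with illustrative numbers:

    u64 bw   = 120;         /* snd_cwnd: packets in flight */
    u32 rtt  = 52000;       /* min RTT seen this round, usec */
    u32 base = 50000;       /* baseRTT: best RTT ever seen, usec */

    bw *= rtt - base;       /* 120 * 2000 = 240000 */
    do_div(bw, rtt);        /* 240000 / 52000 = 4 packets backlogged */

With these numbers, queue (4) is well under TCP_YEAH_ALPHA (80) and the extra delay (2000 usec) is under baseRTT/TCP_YEAH_PHY (6250 usec), so the round counts as uncongested: fast_count is bumped and doing_reno_now stays 0.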
diff --git a/net/ipv4/tcp_yeah.h b/net/ipv4/tcp_yeah.h
new file mode 100644
index 000000000000..ed3b7198f23c
--- /dev/null
+++ b/net/ipv4/tcp_yeah.h
@@ -0,0 +1,7 @@
1#include <linux/mm.h>
2#include <linux/module.h>
3#include <linux/skbuff.h>
4#include <linux/inet_diag.h>
5#include <asm/div64.h>
6
7#include <net/tcp.h>
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fc620a7c1db4..cec0f2cc49b7 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -175,7 +175,8 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
175 ; 175 ;
176 } 176 }
177 result = best; 177 result = best;
178 for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) { 178 for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE;
179 i++, result += UDP_HTABLE_SIZE) {
179 if (result > sysctl_local_port_range[1]) 180 if (result > sysctl_local_port_range[1])
180 result = sysctl_local_port_range[0] 181 result = sysctl_local_port_range[0]
181 + ((result - sysctl_local_port_range[0]) & 182 + ((result - sysctl_local_port_range[0]) &
@@ -212,13 +213,13 @@ fail:
212 return error; 213 return error;
213} 214}
214 215
215__inline__ int udp_get_port(struct sock *sk, unsigned short snum, 216int udp_get_port(struct sock *sk, unsigned short snum,
216 int (*scmp)(const struct sock *, const struct sock *)) 217 int (*scmp)(const struct sock *, const struct sock *))
217{ 218{
218 return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp); 219 return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp);
219} 220}
220 221
221inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) 222int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
222{ 223{
223 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); 224 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
224 225
@@ -270,10 +271,10 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
270 continue; 271 continue;
271 score+=2; 272 score+=2;
272 } 273 }
273 if(score == 9) { 274 if (score == 9) {
274 result = sk; 275 result = sk;
275 break; 276 break;
276 } else if(score > badness) { 277 } else if (score > badness) {
277 result = sk; 278 result = sk;
278 badness = score; 279 badness = score;
279 } 280 }
@@ -329,8 +330,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
329 struct inet_sock *inet; 330 struct inet_sock *inet;
330 struct iphdr *iph = (struct iphdr*)skb->data; 331 struct iphdr *iph = (struct iphdr*)skb->data;
331 struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); 332 struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
332 int type = skb->h.icmph->type; 333 const int type = icmp_hdr(skb)->type;
333 int code = skb->h.icmph->code; 334 const int code = icmp_hdr(skb)->code;
334 struct sock *sk; 335 struct sock *sk;
335 int harderr; 336 int harderr;
336 int err; 337 int err;
@@ -390,7 +391,7 @@ out:
390 sock_put(sk); 391 sock_put(sk);
391} 392}
392 393
393__inline__ void udp_err(struct sk_buff *skb, u32 info) 394void udp_err(struct sk_buff *skb, u32 info)
394{ 395{
395 return __udp4_lib_err(skb, info, udp_hash); 396 return __udp4_lib_err(skb, info, udp_hash);
396} 397}
@@ -419,13 +420,14 @@ static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
419 __be32 src, __be32 dst, int len ) 420 __be32 src, __be32 dst, int len )
420{ 421{
421 unsigned int offset; 422 unsigned int offset;
422 struct udphdr *uh = skb->h.uh; 423 struct udphdr *uh = udp_hdr(skb);
423 __wsum csum = 0; 424 __wsum csum = 0;
424 425
425 if (skb_queue_len(&sk->sk_write_queue) == 1) { 426 if (skb_queue_len(&sk->sk_write_queue) == 1) {
426 /* 427 /*
427 * Only one fragment on the socket. 428 * Only one fragment on the socket.
428 */ 429 */
430 skb->csum_start = skb_transport_header(skb) - skb->head;
429 skb->csum_offset = offsetof(struct udphdr, check); 431 skb->csum_offset = offsetof(struct udphdr, check);
430 uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); 432 uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
431 } else { 433 } else {
@@ -434,7 +436,7 @@ static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
434 * fragments on the socket so that all csums of sk_buffs 436 * fragments on the socket so that all csums of sk_buffs
435 * should be together 437 * should be together
436 */ 438 */
437 offset = skb->h.raw - skb->data; 439 offset = skb_transport_offset(skb);
438 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); 440 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
439 441
440 skb->ip_summed = CHECKSUM_NONE; 442 skb->ip_summed = CHECKSUM_NONE;
@@ -469,7 +471,7 @@ static int udp_push_pending_frames(struct sock *sk)
469 /* 471 /*
470 * Create a UDP header 472 * Create a UDP header
471 */ 473 */
472 uh = skb->h.uh; 474 uh = udp_hdr(skb);
473 uh->source = fl->fl_ip_sport; 475 uh->source = fl->fl_ip_sport;
474 uh->dest = fl->fl_ip_dport; 476 uh->dest = fl->fl_ip_dport;
475 uh->len = htons(up->len); 477 uh->len = htons(up->len);
@@ -765,38 +767,38 @@ out:
765 767
766int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) 768int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
767{ 769{
768 switch(cmd) 770 switch (cmd) {
771 case SIOCOUTQ:
769 { 772 {
770 case SIOCOUTQ: 773 int amount = atomic_read(&sk->sk_wmem_alloc);
771 { 774 return put_user(amount, (int __user *)arg);
772 int amount = atomic_read(&sk->sk_wmem_alloc); 775 }
773 return put_user(amount, (int __user *)arg);
774 }
775 776
776 case SIOCINQ: 777 case SIOCINQ:
777 { 778 {
778 struct sk_buff *skb; 779 struct sk_buff *skb;
779 unsigned long amount; 780 unsigned long amount;
780 781
781 amount = 0; 782 amount = 0;
782 spin_lock_bh(&sk->sk_receive_queue.lock); 783 spin_lock_bh(&sk->sk_receive_queue.lock);
783 skb = skb_peek(&sk->sk_receive_queue); 784 skb = skb_peek(&sk->sk_receive_queue);
784 if (skb != NULL) { 785 if (skb != NULL) {
785 /* 786 /*
786 * We will only return the amount 787 * We will only return the amount
787 * of this packet since that is all 788 * of this packet since that is all
788 * that will be read. 789 * that will be read.
789 */ 790 */
790 amount = skb->len - sizeof(struct udphdr); 791 amount = skb->len - sizeof(struct udphdr);
791 }
792 spin_unlock_bh(&sk->sk_receive_queue.lock);
793 return put_user(amount, (int __user *)arg);
794 } 792 }
793 spin_unlock_bh(&sk->sk_receive_queue.lock);
794 return put_user(amount, (int __user *)arg);
795 }
795 796
796 default: 797 default:
797 return -ENOIOCTLCMD; 798 return -ENOIOCTLCMD;
798 } 799 }
799 return(0); 800
801 return 0;
800} 802}
801 803
802/* 804/*
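The udp_ioctl() change above is purely structural (one switch body instead of nested blocks), but the two ioctls it dispatches are user-visible: SIOCOUTQ reports bytes still unsent, and SIOCINQ reports the payload of only the next queued datagram, as the retained comment stresses. Userspace sketch (hypothetical descriptor, error handling elided):

    #include <sys/ioctl.h>
    #include <linux/sockios.h>

    static void report_udp_queues(int fd)
    {
            int outq, inq;

            ioctl(fd, SIOCOUTQ, &outq);  /* bytes in sk_wmem_alloc */
            ioctl(fd, SIOCINQ, &inq);    /* next datagram's payload only */
    }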
@@ -810,7 +812,9 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
810 struct inet_sock *inet = inet_sk(sk); 812 struct inet_sock *inet = inet_sk(sk);
811 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; 813 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
812 struct sk_buff *skb; 814 struct sk_buff *skb;
813 int copied, err, copy_only, is_udplite = IS_UDPLITE(sk); 815 unsigned int ulen, copied;
816 int err;
817 int is_udplite = IS_UDPLITE(sk);
814 818
815 /* 819 /*
816 * Check any passed addresses 820 * Check any passed addresses
@@ -826,28 +830,25 @@ try_again:
826 if (!skb) 830 if (!skb)
827 goto out; 831 goto out;
828 832
829 copied = skb->len - sizeof(struct udphdr); 833 ulen = skb->len - sizeof(struct udphdr);
830 if (copied > len) { 834 copied = len;
831 copied = len; 835 if (copied > ulen)
836 copied = ulen;
837 else if (copied < ulen)
832 msg->msg_flags |= MSG_TRUNC; 838 msg->msg_flags |= MSG_TRUNC;
833 }
834 839
835 /* 840 /*
836 * Decide whether to checksum and/or copy data. 841 * If checksum is needed at all, try to do it while copying the
837 * 842 * data. If the data is truncated, or if we only want a partial
838 * UDP: checksum may have been computed in HW, 843 * coverage checksum (UDP-Lite), do it before the copy.
839 * (re-)compute it if message is truncated.
840 * UDP-Lite: always needs to checksum, no HW support.
841 */ 844 */
842 copy_only = (skb->ip_summed==CHECKSUM_UNNECESSARY);
843 845
844 if (is_udplite || (!copy_only && msg->msg_flags&MSG_TRUNC)) { 846 if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
845 if (__udp_lib_checksum_complete(skb)) 847 if (udp_lib_checksum_complete(skb))
846 goto csum_copy_err; 848 goto csum_copy_err;
847 copy_only = 1;
848 } 849 }
849 850
850 if (copy_only) 851 if (skb_csum_unnecessary(skb))
851 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), 852 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
852 msg->msg_iov, copied ); 853 msg->msg_iov, copied );
853 else { 854 else {
@@ -866,8 +867,8 @@ try_again:
866 if (sin) 867 if (sin)
867 { 868 {
868 sin->sin_family = AF_INET; 869 sin->sin_family = AF_INET;
869 sin->sin_port = skb->h.uh->source; 870 sin->sin_port = udp_hdr(skb)->source;
870 sin->sin_addr.s_addr = skb->nh.iph->saddr; 871 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
871 memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); 872 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
872 } 873 }
873 if (inet->cmsg_flags) 874 if (inet->cmsg_flags)
@@ -875,7 +876,7 @@ try_again:
875 876
876 err = copied; 877 err = copied;
877 if (flags & MSG_TRUNC) 878 if (flags & MSG_TRUNC)
878 err = skb->len - sizeof(struct udphdr); 879 err = ulen;
879 880
880out_free: 881out_free:
881 skb_free_datagram(sk, skb); 882 skb_free_datagram(sk, skb);
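The ulen/copied split above tightens udp_recvmsg()'s MSG_TRUNC contract: copied is clamped to the caller's buffer, MSG_TRUNC is flagged whenever the datagram was larger, and a caller who passes MSG_TRUNC in flags now gets ulen, the datagram's true payload length, as the return value. The userspace idiom this enables, as a sketch (hypothetical descriptor):

    #include <sys/socket.h>

    static ssize_t next_datagram_len(int fd)
    {
            char one;

            /* Linux-specific: with MSG_TRUNC, the return value is the
             * datagram's real payload length even though at most one
             * byte lands in the buffer; MSG_PEEK leaves it queued so
             * the caller can size a buffer before the real read.
             */
            return recv(fd, &one, 1, MSG_PEEK | MSG_TRUNC);
    }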
@@ -949,7 +950,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
949 return 1; 950 return 1;
950 951
951 /* Now we can get the pointers */ 952 /* Now we can get the pointers */
952 uh = skb->h.uh; 953 uh = udp_hdr(skb);
953 udpdata = (__u8 *)uh + sizeof(struct udphdr); 954 udpdata = (__u8 *)uh + sizeof(struct udphdr);
954 udpdata32 = (__be32 *)udpdata; 955 udpdata32 = (__be32 *)udpdata;
955 956
@@ -959,7 +960,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
959 /* Check if this is a keepalive packet. If so, eat it. */ 960 /* Check if this is a keepalive packet. If so, eat it. */
960 if (len == 1 && udpdata[0] == 0xff) { 961 if (len == 1 && udpdata[0] == 0xff) {
961 return 0; 962 return 0;
962 } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0 ) { 963 } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
963 /* ESP Packet without Non-ESP header */ 964 /* ESP Packet without Non-ESP header */
964 len = sizeof(struct udphdr); 965 len = sizeof(struct udphdr);
965 } else 966 } else
@@ -990,7 +991,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
990 return 0; 991 return 0;
991 992
992 /* Now we can update and verify the packet length... */ 993 /* Now we can update and verify the packet length... */
993 iph = skb->nh.iph; 994 iph = ip_hdr(skb);
994 iphlen = iph->ihl << 2; 995 iphlen = iph->ihl << 2;
995 iph->tot_len = htons(ntohs(iph->tot_len) - len); 996 iph->tot_len = htons(ntohs(iph->tot_len) - len);
996 if (skb->len < iphlen + len) { 997 if (skb->len < iphlen + len) {
@@ -1002,7 +1003,8 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
1002 * transport header to point to ESP. Keep UDP on the stack 1003 * transport header to point to ESP. Keep UDP on the stack
1003 * for later. 1004 * for later.
1004 */ 1005 */
1005 skb->h.raw = skb_pull(skb, len); 1006 __skb_pull(skb, len);
1007 skb_reset_transport_header(skb);
1006 1008
1007 /* modify the protocol (it's ESP!) */ 1009 /* modify the protocol (it's ESP!) */
1008 iph->protocol = IPPROTO_ESP; 1010 iph->protocol = IPPROTO_ESP;
@@ -1095,10 +1097,9 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
1095 } 1097 }
1096 } 1098 }
1097 1099
1098 if (sk->sk_filter && skb->ip_summed != CHECKSUM_UNNECESSARY) { 1100 if (sk->sk_filter) {
1099 if (__udp_lib_checksum_complete(skb)) 1101 if (udp_lib_checksum_complete(skb))
1100 goto drop; 1102 goto drop;
1101 skb->ip_summed = CHECKSUM_UNNECESSARY;
1102 } 1103 }
1103 1104
1104 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { 1105 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
@@ -1143,10 +1144,10 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
1143 1144
1144 sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr, 1145 sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
1145 uh->source, saddr, dif); 1146 uh->source, saddr, dif);
1146 if(sknext) 1147 if (sknext)
1147 skb1 = skb_clone(skb, GFP_ATOMIC); 1148 skb1 = skb_clone(skb, GFP_ATOMIC);
1148 1149
1149 if(skb1) { 1150 if (skb1) {
1150 int ret = udp_queue_rcv_skb(sk, skb1); 1151 int ret = udp_queue_rcv_skb(sk, skb1);
1151 if (ret > 0) 1152 if (ret > 0)
1152 /* we should probably re-process instead 1153 /* we should probably re-process instead
@@ -1154,7 +1155,7 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
1154 kfree_skb(skb1); 1155 kfree_skb(skb1);
1155 } 1156 }
1156 sk = sknext; 1157 sk = sknext;
1157 } while(sknext); 1158 } while (sknext);
1158 } else 1159 } else
1159 kfree_skb(skb); 1160 kfree_skb(skb);
1160 read_unlock(&udp_hash_lock); 1161 read_unlock(&udp_hash_lock);
@@ -1166,25 +1167,37 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
1166 * Otherwise, csum completion requires checksumming packet body, 1167 * Otherwise, csum completion requires checksumming packet body,
1167 * including udp header and folding it to skb->csum. 1168 * including udp header and folding it to skb->csum.
1168 */ 1169 */
1169static inline void udp4_csum_init(struct sk_buff *skb, struct udphdr *uh) 1170static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
1171 int proto)
1170{ 1172{
1173 const struct iphdr *iph;
1174 int err;
1175
1176 UDP_SKB_CB(skb)->partial_cov = 0;
1177 UDP_SKB_CB(skb)->cscov = skb->len;
1178
1179 if (proto == IPPROTO_UDPLITE) {
1180 err = udplite_checksum_init(skb, uh);
1181 if (err)
1182 return err;
1183 }
1184
1185 iph = ip_hdr(skb);
1171 if (uh->check == 0) { 1186 if (uh->check == 0) {
1172 skb->ip_summed = CHECKSUM_UNNECESSARY; 1187 skb->ip_summed = CHECKSUM_UNNECESSARY;
1173 } else if (skb->ip_summed == CHECKSUM_COMPLETE) { 1188 } else if (skb->ip_summed == CHECKSUM_COMPLETE) {
1174 if (!csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, 1189 if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
1175 skb->len, IPPROTO_UDP, skb->csum )) 1190 proto, skb->csum))
1176 skb->ip_summed = CHECKSUM_UNNECESSARY; 1191 skb->ip_summed = CHECKSUM_UNNECESSARY;
1177 } 1192 }
1178 if (skb->ip_summed != CHECKSUM_UNNECESSARY) 1193 if (!skb_csum_unnecessary(skb))
1179 skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, 1194 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1180 skb->nh.iph->daddr, 1195 skb->len, proto, 0);
1181 skb->len, IPPROTO_UDP, 0);
1182 /* Probably, we should checksum udp header (it should be in cache 1196 /* Probably, we should checksum udp header (it should be in cache
1183 * in any case) and data in tiny packets (< rx copybreak). 1197 * in any case) and data in tiny packets (< rx copybreak).
1184 */ 1198 */
1185 1199
1186 /* UDP = UDP-Lite with a non-partial checksum coverage */ 1200 return 0;
1187 UDP_SKB_CB(skb)->partial_cov = 0;
1188} 1201}
1189 1202
1190/* 1203/*
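udp4_csum_init() now takes the protocol number and absorbs the UDP-Lite case by delegating to udplite_checksum_init() before the shared pseudo-header logic. A sketch of the RFC 3828 coverage rule that delegation enforces, using hypothetical simplified fields:

#include <stdint.h>

#define PROTO_UDP	17
#define PROTO_UDPLITE	136

struct dgram {
	uint16_t len;		/* datagram length */
	uint16_t cscov;		/* requested checksum coverage */
};

static int csum_coverage_init(struct dgram *d, int proto)
{
	if (proto == PROTO_UDPLITE) {
		if (d->cscov == 0)			/* 0 means full coverage */
			d->cscov = d->len;
		else if (d->cscov < 8 || d->cscov > d->len)
			return -1;			/* must at least cover the header */
	} else {
		d->cscov = d->len;			/* plain UDP: always full */
	}
	return 0;
}

With one entry point keyed by proto, UDP becomes the degenerate UDP-Lite case with full coverage, which is exactly what the removed "UDP = UDP-Lite with a non-partial checksum coverage" comment was saying.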
@@ -1192,14 +1205,14 @@ static inline void udp4_csum_init(struct sk_buff *skb, struct udphdr *uh)
1192 */ 1205 */
1193 1206
1194int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], 1207int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
1195 int is_udplite) 1208 int proto)
1196{ 1209{
1197 struct sock *sk; 1210 struct sock *sk;
1198 struct udphdr *uh = skb->h.uh; 1211 struct udphdr *uh = udp_hdr(skb);
1199 unsigned short ulen; 1212 unsigned short ulen;
1200 struct rtable *rt = (struct rtable*)skb->dst; 1213 struct rtable *rt = (struct rtable*)skb->dst;
1201 __be32 saddr = skb->nh.iph->saddr; 1214 __be32 saddr = ip_hdr(skb)->saddr;
1202 __be32 daddr = skb->nh.iph->daddr; 1215 __be32 daddr = ip_hdr(skb)->daddr;
1203 1216
1204 /* 1217 /*
1205 * Validate the packet. 1218 * Validate the packet.
@@ -1211,20 +1224,17 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
1211 if (ulen > skb->len) 1224 if (ulen > skb->len)
1212 goto short_packet; 1225 goto short_packet;
1213 1226
1214 if(! is_udplite ) { /* UDP validates ulen. */ 1227 if (proto == IPPROTO_UDP) {
1215 1228 /* UDP validates ulen. */
1216 if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) 1229 if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
1217 goto short_packet; 1230 goto short_packet;
1218 uh = skb->h.uh; 1231 uh = udp_hdr(skb);
1219
1220 udp4_csum_init(skb, uh);
1221
1222 } else { /* UDP-Lite validates cscov. */
1223 if (udplite4_csum_init(skb, uh))
1224 goto csum_error;
1225 } 1232 }
1226 1233
1227 if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) 1234 if (udp4_csum_init(skb, uh, proto))
1235 goto csum_error;
1236
1237 if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
1228 return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); 1238 return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
1229 1239
1230 sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest, 1240 sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest,
@@ -1250,7 +1260,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
1250 if (udp_lib_checksum_complete(skb)) 1260 if (udp_lib_checksum_complete(skb))
1251 goto csum_error; 1261 goto csum_error;
1252 1262
1253 UDP_INC_STATS_BH(UDP_MIB_NOPORTS, is_udplite); 1263 UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
1254 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 1264 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1255 1265
1256 /* 1266 /*
@@ -1258,11 +1268,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
1258 * don't wanna listen. Ignore it. 1268 * don't wanna listen. Ignore it.
1259 */ 1269 */
1260 kfree_skb(skb); 1270 kfree_skb(skb);
1261 return(0); 1271 return 0;
1262 1272
1263short_packet: 1273short_packet:
1264 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", 1274 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
1265 is_udplite? "-Lite" : "", 1275 proto == IPPROTO_UDPLITE ? "-Lite" : "",
1266 NIPQUAD(saddr), 1276 NIPQUAD(saddr),
1267 ntohs(uh->source), 1277 ntohs(uh->source),
1268 ulen, 1278 ulen,
@@ -1277,21 +1287,21 @@ csum_error:
1277 * the network is concerned, anyway) as per 4.1.3.4 (MUST). 1287 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
1278 */ 1288 */
1279 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", 1289 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
1280 is_udplite? "-Lite" : "", 1290 proto == IPPROTO_UDPLITE ? "-Lite" : "",
1281 NIPQUAD(saddr), 1291 NIPQUAD(saddr),
1282 ntohs(uh->source), 1292 ntohs(uh->source),
1283 NIPQUAD(daddr), 1293 NIPQUAD(daddr),
1284 ntohs(uh->dest), 1294 ntohs(uh->dest),
1285 ulen); 1295 ulen);
1286drop: 1296drop:
1287 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); 1297 UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
1288 kfree_skb(skb); 1298 kfree_skb(skb);
1289 return(0); 1299 return 0;
1290} 1300}
1291 1301
1292__inline__ int udp_rcv(struct sk_buff *skb) 1302int udp_rcv(struct sk_buff *skb)
1293{ 1303{
1294 return __udp4_lib_rcv(skb, udp_hash, 0); 1304 return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
1295} 1305}
1296 1306
1297int udp_destroy_sock(struct sock *sk) 1307int udp_destroy_sock(struct sock *sk)
@@ -1313,13 +1323,13 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1313 int val; 1323 int val;
1314 int err = 0; 1324 int err = 0;
1315 1325
1316 if(optlen<sizeof(int)) 1326 if (optlen<sizeof(int))
1317 return -EINVAL; 1327 return -EINVAL;
1318 1328
1319 if (get_user(val, (int __user *)optval)) 1329 if (get_user(val, (int __user *)optval))
1320 return -EFAULT; 1330 return -EFAULT;
1321 1331
1322 switch(optname) { 1332 switch (optname) {
1323 case UDP_CORK: 1333 case UDP_CORK:
1324 if (val != 0) { 1334 if (val != 0) {
1325 up->corkflag = 1; 1335 up->corkflag = 1;
@@ -1373,7 +1383,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1373 default: 1383 default:
1374 err = -ENOPROTOOPT; 1384 err = -ENOPROTOOPT;
1375 break; 1385 break;
1376 }; 1386 }
1377 1387
1378 return err; 1388 return err;
1379} 1389}
@@ -1404,15 +1414,15 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
1404 struct udp_sock *up = udp_sk(sk); 1414 struct udp_sock *up = udp_sk(sk);
1405 int val, len; 1415 int val, len;
1406 1416
 1407 if(get_user(len,optlen)) 1417 if (get_user(len, optlen))
1408 return -EFAULT; 1418 return -EFAULT;
1409 1419
1410 len = min_t(unsigned int, len, sizeof(int)); 1420 len = min_t(unsigned int, len, sizeof(int));
1411 1421
1412 if(len < 0) 1422 if (len < 0)
1413 return -EINVAL; 1423 return -EINVAL;
1414 1424
1415 switch(optname) { 1425 switch (optname) {
1416 case UDP_CORK: 1426 case UDP_CORK:
1417 val = up->corkflag; 1427 val = up->corkflag;
1418 break; 1428 break;
@@ -1433,11 +1443,11 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
1433 1443
1434 default: 1444 default:
1435 return -ENOPROTOOPT; 1445 return -ENOPROTOOPT;
1436 }; 1446 }
1437 1447
1438 if(put_user(len, optlen)) 1448 if (put_user(len, optlen))
1439 return -EFAULT; 1449 return -EFAULT;
1440 if(copy_to_user(optval, &val,len)) 1450 if (copy_to_user(optval, &val,len))
1441 return -EFAULT; 1451 return -EFAULT;
1442 return 0; 1452 return 0;
1443} 1453}
@@ -1486,15 +1496,11 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
1486 struct sk_buff *skb; 1496 struct sk_buff *skb;
1487 1497
1488 spin_lock_bh(&rcvq->lock); 1498 spin_lock_bh(&rcvq->lock);
1489 while ((skb = skb_peek(rcvq)) != NULL) { 1499 while ((skb = skb_peek(rcvq)) != NULL &&
1490 if (udp_lib_checksum_complete(skb)) { 1500 udp_lib_checksum_complete(skb)) {
1491 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite); 1501 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite);
1492 __skb_unlink(skb, rcvq); 1502 __skb_unlink(skb, rcvq);
1493 kfree_skb(skb); 1503 kfree_skb(skb);
1494 } else {
1495 skb->ip_summed = CHECKSUM_UNNECESSARY;
1496 break;
1497 }
1498 } 1504 }
1499 spin_unlock_bh(&rcvq->lock); 1505 spin_unlock_bh(&rcvq->lock);
1500 1506
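The rewritten udp_poll() loop folds the head-of-queue test into the while condition: datagrams that fail checksum verification are unlinked and freed until the first good one is reached, with no else branch needed. The same shape on a plain singly linked list, hypothetical types:

#include <stdlib.h>

struct node {
	struct node *next;
	int bad_csum;
};

static struct node *drain_bad_head(struct node *head)
{
	while (head != NULL && head->bad_csum) {
		struct node *next = head->next;

		free(head);		/* drop the corrupt datagram */
		head = next;
	}
	return head;			/* first entry with a good checksum, or NULL */
}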
@@ -1573,7 +1579,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
1573 struct sock *sk = udp_get_first(seq); 1579 struct sock *sk = udp_get_first(seq);
1574 1580
1575 if (sk) 1581 if (sk)
1576 while(pos && (sk = udp_get_next(seq, sk)) != NULL) 1582 while (pos && (sk = udp_get_next(seq, sk)) != NULL)
1577 --pos; 1583 --pos;
1578 return pos ? NULL : sk; 1584 return pos ? NULL : sk;
1579} 1585}
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index b28fe1edf98b..f34fd686a8f1 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -31,7 +31,7 @@ static int udplite_v4_get_port(struct sock *sk, unsigned short snum)
31 31
32static int udplite_rcv(struct sk_buff *skb) 32static int udplite_rcv(struct sk_buff *skb)
33{ 33{
34 return __udp4_lib_rcv(skb, udplite_hash, 1); 34 return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE);
35} 35}
36 36
37static void udplite_err(struct sk_buff *skb, u32 info) 37static void udplite_err(struct sk_buff *skb, u32 info)
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 78e80deb7e89..5ceca951d73f 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -28,7 +28,7 @@ static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32
28 switch (nexthdr) { 28 switch (nexthdr) {
29 case IPPROTO_IPIP: 29 case IPPROTO_IPIP:
30 case IPPROTO_IPV6: 30 case IPPROTO_IPV6:
31 *spi = skb->nh.iph->saddr; 31 *spi = ip_hdr(skb)->saddr;
32 *seq = 0; 32 *seq = 0;
33 return 0; 33 return 0;
34 } 34 }
@@ -39,9 +39,9 @@ static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32
39#ifdef CONFIG_NETFILTER 39#ifdef CONFIG_NETFILTER
40static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) 40static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
41{ 41{
42 struct iphdr *iph = skb->nh.iph;
43
44 if (skb->dst == NULL) { 42 if (skb->dst == NULL) {
43 const struct iphdr *iph = ip_hdr(skb);
44
45 if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, 45 if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
46 skb->dev)) 46 skb->dev))
47 goto drop; 47 goto drop;
@@ -55,18 +55,18 @@ drop:
55 55
56int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) 56int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
57{ 57{
58 int err;
59 __be32 spi, seq; 58 __be32 spi, seq;
60 struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH]; 59 struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH];
61 struct xfrm_state *x; 60 struct xfrm_state *x;
62 int xfrm_nr = 0; 61 int xfrm_nr = 0;
63 int decaps = 0; 62 int decaps = 0;
63 int err = xfrm4_parse_spi(skb, ip_hdr(skb)->protocol, &spi, &seq);
64 64
65 if ((err = xfrm4_parse_spi(skb, skb->nh.iph->protocol, &spi, &seq)) != 0) 65 if (err != 0)
66 goto drop; 66 goto drop;
67 67
68 do { 68 do {
69 struct iphdr *iph = skb->nh.iph; 69 const struct iphdr *iph = ip_hdr(skb);
70 70
71 if (xfrm_nr == XFRM_MAX_DEPTH) 71 if (xfrm_nr == XFRM_MAX_DEPTH)
72 goto drop; 72 goto drop;
@@ -113,7 +113,8 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
113 break; 113 break;
114 } 114 }
115 115
116 if ((err = xfrm_parse_spi(skb, skb->nh.iph->protocol, &spi, &seq)) < 0) 116 err = xfrm_parse_spi(skb, ip_hdr(skb)->protocol, &spi, &seq);
117 if (err < 0)
117 goto drop; 118 goto drop;
118 } while (!err); 119 } while (!err);
119 120
@@ -146,15 +147,15 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
146 return 0; 147 return 0;
147 } else { 148 } else {
148#ifdef CONFIG_NETFILTER 149#ifdef CONFIG_NETFILTER
149 __skb_push(skb, skb->data - skb->nh.raw); 150 __skb_push(skb, skb->data - skb_network_header(skb));
150 skb->nh.iph->tot_len = htons(skb->len); 151 ip_hdr(skb)->tot_len = htons(skb->len);
151 ip_send_check(skb->nh.iph); 152 ip_send_check(ip_hdr(skb));
152 153
153 NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL, 154 NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
154 xfrm4_rcv_encap_finish); 155 xfrm4_rcv_encap_finish);
155 return 0; 156 return 0;
156#else 157#else
157 return -skb->nh.iph->protocol; 158 return -ip_hdr(skb)->protocol;
158#endif 159#endif
159 } 160 }
160 161
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index d419e15d9803..a73e710740c2 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -29,20 +29,21 @@
29 */ 29 */
30static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) 30static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
31{ 31{
32 struct iphdr *iph, *top_iph = NULL; 32 struct iphdr *iph, *top_iph;
33 int hdrlen, optlen; 33 int hdrlen, optlen;
34 34
35 iph = skb->nh.iph; 35 iph = ip_hdr(skb);
36 skb->h.ipiph = iph; 36 skb->transport_header = skb->network_header;
37 37
38 hdrlen = 0; 38 hdrlen = 0;
39 optlen = iph->ihl * 4 - sizeof(*iph); 39 optlen = iph->ihl * 4 - sizeof(*iph);
40 if (unlikely(optlen)) 40 if (unlikely(optlen))
41 hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); 41 hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);
42 42
43 skb->nh.raw = skb_push(skb, x->props.header_len + hdrlen); 43 skb_push(skb, x->props.header_len - IPV4_BEET_PHMAXLEN + hdrlen);
44 top_iph = skb->nh.iph; 44 skb_reset_network_header(skb);
45 skb->h.raw += sizeof(*iph) - hdrlen; 45 top_iph = ip_hdr(skb);
46 skb->transport_header += sizeof(*iph) - hdrlen;
46 47
47 memmove(top_iph, iph, sizeof(*iph)); 48 memmove(top_iph, iph, sizeof(*iph));
48 if (unlikely(optlen)) { 49 if (unlikely(optlen)) {
@@ -50,7 +51,7 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
50 51
51 BUG_ON(optlen < 0); 52 BUG_ON(optlen < 0);
52 53
53 ph = (struct ip_beet_phdr *)skb->h.raw; 54 ph = (struct ip_beet_phdr *)skb_transport_header(skb);
54 ph->padlen = 4 - (optlen & 4); 55 ph->padlen = 4 - (optlen & 4);
55 ph->hdrlen = optlen / 8; 56 ph->hdrlen = optlen / 8;
56 ph->nexthdr = top_iph->protocol; 57 ph->nexthdr = top_iph->protocol;
@@ -69,20 +70,18 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
69 70
70static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb) 71static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
71{ 72{
72 struct iphdr *iph = skb->nh.iph; 73 struct iphdr *iph = ip_hdr(skb);
73 int phlen = 0; 74 int phlen = 0;
74 int optlen = 0; 75 int optlen = 0;
75 __u8 ph_nexthdr = 0, protocol = 0; 76 u8 ph_nexthdr = 0;
76 int err = -EINVAL; 77 int err = -EINVAL;
77 78
78 protocol = iph->protocol;
79
80 if (unlikely(iph->protocol == IPPROTO_BEETPH)) { 79 if (unlikely(iph->protocol == IPPROTO_BEETPH)) {
81 struct ip_beet_phdr *ph; 80 struct ip_beet_phdr *ph;
82 81
83 if (!pskb_may_pull(skb, sizeof(*ph))) 82 if (!pskb_may_pull(skb, sizeof(*ph)))
84 goto out; 83 goto out;
85 ph = (struct ip_beet_phdr *)(skb->h.ipiph + 1); 84 ph = (struct ip_beet_phdr *)(ipip_hdr(skb) + 1);
86 85
87 phlen = sizeof(*ph) + ph->padlen; 86 phlen = sizeof(*ph) + ph->padlen;
88 optlen = ph->hdrlen * 8 + (IPV4_BEET_PHMAXLEN - phlen); 87 optlen = ph->hdrlen * 8 + (IPV4_BEET_PHMAXLEN - phlen);
@@ -96,22 +95,20 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
96 ph_nexthdr = ph->nexthdr; 95 ph_nexthdr = ph->nexthdr;
97 } 96 }
98 97
99 skb->nh.raw = skb->data + (phlen - sizeof(*iph)); 98 skb_set_network_header(skb, phlen - sizeof(*iph));
100 memmove(skb->nh.raw, iph, sizeof(*iph)); 99 memmove(skb_network_header(skb), iph, sizeof(*iph));
101 skb->h.raw = skb->data + (phlen + optlen); 100 skb_set_transport_header(skb, phlen + optlen);
102 skb->data = skb->h.raw; 101 skb->data = skb_transport_header(skb);
103 102
104 iph = skb->nh.iph; 103 iph = ip_hdr(skb);
105 iph->ihl = (sizeof(*iph) + optlen) / 4; 104 iph->ihl = (sizeof(*iph) + optlen) / 4;
106 iph->tot_len = htons(skb->len + iph->ihl * 4); 105 iph->tot_len = htons(skb->len + iph->ihl * 4);
107 iph->daddr = x->sel.daddr.a4; 106 iph->daddr = x->sel.daddr.a4;
108 iph->saddr = x->sel.saddr.a4; 107 iph->saddr = x->sel.saddr.a4;
109 if (ph_nexthdr) 108 if (ph_nexthdr)
110 iph->protocol = ph_nexthdr; 109 iph->protocol = ph_nexthdr;
111 else
112 iph->protocol = protocol;
113 iph->check = 0; 110 iph->check = 0;
114 iph->check = ip_fast_csum(skb->nh.raw, iph->ihl); 111 iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
115 err = 0; 112 err = 0;
116out: 113out:
117 return err; 114 return err;
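The BEET input path above recovers the option length from the pseudo-header: padlen pads the pseudo-header itself, and hdrlen counts the IP options in 8-byte units. The arithmetic in isolation, with an assumed field order and an assumed value for IPV4_BEET_PHMAXLEN (the real definitions live in the kernel headers):

#include <stdint.h>

#define BEET_PHMAXLEN	8	/* assumed value for illustration */

struct beet_ph {		/* field order assumed for illustration */
	uint8_t nexthdr;
	uint8_t hdrlen;		/* option bytes / 8 */
	uint8_t padlen;		/* padding appended to this pseudo-header */
	uint8_t reserved;
};

static void beet_lengths(const struct beet_ph *ph, int *phlen, int *optlen)
{
	*phlen = (int)sizeof(*ph) + ph->padlen;
	*optlen = ph->hdrlen * 8 + (BEET_PHMAXLEN - *phlen);
}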
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
index 92676b7e4034..601047161ea6 100644
--- a/net/ipv4/xfrm4_mode_transport.c
+++ b/net/ipv4/xfrm4_mode_transport.c
@@ -23,16 +23,13 @@
23 */ 23 */
24static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) 24static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
25{ 25{
26 struct iphdr *iph; 26 struct iphdr *iph = ip_hdr(skb);
27 int ihl; 27 int ihl = iph->ihl * 4;
28 28
29 iph = skb->nh.iph; 29 skb->transport_header = skb->network_header + ihl;
30 skb->h.ipiph = iph; 30 skb_push(skb, x->props.header_len);
31 31 skb_reset_network_header(skb);
32 ihl = iph->ihl * 4; 32 memmove(skb_network_header(skb), iph, ihl);
33 skb->h.raw += ihl;
34
35 skb->nh.raw = memmove(skb_push(skb, x->props.header_len), iph, ihl);
36 return 0; 33 return 0;
37} 34}
38 35
@@ -46,12 +43,15 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
46 */ 43 */
47static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) 44static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
48{ 45{
49 int ihl = skb->data - skb->h.raw; 46 int ihl = skb->data - skb_transport_header(skb);
50 47
51 if (skb->h.raw != skb->nh.raw) 48 if (skb->transport_header != skb->network_header) {
52 skb->nh.raw = memmove(skb->h.raw, skb->nh.raw, ihl); 49 memmove(skb_transport_header(skb),
53 skb->nh.iph->tot_len = htons(skb->len + ihl); 50 skb_network_header(skb), ihl);
54 skb->h.raw = skb->data; 51 skb->network_header = skb->transport_header;
52 }
53 ip_hdr(skb)->tot_len = htons(skb->len + ihl);
54 skb_reset_transport_header(skb);
55 return 0; 55 return 0;
56} 56}
57 57
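Transport-mode output now pushes the full header_len and memmove()s the IP header to the new front of the packet, rather than deriving the destination from pointer arithmetic on skb->nh.raw. The core move as a flat-buffer sketch; the hypothetical caller is assumed to have reserved hdrlen bytes of headroom before iph:

#include <stdint.h>
#include <string.h>

static uint8_t *make_room_for_xfrm(uint8_t *iph, size_t iphlen, size_t hdrlen)
{
	/* Slide the IP header forward over the reserved headroom; the gap
	 * left behind is where the ESP/AH header will be built. */
	memmove(iph - hdrlen, iph, iphlen);
	return iph - hdrlen;		/* new start of the packet */
}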
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index ceb4376f572a..a2f2e6a5ec5d 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -16,8 +16,8 @@
16 16
17static inline void ipip_ecn_decapsulate(struct sk_buff *skb) 17static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
18{ 18{
19 struct iphdr *outer_iph = skb->nh.iph; 19 struct iphdr *outer_iph = ip_hdr(skb);
20 struct iphdr *inner_iph = skb->h.ipiph; 20 struct iphdr *inner_iph = ipip_hdr(skb);
21 21
22 if (INET_ECN_is_ce(outer_iph->tos)) 22 if (INET_ECN_is_ce(outer_iph->tos))
23 IP_ECN_set_ce(inner_iph); 23 IP_ECN_set_ce(inner_iph);
@@ -26,7 +26,7 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
26static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) 26static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
27{ 27{
28 if (INET_ECN_is_ce(iph->tos)) 28 if (INET_ECN_is_ce(iph->tos))
29 IP6_ECN_set_ce(skb->nh.ipv6h); 29 IP6_ECN_set_ce(ipv6_hdr(skb));
30} 30}
31 31
32/* Add encapsulation header. 32/* Add encapsulation header.
@@ -46,11 +46,12 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
46 struct iphdr *iph, *top_iph; 46 struct iphdr *iph, *top_iph;
47 int flags; 47 int flags;
48 48
49 iph = skb->nh.iph; 49 iph = ip_hdr(skb);
50 skb->h.ipiph = iph; 50 skb->transport_header = skb->network_header;
51 51
52 skb->nh.raw = skb_push(skb, x->props.header_len); 52 skb_push(skb, x->props.header_len);
53 top_iph = skb->nh.iph; 53 skb_reset_network_header(skb);
54 top_iph = ip_hdr(skb);
54 55
55 top_iph->ihl = 5; 56 top_iph->ihl = 5;
56 top_iph->version = 4; 57 top_iph->version = 4;
@@ -90,10 +91,11 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
90 91
91static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) 92static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
92{ 93{
93 struct iphdr *iph = skb->nh.iph; 94 struct iphdr *iph = ip_hdr(skb);
95 const unsigned char *old_mac;
94 int err = -EINVAL; 96 int err = -EINVAL;
95 97
 96 switch(iph->protocol){ 98 switch (iph->protocol) {
97 case IPPROTO_IPIP: 99 case IPPROTO_IPIP:
98 break; 100 break;
99#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 101#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
@@ -111,10 +113,10 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
111 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) 113 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
112 goto out; 114 goto out;
113 115
114 iph = skb->nh.iph; 116 iph = ip_hdr(skb);
115 if (iph->protocol == IPPROTO_IPIP) { 117 if (iph->protocol == IPPROTO_IPIP) {
116 if (x->props.flags & XFRM_STATE_DECAP_DSCP) 118 if (x->props.flags & XFRM_STATE_DECAP_DSCP)
117 ipv4_copy_dscp(iph, skb->h.ipiph); 119 ipv4_copy_dscp(iph, ipip_hdr(skb));
118 if (!(x->props.flags & XFRM_STATE_NOECN)) 120 if (!(x->props.flags & XFRM_STATE_NOECN))
119 ipip_ecn_decapsulate(skb); 121 ipip_ecn_decapsulate(skb);
120 } 122 }
@@ -125,9 +127,10 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
125 skb->protocol = htons(ETH_P_IPV6); 127 skb->protocol = htons(ETH_P_IPV6);
126 } 128 }
127#endif 129#endif
128 skb->mac.raw = memmove(skb->data - skb->mac_len, 130 old_mac = skb_mac_header(skb);
129 skb->mac.raw, skb->mac_len); 131 skb_set_mac_header(skb, -skb->mac_len);
130 skb->nh.raw = skb->data; 132 memmove(skb_mac_header(skb), old_mac, skb->mac_len);
133 skb_reset_network_header(skb);
131 err = 0; 134 err = 0;
132 135
133out: 136out:
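ipip_ecn_decapsulate() propagates congestion marking inward: ECN occupies the low two bits of the TOS byte, CE is 0b11, and only an ECN-capable inner packet may be remarked. A sketch of that rule (the kernel's IP_ECN_set_ce() also patches the IPv4 header checksum incrementally, omitted here):

#include <stdint.h>

#define ECN_MASK	0x03
#define ECN_CE		0x03	/* Congestion Experienced */

static void propagate_ce(uint8_t outer_tos, uint8_t *inner_tos)
{
	if ((outer_tos & ECN_MASK) != ECN_CE)
		return;			/* outer packet carries no CE mark */
	if ((*inner_tos & ECN_MASK) == 0)
		return;			/* inner packet is not ECN-capable */
	*inner_tos |= ECN_CE;
}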
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 038ca160fe2c..44ef208a75cb 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -22,14 +22,13 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
22{ 22{
23 int mtu, ret = 0; 23 int mtu, ret = 0;
24 struct dst_entry *dst; 24 struct dst_entry *dst;
25 struct iphdr *iph = skb->nh.iph;
26 25
27 if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) 26 if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
28 goto out; 27 goto out;
29 28
30 IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE; 29 IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
31 30
32 if (!(iph->frag_off & htons(IP_DF)) || skb->local_df) 31 if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df)
33 goto out; 32 goto out;
34 33
35 dst = skb->dst; 34 dst = skb->dst;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 5d51a2af34c1..4ff8ed30024f 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -119,7 +119,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
119 119
120 if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) { 120 if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) {
121 unsigned short encap_family = xfrm[i]->props.family; 121 unsigned short encap_family = xfrm[i]->props.family;
122 switch(encap_family) { 122 switch (encap_family) {
123 case AF_INET: 123 case AF_INET:
124 fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4; 124 fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4;
125 fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4; 125 fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4;
@@ -209,8 +209,8 @@ error:
209static void 209static void
210_decode_session4(struct sk_buff *skb, struct flowi *fl) 210_decode_session4(struct sk_buff *skb, struct flowi *fl)
211{ 211{
212 struct iphdr *iph = skb->nh.iph; 212 struct iphdr *iph = ip_hdr(skb);
213 u8 *xprth = skb->nh.raw + iph->ihl*4; 213 u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
214 214
215 memset(fl, 0, sizeof(struct flowi)); 215 memset(fl, 0, sizeof(struct flowi));
216 if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { 216 if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) {
@@ -263,7 +263,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
263 default: 263 default:
264 fl->fl_ipsec_spi = 0; 264 fl->fl_ipsec_spi = 0;
265 break; 265 break;
266 }; 266 }
267 } 267 }
268 fl->proto = iph->protocol; 268 fl->proto = iph->protocol;
269 fl->fl4_dst = iph->daddr; 269 fl->fl4_dst = iph->daddr;
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 3eef06454da9..568510304553 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -12,9 +12,8 @@
12 12
13static int ipip_output(struct xfrm_state *x, struct sk_buff *skb) 13static int ipip_output(struct xfrm_state *x, struct sk_buff *skb)
14{ 14{
15 struct iphdr *iph; 15 struct iphdr *iph = ip_hdr(skb);
16 16
17 iph = skb->nh.iph;
18 iph->tot_len = htons(skb->len); 17 iph->tot_len = htons(skb->len);
19 ip_send_check(iph); 18 ip_send_check(iph);
20 19
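ipip_output() refreshes tot_len and then ip_send_check() recomputes the header checksum: the RFC 1071 ones'-complement sum over the header words, with the checksum field zeroed first. A userspace sketch of that computation (not the kernel's ip_fast_csum()):

#include <stddef.h>
#include <stdint.h>

/* The caller zeroes the checksum field in the header before calling;
 * ihl is the header length in 32-bit words, as in the IPv4 header. */
static uint16_t ipv4_header_checksum(const void *hdr, size_t ihl)
{
	const uint16_t *p = hdr;
	uint32_t sum = 0;
	size_t i;

	for (i = 0; i < ihl * 2; i++)	/* 2 * ihl 16-bit words */
		sum += p[i];
	while (sum >> 16)		/* fold carries back into the low word */
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}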
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 79682efb14be..8e5d54f23b49 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -57,6 +57,16 @@ config IPV6_ROUTE_INFO
57 57
58 If unsure, say N. 58 If unsure, say N.
59 59
60config IPV6_OPTIMISTIC_DAD
61 bool "IPv6: Enable RFC 4429 Optimistic DAD (EXPERIMENTAL)"
62 depends on IPV6 && EXPERIMENTAL
63 ---help---
64 This is experimental support for optimistic Duplicate
65 Address Detection. It allows for autoconfigured addresses
66 to be used more quickly.
67
68 If unsure, say N.
69
60config INET6_AH 70config INET6_AH
61 tristate "IPv6: AH transformation" 71 tristate "IPv6: AH transformation"
62 depends on IPV6 72 depends on IPV6
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index d460017bb353..bb33309044c9 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -7,14 +7,15 @@ obj-$(CONFIG_IPV6) += ipv6.o
7ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ 7ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
8 route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ 8 route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
9 raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ 9 raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
10 exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \ 10 exthdrs.o sysctl_net_ipv6.o datagram.o \
11 ip6_flowlabel.o ipv6_syms.o inet6_connection_sock.o 11 ip6_flowlabel.o inet6_connection_sock.o
12 12
13ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ 13ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
14 xfrm6_output.o 14 xfrm6_output.o
15ipv6-$(CONFIG_NETFILTER) += netfilter.o 15ipv6-$(CONFIG_NETFILTER) += netfilter.o
16ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o 16ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
17ipv6-$(CONFIG_IPV6_MIP6) += mip6.o 17ipv6-$(CONFIG_IPV6_MIP6) += mip6.o
18ipv6-$(CONFIG_PROC_FS) += proc.o
18 19
19ipv6-objs += $(ipv6-y) 20ipv6-objs += $(ipv6-y)
20 21
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 452a82ce4796..e04e49373505 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -81,6 +81,7 @@
81#endif 81#endif
82 82
83#include <asm/uaccess.h> 83#include <asm/uaccess.h>
84#include <asm/unaligned.h>
84 85
85#include <linux/proc_fs.h> 86#include <linux/proc_fs.h>
86#include <linux/seq_file.h> 87#include <linux/seq_file.h>
@@ -208,9 +209,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
208}; 209};
209 210
210/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ 211/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
211#if 0
212const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; 212const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
213#endif
214const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; 213const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
215 214
216static void addrconf_del_timer(struct inet6_ifaddr *ifp) 215static void addrconf_del_timer(struct inet6_ifaddr *ifp)
@@ -246,6 +245,37 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
246 add_timer(&ifp->timer); 245 add_timer(&ifp->timer);
247} 246}
248 247
248static int snmp6_alloc_dev(struct inet6_dev *idev)
249{
250 int err = -ENOMEM;
251
252 if (!idev || !idev->dev)
253 return -EINVAL;
254
255 if (snmp_mib_init((void **)idev->stats.ipv6,
256 sizeof(struct ipstats_mib),
257 __alignof__(struct ipstats_mib)) < 0)
258 goto err_ip;
259 if (snmp_mib_init((void **)idev->stats.icmpv6,
260 sizeof(struct icmpv6_mib),
261 __alignof__(struct icmpv6_mib)) < 0)
262 goto err_icmp;
263
264 return 0;
265
266err_icmp:
267 snmp_mib_free((void **)idev->stats.ipv6);
268err_ip:
269 return err;
270}
271
272static int snmp6_free_dev(struct inet6_dev *idev)
273{
274 snmp_mib_free((void **)idev->stats.icmpv6);
275 snmp_mib_free((void **)idev->stats.ipv6);
276 return 0;
277}
278
249/* Nobody refers to this device, we may destroy it. */ 279/* Nobody refers to this device, we may destroy it. */
250 280
251static void in6_dev_finish_destroy_rcu(struct rcu_head *head) 281static void in6_dev_finish_destroy_rcu(struct rcu_head *head)
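snmp6_alloc_dev() above uses the kernel's standard goto-unwind shape: each acquisition gets a label, and a failure jumps past its own cleanup while releasing everything acquired before it. The shape in miniature, with hypothetical resources:

#include <stdlib.h>

struct dev_stats {
	void *ipv6;
	void *icmpv6;
};

static int stats_alloc(struct dev_stats *s)
{
	s->ipv6 = malloc(64);
	if (!s->ipv6)
		goto err_ip;

	s->icmpv6 = malloc(64);
	if (!s->icmpv6)
		goto err_icmp;

	return 0;

err_icmp:
	free(s->ipv6);
err_ip:
	return -1;
}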
@@ -271,6 +301,8 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
271 call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu); 301 call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu);
272} 302}
273 303
304EXPORT_SYMBOL(in6_dev_finish_destroy);
305
274static struct inet6_dev * ipv6_add_dev(struct net_device *dev) 306static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
275{ 307{
276 struct inet6_dev *ndev; 308 struct inet6_dev *ndev;
@@ -528,6 +560,16 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
528 560
529 ifa->rt = rt; 561 ifa->rt = rt;
530 562
563 /*
564 * part one of RFC 4429, section 3.3
565 * We should not configure an address as
566 * optimistic if we do not yet know the link
 567 * layer address of our nexthop router
568 */
569
570 if (rt->rt6i_nexthop == NULL)
571 ifa->flags &= ~IFA_F_OPTIMISTIC;
572
531 ifa->idev = idev; 573 ifa->idev = idev;
532 in6_dev_hold(idev); 574 in6_dev_hold(idev);
533 /* For caller */ 575 /* For caller */
@@ -704,6 +746,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i
704 int tmp_plen; 746 int tmp_plen;
705 int ret = 0; 747 int ret = 0;
706 int max_addresses; 748 int max_addresses;
749 u32 addr_flags;
707 750
708 write_lock(&idev->lock); 751 write_lock(&idev->lock);
709 if (ift) { 752 if (ift) {
@@ -761,10 +804,17 @@ retry:
761 spin_unlock_bh(&ifp->lock); 804 spin_unlock_bh(&ifp->lock);
762 805
763 write_unlock(&idev->lock); 806 write_unlock(&idev->lock);
807
808 addr_flags = IFA_F_TEMPORARY;
809 /* set in addrconf_prefix_rcv() */
810 if (ifp->flags & IFA_F_OPTIMISTIC)
811 addr_flags |= IFA_F_OPTIMISTIC;
812
764 ift = !max_addresses || 813 ift = !max_addresses ||
765 ipv6_count_addresses(idev) < max_addresses ? 814 ipv6_count_addresses(idev) < max_addresses ?
766 ipv6_add_addr(idev, &addr, tmp_plen, 815 ipv6_add_addr(idev, &addr, tmp_plen,
767 ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, IFA_F_TEMPORARY) : NULL; 816 ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK,
817 addr_flags) : NULL;
768 if (!ift || IS_ERR(ift)) { 818 if (!ift || IS_ERR(ift)) {
769 in6_ifa_put(ifp); 819 in6_ifa_put(ifp);
770 in6_dev_put(idev); 820 in6_dev_put(idev);
@@ -896,13 +946,14 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
896 * - Tentative Address (RFC2462 section 5.4) 946 * - Tentative Address (RFC2462 section 5.4)
897 * - A tentative address is not considered 947 * - A tentative address is not considered
898 * "assigned to an interface" in the traditional 948 * "assigned to an interface" in the traditional
899 * sense. 949 * sense, unless it is also flagged as optimistic.
900 * - Candidate Source Address (section 4) 950 * - Candidate Source Address (section 4)
901 * - In any case, anycast addresses, multicast 951 * - In any case, anycast addresses, multicast
902 * addresses, and the unspecified address MUST 952 * addresses, and the unspecified address MUST
903 * NOT be included in a candidate set. 953 * NOT be included in a candidate set.
904 */ 954 */
905 if (ifa->flags & IFA_F_TENTATIVE) 955 if ((ifa->flags & IFA_F_TENTATIVE) &&
956 (!(ifa->flags & IFA_F_OPTIMISTIC)))
906 continue; 957 continue;
907 if (unlikely(score.addr_type == IPV6_ADDR_ANY || 958 if (unlikely(score.addr_type == IPV6_ADDR_ANY ||
908 score.addr_type & IPV6_ADDR_MULTICAST)) { 959 score.addr_type & IPV6_ADDR_MULTICAST)) {
@@ -961,15 +1012,17 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
961 } 1012 }
962 } 1013 }
963 1014
964 /* Rule 3: Avoid deprecated address */ 1015 /* Rule 3: Avoid deprecated and optimistic addresses */
965 if (hiscore.rule < 3) { 1016 if (hiscore.rule < 3) {
966 if (ipv6_saddr_preferred(hiscore.addr_type) || 1017 if (ipv6_saddr_preferred(hiscore.addr_type) ||
967 !(ifa_result->flags & IFA_F_DEPRECATED)) 1018 (((ifa_result->flags &
1019 (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0)))
968 hiscore.attrs |= IPV6_SADDR_SCORE_PREFERRED; 1020 hiscore.attrs |= IPV6_SADDR_SCORE_PREFERRED;
969 hiscore.rule++; 1021 hiscore.rule++;
970 } 1022 }
971 if (ipv6_saddr_preferred(score.addr_type) || 1023 if (ipv6_saddr_preferred(score.addr_type) ||
 972 !(ifa->flags & IFA_F_DEPRECATED)) { 1024 (((ifa->flags &
1025 (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) {
973 score.attrs |= IPV6_SADDR_SCORE_PREFERRED; 1026 score.attrs |= IPV6_SADDR_SCORE_PREFERRED;
974 if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) { 1027 if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) {
975 score.rule = 3; 1028 score.rule = 3;
@@ -1107,8 +1160,10 @@ int ipv6_get_saddr(struct dst_entry *dst,
1107 return ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, daddr, saddr); 1160 return ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, daddr, saddr);
1108} 1161}
1109 1162
1163EXPORT_SYMBOL(ipv6_get_saddr);
1110 1164
1111int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr) 1165int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
1166 unsigned char banned_flags)
1112{ 1167{
1113 struct inet6_dev *idev; 1168 struct inet6_dev *idev;
1114 int err = -EADDRNOTAVAIL; 1169 int err = -EADDRNOTAVAIL;
@@ -1119,7 +1174,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
1119 1174
1120 read_lock_bh(&idev->lock); 1175 read_lock_bh(&idev->lock);
1121 for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { 1176 for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
1122 if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) { 1177 if (ifp->scope == IFA_LINK && !(ifp->flags & banned_flags)) {
1123 ipv6_addr_copy(addr, &ifp->addr); 1178 ipv6_addr_copy(addr, &ifp->addr);
1124 err = 0; 1179 err = 0;
1125 break; 1180 break;
@@ -1161,6 +1216,8 @@ int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict)
1161 return ifp != NULL; 1216 return ifp != NULL;
1162} 1217}
1163 1218
1219EXPORT_SYMBOL(ipv6_chk_addr);
1220
1164static 1221static
1165int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev) 1222int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev)
1166{ 1223{
@@ -1669,6 +1726,13 @@ ok:
1669 1726
1670 if (ifp == NULL && valid_lft) { 1727 if (ifp == NULL && valid_lft) {
1671 int max_addresses = in6_dev->cnf.max_addresses; 1728 int max_addresses = in6_dev->cnf.max_addresses;
1729 u32 addr_flags = 0;
1730
1731#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1732 if (in6_dev->cnf.optimistic_dad &&
1733 !ipv6_devconf.forwarding)
1734 addr_flags = IFA_F_OPTIMISTIC;
1735#endif
1672 1736
 1673 /* Do not allow creating too many autoconfigured 1737 /* Do not allow creating too many autoconfigured
 1674 * addresses; this would be too easy a way to crash the kernel. 1738 * addresses; this would be too easy a way to crash the kernel.
@@ -1676,7 +1740,8 @@ ok:
1676 if (!max_addresses || 1740 if (!max_addresses ||
1677 ipv6_count_addresses(in6_dev) < max_addresses) 1741 ipv6_count_addresses(in6_dev) < max_addresses)
1678 ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len, 1742 ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len,
1679 addr_type&IPV6_ADDR_SCOPE_MASK, 0); 1743 addr_type&IPV6_ADDR_SCOPE_MASK,
1744 addr_flags);
1680 1745
1681 if (!ifp || IS_ERR(ifp)) { 1746 if (!ifp || IS_ERR(ifp)) {
1682 in6_dev_put(in6_dev); 1747 in6_dev_put(in6_dev);
@@ -1884,6 +1949,11 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen,
1884 1949
1885 addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, 1950 addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
1886 jiffies_to_clock_t(valid_lft * HZ), flags); 1951 jiffies_to_clock_t(valid_lft * HZ), flags);
1952 /*
1953 * Note that section 3.1 of RFC 4429 indicates
1954 * that the Optimistic flag should not be set for
1955 * manually configured addresses
1956 */
1887 addrconf_dad_start(ifp, 0); 1957 addrconf_dad_start(ifp, 0);
1888 in6_ifa_put(ifp); 1958 in6_ifa_put(ifp);
1889 addrconf_verify(0); 1959 addrconf_verify(0);
@@ -2060,8 +2130,16 @@ static void init_loopback(struct net_device *dev)
2060static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr) 2130static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr)
2061{ 2131{
2062 struct inet6_ifaddr * ifp; 2132 struct inet6_ifaddr * ifp;
2133 u32 addr_flags = IFA_F_PERMANENT;
2134
2135#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
2136 if (idev->cnf.optimistic_dad &&
2137 !ipv6_devconf.forwarding)
2138 addr_flags |= IFA_F_OPTIMISTIC;
2139#endif
2063 2140
2064 ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, IFA_F_PERMANENT); 2141
2142 ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags);
2065 if (!IS_ERR(ifp)) { 2143 if (!IS_ERR(ifp)) {
2066 addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); 2144 addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
2067 addrconf_dad_start(ifp, 0); 2145 addrconf_dad_start(ifp, 0);
@@ -2129,7 +2207,7 @@ ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
2129{ 2207{
2130 struct in6_addr lladdr; 2208 struct in6_addr lladdr;
2131 2209
2132 if (!ipv6_get_lladdr(link_dev, &lladdr)) { 2210 if (!ipv6_get_lladdr(link_dev, &lladdr, IFA_F_TENTATIVE)) {
2133 addrconf_add_linklocal(idev, &lladdr); 2211 addrconf_add_linklocal(idev, &lladdr);
2134 return 0; 2212 return 0;
2135 } 2213 }
@@ -2240,7 +2318,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2240 default: 2318 default:
2241 addrconf_dev_config(dev); 2319 addrconf_dev_config(dev);
2242 break; 2320 break;
2243 }; 2321 }
2244 if (idev) { 2322 if (idev) {
2245 if (run_pending) 2323 if (run_pending)
2246 addrconf_dad_run(idev); 2324 addrconf_dad_run(idev);
@@ -2293,7 +2371,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2293 } 2371 }
2294#endif 2372#endif
2295 break; 2373 break;
2296 }; 2374 }
2297 2375
2298 return NOTIFY_OK; 2376 return NOTIFY_OK;
2299} 2377}
@@ -2474,7 +2552,11 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
2474 unsigned long rand_num; 2552 unsigned long rand_num;
2475 struct inet6_dev *idev = ifp->idev; 2553 struct inet6_dev *idev = ifp->idev;
2476 2554
2477 rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1); 2555 if (ifp->flags & IFA_F_OPTIMISTIC)
2556 rand_num = 0;
2557 else
2558 rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
2559
2478 ifp->probes = idev->cnf.dad_transmits; 2560 ifp->probes = idev->cnf.dad_transmits;
2479 addrconf_mod_timer(ifp, AC_DAD, rand_num); 2561 addrconf_mod_timer(ifp, AC_DAD, rand_num);
2480} 2562}
@@ -2496,7 +2578,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
2496 if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || 2578 if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
2497 !(ifp->flags&IFA_F_TENTATIVE) || 2579 !(ifp->flags&IFA_F_TENTATIVE) ||
2498 ifp->flags & IFA_F_NODAD) { 2580 ifp->flags & IFA_F_NODAD) {
2499 ifp->flags &= ~IFA_F_TENTATIVE; 2581 ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC);
2500 spin_unlock_bh(&ifp->lock); 2582 spin_unlock_bh(&ifp->lock);
2501 read_unlock_bh(&idev->lock); 2583 read_unlock_bh(&idev->lock);
2502 2584
@@ -2516,6 +2598,14 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
2516 addrconf_dad_stop(ifp); 2598 addrconf_dad_stop(ifp);
2517 return; 2599 return;
2518 } 2600 }
2601
2602 /*
2603 * Optimistic nodes can start receiving
 2604 * frames right away
 2605 */
 2606 if (ifp->flags & IFA_F_OPTIMISTIC)
2607 ip6_ins_rt(ifp->rt);
2608
2519 addrconf_dad_kick(ifp); 2609 addrconf_dad_kick(ifp);
2520 spin_unlock_bh(&ifp->lock); 2610 spin_unlock_bh(&ifp->lock);
2521out: 2611out:
@@ -2540,7 +2630,7 @@ static void addrconf_dad_timer(unsigned long data)
2540 * DAD was successful 2630 * DAD was successful
2541 */ 2631 */
2542 2632
2543 ifp->flags &= ~IFA_F_TENTATIVE; 2633 ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC);
2544 spin_unlock_bh(&ifp->lock); 2634 spin_unlock_bh(&ifp->lock);
2545 read_unlock_bh(&idev->lock); 2635 read_unlock_bh(&idev->lock);
2546 2636
@@ -3164,7 +3254,6 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
3164 3254
3165 s_idx = cb->args[0]; 3255 s_idx = cb->args[0];
3166 s_ip_idx = ip_idx = cb->args[1]; 3256 s_ip_idx = ip_idx = cb->args[1];
3167 read_lock(&dev_base_lock);
3168 3257
3169 for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) { 3258 for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
3170 if (idx < s_idx) 3259 if (idx < s_idx)
@@ -3226,7 +3315,6 @@ done:
3226 read_unlock_bh(&idev->lock); 3315 read_unlock_bh(&idev->lock);
3227 in6_dev_put(idev); 3316 in6_dev_put(idev);
3228 } 3317 }
3229 read_unlock(&dev_base_lock);
3230 cb->args[0] = idx; 3318 cb->args[0] = idx;
3231 cb->args[1] = ip_idx; 3319 cb->args[1] = ip_idx;
3232 return skb->len; 3320 return skb->len;
@@ -3359,6 +3447,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
3359#endif 3447#endif
3360 array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp; 3448 array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
3361 array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route; 3449 array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
3450#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
3451 array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad;
3452#endif
3362} 3453}
3363 3454
3364static inline size_t inet6_if_nlmsg_size(void) 3455static inline size_t inet6_if_nlmsg_size(void)
@@ -3372,14 +3463,44 @@ static inline size_t inet6_if_nlmsg_size(void)
3372 nla_total_size(4) /* IFLA_INET6_FLAGS */ 3463 nla_total_size(4) /* IFLA_INET6_FLAGS */
3373 + nla_total_size(sizeof(struct ifla_cacheinfo)) 3464 + nla_total_size(sizeof(struct ifla_cacheinfo))
3374 + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */ 3465 + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
3466 + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
3467 + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
3375 ); 3468 );
3376} 3469}
3377 3470
3471static inline void __snmp6_fill_stats(u64 *stats, void **mib, int items,
3472 int bytes)
3473{
3474 int i;
3475 int pad = bytes - sizeof(u64) * items;
3476 BUG_ON(pad < 0);
3477
3478 /* Use put_unaligned() because stats may not be aligned for u64. */
3479 put_unaligned(items, &stats[0]);
3480 for (i = 1; i < items; i++)
3481 put_unaligned(snmp_fold_field(mib, i), &stats[i]);
3482
3483 memset(&stats[items], 0, pad);
3484}
3485
3486static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
3487 int bytes)
3488{
 3489 switch (attrtype) {
3490 case IFLA_INET6_STATS:
3491 __snmp6_fill_stats(stats, (void **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes);
3492 break;
3493 case IFLA_INET6_ICMP6STATS:
3494 __snmp6_fill_stats(stats, (void **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes);
3495 break;
3496 }
3497}
3498
3378static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, 3499static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
3379 u32 pid, u32 seq, int event, unsigned int flags) 3500 u32 pid, u32 seq, int event, unsigned int flags)
3380{ 3501{
3381 struct net_device *dev = idev->dev; 3502 struct net_device *dev = idev->dev;
3382 struct nlattr *conf; 3503 struct nlattr *nla;
3383 struct ifinfomsg *hdr; 3504 struct ifinfomsg *hdr;
3384 struct nlmsghdr *nlh; 3505 struct nlmsghdr *nlh;
3385 void *protoinfo; 3506 void *protoinfo;
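__snmp6_fill_stats() above stores 64-bit counters through put_unaligned() because a netlink attribute payload is only guaranteed 4-byte alignment. The portable userspace equivalent is a memcpy-based store; a sketch mirroring the fill loop, hypothetical signature:

#include <stdint.h>
#include <string.h>

static void put_u64_unaligned(void *dst, uint64_t v)
{
	memcpy(dst, &v, sizeof(v));	/* safe at any alignment */
}

static void fill_stats(unsigned char *out, const uint64_t *mib, int items)
{
	int i;

	put_u64_unaligned(out, (uint64_t)items);	/* slot 0: item count */
	for (i = 1; i < items; i++)
		put_u64_unaligned(out + i * sizeof(uint64_t), mib[i]);
}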
@@ -3419,12 +3540,22 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
3419 ci.retrans_time = idev->nd_parms->retrans_time; 3540 ci.retrans_time = idev->nd_parms->retrans_time;
3420 NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci); 3541 NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
3421 3542
3422 conf = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32)); 3543 nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
3423 if (conf == NULL) 3544 if (nla == NULL)
3424 goto nla_put_failure; 3545 goto nla_put_failure;
3425 ipv6_store_devconf(&idev->cnf, nla_data(conf), nla_len(conf)); 3546 ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla));
3426 3547
3427 /* XXX - Statistics/MC not implemented */ 3548 /* XXX - MC not implemented */
3549
3550 nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
3551 if (nla == NULL)
3552 goto nla_put_failure;
3553 snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
3554
3555 nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
3556 if (nla == NULL)
3557 goto nla_put_failure;
3558 snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
3428 3559
3429 nla_nest_end(skb, protoinfo); 3560 nla_nest_end(skb, protoinfo);
3430 return nlmsg_end(skb, nlh); 3561 return nlmsg_end(skb, nlh);
@@ -3550,30 +3681,20 @@ errout:
3550 rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err); 3681 rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err);
3551} 3682}
3552 3683
3553static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = {
3554 [RTM_GETLINK - RTM_BASE] = { .dumpit = inet6_dump_ifinfo, },
3555 [RTM_NEWADDR - RTM_BASE] = { .doit = inet6_rtm_newaddr, },
3556 [RTM_DELADDR - RTM_BASE] = { .doit = inet6_rtm_deladdr, },
3557 [RTM_GETADDR - RTM_BASE] = { .doit = inet6_rtm_getaddr,
3558 .dumpit = inet6_dump_ifaddr, },
3559 [RTM_GETMULTICAST - RTM_BASE] = { .dumpit = inet6_dump_ifmcaddr, },
3560 [RTM_GETANYCAST - RTM_BASE] = { .dumpit = inet6_dump_ifacaddr, },
3561 [RTM_NEWROUTE - RTM_BASE] = { .doit = inet6_rtm_newroute, },
3562 [RTM_DELROUTE - RTM_BASE] = { .doit = inet6_rtm_delroute, },
3563 [RTM_GETROUTE - RTM_BASE] = { .doit = inet6_rtm_getroute,
3564 .dumpit = inet6_dump_fib, },
3565#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3566 [RTM_GETRULE - RTM_BASE] = { .dumpit = fib6_rules_dump, },
3567#endif
3568};
3569
3570static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) 3684static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
3571{ 3685{
3572 inet6_ifa_notify(event ? : RTM_NEWADDR, ifp); 3686 inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
3573 3687
3574 switch (event) { 3688 switch (event) {
3575 case RTM_NEWADDR: 3689 case RTM_NEWADDR:
3576 ip6_ins_rt(ifp->rt); 3690 /*
3691 * If the address was optimistic
3692 * we inserted the route at the start of
3693 * our DAD process, so we don't need
3694 * to do it again
3695 */
3696 if (!(ifp->rt->rt6i_node))
3697 ip6_ins_rt(ifp->rt);
3577 if (ifp->idev->cnf.forwarding) 3698 if (ifp->idev->cnf.forwarding)
3578 addrconf_join_anycast(ifp); 3699 addrconf_join_anycast(ifp);
3579 break; 3700 break;
@@ -3894,6 +4015,17 @@ static struct addrconf_sysctl_table
3894 .mode = 0644, 4015 .mode = 0644,
3895 .proc_handler = &proc_dointvec, 4016 .proc_handler = &proc_dointvec,
3896 }, 4017 },
4018#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
4019 {
4020 .ctl_name = CTL_UNNUMBERED,
4021 .procname = "optimistic_dad",
4022 .data = &ipv6_devconf.optimistic_dad,
4023 .maxlen = sizeof(int),
4024 .mode = 0644,
4025 .proc_handler = &proc_dointvec,
4026
4027 },
4028#endif
3897 { 4029 {
3898 .ctl_name = 0, /* sentinel */ 4030 .ctl_name = 0, /* sentinel */
3899 } 4031 }
@@ -4021,11 +4153,15 @@ int register_inet6addr_notifier(struct notifier_block *nb)
4021 return atomic_notifier_chain_register(&inet6addr_chain, nb); 4153 return atomic_notifier_chain_register(&inet6addr_chain, nb);
4022} 4154}
4023 4155
4156EXPORT_SYMBOL(register_inet6addr_notifier);
4157
4024int unregister_inet6addr_notifier(struct notifier_block *nb) 4158int unregister_inet6addr_notifier(struct notifier_block *nb)
4025{ 4159{
4026 return atomic_notifier_chain_unregister(&inet6addr_chain,nb); 4160 return atomic_notifier_chain_unregister(&inet6addr_chain,nb);
4027} 4161}
4028 4162
4163EXPORT_SYMBOL(unregister_inet6addr_notifier);
4164
4029/* 4165/*
4030 * Init / cleanup code 4166 * Init / cleanup code
4031 */ 4167 */
@@ -4064,7 +4200,18 @@ int __init addrconf_init(void)
4064 register_netdevice_notifier(&ipv6_dev_notf); 4200 register_netdevice_notifier(&ipv6_dev_notf);
4065 4201
4066 addrconf_verify(0); 4202 addrconf_verify(0);
4067 rtnetlink_links[PF_INET6] = inet6_rtnetlink_table; 4203
4204 err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo);
4205 if (err < 0)
4206 goto errout;
4207
4208 /* Only the first call to __rtnl_register can fail */
4209 __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL);
4210 __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL);
4211 __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, inet6_dump_ifaddr);
4212 __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, inet6_dump_ifmcaddr);
4213 __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr);
4214
4068#ifdef CONFIG_SYSCTL 4215#ifdef CONFIG_SYSCTL
4069 addrconf_sysctl.sysctl_header = 4216 addrconf_sysctl.sysctl_header =
4070 register_sysctl_table(addrconf_sysctl.addrconf_root_dir); 4217 register_sysctl_table(addrconf_sysctl.addrconf_root_dir);
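The registration sequence above leans on the contract spelled out in its comment: only the first __rtnl_register() call for a family can fail, because that call allocates the per-family handler table; later calls only fill slots in it. A hedged sketch of such a contract, with simplified hypothetical types (not the rtnetlink API):

#include <stdlib.h>

struct handler {
	int (*doit)(void);
	int (*dumpit)(void);
};

static struct handler *family_table;	/* NULL until the first registration */

static int rtnl_register_sketch(int msgtype, int (*doit)(void),
				int (*dumpit)(void))
{
	if (!family_table) {
		family_table = calloc(64, sizeof(*family_table));
		if (!family_table)
			return -1;	/* the only failing path */
	}
	family_table[msgtype].doit = doit;	/* msgtype assumed < 64 */
	family_table[msgtype].dumpit = dumpit;
	return 0;
}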
@@ -4072,6 +4219,10 @@ int __init addrconf_init(void)
4072#endif 4219#endif
4073 4220
4074 return 0; 4221 return 0;
4222errout:
4223 unregister_netdevice_notifier(&ipv6_dev_notf);
4224
4225 return err;
4075} 4226}
4076 4227
4077void __exit addrconf_cleanup(void) 4228void __exit addrconf_cleanup(void)
@@ -4083,7 +4234,6 @@ void __exit addrconf_cleanup(void)
4083 4234
4084 unregister_netdevice_notifier(&ipv6_dev_notf); 4235 unregister_netdevice_notifier(&ipv6_dev_notf);
4085 4236
4086 rtnetlink_links[PF_INET6] = NULL;
4087#ifdef CONFIG_SYSCTL 4237#ifdef CONFIG_SYSCTL
4088 addrconf_sysctl_unregister(&ipv6_devconf_dflt); 4238 addrconf_sysctl_unregister(&ipv6_devconf_dflt);
4089 addrconf_sysctl_unregister(&ipv6_devconf); 4239 addrconf_sysctl_unregister(&ipv6_devconf);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 5cac14a5c778..18cb928c8d92 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -98,6 +98,11 @@ static int inet6_create(struct socket *sock, int protocol)
98 int try_loading_module = 0; 98 int try_loading_module = 0;
99 int err; 99 int err;
100 100
101 if (sock->type != SOCK_RAW &&
102 sock->type != SOCK_DGRAM &&
103 !inet_ehash_secret)
104 build_ehash_secret();
105
101 /* Look for the requested type/protocol pair. */ 106 /* Look for the requested type/protocol pair. */
102 answer = NULL; 107 answer = NULL;
103lookup_protocol: 108lookup_protocol:
@@ -349,6 +354,8 @@ out:
349 return err; 354 return err;
350} 355}
351 356
357EXPORT_SYMBOL(inet6_bind);
358
352int inet6_release(struct socket *sock) 359int inet6_release(struct socket *sock)
353{ 360{
354 struct sock *sk = sock->sk; 361 struct sock *sk = sock->sk;
@@ -365,6 +372,8 @@ int inet6_release(struct socket *sock)
365 return inet_release(sock); 372 return inet_release(sock);
366} 373}
367 374
375EXPORT_SYMBOL(inet6_release);
376
368int inet6_destroy_sock(struct sock *sk) 377int inet6_destroy_sock(struct sock *sk)
369{ 378{
370 struct ipv6_pinfo *np = inet6_sk(sk); 379 struct ipv6_pinfo *np = inet6_sk(sk);
@@ -428,6 +437,8 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
428 return(0); 437 return(0);
429} 438}
430 439
440EXPORT_SYMBOL(inet6_getname);
441
431int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 442int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
432{ 443{
433 struct sock *sk = sock->sk; 444 struct sock *sk = sock->sk;
@@ -437,6 +448,9 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
437 case SIOCGSTAMP: 448 case SIOCGSTAMP:
438 return sock_get_timestamp(sk, (struct timeval __user *)arg); 449 return sock_get_timestamp(sk, (struct timeval __user *)arg);
439 450
451 case SIOCGSTAMPNS:
452 return sock_get_timestampns(sk, (struct timespec __user *)arg);
453
440 case SIOCADDRT: 454 case SIOCADDRT:
441 case SIOCDELRT: 455 case SIOCDELRT:
442 456
@@ -457,6 +471,8 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
457 return(0); 471 return(0);
458} 472}
459 473
474EXPORT_SYMBOL(inet6_ioctl);
475
460const struct proto_ops inet6_stream_ops = { 476const struct proto_ops inet6_stream_ops = {
461 .family = PF_INET6, 477 .family = PF_INET6,
462 .owner = THIS_MODULE, 478 .owner = THIS_MODULE,
@@ -603,6 +619,8 @@ out_illegal:
603 goto out; 619 goto out;
604} 620}
605 621
622EXPORT_SYMBOL(inet6_register_protosw);
623
606void 624void
607inet6_unregister_protosw(struct inet_protosw *p) 625inet6_unregister_protosw(struct inet_protosw *p)
608{ 626{
@@ -619,6 +637,8 @@ inet6_unregister_protosw(struct inet_protosw *p)
619 } 637 }
620} 638}
621 639
640EXPORT_SYMBOL(inet6_unregister_protosw);
641
622int inet6_sk_rebuild_header(struct sock *sk) 642int inet6_sk_rebuild_header(struct sock *sk)
623{ 643{
624 int err; 644 int err;
@@ -678,7 +698,8 @@ int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
678 if (np->rxopt.all) { 698 if (np->rxopt.all) {
679 if ((opt->hop && (np->rxopt.bits.hopopts || 699 if ((opt->hop && (np->rxopt.bits.hopopts ||
680 np->rxopt.bits.ohopopts)) || 700 np->rxopt.bits.ohopopts)) ||
681 ((IPV6_FLOWINFO_MASK & *(__be32*)skb->nh.raw) && 701 ((IPV6_FLOWINFO_MASK &
+			       *(__be32 *)skb_network_header(skb)) &&
 	     np->rxopt.bits.rxflow) ||
 	    (opt->srcrt && (np->rxopt.bits.srcrt ||
 			    np->rxopt.bits.osrcrt)) ||
@@ -691,61 +712,28 @@ int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
 
 EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
 
-int
-snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
-{
-	if (ptr == NULL)
-		return -EINVAL;
-
-	ptr[0] = __alloc_percpu(mibsize);
-	if (!ptr[0])
-		goto err0;
-
-	ptr[1] = __alloc_percpu(mibsize);
-	if (!ptr[1])
-		goto err1;
-
-	return 0;
-
-err1:
-	free_percpu(ptr[0]);
-	ptr[0] = NULL;
-err0:
-	return -ENOMEM;
-}
-
-void
-snmp6_mib_free(void *ptr[2])
-{
-	if (ptr == NULL)
-		return;
-	free_percpu(ptr[0]);
-	free_percpu(ptr[1]);
-	ptr[0] = ptr[1] = NULL;
-}
-
 static int __init init_ipv6_mibs(void)
 {
-	if (snmp6_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib),
+	if (snmp_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib),
 			   __alignof__(struct ipstats_mib)) < 0)
 		goto err_ip_mib;
-	if (snmp6_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib),
+	if (snmp_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib),
 			   __alignof__(struct icmpv6_mib)) < 0)
 		goto err_icmp_mib;
-	if (snmp6_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib),
+	if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib),
 			   __alignof__(struct udp_mib)) < 0)
 		goto err_udp_mib;
-	if (snmp6_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib),
+	if (snmp_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib),
 			   __alignof__(struct udp_mib)) < 0)
 		goto err_udplite_mib;
 	return 0;
 
 err_udplite_mib:
-	snmp6_mib_free((void **)udp_stats_in6);
+	snmp_mib_free((void **)udp_stats_in6);
 err_udp_mib:
-	snmp6_mib_free((void **)icmpv6_statistics);
+	snmp_mib_free((void **)icmpv6_statistics);
 err_icmp_mib:
-	snmp6_mib_free((void **)ipv6_statistics);
+	snmp_mib_free((void **)ipv6_statistics);
 err_ip_mib:
 	return -ENOMEM;
 
@@ -753,10 +741,10 @@ err_ip_mib:
 
 static void cleanup_ipv6_mibs(void)
 {
-	snmp6_mib_free((void **)ipv6_statistics);
-	snmp6_mib_free((void **)icmpv6_statistics);
-	snmp6_mib_free((void **)udp_stats_in6);
-	snmp6_mib_free((void **)udplite_stats_in6);
+	snmp_mib_free((void **)ipv6_statistics);
+	snmp_mib_free((void **)icmpv6_statistics);
+	snmp_mib_free((void **)udp_stats_in6);
+	snmp_mib_free((void **)udplite_stats_in6);
 }
 
 static int __init inet6_init(void)
@@ -929,6 +917,8 @@ static void __exit inet6_exit(void)
 {
 	/* First of all disallow new sockets creation. */
 	sock_unregister(PF_INET6);
+	/* Disallow any further netlink messages */
+	rtnl_unregister_all(PF_INET6);
 
 	/* Cleanup code parts. */
 	ipv6_packet_cleanup();
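The snmp6_mib_init()/snmp6_mib_free() pair removed above duplicated, almost byte for byte, the per-CPU MIB allocator that the IPv4 side already carries; the hunk simply switches the callers to the shared snmp_mib_init()/snmp_mib_free(). The shared helper itself is outside this diff, but judging from the removed code it presumably looks like the sketch below (the two per-cpu copies are the softirq-context and user-context counter sets that DEFINE_SNMP_STAT pairs up):

/* Sketch only: assumed to mirror the snmp6_mib_init() removed above,
 * which the callers now replace with the shared snmp_mib_init(). */
int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
{
	if (ptr == NULL)
		return -EINVAL;

	ptr[0] = __alloc_percpu(mibsize);	/* BH/softirq counters */
	if (!ptr[0])
		goto err0;
	ptr[1] = __alloc_percpu(mibsize);	/* user-context counters */
	if (!ptr[1])
		goto err1;
	return 0;

err1:
	free_percpu(ptr[0]);
	ptr[0] = NULL;
err0:
	return -ENOMEM;
}

The error unwind explains the goto ladder in init_ipv6_mibs() above: each failed step frees exactly the MIBs that earlier steps allocated.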
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index dc68b7269c3c..b696c8401200 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -238,8 +238,8 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 	top_iph = (struct ipv6hdr *)skb->data;
 	top_iph->payload_len = htons(skb->len - sizeof(*top_iph));
 
-	nexthdr = *skb->nh.raw;
-	*skb->nh.raw = IPPROTO_AH;
+	nexthdr = *skb_network_header(skb);
+	*skb_network_header(skb) = IPPROTO_AH;
 
 	/* When there are no extension headers, we only need to save the first
 	 * 8 bytes of the base IP header.
@@ -247,7 +247,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 	memcpy(tmp_base, top_iph, sizeof(tmp_base));
 
 	tmp_ext = NULL;
-	extlen = skb->h.raw - (unsigned char *)(top_iph + 1);
+	extlen = skb_transport_offset(skb) + sizeof(struct ipv6hdr);
 	if (extlen) {
 		extlen += sizeof(*tmp_ext);
 		tmp_ext = kmalloc(extlen, GFP_ATOMIC);
@@ -268,7 +268,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 		goto error_free_iph;
 	}
 
-	ah = (struct ip_auth_hdr *)skb->h.raw;
+	ah = (struct ip_auth_hdr *)skb_transport_header(skb);
 	ah->nexthdr = nexthdr;
 
 	top_iph->priority = 0;
@@ -316,8 +316,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	 *
 	 * To erase AH:
	 * Keeping copy of cleared headers. After AH processing,
-	 * Moving the pointer of skb->nh.raw by using skb_pull as long as AH
-	 * header length. Then copy back the copy as long as hdr_len
+	 * Moving the pointer of skb->network_header by using skb_pull as long
+	 * as AH header length. Then copy back the copy as long as hdr_len
 	 * If destination header following AH exists, copy it into after [Ext2].
 	 *
 	 * |<>|[IPv6][Ext1][Ext2][Dest][Payload]
@@ -325,6 +325,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	 */
 
 	struct ipv6_auth_hdr *ah;
+	struct ipv6hdr *ip6h;
 	struct ah_data *ahp;
 	unsigned char *tmp_hdr = NULL;
 	u16 hdr_len;
@@ -341,7 +342,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
 		goto out;
 
-	hdr_len = skb->data - skb->nh.raw;
+	hdr_len = skb->data - skb_network_header(skb);
 	ah = (struct ipv6_auth_hdr*)skb->data;
 	ahp = x->data;
 	nexthdr = ah->nexthdr;
@@ -354,16 +355,17 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	if (!pskb_may_pull(skb, ah_hlen))
 		goto out;
 
-	tmp_hdr = kmemdup(skb->nh.raw, hdr_len, GFP_ATOMIC);
+	tmp_hdr = kmemdup(skb_network_header(skb), hdr_len, GFP_ATOMIC);
 	if (!tmp_hdr)
 		goto out;
-	if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len, XFRM_POLICY_IN))
+	ip6h = ipv6_hdr(skb);
+	if (ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN))
 		goto free_out;
-	skb->nh.ipv6h->priority = 0;
-	skb->nh.ipv6h->flow_lbl[0] = 0;
-	skb->nh.ipv6h->flow_lbl[1] = 0;
-	skb->nh.ipv6h->flow_lbl[2] = 0;
-	skb->nh.ipv6h->hop_limit = 0;
+	ip6h->priority = 0;
+	ip6h->flow_lbl[0] = 0;
+	ip6h->flow_lbl[1] = 0;
+	ip6h->flow_lbl[2] = 0;
+	ip6h->hop_limit = 0;
 
 	{
 		u8 auth_data[MAX_AH_AUTH_LEN];
@@ -382,7 +384,9 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 		}
 	}
 
-	skb->h.raw = memcpy(skb->nh.raw += ah_hlen, tmp_hdr, hdr_len);
+	skb->network_header += ah_hlen;
+	memcpy(skb_network_header(skb), tmp_hdr, hdr_len);
+	skb->transport_header = skb->network_header;
 	__skb_pull(skb, ah_hlen + hdr_len);
 
 	kfree(tmp_hdr);
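Every ah6.c hunk above is an instance of the same mechanical translation: skb->nh.raw becomes skb_network_header(skb), skb->h.raw becomes skb_transport_header(skb), and pointer subtractions become the offset/length helpers. The accessors themselves are not part of this diff; on configurations where sk_buff still stores plain pointers they presumably reduce to the following sketch:

/* Assumed shape of the helpers this conversion targets (sketch, not
 * taken from the commit). */
static inline unsigned char *skb_network_header(const struct sk_buff *skb)
{
	return skb->network_header;		/* was skb->nh.raw */
}

static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
{
	return skb->transport_header;		/* was skb->h.raw */
}

static inline int skb_network_offset(const struct sk_buff *skb)
{
	return skb_network_header(skb) - skb->data;
}

static inline unsigned int skb_network_header_len(const struct sk_buff *skb)
{
	return skb->transport_header - skb->network_header;
}

Keeping the header positions behind accessors is what later allows them to be stored as offsets instead of pointers without touching the protocol code again.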
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 3b4e8dcf4c86..403eee66b9c5 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -209,7 +209,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 		     __be16 port, u32 info, u8 *payload)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct icmp6hdr *icmph = (struct icmp6hdr *)skb->h.raw;
+	struct icmp6hdr *icmph = icmp6_hdr(skb);
 	struct sock_exterr_skb *serr;
 
 	if (!np->recverr)
@@ -227,11 +227,12 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 	serr->ee.ee_pad = 0;
 	serr->ee.ee_info = info;
 	serr->ee.ee_data = 0;
-	serr->addr_offset = (u8*)&(((struct ipv6hdr*)(icmph+1))->daddr) - skb->nh.raw;
+	serr->addr_offset = (u8 *)&(((struct ipv6hdr *)(icmph + 1))->daddr) -
+			      skb_network_header(skb);
 	serr->port = port;
 
-	skb->h.raw = payload;
 	__skb_pull(skb, payload - skb->data);
+	skb_reset_transport_header(skb);
 
 	if (sock_queue_err_skb(sk, skb))
 		kfree_skb(skb);
@@ -251,8 +252,9 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
 	if (!skb)
 		return;
 
-	iph = (struct ipv6hdr*)skb_put(skb, sizeof(struct ipv6hdr));
-	skb->nh.ipv6h = iph;
+	skb_put(skb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(skb);
+	iph = ipv6_hdr(skb);
 	ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
 
 	serr = SKB_EXT_ERR(skb);
@@ -263,11 +265,11 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
 	serr->ee.ee_pad = 0;
 	serr->ee.ee_info = info;
 	serr->ee.ee_data = 0;
-	serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw;
+	serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
 	serr->port = fl->fl_ip_dport;
 
-	skb->h.raw = skb->tail;
-	__skb_pull(skb, skb->tail - skb->data);
+	__skb_pull(skb, skb_tail_pointer(skb) - skb->data);
+	skb_reset_transport_header(skb);
 
 	if (sock_queue_err_skb(sk, skb))
 		kfree_skb(skb);
@@ -309,21 +311,24 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 
 	sin = (struct sockaddr_in6 *)msg->msg_name;
 	if (sin) {
+		const unsigned char *nh = skb_network_header(skb);
 		sin->sin6_family = AF_INET6;
 		sin->sin6_flowinfo = 0;
 		sin->sin6_port = serr->port;
 		sin->sin6_scope_id = 0;
 		if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
 			ipv6_addr_copy(&sin->sin6_addr,
-				  (struct in6_addr *)(skb->nh.raw + serr->addr_offset));
+				  (struct in6_addr *)(nh + serr->addr_offset));
 			if (np->sndflow)
-				sin->sin6_flowinfo = *(__be32*)(skb->nh.raw + serr->addr_offset - 24) & IPV6_FLOWINFO_MASK;
+				sin->sin6_flowinfo =
+					(*(__be32 *)(nh + serr->addr_offset - 24) &
+					 IPV6_FLOWINFO_MASK);
 			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
 				sin->sin6_scope_id = IP6CB(skb)->iif;
 		} else {
 			ipv6_addr_set(&sin->sin6_addr, 0, 0,
 				      htonl(0xffff),
-				      *(__be32*)(skb->nh.raw + serr->addr_offset));
+				      *(__be32 *)(nh + serr->addr_offset));
 		}
 	}
 
@@ -335,7 +340,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 		sin->sin6_flowinfo = 0;
 		sin->sin6_scope_id = 0;
 		if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
-			ipv6_addr_copy(&sin->sin6_addr, &skb->nh.ipv6h->saddr);
+			ipv6_addr_copy(&sin->sin6_addr, &ipv6_hdr(skb)->saddr);
 			if (np->rxopt.all)
 				datagram_recv_ctl(sk, msg, skb);
 			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
@@ -344,8 +349,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 			struct inet_sock *inet = inet_sk(sk);
 
 			ipv6_addr_set(&sin->sin6_addr, 0, 0,
-				      htonl(0xffff),
-				      skb->nh.iph->saddr);
+				      htonl(0xffff), ip_hdr(skb)->saddr);
 			if (inet->cmsg_flags)
 				ip_cmsg_recv(msg, skb);
 		}
@@ -381,33 +385,34 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct inet6_skb_parm *opt = IP6CB(skb);
+	unsigned char *nh = skb_network_header(skb);
 
 	if (np->rxopt.bits.rxinfo) {
 		struct in6_pktinfo src_info;
 
 		src_info.ipi6_ifindex = opt->iif;
-		ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr);
+		ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr);
 		put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
 	}
 
 	if (np->rxopt.bits.rxhlim) {
-		int hlim = skb->nh.ipv6h->hop_limit;
+		int hlim = ipv6_hdr(skb)->hop_limit;
 		put_cmsg(msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
 	}
 
 	if (np->rxopt.bits.rxtclass) {
-		int tclass = (ntohl(*(__be32 *)skb->nh.ipv6h) >> 20) & 0xff;
+		int tclass = (ntohl(*(__be32 *)ipv6_hdr(skb)) >> 20) & 0xff;
 		put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
 	}
 
-	if (np->rxopt.bits.rxflow && (*(__be32*)skb->nh.raw & IPV6_FLOWINFO_MASK)) {
-		__be32 flowinfo = *(__be32*)skb->nh.raw & IPV6_FLOWINFO_MASK;
+	if (np->rxopt.bits.rxflow && (*(__be32 *)nh & IPV6_FLOWINFO_MASK)) {
+		__be32 flowinfo = *(__be32 *)nh & IPV6_FLOWINFO_MASK;
 		put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
 	}
 
 	/* HbH is allowed only once */
 	if (np->rxopt.bits.hopopts && opt->hop) {
-		u8 *ptr = skb->nh.raw + opt->hop;
+		u8 *ptr = nh + opt->hop;
 		put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr);
 	}
 
@@ -423,11 +428,11 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 		 * IPV6_RECVDSTOPTS is more generic. --yoshfuji
 		 */
 		unsigned int off = sizeof(struct ipv6hdr);
-		u8 nexthdr = skb->nh.ipv6h->nexthdr;
+		u8 nexthdr = ipv6_hdr(skb)->nexthdr;
 
 		while (off <= opt->lastopt) {
 			unsigned len;
-			u8 *ptr = skb->nh.raw + off;
+			u8 *ptr = nh + off;
 
 			switch(nexthdr) {
 			case IPPROTO_DSTOPTS:
@@ -461,27 +466,27 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 		struct in6_pktinfo src_info;
 
 		src_info.ipi6_ifindex = opt->iif;
-		ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr);
+		ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr);
 		put_cmsg(msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
 	}
 	if (np->rxopt.bits.rxohlim) {
-		int hlim = skb->nh.ipv6h->hop_limit;
+		int hlim = ipv6_hdr(skb)->hop_limit;
 		put_cmsg(msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
 	}
 	if (np->rxopt.bits.ohopopts && opt->hop) {
-		u8 *ptr = skb->nh.raw + opt->hop;
+		u8 *ptr = nh + opt->hop;
 		put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr);
 	}
 	if (np->rxopt.bits.odstopts && opt->dst0) {
-		u8 *ptr = skb->nh.raw + opt->dst0;
+		u8 *ptr = nh + opt->dst0;
 		put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
 	}
 	if (np->rxopt.bits.osrcrt && opt->srcrt) {
-		struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(skb->nh.raw + opt->srcrt);
+		struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(nh + opt->srcrt);
 		put_cmsg(msg, SOL_IPV6, IPV6_2292RTHDR, (rthdr->hdrlen+1) << 3, rthdr);
 	}
 	if (np->rxopt.bits.odstopts && opt->dst1) {
-		u8 *ptr = skb->nh.raw + opt->dst1;
+		u8 *ptr = nh + opt->dst1;
 		put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
 	}
 	return 0;
@@ -718,7 +723,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 				    cmsg->cmsg_type);
 			err = -EINVAL;
 			break;
-		};
+		}
 	}
 
 exit_f:
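The rxtclass and rxflow branches converted above both read the first 32-bit word of the IPv6 header, which packs the version (4 bits), traffic class (8 bits) and flow label (20 bits); IPV6_FLOWINFO_MASK covers the low 28 of those bits. A self-contained illustration of the two extractions, using a hypothetical sample word rather than live packet data:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	/* version 6, traffic class 0xAB, flow label 0xCDE12 (made-up values) */
	uint32_t word = htonl(0x6ABCDE12);

	int tclass = (ntohl(word) >> 20) & 0xff;	/* same shift as the diff */
	uint32_t flowinfo = ntohl(word) & 0x0FFFFFFF;	/* IPV6_FLOWINFO_MASK */

	printf("tclass=0x%02x flowlabel=0x%05x flowinfo=0x%07x\n",
	       tclass, ntohl(word) & 0xFFFFF, flowinfo);
	return 0;
}

This prints tclass=0xab, flowlabel=0xcde12, confirming why the conversion can switch between the raw network-header byte pointer (nh) and the typed ipv6_hdr(skb) freely: both name the same first word.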
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 363e63ffecca..7107bb7e2e62 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -42,21 +42,19 @@
 static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err;
-	int hdr_len;
 	struct ipv6hdr *top_iph;
 	struct ipv6_esp_hdr *esph;
 	struct crypto_blkcipher *tfm;
 	struct blkcipher_desc desc;
-	struct esp_data *esp;
 	struct sk_buff *trailer;
 	int blksize;
 	int clen;
 	int alen;
 	int nfrags;
-
-	esp = x->data;
-	hdr_len = skb->h.raw - skb->data +
-			sizeof(*esph) + esp->conf.ivlen;
+	u8 *tail;
+	struct esp_data *esp = x->data;
+	int hdr_len = (skb_transport_offset(skb) +
+		       sizeof(*esph) + esp->conf.ivlen);
 
 	/* Strip IP+ESP header. */
 	__skb_pull(skb, hdr_len);
@@ -81,19 +79,20 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	}
 
 	/* Fill padding... */
+	tail = skb_tail_pointer(trailer);
 	do {
 		int i;
 		for (i=0; i<clen-skb->len - 2; i++)
-			*(u8*)(trailer->tail + i) = i+1;
+			tail[i] = i + 1;
 	} while (0);
-	*(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2;
+	tail[clen-skb->len - 2] = (clen - skb->len) - 2;
 	pskb_put(skb, trailer, clen - skb->len);
 
 	top_iph = (struct ipv6hdr *)__skb_push(skb, hdr_len);
-	esph = (struct ipv6_esp_hdr *)skb->h.raw;
+	esph = (struct ipv6_esp_hdr *)skb_transport_header(skb);
 	top_iph->payload_len = htons(skb->len + alen - sizeof(*top_iph));
-	*(u8*)(trailer->tail - 1) = *skb->nh.raw;
-	*skb->nh.raw = IPPROTO_ESP;
+	*(skb_tail_pointer(trailer) - 1) = *skb_network_header(skb);
+	*skb_network_header(skb) = IPPROTO_ESP;
 
 	esph->spi = x->id.spi;
 	esph->seq_no = htonl(++x->replay.oseq);
@@ -150,8 +149,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 	int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
 	int alen = esp->auth.icv_trunc_len;
 	int elen = skb->len - sizeof(struct ipv6_esp_hdr) - esp->conf.ivlen - alen;
-
-	int hdr_len = skb->h.raw - skb->nh.raw;
+	int hdr_len = skb_network_header_len(skb);
 	int nfrags;
 	int ret = 0;
 
@@ -191,7 +189,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 	skb->ip_summed = CHECKSUM_NONE;
 
 	esph = (struct ipv6_esp_hdr*)skb->data;
-	iph = skb->nh.ipv6h;
+	iph = ipv6_hdr(skb);
 
 	/* Get ivec. This can be wrong, check against another impls. */
 	if (esp->conf.ivlen)
@@ -231,28 +229,30 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 		ret = nexthdr[1];
 	}
 
-	skb->h.raw = __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen) - hdr_len;
-
+	__skb_pull(skb, sizeof(*esph) + esp->conf.ivlen);
+	skb_set_transport_header(skb, -hdr_len);
 out:
 	return ret;
 }
 
-static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
+static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
 {
 	struct esp_data *esp = x->data;
 	u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
+	u32 align = max_t(u32, blksize, esp->conf.padlen);
+	u32 rem;
+
+	mtu -= x->props.header_len + esp->auth.icv_trunc_len;
+	rem = mtu & (align - 1);
+	mtu &= ~(align - 1);
 
-	if (x->props.mode == XFRM_MODE_TUNNEL) {
-		mtu = ALIGN(mtu + 2, blksize);
-	} else {
-		/* The worst case. */
+	if (x->props.mode != XFRM_MODE_TUNNEL) {
 		u32 padsize = ((blksize - 1) & 7) + 1;
-		mtu = ALIGN(mtu + 2, padsize) + blksize - padsize;
+		mtu -= blksize - padsize;
+		mtu += min_t(u32, blksize - padsize, rem);
 	}
-	if (esp->conf.padlen)
-		mtu = ALIGN(mtu, esp->conf.padlen);
 
-	return mtu + x->props.header_len + esp->auth.icv_trunc_len;
+	return mtu - 2;
 }
 
 static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
@@ -382,7 +382,7 @@ static struct xfrm_type esp6_type =
 	.proto		= IPPROTO_ESP,
 	.init_state	= esp6_init_state,
 	.destructor	= esp6_destroy,
-	.get_max_size	= esp6_get_max_size,
+	.get_mtu	= esp6_get_mtu,
 	.input		= esp6_input,
 	.output		= esp6_output,
 	.hdr_offset	= xfrm6_find_1stfragopt,
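The esp6_get_max_size() to esp6_get_mtu() rewrite above inverts the question: instead of "how large may this packet grow after ESP processing", the hook now answers "how much payload fits under a given link MTU". The math subtracts the ESP header and trailing ICV, rounds down to the cipher/pad alignment while crediting back what still fits in transport mode, and finally reserves the two trailer bytes (pad length and next header). A standalone re-derivation with hypothetical numbers, not taken from the commit:

#include <stdio.h>
#include <stdint.h>

static uint32_t esp_mtu(uint32_t mtu, uint32_t header_len, uint32_t icv_len,
			uint32_t blksize, uint32_t padlen, int tunnel)
{
	uint32_t align = blksize > padlen ? blksize : padlen;
	uint32_t rem;

	mtu -= header_len + icv_len;		/* ESP header + IV, then ICV */
	rem = mtu & (align - 1);
	mtu &= ~(align - 1);			/* round down to alignment */

	if (!tunnel) {
		uint32_t padsize = ((blksize - 1) & 7) + 1;
		mtu -= blksize - padsize;	/* worst-case inner alignment */
		mtu += rem < blksize - padsize ? rem : blksize - padsize;
	}
	return mtu - 2;				/* pad length + next header */
}

int main(void)
{
	/* 1500 - 16 - 12 = 1472, already 16-aligned; transport mode with
	 * padsize 8 gives 1472 - 8 + 0 - 2 = 1462 payload bytes. */
	printf("%u\n", esp_mtu(1500, 16, 12, 16, 0, 0));
	return 0;
}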
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index fb39604c3d09..6d8e4ac7bdad 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -50,13 +50,14 @@
 
 int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
 {
-	int packet_len = skb->tail - skb->nh.raw;
+	const unsigned char *nh = skb_network_header(skb);
+	int packet_len = skb->tail - skb->network_header;
 	struct ipv6_opt_hdr *hdr;
 	int len;
 
 	if (offset + 2 > packet_len)
 		goto bad;
-	hdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+	hdr = (struct ipv6_opt_hdr *)(nh + offset);
 	len = ((hdr->hdrlen + 1) << 3);
 
 	if (offset + len > packet_len)
@@ -66,7 +67,7 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
 	len -= 2;
 
 	while (len > 0) {
-		int opttype = skb->nh.raw[offset];
+		int opttype = nh[offset];
 		int optlen;
 
 		if (opttype == type)
@@ -77,7 +78,7 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
 			optlen = 1;
 			break;
 		default:
-			optlen = skb->nh.raw[offset + 1] + 2;
+			optlen = nh[offset + 1] + 2;
 			if (optlen > len)
 				goto bad;
 			break;
@@ -113,7 +114,7 @@ static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff)
 {
 	struct sk_buff *skb = *skbp;
 
-	switch ((skb->nh.raw[optoff] & 0xC0) >> 6) {
+	switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) {
 	case 0: /* ignore */
 		return 1;
 
@@ -124,12 +125,12 @@ static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff)
 		/* Actually, it is redundant check. icmp_send
 		   will recheck in any case.
 		 */
-		if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
+		if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr))
 			break;
 	case 2: /* send ICMP PARM PROB regardless and drop packet */
 		icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
 		return 0;
-	};
+	}
 
 	kfree_skb(skb);
 	return 0;
@@ -141,19 +142,20 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
 {
 	struct sk_buff *skb = *skbp;
 	struct tlvtype_proc *curr;
-	int off = skb->h.raw - skb->nh.raw;
-	int len = ((skb->h.raw[1]+1)<<3);
+	const unsigned char *nh = skb_network_header(skb);
+	int off = skb_network_header_len(skb);
+	int len = (skb_transport_header(skb)[1] + 1) << 3;
 
-	if ((skb->h.raw + len) - skb->data > skb_headlen(skb))
+	if (skb_transport_offset(skb) + len > skb_headlen(skb))
 		goto bad;
 
 	off += 2;
 	len -= 2;
 
 	while (len > 0) {
-		int optlen = skb->nh.raw[off+1]+2;
+		int optlen = nh[off + 1] + 2;
 
-		switch (skb->nh.raw[off]) {
+		switch (nh[off]) {
 		case IPV6_TLV_PAD0:
 			optlen = 1;
 			break;
@@ -165,7 +167,7 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
 			if (optlen > len)
 				goto bad;
 			for (curr=procs; curr->type >= 0; curr++) {
-				if (curr->type == skb->nh.raw[off]) {
+				if (curr->type == nh[off]) {
 					/* type specific length/alignment
 					   checks will be performed in the
 					   func(). */
@@ -200,7 +202,7 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
 	struct sk_buff *skb = *skbp;
 	struct ipv6_destopt_hao *hao;
 	struct inet6_skb_parm *opt = IP6CB(skb);
-	struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->nh.raw;
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	struct in6_addr tmp_addr;
 	int ret;
 
@@ -211,7 +213,7 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
 	opt->dsthao = opt->dst1;
 	opt->dst1 = 0;
 
-	hao = (struct ipv6_destopt_hao *)(skb->nh.raw + optoff);
+	hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff);
 
 	if (hao->length != 16) {
 		LIMIT_NETDEBUG(
@@ -244,8 +246,9 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
 
 		/* update all variable using below by copied skbuff */
 		*skbp = skb = skb2;
-		hao = (struct ipv6_destopt_hao *)(skb2->nh.raw + optoff);
-		ipv6h = (struct ipv6hdr *)skb2->nh.raw;
+		hao = (struct ipv6_destopt_hao *)(skb_network_header(skb2) +
+						  optoff);
+		ipv6h = ipv6_hdr(skb2);
 	}
 
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -255,7 +258,7 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
 	ipv6_addr_copy(&ipv6h->saddr, &hao->addr);
 	ipv6_addr_copy(&hao->addr, &tmp_addr);
 
-	if (skb->tstamp.off_sec == 0)
+	if (skb->tstamp.tv64 == 0)
 		__net_timestamp(skb);
 
 	return 1;
@@ -285,16 +288,16 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
 #endif
 	struct dst_entry *dst;
 
-	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
-	    !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
+	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
+	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
+				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
 		kfree_skb(skb);
 		return -1;
 	}
 
-	opt->lastopt = skb->h.raw - skb->nh.raw;
-	opt->dst1 = skb->h.raw - skb->nh.raw;
+	opt->lastopt = opt->dst1 = skb_network_header_len(skb);
#ifdef CONFIG_IPV6_MIP6
 	dstbuf = opt->dst1;
#endif
@@ -303,7 +306,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
 	if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) {
 		dst_release(dst);
 		skb = *skbp;
-		skb->h.raw += ((skb->h.raw[1]+1)<<3);
+		skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
 		opt = IP6CB(skb);
#ifdef CONFIG_IPV6_MIP6
 		opt->nhoff = dstbuf;
@@ -384,18 +387,20 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
 
 	in6_dev_put(idev);
 
-	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
-	    !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
+	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
+	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
+				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
 		kfree_skb(skb);
 		return -1;
 	}
 
-	hdr = (struct ipv6_rt_hdr *) skb->h.raw;
+	hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb);
 
 	switch (hdr->type) {
#ifdef CONFIG_IPV6_MIP6
+	case IPV6_SRCRT_TYPE_2:
 		break;
#endif
 	case IPV6_SRCRT_TYPE_0:
@@ -406,11 +411,12 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
 	default:
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+				  (&hdr->type) - skb_network_header(skb));
 		return -1;
 	}
 
-	if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr) ||
+	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) ||
 	    skb->pkt_type != PACKET_HOST) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INADDRERRORS);
@@ -438,12 +444,11 @@ looped_back:
 			break;
 		}
 
-		opt->lastopt = skb->h.raw - skb->nh.raw;
-		opt->srcrt = skb->h.raw - skb->nh.raw;
-		skb->h.raw += (hdr->hdrlen + 1) << 3;
+		opt->lastopt = opt->srcrt = skb_network_header_len(skb);
+		skb->transport_header += (hdr->hdrlen + 1) << 3;
 		opt->dst0 = opt->dst1;
 		opt->dst1 = 0;
-		opt->nhoff = (&hdr->nexthdr) - skb->nh.raw;
+		opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb);
 		return 1;
 	}
 
@@ -452,7 +457,9 @@ looped_back:
 		if (hdr->hdrlen & 0x01) {
 			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 					 IPSTATS_MIB_INHDRERRORS);
-			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw);
+			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+					  ((&hdr->hdrlen) -
+					   skb_network_header(skb)));
 			return -1;
 		}
 		break;
@@ -479,7 +486,9 @@ looped_back:
 	if (hdr->segments_left > n) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->segments_left) - skb->nh.raw);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+				  ((&hdr->segments_left) -
+				   skb_network_header(skb)));
 		return -1;
 	}
 
@@ -498,7 +507,7 @@ looped_back:
 		kfree_skb(skb);
 		*skbp = skb = skb2;
 		opt = IP6CB(skb2);
-		hdr = (struct ipv6_rt_hdr *) skb2->h.raw;
+		hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb2);
 	}
 
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -514,7 +523,7 @@ looped_back:
#ifdef CONFIG_IPV6_MIP6
 	case IPV6_SRCRT_TYPE_2:
 		if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
-				     (xfrm_address_t *)&skb->nh.ipv6h->saddr,
+				     (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
 				     IPPROTO_ROUTING) < 0) {
 			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 					 IPSTATS_MIB_INADDRERRORS);
@@ -541,19 +550,19 @@ looped_back:
 	}
 
 	ipv6_addr_copy(&daddr, addr);
-	ipv6_addr_copy(addr, &skb->nh.ipv6h->daddr);
-	ipv6_addr_copy(&skb->nh.ipv6h->daddr, &daddr);
+	ipv6_addr_copy(addr, &ipv6_hdr(skb)->daddr);
+	ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &daddr);
 
 	dst_release(xchg(&skb->dst, NULL));
 	ip6_route_input(skb);
 	if (skb->dst->error) {
-		skb_push(skb, skb->data - skb->nh.raw);
+		skb_push(skb, skb->data - skb_network_header(skb));
 		dst_input(skb);
 		return -1;
 	}
 
 	if (skb->dst->dev->flags&IFF_LOOPBACK) {
-		if (skb->nh.ipv6h->hop_limit <= 1) {
+		if (ipv6_hdr(skb)->hop_limit <= 1) {
 			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 					 IPSTATS_MIB_INHDRERRORS);
 			icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
@@ -561,11 +570,11 @@ looped_back:
 			kfree_skb(skb);
 			return -1;
 		}
-		skb->nh.ipv6h->hop_limit--;
+		ipv6_hdr(skb)->hop_limit--;
 		goto looped_back;
 	}
 
-	skb_push(skb, skb->data - skb->nh.raw);
+	skb_push(skb, skb->data - skb_network_header(skb));
 	dst_input(skb);
 	return -1;
 }
@@ -656,13 +665,14 @@ EXPORT_SYMBOL_GPL(ipv6_invert_rthdr);
 static int ipv6_hop_ra(struct sk_buff **skbp, int optoff)
 {
 	struct sk_buff *skb = *skbp;
+	const unsigned char *nh = skb_network_header(skb);
 
-	if (skb->nh.raw[optoff+1] == 2) {
+	if (nh[optoff + 1] == 2) {
 		IP6CB(skb)->ra = optoff;
 		return 1;
 	}
 	LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n",
-		       skb->nh.raw[optoff+1]);
+		       nh[optoff + 1]);
 	kfree_skb(skb);
 	return 0;
 }
@@ -672,23 +682,24 @@ static int ipv6_hop_ra(struct sk_buff **skbp, int optoff)
 static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff)
 {
 	struct sk_buff *skb = *skbp;
+	const unsigned char *nh = skb_network_header(skb);
 	u32 pkt_len;
 
-	if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) {
+	if (nh[optoff + 1] != 4 || (optoff & 3) != 2) {
 		LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
-			       skb->nh.raw[optoff+1]);
+			       nh[optoff+1]);
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
 		goto drop;
 	}
 
-	pkt_len = ntohl(*(__be32*)(skb->nh.raw+optoff+2));
+	pkt_len = ntohl(*(__be32 *)(nh + optoff + 2));
 	if (pkt_len <= IPV6_MAXPLEN) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
 		return 0;
 	}
-	if (skb->nh.ipv6h->payload_len) {
+	if (ipv6_hdr(skb)->payload_len) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
 		return 0;
@@ -727,13 +738,14 @@ int ipv6_parse_hopopts(struct sk_buff **skbp)
 	struct inet6_skb_parm *opt = IP6CB(skb);
 
 	/*
-	 * skb->nh.raw is equal to skb->data, and
-	 * skb->h.raw - skb->nh.raw is always equal to
+	 * skb_network_header(skb) is equal to skb->data, and
+	 * skb_network_header_len(skb) is always equal to
 	 * sizeof(struct ipv6hdr) by definition of
 	 * hop-by-hop options.
 	 */
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) ||
-	    !pskb_may_pull(skb, sizeof(struct ipv6hdr) + ((skb->h.raw[1] + 1) << 3))) {
+	    !pskb_may_pull(skb, (sizeof(struct ipv6hdr) +
+				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
 		kfree_skb(skb);
 		return -1;
 	}
@@ -741,7 +753,7 @@ int ipv6_parse_hopopts(struct sk_buff **skbp)
 	opt->hop = sizeof(struct ipv6hdr);
 	if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) {
 		skb = *skbp;
-		skb->h.raw += (skb->h.raw[1]+1)<<3;
+		skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
 		opt = IP6CB(skb);
 		opt->nhoff = sizeof(struct ipv6hdr);
 		return 1;
@@ -810,6 +822,8 @@ void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
 		ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt);
 }
 
+EXPORT_SYMBOL(ipv6_push_nfrag_opts);
+
 void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto)
 {
 	if (opt->dst1opt)
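The exthdrs.c conversions repeatedly walk the same TLV layout: one type byte, one length byte, then the value, with Pad1 (type 0) as the single one-byte exception, which is exactly the "optlen = nh[off + 1] + 2" shape above. A self-contained miniature of that walk over a hypothetical option area:

#include <stdio.h>

int main(void)
{
	/* Pad1, then PadN (type 1, len 2), then a made-up type 5, len 2. */
	unsigned char opts[] = { 0x00, 0x01, 0x02, 0x00, 0x00,
				 0x05, 0x02, 0xaa, 0xbb };
	int off = 0, len = sizeof(opts);

	while (len > 0) {
		int opttype = opts[off];
		int optlen = (opttype == 0) ? 1 : opts[off + 1] + 2;

		if (optlen > len)
			break;	/* malformed: option overruns the area */
		printf("type %d, %d byte(s)\n", opttype, optlen);
		off += optlen;
		len -= optlen;
	}
	return 0;
}

The "optlen > len" guard is the same bounds check the kernel loops perform before dispatching to a per-type handler.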
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ea3035b4e3e8..fc3882c90604 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -17,6 +17,7 @@
 
 #include <net/fib_rules.h>
 #include <net/ipv6.h>
+#include <net/addrconf.h>
 #include <net/ip6_route.h>
 #include <net/netlink.h>
 
@@ -95,8 +96,27 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 	if (table)
 		rt = lookup(table, flp, flags);
 
-	if (rt != &ip6_null_entry)
+	if (rt != &ip6_null_entry) {
+		struct fib6_rule *r = (struct fib6_rule *)rule;
+
+		/*
+		 * If we need to find a source address for this traffic,
+		 * we check the result if it meets requirement of the rule.
+		 */
+		if ((rule->flags & FIB_RULE_FIND_SADDR) &&
+		    r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) {
+			struct in6_addr saddr;
+			if (ipv6_get_saddr(&rt->u.dst, &flp->fl6_dst,
+					   &saddr))
+				goto again;
+			if (!ipv6_prefix_equal(&saddr, &r->src.addr,
+					       r->src.plen))
+				goto again;
+			ipv6_addr_copy(&flp->fl6_src, &saddr);
+		}
 		goto out;
+	}
+again:
 	dst_release(&rt->u.dst);
 	rt = NULL;
 	goto out;
@@ -117,9 +137,17 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 	    !ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen))
 		return 0;
 
+	/*
+	 * If FIB_RULE_FIND_SADDR is set and we do not have a
+	 * source address for the traffic, we defer check for
+	 * source address.
+	 */
 	if (r->src.plen) {
-		if (!(flags & RT6_LOOKUP_F_HAS_SADDR) ||
-		    !ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, r->src.plen))
+		if (flags & RT6_LOOKUP_F_HAS_SADDR) {
+			if (!ipv6_prefix_equal(&fl->fl6_src, &r->src.addr,
+					       r->src.plen))
+				return 0;
+		} else if (!(r->common.flags & FIB_RULE_FIND_SADDR))
 			return 0;
 	}
 
@@ -216,11 +244,6 @@ nla_put_failure:
 	return -ENOBUFS;
 }
 
-int fib6_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	return fib_rules_dump(skb, cb, AF_INET6);
-}
-
 static u32 fib6_rule_default_pref(void)
 {
 	return 0x3FFF;
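The new FIB_RULE_FIND_SADDR handling splits the source-prefix test in two: fib6_rule_match() lets a flow without a source address through (deferring the check), and fib6_rule_action() validates the source address chosen after the route lookup, jumping to the new "again" label when it falls outside the rule's prefix so rule processing continues. The test both halves rely on is a plain bitwise prefix comparison; a simplified, self-contained version of that primitive (not the kernel's ipv6_prefix_equal itself):

#include <stdio.h>
#include <string.h>

/* Do the first plen bits of two 16-byte IPv6 addresses match? */
static int prefix_equal(const unsigned char *a, const unsigned char *b,
			unsigned int plen)
{
	unsigned int whole = plen >> 3, bits = plen & 7;

	if (memcmp(a, b, whole))
		return 0;
	if (bits) {
		unsigned char mask = 0xff << (8 - bits);
		return !((a[whole] ^ b[whole]) & mask);
	}
	return 1;
}

int main(void)
{
	unsigned char x[16] = { 0x20, 0x01, 0x0d, 0xb8, 0x12 };
	unsigned char y[16] = { 0x20, 0x01, 0x0d, 0xb8, 0x34 };

	printf("/32 equal: %d\n", prefix_equal(x, y, 32));	/* 1 */
	printf("/36 equal: %d\n", prefix_equal(x, y, 36));	/* 0 */
	return 0;
}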
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index edfe98bf64c3..e9bcce9e7bdf 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -68,6 +68,7 @@
 #include <asm/system.h>
 
 DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
+EXPORT_SYMBOL(icmpv6_statistics);
 
 /*
  *	The ICMP socket(s). This is the most convenient way to flow control
@@ -128,9 +129,9 @@ void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
 
 static int is_ineligible(struct sk_buff *skb)
 {
-	int ptr = (u8*)(skb->nh.ipv6h+1) - skb->data;
+	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
 	int len = skb->len - ptr;
-	__u8 nexthdr = skb->nh.ipv6h->nexthdr;
+	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
 
 	if (len < 0)
 		return 1;
@@ -205,7 +206,7 @@ static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
 {
 	u8 _optval, *op;
 
-	offset += skb->nh.raw - skb->data;
+	offset += skb_network_offset(skb);
 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
 	if (op == NULL)
 		return 1;
@@ -221,7 +222,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct
 	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
 		goto out;
 
-	icmp6h = (struct icmp6hdr*) skb->h.raw;
+	icmp6h = icmp6_hdr(skb);
 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
 	icmp6h->icmp6_cksum = 0;
 
@@ -274,7 +275,7 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st
#ifdef CONFIG_IPV6_MIP6
 static void mip6_addr_swap(struct sk_buff *skb)
 {
-	struct ipv6hdr *iph = skb->nh.ipv6h;
+	struct ipv6hdr *iph = ipv6_hdr(skb);
 	struct inet6_skb_parm *opt = IP6CB(skb);
 	struct ipv6_destopt_hao *hao;
 	struct in6_addr tmp;
@@ -283,7 +284,8 @@ static void mip6_addr_swap(struct sk_buff *skb)
 	if (opt->dsthao) {
 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
 		if (likely(off >= 0)) {
-			hao = (struct ipv6_destopt_hao *)(skb->nh.raw + off);
+			hao = (struct ipv6_destopt_hao *)
+					(skb_network_header(skb) + off);
 			ipv6_addr_copy(&tmp, &iph->saddr);
 			ipv6_addr_copy(&iph->saddr, &hao->addr);
 			ipv6_addr_copy(&hao->addr, &tmp);
@@ -301,7 +303,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 		 struct net_device *dev)
 {
 	struct inet6_dev *idev = NULL;
-	struct ipv6hdr *hdr = skb->nh.ipv6h;
+	struct ipv6hdr *hdr = ipv6_hdr(skb);
 	struct sock *sk;
 	struct ipv6_pinfo *np;
 	struct in6_addr *saddr = NULL;
@@ -315,7 +317,8 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 	int hlimit, tclass;
 	int err = 0;
 
-	if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail)
+	if ((u8 *)hdr < skb->head ||
+	    (skb->network_header + sizeof(*hdr)) > skb->tail)
 		return;
 
 	/*
@@ -430,7 +433,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 		tclass = 0;
 
 	msg.skb = skb;
-	msg.offset = skb->nh.raw - skb->data;
+	msg.offset = skb_network_offset(skb);
 	msg.type = type;
 
 	len = skb->len - msg.offset;
@@ -466,13 +469,15 @@ out:
 	icmpv6_xmit_unlock();
 }
 
+EXPORT_SYMBOL(icmpv6_send);
+
 static void icmpv6_echo_reply(struct sk_buff *skb)
 {
 	struct sock *sk;
 	struct inet6_dev *idev;
 	struct ipv6_pinfo *np;
 	struct in6_addr *saddr = NULL;
-	struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw;
+	struct icmp6hdr *icmph = icmp6_hdr(skb);
 	struct icmp6hdr tmp_hdr;
 	struct flowi fl;
 	struct icmpv6_msg msg;
@@ -481,7 +486,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	int hlimit;
 	int tclass;
 
-	saddr = &skb->nh.ipv6h->daddr;
+	saddr = &ipv6_hdr(skb)->daddr;
 
 	if (!ipv6_unicast_destination(skb))
 		saddr = NULL;
@@ -491,7 +496,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 
 	memset(&fl, 0, sizeof(fl));
 	fl.proto = IPPROTO_ICMPV6;
-	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
+	ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
 	if (saddr)
 		ipv6_addr_copy(&fl.fl6_src, saddr);
 	fl.oif = skb->dev->ifindex;
@@ -579,8 +584,8 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
 	if (!pskb_may_pull(skb, inner_offset+8))
 		return;
 
-	saddr = &skb->nh.ipv6h->saddr;
-	daddr = &skb->nh.ipv6h->daddr;
+	saddr = &ipv6_hdr(skb)->saddr;
+	daddr = &ipv6_hdr(skb)->daddr;
 
 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
 	   Without this we will not able f.e. to make source routed
@@ -624,8 +629,8 @@ static int icmpv6_rcv(struct sk_buff **pskb)
 
 	ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
 
-	saddr = &skb->nh.ipv6h->saddr;
-	daddr = &skb->nh.ipv6h->daddr;
+	saddr = &ipv6_hdr(skb)->saddr;
+	daddr = &ipv6_hdr(skb)->daddr;
 
 	/* Perform checksum. */
 	switch (skb->ip_summed) {
@@ -647,7 +652,7 @@ static int icmpv6_rcv(struct sk_buff **pskb)
 	if (!pskb_pull(skb, sizeof(struct icmp6hdr)))
 		goto discard_it;
 
-	hdr = (struct icmp6hdr *) skb->h.raw;
+	hdr = icmp6_hdr(skb);
 
 	type = hdr->icmp6_type;
 
@@ -673,7 +678,7 @@ static int icmpv6_rcv(struct sk_buff **pskb)
 		 */
 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 			goto discard_it;
-		hdr = (struct icmp6hdr *) skb->h.raw;
+		hdr = icmp6_hdr(skb);
 		orig_hdr = (struct ipv6hdr *) (hdr + 1);
 		rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
 				   ntohl(hdr->icmp6_mtu));
@@ -727,7 +732,8 @@ static int icmpv6_rcv(struct sk_buff **pskb)
 		 */
 
 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
-	};
+	}
+
 	kfree_skb(skb);
 	return 0;
 
@@ -860,11 +866,13 @@ int icmpv6_err_convert(int type, int code, int *err)
 	case ICMPV6_TIME_EXCEED:
 		*err = EHOSTUNREACH;
 		break;
-	};
+	}
 
 	return fatal;
 }
 
+EXPORT_SYMBOL(icmpv6_err_convert);
+
#ifdef CONFIG_SYSCTL
 ctl_table ipv6_icmp_table[] = {
 	{
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 268f476ef3db..ca08ee88d07f 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -359,7 +359,7 @@ end:
 	return res;
 }
 
-int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	unsigned int h, s_h;
 	unsigned int e = 0, s_e;
@@ -1486,6 +1486,8 @@ void __init fib6_init(void)
 			 NULL, NULL);
 
 	fib6_tables_init();
+
+	__rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib);
 }
 
 void fib6_gc_cleanup(void)
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 61e7a6c8141d..be0ee8a34f9b 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -96,12 +96,12 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	if (unlikely(!pskb_may_pull(skb, sizeof(*hdr))))
 		goto err;
 
-	hdr = skb->nh.ipv6h;
+	hdr = ipv6_hdr(skb);
 
 	if (hdr->version != 6)
 		goto err;
 
-	skb->h.raw = (u8 *)(hdr + 1);
+	skb->transport_header = skb->network_header + sizeof(*hdr);
 	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
 
 	pkt_len = ntohs(hdr->payload_len);
@@ -116,7 +116,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 			IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
 			goto drop;
 		}
-		hdr = skb->nh.ipv6h;
+		hdr = ipv6_hdr(skb);
 	}
 
 	if (hdr->nexthdr == NEXTHDR_HOP) {
@@ -160,10 +160,10 @@ static inline int ip6_input_finish(struct sk_buff *skb)
 	rcu_read_lock();
resubmit:
 	idev = ip6_dst_idev(skb->dst);
-	if (!pskb_pull(skb, skb->h.raw - skb->data))
+	if (!pskb_pull(skb, skb_transport_offset(skb)))
 		goto discard;
 	nhoff = IP6CB(skb)->nhoff;
-	nexthdr = skb->nh.raw[nhoff];
+	nexthdr = skb_network_header(skb)[nhoff];
 
 	raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]);
 	if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
@@ -181,9 +181,9 @@ resubmit:
 		   indefinitely. */
 		nf_reset(skb);
 
-		skb_postpull_rcsum(skb, skb->nh.raw,
-				   skb->h.raw - skb->nh.raw);
-		hdr = skb->nh.ipv6h;
+		skb_postpull_rcsum(skb, skb_network_header(skb),
+				   skb_network_header_len(skb));
+		hdr = ipv6_hdr(skb);
 		if (ipv6_addr_is_multicast(&hdr->daddr) &&
 		    !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr,
 		    &hdr->saddr) &&
@@ -234,7 +234,7 @@ int ip6_mc_input(struct sk_buff *skb)
 
 	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INMCASTPKTS);
 
-	hdr = skb->nh.ipv6h;
+	hdr = ipv6_hdr(skb);
 	deliver = likely(!(skb->dev->flags & (IFF_PROMISC|IFF_ALLMULTI))) ||
 	    ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL);
 
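skb_postpull_rcsum(), whose arguments the hunk above rewrites in terms of the header accessors, exists because CHECKSUM_COMPLETE packets carry a one's-complement sum over the whole buffer: once the network header is pulled, the sum over those bytes has to be folded back out. A standalone demonstration of that arithmetic on a hypothetical 8-byte packet:

#include <stdio.h>
#include <stdint.h>

/* Folded 16-bit one's-complement sum, the quantity skb->csum holds
 * (simplified: even length, host-order fold). */
static uint32_t csum(const unsigned char *p, int len)
{
	uint32_t sum = 0;

	while (len > 1) {
		sum += (p[0] << 8) | p[1];
		p += 2;
		len -= 2;
	}
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return sum;
}

int main(void)
{
	unsigned char pkt[] = { 0x60, 0x00, 0x00, 0x00, 0x11, 0x22, 0x33, 0x44 };
	uint32_t whole = csum(pkt, 8);		/* sum over header + payload */
	uint32_t hdr = csum(pkt, 4);		/* the pulled "header" bytes */
	uint32_t tail = csum(pkt + 4, 4);	/* what the sum should become */
	uint32_t fixed = whole + (~hdr & 0xffff);

	while (fixed >> 16)
		fixed = (fixed & 0xffff) + (fixed >> 16);
	printf("whole=0x%04x hdr=0x%04x -> fixed=0x%04x (tail=0x%04x)\n",
	       (unsigned)whole, (unsigned)hdr, (unsigned)fixed, (unsigned)tail);
	return 0;
}

Here fixed equals tail (0x4466), which is exactly the adjustment the helper performs for the (start, len) region handed to it.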
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 305516921aa8..f508171bab73 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -88,8 +88,8 @@ static inline int ip6_output_finish(struct sk_buff *skb)
88/* dev_loopback_xmit for use with netfilter. */ 88/* dev_loopback_xmit for use with netfilter. */
89static int ip6_dev_loopback_xmit(struct sk_buff *newskb) 89static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
90{ 90{
91 newskb->mac.raw = newskb->data; 91 skb_reset_mac_header(newskb);
92 __skb_pull(newskb, newskb->nh.raw - newskb->data); 92 __skb_pull(newskb, skb_network_offset(newskb));
93 newskb->pkt_type = PACKET_LOOPBACK; 93 newskb->pkt_type = PACKET_LOOPBACK;
94 newskb->ip_summed = CHECKSUM_UNNECESSARY; 94 newskb->ip_summed = CHECKSUM_UNNECESSARY;
95 BUG_TRAP(newskb->dst); 95 BUG_TRAP(newskb->dst);
@@ -107,13 +107,13 @@ static int ip6_output2(struct sk_buff *skb)
107 skb->protocol = htons(ETH_P_IPV6); 107 skb->protocol = htons(ETH_P_IPV6);
108 skb->dev = dev; 108 skb->dev = dev;
109 109
110 if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) { 110 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
111 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL; 111 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
112 struct inet6_dev *idev = ip6_dst_idev(skb->dst); 112 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
113 113
114 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) && 114 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
115 ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr, 115 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
116 &skb->nh.ipv6h->saddr)) { 116 &ipv6_hdr(skb)->saddr)) {
117 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 117 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
118 118
119 /* Do not check for IFF_ALLMULTI; multicast routing 119 /* Do not check for IFF_ALLMULTI; multicast routing
@@ -124,7 +124,7 @@ static int ip6_output2(struct sk_buff *skb)
124 newskb->dev, 124 newskb->dev,
125 ip6_dev_loopback_xmit); 125 ip6_dev_loopback_xmit);
126 126
127 if (skb->nh.ipv6h->hop_limit == 0) { 127 if (ipv6_hdr(skb)->hop_limit == 0) {
128 IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); 128 IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
129 kfree_skb(skb); 129 kfree_skb(skb);
130 return 0; 130 return 0;
@@ -137,9 +137,17 @@ static int ip6_output2(struct sk_buff *skb)
137 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish); 137 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
138} 138}
139 139
140static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
141{
142 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
143
144 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
145 skb->dst->dev->mtu : dst_mtu(skb->dst);
146}
147
140int ip6_output(struct sk_buff *skb) 148int ip6_output(struct sk_buff *skb)
141{ 149{
142 if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) || 150 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
143 dst_allfrag(skb->dst)) 151 dst_allfrag(skb->dst))
144 return ip6_fragment(skb, ip6_output2); 152 return ip6_fragment(skb, ip6_output2);
145 else 153 else
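
The new ip6_skb_dst_mtu() helper lets sockets that set IPV6_PMTUDISC_PROBE be checked against the attached device's MTU rather than the cached route MTU, so oversized probes reach the wire instead of being fragmented. A minimal userspace sketch of opting in, assuming the IPV6_PMTUDISC_PROBE value introduced alongside this change (older headers may not define it):

	#include <netinet/in.h>
	#include <sys/socket.h>

	#ifndef IPV6_PMTUDISC_PROBE
	#define IPV6_PMTUDISC_PROBE 3	/* assumed value from this kernel era */
	#endif

	/* Ask the kernel to size packets against the device MTU so the
	 * application can probe the path MTU itself. */
	static int enable_pmtu_probe(int fd)
	{
		int val = IPV6_PMTUDISC_PROBE;

		return setsockopt(fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
				  &val, sizeof(val));
	}
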
@@ -191,7 +199,9 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
191 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); 199 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
192 } 200 }
193 201
194 hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr)); 202 skb_push(skb, sizeof(struct ipv6hdr));
203 skb_reset_network_header(skb);
204 hdr = ipv6_hdr(skb);
195 205
196 /* 206 /*
197 * Fill in the IPv6 header 207 * Fill in the IPv6 header
@@ -239,6 +249,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
239 return -EMSGSIZE; 249 return -EMSGSIZE;
240} 250}
241 251
252EXPORT_SYMBOL(ip6_xmit);
253
242/* 254/*
243 * To avoid extra problems ND packets are sent through this 255 * To avoid extra problems ND packets are sent through this
244 * routine. It's code duplication but I really want to avoid 256 * routine. It's code duplication but I really want to avoid
@@ -259,8 +271,9 @@ int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
259 271
260 totlen = len + sizeof(struct ipv6hdr); 272 totlen = len + sizeof(struct ipv6hdr);
261 273
262 hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr)); 274 skb_reset_network_header(skb);
263 skb->nh.ipv6h = hdr; 275 skb_put(skb, sizeof(struct ipv6hdr));
276 hdr = ipv6_hdr(skb);
264 277
265 *(__be32*)hdr = htonl(0x60000000); 278 *(__be32*)hdr = htonl(0x60000000);
266 279
@@ -305,7 +318,7 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
305 318
306static int ip6_forward_proxy_check(struct sk_buff *skb) 319static int ip6_forward_proxy_check(struct sk_buff *skb)
307{ 320{
308 struct ipv6hdr *hdr = skb->nh.ipv6h; 321 struct ipv6hdr *hdr = ipv6_hdr(skb);
309 u8 nexthdr = hdr->nexthdr; 322 u8 nexthdr = hdr->nexthdr;
310 int offset; 323 int offset;
311 324
@@ -319,10 +332,11 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
319 if (nexthdr == IPPROTO_ICMPV6) { 332 if (nexthdr == IPPROTO_ICMPV6) {
320 struct icmp6hdr *icmp6; 333 struct icmp6hdr *icmp6;
321 334
322 if (!pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data)) 335 if (!pskb_may_pull(skb, (skb_network_header(skb) +
336 offset + 1 - skb->data)))
323 return 0; 337 return 0;
324 338
325 icmp6 = (struct icmp6hdr *)(skb->nh.raw + offset); 339 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
326 340
327 switch (icmp6->icmp6_type) { 341 switch (icmp6->icmp6_type) {
328 case NDISC_ROUTER_SOLICITATION: 342 case NDISC_ROUTER_SOLICITATION:
@@ -361,7 +375,7 @@ static inline int ip6_forward_finish(struct sk_buff *skb)
361int ip6_forward(struct sk_buff *skb) 375int ip6_forward(struct sk_buff *skb)
362{ 376{
363 struct dst_entry *dst = skb->dst; 377 struct dst_entry *dst = skb->dst;
364 struct ipv6hdr *hdr = skb->nh.ipv6h; 378 struct ipv6hdr *hdr = ipv6_hdr(skb);
365 struct inet6_skb_parm *opt = IP6CB(skb); 379 struct inet6_skb_parm *opt = IP6CB(skb);
366 380
367 if (ipv6_devconf.forwarding == 0) 381 if (ipv6_devconf.forwarding == 0)
@@ -372,7 +386,7 @@ int ip6_forward(struct sk_buff *skb)
372 goto drop; 386 goto drop;
373 } 387 }
374 388
375 skb->ip_summed = CHECKSUM_NONE; 389 skb_forward_csum(skb);
376 390
377 /* 391 /*
378 * We DO NOT make any processing on 392 * We DO NOT make any processing on
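
skb_forward_csum() narrows what the forwarding path used to do: instead of unconditionally clearing ip_summed, only a CHECKSUM_COMPLETE value is dropped, since that precomputed sum goes stale once forwarding rewrites headers. A sketch of the assumed helper (its actual body lives in include/linux/skbuff.h and is not shown in this diff):

	static inline void skb_forward_csum(struct sk_buff *skb)
	{
		/* a full packet checksum no longer matches after the hop
		 * limit is mangled; the other states survive forwarding */
		if (skb->ip_summed == CHECKSUM_COMPLETE)
			skb->ip_summed = CHECKSUM_NONE;
	}
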
@@ -388,7 +402,7 @@ int ip6_forward(struct sk_buff *skb)
388 * that different fragments will go along one path. --ANK 402 * that different fragments will go along one path. --ANK
389 */ 403 */
390 if (opt->ra) { 404 if (opt->ra) {
391 u8 *ptr = skb->nh.raw + opt->ra; 405 u8 *ptr = skb_network_header(skb) + opt->ra;
392 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3])) 406 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
393 return 0; 407 return 0;
394 } 408 }
@@ -470,7 +484,7 @@ int ip6_forward(struct sk_buff *skb)
470 goto drop; 484 goto drop;
471 } 485 }
472 486
473 hdr = skb->nh.ipv6h; 487 hdr = ipv6_hdr(skb);
474 488
475 /* Mangling hops number delayed to point after skb COW */ 489 /* Mangling hops number delayed to point after skb COW */
476 490
@@ -499,33 +513,18 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
499#ifdef CONFIG_NET_SCHED 513#ifdef CONFIG_NET_SCHED
500 to->tc_index = from->tc_index; 514 to->tc_index = from->tc_index;
501#endif 515#endif
502#ifdef CONFIG_NETFILTER 516 nf_copy(to, from);
503 /* Connection association is same as pre-frag packet */
504 nf_conntrack_put(to->nfct);
505 to->nfct = from->nfct;
506 nf_conntrack_get(to->nfct);
507 to->nfctinfo = from->nfctinfo;
508#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
509 nf_conntrack_put_reasm(to->nfct_reasm);
510 to->nfct_reasm = from->nfct_reasm;
511 nf_conntrack_get_reasm(to->nfct_reasm);
512#endif
513#ifdef CONFIG_BRIDGE_NETFILTER
514 nf_bridge_put(to->nf_bridge);
515 to->nf_bridge = from->nf_bridge;
516 nf_bridge_get(to->nf_bridge);
517#endif
518#endif
519 skb_copy_secmark(to, from); 517 skb_copy_secmark(to, from);
520} 518}
521 519
522int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) 520int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
523{ 521{
524 u16 offset = sizeof(struct ipv6hdr); 522 u16 offset = sizeof(struct ipv6hdr);
525 struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1); 523 struct ipv6_opt_hdr *exthdr =
526 unsigned int packet_len = skb->tail - skb->nh.raw; 524 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
525 unsigned int packet_len = skb->tail - skb->network_header;
527 int found_rhdr = 0; 526 int found_rhdr = 0;
528 *nexthdr = &skb->nh.ipv6h->nexthdr; 527 *nexthdr = &ipv6_hdr(skb)->nexthdr;
529 528
530 while (offset + 1 <= packet_len) { 529 while (offset + 1 <= packet_len) {
531 530
@@ -550,7 +549,8 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
550 549
551 offset += ipv6_optlen(exthdr); 550 offset += ipv6_optlen(exthdr);
552 *nexthdr = &exthdr->nexthdr; 551 *nexthdr = &exthdr->nexthdr;
553 exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); 552 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
553 offset);
554 } 554 }
555 555
556 return offset; 556 return offset;
@@ -574,7 +574,20 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
574 hlen = ip6_find_1stfragopt(skb, &prevhdr); 574 hlen = ip6_find_1stfragopt(skb, &prevhdr);
575 nexthdr = *prevhdr; 575 nexthdr = *prevhdr;
576 576
577 mtu = dst_mtu(&rt->u.dst); 577 mtu = ip6_skb_dst_mtu(skb);
578
579 /* We must not fragment if the socket is set to force MTU discovery
580 * or if the skb is not generated by a local socket. (This last
581 * check should be redundant, but it's free.)
582 */
583 if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) {
584 skb->dev = skb->dst->dev;
585 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
586 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
587 kfree_skb(skb);
588 return -EMSGSIZE;
589 }
590
578 if (np && np->frag_size < mtu) { 591 if (np && np->frag_size < mtu) {
579 if (np->frag_size) 592 if (np->frag_size)
580 mtu = np->frag_size; 593 mtu = np->frag_size;
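
With the hunk above, ip6_fragment() refuses to fragment locally when path-MTU discovery is in force and reports Packet Too Big instead. A reader's restatement of the new rule, assuming np == NULL means the skb was forwarded rather than generated by a local socket:

	/* Fragmenting here is only acceptable for local sockets that did
	 * not request DO/PROBE PMTU discovery; everyone else is told the
	 * path MTU via ICMPV6_PKT_TOOBIG and gets -EMSGSIZE. */
	static int ip6_may_fragment(const struct ipv6_pinfo *np)
	{
		return np && np->pmtudisc < IPV6_PMTUDISC_DO;
	}
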
@@ -616,7 +629,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
616 /* BUILD HEADER */ 629 /* BUILD HEADER */
617 630
618 *prevhdr = NEXTHDR_FRAGMENT; 631 *prevhdr = NEXTHDR_FRAGMENT;
619 tmp_hdr = kmemdup(skb->nh.raw, hlen, GFP_ATOMIC); 632 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
620 if (!tmp_hdr) { 633 if (!tmp_hdr) {
621 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); 634 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
622 return -ENOMEM; 635 return -ENOMEM;
@@ -624,8 +637,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
624 637
625 __skb_pull(skb, hlen); 638 __skb_pull(skb, hlen);
626 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr)); 639 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
627 skb->nh.raw = __skb_push(skb, hlen); 640 __skb_push(skb, hlen);
628 memcpy(skb->nh.raw, tmp_hdr, hlen); 641 skb_reset_network_header(skb);
642 memcpy(skb_network_header(skb), tmp_hdr, hlen);
629 643
630 ipv6_select_ident(skb, fh); 644 ipv6_select_ident(skb, fh);
631 fh->nexthdr = nexthdr; 645 fh->nexthdr = nexthdr;
@@ -636,7 +650,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
636 first_len = skb_pagelen(skb); 650 first_len = skb_pagelen(skb);
637 skb->data_len = first_len - skb_headlen(skb); 651 skb->data_len = first_len - skb_headlen(skb);
638 skb->len = first_len; 652 skb->len = first_len;
639 skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr)); 653 ipv6_hdr(skb)->payload_len = htons(first_len -
654 sizeof(struct ipv6hdr));
640 655
641 dst_hold(&rt->u.dst); 656 dst_hold(&rt->u.dst);
642 657
@@ -645,10 +660,12 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
645 * before previous one went down. */ 660 * before previous one went down. */
646 if (frag) { 661 if (frag) {
647 frag->ip_summed = CHECKSUM_NONE; 662 frag->ip_summed = CHECKSUM_NONE;
648 frag->h.raw = frag->data; 663 skb_reset_transport_header(frag);
649 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr)); 664 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
650 frag->nh.raw = __skb_push(frag, hlen); 665 __skb_push(frag, hlen);
651 memcpy(frag->nh.raw, tmp_hdr, hlen); 666 skb_reset_network_header(frag);
667 memcpy(skb_network_header(frag), tmp_hdr,
668 hlen);
652 offset += skb->len - hlen - sizeof(struct frag_hdr); 669 offset += skb->len - hlen - sizeof(struct frag_hdr);
653 fh->nexthdr = nexthdr; 670 fh->nexthdr = nexthdr;
654 fh->reserved = 0; 671 fh->reserved = 0;
@@ -656,7 +673,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
656 if (frag->next != NULL) 673 if (frag->next != NULL)
657 fh->frag_off |= htons(IP6_MF); 674 fh->frag_off |= htons(IP6_MF);
658 fh->identification = frag_id; 675 fh->identification = frag_id;
659 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr)); 676 ipv6_hdr(frag)->payload_len =
677 htons(frag->len -
678 sizeof(struct ipv6hdr));
660 ip6_copy_metadata(frag, skb); 679 ip6_copy_metadata(frag, skb);
661 } 680 }
662 681
@@ -733,9 +752,10 @@ slow_path:
733 ip6_copy_metadata(frag, skb); 752 ip6_copy_metadata(frag, skb);
734 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev)); 753 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
735 skb_put(frag, len + hlen + sizeof(struct frag_hdr)); 754 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
736 frag->nh.raw = frag->data; 755 skb_reset_network_header(frag);
737 fh = (struct frag_hdr*)(frag->data + hlen); 756 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
738 frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr); 757 frag->transport_header = (frag->network_header + hlen +
758 sizeof(struct frag_hdr));
739 759
740 /* 760 /*
741 * Charge the memory for the fragment to any owner 761 * Charge the memory for the fragment to any owner
@@ -747,7 +767,7 @@ slow_path:
747 /* 767 /*
748 * Copy the packet header into the new buffer. 768 * Copy the packet header into the new buffer.
749 */ 769 */
750 memcpy(frag->nh.raw, skb->data, hlen); 770 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
751 771
752 /* 772 /*
753 * Build fragment header. 773 * Build fragment header.
@@ -763,14 +783,15 @@ slow_path:
763 /* 783 /*
764 * Copy a block of the IP datagram. 784 * Copy a block of the IP datagram.
765 */ 785 */
766 if (skb_copy_bits(skb, ptr, frag->h.raw, len)) 786 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
767 BUG(); 787 BUG();
768 left -= len; 788 left -= len;
769 789
770 fh->frag_off = htons(offset); 790 fh->frag_off = htons(offset);
771 if (left > 0) 791 if (left > 0)
772 fh->frag_off |= htons(IP6_MF); 792 fh->frag_off |= htons(IP6_MF);
773 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr)); 793 ipv6_hdr(frag)->payload_len = htons(frag->len -
794 sizeof(struct ipv6hdr));
774 795
775 ptr += len; 796 ptr += len;
776 offset += len; 797 offset += len;
@@ -861,6 +882,41 @@ static int ip6_dst_lookup_tail(struct sock *sk,
861 goto out_err_release; 882 goto out_err_release;
862 } 883 }
863 884
885#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
886 /*
887 * Here if the dst entry we've looked up
888 * has a neighbour entry that is in the INCOMPLETE
889 * state and the src address from the flow is
890 * marked as OPTIMISTIC, we release the found
891 * dst entry and replace it instead with the
892 * dst entry of the nexthop router
893 */
894 if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
895 struct inet6_ifaddr *ifp;
896 struct flowi fl_gw;
897 int redirect;
898
899 ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1);
900
901 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
902 if (ifp)
903 in6_ifa_put(ifp);
904
905 if (redirect) {
906 /*
907 * We need to get the dst entry for the
908 * default router instead
909 */
910 dst_release(*dst);
911 memcpy(&fl_gw, fl, sizeof(struct flowi));
912 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
913 *dst = ip6_route_output(sk, &fl_gw);
914 if ((err = (*dst)->error))
915 goto out_err_release;
916 }
917 }
918#endif
919
864 return 0; 920 return 0;
865 921
866out_err_release: 922out_err_release:
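
The optimistic-DAD hunk relies on a routing convention: an unspecified (all-zero) destination in the flow key makes ip6_route_output() return the default route. A condensed sketch of the swap, using only calls visible above, so traffic sourced from a still-optimistic address goes through the nexthop router instead of triggering neighbour discovery:

	struct flowi fl_gw = *fl;

	dst_release(*dst);			/* drop the INCOMPLETE-neighbour dst */
	memset(&fl_gw.fl6_dst, 0, sizeof(fl_gw.fl6_dst)); /* :: selects the default route */
	*dst = ip6_route_output(sk, &fl_gw);
	err = (*dst)->error;
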
@@ -939,10 +995,10 @@ static inline int ip6_ufo_append_data(struct sock *sk,
939 skb_put(skb,fragheaderlen + transhdrlen); 995 skb_put(skb,fragheaderlen + transhdrlen);
940 996
941 /* initialize network header pointer */ 997 /* initialize network header pointer */
942 skb->nh.raw = skb->data; 998 skb_reset_network_header(skb);
943 999
944 /* initialize protocol header pointer */ 1000 /* initialize protocol header pointer */
945 skb->h.raw = skb->data + fragheaderlen; 1001 skb->transport_header = skb->network_header + fragheaderlen;
946 1002
947 skb->ip_summed = CHECKSUM_PARTIAL; 1003 skb->ip_summed = CHECKSUM_PARTIAL;
948 skb->csum = 0; 1004 skb->csum = 0;
@@ -1015,7 +1071,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1015 inet->cork.fl = *fl; 1071 inet->cork.fl = *fl;
1016 np->cork.hop_limit = hlimit; 1072 np->cork.hop_limit = hlimit;
1017 np->cork.tclass = tclass; 1073 np->cork.tclass = tclass;
1018 mtu = dst_mtu(rt->u.dst.path); 1074 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1075 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1019 if (np->frag_size < mtu) { 1076 if (np->frag_size < mtu) {
1020 if (np->frag_size) 1077 if (np->frag_size)
1021 mtu = np->frag_size; 1078 mtu = np->frag_size;
@@ -1162,10 +1219,10 @@ alloc_new_skb:
1162 * Find where to start putting bytes 1219 * Find where to start putting bytes
1163 */ 1220 */
1164 data = skb_put(skb, fraglen); 1221 data = skb_put(skb, fraglen);
1165 skb->nh.raw = data + exthdrlen; 1222 skb_set_network_header(skb, exthdrlen);
1166 data += fragheaderlen; 1223 data += fragheaderlen;
1167 skb->h.raw = data + exthdrlen; 1224 skb->transport_header = (skb->network_header +
1168 1225 fragheaderlen);
1169 if (fraggap) { 1226 if (fraggap) {
1170 skb->csum = skb_copy_and_csum_bits( 1227 skb->csum = skb_copy_and_csum_bits(
1171 skb_prev, maxfraglen, 1228 skb_prev, maxfraglen,
@@ -1288,10 +1345,10 @@ int ip6_push_pending_frames(struct sock *sk)
1288 tail_skb = &(skb_shinfo(skb)->frag_list); 1345 tail_skb = &(skb_shinfo(skb)->frag_list);
1289 1346
1290 /* move skb->data to ip header from ext header */ 1347 /* move skb->data to ip header from ext header */
1291 if (skb->data < skb->nh.raw) 1348 if (skb->data < skb_network_header(skb))
1292 __skb_pull(skb, skb->nh.raw - skb->data); 1349 __skb_pull(skb, skb_network_offset(skb));
1293 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { 1350 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1294 __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw); 1351 __skb_pull(tmp_skb, skb_network_header_len(skb));
1295 *tail_skb = tmp_skb; 1352 *tail_skb = tmp_skb;
1296 tail_skb = &(tmp_skb->next); 1353 tail_skb = &(tmp_skb->next);
1297 skb->len += tmp_skb->len; 1354 skb->len += tmp_skb->len;
@@ -1303,13 +1360,15 @@ int ip6_push_pending_frames(struct sock *sk)
1303 } 1360 }
1304 1361
1305 ipv6_addr_copy(final_dst, &fl->fl6_dst); 1362 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1306 __skb_pull(skb, skb->h.raw - skb->nh.raw); 1363 __skb_pull(skb, skb_network_header_len(skb));
1307 if (opt && opt->opt_flen) 1364 if (opt && opt->opt_flen)
1308 ipv6_push_frag_opts(skb, opt, &proto); 1365 ipv6_push_frag_opts(skb, opt, &proto);
1309 if (opt && opt->opt_nflen) 1366 if (opt && opt->opt_nflen)
1310 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); 1367 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1311 1368
1312 skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr)); 1369 skb_push(skb, sizeof(struct ipv6hdr));
1370 skb_reset_network_header(skb);
1371 hdr = ipv6_hdr(skb);
1313 1372
1314 *(__be32*)hdr = fl->fl6_flowlabel | 1373 *(__be32*)hdr = fl->fl6_flowlabel |
1315 htonl(0x60000000 | ((int)np->cork.tclass << 20)); 1374 htonl(0x60000000 | ((int)np->cork.tclass << 20));
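
Most of the churn in ip6_output.c is the same mechanical conversion: raw pointers cached in the old skb->nh/skb->h unions become recorded header positions plus typed accessors, which also permits an offset-based representation where a cached pointer would go stale after head reallocation. The recurring idiom, extracted as a sketch:

	/* old: cache a typed pointer at push time */
	skb->nh.ipv6h = (struct ipv6hdr *)skb_push(skb, sizeof(struct ipv6hdr));

	/* new: record the position, derive typed pointers on demand */
	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);	/* network header = current skb->data */
	hdr = ipv6_hdr(skb);		/* typed view of that position */
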
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 08d944223ec8..a0902fbdb4e1 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1,14 +1,15 @@
1/* 1/*
2 * IPv6 over IPv6 tunnel device 2 * IPv6 tunneling device
3 * Linux INET6 implementation 3 * Linux INET6 implementation
4 * 4 *
5 * Authors: 5 * Authors:
6 * Ville Nuorvala <vnuorval@tcs.hut.fi> 6 * Ville Nuorvala <vnuorval@tcs.hut.fi>
7 * Yasuyuki Kozakai <kozakai@linux-ipv6.org>
7 * 8 *
8 * $Id$ 9 * $Id$
9 * 10 *
10 * Based on: 11 * Based on:
11 * linux/net/ipv6/sit.c 12 * linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
12 * 13 *
13 * RFC 2473 14 * RFC 2473
14 * 15 *
@@ -24,6 +25,7 @@
24#include <linux/errno.h> 25#include <linux/errno.h>
25#include <linux/types.h> 26#include <linux/types.h>
26#include <linux/sockios.h> 27#include <linux/sockios.h>
28#include <linux/icmp.h>
27#include <linux/if.h> 29#include <linux/if.h>
28#include <linux/in.h> 30#include <linux/in.h>
29#include <linux/ip.h> 31#include <linux/ip.h>
@@ -41,6 +43,7 @@
41#include <asm/uaccess.h> 43#include <asm/uaccess.h>
42#include <asm/atomic.h> 44#include <asm/atomic.h>
43 45
46#include <net/icmp.h>
44#include <net/ip.h> 47#include <net/ip.h>
45#include <net/ipv6.h> 48#include <net/ipv6.h>
46#include <net/ip6_route.h> 49#include <net/ip6_route.h>
@@ -51,7 +54,7 @@
51#include <net/inet_ecn.h> 54#include <net/inet_ecn.h>
52 55
53MODULE_AUTHOR("Ville Nuorvala"); 56MODULE_AUTHOR("Ville Nuorvala");
54MODULE_DESCRIPTION("IPv6-in-IPv6 tunnel"); 57MODULE_DESCRIPTION("IPv6 tunneling device");
55MODULE_LICENSE("GPL"); 58MODULE_LICENSE("GPL");
56 59
57#define IPV6_TLV_TEL_DST_SIZE 8 60#define IPV6_TLV_TEL_DST_SIZE 8
@@ -63,6 +66,7 @@ MODULE_LICENSE("GPL");
63#endif 66#endif
64 67
65#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) 68#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
69#define IPV6_TCLASS_SHIFT 20
66 70
67#define HASH_SIZE 32 71#define HASH_SIZE 32
68 72
@@ -70,12 +74,12 @@ MODULE_LICENSE("GPL");
70 (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \ 74 (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
71 (HASH_SIZE - 1)) 75 (HASH_SIZE - 1))
72 76
73static int ip6ip6_fb_tnl_dev_init(struct net_device *dev); 77static int ip6_fb_tnl_dev_init(struct net_device *dev);
74static int ip6ip6_tnl_dev_init(struct net_device *dev); 78static int ip6_tnl_dev_init(struct net_device *dev);
75static void ip6ip6_tnl_dev_setup(struct net_device *dev); 79static void ip6_tnl_dev_setup(struct net_device *dev);
76 80
77/* the IPv6 tunnel fallback device */ 81/* the IPv6 tunnel fallback device */
78static struct net_device *ip6ip6_fb_tnl_dev; 82static struct net_device *ip6_fb_tnl_dev;
79 83
80 84
81/* lists for storing tunnels in use */ 85/* lists for storing tunnels in use */
@@ -84,7 +88,7 @@ static struct ip6_tnl *tnls_wc[1];
84static struct ip6_tnl **tnls[2] = { tnls_wc, tnls_r_l }; 88static struct ip6_tnl **tnls[2] = { tnls_wc, tnls_r_l };
85 89
86/* lock for the tunnel lists */ 90/* lock for the tunnel lists */
87static DEFINE_RWLOCK(ip6ip6_lock); 91static DEFINE_RWLOCK(ip6_tnl_lock);
88 92
89static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) 93static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
90{ 94{
@@ -115,7 +119,7 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
115} 119}
116 120
117/** 121/**
118 * ip6ip6_tnl_lookup - fetch tunnel matching the end-point addresses 122 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
119 * @remote: the address of the tunnel exit-point 123 * @remote: the address of the tunnel exit-point
120 * @local: the address of the tunnel entry-point 124 * @local: the address of the tunnel entry-point
121 * 125 *
@@ -126,7 +130,7 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
126 **/ 130 **/
127 131
128static struct ip6_tnl * 132static struct ip6_tnl *
129ip6ip6_tnl_lookup(struct in6_addr *remote, struct in6_addr *local) 133ip6_tnl_lookup(struct in6_addr *remote, struct in6_addr *local)
130{ 134{
131 unsigned h0 = HASH(remote); 135 unsigned h0 = HASH(remote);
132 unsigned h1 = HASH(local); 136 unsigned h1 = HASH(local);
@@ -145,18 +149,18 @@ ip6ip6_tnl_lookup(struct in6_addr *remote, struct in6_addr *local)
145} 149}
146 150
147/** 151/**
148 * ip6ip6_bucket - get head of list matching given tunnel parameters 152 * ip6_tnl_bucket - get head of list matching given tunnel parameters
149 * @p: parameters containing tunnel end-points 153 * @p: parameters containing tunnel end-points
150 * 154 *
151 * Description: 155 * Description:
152 * ip6ip6_bucket() returns the head of the list matching the 156 * ip6_tnl_bucket() returns the head of the list matching the
153 * &struct in6_addr entries laddr and raddr in @p. 157 * &struct in6_addr entries laddr and raddr in @p.
154 * 158 *
155 * Return: head of IPv6 tunnel list 159 * Return: head of IPv6 tunnel list
156 **/ 160 **/
157 161
158static struct ip6_tnl ** 162static struct ip6_tnl **
159ip6ip6_bucket(struct ip6_tnl_parm *p) 163ip6_tnl_bucket(struct ip6_tnl_parm *p)
160{ 164{
161 struct in6_addr *remote = &p->raddr; 165 struct in6_addr *remote = &p->raddr;
162 struct in6_addr *local = &p->laddr; 166 struct in6_addr *local = &p->laddr;
@@ -171,36 +175,36 @@ ip6ip6_bucket(struct ip6_tnl_parm *p)
171} 175}
172 176
173/** 177/**
174 * ip6ip6_tnl_link - add tunnel to hash table 178 * ip6_tnl_link - add tunnel to hash table
175 * @t: tunnel to be added 179 * @t: tunnel to be added
176 **/ 180 **/
177 181
178static void 182static void
179ip6ip6_tnl_link(struct ip6_tnl *t) 183ip6_tnl_link(struct ip6_tnl *t)
180{ 184{
181 struct ip6_tnl **tp = ip6ip6_bucket(&t->parms); 185 struct ip6_tnl **tp = ip6_tnl_bucket(&t->parms);
182 186
183 t->next = *tp; 187 t->next = *tp;
184 write_lock_bh(&ip6ip6_lock); 188 write_lock_bh(&ip6_tnl_lock);
185 *tp = t; 189 *tp = t;
186 write_unlock_bh(&ip6ip6_lock); 190 write_unlock_bh(&ip6_tnl_lock);
187} 191}
188 192
189/** 193/**
190 * ip6ip6_tnl_unlink - remove tunnel from hash table 194 * ip6_tnl_unlink - remove tunnel from hash table
191 * @t: tunnel to be removed 195 * @t: tunnel to be removed
192 **/ 196 **/
193 197
194static void 198static void
195ip6ip6_tnl_unlink(struct ip6_tnl *t) 199ip6_tnl_unlink(struct ip6_tnl *t)
196{ 200{
197 struct ip6_tnl **tp; 201 struct ip6_tnl **tp;
198 202
199 for (tp = ip6ip6_bucket(&t->parms); *tp; tp = &(*tp)->next) { 203 for (tp = ip6_tnl_bucket(&t->parms); *tp; tp = &(*tp)->next) {
200 if (t == *tp) { 204 if (t == *tp) {
201 write_lock_bh(&ip6ip6_lock); 205 write_lock_bh(&ip6_tnl_lock);
202 *tp = t->next; 206 *tp = t->next;
203 write_unlock_bh(&ip6ip6_lock); 207 write_unlock_bh(&ip6_tnl_lock);
204 break; 208 break;
205 } 209 }
206 } 210 }
@@ -237,12 +241,12 @@ static struct ip6_tnl *ip6_tnl_create(struct ip6_tnl_parm *p)
237 if (i == IP6_TNL_MAX) 241 if (i == IP6_TNL_MAX)
238 goto failed; 242 goto failed;
239 } 243 }
240 dev = alloc_netdev(sizeof (*t), name, ip6ip6_tnl_dev_setup); 244 dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup);
241 if (dev == NULL) 245 if (dev == NULL)
242 goto failed; 246 goto failed;
243 247
244 t = netdev_priv(dev); 248 t = netdev_priv(dev);
245 dev->init = ip6ip6_tnl_dev_init; 249 dev->init = ip6_tnl_dev_init;
246 t->parms = *p; 250 t->parms = *p;
247 251
248 if ((err = register_netdevice(dev)) < 0) { 252 if ((err = register_netdevice(dev)) < 0) {
@@ -250,19 +254,19 @@ static struct ip6_tnl *ip6_tnl_create(struct ip6_tnl_parm *p)
250 goto failed; 254 goto failed;
251 } 255 }
252 dev_hold(dev); 256 dev_hold(dev);
253 ip6ip6_tnl_link(t); 257 ip6_tnl_link(t);
254 return t; 258 return t;
255failed: 259failed:
256 return NULL; 260 return NULL;
257} 261}
258 262
259/** 263/**
260 * ip6ip6_tnl_locate - find or create tunnel matching given parameters 264 * ip6_tnl_locate - find or create tunnel matching given parameters
261 * @p: tunnel parameters 265 * @p: tunnel parameters
262 * @create: != 0 if allowed to create new tunnel if no match found 266 * @create: != 0 if allowed to create new tunnel if no match found
263 * 267 *
264 * Description: 268 * Description:
265 * ip6ip6_tnl_locate() first tries to locate an existing tunnel 269 * ip6_tnl_locate() first tries to locate an existing tunnel
266 * based on @parms. If this is unsuccessful, but @create is set, a new 270 * based on @parms. If this is unsuccessful, but @create is set, a new
267 * tunnel device is created and registered for use. 271 * tunnel device is created and registered for use.
268 * 272 *
@@ -270,13 +274,13 @@ failed:
270 * matching tunnel or NULL 274 * matching tunnel or NULL
271 **/ 275 **/
272 276
273static struct ip6_tnl *ip6ip6_tnl_locate(struct ip6_tnl_parm *p, int create) 277static struct ip6_tnl *ip6_tnl_locate(struct ip6_tnl_parm *p, int create)
274{ 278{
275 struct in6_addr *remote = &p->raddr; 279 struct in6_addr *remote = &p->raddr;
276 struct in6_addr *local = &p->laddr; 280 struct in6_addr *local = &p->laddr;
277 struct ip6_tnl *t; 281 struct ip6_tnl *t;
278 282
279 for (t = *ip6ip6_bucket(p); t; t = t->next) { 283 for (t = *ip6_tnl_bucket(p); t; t = t->next) {
280 if (ipv6_addr_equal(local, &t->parms.laddr) && 284 if (ipv6_addr_equal(local, &t->parms.laddr) &&
281 ipv6_addr_equal(remote, &t->parms.raddr)) 285 ipv6_addr_equal(remote, &t->parms.raddr))
282 return t; 286 return t;
@@ -287,24 +291,24 @@ static struct ip6_tnl *ip6ip6_tnl_locate(struct ip6_tnl_parm *p, int create)
287} 291}
288 292
289/** 293/**
290 * ip6ip6_tnl_dev_uninit - tunnel device uninitializer 294 * ip6_tnl_dev_uninit - tunnel device uninitializer
291 * @dev: the device to be destroyed 295 * @dev: the device to be destroyed
292 * 296 *
293 * Description: 297 * Description:
294 * ip6ip6_tnl_dev_uninit() removes tunnel from its list 298 * ip6_tnl_dev_uninit() removes tunnel from its list
295 **/ 299 **/
296 300
297static void 301static void
298ip6ip6_tnl_dev_uninit(struct net_device *dev) 302ip6_tnl_dev_uninit(struct net_device *dev)
299{ 303{
300 struct ip6_tnl *t = netdev_priv(dev); 304 struct ip6_tnl *t = netdev_priv(dev);
301 305
302 if (dev == ip6ip6_fb_tnl_dev) { 306 if (dev == ip6_fb_tnl_dev) {
303 write_lock_bh(&ip6ip6_lock); 307 write_lock_bh(&ip6_tnl_lock);
304 tnls_wc[0] = NULL; 308 tnls_wc[0] = NULL;
305 write_unlock_bh(&ip6ip6_lock); 309 write_unlock_bh(&ip6_tnl_lock);
306 } else { 310 } else {
307 ip6ip6_tnl_unlink(t); 311 ip6_tnl_unlink(t);
308 } 312 }
309 ip6_tnl_dst_reset(t); 313 ip6_tnl_dst_reset(t);
310 dev_put(dev); 314 dev_put(dev);
@@ -372,16 +376,16 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
372} 376}
373 377
374/** 378/**
375 * ip6ip6_err - tunnel error handler 379 * ip6_tnl_err - tunnel error handler
376 * 380 *
377 * Description: 381 * Description:
378 * ip6ip6_err() should handle errors in the tunnel according 382 * ip6_tnl_err() should handle errors in the tunnel according
379 * to the specifications in RFC 2473. 383 * to the specifications in RFC 2473.
380 **/ 384 **/
381 385
382static int 386static int
383ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 387ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
384 int type, int code, int offset, __be32 info) 388 int *type, int *code, int *msg, __be32 *info, int offset)
385{ 389{
386 struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data; 390 struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data;
387 struct ip6_tnl *t; 391 struct ip6_tnl *t;
@@ -396,13 +400,16 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
396 in trouble since we might need the source address for further 400 in trouble since we might need the source address for further
397 processing of the error. */ 401 processing of the error. */
398 402
399 read_lock(&ip6ip6_lock); 403 read_lock(&ip6_tnl_lock);
400 if ((t = ip6ip6_tnl_lookup(&ipv6h->daddr, &ipv6h->saddr)) == NULL) 404 if ((t = ip6_tnl_lookup(&ipv6h->daddr, &ipv6h->saddr)) == NULL)
405 goto out;
406
407 if (t->parms.proto != ipproto && t->parms.proto != 0)
401 goto out; 408 goto out;
402 409
403 err = 0; 410 err = 0;
404 411
405 switch (type) { 412 switch (*type) {
406 __u32 teli; 413 __u32 teli;
407 struct ipv6_tlv_tnl_enc_lim *tel; 414 struct ipv6_tlv_tnl_enc_lim *tel;
408 __u32 mtu; 415 __u32 mtu;
@@ -414,7 +421,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
414 rel_msg = 1; 421 rel_msg = 1;
415 break; 422 break;
416 case ICMPV6_TIME_EXCEED: 423 case ICMPV6_TIME_EXCEED:
417 if (code == ICMPV6_EXC_HOPLIMIT) { 424 if ((*code) == ICMPV6_EXC_HOPLIMIT) {
418 if (net_ratelimit()) 425 if (net_ratelimit())
419 printk(KERN_WARNING 426 printk(KERN_WARNING
420 "%s: Too small hop limit or " 427 "%s: Too small hop limit or "
@@ -425,10 +432,10 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
425 break; 432 break;
426 case ICMPV6_PARAMPROB: 433 case ICMPV6_PARAMPROB:
427 teli = 0; 434 teli = 0;
428 if (code == ICMPV6_HDR_FIELD) 435 if ((*code) == ICMPV6_HDR_FIELD)
429 teli = parse_tlv_tnl_enc_lim(skb, skb->data); 436 teli = parse_tlv_tnl_enc_lim(skb, skb->data);
430 437
431 if (teli && teli == ntohl(info) - 2) { 438 if (teli && teli == ntohl(*info) - 2) {
432 tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; 439 tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
433 if (tel->encap_limit == 0) { 440 if (tel->encap_limit == 0) {
434 if (net_ratelimit()) 441 if (net_ratelimit())
@@ -445,7 +452,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
445 } 452 }
446 break; 453 break;
447 case ICMPV6_PKT_TOOBIG: 454 case ICMPV6_PKT_TOOBIG:
448 mtu = ntohl(info) - offset; 455 mtu = ntohl(*info) - offset;
449 if (mtu < IPV6_MIN_MTU) 456 if (mtu < IPV6_MIN_MTU)
450 mtu = IPV6_MIN_MTU; 457 mtu = IPV6_MIN_MTU;
451 t->dev->mtu = mtu; 458 t->dev->mtu = mtu;
@@ -458,20 +465,144 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
458 } 465 }
459 break; 466 break;
460 } 467 }
461 if (rel_msg && pskb_may_pull(skb, offset + sizeof (*ipv6h))) { 468
469 *type = rel_type;
470 *code = rel_code;
471 *info = rel_info;
472 *msg = rel_msg;
473
474out:
475 read_unlock(&ip6_tnl_lock);
476 return err;
477}
478
479static int
480ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
481 int type, int code, int offset, __u32 info)
482{
483 int rel_msg = 0;
484 int rel_type = type;
485 int rel_code = code;
486 __u32 rel_info = info;
487 int err;
488 struct sk_buff *skb2;
489 struct iphdr *eiph;
490 struct flowi fl;
491 struct rtable *rt;
492
493 err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
494 &rel_msg, &rel_info, offset);
495 if (err < 0)
496 return err;
497
498 if (rel_msg == 0)
499 return 0;
500
501 switch (rel_type) {
502 case ICMPV6_DEST_UNREACH:
503 if (rel_code != ICMPV6_ADDR_UNREACH)
504 return 0;
505 rel_type = ICMP_DEST_UNREACH;
506 rel_code = ICMP_HOST_UNREACH;
507 break;
508 case ICMPV6_PKT_TOOBIG:
509 if (rel_code != 0)
510 return 0;
511 rel_type = ICMP_DEST_UNREACH;
512 rel_code = ICMP_FRAG_NEEDED;
513 break;
514 default:
515 return 0;
516 }
517
518 if (!pskb_may_pull(skb, offset + sizeof(struct iphdr)))
519 return 0;
520
521 skb2 = skb_clone(skb, GFP_ATOMIC);
522 if (!skb2)
523 return 0;
524
525 dst_release(skb2->dst);
526 skb2->dst = NULL;
527 skb_pull(skb2, offset);
528 skb_reset_network_header(skb2);
529 eiph = ip_hdr(skb2);
530
531 /* Try to guess incoming interface */
532 memset(&fl, 0, sizeof(fl));
533 fl.fl4_dst = eiph->saddr;
534 fl.fl4_tos = RT_TOS(eiph->tos);
535 fl.proto = IPPROTO_IPIP;
536 if (ip_route_output_key(&rt, &fl))
537 goto out;
538
539 skb2->dev = rt->u.dst.dev;
540
541 /* route "incoming" packet */
542 if (rt->rt_flags & RTCF_LOCAL) {
543 ip_rt_put(rt);
544 rt = NULL;
545 fl.fl4_dst = eiph->daddr;
546 fl.fl4_src = eiph->saddr;
547 fl.fl4_tos = eiph->tos;
548 if (ip_route_output_key(&rt, &fl) ||
549 rt->u.dst.dev->type != ARPHRD_TUNNEL) {
550 ip_rt_put(rt);
551 goto out;
552 }
553 } else {
554 ip_rt_put(rt);
555 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
556 skb2->dev) ||
557 skb2->dst->dev->type != ARPHRD_TUNNEL)
558 goto out;
559 }
560
561 /* change mtu on this route */
562 if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
563 if (rel_info > dst_mtu(skb2->dst))
564 goto out;
565
566 skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
567 rel_info = htonl(rel_info);
568 }
569
570 icmp_send(skb2, rel_type, rel_code, rel_info);
571
572out:
573 kfree_skb(skb2);
574 return 0;
575}
576
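
ip4ip6_err() relays errors seen on the IPv6 tunnel path back to the original IPv4 sender, which means translating ICMPv6 into the nearest ICMPv4 equivalent; only two cases are relayed at all. A reader's restatement of the mapping (not part of the patch):

	/* Returns nonzero when the error should be relayed into IPv4. */
	static int icmpv6_to_icmpv4(int *type, int *code)
	{
		if (*type == ICMPV6_DEST_UNREACH && *code == ICMPV6_ADDR_UNREACH) {
			*type = ICMP_DEST_UNREACH;
			*code = ICMP_HOST_UNREACH;
			return 1;
		}
		if (*type == ICMPV6_PKT_TOOBIG && *code == 0) {
			*type = ICMP_DEST_UNREACH;
			*code = ICMP_FRAG_NEEDED;	/* carries the reduced MTU */
			return 1;
		}
		return 0;	/* everything else is dropped */
	}
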
577static int
578ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
579 int type, int code, int offset, __u32 info)
580{
581 int rel_msg = 0;
582 int rel_type = type;
583 int rel_code = code;
584 __u32 rel_info = info;
585 int err;
586
587 err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
588 &rel_msg, &rel_info, offset);
589 if (err < 0)
590 return err;
591
592 if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) {
462 struct rt6_info *rt; 593 struct rt6_info *rt;
463 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 594 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
464 595
465 if (!skb2) 596 if (!skb2)
466 goto out; 597 return 0;
467 598
468 dst_release(skb2->dst); 599 dst_release(skb2->dst);
469 skb2->dst = NULL; 600 skb2->dst = NULL;
470 skb_pull(skb2, offset); 601 skb_pull(skb2, offset);
471 skb2->nh.raw = skb2->data; 602 skb_reset_network_header(skb2);
472 603
473 /* Try to guess incoming interface */ 604 /* Try to guess incoming interface */
474 rt = rt6_lookup(&skb2->nh.ipv6h->saddr, NULL, 0, 0); 605 rt = rt6_lookup(&ipv6_hdr(skb2)->saddr, NULL, 0, 0);
475 606
476 if (rt && rt->rt6i_dev) 607 if (rt && rt->rt6i_dev)
477 skb2->dev = rt->rt6i_dev; 608 skb2->dev = rt->rt6i_dev;
@@ -483,19 +614,34 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
483 614
484 kfree_skb(skb2); 615 kfree_skb(skb2);
485 } 616 }
486out: 617
487 read_unlock(&ip6ip6_lock); 618 return 0;
488 return err;
489} 619}
490 620
491static inline void ip6ip6_ecn_decapsulate(struct ipv6hdr *outer_iph, 621static void ip4ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
492 struct sk_buff *skb) 622 struct ipv6hdr *ipv6h,
623 struct sk_buff *skb)
493{ 624{
494 struct ipv6hdr *inner_iph = skb->nh.ipv6h; 625 __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
495 626
496 if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph))) 627 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
497 IP6_ECN_set_ce(inner_iph); 628 ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
629
630 if (INET_ECN_is_ce(dsfield))
631 IP_ECN_set_ce(ip_hdr(skb));
632}
633
634static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
635 struct ipv6hdr *ipv6h,
636 struct sk_buff *skb)
637{
638 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
639 ipv6_copy_dscp(ipv6h, ipv6_hdr(skb));
640
641 if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
642 IP6_ECN_set_ce(ipv6_hdr(skb));
498} 643}
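
The two decapsulation callbacks differ only in which inner header they touch; the policy is shared: optionally restore the outer DSCP when IP6_TNL_F_RCV_DSCP_COPY is set, and always propagate a Congestion Experienced mark inward so ECN survives the tunnel. The common ECN half, restated as a sketch:

	static void tnl_propagate_ce(__u8 outer_dsfield, struct sk_buff *skb,
				     int inner_is_ipv4)
	{
		if (!INET_ECN_is_ce(outer_dsfield))
			return;
		/* a router on the tunnel path signalled congestion */
		if (inner_is_ipv4)
			IP_ECN_set_ce(ip_hdr(skb));
		else
			IP6_ECN_set_ce(ipv6_hdr(skb));
	}
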
644
499static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t) 645static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
500{ 646{
501 struct ip6_tnl_parm *p = &t->parms; 647 struct ip6_tnl_parm *p = &t->parms;
@@ -519,53 +665,61 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
519} 665}
520 666
521/** 667/**
522 * ip6ip6_rcv - decapsulate IPv6 packet and retransmit it locally 668 * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
523 * @skb: received socket buffer 669 * @skb: received socket buffer
670 * @protocol: ethernet protocol ID
671 * @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN
524 * 672 *
525 * Return: 0 673 * Return: 0
526 **/ 674 **/
527 675
528static int 676static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
529ip6ip6_rcv(struct sk_buff *skb) 677 __u8 ipproto,
678 void (*dscp_ecn_decapsulate)(struct ip6_tnl *t,
679 struct ipv6hdr *ipv6h,
680 struct sk_buff *skb))
530{ 681{
531 struct ipv6hdr *ipv6h;
532 struct ip6_tnl *t; 682 struct ip6_tnl *t;
683 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
533 684
534 ipv6h = skb->nh.ipv6h; 685 read_lock(&ip6_tnl_lock);
535 686
536 read_lock(&ip6ip6_lock); 687 if ((t = ip6_tnl_lookup(&ipv6h->saddr, &ipv6h->daddr)) != NULL) {
688 if (t->parms.proto != ipproto && t->parms.proto != 0) {
689 read_unlock(&ip6_tnl_lock);
690 goto discard;
691 }
537 692
538 if ((t = ip6ip6_tnl_lookup(&ipv6h->saddr, &ipv6h->daddr)) != NULL) {
539 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 693 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
540 read_unlock(&ip6ip6_lock); 694 read_unlock(&ip6_tnl_lock);
541 goto discard; 695 goto discard;
542 } 696 }
543 697
544 if (!ip6_tnl_rcv_ctl(t)) { 698 if (!ip6_tnl_rcv_ctl(t)) {
545 t->stat.rx_dropped++; 699 t->stat.rx_dropped++;
546 read_unlock(&ip6ip6_lock); 700 read_unlock(&ip6_tnl_lock);
547 goto discard; 701 goto discard;
548 } 702 }
549 secpath_reset(skb); 703 secpath_reset(skb);
550 skb->mac.raw = skb->nh.raw; 704 skb->mac_header = skb->network_header;
551 skb->nh.raw = skb->data; 705 skb_reset_network_header(skb);
552 skb->protocol = htons(ETH_P_IPV6); 706 skb->protocol = htons(protocol);
553 skb->pkt_type = PACKET_HOST; 707 skb->pkt_type = PACKET_HOST;
554 memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); 708 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
555 skb->dev = t->dev; 709 skb->dev = t->dev;
556 dst_release(skb->dst); 710 dst_release(skb->dst);
557 skb->dst = NULL; 711 skb->dst = NULL;
558 nf_reset(skb); 712 nf_reset(skb);
559 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) 713
560 ipv6_copy_dscp(ipv6h, skb->nh.ipv6h); 714 dscp_ecn_decapsulate(t, ipv6h, skb);
561 ip6ip6_ecn_decapsulate(ipv6h, skb); 715
562 t->stat.rx_packets++; 716 t->stat.rx_packets++;
563 t->stat.rx_bytes += skb->len; 717 t->stat.rx_bytes += skb->len;
564 netif_rx(skb); 718 netif_rx(skb);
565 read_unlock(&ip6ip6_lock); 719 read_unlock(&ip6_tnl_lock);
566 return 0; 720 return 0;
567 } 721 }
568 read_unlock(&ip6ip6_lock); 722 read_unlock(&ip6_tnl_lock);
569 return 1; 723 return 1;
570 724
571discard: 725discard:
@@ -573,6 +727,18 @@ discard:
573 return 0; 727 return 0;
574} 728}
575 729
730static int ip4ip6_rcv(struct sk_buff *skb)
731{
732 return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP,
733 ip4ip6_dscp_ecn_decapsulate);
734}
735
736static int ip6ip6_rcv(struct sk_buff *skb)
737{
738 return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6,
739 ip6ip6_dscp_ecn_decapsulate);
740}
741
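
The receive path is now generic: ip6_tnl_rcv() is parameterised by ethertype, inner IP protocol and a DSCP/ECN callback, and the same protocol check appears in both the receive and error paths. That check treats a zero parms.proto as a wildcard, restated here as a sketch:

	/* A tunnel configured with proto == 0 accepts either payload;
	 * otherwise the inner protocol must match exactly. */
	static int ip6_tnl_proto_matches(const struct ip6_tnl *t, __u8 ipproto)
	{
		return t->parms.proto == ipproto || t->parms.proto == 0;
	}
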
576struct ipv6_tel_txoption { 742struct ipv6_tel_txoption {
577 struct ipv6_txoptions ops; 743 struct ipv6_txoptions ops;
578 __u8 dst_opt[8]; 744 __u8 dst_opt[8];
@@ -593,7 +759,7 @@ static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
593} 759}
594 760
595/** 761/**
596 * ip6ip6_tnl_addr_conflict - compare packet addresses to tunnel's own 762 * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
597 * @t: the outgoing tunnel device 763 * @t: the outgoing tunnel device
598 * @hdr: IPv6 header from the incoming packet 764 * @hdr: IPv6 header from the incoming packet
599 * 765 *
@@ -607,7 +773,7 @@ static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
607 **/ 773 **/
608 774
609static inline int 775static inline int
610ip6ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr) 776ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
611{ 777{
612 return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); 778 return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
613} 779}
@@ -641,72 +807,49 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
641 return ret; 807 return ret;
642} 808}
643/** 809/**
644 * ip6ip6_tnl_xmit - encapsulate packet and send 810 * ip6_tnl_xmit2 - encapsulate packet and send
645 * @skb: the outgoing socket buffer 811 * @skb: the outgoing socket buffer
646 * @dev: the outgoing tunnel device 812 * @dev: the outgoing tunnel device
813 * @dsfield: dscp code for outer header
814 * @fl: flow of tunneled packet
815 * @encap_limit: encapsulation limit
816 * @pmtu: Path MTU is stored if packet is too big
647 * 817 *
648 * Description: 818 * Description:
649 * Build new header and do some sanity checks on the packet before sending 819 * Build new header and do some sanity checks on the packet before sending
650 * it. 820 * it.
651 * 821 *
652 * Return: 822 * Return:
653 * 0 823 * 0 on success
824 * -1 on failure
825 * %-EMSGSIZE message too big; the discovered mtu is stored in @pmtu
654 **/ 826 **/
655 827
656static int 828static int ip6_tnl_xmit2(struct sk_buff *skb,
657ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) 829 struct net_device *dev,
830 __u8 dsfield,
831 struct flowi *fl,
832 int encap_limit,
833 __u32 *pmtu)
658{ 834{
659 struct ip6_tnl *t = netdev_priv(dev); 835 struct ip6_tnl *t = netdev_priv(dev);
660 struct net_device_stats *stats = &t->stat; 836 struct net_device_stats *stats = &t->stat;
661 struct ipv6hdr *ipv6h = skb->nh.ipv6h; 837 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
662 int encap_limit = -1;
663 struct ipv6_tel_txoption opt; 838 struct ipv6_tel_txoption opt;
664 __u16 offset;
665 struct flowi fl;
666 struct dst_entry *dst; 839 struct dst_entry *dst;
667 struct net_device *tdev; 840 struct net_device *tdev;
668 int mtu; 841 int mtu;
669 int max_headroom = sizeof(struct ipv6hdr); 842 int max_headroom = sizeof(struct ipv6hdr);
670 u8 proto; 843 u8 proto;
671 int err; 844 int err = -1;
672 int pkt_len; 845 int pkt_len;
673 int dsfield;
674
675 if (t->recursion++) {
676 stats->collisions++;
677 goto tx_err;
678 }
679 if (skb->protocol != htons(ETH_P_IPV6) ||
680 !ip6_tnl_xmit_ctl(t) || ip6ip6_tnl_addr_conflict(t, ipv6h))
681 goto tx_err;
682
683 if ((offset = parse_tlv_tnl_enc_lim(skb, skb->nh.raw)) > 0) {
684 struct ipv6_tlv_tnl_enc_lim *tel;
685 tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->nh.raw[offset];
686 if (tel->encap_limit == 0) {
687 icmpv6_send(skb, ICMPV6_PARAMPROB,
688 ICMPV6_HDR_FIELD, offset + 2, skb->dev);
689 goto tx_err;
690 }
691 encap_limit = tel->encap_limit - 1;
692 } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
693 encap_limit = t->parms.encap_limit;
694
695 memcpy(&fl, &t->fl, sizeof (fl));
696 proto = fl.proto;
697
698 dsfield = ipv6_get_dsfield(ipv6h);
699 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
700 fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
701 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
702 fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
703 846
704 if ((dst = ip6_tnl_dst_check(t)) != NULL) 847 if ((dst = ip6_tnl_dst_check(t)) != NULL)
705 dst_hold(dst); 848 dst_hold(dst);
706 else { 849 else {
707 dst = ip6_route_output(NULL, &fl); 850 dst = ip6_route_output(NULL, fl);
708 851
709 if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0) < 0) 852 if (dst->error || xfrm_lookup(&dst, fl, NULL, 0) < 0)
710 goto tx_err_link_failure; 853 goto tx_err_link_failure;
711 } 854 }
712 855
@@ -730,7 +873,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
730 if (skb->dst) 873 if (skb->dst)
731 skb->dst->ops->update_pmtu(skb->dst, mtu); 874 skb->dst->ops->update_pmtu(skb->dst, mtu);
732 if (skb->len > mtu) { 875 if (skb->len > mtu) {
733 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); 876 *pmtu = mtu;
877 err = -EMSGSIZE;
734 goto tx_err_dst_release; 878 goto tx_err_dst_release;
735 } 879 }
736 880
@@ -754,22 +898,24 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
754 dst_release(skb->dst); 898 dst_release(skb->dst);
755 skb->dst = dst_clone(dst); 899 skb->dst = dst_clone(dst);
756 900
757 skb->h.raw = skb->nh.raw; 901 skb->transport_header = skb->network_header;
758 902
903 proto = fl->proto;
759 if (encap_limit >= 0) { 904 if (encap_limit >= 0) {
760 init_tel_txopt(&opt, encap_limit); 905 init_tel_txopt(&opt, encap_limit);
761 ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); 906 ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
762 } 907 }
763 skb->nh.raw = skb_push(skb, sizeof(struct ipv6hdr)); 908 skb_push(skb, sizeof(struct ipv6hdr));
764 ipv6h = skb->nh.ipv6h; 909 skb_reset_network_header(skb);
765 *(__be32*)ipv6h = fl.fl6_flowlabel | htonl(0x60000000); 910 ipv6h = ipv6_hdr(skb);
911 *(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000);
766 dsfield = INET_ECN_encapsulate(0, dsfield); 912 dsfield = INET_ECN_encapsulate(0, dsfield);
767 ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); 913 ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
768 ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); 914 ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
769 ipv6h->hop_limit = t->parms.hop_limit; 915 ipv6h->hop_limit = t->parms.hop_limit;
770 ipv6h->nexthdr = proto; 916 ipv6h->nexthdr = proto;
771 ipv6_addr_copy(&ipv6h->saddr, &fl.fl6_src); 917 ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src);
772 ipv6_addr_copy(&ipv6h->daddr, &fl.fl6_dst); 918 ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst);
773 nf_reset(skb); 919 nf_reset(skb);
774 pkt_len = skb->len; 920 pkt_len = skb->len;
775 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, 921 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL,
@@ -783,13 +929,131 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
783 stats->tx_aborted_errors++; 929 stats->tx_aborted_errors++;
784 } 930 }
785 ip6_tnl_dst_store(t, dst); 931 ip6_tnl_dst_store(t, dst);
786 t->recursion--;
787 return 0; 932 return 0;
788tx_err_link_failure: 933tx_err_link_failure:
789 stats->tx_carrier_errors++; 934 stats->tx_carrier_errors++;
790 dst_link_failure(skb); 935 dst_link_failure(skb);
791tx_err_dst_release: 936tx_err_dst_release:
792 dst_release(dst); 937 dst_release(dst);
938 return err;
939}
940
941static inline int
942ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
943{
944 struct ip6_tnl *t = netdev_priv(dev);
945 struct iphdr *iph = ip_hdr(skb);
946 int encap_limit = -1;
947 struct flowi fl;
948 __u8 dsfield;
949 __u32 mtu;
950 int err;
951
952 if ((t->parms.proto != IPPROTO_IPIP && t->parms.proto != 0) ||
953 !ip6_tnl_xmit_ctl(t))
954 return -1;
955
956 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
957 encap_limit = t->parms.encap_limit;
958
959 memcpy(&fl, &t->fl, sizeof (fl));
960 fl.proto = IPPROTO_IPIP;
961
962 dsfield = ipv4_get_dsfield(iph);
963
964 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
965 fl.fl6_flowlabel |= ntohl(((__u32)iph->tos << IPV6_TCLASS_SHIFT)
966 & IPV6_TCLASS_MASK);
967
968 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu);
969 if (err != 0) {
970 /* XXX: send ICMP error even if DF is not set. */
971 if (err == -EMSGSIZE)
972 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
973 htonl(mtu));
974 return -1;
975 }
976
977 return 0;
978}
979
980static inline int
981ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
982{
983 struct ip6_tnl *t = netdev_priv(dev);
984 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
985 int encap_limit = -1;
986 __u16 offset;
987 struct flowi fl;
988 __u8 dsfield;
989 __u32 mtu;
990 int err;
991
992 if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
993 !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h))
994 return -1;
995
996 offset = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb));
997 if (offset > 0) {
998 struct ipv6_tlv_tnl_enc_lim *tel;
999 tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
1000 if (tel->encap_limit == 0) {
1001 icmpv6_send(skb, ICMPV6_PARAMPROB,
1002 ICMPV6_HDR_FIELD, offset + 2, skb->dev);
1003 return -1;
1004 }
1005 encap_limit = tel->encap_limit - 1;
1006 } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1007 encap_limit = t->parms.encap_limit;
1008
1009 memcpy(&fl, &t->fl, sizeof (fl));
1010 fl.proto = IPPROTO_IPV6;
1011
1012 dsfield = ipv6_get_dsfield(ipv6h);
1013 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
1014 fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
1015 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
1016 fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
1017
1018 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu);
1019 if (err != 0) {
1020 if (err == -EMSGSIZE)
1021 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
1022 return -1;
1023 }
1024
1025 return 0;
1026}
1027
1028static int
1029ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1030{
1031 struct ip6_tnl *t = netdev_priv(dev);
1032 struct net_device_stats *stats = &t->stat;
1033 int ret;
1034
1035 if (t->recursion++) {
1036 t->stat.collisions++;
1037 goto tx_err;
1038 }
1039
1040 switch (skb->protocol) {
1041 case __constant_htons(ETH_P_IP):
1042 ret = ip4ip6_tnl_xmit(skb, dev);
1043 break;
1044 case __constant_htons(ETH_P_IPV6):
1045 ret = ip6ip6_tnl_xmit(skb, dev);
1046 break;
1047 default:
1048 goto tx_err;
1049 }
1050
1051 if (ret < 0)
1052 goto tx_err;
1053
1054 t->recursion--;
1055 return 0;
1056
793tx_err: 1057tx_err:
794 stats->tx_errors++; 1058 stats->tx_errors++;
795 stats->tx_dropped++; 1059 stats->tx_dropped++;
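
ip6_tnl_xmit() fans out on the ethertype. skb->protocol is stored in network byte order and switch case labels must be compile-time constants, which is why the cases use __constant_htons() rather than plain htons(); annotated:

	switch (skb->protocol) {
	case __constant_htons(ETH_P_IP):	/* 0x0800: tunneled IPv4 */
		ret = ip4ip6_tnl_xmit(skb, dev);
		break;
	case __constant_htons(ETH_P_IPV6):	/* 0x86DD: tunneled IPv6 */
		ret = ip6ip6_tnl_xmit(skb, dev);
		break;
	default:
		goto tx_err;			/* other payloads unsupported */
	}
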
@@ -817,7 +1081,7 @@ static void ip6_tnl_set_cap(struct ip6_tnl *t)
817 } 1081 }
818} 1082}
819 1083
820static void ip6ip6_tnl_link_config(struct ip6_tnl *t) 1084static void ip6_tnl_link_config(struct ip6_tnl *t)
821{ 1085{
822 struct net_device *dev = t->dev; 1086 struct net_device *dev = t->dev;
823 struct ip6_tnl_parm *p = &t->parms; 1087 struct ip6_tnl_parm *p = &t->parms;
@@ -870,17 +1134,17 @@ static void ip6ip6_tnl_link_config(struct ip6_tnl *t)
870} 1134}
871 1135
872/** 1136/**
873 * ip6ip6_tnl_change - update the tunnel parameters 1137 * ip6_tnl_change - update the tunnel parameters
874 * @t: tunnel to be changed 1138 * @t: tunnel to be changed
875 * @p: tunnel configuration parameters 1139 * @p: tunnel configuration parameters
876 * @active: != 0 if tunnel is ready for use 1140 * @active: != 0 if tunnel is ready for use
877 * 1141 *
878 * Description: 1142 * Description:
879 * ip6ip6_tnl_change() updates the tunnel parameters 1143 * ip6_tnl_change() updates the tunnel parameters
880 **/ 1144 **/
881 1145
882static int 1146static int
883ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p) 1147ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
884{ 1148{
885 ipv6_addr_copy(&t->parms.laddr, &p->laddr); 1149 ipv6_addr_copy(&t->parms.laddr, &p->laddr);
886 ipv6_addr_copy(&t->parms.raddr, &p->raddr); 1150 ipv6_addr_copy(&t->parms.raddr, &p->raddr);
@@ -889,19 +1153,20 @@ ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
889 t->parms.encap_limit = p->encap_limit; 1153 t->parms.encap_limit = p->encap_limit;
890 t->parms.flowinfo = p->flowinfo; 1154 t->parms.flowinfo = p->flowinfo;
891 t->parms.link = p->link; 1155 t->parms.link = p->link;
1156 t->parms.proto = p->proto;
892 ip6_tnl_dst_reset(t); 1157 ip6_tnl_dst_reset(t);
893 ip6ip6_tnl_link_config(t); 1158 ip6_tnl_link_config(t);
894 return 0; 1159 return 0;
895} 1160}
896 1161
897/** 1162/**
898 * ip6ip6_tnl_ioctl - configure ipv6 tunnels from userspace 1163 * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
899 * @dev: virtual device associated with tunnel 1164 * @dev: virtual device associated with tunnel
900 * @ifr: parameters passed from userspace 1165 * @ifr: parameters passed from userspace
901 * @cmd: command to be performed 1166 * @cmd: command to be performed
902 * 1167 *
903 * Description: 1168 * Description:
904 * ip6ip6_tnl_ioctl() is used for managing IPv6 tunnels 1169 * ip6_tnl_ioctl() is used for managing IPv6 tunnels
905 * from userspace. 1170 * from userspace.
906 * 1171 *
907 * The possible commands are the following: 1172 * The possible commands are the following:
@@ -923,7 +1188,7 @@ ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
923 **/ 1188 **/
924 1189
925static int 1190static int
926ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) 1191ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
927{ 1192{
928 int err = 0; 1193 int err = 0;
929 struct ip6_tnl_parm p; 1194 struct ip6_tnl_parm p;
@@ -931,12 +1196,12 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
931 1196
932 switch (cmd) { 1197 switch (cmd) {
933 case SIOCGETTUNNEL: 1198 case SIOCGETTUNNEL:
934 if (dev == ip6ip6_fb_tnl_dev) { 1199 if (dev == ip6_fb_tnl_dev) {
935 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) { 1200 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
936 err = -EFAULT; 1201 err = -EFAULT;
937 break; 1202 break;
938 } 1203 }
939 t = ip6ip6_tnl_locate(&p, 0); 1204 t = ip6_tnl_locate(&p, 0);
940 } 1205 }
941 if (t == NULL) 1206 if (t == NULL)
942 t = netdev_priv(dev); 1207 t = netdev_priv(dev);
@@ -954,10 +1219,11 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
954 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) 1219 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
955 break; 1220 break;
956 err = -EINVAL; 1221 err = -EINVAL;
957 if (p.proto != IPPROTO_IPV6) 1222 if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
1223 p.proto != 0)
958 break; 1224 break;
959 t = ip6ip6_tnl_locate(&p, cmd == SIOCADDTUNNEL); 1225 t = ip6_tnl_locate(&p, cmd == SIOCADDTUNNEL);
960 if (dev != ip6ip6_fb_tnl_dev && cmd == SIOCCHGTUNNEL) { 1226 if (dev != ip6_fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
961 if (t != NULL) { 1227 if (t != NULL) {
962 if (t->dev != dev) { 1228 if (t->dev != dev) {
963 err = -EEXIST; 1229 err = -EEXIST;
@@ -966,9 +1232,9 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
966 } else 1232 } else
967 t = netdev_priv(dev); 1233 t = netdev_priv(dev);
968 1234
969 ip6ip6_tnl_unlink(t); 1235 ip6_tnl_unlink(t);
970 err = ip6ip6_tnl_change(t, &p); 1236 err = ip6_tnl_change(t, &p);
971 ip6ip6_tnl_link(t); 1237 ip6_tnl_link(t);
972 netdev_state_change(dev); 1238 netdev_state_change(dev);
973 } 1239 }
974 if (t) { 1240 if (t) {
@@ -984,15 +1250,15 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
984 if (!capable(CAP_NET_ADMIN)) 1250 if (!capable(CAP_NET_ADMIN))
985 break; 1251 break;
986 1252
987 if (dev == ip6ip6_fb_tnl_dev) { 1253 if (dev == ip6_fb_tnl_dev) {
988 err = -EFAULT; 1254 err = -EFAULT;
989 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) 1255 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
990 break; 1256 break;
991 err = -ENOENT; 1257 err = -ENOENT;
992 if ((t = ip6ip6_tnl_locate(&p, 0)) == NULL) 1258 if ((t = ip6_tnl_locate(&p, 0)) == NULL)
993 break; 1259 break;
994 err = -EPERM; 1260 err = -EPERM;
995 if (t->dev == ip6ip6_fb_tnl_dev) 1261 if (t->dev == ip6_fb_tnl_dev)
996 break; 1262 break;
997 dev = t->dev; 1263 dev = t->dev;
998 } 1264 }
@@ -1006,20 +1272,20 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1006} 1272}
1007 1273
1008/** 1274/**
1009 * ip6ip6_tnl_get_stats - return the stats for tunnel device 1275 * ip6_tnl_get_stats - return the stats for tunnel device
1010 * @dev: virtual device associated with tunnel 1276 * @dev: virtual device associated with tunnel
1011 * 1277 *
1012 * Return: stats for device 1278 * Return: stats for device
1013 **/ 1279 **/
1014 1280
1015static struct net_device_stats * 1281static struct net_device_stats *
1016ip6ip6_tnl_get_stats(struct net_device *dev) 1282ip6_tnl_get_stats(struct net_device *dev)
1017{ 1283{
1018 return &(((struct ip6_tnl *)netdev_priv(dev))->stat); 1284 return &(((struct ip6_tnl *)netdev_priv(dev))->stat);
1019} 1285}
1020 1286
1021/** 1287/**
1022 * ip6ip6_tnl_change_mtu - change mtu manually for tunnel device 1288 * ip6_tnl_change_mtu - change mtu manually for tunnel device
1023 * @dev: virtual device associated with tunnel 1289 * @dev: virtual device associated with tunnel
1024 * @new_mtu: the new mtu 1290 * @new_mtu: the new mtu
1025 * 1291 *
@@ -1029,7 +1295,7 @@ ip6ip6_tnl_get_stats(struct net_device *dev)
1029 **/ 1295 **/
1030 1296
1031static int 1297static int
1032ip6ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) 1298ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
1033{ 1299{
1034 if (new_mtu < IPV6_MIN_MTU) { 1300 if (new_mtu < IPV6_MIN_MTU) {
1035 return -EINVAL; 1301 return -EINVAL;
@@ -1039,22 +1305,22 @@ ip6ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
1039} 1305}
1040 1306
1041/** 1307/**
1042 * ip6ip6_tnl_dev_setup - setup virtual tunnel device 1308 * ip6_tnl_dev_setup - setup virtual tunnel device
1043 * @dev: virtual device associated with tunnel 1309 * @dev: virtual device associated with tunnel
1044 * 1310 *
1045 * Description: 1311 * Description:
1046 * Initialize function pointers and device parameters 1312 * Initialize function pointers and device parameters
1047 **/ 1313 **/
1048 1314
1049static void ip6ip6_tnl_dev_setup(struct net_device *dev) 1315static void ip6_tnl_dev_setup(struct net_device *dev)
1050{ 1316{
1051 SET_MODULE_OWNER(dev); 1317 SET_MODULE_OWNER(dev);
1052 dev->uninit = ip6ip6_tnl_dev_uninit; 1318 dev->uninit = ip6_tnl_dev_uninit;
1053 dev->destructor = free_netdev; 1319 dev->destructor = free_netdev;
1054 dev->hard_start_xmit = ip6ip6_tnl_xmit; 1320 dev->hard_start_xmit = ip6_tnl_xmit;
1055 dev->get_stats = ip6ip6_tnl_get_stats; 1321 dev->get_stats = ip6_tnl_get_stats;
1056 dev->do_ioctl = ip6ip6_tnl_ioctl; 1322 dev->do_ioctl = ip6_tnl_ioctl;
1057 dev->change_mtu = ip6ip6_tnl_change_mtu; 1323 dev->change_mtu = ip6_tnl_change_mtu;
1058 1324
1059 dev->type = ARPHRD_TUNNEL6; 1325 dev->type = ARPHRD_TUNNEL6;
1060 dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr); 1326 dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
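
Annotation: this is the standard 2.6-era shape for a virtual device. A setup() callback fills in the net_device function pointers and parameters, alloc_netdev() allocates the device plus its private area, and ->init runs from inside register_netdev(). For orientation, a stripped-down sketch of the same pattern; all my_tnl_* names are hypothetical stand-ins, not part of this patch.

/* Sketch only: the alloc_netdev()/setup pattern this driver uses. */
static void my_tnl_setup(struct net_device *dev)
{
	dev->hard_start_xmit = my_tnl_xmit;	/* hypothetical handlers */
	dev->do_ioctl        = my_tnl_ioctl;
	dev->destructor      = free_netdev;	/* freed on last ref drop */
	dev->type            = ARPHRD_TUNNEL6;
	dev->flags          |= IFF_NOARP;
}

static int my_tnl_create(void)
{
	int err;
	struct net_device *dev =
		alloc_netdev(sizeof(struct my_tnl_priv), "mytnl%d",
			     my_tnl_setup);

	if (!dev)
		return -ENOMEM;
	dev->init = my_tnl_init;	/* runs inside register_netdev() */
	err = register_netdev(dev);
	if (err)
		free_netdev(dev);	/* never registered: free by hand */
	return err;
}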
@@ -1065,50 +1331,56 @@ static void ip6ip6_tnl_dev_setup(struct net_device *dev)
1065 1331
1066 1332
1067/** 1333/**
1068 * ip6ip6_tnl_dev_init_gen - general initializer for all tunnel devices 1334 * ip6_tnl_dev_init_gen - general initializer for all tunnel devices
1069 * @dev: virtual device associated with tunnel 1335 * @dev: virtual device associated with tunnel
1070 **/ 1336 **/
1071 1337
1072static inline void 1338static inline void
1073ip6ip6_tnl_dev_init_gen(struct net_device *dev) 1339ip6_tnl_dev_init_gen(struct net_device *dev)
1074{ 1340{
1075 struct ip6_tnl *t = netdev_priv(dev); 1341 struct ip6_tnl *t = netdev_priv(dev);
1076 t->fl.proto = IPPROTO_IPV6;
1077 t->dev = dev; 1342 t->dev = dev;
1078 strcpy(t->parms.name, dev->name); 1343 strcpy(t->parms.name, dev->name);
1079} 1344}
1080 1345
1081/** 1346/**
1082 * ip6ip6_tnl_dev_init - initializer for all non fallback tunnel devices 1347 * ip6_tnl_dev_init - initializer for all non fallback tunnel devices
1083 * @dev: virtual device associated with tunnel 1348 * @dev: virtual device associated with tunnel
1084 **/ 1349 **/
1085 1350
1086static int 1351static int
1087ip6ip6_tnl_dev_init(struct net_device *dev) 1352ip6_tnl_dev_init(struct net_device *dev)
1088{ 1353{
1089 struct ip6_tnl *t = netdev_priv(dev); 1354 struct ip6_tnl *t = netdev_priv(dev);
1090 ip6ip6_tnl_dev_init_gen(dev); 1355 ip6_tnl_dev_init_gen(dev);
1091 ip6ip6_tnl_link_config(t); 1356 ip6_tnl_link_config(t);
1092 return 0; 1357 return 0;
1093} 1358}
1094 1359
1095/** 1360/**
1096 * ip6ip6_fb_tnl_dev_init - initializer for fallback tunnel device 1361 * ip6_fb_tnl_dev_init - initializer for fallback tunnel device
1097 * @dev: fallback device 1362 * @dev: fallback device
1098 * 1363 *
1099 * Return: 0 1364 * Return: 0
1100 **/ 1365 **/
1101 1366
1102static int 1367static int
1103ip6ip6_fb_tnl_dev_init(struct net_device *dev) 1368ip6_fb_tnl_dev_init(struct net_device *dev)
1104{ 1369{
1105 struct ip6_tnl *t = netdev_priv(dev); 1370 struct ip6_tnl *t = netdev_priv(dev);
1106 ip6ip6_tnl_dev_init_gen(dev); 1371 ip6_tnl_dev_init_gen(dev);
1372 t->parms.proto = IPPROTO_IPV6;
1107 dev_hold(dev); 1373 dev_hold(dev);
1108 tnls_wc[0] = t; 1374 tnls_wc[0] = t;
1109 return 0; 1375 return 0;
1110} 1376}
1111 1377
1378static struct xfrm6_tunnel ip4ip6_handler = {
1379 .handler = ip4ip6_rcv,
1380 .err_handler = ip4ip6_err,
1381 .priority = 1,
1382};
1383
1112static struct xfrm6_tunnel ip6ip6_handler = { 1384static struct xfrm6_tunnel ip6ip6_handler = {
1113 .handler = ip6ip6_rcv, 1385 .handler = ip6ip6_rcv,
1114 .err_handler = ip6ip6_err, 1386 .err_handler = ip6ip6_err,
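
Annotation: two receive handlers are now registered, keyed by the inner address family: ip4ip6_handler picks up IPv4 payloads, ip6ip6_handler keeps handling IPv6 ones. Handlers sharing a family are ordered by .priority and the chain is walked until one accepts the packet. A sketch of the same template with a hypothetical third handler (my_rcv/my_err are stand-ins):

/* Sketch: the xfrm6_tunnel registration template used above. */
static struct xfrm6_tunnel my_handler = {
	.handler     = my_rcv,		/* int (*)(struct sk_buff *) */
	.err_handler = my_err,
	.priority    = 2,
};

static int __init my_module_init(void)
{
	if (xfrm6_tunnel_register(&my_handler, AF_INET))
		return -EAGAIN;
	return 0;
}

static void __exit my_module_exit(void)
{
	xfrm6_tunnel_deregister(&my_handler, AF_INET);
}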
@@ -1125,30 +1397,40 @@ static int __init ip6_tunnel_init(void)
1125{ 1397{
1126 int err; 1398 int err;
1127 1399
1400 if (xfrm6_tunnel_register(&ip4ip6_handler, AF_INET)) {
1401 printk(KERN_ERR "ip6_tunnel init: can't register ip4ip6\n");
1402 err = -EAGAIN;
1403 goto out;
1404 }
1405
1128 if (xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6)) { 1406 if (xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6)) {
1129 printk(KERN_ERR "ip6ip6 init: can't register tunnel\n"); 1407 printk(KERN_ERR "ip6_tunnel init: can't register ip6ip6\n");
1130 return -EAGAIN; 1408 err = -EAGAIN;
1409 goto unreg_ip4ip6;
1131 } 1410 }
1132 ip6ip6_fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0", 1411 ip6_fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
1133 ip6ip6_tnl_dev_setup); 1412 ip6_tnl_dev_setup);
1134 1413
1135 if (!ip6ip6_fb_tnl_dev) { 1414 if (!ip6_fb_tnl_dev) {
1136 err = -ENOMEM; 1415 err = -ENOMEM;
1137 goto fail; 1416 goto fail;
1138 } 1417 }
1139 ip6ip6_fb_tnl_dev->init = ip6ip6_fb_tnl_dev_init; 1418 ip6_fb_tnl_dev->init = ip6_fb_tnl_dev_init;
1140 1419
1141 if ((err = register_netdev(ip6ip6_fb_tnl_dev))) { 1420 if ((err = register_netdev(ip6_fb_tnl_dev))) {
1142 free_netdev(ip6ip6_fb_tnl_dev); 1421 free_netdev(ip6_fb_tnl_dev);
1143 goto fail; 1422 goto fail;
1144 } 1423 }
1145 return 0; 1424 return 0;
1146fail: 1425fail:
1147 xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6); 1426 xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
1427unreg_ip4ip6:
1428 xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
1429out:
1148 return err; 1430 return err;
1149} 1431}
1150 1432
1151static void __exit ip6ip6_destroy_tunnels(void) 1433static void __exit ip6_tnl_destroy_tunnels(void)
1152{ 1434{
1153 int h; 1435 int h;
1154 struct ip6_tnl *t; 1436 struct ip6_tnl *t;
@@ -1168,11 +1450,14 @@ static void __exit ip6ip6_destroy_tunnels(void)
1168 1450
1169static void __exit ip6_tunnel_cleanup(void) 1451static void __exit ip6_tunnel_cleanup(void)
1170{ 1452{
1453 if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
1454 printk(KERN_INFO "ip6_tunnel close: can't deregister ip4ip6\n");
1455
1171 if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6)) 1456 if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
1172 printk(KERN_INFO "ip6ip6 close: can't deregister tunnel\n"); 1457 printk(KERN_INFO "ip6_tunnel close: can't deregister ip6ip6\n");
1173 1458
1174 rtnl_lock(); 1459 rtnl_lock();
1175 ip6ip6_destroy_tunnels(); 1460 ip6_tnl_destroy_tunnels();
1176 rtnl_unlock(); 1461 rtnl_unlock();
1177} 1462}
1178 1463
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 5724ba9f75de..1ee50b5782e1 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -79,9 +79,9 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
79 skb->ip_summed = CHECKSUM_NONE; 79 skb->ip_summed = CHECKSUM_NONE;
80 80
81 /* Remove ipcomp header and decompress original payload */ 81 /* Remove ipcomp header and decompress original payload */
82 iph = skb->nh.ipv6h; 82 iph = ipv6_hdr(skb);
83 ipch = (void *)skb->data; 83 ipch = (void *)skb->data;
84 skb->h.raw = skb->nh.raw + sizeof(*ipch); 84 skb->transport_header = skb->network_header + sizeof(*ipch);
85 __skb_pull(skb, sizeof(*ipch)); 85 __skb_pull(skb, sizeof(*ipch));
86 86
87 /* decompression */ 87 /* decompression */
@@ -111,7 +111,7 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
111 111
112 skb->truesize += dlen - plen; 112 skb->truesize += dlen - plen;
113 __skb_put(skb, dlen - plen); 113 __skb_put(skb, dlen - plen);
114 memcpy(skb->data, scratch, dlen); 114 skb_copy_to_linear_data(skb, scratch, dlen);
115 err = ipch->nexthdr; 115 err = ipch->nexthdr;
116 116
117out_put_cpu: 117out_put_cpu:
@@ -124,15 +124,13 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
124{ 124{
125 int err; 125 int err;
126 struct ipv6hdr *top_iph; 126 struct ipv6hdr *top_iph;
127 int hdr_len;
128 struct ipv6_comp_hdr *ipch; 127 struct ipv6_comp_hdr *ipch;
129 struct ipcomp_data *ipcd = x->data; 128 struct ipcomp_data *ipcd = x->data;
130 int plen, dlen; 129 int plen, dlen;
131 u8 *start, *scratch; 130 u8 *start, *scratch;
132 struct crypto_comp *tfm; 131 struct crypto_comp *tfm;
133 int cpu; 132 int cpu;
134 133 int hdr_len = skb_transport_offset(skb);
135 hdr_len = skb->h.raw - skb->data;
136 134
137 /* check whether datagram len is larger than threshold */ 135 /* check whether datagram len is larger than threshold */
138 if ((skb->len - hdr_len) < ipcd->threshold) { 136 if ((skb->len - hdr_len) < ipcd->threshold) {
@@ -145,7 +143,7 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
145 /* compression */ 143 /* compression */
146 plen = skb->len - hdr_len; 144 plen = skb->len - hdr_len;
147 dlen = IPCOMP_SCRATCH_SIZE; 145 dlen = IPCOMP_SCRATCH_SIZE;
148 start = skb->h.raw; 146 start = skb_transport_header(skb);
149 147
150 cpu = get_cpu(); 148 cpu = get_cpu();
151 scratch = *per_cpu_ptr(ipcomp6_scratches, cpu); 149 scratch = *per_cpu_ptr(ipcomp6_scratches, cpu);
@@ -166,10 +164,10 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
166 top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); 164 top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
167 165
168 ipch = (struct ipv6_comp_hdr *)start; 166 ipch = (struct ipv6_comp_hdr *)start;
169 ipch->nexthdr = *skb->nh.raw; 167 ipch->nexthdr = *skb_network_header(skb);
170 ipch->flags = 0; 168 ipch->flags = 0;
171 ipch->cpi = htons((u16 )ntohl(x->id.spi)); 169 ipch->cpi = htons((u16 )ntohl(x->id.spi));
172 *skb->nh.raw = IPPROTO_COMP; 170 *skb_network_header(skb) = IPPROTO_COMP;
173 171
174out_ok: 172out_ok:
175 return 0; 173 return 0;
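
Annotation: the ipcomp6 hunks are part of the tree-wide sk_buff conversion in this merge: the old skb->nh/skb->h pointer unions give way to offset-based accessors. The common substitutions, shown as equivalent kernel-side helpers (illustrative only; the "old" form no longer compiles once the conversion lands):

/* Equivalences used throughout these hunks:
 *
 *   skb->nh.ipv6h              ->  ipv6_hdr(skb)
 *   skb->h.raw                 ->  skb_transport_header(skb)
 *   skb->h.raw - skb->data     ->  skb_transport_offset(skb)
 *   skb->h.raw = skb->data     ->  skb_reset_transport_header(skb)
 *   skb->h.raw - skb->nh.raw   ->  skb_network_header_len(skb)
 *   memcpy(skb->data, src, n)  ->  skb_copy_to_linear_data(skb, src, n)
 */
static int payload_len_old(struct sk_buff *skb)
{
	return skb->len - (skb->h.raw - skb->data);	/* pre-conversion */
}

static int payload_len_new(struct sk_buff *skb)
{
	return skb->len - skb_transport_offset(skb);	/* post-conversion */
}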
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index f5f9582a8d39..aa3d07c52a8f 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -101,14 +101,14 @@ static int ipv6_gso_send_check(struct sk_buff *skb)
101 if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) 101 if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
102 goto out; 102 goto out;
103 103
104 ipv6h = skb->nh.ipv6h; 104 ipv6h = ipv6_hdr(skb);
105 __skb_pull(skb, sizeof(*ipv6h)); 105 __skb_pull(skb, sizeof(*ipv6h));
106 err = -EPROTONOSUPPORT; 106 err = -EPROTONOSUPPORT;
107 107
108 rcu_read_lock(); 108 rcu_read_lock();
109 ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); 109 ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
110 if (likely(ops && ops->gso_send_check)) { 110 if (likely(ops && ops->gso_send_check)) {
111 skb->h.raw = skb->data; 111 skb_reset_transport_header(skb);
112 err = ops->gso_send_check(skb); 112 err = ops->gso_send_check(skb);
113 } 113 }
114 rcu_read_unlock(); 114 rcu_read_unlock();
@@ -137,14 +137,14 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
137 if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) 137 if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
138 goto out; 138 goto out;
139 139
140 ipv6h = skb->nh.ipv6h; 140 ipv6h = ipv6_hdr(skb);
141 __skb_pull(skb, sizeof(*ipv6h)); 141 __skb_pull(skb, sizeof(*ipv6h));
142 segs = ERR_PTR(-EPROTONOSUPPORT); 142 segs = ERR_PTR(-EPROTONOSUPPORT);
143 143
144 rcu_read_lock(); 144 rcu_read_lock();
145 ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); 145 ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
146 if (likely(ops && ops->gso_segment)) { 146 if (likely(ops && ops->gso_segment)) {
147 skb->h.raw = skb->data; 147 skb_reset_transport_header(skb);
148 segs = ops->gso_segment(skb, features); 148 segs = ops->gso_segment(skb, features);
149 } 149 }
150 rcu_read_unlock(); 150 rcu_read_unlock();
@@ -153,7 +153,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
153 goto out; 153 goto out;
154 154
155 for (skb = segs; skb; skb = skb->next) { 155 for (skb = segs; skb; skb = skb->next) {
156 ipv6h = skb->nh.ipv6h; 156 ipv6h = ipv6_hdr(skb);
157 ipv6h->payload_len = htons(skb->len - skb->mac_len - 157 ipv6h->payload_len = htons(skb->len - skb->mac_len -
158 sizeof(*ipv6h)); 158 sizeof(*ipv6h));
159 } 159 }
@@ -694,7 +694,7 @@ done:
694 retv = ip6_ra_control(sk, val, NULL); 694 retv = ip6_ra_control(sk, val, NULL);
695 break; 695 break;
696 case IPV6_MTU_DISCOVER: 696 case IPV6_MTU_DISCOVER:
697 if (val<0 || val>2) 697 if (val<0 || val>3)
698 goto e_inval; 698 goto e_inval;
699 np->pmtudisc = val; 699 np->pmtudisc = val;
700 retv = 0; 700 retv = 0;
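
Annotation: the range check widens from 2 to 3 because this series introduces a fourth path-MTU mode, IPV6_PMTUDISC_PROBE (value 3), which transmits while ignoring the cached path MTU so that oversized packets elicit fresh Packet Too Big errors. A small userspace sketch, hedged in case the libc headers predate the constant:

#include <netinet/in.h>
#include <sys/socket.h>

#ifndef IPV6_PMTUDISC_PROBE
#define IPV6_PMTUDISC_PROBE 3	/* new in this series */
#endif

/* Put a socket into probing mode. */
int enable_pmtu_probe(int fd)
{
	int val = IPV6_PMTUDISC_PROBE;

	return setsockopt(fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
			  &val, sizeof(val));
}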
@@ -761,6 +761,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
761 return err; 761 return err;
762} 762}
763 763
764EXPORT_SYMBOL(ipv6_setsockopt);
764 765
765#ifdef CONFIG_COMPAT 766#ifdef CONFIG_COMPAT
766int compat_ipv6_setsockopt(struct sock *sk, int level, int optname, 767int compat_ipv6_setsockopt(struct sock *sk, int level, int optname,
@@ -796,18 +797,37 @@ EXPORT_SYMBOL(compat_ipv6_setsockopt);
796#endif 797#endif
797 798
798static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, 799static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
799 char __user *optval, int len) 800 int optname, char __user *optval, int len)
800{ 801{
801 struct ipv6_opt_hdr *hdr; 802 struct ipv6_opt_hdr *hdr;
802 803
803 if (!opt || !opt->hopopt) 804 if (!opt)
805 return 0;
806
807 switch(optname) {
808 case IPV6_HOPOPTS:
809 hdr = opt->hopopt;
810 break;
811 case IPV6_RTHDRDSTOPTS:
812 hdr = opt->dst0opt;
813 break;
814 case IPV6_RTHDR:
815 hdr = (struct ipv6_opt_hdr *)opt->srcrt;
816 break;
817 case IPV6_DSTOPTS:
818 hdr = opt->dst1opt;
819 break;
820 default:
821 return -EINVAL; /* should not happen */
822 }
823
824 if (!hdr)
804 return 0; 825 return 0;
805 hdr = opt->hopopt;
806 826
807 len = min_t(unsigned int, len, ipv6_optlen(hdr)); 827 len = min_t(unsigned int, len, ipv6_optlen(hdr));
808 if (copy_to_user(optval, hdr, ipv6_optlen(hdr))) 828 if (copy_to_user(optval, hdr, len))
809 return -EFAULT; 829 return -EFAULT;
810 return len; 830 return ipv6_optlen(hdr);
811} 831}
812 832
813static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, 833static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
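
Annotation: besides dispatching on optname, the rewritten helper changes the contract: it copies at most the caller's buffer length but returns the full option length, so userspace can size a buffer with a zero-length probe call first. A sketch of that two-step read for IPV6_HOPOPTS, assuming the kernel side above:

/* Sketch: probe-then-fetch of the sticky Hop-by-Hop option. */
#include <stdlib.h>
#include <netinet/in.h>
#include <sys/socket.h>

void *fetch_hopopts(int fd, socklen_t *out_len)
{
	socklen_t len = 0;
	void *buf;

	/* Zero-length call: nothing is copied, full size comes back. */
	if (getsockopt(fd, IPPROTO_IPV6, IPV6_HOPOPTS, NULL, &len) < 0 ||
	    len == 0)
		return NULL;

	buf = malloc(len);
	if (!buf)
		return NULL;
	if (getsockopt(fd, IPPROTO_IPV6, IPV6_HOPOPTS, buf, &len) < 0) {
		free(buf);
		return NULL;
	}
	*out_len = len;
	return buf;
}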
@@ -945,7 +965,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
945 965
946 lock_sock(sk); 966 lock_sock(sk);
947 len = ipv6_getsockopt_sticky(sk, np->opt, 967 len = ipv6_getsockopt_sticky(sk, np->opt,
948 optval, len); 968 optname, optval, len);
949 release_sock(sk); 969 release_sock(sk);
950 return put_user(len, optlen); 970 return put_user(len, optlen);
951 } 971 }
@@ -1066,6 +1086,8 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
1066 return err; 1086 return err;
1067} 1087}
1068 1088
1089EXPORT_SYMBOL(ipv6_getsockopt);
1090
1069#ifdef CONFIG_COMPAT 1091#ifdef CONFIG_COMPAT
1070int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, 1092int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
1071 char __user *optval, int __user *optlen) 1093 char __user *optval, int __user *optlen)
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c
deleted file mode 100644
index e12e3d4fccec..000000000000
--- a/net/ipv6/ipv6_syms.c
+++ /dev/null
@@ -1,36 +0,0 @@
1
2#include <linux/module.h>
3#include <net/protocol.h>
4#include <net/ipv6.h>
5#include <net/addrconf.h>
6#include <net/ip6_route.h>
7#include <net/xfrm.h>
8
9EXPORT_SYMBOL(icmpv6_send);
10EXPORT_SYMBOL(icmpv6_statistics);
11EXPORT_SYMBOL(icmpv6_err_convert);
12EXPORT_SYMBOL(ndisc_mc_map);
13EXPORT_SYMBOL(register_inet6addr_notifier);
14EXPORT_SYMBOL(unregister_inet6addr_notifier);
15EXPORT_SYMBOL(ip6_route_output);
16EXPORT_SYMBOL(ipv6_setsockopt);
17EXPORT_SYMBOL(ipv6_getsockopt);
18EXPORT_SYMBOL(inet6_register_protosw);
19EXPORT_SYMBOL(inet6_unregister_protosw);
20EXPORT_SYMBOL(inet6_add_protocol);
21EXPORT_SYMBOL(inet6_del_protocol);
22EXPORT_SYMBOL(ip6_xmit);
23EXPORT_SYMBOL(inet6_release);
24EXPORT_SYMBOL(inet6_bind);
25EXPORT_SYMBOL(inet6_getname);
26EXPORT_SYMBOL(inet6_ioctl);
27EXPORT_SYMBOL(ipv6_get_saddr);
28EXPORT_SYMBOL(ipv6_chk_addr);
29EXPORT_SYMBOL(in6_dev_finish_destroy);
30#ifdef CONFIG_XFRM
31EXPORT_SYMBOL(xfrm6_rcv);
32EXPORT_SYMBOL(xfrm6_input_addr);
33EXPORT_SYMBOL(xfrm6_find_1stfragopt);
34#endif
35EXPORT_SYMBOL(rt6_lookup);
36EXPORT_SYMBOL(ipv6_push_nfrag_opts);
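
Annotation: ipv6_syms.c disappears entirely; each EXPORT_SYMBOL now sits next to the function it exports (ipv6_setsockopt, ipv6_getsockopt and ndisc_mc_map gain theirs in the nearby hunks). The resulting pattern, with a hypothetical helper:

/* Illustrative placement after this cleanup: the export directly
 * follows its definition instead of living in a *_syms.c table.
 * Requires <linux/module.h>.
 */
int my_exported_helper(int x)		/* hypothetical */
{
	return x + 1;
}
EXPORT_SYMBOL(my_exported_helper);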
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index a8d6625ec782..6c2758951d60 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -988,7 +988,7 @@ int ipv6_is_mld(struct sk_buff *skb, int nexthdr)
988 if (!pskb_may_pull(skb, sizeof(struct icmp6hdr))) 988 if (!pskb_may_pull(skb, sizeof(struct icmp6hdr)))
989 return 0; 989 return 0;
990 990
991 pic = (struct icmp6hdr *)skb->h.raw; 991 pic = icmp6_hdr(skb);
992 992
993 switch (pic->icmp6_type) { 993 switch (pic->icmp6_type) {
994 case ICMPV6_MGM_QUERY: 994 case ICMPV6_MGM_QUERY:
@@ -1167,11 +1167,11 @@ int igmp6_event_query(struct sk_buff *skb)
1167 return -EINVAL; 1167 return -EINVAL;
1168 1168
1169 /* compute payload length excluding extension headers */ 1169 /* compute payload length excluding extension headers */
1170 len = ntohs(skb->nh.ipv6h->payload_len) + sizeof(struct ipv6hdr); 1170 len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr);
1171 len -= (char *)skb->h.raw - (char *)skb->nh.ipv6h; 1171 len -= skb_network_header_len(skb);
1172 1172
1173 /* Drop queries with not link local source */ 1173 /* Drop queries with not link local source */
1174 if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr)&IPV6_ADDR_LINKLOCAL)) 1174 if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL))
1175 return -EINVAL; 1175 return -EINVAL;
1176 1176
1177 idev = in6_dev_get(skb->dev); 1177 idev = in6_dev_get(skb->dev);
@@ -1179,7 +1179,7 @@ int igmp6_event_query(struct sk_buff *skb)
1179 if (idev == NULL) 1179 if (idev == NULL)
1180 return 0; 1180 return 0;
1181 1181
1182 hdr = (struct icmp6hdr *) skb->h.raw; 1182 hdr = icmp6_hdr(skb);
1183 group = (struct in6_addr *) (hdr + 1); 1183 group = (struct in6_addr *) (hdr + 1);
1184 group_type = ipv6_addr_type(group); 1184 group_type = ipv6_addr_type(group);
1185 1185
@@ -1212,7 +1212,7 @@ int igmp6_event_query(struct sk_buff *skb)
1212 in6_dev_put(idev); 1212 in6_dev_put(idev);
1213 return -EINVAL; 1213 return -EINVAL;
1214 } 1214 }
1215 mlh2 = (struct mld2_query *) skb->h.raw; 1215 mlh2 = (struct mld2_query *)skb_transport_header(skb);
1216 max_delay = (MLDV2_MRC(ntohs(mlh2->mrc))*HZ)/1000; 1216 max_delay = (MLDV2_MRC(ntohs(mlh2->mrc))*HZ)/1000;
1217 if (!max_delay) 1217 if (!max_delay)
1218 max_delay = 1; 1218 max_delay = 1;
@@ -1235,7 +1235,7 @@ int igmp6_event_query(struct sk_buff *skb)
1235 in6_dev_put(idev); 1235 in6_dev_put(idev);
1236 return -EINVAL; 1236 return -EINVAL;
1237 } 1237 }
1238 mlh2 = (struct mld2_query *) skb->h.raw; 1238 mlh2 = (struct mld2_query *)skb_transport_header(skb);
1239 mark = 1; 1239 mark = 1;
1240 } 1240 }
1241 } else { 1241 } else {
@@ -1300,10 +1300,10 @@ int igmp6_event_report(struct sk_buff *skb)
1300 if (!pskb_may_pull(skb, sizeof(struct in6_addr))) 1300 if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
1301 return -EINVAL; 1301 return -EINVAL;
1302 1302
1303 hdr = (struct icmp6hdr*) skb->h.raw; 1303 hdr = icmp6_hdr(skb);
1304 1304
1305 /* Drop reports with not link local source */ 1305 /* Drop reports with not link local source */
1306 addr_type = ipv6_addr_type(&skb->nh.ipv6h->saddr); 1306 addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr);
1307 if (addr_type != IPV6_ADDR_ANY && 1307 if (addr_type != IPV6_ADDR_ANY &&
1308 !(addr_type&IPV6_ADDR_LINKLOCAL)) 1308 !(addr_type&IPV6_ADDR_LINKLOCAL))
1309 return -EINVAL; 1309 return -EINVAL;
@@ -1411,7 +1411,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
1411 1411
1412 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 1412 skb_reserve(skb, LL_RESERVED_SPACE(dev));
1413 1413
1414 if (ipv6_get_lladdr(dev, &addr_buf)) { 1414 if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
1415 /* <draft-ietf-magma-mld-source-05.txt>: 1415 /* <draft-ietf-magma-mld-source-05.txt>:
1416 * use unspecified address as the source address 1416 * use unspecified address as the source address
1417 * when a valid link-local address is not available. 1417 * when a valid link-local address is not available.
@@ -1423,8 +1423,9 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
1423 1423
1424 memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra)); 1424 memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
1425 1425
1426 pmr =(struct mld2_report *)skb_put(skb, sizeof(*pmr)); 1426 skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data);
1427 skb->h.raw = (unsigned char *)pmr; 1427 skb_put(skb, sizeof(*pmr));
1428 pmr = (struct mld2_report *)skb_transport_header(skb);
1428 pmr->type = ICMPV6_MLD2_REPORT; 1429 pmr->type = ICMPV6_MLD2_REPORT;
1429 pmr->resv1 = 0; 1430 pmr->resv1 = 0;
1430 pmr->csum = 0; 1431 pmr->csum = 0;
@@ -1441,7 +1442,7 @@ static inline int mld_dev_queue_xmit2(struct sk_buff *skb)
1441 unsigned char ha[MAX_ADDR_LEN]; 1442 unsigned char ha[MAX_ADDR_LEN];
1442 int err; 1443 int err;
1443 1444
1444 ndisc_mc_map(&skb->nh.ipv6h->daddr, ha, dev, 1); 1445 ndisc_mc_map(&ipv6_hdr(skb)->daddr, ha, dev, 1);
1445 err = dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, skb->len); 1446 err = dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, skb->len);
1446 if (err < 0) { 1447 if (err < 0) {
1447 kfree_skb(skb); 1448 kfree_skb(skb);
@@ -1459,20 +1460,21 @@ static inline int mld_dev_queue_xmit(struct sk_buff *skb)
1459 1460
1460static void mld_sendpack(struct sk_buff *skb) 1461static void mld_sendpack(struct sk_buff *skb)
1461{ 1462{
1462 struct ipv6hdr *pip6 = skb->nh.ipv6h; 1463 struct ipv6hdr *pip6 = ipv6_hdr(skb);
1463 struct mld2_report *pmr = (struct mld2_report *)skb->h.raw; 1464 struct mld2_report *pmr =
1465 (struct mld2_report *)skb_transport_header(skb);
1464 int payload_len, mldlen; 1466 int payload_len, mldlen;
1465 struct inet6_dev *idev = in6_dev_get(skb->dev); 1467 struct inet6_dev *idev = in6_dev_get(skb->dev);
1466 int err; 1468 int err;
1467 1469
1468 IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); 1470 IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
1469 payload_len = skb->tail - (unsigned char *)skb->nh.ipv6h - 1471 payload_len = (skb->tail - skb->network_header) - sizeof(*pip6);
1470 sizeof(struct ipv6hdr); 1472 mldlen = skb->tail - skb->transport_header;
1471 mldlen = skb->tail - skb->h.raw;
1472 pip6->payload_len = htons(payload_len); 1473 pip6->payload_len = htons(payload_len);
1473 1474
1474 pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen, 1475 pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
1475 IPPROTO_ICMPV6, csum_partial(skb->h.raw, mldlen, 0)); 1476 IPPROTO_ICMPV6, csum_partial(skb_transport_header(skb),
1477 mldlen, 0));
1476 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev, 1478 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev,
1477 mld_dev_queue_xmit); 1479 mld_dev_queue_xmit);
1478 if (!err) { 1480 if (!err) {
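
Annotation: with the headers tracked as offsets, payload_len and mldlen fall out of simple subtractions, and the checksum is computed over the transport span. The general ICMPv6 recipe as a hedged standalone helper (csum_ipv6_magic comes from <net/ip6_checksum.h>; the skb is assumed linear):

/* Illustrative: fold a partial sum of the ICMPv6 message into the
 * IPv6 pseudo-header sum, as mld_sendpack() does above.
 */
static __sum16 icmpv6_csum(struct sk_buff *skb, int mldlen)
{
	struct ipv6hdr *ip6 = ipv6_hdr(skb);

	return csum_ipv6_magic(&ip6->saddr, &ip6->daddr, mldlen,
			       IPPROTO_ICMPV6,
			       csum_partial(skb_transport_header(skb),
					    mldlen, 0));
}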
@@ -1506,7 +1508,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1506 pgr->grec_auxwords = 0; 1508 pgr->grec_auxwords = 0;
1507 pgr->grec_nsrcs = 0; 1509 pgr->grec_nsrcs = 0;
1508 pgr->grec_mca = pmc->mca_addr; /* structure copy */ 1510 pgr->grec_mca = pmc->mca_addr; /* structure copy */
1509 pmr = (struct mld2_report *)skb->h.raw; 1511 pmr = (struct mld2_report *)skb_transport_header(skb);
1510 pmr->ngrec = htons(ntohs(pmr->ngrec)+1); 1512 pmr->ngrec = htons(ntohs(pmr->ngrec)+1);
1511 *ppgr = pgr; 1513 *ppgr = pgr;
1512 return skb; 1514 return skb;
@@ -1539,7 +1541,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1539 if (!*psf_list) 1541 if (!*psf_list)
1540 goto empty_source; 1542 goto empty_source;
1541 1543
1542 pmr = skb ? (struct mld2_report *)skb->h.raw : NULL; 1544 pmr = skb ? (struct mld2_report *)skb_transport_header(skb) : NULL;
1543 1545
1544 /* EX and TO_EX get a fresh packet, if needed */ 1546 /* EX and TO_EX get a fresh packet, if needed */
1545 if (truncate) { 1547 if (truncate) {
@@ -1791,7 +1793,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
1791 1793
1792 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 1794 skb_reserve(skb, LL_RESERVED_SPACE(dev));
1793 1795
1794 if (ipv6_get_lladdr(dev, &addr_buf)) { 1796 if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
1795 /* <draft-ietf-magma-mld-source-05.txt>: 1797 /* <draft-ietf-magma-mld-source-05.txt>:
1796 * use unspecified address as the source address 1798 * use unspecified address as the source address
1797 * when a valid link-local address is not available. 1799 * when a valid link-local address is not available.
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 0afcabdd8ed6..13b7160fb892 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -90,23 +90,26 @@ int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
90{ 90{
91 struct ip6_mh *mh; 91 struct ip6_mh *mh;
92 92
93 if (!pskb_may_pull(skb, (skb->h.raw - skb->data) + 8) || 93 if (!pskb_may_pull(skb, (skb_transport_offset(skb)) + 8) ||
94 !pskb_may_pull(skb, (skb->h.raw - skb->data) + ((skb->h.raw[1] + 1) << 3))) 94 !pskb_may_pull(skb, (skb_transport_offset(skb) +
95 ((skb_transport_header(skb)[1] + 1) << 3))))
95 return -1; 96 return -1;
96 97
97 mh = (struct ip6_mh *)skb->h.raw; 98 mh = (struct ip6_mh *)skb_transport_header(skb);
98 99
99 if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) { 100 if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) {
100 LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n", 101 LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n",
101 mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type)); 102 mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type));
102 mip6_param_prob(skb, 0, (&mh->ip6mh_hdrlen) - skb->nh.raw); 103 mip6_param_prob(skb, 0, ((&mh->ip6mh_hdrlen) -
104 skb_network_header(skb)));
103 return -1; 105 return -1;
104 } 106 }
105 107
106 if (mh->ip6mh_proto != IPPROTO_NONE) { 108 if (mh->ip6mh_proto != IPPROTO_NONE) {
107 LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n", 109 LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n",
108 mh->ip6mh_proto); 110 mh->ip6mh_proto);
109 mip6_param_prob(skb, 0, (&mh->ip6mh_proto) - skb->nh.raw); 111 mip6_param_prob(skb, 0, ((&mh->ip6mh_proto) -
112 skb_network_header(skb)));
110 return -1; 113 return -1;
111 } 114 }
112 115
@@ -122,12 +125,12 @@ struct mip6_report_rate_limiter {
122}; 125};
123 126
124static struct mip6_report_rate_limiter mip6_report_rl = { 127static struct mip6_report_rate_limiter mip6_report_rl = {
125 .lock = SPIN_LOCK_UNLOCKED 128 .lock = __SPIN_LOCK_UNLOCKED(mip6_report_rl.lock)
126}; 129};
127 130
128static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb) 131static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb)
129{ 132{
130 struct ipv6hdr *iph = skb->nh.ipv6h; 133 struct ipv6hdr *iph = ipv6_hdr(skb);
131 struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data; 134 struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data;
132 135
133 if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) && 136 if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) &&
@@ -152,10 +155,10 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
152 iph = (struct ipv6hdr *)skb->data; 155 iph = (struct ipv6hdr *)skb->data;
153 iph->payload_len = htons(skb->len - sizeof(*iph)); 156 iph->payload_len = htons(skb->len - sizeof(*iph));
154 157
155 nexthdr = *skb->nh.raw; 158 nexthdr = *skb_network_header(skb);
156 *skb->nh.raw = IPPROTO_DSTOPTS; 159 *skb_network_header(skb) = IPPROTO_DSTOPTS;
157 160
158 dstopt = (struct ipv6_destopt_hdr *)skb->h.raw; 161 dstopt = (struct ipv6_destopt_hdr *)skb_transport_header(skb);
159 dstopt->nexthdr = nexthdr; 162 dstopt->nexthdr = nexthdr;
160 163
161 hao = mip6_padn((char *)(dstopt + 1), 164 hao = mip6_padn((char *)(dstopt + 1),
@@ -215,21 +218,22 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct
215 if (likely(opt->dsthao)) { 218 if (likely(opt->dsthao)) {
216 offset = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO); 219 offset = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
217 if (likely(offset >= 0)) 220 if (likely(offset >= 0))
218 hao = (struct ipv6_destopt_hao *)(skb->nh.raw + offset); 221 hao = (struct ipv6_destopt_hao *)
222 (skb_network_header(skb) + offset);
219 } 223 }
220 224
221 skb_get_timestamp(skb, &stamp); 225 skb_get_timestamp(skb, &stamp);
222 226
223 if (!mip6_report_rl_allow(&stamp, &skb->nh.ipv6h->daddr, 227 if (!mip6_report_rl_allow(&stamp, &ipv6_hdr(skb)->daddr,
224 hao ? &hao->addr : &skb->nh.ipv6h->saddr, 228 hao ? &hao->addr : &ipv6_hdr(skb)->saddr,
225 opt->iif)) 229 opt->iif))
226 goto out; 230 goto out;
227 231
228 memset(&sel, 0, sizeof(sel)); 232 memset(&sel, 0, sizeof(sel));
229 memcpy(&sel.daddr, (xfrm_address_t *)&skb->nh.ipv6h->daddr, 233 memcpy(&sel.daddr, (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
230 sizeof(sel.daddr)); 234 sizeof(sel.daddr));
231 sel.prefixlen_d = 128; 235 sel.prefixlen_d = 128;
232 memcpy(&sel.saddr, (xfrm_address_t *)&skb->nh.ipv6h->saddr, 236 memcpy(&sel.saddr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
233 sizeof(sel.saddr)); 237 sizeof(sel.saddr));
234 sel.prefixlen_s = 128; 238 sel.prefixlen_s = 128;
235 sel.family = AF_INET6; 239 sel.family = AF_INET6;
@@ -253,11 +257,13 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
253 u8 **nexthdr) 257 u8 **nexthdr)
254{ 258{
255 u16 offset = sizeof(struct ipv6hdr); 259 u16 offset = sizeof(struct ipv6hdr);
256 struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1); 260 struct ipv6_opt_hdr *exthdr =
257 unsigned int packet_len = skb->tail - skb->nh.raw; 261 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
262 const unsigned char *nh = skb_network_header(skb);
263 unsigned int packet_len = skb->tail - skb->network_header;
258 int found_rhdr = 0; 264 int found_rhdr = 0;
259 265
260 *nexthdr = &skb->nh.ipv6h->nexthdr; 266 *nexthdr = &ipv6_hdr(skb)->nexthdr;
261 267
262 while (offset + 1 <= packet_len) { 268 while (offset + 1 <= packet_len) {
263 269
@@ -288,7 +294,7 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
288 294
289 offset += ipv6_optlen(exthdr); 295 offset += ipv6_optlen(exthdr);
290 *nexthdr = &exthdr->nexthdr; 296 *nexthdr = &exthdr->nexthdr;
291 exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); 297 exthdr = (struct ipv6_opt_hdr *)(nh + offset);
292 } 298 }
293 299
294 return offset; 300 return offset;
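
Annotation: both mip6_destopt_offset() and mip6_rthdr_offset() (below) perform the same walk: start after the fixed IPv6 header and hop from extension header to extension header, where each header advertises its length in 8-octet units minus one (the ipv6_optlen() rule). A self-contained userspace sketch of that traversal over a raw packet buffer; note a fragment header has a fixed 8-byte length and would need special casing.

#include <stddef.h>
#include <stdint.h>

/* Mirrors struct ipv6_opt_hdr: every chained extension header
 * starts with a next-header byte and a length byte.
 */
struct opt_hdr {
	uint8_t nexthdr;
	uint8_t hdrlen;		/* in 8-octet units, minus 1 */
};

/* Return the offset of the upper-layer payload, walking only the
 * header types the length rule is valid for (HBH = 0, RTHDR = 43,
 * DSTOPTS = 60).
 */
static size_t payload_offset(const uint8_t *pkt, size_t pkt_len)
{
	size_t off = 40;		/* fixed IPv6 header */
	uint8_t nh = pkt[6];		/* nexthdr field of the IPv6 header */

	while ((nh == 0 || nh == 43 || nh == 60) &&
	       off + sizeof(struct opt_hdr) <= pkt_len) {
		const struct opt_hdr *h = (const struct opt_hdr *)(pkt + off);

		nh = h->nexthdr;
		off += ((size_t)h->hdrlen + 1) << 3;	/* ipv6_optlen() */
	}
	return off;
}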
@@ -361,10 +367,10 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb)
361 iph = (struct ipv6hdr *)skb->data; 367 iph = (struct ipv6hdr *)skb->data;
362 iph->payload_len = htons(skb->len - sizeof(*iph)); 368 iph->payload_len = htons(skb->len - sizeof(*iph));
363 369
364 nexthdr = *skb->nh.raw; 370 nexthdr = *skb_network_header(skb);
365 *skb->nh.raw = IPPROTO_ROUTING; 371 *skb_network_header(skb) = IPPROTO_ROUTING;
366 372
367 rt2 = (struct rt2_hdr *)skb->h.raw; 373 rt2 = (struct rt2_hdr *)skb_transport_header(skb);
368 rt2->rt_hdr.nexthdr = nexthdr; 374 rt2->rt_hdr.nexthdr = nexthdr;
369 rt2->rt_hdr.hdrlen = (x->props.header_len >> 3) - 1; 375 rt2->rt_hdr.hdrlen = (x->props.header_len >> 3) - 1;
370 rt2->rt_hdr.type = IPV6_SRCRT_TYPE_2; 376 rt2->rt_hdr.type = IPV6_SRCRT_TYPE_2;
@@ -383,11 +389,13 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
383 u8 **nexthdr) 389 u8 **nexthdr)
384{ 390{
385 u16 offset = sizeof(struct ipv6hdr); 391 u16 offset = sizeof(struct ipv6hdr);
386 struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1); 392 struct ipv6_opt_hdr *exthdr =
387 unsigned int packet_len = skb->tail - skb->nh.raw; 393 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
394 const unsigned char *nh = skb_network_header(skb);
395 unsigned int packet_len = skb->tail - skb->network_header;
388 int found_rhdr = 0; 396 int found_rhdr = 0;
389 397
390 *nexthdr = &skb->nh.ipv6h->nexthdr; 398 *nexthdr = &ipv6_hdr(skb)->nexthdr;
391 399
392 while (offset + 1 <= packet_len) { 400 while (offset + 1 <= packet_len) {
393 401
@@ -397,7 +405,7 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
397 case NEXTHDR_ROUTING: 405 case NEXTHDR_ROUTING:
398 if (offset + 3 <= packet_len) { 406 if (offset + 3 <= packet_len) {
399 struct ipv6_rt_hdr *rt; 407 struct ipv6_rt_hdr *rt;
400 rt = (struct ipv6_rt_hdr *)(skb->nh.raw + offset); 408 rt = (struct ipv6_rt_hdr *)(nh + offset);
401 if (rt->type != 0) 409 if (rt->type != 0)
402 return offset; 410 return offset;
403 } 411 }
@@ -417,7 +425,7 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
417 425
418 offset += ipv6_optlen(exthdr); 426 offset += ipv6_optlen(exthdr);
419 *nexthdr = &exthdr->nexthdr; 427 *nexthdr = &exthdr->nexthdr;
420 exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); 428 exthdr = (struct ipv6_opt_hdr *)(nh + offset);
421 } 429 }
422 430
423 return offset; 431 return offset;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 121f31c283f8..d8b36451bada 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -319,6 +319,8 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d
319 return -EINVAL; 319 return -EINVAL;
320} 320}
321 321
322EXPORT_SYMBOL(ndisc_mc_map);
323
322static u32 ndisc_hash(const void *pkey, const struct net_device *dev) 324static u32 ndisc_hash(const void *pkey, const struct net_device *dev)
323{ 325{
324 const u32 *p32 = pkey; 326 const u32 *p32 = pkey;
@@ -425,36 +427,23 @@ static inline void ndisc_flow_init(struct flowi *fl, u8 type,
425 security_sk_classify_flow(ndisc_socket->sk, fl); 427 security_sk_classify_flow(ndisc_socket->sk, fl);
426} 428}
427 429
428static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, 430static void __ndisc_send(struct net_device *dev,
429 struct in6_addr *daddr, struct in6_addr *solicited_addr, 431 struct neighbour *neigh,
430 int router, int solicited, int override, int inc_opt) 432 struct in6_addr *daddr, struct in6_addr *saddr,
433 struct icmp6hdr *icmp6h, struct in6_addr *target,
434 int llinfo, int icmp6_mib_outnd)
431{ 435{
432 struct in6_addr tmpaddr;
433 struct inet6_ifaddr *ifp;
434 struct inet6_dev *idev;
435 struct flowi fl; 436 struct flowi fl;
436 struct dst_entry* dst; 437 struct dst_entry *dst;
437 struct sock *sk = ndisc_socket->sk; 438 struct sock *sk = ndisc_socket->sk;
438 struct in6_addr *src_addr;
439 struct nd_msg *msg;
440 int len;
441 struct sk_buff *skb; 439 struct sk_buff *skb;
440 struct icmp6hdr *hdr;
441 struct inet6_dev *idev;
442 int len;
442 int err; 443 int err;
444 u8 *opt;
443 445
444 len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr); 446 ndisc_flow_init(&fl, icmp6h->icmp6_type, saddr, daddr,
445
446 /* for anycast or proxy, solicited_addr != src_addr */
447 ifp = ipv6_get_ifaddr(solicited_addr, dev, 1);
448 if (ifp) {
449 src_addr = solicited_addr;
450 in6_ifa_put(ifp);
451 } else {
452 if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr))
453 return;
454 src_addr = &tmpaddr;
455 }
456
457 ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr,
458 dev->ifindex); 447 dev->ifindex);
459 448
460 dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output); 449 dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
@@ -465,60 +454,57 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
465 if (err < 0) 454 if (err < 0)
466 return; 455 return;
467 456
468 if (inc_opt) { 457 if (!dev->addr_len)
469 if (dev->addr_len) 458 llinfo = 0;
470 len += ndisc_opt_addr_space(dev); 459
471 else 460 len = sizeof(struct icmp6hdr) + (target ? sizeof(*target) : 0);
472 inc_opt = 0; 461 if (llinfo)
473 } 462 len += ndisc_opt_addr_space(dev);
474 463
475 skb = sock_alloc_send_skb(sk, 464 skb = sock_alloc_send_skb(sk,
476 (MAX_HEADER + sizeof(struct ipv6hdr) + 465 (MAX_HEADER + sizeof(struct ipv6hdr) +
477 len + LL_RESERVED_SPACE(dev)), 466 len + LL_RESERVED_SPACE(dev)),
478 1, &err); 467 1, &err);
479 468 if (!skb) {
480 if (skb == NULL) {
481 ND_PRINTK0(KERN_ERR 469 ND_PRINTK0(KERN_ERR
482 "ICMPv6 NA: %s() failed to allocate an skb.\n", 470 "ICMPv6 ND: %s() failed to allocate an skb.\n",
483 __FUNCTION__); 471 __FUNCTION__);
484 dst_release(dst); 472 dst_release(dst);
485 return; 473 return;
486 } 474 }
487 475
488 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 476 skb_reserve(skb, LL_RESERVED_SPACE(dev));
489 ip6_nd_hdr(sk, skb, dev, src_addr, daddr, IPPROTO_ICMPV6, len); 477 ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
490
491 msg = (struct nd_msg *)skb_put(skb, len);
492 skb->h.raw = (unsigned char*)msg;
493 478
494 msg->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; 479 skb->transport_header = skb->tail;
495 msg->icmph.icmp6_code = 0; 480 skb_put(skb, len);
496 msg->icmph.icmp6_cksum = 0;
497 481
498 msg->icmph.icmp6_unused = 0; 482 hdr = (struct icmp6hdr *)skb_transport_header(skb);
499 msg->icmph.icmp6_router = router; 483 memcpy(hdr, icmp6h, sizeof(*hdr));
500 msg->icmph.icmp6_solicited = solicited;
501 msg->icmph.icmp6_override = override;
502 484
503 /* Set the target address. */ 485 opt = skb_transport_header(skb) + sizeof(struct icmp6hdr);
504 ipv6_addr_copy(&msg->target, solicited_addr); 486 if (target) {
487 ipv6_addr_copy((struct in6_addr *)opt, target);
488 opt += sizeof(*target);
489 }
505 490
506 if (inc_opt) 491 if (llinfo)
507 ndisc_fill_addr_option(msg->opt, ND_OPT_TARGET_LL_ADDR, dev->dev_addr, 492 ndisc_fill_addr_option(opt, llinfo, dev->dev_addr,
508 dev->addr_len, dev->type); 493 dev->addr_len, dev->type);
509 494
510 /* checksum */ 495 hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len,
511 msg->icmph.icmp6_cksum = csum_ipv6_magic(src_addr, daddr, len, 496 IPPROTO_ICMPV6,
512 IPPROTO_ICMPV6, 497 csum_partial((__u8 *) hdr,
513 csum_partial((__u8 *) msg, 498 len, 0));
514 len, 0));
515 499
516 skb->dst = dst; 500 skb->dst = dst;
501
517 idev = in6_dev_get(dst->dev); 502 idev = in6_dev_get(dst->dev);
518 IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); 503 IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
504
519 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output); 505 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
520 if (!err) { 506 if (!err) {
521 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS); 507 ICMP6_INC_STATS(idev, icmp6_mib_outnd);
522 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); 508 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
523 } 509 }
524 510
@@ -526,165 +512,95 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
526 in6_dev_put(idev); 512 in6_dev_put(idev);
527} 513}
528 514
515static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
516 struct in6_addr *daddr, struct in6_addr *solicited_addr,
517 int router, int solicited, int override, int inc_opt)
518{
519 struct in6_addr tmpaddr;
520 struct inet6_ifaddr *ifp;
521 struct in6_addr *src_addr;
522 struct icmp6hdr icmp6h = {
523 .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
524 };
525
526 /* for anycast or proxy, solicited_addr != src_addr */
527 ifp = ipv6_get_ifaddr(solicited_addr, dev, 1);
528 if (ifp) {
529 src_addr = solicited_addr;
530 if (ifp->flags & IFA_F_OPTIMISTIC)
531 override = 0;
532 in6_ifa_put(ifp);
533 } else {
534 if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr))
535 return;
536 src_addr = &tmpaddr;
537 }
538
539 icmp6h.icmp6_router = router;
540 icmp6h.icmp6_solicited = solicited;
541 icmp6h.icmp6_override = override;
542
543 __ndisc_send(dev, neigh, daddr, src_addr,
544 &icmp6h, solicited_addr,
545 inc_opt ? ND_OPT_TARGET_LL_ADDR : 0,
546 ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS);
547}
548
529void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, 549void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
530 struct in6_addr *solicit, 550 struct in6_addr *solicit,
531 struct in6_addr *daddr, struct in6_addr *saddr) 551 struct in6_addr *daddr, struct in6_addr *saddr)
532{ 552{
533 struct flowi fl;
534 struct dst_entry* dst;
535 struct inet6_dev *idev;
536 struct sock *sk = ndisc_socket->sk;
537 struct sk_buff *skb;
538 struct nd_msg *msg;
539 struct in6_addr addr_buf; 553 struct in6_addr addr_buf;
540 int len; 554 struct icmp6hdr icmp6h = {
541 int err; 555 .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
542 int send_llinfo; 556 };
543 557
544 if (saddr == NULL) { 558 if (saddr == NULL) {
545 if (ipv6_get_lladdr(dev, &addr_buf)) 559 if (ipv6_get_lladdr(dev, &addr_buf,
560 (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)))
546 return; 561 return;
547 saddr = &addr_buf; 562 saddr = &addr_buf;
548 } 563 }
549 564
550 ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr, 565 __ndisc_send(dev, neigh, daddr, saddr,
551 dev->ifindex); 566 &icmp6h, solicit,
552 567 !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0,
553 dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output); 568 ICMP6_MIB_OUTNEIGHBORSOLICITS);
554 if (!dst)
555 return;
556
557 err = xfrm_lookup(&dst, &fl, NULL, 0);
558 if (err < 0)
559 return;
560
561 len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
562 send_llinfo = dev->addr_len && !ipv6_addr_any(saddr);
563 if (send_llinfo)
564 len += ndisc_opt_addr_space(dev);
565
566 skb = sock_alloc_send_skb(sk,
567 (MAX_HEADER + sizeof(struct ipv6hdr) +
568 len + LL_RESERVED_SPACE(dev)),
569 1, &err);
570 if (skb == NULL) {
571 ND_PRINTK0(KERN_ERR
572 "ICMPv6 NA: %s() failed to allocate an skb.\n",
573 __FUNCTION__);
574 dst_release(dst);
575 return;
576 }
577
578 skb_reserve(skb, LL_RESERVED_SPACE(dev));
579 ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
580
581 msg = (struct nd_msg *)skb_put(skb, len);
582 skb->h.raw = (unsigned char*)msg;
583 msg->icmph.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION;
584 msg->icmph.icmp6_code = 0;
585 msg->icmph.icmp6_cksum = 0;
586 msg->icmph.icmp6_unused = 0;
587
588 /* Set the target address. */
589 ipv6_addr_copy(&msg->target, solicit);
590
591 if (send_llinfo)
592 ndisc_fill_addr_option(msg->opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr,
593 dev->addr_len, dev->type);
594
595 /* checksum */
596 msg->icmph.icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr,
597 daddr, len,
598 IPPROTO_ICMPV6,
599 csum_partial((__u8 *) msg,
600 len, 0));
601 /* send it! */
602 skb->dst = dst;
603 idev = in6_dev_get(dst->dev);
604 IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
605 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
606 if (!err) {
607 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORSOLICITS);
608 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
609 }
610
611 if (likely(idev != NULL))
612 in6_dev_put(idev);
613} 569}
614 570
615void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr, 571void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
616 struct in6_addr *daddr) 572 struct in6_addr *daddr)
617{ 573{
618 struct flowi fl; 574 struct icmp6hdr icmp6h = {
619 struct dst_entry* dst; 575 .icmp6_type = NDISC_ROUTER_SOLICITATION,
620 struct inet6_dev *idev; 576 };
621 struct sock *sk = ndisc_socket->sk; 577 int send_sllao = dev->addr_len;
622 struct sk_buff *skb;
623 struct icmp6hdr *hdr;
624 __u8 * opt;
625 int len;
626 int err;
627
628 ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr,
629 dev->ifindex);
630
631 dst = ndisc_dst_alloc(dev, NULL, daddr, ip6_output);
632 if (!dst)
633 return;
634
635 err = xfrm_lookup(&dst, &fl, NULL, 0);
636 if (err < 0)
637 return;
638
639 len = sizeof(struct icmp6hdr);
640 if (dev->addr_len)
641 len += ndisc_opt_addr_space(dev);
642
643 skb = sock_alloc_send_skb(sk,
644 (MAX_HEADER + sizeof(struct ipv6hdr) +
645 len + LL_RESERVED_SPACE(dev)),
646 1, &err);
647 if (skb == NULL) {
648 ND_PRINTK0(KERN_ERR
649 "ICMPv6 RS: %s() failed to allocate an skb.\n",
650 __FUNCTION__);
651 dst_release(dst);
652 return;
653 }
654
655 skb_reserve(skb, LL_RESERVED_SPACE(dev));
656 ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
657
658 hdr = (struct icmp6hdr *)skb_put(skb, len);
659 skb->h.raw = (unsigned char*)hdr;
660 hdr->icmp6_type = NDISC_ROUTER_SOLICITATION;
661 hdr->icmp6_code = 0;
662 hdr->icmp6_cksum = 0;
663 hdr->icmp6_unused = 0;
664
665 opt = (u8*) (hdr + 1);
666
667 if (dev->addr_len)
668 ndisc_fill_addr_option(opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr,
669 dev->addr_len, dev->type);
670
671 /* checksum */
672 hdr->icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr, daddr, len,
673 IPPROTO_ICMPV6,
674 csum_partial((__u8 *) hdr, len, 0));
675 578
676 /* send it! */ 579#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
677 skb->dst = dst; 580 /*
678 idev = in6_dev_get(dst->dev); 581 * According to section 2.2 of RFC 4429, we must not
679 IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); 582 * send router solicitations with a sllao from
680 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output); 583 * optimistic addresses, but we may send the solicitation
681 if (!err) { 584 * if we don't include the sllao. So here we check
682 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTROUTERSOLICITS); 585 * if our address is optimistic, and if so, we
 683 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); 586 * suppress the inclusion of the sllao.
587 */
588 if (send_sllao) {
589 struct inet6_ifaddr *ifp = ipv6_get_ifaddr(saddr, dev, 1);
590 if (ifp) {
591 if (ifp->flags & IFA_F_OPTIMISTIC) {
592 send_sllao = 0;
593 }
594 in6_ifa_put(ifp);
595 } else {
596 send_sllao = 0;
597 }
684 } 598 }
685 599#endif
686 if (likely(idev != NULL)) 600 __ndisc_send(dev, NULL, daddr, saddr,
687 in6_dev_put(idev); 601 &icmp6h, NULL,
602 send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0,
603 ICMP6_MIB_OUTROUTERSOLICITS);
688} 604}
689 605
690 606
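
Annotation: the net effect of this block is that three hand-rolled senders (NA, NS, RS) collapse into __ndisc_send(), and each wrapper shrinks to building an icmp6hdr template, choosing the link-layer option type, and naming its MIB counter. A hypothetical fourth caller would take the same shape:

/* Sketch only: the caller shape after the consolidation, using a
 * made-up wrapper around the real __ndisc_send() signature above.
 */
static void my_send_probe(struct net_device *dev, struct neighbour *neigh,
			  struct in6_addr *daddr, struct in6_addr *saddr,
			  struct in6_addr *target)
{
	struct icmp6hdr icmp6h = {
		.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
	};

	__ndisc_send(dev, neigh, daddr, saddr, &icmp6h, target,
		     ND_OPT_SOURCE_LL_ADDR, ICMP6_MIB_OUTNEIGHBORSOLICITS);
}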
@@ -708,8 +624,8 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
708 struct in6_addr *target = (struct in6_addr *)&neigh->primary_key; 624 struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
709 int probes = atomic_read(&neigh->probes); 625 int probes = atomic_read(&neigh->probes);
710 626
711 if (skb && ipv6_chk_addr(&skb->nh.ipv6h->saddr, dev, 1)) 627 if (skb && ipv6_chk_addr(&ipv6_hdr(skb)->saddr, dev, 1))
712 saddr = &skb->nh.ipv6h->saddr; 628 saddr = &ipv6_hdr(skb)->saddr;
713 629
714 if ((probes -= neigh->parms->ucast_probes) < 0) { 630 if ((probes -= neigh->parms->ucast_probes) < 0) {
715 if (!(neigh->nud_state & NUD_VALID)) { 631 if (!(neigh->nud_state & NUD_VALID)) {
@@ -732,11 +648,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
732 648
733static void ndisc_recv_ns(struct sk_buff *skb) 649static void ndisc_recv_ns(struct sk_buff *skb)
734{ 650{
735 struct nd_msg *msg = (struct nd_msg *)skb->h.raw; 651 struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
736 struct in6_addr *saddr = &skb->nh.ipv6h->saddr; 652 struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
737 struct in6_addr *daddr = &skb->nh.ipv6h->daddr; 653 struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
738 u8 *lladdr = NULL; 654 u8 *lladdr = NULL;
739 u32 ndoptlen = skb->tail - msg->opt; 655 u32 ndoptlen = skb->tail - (skb->transport_header +
656 offsetof(struct nd_msg, opt));
740 struct ndisc_options ndopts; 657 struct ndisc_options ndopts;
741 struct net_device *dev = skb->dev; 658 struct net_device *dev = skb->dev;
742 struct inet6_ifaddr *ifp; 659 struct inet6_ifaddr *ifp;
@@ -796,28 +713,40 @@ static void ndisc_recv_ns(struct sk_buff *skb)
796 inc = ipv6_addr_is_multicast(daddr); 713 inc = ipv6_addr_is_multicast(daddr);
797 714
798 if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1)) != NULL) { 715 if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1)) != NULL) {
799 if (ifp->flags & IFA_F_TENTATIVE) { 716
800 /* Address is tentative. If the source 717 if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
801 is unspecified address, it is someone 718 if (dad) {
802 does DAD, otherwise we ignore solicitations 719 if (dev->type == ARPHRD_IEEE802_TR) {
803 until DAD timer expires. 720 const unsigned char *sadr;
804 */ 721 sadr = skb_mac_header(skb);
805 if (!dad) 722 if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
723 sadr[9] == dev->dev_addr[1] &&
724 sadr[10] == dev->dev_addr[2] &&
725 sadr[11] == dev->dev_addr[3] &&
726 sadr[12] == dev->dev_addr[4] &&
727 sadr[13] == dev->dev_addr[5]) {
728 /* looped-back to us */
729 goto out;
730 }
731 }
732
733 /*
734 * We are colliding with another node
735 * who is doing DAD
736 * so fail our DAD process
737 */
738 addrconf_dad_failure(ifp);
806 goto out; 739 goto out;
807 if (dev->type == ARPHRD_IEEE802_TR) { 740 } else {
808 unsigned char *sadr = skb->mac.raw; 741 /*
809 if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 && 742 * This is not a dad solicitation.
810 sadr[9] == dev->dev_addr[1] && 743 * If we are an optimistic node,
811 sadr[10] == dev->dev_addr[2] && 744 * we should respond.
812 sadr[11] == dev->dev_addr[3] && 745 * Otherwise, we should ignore it.
813 sadr[12] == dev->dev_addr[4] && 746 */
814 sadr[13] == dev->dev_addr[5]) { 747 if (!(ifp->flags & IFA_F_OPTIMISTIC))
815 /* looped-back to us */
816 goto out; 748 goto out;
817 }
818 } 749 }
819 addrconf_dad_failure(ifp);
820 return;
821 } 750 }
822 751
823 idev = ifp->idev; 752 idev = ifp->idev;
@@ -898,11 +827,12 @@ out:
898 827
899static void ndisc_recv_na(struct sk_buff *skb) 828static void ndisc_recv_na(struct sk_buff *skb)
900{ 829{
901 struct nd_msg *msg = (struct nd_msg *)skb->h.raw; 830 struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
902 struct in6_addr *saddr = &skb->nh.ipv6h->saddr; 831 struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
903 struct in6_addr *daddr = &skb->nh.ipv6h->daddr; 832 struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
904 u8 *lladdr = NULL; 833 u8 *lladdr = NULL;
905 u32 ndoptlen = skb->tail - msg->opt; 834 u32 ndoptlen = skb->tail - (skb->transport_header +
835 offsetof(struct nd_msg, opt));
906 struct ndisc_options ndopts; 836 struct ndisc_options ndopts;
907 struct net_device *dev = skb->dev; 837 struct net_device *dev = skb->dev;
908 struct inet6_ifaddr *ifp; 838 struct inet6_ifaddr *ifp;
@@ -1000,11 +930,11 @@ out:
1000 930
1001static void ndisc_recv_rs(struct sk_buff *skb) 931static void ndisc_recv_rs(struct sk_buff *skb)
1002{ 932{
1003 struct rs_msg *rs_msg = (struct rs_msg *) skb->h.raw; 933 struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb);
1004 unsigned long ndoptlen = skb->len - sizeof(*rs_msg); 934 unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
1005 struct neighbour *neigh; 935 struct neighbour *neigh;
1006 struct inet6_dev *idev; 936 struct inet6_dev *idev;
1007 struct in6_addr *saddr = &skb->nh.ipv6h->saddr; 937 struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
1008 struct ndisc_options ndopts; 938 struct ndisc_options ndopts;
1009 u8 *lladdr = NULL; 939 u8 *lladdr = NULL;
1010 940
@@ -1057,7 +987,7 @@ out:
1057 987
1058static void ndisc_router_discovery(struct sk_buff *skb) 988static void ndisc_router_discovery(struct sk_buff *skb)
1059{ 989{
1060 struct ra_msg *ra_msg = (struct ra_msg *) skb->h.raw; 990 struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
1061 struct neighbour *neigh = NULL; 991 struct neighbour *neigh = NULL;
1062 struct inet6_dev *in6_dev; 992 struct inet6_dev *in6_dev;
1063 struct rt6_info *rt = NULL; 993 struct rt6_info *rt = NULL;
@@ -1068,9 +998,9 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1068 998
1069 __u8 * opt = (__u8 *)(ra_msg + 1); 999 __u8 * opt = (__u8 *)(ra_msg + 1);
1070 1000
1071 optlen = (skb->tail - skb->h.raw) - sizeof(struct ra_msg); 1001 optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg);
1072 1002
1073 if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr) & IPV6_ADDR_LINKLOCAL)) { 1003 if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1074 ND_PRINTK2(KERN_WARNING 1004 ND_PRINTK2(KERN_WARNING
1075 "ICMPv6 RA: source address is not link-local.\n"); 1005 "ICMPv6 RA: source address is not link-local.\n");
1076 return; 1006 return;
@@ -1136,7 +1066,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1136 pref = ICMPV6_ROUTER_PREF_MEDIUM; 1066 pref = ICMPV6_ROUTER_PREF_MEDIUM;
1137#endif 1067#endif
1138 1068
1139 rt = rt6_get_dflt_router(&skb->nh.ipv6h->saddr, skb->dev); 1069 rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev);
1140 1070
1141 if (rt) 1071 if (rt)
1142 neigh = rt->rt6i_nexthop; 1072 neigh = rt->rt6i_nexthop;
@@ -1151,7 +1081,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1151 ND_PRINTK3(KERN_DEBUG 1081 ND_PRINTK3(KERN_DEBUG
1152 "ICMPv6 RA: adding default router.\n"); 1082 "ICMPv6 RA: adding default router.\n");
1153 1083
1154 rt = rt6_add_dflt_router(&skb->nh.ipv6h->saddr, skb->dev, pref); 1084 rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
1155 if (rt == NULL) { 1085 if (rt == NULL) {
1156 ND_PRINTK0(KERN_ERR 1086 ND_PRINTK0(KERN_ERR
1157 "ICMPv6 RA: %s() failed to add default route.\n", 1087 "ICMPv6 RA: %s() failed to add default route.\n",
@@ -1223,7 +1153,7 @@ skip_defrtr:
1223 */ 1153 */
1224 1154
1225 if (!neigh) 1155 if (!neigh)
1226 neigh = __neigh_lookup(&nd_tbl, &skb->nh.ipv6h->saddr, 1156 neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr,
1227 skb->dev, 1); 1157 skb->dev, 1);
1228 if (neigh) { 1158 if (neigh) {
1229 u8 *lladdr = NULL; 1159 u8 *lladdr = NULL;
@@ -1252,7 +1182,7 @@ skip_defrtr:
1252 if (((struct route_info *)p)->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen) 1182 if (((struct route_info *)p)->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
1253 continue; 1183 continue;
1254 rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3, 1184 rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3,
1255 &skb->nh.ipv6h->saddr); 1185 &ipv6_hdr(skb)->saddr);
1256 } 1186 }
1257 } 1187 }
1258#endif 1188#endif
@@ -1311,13 +1241,13 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
1311 int optlen; 1241 int optlen;
1312 u8 *lladdr = NULL; 1242 u8 *lladdr = NULL;
1313 1243
1314 if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr) & IPV6_ADDR_LINKLOCAL)) { 1244 if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1315 ND_PRINTK2(KERN_WARNING 1245 ND_PRINTK2(KERN_WARNING
1316 "ICMPv6 Redirect: source address is not link-local.\n"); 1246 "ICMPv6 Redirect: source address is not link-local.\n");
1317 return; 1247 return;
1318 } 1248 }
1319 1249
1320 optlen = skb->tail - skb->h.raw; 1250 optlen = skb->tail - skb->transport_header;
1321 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); 1251 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1322 1252
1323 if (optlen < 0) { 1253 if (optlen < 0) {
@@ -1326,7 +1256,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
1326 return; 1256 return;
1327 } 1257 }
1328 1258
1329 icmph = (struct icmp6hdr *) skb->h.raw; 1259 icmph = icmp6_hdr(skb);
1330 target = (struct in6_addr *) (icmph + 1); 1260 target = (struct in6_addr *) (icmph + 1);
1331 dest = target + 1; 1261 dest = target + 1;
1332 1262
@@ -1376,8 +1306,8 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
1376 1306
1377 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1); 1307 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1378 if (neigh) { 1308 if (neigh) {
1379 rt6_redirect(dest, &skb->nh.ipv6h->daddr, 1309 rt6_redirect(dest, &ipv6_hdr(skb)->daddr,
1380 &skb->nh.ipv6h->saddr, neigh, lladdr, 1310 &ipv6_hdr(skb)->saddr, neigh, lladdr,
1381 on_link); 1311 on_link);
1382 neigh_release(neigh); 1312 neigh_release(neigh);
1383 } 1313 }
@@ -1406,21 +1336,21 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1406 1336
1407 dev = skb->dev; 1337 dev = skb->dev;
1408 1338
1409 if (ipv6_get_lladdr(dev, &saddr_buf)) { 1339 if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
1410 ND_PRINTK2(KERN_WARNING 1340 ND_PRINTK2(KERN_WARNING
1411 "ICMPv6 Redirect: no link-local address on %s\n", 1341 "ICMPv6 Redirect: no link-local address on %s\n",
1412 dev->name); 1342 dev->name);
1413 return; 1343 return;
1414 } 1344 }
1415 1345
1416 if (!ipv6_addr_equal(&skb->nh.ipv6h->daddr, target) && 1346 if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) &&
1417 !(ipv6_addr_type(target) & IPV6_ADDR_LINKLOCAL)) { 1347 !(ipv6_addr_type(target) & IPV6_ADDR_LINKLOCAL)) {
1418 ND_PRINTK2(KERN_WARNING 1348 ND_PRINTK2(KERN_WARNING
1419 "ICMPv6 Redirect: target address is not link-local.\n"); 1349 "ICMPv6 Redirect: target address is not link-local.\n");
1420 return; 1350 return;
1421 } 1351 }
1422 1352
1423 ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr, 1353 ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &ipv6_hdr(skb)->saddr,
1424 dev->ifindex); 1354 dev->ifindex);
1425 1355
1426 dst = ip6_route_output(NULL, &fl); 1356 dst = ip6_route_output(NULL, &fl);
@@ -1475,11 +1405,12 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1475 hlen = 0; 1405 hlen = 0;
1476 1406
1477 skb_reserve(buff, LL_RESERVED_SPACE(dev)); 1407 skb_reserve(buff, LL_RESERVED_SPACE(dev));
1478 ip6_nd_hdr(sk, buff, dev, &saddr_buf, &skb->nh.ipv6h->saddr, 1408 ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr,
1479 IPPROTO_ICMPV6, len); 1409 IPPROTO_ICMPV6, len);
1480 1410
1481 icmph = (struct icmp6hdr *)skb_put(buff, len); 1411 skb_set_transport_header(buff, skb_tail_pointer(buff) - buff->data);
1482 buff->h.raw = (unsigned char*)icmph; 1412 skb_put(buff, len);
1413 icmph = icmp6_hdr(buff);
1483 1414
1484 memset(icmph, 0, sizeof(struct icmp6hdr)); 1415 memset(icmph, 0, sizeof(struct icmp6hdr));
1485 icmph->icmp6_type = NDISC_REDIRECT; 1416 icmph->icmp6_type = NDISC_REDIRECT;
@@ -1491,7 +1422,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1491 addrp = (struct in6_addr *)(icmph + 1); 1422 addrp = (struct in6_addr *)(icmph + 1);
1492 ipv6_addr_copy(addrp, target); 1423 ipv6_addr_copy(addrp, target);
1493 addrp++; 1424 addrp++;
1494 ipv6_addr_copy(addrp, &skb->nh.ipv6h->daddr); 1425 ipv6_addr_copy(addrp, &ipv6_hdr(skb)->daddr);
1495 1426
1496 opt = (u8*) (addrp + 1); 1427 opt = (u8*) (addrp + 1);
1497 1428
@@ -1512,9 +1443,9 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1512 *(opt++) = (rd_len >> 3); 1443 *(opt++) = (rd_len >> 3);
1513 opt += 6; 1444 opt += 6;
1514 1445
1515 memcpy(opt, skb->nh.ipv6h, rd_len - 8); 1446 memcpy(opt, ipv6_hdr(skb), rd_len - 8);
1516 1447
1517 icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &skb->nh.ipv6h->saddr, 1448 icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr,
1518 len, IPPROTO_ICMPV6, 1449 len, IPPROTO_ICMPV6,
1519 csum_partial((u8 *) icmph, len, 0)); 1450 csum_partial((u8 *) icmph, len, 0));
1520 1451
@@ -1544,14 +1475,14 @@ int ndisc_rcv(struct sk_buff *skb)
1544 if (!pskb_may_pull(skb, skb->len)) 1475 if (!pskb_may_pull(skb, skb->len))
1545 return 0; 1476 return 0;
1546 1477
1547 msg = (struct nd_msg *) skb->h.raw; 1478 msg = (struct nd_msg *)skb_transport_header(skb);
1548 1479
1549 __skb_push(skb, skb->data-skb->h.raw); 1480 __skb_push(skb, skb->data - skb_transport_header(skb));
1550 1481
1551 if (skb->nh.ipv6h->hop_limit != 255) { 1482 if (ipv6_hdr(skb)->hop_limit != 255) {
1552 ND_PRINTK2(KERN_WARNING 1483 ND_PRINTK2(KERN_WARNING
1553 "ICMPv6 NDISC: invalid hop-limit: %d\n", 1484 "ICMPv6 NDISC: invalid hop-limit: %d\n",
1554 skb->nh.ipv6h->hop_limit); 1485 ipv6_hdr(skb)->hop_limit);
1555 return 0; 1486 return 0;
1556 } 1487 }
1557 1488
@@ -1584,7 +1515,7 @@ int ndisc_rcv(struct sk_buff *skb)
1584 case NDISC_REDIRECT: 1515 case NDISC_REDIRECT:
1585 ndisc_redirect_rcv(skb); 1516 ndisc_redirect_rcv(skb);
1586 break; 1517 break;
1587 }; 1518 }
1588 1519
1589 return 0; 1520 return 0;
1590} 1521}
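
The ndisc.c hunks above are part of the tree-wide sk_buff conversion in this merge: the union-based header pointers (skb->nh.ipv6h, skb->h.raw, skb->mac.raw) give way to typed accessors (ipv6_hdr(), icmp6_hdr(), skb_transport_header()) and explicit set/reset helpers, so that sk_buff_data_t can become an offset rather than a pointer on 64-bit builds. A minimal sketch of the accessor idiom, assuming the offset-based layout (the my_ names are illustrative, not the kernel's):

    #include <linux/skbuff.h>
    #include <net/ipv6.h>

    /* Illustrative restatement of the 2.6.22-era helpers; the real
     * definitions also cover the 32-bit build, where the header
     * fields remain plain pointers. */
    static inline unsigned char *my_network_header(const struct sk_buff *skb)
    {
            return skb->head + skb->network_header;   /* offset form */
    }

    static inline struct ipv6hdr *my_ipv6_hdr(const struct sk_buff *skb)
    {
            return (struct ipv6hdr *)my_network_header(skb);
    }

With these in place, a read such as skb->nh.ipv6h->saddr becomes ipv6_hdr(skb)->saddr with no change in behavior on pointer builds.
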
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 1c405dd30c67..38b149613915 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -11,7 +11,7 @@
11 11
12int ip6_route_me_harder(struct sk_buff *skb) 12int ip6_route_me_harder(struct sk_buff *skb)
13{ 13{
14 struct ipv6hdr *iph = skb->nh.ipv6h; 14 struct ipv6hdr *iph = ipv6_hdr(skb);
15 struct dst_entry *dst; 15 struct dst_entry *dst;
16 struct flowi fl = { 16 struct flowi fl = {
17 .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, 17 .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
@@ -61,7 +61,7 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_info *info)
61 struct ip6_rt_info *rt_info = nf_info_reroute(info); 61 struct ip6_rt_info *rt_info = nf_info_reroute(info);
62 62
63 if (info->hook == NF_IP6_LOCAL_OUT) { 63 if (info->hook == NF_IP6_LOCAL_OUT) {
64 struct ipv6hdr *iph = skb->nh.ipv6h; 64 struct ipv6hdr *iph = ipv6_hdr(skb);
65 65
66 rt_info->daddr = iph->daddr; 66 rt_info->daddr = iph->daddr;
67 rt_info->saddr = iph->saddr; 67 rt_info->saddr = iph->saddr;
@@ -73,7 +73,7 @@ static int nf_ip6_reroute(struct sk_buff **pskb, const struct nf_info *info)
73 struct ip6_rt_info *rt_info = nf_info_reroute(info); 73 struct ip6_rt_info *rt_info = nf_info_reroute(info);
74 74
75 if (info->hook == NF_IP6_LOCAL_OUT) { 75 if (info->hook == NF_IP6_LOCAL_OUT) {
76 struct ipv6hdr *iph = (*pskb)->nh.ipv6h; 76 struct ipv6hdr *iph = ipv6_hdr(*pskb);
77 if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || 77 if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
78 !ipv6_addr_equal(&iph->saddr, &rt_info->saddr)) 78 !ipv6_addr_equal(&iph->saddr, &rt_info->saddr))
79 return ip6_route_me_harder(*pskb); 79 return ip6_route_me_harder(*pskb);
@@ -84,7 +84,7 @@ static int nf_ip6_reroute(struct sk_buff **pskb, const struct nf_info *info)
84__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, 84__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
85 unsigned int dataoff, u_int8_t protocol) 85 unsigned int dataoff, u_int8_t protocol)
86{ 86{
87 struct ipv6hdr *ip6h = skb->nh.ipv6h; 87 struct ipv6hdr *ip6h = ipv6_hdr(skb);
88 __sum16 csum = 0; 88 __sum16 csum = 0;
89 89
90 switch (skb->ip_summed) { 90 switch (skb->ip_summed) {
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index fdb30a5916e5..0004db38af6d 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -11,18 +11,6 @@
11 * This program is free software; you can redistribute it and/or modify 11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as 12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation. 13 * published by the Free Software Foundation.
14 *
15 * 2001-11-06: First try. Working with ip_queue.c for IPv4 and trying
16 * to adapt it to IPv6
17 * HEAVILY based in ipqueue.c by James Morris. It's just
18 * a little modified version of it, so he's nearly the
19 * real coder of this.
20 * Few changes needed, mainly the hard_routing code and
21 * the netlink socket protocol (we're NETLINK_IP6_FW).
22 * 2002-06-25: Code cleanup. [JM: ported cleanup over from ip_queue.c]
23 * 2005-02-04: Added /proc counter for dropped packets; fixed so
24 * packets aren't delivered to user space if they're going
25 * to be dropped.
26 */ 14 */
27#include <linux/module.h> 15#include <linux/module.h>
28#include <linux/skbuff.h> 16#include <linux/skbuff.h>
@@ -189,12 +177,13 @@ ipq_flush(int verdict)
189static struct sk_buff * 177static struct sk_buff *
190ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) 178ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
191{ 179{
192 unsigned char *old_tail; 180 sk_buff_data_t old_tail;
193 size_t size = 0; 181 size_t size = 0;
194 size_t data_len = 0; 182 size_t data_len = 0;
195 struct sk_buff *skb; 183 struct sk_buff *skb;
196 struct ipq_packet_msg *pmsg; 184 struct ipq_packet_msg *pmsg;
197 struct nlmsghdr *nlh; 185 struct nlmsghdr *nlh;
186 struct timeval tv;
198 187
199 read_lock_bh(&queue_lock); 188 read_lock_bh(&queue_lock);
200 189
@@ -232,15 +221,16 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
232 if (!skb) 221 if (!skb)
233 goto nlmsg_failure; 222 goto nlmsg_failure;
234 223
235 old_tail= skb->tail; 224 old_tail = skb->tail;
236 nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh)); 225 nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
237 pmsg = NLMSG_DATA(nlh); 226 pmsg = NLMSG_DATA(nlh);
238 memset(pmsg, 0, sizeof(*pmsg)); 227 memset(pmsg, 0, sizeof(*pmsg));
239 228
240 pmsg->packet_id = (unsigned long )entry; 229 pmsg->packet_id = (unsigned long )entry;
241 pmsg->data_len = data_len; 230 pmsg->data_len = data_len;
242 pmsg->timestamp_sec = entry->skb->tstamp.off_sec; 231 tv = ktime_to_timeval(entry->skb->tstamp);
243 pmsg->timestamp_usec = entry->skb->tstamp.off_usec; 232 pmsg->timestamp_sec = tv.tv_sec;
233 pmsg->timestamp_usec = tv.tv_usec;
244 pmsg->mark = entry->skb->mark; 234 pmsg->mark = entry->skb->mark;
245 pmsg->hook = entry->info->hook; 235 pmsg->hook = entry->info->hook;
246 pmsg->hw_protocol = entry->skb->protocol; 236 pmsg->hw_protocol = entry->skb->protocol;
@@ -376,7 +366,7 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
376 } 366 }
377 if (!skb_make_writable(&e->skb, v->data_len)) 367 if (!skb_make_writable(&e->skb, v->data_len))
378 return -ENOMEM; 368 return -ENOMEM;
379 memcpy(e->skb->data, v->payload, v->data_len); 369 skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
380 e->skb->ip_summed = CHECKSUM_NONE; 370 e->skb->ip_summed = CHECKSUM_NONE;
381 371
382 return 0; 372 return 0;
@@ -485,7 +475,7 @@ ipq_rcv_skb(struct sk_buff *skb)
485 if (skblen < sizeof(*nlh)) 475 if (skblen < sizeof(*nlh))
486 return; 476 return;
487 477
488 nlh = (struct nlmsghdr *)skb->data; 478 nlh = nlmsg_hdr(skb);
489 nlmsglen = nlh->nlmsg_len; 479 nlmsglen = nlh->nlmsg_len;
490 if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen) 480 if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
491 return; 481 return;
@@ -667,7 +657,7 @@ static int __init ip6_queue_init(void)
667 struct proc_dir_entry *proc; 657 struct proc_dir_entry *proc;
668 658
669 netlink_register_notifier(&ipq_nl_notifier); 659 netlink_register_notifier(&ipq_nl_notifier);
670 ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk, 660 ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk, NULL,
671 THIS_MODULE); 661 THIS_MODULE);
672 if (ipqnl == NULL) { 662 if (ipqnl == NULL) {
673 printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); 663 printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
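
In ip6_queue.c the timestamp copy changes because skb->tstamp is now a ktime_t rather than the old off_sec/off_usec pair; the queue message still carries seconds and microseconds, so the value is converted once with ktime_to_timeval(). A sketch of the pattern (fill_timestamp is a made-up name):

    #include <linux/ktime.h>
    #include <linux/skbuff.h>

    /* Convert an skb's ktime_t receive stamp into the sec/usec pair
     * an existing userspace ABI expects. */
    static void fill_timestamp(const struct sk_buff *skb,
                               unsigned long *sec, unsigned long *usec)
    {
            struct timeval tv = ktime_to_timeval(skb->tstamp);

            *sec  = tv.tv_sec;
            *usec = tv.tv_usec;
    }

The same hunk also drops the historical changelog block from the file header, as happens in several other files in this series.
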
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 7c512e13f956..9aa624026688 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -7,15 +7,6 @@
7 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation. 9 * published by the Free Software Foundation.
10 *
11 * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12 * - increase module usage count as soon as we have rules inside
13 * a table
14 * 06 Jun 2002 Andras Kis-Szabo <kisza@sch.bme.hu>
15 * - new extension header parser code
16 * 15 Oct 2005 Harald Welte <laforge@netfilter.org>
17 * - Unification of {ip,ip6}_tables into x_tables
18 * - Removed tcp and udp code, since it's not ipv6 specific
19 */ 10 */
20 11
21#include <linux/capability.h> 12#include <linux/capability.h>
@@ -115,7 +106,7 @@ ip6_packet_match(const struct sk_buff *skb,
115{ 106{
116 size_t i; 107 size_t i;
117 unsigned long ret; 108 unsigned long ret;
118 const struct ipv6hdr *ipv6 = skb->nh.ipv6h; 109 const struct ipv6hdr *ipv6 = ipv6_hdr(skb);
119 110
120#define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg)) 111#define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg))
121 112
@@ -301,7 +292,7 @@ ip6t_do_table(struct sk_buff **pskb,
301 goto no_match; 292 goto no_match;
302 293
303 ADD_COUNTER(e->counters, 294 ADD_COUNTER(e->counters,
304 ntohs((*pskb)->nh.ipv6h->payload_len) 295 ntohs(ipv6_hdr(*pskb)->payload_len)
305 + IPV6_HDR_LEN, 296 + IPV6_HDR_LEN,
306 1); 297 1);
307 298
@@ -1448,8 +1439,8 @@ static void __exit ip6_tables_fini(void)
1448int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, 1439int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
1449 int target, unsigned short *fragoff) 1440 int target, unsigned short *fragoff)
1450{ 1441{
1451 unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data; 1442 unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
1452 u8 nexthdr = skb->nh.ipv6h->nexthdr; 1443 u8 nexthdr = ipv6_hdr(skb)->nexthdr;
1453 unsigned int len = skb->len - start; 1444 unsigned int len = skb->len - start;
1454 1445
1455 if (fragoff) 1446 if (fragoff)
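
The ipv6_find_hdr() hunk replaces pointer arithmetic against the old union with skb_network_offset(); both forms locate the first byte past the fixed IPv6 header. A sketch of the equivalence, assuming the network header has been set on this skb:

    #include <linux/skbuff.h>
    #include <net/ipv6.h>

    /* Offset of the first extension header, computed purely from the
     * recorded network-header position. */
    static unsigned int first_exthdr_offset(const struct sk_buff *skb)
    {
            return skb_network_offset(skb) + sizeof(struct ipv6hdr);
    }

This equals (u8 *)(ipv6_hdr(skb) + 1) - skb->data because ipv6_hdr() is defined as the header sitting at the network-header offset.
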
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index ccbab66277e3..4115a576ba25 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -32,7 +32,7 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb,
32 if (!skb_make_writable(pskb, (*pskb)->len)) 32 if (!skb_make_writable(pskb, (*pskb)->len))
33 return NF_DROP; 33 return NF_DROP;
34 34
35 ip6h = (*pskb)->nh.ipv6h; 35 ip6h = ipv6_hdr(*pskb);
36 36
37 switch (info->mode) { 37 switch (info->mode) {
38 case IP6T_HL_SET: 38 case IP6T_HL_SET:
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index afaa039d0b7b..5bb9cd349350 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -396,8 +396,8 @@ ip6t_log_packet(unsigned int pf,
396 /* MAC logging for input chain only. */ 396 /* MAC logging for input chain only. */
397 printk("MAC="); 397 printk("MAC=");
398 if (skb->dev && (len = skb->dev->hard_header_len) && 398 if (skb->dev && (len = skb->dev->hard_header_len) &&
399 skb->mac.raw != skb->nh.raw) { 399 skb->mac_header != skb->network_header) {
400 unsigned char *p = skb->mac.raw; 400 const unsigned char *p = skb_mac_header(skb);
401 int i; 401 int i;
402 402
403 if (skb->dev->type == ARPHRD_SIT && 403 if (skb->dev->type == ARPHRD_SIT &&
@@ -412,7 +412,8 @@ ip6t_log_packet(unsigned int pf,
412 printk(" "); 412 printk(" ");
413 413
414 if (skb->dev->type == ARPHRD_SIT) { 414 if (skb->dev->type == ARPHRD_SIT) {
415 struct iphdr *iph = (struct iphdr *)skb->mac.raw; 415 const struct iphdr *iph =
416 (struct iphdr *)skb_mac_header(skb);
416 printk("TUNNEL=%u.%u.%u.%u->%u.%u.%u.%u ", 417 printk("TUNNEL=%u.%u.%u.%u->%u.%u.%u.%u ",
417 NIPQUAD(iph->saddr), 418 NIPQUAD(iph->saddr),
418 NIPQUAD(iph->daddr)); 419 NIPQUAD(iph->daddr));
@@ -421,7 +422,7 @@ ip6t_log_packet(unsigned int pf,
421 printk(" "); 422 printk(" ");
422 } 423 }
423 424
424 dump_packet(loginfo, skb, (u8*)skb->nh.ipv6h - skb->data, 1); 425 dump_packet(loginfo, skb, skb_network_offset(skb), 1);
425 printk("\n"); 426 printk("\n");
426 spin_unlock_bh(&log_lock); 427 spin_unlock_bh(&log_lock);
427} 428}
@@ -489,14 +490,10 @@ static int __init ip6t_log_init(void)
489 ret = xt_register_target(&ip6t_log_reg); 490 ret = xt_register_target(&ip6t_log_reg);
490 if (ret < 0) 491 if (ret < 0)
491 return ret; 492 return ret;
492 if (nf_log_register(PF_INET6, &ip6t_logger) < 0) { 493 ret = nf_log_register(PF_INET6, &ip6t_logger);
493 printk(KERN_WARNING "ip6t_LOG: not logging via system console " 494 if (ret < 0 && ret != -EEXIST)
494 "since somebody else already registered for PF_INET6\n"); 495 xt_unregister_target(&ip6t_log_reg);
495 /* we cannot make module load fail here, since otherwise 496 return ret;
496 * ip6tables userspace would abort */
497 }
498
499 return 0;
500} 497}
501 498
502static void __exit ip6t_log_fini(void) 499static void __exit ip6t_log_fini(void)
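
ip6t_log_init() previously swallowed a failed nf_log_register() with a printk; now the error propagates and the target registration is unwound, with -EEXIST treated specially since another logger already bound to PF_INET6 is not a reason to tear the target down. A condensed sketch of the register/unwind idiom (example_* names are placeholders):

    #include <linux/netfilter.h>
    #include <linux/netfilter/x_tables.h>

    /* Register a target plus a logger, unwinding the first if the
     * second fails; -EEXIST means another PF_INET6 logger won. */
    static int __init example_log_init(void)
    {
            int ret;

            ret = xt_register_target(&example_log_reg);
            if (ret < 0)
                    return ret;

            ret = nf_log_register(PF_INET6, &example_logger);
            if (ret < 0 && ret != -EEXIST)
                    xt_unregister_target(&example_log_reg);

            return ret;
    }

Note that, as written, returning -EEXIST still fails module load even though the target stays registered; the sketch mirrors the hunk above rather than an idealized version.
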
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 6abee94c929f..cb3d2415a064 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -47,7 +47,7 @@ static void send_reset(struct sk_buff *oldskb)
47 struct tcphdr otcph, *tcph; 47 struct tcphdr otcph, *tcph;
48 unsigned int otcplen, hh_len; 48 unsigned int otcplen, hh_len;
49 int tcphoff, needs_ack; 49 int tcphoff, needs_ack;
50 struct ipv6hdr *oip6h = oldskb->nh.ipv6h, *ip6h; 50 struct ipv6hdr *oip6h = ipv6_hdr(oldskb), *ip6h;
51 struct dst_entry *dst = NULL; 51 struct dst_entry *dst = NULL;
52 u8 proto; 52 u8 proto;
53 struct flowi fl; 53 struct flowi fl;
@@ -120,8 +120,9 @@ static void send_reset(struct sk_buff *oldskb)
120 120
121 skb_reserve(nskb, hh_len + dst->header_len); 121 skb_reserve(nskb, hh_len + dst->header_len);
122 122
123 ip6h = nskb->nh.ipv6h = (struct ipv6hdr *) 123 skb_put(nskb, sizeof(struct ipv6hdr));
124 skb_put(nskb, sizeof(struct ipv6hdr)); 124 skb_reset_network_header(nskb);
125 ip6h = ipv6_hdr(nskb);
125 ip6h->version = 6; 126 ip6h->version = 6;
126 ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT); 127 ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT);
127 ip6h->nexthdr = IPPROTO_TCP; 128 ip6h->nexthdr = IPPROTO_TCP;
@@ -155,8 +156,8 @@ static void send_reset(struct sk_buff *oldskb)
155 tcph->check = 0; 156 tcph->check = 0;
156 157
157 /* Adjust TCP checksum */ 158 /* Adjust TCP checksum */
158 tcph->check = csum_ipv6_magic(&nskb->nh.ipv6h->saddr, 159 tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr,
159 &nskb->nh.ipv6h->daddr, 160 &ipv6_hdr(nskb)->daddr,
160 sizeof(struct tcphdr), IPPROTO_TCP, 161 sizeof(struct tcphdr), IPPROTO_TCP,
161 csum_partial((char *)tcph, 162 csum_partial((char *)tcph,
162 sizeof(struct tcphdr), 0)); 163 sizeof(struct tcphdr), 0));
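
send_reset() now builds the outgoing IPv6 header through the reset/accessor pair instead of aliasing skb->nh.ipv6h to the skb_put() return value: put the header bytes, record where the network header starts, then fill it in via ipv6_hdr(). A minimal sketch, assuming nskb was allocated with sufficient headroom already reserved:

    #include <linux/in.h>
    #include <linux/skbuff.h>
    #include <net/ipv6.h>

    /* Append and populate a bare IPv6 header on a fresh skb. */
    static struct ipv6hdr *put_ipv6_header(struct sk_buff *nskb, u8 hoplimit)
    {
            struct ipv6hdr *ip6h;

            skb_put(nskb, sizeof(struct ipv6hdr));
            skb_reset_network_header(nskb);
            ip6h = ipv6_hdr(nskb);

            ip6h->version   = 6;
            ip6h->hop_limit = hoplimit;
            ip6h->nexthdr   = IPPROTO_TCP;
            return ip6h;
    }
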
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index 967bed71d4a8..0f3dd932f0a6 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -32,8 +32,8 @@ match(const struct sk_buff *skb,
32 unsigned char eui64[8]; 32 unsigned char eui64[8];
33 int i = 0; 33 int i = 0;
34 34
35 if (!(skb->mac.raw >= skb->head && 35 if (!(skb_mac_header(skb) >= skb->head &&
36 (skb->mac.raw + ETH_HLEN) <= skb->data) && 36 (skb_mac_header(skb) + ETH_HLEN) <= skb->data) &&
37 offset != 0) { 37 offset != 0) {
38 *hotdrop = 1; 38 *hotdrop = 1;
39 return 0; 39 return 0;
@@ -42,7 +42,7 @@ match(const struct sk_buff *skb,
42 memset(eui64, 0, sizeof(eui64)); 42 memset(eui64, 0, sizeof(eui64));
43 43
44 if (eth_hdr(skb)->h_proto == htons(ETH_P_IPV6)) { 44 if (eth_hdr(skb)->h_proto == htons(ETH_P_IPV6)) {
45 if (skb->nh.ipv6h->version == 0x6) { 45 if (ipv6_hdr(skb)->version == 0x6) {
46 memcpy(eui64, eth_hdr(skb)->h_source, 3); 46 memcpy(eui64, eth_hdr(skb)->h_source, 3);
47 memcpy(eui64 + 5, eth_hdr(skb)->h_source + 3, 3); 47 memcpy(eui64 + 5, eth_hdr(skb)->h_source + 3, 3);
48 eui64[3] = 0xff; 48 eui64[3] = 0xff;
@@ -50,7 +50,7 @@ match(const struct sk_buff *skb,
50 eui64[0] |= 0x02; 50 eui64[0] |= 0x02;
51 51
52 i = 0; 52 i = 0;
53 while ((skb->nh.ipv6h->saddr.s6_addr[8+i] == eui64[i]) 53 while ((ipv6_hdr(skb)->saddr.s6_addr[8 + i] == eui64[i])
54 && (i < 8)) 54 && (i < 8))
55 i++; 55 i++;
56 56
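
The eui64 match recomputes the interface identifier the way stateless autoconfiguration does (RFC 4291, Appendix A): the 48-bit source MAC is split around an inserted 0xFF,0xFE pair and the universal/local bit of the first octet is flipped, then the result is compared with bytes 8..15 of the IPv6 source address. A standalone sketch of that derivation (hypothetical helper, not part of the patch):

    #include <linux/string.h>

    /* Modified EUI-64 from a 48-bit MAC, per RFC 4291 Appendix A. */
    static void mac_to_modified_eui64(const unsigned char mac[6],
                                      unsigned char eui64[8])
    {
            memcpy(eui64, mac, 3);
            eui64[3] = 0xff;
            eui64[4] = 0xfe;
            memcpy(eui64 + 5, mac + 3, 3);
            eui64[0] ^= 0x02;       /* invert the universal/local bit */
    }

The match itself uses |= rather than ^= for the U/L bit, which is equivalent for universally administered MACs, whose bit starts out zero.
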
diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c
index 37c8a4d4ed78..d606c0e6d6fd 100644
--- a/net/ipv6/netfilter/ip6t_hl.c
+++ b/net/ipv6/netfilter/ip6t_hl.c
@@ -25,7 +25,7 @@ static int match(const struct sk_buff *skb,
25 int offset, unsigned int protoff, int *hotdrop) 25 int offset, unsigned int protoff, int *hotdrop)
26{ 26{
27 const struct ip6t_hl_info *info = matchinfo; 27 const struct ip6t_hl_info *info = matchinfo;
28 const struct ipv6hdr *ip6h = skb->nh.ipv6h; 28 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
29 29
30 switch (info->mode) { 30 switch (info->mode) {
31 case IP6T_HL_EQ: 31 case IP6T_HL_EQ:
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 700a11d25deb..fd6a0869099b 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -45,7 +45,7 @@ ipv6header_match(const struct sk_buff *skb,
45 /* Make sure this isn't an evil packet */ 45 /* Make sure this isn't an evil packet */
46 46
47 /* type of the 1st exthdr */ 47 /* type of the 1st exthdr */
48 nexthdr = skb->nh.ipv6h->nexthdr; 48 nexthdr = ipv6_hdr(skb)->nexthdr;
49 /* pointer to the 1st exthdr */ 49 /* pointer to the 1st exthdr */
50 ptr = sizeof(struct ipv6hdr); 50 ptr = sizeof(struct ipv6hdr);
51 /* available length */ 51 /* available length */
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 112a21d0c6da..76f0cf66f95c 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -102,7 +102,7 @@ ip6t_local_out_hook(unsigned int hook,
102#if 0 102#if 0
103 /* root is playing with raw sockets. */ 103 /* root is playing with raw sockets. */
104 if ((*pskb)->len < sizeof(struct iphdr) 104 if ((*pskb)->len < sizeof(struct iphdr)
105 || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) { 105 || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
106 if (net_ratelimit()) 106 if (net_ratelimit())
107 printk("ip6t_hook: happy cracking.\n"); 107 printk("ip6t_hook: happy cracking.\n");
108 return NF_ACCEPT; 108 return NF_ACCEPT;
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 0c468d35a937..a9f10e32c163 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -7,8 +7,6 @@
7 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation. 9 * published by the Free Software Foundation.
10 *
11 * Extended to all five netfilter hooks by Brad Chapman & Harald Welte
12 */ 10 */
13#include <linux/module.h> 11#include <linux/module.h>
14#include <linux/netfilter_ipv6/ip6_tables.h> 12#include <linux/netfilter_ipv6/ip6_tables.h>
@@ -138,7 +136,7 @@ ip6t_local_hook(unsigned int hook,
138#if 0 136#if 0
139 /* root is playing with raw sockets. */ 137 /* root is playing with raw sockets. */
140 if ((*pskb)->len < sizeof(struct iphdr) 138 if ((*pskb)->len < sizeof(struct iphdr)
141 || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) { 139 || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
142 if (net_ratelimit()) 140 if (net_ratelimit())
143 printk("ip6t_hook: happy cracking.\n"); 141 printk("ip6t_hook: happy cracking.\n");
144 return NF_ACCEPT; 142 return NF_ACCEPT;
@@ -146,21 +144,21 @@ ip6t_local_hook(unsigned int hook,
146#endif 144#endif
147 145
148 /* save source/dest address, mark, hoplimit, flowlabel, priority, */ 146 /* save source/dest address, mark, hoplimit, flowlabel, priority, */
149 memcpy(&saddr, &(*pskb)->nh.ipv6h->saddr, sizeof(saddr)); 147 memcpy(&saddr, &ipv6_hdr(*pskb)->saddr, sizeof(saddr));
150 memcpy(&daddr, &(*pskb)->nh.ipv6h->daddr, sizeof(daddr)); 148 memcpy(&daddr, &ipv6_hdr(*pskb)->daddr, sizeof(daddr));
151 mark = (*pskb)->mark; 149 mark = (*pskb)->mark;
152 hop_limit = (*pskb)->nh.ipv6h->hop_limit; 150 hop_limit = ipv6_hdr(*pskb)->hop_limit;
153 151
154 /* flowlabel and prio (includes version, which shouldn't change either */ 152 /* flowlabel and prio (includes version, which shouldn't change either */
155 flowlabel = *((u_int32_t *) (*pskb)->nh.ipv6h); 153 flowlabel = *((u_int32_t *)ipv6_hdr(*pskb));
156 154
157 ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler); 155 ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler);
158 156
159 if (ret != NF_DROP && ret != NF_STOLEN 157 if (ret != NF_DROP && ret != NF_STOLEN
160 && (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr)) 158 && (memcmp(&ipv6_hdr(*pskb)->saddr, &saddr, sizeof(saddr))
161 || memcmp(&(*pskb)->nh.ipv6h->daddr, &daddr, sizeof(daddr)) 159 || memcmp(&ipv6_hdr(*pskb)->daddr, &daddr, sizeof(daddr))
162 || (*pskb)->mark != mark 160 || (*pskb)->mark != mark
163 || (*pskb)->nh.ipv6h->hop_limit != hop_limit)) 161 || ipv6_hdr(*pskb)->hop_limit != hop_limit))
164 return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP; 162 return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP;
165 163
166 return ret; 164 return ret;
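
ip6t_local_hook() shows why mangle needs special treatment on LOCAL_OUT: it snapshots every routing-relevant field a rule may rewrite (addresses, mark, hop limit, and the first 32-bit word holding version/priority/flow label) and re-resolves the route only if one of them actually changed. A condensed sketch (mark and the flow-label word elided), with run_rules() standing in for ip6t_do_table():

    #include <linux/netfilter.h>
    #include <linux/netfilter_ipv6.h>
    #include <linux/skbuff.h>
    #include <net/ipv6.h>

    static unsigned int mangle_out(struct sk_buff **pskb,
                                   unsigned int (*run_rules)(struct sk_buff **))
    {
            struct in6_addr saddr = ipv6_hdr(*pskb)->saddr;
            struct in6_addr daddr = ipv6_hdr(*pskb)->daddr;
            u_int8_t hop_limit = ipv6_hdr(*pskb)->hop_limit;
            unsigned int ret = run_rules(pskb);

            if (ret != NF_DROP && ret != NF_STOLEN &&
                (!ipv6_addr_equal(&ipv6_hdr(*pskb)->saddr, &saddr) ||
                 !ipv6_addr_equal(&ipv6_hdr(*pskb)->daddr, &daddr) ||
                 ipv6_hdr(*pskb)->hop_limit != hop_limit))
                    return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP;

            return ret;
    }

ip6_route_me_harder() is the helper converted in net/ipv6/netfilter.c earlier in this diff; rereading the header through ipv6_hdr(*pskb) after the table run matters because a rule may have replaced the skb.
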
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index d1102455668d..6d2a08205111 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -7,17 +7,6 @@
7 * 7 *
8 * Author: 8 * Author:
9 * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> 9 * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
10 *
11 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
12 * - support Layer 3 protocol independent connection tracking.
13 * Based on the original ip_conntrack code which had the following
14 * copyright information:
15 * (C) 1999-2001 Paul `Rusty' Russell
16 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
17 *
18 * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
19 * - add get_features() to support various size of conntrack
20 * structures.
21 */ 10 */
22 11
23#include <linux/types.h> 12#include <linux/types.h>
@@ -138,16 +127,10 @@ static int
138ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, 127ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
139 u_int8_t *protonum) 128 u_int8_t *protonum)
140{ 129{
141 unsigned int extoff; 130 unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data;
142 unsigned char pnum; 131 unsigned char pnum = ipv6_hdr(*pskb)->nexthdr;
143 int protoff; 132 int protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
144 133 (*pskb)->len - extoff);
145 extoff = (u8*)((*pskb)->nh.ipv6h + 1) - (*pskb)->data;
146 pnum = (*pskb)->nh.ipv6h->nexthdr;
147
148 protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
149 (*pskb)->len - extoff);
150
151 /* 134 /*
152 * (protoff == (*pskb)->len) mean that the packet doesn't have no data 135 * (protoff == (*pskb)->len) mean that the packet doesn't have no data
153 * except of IPv6 & ext headers. but it's tracked anyway. - YK 136 * except of IPv6 & ext headers. but it's tracked anyway. - YK
@@ -179,9 +162,8 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
179 struct nf_conn_help *help; 162 struct nf_conn_help *help;
180 enum ip_conntrack_info ctinfo; 163 enum ip_conntrack_info ctinfo;
181 unsigned int ret, protoff; 164 unsigned int ret, protoff;
182 unsigned int extoff = (u8*)((*pskb)->nh.ipv6h + 1) 165 unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data;
183 - (*pskb)->data; 166 unsigned char pnum = ipv6_hdr(*pskb)->nexthdr;
184 unsigned char pnum = (*pskb)->nh.ipv6h->nexthdr;
185 167
186 168
187 /* This is where we call the helper: as the packet goes out. */ 169 /* This is where we call the helper: as the packet goes out. */
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 075da4f287b8..0be790d250f9 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -7,13 +7,6 @@
7 * 7 *
8 * Author: 8 * Author:
9 * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> 9 * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
10 *
11 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
12 * - ICMPv6 tracking support. Derived from the original ip_conntrack code
13 * net/ipv4/netfilter/ip_conntrack_proto_icmp.c which had the following
14 * copyright information:
15 * (C) 1999-2001 Paul `Rusty' Russell
16 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
17 */ 10 */
18 11
19#include <linux/types.h> 12#include <linux/types.h>
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 15ab1e3e8b56..347ab7608231 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -82,7 +82,7 @@ struct nf_ct_frag6_queue
82 struct sk_buff *fragments; 82 struct sk_buff *fragments;
83 int len; 83 int len;
84 int meat; 84 int meat;
85 struct timeval stamp; 85 ktime_t stamp;
86 unsigned int csum; 86 unsigned int csum;
87 __u8 last_in; /* has first/last segment arrived? */ 87 __u8 last_in; /* has first/last segment arrived? */
88#define COMPLETE 4 88#define COMPLETE 4
@@ -353,9 +353,7 @@ nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, str
353 ipv6_addr_copy(&fq->saddr, src); 353 ipv6_addr_copy(&fq->saddr, src);
354 ipv6_addr_copy(&fq->daddr, dst); 354 ipv6_addr_copy(&fq->daddr, dst);
355 355
356 init_timer(&fq->timer); 356 setup_timer(&fq->timer, nf_ct_frag6_expire, (unsigned long)fq);
357 fq->timer.function = nf_ct_frag6_expire;
358 fq->timer.data = (long) fq;
359 spin_lock_init(&fq->lock); 357 spin_lock_init(&fq->lock);
360 atomic_set(&fq->refcnt, 1); 358 atomic_set(&fq->refcnt, 1);
361 359
@@ -400,19 +398,20 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
400 } 398 }
401 399
402 offset = ntohs(fhdr->frag_off) & ~0x7; 400 offset = ntohs(fhdr->frag_off) & ~0x7;
403 end = offset + (ntohs(skb->nh.ipv6h->payload_len) - 401 end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
404 ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1))); 402 ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
405 403
406 if ((unsigned int)end > IPV6_MAXPLEN) { 404 if ((unsigned int)end > IPV6_MAXPLEN) {
407 DEBUGP("offset is too large.\n"); 405 DEBUGP("offset is too large.\n");
408 return -1; 406 return -1;
409 } 407 }
410 408
411 if (skb->ip_summed == CHECKSUM_COMPLETE) 409 if (skb->ip_summed == CHECKSUM_COMPLETE) {
410 const unsigned char *nh = skb_network_header(skb);
412 skb->csum = csum_sub(skb->csum, 411 skb->csum = csum_sub(skb->csum,
413 csum_partial(skb->nh.raw, 412 csum_partial(nh, (u8 *)(fhdr + 1) - nh,
414 (u8*)(fhdr + 1) - skb->nh.raw,
415 0)); 413 0));
414 }
416 415
417 /* Is this the final fragment? */ 416 /* Is this the final fragment? */
418 if (!(fhdr->frag_off & htons(IP6_MF))) { 417 if (!(fhdr->frag_off & htons(IP6_MF))) {
@@ -542,7 +541,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
542 fq->fragments = skb; 541 fq->fragments = skb;
543 542
544 skb->dev = NULL; 543 skb->dev = NULL;
545 skb_get_timestamp(skb, &fq->stamp); 544 fq->stamp = skb->tstamp;
546 fq->meat += skb->len; 545 fq->meat += skb->len;
547 atomic_add(skb->truesize, &nf_ct_frag6_mem); 546 atomic_add(skb->truesize, &nf_ct_frag6_mem);
548 547
@@ -583,7 +582,9 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
583 BUG_TRAP(NFCT_FRAG6_CB(head)->offset == 0); 582 BUG_TRAP(NFCT_FRAG6_CB(head)->offset == 0);
584 583
585 /* Unfragmented part is taken from the first segment. */ 584 /* Unfragmented part is taken from the first segment. */
586 payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr); 585 payload_len = ((head->data - skb_network_header(head)) -
586 sizeof(struct ipv6hdr) + fq->len -
587 sizeof(struct frag_hdr));
587 if (payload_len > IPV6_MAXPLEN) { 588 if (payload_len > IPV6_MAXPLEN) {
588 DEBUGP("payload len is too large.\n"); 589 DEBUGP("payload len is too large.\n");
589 goto out_oversize; 590 goto out_oversize;
@@ -624,15 +625,15 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
624 625
625 /* We have to remove fragment header from datagram and to relocate 626 /* We have to remove fragment header from datagram and to relocate
626 * header in order to calculate ICV correctly. */ 627 * header in order to calculate ICV correctly. */
627 head->nh.raw[fq->nhoffset] = head->h.raw[0]; 628 skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0];
628 memmove(head->head + sizeof(struct frag_hdr), head->head, 629 memmove(head->head + sizeof(struct frag_hdr), head->head,
629 (head->data - head->head) - sizeof(struct frag_hdr)); 630 (head->data - head->head) - sizeof(struct frag_hdr));
630 head->mac.raw += sizeof(struct frag_hdr); 631 head->mac_header += sizeof(struct frag_hdr);
631 head->nh.raw += sizeof(struct frag_hdr); 632 head->network_header += sizeof(struct frag_hdr);
632 633
633 skb_shinfo(head)->frag_list = head->next; 634 skb_shinfo(head)->frag_list = head->next;
634 head->h.raw = head->data; 635 skb_reset_transport_header(head);
635 skb_push(head, head->data - head->nh.raw); 636 skb_push(head, head->data - skb_network_header(head));
636 atomic_sub(head->truesize, &nf_ct_frag6_mem); 637 atomic_sub(head->truesize, &nf_ct_frag6_mem);
637 638
638 for (fp=head->next; fp; fp = fp->next) { 639 for (fp=head->next; fp; fp = fp->next) {
@@ -648,12 +649,14 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
648 649
649 head->next = NULL; 650 head->next = NULL;
650 head->dev = dev; 651 head->dev = dev;
651 skb_set_timestamp(head, &fq->stamp); 652 head->tstamp = fq->stamp;
652 head->nh.ipv6h->payload_len = htons(payload_len); 653 ipv6_hdr(head)->payload_len = htons(payload_len);
653 654
654 /* Yes, and fold redundant checksum back. 8) */ 655 /* Yes, and fold redundant checksum back. 8) */
655 if (head->ip_summed == CHECKSUM_COMPLETE) 656 if (head->ip_summed == CHECKSUM_COMPLETE)
656 head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum); 657 head->csum = csum_partial(skb_network_header(head),
658 skb_network_header_len(head),
659 head->csum);
657 660
658 fq->fragments = NULL; 661 fq->fragments = NULL;
659 662
@@ -701,9 +704,10 @@ out_fail:
701static int 704static int
702find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) 705find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
703{ 706{
704 u8 nexthdr = skb->nh.ipv6h->nexthdr; 707 u8 nexthdr = ipv6_hdr(skb)->nexthdr;
705 u8 prev_nhoff = (u8 *)&skb->nh.ipv6h->nexthdr - skb->data; 708 const int netoff = skb_network_offset(skb);
706 int start = (u8 *)(skb->nh.ipv6h+1) - skb->data; 709 u8 prev_nhoff = netoff + offsetof(struct ipv6hdr, nexthdr);
710 int start = netoff + sizeof(struct ipv6hdr);
707 int len = skb->len - start; 711 int len = skb->len - start;
708 u8 prevhdr = NEXTHDR_IPV6; 712 u8 prevhdr = NEXTHDR_IPV6;
709 713
@@ -759,7 +763,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
759 struct sk_buff *ret_skb = NULL; 763 struct sk_buff *ret_skb = NULL;
760 764
761 /* Jumbo payload inhibits frag. header */ 765 /* Jumbo payload inhibits frag. header */
762 if (skb->nh.ipv6h->payload_len == 0) { 766 if (ipv6_hdr(skb)->payload_len == 0) {
763 DEBUGP("payload len = 0\n"); 767 DEBUGP("payload len = 0\n");
764 return skb; 768 return skb;
765 } 769 }
@@ -780,9 +784,9 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
780 goto ret_orig; 784 goto ret_orig;
781 } 785 }
782 786
783 clone->h.raw = clone->data + fhoff; 787 skb_set_transport_header(clone, fhoff);
784 hdr = clone->nh.ipv6h; 788 hdr = ipv6_hdr(clone);
785 fhdr = (struct frag_hdr *)clone->h.raw; 789 fhdr = (struct frag_hdr *)skb_transport_header(clone);
786 790
787 if (!(fhdr->frag_off & htons(0xFFF9))) { 791 if (!(fhdr->frag_off & htons(0xFFF9))) {
788 DEBUGP("Invalid fragment offset\n"); 792 DEBUGP("Invalid fragment offset\n");
@@ -864,8 +868,7 @@ int nf_ct_frag6_init(void)
864 nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ 868 nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
865 (jiffies ^ (jiffies >> 6))); 869 (jiffies ^ (jiffies >> 6)));
866 870
867 init_timer(&nf_ct_frag6_secret_timer); 871 setup_timer(&nf_ct_frag6_secret_timer, nf_ct_frag6_secret_rebuild, 0);
868 nf_ct_frag6_secret_timer.function = nf_ct_frag6_secret_rebuild;
869 nf_ct_frag6_secret_timer.expires = jiffies 872 nf_ct_frag6_secret_timer.expires = jiffies
870 + nf_ct_frag6_secret_interval; 873 + nf_ct_frag6_secret_interval;
871 add_timer(&nf_ct_frag6_secret_timer); 874 add_timer(&nf_ct_frag6_secret_timer);
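
Two hunks in nf_conntrack_reasm.c adopt setup_timer(), which folds the init_timer()-plus-two-assignments sequence into a single call. Sketch (expire_fn and cookie are placeholder names):

    #include <linux/timer.h>

    /* setup_timer(t, fn, data) is shorthand for:
     *         init_timer(t); t->function = fn; t->data = data;
     */
    static void arm_example_timer(struct timer_list *t,
                                  void (*expire_fn)(unsigned long),
                                  unsigned long cookie, unsigned long when)
    {
            setup_timer(t, expire_fn, cookie);
            t->expires = when;
            add_timer(t);
    }

The same file also moves its fragment-queue timestamp from struct timeval to ktime_t, mirroring the skb->tstamp change seen in ip6_queue.c.
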
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index fa3fb509f187..acb306a5dd56 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -23,12 +23,12 @@
23#include <linux/proc_fs.h> 23#include <linux/proc_fs.h>
24#include <linux/seq_file.h> 24#include <linux/seq_file.h>
25#include <linux/stddef.h> 25#include <linux/stddef.h>
26#include <net/ip.h>
26#include <net/sock.h> 27#include <net/sock.h>
27#include <net/tcp.h> 28#include <net/tcp.h>
28#include <net/transp_v6.h> 29#include <net/transp_v6.h>
29#include <net/ipv6.h> 30#include <net/ipv6.h>
30 31
31#ifdef CONFIG_PROC_FS
32static struct proc_dir_entry *proc_net_devsnmp6; 32static struct proc_dir_entry *proc_net_devsnmp6;
33 33
34static int fold_prot_inuse(struct proto *proto) 34static int fold_prot_inuse(struct proto *proto)
@@ -142,26 +142,13 @@ static struct snmp_mib snmp6_udplite6_list[] = {
142 SNMP_MIB_SENTINEL 142 SNMP_MIB_SENTINEL
143}; 143};
144 144
145static unsigned long
146fold_field(void *mib[], int offt)
147{
148 unsigned long res = 0;
149 int i;
150
151 for_each_possible_cpu(i) {
152 res += *(((unsigned long *)per_cpu_ptr(mib[0], i)) + offt);
153 res += *(((unsigned long *)per_cpu_ptr(mib[1], i)) + offt);
154 }
155 return res;
156}
157
158static inline void 145static inline void
159snmp6_seq_show_item(struct seq_file *seq, void **mib, struct snmp_mib *itemlist) 146snmp6_seq_show_item(struct seq_file *seq, void **mib, struct snmp_mib *itemlist)
160{ 147{
161 int i; 148 int i;
162 for (i=0; itemlist[i].name; i++) 149 for (i=0; itemlist[i].name; i++)
163 seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, 150 seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name,
164 fold_field(mib, itemlist[i].entry)); 151 snmp_fold_field(mib, itemlist[i].entry));
165} 152}
166 153
167static int snmp6_seq_show(struct seq_file *seq, void *v) 154static int snmp6_seq_show(struct seq_file *seq, void *v)
@@ -271,47 +258,3 @@ void ipv6_misc_proc_exit(void)
271 proc_net_remove("snmp6"); 258 proc_net_remove("snmp6");
272} 259}
273 260
274#else /* CONFIG_PROC_FS */
275
276
277int snmp6_register_dev(struct inet6_dev *idev)
278{
279 return 0;
280}
281
282int snmp6_unregister_dev(struct inet6_dev *idev)
283{
284 return 0;
285}
286#endif /* CONFIG_PROC_FS */
287
288int snmp6_alloc_dev(struct inet6_dev *idev)
289{
290 int err = -ENOMEM;
291
292 if (!idev || !idev->dev)
293 return -EINVAL;
294
295 if (snmp6_mib_init((void **)idev->stats.ipv6, sizeof(struct ipstats_mib),
296 __alignof__(struct ipstats_mib)) < 0)
297 goto err_ip;
298 if (snmp6_mib_init((void **)idev->stats.icmpv6, sizeof(struct icmpv6_mib),
299 __alignof__(struct icmpv6_mib)) < 0)
300 goto err_icmp;
301
302 return 0;
303
304err_icmp:
305 snmp6_mib_free((void **)idev->stats.ipv6);
306err_ip:
307 return err;
308}
309
310int snmp6_free_dev(struct inet6_dev *idev)
311{
312 snmp6_mib_free((void **)idev->stats.icmpv6);
313 snmp6_mib_free((void **)idev->stats.ipv6);
314 return 0;
315}
316
317
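
proc.c loses its private fold_field() in favour of snmp_fold_field() from <net/ip.h>, which the IPv4 side already provides; the logic is unchanged. For reference, per-CPU SNMP folding sums one counter slot across both per-context copies of the MIB on every possible CPU (sketch mirroring the removed helper):

    #include <linux/percpu.h>

    /* mib[0]/mib[1] are the two per-CPU copies (softirq and process
     * context); offt indexes one counter within the stats struct. */
    static unsigned long fold_mib(void *mib[2], int offt)
    {
            unsigned long res = 0;
            int cpu;

            for_each_possible_cpu(cpu) {
                    res += *(((unsigned long *)per_cpu_ptr(mib[0], cpu)) + offt);
                    res += *(((unsigned long *)per_cpu_ptr(mib[1], cpu)) + offt);
            }
            return res;
    }

The snmp6_alloc_dev()/snmp6_free_dev() pair and the !CONFIG_PROC_FS stubs are dropped from this file as well; the per-device statistics setup no longer lives in the proc code after this series.
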
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index ef43bd57baed..f929f47b925e 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -60,6 +60,8 @@ int inet6_add_protocol(struct inet6_protocol *prot, unsigned char protocol)
60 return ret; 60 return ret;
61} 61}
62 62
63EXPORT_SYMBOL(inet6_add_protocol);
64
63/* 65/*
64 * Remove a protocol from the hash tables. 66 * Remove a protocol from the hash tables.
65 */ 67 */
@@ -83,3 +85,5 @@ int inet6_del_protocol(struct inet6_protocol *prot, unsigned char protocol)
83 85
84 return ret; 86 return ret;
85} 87}
88
89EXPORT_SYMBOL(inet6_del_protocol);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 203e069e7fe9..009a1047fc3f 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -152,7 +152,7 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
152 int delivered = 0; 152 int delivered = 0;
153 __u8 hash; 153 __u8 hash;
154 154
155 saddr = &skb->nh.ipv6h->saddr; 155 saddr = &ipv6_hdr(skb)->saddr;
156 daddr = saddr + 1; 156 daddr = saddr + 1;
157 157
158 hash = nexthdr & (MAX_INET_PROTOS - 1); 158 hash = nexthdr & (MAX_INET_PROTOS - 1);
@@ -361,17 +361,18 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
361 skb->ip_summed = CHECKSUM_UNNECESSARY; 361 skb->ip_summed = CHECKSUM_UNNECESSARY;
362 362
363 if (skb->ip_summed == CHECKSUM_COMPLETE) { 363 if (skb->ip_summed == CHECKSUM_COMPLETE) {
364 skb_postpull_rcsum(skb, skb->nh.raw, 364 skb_postpull_rcsum(skb, skb_network_header(skb),
365 skb->h.raw - skb->nh.raw); 365 skb_network_header_len(skb));
366 if (!csum_ipv6_magic(&skb->nh.ipv6h->saddr, 366 if (!csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
367 &skb->nh.ipv6h->daddr, 367 &ipv6_hdr(skb)->daddr,
368 skb->len, inet->num, skb->csum)) 368 skb->len, inet->num, skb->csum))
369 skb->ip_summed = CHECKSUM_UNNECESSARY; 369 skb->ip_summed = CHECKSUM_UNNECESSARY;
370 } 370 }
371 if (skb->ip_summed != CHECKSUM_UNNECESSARY) 371 if (!skb_csum_unnecessary(skb))
372 skb->csum = ~csum_unfold(csum_ipv6_magic(&skb->nh.ipv6h->saddr, 372 skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
373 &skb->nh.ipv6h->daddr, 373 &ipv6_hdr(skb)->daddr,
374 skb->len, inet->num, 0)); 374 skb->len,
375 inet->num, 0));
375 376
376 if (inet->hdrincl) { 377 if (inet->hdrincl) {
377 if (skb_checksum_complete(skb)) { 378 if (skb_checksum_complete(skb)) {
@@ -420,7 +421,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
420 msg->msg_flags |= MSG_TRUNC; 421 msg->msg_flags |= MSG_TRUNC;
421 } 422 }
422 423
423 if (skb->ip_summed==CHECKSUM_UNNECESSARY) { 424 if (skb_csum_unnecessary(skb)) {
424 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); 425 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
425 } else if (msg->msg_flags&MSG_TRUNC) { 426 } else if (msg->msg_flags&MSG_TRUNC) {
426 if (__skb_checksum_complete(skb)) 427 if (__skb_checksum_complete(skb))
@@ -438,7 +439,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
438 if (sin6) { 439 if (sin6) {
439 sin6->sin6_family = AF_INET6; 440 sin6->sin6_family = AF_INET6;
440 sin6->sin6_port = 0; 441 sin6->sin6_port = 0;
441 ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr); 442 ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr);
442 sin6->sin6_flowinfo = 0; 443 sin6->sin6_flowinfo = 0;
443 sin6->sin6_scope_id = 0; 444 sin6->sin6_scope_id = 0;
444 if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) 445 if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
@@ -488,7 +489,8 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
488 goto out; 489 goto out;
489 490
490 offset = rp->offset; 491 offset = rp->offset;
491 total_len = inet_sk(sk)->cork.length - (skb->nh.raw - skb->data); 492 total_len = inet_sk(sk)->cork.length - (skb_network_header(skb) -
493 skb->data);
492 if (offset >= total_len - 1) { 494 if (offset >= total_len - 1) {
493 err = -EINVAL; 495 err = -EINVAL;
494 ip6_flush_pending_frames(sk); 496 ip6_flush_pending_frames(sk);
@@ -511,7 +513,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
511 if (csum_skb) 513 if (csum_skb)
512 continue; 514 continue;
513 515
514 len = skb->len - (skb->h.raw - skb->data); 516 len = skb->len - skb_transport_offset(skb);
515 if (offset >= len) { 517 if (offset >= len) {
516 offset -= len; 518 offset -= len;
517 continue; 519 continue;
@@ -523,7 +525,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
523 skb = csum_skb; 525 skb = csum_skb;
524 } 526 }
525 527
526 offset += skb->h.raw - skb->data; 528 offset += skb_transport_offset(skb);
527 if (skb_copy_bits(skb, offset, &csum, 2)) 529 if (skb_copy_bits(skb, offset, &csum, 2))
528 BUG(); 530 BUG();
529 531
@@ -575,11 +577,13 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
575 skb->priority = sk->sk_priority; 577 skb->priority = sk->sk_priority;
576 skb->dst = dst_clone(&rt->u.dst); 578 skb->dst = dst_clone(&rt->u.dst);
577 579
578 skb->nh.ipv6h = iph = (struct ipv6hdr *)skb_put(skb, length); 580 skb_put(skb, length);
581 skb_reset_network_header(skb);
582 iph = ipv6_hdr(skb);
579 583
580 skb->ip_summed = CHECKSUM_NONE; 584 skb->ip_summed = CHECKSUM_NONE;
581 585
582 skb->h.raw = skb->nh.raw; 586 skb->transport_header = skb->network_header;
583 err = memcpy_fromiovecend((void *)iph, from, 0, length); 587 err = memcpy_fromiovecend((void *)iph, from, 0, length);
584 if (err) 588 if (err)
585 goto error_fault; 589 goto error_fault;
@@ -878,7 +882,7 @@ static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
878 return 0; 882 return 0;
879 default: 883 default:
880 return -ENOPROTOOPT; 884 return -ENOPROTOOPT;
881 }; 885 }
882 886
883 return 0; 887 return 0;
884} 888}
@@ -903,7 +907,7 @@ static int rawv6_geticmpfilter(struct sock *sk, int level, int optname,
903 return 0; 907 return 0;
904 default: 908 default:
905 return -ENOPROTOOPT; 909 return -ENOPROTOOPT;
906 }; 910 }
907 911
908 return 0; 912 return 0;
909} 913}
@@ -957,7 +961,8 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname,
957 default: 961 default:
958 return ipv6_setsockopt(sk, level, optname, optval, 962 return ipv6_setsockopt(sk, level, optname, optval,
959 optlen); 963 optlen);
960 }; 964 }
965
961 return do_rawv6_setsockopt(sk, level, optname, optval, optlen); 966 return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
962} 967}
963 968
@@ -978,7 +983,7 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname,
978 default: 983 default:
979 return compat_ipv6_setsockopt(sk, level, optname, 984 return compat_ipv6_setsockopt(sk, level, optname,
980 optval, optlen); 985 optval, optlen);
981 }; 986 }
982 return do_rawv6_setsockopt(sk, level, optname, optval, optlen); 987 return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
983} 988}
984#endif 989#endif
@@ -1031,7 +1036,8 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
1031 default: 1036 default:
1032 return ipv6_getsockopt(sk, level, optname, optval, 1037 return ipv6_getsockopt(sk, level, optname, optval,
1033 optlen); 1038 optlen);
1034 }; 1039 }
1040
1035 return do_rawv6_getsockopt(sk, level, optname, optval, optlen); 1041 return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
1036} 1042}
1037 1043
@@ -1052,7 +1058,7 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname,
1052 default: 1058 default:
1053 return compat_ipv6_getsockopt(sk, level, optname, 1059 return compat_ipv6_getsockopt(sk, level, optname,
1054 optval, optlen); 1060 optval, optlen);
1055 }; 1061 }
1056 return do_rawv6_getsockopt(sk, level, optname, optval, optlen); 1062 return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
1057} 1063}
1058#endif 1064#endif
@@ -1073,7 +1079,7 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
1073 spin_lock_bh(&sk->sk_receive_queue.lock); 1079 spin_lock_bh(&sk->sk_receive_queue.lock);
1074 skb = skb_peek(&sk->sk_receive_queue); 1080 skb = skb_peek(&sk->sk_receive_queue);
1075 if (skb != NULL) 1081 if (skb != NULL)
1076 amount = skb->tail - skb->h.raw; 1082 amount = skb->tail - skb->transport_header;
1077 spin_unlock_bh(&sk->sk_receive_queue.lock); 1083 spin_unlock_bh(&sk->sk_receive_queue.lock);
1078 return put_user(amount, (int __user *)arg); 1084 return put_user(amount, (int __user *)arg);
1079 } 1085 }
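
rawv6_rcv()'s checksum handling now goes through skb_csum_unnecessary(), a new predicate wrapping the ip_summed == CHECKSUM_UNNECESSARY test. The receive-side pattern: try to validate a CHECKSUM_COMPLETE value against the IPv6 pseudo-header; if the packet still is not known-good, seed skb->csum with the pseudo-header sum so a later skb_checksum_complete() can finish the job. A sketch of that pattern, omitting the skb_postpull_rcsum() adjustment the real code performs first (proto stands in for the socket's inet->num):

    #include <linux/skbuff.h>
    #include <net/checksum.h>
    #include <net/ip6_checksum.h>
    #include <net/ipv6.h>

    static void raw_csum_prepare(struct sk_buff *skb, int proto)
    {
            const struct ipv6hdr *hdr = ipv6_hdr(skb);

            if (skb->ip_summed == CHECKSUM_COMPLETE &&
                !csum_ipv6_magic(&hdr->saddr, &hdr->daddr, skb->len,
                                 proto, skb->csum))
                    skb->ip_summed = CHECKSUM_UNNECESSARY;

            if (!skb_csum_unnecessary(skb))
                    skb->csum = ~csum_unfold(csum_ipv6_magic(&hdr->saddr,
                                                             &hdr->daddr,
                                                             skb->len,
                                                             proto, 0));
    }
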
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 7034c54e5010..de795c04e34c 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -88,7 +88,7 @@ struct frag_queue
88 int len; 88 int len;
89 int meat; 89 int meat;
90 int iif; 90 int iif;
91 struct timeval stamp; 91 ktime_t stamp;
92 unsigned int csum; 92 unsigned int csum;
93 __u8 last_in; /* has first/last segment arrived? */ 93 __u8 last_in; /* has first/last segment arrived? */
94#define COMPLETE 4 94#define COMPLETE 4
@@ -430,19 +430,24 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
430 goto err; 430 goto err;
431 431
432 offset = ntohs(fhdr->frag_off) & ~0x7; 432 offset = ntohs(fhdr->frag_off) & ~0x7;
433 end = offset + (ntohs(skb->nh.ipv6h->payload_len) - 433 end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
434 ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1))); 434 ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
435 435
436 if ((unsigned int)end > IPV6_MAXPLEN) { 436 if ((unsigned int)end > IPV6_MAXPLEN) {
437 IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), 437 IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
438 IPSTATS_MIB_INHDRERRORS); 438 IPSTATS_MIB_INHDRERRORS);
439 icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off - skb->nh.raw); 439 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
440 ((u8 *)&fhdr->frag_off -
441 skb_network_header(skb)));
440 return; 442 return;
441 } 443 }
442 444
443 if (skb->ip_summed == CHECKSUM_COMPLETE) 445 if (skb->ip_summed == CHECKSUM_COMPLETE) {
446 const unsigned char *nh = skb_network_header(skb);
444 skb->csum = csum_sub(skb->csum, 447 skb->csum = csum_sub(skb->csum,
445 csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0)); 448 csum_partial(nh, (u8 *)(fhdr + 1) - nh,
449 0));
450 }
446 451
447 /* Is this the final fragment? */ 452 /* Is this the final fragment? */
448 if (!(fhdr->frag_off & htons(IP6_MF))) { 453 if (!(fhdr->frag_off & htons(IP6_MF))) {
@@ -562,7 +567,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
562 if (skb->dev) 567 if (skb->dev)
563 fq->iif = skb->dev->ifindex; 568 fq->iif = skb->dev->ifindex;
564 skb->dev = NULL; 569 skb->dev = NULL;
565 skb_get_timestamp(skb, &fq->stamp); 570 fq->stamp = skb->tstamp;
566 fq->meat += skb->len; 571 fq->meat += skb->len;
567 atomic_add(skb->truesize, &ip6_frag_mem); 572 atomic_add(skb->truesize, &ip6_frag_mem);
568 573
@@ -605,7 +610,9 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
605 BUG_TRAP(FRAG6_CB(head)->offset == 0); 610 BUG_TRAP(FRAG6_CB(head)->offset == 0);
606 611
607 /* Unfragmented part is taken from the first segment. */ 612 /* Unfragmented part is taken from the first segment. */
608 payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr); 613 payload_len = ((head->data - skb_network_header(head)) -
614 sizeof(struct ipv6hdr) + fq->len -
615 sizeof(struct frag_hdr));
609 if (payload_len > IPV6_MAXPLEN) 616 if (payload_len > IPV6_MAXPLEN)
610 goto out_oversize; 617 goto out_oversize;
611 618
@@ -639,15 +646,15 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
639 /* We have to remove fragment header from datagram and to relocate 646 /* We have to remove fragment header from datagram and to relocate
640 * header in order to calculate ICV correctly. */ 647 * header in order to calculate ICV correctly. */
641 nhoff = fq->nhoffset; 648 nhoff = fq->nhoffset;
642 head->nh.raw[nhoff] = head->h.raw[0]; 649 skb_network_header(head)[nhoff] = skb_transport_header(head)[0];
643 memmove(head->head + sizeof(struct frag_hdr), head->head, 650 memmove(head->head + sizeof(struct frag_hdr), head->head,
644 (head->data - head->head) - sizeof(struct frag_hdr)); 651 (head->data - head->head) - sizeof(struct frag_hdr));
645 head->mac.raw += sizeof(struct frag_hdr); 652 head->mac_header += sizeof(struct frag_hdr);
646 head->nh.raw += sizeof(struct frag_hdr); 653 head->network_header += sizeof(struct frag_hdr);
647 654
648 skb_shinfo(head)->frag_list = head->next; 655 skb_shinfo(head)->frag_list = head->next;
649 head->h.raw = head->data; 656 skb_reset_transport_header(head);
650 skb_push(head, head->data - head->nh.raw); 657 skb_push(head, head->data - skb_network_header(head));
651 atomic_sub(head->truesize, &ip6_frag_mem); 658 atomic_sub(head->truesize, &ip6_frag_mem);
652 659
653 for (fp=head->next; fp; fp = fp->next) { 660 for (fp=head->next; fp; fp = fp->next) {
@@ -663,15 +670,17 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
663 670
664 head->next = NULL; 671 head->next = NULL;
665 head->dev = dev; 672 head->dev = dev;
666 skb_set_timestamp(head, &fq->stamp); 673 head->tstamp = fq->stamp;
667 head->nh.ipv6h->payload_len = htons(payload_len); 674 ipv6_hdr(head)->payload_len = htons(payload_len);
668 IP6CB(head)->nhoff = nhoff; 675 IP6CB(head)->nhoff = nhoff;
669 676
670 *skb_in = head; 677 *skb_in = head;
671 678
672 /* Yes, and fold redundant checksum back. 8) */ 679 /* Yes, and fold redundant checksum back. 8) */
673 if (head->ip_summed == CHECKSUM_COMPLETE) 680 if (head->ip_summed == CHECKSUM_COMPLETE)
674 head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum); 681 head->csum = csum_partial(skb_network_header(head),
682 skb_network_header_len(head),
683 head->csum);
675 684
676 rcu_read_lock(); 685 rcu_read_lock();
677 IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS); 686 IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
@@ -699,33 +708,34 @@ static int ipv6_frag_rcv(struct sk_buff **skbp)
699 struct net_device *dev = skb->dev; 708 struct net_device *dev = skb->dev;
700 struct frag_hdr *fhdr; 709 struct frag_hdr *fhdr;
701 struct frag_queue *fq; 710 struct frag_queue *fq;
702 struct ipv6hdr *hdr; 711 struct ipv6hdr *hdr = ipv6_hdr(skb);
703
704 hdr = skb->nh.ipv6h;
705 712
706 IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS); 713 IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS);
707 714
708 /* Jumbo payload inhibits frag. header */ 715 /* Jumbo payload inhibits frag. header */
709 if (hdr->payload_len==0) { 716 if (hdr->payload_len==0) {
710 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); 717 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
711 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw); 718 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
719 skb_network_header_len(skb));
712 return -1; 720 return -1;
713 } 721 }
714 if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+sizeof(struct frag_hdr))) { 722 if (!pskb_may_pull(skb, (skb_transport_offset(skb) +
723 sizeof(struct frag_hdr)))) {
715 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); 724 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
716 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw); 725 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
726 skb_network_header_len(skb));
717 return -1; 727 return -1;
718 } 728 }
719 729
720 hdr = skb->nh.ipv6h; 730 hdr = ipv6_hdr(skb);
721 fhdr = (struct frag_hdr *)skb->h.raw; 731 fhdr = (struct frag_hdr *)skb_transport_header(skb);
722 732
723 if (!(fhdr->frag_off & htons(0xFFF9))) { 733 if (!(fhdr->frag_off & htons(0xFFF9))) {
724 /* It is not a fragmented frame */ 734 /* It is not a fragmented frame */
725 skb->h.raw += sizeof(struct frag_hdr); 735 skb->transport_header += sizeof(struct frag_hdr);
726 IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS); 736 IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS);
727 737
728 IP6CB(skb)->nhoff = (u8*)fhdr - skb->nh.raw; 738 IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
729 return 1; 739 return 1;
730 } 740 }
731 741
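
ip6_frag_reasm() strips the fragment header in place: the next-header byte saved at nhoff is patched with the value the fragment header carried, everything in front of the payload slides forward by sizeof(struct frag_hdr), and the recorded mac/network header positions move with it. Sketch of that step, assuming head is the linearized first fragment:

    #include <linux/skbuff.h>
    #include <linux/string.h>
    #include <net/ipv6.h>

    static void strip_frag_hdr(struct sk_buff *head, unsigned int nhoff)
    {
            /* splice the fragment header's next-header value back */
            skb_network_header(head)[nhoff] = skb_transport_header(head)[0];

            memmove(head->head + sizeof(struct frag_hdr), head->head,
                    (head->data - head->head) - sizeof(struct frag_hdr));

            head->mac_header     += sizeof(struct frag_hdr);
            head->network_header += sizeof(struct frag_hdr);
    }

On 64-bit builds the last two lines adjust offsets, on 32-bit they adjust pointers; the arithmetic is identical either way, which is what lets this hunk convert mechanically.
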
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index aebb4e2d5ae3..b46ad53044ba 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -575,6 +575,8 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
575 return NULL; 575 return NULL;
576} 576}
577 577
578EXPORT_SYMBOL(rt6_lookup);
579
578/* ip6_ins_rt is called with FREE table->tb6_lock. 580/* ip6_ins_rt is called with FREE table->tb6_lock.
579 It takes new route entry, the addition fails by any reason the 581 It takes new route entry, the addition fails by any reason the
580 route is freed. In any case, if caller does not hold it, it may 582 route is freed. In any case, if caller does not hold it, it may
@@ -724,7 +726,7 @@ out2:
724 726
725void ip6_route_input(struct sk_buff *skb) 727void ip6_route_input(struct sk_buff *skb)
726{ 728{
727 struct ipv6hdr *iph = skb->nh.ipv6h; 729 struct ipv6hdr *iph = ipv6_hdr(skb);
728 int flags = RT6_LOOKUP_F_HAS_SADDR; 730 int flags = RT6_LOOKUP_F_HAS_SADDR;
729 struct flowi fl = { 731 struct flowi fl = {
730 .iif = skb->dev->ifindex, 732 .iif = skb->dev->ifindex,
@@ -829,6 +831,7 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
829 return fib6_rule_lookup(fl, flags, ip6_pol_route_output); 831 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
830} 832}
831 833
834EXPORT_SYMBOL(ip6_route_output);
832 835
833/* 836/*
834 * Destination cache support functions 837 * Destination cache support functions
@@ -1757,7 +1760,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1757 rtnl_unlock(); 1760 rtnl_unlock();
1758 1761
1759 return err; 1762 return err;
1760 }; 1763 }
1761 1764
1762 return -EINVAL; 1765 return -EINVAL;
1763} 1766}
@@ -1772,7 +1775,7 @@ static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
1772 int type; 1775 int type;
1773 switch (ipstats_mib_noroutes) { 1776 switch (ipstats_mib_noroutes) {
1774 case IPSTATS_MIB_INNOROUTES: 1777 case IPSTATS_MIB_INNOROUTES:
1775 type = ipv6_addr_type(&skb->nh.ipv6h->daddr); 1778 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1776 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) { 1779 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1777 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); 1780 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1778 break; 1781 break;
@@ -2012,7 +2015,7 @@ errout:
2012 return err; 2015 return err;
2013} 2016}
2014 2017
2015int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2018static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2016{ 2019{
2017 struct fib6_config cfg; 2020 struct fib6_config cfg;
2018 int err; 2021 int err;
@@ -2024,7 +2027,7 @@ int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2024 return ip6_route_del(&cfg); 2027 return ip6_route_del(&cfg);
2025} 2028}
2026 2029
2027int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2030static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2028{ 2031{
2029 struct fib6_config cfg; 2032 struct fib6_config cfg;
2030 int err; 2033 int err;
@@ -2161,7 +2164,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2161 prefix, NLM_F_MULTI); 2164 prefix, NLM_F_MULTI);
2162} 2165}
2163 2166
2164int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 2167static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2165{ 2168{
2166 struct nlattr *tb[RTA_MAX+1]; 2169 struct nlattr *tb[RTA_MAX+1];
2167 struct rt6_info *rt; 2170 struct rt6_info *rt;
@@ -2215,7 +2218,7 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2215 /* Reserve room for dummy headers, this skb can pass 2218 /* Reserve room for dummy headers, this skb can pass
2216 through good chunk of routing engine. 2219 through good chunk of routing engine.
2217 */ 2220 */
2218 skb->mac.raw = skb->data; 2221 skb_reset_mac_header(skb);
2219 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); 2222 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2220 2223
2221 rt = (struct rt6_info*) ip6_route_output(NULL, &fl); 2224 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
@@ -2486,8 +2489,9 @@ ctl_table ipv6_route_table[] = {
2486 2489
2487void __init ip6_route_init(void) 2490void __init ip6_route_init(void)
2488{ 2491{
2492#ifdef CONFIG_PROC_FS
2489 struct proc_dir_entry *p; 2493 struct proc_dir_entry *p;
2490 2494#endif
2491 ip6_dst_ops.kmem_cachep = 2495 ip6_dst_ops.kmem_cachep =
2492 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 2496 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2493 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 2497 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
@@ -2505,6 +2509,10 @@ void __init ip6_route_init(void)
2505#ifdef CONFIG_IPV6_MULTIPLE_TABLES 2509#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2506 fib6_rules_init(); 2510 fib6_rules_init();
2507#endif 2511#endif
2512
2513 __rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);
2514 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);
2515 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
2508} 2516}
2509 2517
2510void ip6_route_cleanup(void) 2518void ip6_route_cleanup(void)
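
An aside on the route.c hunk above: inet6_rtm_newroute() and friends become static, and ip6_route_init() now hands them to the rtnetlink core with __rtnl_register() instead of exposing them as globals. A compact user-space model of that register-then-dispatch pattern; every name below is invented for illustration.

#include <stdio.h>

enum { MODEL_NEWROUTE, MODEL_DELROUTE, MODEL_GETROUTE, MODEL_MAX };

typedef int (*doit_fn)(const char *msg);

static doit_fn handlers[MODEL_MAX];

static void register_handler(int msgtype, doit_fn doit)
{
    handlers[msgtype] = doit;    /* later dispatch goes via the table */
}

static int newroute(const char *msg) { return printf("new: %s\n", msg) < 0; }
static int delroute(const char *msg) { return printf("del: %s\n", msg) < 0; }

int main(void)
{
    register_handler(MODEL_NEWROUTE, newroute);
    register_handler(MODEL_DELROUTE, delroute);

    /* the core looks the handler up by message type, as rtnetlink does */
    return handlers[MODEL_DELROUTE]("2001:db8::/64");
}
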
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 08d6ed3396e4..1efa95a99f45 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -99,10 +99,10 @@ static struct ip_tunnel * ipip6_tunnel_lookup(__be32 remote, __be32 local)
99 return NULL; 99 return NULL;
100} 100}
101 101
102static struct ip_tunnel ** ipip6_bucket(struct ip_tunnel *t) 102static struct ip_tunnel **__ipip6_bucket(struct ip_tunnel_parm *parms)
103{ 103{
104 __be32 remote = t->parms.iph.daddr; 104 __be32 remote = parms->iph.daddr;
105 __be32 local = t->parms.iph.saddr; 105 __be32 local = parms->iph.saddr;
106 unsigned h = 0; 106 unsigned h = 0;
107 int prio = 0; 107 int prio = 0;
108 108
@@ -117,6 +117,11 @@ static struct ip_tunnel ** ipip6_bucket(struct ip_tunnel *t)
117 return &tunnels[prio][h]; 117 return &tunnels[prio][h];
118} 118}
119 119
120static inline struct ip_tunnel **ipip6_bucket(struct ip_tunnel *t)
121{
122 return __ipip6_bucket(&t->parms);
123}
124
120static void ipip6_tunnel_unlink(struct ip_tunnel *t) 125static void ipip6_tunnel_unlink(struct ip_tunnel *t)
121{ 126{
122 struct ip_tunnel **tp; 127 struct ip_tunnel **tp;
@@ -147,19 +152,9 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int
147 __be32 local = parms->iph.saddr; 152 __be32 local = parms->iph.saddr;
148 struct ip_tunnel *t, **tp, *nt; 153 struct ip_tunnel *t, **tp, *nt;
149 struct net_device *dev; 154 struct net_device *dev;
150 unsigned h = 0;
151 int prio = 0;
152 char name[IFNAMSIZ]; 155 char name[IFNAMSIZ];
153 156
154 if (remote) { 157 for (tp = __ipip6_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
155 prio |= 2;
156 h ^= HASH(remote);
157 }
158 if (local) {
159 prio |= 1;
160 h ^= HASH(local);
161 }
162 for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
163 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) 158 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
164 return t; 159 return t;
165 } 160 }
@@ -224,8 +219,8 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
224 ICMP in the real Internet is absolutely infeasible. 219 ICMP in the real Internet is absolutely infeasible.
225 */ 220 */
226 struct iphdr *iph = (struct iphdr*)skb->data; 221 struct iphdr *iph = (struct iphdr*)skb->data;
227 int type = skb->h.icmph->type; 222 const int type = icmp_hdr(skb)->type;
228 int code = skb->h.icmph->code; 223 const int code = icmp_hdr(skb)->code;
229 struct ip_tunnel *t; 224 struct ip_tunnel *t;
230 int err; 225 int err;
231 226
@@ -280,8 +275,8 @@ out:
280 struct iphdr *iph = (struct iphdr*)dp; 275 struct iphdr *iph = (struct iphdr*)dp;
281 int hlen = iph->ihl<<2; 276 int hlen = iph->ihl<<2;
282 struct ipv6hdr *iph6; 277 struct ipv6hdr *iph6;
283 int type = skb->h.icmph->type; 278 const int type = icmp_hdr(skb)->type;
284 int code = skb->h.icmph->code; 279 const int code = icmp_hdr(skb)->code;
285 int rel_type = 0; 280 int rel_type = 0;
286 int rel_code = 0; 281 int rel_code = 0;
287 int rel_info = 0; 282 int rel_info = 0;
@@ -296,14 +291,14 @@ out:
296 default: 291 default:
297 return; 292 return;
298 case ICMP_PARAMETERPROB: 293 case ICMP_PARAMETERPROB:
299 if (skb->h.icmph->un.gateway < hlen) 294 if (icmp_hdr(skb)->un.gateway < hlen)
300 return; 295 return;
301 296
302 /* So... This guy found something strange INSIDE encapsulated 297 /* So... This guy found something strange INSIDE encapsulated
303 packet. Well, he is fool, but what can we do ? 298 packet. Well, he is fool, but what can we do ?
304 */ 299 */
305 rel_type = ICMPV6_PARAMPROB; 300 rel_type = ICMPV6_PARAMPROB;
306 rel_info = skb->h.icmph->un.gateway - hlen; 301 rel_info = icmp_hdr(skb)->un.gateway - hlen;
307 break; 302 break;
308 303
309 case ICMP_DEST_UNREACH: 304 case ICMP_DEST_UNREACH:
@@ -340,7 +335,7 @@ out:
340 dst_release(skb2->dst); 335 dst_release(skb2->dst);
341 skb2->dst = NULL; 336 skb2->dst = NULL;
342 skb_pull(skb2, skb->data - (u8*)iph6); 337 skb_pull(skb2, skb->data - (u8*)iph6);
343 skb2->nh.raw = skb2->data; 338 skb_reset_network_header(skb2);
344 339
345 /* Try to guess incoming interface */ 340 /* Try to guess incoming interface */
346 rt6i = rt6_lookup(&iph6->saddr, NULL, NULL, 0); 341 rt6i = rt6_lookup(&iph6->saddr, NULL, NULL, 0);
@@ -366,7 +361,7 @@ out:
366static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) 361static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
367{ 362{
368 if (INET_ECN_is_ce(iph->tos)) 363 if (INET_ECN_is_ce(iph->tos))
369 IP6_ECN_set_ce(skb->nh.ipv6h); 364 IP6_ECN_set_ce(ipv6_hdr(skb));
370} 365}
371 366
372static int ipip6_rcv(struct sk_buff *skb) 367static int ipip6_rcv(struct sk_buff *skb)
@@ -377,13 +372,13 @@ static int ipip6_rcv(struct sk_buff *skb)
377 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 372 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
378 goto out; 373 goto out;
379 374
380 iph = skb->nh.iph; 375 iph = ip_hdr(skb);
381 376
382 read_lock(&ipip6_lock); 377 read_lock(&ipip6_lock);
383 if ((tunnel = ipip6_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) { 378 if ((tunnel = ipip6_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
384 secpath_reset(skb); 379 secpath_reset(skb);
385 skb->mac.raw = skb->nh.raw; 380 skb->mac_header = skb->network_header;
386 skb->nh.raw = skb->data; 381 skb_reset_network_header(skb);
387 IPCB(skb)->flags = 0; 382 IPCB(skb)->flags = 0;
388 skb->protocol = htons(ETH_P_IPV6); 383 skb->protocol = htons(ETH_P_IPV6);
389 skb->pkt_type = PACKET_HOST; 384 skb->pkt_type = PACKET_HOST;
@@ -430,7 +425,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
430 struct ip_tunnel *tunnel = netdev_priv(dev); 425 struct ip_tunnel *tunnel = netdev_priv(dev);
431 struct net_device_stats *stats = &tunnel->stat; 426 struct net_device_stats *stats = &tunnel->stat;
432 struct iphdr *tiph = &tunnel->parms.iph; 427 struct iphdr *tiph = &tunnel->parms.iph;
433 struct ipv6hdr *iph6 = skb->nh.ipv6h; 428 struct ipv6hdr *iph6 = ipv6_hdr(skb);
434 u8 tos = tunnel->parms.iph.tos; 429 u8 tos = tunnel->parms.iph.tos;
435 struct rtable *rt; /* Route to the other host */ 430 struct rtable *rt; /* Route to the other host */
436 struct net_device *tdev; /* Device to other host */ 431 struct net_device *tdev; /* Device to other host */
@@ -468,7 +463,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
468 addr_type = ipv6_addr_type(addr6); 463 addr_type = ipv6_addr_type(addr6);
469 464
470 if (addr_type == IPV6_ADDR_ANY) { 465 if (addr_type == IPV6_ADDR_ANY) {
471 addr6 = &skb->nh.ipv6h->daddr; 466 addr6 = &ipv6_hdr(skb)->daddr;
472 addr_type = ipv6_addr_type(addr6); 467 addr_type = ipv6_addr_type(addr6);
473 } 468 }
474 469
@@ -550,11 +545,12 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
550 skb_set_owner_w(new_skb, skb->sk); 545 skb_set_owner_w(new_skb, skb->sk);
551 dev_kfree_skb(skb); 546 dev_kfree_skb(skb);
552 skb = new_skb; 547 skb = new_skb;
553 iph6 = skb->nh.ipv6h; 548 iph6 = ipv6_hdr(skb);
554 } 549 }
555 550
556 skb->h.raw = skb->nh.raw; 551 skb->transport_header = skb->network_header;
557 skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); 552 skb_push(skb, sizeof(struct iphdr));
553 skb_reset_network_header(skb);
558 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 554 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
559 IPCB(skb)->flags = 0; 555 IPCB(skb)->flags = 0;
560 dst_release(skb->dst); 556 dst_release(skb->dst);
@@ -564,7 +560,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
564 * Push down and install the IPIP header. 560 * Push down and install the IPIP header.
565 */ 561 */
566 562
567 iph = skb->nh.iph; 563 iph = ip_hdr(skb);
568 iph->version = 4; 564 iph->version = 4;
569 iph->ihl = sizeof(struct iphdr)>>2; 565 iph->ihl = sizeof(struct iphdr)>>2;
570 if (mtu > IPV6_MIN_MTU) 566 if (mtu > IPV6_MIN_MTU)
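
An aside on the sit.c refactor above: the bucket computation moves into __ipip6_bucket() so ipip6_tunnel_locate() can hash a bare ip_tunnel_parm instead of duplicating the prio/hash logic inline. A simplified model of that two-bit prio plus xor-hash scheme; HASH() below is a stand-in, not the kernel's hash.

#include <stdint.h>
#include <stdio.h>

#define HASH_SIZE 16
#define HASH(a) (((a) ^ ((a) >> 4)) & (HASH_SIZE - 1))  /* stand-in hash */

struct parms_model { uint32_t daddr, saddr; };

/* prio 0: wildcard, 1: local only, 2: remote only, 3: fully keyed */
static unsigned bucket_model(const struct parms_model *p, unsigned *prio)
{
    unsigned h = 0;

    *prio = 0;
    if (p->daddr) { *prio |= 2; h ^= HASH(p->daddr); }
    if (p->saddr) { *prio |= 1; h ^= HASH(p->saddr); }
    return h;
}

int main(void)
{
    struct parms_model p = { .daddr = 0xc6336401, .saddr = 0xc6336402 };
    unsigned prio, h = bucket_model(&p, &prio);

    printf("prio=%u hash=%u\n", prio, h);
    return 0;
}
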
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 92f99927d12d..e2f25ea43b68 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -115,10 +115,10 @@ static __inline__ __sum16 tcp_v6_check(struct tcphdr *th, int len,
115 115
116static __u32 tcp_v6_init_sequence(struct sk_buff *skb) 116static __u32 tcp_v6_init_sequence(struct sk_buff *skb)
117{ 117{
118 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32, 118 return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
119 skb->nh.ipv6h->saddr.s6_addr32, 119 ipv6_hdr(skb)->saddr.s6_addr32,
120 skb->h.th->dest, 120 tcp_hdr(skb)->dest,
121 skb->h.th->source); 121 tcp_hdr(skb)->source);
122} 122}
123 123
124static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 124static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
@@ -486,7 +486,9 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
486 struct sk_buff *pktopts = treq->pktopts; 486 struct sk_buff *pktopts = treq->pktopts;
487 struct inet6_skb_parm *rxopt = IP6CB(pktopts); 487 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
488 if (rxopt->srcrt) 488 if (rxopt->srcrt)
489 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt)); 489 opt = ipv6_invert_rthdr(sk,
490 (struct ipv6_rt_hdr *)(skb_network_header(pktopts) +
491 rxopt->srcrt));
490 } 492 }
491 493
492 if (opt && opt->srcrt) { 494 if (opt && opt->srcrt) {
@@ -507,7 +509,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
507 509
508 skb = tcp_make_synack(sk, dst, req); 510 skb = tcp_make_synack(sk, dst, req);
509 if (skb) { 511 if (skb) {
510 struct tcphdr *th = skb->h.th; 512 struct tcphdr *th = tcp_hdr(skb);
511 513
512 th->check = tcp_v6_check(th, skb->len, 514 th->check = tcp_v6_check(th, skb->len,
513 &treq->loc_addr, &treq->rmt_addr, 515 &treq->loc_addr, &treq->rmt_addr,
@@ -835,8 +837,8 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
835{ 837{
836 __u8 *hash_location = NULL; 838 __u8 *hash_location = NULL;
837 struct tcp_md5sig_key *hash_expected; 839 struct tcp_md5sig_key *hash_expected;
838 struct ipv6hdr *ip6h = skb->nh.ipv6h; 840 struct ipv6hdr *ip6h = ipv6_hdr(skb);
839 struct tcphdr *th = skb->h.th; 841 struct tcphdr *th = tcp_hdr(skb);
840 int length = (th->doff << 2) - sizeof (*th); 842 int length = (th->doff << 2) - sizeof (*th);
841 int genhash; 843 int genhash;
842 u8 *ptr; 844 u8 *ptr;
@@ -944,10 +946,11 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = {
944static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) 946static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
945{ 947{
946 struct ipv6_pinfo *np = inet6_sk(sk); 948 struct ipv6_pinfo *np = inet6_sk(sk);
947 struct tcphdr *th = skb->h.th; 949 struct tcphdr *th = tcp_hdr(skb);
948 950
949 if (skb->ip_summed == CHECKSUM_PARTIAL) { 951 if (skb->ip_summed == CHECKSUM_PARTIAL) {
950 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0); 952 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
953 skb->csum_start = skb_transport_header(skb) - skb->head;
951 skb->csum_offset = offsetof(struct tcphdr, check); 954 skb->csum_offset = offsetof(struct tcphdr, check);
952 } else { 955 } else {
953 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 956 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
@@ -964,12 +967,13 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb)
964 if (!pskb_may_pull(skb, sizeof(*th))) 967 if (!pskb_may_pull(skb, sizeof(*th)))
965 return -EINVAL; 968 return -EINVAL;
966 969
967 ipv6h = skb->nh.ipv6h; 970 ipv6h = ipv6_hdr(skb);
968 th = skb->h.th; 971 th = tcp_hdr(skb);
969 972
970 th->check = 0; 973 th->check = 0;
971 th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, 974 th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
972 IPPROTO_TCP, 0); 975 IPPROTO_TCP, 0);
976 skb->csum_start = skb_transport_header(skb) - skb->head;
973 skb->csum_offset = offsetof(struct tcphdr, check); 977 skb->csum_offset = offsetof(struct tcphdr, check);
974 skb->ip_summed = CHECKSUM_PARTIAL; 978 skb->ip_summed = CHECKSUM_PARTIAL;
975 return 0; 979 return 0;
@@ -977,7 +981,7 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb)
977 981
978static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) 982static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
979{ 983{
980 struct tcphdr *th = skb->h.th, *t1; 984 struct tcphdr *th = tcp_hdr(skb), *t1;
981 struct sk_buff *buff; 985 struct sk_buff *buff;
982 struct flowi fl; 986 struct flowi fl;
983 int tot_len = sizeof(*th); 987 int tot_len = sizeof(*th);
@@ -993,7 +997,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
993 997
994#ifdef CONFIG_TCP_MD5SIG 998#ifdef CONFIG_TCP_MD5SIG
995 if (sk) 999 if (sk)
996 key = tcp_v6_md5_do_lookup(sk, &skb->nh.ipv6h->daddr); 1000 key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr);
997 else 1001 else
998 key = NULL; 1002 key = NULL;
999 1003
@@ -1037,20 +1041,18 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
1037 (TCPOPT_NOP << 16) | 1041 (TCPOPT_NOP << 16) |
1038 (TCPOPT_MD5SIG << 8) | 1042 (TCPOPT_MD5SIG << 8) |
1039 TCPOLEN_MD5SIG); 1043 TCPOLEN_MD5SIG);
1040 tcp_v6_do_calc_md5_hash((__u8*)&opt[1], 1044 tcp_v6_do_calc_md5_hash((__u8 *)&opt[1], key,
1041 key, 1045 &ipv6_hdr(skb)->daddr,
1042 &skb->nh.ipv6h->daddr, 1046 &ipv6_hdr(skb)->saddr,
1043 &skb->nh.ipv6h->saddr, 1047 t1, IPPROTO_TCP, tot_len);
1044 t1, IPPROTO_TCP,
1045 tot_len);
1046 } 1048 }
1047#endif 1049#endif
1048 1050
1049 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0); 1051 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1050 1052
1051 memset(&fl, 0, sizeof(fl)); 1053 memset(&fl, 0, sizeof(fl));
1052 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr); 1054 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
1053 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr); 1055 ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
1054 1056
1055 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst, 1057 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1056 sizeof(*t1), IPPROTO_TCP, 1058 sizeof(*t1), IPPROTO_TCP,
@@ -1079,7 +1081,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
1079static void tcp_v6_send_ack(struct tcp_timewait_sock *tw, 1081static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
1080 struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts) 1082 struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1081{ 1083{
1082 struct tcphdr *th = skb->h.th, *t1; 1084 struct tcphdr *th = tcp_hdr(skb), *t1;
1083 struct sk_buff *buff; 1085 struct sk_buff *buff;
1084 struct flowi fl; 1086 struct flowi fl;
1085 int tot_len = sizeof(struct tcphdr); 1087 int tot_len = sizeof(struct tcphdr);
@@ -1091,7 +1093,7 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
1091 1093
1092#ifdef CONFIG_TCP_MD5SIG 1094#ifdef CONFIG_TCP_MD5SIG
1093 if (!tw && skb->sk) { 1095 if (!tw && skb->sk) {
1094 key = tcp_v6_md5_do_lookup(skb->sk, &skb->nh.ipv6h->daddr); 1096 key = tcp_v6_md5_do_lookup(skb->sk, &ipv6_hdr(skb)->daddr);
1095 } else if (tw && tw->tw_md5_keylen) { 1097 } else if (tw && tw->tw_md5_keylen) {
1096 tw_key.key = tw->tw_md5_key; 1098 tw_key.key = tw->tw_md5_key;
1097 tw_key.keylen = tw->tw_md5_keylen; 1099 tw_key.keylen = tw->tw_md5_keylen;
@@ -1140,20 +1142,18 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
1140 if (key) { 1142 if (key) {
1141 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 1143 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1142 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); 1144 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
1143 tcp_v6_do_calc_md5_hash((__u8 *)topt, 1145 tcp_v6_do_calc_md5_hash((__u8 *)topt, key,
1144 key, 1146 &ipv6_hdr(skb)->daddr,
1145 &skb->nh.ipv6h->daddr, 1147 &ipv6_hdr(skb)->saddr,
1146 &skb->nh.ipv6h->saddr, 1148 t1, IPPROTO_TCP, tot_len);
1147 t1, IPPROTO_TCP,
1148 tot_len);
1149 } 1149 }
1150#endif 1150#endif
1151 1151
1152 buff->csum = csum_partial((char *)t1, tot_len, 0); 1152 buff->csum = csum_partial((char *)t1, tot_len, 0);
1153 1153
1154 memset(&fl, 0, sizeof(fl)); 1154 memset(&fl, 0, sizeof(fl));
1155 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr); 1155 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
1156 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr); 1156 ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
1157 1157
1158 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst, 1158 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1159 tot_len, IPPROTO_TCP, 1159 tot_len, IPPROTO_TCP,
@@ -1197,18 +1197,18 @@ static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1197static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) 1197static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1198{ 1198{
1199 struct request_sock *req, **prev; 1199 struct request_sock *req, **prev;
1200 const struct tcphdr *th = skb->h.th; 1200 const struct tcphdr *th = tcp_hdr(skb);
1201 struct sock *nsk; 1201 struct sock *nsk;
1202 1202
1203 /* Find possible connection requests. */ 1203 /* Find possible connection requests. */
1204 req = inet6_csk_search_req(sk, &prev, th->source, 1204 req = inet6_csk_search_req(sk, &prev, th->source,
1205 &skb->nh.ipv6h->saddr, 1205 &ipv6_hdr(skb)->saddr,
1206 &skb->nh.ipv6h->daddr, inet6_iif(skb)); 1206 &ipv6_hdr(skb)->daddr, inet6_iif(skb));
1207 if (req) 1207 if (req)
1208 return tcp_check_req(sk, skb, req, prev); 1208 return tcp_check_req(sk, skb, req, prev);
1209 1209
1210 nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr, 1210 nsk = __inet6_lookup_established(&tcp_hashinfo, &ipv6_hdr(skb)->saddr,
1211 th->source, &skb->nh.ipv6h->daddr, 1211 th->source, &ipv6_hdr(skb)->daddr,
1212 ntohs(th->dest), inet6_iif(skb)); 1212 ntohs(th->dest), inet6_iif(skb));
1213 1213
1214 if (nsk) { 1214 if (nsk) {
@@ -1275,9 +1275,9 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1275 tcp_openreq_init(req, &tmp_opt, skb); 1275 tcp_openreq_init(req, &tmp_opt, skb);
1276 1276
1277 treq = inet6_rsk(req); 1277 treq = inet6_rsk(req);
1278 ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr); 1278 ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
1279 ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr); 1279 ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
1280 TCP_ECN_create_request(req, skb->h.th); 1280 TCP_ECN_create_request(req, tcp_hdr(skb));
1281 treq->pktopts = NULL; 1281 treq->pktopts = NULL;
1282 if (ipv6_opt_accepted(sk, skb) || 1282 if (ipv6_opt_accepted(sk, skb) ||
1283 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || 1283 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
@@ -1363,7 +1363,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1363 newnp->pktoptions = NULL; 1363 newnp->pktoptions = NULL;
1364 newnp->opt = NULL; 1364 newnp->opt = NULL;
1365 newnp->mcast_oif = inet6_iif(skb); 1365 newnp->mcast_oif = inet6_iif(skb);
1366 newnp->mcast_hops = skb->nh.ipv6h->hop_limit; 1366 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1367 1367
1368 /* 1368 /*
1369 * No need to charge this sock to the relevant IPv6 refcnt debug socks count 1369 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
@@ -1389,7 +1389,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1389 opt == NULL && treq->pktopts) { 1389 opt == NULL && treq->pktopts) {
1390 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts); 1390 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
1391 if (rxopt->srcrt) 1391 if (rxopt->srcrt)
1392 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt)); 1392 opt = ipv6_invert_rthdr(sk,
1393 (struct ipv6_rt_hdr *)(skb_network_header(treq->pktopts) +
1394 rxopt->srcrt));
1393 } 1395 }
1394 1396
1395 if (dst == NULL) { 1397 if (dst == NULL) {
@@ -1469,7 +1471,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1469 } 1471 }
1470 newnp->opt = NULL; 1472 newnp->opt = NULL;
1471 newnp->mcast_oif = inet6_iif(skb); 1473 newnp->mcast_oif = inet6_iif(skb);
1472 newnp->mcast_hops = skb->nh.ipv6h->hop_limit; 1474 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1473 1475
1474 /* Clone native IPv6 options from listening socket (if any) 1476 /* Clone native IPv6 options from listening socket (if any)
1475 1477
@@ -1528,15 +1530,16 @@ out:
1528static __sum16 tcp_v6_checksum_init(struct sk_buff *skb) 1530static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
1529{ 1531{
1530 if (skb->ip_summed == CHECKSUM_COMPLETE) { 1532 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1531 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, 1533 if (!tcp_v6_check(tcp_hdr(skb), skb->len, &ipv6_hdr(skb)->saddr,
1532 &skb->nh.ipv6h->daddr,skb->csum)) { 1534 &ipv6_hdr(skb)->daddr, skb->csum)) {
1533 skb->ip_summed = CHECKSUM_UNNECESSARY; 1535 skb->ip_summed = CHECKSUM_UNNECESSARY;
1534 return 0; 1536 return 0;
1535 } 1537 }
1536 } 1538 }
1537 1539
1538 skb->csum = ~csum_unfold(tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, 1540 skb->csum = ~csum_unfold(tcp_v6_check(tcp_hdr(skb), skb->len,
1539 &skb->nh.ipv6h->daddr, 0)); 1541 &ipv6_hdr(skb)->saddr,
1542 &ipv6_hdr(skb)->daddr, 0));
1540 1543
1541 if (skb->len <= 76) { 1544 if (skb->len <= 76) {
1542 return __skb_checksum_complete(skb); 1545 return __skb_checksum_complete(skb);
@@ -1600,7 +1603,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1600 1603
1601 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1604 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1602 TCP_CHECK_TIMER(sk); 1605 TCP_CHECK_TIMER(sk);
1603 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) 1606 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
1604 goto reset; 1607 goto reset;
1605 TCP_CHECK_TIMER(sk); 1608 TCP_CHECK_TIMER(sk);
1606 if (opt_skb) 1609 if (opt_skb)
@@ -1608,7 +1611,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1608 return 0; 1611 return 0;
1609 } 1612 }
1610 1613
1611 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb)) 1614 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1612 goto csum_err; 1615 goto csum_err;
1613 1616
1614 if (sk->sk_state == TCP_LISTEN) { 1617 if (sk->sk_state == TCP_LISTEN) {
@@ -1631,7 +1634,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1631 } 1634 }
1632 1635
1633 TCP_CHECK_TIMER(sk); 1636 TCP_CHECK_TIMER(sk);
1634 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) 1637 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
1635 goto reset; 1638 goto reset;
1636 TCP_CHECK_TIMER(sk); 1639 TCP_CHECK_TIMER(sk);
1637 if (opt_skb) 1640 if (opt_skb)
@@ -1664,7 +1667,7 @@ ipv6_pktoptions:
1664 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) 1667 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1665 np->mcast_oif = inet6_iif(opt_skb); 1668 np->mcast_oif = inet6_iif(opt_skb);
1666 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) 1669 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1667 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit; 1670 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1668 if (ipv6_opt_accepted(sk, opt_skb)) { 1671 if (ipv6_opt_accepted(sk, opt_skb)) {
1669 skb_set_owner_r(opt_skb, sk); 1672 skb_set_owner_r(opt_skb, sk);
1670 opt_skb = xchg(&np->pktoptions, opt_skb); 1673 opt_skb = xchg(&np->pktoptions, opt_skb);
@@ -1697,28 +1700,27 @@ static int tcp_v6_rcv(struct sk_buff **pskb)
1697 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 1700 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1698 goto discard_it; 1701 goto discard_it;
1699 1702
1700 th = skb->h.th; 1703 th = tcp_hdr(skb);
1701 1704
1702 if (th->doff < sizeof(struct tcphdr)/4) 1705 if (th->doff < sizeof(struct tcphdr)/4)
1703 goto bad_packet; 1706 goto bad_packet;
1704 if (!pskb_may_pull(skb, th->doff*4)) 1707 if (!pskb_may_pull(skb, th->doff*4))
1705 goto discard_it; 1708 goto discard_it;
1706 1709
1707 if ((skb->ip_summed != CHECKSUM_UNNECESSARY && 1710 if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
1708 tcp_v6_checksum_init(skb)))
1709 goto bad_packet; 1711 goto bad_packet;
1710 1712
1711 th = skb->h.th; 1713 th = tcp_hdr(skb);
1712 TCP_SKB_CB(skb)->seq = ntohl(th->seq); 1714 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1713 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 1715 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1714 skb->len - th->doff*4); 1716 skb->len - th->doff*4);
1715 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1717 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1716 TCP_SKB_CB(skb)->when = 0; 1718 TCP_SKB_CB(skb)->when = 0;
1717 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h); 1719 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb));
1718 TCP_SKB_CB(skb)->sacked = 0; 1720 TCP_SKB_CB(skb)->sacked = 0;
1719 1721
1720 sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source, 1722 sk = __inet6_lookup(&tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source,
1721 &skb->nh.ipv6h->daddr, ntohs(th->dest), 1723 &ipv6_hdr(skb)->daddr, ntohs(th->dest),
1722 inet6_iif(skb)); 1724 inet6_iif(skb));
1723 1725
1724 if (!sk) 1726 if (!sk)
@@ -1798,7 +1800,7 @@ do_time_wait:
1798 struct sock *sk2; 1800 struct sock *sk2;
1799 1801
1800 sk2 = inet6_lookup_listener(&tcp_hashinfo, 1802 sk2 = inet6_lookup_listener(&tcp_hashinfo,
1801 &skb->nh.ipv6h->daddr, 1803 &ipv6_hdr(skb)->daddr,
1802 ntohs(th->dest), inet6_iif(skb)); 1804 ntohs(th->dest), inet6_iif(skb));
1803 if (sk2 != NULL) { 1805 if (sk2 != NULL) {
1804 struct inet_timewait_sock *tw = inet_twsk(sk); 1806 struct inet_timewait_sock *tw = inet_twsk(sk);
@@ -1945,6 +1947,7 @@ static int tcp_v6_destroy_sock(struct sock *sk)
1945 return inet6_destroy_sock(sk); 1947 return inet6_destroy_sock(sk);
1946} 1948}
1947 1949
1950#ifdef CONFIG_PROC_FS
1948/* Proc filesystem TCPv6 sock list dumping. */ 1951/* Proc filesystem TCPv6 sock list dumping. */
1949static void get_openreq6(struct seq_file *seq, 1952static void get_openreq6(struct seq_file *seq,
1950 struct sock *sk, struct request_sock *req, int i, int uid) 1953 struct sock *sk, struct request_sock *req, int i, int uid)
@@ -2061,7 +2064,6 @@ static void get_timewait6_sock(struct seq_file *seq,
2061 atomic_read(&tw->tw_refcnt), tw); 2064 atomic_read(&tw->tw_refcnt), tw);
2062} 2065}
2063 2066
2064#ifdef CONFIG_PROC_FS
2065static int tcp6_seq_show(struct seq_file *seq, void *v) 2067static int tcp6_seq_show(struct seq_file *seq, void *v)
2066{ 2068{
2067 struct tcp_iter_state *st; 2069 struct tcp_iter_state *st;
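
An aside on the csum_start assignments added in the tcp_ipv6.c hunks above: with CHECKSUM_PARTIAL, the stack records where summing should begin (csum_start) and where the folded result belongs (csum_offset), and whoever finishes the checksum, hardware or the software fallback, works from those two values. A user-space sketch of finishing such a partial checksum; the 16-bit one's-complement helper is a simplified model, not the kernel routine.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint16_t csum_fold_model(const uint8_t *p, size_t len)
{
    uint32_t sum = 0;

    while (len > 1) { sum += (p[0] << 8) | p[1]; p += 2; len -= 2; }
    if (len)
        sum += p[0] << 8;
    while (sum >> 16)
        sum = (sum & 0xffff) + (sum >> 16);
    return (uint16_t)~sum;
}

int main(void)
{
    uint8_t pkt[64];
    size_t csum_start = 40;     /* where the transport header begins */
    size_t csum_offset = 16;    /* offsetof(struct tcphdr, check)    */
    uint16_t c;

    memset(pkt, 0xab, sizeof(pkt));
    pkt[csum_start + csum_offset] = 0;     /* check field starts zeroed */
    pkt[csum_start + csum_offset + 1] = 0;

    c = csum_fold_model(pkt + csum_start, sizeof(pkt) - csum_start);
    pkt[csum_start + csum_offset] = c >> 8;
    pkt[csum_start + csum_offset + 1] = c & 0xff;

    /* summing again with the stored checksum included must give zero */
    printf("%#x\n", csum_fold_model(pkt + csum_start, sizeof(pkt) - csum_start));
    return 0;
}
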
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f590db57a7c9..b083c09e3d2d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -93,10 +93,10 @@ static struct sock *__udp6_lib_lookup(struct in6_addr *saddr, __be16 sport,
93 continue; 93 continue;
94 score++; 94 score++;
95 } 95 }
96 if(score == 4) { 96 if (score == 4) {
97 result = sk; 97 result = sk;
98 break; 98 break;
99 } else if(score > badness) { 99 } else if (score > badness) {
100 result = sk; 100 result = sk;
101 badness = score; 101 badness = score;
102 } 102 }
@@ -120,8 +120,9 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
120 struct ipv6_pinfo *np = inet6_sk(sk); 120 struct ipv6_pinfo *np = inet6_sk(sk);
121 struct inet_sock *inet = inet_sk(sk); 121 struct inet_sock *inet = inet_sk(sk);
122 struct sk_buff *skb; 122 struct sk_buff *skb;
123 size_t copied; 123 unsigned int ulen, copied;
124 int err, copy_only, is_udplite = IS_UDPLITE(sk); 124 int err;
125 int is_udplite = IS_UDPLITE(sk);
125 126
126 if (addr_len) 127 if (addr_len)
127 *addr_len=sizeof(struct sockaddr_in6); 128 *addr_len=sizeof(struct sockaddr_in6);
@@ -134,24 +135,25 @@ try_again:
134 if (!skb) 135 if (!skb)
135 goto out; 136 goto out;
136 137
137 copied = skb->len - sizeof(struct udphdr); 138 ulen = skb->len - sizeof(struct udphdr);
138 if (copied > len) { 139 copied = len;
139 copied = len; 140 if (copied > ulen)
141 copied = ulen;
142 else if (copied < ulen)
140 msg->msg_flags |= MSG_TRUNC; 143 msg->msg_flags |= MSG_TRUNC;
141 }
142 144
143 /* 145 /*
144 * Decide whether to checksum and/or copy data. 146 * If checksum is needed at all, try to do it while copying the
147 * data. If the data is truncated, or if we only want a partial
148 * coverage checksum (UDP-Lite), do it before the copy.
145 */ 149 */
146 copy_only = (skb->ip_summed==CHECKSUM_UNNECESSARY);
147 150
148 if (is_udplite || (!copy_only && msg->msg_flags&MSG_TRUNC)) { 151 if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
149 if (__udp_lib_checksum_complete(skb)) 152 if (udp_lib_checksum_complete(skb))
150 goto csum_copy_err; 153 goto csum_copy_err;
151 copy_only = 1;
152 } 154 }
153 155
154 if (copy_only) 156 if (skb_csum_unnecessary(skb))
155 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), 157 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
156 msg->msg_iov, copied ); 158 msg->msg_iov, copied );
157 else { 159 else {
@@ -170,15 +172,16 @@ try_again:
170 172
171 sin6 = (struct sockaddr_in6 *) msg->msg_name; 173 sin6 = (struct sockaddr_in6 *) msg->msg_name;
172 sin6->sin6_family = AF_INET6; 174 sin6->sin6_family = AF_INET6;
173 sin6->sin6_port = skb->h.uh->source; 175 sin6->sin6_port = udp_hdr(skb)->source;
174 sin6->sin6_flowinfo = 0; 176 sin6->sin6_flowinfo = 0;
175 sin6->sin6_scope_id = 0; 177 sin6->sin6_scope_id = 0;
176 178
177 if (skb->protocol == htons(ETH_P_IP)) 179 if (skb->protocol == htons(ETH_P_IP))
178 ipv6_addr_set(&sin6->sin6_addr, 0, 0, 180 ipv6_addr_set(&sin6->sin6_addr, 0, 0,
179 htonl(0xffff), skb->nh.iph->saddr); 181 htonl(0xffff), ip_hdr(skb)->saddr);
180 else { 182 else {
181 ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr); 183 ipv6_addr_copy(&sin6->sin6_addr,
184 &ipv6_hdr(skb)->saddr);
182 if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) 185 if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
183 sin6->sin6_scope_id = IP6CB(skb)->iif; 186 sin6->sin6_scope_id = IP6CB(skb)->iif;
184 } 187 }
@@ -194,7 +197,7 @@ try_again:
194 197
195 err = copied; 198 err = copied;
196 if (flags & MSG_TRUNC) 199 if (flags & MSG_TRUNC)
197 err = skb->len - sizeof(struct udphdr); 200 err = ulen;
198 201
199out_free: 202out_free:
200 skb_free_datagram(sk, skb); 203 skb_free_datagram(sk, skb);
@@ -279,8 +282,10 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
279 } 282 }
280 } 283 }
281 284
282 if (udp_lib_checksum_complete(skb)) 285 if (sk->sk_filter) {
283 goto drop; 286 if (udp_lib_checksum_complete(skb))
287 goto drop;
288 }
284 289
285 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { 290 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
286 /* Note that an ENOMEM error is charged twice */ 291 /* Note that an ENOMEM error is charged twice */
@@ -325,7 +330,7 @@ static struct sock *udp_v6_mcast_next(struct sock *sk,
325 if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr)) 330 if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr))
326 continue; 331 continue;
327 } 332 }
328 if(!inet6_mc_check(s, loc_addr, rmt_addr)) 333 if (!inet6_mc_check(s, loc_addr, rmt_addr))
329 continue; 334 continue;
330 return s; 335 return s;
331 } 336 }
@@ -341,7 +346,7 @@ static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr,
341 struct in6_addr *daddr, struct hlist_head udptable[]) 346 struct in6_addr *daddr, struct hlist_head udptable[])
342{ 347{
343 struct sock *sk, *sk2; 348 struct sock *sk, *sk2;
344 const struct udphdr *uh = skb->h.uh; 349 const struct udphdr *uh = udp_hdr(skb);
345 int dif; 350 int dif;
346 351
347 read_lock(&udp_hash_lock); 352 read_lock(&udp_hash_lock);
@@ -366,9 +371,20 @@ out:
366 return 0; 371 return 0;
367} 372}
368 373
369static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh) 374static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh,
370 375 int proto)
371{ 376{
377 int err;
378
379 UDP_SKB_CB(skb)->partial_cov = 0;
380 UDP_SKB_CB(skb)->cscov = skb->len;
381
382 if (proto == IPPROTO_UDPLITE) {
383 err = udplite_checksum_init(skb, uh);
384 if (err)
385 return err;
386 }
387
372 if (uh->check == 0) { 388 if (uh->check == 0) {
373 /* RFC 2460 section 8.1 says that we SHOULD log 389 /* RFC 2460 section 8.1 says that we SHOULD log
374 this error. Well, it is reasonable. 390 this error. Well, it is reasonable.
@@ -377,21 +393,20 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh)
377 return 1; 393 return 1;
378 } 394 }
379 if (skb->ip_summed == CHECKSUM_COMPLETE && 395 if (skb->ip_summed == CHECKSUM_COMPLETE &&
380 !csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, 396 !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
381 skb->len, IPPROTO_UDP, skb->csum )) 397 skb->len, proto, skb->csum))
382 skb->ip_summed = CHECKSUM_UNNECESSARY; 398 skb->ip_summed = CHECKSUM_UNNECESSARY;
383 399
384 if (skb->ip_summed != CHECKSUM_UNNECESSARY) 400 if (!skb_csum_unnecessary(skb))
385 skb->csum = ~csum_unfold(csum_ipv6_magic(&skb->nh.ipv6h->saddr, 401 skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
386 &skb->nh.ipv6h->daddr, 402 &ipv6_hdr(skb)->daddr,
387 skb->len, IPPROTO_UDP, 403 skb->len, proto, 0));
388 0));
389 404
390 return (UDP_SKB_CB(skb)->partial_cov = 0); 405 return 0;
391} 406}
392 407
393int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[], 408int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
394 int is_udplite) 409 int proto)
395{ 410{
396 struct sk_buff *skb = *pskb; 411 struct sk_buff *skb = *pskb;
397 struct sock *sk; 412 struct sock *sk;
@@ -403,15 +418,16 @@ int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
403 if (!pskb_may_pull(skb, sizeof(struct udphdr))) 418 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
404 goto short_packet; 419 goto short_packet;
405 420
406 saddr = &skb->nh.ipv6h->saddr; 421 saddr = &ipv6_hdr(skb)->saddr;
407 daddr = &skb->nh.ipv6h->daddr; 422 daddr = &ipv6_hdr(skb)->daddr;
408 uh = skb->h.uh; 423 uh = udp_hdr(skb);
409 424
410 ulen = ntohs(uh->len); 425 ulen = ntohs(uh->len);
411 if (ulen > skb->len) 426 if (ulen > skb->len)
412 goto short_packet; 427 goto short_packet;
413 428
414 if(! is_udplite ) { /* UDP validates ulen. */ 429 if (proto == IPPROTO_UDP) {
430 /* UDP validates ulen. */
415 431
416 /* Check for jumbo payload */ 432 /* Check for jumbo payload */
417 if (ulen == 0) 433 if (ulen == 0)
@@ -423,19 +439,15 @@ int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
423 if (ulen < skb->len) { 439 if (ulen < skb->len) {
424 if (pskb_trim_rcsum(skb, ulen)) 440 if (pskb_trim_rcsum(skb, ulen))
425 goto short_packet; 441 goto short_packet;
426 saddr = &skb->nh.ipv6h->saddr; 442 saddr = &ipv6_hdr(skb)->saddr;
427 daddr = &skb->nh.ipv6h->daddr; 443 daddr = &ipv6_hdr(skb)->daddr;
428 uh = skb->h.uh; 444 uh = udp_hdr(skb);
429 } 445 }
430
431 if (udp6_csum_init(skb, uh))
432 goto discard;
433
434 } else { /* UDP-Lite validates cscov. */
435 if (udplite6_csum_init(skb, uh))
436 goto discard;
437 } 446 }
438 447
448 if (udp6_csum_init(skb, uh, proto))
449 goto discard;
450
439 /* 451 /*
440 * Multicast receive code 452 * Multicast receive code
441 */ 453 */
@@ -457,33 +469,34 @@ int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
457 469
458 if (udp_lib_checksum_complete(skb)) 470 if (udp_lib_checksum_complete(skb))
459 goto discard; 471 goto discard;
460 UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, is_udplite); 472 UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
461 473
462 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev); 474 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev);
463 475
464 kfree_skb(skb); 476 kfree_skb(skb);
465 return(0); 477 return 0;
466 } 478 }
467 479
468 /* deliver */ 480 /* deliver */
469 481
470 udpv6_queue_rcv_skb(sk, skb); 482 udpv6_queue_rcv_skb(sk, skb);
471 sock_put(sk); 483 sock_put(sk);
472 return(0); 484 return 0;
473 485
474short_packet: 486short_packet:
475 LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n", 487 LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n",
476 is_udplite? "-Lite" : "", ulen, skb->len); 488 proto == IPPROTO_UDPLITE ? "-Lite" : "",
489 ulen, skb->len);
477 490
478discard: 491discard:
479 UDP6_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); 492 UDP6_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
480 kfree_skb(skb); 493 kfree_skb(skb);
481 return(0); 494 return 0;
482} 495}
483 496
484static __inline__ int udpv6_rcv(struct sk_buff **pskb) 497static __inline__ int udpv6_rcv(struct sk_buff **pskb)
485{ 498{
486 return __udp6_lib_rcv(pskb, udp_hash, 0); 499 return __udp6_lib_rcv(pskb, udp_hash, IPPROTO_UDP);
487} 500}
488 501
489/* 502/*
@@ -521,7 +534,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
521 /* 534 /*
522 * Create a UDP header 535 * Create a UDP header
523 */ 536 */
524 uh = skb->h.uh; 537 uh = udp_hdr(skb);
525 uh->source = fl->fl_ip_sport; 538 uh->source = fl->fl_ip_sport;
526 uh->dest = fl->fl_ip_dport; 539 uh->dest = fl->fl_ip_dport;
527 uh->len = htons(up->len); 540 uh->len = htons(up->len);
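
An aside on the udpv6_recvmsg() change above: keeping the datagram length (ulen) separate from the copied byte count lets MSG_TRUNC detection and the MSG_TRUNC return value stay consistent. The clamp reduces to a few lines; a minimal model:

#include <stdbool.h>
#include <stdio.h>

struct recv_result { unsigned int copied; bool truncated; };

static struct recv_result clamp_copy(unsigned int ulen, unsigned int len)
{
    struct recv_result r = { .copied = len, .truncated = false };

    if (r.copied > ulen)
        r.copied = ulen;        /* caller asked for more than is there */
    else if (r.copied < ulen)
        r.truncated = true;     /* datagram did not fit: set MSG_TRUNC */
    return r;
}

int main(void)
{
    struct recv_result r = clamp_copy(1500, 512);

    printf("copied=%u truncated=%d\n", r.copied, r.truncated);
    return 0;
}
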
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 629f97162fbc..f54016a55004 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -19,7 +19,7 @@ DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6) __read_mostly;
19 19
20static int udplitev6_rcv(struct sk_buff **pskb) 20static int udplitev6_rcv(struct sk_buff **pskb)
21{ 21{
22 return __udp6_lib_rcv(pskb, udplite_hash, 1); 22 return __udp6_lib_rcv(pskb, udplite_hash, IPPROTO_UDPLITE);
23} 23}
24 24
25static void udplitev6_err(struct sk_buff *skb, 25static void udplitev6_err(struct sk_buff *skb,
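
An aside on threading a proto argument through __udp6_lib_rcv() above: UDP and UDP-Lite now share one receive path, and the main behavioral difference left is checksum coverage, where UDP-Lite's cscov may cover only a prefix of the datagram (cscov 0 meaning full coverage, per RFC 3828). A simplified model of coverage-limited summing; the helper is illustrative, not the kernel's.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t sum16_model(const uint8_t *p, size_t len)
{
    uint32_t s = 0;

    while (len > 1) { s += (p[0] << 8) | p[1]; p += 2; len -= 2; }
    if (len)
        s += p[0] << 8;
    return s;
}

/* UDP covers the whole datagram; UDP-Lite only the first cscov bytes */
static uint32_t coverage_sum(const uint8_t *dgram, size_t len, size_t cscov)
{
    size_t covered = (cscov && cscov < len) ? cscov : len;

    return sum16_model(dgram, covered);
}

int main(void)
{
    uint8_t d[32];

    memset(d, 0x11, sizeof(d));
    printf("full=%u partial=%u\n",
           coverage_sum(d, sizeof(d), 0),   /* whole datagram */
           coverage_sum(d, sizeof(d), 8));  /* first 8 bytes  */
    return 0;
}
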
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 31f651f95096..d7ed8aa56ec1 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -28,14 +28,14 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
28 unsigned int nhoff; 28 unsigned int nhoff;
29 29
30 nhoff = IP6CB(skb)->nhoff; 30 nhoff = IP6CB(skb)->nhoff;
31 nexthdr = skb->nh.raw[nhoff]; 31 nexthdr = skb_network_header(skb)[nhoff];
32 32
33 seq = 0; 33 seq = 0;
34 if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) 34 if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
35 goto drop; 35 goto drop;
36 36
37 do { 37 do {
38 struct ipv6hdr *iph = skb->nh.ipv6h; 38 struct ipv6hdr *iph = ipv6_hdr(skb);
39 39
40 if (xfrm_nr == XFRM_MAX_DEPTH) 40 if (xfrm_nr == XFRM_MAX_DEPTH)
41 goto drop; 41 goto drop;
@@ -58,7 +58,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
58 if (nexthdr <= 0) 58 if (nexthdr <= 0)
59 goto drop_unlock; 59 goto drop_unlock;
60 60
61 skb->nh.raw[nhoff] = nexthdr; 61 skb_network_header(skb)[nhoff] = nexthdr;
62 62
63 if (x->props.replay_window) 63 if (x->props.replay_window)
64 xfrm_replay_advance(x, seq); 64 xfrm_replay_advance(x, seq);
@@ -112,8 +112,8 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
112 return -1; 112 return -1;
113 } else { 113 } else {
114#ifdef CONFIG_NETFILTER 114#ifdef CONFIG_NETFILTER
115 skb->nh.ipv6h->payload_len = htons(skb->len); 115 ipv6_hdr(skb)->payload_len = htons(skb->len);
116 __skb_push(skb, skb->data - skb->nh.raw); 116 __skb_push(skb, skb->data - skb_network_header(skb));
117 117
118 NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL, 118 NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL,
119 ip6_rcv_finish); 119 ip6_rcv_finish);
@@ -140,19 +140,19 @@ int xfrm6_rcv(struct sk_buff **pskb)
140 return xfrm6_rcv_spi(*pskb, 0); 140 return xfrm6_rcv_spi(*pskb, 0);
141} 141}
142 142
143EXPORT_SYMBOL(xfrm6_rcv);
144
143int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, 145int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
144 xfrm_address_t *saddr, u8 proto) 146 xfrm_address_t *saddr, u8 proto)
145{ 147{
146 struct xfrm_state *x = NULL; 148 struct xfrm_state *x = NULL;
147 int wildcard = 0; 149 int wildcard = 0;
148 struct in6_addr any;
149 xfrm_address_t *xany; 150 xfrm_address_t *xany;
150 struct xfrm_state *xfrm_vec_one = NULL; 151 struct xfrm_state *xfrm_vec_one = NULL;
151 int nh = 0; 152 int nh = 0;
152 int i = 0; 153 int i = 0;
153 154
154 ipv6_addr_set(&any, 0, 0, 0, 0); 155 xany = (xfrm_address_t *)&in6addr_any;
155 xany = (xfrm_address_t *)&any;
156 156
157 for (i = 0; i < 3; i++) { 157 for (i = 0; i < 3; i++) {
158 xfrm_address_t *dst, *src; 158 xfrm_address_t *dst, *src;
@@ -247,3 +247,5 @@ drop:
247 xfrm_state_put(xfrm_vec_one); 247 xfrm_state_put(xfrm_vec_one);
248 return -1; 248 return -1;
249} 249}
250
251EXPORT_SYMBOL(xfrm6_input_addr);
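
An aside on the xfrm6_input.c hunk above: building an all-zero "any" address on the stack is replaced by pointing at the shared in6addr_any constant. The same constant exists in user space, so the equivalence is easy to check:

#include <netinet/in.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    struct in6_addr any;

    memset(&any, 0, sizeof(any));           /* old style: build :: by hand  */

    /* new style: reuse the shared constant; prints 1 */
    printf("%d\n", memcmp(&any, &in6addr_any, sizeof(any)) == 0);
    return 0;
}
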
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index edcfffa9e87b..2e61d6ddece3 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -38,17 +38,18 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
38 int hdr_len; 38 int hdr_len;
39 39
40 skb_push(skb, x->props.header_len); 40 skb_push(skb, x->props.header_len);
41 iph = skb->nh.ipv6h; 41 iph = ipv6_hdr(skb);
42 42
43 hdr_len = ip6_find_1stfragopt(skb, &prevhdr); 43 hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
44 skb->nh.raw = prevhdr - x->props.header_len; 44 skb_set_network_header(skb,
45 skb->h.raw = skb->data + hdr_len; 45 (prevhdr - x->props.header_len) - skb->data);
46 skb_set_transport_header(skb, hdr_len);
46 memmove(skb->data, iph, hdr_len); 47 memmove(skb->data, iph, hdr_len);
47 48
48 skb->nh.raw = skb->data; 49 skb_reset_network_header(skb);
49 top_iph = skb->nh.ipv6h; 50 top_iph = ipv6_hdr(skb);
50 skb->nh.raw = &top_iph->nexthdr; 51 skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
51 skb->h.ipv6h = top_iph + 1; 52 skb->network_header += offsetof(struct ipv6hdr, nexthdr);
52 53
53 ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); 54 ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
54 ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); 55 ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
@@ -59,6 +60,7 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
59static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) 60static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
60{ 61{
61 struct ipv6hdr *ip6h; 62 struct ipv6hdr *ip6h;
63 const unsigned char *old_mac;
62 int size = sizeof(struct ipv6hdr); 64 int size = sizeof(struct ipv6hdr);
63 int err = -EINVAL; 65 int err = -EINVAL;
64 66
@@ -66,13 +68,14 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
66 goto out; 68 goto out;
67 69
68 skb_push(skb, size); 70 skb_push(skb, size);
69 memmove(skb->data, skb->nh.raw, size); 71 memmove(skb->data, skb_network_header(skb), size);
70 skb->nh.raw = skb->data; 72 skb_reset_network_header(skb);
71 73
72 skb->mac.raw = memmove(skb->data - skb->mac_len, 74 old_mac = skb_mac_header(skb);
73 skb->mac.raw, skb->mac_len); 75 skb_set_mac_header(skb, -skb->mac_len);
76 memmove(skb_mac_header(skb), old_mac, skb->mac_len);
74 77
75 ip6h = skb->nh.ipv6h; 78 ip6h = ipv6_hdr(skb);
76 ip6h->payload_len = htons(skb->len - size); 79 ip6h->payload_len = htons(skb->len - size);
77 ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6); 80 ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6);
78 ipv6_addr_copy(&ip6h->saddr, (struct in6_addr *) &x->sel.saddr.a6); 81 ipv6_addr_copy(&ip6h->saddr, (struct in6_addr *) &x->sel.saddr.a6);
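
An aside on the header relocation idiom the beet input path (and, further down, the tunnel input path) switches to: remember where the old MAC header sits, point the MAC header just below the current data pointer, then memmove the bytes across. A user-space model of relocating a header in front of the packet start; the buffer layout here is invented for illustration.

#include <assert.h>
#include <string.h>

int main(void)
{
    unsigned char buf[128];
    unsigned char *data = buf + 16;        /* current packet start      */
    size_t mac_len = 14;
    unsigned char *old_mac = data + 40;    /* saved MAC header location */
    unsigned char *new_mac;

    memset(buf, 0, sizeof(buf));
    memset(old_mac, 0xee, mac_len);

    /* skb_set_mac_header(skb, -skb->mac_len): just below the data start */
    new_mac = data - mac_len;
    memmove(new_mac, old_mac, mac_len);

    assert(new_mac[0] == 0xee && new_mac[mac_len - 1] == 0xee);
    return 0;
}
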
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
index 6031c16d46ca..6ad6d7ac6bd7 100644
--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -50,11 +50,12 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
50 int hdr_len; 50 int hdr_len;
51 51
52 skb_push(skb, x->props.header_len); 52 skb_push(skb, x->props.header_len);
53 iph = skb->nh.ipv6h; 53 iph = ipv6_hdr(skb);
54 54
55 hdr_len = x->type->hdr_offset(x, skb, &prevhdr); 55 hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
56 skb->nh.raw = prevhdr - x->props.header_len; 56 skb_set_network_header(skb,
57 skb->h.raw = skb->data + hdr_len; 57 (prevhdr - x->props.header_len) - skb->data);
58 skb_set_transport_header(skb, hdr_len);
58 memmove(skb->data, iph, hdr_len); 59 memmove(skb->data, iph, hdr_len);
59 return 0; 60 return 0;
60} 61}
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index 3a4b39b12bad..c026bfea820a 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -32,11 +32,12 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
32 int hdr_len; 32 int hdr_len;
33 33
34 skb_push(skb, x->props.header_len); 34 skb_push(skb, x->props.header_len);
35 iph = skb->nh.ipv6h; 35 iph = ipv6_hdr(skb);
36 36
37 hdr_len = x->type->hdr_offset(x, skb, &prevhdr); 37 hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
38 skb->nh.raw = prevhdr - x->props.header_len; 38 skb_set_network_header(skb,
39 skb->h.raw = skb->data + hdr_len; 39 (prevhdr - x->props.header_len) - skb->data);
40 skb_set_transport_header(skb, hdr_len);
40 memmove(skb->data, iph, hdr_len); 41 memmove(skb->data, iph, hdr_len);
41 return 0; 42 return 0;
42} 43}
@@ -51,13 +52,16 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
51 */ 52 */
52static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) 53static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
53{ 54{
54 int ihl = skb->data - skb->h.raw; 55 int ihl = skb->data - skb_transport_header(skb);
55 56
56 if (skb->h.raw != skb->nh.raw) 57 if (skb->transport_header != skb->network_header) {
57 skb->nh.raw = memmove(skb->h.raw, skb->nh.raw, ihl); 58 memmove(skb_transport_header(skb),
58 skb->nh.ipv6h->payload_len = htons(skb->len + ihl - 59 skb_network_header(skb), ihl);
60 skb->network_header = skb->transport_header;
61 }
62 ipv6_hdr(skb)->payload_len = htons(skb->len + ihl -
59 sizeof(struct ipv6hdr)); 63 sizeof(struct ipv6hdr));
60 skb->h.raw = skb->data; 64 skb_reset_transport_header(skb);
61 return 0; 65 return 0;
62} 66}
63 67
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 0bc866c0d83c..a6c0cdf46ad6 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -18,8 +18,8 @@
18 18
19static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) 19static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
20{ 20{
21 struct ipv6hdr *outer_iph = skb->nh.ipv6h; 21 struct ipv6hdr *outer_iph = ipv6_hdr(skb);
22 struct ipv6hdr *inner_iph = skb->h.ipv6h; 22 struct ipv6hdr *inner_iph = ipipv6_hdr(skb);
23 23
24 if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph))) 24 if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
25 IP6_ECN_set_ce(inner_iph); 25 IP6_ECN_set_ce(inner_iph);
@@ -27,8 +27,8 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
27 27
28static inline void ip6ip_ecn_decapsulate(struct sk_buff *skb) 28static inline void ip6ip_ecn_decapsulate(struct sk_buff *skb)
29{ 29{
30 if (INET_ECN_is_ce(ipv6_get_dsfield(skb->nh.ipv6h))) 30 if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6_hdr(skb))))
31 IP_ECN_set_ce(skb->h.ipiph); 31 IP_ECN_set_ce(ipip_hdr(skb));
32} 32}
33 33
34/* Add encapsulation header. 34/* Add encapsulation header.
@@ -51,12 +51,12 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
51 int dsfield; 51 int dsfield;
52 52
53 skb_push(skb, x->props.header_len); 53 skb_push(skb, x->props.header_len);
54 iph = skb->nh.ipv6h; 54 iph = ipv6_hdr(skb);
55 55
56 skb->nh.raw = skb->data; 56 skb_reset_network_header(skb);
57 top_iph = skb->nh.ipv6h; 57 top_iph = ipv6_hdr(skb);
58 skb->nh.raw = &top_iph->nexthdr; 58 skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
59 skb->h.ipv6h = top_iph + 1; 59 skb->network_header += offsetof(struct ipv6hdr, nexthdr);
60 60
61 top_iph->version = 6; 61 top_iph->version = 6;
62 if (xdst->route->ops->family == AF_INET6) { 62 if (xdst->route->ops->family == AF_INET6) {
@@ -86,9 +86,11 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
86static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) 86static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
87{ 87{
88 int err = -EINVAL; 88 int err = -EINVAL;
89 const unsigned char *old_mac;
90 const unsigned char *nh = skb_network_header(skb);
89 91
90 if (skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPV6 92 if (nh[IP6CB(skb)->nhoff] != IPPROTO_IPV6 &&
91 && skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPIP) 93 nh[IP6CB(skb)->nhoff] != IPPROTO_IPIP)
92 goto out; 94 goto out;
93 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 95 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
94 goto out; 96 goto out;
@@ -97,9 +99,10 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
97 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) 99 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
98 goto out; 100 goto out;
99 101
100 if (skb->nh.raw[IP6CB(skb)->nhoff] == IPPROTO_IPV6) { 102 nh = skb_network_header(skb);
103 if (nh[IP6CB(skb)->nhoff] == IPPROTO_IPV6) {
101 if (x->props.flags & XFRM_STATE_DECAP_DSCP) 104 if (x->props.flags & XFRM_STATE_DECAP_DSCP)
102 ipv6_copy_dscp(skb->nh.ipv6h, skb->h.ipv6h); 105 ipv6_copy_dscp(ipv6_hdr(skb), ipipv6_hdr(skb));
103 if (!(x->props.flags & XFRM_STATE_NOECN)) 106 if (!(x->props.flags & XFRM_STATE_NOECN))
104 ipip6_ecn_decapsulate(skb); 107 ipip6_ecn_decapsulate(skb);
105 } else { 108 } else {
@@ -107,9 +110,10 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
107 ip6ip_ecn_decapsulate(skb); 110 ip6ip_ecn_decapsulate(skb);
108 skb->protocol = htons(ETH_P_IP); 111 skb->protocol = htons(ETH_P_IP);
109 } 112 }
110 skb->mac.raw = memmove(skb->data - skb->mac_len, 113 old_mac = skb_mac_header(skb);
111 skb->mac.raw, skb->mac_len); 114 skb_set_mac_header(skb, -skb->mac_len);
112 skb->nh.raw = skb->data; 115 memmove(skb_mac_header(skb), old_mac, skb->mac_len);
116 skb_reset_network_header(skb);
113 err = 0; 117 err = 0;
114 118
115out: 119out:
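
An aside on the ECN decapsulation helpers rewritten above: when the outer tunnel header carries Congestion Experienced, the mark is copied onto the inner header so the congestion signal survives decapsulation. ECN lives in the low two bits of the traffic-class byte; a simplified model of the check-and-propagate step:

#include <stdio.h>

#define ECN_MASK 3
#define ECN_CE   3

static int ecn_is_ce(unsigned dsfield)  { return (dsfield & ECN_MASK) == ECN_CE; }
static unsigned ecn_set_ce(unsigned df) { return df | ECN_CE; }

int main(void)
{
    unsigned outer = 0x03, inner = 0x01;   /* outer CE, inner ECT(1) */

    if (ecn_is_ce(outer))
        inner = ecn_set_ce(inner);
    printf("inner=%#x\n", inner);          /* 0x3: CE propagated */
    return 0;
}
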
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index d6d786b89d2b..56364a5f676a 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -23,6 +23,8 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
23 return ip6_find_1stfragopt(skb, prevhdr); 23 return ip6_find_1stfragopt(skb, prevhdr);
24} 24}
25 25
26EXPORT_SYMBOL(xfrm6_find_1stfragopt);
27
26static int xfrm6_tunnel_check_size(struct sk_buff *skb) 28static int xfrm6_tunnel_check_size(struct sk_buff *skb)
27{ 29{
28 int mtu, ret = 0; 30 int mtu, ret = 0;
@@ -76,11 +78,11 @@ static int xfrm6_output_one(struct sk_buff *skb)
76 x->curlft.bytes += skb->len; 78 x->curlft.bytes += skb->len;
77 x->curlft.packets++; 79 x->curlft.packets++;
78 if (x->props.mode == XFRM_MODE_ROUTEOPTIMIZATION) 80 if (x->props.mode == XFRM_MODE_ROUTEOPTIMIZATION)
79 x->lastused = (u64)xtime.tv_sec; 81 x->lastused = get_seconds();
80 82
81 spin_unlock_bh(&x->lock); 83 spin_unlock_bh(&x->lock);
82 84
83 skb->nh.raw = skb->data; 85 skb_reset_network_header(skb);
84 86
85 if (!(skb->dst = dst_pop(dst))) { 87 if (!(skb->dst = dst_pop(dst))) {
86 err = -EHOSTUNREACH; 88 err = -EHOSTUNREACH;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index d8a585bd2cb4..1faa2ea80afc 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -240,7 +240,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
240 if (!afinfo) { 240 if (!afinfo) {
241 dst = *dst_p; 241 dst = *dst_p;
242 goto error; 242 goto error;
243 }; 243 }
244
244 dst_prev->output = afinfo->output; 245 dst_prev->output = afinfo->output;
245 xfrm_state_put_afinfo(afinfo); 246 xfrm_state_put_afinfo(afinfo);
246 /* Sheit... I remember I did this right. Apparently, 247 /* Sheit... I remember I did this right. Apparently,
@@ -270,17 +271,19 @@ error:
270static inline void 271static inline void
271_decode_session6(struct sk_buff *skb, struct flowi *fl) 272_decode_session6(struct sk_buff *skb, struct flowi *fl)
272{ 273{
273 u16 offset = skb->h.raw - skb->nh.raw; 274 u16 offset = skb_network_header_len(skb);
274 struct ipv6hdr *hdr = skb->nh.ipv6h; 275 struct ipv6hdr *hdr = ipv6_hdr(skb);
275 struct ipv6_opt_hdr *exthdr; 276 struct ipv6_opt_hdr *exthdr;
276 u8 nexthdr = skb->nh.raw[IP6CB(skb)->nhoff]; 277 const unsigned char *nh = skb_network_header(skb);
278 u8 nexthdr = nh[IP6CB(skb)->nhoff];
277 279
278 memset(fl, 0, sizeof(struct flowi)); 280 memset(fl, 0, sizeof(struct flowi));
279 ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr); 281 ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr);
280 ipv6_addr_copy(&fl->fl6_src, &hdr->saddr); 282 ipv6_addr_copy(&fl->fl6_src, &hdr->saddr);
281 283
282 while (pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data)) { 284 while (pskb_may_pull(skb, nh + offset + 1 - skb->data)) {
283 exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); 285 nh = skb_network_header(skb);
286 exthdr = (struct ipv6_opt_hdr *)(nh + offset);
284 287
285 switch (nexthdr) { 288 switch (nexthdr) {
286 case NEXTHDR_ROUTING: 289 case NEXTHDR_ROUTING:
@@ -288,7 +291,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
288 case NEXTHDR_DEST: 291 case NEXTHDR_DEST:
289 offset += ipv6_optlen(exthdr); 292 offset += ipv6_optlen(exthdr);
290 nexthdr = exthdr->nexthdr; 293 nexthdr = exthdr->nexthdr;
291 exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); 294 exthdr = (struct ipv6_opt_hdr *)(nh + offset);
292 break; 295 break;
293 296
294 case IPPROTO_UDP: 297 case IPPROTO_UDP:
@@ -296,7 +299,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
296 case IPPROTO_TCP: 299 case IPPROTO_TCP:
297 case IPPROTO_SCTP: 300 case IPPROTO_SCTP:
298 case IPPROTO_DCCP: 301 case IPPROTO_DCCP:
299 if (pskb_may_pull(skb, skb->nh.raw + offset + 4 - skb->data)) { 302 if (pskb_may_pull(skb, nh + offset + 4 - skb->data)) {
300 __be16 *ports = (__be16 *)exthdr; 303 __be16 *ports = (__be16 *)exthdr;
301 304
302 fl->fl_ip_sport = ports[0]; 305 fl->fl_ip_sport = ports[0];
@@ -306,7 +309,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
306 return; 309 return;
307 310
308 case IPPROTO_ICMPV6: 311 case IPPROTO_ICMPV6:
309 if (pskb_may_pull(skb, skb->nh.raw + offset + 2 - skb->data)) { 312 if (pskb_may_pull(skb, nh + offset + 2 - skb->data)) {
310 u8 *icmp = (u8 *)exthdr; 313 u8 *icmp = (u8 *)exthdr;
311 314
312 fl->fl_icmp_type = icmp[0]; 315 fl->fl_icmp_type = icmp[0];
@@ -317,7 +320,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
317 320
318#ifdef CONFIG_IPV6_MIP6 321#ifdef CONFIG_IPV6_MIP6
319 case IPPROTO_MH: 322 case IPPROTO_MH:
320 if (pskb_may_pull(skb, skb->nh.raw + offset + 3 - skb->data)) { 323 if (pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
321 struct ip6_mh *mh; 324 struct ip6_mh *mh;
322 mh = (struct ip6_mh *)exthdr; 325 mh = (struct ip6_mh *)exthdr;
323 326
@@ -335,7 +338,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
335 fl->fl_ipsec_spi = 0; 338 fl->fl_ipsec_spi = 0;
336 fl->proto = nexthdr; 339 fl->proto = nexthdr;
337 return; 340 return;
338 }; 341 }
339 } 342 }
340} 343}
341 344
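The interesting part of the _decode_session6() conversion is not the renaming but the reload: pskb_may_pull() may reallocate the skb head, so any cached header pointer is stale after a successful pull, which is why the new code refreshes nh inside the loop instead of hoisting it. A trimmed sketch of that idiom (walk_exthdrs is an invented name and the nexthdr dispatch is elided):

#include <linux/skbuff.h>
#include <net/ipv6.h>

static void walk_exthdrs(struct sk_buff *skb, unsigned int offset)
{
	const unsigned char *nh;
	struct ipv6_opt_hdr *exthdr;

	while (pskb_may_pull(skb, skb_network_header(skb) + offset + 1 -
			     skb->data)) {
		/* skb->head may have moved: refresh after every pull */
		nh = skb_network_header(skb);
		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
		offset += ipv6_optlen(exthdr);
	}
}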
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 93c42232aa39..538499a89975 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -257,7 +257,7 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
257 257
258static int xfrm6_tunnel_rcv(struct sk_buff *skb) 258static int xfrm6_tunnel_rcv(struct sk_buff *skb)
259{ 259{
260 struct ipv6hdr *iph = skb->nh.ipv6h; 260 struct ipv6hdr *iph = ipv6_hdr(skb);
261 __be32 spi; 261 __be32 spi;
262 262
263 spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr); 263 spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr);
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index cac35a77f069..392f8bc92691 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -576,7 +576,9 @@ static struct sk_buff *ipxitf_adjust_skbuff(struct ipx_interface *intrfc,
576 skb2 = alloc_skb(len, GFP_ATOMIC); 576 skb2 = alloc_skb(len, GFP_ATOMIC);
577 if (skb2) { 577 if (skb2) {
578 skb_reserve(skb2, out_offset); 578 skb_reserve(skb2, out_offset);
579 skb2->nh.raw = skb2->h.raw = skb_put(skb2, skb->len); 579 skb_reset_network_header(skb2);
580 skb_reset_transport_header(skb2);
581 skb_put(skb2, skb->len);
580 memcpy(ipx_hdr(skb2), ipx_hdr(skb), skb->len); 582 memcpy(ipx_hdr(skb2), ipx_hdr(skb), skb->len);
581 memcpy(skb2->cb, skb->cb, sizeof(skb->cb)); 583 memcpy(skb2->cb, skb->cb, sizeof(skb->cb));
582 } 584 }
@@ -1807,8 +1809,8 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
1807 copied); 1809 copied);
1808 if (rc) 1810 if (rc)
1809 goto out_free; 1811 goto out_free;
1810 if (skb->tstamp.off_sec) 1812 if (skb->tstamp.tv64)
1811 skb_get_timestamp(skb, &sk->sk_stamp); 1813 sk->sk_stamp = skb->tstamp;
1812 1814
1813 msg->msg_namelen = sizeof(*sipx); 1815 msg->msg_namelen = sizeof(*sipx);
1814 1816
diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c
index 8e1cad971f11..e16c11423527 100644
--- a/net/ipx/ipx_route.c
+++ b/net/ipx/ipx_route.c
@@ -203,7 +203,9 @@ int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx,
203 skb->sk = sk; 203 skb->sk = sk;
204 204
205 /* Fill in IPX header */ 205 /* Fill in IPX header */
206 skb->h.raw = skb->nh.raw = skb_put(skb, sizeof(struct ipxhdr)); 206 skb_reset_network_header(skb);
207 skb_reset_transport_header(skb);
208 skb_put(skb, sizeof(struct ipxhdr));
207 ipx = ipx_hdr(skb); 209 ipx = ipx_hdr(skb);
208 ipx->ipx_pktsize = htons(len + sizeof(struct ipxhdr)); 210 ipx->ipx_pktsize = htons(len + sizeof(struct ipxhdr));
209 IPX_SKB_CB(skb)->ipx_tctrl = 0; 211 IPX_SKB_CB(skb)->ipx_tctrl = 0;
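Both IPX hunks split the old one-liner, skb->h.raw = skb->nh.raw = skb_put(...), into two resets plus a plain skb_put(). That is only equivalent because the skb is still empty at this point: after alloc_skb() and skb_reserve(), skb->data equals skb->tail, which is exactly what skb_put() returns. A sketch under that assumption, with invented names:

#include <linux/gfp.h>
#include <linux/skbuff.h>

static struct sk_buff *build_empty_hdr(unsigned int reserve,
				       unsigned int hdr_len)
{
	struct sk_buff *skb = alloc_skb(reserve + hdr_len, GFP_ATOMIC);

	if (!skb)
		return NULL;
	skb_reserve(skb, reserve);

	/* Empty skb: data == tail, so the resets land exactly where
	 * skb_put() is about to make room. */
	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	skb_put(skb, hdr_len);
	return skb;
}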
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 0eb7d596d470..06c97c60d542 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -89,7 +89,6 @@ static int irda_data_indication(void *instance, void *sap, struct sk_buff *skb)
89 89
90 self = instance; 90 self = instance;
91 sk = instance; 91 sk = instance;
92 IRDA_ASSERT(sk != NULL, return -1;);
93 92
94 err = sock_queue_rcv_skb(sk, skb); 93 err = sock_queue_rcv_skb(sk, skb);
95 if (err) { 94 if (err) {
@@ -131,14 +130,12 @@ static void irda_disconnect_indication(void *instance, void *sap,
131 } 130 }
132 131
133 /* Prevent race conditions with irda_release() and irda_shutdown() */ 132 /* Prevent race conditions with irda_release() and irda_shutdown() */
133 bh_lock_sock(sk);
134 if (!sock_flag(sk, SOCK_DEAD) && sk->sk_state != TCP_CLOSE) { 134 if (!sock_flag(sk, SOCK_DEAD) && sk->sk_state != TCP_CLOSE) {
135 lock_sock(sk);
136 sk->sk_state = TCP_CLOSE; 135 sk->sk_state = TCP_CLOSE;
137 sk->sk_err = ECONNRESET;
138 sk->sk_shutdown |= SEND_SHUTDOWN; 136 sk->sk_shutdown |= SEND_SHUTDOWN;
139 137
140 sk->sk_state_change(sk); 138 sk->sk_state_change(sk);
141 release_sock(sk);
142 139
143 /* Close our TSAP. 140 /* Close our TSAP.
144 * If we leave it open, IrLMP put it back into the list of 141 * If we leave it open, IrLMP put it back into the list of
@@ -158,6 +155,7 @@ static void irda_disconnect_indication(void *instance, void *sap,
158 self->tsap = NULL; 155 self->tsap = NULL;
159 } 156 }
160 } 157 }
158 bh_unlock_sock(sk);
161 159
162 /* Note : once we are there, there is not much you want to do 160 /* Note : once we are there, there is not much you want to do
163 * with the socket anymore, apart from closing it. 161 * with the socket anymore, apart from closing it.
@@ -220,7 +218,7 @@ static void irda_connect_confirm(void *instance, void *sap,
220 break; 218 break;
221 default: 219 default:
222 self->max_data_size = irttp_get_max_seg_size(self->tsap); 220 self->max_data_size = irttp_get_max_seg_size(self->tsap);
223 }; 221 }
224 222
225 IRDA_DEBUG(2, "%s(), max_data_size=%d\n", __FUNCTION__, 223 IRDA_DEBUG(2, "%s(), max_data_size=%d\n", __FUNCTION__,
226 self->max_data_size); 224 self->max_data_size);
@@ -283,7 +281,7 @@ static void irda_connect_indication(void *instance, void *sap,
283 break; 281 break;
284 default: 282 default:
285 self->max_data_size = irttp_get_max_seg_size(self->tsap); 283 self->max_data_size = irttp_get_max_seg_size(self->tsap);
286 }; 284 }
287 285
288 IRDA_DEBUG(2, "%s(), max_data_size=%d\n", __FUNCTION__, 286 IRDA_DEBUG(2, "%s(), max_data_size=%d\n", __FUNCTION__,
289 self->max_data_size); 287 self->max_data_size);
@@ -306,8 +304,6 @@ static void irda_connect_response(struct irda_sock *self)
306 304
307 IRDA_DEBUG(2, "%s()\n", __FUNCTION__); 305 IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
308 306
309 IRDA_ASSERT(self != NULL, return;);
310
311 skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, 307 skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER,
312 GFP_ATOMIC); 308 GFP_ATOMIC);
313 if (skb == NULL) { 309 if (skb == NULL) {
@@ -337,7 +333,7 @@ static void irda_flow_indication(void *instance, void *sap, LOCAL_FLOW flow)
337 333
338 self = instance; 334 self = instance;
339 sk = instance; 335 sk = instance;
340 IRDA_ASSERT(sk != NULL, return;); 336 BUG_ON(sk == NULL);
341 337
342 switch (flow) { 338 switch (flow) {
343 case FLOW_STOP: 339 case FLOW_STOP:
@@ -449,7 +445,7 @@ static void irda_discovery_timeout(u_long priv)
449 IRDA_DEBUG(2, "%s()\n", __FUNCTION__); 445 IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
450 446
451 self = (struct irda_sock *) priv; 447 self = (struct irda_sock *) priv;
452 IRDA_ASSERT(self != NULL, return;); 448 BUG_ON(self == NULL);
453 449
454 /* Nothing for the caller */ 450 /* Nothing for the caller */
455 self->cachelog = NULL; 451 self->cachelog = NULL;
@@ -546,8 +542,6 @@ static int irda_find_lsap_sel(struct irda_sock *self, char *name)
546{ 542{
547 IRDA_DEBUG(2, "%s(%p, %s)\n", __FUNCTION__, self, name); 543 IRDA_DEBUG(2, "%s(%p, %s)\n", __FUNCTION__, self, name);
548 544
549 IRDA_ASSERT(self != NULL, return -1;);
550
551 if (self->iriap) { 545 if (self->iriap) {
552 IRDA_WARNING("%s(): busy with a previous query\n", 546 IRDA_WARNING("%s(): busy with a previous query\n",
553 __FUNCTION__); 547 __FUNCTION__);
@@ -635,8 +629,6 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
635 629
636 IRDA_DEBUG(2, "%s(), name=%s\n", __FUNCTION__, name); 630 IRDA_DEBUG(2, "%s(), name=%s\n", __FUNCTION__, name);
637 631
638 IRDA_ASSERT(self != NULL, return -1;);
639
640 /* Ask lmp for the current discovery log 632 /* Ask lmp for the current discovery log
641 * Note : we have to use irlmp_get_discoveries(), as opposed 633 * Note : we have to use irlmp_get_discoveries(), as opposed
642 * to play with the cachelog directly, because while we are 634 * to play with the cachelog directly, because while we are
@@ -784,8 +776,6 @@ static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
784 struct irda_sock *self = irda_sk(sk); 776 struct irda_sock *self = irda_sk(sk);
785 int err; 777 int err;
786 778
787 IRDA_ASSERT(self != NULL, return -1;);
788
789 IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self); 779 IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
790 780
791 if (addr_len != sizeof(struct sockaddr_irda)) 781 if (addr_len != sizeof(struct sockaddr_irda))
@@ -841,8 +831,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
841 831
842 IRDA_DEBUG(2, "%s()\n", __FUNCTION__); 832 IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
843 833
844 IRDA_ASSERT(self != NULL, return -1;);
845
846 err = irda_create(newsock, sk->sk_protocol); 834 err = irda_create(newsock, sk->sk_protocol);
847 if (err) 835 if (err)
848 return err; 836 return err;
@@ -873,44 +861,28 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
873 * calling us, the data is waiting for us ;-) 861 * calling us, the data is waiting for us ;-)
874 * Jean II 862 * Jean II
875 */ 863 */
876 skb = skb_dequeue(&sk->sk_receive_queue); 864 while (1) {
877 if (skb == NULL) { 865 skb = skb_dequeue(&sk->sk_receive_queue);
878 int ret = 0; 866 if (skb)
879 DECLARE_WAITQUEUE(waitq, current); 867 break;
880 868
881 /* Non blocking operation */ 869 /* Non blocking operation */
882 if (flags & O_NONBLOCK) 870 if (flags & O_NONBLOCK)
883 return -EWOULDBLOCK; 871 return -EWOULDBLOCK;
884 872
885 /* The following code is a cut'n'paste of the 873 err = wait_event_interruptible(*(sk->sk_sleep),
886 * wait_event_interruptible() macro. 874 skb_peek(&sk->sk_receive_queue));
887 * We don't us the macro because the condition has 875 if (err)
888 * side effects : we want to make sure that only one 876 return err;
889 * skb get dequeued - Jean II */
890 add_wait_queue(sk->sk_sleep, &waitq);
891 for (;;) {
892 set_current_state(TASK_INTERRUPTIBLE);
893 skb = skb_dequeue(&sk->sk_receive_queue);
894 if (skb != NULL)
895 break;
896 if (!signal_pending(current)) {
897 schedule();
898 continue;
899 }
900 ret = -ERESTARTSYS;
901 break;
902 }
903 current->state = TASK_RUNNING;
904 remove_wait_queue(sk->sk_sleep, &waitq);
905 if(ret)
906 return -ERESTARTSYS;
907 } 877 }
908 878
909 newsk = newsock->sk; 879 newsk = newsock->sk;
880 if (newsk == NULL)
881 return -EIO;
882
910 newsk->sk_state = TCP_ESTABLISHED; 883 newsk->sk_state = TCP_ESTABLISHED;
911 884
912 new = irda_sk(newsk); 885 new = irda_sk(newsk);
913 IRDA_ASSERT(new != NULL, return -1;);
914 886
915 /* Now attach up the new socket */ 887 /* Now attach up the new socket */
916 new->tsap = irttp_dup(self->tsap, new); 888 new->tsap = irttp_dup(self->tsap, new);
@@ -1061,7 +1033,8 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr,
1061 1033
1062 if (sk->sk_state != TCP_ESTABLISHED) { 1034 if (sk->sk_state != TCP_ESTABLISHED) {
1063 sock->state = SS_UNCONNECTED; 1035 sock->state = SS_UNCONNECTED;
1064 return sock_error(sk); /* Always set at this point */ 1036 err = sock_error(sk);
1037 return err? err : -ECONNRESET;
1065 } 1038 }
1066 1039
1067 sock->state = SS_CONNECTED; 1040 sock->state = SS_CONNECTED;
@@ -1171,8 +1144,6 @@ static void irda_destroy_socket(struct irda_sock *self)
1171{ 1144{
1172 IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self); 1145 IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
1173 1146
1174 IRDA_ASSERT(self != NULL, return;);
1175
1176 /* Unregister with IrLMP */ 1147 /* Unregister with IrLMP */
1177 irlmp_unregister_client(self->ckey); 1148 irlmp_unregister_client(self->ckey);
1178 irlmp_unregister_service(self->skey); 1149 irlmp_unregister_service(self->skey);
@@ -1274,7 +1245,6 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
1274 struct sock *sk = sock->sk; 1245 struct sock *sk = sock->sk;
1275 struct irda_sock *self; 1246 struct irda_sock *self;
1276 struct sk_buff *skb; 1247 struct sk_buff *skb;
1277 unsigned char *asmptr;
1278 int err; 1248 int err;
1279 1249
1280 IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len); 1250 IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len);
@@ -1292,7 +1262,6 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
1292 return -ENOTCONN; 1262 return -ENOTCONN;
1293 1263
1294 self = irda_sk(sk); 1264 self = irda_sk(sk);
1295 IRDA_ASSERT(self != NULL, return -1;);
1296 1265
1297 /* Check if IrTTP wants us to slow down */ 1266 /* Check if IrTTP wants us to slow down */
1298 1267
@@ -1317,9 +1286,9 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
1317 return -ENOBUFS; 1286 return -ENOBUFS;
1318 1287
1319 skb_reserve(skb, self->max_header_size + 16); 1288 skb_reserve(skb, self->max_header_size + 16);
1320 1289 skb_reset_transport_header(skb);
1321 asmptr = skb->h.raw = skb_put(skb, len); 1290 skb_put(skb, len);
1322 err = memcpy_fromiovec(asmptr, msg->msg_iov, len); 1291 err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
1323 if (err) { 1292 if (err) {
1324 kfree_skb(skb); 1293 kfree_skb(skb);
1325 return err; 1294 return err;
@@ -1355,16 +1324,16 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
1355 1324
1356 IRDA_DEBUG(4, "%s()\n", __FUNCTION__); 1325 IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
1357 1326
1358 IRDA_ASSERT(self != NULL, return -1;); 1327 if ((err = sock_error(sk)) < 0)
1359 IRDA_ASSERT(!sock_error(sk), return -1;); 1328 return err;
1360 1329
1361 skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, 1330 skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
1362 flags & MSG_DONTWAIT, &err); 1331 flags & MSG_DONTWAIT, &err);
1363 if (!skb) 1332 if (!skb)
1364 return err; 1333 return err;
1365 1334
1366 skb->h.raw = skb->data; 1335 skb_reset_transport_header(skb);
1367 copied = skb->len; 1336 copied = skb->len;
1368 1337
1369 if (copied > size) { 1338 if (copied > size) {
1370 IRDA_DEBUG(2, "%s(), Received truncated frame (%zd < %zd)!\n", 1339 IRDA_DEBUG(2, "%s(), Received truncated frame (%zd < %zd)!\n",
@@ -1403,13 +1372,13 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
1403 struct irda_sock *self = irda_sk(sk); 1372 struct irda_sock *self = irda_sk(sk);
1404 int noblock = flags & MSG_DONTWAIT; 1373 int noblock = flags & MSG_DONTWAIT;
1405 size_t copied = 0; 1374 size_t copied = 0;
1406 int target = 1; 1375 int target, err;
1407 DECLARE_WAITQUEUE(waitq, current); 1376 long timeo;
1408 1377
1409 IRDA_DEBUG(3, "%s()\n", __FUNCTION__); 1378 IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
1410 1379
1411 IRDA_ASSERT(self != NULL, return -1;); 1380 if ((err = sock_error(sk)) < 0)
1412 IRDA_ASSERT(!sock_error(sk), return -1;); 1381 return err;
1413 1382
1414 if (sock->flags & __SO_ACCEPTCON) 1383 if (sock->flags & __SO_ACCEPTCON)
1415 return(-EINVAL); 1384 return(-EINVAL);
@@ -1417,8 +1386,8 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
1417 if (flags & MSG_OOB) 1386 if (flags & MSG_OOB)
1418 return -EOPNOTSUPP; 1387 return -EOPNOTSUPP;
1419 1388
1420 if (flags & MSG_WAITALL) 1389 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
1421 target = size; 1390 timeo = sock_rcvtimeo(sk, noblock);
1422 1391
1423 msg->msg_namelen = 0; 1392 msg->msg_namelen = 0;
1424 1393
@@ -1426,19 +1395,14 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
1426 int chunk; 1395 int chunk;
1427 struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue); 1396 struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue);
1428 1397
1429 if (skb==NULL) { 1398 if (skb == NULL) {
1399 DEFINE_WAIT(wait);
1430 int ret = 0; 1400 int ret = 0;
1431 1401
1432 if (copied >= target) 1402 if (copied >= target)
1433 break; 1403 break;
1434 1404
1435 /* The following code is a cut'n'paste of the 1405 prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1436 * wait_event_interruptible() macro.
1437 * We don't us the macro because the test condition
1438 * is messy. - Jean II */
1439 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1440 add_wait_queue(sk->sk_sleep, &waitq);
1441 set_current_state(TASK_INTERRUPTIBLE);
1442 1406
1443 /* 1407 /*
1444 * POSIX 1003.1g mandates this order. 1408 * POSIX 1003.1g mandates this order.
@@ -1451,17 +1415,17 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
1451 else if (noblock) 1415 else if (noblock)
1452 ret = -EAGAIN; 1416 ret = -EAGAIN;
1453 else if (signal_pending(current)) 1417 else if (signal_pending(current))
1454 ret = -ERESTARTSYS; 1418 ret = sock_intr_errno(timeo);
1419 else if (sk->sk_state != TCP_ESTABLISHED)
1420 ret = -ENOTCONN;
1455 else if (skb_peek(&sk->sk_receive_queue) == NULL) 1421 else if (skb_peek(&sk->sk_receive_queue) == NULL)
1456 /* Wait process until data arrives */ 1422 /* Wait process until data arrives */
1457 schedule(); 1423 schedule();
1458 1424
1459 current->state = TASK_RUNNING; 1425 finish_wait(sk->sk_sleep, &wait);
1460 remove_wait_queue(sk->sk_sleep, &waitq);
1461 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1462 1426
1463 if(ret) 1427 if (ret)
1464 return(ret); 1428 return ret;
1465 if (sk->sk_shutdown & RCV_SHUTDOWN) 1429 if (sk->sk_shutdown & RCV_SHUTDOWN)
1466 break; 1430 break;
1467 1431
@@ -1530,7 +1494,6 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock,
1530 struct sock *sk = sock->sk; 1494 struct sock *sk = sock->sk;
1531 struct irda_sock *self; 1495 struct irda_sock *self;
1532 struct sk_buff *skb; 1496 struct sk_buff *skb;
1533 unsigned char *asmptr;
1534 int err; 1497 int err;
1535 1498
1536 IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len); 1499 IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len);
@@ -1547,7 +1510,6 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock,
1547 return -ENOTCONN; 1510 return -ENOTCONN;
1548 1511
1549 self = irda_sk(sk); 1512 self = irda_sk(sk);
1550 IRDA_ASSERT(self != NULL, return -1;);
1551 1513
1552 /* 1514 /*
1553 * Check that we don't send out too big frames. This is an unreliable 1515 * Check that we don't send out too big frames. This is an unreliable
@@ -1566,10 +1528,11 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock,
1566 return -ENOBUFS; 1528 return -ENOBUFS;
1567 1529
1568 skb_reserve(skb, self->max_header_size); 1530 skb_reserve(skb, self->max_header_size);
1531 skb_reset_transport_header(skb);
1569 1532
1570 IRDA_DEBUG(4, "%s(), appending user data\n", __FUNCTION__); 1533 IRDA_DEBUG(4, "%s(), appending user data\n", __FUNCTION__);
1571 asmptr = skb->h.raw = skb_put(skb, len); 1534 skb_put(skb, len);
1572 err = memcpy_fromiovec(asmptr, msg->msg_iov, len); 1535 err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
1573 if (err) { 1536 if (err) {
1574 kfree_skb(skb); 1537 kfree_skb(skb);
1575 return err; 1538 return err;
@@ -1602,7 +1565,6 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock,
1602 __u8 pid = 0; 1565 __u8 pid = 0;
1603 int bound = 0; 1566 int bound = 0;
1604 struct sk_buff *skb; 1567 struct sk_buff *skb;
1605 unsigned char *asmptr;
1606 int err; 1568 int err;
1607 1569
1608 IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len); 1570 IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len);
@@ -1616,7 +1578,6 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock,
1616 } 1578 }
1617 1579
1618 self = irda_sk(sk); 1580 self = irda_sk(sk);
1619 IRDA_ASSERT(self != NULL, return -1;);
1620 1581
1621 /* Check if an address was specified with sendto. Jean II */ 1582 /* Check if an address was specified with sendto. Jean II */
1622 if (msg->msg_name) { 1583 if (msg->msg_name) {
@@ -1662,10 +1623,11 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock,
1662 return -ENOBUFS; 1623 return -ENOBUFS;
1663 1624
1664 skb_reserve(skb, self->max_header_size); 1625 skb_reserve(skb, self->max_header_size);
1626 skb_reset_transport_header(skb);
1665 1627
1666 IRDA_DEBUG(4, "%s(), appending user data\n", __FUNCTION__); 1628 IRDA_DEBUG(4, "%s(), appending user data\n", __FUNCTION__);
1667 asmptr = skb->h.raw = skb_put(skb, len); 1629 skb_put(skb, len);
1668 err = memcpy_fromiovec(asmptr, msg->msg_iov, len); 1630 err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
1669 if (err) { 1631 if (err) {
1670 kfree_skb(skb); 1632 kfree_skb(skb);
1671 return err; 1633 return err;
@@ -1689,8 +1651,6 @@ static int irda_shutdown(struct socket *sock, int how)
1689 struct sock *sk = sock->sk; 1651 struct sock *sk = sock->sk;
1690 struct irda_sock *self = irda_sk(sk); 1652 struct irda_sock *self = irda_sk(sk);
1691 1653
1692 IRDA_ASSERT(self != NULL, return -1;);
1693
1694 IRDA_DEBUG(1, "%s(%p)\n", __FUNCTION__, self); 1654 IRDA_DEBUG(1, "%s(%p)\n", __FUNCTION__, self);
1695 1655
1696 sk->sk_state = TCP_CLOSE; 1656 sk->sk_state = TCP_CLOSE;
@@ -1863,8 +1823,6 @@ static int irda_setsockopt(struct socket *sock, int level, int optname,
1863 struct ias_attrib * ias_attr; /* Attribute in IAS object */ 1823 struct ias_attrib * ias_attr; /* Attribute in IAS object */
1864 int opt; 1824 int opt;
1865 1825
1866 IRDA_ASSERT(self != NULL, return -1;);
1867
1868 IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self); 1826 IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
1869 1827
1870 if (level != SOL_IRLMP) 1828 if (level != SOL_IRLMP)
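The biggest win in af_irda.c is the accept() path: the hand-rolled waitqueue loop (the "cut'n'paste of wait_event_interruptible()" the old comment apologised for) becomes a dequeue-retry loop around the real macro. The wake-up condition only peeks, and the dequeue is retried after each wake-up, so exactly one skb is consumed even with several sleepers. A condensed sketch of that shape, assuming the 2.6.21-era sock fields used above (dequeue_or_wait is an invented name):

#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/skbuff.h>
#include <linux/wait.h>
#include <net/sock.h>

static struct sk_buff *dequeue_or_wait(struct sock *sk, int flags, int *err)
{
	struct sk_buff *skb;

	for (;;) {
		skb = skb_dequeue(&sk->sk_receive_queue);
		if (skb)
			return skb;
		if (flags & O_NONBLOCK) {
			*err = -EWOULDBLOCK;
			return NULL;
		}
		/* peek only: the dequeue above does the consuming */
		*err = wait_event_interruptible(*(sk->sk_sleep),
				skb_peek(&sk->sk_receive_queue));
		if (*err)
			return NULL;
	}
}

The stream-receive rewrite in the same file follows the same philosophy, dropping the open-coded loop for prepare_to_wait_exclusive()/finish_wait() and honouring SO_RCVLOWAT and SO_RCVTIMEO via sock_rcvlowat() and sock_rcvtimeo().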
diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c
index 01d7c9c7b3b4..e5e4792a0314 100644
--- a/net/irda/ircomm/ircomm_param.c
+++ b/net/irda/ircomm/ircomm_param.c
@@ -133,8 +133,8 @@ int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush)
133 * Inserting is a little bit tricky since we don't know how much 133 * Inserting is a little bit tricky since we don't know how much
134 * room we will need. But this should hopefully work OK 134 * room we will need. But this should hopefully work OK
135 */ 135 */
136 count = irda_param_insert(self, pi, skb->tail, skb_tailroom(skb), 136 count = irda_param_insert(self, pi, skb_tail_pointer(skb),
137 &ircomm_param_info); 137 skb_tailroom(skb), &ircomm_param_info);
138 if (count < 0) { 138 if (count < 0) {
139 IRDA_WARNING("%s(), no room for parameter!\n", __FUNCTION__); 139 IRDA_WARNING("%s(), no room for parameter!\n", __FUNCTION__);
140 spin_unlock_irqrestore(&self->spinlock, flags); 140 spin_unlock_irqrestore(&self->spinlock, flags);
diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c
index e717801b38f9..7b5def1ea633 100644
--- a/net/irda/irda_device.c
+++ b/net/irda/irda_device.c
@@ -375,7 +375,7 @@ EXPORT_SYMBOL(alloc_irdadev);
375dongle_t *irda_device_dongle_init(struct net_device *dev, int type) 375dongle_t *irda_device_dongle_init(struct net_device *dev, int type)
376{ 376{
377 struct dongle_reg *reg; 377 struct dongle_reg *reg;
378 dongle_t *dongle = NULL; 378 dongle_t *dongle = kzalloc(sizeof(dongle_t), GFP_KERNEL);
379 379
380 might_sleep(); 380 might_sleep();
381 381
@@ -397,19 +397,14 @@ dongle_t *irda_device_dongle_init(struct net_device *dev, int type)
397 if (!reg || !try_module_get(reg->owner) ) { 397 if (!reg || !try_module_get(reg->owner) ) {
398 IRDA_ERROR("IrDA: Unable to find requested dongle type %x\n", 398 IRDA_ERROR("IrDA: Unable to find requested dongle type %x\n",
399 type); 399 type);
400 goto out; 400 kfree(dongle);
401 dongle = NULL;
402 }
403 if (dongle) {
404 /* Bind the registration info to this particular instance */
405 dongle->issue = reg;
406 dongle->dev = dev;
401 } 407 }
402
403 /* Allocate dongle info for this instance */
404 dongle = kzalloc(sizeof(dongle_t), GFP_KERNEL);
405 if (!dongle)
406 goto out;
407
408 /* Bind the registration info to this particular instance */
409 dongle->issue = reg;
410 dongle->dev = dev;
411
412 out:
413 spin_unlock(&dongles->hb_spinlock); 408 spin_unlock(&dongles->hb_spinlock);
414 return dongle; 409 return dongle;
415} 410}
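The dongle-init rework moves kzalloc() up to the declaration, before the dongle list spinlock is taken; the hunk's context (the unlock at the old out label) indicates the GFP_KERNEL allocation previously happened with the lock held, which may sleep. The error path now just frees and clears on a failed lookup. A sketch of the resulting shape, with invented names:

#include <linux/slab.h>
#include <linux/spinlock.h>

static void *lookup_and_bind(spinlock_t *lock, int (*find)(void *obj))
{
	void *obj = kzalloc(32, GFP_KERNEL);	/* size is illustrative */

	spin_lock(lock);
	if (!obj || !find(obj)) {
		kfree(obj);			/* kfree(NULL) is a no-op */
		obj = NULL;
	}
	spin_unlock(lock);
	return obj;
}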
diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c
index fcf9d6599628..ed69773b0f8e 100644
--- a/net/irda/irlan/irlan_common.c
+++ b/net/irda/irlan/irlan_common.c
@@ -1039,7 +1039,7 @@ static int __irlan_insert_param(struct sk_buff *skb, char *param, int type,
1039 } 1039 }
1040 1040
1041 /* Insert at end of sk-buffer */ 1041 /* Insert at end of sk-buffer */
1042 frame = skb->tail; 1042 frame = skb_tail_pointer(skb);
1043 1043
1044 /* Make space for data */ 1044 /* Make space for data */
1045 if (skb_tailroom(skb) < (param_len+value_len+3)) { 1045 if (skb_tailroom(skb) < (param_len+value_len+3)) {
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index 672ab3f69033..c421521c0a99 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -234,8 +234,7 @@ int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb)
234 * might have been previously set by the low level IrDA network 234 * might have been previously set by the low level IrDA network
235 * device driver 235 * device driver
236 */ 236 */
237 skb->dev = self->dev; 237 skb->protocol = eth_type_trans(skb, self->dev); /* Remove eth header */
238 skb->protocol=eth_type_trans(skb, skb->dev); /* Remove eth header */
239 238
240 self->stats.rx_packets++; 239 self->stats.rx_packets++;
241 self->stats.rx_bytes += skb->len; 240 self->stats.rx_bytes += skb->len;
diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c
index 7b6433fe1dc2..0b02073ffdf3 100644
--- a/net/irda/irlap_event.c
+++ b/net/irda/irlap_event.c
@@ -590,7 +590,7 @@ static int irlap_state_query(struct irlap_cb *self, IRLAP_EVENT event,
590 if (!self->discovery_log) { 590 if (!self->discovery_log) {
591 IRDA_WARNING("%s: discovery log is gone! " 591 IRDA_WARNING("%s: discovery log is gone! "
592 "maybe the discovery timeout has been set" 592 "maybe the discovery timeout has been set"
593 " to short?\n", __FUNCTION__); 593 " too short?\n", __FUNCTION__);
594 break; 594 break;
595 } 595 }
596 hashbin_insert(self->discovery_log, 596 hashbin_insert(self->discovery_log,
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index 0b04603e9c47..3c5a68e36414 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -93,7 +93,9 @@ void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb)
93{ 93{
94 /* Some common init stuff */ 94 /* Some common init stuff */
95 skb->dev = self->netdev; 95 skb->dev = self->netdev;
96 skb->h.raw = skb->nh.raw = skb->mac.raw = skb->data; 96 skb_reset_mac_header(skb);
97 skb_reset_network_header(skb);
98 skb_reset_transport_header(skb);
97 skb->protocol = htons(ETH_P_IRDA); 99 skb->protocol = htons(ETH_P_IRDA);
98 skb->priority = TC_PRIO_BESTEFFORT; 100 skb->priority = TC_PRIO_BESTEFFORT;
99 101
@@ -411,7 +413,7 @@ static void irlap_recv_discovery_xid_rsp(struct irlap_cb *self,
411 IRDA_ASSERT(self->magic == LAP_MAGIC, return;); 413 IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
412 414
413 if (!pskb_may_pull(skb, sizeof(struct xid_frame))) { 415 if (!pskb_may_pull(skb, sizeof(struct xid_frame))) {
414 IRDA_ERROR("%s: frame to short!\n", __FUNCTION__); 416 IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
415 return; 417 return;
416 } 418 }
417 419
@@ -482,7 +484,7 @@ static void irlap_recv_discovery_xid_cmd(struct irlap_cb *self,
482 char *text; 484 char *text;
483 485
484 if (!pskb_may_pull(skb, sizeof(struct xid_frame))) { 486 if (!pskb_may_pull(skb, sizeof(struct xid_frame))) {
485 IRDA_ERROR("%s: frame to short!\n", __FUNCTION__); 487 IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
486 return; 488 return;
487 } 489 }
488 490
@@ -526,7 +528,7 @@ static void irlap_recv_discovery_xid_cmd(struct irlap_cb *self,
526 /* Check if things are sane at this point... */ 528 /* Check if things are sane at this point... */
527 if((discovery_info == NULL) || 529 if((discovery_info == NULL) ||
528 !pskb_may_pull(skb, 3)) { 530 !pskb_may_pull(skb, 3)) {
529 IRDA_ERROR("%s: discovery frame to short!\n", 531 IRDA_ERROR("%s: discovery frame too short!\n",
530 __FUNCTION__); 532 __FUNCTION__);
531 return; 533 return;
532 } 534 }
@@ -1171,7 +1173,7 @@ static void irlap_recv_frmr_frame(struct irlap_cb *self, struct sk_buff *skb,
1171 IRDA_ASSERT(info != NULL, return;); 1173 IRDA_ASSERT(info != NULL, return;);
1172 1174
1173 if (!pskb_may_pull(skb, 4)) { 1175 if (!pskb_may_pull(skb, 4)) {
1174 IRDA_ERROR("%s: frame to short!\n", __FUNCTION__); 1176 IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
1175 return; 1177 return;
1176 } 1178 }
1177 1179
@@ -1260,7 +1262,7 @@ static void irlap_recv_test_frame(struct irlap_cb *self, struct sk_buff *skb,
1260 IRDA_DEBUG(2, "%s()\n", __FUNCTION__); 1262 IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
1261 1263
1262 if (!pskb_may_pull(skb, sizeof(*frame))) { 1264 if (!pskb_may_pull(skb, sizeof(*frame))) {
1263 IRDA_ERROR("%s: frame to short!\n", __FUNCTION__); 1265 IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
1264 return; 1266 return;
1265 } 1267 }
1266 frame = (struct test_frame *) skb->data; 1268 frame = (struct test_frame *) skb->data;
@@ -1268,7 +1270,7 @@ static void irlap_recv_test_frame(struct irlap_cb *self, struct sk_buff *skb,
1268 /* Broadcast frames must carry saddr and daddr fields */ 1270 /* Broadcast frames must carry saddr and daddr fields */
1269 if (info->caddr == CBROADCAST) { 1271 if (info->caddr == CBROADCAST) {
1270 if (skb->len < sizeof(struct test_frame)) { 1272 if (skb->len < sizeof(struct test_frame)) {
1271 IRDA_DEBUG(0, "%s() test frame to short!\n", 1273 IRDA_DEBUG(0, "%s() test frame too short!\n",
1272 __FUNCTION__); 1274 __FUNCTION__);
1273 return; 1275 return;
1274 } 1276 }
@@ -1334,7 +1336,7 @@ int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev,
1334 1336
1335 /* Check if frame is large enough for parsing */ 1337 /* Check if frame is large enough for parsing */
1336 if (!pskb_may_pull(skb, 2)) { 1338 if (!pskb_may_pull(skb, 2)) {
1337 IRDA_ERROR("%s: frame to short!\n", __FUNCTION__); 1339 IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
1338 dev_kfree_skb(skb); 1340 dev_kfree_skb(skb);
1339 return -1; 1341 return -1;
1340 } 1342 }
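Besides the "to short" spelling fixes, every receive path in this file guards header access with pskb_may_pull(), which both validates the frame length and guarantees the bytes sit in the linear area before skb->data is cast to a header struct. The idiom, sketched with a stand-in struct (struct xid_stub is not the real struct xid_frame):

#include <linux/errno.h>
#include <linux/skbuff.h>

struct xid_stub {			/* stand-in for struct xid_frame */
	unsigned char caddr, control, ident;
} __attribute__((packed));

static int parse_xid(struct sk_buff *skb)
{
	struct xid_stub *xid;

	if (!pskb_may_pull(skb, sizeof(*xid)))
		return -EINVAL;		/* frame too short */
	xid = (struct xid_stub *)skb->data;
	return xid->ident;
}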
diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c
index 92662330dbcf..d058b467f9e4 100644
--- a/net/irda/irqueue.c
+++ b/net/irda/irqueue.c
@@ -384,6 +384,9 @@ EXPORT_SYMBOL(hashbin_new);
384 * for deallocating this structure if it's complex. If not the user can 384 * for deallocating this structure if it's complex. If not the user can
385 * just supply kfree, which should take care of the job. 385 * just supply kfree, which should take care of the job.
386 */ 386 */
387#ifdef CONFIG_LOCKDEP
388static int hashbin_lock_depth = 0;
389#endif
387int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func) 390int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func)
388{ 391{
389 irda_queue_t* queue; 392 irda_queue_t* queue;
@@ -395,7 +398,8 @@ int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func)
395 398
396 /* Synchronize */ 399 /* Synchronize */
397 if ( hashbin->hb_type & HB_LOCK ) { 400 if ( hashbin->hb_type & HB_LOCK ) {
398 spin_lock_irqsave(&hashbin->hb_spinlock, flags); 401 spin_lock_irqsave_nested(&hashbin->hb_spinlock, flags,
402 hashbin_lock_depth++);
399 } 403 }
400 404
401 /* 405 /*
@@ -419,6 +423,9 @@ int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func)
419 /* Release lock */ 423 /* Release lock */
420 if ( hashbin->hb_type & HB_LOCK) { 424 if ( hashbin->hb_type & HB_LOCK) {
421 spin_unlock_irqrestore(&hashbin->hb_spinlock, flags); 425 spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
426#ifdef CONFIG_LOCKDEP
427 hashbin_lock_depth--;
428#endif
422 } 429 }
423 430
424 /* 431 /*
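hashbin_delete() can recurse through free_func when one hashbin stores other hashbins; each level then holds a different spinlock, but all of them belong to the same lock class, so lockdep would report false recursion. The subclass counter gives each nesting level its own key, and it can live entirely under CONFIG_LOCKDEP because the _nested macro discards its third argument, unevaluated, in non-lockdep builds. A self-contained sketch of the same construction:

#include <linux/spinlock.h>

struct bin {
	spinlock_t lock;
	struct bin *child;	/* same lock class as the parent */
};

#ifdef CONFIG_LOCKDEP
static int bin_lock_depth;	/* never referenced without lockdep */
#endif

static void bin_delete(struct bin *b)
{
	unsigned long flags;

	spin_lock_irqsave_nested(&b->lock, flags, bin_lock_depth++);
	if (b->child)
		bin_delete(b->child);
	spin_unlock_irqrestore(&b->lock, flags);
#ifdef CONFIG_LOCKDEP
	bin_lock_depth--;
#endif
}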
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index da3f2bc1b6f6..7069e4a58257 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -256,7 +256,7 @@ static struct sk_buff *irttp_reassemble_skb(struct tsap_cb *self)
256 * Copy all fragments to a new buffer 256 * Copy all fragments to a new buffer
257 */ 257 */
258 while ((frag = skb_dequeue(&self->rx_fragments)) != NULL) { 258 while ((frag = skb_dequeue(&self->rx_fragments)) != NULL) {
259 memcpy(skb->data+n, frag->data, frag->len); 259 skb_copy_to_linear_data_offset(skb, n, frag->data, frag->len);
260 n += frag->len; 260 n += frag->len;
261 261
262 dev_kfree_skb(frag); 262 dev_kfree_skb(frag);
@@ -314,8 +314,8 @@ static inline void irttp_fragment_skb(struct tsap_cb *self,
314 skb_reserve(frag, self->max_header_size); 314 skb_reserve(frag, self->max_header_size);
315 315
316 /* Copy data from the original skb into this fragment. */ 316 /* Copy data from the original skb into this fragment. */
317 memcpy(skb_put(frag, self->max_seg_size), skb->data, 317 skb_copy_from_linear_data(skb, skb_put(frag, self->max_seg_size),
318 self->max_seg_size); 318 self->max_seg_size);
319 319
320 /* Insert TTP header, with the more bit set */ 320 /* Insert TTP header, with the more bit set */
321 frame = skb_push(frag, TTP_HEADER); 321 frame = skb_push(frag, TTP_HEADER);
@@ -551,7 +551,7 @@ int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb)
551 } 551 }
552 552
553 if (skb->len > self->max_seg_size) { 553 if (skb->len > self->max_seg_size) {
554 IRDA_DEBUG(1, "%s(), UData is to large for IrLAP!\n", 554 IRDA_DEBUG(1, "%s(), UData is too large for IrLAP!\n",
555 __FUNCTION__); 555 __FUNCTION__);
556 goto err; 556 goto err;
557 } 557 }
@@ -598,7 +598,7 @@ int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb)
598 * inside an IrLAP frame 598 * inside an IrLAP frame
599 */ 599 */
600 if ((self->tx_max_sdu_size == 0) && (skb->len > self->max_seg_size)) { 600 if ((self->tx_max_sdu_size == 0) && (skb->len > self->max_seg_size)) {
601 IRDA_ERROR("%s: SAR disabled, and data is to large for IrLAP!\n", 601 IRDA_ERROR("%s: SAR disabled, and data is too large for IrLAP!\n",
602 __FUNCTION__); 602 __FUNCTION__);
603 ret = -EMSGSIZE; 603 ret = -EMSGSIZE;
604 goto err; 604 goto err;
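The irttp.c memcpy replacements are behaviour-preserving: skb_copy_to_linear_data_offset() and skb_copy_from_linear_data() are thin wrappers that name the copy direction and keep every access to the skb linear area behind one interface. Each pair below is equivalent; copy_demo is a throwaway illustration:

#include <linux/skbuff.h>

static void copy_demo(struct sk_buff *skb, void *buf, unsigned int n,
		      unsigned int off)
{
	/* was: memcpy(skb->data + off, buf, n); */
	skb_copy_to_linear_data_offset(skb, off, buf, n);

	/* was: memcpy(buf, skb->data, n); */
	skb_copy_from_linear_data(skb, buf, n);
}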
diff --git a/net/irda/parameters.c b/net/irda/parameters.c
index 75a72d203b01..2627dad7cd87 100644
--- a/net/irda/parameters.c
+++ b/net/irda/parameters.c
@@ -160,7 +160,7 @@ static int irda_insert_integer(void *self, __u8 *buf, int len, __u8 pi,
160 } 160 }
161 /* Check if buffer is long enough for insertion */ 161 /* Check if buffer is long enough for insertion */
162 if (len < (2+p.pl)) { 162 if (len < (2+p.pl)) {
163 IRDA_WARNING("%s: buffer to short for insertion!\n", 163 IRDA_WARNING("%s: buffer too short for insertion!\n",
164 __FUNCTION__); 164 __FUNCTION__);
165 return -1; 165 return -1;
166 } 166 }
@@ -216,7 +216,7 @@ static int irda_extract_integer(void *self, __u8 *buf, int len, __u8 pi,
216 216
217 /* Check if buffer is long enough for parsing */ 217 /* Check if buffer is long enough for parsing */
218 if (len < (2+p.pl)) { 218 if (len < (2+p.pl)) {
219 IRDA_WARNING("%s: buffer to short for parsing! " 219 IRDA_WARNING("%s: buffer too short for parsing! "
220 "Need %d bytes, but len is only %d\n", 220 "Need %d bytes, but len is only %d\n",
221 __FUNCTION__, p.pl, len); 221 __FUNCTION__, p.pl, len);
222 return -1; 222 return -1;
@@ -304,7 +304,7 @@ static int irda_extract_string(void *self, __u8 *buf, int len, __u8 pi,
304 304
305 /* Check if buffer is long enough for parsing */ 305 /* Check if buffer is long enough for parsing */
306 if (len < (2+p.pl)) { 306 if (len < (2+p.pl)) {
307 IRDA_WARNING("%s: buffer to short for parsing! " 307 IRDA_WARNING("%s: buffer too short for parsing! "
308 "Need %d bytes, but len is only %d\n", 308 "Need %d bytes, but len is only %d\n",
309 __FUNCTION__, p.pl, len); 309 __FUNCTION__, p.pl, len);
310 return -1; 310 return -1;
@@ -343,7 +343,7 @@ static int irda_extract_octseq(void *self, __u8 *buf, int len, __u8 pi,
343 343
344 /* Check if buffer is long enough for parsing */ 344 /* Check if buffer is long enough for parsing */
345 if (len < (2+p.pl)) { 345 if (len < (2+p.pl)) {
346 IRDA_WARNING("%s: buffer to short for parsing! " 346 IRDA_WARNING("%s: buffer too short for parsing! "
347 "Need %d bytes, but len is only %d\n", 347 "Need %d bytes, but len is only %d\n",
348 __FUNCTION__, p.pl, len); 348 __FUNCTION__, p.pl, len);
349 return -1; 349 return -1;
diff --git a/net/irda/qos.c b/net/irda/qos.c
index 349012c926b7..aeb18cf1dcae 100644
--- a/net/irda/qos.c
+++ b/net/irda/qos.c
@@ -469,49 +469,49 @@ int irlap_insert_qos_negotiation_params(struct irlap_cb *self,
469 int ret; 469 int ret;
470 470
471 /* Insert data rate */ 471 /* Insert data rate */
472 ret = irda_param_insert(self, PI_BAUD_RATE, skb->tail, 472 ret = irda_param_insert(self, PI_BAUD_RATE, skb_tail_pointer(skb),
473 skb_tailroom(skb), &irlap_param_info); 473 skb_tailroom(skb), &irlap_param_info);
474 if (ret < 0) 474 if (ret < 0)
475 return ret; 475 return ret;
476 skb_put(skb, ret); 476 skb_put(skb, ret);
477 477
478 /* Insert max turnaround time */ 478 /* Insert max turnaround time */
479 ret = irda_param_insert(self, PI_MAX_TURN_TIME, skb->tail, 479 ret = irda_param_insert(self, PI_MAX_TURN_TIME, skb_tail_pointer(skb),
480 skb_tailroom(skb), &irlap_param_info); 480 skb_tailroom(skb), &irlap_param_info);
481 if (ret < 0) 481 if (ret < 0)
482 return ret; 482 return ret;
483 skb_put(skb, ret); 483 skb_put(skb, ret);
484 484
485 /* Insert data size */ 485 /* Insert data size */
486 ret = irda_param_insert(self, PI_DATA_SIZE, skb->tail, 486 ret = irda_param_insert(self, PI_DATA_SIZE, skb_tail_pointer(skb),
487 skb_tailroom(skb), &irlap_param_info); 487 skb_tailroom(skb), &irlap_param_info);
488 if (ret < 0) 488 if (ret < 0)
489 return ret; 489 return ret;
490 skb_put(skb, ret); 490 skb_put(skb, ret);
491 491
492 /* Insert window size */ 492 /* Insert window size */
493 ret = irda_param_insert(self, PI_WINDOW_SIZE, skb->tail, 493 ret = irda_param_insert(self, PI_WINDOW_SIZE, skb_tail_pointer(skb),
494 skb_tailroom(skb), &irlap_param_info); 494 skb_tailroom(skb), &irlap_param_info);
495 if (ret < 0) 495 if (ret < 0)
496 return ret; 496 return ret;
497 skb_put(skb, ret); 497 skb_put(skb, ret);
498 498
499 /* Insert additional BOFs */ 499 /* Insert additional BOFs */
500 ret = irda_param_insert(self, PI_ADD_BOFS, skb->tail, 500 ret = irda_param_insert(self, PI_ADD_BOFS, skb_tail_pointer(skb),
501 skb_tailroom(skb), &irlap_param_info); 501 skb_tailroom(skb), &irlap_param_info);
502 if (ret < 0) 502 if (ret < 0)
503 return ret; 503 return ret;
504 skb_put(skb, ret); 504 skb_put(skb, ret);
505 505
506 /* Insert minimum turnaround time */ 506 /* Insert minimum turnaround time */
507 ret = irda_param_insert(self, PI_MIN_TURN_TIME, skb->tail, 507 ret = irda_param_insert(self, PI_MIN_TURN_TIME, skb_tail_pointer(skb),
508 skb_tailroom(skb), &irlap_param_info); 508 skb_tailroom(skb), &irlap_param_info);
509 if (ret < 0) 509 if (ret < 0)
510 return ret; 510 return ret;
511 skb_put(skb, ret); 511 skb_put(skb, ret);
512 512
513 /* Insert link disconnect/threshold time */ 513 /* Insert link disconnect/threshold time */
514 ret = irda_param_insert(self, PI_LINK_DISC, skb->tail, 514 ret = irda_param_insert(self, PI_LINK_DISC, skb_tail_pointer(skb),
515 skb_tailroom(skb), &irlap_param_info); 515 skb_tailroom(skb), &irlap_param_info);
516 if (ret < 0) 516 if (ret < 0)
517 return ret; 517 return ret;
diff --git a/net/irda/wrapper.c b/net/irda/wrapper.c
index 5abfb71aae8d..a7a7f191f1a8 100644
--- a/net/irda/wrapper.c
+++ b/net/irda/wrapper.c
@@ -239,7 +239,8 @@ async_bump(struct net_device *dev,
239 239
240 if(docopy) { 240 if(docopy) {
241 /* Copy data without CRC (length already checked) */ 241 /* Copy data without CRC (length already checked) */
242 memcpy(newskb->data, rx_buff->data, rx_buff->len - 2); 242 skb_copy_to_linear_data(newskb, rx_buff->data,
243 rx_buff->len - 2);
243 /* Deliver this skb */ 244 /* Deliver this skb */
244 dataskb = newskb; 245 dataskb = newskb;
245 } else { 246 } else {
@@ -256,7 +257,7 @@ async_bump(struct net_device *dev,
256 257
257 /* Feed it to IrLAP layer */ 258 /* Feed it to IrLAP layer */
258 dataskb->dev = dev; 259 dataskb->dev = dev;
259 dataskb->mac.raw = dataskb->data; 260 skb_reset_mac_header(dataskb);
260 dataskb->protocol = htons(ETH_P_IRDA); 261 dataskb->protocol = htons(ETH_P_IRDA);
261 262
262 netif_rx(dataskb); 263 netif_rx(dataskb);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index acc94214bde6..e84c924a81ee 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -181,7 +181,7 @@ static void iucv_sock_close(struct sock *sk)
181 default: 181 default:
182 sock_set_flag(sk, SOCK_ZAPPED); 182 sock_set_flag(sk, SOCK_ZAPPED);
183 break; 183 break;
184 }; 184 }
185 185
186 release_sock(sk); 186 release_sock(sk);
187 iucv_sock_kill(sk); 187 iucv_sock_kill(sk);
@@ -953,8 +953,8 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
953 return; 953 return;
954 } 954 }
955 955
956 skb->h.raw = skb->data; 956 skb_reset_transport_header(skb);
957 skb->nh.raw = skb->data; 957 skb_reset_network_header(skb);
958 skb->len = msg->length; 958 skb->len = msg->length;
959 } 959 }
960 960
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 1b10d576f222..60f293842a39 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -519,7 +519,6 @@ static void iucv_disable(void)
519 kfree(iucv_path_table); 519 kfree(iucv_path_table);
520} 520}
521 521
522#ifdef CONFIG_HOTPLUG_CPU
523static int __cpuinit iucv_cpu_notify(struct notifier_block *self, 522static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
524 unsigned long action, void *hcpu) 523 unsigned long action, void *hcpu)
525{ 524{
@@ -565,7 +564,6 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
565static struct notifier_block iucv_cpu_notifier = { 564static struct notifier_block iucv_cpu_notifier = {
566 .notifier_call = iucv_cpu_notify, 565 .notifier_call = iucv_cpu_notify,
567}; 566};
568#endif
569 567
570/** 568/**
571 * iucv_sever_pathid 569 * iucv_sever_pathid
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 345019345f09..a99444142dc7 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -379,7 +379,7 @@ static int verify_address_len(void *p)
379 */ 379 */
380 return -EINVAL; 380 return -EINVAL;
381 break; 381 break;
382 }; 382 }
383 383
384 return 0; 384 return 0;
385} 385}
@@ -3667,7 +3667,7 @@ static int pfkey_recvmsg(struct kiocb *kiocb,
3667 copied = len; 3667 copied = len;
3668 } 3668 }
3669 3669
3670 skb->h.raw = skb->data; 3670 skb_reset_transport_header(skb);
3671 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); 3671 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
3672 if (err) 3672 if (err)
3673 goto out_free; 3673 goto out_free;
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index b3f65d1e80b1..099ed8fec145 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -112,7 +112,7 @@ static inline int llc_fixup_skb(struct sk_buff *skb)
112 if (unlikely(!pskb_may_pull(skb, llc_len))) 112 if (unlikely(!pskb_may_pull(skb, llc_len)))
113 return 0; 113 return 0;
114 114
115 skb->h.raw += llc_len; 115 skb->transport_header += llc_len;
116 skb_pull(skb, llc_len); 116 skb_pull(skb, llc_len);
117 if (skb->protocol == htons(ETH_P_802_2)) { 117 if (skb->protocol == htons(ETH_P_802_2)) {
118 __be16 pdulen = eth_hdr(skb)->h_proto; 118 __be16 pdulen = eth_hdr(skb)->h_proto;
diff --git a/net/llc/llc_output.c b/net/llc/llc_output.c
index f4291f349e92..754f4fedc852 100644
--- a/net/llc/llc_output.c
+++ b/net/llc/llc_output.c
@@ -41,7 +41,8 @@ int llc_mac_hdr_init(struct sk_buff *skb,
41 struct net_device *dev = skb->dev; 41 struct net_device *dev = skb->dev;
42 struct trh_hdr *trh; 42 struct trh_hdr *trh;
43 43
44 skb->mac.raw = skb_push(skb, sizeof(*trh)); 44 skb_push(skb, sizeof(*trh));
45 skb_reset_mac_header(skb);
45 trh = tr_hdr(skb); 46 trh = tr_hdr(skb);
46 trh->ac = AC; 47 trh->ac = AC;
47 trh->fc = LLC_FRAME; 48 trh->fc = LLC_FRAME;
@@ -52,7 +53,7 @@ int llc_mac_hdr_init(struct sk_buff *skb,
52 if (da) { 53 if (da) {
53 memcpy(trh->daddr, da, dev->addr_len); 54 memcpy(trh->daddr, da, dev->addr_len);
54 tr_source_route(skb, trh, dev); 55 tr_source_route(skb, trh, dev);
55 skb->mac.raw = skb->data; 56 skb_reset_mac_header(skb);
56 } 57 }
57 break; 58 break;
58 } 59 }
@@ -62,7 +63,8 @@ int llc_mac_hdr_init(struct sk_buff *skb,
62 unsigned short len = skb->len; 63 unsigned short len = skb->len;
63 struct ethhdr *eth; 64 struct ethhdr *eth;
64 65
65 skb->mac.raw = skb_push(skb, sizeof(*eth)); 66 skb_push(skb, sizeof(*eth));
67 skb_reset_mac_header(skb);
66 eth = eth_hdr(skb); 68 eth = eth_hdr(skb);
67 eth->h_proto = htons(len); 69 eth->h_proto = htons(len);
68 memcpy(eth->h_dest, da, ETH_ALEN); 70 memcpy(eth->h_dest, da, ETH_ALEN);
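Note the ordering in the llc_output.c hunks: skb_reset_mac_header() comes after skb_push(), because pushing moves skb->data backwards and the mac offset must land on the new first byte. (Contrast the IPX hunks earlier, where the reset may precede skb_put(), since put grows the tail and leaves skb->data alone.) Sketched with an invented helper:

#include <linux/if_ether.h>
#include <linux/skbuff.h>

static struct ethhdr *push_eth_header(struct sk_buff *skb)
{
	skb_push(skb, sizeof(struct ethhdr));
	skb_reset_mac_header(skb);	/* after the push, not before */
	return eth_hdr(skb);
}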
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index 2615dc81aa36..2525165e2e8f 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -36,11 +36,12 @@ struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev)
36 struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC); 36 struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC);
37 37
38 if (skb) { 38 if (skb) {
39 skb_reset_mac_header(skb);
39 skb_reserve(skb, 50); 40 skb_reserve(skb, 50);
40 skb->nh.raw = skb->h.raw = skb->data; 41 skb_reset_network_header(skb);
42 skb_reset_transport_header(skb);
41 skb->protocol = htons(ETH_P_802_2); 43 skb->protocol = htons(ETH_P_802_2);
42 skb->dev = dev; 44 skb->dev = dev;
43 skb->mac.raw = skb->head;
44 if (sk != NULL) 45 if (sk != NULL)
45 skb_set_owner_w(skb, sk); 46 skb_set_owner_w(skb, sk);
46 } 47 }
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 54698af6d0af..c558f3214255 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -25,6 +25,7 @@ config NETFILTER_NETLINK_LOG
25 and is also scheduled to replace the old syslog-based ipt_LOG 25 and is also scheduled to replace the old syslog-based ipt_LOG
26 and ip6t_LOG modules. 26 and ip6t_LOG modules.
27 27
28# Rename this to NF_CONNTRACK in 2.6.25
28config NF_CONNTRACK_ENABLED 29config NF_CONNTRACK_ENABLED
29 tristate "Netfilter connection tracking support" 30 tristate "Netfilter connection tracking support"
30 help 31 help
@@ -39,42 +40,9 @@ config NF_CONNTRACK_ENABLED
39 40
40 To compile it as a module, choose M here. If unsure, say N. 41 To compile it as a module, choose M here. If unsure, say N.
41 42
42choice
43 prompt "Netfilter connection tracking support"
44 depends on NF_CONNTRACK_ENABLED
45
46config NF_CONNTRACK_SUPPORT
47 bool "Layer 3 Independent Connection tracking"
48 help
49 Layer 3 independent connection tracking is experimental scheme
50 which generalize ip_conntrack to support other layer 3 protocols.
51
52 This is required to do Masquerading or other kinds of Network
53 Address Translation (except for Fast NAT). It can also be used to
54 enhance packet filtering (see `Connection state match support'
55 below).
56
57config IP_NF_CONNTRACK_SUPPORT
58 bool "Layer 3 Dependent Connection tracking (OBSOLETE)"
59 help
60 The old, Layer 3 dependent ip_conntrack subsystem of netfilter.
61
62 This is required to do Masquerading or other kinds of Network
63 Address Translation (except for Fast NAT). It can also be used to
64 enhance packet filtering (see `Connection state match support'
65 below).
66
67endchoice
68
69config NF_CONNTRACK 43config NF_CONNTRACK
70 tristate 44 tristate
71 default m if NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=m 45 default NF_CONNTRACK_ENABLED
72 default y if NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=y
73
74config IP_NF_CONNTRACK
75 tristate
76 default m if IP_NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=m
77 default y if IP_NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=y
78 46
79config NF_CT_ACCT 47config NF_CT_ACCT
80 bool "Connection tracking flow accounting" 48 bool "Connection tracking flow accounting"
@@ -303,9 +271,8 @@ config NETFILTER_XT_TARGET_CONNMARK
303 tristate '"CONNMARK" target support' 271 tristate '"CONNMARK" target support'
304 depends on NETFILTER_XTABLES 272 depends on NETFILTER_XTABLES
305 depends on IP_NF_MANGLE || IP6_NF_MANGLE 273 depends on IP_NF_MANGLE || IP6_NF_MANGLE
306 depends on IP_NF_CONNTRACK || NF_CONNTRACK 274 depends on NF_CONNTRACK
307 select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK 275 select NF_CONNTRACK_MARK
308 select NF_CONNTRACK_MARK if NF_CONNTRACK
309 help 276 help
310 This option adds a `CONNMARK' target, which allows one to manipulate 277 This option adds a `CONNMARK' target, which allows one to manipulate
311 the connection mark value. Similar to the MARK target, but 278 the connection mark value. Similar to the MARK target, but
@@ -366,7 +333,7 @@ config NETFILTER_XT_TARGET_NOTRACK
366 tristate '"NOTRACK" target support' 333 tristate '"NOTRACK" target support'
367 depends on NETFILTER_XTABLES 334 depends on NETFILTER_XTABLES
368 depends on IP_NF_RAW || IP6_NF_RAW 335 depends on IP_NF_RAW || IP6_NF_RAW
369 depends on IP_NF_CONNTRACK || NF_CONNTRACK 336 depends on NF_CONNTRACK
370 help 337 help
371 The NOTRACK target allows a select rule to specify 338 The NOTRACK target allows a select rule to specify
372 which packets *not* to enter the conntrack/NAT 339 which packets *not* to enter the conntrack/NAT
@@ -387,9 +354,7 @@ config NETFILTER_XT_TARGET_SECMARK
387 354
388config NETFILTER_XT_TARGET_CONNSECMARK 355config NETFILTER_XT_TARGET_CONNSECMARK
389 tristate '"CONNSECMARK" target support' 356 tristate '"CONNSECMARK" target support'
390 depends on NETFILTER_XTABLES && \ 357 depends on NETFILTER_XTABLES && NF_CONNTRACK && NF_CONNTRACK_SECMARK
391 ((NF_CONNTRACK && NF_CONNTRACK_SECMARK) || \
392 (IP_NF_CONNTRACK && IP_NF_CONNTRACK_SECMARK))
393 help 358 help
394 The CONNSECMARK target copies security markings from packets 359 The CONNSECMARK target copies security markings from packets
395 to connections, and restores security markings from connections 360 to connections, and restores security markings from connections
@@ -437,9 +402,8 @@ config NETFILTER_XT_MATCH_COMMENT
437config NETFILTER_XT_MATCH_CONNBYTES 402config NETFILTER_XT_MATCH_CONNBYTES
438 tristate '"connbytes" per-connection counter match support' 403 tristate '"connbytes" per-connection counter match support'
439 depends on NETFILTER_XTABLES 404 depends on NETFILTER_XTABLES
440 depends on IP_NF_CONNTRACK || NF_CONNTRACK 405 depends on NF_CONNTRACK
441 select IP_NF_CT_ACCT if IP_NF_CONNTRACK 406 select NF_CT_ACCT
442 select NF_CT_ACCT if NF_CONNTRACK
443 help 407 help
444 This option adds a `connbytes' match, which allows you to match the 408 This option adds a `connbytes' match, which allows you to match the
445 number of bytes and/or packets for each direction within a connection. 409 number of bytes and/or packets for each direction within a connection.
@@ -450,9 +414,8 @@ config NETFILTER_XT_MATCH_CONNBYTES
450config NETFILTER_XT_MATCH_CONNMARK 414config NETFILTER_XT_MATCH_CONNMARK
451 tristate '"connmark" connection mark match support' 415 tristate '"connmark" connection mark match support'
452 depends on NETFILTER_XTABLES 416 depends on NETFILTER_XTABLES
453 depends on IP_NF_CONNTRACK || NF_CONNTRACK 417 depends on NF_CONNTRACK
454 select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK 418 select NF_CONNTRACK_MARK
455 select NF_CONNTRACK_MARK if NF_CONNTRACK
456 help 419 help
457 This option adds a `connmark' match, which allows you to match the 420 This option adds a `connmark' match, which allows you to match the
458 connection mark value previously set for the session by `CONNMARK'. 421 connection mark value previously set for the session by `CONNMARK'.
@@ -464,7 +427,7 @@ config NETFILTER_XT_MATCH_CONNMARK
464config NETFILTER_XT_MATCH_CONNTRACK 427config NETFILTER_XT_MATCH_CONNTRACK
465 tristate '"conntrack" connection tracking match support' 428 tristate '"conntrack" connection tracking match support'
466 depends on NETFILTER_XTABLES 429 depends on NETFILTER_XTABLES
467 depends on IP_NF_CONNTRACK || NF_CONNTRACK 430 depends on NF_CONNTRACK
468 help 431 help
469 This is a general conntrack match module, a superset of the state match. 432 This is a general conntrack match module, a superset of the state match.
470 433
@@ -508,7 +471,7 @@ config NETFILTER_XT_MATCH_ESP
508config NETFILTER_XT_MATCH_HELPER 471config NETFILTER_XT_MATCH_HELPER
509 tristate '"helper" match support' 472 tristate '"helper" match support'
510 depends on NETFILTER_XTABLES 473 depends on NETFILTER_XTABLES
511 depends on IP_NF_CONNTRACK || NF_CONNTRACK 474 depends on NF_CONNTRACK
512 help 475 help
513 Helper matching allows you to match packets in dynamic connections 476 Helper matching allows you to match packets in dynamic connections
514 tracked by a conntrack-helper, ie. ip_conntrack_ftp 477 tracked by a conntrack-helper, ie. ip_conntrack_ftp
@@ -632,7 +595,7 @@ config NETFILTER_XT_MATCH_SCTP
632config NETFILTER_XT_MATCH_STATE 595config NETFILTER_XT_MATCH_STATE
633 tristate '"state" match support' 596 tristate '"state" match support'
634 depends on NETFILTER_XTABLES 597 depends on NETFILTER_XTABLES
635 depends on IP_NF_CONNTRACK || NF_CONNTRACK 598 depends on NF_CONNTRACK
636 help 599 help
637 Connection state matching allows you to match packets based on their 600 Connection state matching allows you to match packets based on their
638 relationship to a tracked connection (ie. previous packets). This 601 relationship to a tracked connection (ie. previous packets). This
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index c3ebdbd917e9..a84478ee2ded 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -5,10 +5,6 @@
5 * way. 5 * way.
6 * 6 *
7 * Rusty Russell (C)2000 -- This code is GPL. 7 * Rusty Russell (C)2000 -- This code is GPL.
8 *
9 * February 2000: Modified by James Morris to have 1 queue per protocol.
10 * 15-Mar-2000: Added NF_REPEAT --RR.
11 * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik.
12 */ 8 */
13#include <linux/kernel.h> 9#include <linux/kernel.h>
14#include <linux/netfilter.h> 10#include <linux/netfilter.h>
@@ -244,6 +240,7 @@ void nf_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
244} 240}
245EXPORT_SYMBOL(nf_proto_csum_replace4); 241EXPORT_SYMBOL(nf_proto_csum_replace4);
246 242
243#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
247/* This does not belong here, but locally generated errors need it if connection 244/* This does not belong here, but locally generated errors need it if connection
248 tracking in use: without this, connection may not be in hash table, and hence 245 tracking in use: without this, connection may not be in hash table, and hence
249 manufactured ICMP or RST packets will not be associated with it. */ 246 manufactured ICMP or RST packets will not be associated with it. */
@@ -264,6 +261,22 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
264} 261}
265EXPORT_SYMBOL(nf_ct_attach); 262EXPORT_SYMBOL(nf_ct_attach);
266 263
264void (*nf_ct_destroy)(struct nf_conntrack *);
265EXPORT_SYMBOL(nf_ct_destroy);
266
267void nf_conntrack_destroy(struct nf_conntrack *nfct)
268{
269 void (*destroy)(struct nf_conntrack *);
270
271 rcu_read_lock();
272 destroy = rcu_dereference(nf_ct_destroy);
273 BUG_ON(destroy == NULL);
274 destroy(nfct);
275 rcu_read_unlock();
276}
277EXPORT_SYMBOL(nf_conntrack_destroy);
278#endif /* CONFIG_NF_CONNTRACK */
279
267#ifdef CONFIG_PROC_FS 280#ifdef CONFIG_PROC_FS
268struct proc_dir_entry *proc_net_netfilter; 281struct proc_dir_entry *proc_net_netfilter;
269EXPORT_SYMBOL(proc_net_netfilter); 282EXPORT_SYMBOL(proc_net_netfilter);
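nf_conntrack_destroy() above samples an RCU-protected function pointer, so the conntrack implementation can stay modular: the core never links against it directly, and unloading becomes safe once readers drain. The consumer side is in the hunk; the publisher side sketched here is an assumption about how the providing module would install the hook, not code from this patch:

#include <linux/rcupdate.h>

struct nf_conntrack;			/* opaque, as in the hunk */

static void (*destroy_hook)(struct nf_conntrack *);

static void install_hook(void (*fn)(struct nf_conntrack *))
{
	rcu_assign_pointer(destroy_hook, fn);	/* publish for readers */
}

static void remove_hook(void)
{
	rcu_assign_pointer(destroy_hook, NULL);
	synchronize_rcu();	/* wait out rcu_read_lock() readers */
}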
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index b3a70eb6d42a..e132c8ae8784 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -9,24 +9,6 @@
9 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as 10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation. 11 * published by the Free Software Foundation.
12 *
13 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
14 * - new API and handling of conntrack/nat helpers
15 * - now capable of multiple expectations for one master
16 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
17 * - add usage/reference counts to ip_conntrack_expect
18 * - export ip_conntrack[_expect]_{find_get,put} functions
19 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
20 * - generalize L3 protocol dependent part.
21 * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
22 * - add support various size of conntrack structures.
23 * 26 Jan 2006: Harald Welte <laforge@netfilter.org>
24 * - restructure nf_conn (introduce nf_conn_help)
25 * - redesign 'features' how they were originally intended
26 * 26 Feb 2006: Pablo Neira Ayuso <pablo@eurodev.net>
27 * - add support for L3 protocol module load on demand.
28 *
29 * Derived from net/ipv4/netfilter/ip_conntrack_core.c
30 */ 12 */
31 13
32#include <linux/types.h> 14#include <linux/types.h>
@@ -128,10 +110,11 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
128 unsigned int size, unsigned int rnd) 110 unsigned int size, unsigned int rnd)
129{ 111{
130 unsigned int a, b; 112 unsigned int a, b;
131 a = jhash((void *)tuple->src.u3.all, sizeof(tuple->src.u3.all), 113
132 ((tuple->src.l3num) << 16) | tuple->dst.protonum); 114 a = jhash2(tuple->src.u3.all, ARRAY_SIZE(tuple->src.u3.all),
133 b = jhash((void *)tuple->dst.u3.all, sizeof(tuple->dst.u3.all), 115 (tuple->src.l3num << 16) | tuple->dst.protonum);
134 (tuple->src.u.all << 16) | tuple->dst.u.all); 116 b = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
117 (tuple->src.u.all << 16) | tuple->dst.u.all);
135 118
136 return jhash_2words(a, b, rnd) % size; 119 return jhash_2words(a, b, rnd) % size;
137} 120}
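
The __hash_conntrack() hunk swaps jhash() over raw bytes for jhash2() over u32 words. The easy mistake the new code avoids is the unit of the length argument: jhash() takes a byte count (hence sizeof), while jhash2() takes a count of 32-bit words (hence ARRAY_SIZE). A standalone check of the difference, with u3_all standing in for tuple->src.u3.all:

    #include <stdint.h>
    #include <stdio.h>

    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    int main(void)
    {
        uint32_t u3_all[4] = { 0 };  /* stand-in for tuple->src.u3.all */

        /* passing sizeof() to jhash2() would hash 4x too many words */
        printf("sizeof     = %zu (bytes, for jhash)\n", sizeof(u3_all));
        printf("ARRAY_SIZE = %zu (words, for jhash2)\n", ARRAY_SIZE(u3_all));
        return 0;
    }
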
@@ -633,13 +616,11 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
633 memset(conntrack, 0, nf_ct_cache[features].size); 616 memset(conntrack, 0, nf_ct_cache[features].size);
634 conntrack->features = features; 617 conntrack->features = features;
635 atomic_set(&conntrack->ct_general.use, 1); 618 atomic_set(&conntrack->ct_general.use, 1);
636 conntrack->ct_general.destroy = destroy_conntrack;
637 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; 619 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
638 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; 620 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
639 /* Don't set timer yet: wait for confirmation */ 621 /* Don't set timer yet: wait for confirmation */
640 init_timer(&conntrack->timeout); 622 setup_timer(&conntrack->timeout, death_by_timeout,
641 conntrack->timeout.data = (unsigned long)conntrack; 623 (unsigned long)conntrack);
642 conntrack->timeout.function = death_by_timeout;
643 read_unlock_bh(&nf_ct_cache_lock); 624 read_unlock_bh(&nf_ct_cache_lock);
644 625
645 return conntrack; 626 return conntrack;
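
Here, and again in the expectation and nfnetlink_log hunks further down, the three-step init_timer() / .data / .function dance becomes one setup_timer() call. A toy userspace model of what the helper does (struct and helper simplified from the kernel's definitions):

    #include <stdio.h>

    /* toy version of the kernel's struct timer_list */
    struct timer_list {
        void (*function)(unsigned long);
        unsigned long data;
        unsigned long expires;
    };

    /* one call instead of three open-coded assignments */
    static void setup_timer(struct timer_list *t,
                            void (*fn)(unsigned long), unsigned long data)
    {
        t->function = fn;
        t->data = data;
        t->expires = 0;
    }

    static void death_by_timeout(unsigned long data)
    {
        printf("timeout fired for object at %#lx\n", data);
    }

    int main(void)
    {
        struct timer_list timeout;
        int conntrack;  /* stands in for the nf_conn being timed out */

        setup_timer(&timeout, death_by_timeout, (unsigned long)&conntrack);
        timeout.function(timeout.data);  /* simulate expiry */
        return 0;
    }
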
@@ -768,7 +749,7 @@ resolve_normal_ct(struct sk_buff *skb,
768 struct nf_conntrack_tuple_hash *h; 749 struct nf_conntrack_tuple_hash *h;
769 struct nf_conn *ct; 750 struct nf_conn *ct;
770 751
771 if (!nf_ct_get_tuple(skb, (unsigned int)(skb->nh.raw - skb->data), 752 if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
772 dataoff, l3num, protonum, &tuple, l3proto, 753 dataoff, l3num, protonum, &tuple, l3proto,
773 l4proto)) { 754 l4proto)) {
774 DEBUGP("resolve_normal_ct: Can't get tuple\n"); 755 DEBUGP("resolve_normal_ct: Can't get tuple\n");
@@ -960,7 +941,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
960 if (do_acct) { 941 if (do_acct) {
961 ct->counters[CTINFO2DIR(ctinfo)].packets++; 942 ct->counters[CTINFO2DIR(ctinfo)].packets++;
962 ct->counters[CTINFO2DIR(ctinfo)].bytes += 943 ct->counters[CTINFO2DIR(ctinfo)].bytes +=
963 skb->len - (unsigned int)(skb->nh.raw - skb->data); 944 skb->len - skb_network_offset(skb);
964 945
965 if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000) 946 if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
966 || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000)) 947 || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
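
Both conversions above drop the open-coded pointer arithmetic skb->nh.raw - skb->data in favour of the skb_network_offset() accessor from the concurrent sk_buff layout rework. A simplified model of what the accessor computes (field names condensed from the real struct sk_buff):

    #include <stdio.h>

    /* simplified stand-in for the relevant sk_buff fields */
    struct sk_buff {
        unsigned char *data;            /* current packet start */
        unsigned char *network_header;  /* where the L3 header begins */
        unsigned int len;
    };

    static int skb_network_offset(const struct sk_buff *skb)
    {
        /* replaces the open-coded (skb->nh.raw - skb->data) */
        return (int)(skb->network_header - skb->data);
    }

    int main(void)
    {
        unsigned char pkt[128] = { 0 };
        struct sk_buff skb = {
            .data = pkt,
            .network_header = pkt + 14,  /* e.g. past an Ethernet header */
            .len = 60,
        };

        /* the accounting hunk: count bytes from the IP header onward */
        printf("l3 bytes: %u\n", skb.len - skb_network_offset(&skb));
        return 0;
    }
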
@@ -1140,6 +1121,8 @@ void nf_conntrack_cleanup(void)
1140 while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1) 1121 while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
1141 schedule(); 1122 schedule();
1142 1123
1124 rcu_assign_pointer(nf_ct_destroy, NULL);
1125
1143 for (i = 0; i < NF_CT_F_NUM; i++) { 1126 for (i = 0; i < NF_CT_F_NUM; i++) {
1144 if (nf_ct_cache[i].use == 0) 1127 if (nf_ct_cache[i].use == 0)
1145 continue; 1128 continue;
@@ -1152,14 +1135,7 @@ void nf_conntrack_cleanup(void)
1152 free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc, 1135 free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
1153 nf_conntrack_htable_size); 1136 nf_conntrack_htable_size);
1154 1137
1155 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_generic); 1138 nf_conntrack_proto_fini();
1156
1157 /* free l3proto protocol tables */
1158 for (i = 0; i < PF_MAX; i++)
1159 if (nf_ct_protos[i]) {
1160 kfree(nf_ct_protos[i]);
1161 nf_ct_protos[i] = NULL;
1162 }
1163} 1139}
1164 1140
1165static struct list_head *alloc_hashtable(int size, int *vmalloced) 1141static struct list_head *alloc_hashtable(int size, int *vmalloced)
@@ -1237,7 +1213,6 @@ module_param_call(hashsize, set_hashsize, param_get_uint,
1237 1213
1238int __init nf_conntrack_init(void) 1214int __init nf_conntrack_init(void)
1239{ 1215{
1240 unsigned int i;
1241 int ret; 1216 int ret;
1242 1217
1243 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB 1218 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
@@ -1279,18 +1254,13 @@ int __init nf_conntrack_init(void)
1279 goto err_free_conntrack_slab; 1254 goto err_free_conntrack_slab;
1280 } 1255 }
1281 1256
1282 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_generic); 1257 ret = nf_conntrack_proto_init();
1283 if (ret < 0) 1258 if (ret < 0)
1284 goto out_free_expect_slab; 1259 goto out_free_expect_slab;
1285 1260
1286 /* Don't NEED lock here, but good form anyway. */
1287 write_lock_bh(&nf_conntrack_lock);
1288 for (i = 0; i < AF_MAX; i++)
1289 nf_ct_l3protos[i] = &nf_conntrack_l3proto_generic;
1290 write_unlock_bh(&nf_conntrack_lock);
1291
1292 /* For use by REJECT target */ 1261 /* For use by REJECT target */
1293 rcu_assign_pointer(ip_ct_attach, __nf_conntrack_attach); 1262 rcu_assign_pointer(ip_ct_attach, __nf_conntrack_attach);
1263 rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
1294 1264
1295 /* Set up fake conntrack: 1265 /* Set up fake conntrack:
1296 - to never be deleted, not in any hashes */ 1266 - to never be deleted, not in any hashes */
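
With the protocol-table bookkeeping moved into nf_conntrack_proto_init()/nf_conntrack_proto_fini(), nf_conntrack_init() is a cleaner instance of the kernel's goto-unwind idiom: each step either succeeds or jumps to a label that tears down everything before it, and the externally visible hooks (ip_ct_attach, nf_ct_destroy) are published only after every step has succeeded. The skeleton of that idiom, as a runnable sketch with illustrative step names:

    #include <stdio.h>

    /* illustrative sub-initialisers, each with a matching teardown */
    static int cache_init(void)  { puts("slab caches ready");  return 0; }
    static void cache_fini(void) { puts("slab caches freed"); }
    static int proto_init(void)  { puts("proto tables ready"); return 0; }

    static int conntrack_init_sketch(void)
    {
        int ret;

        ret = cache_init();
        if (ret < 0)
            goto out;

        ret = proto_init();
        if (ret < 0)
            goto err_free_cache;

        /* publish hooks last, once nothing below can fail */
        puts("hooks published");
        return 0;

    err_free_cache:
        cache_fini();
    out:
        return ret;
    }

    int main(void)
    {
        return conntrack_init_sketch() ? 1 : 0;
    }
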
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 1a223e0c0856..6bd421df2dbc 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -91,3 +91,26 @@ void nf_ct_event_cache_flush(void)
91 } 91 }
92} 92}
93 93
94int nf_conntrack_register_notifier(struct notifier_block *nb)
95{
96 return atomic_notifier_chain_register(&nf_conntrack_chain, nb);
97}
98EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
99
100int nf_conntrack_unregister_notifier(struct notifier_block *nb)
101{
102 return atomic_notifier_chain_unregister(&nf_conntrack_chain, nb);
103}
104EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
105
106int nf_conntrack_expect_register_notifier(struct notifier_block *nb)
107{
108 return atomic_notifier_chain_register(&nf_conntrack_expect_chain, nb);
109}
110EXPORT_SYMBOL_GPL(nf_conntrack_expect_register_notifier);
111
112int nf_conntrack_expect_unregister_notifier(struct notifier_block *nb)
113{
114 return atomic_notifier_chain_unregister(&nf_conntrack_expect_chain, nb);
115}
116EXPORT_SYMBOL_GPL(nf_conntrack_expect_unregister_notifier);
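
Rather than exporting the nf_conntrack_chain and nf_conntrack_expect_chain heads themselves, ecache now exports four thin wrappers, so the chains stay private to this file. A single-threaded userspace analogue of the idea (the kernel's atomic_notifier_chain_* helpers additionally take care of locking and RCU):

    #include <stdio.h>

    struct notifier_block {
        int (*notifier_call)(struct notifier_block *nb,
                             unsigned long event, void *data);
        struct notifier_block *next;
    };

    /* private chain head; callers only ever see the wrappers */
    static struct notifier_block *conntrack_chain;

    static int conntrack_register_notifier(struct notifier_block *nb)
    {
        nb->next = conntrack_chain;
        conntrack_chain = nb;
        return 0;
    }

    static void conntrack_call_chain(unsigned long event, void *data)
    {
        struct notifier_block *nb;

        for (nb = conntrack_chain; nb; nb = nb->next)
            nb->notifier_call(nb, event, data);
    }

    static int print_event(struct notifier_block *nb,
                           unsigned long event, void *data)
    {
        (void)nb; (void)data;
        printf("conntrack event %lu\n", event);
        return 0;
    }

    int main(void)
    {
        struct notifier_block nb = { .notifier_call = print_event };

        conntrack_register_notifier(&nb);
        conntrack_call_chain(1 /* e.g. "new connection" */, NULL);
        return 0;
    }
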
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index ce70a6fc6bda..c31af29a4439 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -290,9 +290,7 @@ static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
290 master_help->expecting++; 290 master_help->expecting++;
291 list_add(&exp->list, &nf_conntrack_expect_list); 291 list_add(&exp->list, &nf_conntrack_expect_list);
292 292
293 init_timer(&exp->timeout); 293 setup_timer(&exp->timeout, expectation_timed_out, (unsigned long)exp);
294 exp->timeout.data = (unsigned long)exp;
295 exp->timeout.function = expectation_timed_out;
296 exp->timeout.expires = jiffies + master_help->helper->timeout * HZ; 294 exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
297 add_timer(&exp->timeout); 295 add_timer(&exp->timeout);
298 296
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 3089dfc40c88..a186799f6542 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -7,12 +7,6 @@
7 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation. 9 * published by the Free Software Foundation.
10 *
11 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
12 * - enable working with Layer 3 protocol independent connection tracking.
13 * - track EPRT and EPSV commands with IPv6 address.
14 *
15 * Derived from net/ipv4/netfilter/ip_conntrack_ftp.c
16 */ 10 */
17 11
18#include <linux/module.h> 12#include <linux/module.h>
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index bb26a658cc1c..1093478cc007 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -46,7 +46,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
46 struct nf_conn *ct, enum ip_conntrack_info ctinfo) 46 struct nf_conn *ct, enum ip_conntrack_info ctinfo)
47{ 47{
48 struct nf_conntrack_expect *exp; 48 struct nf_conntrack_expect *exp;
49 struct iphdr *iph = (*pskb)->nh.iph; 49 struct iphdr *iph = ip_hdr(*pskb);
50 struct rtable *rt = (struct rtable *)(*pskb)->dst; 50 struct rtable *rt = (struct rtable *)(*pskb)->dst;
51 struct in_device *in_dev; 51 struct in_device *in_dev;
52 __be32 mask = 0; 52 __be32 mask = 0;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 48f05314ebf7..aa1a97ee514b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -6,9 +6,6 @@
6 * (C) 2003 by Patrick Mchardy <kaber@trash.net> 6 * (C) 2003 by Patrick Mchardy <kaber@trash.net>
7 * (C) 2005-2006 by Pablo Neira Ayuso <pablo@eurodev.net> 7 * (C) 2005-2006 by Pablo Neira Ayuso <pablo@eurodev.net>
8 * 8 *
9 * I've reworked this stuff to use attributes instead of conntrack
10 * structures. 5.44 am. I need more tea. --pablo 05/07/11.
11 *
12 * Initial connection tracking via netlink development funded and 9 * Initial connection tracking via netlink development funded and
13 * generally made possible by Network Robots, Inc. (www.networkrobots.com) 10 * generally made possible by Network Robots, Inc. (www.networkrobots.com)
14 * 11 *
@@ -16,8 +13,6 @@
16 * 13 *
17 * This software may be used and distributed according to the terms 14 * This software may be used and distributed according to the terms
18 * of the GNU General Public License, incorporated herein by reference. 15 * of the GNU General Public License, incorporated herein by reference.
19 *
20 * Derived from ip_conntrack_netlink.c: Port by Pablo Neira Ayuso (05/11/14)
21 */ 16 */
22 17
23#include <linux/init.h> 18#include <linux/init.h>
@@ -33,6 +28,7 @@
33#include <linux/notifier.h> 28#include <linux/notifier.h>
34 29
35#include <linux/netfilter.h> 30#include <linux/netfilter.h>
31#include <net/netlink.h>
36#include <net/netfilter/nf_conntrack.h> 32#include <net/netfilter/nf_conntrack.h>
37#include <net/netfilter/nf_conntrack_core.h> 33#include <net/netfilter/nf_conntrack_core.h>
38#include <net/netfilter/nf_conntrack_expect.h> 34#include <net/netfilter/nf_conntrack_expect.h>
@@ -268,9 +264,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
268 struct nlmsghdr *nlh; 264 struct nlmsghdr *nlh;
269 struct nfgenmsg *nfmsg; 265 struct nfgenmsg *nfmsg;
270 struct nfattr *nest_parms; 266 struct nfattr *nest_parms;
271 unsigned char *b; 267 unsigned char *b = skb_tail_pointer(skb);
272
273 b = skb->tail;
274 268
275 event |= NFNL_SUBSYS_CTNETLINK << 8; 269 event |= NFNL_SUBSYS_CTNETLINK << 8;
276 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg)); 270 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
@@ -303,12 +297,12 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
303 ctnetlink_dump_use(skb, ct) < 0) 297 ctnetlink_dump_use(skb, ct) < 0)
304 goto nfattr_failure; 298 goto nfattr_failure;
305 299
306 nlh->nlmsg_len = skb->tail - b; 300 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
307 return skb->len; 301 return skb->len;
308 302
309nlmsg_failure: 303nlmsg_failure:
310nfattr_failure: 304nfattr_failure:
311 skb_trim(skb, b - skb->data); 305 nlmsg_trim(skb, b);
312 return -1; 306 return -1;
313} 307}
314 308
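
ctnetlink_fill_info() now takes its rollback watermark from skb_tail_pointer() and truncates with nlmsg_trim() instead of open-coding skb->tail and skb_trim(); the same change repeats in ctnetlink_exp_fill_info() below. The underlying idiom, save the tail before building a message and cut back to it if any attribute fails, in a standalone buffer sketch:

    #include <stdio.h>
    #include <string.h>

    struct msgbuf {
        unsigned char data[256];
        unsigned int len;             /* plays the role of skb->tail */
    };

    static int put_attr(struct msgbuf *m, const char *s)
    {
        size_t n = strlen(s) + 1;

        if (m->len + n > sizeof(m->data))
            return -1;                /* ~ NFA_PUT jumping to nfattr_failure */
        memcpy(m->data + m->len, s, n);
        m->len += (unsigned int)n;
        return 0;
    }

    static int fill_info(struct msgbuf *m)
    {
        unsigned int b = m->len;      /* ~ b = skb_tail_pointer(skb) */

        if (put_attr(m, "tuple") < 0 || put_attr(m, "status") < 0) {
            m->len = b;               /* ~ nlmsg_trim(skb, b): drop partial msg */
            return -1;
        }
        return 0;
    }

    int main(void)
    {
        struct msgbuf m = { .len = 0 };

        printf("fill: %d, len=%u\n", fill_info(&m), m.len);
        return 0;
    }
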
@@ -322,7 +316,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
322 struct nf_conn *ct = (struct nf_conn *)ptr; 316 struct nf_conn *ct = (struct nf_conn *)ptr;
323 struct sk_buff *skb; 317 struct sk_buff *skb;
324 unsigned int type; 318 unsigned int type;
325 unsigned char *b; 319 sk_buff_data_t b;
326 unsigned int flags = 0, group; 320 unsigned int flags = 0, group;
327 321
328 /* ignore our fake conntrack entry */ 322 /* ignore our fake conntrack entry */
@@ -662,7 +656,7 @@ static const size_t cta_min[CTA_MAX] = {
662 656
663static int 657static int
664ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, 658ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
665 struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) 659 struct nlmsghdr *nlh, struct nfattr *cda[])
666{ 660{
667 struct nf_conntrack_tuple_hash *h; 661 struct nf_conntrack_tuple_hash *h;
668 struct nf_conntrack_tuple tuple; 662 struct nf_conntrack_tuple tuple;
@@ -710,7 +704,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
710 704
711static int 705static int
712ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, 706ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
713 struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) 707 struct nlmsghdr *nlh, struct nfattr *cda[])
714{ 708{
715 struct nf_conntrack_tuple_hash *h; 709 struct nf_conntrack_tuple_hash *h;
716 struct nf_conntrack_tuple tuple; 710 struct nf_conntrack_tuple tuple;
@@ -721,22 +715,12 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
721 int err = 0; 715 int err = 0;
722 716
723 if (nlh->nlmsg_flags & NLM_F_DUMP) { 717 if (nlh->nlmsg_flags & NLM_F_DUMP) {
724 u32 rlen;
725
726#ifndef CONFIG_NF_CT_ACCT 718#ifndef CONFIG_NF_CT_ACCT
727 if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO) 719 if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO)
728 return -ENOTSUPP; 720 return -ENOTSUPP;
729#endif 721#endif
730 if ((*errp = netlink_dump_start(ctnl, skb, nlh, 722 return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table,
731 ctnetlink_dump_table, 723 ctnetlink_done);
732 ctnetlink_done)) != 0)
733 return -EINVAL;
734
735 rlen = NLMSG_ALIGN(nlh->nlmsg_len);
736 if (rlen > skb->len)
737 rlen = skb->len;
738 skb_pull(skb, rlen);
739 return 0;
740 } 724 }
741 725
742 if (nfattr_bad_size(cda, CTA_MAX, cta_min)) 726 if (nfattr_bad_size(cda, CTA_MAX, cta_min))
@@ -1010,7 +994,7 @@ err:
1010 994
1011static int 995static int
1012ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, 996ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
1013 struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) 997 struct nlmsghdr *nlh, struct nfattr *cda[])
1014{ 998{
1015 struct nf_conntrack_tuple otuple, rtuple; 999 struct nf_conntrack_tuple otuple, rtuple;
1016 struct nf_conntrack_tuple_hash *h = NULL; 1000 struct nf_conntrack_tuple_hash *h = NULL;
@@ -1152,9 +1136,7 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
1152{ 1136{
1153 struct nlmsghdr *nlh; 1137 struct nlmsghdr *nlh;
1154 struct nfgenmsg *nfmsg; 1138 struct nfgenmsg *nfmsg;
1155 unsigned char *b; 1139 unsigned char *b = skb_tail_pointer(skb);
1156
1157 b = skb->tail;
1158 1140
1159 event |= NFNL_SUBSYS_CTNETLINK_EXP << 8; 1141 event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
1160 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg)); 1142 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
@@ -1168,12 +1150,12 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
1168 if (ctnetlink_exp_dump_expect(skb, exp) < 0) 1150 if (ctnetlink_exp_dump_expect(skb, exp) < 0)
1169 goto nfattr_failure; 1151 goto nfattr_failure;
1170 1152
1171 nlh->nlmsg_len = skb->tail - b; 1153 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1172 return skb->len; 1154 return skb->len;
1173 1155
1174nlmsg_failure: 1156nlmsg_failure:
1175nfattr_failure: 1157nfattr_failure:
1176 skb_trim(skb, b - skb->data); 1158 nlmsg_trim(skb, b);
1177 return -1; 1159 return -1;
1178} 1160}
1179 1161
@@ -1186,7 +1168,7 @@ static int ctnetlink_expect_event(struct notifier_block *this,
1186 struct nf_conntrack_expect *exp = (struct nf_conntrack_expect *)ptr; 1168 struct nf_conntrack_expect *exp = (struct nf_conntrack_expect *)ptr;
1187 struct sk_buff *skb; 1169 struct sk_buff *skb;
1188 unsigned int type; 1170 unsigned int type;
1189 unsigned char *b; 1171 sk_buff_data_t b;
1190 int flags = 0; 1172 int flags = 0;
1191 1173
1192 if (events & IPEXP_NEW) { 1174 if (events & IPEXP_NEW) {
@@ -1263,7 +1245,7 @@ static const size_t cta_min_exp[CTA_EXPECT_MAX] = {
1263 1245
1264static int 1246static int
1265ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, 1247ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
1266 struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) 1248 struct nlmsghdr *nlh, struct nfattr *cda[])
1267{ 1249{
1268 struct nf_conntrack_tuple tuple; 1250 struct nf_conntrack_tuple tuple;
1269 struct nf_conntrack_expect *exp; 1251 struct nf_conntrack_expect *exp;
@@ -1276,17 +1258,9 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
1276 return -EINVAL; 1258 return -EINVAL;
1277 1259
1278 if (nlh->nlmsg_flags & NLM_F_DUMP) { 1260 if (nlh->nlmsg_flags & NLM_F_DUMP) {
1279 u32 rlen; 1261 return netlink_dump_start(ctnl, skb, nlh,
1280 1262 ctnetlink_exp_dump_table,
1281 if ((*errp = netlink_dump_start(ctnl, skb, nlh, 1263 ctnetlink_done);
1282 ctnetlink_exp_dump_table,
1283 ctnetlink_done)) != 0)
1284 return -EINVAL;
1285 rlen = NLMSG_ALIGN(nlh->nlmsg_len);
1286 if (rlen > skb->len)
1287 rlen = skb->len;
1288 skb_pull(skb, rlen);
1289 return 0;
1290 } 1264 }
1291 1265
1292 if (cda[CTA_EXPECT_MASTER-1]) 1266 if (cda[CTA_EXPECT_MASTER-1])
@@ -1333,7 +1307,7 @@ out:
1333 1307
1334static int 1308static int
1335ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, 1309ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
1336 struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) 1310 struct nlmsghdr *nlh, struct nfattr *cda[])
1337{ 1311{
1338 struct nf_conntrack_expect *exp, *tmp; 1312 struct nf_conntrack_expect *exp, *tmp;
1339 struct nf_conntrack_tuple tuple; 1313 struct nf_conntrack_tuple tuple;
@@ -1467,7 +1441,7 @@ out:
1467 1441
1468static int 1442static int
1469ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, 1443ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
1470 struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) 1444 struct nlmsghdr *nlh, struct nfattr *cda[])
1471{ 1445{
1472 struct nf_conntrack_tuple tuple; 1446 struct nf_conntrack_tuple tuple;
1473 struct nf_conntrack_expect *exp; 1447 struct nf_conntrack_expect *exp;
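
A pattern running through all of these ctnetlink hunks: the handlers lose their int *errp out-parameter. With netlink_run_queue() doing the dispatch (see the nfnetlink.c changes below), a handler simply returns 0 or a negative errno and the caller turns that into the netlink ACK; dump requests likewise just return whatever netlink_dump_start() returns. The shape of the conversion in isolation, with illustrative function names:

    #include <errno.h>
    #include <stdio.h>

    /* old style: -1 sentinel plus an out-parameter the caller must read */
    static int get_conntrack_old(int valid, int *errp)
    {
        if (!valid) {
            *errp = -EINVAL;
            return -1;
        }
        return 0;
    }

    /* new style: the return value itself carries the errno */
    static int get_conntrack_new(int valid)
    {
        return valid ? 0 : -EINVAL;
    }

    int main(void)
    {
        int err = 0;

        get_conntrack_old(0, &err);
        printf("old: err=%d, new: %d\n", err, get_conntrack_new(0));
        return 0;
    }
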
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 456155f05c75..6d947068c58f 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -28,13 +28,13 @@
28#include <net/netfilter/nf_conntrack_l4proto.h> 28#include <net/netfilter/nf_conntrack_l4proto.h>
29#include <net/netfilter/nf_conntrack_core.h> 29#include <net/netfilter/nf_conntrack_core.h>
30 30
31struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly; 31static struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly;
32struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX] __read_mostly; 32struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX] __read_mostly;
33EXPORT_SYMBOL_GPL(nf_ct_l3protos); 33EXPORT_SYMBOL_GPL(nf_ct_l3protos);
34 34
35#ifdef CONFIG_SYSCTL 35static DEFINE_MUTEX(nf_ct_proto_mutex);
36static DEFINE_MUTEX(nf_ct_proto_sysctl_mutex);
37 36
37#ifdef CONFIG_SYSCTL
38static int 38static int
39nf_ct_register_sysctl(struct ctl_table_header **header, struct ctl_table *path, 39nf_ct_register_sysctl(struct ctl_table_header **header, struct ctl_table *path,
40 struct ctl_table *table, unsigned int *users) 40 struct ctl_table *table, unsigned int *users)
@@ -164,13 +164,11 @@ static int nf_ct_l3proto_register_sysctl(struct nf_conntrack_l3proto *l3proto)
164 int err = 0; 164 int err = 0;
165 165
166#ifdef CONFIG_SYSCTL 166#ifdef CONFIG_SYSCTL
167 mutex_lock(&nf_ct_proto_sysctl_mutex);
168 if (l3proto->ctl_table != NULL) { 167 if (l3proto->ctl_table != NULL) {
169 err = nf_ct_register_sysctl(&l3proto->ctl_table_header, 168 err = nf_ct_register_sysctl(&l3proto->ctl_table_header,
170 l3proto->ctl_table_path, 169 l3proto->ctl_table_path,
171 l3proto->ctl_table, NULL); 170 l3proto->ctl_table, NULL);
172 } 171 }
173 mutex_unlock(&nf_ct_proto_sysctl_mutex);
174#endif 172#endif
175 return err; 173 return err;
176} 174}
@@ -178,11 +176,9 @@ static int nf_ct_l3proto_register_sysctl(struct nf_conntrack_l3proto *l3proto)
178static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto) 176static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto)
179{ 177{
180#ifdef CONFIG_SYSCTL 178#ifdef CONFIG_SYSCTL
181 mutex_lock(&nf_ct_proto_sysctl_mutex);
182 if (l3proto->ctl_table_header != NULL) 179 if (l3proto->ctl_table_header != NULL)
183 nf_ct_unregister_sysctl(&l3proto->ctl_table_header, 180 nf_ct_unregister_sysctl(&l3proto->ctl_table_header,
184 l3proto->ctl_table, NULL); 181 l3proto->ctl_table, NULL);
185 mutex_unlock(&nf_ct_proto_sysctl_mutex);
186#endif 182#endif
187} 183}
188 184
@@ -190,27 +186,23 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
190{ 186{
191 int ret = 0; 187 int ret = 0;
192 188
193 if (proto->l3proto >= AF_MAX) { 189 if (proto->l3proto >= AF_MAX)
194 ret = -EBUSY; 190 return -EBUSY;
195 goto out;
196 }
197 191
198 write_lock_bh(&nf_conntrack_lock); 192 mutex_lock(&nf_ct_proto_mutex);
199 if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) { 193 if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) {
200 ret = -EBUSY; 194 ret = -EBUSY;
201 goto out_unlock; 195 goto out_unlock;
202 } 196 }
203 rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
204 write_unlock_bh(&nf_conntrack_lock);
205 197
206 ret = nf_ct_l3proto_register_sysctl(proto); 198 ret = nf_ct_l3proto_register_sysctl(proto);
207 if (ret < 0) 199 if (ret < 0)
208 nf_conntrack_l3proto_unregister(proto); 200 goto out_unlock;
209 return ret; 201
202 rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
210 203
211out_unlock: 204out_unlock:
212 write_unlock_bh(&nf_conntrack_lock); 205 mutex_unlock(&nf_ct_proto_mutex);
213out:
214 return ret; 206 return ret;
215} 207}
216EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register); 208EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register);
@@ -219,14 +211,14 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
219{ 211{
220 BUG_ON(proto->l3proto >= AF_MAX); 212 BUG_ON(proto->l3proto >= AF_MAX);
221 213
222 write_lock_bh(&nf_conntrack_lock); 214 mutex_lock(&nf_ct_proto_mutex);
223 BUG_ON(nf_ct_l3protos[proto->l3proto] != proto); 215 BUG_ON(nf_ct_l3protos[proto->l3proto] != proto);
224 rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], 216 rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
225 &nf_conntrack_l3proto_generic); 217 &nf_conntrack_l3proto_generic);
226 write_unlock_bh(&nf_conntrack_lock);
227 synchronize_rcu();
228
229 nf_ct_l3proto_unregister_sysctl(proto); 218 nf_ct_l3proto_unregister_sysctl(proto);
219 mutex_unlock(&nf_ct_proto_mutex);
220
221 synchronize_rcu();
230 222
231 /* Remove all conntrack entries for this protocol */ 223 /* Remove all conntrack entries for this protocol */
232 nf_ct_iterate_cleanup(kill_l3proto, proto); 224 nf_ct_iterate_cleanup(kill_l3proto, proto);
@@ -238,7 +230,6 @@ static int nf_ct_l4proto_register_sysctl(struct nf_conntrack_l4proto *l4proto)
238 int err = 0; 230 int err = 0;
239 231
240#ifdef CONFIG_SYSCTL 232#ifdef CONFIG_SYSCTL
241 mutex_lock(&nf_ct_proto_sysctl_mutex);
242 if (l4proto->ctl_table != NULL) { 233 if (l4proto->ctl_table != NULL) {
243 err = nf_ct_register_sysctl(l4proto->ctl_table_header, 234 err = nf_ct_register_sysctl(l4proto->ctl_table_header,
244 nf_net_netfilter_sysctl_path, 235 nf_net_netfilter_sysctl_path,
@@ -260,7 +251,6 @@ static int nf_ct_l4proto_register_sysctl(struct nf_conntrack_l4proto *l4proto)
260 } 251 }
261#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ 252#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
262out: 253out:
263 mutex_unlock(&nf_ct_proto_sysctl_mutex);
264#endif /* CONFIG_SYSCTL */ 254#endif /* CONFIG_SYSCTL */
265 return err; 255 return err;
266} 256}
@@ -268,7 +258,6 @@ out:
268static void nf_ct_l4proto_unregister_sysctl(struct nf_conntrack_l4proto *l4proto) 258static void nf_ct_l4proto_unregister_sysctl(struct nf_conntrack_l4proto *l4proto)
269{ 259{
270#ifdef CONFIG_SYSCTL 260#ifdef CONFIG_SYSCTL
271 mutex_lock(&nf_ct_proto_sysctl_mutex);
272 if (l4proto->ctl_table_header != NULL && 261 if (l4proto->ctl_table_header != NULL &&
273 *l4proto->ctl_table_header != NULL) 262 *l4proto->ctl_table_header != NULL)
274 nf_ct_unregister_sysctl(l4proto->ctl_table_header, 263 nf_ct_unregister_sysctl(l4proto->ctl_table_header,
@@ -279,7 +268,6 @@ static void nf_ct_l4proto_unregister_sysctl(struct nf_conntrack_l4proto *l4proto
279 nf_ct_unregister_sysctl(&l4proto->ctl_compat_table_header, 268 nf_ct_unregister_sysctl(&l4proto->ctl_compat_table_header,
280 l4proto->ctl_compat_table, NULL); 269 l4proto->ctl_compat_table, NULL);
281#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ 270#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
282 mutex_unlock(&nf_ct_proto_sysctl_mutex);
283#endif /* CONFIG_SYSCTL */ 271#endif /* CONFIG_SYSCTL */
284} 272}
285 273
@@ -289,68 +277,41 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
289{ 277{
290 int ret = 0; 278 int ret = 0;
291 279
292 if (l4proto->l3proto >= PF_MAX) { 280 if (l4proto->l3proto >= PF_MAX)
293 ret = -EBUSY; 281 return -EBUSY;
294 goto out;
295 }
296
297 if (l4proto == &nf_conntrack_l4proto_generic)
298 return nf_ct_l4proto_register_sysctl(l4proto);
299 282
300retry: 283 mutex_lock(&nf_ct_proto_mutex);
301 write_lock_bh(&nf_conntrack_lock); 284 if (!nf_ct_protos[l4proto->l3proto]) {
302 if (nf_ct_protos[l4proto->l3proto]) {
303 if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto]
304 != &nf_conntrack_l4proto_generic) {
305 ret = -EBUSY;
306 goto out_unlock;
307 }
308 } else {
309 /* l3proto may be loaded latter. */ 285 /* l3proto may be loaded latter. */
310 struct nf_conntrack_l4proto **proto_array; 286 struct nf_conntrack_l4proto **proto_array;
311 int i; 287 int i;
312 288
313 write_unlock_bh(&nf_conntrack_lock); 289 proto_array = kmalloc(MAX_NF_CT_PROTO *
314 290 sizeof(struct nf_conntrack_l4proto *),
315 proto_array = (struct nf_conntrack_l4proto **) 291 GFP_KERNEL);
316 kmalloc(MAX_NF_CT_PROTO *
317 sizeof(struct nf_conntrack_l4proto *),
318 GFP_KERNEL);
319 if (proto_array == NULL) { 292 if (proto_array == NULL) {
320 ret = -ENOMEM; 293 ret = -ENOMEM;
321 goto out; 294 goto out_unlock;
322 } 295 }
296
323 for (i = 0; i < MAX_NF_CT_PROTO; i++) 297 for (i = 0; i < MAX_NF_CT_PROTO; i++)
324 proto_array[i] = &nf_conntrack_l4proto_generic; 298 proto_array[i] = &nf_conntrack_l4proto_generic;
325 299 nf_ct_protos[l4proto->l3proto] = proto_array;
326 write_lock_bh(&nf_conntrack_lock); 300 } else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] !=
327 if (nf_ct_protos[l4proto->l3proto]) { 301 &nf_conntrack_l4proto_generic) {
328 /* bad timing, but no problem */ 302 ret = -EBUSY;
329 write_unlock_bh(&nf_conntrack_lock); 303 goto out_unlock;
330 kfree(proto_array);
331 } else {
332 nf_ct_protos[l4proto->l3proto] = proto_array;
333 write_unlock_bh(&nf_conntrack_lock);
334 }
335
336 /*
337 * Just once because array is never freed until unloading
338 * nf_conntrack.ko
339 */
340 goto retry;
341 } 304 }
342 305
343 rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], l4proto);
344 write_unlock_bh(&nf_conntrack_lock);
345
346 ret = nf_ct_l4proto_register_sysctl(l4proto); 306 ret = nf_ct_l4proto_register_sysctl(l4proto);
347 if (ret < 0) 307 if (ret < 0)
348 nf_conntrack_l4proto_unregister(l4proto); 308 goto out_unlock;
349 return ret; 309
310 rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
311 l4proto);
350 312
351out_unlock: 313out_unlock:
352 write_unlock_bh(&nf_conntrack_lock); 314 mutex_unlock(&nf_ct_proto_mutex);
353out:
354 return ret; 315 return ret;
355} 316}
356EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register); 317EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register);
@@ -359,21 +320,42 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
359{ 320{
360 BUG_ON(l4proto->l3proto >= PF_MAX); 321 BUG_ON(l4proto->l3proto >= PF_MAX);
361 322
362 if (l4proto == &nf_conntrack_l4proto_generic) { 323 mutex_lock(&nf_ct_proto_mutex);
363 nf_ct_l4proto_unregister_sysctl(l4proto);
364 return;
365 }
366
367 write_lock_bh(&nf_conntrack_lock);
368 BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto); 324 BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto);
369 rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], 325 rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
370 &nf_conntrack_l4proto_generic); 326 &nf_conntrack_l4proto_generic);
371 write_unlock_bh(&nf_conntrack_lock);
372 synchronize_rcu();
373
374 nf_ct_l4proto_unregister_sysctl(l4proto); 327 nf_ct_l4proto_unregister_sysctl(l4proto);
328 mutex_unlock(&nf_ct_proto_mutex);
329
330 synchronize_rcu();
375 331
376 /* Remove all conntrack entries for this protocol */ 332 /* Remove all conntrack entries for this protocol */
377 nf_ct_iterate_cleanup(kill_l4proto, l4proto); 333 nf_ct_iterate_cleanup(kill_l4proto, l4proto);
378} 334}
379EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister); 335EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister);
336
337int nf_conntrack_proto_init(void)
338{
339 unsigned int i;
340 int err;
341
342 err = nf_ct_l4proto_register_sysctl(&nf_conntrack_l4proto_generic);
343 if (err < 0)
344 return err;
345
346 for (i = 0; i < AF_MAX; i++)
347 rcu_assign_pointer(nf_ct_l3protos[i],
348 &nf_conntrack_l3proto_generic);
349 return 0;
350}
351
352void nf_conntrack_proto_fini(void)
353{
354 unsigned int i;
355
356 nf_ct_l4proto_unregister_sysctl(&nf_conntrack_l4proto_generic);
357
358 /* free l3proto protocol tables */
359 for (i = 0; i < PF_MAX; i++)
360 kfree(nf_ct_protos[i]);
361}
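
The rewritten registration functions follow a strict order under the new sleeping nf_ct_proto_mutex: validate, allocate, register the sysctl table, and only then rcu_assign_pointer() the protocol into the live table, so readers can never observe a half-registered protocol. Unregistration swaps the generic protocol back in, drops the mutex, and only then waits in synchronize_rcu() before cleaning up entries. A compressed userspace analogue of the publish-last rule (pthread mutex plus a release store standing in for the RCU publish):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    struct l4proto { const char *name; };

    static struct l4proto generic = { "generic" };
    static _Atomic(struct l4proto *) proto_slot = &generic;
    static pthread_mutex_t proto_mutex = PTHREAD_MUTEX_INITIALIZER;

    static int register_proto(struct l4proto *p)
    {
        int ret = 0;

        pthread_mutex_lock(&proto_mutex);
        if (atomic_load(&proto_slot) != &generic) {
            ret = -1;  /* ~ -EBUSY: slot already taken */
            goto out_unlock;
        }
        /* sysctl registration etc. happens here, still unpublished */
        atomic_store_explicit(&proto_slot, p, memory_order_release);
    out_unlock:
        pthread_mutex_unlock(&proto_mutex);
        return ret;
    }

    int main(void)
    {
        struct l4proto tcp = { "tcp" };

        printf("first:  %d\n", register_proto(&tcp));  /* 0  */
        printf("second: %d\n", register_proto(&tcp));  /* -1 */
        printf("slot:   %s\n", atomic_load(&proto_slot)->name);
        return 0;
    }
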
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 7c069939695a..6faf1bed7224 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -4,11 +4,6 @@
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation. 6 * published by the Free Software Foundation.
7 *
8 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
9 * - enable working with L3 protocol independent connection tracking.
10 *
11 * Derived from net/ipv4/netfilter/ip_conntrack_proto_generic.c
12 */ 7 */
13 8
14#include <linux/types.h> 9#include <linux/types.h>
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 3c80558716a0..0d3254b974c5 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -7,15 +7,6 @@
7 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation. 9 * published by the Free Software Foundation.
10 *
11 * 17 Oct 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
12 * - enable working with L3 protocol independent connection tracking.
13 *
14 * Derived from net/ipv4/ip_conntrack_sctp.c
15 */
16
17/*
18 * Added support for proc manipulation of timeouts.
19 */ 10 */
20 11
21#include <linux/types.h> 12#include <linux/types.h>
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 153d6619993a..ccdd5d231e0d 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -4,24 +4,6 @@
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation. 6 * published by the Free Software Foundation.
7 *
8 * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>:
9 * - Real stateful connection tracking
10 * - Modified state transitions table
11 * - Window scaling support added
12 * - SACK support added
13 *
14 * Willy Tarreau:
15 * - State table bugfixes
16 * - More robust state changes
17 * - Tuning timer parameters
18 *
19 * 27 Oct 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
20 * - generalized Layer 3 protocol part.
21 *
22 * Derived from net/ipv4/netfilter/ip_conntrack_proto_tcp.c
23 *
24 * version 2.2
25 */ 7 */
26 8
27#include <linux/types.h> 9#include <linux/types.h>
@@ -470,11 +452,10 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
470 452
471 /* Fast path for timestamp-only option */ 453 /* Fast path for timestamp-only option */
472 if (length == TCPOLEN_TSTAMP_ALIGNED*4 454 if (length == TCPOLEN_TSTAMP_ALIGNED*4
473 && *(__be32 *)ptr == 455 && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
474 __constant_htonl((TCPOPT_NOP << 24) 456 | (TCPOPT_NOP << 16)
475 | (TCPOPT_NOP << 16) 457 | (TCPOPT_TIMESTAMP << 8)
476 | (TCPOPT_TIMESTAMP << 8) 458 | TCPOLEN_TIMESTAMP))
477 | TCPOLEN_TIMESTAMP))
478 return; 459 return;
479 460
480 while (length > 0) { 461 while (length > 0) {
@@ -765,26 +746,18 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update);
765#define TH_ECE 0x40 746#define TH_ECE 0x40
766#define TH_CWR 0x80 747#define TH_CWR 0x80
767 748
768/* table of valid flag combinations - ECE and CWR are always valid */ 749/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
769static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] = 750static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] =
770{ 751{
771 [TH_SYN] = 1, 752 [TH_SYN] = 1,
772 [TH_SYN|TH_PUSH] = 1,
773 [TH_SYN|TH_URG] = 1, 753 [TH_SYN|TH_URG] = 1,
774 [TH_SYN|TH_PUSH|TH_URG] = 1,
775 [TH_SYN|TH_ACK] = 1, 754 [TH_SYN|TH_ACK] = 1,
776 [TH_SYN|TH_ACK|TH_PUSH] = 1,
777 [TH_RST] = 1, 755 [TH_RST] = 1,
778 [TH_RST|TH_ACK] = 1, 756 [TH_RST|TH_ACK] = 1,
779 [TH_RST|TH_ACK|TH_PUSH] = 1,
780 [TH_FIN|TH_ACK] = 1, 757 [TH_FIN|TH_ACK] = 1,
758 [TH_FIN|TH_ACK|TH_URG] = 1,
781 [TH_ACK] = 1, 759 [TH_ACK] = 1,
782 [TH_ACK|TH_PUSH] = 1,
783 [TH_ACK|TH_URG] = 1, 760 [TH_ACK|TH_URG] = 1,
784 [TH_ACK|TH_URG|TH_PUSH] = 1,
785 [TH_FIN|TH_ACK|TH_PUSH] = 1,
786 [TH_FIN|TH_ACK|TH_URG] = 1,
787 [TH_FIN|TH_ACK|TH_URG|TH_PUSH] = 1,
788}; 761};
789 762
790/* Protect conntrack against broken packets. Code taken from ipt_unclean.c. */ 763
@@ -831,7 +804,7 @@ static int tcp_error(struct sk_buff *skb,
831 } 804 }
832 805
833 /* Check TCP flags. */ 806 /* Check TCP flags. */
834 tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR)); 807 tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH));
835 if (!tcp_valid_flags[tcpflags]) { 808 if (!tcp_valid_flags[tcpflags]) {
836 if (LOG_INVALID(IPPROTO_TCP)) 809 if (LOG_INVALID(IPPROTO_TCP))
837 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 810 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
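
With PSH now treated like ECE and CWR, the validity table no longer needs a PUSH variant of every combination: it shrinks from seventeen entries to nine by dropping the PUSH permutations, and tcp_error() masks TH_PUSH out before the lookup. The new check, runnable in isolation with the constants and table copied from the hunk above:

    #include <stdint.h>
    #include <stdio.h>

    #define TH_FIN  0x01
    #define TH_SYN  0x02
    #define TH_RST  0x04
    #define TH_PUSH 0x08
    #define TH_ACK  0x10
    #define TH_URG  0x20
    #define TH_ECE  0x40
    #define TH_CWR  0x80

    /* PUSH, ECE and CWR are always valid, so they never index the table */
    static uint8_t tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] = {
        [TH_SYN]                 = 1,
        [TH_SYN|TH_URG]          = 1,
        [TH_SYN|TH_ACK]          = 1,
        [TH_RST]                 = 1,
        [TH_RST|TH_ACK]          = 1,
        [TH_FIN|TH_ACK]          = 1,
        [TH_FIN|TH_ACK|TH_URG]   = 1,
        [TH_ACK]                 = 1,
        [TH_ACK|TH_URG]          = 1,
    };

    int main(void)
    {
        uint8_t flags = TH_PUSH | TH_ACK;  /* an ordinary PSH,ACK segment */
        uint8_t masked = flags & ~(TH_ECE | TH_CWR | TH_PUSH);

        printf("valid: %d\n", tcp_valid_flags[masked]);  /* 1 */
        return 0;
    }
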
@@ -1110,11 +1083,26 @@ static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
1110 const struct nf_conn *ct) 1083 const struct nf_conn *ct)
1111{ 1084{
1112 struct nfattr *nest_parms; 1085 struct nfattr *nest_parms;
1086 struct nf_ct_tcp_flags tmp = {};
1113 1087
1114 read_lock_bh(&tcp_lock); 1088 read_lock_bh(&tcp_lock);
1115 nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP); 1089 nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
1116 NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), 1090 NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
1117 &ct->proto.tcp.state); 1091 &ct->proto.tcp.state);
1092
1093 NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, sizeof(u_int8_t),
1094 &ct->proto.tcp.seen[0].td_scale);
1095
1096 NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, sizeof(u_int8_t),
1097 &ct->proto.tcp.seen[1].td_scale);
1098
1099 tmp.flags = ct->proto.tcp.seen[0].flags;
1100 NFA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1101 sizeof(struct nf_ct_tcp_flags), &tmp);
1102
1103 tmp.flags = ct->proto.tcp.seen[1].flags;
1104 NFA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1105 sizeof(struct nf_ct_tcp_flags), &tmp);
1118 read_unlock_bh(&tcp_lock); 1106 read_unlock_bh(&tcp_lock);
1119 1107
1120 NFA_NEST_END(skb, nest_parms); 1108 NFA_NEST_END(skb, nest_parms);
@@ -1127,7 +1115,11 @@ nfattr_failure:
1127} 1115}
1128 1116
1129static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = { 1117static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
1130 [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t), 1118 [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t),
1119 [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1] = sizeof(u_int8_t),
1120 [CTA_PROTOINFO_TCP_WSCALE_REPLY-1] = sizeof(u_int8_t),
1121 [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1] = sizeof(struct nf_ct_tcp_flags),
1122 [CTA_PROTOINFO_TCP_FLAGS_REPLY-1] = sizeof(struct nf_ct_tcp_flags)
1131}; 1123};
1132 1124
1133static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct) 1125static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct)
@@ -1151,6 +1143,30 @@ static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct)
1151 write_lock_bh(&tcp_lock); 1143 write_lock_bh(&tcp_lock);
1152 ct->proto.tcp.state = 1144 ct->proto.tcp.state =
1153 *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]); 1145 *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
1146
1147 if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]) {
1148 struct nf_ct_tcp_flags *attr =
1149 NFA_DATA(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]);
1150 ct->proto.tcp.seen[0].flags &= ~attr->mask;
1151 ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1152 }
1153
1154 if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY-1]) {
1155 struct nf_ct_tcp_flags *attr =
1156 NFA_DATA(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY-1]);
1157 ct->proto.tcp.seen[1].flags &= ~attr->mask;
1158 ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1159 }
1160
1161 if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1] &&
1162 tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1] &&
1163 ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1164 ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1165 ct->proto.tcp.seen[0].td_scale = *(u_int8_t *)
1166 NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1]);
1167 ct->proto.tcp.seen[1].td_scale = *(u_int8_t *)
1168 NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1]);
1169 }
1154 write_unlock_bh(&tcp_lock); 1170 write_unlock_bh(&tcp_lock);
1155 1171
1156 return 0; 1172 return 0;
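
The new CTA_PROTOINFO_TCP_FLAGS_* attributes carry a (flags, mask) pair, so userspace can update selected per-direction flag bits without clobbering the rest, and the window-scale factors are only accepted when both directions have window scaling negotiated. The merge nfattr_to_tcp() applies is the classic clear-then-set, shown here standalone (the two-byte struct mirrors struct nf_ct_tcp_flags):

    #include <stdint.h>
    #include <stdio.h>

    struct nf_ct_tcp_flags {
        uint8_t flags;  /* new bit values */
        uint8_t mask;   /* which bits to touch */
    };

    int main(void)
    {
        uint8_t seen_flags = 0x05;
        struct nf_ct_tcp_flags attr = { .flags = 0x02, .mask = 0x03 };

        seen_flags &= ~attr.mask;              /* clear the selected bits */
        seen_flags |= attr.flags & attr.mask;  /* set their new values    */
        printf("flags: 0x%02x\n", seen_flags); /* 0x06 */
        return 0;
    }
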
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index a5e5726ec0c7..3620ecc095fd 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -4,11 +4,6 @@
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation. 6 * published by the Free Software Foundation.
7 *
8 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
9 * - enable working with Layer 3 protocol independent connection tracking.
10 *
11 * Derived from net/ipv4/netfilter/ip_conntrack_proto_udp.c
12 */ 7 */
13 8
14#include <linux/types.h> 9#include <linux/types.h>
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index b8586360e519..45baeb0e30f9 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -1,20 +1,9 @@
1/* This file contains all the functions required for the standalone
2 nf_conntrack module.
3
4 These are not required by the compatibility layer.
5*/
6
7/* (C) 1999-2001 Paul `Rusty' Russell 1/* (C) 1999-2001 Paul `Rusty' Russell
8 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
9 * 3 *
10 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation. 6 * published by the Free Software Foundation.
13 *
14 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
15 * - generalize L3 protocol dependent part.
16 *
17 * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c
18 */ 7 */
19 8
20#include <linux/types.h> 9#include <linux/types.h>
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index bf23e489e4cd..8797e6953ef2 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * (C) 2001 by Jay Schulist <jschlst@samba.org>, 4 * (C) 2001 by Jay Schulist <jschlst@samba.org>,
5 * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org> 5 * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org>
6 * (C) 2005 by Pablo Neira Ayuso <pablo@eurodev.net> 6 * (C) 2005,2007 by Pablo Neira Ayuso <pablo@netfilter.org>
7 * 7 *
8 * Initial netfilter messages via netlink development funded and 8 * Initial netfilter messages via netlink development funded and
9 * generally made possible by Network Robots, Inc. (www.networkrobots.com) 9 * generally made possible by Network Robots, Inc. (www.networkrobots.com)
@@ -28,10 +28,9 @@
28#include <asm/uaccess.h> 28#include <asm/uaccess.h>
29#include <asm/system.h> 29#include <asm/system.h>
30#include <net/sock.h> 30#include <net/sock.h>
31#include <net/netlink.h>
31#include <linux/init.h> 32#include <linux/init.h>
32#include <linux/spinlock.h>
33 33
34#include <linux/netfilter.h>
35#include <linux/netlink.h> 34#include <linux/netlink.h>
36#include <linux/netfilter/nfnetlink.h> 35#include <linux/netfilter/nfnetlink.h>
37 36
@@ -41,32 +40,34 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
41 40
42static char __initdata nfversion[] = "0.30"; 41static char __initdata nfversion[] = "0.30";
43 42
44#if 0
45#define DEBUGP(format, args...) \
46 printk(KERN_DEBUG "%s(%d):%s(): " format, __FILE__, \
47 __LINE__, __FUNCTION__, ## args)
48#else
49#define DEBUGP(format, args...)
50#endif
51
52static struct sock *nfnl = NULL; 43static struct sock *nfnl = NULL;
53static struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT]; 44static struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT];
54DECLARE_MUTEX(nfnl_sem); 45static DEFINE_MUTEX(nfnl_mutex);
55 46
56void nfnl_lock(void) 47static void nfnl_lock(void)
57{ 48{
58 nfnl_shlock(); 49 mutex_lock(&nfnl_mutex);
59} 50}
60 51
61void nfnl_unlock(void) 52static int nfnl_trylock(void)
62{ 53{
63 nfnl_shunlock(); 54 return !mutex_trylock(&nfnl_mutex);
64} 55}
65 56
66int nfnetlink_subsys_register(struct nfnetlink_subsystem *n) 57static void __nfnl_unlock(void)
67{ 58{
68 DEBUGP("registering subsystem ID %u\n", n->subsys_id); 59 mutex_unlock(&nfnl_mutex);
60}
61
62static void nfnl_unlock(void)
63{
64 mutex_unlock(&nfnl_mutex);
65 if (nfnl->sk_receive_queue.qlen)
66 nfnl->sk_data_ready(nfnl, 0);
67}
69 68
69int nfnetlink_subsys_register(struct nfnetlink_subsystem *n)
70{
70 nfnl_lock(); 71 nfnl_lock();
71 if (subsys_table[n->subsys_id]) { 72 if (subsys_table[n->subsys_id]) {
72 nfnl_unlock(); 73 nfnl_unlock();
@@ -77,24 +78,23 @@ int nfnetlink_subsys_register(struct nfnetlink_subsystem *n)
77 78
78 return 0; 79 return 0;
79} 80}
81EXPORT_SYMBOL_GPL(nfnetlink_subsys_register);
80 82
81int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n) 83int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n)
82{ 84{
83 DEBUGP("unregistering subsystem ID %u\n", n->subsys_id);
84
85 nfnl_lock(); 85 nfnl_lock();
86 subsys_table[n->subsys_id] = NULL; 86 subsys_table[n->subsys_id] = NULL;
87 nfnl_unlock(); 87 nfnl_unlock();
88 88
89 return 0; 89 return 0;
90} 90}
91EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister);
91 92
92static inline struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type) 93static inline struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type)
93{ 94{
94 u_int8_t subsys_id = NFNL_SUBSYS_ID(type); 95 u_int8_t subsys_id = NFNL_SUBSYS_ID(type);
95 96
96 if (subsys_id >= NFNL_SUBSYS_COUNT 97 if (subsys_id >= NFNL_SUBSYS_COUNT)
97 || subsys_table[subsys_id] == NULL)
98 return NULL; 98 return NULL;
99 99
100 return subsys_table[subsys_id]; 100 return subsys_table[subsys_id];
@@ -105,10 +105,8 @@ nfnetlink_find_client(u_int16_t type, struct nfnetlink_subsystem *ss)
105{ 105{
106 u_int8_t cb_id = NFNL_MSG_TYPE(type); 106 u_int8_t cb_id = NFNL_MSG_TYPE(type);
107 107
108 if (cb_id >= ss->cb_count) { 108 if (cb_id >= ss->cb_count)
109 DEBUGP("msgtype %u >= %u, returning\n", type, ss->cb_count);
110 return NULL; 109 return NULL;
111 }
112 110
113 return &ss->cb[cb_id]; 111 return &ss->cb[cb_id];
114} 112}
@@ -125,6 +123,7 @@ void __nfa_fill(struct sk_buff *skb, int attrtype, int attrlen,
125 memcpy(NFA_DATA(nfa), data, attrlen); 123 memcpy(NFA_DATA(nfa), data, attrlen);
126 memset(NFA_DATA(nfa) + attrlen, 0, NFA_ALIGN(size) - size); 124 memset(NFA_DATA(nfa) + attrlen, 0, NFA_ALIGN(size) - size);
127} 125}
126EXPORT_SYMBOL_GPL(__nfa_fill);
128 127
129void nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len) 128void nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len)
130{ 129{
@@ -137,6 +136,7 @@ void nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len)
137 nfa = NFA_NEXT(nfa, len); 136 nfa = NFA_NEXT(nfa, len);
138 } 137 }
139} 138}
139EXPORT_SYMBOL_GPL(nfattr_parse);
140 140
141/** 141/**
142 * nfnetlink_check_attributes - check and parse nfnetlink attributes 142 * nfnetlink_check_attributes - check and parse nfnetlink attributes
@@ -150,37 +150,15 @@ static int
150nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys, 150nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys,
151 struct nlmsghdr *nlh, struct nfattr *cda[]) 151 struct nlmsghdr *nlh, struct nfattr *cda[])
152{ 152{
153 int min_len; 153 int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
154 u_int16_t attr_count;
155 u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); 154 u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
156 155 u_int16_t attr_count = subsys->cb[cb_id].attr_count;
157 if (unlikely(cb_id >= subsys->cb_count)) {
158 DEBUGP("msgtype %u >= %u, returning\n",
159 cb_id, subsys->cb_count);
160 return -EINVAL;
161 }
162
163 min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
164 if (unlikely(nlh->nlmsg_len < min_len))
165 return -EINVAL;
166
167 attr_count = subsys->cb[cb_id].attr_count;
168 memset(cda, 0, sizeof(struct nfattr *) * attr_count);
169 156
170 /* check attribute lengths. */ 157 /* check attribute lengths. */
171 if (likely(nlh->nlmsg_len > min_len)) { 158 if (likely(nlh->nlmsg_len > min_len)) {
172 struct nfattr *attr = NFM_NFA(NLMSG_DATA(nlh)); 159 struct nfattr *attr = NFM_NFA(NLMSG_DATA(nlh));
173 int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); 160 int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
174 161 nfattr_parse(cda, attr_count, attr, attrlen);
175 while (NFA_OK(attr, attrlen)) {
176 unsigned flavor = NFA_TYPE(attr);
177 if (flavor) {
178 if (flavor > attr_count)
179 return -EINVAL;
180 cda[flavor - 1] = attr;
181 }
182 attr = NFA_NEXT(attr, attrlen);
183 }
184 } 162 }
185 163
186 /* implicit: if nlmsg_len == min_len, we return 0, and an empty 164 /* implicit: if nlmsg_len == min_len, we return 0, and an empty
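
nfnetlink_check_attributes() sheds its open-coded attribute walk, along with the cb_id and length re-checks that nfnetlink_rcv_msg() and nfnetlink_find_client() already perform, and calls the existing nfattr_parse() instead. For readers unfamiliar with the helper, the essence of such a TLV walk in a standalone sketch (the 4-byte header and alignment are illustrative simplifications of struct nfattr and NFA_ALIGN):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct attr {
        uint16_t len;   /* header plus payload, unaligned */
        uint16_t type;
    };

    #define ALIGN4(n) (((n) + 3u) & ~3u)

    /* toy nfattr_parse(): index each attribute by type into tb[] */
    static void parse(const struct attr *tb[], int maxattr,
                      const unsigned char *buf, unsigned int buflen)
    {
        memset(tb, 0, sizeof(tb[0]) * (size_t)maxattr);
        while (buflen >= sizeof(struct attr)) {
            const struct attr *a = (const struct attr *)buf;
            unsigned int step;

            if (a->len < sizeof(*a) || a->len > buflen)
                break;
            if (a->type != 0 && a->type <= (unsigned int)maxattr)
                tb[a->type - 1] = a;  /* last one of a type wins */
            step = ALIGN4(a->len);
            if (step >= buflen)
                break;
            buf += step;
            buflen -= step;
        }
    }

    int main(void)
    {
        uint32_t store[8] = { 0 };
        unsigned char *buf = (unsigned char *)store;
        struct attr *a = (struct attr *)buf;
        const struct attr *tb[4];

        a->len = sizeof(*a) + 2; a->type = 1;   /* attribute #1 */
        a = (struct attr *)(buf + ALIGN4(a->len));
        a->len = sizeof(*a) + 4; a->type = 2;   /* attribute #2 */

        parse(tb, 4, buf, 16);
        printf("type1 %s, type2 %s\n",
               tb[0] ? "present" : "absent",
               tb[1] ? "present" : "absent");
        return 0;
    }
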
@@ -208,62 +186,46 @@ int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
208 186
209 return err; 187 return err;
210} 188}
189EXPORT_SYMBOL_GPL(nfnetlink_send);
211 190
212int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags) 191int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags)
213{ 192{
214 return netlink_unicast(nfnl, skb, pid, flags); 193 return netlink_unicast(nfnl, skb, pid, flags);
215} 194}
195EXPORT_SYMBOL_GPL(nfnetlink_unicast);
216 196
217/* Process one complete nfnetlink message. */ 197/* Process one complete nfnetlink message. */
218static int nfnetlink_rcv_msg(struct sk_buff *skb, 198static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
219 struct nlmsghdr *nlh, int *errp)
220{ 199{
221 struct nfnl_callback *nc; 200 struct nfnl_callback *nc;
222 struct nfnetlink_subsystem *ss; 201 struct nfnetlink_subsystem *ss;
223 int type, err = 0; 202 int type, err;
224
225 DEBUGP("entered; subsys=%u, msgtype=%u\n",
226 NFNL_SUBSYS_ID(nlh->nlmsg_type),
227 NFNL_MSG_TYPE(nlh->nlmsg_type));
228
229 if (security_netlink_recv(skb, CAP_NET_ADMIN)) {
230 DEBUGP("missing CAP_NET_ADMIN\n");
231 *errp = -EPERM;
232 return -1;
233 }
234 203
235 /* Only requests are handled by kernel now. */ 204 if (security_netlink_recv(skb, CAP_NET_ADMIN))
236 if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) { 205 return -EPERM;
237 DEBUGP("received non-request message\n");
238 return 0;
239 }
240 206
241 /* All the messages must at least contain nfgenmsg */ 207 /* All the messages must at least contain nfgenmsg */
242 if (nlh->nlmsg_len < NLMSG_SPACE(sizeof(struct nfgenmsg))) { 208 if (nlh->nlmsg_len < NLMSG_SPACE(sizeof(struct nfgenmsg)))
243 DEBUGP("received message was too short\n");
244 return 0; 209 return 0;
245 }
246 210
247 type = nlh->nlmsg_type; 211 type = nlh->nlmsg_type;
248 ss = nfnetlink_get_subsys(type); 212 ss = nfnetlink_get_subsys(type);
249 if (!ss) { 213 if (!ss) {
250#ifdef CONFIG_KMOD 214#ifdef CONFIG_KMOD
251 /* don't call nfnl_shunlock, since it would reenter 215 /* don't call nfnl_unlock, since it would reenter
252 * with further packet processing */ 216 * with further packet processing */
253 up(&nfnl_sem); 217 __nfnl_unlock();
254 request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type)); 218 request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type));
255 nfnl_shlock(); 219 nfnl_lock();
256 ss = nfnetlink_get_subsys(type); 220 ss = nfnetlink_get_subsys(type);
257 if (!ss) 221 if (!ss)
258#endif 222#endif
259 goto err_inval; 223 return -EINVAL;
260 } 224 }
261 225
262 nc = nfnetlink_find_client(type, ss); 226 nc = nfnetlink_find_client(type, ss);
263 if (!nc) { 227 if (!nc)
264 DEBUGP("unable to find client for type %d\n", type); 228 return -EINVAL;
265 goto err_inval;
266 }
267 229
268 { 230 {
269 u_int16_t attr_count = 231 u_int16_t attr_count =
@@ -274,73 +236,21 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb,
274 236
275 err = nfnetlink_check_attributes(ss, nlh, cda); 237 err = nfnetlink_check_attributes(ss, nlh, cda);
276 if (err < 0) 238 if (err < 0)
277 goto err_inval; 239 return err;
278 240 return nc->call(nfnl, skb, nlh, cda);
279 DEBUGP("calling handler\n");
280 err = nc->call(nfnl, skb, nlh, cda, errp);
281 *errp = err;
282 return err;
283 }
284
285err_inval:
286 DEBUGP("returning -EINVAL\n");
287 *errp = -EINVAL;
288 return -1;
289}
290
291/* Process one packet of messages. */
292static inline int nfnetlink_rcv_skb(struct sk_buff *skb)
293{
294 int err;
295 struct nlmsghdr *nlh;
296
297 while (skb->len >= NLMSG_SPACE(0)) {
298 u32 rlen;
299
300 nlh = (struct nlmsghdr *)skb->data;
301 if (nlh->nlmsg_len < sizeof(struct nlmsghdr)
302 || skb->len < nlh->nlmsg_len)
303 return 0;
304 rlen = NLMSG_ALIGN(nlh->nlmsg_len);
305 if (rlen > skb->len)
306 rlen = skb->len;
307 if (nfnetlink_rcv_msg(skb, nlh, &err)) {
308 if (!err)
309 return -1;
310 netlink_ack(skb, nlh, err);
311 } else
312 if (nlh->nlmsg_flags & NLM_F_ACK)
313 netlink_ack(skb, nlh, 0);
314 skb_pull(skb, rlen);
315 } 241 }
316
317 return 0;
318} 242}
319 243
320static void nfnetlink_rcv(struct sock *sk, int len) 244static void nfnetlink_rcv(struct sock *sk, int len)
321{ 245{
322 do { 246 unsigned int qlen = 0;
323 struct sk_buff *skb;
324 247
325 if (nfnl_shlock_nowait()) 248 do {
249 if (nfnl_trylock())
326 return; 250 return;
327 251 netlink_run_queue(sk, &qlen, nfnetlink_rcv_msg);
328 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { 252 __nfnl_unlock();
329 if (nfnetlink_rcv_skb(skb)) { 253 } while (qlen);
330 if (skb->len)
331 skb_queue_head(&sk->sk_receive_queue,
332 skb);
333 else
334 kfree_skb(skb);
335 break;
336 }
337 kfree_skb(skb);
338 }
339
340 /* don't call nfnl_shunlock, since it would reenter
341 * with further packet processing */
342 up(&nfnl_sem);
343 } while(nfnl && nfnl->sk_receive_queue.qlen);
344} 254}
345 255
346static void __exit nfnetlink_exit(void) 256static void __exit nfnetlink_exit(void)
@@ -355,7 +265,7 @@ static int __init nfnetlink_init(void)
355 printk("Netfilter messages via NETLINK v%s.\n", nfversion); 265 printk("Netfilter messages via NETLINK v%s.\n", nfversion);
356 266
357 nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX, 267 nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX,
358 nfnetlink_rcv, THIS_MODULE); 268 nfnetlink_rcv, NULL, THIS_MODULE);
359 if (!nfnl) { 269 if (!nfnl) {
360 printk(KERN_ERR "cannot initialize nfnetlink!\n"); 270 printk(KERN_ERR "cannot initialize nfnetlink!\n");
361 return -1; 271 return -1;
@@ -366,10 +276,3 @@ static int __init nfnetlink_init(void)
366 276
367module_init(nfnetlink_init); 277module_init(nfnetlink_init);
368module_exit(nfnetlink_exit); 278module_exit(nfnetlink_exit);
369
370EXPORT_SYMBOL_GPL(nfnetlink_subsys_register);
371EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister);
372EXPORT_SYMBOL_GPL(nfnetlink_send);
373EXPORT_SYMBOL_GPL(nfnetlink_unicast);
374EXPORT_SYMBOL_GPL(nfattr_parse);
375EXPORT_SYMBOL_GPL(__nfa_fill);
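
The nfnetlink receive path is rebuilt around a real mutex: the old DECLARE_MUTEX() semaphore becomes DEFINE_MUTEX(nfnl_mutex), nfnetlink_rcv() collapses into a trylock-and-drain loop over netlink_run_queue(), and the full nfnl_unlock() re-kicks sk_data_ready() if messages arrived while the lock was held. A userspace analogue of the trylock-and-drain shape (pthread mutex, with a counter standing in for the socket queue):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t nfnl_mutex = PTHREAD_MUTEX_INITIALIZER;
    static unsigned int queued = 3;  /* pretend three messages are waiting */

    /* mirrors nfnl_trylock(): nonzero means someone else holds the lock */
    static int nfnl_trylock(void)
    {
        return pthread_mutex_trylock(&nfnl_mutex) != 0;
    }

    static void __nfnl_unlock(void)
    {
        pthread_mutex_unlock(&nfnl_mutex);
    }

    /* the shape of the new nfnetlink_rcv(): bail out if busy, else drain */
    static void nfnetlink_rcv(void)
    {
        unsigned int qlen;

        do {
            if (nfnl_trylock())
                return;  /* current holder will re-run us on unlock */
            qlen = queued > 2 ? 2 : queued;  /* ~ one run_queue batch */
            queued -= qlen;
            printf("processed %u, %u left\n", qlen, queued);
            __nfnl_unlock();
        } while (qlen);
    }

    int main(void)
    {
        nfnetlink_rcv();
        return 0;
    }
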
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 5cb30ebba0f4..e32e30e7a17c 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -10,11 +10,6 @@
10 * This program is free software; you can redistribute it and/or modify 10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as 11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation. 12 * published by the Free Software Foundation.
13 *
14 * 2006-01-26 Harald Welte <laforge@netfilter.org>
15 * - Add optional local and global sequence number to detect lost
16 * events from userspace
17 *
18 */ 13 */
19#include <linux/module.h> 14#include <linux/module.h>
20#include <linux/skbuff.h> 15#include <linux/skbuff.h>
@@ -163,10 +158,7 @@ instance_create(u_int16_t group_num, int pid)
163 /* needs to be two, since we _put() after creation */ 158 /* needs to be two, since we _put() after creation */
164 atomic_set(&inst->use, 2); 159 atomic_set(&inst->use, 2);
165 160
166 init_timer(&inst->timer); 161 setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst);
167 inst->timer.function = nfulnl_timer;
168 inst->timer.data = (unsigned long)inst;
169 /* don't start timer yet. (re)start it with every packet */
170 162
171 inst->peer_pid = pid; 163 inst->peer_pid = pid;
172 inst->group_num = group_num; 164 inst->group_num = group_num;
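
setup_timer() folds init_timer() plus the manual ->function/->data assignments into one call; the hunk above is a mechanical conversion. A sketch of the equivalence, with my_callback and priv as illustrative names:

#include <linux/timer.h>

static void my_callback(unsigned long data);		/* hypothetical */

static void timer_setup_example(struct timer_list *t, void *priv)
{
	/* open-coded form, as removed above */
	init_timer(t);
	t->function = my_callback;
	t->data = (unsigned long)priv;

	/* equivalent single call, as added above */
	setup_timer(t, my_callback, (unsigned long)priv);
}
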
@@ -200,20 +192,14 @@ out_unlock:
200static int __nfulnl_send(struct nfulnl_instance *inst); 192static int __nfulnl_send(struct nfulnl_instance *inst);
201 193
202static void 194static void
203_instance_destroy2(struct nfulnl_instance *inst, int lock) 195__instance_destroy(struct nfulnl_instance *inst)
204{ 196{
205 /* first pull it out of the global list */ 197 /* first pull it out of the global list */
206 if (lock)
207 write_lock_bh(&instances_lock);
208
209 UDEBUG("removing instance %p (queuenum=%u) from hash\n", 198 UDEBUG("removing instance %p (queuenum=%u) from hash\n",
210 inst, inst->group_num); 199 inst, inst->group_num);
211 200
212 hlist_del(&inst->hlist); 201 hlist_del(&inst->hlist);
213 202
214 if (lock)
215 write_unlock_bh(&instances_lock);
216
217 /* then flush all pending packets from skb */ 203 /* then flush all pending packets from skb */
218 204
219 spin_lock_bh(&inst->lock); 205 spin_lock_bh(&inst->lock);
@@ -235,15 +221,11 @@ _instance_destroy2(struct nfulnl_instance *inst, int lock)
235} 221}
236 222
237static inline void 223static inline void
238__instance_destroy(struct nfulnl_instance *inst)
239{
240 _instance_destroy2(inst, 0);
241}
242
243static inline void
244instance_destroy(struct nfulnl_instance *inst) 224instance_destroy(struct nfulnl_instance *inst)
245{ 225{
246 _instance_destroy2(inst, 1); 226 write_lock_bh(&instances_lock);
227 __instance_destroy(inst);
228 write_unlock_bh(&instances_lock);
247} 229}
248 230
249static int 231static int
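
The conditional-locking parameter of _instance_destroy2() gives way above to the conventional split between a lock-free __instance_destroy() and a locking instance_destroy() wrapper, which makes the locking requirement visible at every call site. The general idiom, sketched with hypothetical widget/widget_lock names:

#include <linux/list.h>
#include <linux/spinlock.h>

struct widget { struct hlist_node hlist; };		/* hypothetical */
static DEFINE_RWLOCK(widget_lock);			/* hypothetical */

static void __widget_destroy(struct widget *w)
{
	/* caller must already hold widget_lock */
	hlist_del(&w->hlist);
}

static void widget_destroy(struct widget *w)
{
	write_lock_bh(&widget_lock);
	__widget_destroy(w);
	write_unlock_bh(&widget_lock);
}
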
@@ -365,9 +347,6 @@ __nfulnl_send(struct nfulnl_instance *inst)
365{ 347{
366 int status; 348 int status;
367 349
368 if (!inst->skb)
369 return 0;
370
371 if (inst->qlen > 1) 350 if (inst->qlen > 1)
372 inst->lastnlh->nlmsg_type = NLMSG_DONE; 351 inst->lastnlh->nlmsg_type = NLMSG_DONE;
373 352
@@ -391,7 +370,8 @@ static void nfulnl_timer(unsigned long data)
391 UDEBUG("timer function called, flushing buffer\n"); 370 UDEBUG("timer function called, flushing buffer\n");
392 371
393 spin_lock_bh(&inst->lock); 372 spin_lock_bh(&inst->lock);
394 __nfulnl_send(inst); 373 if (inst->skb)
374 __nfulnl_send(inst);
395 spin_unlock_bh(&inst->lock); 375 spin_unlock_bh(&inst->lock);
396 instance_put(inst); 376 instance_put(inst);
397} 377}
@@ -409,15 +389,14 @@ __build_packet_message(struct nfulnl_instance *inst,
409 const struct nf_loginfo *li, 389 const struct nf_loginfo *li,
410 const char *prefix, unsigned int plen) 390 const char *prefix, unsigned int plen)
411{ 391{
412 unsigned char *old_tail;
413 struct nfulnl_msg_packet_hdr pmsg; 392 struct nfulnl_msg_packet_hdr pmsg;
414 struct nlmsghdr *nlh; 393 struct nlmsghdr *nlh;
415 struct nfgenmsg *nfmsg; 394 struct nfgenmsg *nfmsg;
416 __be32 tmp_uint; 395 __be32 tmp_uint;
396 sk_buff_data_t old_tail = inst->skb->tail;
417 397
418 UDEBUG("entered\n"); 398 UDEBUG("entered\n");
419 399
420 old_tail = inst->skb->tail;
421 nlh = NLMSG_PUT(inst->skb, 0, 0, 400 nlh = NLMSG_PUT(inst->skb, 0, 0,
422 NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET, 401 NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET,
423 sizeof(struct nfgenmsg)); 402 sizeof(struct nfgenmsg));
@@ -509,11 +488,11 @@ __build_packet_message(struct nfulnl_instance *inst,
509 NFA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw); 488 NFA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw);
510 } 489 }
511 490
512 if (skb->tstamp.off_sec) { 491 if (skb->tstamp.tv64) {
513 struct nfulnl_msg_packet_timestamp ts; 492 struct nfulnl_msg_packet_timestamp ts;
514 493 struct timeval tv = ktime_to_timeval(skb->tstamp);
515 ts.sec = cpu_to_be64(skb->tstamp.off_sec); 494 ts.sec = cpu_to_be64(tv.tv_sec);
516 ts.usec = cpu_to_be64(skb->tstamp.off_usec); 495 ts.usec = cpu_to_be64(tv.tv_usec);
517 496
518 NFA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts); 497 NFA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts);
519 } 498 }
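
skb->tstamp changed in this merge from the old off_sec/off_usec pair to a ktime_t, so "timestamp present" is now tested via tstamp.tv64 and the sec/usec payload is recovered with ktime_to_timeval(). A sketch of the extraction; example_read_tstamp() is an illustrative name:

#include <linux/ktime.h>
#include <linux/skbuff.h>

static void example_read_tstamp(const struct sk_buff *skb,
				u64 *sec, u64 *usec)
{
	if (skb->tstamp.tv64) {		/* zero means "no timestamp" */
		struct timeval tv = ktime_to_timeval(skb->tstamp);

		*sec = tv.tv_sec;
		*usec = tv.tv_usec;
	}
}
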
@@ -596,7 +575,6 @@ nfulnl_log_packet(unsigned int pf,
596 struct nfulnl_instance *inst; 575 struct nfulnl_instance *inst;
597 const struct nf_loginfo *li; 576 const struct nf_loginfo *li;
598 unsigned int qthreshold; 577 unsigned int qthreshold;
599 unsigned int nlbufsiz;
600 unsigned int plen; 578 unsigned int plen;
601 579
602 if (li_user && li_user->type == NF_LOG_TYPE_ULOG) 580 if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
@@ -606,12 +584,7 @@ nfulnl_log_packet(unsigned int pf,
606 584
607 inst = instance_lookup_get(li->u.ulog.group); 585 inst = instance_lookup_get(li->u.ulog.group);
608 if (!inst) 586 if (!inst)
609 inst = instance_lookup_get(0);
610 if (!inst) {
611 PRINTR("nfnetlink_log: trying to log packet, "
612 "but no instance for group %u\n", li->u.ulog.group);
613 return; 587 return;
614 }
615 588
616 plen = 0; 589 plen = 0;
617 if (prefix) 590 if (prefix)
@@ -667,24 +640,11 @@ nfulnl_log_packet(unsigned int pf,
667 break; 640 break;
668 641
669 default: 642 default:
670 spin_unlock_bh(&inst->lock); 643 goto unlock_and_release;
671 instance_put(inst);
672 return;
673 } 644 }
674 645
675 if (size > inst->nlbufsiz) 646 if (inst->qlen >= qthreshold ||
676 nlbufsiz = size; 647 (inst->skb && size > skb_tailroom(inst->skb))) {
677 else
678 nlbufsiz = inst->nlbufsiz;
679
680 if (!inst->skb) {
681 if (!(inst->skb = nfulnl_alloc_skb(nlbufsiz, size))) {
682 UDEBUG("error in nfulnl_alloc_skb(%u, %u)\n",
683 inst->nlbufsiz, size);
684 goto alloc_failure;
685 }
686 } else if (inst->qlen >= qthreshold ||
687 size > skb_tailroom(inst->skb)) {
688 /* either the queue len is too high or we don't have 648 /* either the queue len is too high or we don't have
689 * enough room in the skb left. flush to userspace. */ 649 * enough room in the skb left. flush to userspace. */
690 UDEBUG("flushing old skb\n"); 650 UDEBUG("flushing old skb\n");
@@ -693,12 +653,12 @@ nfulnl_log_packet(unsigned int pf,
693 if (del_timer(&inst->timer)) 653 if (del_timer(&inst->timer))
694 instance_put(inst); 654 instance_put(inst);
695 __nfulnl_send(inst); 655 __nfulnl_send(inst);
656 }
696 657
697 if (!(inst->skb = nfulnl_alloc_skb(nlbufsiz, size))) { 658 if (!inst->skb) {
698 UDEBUG("error in nfulnl_alloc_skb(%u, %u)\n", 659 inst->skb = nfulnl_alloc_skb(inst->nlbufsiz, size);
699 inst->nlbufsiz, size); 660 if (!inst->skb)
700 goto alloc_failure; 661 goto alloc_failure;
701 }
702 } 662 }
703 663
704 UDEBUG("qlen %d, qthreshold %d\n", inst->qlen, qthreshold); 664 UDEBUG("qlen %d, qthreshold %d\n", inst->qlen, qthreshold);
@@ -760,7 +720,7 @@ static struct notifier_block nfulnl_rtnl_notifier = {
760 720
761static int 721static int
762nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, 722nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
763 struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) 723 struct nlmsghdr *nlh, struct nfattr *nfqa[])
764{ 724{
765 return -ENOTSUPP; 725 return -ENOTSUPP;
766} 726}
@@ -798,7 +758,7 @@ static const int nfula_cfg_min[NFULA_CFG_MAX] = {
798 758
799static int 759static int
800nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, 760nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
801 struct nlmsghdr *nlh, struct nfattr *nfula[], int *errp) 761 struct nlmsghdr *nlh, struct nfattr *nfula[])
802{ 762{
803 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); 763 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
804 u_int16_t group_num = ntohs(nfmsg->res_id); 764 u_int16_t group_num = ntohs(nfmsg->res_id);
@@ -830,13 +790,13 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
830 NETLINK_CB(skb).pid); 790 NETLINK_CB(skb).pid);
831 if (!inst) { 791 if (!inst) {
832 ret = -EINVAL; 792 ret = -EINVAL;
833 goto out_put; 793 goto out;
834 } 794 }
835 break; 795 break;
836 case NFULNL_CFG_CMD_UNBIND: 796 case NFULNL_CFG_CMD_UNBIND:
837 if (!inst) { 797 if (!inst) {
838 ret = -ENODEV; 798 ret = -ENODEV;
839 goto out_put; 799 goto out;
840 } 800 }
841 801
842 if (inst->peer_pid != NETLINK_CB(skb).pid) { 802 if (inst->peer_pid != NETLINK_CB(skb).pid) {
@@ -845,7 +805,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
845 } 805 }
846 806
847 instance_destroy(inst); 807 instance_destroy(inst);
848 break; 808 goto out;
849 case NFULNL_CFG_CMD_PF_BIND: 809 case NFULNL_CFG_CMD_PF_BIND:
850 UDEBUG("registering log handler for pf=%u\n", pf); 810 UDEBUG("registering log handler for pf=%u\n", pf);
851 ret = nf_log_register(pf, &nfulnl_logger); 811 ret = nf_log_register(pf, &nfulnl_logger);
@@ -869,7 +829,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
869 "group=%u pid=%u =>ENOENT\n", 829 "group=%u pid=%u =>ENOENT\n",
870 group_num, NETLINK_CB(skb).pid); 830 group_num, NETLINK_CB(skb).pid);
871 ret = -ENOENT; 831 ret = -ENOENT;
872 goto out_put; 832 goto out;
873 } 833 }
874 834
875 if (inst->peer_pid != NETLINK_CB(skb).pid) { 835 if (inst->peer_pid != NETLINK_CB(skb).pid) {
@@ -939,10 +899,8 @@ struct iter_state {
939 unsigned int bucket; 899 unsigned int bucket;
940}; 900};
941 901
942static struct hlist_node *get_first(struct seq_file *seq) 902static struct hlist_node *get_first(struct iter_state *st)
943{ 903{
944 struct iter_state *st = seq->private;
945
946 if (!st) 904 if (!st)
947 return NULL; 905 return NULL;
948 906
@@ -953,10 +911,8 @@ static struct hlist_node *get_first(struct seq_file *seq)
953 return NULL; 911 return NULL;
954} 912}
955 913
956static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h) 914static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
957{ 915{
958 struct iter_state *st = seq->private;
959
960 h = h->next; 916 h = h->next;
961 while (!h) { 917 while (!h) {
962 if (++st->bucket >= INSTANCE_BUCKETS) 918 if (++st->bucket >= INSTANCE_BUCKETS)
@@ -967,13 +923,13 @@ static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
967 return h; 923 return h;
968} 924}
969 925
970static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos) 926static struct hlist_node *get_idx(struct iter_state *st, loff_t pos)
971{ 927{
972 struct hlist_node *head; 928 struct hlist_node *head;
973 head = get_first(seq); 929 head = get_first(st);
974 930
975 if (head) 931 if (head)
976 while (pos && (head = get_next(seq, head))) 932 while (pos && (head = get_next(st, head)))
977 pos--; 933 pos--;
978 return pos ? NULL : head; 934 return pos ? NULL : head;
979} 935}
@@ -981,13 +937,13 @@ static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
981static void *seq_start(struct seq_file *seq, loff_t *pos) 937static void *seq_start(struct seq_file *seq, loff_t *pos)
982{ 938{
983 read_lock_bh(&instances_lock); 939 read_lock_bh(&instances_lock);
984 return get_idx(seq, *pos); 940 return get_idx(seq->private, *pos);
985} 941}
986 942
987static void *seq_next(struct seq_file *s, void *v, loff_t *pos) 943static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
988{ 944{
989 (*pos)++; 945 (*pos)++;
990 return get_next(s, v); 946 return get_next(s->private, v);
991} 947}
992 948
993static void seq_stop(struct seq_file *s, void *v) 949static void seq_stop(struct seq_file *s, void *v)
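
The seq_file helpers above now take the iterator state directly instead of re-deriving it from seq->private in every function; seq->private is dereferenced once at the ->start/->next boundary. A minimal sketch of that calling convention, reusing get_idx() from the file above:

#include <linux/seq_file.h>

struct iter_state { unsigned int bucket; };	/* as in nfnetlink_log.c */

static struct hlist_node *get_idx(struct iter_state *st, loff_t pos);

static void *example_seq_start(struct seq_file *seq, loff_t *pos)
{
	/* dereference seq->private once; helpers take the state directly */
	return get_idx(seq->private, *pos);
}
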
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index d9ce4a71d0f3..7a97bec67729 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -338,7 +338,7 @@ static struct sk_buff *
338nfqnl_build_packet_message(struct nfqnl_instance *queue, 338nfqnl_build_packet_message(struct nfqnl_instance *queue,
339 struct nfqnl_queue_entry *entry, int *errp) 339 struct nfqnl_queue_entry *entry, int *errp)
340{ 340{
341 unsigned char *old_tail; 341 sk_buff_data_t old_tail;
342 size_t size; 342 size_t size;
343 size_t data_len = 0; 343 size_t data_len = 0;
344 struct sk_buff *skb; 344 struct sk_buff *skb;
@@ -404,7 +404,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
404 if (!skb) 404 if (!skb)
405 goto nlmsg_failure; 405 goto nlmsg_failure;
406 406
407 old_tail= skb->tail; 407 old_tail = skb->tail;
408 nlh = NLMSG_PUT(skb, 0, 0, 408 nlh = NLMSG_PUT(skb, 0, 0,
409 NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, 409 NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
410 sizeof(struct nfgenmsg)); 410 sizeof(struct nfgenmsg));
@@ -495,11 +495,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
495 NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw); 495 NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw);
496 } 496 }
497 497
498 if (entskb->tstamp.off_sec) { 498 if (entskb->tstamp.tv64) {
499 struct nfqnl_msg_packet_timestamp ts; 499 struct nfqnl_msg_packet_timestamp ts;
500 500 struct timeval tv = ktime_to_timeval(entskb->tstamp);
501 ts.sec = cpu_to_be64(entskb->tstamp.off_sec); 501 ts.sec = cpu_to_be64(tv.tv_sec);
502 ts.usec = cpu_to_be64(entskb->tstamp.off_usec); 502 ts.usec = cpu_to_be64(tv.tv_usec);
503 503
504 NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); 504 NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts);
505 } 505 }
@@ -648,7 +648,7 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e)
648 } 648 }
649 if (!skb_make_writable(&e->skb, data_len)) 649 if (!skb_make_writable(&e->skb, data_len))
650 return -ENOMEM; 650 return -ENOMEM;
651 memcpy(e->skb->data, data, data_len); 651 skb_copy_to_linear_data(e->skb, data, data_len);
652 e->skb->ip_summed = CHECKSUM_NONE; 652 e->skb->ip_summed = CHECKSUM_NONE;
653 return 0; 653 return 0;
654} 654}
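
skb_copy_to_linear_data() is one of the sk_buff accessors introduced by this cleanup: it behaves like memcpy(skb->data, from, len) but keeps callers independent of how the data pointer is represented. Sketch, assuming the payload fits in the linear head:

#include <linux/skbuff.h>
#include <linux/string.h>

static void example_overwrite_payload(struct sk_buff *skb,
				      const void *from, unsigned int len)
{
	/* equivalent to memcpy(skb->data, from, len) */
	skb_copy_to_linear_data(skb, from, len);
}
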
@@ -783,7 +783,7 @@ static const int nfqa_verdict_min[NFQA_MAX] = {
783 783
784static int 784static int
785nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, 785nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
786 struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) 786 struct nlmsghdr *nlh, struct nfattr *nfqa[])
787{ 787{
788 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); 788 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
789 u_int16_t queue_num = ntohs(nfmsg->res_id); 789 u_int16_t queue_num = ntohs(nfmsg->res_id);
@@ -848,7 +848,7 @@ err_out_put:
848 848
849static int 849static int
850nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, 850nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
851 struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) 851 struct nlmsghdr *nlh, struct nfattr *nfqa[])
852{ 852{
853 return -ENOTSUPP; 853 return -ENOTSUPP;
854} 854}
@@ -865,7 +865,7 @@ static struct nf_queue_handler nfqh = {
865 865
866static int 866static int
867nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, 867nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
868 struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) 868 struct nlmsghdr *nlh, struct nfattr *nfqa[])
869{ 869{
870 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); 870 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
871 u_int16_t queue_num = ntohs(nfmsg->res_id); 871 u_int16_t queue_num = ntohs(nfmsg->res_id);
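
The signature change running through this file drops the int *errp out-parameter from every nfnetlink callback: handlers now report errors through their return value and the nfnetlink core takes care of the ack. A hypothetical handler showing the new shape:

#include <linux/errno.h>
#include <linux/netfilter/nfnetlink.h>

static int example_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
			       struct nlmsghdr *nlh, struct nfattr *nfa[])
{
	return -ENOTSUPP;	/* propagated by the core, no *errp needed */
}
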
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index ec607a421a5a..0eb2504b89b5 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -56,8 +56,8 @@ enum {
56}; 56};
57 57
58static const char *xt_prefix[NPROTO] = { 58static const char *xt_prefix[NPROTO] = {
59 [AF_INET] = "ip", 59 [AF_INET] = "ip",
60 [AF_INET6] = "ip6", 60 [AF_INET6] = "ip6",
61 [NF_ARP] = "arp", 61 [NF_ARP] = "arp",
62}; 62};
63 63
@@ -651,12 +651,6 @@ void *xt_unregister_table(struct xt_table *table)
651EXPORT_SYMBOL_GPL(xt_unregister_table); 651EXPORT_SYMBOL_GPL(xt_unregister_table);
652 652
653#ifdef CONFIG_PROC_FS 653#ifdef CONFIG_PROC_FS
654static char *xt_proto_prefix[NPROTO] = {
655 [AF_INET] = "ip",
656 [AF_INET6] = "ip6",
657 [NF_ARP] = "arp",
658};
659
660static struct list_head *xt_get_idx(struct list_head *list, struct seq_file *seq, loff_t pos) 654static struct list_head *xt_get_idx(struct list_head *list, struct seq_file *seq, loff_t pos)
661{ 655{
662 struct list_head *head = list->next; 656 struct list_head *head = list->next;
@@ -798,7 +792,7 @@ int xt_proto_init(int af)
798 792
799 793
800#ifdef CONFIG_PROC_FS 794#ifdef CONFIG_PROC_FS
801 strlcpy(buf, xt_proto_prefix[af], sizeof(buf)); 795 strlcpy(buf, xt_prefix[af], sizeof(buf));
802 strlcat(buf, FORMAT_TABLES, sizeof(buf)); 796 strlcat(buf, FORMAT_TABLES, sizeof(buf));
803 proc = proc_net_fops_create(buf, 0440, &xt_file_ops); 797 proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
804 if (!proc) 798 if (!proc)
@@ -806,14 +800,14 @@ int xt_proto_init(int af)
806 proc->data = (void *) ((unsigned long) af | (TABLE << 16)); 800 proc->data = (void *) ((unsigned long) af | (TABLE << 16));
807 801
808 802
809 strlcpy(buf, xt_proto_prefix[af], sizeof(buf)); 803 strlcpy(buf, xt_prefix[af], sizeof(buf));
810 strlcat(buf, FORMAT_MATCHES, sizeof(buf)); 804 strlcat(buf, FORMAT_MATCHES, sizeof(buf));
811 proc = proc_net_fops_create(buf, 0440, &xt_file_ops); 805 proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
812 if (!proc) 806 if (!proc)
813 goto out_remove_tables; 807 goto out_remove_tables;
814 proc->data = (void *) ((unsigned long) af | (MATCH << 16)); 808 proc->data = (void *) ((unsigned long) af | (MATCH << 16));
815 809
816 strlcpy(buf, xt_proto_prefix[af], sizeof(buf)); 810 strlcpy(buf, xt_prefix[af], sizeof(buf));
817 strlcat(buf, FORMAT_TARGETS, sizeof(buf)); 811 strlcat(buf, FORMAT_TARGETS, sizeof(buf));
818 proc = proc_net_fops_create(buf, 0440, &xt_file_ops); 812 proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
819 if (!proc) 813 if (!proc)
@@ -825,12 +819,12 @@ int xt_proto_init(int af)
825 819
826#ifdef CONFIG_PROC_FS 820#ifdef CONFIG_PROC_FS
827out_remove_matches: 821out_remove_matches:
828 strlcpy(buf, xt_proto_prefix[af], sizeof(buf)); 822 strlcpy(buf, xt_prefix[af], sizeof(buf));
829 strlcat(buf, FORMAT_MATCHES, sizeof(buf)); 823 strlcat(buf, FORMAT_MATCHES, sizeof(buf));
830 proc_net_remove(buf); 824 proc_net_remove(buf);
831 825
832out_remove_tables: 826out_remove_tables:
833 strlcpy(buf, xt_proto_prefix[af], sizeof(buf)); 827 strlcpy(buf, xt_prefix[af], sizeof(buf));
834 strlcat(buf, FORMAT_TABLES, sizeof(buf)); 828 strlcat(buf, FORMAT_TABLES, sizeof(buf));
835 proc_net_remove(buf); 829 proc_net_remove(buf);
836out: 830out:
@@ -844,15 +838,15 @@ void xt_proto_fini(int af)
844#ifdef CONFIG_PROC_FS 838#ifdef CONFIG_PROC_FS
845 char buf[XT_FUNCTION_MAXNAMELEN]; 839 char buf[XT_FUNCTION_MAXNAMELEN];
846 840
847 strlcpy(buf, xt_proto_prefix[af], sizeof(buf)); 841 strlcpy(buf, xt_prefix[af], sizeof(buf));
848 strlcat(buf, FORMAT_TABLES, sizeof(buf)); 842 strlcat(buf, FORMAT_TABLES, sizeof(buf));
849 proc_net_remove(buf); 843 proc_net_remove(buf);
850 844
851 strlcpy(buf, xt_proto_prefix[af], sizeof(buf)); 845 strlcpy(buf, xt_prefix[af], sizeof(buf));
852 strlcat(buf, FORMAT_TARGETS, sizeof(buf)); 846 strlcat(buf, FORMAT_TARGETS, sizeof(buf));
853 proc_net_remove(buf); 847 proc_net_remove(buf);
854 848
855 strlcpy(buf, xt_proto_prefix[af], sizeof(buf)); 849 strlcpy(buf, xt_prefix[af], sizeof(buf));
856 strlcat(buf, FORMAT_MATCHES, sizeof(buf)); 850 strlcat(buf, FORMAT_MATCHES, sizeof(buf));
857 proc_net_remove(buf); 851 proc_net_remove(buf);
858#endif /*CONFIG_PROC_FS*/ 852#endif /*CONFIG_PROC_FS*/
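
The duplicate xt_proto_prefix[] table is removed above in favour of the existing xt_prefix[], which the proc code uses to build names such as "ip_tables" or "arp_targets". A sketch of the construction; example_proc_name() is an illustrative wrapper around the strlcpy()/strlcat() pattern:

#include <linux/string.h>

static void example_proc_name(char *buf, size_t len, const char *prefix)
{
	strlcpy(buf, prefix, len);	/* "ip", "ip6" or "arp" */
	strlcat(buf, "_tables", len);	/* FORMAT_TABLES in x_tables.c */
}
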
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 795c058b16a5..b03ce009d0bf 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -30,10 +30,7 @@ MODULE_ALIAS("ipt_CONNMARK");
30 30
31#include <linux/netfilter/x_tables.h> 31#include <linux/netfilter/x_tables.h>
32#include <linux/netfilter/xt_CONNMARK.h> 32#include <linux/netfilter/xt_CONNMARK.h>
33#include <net/netfilter/nf_conntrack_compat.h>
34#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
35#include <net/netfilter/nf_conntrack_ecache.h> 33#include <net/netfilter/nf_conntrack_ecache.h>
36#endif
37 34
38static unsigned int 35static unsigned int
39target(struct sk_buff **pskb, 36target(struct sk_buff **pskb,
@@ -44,40 +41,33 @@ target(struct sk_buff **pskb,
44 const void *targinfo) 41 const void *targinfo)
45{ 42{
46 const struct xt_connmark_target_info *markinfo = targinfo; 43 const struct xt_connmark_target_info *markinfo = targinfo;
44 struct nf_conn *ct;
45 enum ip_conntrack_info ctinfo;
47 u_int32_t diff; 46 u_int32_t diff;
48 u_int32_t mark; 47 u_int32_t mark;
49 u_int32_t newmark; 48 u_int32_t newmark;
50 u_int32_t ctinfo;
51 u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo);
52 49
53 if (ctmark) { 50 ct = nf_ct_get(*pskb, &ctinfo);
51 if (ct) {
54 switch(markinfo->mode) { 52 switch(markinfo->mode) {
55 case XT_CONNMARK_SET: 53 case XT_CONNMARK_SET:
56 newmark = (*ctmark & ~markinfo->mask) | markinfo->mark; 54 newmark = (ct->mark & ~markinfo->mask) | markinfo->mark;
57 if (newmark != *ctmark) { 55 if (newmark != ct->mark) {
58 *ctmark = newmark; 56 ct->mark = newmark;
59#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
60 ip_conntrack_event_cache(IPCT_MARK, *pskb);
61#else
62 nf_conntrack_event_cache(IPCT_MARK, *pskb); 57 nf_conntrack_event_cache(IPCT_MARK, *pskb);
63#endif
64 } 58 }
65 break; 59 break;
66 case XT_CONNMARK_SAVE: 60 case XT_CONNMARK_SAVE:
67 newmark = (*ctmark & ~markinfo->mask) | 61 newmark = (ct->mark & ~markinfo->mask) |
68 ((*pskb)->mark & markinfo->mask); 62 ((*pskb)->mark & markinfo->mask);
69 if (*ctmark != newmark) { 63 if (ct->mark != newmark) {
70 *ctmark = newmark; 64 ct->mark = newmark;
71#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
72 ip_conntrack_event_cache(IPCT_MARK, *pskb);
73#else
74 nf_conntrack_event_cache(IPCT_MARK, *pskb); 65 nf_conntrack_event_cache(IPCT_MARK, *pskb);
75#endif
76 } 66 }
77 break; 67 break;
78 case XT_CONNMARK_RESTORE: 68 case XT_CONNMARK_RESTORE:
79 mark = (*pskb)->mark; 69 mark = (*pskb)->mark;
80 diff = (*ctmark ^ mark) & markinfo->mask; 70 diff = (ct->mark ^ mark) & markinfo->mask;
81 (*pskb)->mark = mark ^ diff; 71 (*pskb)->mark = mark ^ diff;
82 break; 72 break;
83 } 73 }
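
With the nf_conntrack_compat layer gone, the target above looks the connection up once with nf_ct_get() and manipulates ct->mark directly, leaving a single nf_conntrack_event_cache() path. A minimal sketch of the lookup, assuming CONFIG_NF_CONNTRACK_MARK so the mark field exists:

#include <net/netfilter/nf_conntrack.h>

static u_int32_t example_ctmark(const struct sk_buff *skb)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

	return ct ? ct->mark : 0;	/* 0 when untracked/invalid */
}
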
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 1ab0db641f96..81c0c58bab47 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -19,7 +19,7 @@
19#include <linux/skbuff.h> 19#include <linux/skbuff.h>
20#include <linux/netfilter/x_tables.h> 20#include <linux/netfilter/x_tables.h>
21#include <linux/netfilter/xt_CONNSECMARK.h> 21#include <linux/netfilter/xt_CONNSECMARK.h>
22#include <net/netfilter/nf_conntrack_compat.h> 22#include <net/netfilter/nf_conntrack.h>
23 23
24#define PFX "CONNSECMARK: " 24#define PFX "CONNSECMARK: "
25 25
@@ -36,12 +36,12 @@ MODULE_ALIAS("ip6t_CONNSECMARK");
36static void secmark_save(struct sk_buff *skb) 36static void secmark_save(struct sk_buff *skb)
37{ 37{
38 if (skb->secmark) { 38 if (skb->secmark) {
39 u32 *connsecmark; 39 struct nf_conn *ct;
40 enum ip_conntrack_info ctinfo; 40 enum ip_conntrack_info ctinfo;
41 41
42 connsecmark = nf_ct_get_secmark(skb, &ctinfo); 42 ct = nf_ct_get(skb, &ctinfo);
43 if (connsecmark && !*connsecmark) 43 if (ct && !ct->secmark)
44 *connsecmark = skb->secmark; 44 ct->secmark = skb->secmark;
45 } 45 }
46} 46}
47 47
@@ -52,12 +52,12 @@ static void secmark_save(struct sk_buff *skb)
52static void secmark_restore(struct sk_buff *skb) 52static void secmark_restore(struct sk_buff *skb)
53{ 53{
54 if (!skb->secmark) { 54 if (!skb->secmark) {
55 u32 *connsecmark; 55 struct nf_conn *ct;
56 enum ip_conntrack_info ctinfo; 56 enum ip_conntrack_info ctinfo;
57 57
58 connsecmark = nf_ct_get_secmark(skb, &ctinfo); 58 ct = nf_ct_get(skb, &ctinfo);
59 if (connsecmark && *connsecmark) 59 if (ct && ct->secmark)
60 skb->secmark = *connsecmark; 60 skb->secmark = ct->secmark;
61 } 61 }
62} 62}
63 63
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index a7cc75aeb38d..9f2f2201f6ae 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -8,8 +8,6 @@
8 * published by the Free Software Foundation. 8 * published by the Free Software Foundation.
9 * 9 *
10 * See RFC2474 for a description of the DSCP field within the IP Header. 10 * See RFC2474 for a description of the DSCP field within the IP Header.
11 *
12 * xt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp
13*/ 11*/
14 12
15#include <linux/module.h> 13#include <linux/module.h>
@@ -35,13 +33,13 @@ static unsigned int target(struct sk_buff **pskb,
35 const void *targinfo) 33 const void *targinfo)
36{ 34{
37 const struct xt_DSCP_info *dinfo = targinfo; 35 const struct xt_DSCP_info *dinfo = targinfo;
38 u_int8_t dscp = ipv4_get_dsfield((*pskb)->nh.iph) >> XT_DSCP_SHIFT; 36 u_int8_t dscp = ipv4_get_dsfield(ip_hdr(*pskb)) >> XT_DSCP_SHIFT;
39 37
40 if (dscp != dinfo->dscp) { 38 if (dscp != dinfo->dscp) {
41 if (!skb_make_writable(pskb, sizeof(struct iphdr))) 39 if (!skb_make_writable(pskb, sizeof(struct iphdr)))
42 return NF_DROP; 40 return NF_DROP;
43 41
44 ipv4_change_dsfield((*pskb)->nh.iph, (__u8)(~XT_DSCP_MASK), 42 ipv4_change_dsfield(ip_hdr(*pskb), (__u8)(~XT_DSCP_MASK),
45 dinfo->dscp << XT_DSCP_SHIFT); 43 dinfo->dscp << XT_DSCP_SHIFT);
46 44
47 } 45 }
@@ -56,13 +54,13 @@ static unsigned int target6(struct sk_buff **pskb,
56 const void *targinfo) 54 const void *targinfo)
57{ 55{
58 const struct xt_DSCP_info *dinfo = targinfo; 56 const struct xt_DSCP_info *dinfo = targinfo;
59 u_int8_t dscp = ipv6_get_dsfield((*pskb)->nh.ipv6h) >> XT_DSCP_SHIFT; 57 u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(*pskb)) >> XT_DSCP_SHIFT;
60 58
61 if (dscp != dinfo->dscp) { 59 if (dscp != dinfo->dscp) {
62 if (!skb_make_writable(pskb, sizeof(struct ipv6hdr))) 60 if (!skb_make_writable(pskb, sizeof(struct ipv6hdr)))
63 return NF_DROP; 61 return NF_DROP;
64 62
65 ipv6_change_dsfield((*pskb)->nh.ipv6h, (__u8)(~XT_DSCP_MASK), 63 ipv6_change_dsfield(ipv6_hdr(*pskb), (__u8)(~XT_DSCP_MASK),
66 dinfo->dscp << XT_DSCP_SHIFT); 64 dinfo->dscp << XT_DSCP_SHIFT);
67 } 65 }
68 return XT_CONTINUE; 66 return XT_CONTINUE;
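
The skb->nh.iph and skb->nh.ipv6h union accesses are replaced tree-wide by the typed ip_hdr()/ipv6_hdr() helpers, as in the two hunks above. Sketch of the accessors; the field choices are illustrative:

#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>

static u_int8_t example_ipv4_tos(const struct sk_buff *skb)
{
	return ip_hdr(skb)->tos;		/* was skb->nh.iph->tos */
}

static u_int8_t example_ipv6_hlim(const struct sk_buff *skb)
{
	return ipv6_hdr(skb)->hop_limit;	/* was skb->nh.ipv6h->hop_limit */
}
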
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index b874a2008b2b..5085fb3d1e2d 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -5,7 +5,7 @@
5#include <linux/skbuff.h> 5#include <linux/skbuff.h>
6 6
7#include <linux/netfilter/x_tables.h> 7#include <linux/netfilter/x_tables.h>
8#include <net/netfilter/nf_conntrack_compat.h> 8#include <net/netfilter/nf_conntrack.h>
9 9
10MODULE_LICENSE("GPL"); 10MODULE_LICENSE("GPL");
11MODULE_ALIAS("ipt_NOTRACK"); 11MODULE_ALIAS("ipt_NOTRACK");
@@ -26,7 +26,7 @@ target(struct sk_buff **pskb,
 26 If there is a real ct entry corresponding to this packet, 26 If there is a real ct entry corresponding to this packet,
 27 it'll hang around till timing out. We don't deal with it 27 it'll hang around till timing out. We don't deal with it

28 for performance reasons. JK */ 28 for performance reasons. JK */
29 nf_ct_untrack(*pskb); 29 (*pskb)->nfct = &nf_conntrack_untracked.ct_general;
30 (*pskb)->nfctinfo = IP_CT_NEW; 30 (*pskb)->nfctinfo = IP_CT_NEW;
31 nf_conntrack_get((*pskb)->nfct); 31 nf_conntrack_get((*pskb)->nfct);
32 32
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index db7e38c08de2..15fe8f649510 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -54,7 +54,7 @@ tcpmss_mangle_packet(struct sk_buff **pskb,
54 return -1; 54 return -1;
55 55
56 tcplen = (*pskb)->len - tcphoff; 56 tcplen = (*pskb)->len - tcphoff;
57 tcph = (struct tcphdr *)((*pskb)->nh.raw + tcphoff); 57 tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff);
58 58
 59 /* Since it passed flags test in tcp match, we know it is 59 /* Since it passed flags test in tcp match, we know it is
60 not a fragment, and has data >= tcp header length. SYN 60 not a fragment, and has data >= tcp header length. SYN
@@ -113,7 +113,7 @@ tcpmss_mangle_packet(struct sk_buff **pskb,
113 return -1; 113 return -1;
114 kfree_skb(*pskb); 114 kfree_skb(*pskb);
115 *pskb = newskb; 115 *pskb = newskb;
116 tcph = (struct tcphdr *)((*pskb)->nh.raw + tcphoff); 116 tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff);
117 } 117 }
118 118
119 skb_put((*pskb), TCPOLEN_MSS); 119 skb_put((*pskb), TCPOLEN_MSS);
@@ -145,7 +145,7 @@ xt_tcpmss_target4(struct sk_buff **pskb,
145 const struct xt_target *target, 145 const struct xt_target *target,
146 const void *targinfo) 146 const void *targinfo)
147{ 147{
148 struct iphdr *iph = (*pskb)->nh.iph; 148 struct iphdr *iph = ip_hdr(*pskb);
149 __be16 newlen; 149 __be16 newlen;
150 int ret; 150 int ret;
151 151
@@ -154,7 +154,7 @@ xt_tcpmss_target4(struct sk_buff **pskb,
154 if (ret < 0) 154 if (ret < 0)
155 return NF_DROP; 155 return NF_DROP;
156 if (ret > 0) { 156 if (ret > 0) {
157 iph = (*pskb)->nh.iph; 157 iph = ip_hdr(*pskb);
158 newlen = htons(ntohs(iph->tot_len) + ret); 158 newlen = htons(ntohs(iph->tot_len) + ret);
159 nf_csum_replace2(&iph->check, iph->tot_len, newlen); 159 nf_csum_replace2(&iph->check, iph->tot_len, newlen);
160 iph->tot_len = newlen; 160 iph->tot_len = newlen;
@@ -171,7 +171,7 @@ xt_tcpmss_target6(struct sk_buff **pskb,
171 const struct xt_target *target, 171 const struct xt_target *target,
172 const void *targinfo) 172 const void *targinfo)
173{ 173{
174 struct ipv6hdr *ipv6h = (*pskb)->nh.ipv6h; 174 struct ipv6hdr *ipv6h = ipv6_hdr(*pskb);
175 u8 nexthdr; 175 u8 nexthdr;
176 int tcphoff; 176 int tcphoff;
177 int ret; 177 int ret;
@@ -187,7 +187,7 @@ xt_tcpmss_target6(struct sk_buff **pskb,
187 if (ret < 0) 187 if (ret < 0)
188 return NF_DROP; 188 return NF_DROP;
189 if (ret > 0) { 189 if (ret > 0) {
190 ipv6h = (*pskb)->nh.ipv6h; 190 ipv6h = ipv6_hdr(*pskb);
191 ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret); 191 ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret);
192 } 192 }
193 return XT_CONTINUE; 193 return XT_CONTINUE;
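
Here the raw pointer arithmetic on skb->nh.raw becomes skb_network_header(); the TCP header still sits at a caller-computed offset from the network header, since the transport offset is not yet set on this path. Sketch:

#include <linux/skbuff.h>
#include <linux/tcp.h>

static struct tcphdr *example_tcp_hdr(struct sk_buff *skb,
				      unsigned int tcphoff)
{
	/* was: (struct tcphdr *)(skb->nh.raw + tcphoff) */
	return (struct tcphdr *)(skb_network_header(skb) + tcphoff);
}
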
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 5e32dfa2668b..804afe55e141 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -1,20 +1,11 @@
1/* Kernel module to match connection tracking byte counter. 1/* Kernel module to match connection tracking byte counter.
2 * GPL (C) 2002 Martin Devera (devik@cdi.cz). 2 * GPL (C) 2002 Martin Devera (devik@cdi.cz).
3 *
4 * 2004-07-20 Harald Welte <laforge@netfilter.org>
5 * - reimplemented to use per-connection accounting counters
6 * - add functionality to match number of packets
7 * - add functionality to match average packet size
8 * - add support to match directions seperately
9 * 2005-10-16 Harald Welte <laforge@netfilter.org>
10 * - Port to x_tables
11 *
12 */ 3 */
13#include <linux/module.h> 4#include <linux/module.h>
14#include <linux/skbuff.h> 5#include <linux/skbuff.h>
15#include <net/netfilter/nf_conntrack_compat.h>
16#include <linux/netfilter/x_tables.h> 6#include <linux/netfilter/x_tables.h>
17#include <linux/netfilter/xt_connbytes.h> 7#include <linux/netfilter/xt_connbytes.h>
8#include <net/netfilter/nf_conntrack.h>
18 9
19#include <asm/div64.h> 10#include <asm/div64.h>
20#include <asm/bitops.h> 11#include <asm/bitops.h>
@@ -24,22 +15,6 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
24MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection"); 15MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection");
25MODULE_ALIAS("ipt_connbytes"); 16MODULE_ALIAS("ipt_connbytes");
26 17
27/* 64bit divisor, dividend and result. dynamic precision */
28static u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
29{
30 u_int32_t d = divisor;
31
32 if (divisor > 0xffffffffULL) {
33 unsigned int shift = fls(divisor >> 32);
34
35 d = divisor >> shift;
36 dividend >>= shift;
37 }
38
39 do_div(dividend, d);
40 return dividend;
41}
42
43static int 18static int
44match(const struct sk_buff *skb, 19match(const struct sk_buff *skb,
45 const struct net_device *in, 20 const struct net_device *in,
@@ -51,13 +26,17 @@ match(const struct sk_buff *skb,
51 int *hotdrop) 26 int *hotdrop)
52{ 27{
53 const struct xt_connbytes_info *sinfo = matchinfo; 28 const struct xt_connbytes_info *sinfo = matchinfo;
29 struct nf_conn *ct;
30 enum ip_conntrack_info ctinfo;
54 u_int64_t what = 0; /* initialize to make gcc happy */ 31 u_int64_t what = 0; /* initialize to make gcc happy */
55 u_int64_t bytes = 0; 32 u_int64_t bytes = 0;
56 u_int64_t pkts = 0; 33 u_int64_t pkts = 0;
57 const struct ip_conntrack_counter *counters; 34 const struct ip_conntrack_counter *counters;
58 35
59 if (!(counters = nf_ct_get_counters(skb))) 36 ct = nf_ct_get(skb, &ctinfo);
60 return 0; /* no match */ 37 if (!ct)
38 return 0;
39 counters = ct->counters;
61 40
62 switch (sinfo->what) { 41 switch (sinfo->what) {
63 case XT_CONNBYTES_PKTS: 42 case XT_CONNBYTES_PKTS:
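
The module-local div64_64() copy is deleted above; this merge consolidates a generic div64_64() (note the arch/s390 lib changes in the diffstat), so the match can call the shared helper. A sketch of the average-packet-size computation it is used for, assuming the generic helper is declared in linux/kernel.h as in kernels of this vintage:

#include <linux/kernel.h>

static u_int64_t example_avg_pkt_size(u_int64_t bytes, u_int64_t pkts)
{
	return pkts ? div64_64(bytes, pkts) : 0;	/* 64/64-bit divide */
}
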
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 36c2defff238..e1803256c792 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -21,16 +21,15 @@
21 21
22#include <linux/module.h> 22#include <linux/module.h>
23#include <linux/skbuff.h> 23#include <linux/skbuff.h>
24#include <net/netfilter/nf_conntrack.h>
25#include <linux/netfilter/x_tables.h>
26#include <linux/netfilter/xt_connmark.h>
24 27
25MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>"); 28MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>");
26MODULE_DESCRIPTION("IP tables connmark match module"); 29MODULE_DESCRIPTION("IP tables connmark match module");
27MODULE_LICENSE("GPL"); 30MODULE_LICENSE("GPL");
28MODULE_ALIAS("ipt_connmark"); 31MODULE_ALIAS("ipt_connmark");
29 32
30#include <linux/netfilter/x_tables.h>
31#include <linux/netfilter/xt_connmark.h>
32#include <net/netfilter/nf_conntrack_compat.h>
33
34static int 33static int
35match(const struct sk_buff *skb, 34match(const struct sk_buff *skb,
36 const struct net_device *in, 35 const struct net_device *in,
@@ -42,12 +41,14 @@ match(const struct sk_buff *skb,
42 int *hotdrop) 41 int *hotdrop)
43{ 42{
44 const struct xt_connmark_info *info = matchinfo; 43 const struct xt_connmark_info *info = matchinfo;
45 u_int32_t ctinfo; 44 struct nf_conn *ct;
46 const u_int32_t *ctmark = nf_ct_get_mark(skb, &ctinfo); 45 enum ip_conntrack_info ctinfo;
47 if (!ctmark) 46
47 ct = nf_ct_get(skb, &ctinfo);
48 if (!ct)
48 return 0; 49 return 0;
49 50
50 return (((*ctmark) & info->mask) == info->mark) ^ info->invert; 51 return (((ct->mark) & info->mask) == info->mark) ^ info->invert;
51} 52}
52 53
53static int 54static int
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 2885c378288e..f4ea8fe07a53 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -10,121 +10,15 @@
10 10
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/skbuff.h> 12#include <linux/skbuff.h>
13
14#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
15#include <linux/netfilter_ipv4/ip_conntrack.h>
16#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
17#else
18#include <net/netfilter/nf_conntrack.h>
19#endif
20
21#include <linux/netfilter/x_tables.h> 13#include <linux/netfilter/x_tables.h>
22#include <linux/netfilter/xt_conntrack.h> 14#include <linux/netfilter/xt_conntrack.h>
23#include <net/netfilter/nf_conntrack_compat.h> 15#include <net/netfilter/nf_conntrack.h>
24 16
25MODULE_LICENSE("GPL"); 17MODULE_LICENSE("GPL");
26MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); 18MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
27MODULE_DESCRIPTION("iptables connection tracking match module"); 19MODULE_DESCRIPTION("iptables connection tracking match module");
28MODULE_ALIAS("ipt_conntrack"); 20MODULE_ALIAS("ipt_conntrack");
29 21
30#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
31
32static int
33match(const struct sk_buff *skb,
34 const struct net_device *in,
35 const struct net_device *out,
36 const struct xt_match *match,
37 const void *matchinfo,
38 int offset,
39 unsigned int protoff,
40 int *hotdrop)
41{
42 const struct xt_conntrack_info *sinfo = matchinfo;
43 struct ip_conntrack *ct;
44 enum ip_conntrack_info ctinfo;
45 unsigned int statebit;
46
47 ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
48
49#define FWINV(bool, invflg) ((bool) ^ !!(sinfo->invflags & invflg))
50
51 if (ct == &ip_conntrack_untracked)
52 statebit = XT_CONNTRACK_STATE_UNTRACKED;
53 else if (ct)
54 statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
55 else
56 statebit = XT_CONNTRACK_STATE_INVALID;
57
58 if (sinfo->flags & XT_CONNTRACK_STATE) {
59 if (ct) {
60 if (test_bit(IPS_SRC_NAT_BIT, &ct->status))
61 statebit |= XT_CONNTRACK_STATE_SNAT;
62 if (test_bit(IPS_DST_NAT_BIT, &ct->status))
63 statebit |= XT_CONNTRACK_STATE_DNAT;
64 }
65 if (FWINV((statebit & sinfo->statemask) == 0,
66 XT_CONNTRACK_STATE))
67 return 0;
68 }
69
70 if (ct == NULL) {
71 if (sinfo->flags & ~XT_CONNTRACK_STATE)
72 return 0;
73 return 1;
74 }
75
76 if (sinfo->flags & XT_CONNTRACK_PROTO &&
77 FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum !=
78 sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum,
79 XT_CONNTRACK_PROTO))
80 return 0;
81
82 if (sinfo->flags & XT_CONNTRACK_ORIGSRC &&
83 FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip &
84 sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
85 sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip,
86 XT_CONNTRACK_ORIGSRC))
87 return 0;
88
89 if (sinfo->flags & XT_CONNTRACK_ORIGDST &&
90 FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip &
91 sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
92 sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip,
93 XT_CONNTRACK_ORIGDST))
94 return 0;
95
96 if (sinfo->flags & XT_CONNTRACK_REPLSRC &&
97 FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip &
98 sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) !=
99 sinfo->tuple[IP_CT_DIR_REPLY].src.ip,
100 XT_CONNTRACK_REPLSRC))
101 return 0;
102
103 if (sinfo->flags & XT_CONNTRACK_REPLDST &&
104 FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip &
105 sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) !=
106 sinfo->tuple[IP_CT_DIR_REPLY].dst.ip,
107 XT_CONNTRACK_REPLDST))
108 return 0;
109
110 if (sinfo->flags & XT_CONNTRACK_STATUS &&
111 FWINV((ct->status & sinfo->statusmask) == 0,
112 XT_CONNTRACK_STATUS))
113 return 0;
114
115 if (sinfo->flags & XT_CONNTRACK_EXPIRES) {
116 unsigned long expires = timer_pending(&ct->timeout) ?
117 (ct->timeout.expires - jiffies)/HZ : 0;
118
119 if (FWINV(!(expires >= sinfo->expires_min &&
120 expires <= sinfo->expires_max),
121 XT_CONNTRACK_EXPIRES))
122 return 0;
123 }
124 return 1;
125}
126
127#else /* CONFIG_IP_NF_CONNTRACK */
128static int 22static int
129match(const struct sk_buff *skb, 23match(const struct sk_buff *skb,
130 const struct net_device *in, 24 const struct net_device *in,
@@ -220,8 +114,6 @@ match(const struct sk_buff *skb,
220 return 1; 114 return 1;
221} 115}
222 116
223#endif /* CONFIG_NF_IP_CONNTRACK */
224
225static int 117static int
226checkentry(const char *tablename, 118checkentry(const char *tablename,
227 const void *ip, 119 const void *ip,
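
After the compat removal only the nf_conntrack flavour of match() remains; the FWINV() macro it keeps using simply XORs a raw test result with the per-flag invert bit. A stand-alone sketch of that shape:

#include <linux/types.h>

static bool example_fwinv(bool hit, u_int8_t invflags, u_int8_t flag)
{
	/* same shape as FWINV() in xt_conntrack.c */
	return hit ^ !!(invflags & flag);
}
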
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 26c7f4ad102a..56b247ecc283 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -1,7 +1,5 @@
1/* IP tables module for matching the value of the IPv4/IPv6 DSCP field 1/* IP tables module for matching the value of the IPv4/IPv6 DSCP field
2 * 2 *
3 * xt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp
4 *
5 * (C) 2002 by Harald Welte <laforge@netfilter.org> 3 * (C) 2002 by Harald Welte <laforge@netfilter.org>
6 * 4 *
7 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
@@ -34,7 +32,7 @@ static int match(const struct sk_buff *skb,
34 int *hotdrop) 32 int *hotdrop)
35{ 33{
36 const struct xt_dscp_info *info = matchinfo; 34 const struct xt_dscp_info *info = matchinfo;
37 u_int8_t dscp = ipv4_get_dsfield(skb->nh.iph) >> XT_DSCP_SHIFT; 35 u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
38 36
39 return (dscp == info->dscp) ^ !!info->invert; 37 return (dscp == info->dscp) ^ !!info->invert;
40} 38}
@@ -49,7 +47,7 @@ static int match6(const struct sk_buff *skb,
49 int *hotdrop) 47 int *hotdrop)
50{ 48{
51 const struct xt_dscp_info *info = matchinfo; 49 const struct xt_dscp_info *info = matchinfo;
52 u_int8_t dscp = ipv6_get_dsfield(skb->nh.ipv6h) >> XT_DSCP_SHIFT; 50 u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
53 51
54 return (dscp == info->dscp) ^ !!info->invert; 52 return (dscp == info->dscp) ^ !!info->invert;
55} 53}
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 9f37d593ca38..d3043fa32ebc 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -216,10 +216,8 @@ static int htable_create(struct xt_hashlimit_info *minfo, int family)
216 hinfo->pde->proc_fops = &dl_file_ops; 216 hinfo->pde->proc_fops = &dl_file_ops;
217 hinfo->pde->data = hinfo; 217 hinfo->pde->data = hinfo;
218 218
219 init_timer(&hinfo->timer); 219 setup_timer(&hinfo->timer, htable_gc, (unsigned long )hinfo);
220 hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval); 220 hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
221 hinfo->timer.data = (unsigned long )hinfo;
222 hinfo->timer.function = htable_gc;
223 add_timer(&hinfo->timer); 221 add_timer(&hinfo->timer);
224 222
225 spin_lock_bh(&hashlimit_lock); 223 spin_lock_bh(&hashlimit_lock);
@@ -380,22 +378,22 @@ hashlimit_init_dst(struct xt_hashlimit_htable *hinfo, struct dsthash_dst *dst,
380 switch (hinfo->family) { 378 switch (hinfo->family) {
381 case AF_INET: 379 case AF_INET:
382 if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) 380 if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP)
383 dst->addr.ip.dst = skb->nh.iph->daddr; 381 dst->addr.ip.dst = ip_hdr(skb)->daddr;
384 if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP) 382 if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP)
385 dst->addr.ip.src = skb->nh.iph->saddr; 383 dst->addr.ip.src = ip_hdr(skb)->saddr;
386 384
387 if (!(hinfo->cfg.mode & 385 if (!(hinfo->cfg.mode &
388 (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT))) 386 (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT)))
389 return 0; 387 return 0;
390 nexthdr = skb->nh.iph->protocol; 388 nexthdr = ip_hdr(skb)->protocol;
391 break; 389 break;
392#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) 390#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
393 case AF_INET6: 391 case AF_INET6:
394 if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) 392 if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP)
395 memcpy(&dst->addr.ip6.dst, &skb->nh.ipv6h->daddr, 393 memcpy(&dst->addr.ip6.dst, &ipv6_hdr(skb)->daddr,
396 sizeof(dst->addr.ip6.dst)); 394 sizeof(dst->addr.ip6.dst));
397 if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP) 395 if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP)
398 memcpy(&dst->addr.ip6.src, &skb->nh.ipv6h->saddr, 396 memcpy(&dst->addr.ip6.src, &ipv6_hdr(skb)->saddr,
399 sizeof(dst->addr.ip6.src)); 397 sizeof(dst->addr.ip6.src));
400 398
401 if (!(hinfo->cfg.mode & 399 if (!(hinfo->cfg.mode &
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 407d1d5da8a1..c139b2f43a10 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -5,26 +5,16 @@
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 *
9 * 19 Mar 2002 Harald Welte <laforge@gnumonks.org>:
10 * - Port to newnat infrastructure
11 */ 8 */
12 9
13#include <linux/module.h> 10#include <linux/module.h>
14#include <linux/skbuff.h> 11#include <linux/skbuff.h>
15#include <linux/netfilter.h> 12#include <linux/netfilter.h>
16#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
17#include <linux/netfilter_ipv4/ip_conntrack.h>
18#include <linux/netfilter_ipv4/ip_conntrack_core.h>
19#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
20#else
21#include <net/netfilter/nf_conntrack.h> 13#include <net/netfilter/nf_conntrack.h>
22#include <net/netfilter/nf_conntrack_core.h> 14#include <net/netfilter/nf_conntrack_core.h>
23#include <net/netfilter/nf_conntrack_helper.h> 15#include <net/netfilter/nf_conntrack_helper.h>
24#endif
25#include <linux/netfilter/x_tables.h> 16#include <linux/netfilter/x_tables.h>
26#include <linux/netfilter/xt_helper.h> 17#include <linux/netfilter/xt_helper.h>
27#include <net/netfilter/nf_conntrack_compat.h>
28 18
29MODULE_LICENSE("GPL"); 19MODULE_LICENSE("GPL");
30MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>"); 20MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>");
@@ -38,55 +28,6 @@ MODULE_ALIAS("ip6t_helper");
38#define DEBUGP(format, args...) 28#define DEBUGP(format, args...)
39#endif 29#endif
40 30
41#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
42static int
43match(const struct sk_buff *skb,
44 const struct net_device *in,
45 const struct net_device *out,
46 const struct xt_match *match,
47 const void *matchinfo,
48 int offset,
49 unsigned int protoff,
50 int *hotdrop)
51{
52 const struct xt_helper_info *info = matchinfo;
53 struct ip_conntrack *ct;
54 enum ip_conntrack_info ctinfo;
55 int ret = info->invert;
56
57 ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
58 if (!ct) {
59 DEBUGP("xt_helper: Eek! invalid conntrack?\n");
60 return ret;
61 }
62
63 if (!ct->master) {
64 DEBUGP("xt_helper: conntrack %p has no master\n", ct);
65 return ret;
66 }
67
68 read_lock_bh(&ip_conntrack_lock);
69 if (!ct->master->helper) {
70 DEBUGP("xt_helper: master ct %p has no helper\n",
71 exp->expectant);
72 goto out_unlock;
73 }
74
75 DEBUGP("master's name = %s , info->name = %s\n",
76 ct->master->helper->name, info->name);
77
78 if (info->name[0] == '\0')
79 ret ^= 1;
80 else
81 ret ^= !strncmp(ct->master->helper->name, info->name,
82 strlen(ct->master->helper->name));
83out_unlock:
84 read_unlock_bh(&ip_conntrack_lock);
85 return ret;
86}
87
88#else /* CONFIG_IP_NF_CONNTRACK */
89
90static int 31static int
91match(const struct sk_buff *skb, 32match(const struct sk_buff *skb,
92 const struct net_device *in, 33 const struct net_device *in,
@@ -134,7 +75,6 @@ out_unlock:
134 read_unlock_bh(&nf_conntrack_lock); 75 read_unlock_bh(&nf_conntrack_lock);
135 return ret; 76 return ret;
136} 77}
137#endif
138 78
139static int check(const char *tablename, 79static int check(const char *tablename,
140 const void *inf, 80 const void *inf,
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index 32fb998d9bac..77288c5ada78 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -31,7 +31,7 @@ match(const struct sk_buff *skb,
31 int *hotdrop) 31 int *hotdrop)
32{ 32{
33 const struct xt_length_info *info = matchinfo; 33 const struct xt_length_info *info = matchinfo;
34 u_int16_t pktlen = ntohs(skb->nh.iph->tot_len); 34 u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len);
35 35
36 return (pktlen >= info->min && pktlen <= info->max) ^ info->invert; 36 return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
37} 37}
@@ -47,7 +47,8 @@ match6(const struct sk_buff *skb,
47 int *hotdrop) 47 int *hotdrop)
48{ 48{
49 const struct xt_length_info *info = matchinfo; 49 const struct xt_length_info *info = matchinfo;
50 u_int16_t pktlen = ntohs(skb->nh.ipv6h->payload_len) + sizeof(struct ipv6hdr); 50 const u_int16_t pktlen = (ntohs(ipv6_hdr(skb)->payload_len) +
51 sizeof(struct ipv6hdr));
51 52
52 return (pktlen >= info->min && pktlen <= info->max) ^ info->invert; 53 return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
53} 54}
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 6fd8347c0058..571a72ab89ad 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -1,10 +1,3 @@
1/* Kernel module to control the rate
2 *
3 * 2 September 1999: Changed from the target RATE to the match
4 * `limit', removed logging. Did I mention that
5 * Alexey is a fucking genius?
6 * Rusty Russell (rusty@rustcorp.com.au). */
7
8/* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr> 1/* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr>
9 * (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr> 2 * (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr>
10 * 3 *
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index d430d90d7b26..1d3a1d98b885 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -37,8 +37,8 @@ match(const struct sk_buff *skb,
37 const struct xt_mac_info *info = matchinfo; 37 const struct xt_mac_info *info = matchinfo;
38 38
39 /* Is mac pointer valid? */ 39 /* Is mac pointer valid? */
40 return (skb->mac.raw >= skb->head 40 return (skb_mac_header(skb) >= skb->head &&
41 && (skb->mac.raw + ETH_HLEN) <= skb->data 41 (skb_mac_header(skb) + ETH_HLEN) <= skb->data
42 /* If so, compare... */ 42 /* If so, compare... */
43 && ((!compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr)) 43 && ((!compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr))
44 ^ info->invert)); 44 ^ info->invert));
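
The MAC-header validity test above is rewritten with skb_mac_header(): the header must lie inside the skb's head buffer and end at or before the network data. Sketch of the same bounds check:

#include <linux/if_ether.h>
#include <linux/skbuff.h>

static int example_mac_header_ok(const struct sk_buff *skb)
{
	return skb_mac_header(skb) >= skb->head &&
	       skb_mac_header(skb) + ETH_HLEN <= skb->data;
}
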
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index 16e7b0804287..e1409fc5c288 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -34,7 +34,7 @@ static int match(const struct sk_buff *skb,
34 const struct xt_pkttype_info *info = matchinfo; 34 const struct xt_pkttype_info *info = matchinfo;
35 35
36 if (skb->pkt_type == PACKET_LOOPBACK) 36 if (skb->pkt_type == PACKET_LOOPBACK)
37 type = (MULTICAST(skb->nh.iph->daddr) 37 type = (MULTICAST(ip_hdr(skb)->daddr)
38 ? PACKET_MULTICAST 38 ? PACKET_MULTICAST
39 : PACKET_BROADCAST); 39 : PACKET_BROADCAST);
40 else 40 else
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index 97ffc2fbc19d..c2017f8af9c4 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -1,7 +1,5 @@
1/* IP tables module for matching the routing realm 1/* IP tables module for matching the routing realm
2 * 2 *
3 * $Id: ipt_realm.c,v 1.3 2004/03/05 13:25:40 laforge Exp $
4 *
5 * (C) 2003 by Sampsa Ranta <sampsa@netsonic.fi> 3 * (C) 2003 by Sampsa Ranta <sampsa@netsonic.fi>
6 * 4 *
7 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index df37b912163a..149294f7df71 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -10,7 +10,7 @@
10 10
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/skbuff.h> 12#include <linux/skbuff.h>
13#include <net/netfilter/nf_conntrack_compat.h> 13#include <net/netfilter/nf_conntrack.h>
14#include <linux/netfilter/x_tables.h> 14#include <linux/netfilter/x_tables.h>
15#include <linux/netfilter/xt_state.h> 15#include <linux/netfilter/xt_state.h>
16 16
@@ -36,7 +36,7 @@ match(const struct sk_buff *skb,
36 36
37 if (nf_ct_is_untracked(skb)) 37 if (nf_ct_is_untracked(skb))
38 statebit = XT_STATE_UNTRACKED; 38 statebit = XT_STATE_UNTRACKED;
39 else if (!nf_ct_get_ctinfo(skb, &ctinfo)) 39 else if (!nf_ct_get(skb, &ctinfo))
40 statebit = XT_STATE_INVALID; 40 statebit = XT_STATE_INVALID;
41 else 41 else
42 statebit = XT_STATE_BIT(ctinfo); 42 statebit = XT_STATE_BIT(ctinfo);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c48b0f49f003..42d2fb94eff1 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -56,6 +56,7 @@
56#include <linux/types.h> 56#include <linux/types.h>
57#include <linux/audit.h> 57#include <linux/audit.h>
58#include <linux/selinux.h> 58#include <linux/selinux.h>
59#include <linux/mutex.h>
59 60
60#include <net/sock.h> 61#include <net/sock.h>
61#include <net/scm.h> 62#include <net/scm.h>
@@ -76,7 +77,8 @@ struct netlink_sock {
76 unsigned long state; 77 unsigned long state;
77 wait_queue_head_t wait; 78 wait_queue_head_t wait;
78 struct netlink_callback *cb; 79 struct netlink_callback *cb;
79 spinlock_t cb_lock; 80 struct mutex *cb_mutex;
81 struct mutex cb_def_mutex;
80 void (*data_ready)(struct sock *sk, int bytes); 82 void (*data_ready)(struct sock *sk, int bytes);
81 struct module *module; 83 struct module *module;
82}; 84};
@@ -108,6 +110,7 @@ struct netlink_table {
108 unsigned long *listeners; 110 unsigned long *listeners;
109 unsigned int nl_nonroot; 111 unsigned int nl_nonroot;
110 unsigned int groups; 112 unsigned int groups;
113 struct mutex *cb_mutex;
111 struct module *module; 114 struct module *module;
112 int registered; 115 int registered;
113}; 116};
@@ -118,6 +121,7 @@ static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
118 121
119static int netlink_dump(struct sock *sk); 122static int netlink_dump(struct sock *sk);
120static void netlink_destroy_callback(struct netlink_callback *cb); 123static void netlink_destroy_callback(struct netlink_callback *cb);
124static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb);
121 125
122static DEFINE_RWLOCK(nl_table_lock); 126static DEFINE_RWLOCK(nl_table_lock);
123static atomic_t nl_table_users = ATOMIC_INIT(0); 127static atomic_t nl_table_users = ATOMIC_INIT(0);
@@ -370,7 +374,8 @@ static struct proto netlink_proto = {
370 .obj_size = sizeof(struct netlink_sock), 374 .obj_size = sizeof(struct netlink_sock),
371}; 375};
372 376
373static int __netlink_create(struct socket *sock, int protocol) 377static int __netlink_create(struct socket *sock, struct mutex *cb_mutex,
378 int protocol)
374{ 379{
375 struct sock *sk; 380 struct sock *sk;
376 struct netlink_sock *nlk; 381 struct netlink_sock *nlk;
@@ -384,7 +389,12 @@ static int __netlink_create(struct socket *sock, int protocol)
384 sock_init_data(sock, sk); 389 sock_init_data(sock, sk);
385 390
386 nlk = nlk_sk(sk); 391 nlk = nlk_sk(sk);
387 spin_lock_init(&nlk->cb_lock); 392 if (cb_mutex)
393 nlk->cb_mutex = cb_mutex;
394 else {
395 nlk->cb_mutex = &nlk->cb_def_mutex;
396 mutex_init(nlk->cb_mutex);
397 }
388 init_waitqueue_head(&nlk->wait); 398 init_waitqueue_head(&nlk->wait);
389 399
390 sk->sk_destruct = netlink_sock_destruct; 400 sk->sk_destruct = netlink_sock_destruct;
@@ -395,8 +405,8 @@ static int __netlink_create(struct socket *sock, int protocol)
395static int netlink_create(struct socket *sock, int protocol) 405static int netlink_create(struct socket *sock, int protocol)
396{ 406{
397 struct module *module = NULL; 407 struct module *module = NULL;
408 struct mutex *cb_mutex;
398 struct netlink_sock *nlk; 409 struct netlink_sock *nlk;
399 unsigned int groups;
400 int err = 0; 410 int err = 0;
401 411
402 sock->state = SS_UNCONNECTED; 412 sock->state = SS_UNCONNECTED;
@@ -418,10 +428,10 @@ static int netlink_create(struct socket *sock, int protocol)
418 if (nl_table[protocol].registered && 428 if (nl_table[protocol].registered &&
419 try_module_get(nl_table[protocol].module)) 429 try_module_get(nl_table[protocol].module))
420 module = nl_table[protocol].module; 430 module = nl_table[protocol].module;
421 groups = nl_table[protocol].groups; 431 cb_mutex = nl_table[protocol].cb_mutex;
422 netlink_unlock_table(); 432 netlink_unlock_table();
423 433
424 if ((err = __netlink_create(sock, protocol)) < 0) 434 if ((err = __netlink_create(sock, cb_mutex, protocol)) < 0)
425 goto out_module; 435 goto out_module;
426 436
427 nlk = nlk_sk(sock->sk); 437 nlk = nlk_sk(sock->sk);
@@ -446,14 +456,14 @@ static int netlink_release(struct socket *sock)
446 sock_orphan(sk); 456 sock_orphan(sk);
447 nlk = nlk_sk(sk); 457 nlk = nlk_sk(sk);
448 458
449 spin_lock(&nlk->cb_lock); 459 mutex_lock(nlk->cb_mutex);
450 if (nlk->cb) { 460 if (nlk->cb) {
451 if (nlk->cb->done) 461 if (nlk->cb->done)
452 nlk->cb->done(nlk->cb); 462 nlk->cb->done(nlk->cb);
453 netlink_destroy_callback(nlk->cb); 463 netlink_destroy_callback(nlk->cb);
454 nlk->cb = NULL; 464 nlk->cb = NULL;
455 } 465 }
456 spin_unlock(&nlk->cb_lock); 466 mutex_unlock(nlk->cb_mutex);
457 467
458 /* OK. Socket is unlinked, and, therefore, 468 /* OK. Socket is unlinked, and, therefore,
459 no new packets will arrive */ 469 no new packets will arrive */
@@ -1215,7 +1225,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
1215 copied = len; 1225 copied = len;
1216 } 1226 }
1217 1227
1218 skb->h.raw = skb->data; 1228 skb_reset_transport_header(skb);
1219 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); 1229 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1220 1230
1221 if (msg->msg_name) { 1231 if (msg->msg_name) {
@@ -1242,6 +1252,9 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
1242 1252
1243 scm_recv(sock, msg, siocb->scm, flags); 1253 scm_recv(sock, msg, siocb->scm, flags);
1244 1254
1255 if (flags & MSG_TRUNC)
1256 copied = skb->len;
1257
1245out: 1258out:
1246 netlink_rcv_wake(sk); 1259 netlink_rcv_wake(sk);
1247 return err ? : copied; 1260 return err ? : copied;
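With the MSG_TRUNC hunk above, netlink recvmsg() now reports the true message length when the flag is set, matching other socket families. A hedged userspace sketch, not part of this diff; netlink_msg_size() and the buffer size are illustrative:

#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

/* Probe the size of the next queued netlink message without consuming it;
 * after this change recvmsg() returns skb->len rather than the truncated
 * copy length when MSG_TRUNC is set. */
static ssize_t netlink_msg_size(int fd)
{
	char small[16];		/* deliberately undersized */
	struct iovec iov = { small, sizeof(small) };
	struct msghdr msg;

	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	return recvmsg(fd, &msg, MSG_PEEK | MSG_TRUNC);
}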
@@ -1265,7 +1278,7 @@ static void netlink_data_ready(struct sock *sk, int len)
1265struct sock * 1278struct sock *
1266netlink_kernel_create(int unit, unsigned int groups, 1279netlink_kernel_create(int unit, unsigned int groups,
1267 void (*input)(struct sock *sk, int len), 1280 void (*input)(struct sock *sk, int len),
1268 struct module *module) 1281 struct mutex *cb_mutex, struct module *module)
1269{ 1282{
1270 struct socket *sock; 1283 struct socket *sock;
1271 struct sock *sk; 1284 struct sock *sk;
@@ -1280,7 +1293,7 @@ netlink_kernel_create(int unit, unsigned int groups,
1280 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) 1293 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
1281 return NULL; 1294 return NULL;
1282 1295
1283 if (__netlink_create(sock, unit) < 0) 1296 if (__netlink_create(sock, cb_mutex, unit) < 0)
1284 goto out_sock_release; 1297 goto out_sock_release;
1285 1298
1286 if (groups < 32) 1299 if (groups < 32)
@@ -1304,6 +1317,7 @@ netlink_kernel_create(int unit, unsigned int groups,
1304 netlink_table_grab(); 1317 netlink_table_grab();
1305 nl_table[unit].groups = groups; 1318 nl_table[unit].groups = groups;
1306 nl_table[unit].listeners = listeners; 1319 nl_table[unit].listeners = listeners;
1320 nl_table[unit].cb_mutex = cb_mutex;
1307 nl_table[unit].module = module; 1321 nl_table[unit].module = module;
1308 nl_table[unit].registered = 1; 1322 nl_table[unit].registered = 1;
1309 netlink_table_ungrab(); 1323 netlink_table_ungrab();
@@ -1346,7 +1360,7 @@ static int netlink_dump(struct sock *sk)
1346 if (!skb) 1360 if (!skb)
1347 goto errout; 1361 goto errout;
1348 1362
1349 spin_lock(&nlk->cb_lock); 1363 mutex_lock(nlk->cb_mutex);
1350 1364
1351 cb = nlk->cb; 1365 cb = nlk->cb;
1352 if (cb == NULL) { 1366 if (cb == NULL) {
@@ -1357,7 +1371,7 @@ static int netlink_dump(struct sock *sk)
1357 len = cb->dump(skb, cb); 1371 len = cb->dump(skb, cb);
1358 1372
1359 if (len > 0) { 1373 if (len > 0) {
1360 spin_unlock(&nlk->cb_lock); 1374 mutex_unlock(nlk->cb_mutex);
1361 skb_queue_tail(&sk->sk_receive_queue, skb); 1375 skb_queue_tail(&sk->sk_receive_queue, skb);
1362 sk->sk_data_ready(sk, len); 1376 sk->sk_data_ready(sk, len);
1363 return 0; 1377 return 0;
@@ -1375,13 +1389,13 @@ static int netlink_dump(struct sock *sk)
1375 if (cb->done) 1389 if (cb->done)
1376 cb->done(cb); 1390 cb->done(cb);
1377 nlk->cb = NULL; 1391 nlk->cb = NULL;
1378 spin_unlock(&nlk->cb_lock); 1392 mutex_unlock(nlk->cb_mutex);
1379 1393
1380 netlink_destroy_callback(cb); 1394 netlink_destroy_callback(cb);
1381 return 0; 1395 return 0;
1382 1396
1383errout_skb: 1397errout_skb:
1384 spin_unlock(&nlk->cb_lock); 1398 mutex_unlock(nlk->cb_mutex);
1385 kfree_skb(skb); 1399 kfree_skb(skb);
1386errout: 1400errout:
1387 return err; 1401 return err;
@@ -1413,19 +1427,24 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
1413 } 1427 }
1414 nlk = nlk_sk(sk); 1428 nlk = nlk_sk(sk);
1415 /* A dump or destruction is in progress... */ 1429 /* A dump or destruction is in progress... */
1416 spin_lock(&nlk->cb_lock); 1430 mutex_lock(nlk->cb_mutex);
1417 if (nlk->cb || sock_flag(sk, SOCK_DEAD)) { 1431 if (nlk->cb || sock_flag(sk, SOCK_DEAD)) {
1418 spin_unlock(&nlk->cb_lock); 1432 mutex_unlock(nlk->cb_mutex);
1419 netlink_destroy_callback(cb); 1433 netlink_destroy_callback(cb);
1420 sock_put(sk); 1434 sock_put(sk);
1421 return -EBUSY; 1435 return -EBUSY;
1422 } 1436 }
1423 nlk->cb = cb; 1437 nlk->cb = cb;
1424 spin_unlock(&nlk->cb_lock); 1438 mutex_unlock(nlk->cb_mutex);
1425 1439
1426 netlink_dump(sk); 1440 netlink_dump(sk);
1427 sock_put(sk); 1441 sock_put(sk);
1428 return 0; 1442
1443 /* We successfully started a dump, by returning -EINTR we
1444 * signal the queue management to interrupt processing of
1445 * any netlink messages so userspace gets a chance to read
1446 * the results. */
1447 return -EINTR;
1429} 1448}
1430 1449
1431void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) 1450void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
@@ -1462,27 +1481,35 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1462} 1481}
1463 1482
1464static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, 1483static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
1465 struct nlmsghdr *, int *)) 1484 struct nlmsghdr *))
1466{ 1485{
1467 struct nlmsghdr *nlh; 1486 struct nlmsghdr *nlh;
1468 int err; 1487 int err;
1469 1488
1470 while (skb->len >= nlmsg_total_size(0)) { 1489 while (skb->len >= nlmsg_total_size(0)) {
1471 nlh = (struct nlmsghdr *) skb->data; 1490 nlh = nlmsg_hdr(skb);
1491 err = 0;
1472 1492
1473 if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) 1493 if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
1474 return 0; 1494 return 0;
1475 1495
1476 if (cb(skb, nlh, &err) < 0) { 1496 /* Only requests are handled by the kernel */
1477 /* Not an error, but we have to interrupt processing 1497 if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
1478 * here. Note: that in this case we do not pull 1498 goto skip;
1479 * message from skb, it will be processed later. 1499
1480 */ 1500 /* Skip control messages */
1481 if (err == 0) 1501 if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
1482 return -1; 1502 goto skip;
1503
1504 err = cb(skb, nlh);
1505 if (err == -EINTR) {
1506 /* Not an error, but we interrupt processing */
1507 netlink_queue_skip(nlh, skb);
1508 return err;
1509 }
1510skip:
1511 if (nlh->nlmsg_flags & NLM_F_ACK || err)
1483 netlink_ack(skb, nlh, err); 1512 netlink_ack(skb, nlh, err);
1484 } else if (nlh->nlmsg_flags & NLM_F_ACK)
1485 netlink_ack(skb, nlh, 0);
1486 1513
1487 netlink_queue_skip(nlh, skb); 1514 netlink_queue_skip(nlh, skb);
1488 } 1515 }
@@ -1504,9 +1531,14 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
1504 * 1531 *
1505 * qlen must be initialized to 0 before the initial entry, afterwards 1532 * qlen must be initialized to 0 before the initial entry, afterwards
1506 * the function may be called repeatedly until qlen reaches 0. 1533 * the function may be called repeatedly until qlen reaches 0.
1534 *
1535 * The callback function may return -EINTR to signal that processing
1536 * of netlink messages shall be interrupted. In this case the message
1537 * currently being processed will NOT be requeued onto the receive
1538 * queue.
1507 */ 1539 */
1508void netlink_run_queue(struct sock *sk, unsigned int *qlen, 1540void netlink_run_queue(struct sock *sk, unsigned int *qlen,
1509 int (*cb)(struct sk_buff *, struct nlmsghdr *, int *)) 1541 int (*cb)(struct sk_buff *, struct nlmsghdr *))
1510{ 1542{
1511 struct sk_buff *skb; 1543 struct sk_buff *skb;
1512 1544
@@ -1537,7 +1569,7 @@ void netlink_run_queue(struct sock *sk, unsigned int *qlen,
1537 * Pulls the given netlink message off the socket buffer so the next 1569 * Pulls the given netlink message off the socket buffer so the next
1538 * call to netlink_run_queue() will not reconsider the message. 1570 * call to netlink_run_queue() will not reconsider the message.
1539 */ 1571 */
1540void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb) 1572static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb)
1541{ 1573{
1542 int msglen = NLMSG_ALIGN(nlh->nlmsg_len); 1574 int msglen = NLMSG_ALIGN(nlh->nlmsg_len);
1543 1575
@@ -1820,12 +1852,10 @@ core_initcall(netlink_proto_init);
1820 1852
1821EXPORT_SYMBOL(netlink_ack); 1853EXPORT_SYMBOL(netlink_ack);
1822EXPORT_SYMBOL(netlink_run_queue); 1854EXPORT_SYMBOL(netlink_run_queue);
1823EXPORT_SYMBOL(netlink_queue_skip);
1824EXPORT_SYMBOL(netlink_broadcast); 1855EXPORT_SYMBOL(netlink_broadcast);
1825EXPORT_SYMBOL(netlink_dump_start); 1856EXPORT_SYMBOL(netlink_dump_start);
1826EXPORT_SYMBOL(netlink_kernel_create); 1857EXPORT_SYMBOL(netlink_kernel_create);
1827EXPORT_SYMBOL(netlink_register_notifier); 1858EXPORT_SYMBOL(netlink_register_notifier);
1828EXPORT_SYMBOL(netlink_set_err);
1829EXPORT_SYMBOL(netlink_set_nonroot); 1859EXPORT_SYMBOL(netlink_set_nonroot);
1830EXPORT_SYMBOL(netlink_unicast); 1860EXPORT_SYMBOL(netlink_unicast);
1831EXPORT_SYMBOL(netlink_unregister_notifier); 1861EXPORT_SYMBOL(netlink_unregister_notifier);
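The af_netlink.c changes above thread a cb_mutex through socket creation; passing NULL selects the per-socket cb_def_mutex. A hedged sketch of an in-kernel caller under the new signature; NETLINK_EXAMPLE, example_mutex and example_rcv are placeholders:

#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/netlink.h>
#include <net/sock.h>

static DEFINE_MUTEX(example_mutex);

static void example_rcv(struct sock *sk, int len)
{
	/* drain sk->sk_receive_queue, e.g. via netlink_run_queue() */
}

static struct sock *example_create(void)
{
	/* a subsystem that must serialize dumps against its own locks
	 * passes a shared mutex; NULL would use the per-socket default */
	return netlink_kernel_create(NETLINK_EXAMPLE, 0, example_rcv,
				     &example_mutex, THIS_MODULE);
}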
diff --git a/net/netlink/attr.c b/net/netlink/attr.c
index 004139557e09..df5f820a4c32 100644
--- a/net/netlink/attr.c
+++ b/net/netlink/attr.c
@@ -67,6 +67,11 @@ static int validate_nla(struct nlattr *nla, int maxtype,
67 } 67 }
68 break; 68 break;
69 69
70 case NLA_BINARY:
71 if (pt->len && attrlen > pt->len)
72 return -ERANGE;
73 break;
74
70 default: 75 default:
71 if (pt->len) 76 if (pt->len)
72 minlen = pt->len; 77 minlen = pt->len;
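For the new NLA_BINARY case above, a non-zero .len is an upper bound on the attribute payload (anything longer fails with -ERANGE), unlike most other types where .len acts as a minimum. A hedged policy sketch; ATTR_COOKIE, ATTR_MAX and the 32-byte cap are illustrative:

static struct nla_policy example_policy[ATTR_MAX + 1] = {
	/* accept any blob up to 32 bytes; longer returns -ERANGE */
	[ATTR_COOKIE] = { .type = NLA_BINARY, .len = 32 },
};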
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index c2996794eb25..6e31234a4196 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -295,66 +295,46 @@ int genl_unregister_family(struct genl_family *family)
295 return -ENOENT; 295 return -ENOENT;
296} 296}
297 297
298static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, 298static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
299 int *errp)
300{ 299{
301 struct genl_ops *ops; 300 struct genl_ops *ops;
302 struct genl_family *family; 301 struct genl_family *family;
303 struct genl_info info; 302 struct genl_info info;
304 struct genlmsghdr *hdr = nlmsg_data(nlh); 303 struct genlmsghdr *hdr = nlmsg_data(nlh);
305 int hdrlen, err = -EINVAL; 304 int hdrlen, err;
306
307 if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
308 goto ignore;
309
310 if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
311 goto ignore;
312 305
313 family = genl_family_find_byid(nlh->nlmsg_type); 306 family = genl_family_find_byid(nlh->nlmsg_type);
314 if (family == NULL) { 307 if (family == NULL)
315 err = -ENOENT; 308 return -ENOENT;
316 goto errout;
317 }
318 309
319 hdrlen = GENL_HDRLEN + family->hdrsize; 310 hdrlen = GENL_HDRLEN + family->hdrsize;
320 if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) 311 if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
321 goto errout; 312 return -EINVAL;
322 313
323 ops = genl_get_cmd(hdr->cmd, family); 314 ops = genl_get_cmd(hdr->cmd, family);
324 if (ops == NULL) { 315 if (ops == NULL)
325 err = -EOPNOTSUPP; 316 return -EOPNOTSUPP;
326 goto errout;
327 }
328 317
329 if ((ops->flags & GENL_ADMIN_PERM) && security_netlink_recv(skb, CAP_NET_ADMIN)) { 318 if ((ops->flags & GENL_ADMIN_PERM) &&
330 err = -EPERM; 319 security_netlink_recv(skb, CAP_NET_ADMIN))
331 goto errout; 320 return -EPERM;
332 }
333 321
334 if (nlh->nlmsg_flags & NLM_F_DUMP) { 322 if (nlh->nlmsg_flags & NLM_F_DUMP) {
335 if (ops->dumpit == NULL) { 323 if (ops->dumpit == NULL)
336 err = -EOPNOTSUPP; 324 return -EOPNOTSUPP;
337 goto errout;
338 }
339 325
340 *errp = err = netlink_dump_start(genl_sock, skb, nlh, 326 return netlink_dump_start(genl_sock, skb, nlh,
341 ops->dumpit, ops->done); 327 ops->dumpit, ops->done);
342 if (err == 0)
343 skb_pull(skb, min(NLMSG_ALIGN(nlh->nlmsg_len),
344 skb->len));
345 return -1;
346 } 328 }
347 329
348 if (ops->doit == NULL) { 330 if (ops->doit == NULL)
349 err = -EOPNOTSUPP; 331 return -EOPNOTSUPP;
350 goto errout;
351 }
352 332
353 if (family->attrbuf) { 333 if (family->attrbuf) {
354 err = nlmsg_parse(nlh, hdrlen, family->attrbuf, family->maxattr, 334 err = nlmsg_parse(nlh, hdrlen, family->attrbuf, family->maxattr,
355 ops->policy); 335 ops->policy);
356 if (err < 0) 336 if (err < 0)
357 goto errout; 337 return err;
358 } 338 }
359 339
360 info.snd_seq = nlh->nlmsg_seq; 340 info.snd_seq = nlh->nlmsg_seq;
@@ -364,15 +344,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
364 info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; 344 info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
365 info.attrs = family->attrbuf; 345 info.attrs = family->attrbuf;
366 346
367 *errp = err = ops->doit(skb, &info); 347 return ops->doit(skb, &info);
368 return err;
369
370ignore:
371 return 0;
372
373errout:
374 *errp = err;
375 return -1;
376} 348}
377 349
378static void genl_rcv(struct sock *sk, int len) 350static void genl_rcv(struct sock *sk, int len)
@@ -586,7 +558,7 @@ static int __init genl_init(void)
586 558
587 netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV); 559 netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV);
588 genl_sock = netlink_kernel_create(NETLINK_GENERIC, GENL_MAX_ID, 560 genl_sock = netlink_kernel_create(NETLINK_GENERIC, GENL_MAX_ID,
589 genl_rcv, THIS_MODULE); 561 genl_rcv, NULL, THIS_MODULE);
590 if (genl_sock == NULL) 562 if (genl_sock == NULL)
591 panic("GENL: Cannot initialize generic netlink\n"); 563 panic("GENL: Cannot initialize generic netlink\n");
592 564
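genl_rcv_msg() above illustrates the new contract for netlink_run_queue()/netlink_rcv_skb() callbacks: return 0 or a negative errno (turned into an ack where needed), or propagate -EINTR from netlink_dump_start() to pause queue processing. A hedged sketch for another subsystem; example_sock, example_dumpit and example_done are placeholders:

static int example_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	if (nlh->nlmsg_flags & NLM_F_DUMP)
		/* propagates -EINTR on success, interrupting the queue */
		return netlink_dump_start(example_sock, skb, nlh,
					  example_dumpit, example_done);

	return -EOPNOTSUPP;	/* acked as an error by netlink_rcv_skb() */
}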
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index bf9837dd95c4..5d4a26c2aa0c 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -625,42 +625,42 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
625 ax25_address *source = NULL; 625 ax25_address *source = NULL;
626 ax25_uid_assoc *user; 626 ax25_uid_assoc *user;
627 struct net_device *dev; 627 struct net_device *dev;
628 int err = 0;
628 629
629 lock_sock(sk); 630 lock_sock(sk);
630 if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) { 631 if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) {
631 sock->state = SS_CONNECTED; 632 sock->state = SS_CONNECTED;
632 release_sock(sk); 633 goto out_release; /* Connect completed during an ERESTARTSYS event */
633 return 0; /* Connect completed during an ERESTARTSYS event */ 634 }
634 } 634 }
635 635
636 if (sk->sk_state == TCP_CLOSE && sock->state == SS_CONNECTING) { 636 if (sk->sk_state == TCP_CLOSE && sock->state == SS_CONNECTING) {
637 sock->state = SS_UNCONNECTED; 637 sock->state = SS_UNCONNECTED;
638 release_sock(sk); 638 err = -ECONNREFUSED;
639 return -ECONNREFUSED; 639 goto out_release;
640 } 640 }
641 641
642 if (sk->sk_state == TCP_ESTABLISHED) { 642 if (sk->sk_state == TCP_ESTABLISHED) {
643 release_sock(sk); 643 err = -EISCONN; /* No reconnect on a seqpacket socket */
644 return -EISCONN; /* No reconnect on a seqpacket socket */ 644 goto out_release;
645 } 645 }
646 646
647 sk->sk_state = TCP_CLOSE; 647 sk->sk_state = TCP_CLOSE;
648 sock->state = SS_UNCONNECTED; 648 sock->state = SS_UNCONNECTED;
649 649
650 if (addr_len != sizeof(struct sockaddr_ax25) && addr_len != sizeof(struct full_sockaddr_ax25)) { 650 if (addr_len != sizeof(struct sockaddr_ax25) && addr_len != sizeof(struct full_sockaddr_ax25)) {
651 release_sock(sk); 651 err = -EINVAL;
652 return -EINVAL; 652 goto out_release;
653 } 653 }
654 if (addr->sax25_family != AF_NETROM) { 654 if (addr->sax25_family != AF_NETROM) {
655 release_sock(sk); 655 err = -EINVAL;
656 return -EINVAL; 656 goto out_release;
657 } 657 }
658 if (sock_flag(sk, SOCK_ZAPPED)) { /* Must bind first - autobinding in this may or may not work */ 658 if (sock_flag(sk, SOCK_ZAPPED)) { /* Must bind first - autobinding in this may or may not work */
659 sock_reset_flag(sk, SOCK_ZAPPED); 659 sock_reset_flag(sk, SOCK_ZAPPED);
660 660
661 if ((dev = nr_dev_first()) == NULL) { 661 if ((dev = nr_dev_first()) == NULL) {
662 release_sock(sk); 662 err = -ENETUNREACH;
663 return -ENETUNREACH; 663 goto out_release;
664 } 664 }
665 source = (ax25_address *)dev->dev_addr; 665 source = (ax25_address *)dev->dev_addr;
666 666
@@ -671,8 +671,8 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
671 } else { 671 } else {
672 if (ax25_uid_policy && !capable(CAP_NET_ADMIN)) { 672 if (ax25_uid_policy && !capable(CAP_NET_ADMIN)) {
673 dev_put(dev); 673 dev_put(dev);
674 release_sock(sk); 674 err = -EPERM;
675 return -EPERM; 675 goto out_release;
676 } 676 }
677 nr->user_addr = *source; 677 nr->user_addr = *source;
678 } 678 }
@@ -707,8 +707,8 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
707 707
708 /* Now the loop */ 708 /* Now the loop */
709 if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) { 709 if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) {
710 release_sock(sk); 710 err = -EINPROGRESS;
711 return -EINPROGRESS; 711 goto out_release;
712 } 712 }
713 713
714 /* 714 /*
@@ -716,46 +716,46 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
716 * closed. 716 * closed.
717 */ 717 */
718 if (sk->sk_state == TCP_SYN_SENT) { 718 if (sk->sk_state == TCP_SYN_SENT) {
719 struct task_struct *tsk = current; 719 DEFINE_WAIT(wait);
720 DECLARE_WAITQUEUE(wait, tsk);
721 720
722 add_wait_queue(sk->sk_sleep, &wait);
723 for (;;) { 721 for (;;) {
724 set_current_state(TASK_INTERRUPTIBLE); 722 prepare_to_wait(sk->sk_sleep, &wait,
723 TASK_INTERRUPTIBLE);
725 if (sk->sk_state != TCP_SYN_SENT) 724 if (sk->sk_state != TCP_SYN_SENT)
726 break; 725 break;
727 release_sock(sk); 726 if (!signal_pending(current)) {
728 if (!signal_pending(tsk)) { 727 release_sock(sk);
729 schedule(); 728 schedule();
730 lock_sock(sk); 729 lock_sock(sk);
731 continue; 730 continue;
732 } 731 }
733 current->state = TASK_RUNNING; 732 err = -ERESTARTSYS;
734 remove_wait_queue(sk->sk_sleep, &wait); 733 break;
735 return -ERESTARTSYS;
736 } 734 }
737 current->state = TASK_RUNNING; 735 finish_wait(sk->sk_sleep, &wait);
738 remove_wait_queue(sk->sk_sleep, &wait); 736 if (err)
737 goto out_release;
739 } 738 }
740 739
741 if (sk->sk_state != TCP_ESTABLISHED) { 740 if (sk->sk_state != TCP_ESTABLISHED) {
742 sock->state = SS_UNCONNECTED; 741 sock->state = SS_UNCONNECTED;
743 release_sock(sk); 742 err = sock_error(sk); /* Always set at this point */
744 return sock_error(sk); /* Always set at this point */ 743 goto out_release;
745 } 744 }
746 745
747 sock->state = SS_CONNECTED; 746 sock->state = SS_CONNECTED;
747
748out_release:
748 release_sock(sk); 749 release_sock(sk);
749 750
750 return 0; 751 return err;
751} 752}
752 753
753static int nr_accept(struct socket *sock, struct socket *newsock, int flags) 754static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
754{ 755{
755 struct task_struct *tsk = current;
756 DECLARE_WAITQUEUE(wait, tsk);
757 struct sk_buff *skb; 756 struct sk_buff *skb;
758 struct sock *newsk; 757 struct sock *newsk;
758 DEFINE_WAIT(wait);
759 struct sock *sk; 759 struct sock *sk;
760 int err = 0; 760 int err = 0;
761 761
@@ -765,42 +765,40 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
765 lock_sock(sk); 765 lock_sock(sk);
766 if (sk->sk_type != SOCK_SEQPACKET) { 766 if (sk->sk_type != SOCK_SEQPACKET) {
767 err = -EOPNOTSUPP; 767 err = -EOPNOTSUPP;
768 goto out; 768 goto out_release;
769 } 769 }
770 770
771 if (sk->sk_state != TCP_LISTEN) { 771 if (sk->sk_state != TCP_LISTEN) {
772 err = -EINVAL; 772 err = -EINVAL;
773 goto out; 773 goto out_release;
774 } 774 }
775 775
776 /* 776 /*
777 * The write queue this time is holding sockets ready to use 777 * The write queue this time is holding sockets ready to use
778 * hooked into the SABM we saved 778 * hooked into the SABM we saved
779 */ 779 */
780 add_wait_queue(sk->sk_sleep, &wait);
781 for (;;) { 780 for (;;) {
781 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
782 skb = skb_dequeue(&sk->sk_receive_queue); 782 skb = skb_dequeue(&sk->sk_receive_queue);
783 if (skb) 783 if (skb)
784 break; 784 break;
785 785
786 current->state = TASK_INTERRUPTIBLE;
787 release_sock(sk);
788 if (flags & O_NONBLOCK) { 786 if (flags & O_NONBLOCK) {
789 current->state = TASK_RUNNING; 787 err = -EWOULDBLOCK;
790 remove_wait_queue(sk->sk_sleep, &wait); 788 break;
791 return -EWOULDBLOCK;
792 } 789 }
793 if (!signal_pending(tsk)) { 790 if (!signal_pending(current)) {
791 release_sock(sk);
794 schedule(); 792 schedule();
795 lock_sock(sk); 793 lock_sock(sk);
796 continue; 794 continue;
797 } 795 }
798 current->state = TASK_RUNNING; 796 err = -ERESTARTSYS;
799 remove_wait_queue(sk->sk_sleep, &wait); 797 break;
800 return -ERESTARTSYS;
801 } 798 }
802 current->state = TASK_RUNNING; 799 finish_wait(sk->sk_sleep, &wait);
803 remove_wait_queue(sk->sk_sleep, &wait); 800 if (err)
801 goto out_release;
804 802
805 newsk = skb->sk; 803 newsk = skb->sk;
806 newsk->sk_socket = newsock; 804 newsk->sk_socket = newsock;
@@ -811,8 +809,9 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
811 sk_acceptq_removed(sk); 809 sk_acceptq_removed(sk);
812 newsock->sk = newsk; 810 newsock->sk = newsk;
813 811
814out: 812out_release:
815 release_sock(sk); 813 release_sock(sk);
814
816 return err; 815 return err;
817} 816}
818 817
@@ -878,7 +877,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
878 if (frametype == NR_PROTOEXT && 877 if (frametype == NR_PROTOEXT &&
879 circuit_index == NR_PROTO_IP && circuit_id == NR_PROTO_IP) { 878 circuit_index == NR_PROTO_IP && circuit_id == NR_PROTO_IP) {
880 skb_pull(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN); 879 skb_pull(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN);
881 skb->h.raw = skb->data; 880 skb_reset_transport_header(skb);
882 881
883 return nr_rx_ip(skb, dev); 882 return nr_rx_ip(skb, dev);
884 } 883 }
@@ -904,7 +903,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
904 } 903 }
905 904
906 if (sk != NULL) { 905 if (sk != NULL) {
907 skb->h.raw = skb->data; 906 skb_reset_transport_header(skb);
908 907
909 if (frametype == NR_CONNACK && skb->len == 22) 908 if (frametype == NR_CONNACK && skb->len == 22)
910 nr_sk(sk)->bpqext = 1; 909 nr_sk(sk)->bpqext = 1;
@@ -1074,6 +1073,7 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock,
1074 goto out; 1073 goto out;
1075 1074
1076 skb_reserve(skb, size - len); 1075 skb_reserve(skb, size - len);
1076 skb_reset_transport_header(skb);
1077 1077
1078 /* 1078 /*
1079 * Push down the NET/ROM header 1079 * Push down the NET/ROM header
@@ -1094,14 +1094,12 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock,
1094 /* 1094 /*
1095 * Put the data on the end 1095 * Put the data on the end
1096 */ 1096 */
1097 skb_put(skb, len);
1097 1098
1098 skb->h.raw = skb_put(skb, len);
1099
1100 asmptr = skb->h.raw;
1101 SOCK_DEBUG(sk, "NET/ROM: Appending user data\n"); 1099 SOCK_DEBUG(sk, "NET/ROM: Appending user data\n");
1102 1100
1103 /* User data follows immediately after the NET/ROM transport header */ 1101 /* User data follows immediately after the NET/ROM transport header */
1104 if (memcpy_fromiovec(asmptr, msg->msg_iov, len)) { 1102 if (memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len)) {
1105 kfree_skb(skb); 1103 kfree_skb(skb);
1106 err = -EFAULT; 1104 err = -EFAULT;
1107 goto out; 1105 goto out;
@@ -1149,7 +1147,7 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock,
1149 return er; 1147 return er;
1150 } 1148 }
1151 1149
1152 skb->h.raw = skb->data; 1150 skb_reset_transport_header(skb);
1153 copied = skb->len; 1151 copied = skb->len;
1154 1152
1155 if (copied > size) { 1153 if (copied > size) {
@@ -1161,7 +1159,8 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock,
1161 1159
1162 if (sax != NULL) { 1160 if (sax != NULL) {
1163 sax->sax25_family = AF_NETROM; 1161 sax->sax25_family = AF_NETROM;
1164 memcpy(sax->sax25_call.ax25_call, skb->data + 7, AX25_ADDR_LEN); 1162 skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call,
1163 AX25_ADDR_LEN);
1165 } 1164 }
1166 1165
1167 msg->msg_namelen = sizeof(*sax); 1166 msg->msg_namelen = sizeof(*sax);
@@ -1209,6 +1208,12 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1209 release_sock(sk); 1208 release_sock(sk);
1210 return ret; 1209 return ret;
1211 1210
1211 case SIOCGSTAMPNS:
1212 lock_sock(sk);
1213 ret = sock_get_timestampns(sk, argp);
1214 release_sock(sk);
1215 return ret;
1216
1212 case SIOCGIFADDR: 1217 case SIOCGIFADDR:
1213 case SIOCSIFADDR: 1218 case SIOCSIFADDR:
1214 case SIOCGIFDSTADDR: 1219 case SIOCGIFDSTADDR:
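The nr_connect()/nr_accept() rewrites above replace open-coded wait queues with the DEFINE_WAIT()/prepare_to_wait()/finish_wait() idiom and funnel every exit through a single out_release label. A hedged distillation of the loop; condition() stands in for the socket-state test:

#include <linux/sched.h>
#include <linux/wait.h>
#include <net/sock.h>

static int example_wait_event(struct sock *sk, int (*condition)(struct sock *))
{
	DEFINE_WAIT(wait);
	int err = 0;

	for (;;) {
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
		if (condition(sk))
			break;
		if (!signal_pending(current)) {
			release_sock(sk);	/* sleep without the lock */
			schedule();
			lock_sock(sk);
			continue;
		}
		err = -ERESTARTSYS;
		break;
	}
	finish_wait(sk->sk_sleep, &wait);
	return err;
}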
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
index 9a97ed6e6910..c7b5d930e732 100644
--- a/net/netrom/nr_dev.c
+++ b/net/netrom/nr_dev.c
@@ -56,8 +56,8 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev)
56 56
57 /* Spoof incoming device */ 57 /* Spoof incoming device */
58 skb->dev = dev; 58 skb->dev = dev;
59 skb->mac.raw = skb->nh.raw; 59 skb_reset_mac_header(skb);
60 skb->nh.raw = skb->data; 60 skb_reset_network_header(skb);
61 skb->pkt_type = PACKET_HOST; 61 skb->pkt_type = PACKET_HOST;
62 62
63 netif_rx(skb); 63 netif_rx(skb);
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index 5560acbaaa95..68176483617f 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -51,10 +51,12 @@ static int nr_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
51 if ((skbn = alloc_skb(nr->fraglen, GFP_ATOMIC)) == NULL) 51 if ((skbn = alloc_skb(nr->fraglen, GFP_ATOMIC)) == NULL)
52 return 1; 52 return 1;
53 53
54 skbn->h.raw = skbn->data; 54 skb_reset_transport_header(skbn);
55 55
56 while ((skbo = skb_dequeue(&nr->frag_queue)) != NULL) { 56 while ((skbo = skb_dequeue(&nr->frag_queue)) != NULL) {
57 memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len); 57 skb_copy_from_linear_data(skbo,
58 skb_put(skbn, skbo->len),
59 skbo->len);
58 kfree_skb(skbo); 60 kfree_skb(skbo);
59 } 61 }
60 62
diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c
index e856ae1b360a..f324d5df4186 100644
--- a/net/netrom/nr_loopback.c
+++ b/net/netrom/nr_loopback.c
@@ -34,8 +34,8 @@ int nr_loopback_queue(struct sk_buff *skb)
34 struct sk_buff *skbn; 34 struct sk_buff *skbn;
35 35
36 if ((skbn = alloc_skb(skb->len, GFP_ATOMIC)) != NULL) { 36 if ((skbn = alloc_skb(skb->len, GFP_ATOMIC)) != NULL) {
37 memcpy(skb_put(skbn, skb->len), skb->data, skb->len); 37 skb_copy_from_linear_data(skb, skb_put(skbn, skb->len), skb->len);
38 skbn->h.raw = skbn->data; 38 skb_reset_transport_header(skbn);
39 39
40 skb_queue_tail(&loopback_queue, skbn); 40 skb_queue_tail(&loopback_queue, skbn);
41 41
diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c
index 0cbfb611465b..e3e6c44e1890 100644
--- a/net/netrom/nr_out.c
+++ b/net/netrom/nr_out.c
@@ -40,7 +40,7 @@ void nr_output(struct sock *sk, struct sk_buff *skb)
40 40
41 if (skb->len - NR_TRANSPORT_LEN > NR_MAX_PACKET_SIZE) { 41 if (skb->len - NR_TRANSPORT_LEN > NR_MAX_PACKET_SIZE) {
42 /* Save a copy of the Transport Header */ 42 /* Save a copy of the Transport Header */
43 memcpy(transport, skb->data, NR_TRANSPORT_LEN); 43 skb_copy_from_linear_data(skb, transport, NR_TRANSPORT_LEN);
44 skb_pull(skb, NR_TRANSPORT_LEN); 44 skb_pull(skb, NR_TRANSPORT_LEN);
45 45
46 frontlen = skb_headroom(skb); 46 frontlen = skb_headroom(skb);
@@ -54,13 +54,13 @@ void nr_output(struct sock *sk, struct sk_buff *skb)
54 len = (NR_MAX_PACKET_SIZE > skb->len) ? skb->len : NR_MAX_PACKET_SIZE; 54 len = (NR_MAX_PACKET_SIZE > skb->len) ? skb->len : NR_MAX_PACKET_SIZE;
55 55
56 /* Copy the user data */ 56 /* Copy the user data */
57 memcpy(skb_put(skbn, len), skb->data, len); 57 skb_copy_from_linear_data(skb, skb_put(skbn, len), len);
58 skb_pull(skb, len); 58 skb_pull(skb, len);
59 59
60 /* Duplicate the Transport Header */ 60 /* Duplicate the Transport Header */
61 skb_push(skbn, NR_TRANSPORT_LEN); 61 skb_push(skbn, NR_TRANSPORT_LEN);
62 memcpy(skbn->data, transport, NR_TRANSPORT_LEN); 62 skb_copy_to_linear_data(skbn, transport,
63 63 NR_TRANSPORT_LEN);
64 if (skb->len > 0) 64 if (skb->len > 0)
65 skbn->data[4] |= NR_MORE_FLAG; 65 skbn->data[4] |= NR_MORE_FLAG;
66 66
diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c
index 07b694d18870..04e7d0d2fd8f 100644
--- a/net/netrom/nr_subr.c
+++ b/net/netrom/nr_subr.c
@@ -226,13 +226,13 @@ void __nr_transmit_reply(struct sk_buff *skb, int mine, unsigned char cmdflags)
226 226
227 dptr = skb_put(skbn, NR_NETWORK_LEN + NR_TRANSPORT_LEN); 227 dptr = skb_put(skbn, NR_NETWORK_LEN + NR_TRANSPORT_LEN);
228 228
229 memcpy(dptr, skb->data + 7, AX25_ADDR_LEN); 229 skb_copy_from_linear_data_offset(skb, 7, dptr, AX25_ADDR_LEN);
230 dptr[6] &= ~AX25_CBIT; 230 dptr[6] &= ~AX25_CBIT;
231 dptr[6] &= ~AX25_EBIT; 231 dptr[6] &= ~AX25_EBIT;
232 dptr[6] |= AX25_SSSID_SPARE; 232 dptr[6] |= AX25_SSSID_SPARE;
233 dptr += AX25_ADDR_LEN; 233 dptr += AX25_ADDR_LEN;
234 234
235 memcpy(dptr, skb->data + 0, AX25_ADDR_LEN); 235 skb_copy_from_linear_data(skb, dptr, AX25_ADDR_LEN);
236 dptr[6] &= ~AX25_CBIT; 236 dptr[6] &= ~AX25_CBIT;
237 dptr[6] |= AX25_EBIT; 237 dptr[6] |= AX25_EBIT;
238 dptr[6] |= AX25_SSSID_SPARE; 238 dptr[6] |= AX25_SSSID_SPARE;
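The conversions through nr_out.c and nr_subr.c swap raw memcpy() on skb->data for typed helpers; each is behaviour-preserving. A hedged crib of the equivalences used above, with a minimal combined example; example_clone_payload() is illustrative:

/*
 * skb_copy_from_linear_data(skb, to, len)
 *         == memcpy(to, skb->data, len)
 * skb_copy_from_linear_data_offset(skb, off, to, len)
 *         == memcpy(to, skb->data + off, len)
 * skb_copy_to_linear_data(skb, from, len)
 *         == memcpy(skb->data, from, len)
 * skb_reset_transport_header(skb)
 *         == skb->h.raw = skb->data   (old field layout)
 */
#include <linux/skbuff.h>

static void example_clone_payload(struct sk_buff *dst, const struct sk_buff *src)
{
	skb_copy_from_linear_data(src, skb_put(dst, src->len), src->len);
	skb_reset_transport_header(dst);
}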
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 28d47e8f2873..02e401cd683f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -114,22 +114,22 @@ On receive:
114----------- 114-----------
115 115
116Incoming, dev->hard_header!=NULL 116Incoming, dev->hard_header!=NULL
117 mac.raw -> ll header 117 mac_header -> ll header
118 data -> data 118 data -> data
119 119
120Outgoing, dev->hard_header!=NULL 120Outgoing, dev->hard_header!=NULL
121 mac.raw -> ll header 121 mac_header -> ll header
122 data -> ll header 122 data -> ll header
123 123
124Incoming, dev->hard_header==NULL 124Incoming, dev->hard_header==NULL
125 mac.raw -> UNKNOWN position. It is very likely, that it points to ll header. 125 mac_header -> UNKNOWN position. It is very likely, that it points to ll
126 PPP makes it, that is wrong, because introduce asymmetry 126 header. PPP makes it, that is wrong, because introduce
127 between rx and tx paths. 127 asymmetry between rx and tx paths.
128 data -> data 128 data -> data
129 129
130Outgoing, dev->hard_header==NULL 130Outgoing, dev->hard_header==NULL
131 mac.raw -> data. ll header is still not built! 131 mac_header -> data. ll header is still not built!
132 data -> data 132 data -> data
133 133
134Resume 134Resume
135 If dev->hard_header==NULL we are unlikely to restore sensible ll header. 135 If dev->hard_header==NULL we are unlikely to restore sensible ll header.
@@ -139,12 +139,12 @@ On transmit:
139------------ 139------------
140 140
141dev->hard_header != NULL 141dev->hard_header != NULL
142 mac.raw -> ll header 142 mac_header -> ll header
143 data -> ll header 143 data -> ll header
144 144
145dev->hard_header == NULL (ll header is added by device, we cannot control it) 145dev->hard_header == NULL (ll header is added by device, we cannot control it)
146 mac.raw -> data 146 mac_header -> data
147 data -> data 147 data -> data
148 148
149 We should set nh.raw on output to correct position, 149 We should set nh.raw on output to correct position,
150 packet classifier depends on it. 150 packet classifier depends on it.
@@ -201,7 +201,8 @@ struct packet_sock {
201 struct packet_type prot_hook; 201 struct packet_type prot_hook;
202 spinlock_t bind_lock; 202 spinlock_t bind_lock;
203 unsigned int running:1, /* prot_hook is attached*/ 203 unsigned int running:1, /* prot_hook is attached*/
204 auxdata:1; 204 auxdata:1,
205 origdev:1;
205 int ifindex; /* bound device */ 206 int ifindex; /* bound device */
206 __be16 num; 207 __be16 num;
207#ifdef CONFIG_PACKET_MULTICAST 208#ifdef CONFIG_PACKET_MULTICAST
@@ -284,7 +285,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct
284 * Incoming packets have ll header pulled, 285 * Incoming packets have ll header pulled,
285 * push it back. 286 * push it back.
286 * 287 *
287 * For outgoing ones skb->data == skb->mac.raw 288 * For outgoing ones skb->data == skb_mac_header(skb)
288 * so that this procedure is noop. 289 * so that this procedure is noop.
289 */ 290 */
290 291
@@ -303,7 +304,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct
303 304
304 spkt = &PACKET_SKB_CB(skb)->sa.pkt; 305 spkt = &PACKET_SKB_CB(skb)->sa.pkt;
305 306
306 skb_push(skb, skb->data-skb->mac.raw); 307 skb_push(skb, skb->data - skb_mac_header(skb));
307 308
308 /* 309 /*
309 * The SOCK_PACKET socket receives _all_ frames. 310 * The SOCK_PACKET socket receives _all_ frames.
@@ -401,14 +402,14 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
401 * notable one here. This should really be fixed at the driver level. 402 * notable one here. This should really be fixed at the driver level.
402 */ 403 */
403 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 404 skb_reserve(skb, LL_RESERVED_SPACE(dev));
404 skb->nh.raw = skb->data; 405 skb_reset_network_header(skb);
405 406
406 /* Try to align data part correctly */ 407 /* Try to align data part correctly */
407 if (dev->hard_header) { 408 if (dev->hard_header) {
408 skb->data -= dev->hard_header_len; 409 skb->data -= dev->hard_header_len;
409 skb->tail -= dev->hard_header_len; 410 skb->tail -= dev->hard_header_len;
410 if (len < dev->hard_header_len) 411 if (len < dev->hard_header_len)
411 skb->nh.raw = skb->data; 412 skb_reset_network_header(skb);
412 } 413 }
413 414
414 /* Returns -EFAULT on error */ 415 /* Returns -EFAULT on error */
@@ -488,10 +489,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
488 never delivered to user. 489 never delivered to user.
489 */ 490 */
490 if (sk->sk_type != SOCK_DGRAM) 491 if (sk->sk_type != SOCK_DGRAM)
491 skb_push(skb, skb->data - skb->mac.raw); 492 skb_push(skb, skb->data - skb_mac_header(skb));
492 else if (skb->pkt_type == PACKET_OUTGOING) { 493 else if (skb->pkt_type == PACKET_OUTGOING) {
493 /* Special case: outgoing packets have ll header at head */ 494 /* Special case: outgoing packets have ll header at head */
494 skb_pull(skb, skb->nh.raw - skb->data); 495 skb_pull(skb, skb_network_offset(skb));
495 } 496 }
496 } 497 }
497 498
@@ -528,7 +529,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
528 sll->sll_hatype = dev->type; 529 sll->sll_hatype = dev->type;
529 sll->sll_protocol = skb->protocol; 530 sll->sll_protocol = skb->protocol;
530 sll->sll_pkttype = skb->pkt_type; 531 sll->sll_pkttype = skb->pkt_type;
531 sll->sll_ifindex = dev->ifindex; 532 if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST)
533 sll->sll_ifindex = orig_dev->ifindex;
534 else
535 sll->sll_ifindex = dev->ifindex;
532 sll->sll_halen = 0; 536 sll->sll_halen = 0;
533 537
534 if (dev->hard_header_parse) 538 if (dev->hard_header_parse)
@@ -582,6 +586,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
582 unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER; 586 unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
583 unsigned short macoff, netoff; 587 unsigned short macoff, netoff;
584 struct sk_buff *copy_skb = NULL; 588 struct sk_buff *copy_skb = NULL;
589 struct timeval tv;
585 590
586 if (skb->pkt_type == PACKET_LOOPBACK) 591 if (skb->pkt_type == PACKET_LOOPBACK)
587 goto drop; 592 goto drop;
@@ -591,10 +596,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
591 596
592 if (dev->hard_header) { 597 if (dev->hard_header) {
593 if (sk->sk_type != SOCK_DGRAM) 598 if (sk->sk_type != SOCK_DGRAM)
594 skb_push(skb, skb->data - skb->mac.raw); 599 skb_push(skb, skb->data - skb_mac_header(skb));
595 else if (skb->pkt_type == PACKET_OUTGOING) { 600 else if (skb->pkt_type == PACKET_OUTGOING) {
596 /* Special case: outgoing packets have ll header at head */ 601 /* Special case: outgoing packets have ll header at head */
597 skb_pull(skb, skb->nh.raw - skb->data); 602 skb_pull(skb, skb_network_offset(skb));
598 } 603 }
599 } 604 }
600 605
@@ -612,7 +617,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
612 if (sk->sk_type == SOCK_DGRAM) { 617 if (sk->sk_type == SOCK_DGRAM) {
613 macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16; 618 macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
614 } else { 619 } else {
615 unsigned maclen = skb->nh.raw - skb->data; 620 unsigned maclen = skb_network_offset(skb);
616 netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen)); 621 netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
617 macoff = netoff - maclen; 622 macoff = netoff - maclen;
618 } 623 }
@@ -656,12 +661,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
656 h->tp_snaplen = snaplen; 661 h->tp_snaplen = snaplen;
657 h->tp_mac = macoff; 662 h->tp_mac = macoff;
658 h->tp_net = netoff; 663 h->tp_net = netoff;
659 if (skb->tstamp.off_sec == 0) { 664 if (skb->tstamp.tv64 == 0) {
660 __net_timestamp(skb); 665 __net_timestamp(skb);
661 sock_enable_timestamp(sk); 666 sock_enable_timestamp(sk);
662 } 667 }
663 h->tp_sec = skb->tstamp.off_sec; 668 tv = ktime_to_timeval(skb->tstamp);
664 h->tp_usec = skb->tstamp.off_usec; 669 h->tp_sec = tv.tv_sec;
670 h->tp_usec = tv.tv_usec;
665 671
666 sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h))); 672 sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
667 sll->sll_halen = 0; 673 sll->sll_halen = 0;
@@ -671,7 +677,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
671 sll->sll_hatype = dev->type; 677 sll->sll_hatype = dev->type;
672 sll->sll_protocol = skb->protocol; 678 sll->sll_protocol = skb->protocol;
673 sll->sll_pkttype = skb->pkt_type; 679 sll->sll_pkttype = skb->pkt_type;
674 sll->sll_ifindex = dev->ifindex; 680 if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST)
681 sll->sll_ifindex = orig_dev->ifindex;
682 else
683 sll->sll_ifindex = dev->ifindex;
675 684
676 h->tp_status = status; 685 h->tp_status = status;
677 smp_mb(); 686 smp_mb();
@@ -766,14 +775,14 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
766 goto out_unlock; 775 goto out_unlock;
767 776
768 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 777 skb_reserve(skb, LL_RESERVED_SPACE(dev));
769 skb->nh.raw = skb->data; 778 skb_reset_network_header(skb);
770 779
771 if (dev->hard_header) { 780 if (dev->hard_header) {
772 int res; 781 int res;
773 err = -EINVAL; 782 err = -EINVAL;
774 res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len); 783 res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
775 if (sock->type != SOCK_DGRAM) { 784 if (sock->type != SOCK_DGRAM) {
776 skb->tail = skb->data; 785 skb_reset_tail_pointer(skb);
777 skb->len = 0; 786 skb->len = 0;
778 } else if (res < 0) 787 } else if (res < 0)
779 goto out_free; 788 goto out_free;
@@ -1143,7 +1152,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1143 aux.tp_len = PACKET_SKB_CB(skb)->origlen; 1152 aux.tp_len = PACKET_SKB_CB(skb)->origlen;
1144 aux.tp_snaplen = skb->len; 1153 aux.tp_snaplen = skb->len;
1145 aux.tp_mac = 0; 1154 aux.tp_mac = 0;
1146 aux.tp_net = skb->nh.raw - skb->data; 1155 aux.tp_net = skb_network_offset(skb);
1147 1156
1148 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); 1157 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
1149 } 1158 }
@@ -1411,6 +1420,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
1411 po->auxdata = !!val; 1420 po->auxdata = !!val;
1412 return 0; 1421 return 0;
1413 } 1422 }
1423 case PACKET_ORIGDEV:
1424 {
1425 int val;
1426
1427 if (optlen < sizeof(val))
1428 return -EINVAL;
1429 if (copy_from_user(&val, optval, sizeof(val)))
1430 return -EFAULT;
1431
1432 po->origdev = !!val;
1433 return 0;
1434 }
1414 default: 1435 default:
1415 return -ENOPROTOOPT; 1436 return -ENOPROTOOPT;
1416 } 1437 }
@@ -1454,6 +1475,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
1454 1475
1455 data = &val; 1476 data = &val;
1456 break; 1477 break;
1478 case PACKET_ORIGDEV:
1479 if (len > sizeof(int))
1480 len = sizeof(int);
1481 val = po->origdev;
1482
1483 data = &val;
1484 break;
1457 default: 1485 default:
1458 return -ENOPROTOOPT; 1486 return -ENOPROTOOPT;
1459 } 1487 }
@@ -1543,6 +1571,8 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd,
1543 } 1571 }
1544 case SIOCGSTAMP: 1572 case SIOCGSTAMP:
1545 return sock_get_timestamp(sk, (struct timeval __user *)arg); 1573 return sock_get_timestamp(sk, (struct timeval __user *)arg);
1574 case SIOCGSTAMPNS:
1575 return sock_get_timestampns(sk, (struct timespec __user *)arg);
1546 1576
1547#ifdef CONFIG_INET 1577#ifdef CONFIG_INET
1548 case SIOCADDRT: 1578 case SIOCADDRT:
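The af_packet.c hunks add PACKET_ORIGDEV, letting PACKET_HOST frames report the original ingress device (e.g. a bonding slave rather than the master) in sll_ifindex, alongside the new SIOCGSTAMPNS ioctl. A hedged userspace sketch of enabling the option; error handling omitted:

#include <sys/socket.h>
#include <linux/if_packet.h>

static void enable_origdev(int fd)
{
	int val = 1;

	setsockopt(fd, SOL_PACKET, PACKET_ORIGDEV, &val, sizeof(val));
}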
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index f92d5310847b..d476c43d5216 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -812,26 +812,26 @@ rose_try_next_neigh:
812 * closed. 812 * closed.
813 */ 813 */
814 if (sk->sk_state == TCP_SYN_SENT) { 814 if (sk->sk_state == TCP_SYN_SENT) {
815 struct task_struct *tsk = current; 815 DEFINE_WAIT(wait);
816 DECLARE_WAITQUEUE(wait, tsk);
817 816
818 add_wait_queue(sk->sk_sleep, &wait);
819 for (;;) { 817 for (;;) {
820 set_current_state(TASK_INTERRUPTIBLE); 818 prepare_to_wait(sk->sk_sleep, &wait,
819 TASK_INTERRUPTIBLE);
821 if (sk->sk_state != TCP_SYN_SENT) 820 if (sk->sk_state != TCP_SYN_SENT)
822 break; 821 break;
823 release_sock(sk); 822 if (!signal_pending(current)) {
824 if (!signal_pending(tsk)) { 823 release_sock(sk);
825 schedule(); 824 schedule();
826 lock_sock(sk); 825 lock_sock(sk);
827 continue; 826 continue;
828 } 827 }
829 current->state = TASK_RUNNING; 828 err = -ERESTARTSYS;
830 remove_wait_queue(sk->sk_sleep, &wait); 829 break;
831 return -ERESTARTSYS;
832 } 830 }
833 current->state = TASK_RUNNING; 831 finish_wait(sk->sk_sleep, &wait);
834 remove_wait_queue(sk->sk_sleep, &wait); 832
833 if (err)
834 goto out_release;
835 } 835 }
836 836
837 if (sk->sk_state != TCP_ESTABLISHED) { 837 if (sk->sk_state != TCP_ESTABLISHED) {
@@ -856,10 +856,9 @@ out_release:
856 856
857static int rose_accept(struct socket *sock, struct socket *newsock, int flags) 857static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
858{ 858{
859 struct task_struct *tsk = current;
860 DECLARE_WAITQUEUE(wait, tsk);
861 struct sk_buff *skb; 859 struct sk_buff *skb;
862 struct sock *newsk; 860 struct sock *newsk;
861 DEFINE_WAIT(wait);
863 struct sock *sk; 862 struct sock *sk;
864 int err = 0; 863 int err = 0;
865 864
@@ -869,42 +868,41 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
869 lock_sock(sk); 868 lock_sock(sk);
870 if (sk->sk_type != SOCK_SEQPACKET) { 869 if (sk->sk_type != SOCK_SEQPACKET) {
871 err = -EOPNOTSUPP; 870 err = -EOPNOTSUPP;
872 goto out; 871 goto out_release;
873 } 872 }
874 873
875 if (sk->sk_state != TCP_LISTEN) { 874 if (sk->sk_state != TCP_LISTEN) {
876 err = -EINVAL; 875 err = -EINVAL;
877 goto out; 876 goto out_release;
878 } 877 }
879 878
880 /* 879 /*
881 * The write queue this time is holding sockets ready to use 880 * The write queue this time is holding sockets ready to use
882 * hooked into the SABM we saved 881 * hooked into the SABM we saved
883 */ 882 */
884 add_wait_queue(sk->sk_sleep, &wait);
885 for (;;) { 883 for (;;) {
884 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
885
886 skb = skb_dequeue(&sk->sk_receive_queue); 886 skb = skb_dequeue(&sk->sk_receive_queue);
887 if (skb) 887 if (skb)
888 break; 888 break;
889 889
890 current->state = TASK_INTERRUPTIBLE;
891 release_sock(sk);
892 if (flags & O_NONBLOCK) { 890 if (flags & O_NONBLOCK) {
893 current->state = TASK_RUNNING; 891 err = -EWOULDBLOCK;
894 remove_wait_queue(sk->sk_sleep, &wait); 892 break;
895 return -EWOULDBLOCK;
896 } 893 }
897 if (!signal_pending(tsk)) { 894 if (!signal_pending(current)) {
895 release_sock(sk);
898 schedule(); 896 schedule();
899 lock_sock(sk); 897 lock_sock(sk);
900 continue; 898 continue;
901 } 899 }
902 current->state = TASK_RUNNING; 900 err = -ERESTARTSYS;
903 remove_wait_queue(sk->sk_sleep, &wait); 901 break;
904 return -ERESTARTSYS;
905 } 902 }
906 current->state = TASK_RUNNING; 903 finish_wait(sk->sk_sleep, &wait);
907 remove_wait_queue(sk->sk_sleep, &wait); 904 if (err)
905 goto out_release;
908 906
909 newsk = skb->sk; 907 newsk = skb->sk;
910 newsk->sk_socket = newsock; 908 newsk->sk_socket = newsock;
@@ -916,7 +914,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
916 sk->sk_ack_backlog--; 914 sk->sk_ack_backlog--;
917 newsock->sk = newsk; 915 newsock->sk = newsk;
918 916
919out: 917out_release:
920 release_sock(sk); 918 release_sock(sk);
921 919
922 return err; 920 return err;
@@ -1105,9 +1103,10 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
1105 */ 1103 */
1106 SOCK_DEBUG(sk, "ROSE: Appending user data\n"); 1104 SOCK_DEBUG(sk, "ROSE: Appending user data\n");
1107 1105
1108 asmptr = skb->h.raw = skb_put(skb, len); 1106 skb_reset_transport_header(skb);
1107 skb_put(skb, len);
1109 1108
1110 err = memcpy_fromiovec(asmptr, msg->msg_iov, len); 1109 err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
1111 if (err) { 1110 if (err) {
1112 kfree_skb(skb); 1111 kfree_skb(skb);
1113 return err; 1112 return err;
@@ -1155,7 +1154,7 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
1155 int lg; 1154 int lg;
1156 1155
1157 /* Save a copy of the Header */ 1156 /* Save a copy of the Header */
1158 memcpy(header, skb->data, ROSE_MIN_LEN); 1157 skb_copy_from_linear_data(skb, header, ROSE_MIN_LEN);
1159 skb_pull(skb, ROSE_MIN_LEN); 1158 skb_pull(skb, ROSE_MIN_LEN);
1160 1159
1161 frontlen = skb_headroom(skb); 1160 frontlen = skb_headroom(skb);
@@ -1175,12 +1174,12 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
1175 lg = (ROSE_PACLEN > skb->len) ? skb->len : ROSE_PACLEN; 1174 lg = (ROSE_PACLEN > skb->len) ? skb->len : ROSE_PACLEN;
1176 1175
1177 /* Copy the user data */ 1176 /* Copy the user data */
1178 memcpy(skb_put(skbn, lg), skb->data, lg); 1177 skb_copy_from_linear_data(skb, skb_put(skbn, lg), lg);
1179 skb_pull(skb, lg); 1178 skb_pull(skb, lg);
1180 1179
1181 /* Duplicate the Header */ 1180 /* Duplicate the Header */
1182 skb_push(skbn, ROSE_MIN_LEN); 1181 skb_push(skbn, ROSE_MIN_LEN);
1183 memcpy(skbn->data, header, ROSE_MIN_LEN); 1182 skb_copy_to_linear_data(skbn, header, ROSE_MIN_LEN);
1184 1183
1185 if (skb->len > 0) 1184 if (skb->len > 0)
1186 skbn->data[2] |= M_BIT; 1185 skbn->data[2] |= M_BIT;
@@ -1234,7 +1233,7 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock,
1234 *asmptr = qbit; 1233 *asmptr = qbit;
1235 } 1234 }
1236 1235
1237 skb->h.raw = skb->data; 1236 skb_reset_transport_header(skb);
1238 copied = skb->len; 1237 copied = skb->len;
1239 1238
1240 if (copied > size) { 1239 if (copied > size) {
@@ -1296,6 +1295,9 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1296 case SIOCGSTAMP: 1295 case SIOCGSTAMP:
1297 return sock_get_timestamp(sk, (struct timeval __user *) argp); 1296 return sock_get_timestamp(sk, (struct timeval __user *) argp);
1298 1297
1298 case SIOCGSTAMPNS:
1299 return sock_get_timestampns(sk, (struct timespec __user *) argp);
1300
1299 case SIOCGIFADDR: 1301 case SIOCGIFADDR:
1300 case SIOCSIFADDR: 1302 case SIOCSIFADDR:
1301 case SIOCGIFDSTADDR: 1303 case SIOCGIFDSTADDR:
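af_rose.c gains the same SIOCGSTAMPNS ioctl as af_packet.c and af_netrom.c above: the nanosecond-resolution sibling of SIOCGSTAMP, filling a struct timespec with the receive time of the last packet read. A hedged userspace sketch; last_rx_stamp() is illustrative and error handling is omitted:

#include <linux/sockios.h>
#include <sys/ioctl.h>
#include <time.h>

static struct timespec last_rx_stamp(int fd)
{
	struct timespec ts;

	ioctl(fd, SIOCGSTAMPNS, &ts);
	return ts;
}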
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index 3e41bd93ab9f..cd01642f0491 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -77,7 +77,7 @@ static void rose_loopback_timer(unsigned long param)
77 dest = (rose_address *)(skb->data + 4); 77 dest = (rose_address *)(skb->data + 4);
78 lci_o = 0xFFF - lci_i; 78 lci_o = 0xFFF - lci_i;
79 79
80 skb->h.raw = skb->data; 80 skb_reset_transport_header(skb);
81 81
82 sk = rose_find_socket(lci_o, &rose_loopback_neigh); 82 sk = rose_find_socket(lci_o, &rose_loopback_neigh);
83 if (sk) { 83 if (sk) {
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index a1233e1b1ab6..1f9aefd95a99 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -906,7 +906,7 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
906 } 906 }
907 } 907 }
908 else { 908 else {
909 skb->h.raw = skb->data; 909 skb_reset_transport_header(skb);
910 res = rose_process_rx_frame(sk, skb); 910 res = rose_process_rx_frame(sk, skb);
911 goto out; 911 goto out;
912 } 912 }
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
new file mode 100644
index 000000000000..d72380e304ae
--- /dev/null
+++ b/net/rxrpc/Kconfig
@@ -0,0 +1,37 @@
1#
2# RxRPC session sockets
3#
4
5config AF_RXRPC
6 tristate "RxRPC session sockets"
7 depends on EXPERIMENTAL
8 help
9 Say Y or M here to include support for RxRPC session sockets (just
10 the transport part, not the presentation part: (un)marshalling is
11 left to the application).
12
13 These are used for AFS kernel filesystem and userspace utilities.
14
15 This module at the moment only supports client operations and is
16 currently incomplete.
17
18 See Documentation/networking/rxrpc.txt.
19
20
21config AF_RXRPC_DEBUG
22 bool "RxRPC dynamic debugging"
23 depends on AF_RXRPC
24 help
25 Say Y here to make runtime controllable debugging messages appear.
26
27 See Documentation/networking/rxrpc.txt.
28
29
30config RXKAD
31 tristate "RxRPC Kerberos security"
32 depends on AF_RXRPC && KEYS
33 help
34 Provide Kerberos 4 and AFS kaserver security handling for AF_RXRPC
35 through the use of the key retention service.
36
37 See Documentation/networking/rxrpc.txt.
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index 6efcb6f162a0..c46867c61c98 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -1,25 +1,29 @@
1# 1#
2# Makefile for Linux kernel Rx RPC 2# Makefile for Linux kernel RxRPC
3# 3#
4 4
5#CFLAGS += -finstrument-functions 5af-rxrpc-objs := \
6 6 af_rxrpc.o \
7rxrpc-objs := \ 7 ar-accept.o \
8 call.o \ 8 ar-ack.o \
9 connection.o \ 9 ar-call.o \
10 krxiod.o \ 10 ar-connection.o \
11 krxsecd.o \ 11 ar-connevent.o \
12 krxtimod.o \ 12 ar-error.o \
13 main.o \ 13 ar-input.o \
14 peer.o \ 14 ar-key.o \
15 rxrpc_syms.o \ 15 ar-local.o \
16 transport.o 16 ar-output.o \
17 ar-peer.o \
18 ar-recvmsg.o \
19 ar-security.o \
20 ar-skbuff.o \
21 ar-transport.o
17 22
18ifeq ($(CONFIG_PROC_FS),y) 23ifeq ($(CONFIG_PROC_FS),y)
19rxrpc-objs += proc.o 24af-rxrpc-objs += ar-proc.o
20endif
21ifeq ($(CONFIG_SYSCTL),y)
22rxrpc-objs += sysctl.o
23endif 25endif
24 26
25obj-$(CONFIG_RXRPC) := rxrpc.o 27obj-$(CONFIG_AF_RXRPC) += af-rxrpc.o
28
29obj-$(CONFIG_RXKAD) += rxkad.o
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
new file mode 100644
index 000000000000..2c57df9c131b
--- /dev/null
+++ b/net/rxrpc/af_rxrpc.c
@@ -0,0 +1,879 @@
1/* AF_RXRPC implementation
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/net.h>
14#include <linux/skbuff.h>
15#include <linux/poll.h>
16#include <linux/proc_fs.h>
17#include <net/sock.h>
18#include <net/af_rxrpc.h>
19#include "ar-internal.h"
20
21MODULE_DESCRIPTION("RxRPC network protocol");
22MODULE_AUTHOR("Red Hat, Inc.");
23MODULE_LICENSE("GPL");
24MODULE_ALIAS_NETPROTO(PF_RXRPC);
25
26unsigned rxrpc_debug; // = RXRPC_DEBUG_KPROTO;
27module_param_named(debug, rxrpc_debug, uint, S_IWUSR | S_IRUGO);
28MODULE_PARM_DESC(rxrpc_debug, "RxRPC debugging mask");
29
30static int sysctl_rxrpc_max_qlen __read_mostly = 10;
31
32static struct proto rxrpc_proto;
33static const struct proto_ops rxrpc_rpc_ops;
34
35/* local epoch for detecting local-end reset */
36__be32 rxrpc_epoch;
37
38/* current debugging ID */
39atomic_t rxrpc_debug_id;
40
41/* count of skbs currently in use */
42atomic_t rxrpc_n_skbs;
43
44struct workqueue_struct *rxrpc_workqueue;
45
46static void rxrpc_sock_destructor(struct sock *);
47
48/*
49 * see if an RxRPC socket is currently writable
50 */
51static inline int rxrpc_writable(struct sock *sk)
52{
53 return atomic_read(&sk->sk_wmem_alloc) < (size_t) sk->sk_sndbuf;
54}
55
56/*
57 * wait for write bufferage to become available
58 */
59static void rxrpc_write_space(struct sock *sk)
60{
61 _enter("%p", sk);
62 read_lock(&sk->sk_callback_lock);
63 if (rxrpc_writable(sk)) {
64 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
65 wake_up_interruptible(sk->sk_sleep);
66 sk_wake_async(sk, 2, POLL_OUT);
67 }
68 read_unlock(&sk->sk_callback_lock);
69}
70
71/*
72 * validate an RxRPC address
73 */
74static int rxrpc_validate_address(struct rxrpc_sock *rx,
75 struct sockaddr_rxrpc *srx,
76 int len)
77{
78 if (len < sizeof(struct sockaddr_rxrpc))
79 return -EINVAL;
80
81 if (srx->srx_family != AF_RXRPC)
82 return -EAFNOSUPPORT;
83
84 if (srx->transport_type != SOCK_DGRAM)
85 return -ESOCKTNOSUPPORT;
86
87 len -= offsetof(struct sockaddr_rxrpc, transport);
88 if (srx->transport_len < sizeof(sa_family_t) ||
89 srx->transport_len > len)
90 return -EINVAL;
91
92 if (srx->transport.family != rx->proto)
93 return -EAFNOSUPPORT;
94
95 switch (srx->transport.family) {
96 case AF_INET:
97 _debug("INET: %x @ %u.%u.%u.%u",
98 ntohs(srx->transport.sin.sin_port),
99 NIPQUAD(srx->transport.sin.sin_addr));
100 if (srx->transport_len > 8)
101 memset((void *)&srx->transport + 8, 0,
102 srx->transport_len - 8);
103 break;
104
105 case AF_INET6:
106 default:
107 return -EAFNOSUPPORT;
108 }
109
110 return 0;
111}
112
113/*
114 * bind a local address to an RxRPC socket
115 */
116static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
117{
118 struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *) saddr;
119 struct sock *sk = sock->sk;
120 struct rxrpc_local *local;
121 struct rxrpc_sock *rx = rxrpc_sk(sk), *prx;
122 __be16 service_id;
123 int ret;
124
125 _enter("%p,%p,%d", rx, saddr, len);
126
127 ret = rxrpc_validate_address(rx, srx, len);
128 if (ret < 0)
129 goto error;
130
131 lock_sock(&rx->sk);
132
133 if (rx->sk.sk_state != RXRPC_UNCONNECTED) {
134 ret = -EINVAL;
135 goto error_unlock;
136 }
137
138 memcpy(&rx->srx, srx, sizeof(rx->srx));
139
140 /* find a local transport endpoint if we don't have one already */
141 local = rxrpc_lookup_local(&rx->srx);
142 if (IS_ERR(local)) {
143 ret = PTR_ERR(local);
144 goto error_unlock;
145 }
146
147 rx->local = local;
148 if (srx->srx_service) {
149 service_id = htons(srx->srx_service);
150 write_lock_bh(&local->services_lock);
151 list_for_each_entry(prx, &local->services, listen_link) {
152 if (prx->service_id == service_id)
153 goto service_in_use;
154 }
155
156 rx->service_id = service_id;
157 list_add_tail(&rx->listen_link, &local->services);
158 write_unlock_bh(&local->services_lock);
159
160 rx->sk.sk_state = RXRPC_SERVER_BOUND;
161 } else {
162 rx->sk.sk_state = RXRPC_CLIENT_BOUND;
163 }
164
165 release_sock(&rx->sk);
166 _leave(" = 0");
167 return 0;
168
169service_in_use:
170 ret = -EADDRINUSE;
171 write_unlock_bh(&local->services_lock);
172error_unlock:
173 release_sock(&rx->sk);
174error:
175 _leave(" = %d", ret);
176 return ret;
177}
178
179/*
180 * set the number of pending calls permitted on a listening socket
181 */
182static int rxrpc_listen(struct socket *sock, int backlog)
183{
184 struct sock *sk = sock->sk;
185 struct rxrpc_sock *rx = rxrpc_sk(sk);
186 int ret;
187
188 _enter("%p,%d", rx, backlog);
189
190 lock_sock(&rx->sk);
191
192 switch (rx->sk.sk_state) {
193 case RXRPC_UNCONNECTED:
194 ret = -EADDRNOTAVAIL;
195 break;
196 case RXRPC_CLIENT_BOUND:
197 case RXRPC_CLIENT_CONNECTED:
198 default:
199 ret = -EBUSY;
200 break;
201 case RXRPC_SERVER_BOUND:
202 ASSERT(rx->local != NULL);
203 sk->sk_max_ack_backlog = backlog;
204 rx->sk.sk_state = RXRPC_SERVER_LISTENING;
205 ret = 0;
206 break;
207 }
208
209 release_sock(&rx->sk);
210 _leave(" = %d", ret);
211 return ret;
212}
213
214/*
215 * find a transport by address
216 */
217static struct rxrpc_transport *rxrpc_name_to_transport(struct socket *sock,
218 struct sockaddr *addr,
219 int addr_len, int flags,
220 gfp_t gfp)
221{
222 struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *) addr;
223 struct rxrpc_transport *trans;
224 struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
225 struct rxrpc_peer *peer;
226
227 _enter("%p,%p,%d,%d", rx, addr, addr_len, flags);
228
229 ASSERT(rx->local != NULL);
230 ASSERT(rx->sk.sk_state > RXRPC_UNCONNECTED);
231
232 if (rx->srx.transport_type != srx->transport_type)
233 return ERR_PTR(-ESOCKTNOSUPPORT);
234 if (rx->srx.transport.family != srx->transport.family)
235 return ERR_PTR(-EAFNOSUPPORT);
236
237 /* find a remote transport endpoint from the local one */
238 peer = rxrpc_get_peer(srx, gfp);
239 if (IS_ERR(peer))
240 return ERR_PTR(PTR_ERR(peer));
241
242 /* find a transport */
243 trans = rxrpc_get_transport(rx->local, peer, gfp);
244 rxrpc_put_peer(peer);
245 _leave(" = %p", trans);
246 return trans;
247}
248
249/**
250 * rxrpc_kernel_begin_call - Allow a kernel service to begin a call
251 * @sock: The socket on which to make the call
252 * @srx: The address of the peer to contact (defaults to socket setting)
253 * @key: The security context to use (defaults to socket setting)
254 * @user_call_ID: The ID to use
255 *
256 * Allow a kernel service to begin a call on the nominated socket. This just
257 * sets up all the internal tracking structures and allocates connection and
258 * call IDs as appropriate. The call to be used is returned.
259 *
260 * The default socket destination address and security may be overridden by
261 * supplying @srx and @key.
262 */
263struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
264 struct sockaddr_rxrpc *srx,
265 struct key *key,
266 unsigned long user_call_ID,
267 gfp_t gfp)
268{
269 struct rxrpc_conn_bundle *bundle;
270 struct rxrpc_transport *trans;
271 struct rxrpc_call *call;
272 struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
273 __be16 service_id;
274
275 _enter(",,%x,%lx", key_serial(key), user_call_ID);
276
277 lock_sock(&rx->sk);
278
279 if (srx) {
280 trans = rxrpc_name_to_transport(sock, (struct sockaddr *) srx,
281 sizeof(*srx), 0, gfp);
282 if (IS_ERR(trans)) {
283 call = ERR_PTR(PTR_ERR(trans));
284 trans = NULL;
285 goto out;
286 }
287 } else {
288 trans = rx->trans;
289 if (!trans) {
290 call = ERR_PTR(-ENOTCONN);
291 goto out;
292 }
293 atomic_inc(&trans->usage);
294 }
295
296 service_id = rx->service_id;
297 if (srx)
298 service_id = htons(srx->srx_service);
299
300 if (!key)
301 key = rx->key;
302 if (key && !key->payload.data)
303 key = NULL; /* a no-security key */
304
305 bundle = rxrpc_get_bundle(rx, trans, key, service_id, gfp);
306 if (IS_ERR(bundle)) {
307 call = ERR_PTR(PTR_ERR(bundle));
308 goto out;
309 }
310
311 call = rxrpc_get_client_call(rx, trans, bundle, user_call_ID, true,
312 gfp);
313 rxrpc_put_bundle(trans, bundle);
314out:
315 rxrpc_put_transport(trans);
316 release_sock(&rx->sk);
317 _leave(" = %p", call);
318 return call;
319}
320
321EXPORT_SYMBOL(rxrpc_kernel_begin_call);
322
323/**
324 * rxrpc_kernel_end_call - Allow a kernel service to end a call it was using
325 * @call: The call to end
326 *
327 * Allow a kernel service to end a call it was using. The call must be
328 * complete before this is called (the call should be aborted if necessary).
329 */
330void rxrpc_kernel_end_call(struct rxrpc_call *call)
331{
332 _enter("%d{%d}", call->debug_id, atomic_read(&call->usage));
333 rxrpc_remove_user_ID(call->socket, call);
334 rxrpc_put_call(call);
335}
336
337EXPORT_SYMBOL(rxrpc_kernel_end_call);
338
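Taken together, these two exports bracket a kernel-side call's lifetime. A sketch of how an in-kernel service might use them, with rxrpc_socket, peer_srx, my_key and my_cookie standing in for the caller's own state (all assumed names):

	struct rxrpc_call *call;

	/* begin: allocates connection + call IDs, tagged with our cookie */
	call = rxrpc_kernel_begin_call(rxrpc_socket, &peer_srx, my_key,
				       (unsigned long) my_cookie, GFP_NOFS);
	if (IS_ERR(call))
		return PTR_ERR(call);

	/* ... send the request and consume the reply ... */

	/* end: drops the user-ID ref; call must be complete/aborted by now */
	rxrpc_kernel_end_call(call);
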
339/**
340 * rxrpc_kernel_intercept_rx_messages - Intercept received RxRPC messages
341 * @sock: The socket to intercept received messages on
342 * @interceptor: The function to pass the messages to
343 *
344 * Allow a kernel service to intercept messages heading for the Rx queue on an
345 * RxRPC socket. They get passed to the specified function instead.
346 * @interceptor should free the socket buffers it is given. @interceptor is
347 * called with the socket receive queue spinlock held and softirqs disabled -
348 * this ensures that the messages will be delivered in the right order.
349 */
350void rxrpc_kernel_intercept_rx_messages(struct socket *sock,
351 rxrpc_interceptor_t interceptor)
352{
353 struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
354
355 _enter("");
356 rx->interceptor = interceptor;
357}
358
359EXPORT_SYMBOL(rxrpc_kernel_intercept_rx_messages);
360
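What an interceptor may do is tightly constrained by the locking described above: it runs under the receive-queue spinlock with softirqs off, so it must not sleep, and it owns each skb it is handed. A sketch under those constraints (the rxrpc_interceptor_t signature and the my_* names are assumptions):

	static void my_rx_interceptor(struct sock *sk,
				      unsigned long user_call_ID,
				      struct sk_buff *skb)
	{
		/* can't sleep here; park the skb on a pre-initialised
		 * sk_buff_head and free or consume it in process context */
		skb_queue_tail(&my_service_rx_queue, skb);
		wake_up(&my_service_wq);
	}

	...
	rxrpc_kernel_intercept_rx_messages(rxrpc_socket, my_rx_interceptor);
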
361/*
362 * connect an RxRPC socket
363 * - this just targets it at a specific destination; no actual connection
364 * negotiation takes place
365 */
366static int rxrpc_connect(struct socket *sock, struct sockaddr *addr,
367 int addr_len, int flags)
368{
369 struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *) addr;
370 struct sock *sk = sock->sk;
371 struct rxrpc_transport *trans;
372 struct rxrpc_local *local;
373 struct rxrpc_sock *rx = rxrpc_sk(sk);
374 int ret;
375
376 _enter("%p,%p,%d,%d", rx, addr, addr_len, flags);
377
378 ret = rxrpc_validate_address(rx, srx, addr_len);
379 if (ret < 0) {
380 _leave(" = %d [bad addr]", ret);
381 return ret;
382 }
383
384 lock_sock(&rx->sk);
385
386 switch (rx->sk.sk_state) {
387 case RXRPC_UNCONNECTED:
388 /* find a local transport endpoint if we don't have one already */
389 ASSERTCMP(rx->local, ==, NULL);
390 rx->srx.srx_family = AF_RXRPC;
391 rx->srx.srx_service = 0;
392 rx->srx.transport_type = srx->transport_type;
393 rx->srx.transport_len = sizeof(sa_family_t);
394 rx->srx.transport.family = srx->transport.family;
395 local = rxrpc_lookup_local(&rx->srx);
396 if (IS_ERR(local)) {
397 release_sock(&rx->sk);
398 return PTR_ERR(local);
399 }
400 rx->local = local;
401 rx->sk.sk_state = RXRPC_CLIENT_BOUND;
402 case RXRPC_CLIENT_BOUND:
403 break;
404 case RXRPC_CLIENT_CONNECTED:
405 release_sock(&rx->sk);
406 return -EISCONN;
407 default:
408 release_sock(&rx->sk);
409		return -EBUSY; /* server sockets can't also connect */
410 }
411
412 trans = rxrpc_name_to_transport(sock, addr, addr_len, flags,
413 GFP_KERNEL);
414 if (IS_ERR(trans)) {
415 release_sock(&rx->sk);
416 _leave(" = %ld", PTR_ERR(trans));
417 return PTR_ERR(trans);
418 }
419
420 rx->trans = trans;
421 rx->service_id = htons(srx->srx_service);
422 rx->sk.sk_state = RXRPC_CLIENT_CONNECTED;
423
424 release_sock(&rx->sk);
425 return 0;
426}
427
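Since no packets are exchanged, a client-side connect() is cheap; it simply fixes the default target used by later sendmsg() calls that carry no address. Continuing the earlier userspace sketch:

	struct sockaddr_rxrpc srx = {
		.srx_family	= AF_RXRPC,
		.srx_service	= 1234,			/* example service ID */
		.transport_type	= SOCK_DGRAM,
		.transport_len	= sizeof(srx.transport.sin),
		.transport.sin.sin_family = AF_INET,
		.transport.sin.sin_port = htons(7000),
		.transport.sin.sin_addr.s_addr = htonl(0x7f000001), /* 127.0.0.1 */
	};
	int client = socket(AF_RXRPC, SOCK_DGRAM, PF_INET);

	if (connect(client, (struct sockaddr *) &srx, sizeof(srx)) < 0)
		perror("connect");
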
428/*
429 * send a message through an RxRPC socket
430 * - in a client this does a number of things:
431 * - finds/sets up a connection for the security specified (if any)
432 * - initiates a call (ID in control data)
433 * - ends the request phase of a call (if MSG_MORE is not set)
434 * - sends a call data packet
435 * - may send an abort (abort code in control data)
436 */
437static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock,
438 struct msghdr *m, size_t len)
439{
440 struct rxrpc_transport *trans;
441 struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
442 int ret;
443
444 _enter(",{%d},,%zu", rx->sk.sk_state, len);
445
446 if (m->msg_flags & MSG_OOB)
447 return -EOPNOTSUPP;
448
449 if (m->msg_name) {
450 ret = rxrpc_validate_address(rx, m->msg_name, m->msg_namelen);
451 if (ret < 0) {
452 _leave(" = %d [bad addr]", ret);
453 return ret;
454 }
455 }
456
457 trans = NULL;
458 lock_sock(&rx->sk);
459
460 if (m->msg_name) {
461 ret = -EISCONN;
462 trans = rxrpc_name_to_transport(sock, m->msg_name,
463 m->msg_namelen, 0, GFP_KERNEL);
464 if (IS_ERR(trans)) {
465 ret = PTR_ERR(trans);
466 trans = NULL;
467 goto out;
468 }
469 } else {
470 trans = rx->trans;
471 if (trans)
472 atomic_inc(&trans->usage);
473 }
474
475 switch (rx->sk.sk_state) {
476 case RXRPC_SERVER_LISTENING:
477 if (!m->msg_name) {
478 ret = rxrpc_server_sendmsg(iocb, rx, m, len);
479 break;
480 }
481 case RXRPC_SERVER_BOUND:
482 case RXRPC_CLIENT_BOUND:
483 if (!m->msg_name) {
484 ret = -ENOTCONN;
485 break;
486 }
487 case RXRPC_CLIENT_CONNECTED:
488 ret = rxrpc_client_sendmsg(iocb, rx, trans, m, len);
489 break;
490 default:
491 ret = -ENOTCONN;
492 break;
493 }
494
495out:
496 release_sock(&rx->sk);
497 if (trans)
498 rxrpc_put_transport(trans);
499 _leave(" = %d", ret);
500 return ret;
501}
502
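On the client path above, the call being addressed is named by control data rather than by a connection. A userspace sketch of starting (and, without MSG_MORE, completing) a request, assuming the RXRPC_USER_CALL_ID control-message type that this sendmsg path parses:

	const char request[] = "ping";	/* example payload */
	unsigned long call_id = 1;	/* caller's tag for this call */
	struct iovec iov = { .iov_base = (void *) request,
			     .iov_len = sizeof(request) - 1 };
	char control[CMSG_SPACE(sizeof(call_id))];
	struct msghdr msg = {
		.msg_iov	= &iov,
		.msg_iovlen	= 1,
		.msg_control	= control,
		.msg_controllen	= sizeof(control),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_RXRPC;
	cmsg->cmsg_type	 = RXRPC_USER_CALL_ID;
	cmsg->cmsg_len	 = CMSG_LEN(sizeof(call_id));
	memcpy(CMSG_DATA(cmsg), &call_id, sizeof(call_id));

	/* no MSG_MORE: this one message is the whole request phase */
	if (sendmsg(client, &msg, 0) < 0)
		perror("sendmsg");
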
503/*
504 * set RxRPC socket options
505 */
506static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
507 char __user *optval, int optlen)
508{
509 struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
510 unsigned min_sec_level;
511 int ret;
512
513 _enter(",%d,%d,,%d", level, optname, optlen);
514
515 lock_sock(&rx->sk);
516 ret = -EOPNOTSUPP;
517
518 if (level == SOL_RXRPC) {
519 switch (optname) {
520 case RXRPC_EXCLUSIVE_CONNECTION:
521 ret = -EINVAL;
522 if (optlen != 0)
523 goto error;
524 ret = -EISCONN;
525 if (rx->sk.sk_state != RXRPC_UNCONNECTED)
526 goto error;
527 set_bit(RXRPC_SOCK_EXCLUSIVE_CONN, &rx->flags);
528 goto success;
529
530 case RXRPC_SECURITY_KEY:
531 ret = -EINVAL;
532 if (rx->key)
533 goto error;
534 ret = -EISCONN;
535 if (rx->sk.sk_state != RXRPC_UNCONNECTED)
536 goto error;
537 ret = rxrpc_request_key(rx, optval, optlen);
538 goto error;
539
540 case RXRPC_SECURITY_KEYRING:
541 ret = -EINVAL;
542 if (rx->key)
543 goto error;
544 ret = -EISCONN;
545 if (rx->sk.sk_state != RXRPC_UNCONNECTED)
546 goto error;
547 ret = rxrpc_server_keyring(rx, optval, optlen);
548 goto error;
549
550 case RXRPC_MIN_SECURITY_LEVEL:
551 ret = -EINVAL;
552 if (optlen != sizeof(unsigned))
553 goto error;
554 ret = -EISCONN;
555 if (rx->sk.sk_state != RXRPC_UNCONNECTED)
556 goto error;
557 ret = get_user(min_sec_level,
558 (unsigned __user *) optval);
559 if (ret < 0)
560 goto error;
561 ret = -EINVAL;
562 if (min_sec_level > RXRPC_SECURITY_MAX)
563 goto error;
564 rx->min_sec_level = min_sec_level;
565 goto success;
566
567 default:
568 break;
569 }
570 }
571
572success:
573 ret = 0;
574error:
575 release_sock(&rx->sk);
576 return ret;
577}
578
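All four options deliberately fail with -EISCONN once the socket has left RXRPC_UNCONNECTED, so security must be configured between socket() and bind()/connect(). A userspace sketch, with the security-level constant name assumed:

	/* demand packet encryption on every call made through this socket */
	unsigned level = RXRPC_SECURITY_ENCRYPT;	/* assumed constant */

	if (setsockopt(client, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL,
		       &level, sizeof(level)) < 0)
		perror("setsockopt");

	/* point the socket at a client key by description; following
	 * rxrpc_request_key(), the payload is the description string of an
	 * "rxrpc"-type key added to the process keyrings beforehand */
	setsockopt(client, SOL_RXRPC, RXRPC_SECURITY_KEY,
		   "afs@EXAMPLE.COM", sizeof("afs@EXAMPLE.COM") - 1);
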
579/*
580 * permit an RxRPC socket to be polled
581 */
582static unsigned int rxrpc_poll(struct file *file, struct socket *sock,
583 poll_table *wait)
584{
585 unsigned int mask;
586 struct sock *sk = sock->sk;
587
588 poll_wait(file, sk->sk_sleep, wait);
589 mask = 0;
590
591 /* the socket is readable if there are any messages waiting on the Rx
592 * queue */
593 if (!skb_queue_empty(&sk->sk_receive_queue))
594 mask |= POLLIN | POLLRDNORM;
595
596	/* the socket is writable if there is space to add new data to the
597	 * socket; there is no guarantee that any particular call in progress
598	 * on the socket has space in its Tx ACK window */
599 if (rxrpc_writable(sk))
600 mask |= POLLOUT | POLLWRNORM;
601
602 return mask;
603}
604
605/*
606 * create an RxRPC socket
607 */
608static int rxrpc_create(struct socket *sock, int protocol)
609{
610 struct rxrpc_sock *rx;
611 struct sock *sk;
612
613 _enter("%p,%d", sock, protocol);
614
615 /* we support transport protocol UDP only */
616 if (protocol != PF_INET)
617 return -EPROTONOSUPPORT;
618
619 if (sock->type != SOCK_DGRAM)
620 return -ESOCKTNOSUPPORT;
621
622 sock->ops = &rxrpc_rpc_ops;
623 sock->state = SS_UNCONNECTED;
624
625 sk = sk_alloc(PF_RXRPC, GFP_KERNEL, &rxrpc_proto, 1);
626 if (!sk)
627 return -ENOMEM;
628
629 sock_init_data(sock, sk);
630 sk->sk_state = RXRPC_UNCONNECTED;
631 sk->sk_write_space = rxrpc_write_space;
632 sk->sk_max_ack_backlog = sysctl_rxrpc_max_qlen;
633 sk->sk_destruct = rxrpc_sock_destructor;
634
635 rx = rxrpc_sk(sk);
636 rx->proto = protocol;
637 rx->calls = RB_ROOT;
638
639 INIT_LIST_HEAD(&rx->listen_link);
640 INIT_LIST_HEAD(&rx->secureq);
641 INIT_LIST_HEAD(&rx->acceptq);
642 rwlock_init(&rx->call_lock);
643 memset(&rx->srx, 0, sizeof(rx->srx));
644
645 _leave(" = 0 [%p]", rx);
646 return 0;
647}
648
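The gatekeeping above admits exactly one shape of socket; from userspace that is

	int fd = socket(AF_RXRPC, SOCK_DGRAM, PF_INET);

where the third argument, ordinarily a protocol number, here names the transport address family - only PF_INET (UDP over IPv4) at this stage.
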
649/*
650 * RxRPC socket destructor
651 */
652static void rxrpc_sock_destructor(struct sock *sk)
653{
654 _enter("%p", sk);
655
656 rxrpc_purge_queue(&sk->sk_receive_queue);
657
658 BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
659 BUG_TRAP(sk_unhashed(sk));
660 BUG_TRAP(!sk->sk_socket);
661
662 if (!sock_flag(sk, SOCK_DEAD)) {
663		printk(KERN_ERR "Attempt to release alive rxrpc socket: %p\n", sk);
664 return;
665 }
666}
667
668/*
669 * release an RxRPC socket
670 */
671static int rxrpc_release_sock(struct sock *sk)
672{
673 struct rxrpc_sock *rx = rxrpc_sk(sk);
674
675 _enter("%p{%d,%d}", sk, sk->sk_state, atomic_read(&sk->sk_refcnt));
676
677 /* declare the socket closed for business */
678 sock_orphan(sk);
679 sk->sk_shutdown = SHUTDOWN_MASK;
680
681 spin_lock_bh(&sk->sk_receive_queue.lock);
682 sk->sk_state = RXRPC_CLOSE;
683 spin_unlock_bh(&sk->sk_receive_queue.lock);
684
685 ASSERTCMP(rx->listen_link.next, !=, LIST_POISON1);
686
687 if (!list_empty(&rx->listen_link)) {
688 write_lock_bh(&rx->local->services_lock);
689 list_del(&rx->listen_link);
690 write_unlock_bh(&rx->local->services_lock);
691 }
692
693 /* try to flush out this socket */
694 rxrpc_release_calls_on_socket(rx);
695 flush_workqueue(rxrpc_workqueue);
696 rxrpc_purge_queue(&sk->sk_receive_queue);
697
698 if (rx->conn) {
699 rxrpc_put_connection(rx->conn);
700 rx->conn = NULL;
701 }
702
703 if (rx->bundle) {
704 rxrpc_put_bundle(rx->trans, rx->bundle);
705 rx->bundle = NULL;
706 }
707 if (rx->trans) {
708 rxrpc_put_transport(rx->trans);
709 rx->trans = NULL;
710 }
711 if (rx->local) {
712 rxrpc_put_local(rx->local);
713 rx->local = NULL;
714 }
715
716 key_put(rx->key);
717 rx->key = NULL;
718 key_put(rx->securities);
719 rx->securities = NULL;
720 sock_put(sk);
721
722 _leave(" = 0");
723 return 0;
724}
725
726/*
727 * release an RxRPC BSD socket on close() or equivalent
728 */
729static int rxrpc_release(struct socket *sock)
730{
731 struct sock *sk = sock->sk;
732
733 _enter("%p{%p}", sock, sk);
734
735 if (!sk)
736 return 0;
737
738 sock->sk = NULL;
739
740 return rxrpc_release_sock(sk);
741}
742
743/*
744 * RxRPC network protocol
745 */
746static const struct proto_ops rxrpc_rpc_ops = {
747	.family		= PF_RXRPC,
748 .owner = THIS_MODULE,
749 .release = rxrpc_release,
750 .bind = rxrpc_bind,
751 .connect = rxrpc_connect,
752 .socketpair = sock_no_socketpair,
753 .accept = sock_no_accept,
754 .getname = sock_no_getname,
755 .poll = rxrpc_poll,
756 .ioctl = sock_no_ioctl,
757 .listen = rxrpc_listen,
758 .shutdown = sock_no_shutdown,
759 .setsockopt = rxrpc_setsockopt,
760 .getsockopt = sock_no_getsockopt,
761 .sendmsg = rxrpc_sendmsg,
762 .recvmsg = rxrpc_recvmsg,
763 .mmap = sock_no_mmap,
764 .sendpage = sock_no_sendpage,
765};
766
767static struct proto rxrpc_proto = {
768 .name = "RXRPC",
769 .owner = THIS_MODULE,
770 .obj_size = sizeof(struct rxrpc_sock),
771 .max_header = sizeof(struct rxrpc_header),
772};
773
774static struct net_proto_family rxrpc_family_ops = {
775 .family = PF_RXRPC,
776 .create = rxrpc_create,
777 .owner = THIS_MODULE,
778};
779
780/*
781 * initialise and register the RxRPC protocol
782 */
783static int __init af_rxrpc_init(void)
784{
785 struct sk_buff *dummy_skb;
786	int ret;
787
788 BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > sizeof(dummy_skb->cb));
789
790 rxrpc_epoch = htonl(xtime.tv_sec);
791
792 ret = -ENOMEM;
793 rxrpc_call_jar = kmem_cache_create(
794 "rxrpc_call_jar", sizeof(struct rxrpc_call), 0,
795 SLAB_HWCACHE_ALIGN, NULL, NULL);
796 if (!rxrpc_call_jar) {
797 printk(KERN_NOTICE "RxRPC: Failed to allocate call jar\n");
798 goto error_call_jar;
799 }
800
801 rxrpc_workqueue = create_workqueue("krxrpcd");
802 if (!rxrpc_workqueue) {
803 printk(KERN_NOTICE "RxRPC: Failed to allocate work queue\n");
804 goto error_work_queue;
805 }
806
807 ret = proto_register(&rxrpc_proto, 1);
808 if (ret < 0) {
809 printk(KERN_CRIT "RxRPC: Cannot register protocol\n");
810 goto error_proto;
811 }
812
813 ret = sock_register(&rxrpc_family_ops);
814 if (ret < 0) {
815 printk(KERN_CRIT "RxRPC: Cannot register socket family\n");
816 goto error_sock;
817 }
818
819 ret = register_key_type(&key_type_rxrpc);
820 if (ret < 0) {
821 printk(KERN_CRIT "RxRPC: Cannot register client key type\n");
822 goto error_key_type;
823 }
824
825 ret = register_key_type(&key_type_rxrpc_s);
826 if (ret < 0) {
827 printk(KERN_CRIT "RxRPC: Cannot register server key type\n");
828 goto error_key_type_s;
829 }
830
831#ifdef CONFIG_PROC_FS
832 proc_net_fops_create("rxrpc_calls", 0, &rxrpc_call_seq_fops);
833 proc_net_fops_create("rxrpc_conns", 0, &rxrpc_connection_seq_fops);
834#endif
835 return 0;
836
837error_key_type_s:
838 unregister_key_type(&key_type_rxrpc);
839error_key_type:
840 sock_unregister(PF_RXRPC);
841error_sock:
842 proto_unregister(&rxrpc_proto);
843error_proto:
844 destroy_workqueue(rxrpc_workqueue);
845error_work_queue:
846 kmem_cache_destroy(rxrpc_call_jar);
847error_call_jar:
848 return ret;
849}
850
851/*
852 * unregister the RxRPC protocol
853 */
854static void __exit af_rxrpc_exit(void)
855{
856 _enter("");
857 unregister_key_type(&key_type_rxrpc_s);
858 unregister_key_type(&key_type_rxrpc);
859 sock_unregister(PF_RXRPC);
860 proto_unregister(&rxrpc_proto);
861 rxrpc_destroy_all_calls();
862 rxrpc_destroy_all_connections();
863 rxrpc_destroy_all_transports();
864 rxrpc_destroy_all_peers();
865 rxrpc_destroy_all_locals();
866
867 ASSERTCMP(atomic_read(&rxrpc_n_skbs), ==, 0);
868
869 _debug("flush scheduled work");
870 flush_workqueue(rxrpc_workqueue);
871 proc_net_remove("rxrpc_conns");
872 proc_net_remove("rxrpc_calls");
873 destroy_workqueue(rxrpc_workqueue);
874 kmem_cache_destroy(rxrpc_call_jar);
875 _leave("");
876}
877
878module_init(af_rxrpc_init);
879module_exit(af_rxrpc_exit);
diff --git a/net/rxrpc/ar-accept.c b/net/rxrpc/ar-accept.c
new file mode 100644
index 000000000000..92a87fde8bfe
--- /dev/null
+++ b/net/rxrpc/ar-accept.c
@@ -0,0 +1,504 @@
1/* incoming call handling
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/net.h>
14#include <linux/skbuff.h>
15#include <linux/errqueue.h>
16#include <linux/udp.h>
17#include <linux/in.h>
18#include <linux/in6.h>
19#include <linux/icmp.h>
20#include <net/sock.h>
21#include <net/af_rxrpc.h>
22#include <net/ip.h>
23#include "ar-internal.h"
24
25/*
26 * refuse an incoming call by replying with a BUSY packet
27 */
28static int rxrpc_busy(struct rxrpc_local *local, struct sockaddr_rxrpc *srx,
29 struct rxrpc_header *hdr)
30{
31 struct msghdr msg;
32 struct kvec iov[1];
33 size_t len;
34 int ret;
35
36 _enter("%d,,", local->debug_id);
37
38 msg.msg_name = &srx->transport.sin;
39 msg.msg_namelen = sizeof(srx->transport.sin);
40 msg.msg_control = NULL;
41 msg.msg_controllen = 0;
42 msg.msg_flags = 0;
43
44 hdr->seq = 0;
45 hdr->type = RXRPC_PACKET_TYPE_BUSY;
46 hdr->flags = 0;
47 hdr->userStatus = 0;
48 hdr->_rsvd = 0;
49
50 iov[0].iov_base = hdr;
51 iov[0].iov_len = sizeof(*hdr);
52
53 len = iov[0].iov_len;
54
55 hdr->serial = htonl(1);
56 _proto("Tx BUSY %%%u", ntohl(hdr->serial));
57
58 ret = kernel_sendmsg(local->socket, &msg, iov, 1, len);
59 if (ret < 0) {
60 _leave(" = -EAGAIN [sendmsg failed: %d]", ret);
61 return -EAGAIN;
62 }
63
64 _leave(" = 0");
65 return 0;
66}
67
68/*
69 * accept an incoming call that needs peer, transport and/or connection setting
70 * up
71 */
72static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
73 struct rxrpc_sock *rx,
74 struct sk_buff *skb,
75 struct sockaddr_rxrpc *srx)
76{
77 struct rxrpc_connection *conn;
78 struct rxrpc_transport *trans;
79 struct rxrpc_skb_priv *sp, *nsp;
80 struct rxrpc_peer *peer;
81 struct rxrpc_call *call;
82 struct sk_buff *notification;
83 int ret;
84
85 _enter("");
86
87 sp = rxrpc_skb(skb);
88
89 /* get a notification message to send to the server app */
90	notification = alloc_skb(0, GFP_NOFS);
	if (!notification) {
		_leave(" = -EBUSY [nomem]");
		return -EBUSY;
	}
91	rxrpc_new_skb(notification);
92	notification->mark = RXRPC_SKB_MARK_NEW_CALL;
93
94 peer = rxrpc_get_peer(srx, GFP_NOIO);
95 if (IS_ERR(peer)) {
96 _debug("no peer");
97 ret = -EBUSY;
98 goto error;
99 }
100
101 trans = rxrpc_get_transport(local, peer, GFP_NOIO);
102 rxrpc_put_peer(peer);
103 if (!trans) {
104 _debug("no trans");
105 ret = -EBUSY;
106 goto error;
107 }
108
109 conn = rxrpc_incoming_connection(trans, &sp->hdr, GFP_NOIO);
110 rxrpc_put_transport(trans);
111 if (IS_ERR(conn)) {
112 _debug("no conn");
113 ret = PTR_ERR(conn);
114 goto error;
115 }
116
117 call = rxrpc_incoming_call(rx, conn, &sp->hdr, GFP_NOIO);
118 rxrpc_put_connection(conn);
119 if (IS_ERR(call)) {
120 _debug("no call");
121 ret = PTR_ERR(call);
122 goto error;
123 }
124
125 /* attach the call to the socket */
126 read_lock_bh(&local->services_lock);
127 if (rx->sk.sk_state == RXRPC_CLOSE)
128 goto invalid_service;
129
130 write_lock(&rx->call_lock);
131 if (!test_and_set_bit(RXRPC_CALL_INIT_ACCEPT, &call->flags)) {
132 rxrpc_get_call(call);
133
134 spin_lock(&call->conn->state_lock);
135 if (sp->hdr.securityIndex > 0 &&
136 call->conn->state == RXRPC_CONN_SERVER_UNSECURED) {
137 _debug("await conn sec");
138 list_add_tail(&call->accept_link, &rx->secureq);
139 call->conn->state = RXRPC_CONN_SERVER_CHALLENGING;
140 atomic_inc(&call->conn->usage);
141 set_bit(RXRPC_CONN_CHALLENGE, &call->conn->events);
142 rxrpc_queue_conn(call->conn);
143 } else {
144 _debug("conn ready");
145 call->state = RXRPC_CALL_SERVER_ACCEPTING;
146 list_add_tail(&call->accept_link, &rx->acceptq);
147 rxrpc_get_call(call);
148 nsp = rxrpc_skb(notification);
149 nsp->call = call;
150
151 ASSERTCMP(atomic_read(&call->usage), >=, 3);
152
153 _debug("notify");
154 spin_lock(&call->lock);
155 ret = rxrpc_queue_rcv_skb(call, notification, true,
156 false);
157 spin_unlock(&call->lock);
158 notification = NULL;
159 if (ret < 0)
160 BUG();
161 }
162 spin_unlock(&call->conn->state_lock);
163
164 _debug("queued");
165 }
166 write_unlock(&rx->call_lock);
167
168 _debug("process");
169 rxrpc_fast_process_packet(call, skb);
170
171 _debug("done");
172 read_unlock_bh(&local->services_lock);
173 rxrpc_free_skb(notification);
174 rxrpc_put_call(call);
175 _leave(" = 0");
176 return 0;
177
178invalid_service:
179 _debug("invalid");
180 read_unlock_bh(&local->services_lock);
181
182 read_lock_bh(&call->state_lock);
183 if (!test_bit(RXRPC_CALL_RELEASE, &call->flags) &&
184 !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events)) {
185 rxrpc_get_call(call);
186 rxrpc_queue_call(call);
187 }
188 read_unlock_bh(&call->state_lock);
189 rxrpc_put_call(call);
190 ret = -ECONNREFUSED;
191error:
192 rxrpc_free_skb(notification);
193 _leave(" = %d", ret);
194 return ret;
195}
196
197/*
198 * accept incoming calls that need peer, transport and/or connection setting up
199 * - the packets we get are all incoming client DATA packets that have seq == 1
200 */
201void rxrpc_accept_incoming_calls(struct work_struct *work)
202{
203 struct rxrpc_local *local =
204 container_of(work, struct rxrpc_local, acceptor);
205 struct rxrpc_skb_priv *sp;
206 struct sockaddr_rxrpc srx;
207 struct rxrpc_sock *rx;
208 struct sk_buff *skb;
209 __be16 service_id;
210 int ret;
211
212 _enter("%d", local->debug_id);
213
214 read_lock_bh(&rxrpc_local_lock);
215 if (atomic_read(&local->usage) > 0)
216 rxrpc_get_local(local);
217 else
218 local = NULL;
219 read_unlock_bh(&rxrpc_local_lock);
220 if (!local) {
221 _leave(" [local dead]");
222 return;
223 }
224
225process_next_packet:
226 skb = skb_dequeue(&local->accept_queue);
227 if (!skb) {
228 rxrpc_put_local(local);
229		_leave("");
230 return;
231 }
232
233 _net("incoming call skb %p", skb);
234
235 sp = rxrpc_skb(skb);
236
237 /* determine the remote address */
238 memset(&srx, 0, sizeof(srx));
239 srx.srx_family = AF_RXRPC;
240 srx.transport.family = local->srx.transport.family;
241 srx.transport_type = local->srx.transport_type;
242 switch (srx.transport.family) {
243 case AF_INET:
244 srx.transport_len = sizeof(struct sockaddr_in);
245 srx.transport.sin.sin_port = udp_hdr(skb)->source;
246 srx.transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
247 break;
248 default:
249 goto busy;
250 }
251
252 /* get the socket providing the service */
253 service_id = sp->hdr.serviceId;
254 read_lock_bh(&local->services_lock);
255 list_for_each_entry(rx, &local->services, listen_link) {
256 if (rx->service_id == service_id &&
257 rx->sk.sk_state != RXRPC_CLOSE)
258 goto found_service;
259 }
260 read_unlock_bh(&local->services_lock);
261 goto invalid_service;
262
263found_service:
264 _debug("found service %hd", ntohs(rx->service_id));
265 if (sk_acceptq_is_full(&rx->sk))
266 goto backlog_full;
267 sk_acceptq_added(&rx->sk);
268 sock_hold(&rx->sk);
269 read_unlock_bh(&local->services_lock);
270
271 ret = rxrpc_accept_incoming_call(local, rx, skb, &srx);
272 if (ret < 0)
273 sk_acceptq_removed(&rx->sk);
274 sock_put(&rx->sk);
275 switch (ret) {
276 case -ECONNRESET: /* old calls are ignored */
277 case -ECONNABORTED: /* aborted calls are reaborted or ignored */
278 case 0:
279 goto process_next_packet;
280 case -ECONNREFUSED:
281 goto invalid_service;
282 case -EBUSY:
283 goto busy;
284 case -EKEYREJECTED:
285 goto security_mismatch;
286 default:
287 BUG();
288 }
289
290backlog_full:
291 read_unlock_bh(&local->services_lock);
292busy:
293 rxrpc_busy(local, &srx, &sp->hdr);
294 rxrpc_free_skb(skb);
295 goto process_next_packet;
296
297invalid_service:
298 skb->priority = RX_INVALID_OPERATION;
299 rxrpc_reject_packet(local, skb);
300 goto process_next_packet;
301
302 /* can't change connection security type mid-flow */
303security_mismatch:
304 skb->priority = RX_PROTOCOL_ERROR;
305 rxrpc_reject_packet(local, skb);
306 goto process_next_packet;
307}
308
309/*
310 * handle acceptance of a call by userspace
311 * - assign the user call ID to the call at the front of the queue
312 */
313struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
314 unsigned long user_call_ID)
315{
316 struct rxrpc_call *call;
317 struct rb_node *parent, **pp;
318 int ret;
319
320 _enter(",%lx", user_call_ID);
321
322 ASSERT(!irqs_disabled());
323
324 write_lock(&rx->call_lock);
325
326 ret = -ENODATA;
327 if (list_empty(&rx->acceptq))
328 goto out;
329
330 /* check the user ID isn't already in use */
331 ret = -EBADSLT;
332 pp = &rx->calls.rb_node;
333 parent = NULL;
334 while (*pp) {
335 parent = *pp;
336 call = rb_entry(parent, struct rxrpc_call, sock_node);
337
338 if (user_call_ID < call->user_call_ID)
339 pp = &(*pp)->rb_left;
340 else if (user_call_ID > call->user_call_ID)
341 pp = &(*pp)->rb_right;
342 else
343 goto out;
344 }
345
346 /* dequeue the first call and check it's still valid */
347 call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link);
348 list_del_init(&call->accept_link);
349 sk_acceptq_removed(&rx->sk);
350
351 write_lock_bh(&call->state_lock);
352 switch (call->state) {
353 case RXRPC_CALL_SERVER_ACCEPTING:
354 call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
355 break;
356 case RXRPC_CALL_REMOTELY_ABORTED:
357 case RXRPC_CALL_LOCALLY_ABORTED:
358 ret = -ECONNABORTED;
359 goto out_release;
360 case RXRPC_CALL_NETWORK_ERROR:
361 ret = call->conn->error;
362 goto out_release;
363 case RXRPC_CALL_DEAD:
364 ret = -ETIME;
365 goto out_discard;
366 default:
367 BUG();
368 }
369
370 /* formalise the acceptance */
371 call->user_call_ID = user_call_ID;
372 rb_link_node(&call->sock_node, parent, pp);
373 rb_insert_color(&call->sock_node, &rx->calls);
374 if (test_and_set_bit(RXRPC_CALL_HAS_USERID, &call->flags))
375 BUG();
376 if (test_and_set_bit(RXRPC_CALL_ACCEPTED, &call->events))
377 BUG();
378 rxrpc_queue_call(call);
379
380 rxrpc_get_call(call);
381 write_unlock_bh(&call->state_lock);
382 write_unlock(&rx->call_lock);
383 _leave(" = %p{%d}", call, call->debug_id);
384 return call;
385
386 /* if the call is already dying or dead, then we leave the socket's ref
387 * on it to be released by rxrpc_dead_call_expired() as induced by
388 * rxrpc_release_call() */
389out_release:
390 _debug("release %p", call);
391 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
392 !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
393 rxrpc_queue_call(call);
394out_discard:
395 write_unlock_bh(&call->state_lock);
396 _debug("discard %p", call);
397out:
398 write_unlock(&rx->call_lock);
399 _leave(" = %d", ret);
400 return ERR_PTR(ret);
401}
402
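Userspace reaches rxrpc_accept_call() above through sendmsg() control data on the listening socket: an accept marker plus the tag to file the call under. A sketch, assuming the RXRPC_ACCEPT control-message type that feeds this function:

	unsigned long call_id = 2;	/* server's tag for the new call */
	char control[CMSG_SPACE(sizeof(call_id)) + CMSG_SPACE(0)];
	struct msghdr msg = {
		.msg_control	= control,
		.msg_controllen	= sizeof(control),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_RXRPC;
	cmsg->cmsg_type	 = RXRPC_USER_CALL_ID;
	cmsg->cmsg_len	 = CMSG_LEN(sizeof(call_id));
	memcpy(CMSG_DATA(cmsg), &call_id, sizeof(call_id));

	cmsg = CMSG_NXTHDR(&msg, cmsg);
	cmsg->cmsg_level = SOL_RXRPC;
	cmsg->cmsg_type	 = RXRPC_ACCEPT;	/* assumed cmsg type */
	cmsg->cmsg_len	 = CMSG_LEN(0);

	sendmsg(server, &msg, 0);	/* no data: acceptance only */
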
403/*
404 * handle rejection of a call by userspace
405 * - reject the call at the front of the queue
406 */
407int rxrpc_reject_call(struct rxrpc_sock *rx)
408{
409 struct rxrpc_call *call;
410 int ret;
411
412 _enter("");
413
414 ASSERT(!irqs_disabled());
415
416 write_lock(&rx->call_lock);
417
418 ret = -ENODATA;
419 if (list_empty(&rx->acceptq))
420 goto out;
421
422 /* dequeue the first call and check it's still valid */
423 call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link);
424 list_del_init(&call->accept_link);
425 sk_acceptq_removed(&rx->sk);
426
427 write_lock_bh(&call->state_lock);
428 switch (call->state) {
429 case RXRPC_CALL_SERVER_ACCEPTING:
430 call->state = RXRPC_CALL_SERVER_BUSY;
431 if (test_and_set_bit(RXRPC_CALL_REJECT_BUSY, &call->events))
432 rxrpc_queue_call(call);
433 ret = 0;
434 goto out_release;
435 case RXRPC_CALL_REMOTELY_ABORTED:
436 case RXRPC_CALL_LOCALLY_ABORTED:
437 ret = -ECONNABORTED;
438 goto out_release;
439 case RXRPC_CALL_NETWORK_ERROR:
440 ret = call->conn->error;
441 goto out_release;
442 case RXRPC_CALL_DEAD:
443 ret = -ETIME;
444 goto out_discard;
445 default:
446 BUG();
447 }
448
449 /* if the call is already dying or dead, then we leave the socket's ref
450 * on it to be released by rxrpc_dead_call_expired() as induced by
451 * rxrpc_release_call() */
452out_release:
453 _debug("release %p", call);
454 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
455 !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
456 rxrpc_queue_call(call);
457out_discard:
458 write_unlock_bh(&call->state_lock);
459 _debug("discard %p", call);
460out:
461 write_unlock(&rx->call_lock);
462 _leave(" = %d", ret);
463 return ret;
464}
465
466/**
467 * rxrpc_kernel_accept_call - Allow a kernel service to accept an incoming call
468 * @sock: The socket on which the impending call is waiting
469 * @user_call_ID: The tag to attach to the call
470 *
471 * Allow a kernel service to accept an incoming call, assuming the incoming
472 * call is still valid.
473 */
474struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *sock,
475 unsigned long user_call_ID)
476{
477 struct rxrpc_call *call;
478
479 _enter(",%lx", user_call_ID);
480 call = rxrpc_accept_call(rxrpc_sk(sock->sk), user_call_ID);
481 _leave(" = %p", call);
482 return call;
483}
484
485EXPORT_SYMBOL(rxrpc_kernel_accept_call);
486
487/**
488 * rxrpc_kernel_reject_call - Allow a kernel service to reject an incoming call
489 * @sock: The socket on which the impending call is waiting
490 *
491 * Allow a kernel service to reject an incoming call with a BUSY message,
492 * assuming the incoming call is still valid.
493 */
494int rxrpc_kernel_reject_call(struct socket *sock)
495{
496 int ret;
497
498 _enter("");
499 ret = rxrpc_reject_call(rxrpc_sk(sock->sk));
500 _leave(" = %d", ret);
501 return ret;
502}
503
504EXPORT_SYMBOL(rxrpc_kernel_reject_call);
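A kernel service sees the pending call as an RXRPC_SKB_MARK_NEW_CALL message from its interceptor and then makes the same accept-or-refuse decision through the two exports above. A sketch (rxrpc_socket, my_cookie and the capacity test are assumed names):

	struct rxrpc_call *call;

	if (!my_service_has_capacity()) {
		rxrpc_kernel_reject_call(rxrpc_socket);	/* replies BUSY */
	} else {
		call = rxrpc_kernel_accept_call(rxrpc_socket,
						(unsigned long) my_cookie);
		if (IS_ERR(call))
			return PTR_ERR(call);
		/* ... read the request, send the reply ... */
	}
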
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
new file mode 100644
index 000000000000..fc07a926df56
--- /dev/null
+++ b/net/rxrpc/ar-ack.c
@@ -0,0 +1,1250 @@
1/* Management of Tx window, Tx resend, ACKs and out-of-sequence reception
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/circ_buf.h>
14#include <linux/net.h>
15#include <linux/skbuff.h>
16#include <linux/udp.h>
17#include <net/sock.h>
18#include <net/af_rxrpc.h>
19#include "ar-internal.h"
20
21static unsigned rxrpc_ack_defer = 1;
22
23static const char *rxrpc_acks[] = {
24 "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", "IDL",
25 "-?-"
26};
27
28static const s8 rxrpc_ack_priority[] = {
29 [0] = 0,
30 [RXRPC_ACK_DELAY] = 1,
31 [RXRPC_ACK_REQUESTED] = 2,
32 [RXRPC_ACK_IDLE] = 3,
33 [RXRPC_ACK_PING_RESPONSE] = 4,
34 [RXRPC_ACK_DUPLICATE] = 5,
35 [RXRPC_ACK_OUT_OF_SEQUENCE] = 6,
36 [RXRPC_ACK_EXCEEDS_WINDOW] = 7,
37 [RXRPC_ACK_NOSPACE] = 8,
38};
39
40/*
41 * propose an ACK be sent
42 */
43void __rxrpc_propose_ACK(struct rxrpc_call *call, uint8_t ack_reason,
44 __be32 serial, bool immediate)
45{
46 unsigned long expiry;
47 s8 prior = rxrpc_ack_priority[ack_reason];
48
49 ASSERTCMP(prior, >, 0);
50
51 _enter("{%d},%s,%%%x,%u",
52 call->debug_id, rxrpc_acks[ack_reason], ntohl(serial),
53 immediate);
54
55 if (prior < rxrpc_ack_priority[call->ackr_reason]) {
56 if (immediate)
57 goto cancel_timer;
58 return;
59 }
60
61 /* update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial
62 * numbers */
63 if (prior == rxrpc_ack_priority[call->ackr_reason]) {
64 if (prior <= 4)
65 call->ackr_serial = serial;
66 if (immediate)
67 goto cancel_timer;
68 return;
69 }
70
71 call->ackr_reason = ack_reason;
72 call->ackr_serial = serial;
73
74 switch (ack_reason) {
75 case RXRPC_ACK_DELAY:
76 _debug("run delay timer");
77 call->ack_timer.expires = jiffies + rxrpc_ack_timeout * HZ;
78 add_timer(&call->ack_timer);
79 return;
80
81 case RXRPC_ACK_IDLE:
82 if (!immediate) {
83 _debug("run defer timer");
84 expiry = 1;
85 goto run_timer;
86 }
87 goto cancel_timer;
88
89 case RXRPC_ACK_REQUESTED:
90 if (!rxrpc_ack_defer)
91 goto cancel_timer;
92 if (!immediate || serial == cpu_to_be32(1)) {
93 _debug("run defer timer");
94 expiry = rxrpc_ack_defer;
95 goto run_timer;
96 }
97
98 default:
99 _debug("immediate ACK");
100 goto cancel_timer;
101 }
102
103run_timer:
104 expiry += jiffies;
105 if (!timer_pending(&call->ack_timer) ||
106 time_after(call->ack_timer.expires, expiry))
107 mod_timer(&call->ack_timer, expiry);
108 return;
109
110cancel_timer:
111 _debug("cancel timer %%%u", ntohl(serial));
112 try_to_del_timer_sync(&call->ack_timer);
113 read_lock_bh(&call->state_lock);
114 if (call->state <= RXRPC_CALL_COMPLETE &&
115 !test_and_set_bit(RXRPC_CALL_ACK, &call->events))
116 rxrpc_queue_call(call);
117 read_unlock_bh(&call->state_lock);
118}
119
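A worked example of the priority rules above: suppose ackr_reason is currently RXRPC_ACK_REQUESTED (priority 2). A non-immediate RXRPC_ACK_DELAY proposal (priority 1) is simply dropped; a second REQUESTED proposal only refreshes ackr_serial, since equal priorities at or below 4 track the latest serial; and an RXRPC_ACK_NOSPACE proposal (priority 8) displaces both reason and serial and, falling to the default arm of the switch, forces an immediate ACK.
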
120/*
121 * propose an ACK be sent, locking the call structure
122 */
123void rxrpc_propose_ACK(struct rxrpc_call *call, uint8_t ack_reason,
124 __be32 serial, bool immediate)
125{
126 s8 prior = rxrpc_ack_priority[ack_reason];
127
128 if (prior > rxrpc_ack_priority[call->ackr_reason]) {
129 spin_lock_bh(&call->lock);
130 __rxrpc_propose_ACK(call, ack_reason, serial, immediate);
131 spin_unlock_bh(&call->lock);
132 }
133}
134
135/*
136 * set the resend timer
137 */
138static void rxrpc_set_resend(struct rxrpc_call *call, u8 resend,
139 unsigned long resend_at)
140{
141 read_lock_bh(&call->state_lock);
142 if (call->state >= RXRPC_CALL_COMPLETE)
143 resend = 0;
144
145 if (resend & 1) {
146 _debug("SET RESEND");
147 set_bit(RXRPC_CALL_RESEND, &call->events);
148 }
149
150 if (resend & 2) {
151 _debug("MODIFY RESEND TIMER");
152 set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
153 mod_timer(&call->resend_timer, resend_at);
154 } else {
155 _debug("KILL RESEND TIMER");
156 del_timer_sync(&call->resend_timer);
157 clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
158 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
159 }
160 read_unlock_bh(&call->state_lock);
161}
162
163/*
164 * resend packets
165 */
166static void rxrpc_resend(struct rxrpc_call *call)
167{
168 struct rxrpc_skb_priv *sp;
169 struct rxrpc_header *hdr;
170 struct sk_buff *txb;
171 unsigned long *p_txb, resend_at;
172 int loop, stop;
173 u8 resend;
174
175 _enter("{%d,%d,%d,%d},",
176 call->acks_hard, call->acks_unacked,
177 atomic_read(&call->sequence),
178 CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz));
179
180 stop = 0;
181 resend = 0;
182 resend_at = 0;
183
184 for (loop = call->acks_tail;
185	     loop != call->acks_head && !stop;
186 loop = (loop + 1) & (call->acks_winsz - 1)
187 ) {
188 p_txb = call->acks_window + loop;
189 smp_read_barrier_depends();
190 if (*p_txb & 1)
191 continue;
192
193 txb = (struct sk_buff *) *p_txb;
194 sp = rxrpc_skb(txb);
195
196 if (sp->need_resend) {
197 sp->need_resend = 0;
198
199 /* each Tx packet has a new serial number */
200 sp->hdr.serial =
201 htonl(atomic_inc_return(&call->conn->serial));
202
203 hdr = (struct rxrpc_header *) txb->head;
204 hdr->serial = sp->hdr.serial;
205
206 _proto("Tx DATA %%%u { #%d }",
207 ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
208 if (rxrpc_send_packet(call->conn->trans, txb) < 0) {
209				stop = 1;
210 sp->resend_at = jiffies + 3;
211 } else {
212 sp->resend_at =
213 jiffies + rxrpc_resend_timeout * HZ;
214 }
215 }
216
217 if (time_after_eq(jiffies + 1, sp->resend_at)) {
218 sp->need_resend = 1;
219 resend |= 1;
220 } else if (resend & 2) {
221 if (time_before(sp->resend_at, resend_at))
222 resend_at = sp->resend_at;
223 } else {
224 resend_at = sp->resend_at;
225 resend |= 2;
226 }
227 }
228
229 rxrpc_set_resend(call, resend, resend_at);
230 _leave("");
231}
232
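A convention every loop in this file leans on: each slot of acks_window is an unsigned long carrying the sk_buff pointer with bit 0 borrowed as the soft-ACK'd flag (skbs are word-aligned, so the bit is otherwise unused). Decoding a slot therefore looks like this sketch:

	unsigned long slot = call->acks_window[ix];
	struct sk_buff *txb = (struct sk_buff *) (slot & ~1UL);	/* packet */
	int soft_acked = slot & 1UL;	/* 1: soft-ACK'd, skipped on resend */

rxrpc_process_soft_ACKs() below sets and clears that bit as ACK/NACK reports arrive.
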
233/*
234 * handle resend timer expiry
235 */
236static void rxrpc_resend_timer(struct rxrpc_call *call)
237{
238 struct rxrpc_skb_priv *sp;
239 struct sk_buff *txb;
240 unsigned long *p_txb, resend_at;
241 int loop;
242 u8 resend;
243
244 _enter("%d,%d,%d",
245 call->acks_tail, call->acks_unacked, call->acks_head);
246
247 resend = 0;
248 resend_at = 0;
249
250 for (loop = call->acks_unacked;
251 loop != call->acks_head;
252 loop = (loop + 1) & (call->acks_winsz - 1)
253 ) {
254 p_txb = call->acks_window + loop;
255 smp_read_barrier_depends();
256 txb = (struct sk_buff *) (*p_txb & ~1);
257 sp = rxrpc_skb(txb);
258
259 ASSERT(!(*p_txb & 1));
260
261 if (sp->need_resend) {
262 ;
263 } else if (time_after_eq(jiffies + 1, sp->resend_at)) {
264 sp->need_resend = 1;
265 resend |= 1;
266 } else if (resend & 2) {
267 if (time_before(sp->resend_at, resend_at))
268 resend_at = sp->resend_at;
269 } else {
270 resend_at = sp->resend_at;
271 resend |= 2;
272 }
273 }
274
275 rxrpc_set_resend(call, resend, resend_at);
276 _leave("");
277}
278
279/*
280 * process soft ACKs of our transmitted packets
281 * - these indicate packets the peer has or has not received, but hasn't yet
282 * given to the consumer, and so can still be discarded and re-requested
283 */
284static int rxrpc_process_soft_ACKs(struct rxrpc_call *call,
285 struct rxrpc_ackpacket *ack,
286 struct sk_buff *skb)
287{
288 struct rxrpc_skb_priv *sp;
289 struct sk_buff *txb;
290 unsigned long *p_txb, resend_at;
291 int loop;
292 u8 sacks[RXRPC_MAXACKS], resend;
293
294 _enter("{%d,%d},{%d},",
295 call->acks_hard,
296 CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz),
297 ack->nAcks);
298
299 if (skb_copy_bits(skb, 0, sacks, ack->nAcks) < 0)
300 goto protocol_error;
301
302 resend = 0;
303 resend_at = 0;
304 for (loop = 0; loop < ack->nAcks; loop++) {
305 p_txb = call->acks_window;
306 p_txb += (call->acks_tail + loop) & (call->acks_winsz - 1);
307 smp_read_barrier_depends();
308 txb = (struct sk_buff *) (*p_txb & ~1);
309 sp = rxrpc_skb(txb);
310
311 switch (sacks[loop]) {
312 case RXRPC_ACK_TYPE_ACK:
313 sp->need_resend = 0;
314 *p_txb |= 1;
315 break;
316 case RXRPC_ACK_TYPE_NACK:
317 sp->need_resend = 1;
318 *p_txb &= ~1;
319 resend = 1;
320 break;
321 default:
322 _debug("Unsupported ACK type %d", sacks[loop]);
323 goto protocol_error;
324 }
325 }
326
327 smp_mb();
328 call->acks_unacked = (call->acks_tail + loop) & (call->acks_winsz - 1);
329
330 /* anything not explicitly ACK'd is implicitly NACK'd, but may just not
331 * have been received or processed yet by the far end */
332 for (loop = call->acks_unacked;
333 loop != call->acks_head;
334 loop = (loop + 1) & (call->acks_winsz - 1)
335 ) {
336 p_txb = call->acks_window + loop;
337 smp_read_barrier_depends();
338 txb = (struct sk_buff *) (*p_txb & ~1);
339 sp = rxrpc_skb(txb);
340
341 if (*p_txb & 1) {
342 /* packet must have been discarded */
343 sp->need_resend = 1;
344 *p_txb &= ~1;
345 resend |= 1;
346 } else if (sp->need_resend) {
347 ;
348 } else if (time_after_eq(jiffies + 1, sp->resend_at)) {
349 sp->need_resend = 1;
350 resend |= 1;
351 } else if (resend & 2) {
352 if (time_before(sp->resend_at, resend_at))
353 resend_at = sp->resend_at;
354 } else {
355 resend_at = sp->resend_at;
356 resend |= 2;
357 }
358 }
359
360 rxrpc_set_resend(call, resend, resend_at);
361 _leave(" = 0");
362 return 0;
363
364protocol_error:
365 _leave(" = -EPROTO");
366 return -EPROTO;
367}
368
369/*
370 * discard hard-ACK'd packets from the Tx window
371 */
372static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard)
373{
374 struct rxrpc_skb_priv *sp;
375 unsigned long _skb;
376 int tail = call->acks_tail, old_tail;
377 int win = CIRC_CNT(call->acks_head, tail, call->acks_winsz);
378
379 _enter("{%u,%u},%u", call->acks_hard, win, hard);
380
381 ASSERTCMP(hard - call->acks_hard, <=, win);
382
383 while (call->acks_hard < hard) {
384 smp_read_barrier_depends();
385 _skb = call->acks_window[tail] & ~1;
386 sp = rxrpc_skb((struct sk_buff *) _skb);
387 rxrpc_free_skb((struct sk_buff *) _skb);
388 old_tail = tail;
389 tail = (tail + 1) & (call->acks_winsz - 1);
390 call->acks_tail = tail;
391 if (call->acks_unacked == old_tail)
392 call->acks_unacked = tail;
393 call->acks_hard++;
394 }
395
396 wake_up(&call->tx_waitq);
397}
398
399/*
400 * clear the Tx window in the event of a failure
401 */
402static void rxrpc_clear_tx_window(struct rxrpc_call *call)
403{
404 rxrpc_rotate_tx_window(call, atomic_read(&call->sequence));
405}
406
407/*
408 * drain the out of sequence received packet queue into the packet Rx queue
409 */
410static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call)
411{
412 struct rxrpc_skb_priv *sp;
413 struct sk_buff *skb;
414 bool terminal;
415 int ret;
416
417 _enter("{%d,%d}", call->rx_data_post, call->rx_first_oos);
418
419 spin_lock_bh(&call->lock);
420
421 ret = -ECONNRESET;
422 if (test_bit(RXRPC_CALL_RELEASED, &call->flags))
423 goto socket_unavailable;
424
425 skb = skb_dequeue(&call->rx_oos_queue);
426 if (skb) {
427 sp = rxrpc_skb(skb);
428
429 _debug("drain OOS packet %d [%d]",
430 ntohl(sp->hdr.seq), call->rx_first_oos);
431
432 if (ntohl(sp->hdr.seq) != call->rx_first_oos) {
433 skb_queue_head(&call->rx_oos_queue, skb);
434 call->rx_first_oos = ntohl(rxrpc_skb(skb)->hdr.seq);
435 _debug("requeue %p {%u}", skb, call->rx_first_oos);
436 } else {
437 skb->mark = RXRPC_SKB_MARK_DATA;
438 terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) &&
439 !(sp->hdr.flags & RXRPC_CLIENT_INITIATED));
440 ret = rxrpc_queue_rcv_skb(call, skb, true, terminal);
441 BUG_ON(ret < 0);
442 _debug("drain #%u", call->rx_data_post);
443 call->rx_data_post++;
444
445 /* find out what the next packet is */
446 skb = skb_peek(&call->rx_oos_queue);
447 if (skb)
448 call->rx_first_oos =
449 ntohl(rxrpc_skb(skb)->hdr.seq);
450 else
451 call->rx_first_oos = 0;
452 _debug("peek %p {%u}", skb, call->rx_first_oos);
453 }
454 }
455
456 ret = 0;
457socket_unavailable:
458 spin_unlock_bh(&call->lock);
459 _leave(" = %d", ret);
460 return ret;
461}
462
463/*
464 * insert an out of sequence packet into the buffer
465 */
466static void rxrpc_insert_oos_packet(struct rxrpc_call *call,
467 struct sk_buff *skb)
468{
469 struct rxrpc_skb_priv *sp, *psp;
470 struct sk_buff *p;
471 u32 seq;
472
473 sp = rxrpc_skb(skb);
474 seq = ntohl(sp->hdr.seq);
475 _enter(",,{%u}", seq);
476
477 skb->destructor = rxrpc_packet_destructor;
478 ASSERTCMP(sp->call, ==, NULL);
479 sp->call = call;
480 rxrpc_get_call(call);
481
482 /* insert into the buffer in sequence order */
483 spin_lock_bh(&call->lock);
484
485 skb_queue_walk(&call->rx_oos_queue, p) {
486 psp = rxrpc_skb(p);
487 if (ntohl(psp->hdr.seq) > seq) {
488 _debug("insert oos #%u before #%u",
489 seq, ntohl(psp->hdr.seq));
490 skb_insert(p, skb, &call->rx_oos_queue);
491 goto inserted;
492 }
493 }
494
495 _debug("append oos #%u", seq);
496 skb_queue_tail(&call->rx_oos_queue, skb);
497inserted:
498
499 /* we might now have a new front to the queue */
500 if (call->rx_first_oos == 0 || seq < call->rx_first_oos)
501 call->rx_first_oos = seq;
502
503 read_lock(&call->state_lock);
504 if (call->state < RXRPC_CALL_COMPLETE &&
505 call->rx_data_post == call->rx_first_oos) {
506 _debug("drain rx oos now");
507 set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events);
508 }
509 read_unlock(&call->state_lock);
510
511 spin_unlock_bh(&call->lock);
512 _leave(" [stored #%u]", call->rx_first_oos);
513}
514
515/*
516 * clear the Tx window on final ACK reception
517 */
518static void rxrpc_zap_tx_window(struct rxrpc_call *call)
519{
520 struct rxrpc_skb_priv *sp;
521 struct sk_buff *skb;
522 unsigned long _skb, *acks_window;
523 uint8_t winsz = call->acks_winsz;
524 int tail;
525
526 acks_window = call->acks_window;
527 call->acks_window = NULL;
528
529 while (CIRC_CNT(call->acks_head, call->acks_tail, winsz) > 0) {
530 tail = call->acks_tail;
531 smp_read_barrier_depends();
532 _skb = acks_window[tail] & ~1;
533 smp_mb();
534 call->acks_tail = (call->acks_tail + 1) & (winsz - 1);
535
536 skb = (struct sk_buff *) _skb;
537 sp = rxrpc_skb(skb);
538 _debug("+++ clear Tx %u", ntohl(sp->hdr.seq));
539 rxrpc_free_skb(skb);
540 }
541
542 kfree(acks_window);
543}
544
545/*
546 * process packets in the reception queue
547 */
548static int rxrpc_process_rx_queue(struct rxrpc_call *call,
549 u32 *_abort_code)
550{
551 struct rxrpc_ackpacket ack;
552 struct rxrpc_skb_priv *sp;
553 struct sk_buff *skb;
554 bool post_ACK;
555 int latest;
556 u32 hard, tx;
557
558 _enter("");
559
560process_further:
561 skb = skb_dequeue(&call->rx_queue);
562 if (!skb)
563 return -EAGAIN;
564
565 _net("deferred skb %p", skb);
566
567 sp = rxrpc_skb(skb);
568
569 _debug("process %s [st %d]", rxrpc_pkts[sp->hdr.type], call->state);
570
571 post_ACK = false;
572
573 switch (sp->hdr.type) {
574 /* data packets that wind up here have been received out of
575 * order, need security processing or are jumbo packets */
576 case RXRPC_PACKET_TYPE_DATA:
577 _proto("OOSQ DATA %%%u { #%u }",
578 ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
579
580 /* secured packets must be verified and possibly decrypted */
581 if (rxrpc_verify_packet(call, skb, _abort_code) < 0)
582 goto protocol_error;
583
584 rxrpc_insert_oos_packet(call, skb);
585 goto process_further;
586
587 /* partial ACK to process */
588 case RXRPC_PACKET_TYPE_ACK:
589 if (skb_copy_bits(skb, 0, &ack, sizeof(ack)) < 0) {
590 _debug("extraction failure");
591 goto protocol_error;
592 }
593 if (!skb_pull(skb, sizeof(ack)))
594 BUG();
595
596 latest = ntohl(sp->hdr.serial);
597 hard = ntohl(ack.firstPacket);
598 tx = atomic_read(&call->sequence);
599
600 _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
601 latest,
602 ntohs(ack.maxSkew),
603 hard,
604 ntohl(ack.previousPacket),
605 ntohl(ack.serial),
606 rxrpc_acks[ack.reason],
607 ack.nAcks);
608
609 if (ack.reason == RXRPC_ACK_PING) {
610 _proto("Rx ACK %%%u PING Request", latest);
611 rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE,
612 sp->hdr.serial, true);
613 }
614
615 /* discard any out-of-order or duplicate ACKs */
616 if (latest - call->acks_latest <= 0) {
617 _debug("discard ACK %d <= %d",
618 latest, call->acks_latest);
619 goto discard;
620 }
621 call->acks_latest = latest;
622
623 if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
624 call->state != RXRPC_CALL_CLIENT_AWAIT_REPLY &&
625 call->state != RXRPC_CALL_SERVER_SEND_REPLY &&
626 call->state != RXRPC_CALL_SERVER_AWAIT_ACK)
627 goto discard;
628
629 _debug("Tx=%d H=%u S=%d", tx, call->acks_hard, call->state);
630
631 if (hard > 0) {
632 if (hard - 1 > tx) {
633 _debug("hard-ACK'd packet %d not transmitted"
634 " (%d top)",
635 hard - 1, tx);
636 goto protocol_error;
637 }
638
639 if ((call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY ||
640 call->state == RXRPC_CALL_SERVER_AWAIT_ACK) &&
641 hard > tx)
642 goto all_acked;
643
644 smp_rmb();
645 rxrpc_rotate_tx_window(call, hard - 1);
646 }
647
648 if (ack.nAcks > 0) {
649 if (hard - 1 + ack.nAcks > tx) {
650 _debug("soft-ACK'd packet %d+%d not"
651 " transmitted (%d top)",
652 hard - 1, ack.nAcks, tx);
653 goto protocol_error;
654 }
655
656 if (rxrpc_process_soft_ACKs(call, &ack, skb) < 0)
657 goto protocol_error;
658 }
659 goto discard;
660
661 /* complete ACK to process */
662 case RXRPC_PACKET_TYPE_ACKALL:
663 goto all_acked;
664
665 /* abort and busy are handled elsewhere */
666 case RXRPC_PACKET_TYPE_BUSY:
667 case RXRPC_PACKET_TYPE_ABORT:
668 BUG();
669
670 /* connection level events - also handled elsewhere */
671 case RXRPC_PACKET_TYPE_CHALLENGE:
672 case RXRPC_PACKET_TYPE_RESPONSE:
673 case RXRPC_PACKET_TYPE_DEBUG:
674 BUG();
675 }
676
677 /* if we've had a hard ACK that covers all the packets we've sent, then
678 * that ends that phase of the operation */
679all_acked:
680 write_lock_bh(&call->state_lock);
681 _debug("ack all %d", call->state);
682
683 switch (call->state) {
684 case RXRPC_CALL_CLIENT_AWAIT_REPLY:
685 call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
686 break;
687 case RXRPC_CALL_SERVER_AWAIT_ACK:
688 _debug("srv complete");
689 call->state = RXRPC_CALL_COMPLETE;
690 post_ACK = true;
691 break;
692 case RXRPC_CALL_CLIENT_SEND_REQUEST:
693 case RXRPC_CALL_SERVER_RECV_REQUEST:
694 goto protocol_error_unlock; /* can't occur yet */
695 default:
696 write_unlock_bh(&call->state_lock);
697 goto discard; /* assume packet left over from earlier phase */
698 }
699
700 write_unlock_bh(&call->state_lock);
701
702 /* if all the packets we sent are hard-ACK'd, then we can discard
703 * whatever we've got left */
704 _debug("clear Tx %d",
705 CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz));
706
707 del_timer_sync(&call->resend_timer);
708 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
709 clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
710
711 if (call->acks_window)
712 rxrpc_zap_tx_window(call);
713
714 if (post_ACK) {
715 /* post the final ACK message for userspace to pick up */
716 _debug("post ACK");
717 skb->mark = RXRPC_SKB_MARK_FINAL_ACK;
718 sp->call = call;
719 rxrpc_get_call(call);
720 spin_lock_bh(&call->lock);
721 if (rxrpc_queue_rcv_skb(call, skb, true, true) < 0)
722 BUG();
723 spin_unlock_bh(&call->lock);
724 goto process_further;
725 }
726
727discard:
728 rxrpc_free_skb(skb);
729 goto process_further;
730
731protocol_error_unlock:
732 write_unlock_bh(&call->state_lock);
733protocol_error:
734 rxrpc_free_skb(skb);
735 _leave(" = -EPROTO");
736 return -EPROTO;
737}
738
739/*
740 * post a message to the socket Rx queue for recvmsg() to pick up
741 */
742static int rxrpc_post_message(struct rxrpc_call *call, u32 mark, u32 error,
743 bool fatal)
744{
745 struct rxrpc_skb_priv *sp;
746 struct sk_buff *skb;
747 int ret;
748
749 _enter("{%d,%lx},%u,%u,%d",
750 call->debug_id, call->flags, mark, error, fatal);
751
752 /* remove timers and things for fatal messages */
753 if (fatal) {
754 del_timer_sync(&call->resend_timer);
755 del_timer_sync(&call->ack_timer);
756 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
757 }
758
759 if (mark != RXRPC_SKB_MARK_NEW_CALL &&
760 !test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
761 _leave("[no userid]");
762 return 0;
763 }
764
765 if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) {
766 skb = alloc_skb(0, GFP_NOFS);
767 if (!skb)
768 return -ENOMEM;
769
770 rxrpc_new_skb(skb);
771
772 skb->mark = mark;
773
774 sp = rxrpc_skb(skb);
775 memset(sp, 0, sizeof(*sp));
776 sp->error = error;
777 sp->call = call;
778 rxrpc_get_call(call);
779
780 spin_lock_bh(&call->lock);
781 ret = rxrpc_queue_rcv_skb(call, skb, true, fatal);
782 spin_unlock_bh(&call->lock);
783 if (ret < 0)
784 BUG();
785 }
786
787 return 0;
788}
789
790/*
791 * handle background processing of incoming call packets and ACK / abort
792 * generation
793 */
794void rxrpc_process_call(struct work_struct *work)
795{
796 struct rxrpc_call *call =
797 container_of(work, struct rxrpc_call, processor);
798 struct rxrpc_ackpacket ack;
799 struct rxrpc_ackinfo ackinfo;
800 struct rxrpc_header hdr;
801 struct msghdr msg;
802 struct kvec iov[5];
803 unsigned long bits;
804 __be32 data;
805 size_t len;
806 int genbit, loop, nbit, ioc, ret;
807 u32 abort_code = RX_PROTOCOL_ERROR;
808 u8 *acks = NULL;
809
810 //printk("\n--------------------\n");
811 _enter("{%d,%s,%lx} [%lu]",
812 call->debug_id, rxrpc_call_states[call->state], call->events,
813 (jiffies - call->creation_jif) / (HZ / 10));
814
815 if (test_and_set_bit(RXRPC_CALL_PROC_BUSY, &call->flags)) {
816 _debug("XXXXXXXXXXXXX RUNNING ON MULTIPLE CPUS XXXXXXXXXXXXX");
817 return;
818 }
819
820 /* there's a good chance we're going to have to send a message, so set
821 * one up in advance */
822 msg.msg_name = &call->conn->trans->peer->srx.transport.sin;
823 msg.msg_namelen = sizeof(call->conn->trans->peer->srx.transport.sin);
824 msg.msg_control = NULL;
825 msg.msg_controllen = 0;
826 msg.msg_flags = 0;
827
828 hdr.epoch = call->conn->epoch;
829 hdr.cid = call->cid;
830 hdr.callNumber = call->call_id;
831 hdr.seq = 0;
832 hdr.type = RXRPC_PACKET_TYPE_ACK;
833 hdr.flags = call->conn->out_clientflag;
834 hdr.userStatus = 0;
835 hdr.securityIndex = call->conn->security_ix;
836 hdr._rsvd = 0;
837 hdr.serviceId = call->conn->service_id;
838
839 memset(iov, 0, sizeof(iov));
840 iov[0].iov_base = &hdr;
841 iov[0].iov_len = sizeof(hdr);
842
843 /* deal with events of a final nature */
844 if (test_bit(RXRPC_CALL_RELEASE, &call->events)) {
845 rxrpc_release_call(call);
846 clear_bit(RXRPC_CALL_RELEASE, &call->events);
847 }
848
849 if (test_bit(RXRPC_CALL_RCVD_ERROR, &call->events)) {
850 int error;
851
852 clear_bit(RXRPC_CALL_CONN_ABORT, &call->events);
853 clear_bit(RXRPC_CALL_REJECT_BUSY, &call->events);
854 clear_bit(RXRPC_CALL_ABORT, &call->events);
855
856 error = call->conn->trans->peer->net_error;
857 _debug("post net error %d", error);
858
859 if (rxrpc_post_message(call, RXRPC_SKB_MARK_NET_ERROR,
860 error, true) < 0)
861 goto no_mem;
862 clear_bit(RXRPC_CALL_RCVD_ERROR, &call->events);
863 goto kill_ACKs;
864 }
865
866 if (test_bit(RXRPC_CALL_CONN_ABORT, &call->events)) {
867 ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE);
868
869 clear_bit(RXRPC_CALL_REJECT_BUSY, &call->events);
870 clear_bit(RXRPC_CALL_ABORT, &call->events);
871
872 _debug("post conn abort");
873
874 if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
875 call->conn->error, true) < 0)
876 goto no_mem;
877 clear_bit(RXRPC_CALL_CONN_ABORT, &call->events);
878 goto kill_ACKs;
879 }
880
881 if (test_bit(RXRPC_CALL_REJECT_BUSY, &call->events)) {
882 hdr.type = RXRPC_PACKET_TYPE_BUSY;
883 genbit = RXRPC_CALL_REJECT_BUSY;
884 goto send_message;
885 }
886
887 if (test_bit(RXRPC_CALL_ABORT, &call->events)) {
888 ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE);
889
890 if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
891 ECONNABORTED, true) < 0)
892 goto no_mem;
893 hdr.type = RXRPC_PACKET_TYPE_ABORT;
894 data = htonl(call->abort_code);
895 iov[1].iov_base = &data;
896 iov[1].iov_len = sizeof(data);
897 genbit = RXRPC_CALL_ABORT;
898 goto send_message;
899 }
900
901 if (test_bit(RXRPC_CALL_ACK_FINAL, &call->events)) {
902 hdr.type = RXRPC_PACKET_TYPE_ACKALL;
903 genbit = RXRPC_CALL_ACK_FINAL;
904 goto send_message;
905 }
906
907 if (call->events & ((1 << RXRPC_CALL_RCVD_BUSY) |
908 (1 << RXRPC_CALL_RCVD_ABORT))
909 ) {
910 u32 mark;
911
912 if (test_bit(RXRPC_CALL_RCVD_ABORT, &call->events))
913 mark = RXRPC_SKB_MARK_REMOTE_ABORT;
914 else
915 mark = RXRPC_SKB_MARK_BUSY;
916
917 _debug("post abort/busy");
918 rxrpc_clear_tx_window(call);
919 if (rxrpc_post_message(call, mark, ECONNABORTED, true) < 0)
920 goto no_mem;
921
922 clear_bit(RXRPC_CALL_RCVD_BUSY, &call->events);
923 clear_bit(RXRPC_CALL_RCVD_ABORT, &call->events);
924 goto kill_ACKs;
925 }
926
927 if (test_and_clear_bit(RXRPC_CALL_RCVD_ACKALL, &call->events)) {
928 _debug("do implicit ackall");
929 rxrpc_clear_tx_window(call);
930 }
931
932 if (test_bit(RXRPC_CALL_LIFE_TIMER, &call->events)) {
933 write_lock_bh(&call->state_lock);
934 if (call->state <= RXRPC_CALL_COMPLETE) {
935 call->state = RXRPC_CALL_LOCALLY_ABORTED;
936 call->abort_code = RX_CALL_TIMEOUT;
937 set_bit(RXRPC_CALL_ABORT, &call->events);
938 }
939 write_unlock_bh(&call->state_lock);
940
941 _debug("post timeout");
942 if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
943 ETIME, true) < 0)
944 goto no_mem;
945
946 clear_bit(RXRPC_CALL_LIFE_TIMER, &call->events);
947 goto kill_ACKs;
948 }
949
950 /* deal with assorted inbound messages */
951 if (!skb_queue_empty(&call->rx_queue)) {
952 switch (rxrpc_process_rx_queue(call, &abort_code)) {
953 case 0:
954 case -EAGAIN:
955 break;
956 case -ENOMEM:
957 goto no_mem;
958 case -EKEYEXPIRED:
959 case -EKEYREJECTED:
960 case -EPROTO:
961 rxrpc_abort_call(call, abort_code);
962 goto kill_ACKs;
963 }
964 }
965
966 /* handle resending */
967 if (test_and_clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events))
968 rxrpc_resend_timer(call);
969 if (test_and_clear_bit(RXRPC_CALL_RESEND, &call->events))
970 rxrpc_resend(call);
971
972 /* consider sending an ordinary ACK */
973 if (test_bit(RXRPC_CALL_ACK, &call->events)) {
974 __be32 pad;
975
976 _debug("send ACK: window: %d - %d { %lx }",
977 call->rx_data_eaten, call->ackr_win_top,
978 call->ackr_window[0]);
979
980 if (call->state > RXRPC_CALL_SERVER_ACK_REQUEST &&
981 call->ackr_reason != RXRPC_ACK_PING_RESPONSE) {
982 /* ACK by sending reply DATA packet in this state */
983 clear_bit(RXRPC_CALL_ACK, &call->events);
984 goto maybe_reschedule;
985 }
986
987 genbit = RXRPC_CALL_ACK;
988
989 acks = kzalloc(call->ackr_win_top - call->rx_data_eaten,
990 GFP_NOFS);
991 if (!acks)
992 goto no_mem;
993
994 //hdr.flags = RXRPC_SLOW_START_OK;
995 ack.bufferSpace = htons(8);
996 ack.maxSkew = 0;
997 ack.serial = 0;
998 ack.reason = 0;
999
1000 ackinfo.rxMTU = htonl(5692);
1001// ackinfo.rxMTU = htonl(call->conn->trans->peer->maxdata);
1002 ackinfo.maxMTU = htonl(call->conn->trans->peer->maxdata);
1003 ackinfo.rwind = htonl(32);
1004 ackinfo.jumbo_max = htonl(4);
1005
1006 spin_lock_bh(&call->lock);
1007 ack.reason = call->ackr_reason;
1008 ack.serial = call->ackr_serial;
1009 ack.previousPacket = call->ackr_prev_seq;
1010 ack.firstPacket = htonl(call->rx_data_eaten + 1);
1011
1012 ack.nAcks = 0;
1013 for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) {
1014 nbit = loop * BITS_PER_LONG;
1015			for (bits = call->ackr_window[loop]; bits;
1016			     bits >>= 1) {
1017 _debug("- l=%d n=%d b=%lx", loop, nbit, bits);
1018 if (bits & 1) {
1019 acks[nbit] = RXRPC_ACK_TYPE_ACK;
1020 ack.nAcks = nbit + 1;
1021 }
1022 nbit++;
1023 }
1024 }
1025 call->ackr_reason = 0;
1026 spin_unlock_bh(&call->lock);
1027
1028 pad = 0;
1029
1030 iov[1].iov_base = &ack;
1031 iov[1].iov_len = sizeof(ack);
1032 iov[2].iov_base = acks;
1033 iov[2].iov_len = ack.nAcks;
1034 iov[3].iov_base = &pad;
1035 iov[3].iov_len = 3;
1036 iov[4].iov_base = &ackinfo;
1037 iov[4].iov_len = sizeof(ackinfo);
1038
1039 switch (ack.reason) {
1040 case RXRPC_ACK_REQUESTED:
1041 case RXRPC_ACK_DUPLICATE:
1042 case RXRPC_ACK_OUT_OF_SEQUENCE:
1043 case RXRPC_ACK_EXCEEDS_WINDOW:
1044 case RXRPC_ACK_NOSPACE:
1045 case RXRPC_ACK_PING:
1046 case RXRPC_ACK_PING_RESPONSE:
1047 goto send_ACK_with_skew;
1048 case RXRPC_ACK_DELAY:
1049 case RXRPC_ACK_IDLE:
1050 goto send_ACK;
1051 }
1052 }
1053
1054 /* handle completion of security negotiations on an incoming
1055 * connection */
1056 if (test_and_clear_bit(RXRPC_CALL_SECURED, &call->events)) {
1057 _debug("secured");
1058 spin_lock_bh(&call->lock);
1059
1060 if (call->state == RXRPC_CALL_SERVER_SECURING) {
1061 _debug("securing");
1062 write_lock(&call->conn->lock);
1063 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
1064 !test_bit(RXRPC_CALL_RELEASE, &call->events)) {
1065 _debug("not released");
1066 call->state = RXRPC_CALL_SERVER_ACCEPTING;
1067 list_move_tail(&call->accept_link,
1068 &call->socket->acceptq);
1069 }
1070 write_unlock(&call->conn->lock);
1071 read_lock(&call->state_lock);
1072 if (call->state < RXRPC_CALL_COMPLETE)
1073 set_bit(RXRPC_CALL_POST_ACCEPT, &call->events);
1074 read_unlock(&call->state_lock);
1075 }
1076
1077 spin_unlock_bh(&call->lock);
1078 if (!test_bit(RXRPC_CALL_POST_ACCEPT, &call->events))
1079 goto maybe_reschedule;
1080 }
1081
1082 /* post a notification of an acceptable connection to the app */
1083 if (test_bit(RXRPC_CALL_POST_ACCEPT, &call->events)) {
1084 _debug("post accept");
1085 if (rxrpc_post_message(call, RXRPC_SKB_MARK_NEW_CALL,
1086 0, false) < 0)
1087 goto no_mem;
1088 clear_bit(RXRPC_CALL_POST_ACCEPT, &call->events);
1089 goto maybe_reschedule;
1090 }
1091
1092 /* handle incoming call acceptance */
1093 if (test_and_clear_bit(RXRPC_CALL_ACCEPTED, &call->events)) {
1094 _debug("accepted");
1095 ASSERTCMP(call->rx_data_post, ==, 0);
1096 call->rx_data_post = 1;
1097 read_lock_bh(&call->state_lock);
1098 if (call->state < RXRPC_CALL_COMPLETE)
1099 set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events);
1100 read_unlock_bh(&call->state_lock);
1101 }
1102
1103	/* drain the out-of-sequence received packet queue into the packet Rx
1104 * queue */
1105 if (test_and_clear_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events)) {
1106 while (call->rx_data_post == call->rx_first_oos)
1107 if (rxrpc_drain_rx_oos_queue(call) < 0)
1108 break;
1109 goto maybe_reschedule;
1110 }
1111
1112 /* other events may have been raised since we started checking */
1113 goto maybe_reschedule;
1114
1115send_ACK_with_skew:
1116 ack.maxSkew = htons(atomic_read(&call->conn->hi_serial) -
1117 ntohl(ack.serial));
1118send_ACK:
1119 hdr.serial = htonl(atomic_inc_return(&call->conn->serial));
1120 _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
1121 ntohl(hdr.serial),
1122 ntohs(ack.maxSkew),
1123 ntohl(ack.firstPacket),
1124 ntohl(ack.previousPacket),
1125 ntohl(ack.serial),
1126 rxrpc_acks[ack.reason],
1127 ack.nAcks);
1128
1129 del_timer_sync(&call->ack_timer);
1130 if (ack.nAcks > 0)
1131 set_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags);
1132 goto send_message_2;
1133
1134send_message:
1135 _debug("send message");
1136
1137 hdr.serial = htonl(atomic_inc_return(&call->conn->serial));
1138 _proto("Tx %s %%%u", rxrpc_pkts[hdr.type], ntohl(hdr.serial));
1139send_message_2:
1140
1141 len = iov[0].iov_len;
1142 ioc = 1;
1143 if (iov[4].iov_len) {
1144 ioc = 5;
1145 len += iov[4].iov_len;
1146 len += iov[3].iov_len;
1147 len += iov[2].iov_len;
1148 len += iov[1].iov_len;
1149 } else if (iov[3].iov_len) {
1150 ioc = 4;
1151 len += iov[3].iov_len;
1152 len += iov[2].iov_len;
1153 len += iov[1].iov_len;
1154 } else if (iov[2].iov_len) {
1155 ioc = 3;
1156 len += iov[2].iov_len;
1157 len += iov[1].iov_len;
1158 } else if (iov[1].iov_len) {
1159 ioc = 2;
1160 len += iov[1].iov_len;
1161 }
1162
1163 ret = kernel_sendmsg(call->conn->trans->local->socket,
1164 &msg, iov, ioc, len);
1165 if (ret < 0) {
1166 _debug("sendmsg failed: %d", ret);
1167 read_lock_bh(&call->state_lock);
1168 if (call->state < RXRPC_CALL_DEAD)
1169 rxrpc_queue_call(call);
1170 read_unlock_bh(&call->state_lock);
1171 goto error;
1172 }
1173
1174 switch (genbit) {
1175 case RXRPC_CALL_ABORT:
1176 clear_bit(genbit, &call->events);
1177 clear_bit(RXRPC_CALL_RCVD_ABORT, &call->events);
1178 goto kill_ACKs;
1179
1180 case RXRPC_CALL_ACK_FINAL:
1181 write_lock_bh(&call->state_lock);
1182 if (call->state == RXRPC_CALL_CLIENT_FINAL_ACK)
1183 call->state = RXRPC_CALL_COMPLETE;
1184 write_unlock_bh(&call->state_lock);
1185 goto kill_ACKs;
1186
1187 default:
1188 clear_bit(genbit, &call->events);
1189 switch (call->state) {
1190 case RXRPC_CALL_CLIENT_AWAIT_REPLY:
1191 case RXRPC_CALL_CLIENT_RECV_REPLY:
1192 case RXRPC_CALL_SERVER_RECV_REQUEST:
1193 case RXRPC_CALL_SERVER_ACK_REQUEST:
1194 _debug("start ACK timer");
1195 rxrpc_propose_ACK(call, RXRPC_ACK_DELAY,
1196 call->ackr_serial, false);
1197 default:
1198 break;
1199 }
1200 goto maybe_reschedule;
1201 }
1202
1203kill_ACKs:
1204 del_timer_sync(&call->ack_timer);
1205 if (test_and_clear_bit(RXRPC_CALL_ACK_FINAL, &call->events))
1206 rxrpc_put_call(call);
1207 clear_bit(RXRPC_CALL_ACK, &call->events);
1208
1209maybe_reschedule:
1210 if (call->events || !skb_queue_empty(&call->rx_queue)) {
1211 read_lock_bh(&call->state_lock);
1212 if (call->state < RXRPC_CALL_DEAD)
1213 rxrpc_queue_call(call);
1214 read_unlock_bh(&call->state_lock);
1215 }
1216
1217 /* don't leave aborted connections on the accept queue */
1218 if (call->state >= RXRPC_CALL_COMPLETE &&
1219 !list_empty(&call->accept_link)) {
1220 _debug("X unlinking once-pending call %p { e=%lx f=%lx c=%x }",
1221 call, call->events, call->flags,
1222 ntohl(call->conn->cid));
1223
1224 read_lock_bh(&call->state_lock);
1225 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
1226 !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
1227 rxrpc_queue_call(call);
1228 read_unlock_bh(&call->state_lock);
1229 }
1230
1231error:
1232 clear_bit(RXRPC_CALL_PROC_BUSY, &call->flags);
1233 kfree(acks);
1234
1235	/* because we don't want two CPUs both processing the work item for one
1236	 * call at the same time, we use a flag to note when it's busy; however,
1237	 * this leaves a race between clearing the flag and the work-pending bit
1238	 * being set again, so the call may need to be requeued below */
1239 if (call->events && !work_pending(&call->processor)) {
1240 _debug("jumpstart %x", ntohl(call->conn->cid));
1241 rxrpc_queue_call(call);
1242 }
1243
1244 _leave("");
1245 return;
1246
1247no_mem:
1248 _debug("out of memory");
1249 goto maybe_reschedule;
1250}
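The soft-ACK assembly in rxrpc_process_call above walks the receive-window bitmap word by word: each set bit becomes an RXRPC_ACK_TYPE_ACK byte in the acks[] table, and nAcks ends up as the highest acknowledged slot plus one, so every earlier unset slot is implicitly reported as a NACK. A minimal standalone sketch of that conversion; the constants and window contents are illustrative, not the kernel's:

	#include <stdio.h>
	#include <string.h>

	#define ACK_TYPE_ACK   1	/* stands in for RXRPC_ACK_TYPE_ACK */
	#define ACK_TYPE_NACK  0
	#define WINDOW_WORDS   2	/* stands in for RXRPC_ACKR_WINDOW_ASZ */
	#define BITS_PER_ULONG (8 * (int)sizeof(unsigned long))

	int main(void)
	{
		/* bits 0, 1 and 3 of word 0 and bit 0 of word 1 received */
		unsigned long window[WINDOW_WORDS] = { 0x0b, 0x01 };
		unsigned char acks[WINDOW_WORDS * sizeof(unsigned long) * 8];
		unsigned long bits;
		int loop, nbit, nacks = 0;

		memset(acks, ACK_TYPE_NACK, sizeof(acks));
		for (loop = 0; loop < WINDOW_WORDS; loop++) {
			nbit = loop * BITS_PER_ULONG;
			for (bits = window[loop]; bits; bits >>= 1) {
				if (bits & 1) {
					acks[nbit] = ACK_TYPE_ACK;
					nacks = nbit + 1; /* highest acked + 1 */
				}
				nbit++;
			}
		}

		/* every slot below nacks not marked ACK is reported as NACK */
		printf("nAcks = %d\n", nacks);
		return 0;
	}

On a 64-bit build this prints nAcks = 65: slot 64 is the highest acknowledged, and slots 2 and 4..63 remain NACK bytes in the table.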
diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c
new file mode 100644
index 000000000000..4d92d88ff1fc
--- /dev/null
+++ b/net/rxrpc/ar-call.c
@@ -0,0 +1,804 @@
1/* RxRPC individual remote procedure call handling
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/circ_buf.h>
14#include <net/sock.h>
15#include <net/af_rxrpc.h>
16#include "ar-internal.h"
17
18struct kmem_cache *rxrpc_call_jar;
19LIST_HEAD(rxrpc_calls);
20DEFINE_RWLOCK(rxrpc_call_lock);
21static unsigned rxrpc_call_max_lifetime = 60;
22static unsigned rxrpc_dead_call_timeout = 2;
23
24static void rxrpc_destroy_call(struct work_struct *work);
25static void rxrpc_call_life_expired(unsigned long _call);
26static void rxrpc_dead_call_expired(unsigned long _call);
27static void rxrpc_ack_time_expired(unsigned long _call);
28static void rxrpc_resend_time_expired(unsigned long _call);
29
30/*
31 * allocate a new call
32 */
33static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
34{
35 struct rxrpc_call *call;
36
37 call = kmem_cache_zalloc(rxrpc_call_jar, gfp);
38 if (!call)
39 return NULL;
40
41 call->acks_winsz = 16;
42 call->acks_window = kmalloc(call->acks_winsz * sizeof(unsigned long),
43 gfp);
44 if (!call->acks_window) {
45 kmem_cache_free(rxrpc_call_jar, call);
46 return NULL;
47 }
48
49 setup_timer(&call->lifetimer, &rxrpc_call_life_expired,
50 (unsigned long) call);
51 setup_timer(&call->deadspan, &rxrpc_dead_call_expired,
52 (unsigned long) call);
53 setup_timer(&call->ack_timer, &rxrpc_ack_time_expired,
54 (unsigned long) call);
55 setup_timer(&call->resend_timer, &rxrpc_resend_time_expired,
56 (unsigned long) call);
57 INIT_WORK(&call->destroyer, &rxrpc_destroy_call);
58 INIT_WORK(&call->processor, &rxrpc_process_call);
59 INIT_LIST_HEAD(&call->accept_link);
60 skb_queue_head_init(&call->rx_queue);
61 skb_queue_head_init(&call->rx_oos_queue);
62 init_waitqueue_head(&call->tx_waitq);
63 spin_lock_init(&call->lock);
64 rwlock_init(&call->state_lock);
65 atomic_set(&call->usage, 1);
66 call->debug_id = atomic_inc_return(&rxrpc_debug_id);
67 call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
68
69 memset(&call->sock_node, 0xed, sizeof(call->sock_node));
70
71 call->rx_data_expect = 1;
72 call->rx_data_eaten = 0;
73 call->rx_first_oos = 0;
74 call->ackr_win_top = call->rx_data_eaten + 1 + RXRPC_MAXACKS;
75 call->creation_jif = jiffies;
76 return call;
77}
78
79/*
80 * allocate a new client call and attempt to get a connection slot for it
81 */
82static struct rxrpc_call *rxrpc_alloc_client_call(
83 struct rxrpc_sock *rx,
84 struct rxrpc_transport *trans,
85 struct rxrpc_conn_bundle *bundle,
86 gfp_t gfp)
87{
88 struct rxrpc_call *call;
89 int ret;
90
91 _enter("");
92
93 ASSERT(rx != NULL);
94 ASSERT(trans != NULL);
95 ASSERT(bundle != NULL);
96
97 call = rxrpc_alloc_call(gfp);
98 if (!call)
99 return ERR_PTR(-ENOMEM);
100
101 sock_hold(&rx->sk);
102 call->socket = rx;
103 call->rx_data_post = 1;
104
105 ret = rxrpc_connect_call(rx, trans, bundle, call, gfp);
106 if (ret < 0) {
107 kmem_cache_free(rxrpc_call_jar, call);
108 return ERR_PTR(ret);
109 }
110
111 spin_lock(&call->conn->trans->peer->lock);
112 list_add(&call->error_link, &call->conn->trans->peer->error_targets);
113 spin_unlock(&call->conn->trans->peer->lock);
114
115 call->lifetimer.expires = jiffies + rxrpc_call_max_lifetime * HZ;
116 add_timer(&call->lifetimer);
117
118 _leave(" = %p", call);
119 return call;
120}
121
122/*
123 * set up a call for the given data
124 * - called in process context with IRQs enabled
125 */
126struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *rx,
127 struct rxrpc_transport *trans,
128 struct rxrpc_conn_bundle *bundle,
129 unsigned long user_call_ID,
130 int create,
131 gfp_t gfp)
132{
133 struct rxrpc_call *call, *candidate;
134 struct rb_node *p, *parent, **pp;
135
136 _enter("%p,%d,%d,%lx,%d",
137 rx, trans ? trans->debug_id : -1, bundle ? bundle->debug_id : -1,
138 user_call_ID, create);
139
140 /* search the extant calls first for one that matches the specified
141 * user ID */
142 read_lock(&rx->call_lock);
143
144 p = rx->calls.rb_node;
145 while (p) {
146 call = rb_entry(p, struct rxrpc_call, sock_node);
147
148 if (user_call_ID < call->user_call_ID)
149 p = p->rb_left;
150 else if (user_call_ID > call->user_call_ID)
151 p = p->rb_right;
152 else
153 goto found_extant_call;
154 }
155
156 read_unlock(&rx->call_lock);
157
158 if (!create || !trans)
159 return ERR_PTR(-EBADSLT);
160
161 /* not yet present - create a candidate for a new record and then
162 * redo the search */
163 candidate = rxrpc_alloc_client_call(rx, trans, bundle, gfp);
164 if (IS_ERR(candidate)) {
165 _leave(" = %ld", PTR_ERR(candidate));
166 return candidate;
167 }
168
169 candidate->user_call_ID = user_call_ID;
170 __set_bit(RXRPC_CALL_HAS_USERID, &candidate->flags);
171
172 write_lock(&rx->call_lock);
173
174 pp = &rx->calls.rb_node;
175 parent = NULL;
176 while (*pp) {
177 parent = *pp;
178 call = rb_entry(parent, struct rxrpc_call, sock_node);
179
180 if (user_call_ID < call->user_call_ID)
181 pp = &(*pp)->rb_left;
182 else if (user_call_ID > call->user_call_ID)
183 pp = &(*pp)->rb_right;
184 else
185 goto found_extant_second;
186 }
187
188 /* second search also failed; add the new call */
189 call = candidate;
190 candidate = NULL;
191 rxrpc_get_call(call);
192
193 rb_link_node(&call->sock_node, parent, pp);
194 rb_insert_color(&call->sock_node, &rx->calls);
195 write_unlock(&rx->call_lock);
196
197 write_lock_bh(&rxrpc_call_lock);
198 list_add_tail(&call->link, &rxrpc_calls);
199 write_unlock_bh(&rxrpc_call_lock);
200
201 _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id);
202
203 _leave(" = %p [new]", call);
204 return call;
205
206 /* we found the call in the list immediately */
207found_extant_call:
208 rxrpc_get_call(call);
209 read_unlock(&rx->call_lock);
210 _leave(" = %p [extant %d]", call, atomic_read(&call->usage));
211 return call;
212
213 /* we found the call on the second time through the list */
214found_extant_second:
215 rxrpc_get_call(call);
216 write_unlock(&rx->call_lock);
217 rxrpc_put_call(candidate);
218 _leave(" = %p [second %d]", call, atomic_read(&call->usage));
219 return call;
220}
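rxrpc_get_client_call above shows a locking pattern this code leans on repeatedly: look up under the read lock; on a miss, drop all locks to allocate a candidate, then retake the write lock and search again before linking the candidate in, discarding it if another thread got there first. A hedged userspace analogue of just that pattern, using pthreads and an unbalanced binary tree purely for brevity:

	#include <pthread.h>
	#include <stdlib.h>

	struct node {
		unsigned long id;
		struct node *left, *right;
	};

	static struct node *root;
	static pthread_rwlock_t tree_lock = PTHREAD_RWLOCK_INITIALIZER;

	static struct node *find(struct node *p, unsigned long id)
	{
		while (p && p->id != id)
			p = id < p->id ? p->left : p->right;
		return p;
	}

	static struct node *get_or_create(unsigned long id)
	{
		struct node *n, *candidate, **pp;

		pthread_rwlock_rdlock(&tree_lock);
		n = find(root, id);
		pthread_rwlock_unlock(&tree_lock);
		if (n)
			return n;		/* fast path: extant record */

		candidate = calloc(1, sizeof(*candidate)); /* no locks held */
		if (!candidate)
			return NULL;
		candidate->id = id;

		pthread_rwlock_wrlock(&tree_lock);
		for (pp = &root; *pp;) {	/* redo the search */
			if (id == (*pp)->id) {
				n = *pp;	/* lost the race: use extant */
				pthread_rwlock_unlock(&tree_lock);
				free(candidate);
				return n;
			}
			pp = id < (*pp)->id ? &(*pp)->left : &(*pp)->right;
		}
		*pp = candidate;		/* won the race: link it in */
		pthread_rwlock_unlock(&tree_lock);
		return candidate;
	}

	int main(void)
	{
		return get_or_create(42) ? 0 : 1;
	}

The second search is what makes the unlocked allocation safe: whichever thread wins the insertion race, both callers end up returning the same node.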
221
222/*
223 * set up an incoming call
224 * - called in process context with IRQs enabled
225 */
226struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
227 struct rxrpc_connection *conn,
228 struct rxrpc_header *hdr,
229 gfp_t gfp)
230{
231 struct rxrpc_call *call, *candidate;
232 struct rb_node **p, *parent;
233 __be32 call_id;
234
235 _enter(",%d,,%x", conn->debug_id, gfp);
236
237 ASSERT(rx != NULL);
238
239 candidate = rxrpc_alloc_call(gfp);
240 if (!candidate)
241 return ERR_PTR(-EBUSY);
242
243 candidate->socket = rx;
244 candidate->conn = conn;
245 candidate->cid = hdr->cid;
246 candidate->call_id = hdr->callNumber;
247 candidate->channel = ntohl(hdr->cid) & RXRPC_CHANNELMASK;
248 candidate->rx_data_post = 0;
249 candidate->state = RXRPC_CALL_SERVER_ACCEPTING;
250 if (conn->security_ix > 0)
251 candidate->state = RXRPC_CALL_SERVER_SECURING;
252
253 write_lock_bh(&conn->lock);
254
255 /* set the channel for this call */
256 call = conn->channels[candidate->channel];
257 _debug("channel[%u] is %p", candidate->channel, call);
258 if (call && call->call_id == hdr->callNumber) {
259 /* already set; must've been a duplicate packet */
260 _debug("extant call [%d]", call->state);
261 ASSERTCMP(call->conn, ==, conn);
262
263 read_lock(&call->state_lock);
264 switch (call->state) {
265 case RXRPC_CALL_LOCALLY_ABORTED:
266 if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events))
267 rxrpc_queue_call(call);
268 case RXRPC_CALL_REMOTELY_ABORTED:
269 read_unlock(&call->state_lock);
270 goto aborted_call;
271 default:
272 rxrpc_get_call(call);
273 read_unlock(&call->state_lock);
274 goto extant_call;
275 }
276 }
277
278 if (call) {
279 /* it seems the channel is still in use from the previous call
280 * - ditch the old binding if its call is now complete */
281 _debug("CALL: %u { %s }",
282 call->debug_id, rxrpc_call_states[call->state]);
283
284 if (call->state >= RXRPC_CALL_COMPLETE) {
285 conn->channels[call->channel] = NULL;
286 } else {
287 write_unlock_bh(&conn->lock);
288 kmem_cache_free(rxrpc_call_jar, candidate);
289 _leave(" = -EBUSY");
290 return ERR_PTR(-EBUSY);
291 }
292 }
293
294	/* check that the call number isn't a duplicate */
295 _debug("check dup");
296 call_id = hdr->callNumber;
297 p = &conn->calls.rb_node;
298 parent = NULL;
299 while (*p) {
300 parent = *p;
301 call = rb_entry(parent, struct rxrpc_call, conn_node);
302
303 if (call_id < call->call_id)
304 p = &(*p)->rb_left;
305 else if (call_id > call->call_id)
306 p = &(*p)->rb_right;
307 else
308 goto old_call;
309 }
310
311 /* make the call available */
312 _debug("new call");
313 call = candidate;
314 candidate = NULL;
315 rb_link_node(&call->conn_node, parent, p);
316 rb_insert_color(&call->conn_node, &conn->calls);
317 conn->channels[call->channel] = call;
318 sock_hold(&rx->sk);
319 atomic_inc(&conn->usage);
320 write_unlock_bh(&conn->lock);
321
322 spin_lock(&conn->trans->peer->lock);
323 list_add(&call->error_link, &conn->trans->peer->error_targets);
324 spin_unlock(&conn->trans->peer->lock);
325
326 write_lock_bh(&rxrpc_call_lock);
327 list_add_tail(&call->link, &rxrpc_calls);
328 write_unlock_bh(&rxrpc_call_lock);
329
330 _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id);
331
332 call->lifetimer.expires = jiffies + rxrpc_call_max_lifetime * HZ;
333 add_timer(&call->lifetimer);
334 _leave(" = %p {%d} [new]", call, call->debug_id);
335 return call;
336
337extant_call:
338 write_unlock_bh(&conn->lock);
339 kmem_cache_free(rxrpc_call_jar, candidate);
340 _leave(" = %p {%d} [extant]", call, call ? call->debug_id : -1);
341 return call;
342
343aborted_call:
344 write_unlock_bh(&conn->lock);
345 kmem_cache_free(rxrpc_call_jar, candidate);
346 _leave(" = -ECONNABORTED");
347 return ERR_PTR(-ECONNABORTED);
348
349old_call:
350 write_unlock_bh(&conn->lock);
351 kmem_cache_free(rxrpc_call_jar, candidate);
352 _leave(" = -ECONNRESET [old]");
353 return ERR_PTR(-ECONNRESET);
354}
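The channel-binding checks in rxrpc_incoming_call boil down to a three-way decision: a matching call number on the channel means a duplicate packet for the extant call, a completed previous call lets the channel be recycled, and anything else means the channel is still occupied and the packet is bounced with -EBUSY. A distilled, hypothetical version of that decision (the names here are illustrative, not the kernel's):

	#include <stdio.h>

	enum disposition { NEW_CALL, EXTANT_CALL, CHANNEL_BUSY };

	struct chan_call {
		unsigned int call_id;
		int complete;
	};

	static enum disposition classify(const struct chan_call *cur,
					 unsigned int call_id)
	{
		if (!cur)
			return NEW_CALL;	/* channel unused */
		if (cur->call_id == call_id)
			return EXTANT_CALL;	/* duplicate packet */
		if (cur->complete)
			return NEW_CALL;	/* ditch the old binding */
		return CHANNEL_BUSY;		/* previous call still live */
	}

	int main(void)
	{
		struct chan_call old = { .call_id = 7, .complete = 1 };

		/* completed call 7 frees the channel for incoming call 8 */
		printf("%d\n", classify(&old, 8));	/* prints 0 */
		return 0;
	}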
355
356/*
357 * find an extant server call
358 * - called in process context with IRQs enabled
359 */
360struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *rx,
361 unsigned long user_call_ID)
362{
363 struct rxrpc_call *call;
364 struct rb_node *p;
365
366 _enter("%p,%lx", rx, user_call_ID);
367
368 /* search the extant calls for one that matches the specified user
369 * ID */
370 read_lock(&rx->call_lock);
371
372 p = rx->calls.rb_node;
373 while (p) {
374 call = rb_entry(p, struct rxrpc_call, sock_node);
375
376 if (user_call_ID < call->user_call_ID)
377 p = p->rb_left;
378 else if (user_call_ID > call->user_call_ID)
379 p = p->rb_right;
380 else
381 goto found_extant_call;
382 }
383
384 read_unlock(&rx->call_lock);
385 _leave(" = NULL");
386 return NULL;
387
388 /* we found the call in the list immediately */
389found_extant_call:
390 rxrpc_get_call(call);
391 read_unlock(&rx->call_lock);
392 _leave(" = %p [%d]", call, atomic_read(&call->usage));
393 return call;
394}
395
396/*
397 * detach a call from a socket and set up for release
398 */
399void rxrpc_release_call(struct rxrpc_call *call)
400{
401 struct rxrpc_connection *conn = call->conn;
402 struct rxrpc_sock *rx = call->socket;
403
404 _enter("{%d,%d,%d,%d}",
405 call->debug_id, atomic_read(&call->usage),
406 atomic_read(&call->ackr_not_idle),
407 call->rx_first_oos);
408
409 spin_lock_bh(&call->lock);
410 if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags))
411 BUG();
412 spin_unlock_bh(&call->lock);
413
414 /* dissociate from the socket
415 * - the socket's ref on the call is passed to the death timer
416 */
417 _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
418
419 write_lock_bh(&rx->call_lock);
420 if (!list_empty(&call->accept_link)) {
421 _debug("unlinking once-pending call %p { e=%lx f=%lx }",
422 call, call->events, call->flags);
423 ASSERT(!test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
424 list_del_init(&call->accept_link);
425 sk_acceptq_removed(&rx->sk);
426 } else if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
427 rb_erase(&call->sock_node, &rx->calls);
428 memset(&call->sock_node, 0xdd, sizeof(call->sock_node));
429 clear_bit(RXRPC_CALL_HAS_USERID, &call->flags);
430 }
431 write_unlock_bh(&rx->call_lock);
432
433 /* free up the channel for reuse */
434 spin_lock(&conn->trans->client_lock);
435 write_lock_bh(&conn->lock);
436 write_lock(&call->state_lock);
437
438 if (conn->channels[call->channel] == call)
439 conn->channels[call->channel] = NULL;
440
441 if (conn->out_clientflag && conn->bundle) {
442 conn->avail_calls++;
443 switch (conn->avail_calls) {
444 case 1:
445 list_move_tail(&conn->bundle_link,
446 &conn->bundle->avail_conns);
447 case 2 ... RXRPC_MAXCALLS - 1:
448 ASSERT(conn->channels[0] == NULL ||
449 conn->channels[1] == NULL ||
450 conn->channels[2] == NULL ||
451 conn->channels[3] == NULL);
452 break;
453 case RXRPC_MAXCALLS:
454 list_move_tail(&conn->bundle_link,
455 &conn->bundle->unused_conns);
456 ASSERT(conn->channels[0] == NULL &&
457 conn->channels[1] == NULL &&
458 conn->channels[2] == NULL &&
459 conn->channels[3] == NULL);
460 break;
461 default:
462 printk(KERN_ERR "RxRPC: conn->avail_calls=%d\n",
463 conn->avail_calls);
464 BUG();
465 }
466 }
467
468 spin_unlock(&conn->trans->client_lock);
469
470 if (call->state < RXRPC_CALL_COMPLETE &&
471 call->state != RXRPC_CALL_CLIENT_FINAL_ACK) {
472 _debug("+++ ABORTING STATE %d +++\n", call->state);
473 call->state = RXRPC_CALL_LOCALLY_ABORTED;
474 call->abort_code = RX_CALL_DEAD;
475 set_bit(RXRPC_CALL_ABORT, &call->events);
476 rxrpc_queue_call(call);
477 }
478 write_unlock(&call->state_lock);
479 write_unlock_bh(&conn->lock);
480
481 /* clean up the Rx queue */
482 if (!skb_queue_empty(&call->rx_queue) ||
483 !skb_queue_empty(&call->rx_oos_queue)) {
484 struct rxrpc_skb_priv *sp;
485 struct sk_buff *skb;
486
487 _debug("purge Rx queues");
488
489 spin_lock_bh(&call->lock);
490 while ((skb = skb_dequeue(&call->rx_queue)) ||
491 (skb = skb_dequeue(&call->rx_oos_queue))) {
492 sp = rxrpc_skb(skb);
493 if (sp->call) {
494 ASSERTCMP(sp->call, ==, call);
495 rxrpc_put_call(call);
496 sp->call = NULL;
497 }
498 skb->destructor = NULL;
499 spin_unlock_bh(&call->lock);
500
501 _debug("- zap %s %%%u #%u",
502 rxrpc_pkts[sp->hdr.type],
503 ntohl(sp->hdr.serial),
504 ntohl(sp->hdr.seq));
505 rxrpc_free_skb(skb);
506 spin_lock_bh(&call->lock);
507 }
508 spin_unlock_bh(&call->lock);
509
510 ASSERTCMP(call->state, !=, RXRPC_CALL_COMPLETE);
511 }
512
513 del_timer_sync(&call->resend_timer);
514 del_timer_sync(&call->ack_timer);
515 del_timer_sync(&call->lifetimer);
516 call->deadspan.expires = jiffies + rxrpc_dead_call_timeout * HZ;
517 add_timer(&call->deadspan);
518
519 _leave("");
520}
521
522/*
523 * handle a dead call being ready for reaping
524 */
525static void rxrpc_dead_call_expired(unsigned long _call)
526{
527 struct rxrpc_call *call = (struct rxrpc_call *) _call;
528
529 _enter("{%d}", call->debug_id);
530
531 write_lock_bh(&call->state_lock);
532 call->state = RXRPC_CALL_DEAD;
533 write_unlock_bh(&call->state_lock);
534 rxrpc_put_call(call);
535}
536
537/*
538 * mark a call as to be released, aborting it if it's still in progress
539 * - called with softirqs disabled
540 */
541static void rxrpc_mark_call_released(struct rxrpc_call *call)
542{
543 bool sched;
544
545 write_lock(&call->state_lock);
546 if (call->state < RXRPC_CALL_DEAD) {
547 sched = false;
548 if (call->state < RXRPC_CALL_COMPLETE) {
549 _debug("abort call %p", call);
550 call->state = RXRPC_CALL_LOCALLY_ABORTED;
551 call->abort_code = RX_CALL_DEAD;
552 if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events))
553 sched = true;
554 }
555 if (!test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
556 sched = true;
557 if (sched)
558 rxrpc_queue_call(call);
559 }
560 write_unlock(&call->state_lock);
561}
562
563/*
564 * release all the calls associated with a socket
565 */
566void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
567{
568 struct rxrpc_call *call;
569 struct rb_node *p;
570
571 _enter("%p", rx);
572
573 read_lock_bh(&rx->call_lock);
574
575 /* mark all the calls as no longer wanting incoming packets */
576 for (p = rb_first(&rx->calls); p; p = rb_next(p)) {
577 call = rb_entry(p, struct rxrpc_call, sock_node);
578 rxrpc_mark_call_released(call);
579 }
580
581 /* kill the not-yet-accepted incoming calls */
582 list_for_each_entry(call, &rx->secureq, accept_link) {
583 rxrpc_mark_call_released(call);
584 }
585
586 list_for_each_entry(call, &rx->acceptq, accept_link) {
587 rxrpc_mark_call_released(call);
588 }
589
590 read_unlock_bh(&rx->call_lock);
591 _leave("");
592}
593
594/*
595 * release a call
596 */
597void __rxrpc_put_call(struct rxrpc_call *call)
598{
599 ASSERT(call != NULL);
600
601 _enter("%p{u=%d}", call, atomic_read(&call->usage));
602
603 ASSERTCMP(atomic_read(&call->usage), >, 0);
604
605 if (atomic_dec_and_test(&call->usage)) {
606 _debug("call %d dead", call->debug_id);
607 ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
608 rxrpc_queue_work(&call->destroyer);
609 }
610 _leave("");
611}
612
613/*
614 * clean up a call
615 */
616static void rxrpc_cleanup_call(struct rxrpc_call *call)
617{
618 _net("DESTROY CALL %d", call->debug_id);
619
620 ASSERT(call->socket);
621
622 memset(&call->sock_node, 0xcd, sizeof(call->sock_node));
623
624 del_timer_sync(&call->lifetimer);
625 del_timer_sync(&call->deadspan);
626 del_timer_sync(&call->ack_timer);
627 del_timer_sync(&call->resend_timer);
628
629 ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
630 ASSERTCMP(call->events, ==, 0);
631 if (work_pending(&call->processor)) {
632 _debug("defer destroy");
633 rxrpc_queue_work(&call->destroyer);
634 return;
635 }
636
637 if (call->conn) {
638 spin_lock(&call->conn->trans->peer->lock);
639 list_del(&call->error_link);
640 spin_unlock(&call->conn->trans->peer->lock);
641
642 write_lock_bh(&call->conn->lock);
643 rb_erase(&call->conn_node, &call->conn->calls);
644 write_unlock_bh(&call->conn->lock);
645 rxrpc_put_connection(call->conn);
646 }
647
648 if (call->acks_window) {
649 _debug("kill Tx window %d",
650 CIRC_CNT(call->acks_head, call->acks_tail,
651 call->acks_winsz));
652 smp_mb();
653 while (CIRC_CNT(call->acks_head, call->acks_tail,
654 call->acks_winsz) > 0) {
655 struct rxrpc_skb_priv *sp;
656 unsigned long _skb;
657
658 _skb = call->acks_window[call->acks_tail] & ~1;
659 sp = rxrpc_skb((struct sk_buff *) _skb);
660 _debug("+++ clear Tx %u", ntohl(sp->hdr.seq));
661 rxrpc_free_skb((struct sk_buff *) _skb);
662 call->acks_tail =
663 (call->acks_tail + 1) & (call->acks_winsz - 1);
664 }
665
666 kfree(call->acks_window);
667 }
668
669 rxrpc_free_skb(call->tx_pending);
670
671 rxrpc_purge_queue(&call->rx_queue);
672 ASSERT(skb_queue_empty(&call->rx_oos_queue));
673 sock_put(&call->socket->sk);
674 kmem_cache_free(rxrpc_call_jar, call);
675}
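The Tx-window drain in rxrpc_cleanup_call treats acks_window[] as a power-of-two ring indexed modulo the window size, which is the shape the kernel's CIRC_CNT() macro expects. A minimal standalone model of the same producer/consumer arithmetic; the window size and contents are arbitrary:

	#include <stdio.h>

	#define WINSZ 16	/* must be a power of two, like acks_winsz */
	#define CIRC_CNT(head, tail, size) (((head) - (tail)) & ((size) - 1))

	int main(void)
	{
		unsigned long window[WINSZ];
		unsigned int head = 0, tail = 0, i;

		for (i = 0; i < 5; i++) {		/* producer side */
			window[head] = 100 + i;
			head = (head + 1) & (WINSZ - 1);
		}

		while (CIRC_CNT(head, tail, WINSZ) > 0) {  /* the drain */
			printf("clear Tx %lu\n", window[tail]);
			tail = (tail + 1) & (WINSZ - 1);
		}
		return 0;
	}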
676
677/*
678 * destroy a call
679 */
680static void rxrpc_destroy_call(struct work_struct *work)
681{
682 struct rxrpc_call *call =
683 container_of(work, struct rxrpc_call, destroyer);
684
685 _enter("%p{%d,%d,%p}",
686 call, atomic_read(&call->usage), call->channel, call->conn);
687
688 ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
689
690 write_lock_bh(&rxrpc_call_lock);
691 list_del_init(&call->link);
692 write_unlock_bh(&rxrpc_call_lock);
693
694 rxrpc_cleanup_call(call);
695 _leave("");
696}
697
698/*
699 * preemptively destroy all the call records rather than waiting for them
700 * to time out
701 */
702void __exit rxrpc_destroy_all_calls(void)
703{
704 struct rxrpc_call *call;
705
706 _enter("");
707 write_lock_bh(&rxrpc_call_lock);
708
709 while (!list_empty(&rxrpc_calls)) {
710 call = list_entry(rxrpc_calls.next, struct rxrpc_call, link);
711 _debug("Zapping call %p", call);
712
713 list_del_init(&call->link);
714
715 switch (atomic_read(&call->usage)) {
716 case 0:
717 ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
718 break;
719 case 1:
720 if (del_timer_sync(&call->deadspan) != 0 &&
721 call->state != RXRPC_CALL_DEAD)
722 rxrpc_dead_call_expired((unsigned long) call);
723 if (call->state != RXRPC_CALL_DEAD)
724 break;
725 default:
726 printk(KERN_ERR "RXRPC:"
727 " Call %p still in use (%d,%d,%s,%lx,%lx)!\n",
728 call, atomic_read(&call->usage),
729 atomic_read(&call->ackr_not_idle),
730 rxrpc_call_states[call->state],
731 call->flags, call->events);
732 if (!skb_queue_empty(&call->rx_queue))
733			printk(KERN_ERR "RXRPC: Rx queue occupied\n");
734 if (!skb_queue_empty(&call->rx_oos_queue))
735			printk(KERN_ERR "RXRPC: OOS queue occupied\n");
736 break;
737 }
738
739 write_unlock_bh(&rxrpc_call_lock);
740 cond_resched();
741 write_lock_bh(&rxrpc_call_lock);
742 }
743
744 write_unlock_bh(&rxrpc_call_lock);
745 _leave("");
746}
747
748/*
749 * handle call lifetime being exceeded
750 */
751static void rxrpc_call_life_expired(unsigned long _call)
752{
753 struct rxrpc_call *call = (struct rxrpc_call *) _call;
754
755 if (call->state >= RXRPC_CALL_COMPLETE)
756 return;
757
758 _enter("{%d}", call->debug_id);
759 read_lock_bh(&call->state_lock);
760 if (call->state < RXRPC_CALL_COMPLETE) {
761 set_bit(RXRPC_CALL_LIFE_TIMER, &call->events);
762 rxrpc_queue_call(call);
763 }
764 read_unlock_bh(&call->state_lock);
765}
766
767/*
768 * handle resend timer expiry
769 */
770static void rxrpc_resend_time_expired(unsigned long _call)
771{
772 struct rxrpc_call *call = (struct rxrpc_call *) _call;
773
774 _enter("{%d}", call->debug_id);
775
776 if (call->state >= RXRPC_CALL_COMPLETE)
777 return;
778
779 read_lock_bh(&call->state_lock);
780 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
781 if (call->state < RXRPC_CALL_COMPLETE &&
782 !test_and_set_bit(RXRPC_CALL_RESEND_TIMER, &call->events))
783 rxrpc_queue_call(call);
784 read_unlock_bh(&call->state_lock);
785}
786
787/*
788 * handle ACK timer expiry
789 */
790static void rxrpc_ack_time_expired(unsigned long _call)
791{
792 struct rxrpc_call *call = (struct rxrpc_call *) _call;
793
794 _enter("{%d}", call->debug_id);
795
796 if (call->state >= RXRPC_CALL_COMPLETE)
797 return;
798
799 read_lock_bh(&call->state_lock);
800 if (call->state < RXRPC_CALL_COMPLETE &&
801 !test_and_set_bit(RXRPC_CALL_ACK, &call->events))
802 rxrpc_queue_call(call);
803 read_unlock_bh(&call->state_lock);
804}
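The resend and ACK timer handlers above share one discipline: the timer context does no real work, it just sets an event bit and queues the call's work item, and the test_and_set ensures a burst of expiries queues the processing only once. A rough C11 sketch of that idiom, with illustrative names standing in for the kernel primitives:

	#include <stdatomic.h>
	#include <stdio.h>

	#define EV_ACK 0x1u

	static atomic_uint events;

	static void queue_call(void)	/* stand-in for rxrpc_queue_call() */
	{
		puts("work item queued");
	}

	static void ack_time_expired(void)
	{
		/* fetch_or gives test_and_set semantics: only the caller
		 * that actually sets the bit queues the work item */
		if (!(atomic_fetch_or(&events, EV_ACK) & EV_ACK))
			queue_call();
	}

	int main(void)
	{
		ack_time_expired();	/* queues the work */
		ack_time_expired();	/* bit already set: nothing queued */
		return 0;
	}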
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
new file mode 100644
index 000000000000..43cb3e051ece
--- /dev/null
+++ b/net/rxrpc/ar-connection.c
@@ -0,0 +1,911 @@
1/* RxRPC virtual connection handler
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/net.h>
14#include <linux/skbuff.h>
15#include <linux/crypto.h>
16#include <net/sock.h>
17#include <net/af_rxrpc.h>
18#include "ar-internal.h"
19
20static void rxrpc_connection_reaper(struct work_struct *work);
21
22LIST_HEAD(rxrpc_connections);
23DEFINE_RWLOCK(rxrpc_connection_lock);
24static unsigned long rxrpc_connection_timeout = 10 * 60;
25static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper);
26
27/*
28 * allocate a new client connection bundle
29 */
30static struct rxrpc_conn_bundle *rxrpc_alloc_bundle(gfp_t gfp)
31{
32 struct rxrpc_conn_bundle *bundle;
33
34 _enter("");
35
36 bundle = kzalloc(sizeof(struct rxrpc_conn_bundle), gfp);
37 if (bundle) {
38 INIT_LIST_HEAD(&bundle->unused_conns);
39 INIT_LIST_HEAD(&bundle->avail_conns);
40 INIT_LIST_HEAD(&bundle->busy_conns);
41 init_waitqueue_head(&bundle->chanwait);
42 atomic_set(&bundle->usage, 1);
43 }
44
45 _leave(" = %p", bundle);
46 return bundle;
47}
48
49/*
50 * compare bundle parameters with what we're looking for
51 * - return -ve, 0 or +ve
52 */
53static inline
54int rxrpc_cmp_bundle(const struct rxrpc_conn_bundle *bundle,
55 struct key *key, __be16 service_id)
56{
57 return (bundle->service_id - service_id) ?:
58 ((unsigned long) bundle->key - (unsigned long) key);
59}
60
61/*
62 * get bundle of client connections that a client socket can make use of
63 */
64struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *rx,
65 struct rxrpc_transport *trans,
66 struct key *key,
67 __be16 service_id,
68 gfp_t gfp)
69{
70 struct rxrpc_conn_bundle *bundle, *candidate;
71 struct rb_node *p, *parent, **pp;
72
73 _enter("%p{%x},%x,%hx,",
74	       rx, key_serial(key), trans->debug_id, ntohs(service_id));
75
76 if (rx->trans == trans && rx->bundle) {
77 atomic_inc(&rx->bundle->usage);
78 return rx->bundle;
79 }
80
81 /* search the extant bundles first for one that matches the specified
82 * user ID */
83 spin_lock(&trans->client_lock);
84
85 p = trans->bundles.rb_node;
86 while (p) {
87 bundle = rb_entry(p, struct rxrpc_conn_bundle, node);
88
89 if (rxrpc_cmp_bundle(bundle, key, service_id) < 0)
90 p = p->rb_left;
91 else if (rxrpc_cmp_bundle(bundle, key, service_id) > 0)
92 p = p->rb_right;
93 else
94 goto found_extant_bundle;
95 }
96
97 spin_unlock(&trans->client_lock);
98
99 /* not yet present - create a candidate for a new record and then
100 * redo the search */
101 candidate = rxrpc_alloc_bundle(gfp);
102 if (!candidate) {
103 _leave(" = -ENOMEM");
104 return ERR_PTR(-ENOMEM);
105 }
106
107 candidate->key = key_get(key);
108 candidate->service_id = service_id;
109
110 spin_lock(&trans->client_lock);
111
112 pp = &trans->bundles.rb_node;
113 parent = NULL;
114 while (*pp) {
115 parent = *pp;
116 bundle = rb_entry(parent, struct rxrpc_conn_bundle, node);
117
118 if (rxrpc_cmp_bundle(bundle, key, service_id) < 0)
119 pp = &(*pp)->rb_left;
120 else if (rxrpc_cmp_bundle(bundle, key, service_id) > 0)
121 pp = &(*pp)->rb_right;
122 else
123 goto found_extant_second;
124 }
125
126 /* second search also failed; add the new bundle */
127 bundle = candidate;
128 candidate = NULL;
129
130 rb_link_node(&bundle->node, parent, pp);
131 rb_insert_color(&bundle->node, &trans->bundles);
132 spin_unlock(&trans->client_lock);
133 _net("BUNDLE new on trans %d", trans->debug_id);
134 if (!rx->bundle && rx->sk.sk_state == RXRPC_CLIENT_CONNECTED) {
135 atomic_inc(&bundle->usage);
136 rx->bundle = bundle;
137 }
138 _leave(" = %p [new]", bundle);
139 return bundle;
140
141 /* we found the bundle in the list immediately */
142found_extant_bundle:
143 atomic_inc(&bundle->usage);
144 spin_unlock(&trans->client_lock);
145 _net("BUNDLE old on trans %d", trans->debug_id);
146 if (!rx->bundle && rx->sk.sk_state == RXRPC_CLIENT_CONNECTED) {
147 atomic_inc(&bundle->usage);
148 rx->bundle = bundle;
149 }
150 _leave(" = %p [extant %d]", bundle, atomic_read(&bundle->usage));
151 return bundle;
152
153 /* we found the bundle on the second time through the list */
154found_extant_second:
155 atomic_inc(&bundle->usage);
156 spin_unlock(&trans->client_lock);
157 kfree(candidate);
158 _net("BUNDLE old2 on trans %d", trans->debug_id);
159 if (!rx->bundle && rx->sk.sk_state == RXRPC_CLIENT_CONNECTED) {
160 atomic_inc(&bundle->usage);
161 rx->bundle = bundle;
162 }
163 _leave(" = %p [second %d]", bundle, atomic_read(&bundle->usage));
164 return bundle;
165}
166
167/*
168 * release a bundle
169 */
170void rxrpc_put_bundle(struct rxrpc_transport *trans,
171 struct rxrpc_conn_bundle *bundle)
172{
173	_enter("%p,%p{%d}", trans, bundle, atomic_read(&bundle->usage));
174
175 if (atomic_dec_and_lock(&bundle->usage, &trans->client_lock)) {
176 _debug("Destroy bundle");
177 rb_erase(&bundle->node, &trans->bundles);
178 spin_unlock(&trans->client_lock);
179 ASSERT(list_empty(&bundle->unused_conns));
180 ASSERT(list_empty(&bundle->avail_conns));
181 ASSERT(list_empty(&bundle->busy_conns));
182 ASSERTCMP(bundle->num_conns, ==, 0);
183 key_put(bundle->key);
184 kfree(bundle);
185 }
186
187 _leave("");
188}
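rxrpc_put_bundle relies on the dec-and-lock idiom: if the reference count drops to zero, the lock protecting the lookup tree is already held, so the bundle can be unlinked before any concurrent lookup could take a new reference. A simplified pthread sketch of the idiom; note that the kernel's atomic_dec_and_lock() avoids taking the lock at all while the count stays above one, which this version skips for brevity:

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdlib.h>

	static pthread_mutex_t client_lock = PTHREAD_MUTEX_INITIALIZER;

	struct bundle {
		int usage;	/* the 1 -> 0 drop happens under client_lock */
	};

	/* returns true with client_lock held iff this was the final put */
	static bool dec_and_lock(struct bundle *b)
	{
		pthread_mutex_lock(&client_lock);
		if (--b->usage == 0)
			return true;	/* caller unlinks, unlocks, frees */
		pthread_mutex_unlock(&client_lock);
		return false;
	}

	void put_bundle(struct bundle *b)
	{
		if (dec_and_lock(b)) {
			/* unlink from the lookup tree here: no concurrent
			 * lookup can revive the bundle once it is gone */
			pthread_mutex_unlock(&client_lock);
			free(b);
		}
	}

	int main(void)
	{
		struct bundle *b = calloc(1, sizeof(*b));

		if (b) {
			b->usage = 1;
			put_bundle(b);	/* final put: unlink and free */
		}
		return 0;
	}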
189
190/*
191 * allocate a new connection
192 */
193static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
194{
195 struct rxrpc_connection *conn;
196
197 _enter("");
198
199 conn = kzalloc(sizeof(struct rxrpc_connection), gfp);
200 if (conn) {
201 INIT_WORK(&conn->processor, &rxrpc_process_connection);
202 INIT_LIST_HEAD(&conn->bundle_link);
203 conn->calls = RB_ROOT;
204 skb_queue_head_init(&conn->rx_queue);
205 rwlock_init(&conn->lock);
206 spin_lock_init(&conn->state_lock);
207 atomic_set(&conn->usage, 1);
208 conn->debug_id = atomic_inc_return(&rxrpc_debug_id);
209 conn->avail_calls = RXRPC_MAXCALLS;
210 conn->size_align = 4;
211 conn->header_size = sizeof(struct rxrpc_header);
212 }
213
214	_leave(" = %p{%d}", conn, conn ? conn->debug_id : 0);
215 return conn;
216}
217
218/*
219 * assign a connection ID to a connection and add it to the transport's
220 * connection lookup tree
221 * - called with transport client lock held
222 */
223static void rxrpc_assign_connection_id(struct rxrpc_connection *conn)
224{
225 struct rxrpc_connection *xconn;
226 struct rb_node *parent, **p;
227 __be32 epoch;
228 u32 real_conn_id;
229
230 _enter("");
231
232 epoch = conn->epoch;
233
234 write_lock_bh(&conn->trans->conn_lock);
235
236 conn->trans->conn_idcounter += RXRPC_CID_INC;
237 if (conn->trans->conn_idcounter < RXRPC_CID_INC)
238 conn->trans->conn_idcounter = RXRPC_CID_INC;
239 real_conn_id = conn->trans->conn_idcounter;
240
241attempt_insertion:
242 parent = NULL;
243 p = &conn->trans->client_conns.rb_node;
244
245 while (*p) {
246 parent = *p;
247 xconn = rb_entry(parent, struct rxrpc_connection, node);
248
249 if (epoch < xconn->epoch)
250 p = &(*p)->rb_left;
251 else if (epoch > xconn->epoch)
252 p = &(*p)->rb_right;
253 else if (real_conn_id < xconn->real_conn_id)
254 p = &(*p)->rb_left;
255 else if (real_conn_id > xconn->real_conn_id)
256 p = &(*p)->rb_right;
257 else
258 goto id_exists;
259 }
260
261 /* we've found a suitable hole - arrange for this connection to occupy
262 * it */
263 rb_link_node(&conn->node, parent, p);
264 rb_insert_color(&conn->node, &conn->trans->client_conns);
265
266 conn->real_conn_id = real_conn_id;
267 conn->cid = htonl(real_conn_id);
268 write_unlock_bh(&conn->trans->conn_lock);
269 _leave(" [CONNID %x CID %x]", real_conn_id, ntohl(conn->cid));
270 return;
271
272 /* we found a connection with the proposed ID - walk the tree from that
273 * point looking for the next unused ID */
274id_exists:
275 for (;;) {
276 real_conn_id += RXRPC_CID_INC;
277 if (real_conn_id < RXRPC_CID_INC) {
278 real_conn_id = RXRPC_CID_INC;
279 conn->trans->conn_idcounter = real_conn_id;
280 goto attempt_insertion;
281 }
282
283 parent = rb_next(parent);
284 if (!parent)
285 goto attempt_insertion;
286
287 xconn = rb_entry(parent, struct rxrpc_connection, node);
288 if (epoch < xconn->epoch ||
289 real_conn_id < xconn->real_conn_id)
290 goto attempt_insertion;
291 }
292}
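The ID assignment above bumps a per-transport counter by RXRPC_CID_INC and, if that ID is already in use, walks the existing connections in order until a gap opens, wrapping the counter past zero on overflow. The same strategy over a sorted array, as a simplified sketch; the real code also orders by epoch and re-runs the tree insertion after a wrap:

	#include <stdio.h>

	static unsigned int next_free_id(const unsigned int *used, int n,
					 unsigned int candidate,
					 unsigned int inc)
	{
		int i;

		/* find the first in-use ID >= the candidate */
		for (i = 0; i < n && used[i] < candidate; i++)
			;
		/* walk forward while the candidate collides */
		while (i < n && used[i] == candidate) {
			candidate += inc;
			if (candidate < inc)	/* wrapped past zero */
				return inc;	/* (real code reinserts) */
			i++;
		}
		return candidate;
	}

	int main(void)
	{
		unsigned int used[] = { 4, 8, 12 }; /* stride 4, like CID_INC */

		printf("%u\n", next_free_id(used, 3, 4, 4));	/* -> 16 */
		return 0;
	}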
293
294/*
295 * add a call to a connection's call-by-ID tree
296 */
297static void rxrpc_add_call_ID_to_conn(struct rxrpc_connection *conn,
298 struct rxrpc_call *call)
299{
300 struct rxrpc_call *xcall;
301 struct rb_node *parent, **p;
302 __be32 call_id;
303
304 write_lock_bh(&conn->lock);
305
306 call_id = call->call_id;
307 p = &conn->calls.rb_node;
308 parent = NULL;
309 while (*p) {
310 parent = *p;
311 xcall = rb_entry(parent, struct rxrpc_call, conn_node);
312
313 if (call_id < xcall->call_id)
314 p = &(*p)->rb_left;
315 else if (call_id > xcall->call_id)
316 p = &(*p)->rb_right;
317 else
318 BUG();
319 }
320
321 rb_link_node(&call->conn_node, parent, p);
322 rb_insert_color(&call->conn_node, &conn->calls);
323
324 write_unlock_bh(&conn->lock);
325}
326
327/*
328 * connect a call on an exclusive connection
329 */
330static int rxrpc_connect_exclusive(struct rxrpc_sock *rx,
331 struct rxrpc_transport *trans,
332 __be16 service_id,
333 struct rxrpc_call *call,
334 gfp_t gfp)
335{
336 struct rxrpc_connection *conn;
337 int chan, ret;
338
339 _enter("");
340
341 conn = rx->conn;
342 if (!conn) {
343 /* not yet present - create a candidate for a new connection
344 * and then redo the check */
345 conn = rxrpc_alloc_connection(gfp);
346		if (!conn) {
347			_leave(" = -ENOMEM");
348			return -ENOMEM;
349 }
350
351 conn->trans = trans;
352 conn->bundle = NULL;
353 conn->service_id = service_id;
354 conn->epoch = rxrpc_epoch;
355 conn->in_clientflag = 0;
356 conn->out_clientflag = RXRPC_CLIENT_INITIATED;
357 conn->cid = 0;
358 conn->state = RXRPC_CONN_CLIENT;
359 conn->avail_calls = RXRPC_MAXCALLS - 1;
360 conn->security_level = rx->min_sec_level;
361 conn->key = key_get(rx->key);
362
363 ret = rxrpc_init_client_conn_security(conn);
364 if (ret < 0) {
365 key_put(conn->key);
366 kfree(conn);
367 _leave(" = %d [key]", ret);
368 return ret;
369 }
370
371 write_lock_bh(&rxrpc_connection_lock);
372 list_add_tail(&conn->link, &rxrpc_connections);
373 write_unlock_bh(&rxrpc_connection_lock);
374
375 spin_lock(&trans->client_lock);
376 atomic_inc(&trans->usage);
377
378 _net("CONNECT EXCL new %d on TRANS %d",
379 conn->debug_id, conn->trans->debug_id);
380
381 rxrpc_assign_connection_id(conn);
382 rx->conn = conn;
383 }
384
385 /* we've got a connection with a free channel and we can now attach the
386 * call to it
387 * - we're holding the transport's client lock
388 * - we're holding a reference on the connection
389 */
390 for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
391 if (!conn->channels[chan])
392 goto found_channel;
393 goto no_free_channels;
394
395found_channel:
396 atomic_inc(&conn->usage);
397 conn->channels[chan] = call;
398 call->conn = conn;
399 call->channel = chan;
400 call->cid = conn->cid | htonl(chan);
401 call->call_id = htonl(++conn->call_counter);
402
403 _net("CONNECT client on conn %d chan %d as call %x",
404 conn->debug_id, chan, ntohl(call->call_id));
405
406 spin_unlock(&trans->client_lock);
407
408 rxrpc_add_call_ID_to_conn(conn, call);
409 _leave(" = 0");
410 return 0;
411
412no_free_channels:
413 spin_unlock(&trans->client_lock);
414 _leave(" = -ENOSR");
415 return -ENOSR;
416}
417
418/*
419 * find a connection for a call
420 * - called in process context with IRQs enabled
421 */
422int rxrpc_connect_call(struct rxrpc_sock *rx,
423 struct rxrpc_transport *trans,
424 struct rxrpc_conn_bundle *bundle,
425 struct rxrpc_call *call,
426 gfp_t gfp)
427{
428 struct rxrpc_connection *conn, *candidate;
429 int chan, ret;
430
431 DECLARE_WAITQUEUE(myself, current);
432
433 _enter("%p,%lx,", rx, call->user_call_ID);
434
435 if (test_bit(RXRPC_SOCK_EXCLUSIVE_CONN, &rx->flags))
436 return rxrpc_connect_exclusive(rx, trans, bundle->service_id,
437 call, gfp);
438
439 spin_lock(&trans->client_lock);
440 for (;;) {
441 /* see if the bundle has a call slot available */
442 if (!list_empty(&bundle->avail_conns)) {
443 _debug("avail");
444 conn = list_entry(bundle->avail_conns.next,
445 struct rxrpc_connection,
446 bundle_link);
447 if (--conn->avail_calls == 0)
448 list_move(&conn->bundle_link,
449 &bundle->busy_conns);
450 ASSERTCMP(conn->avail_calls, <, RXRPC_MAXCALLS);
451 ASSERT(conn->channels[0] == NULL ||
452 conn->channels[1] == NULL ||
453 conn->channels[2] == NULL ||
454 conn->channels[3] == NULL);
455 atomic_inc(&conn->usage);
456 break;
457 }
458
459 if (!list_empty(&bundle->unused_conns)) {
460 _debug("unused");
461 conn = list_entry(bundle->unused_conns.next,
462 struct rxrpc_connection,
463 bundle_link);
464 ASSERTCMP(conn->avail_calls, ==, RXRPC_MAXCALLS);
465 conn->avail_calls = RXRPC_MAXCALLS - 1;
466 ASSERT(conn->channels[0] == NULL &&
467 conn->channels[1] == NULL &&
468 conn->channels[2] == NULL &&
469 conn->channels[3] == NULL);
470 atomic_inc(&conn->usage);
471 list_move(&conn->bundle_link, &bundle->avail_conns);
472 break;
473 }
474
475 /* need to allocate a new connection */
476 _debug("get new conn [%d]", bundle->num_conns);
477
478 spin_unlock(&trans->client_lock);
479
480 if (signal_pending(current))
481 goto interrupted;
482
483 if (bundle->num_conns >= 20) {
484 _debug("too many conns");
485
486 if (!(gfp & __GFP_WAIT)) {
487 _leave(" = -EAGAIN");
488 return -EAGAIN;
489 }
490
491 add_wait_queue(&bundle->chanwait, &myself);
492 for (;;) {
493 set_current_state(TASK_INTERRUPTIBLE);
494 if (bundle->num_conns < 20 ||
495 !list_empty(&bundle->unused_conns) ||
496 !list_empty(&bundle->avail_conns))
497 break;
498 if (signal_pending(current))
499 goto interrupted_dequeue;
500 schedule();
501 }
502 remove_wait_queue(&bundle->chanwait, &myself);
503 __set_current_state(TASK_RUNNING);
504 spin_lock(&trans->client_lock);
505 continue;
506 }
507
508 /* not yet present - create a candidate for a new connection and then
509 * redo the check */
510 candidate = rxrpc_alloc_connection(gfp);
511		if (!candidate) {
512			_leave(" = -ENOMEM");
513			return -ENOMEM;
514 }
515
516 candidate->trans = trans;
517 candidate->bundle = bundle;
518 candidate->service_id = bundle->service_id;
519 candidate->epoch = rxrpc_epoch;
520 candidate->in_clientflag = 0;
521 candidate->out_clientflag = RXRPC_CLIENT_INITIATED;
522 candidate->cid = 0;
523 candidate->state = RXRPC_CONN_CLIENT;
524 candidate->avail_calls = RXRPC_MAXCALLS;
525 candidate->security_level = rx->min_sec_level;
526 candidate->key = key_get(bundle->key);
527
528 ret = rxrpc_init_client_conn_security(candidate);
529 if (ret < 0) {
530 key_put(candidate->key);
531 kfree(candidate);
532 _leave(" = %d [key]", ret);
533 return ret;
534 }
535
536 write_lock_bh(&rxrpc_connection_lock);
537 list_add_tail(&candidate->link, &rxrpc_connections);
538 write_unlock_bh(&rxrpc_connection_lock);
539
540 spin_lock(&trans->client_lock);
541
542 list_add(&candidate->bundle_link, &bundle->unused_conns);
543 bundle->num_conns++;
544 atomic_inc(&bundle->usage);
545 atomic_inc(&trans->usage);
546
547 _net("CONNECT new %d on TRANS %d",
548 candidate->debug_id, candidate->trans->debug_id);
549
550 rxrpc_assign_connection_id(candidate);
551 if (candidate->security)
552 candidate->security->prime_packet_security(candidate);
553
554 /* leave the candidate lurking in zombie mode attached to the
555 * bundle until we're ready for it */
556 rxrpc_put_connection(candidate);
557 candidate = NULL;
558 }
559
560 /* we've got a connection with a free channel and we can now attach the
561 * call to it
562 * - we're holding the transport's client lock
563 * - we're holding a reference on the connection
564 * - we're holding a reference on the bundle
565 */
566 for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
567 if (!conn->channels[chan])
568 goto found_channel;
569 ASSERT(conn->channels[0] == NULL ||
570 conn->channels[1] == NULL ||
571 conn->channels[2] == NULL ||
572 conn->channels[3] == NULL);
573 BUG();
574
575found_channel:
576 conn->channels[chan] = call;
577 call->conn = conn;
578 call->channel = chan;
579 call->cid = conn->cid | htonl(chan);
580 call->call_id = htonl(++conn->call_counter);
581
582 _net("CONNECT client on conn %d chan %d as call %x",
583 conn->debug_id, chan, ntohl(call->call_id));
584
585 ASSERTCMP(conn->avail_calls, <, RXRPC_MAXCALLS);
586 spin_unlock(&trans->client_lock);
587
588 rxrpc_add_call_ID_to_conn(conn, call);
589
590 _leave(" = 0");
591 return 0;
592
593interrupted_dequeue:
594 remove_wait_queue(&bundle->chanwait, &myself);
595 __set_current_state(TASK_RUNNING);
596interrupted:
597 _leave(" = -ERESTARTSYS");
598 return -ERESTARTSYS;
599}
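The wait for a free channel in rxrpc_connect_call is the classic prepare-to-wait sequence: join the wait queue, set TASK_INTERRUPTIBLE, re-check the condition, and only then schedule(), so a wakeup cannot be lost between the check and the sleep. A rough pthread equivalent in which testing the condition under the mutex closes the same window; the MAX_CONNS cap mirrors the hard-coded limit of 20 above:

	#include <pthread.h>
	#include <stdbool.h>

	#define MAX_CONNS 20	/* mirrors the cap in the code above */

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t chanwait = PTHREAD_COND_INITIALIZER;
	static int num_conns;
	static bool channel_free;

	void wait_for_channel(void)
	{
		pthread_mutex_lock(&lock);
		/* the condition is re-tested on every wakeup, exactly as
		 * the schedule() loop re-tests it after being woken */
		while (num_conns >= MAX_CONNS && !channel_free)
			pthread_cond_wait(&chanwait, &lock);
		pthread_mutex_unlock(&lock);
	}

	void channel_released(void)
	{
		pthread_mutex_lock(&lock);
		channel_free = true;
		pthread_mutex_unlock(&lock);
		pthread_cond_broadcast(&chanwait); /* like wake_up() */
	}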
600
601/*
602 * get a record of an incoming connection
603 */
604struct rxrpc_connection *
605rxrpc_incoming_connection(struct rxrpc_transport *trans,
606 struct rxrpc_header *hdr,
607 gfp_t gfp)
608{
609 struct rxrpc_connection *conn, *candidate = NULL;
610 struct rb_node *p, **pp;
611 const char *new = "old";
612 __be32 epoch;
613 u32 conn_id;
614
615 _enter("");
616
617 ASSERT(hdr->flags & RXRPC_CLIENT_INITIATED);
618
619 epoch = hdr->epoch;
620 conn_id = ntohl(hdr->cid) & RXRPC_CIDMASK;
621
622 /* search the connection list first */
623 read_lock_bh(&trans->conn_lock);
624
625 p = trans->server_conns.rb_node;
626 while (p) {
627 conn = rb_entry(p, struct rxrpc_connection, node);
628
629 _debug("maybe %x", conn->real_conn_id);
630
631 if (epoch < conn->epoch)
632 p = p->rb_left;
633 else if (epoch > conn->epoch)
634 p = p->rb_right;
635 else if (conn_id < conn->real_conn_id)
636 p = p->rb_left;
637 else if (conn_id > conn->real_conn_id)
638 p = p->rb_right;
639 else
640 goto found_extant_connection;
641 }
642 read_unlock_bh(&trans->conn_lock);
643
644 /* not yet present - create a candidate for a new record and then
645 * redo the search */
646 candidate = rxrpc_alloc_connection(gfp);
647 if (!candidate) {
648 _leave(" = -ENOMEM");
649 return ERR_PTR(-ENOMEM);
650 }
651
652 candidate->trans = trans;
653 candidate->epoch = hdr->epoch;
654 candidate->cid = hdr->cid & __constant_cpu_to_be32(RXRPC_CIDMASK);
655 candidate->service_id = hdr->serviceId;
656 candidate->security_ix = hdr->securityIndex;
657 candidate->in_clientflag = RXRPC_CLIENT_INITIATED;
658 candidate->out_clientflag = 0;
659 candidate->real_conn_id = conn_id;
660 candidate->state = RXRPC_CONN_SERVER;
661 if (candidate->service_id)
662 candidate->state = RXRPC_CONN_SERVER_UNSECURED;
663
664 write_lock_bh(&trans->conn_lock);
665
666 pp = &trans->server_conns.rb_node;
667 p = NULL;
668 while (*pp) {
669 p = *pp;
670 conn = rb_entry(p, struct rxrpc_connection, node);
671
672 if (epoch < conn->epoch)
673 pp = &(*pp)->rb_left;
674 else if (epoch > conn->epoch)
675 pp = &(*pp)->rb_right;
676 else if (conn_id < conn->real_conn_id)
677 pp = &(*pp)->rb_left;
678 else if (conn_id > conn->real_conn_id)
679 pp = &(*pp)->rb_right;
680 else
681 goto found_extant_second;
682 }
683
684 /* we can now add the new candidate to the list */
685 conn = candidate;
686 candidate = NULL;
687 rb_link_node(&conn->node, p, pp);
688 rb_insert_color(&conn->node, &trans->server_conns);
689 atomic_inc(&conn->trans->usage);
690
691 write_unlock_bh(&trans->conn_lock);
692
693 write_lock_bh(&rxrpc_connection_lock);
694 list_add_tail(&conn->link, &rxrpc_connections);
695 write_unlock_bh(&rxrpc_connection_lock);
696
697 new = "new";
698
699success:
700 _net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->real_conn_id);
701
702 _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
703 return conn;
704
705 /* we found the connection in the list immediately */
706found_extant_connection:
707 if (hdr->securityIndex != conn->security_ix) {
708 read_unlock_bh(&trans->conn_lock);
709 goto security_mismatch;
710 }
711 atomic_inc(&conn->usage);
712 read_unlock_bh(&trans->conn_lock);
713 goto success;
714
715 /* we found the connection on the second time through the list */
716found_extant_second:
717 if (hdr->securityIndex != conn->security_ix) {
718 write_unlock_bh(&trans->conn_lock);
719 goto security_mismatch;
720 }
721 atomic_inc(&conn->usage);
722 write_unlock_bh(&trans->conn_lock);
723 kfree(candidate);
724 goto success;
725
726security_mismatch:
727 kfree(candidate);
728 _leave(" = -EKEYREJECTED");
729 return ERR_PTR(-EKEYREJECTED);
730}
731
732/*
733 * find a connection based on transport and RxRPC connection ID for an incoming
734 * packet
735 */
736struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *trans,
737 struct rxrpc_header *hdr)
738{
739 struct rxrpc_connection *conn;
740 struct rb_node *p;
741 __be32 epoch;
742 u32 conn_id;
743
744 _enter(",{%x,%x}", ntohl(hdr->cid), hdr->flags);
745
746 read_lock_bh(&trans->conn_lock);
747
748 conn_id = ntohl(hdr->cid) & RXRPC_CIDMASK;
749 epoch = hdr->epoch;
750
751 if (hdr->flags & RXRPC_CLIENT_INITIATED)
752 p = trans->server_conns.rb_node;
753 else
754 p = trans->client_conns.rb_node;
755
756 while (p) {
757 conn = rb_entry(p, struct rxrpc_connection, node);
758
759 _debug("maybe %x", conn->real_conn_id);
760
761 if (epoch < conn->epoch)
762 p = p->rb_left;
763 else if (epoch > conn->epoch)
764 p = p->rb_right;
765 else if (conn_id < conn->real_conn_id)
766 p = p->rb_left;
767 else if (conn_id > conn->real_conn_id)
768 p = p->rb_right;
769 else
770 goto found;
771 }
772
773 read_unlock_bh(&trans->conn_lock);
774 _leave(" = NULL");
775 return NULL;
776
777found:
778 atomic_inc(&conn->usage);
779 read_unlock_bh(&trans->conn_lock);
780 _leave(" = %p", conn);
781 return conn;
782}
783
784/*
785 * release a virtual connection
786 */
787void rxrpc_put_connection(struct rxrpc_connection *conn)
788{
789 _enter("%p{u=%d,d=%d}",
790 conn, atomic_read(&conn->usage), conn->debug_id);
791
792 ASSERTCMP(atomic_read(&conn->usage), >, 0);
793
794 conn->put_time = xtime.tv_sec;
795 if (atomic_dec_and_test(&conn->usage)) {
796 _debug("zombie");
797 rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
798 }
799
800 _leave("");
801}
802
803/*
804 * destroy a virtual connection
805 */
806static void rxrpc_destroy_connection(struct rxrpc_connection *conn)
807{
808 _enter("%p{%d}", conn, atomic_read(&conn->usage));
809
810 ASSERTCMP(atomic_read(&conn->usage), ==, 0);
811
812 _net("DESTROY CONN %d", conn->debug_id);
813
814 if (conn->bundle)
815 rxrpc_put_bundle(conn->trans, conn->bundle);
816
817 ASSERT(RB_EMPTY_ROOT(&conn->calls));
818 rxrpc_purge_queue(&conn->rx_queue);
819
820 rxrpc_clear_conn_security(conn);
821 rxrpc_put_transport(conn->trans);
822 kfree(conn);
823 _leave("");
824}
825
826/*
827 * reap dead connections
828 */
829void rxrpc_connection_reaper(struct work_struct *work)
830{
831 struct rxrpc_connection *conn, *_p;
832 unsigned long now, earliest, reap_time;
833
834 LIST_HEAD(graveyard);
835
836 _enter("");
837
838 now = xtime.tv_sec;
839 earliest = ULONG_MAX;
840
841 write_lock_bh(&rxrpc_connection_lock);
842 list_for_each_entry_safe(conn, _p, &rxrpc_connections, link) {
843 _debug("reap CONN %d { u=%d,t=%ld }",
844 conn->debug_id, atomic_read(&conn->usage),
845 (long) now - (long) conn->put_time);
846
847 if (likely(atomic_read(&conn->usage) > 0))
848 continue;
849
850 spin_lock(&conn->trans->client_lock);
851 write_lock(&conn->trans->conn_lock);
852 reap_time = conn->put_time + rxrpc_connection_timeout;
853
854 if (atomic_read(&conn->usage) > 0) {
855 ;
856 } else if (reap_time <= now) {
857 list_move_tail(&conn->link, &graveyard);
858 if (conn->out_clientflag)
859 rb_erase(&conn->node,
860 &conn->trans->client_conns);
861 else
862 rb_erase(&conn->node,
863 &conn->trans->server_conns);
864 if (conn->bundle) {
865 list_del_init(&conn->bundle_link);
866 conn->bundle->num_conns--;
867 }
868
869 } else if (reap_time < earliest) {
870 earliest = reap_time;
871 }
872
873 write_unlock(&conn->trans->conn_lock);
874 spin_unlock(&conn->trans->client_lock);
875 }
876 write_unlock_bh(&rxrpc_connection_lock);
877
878 if (earliest != ULONG_MAX) {
879 _debug("reschedule reaper %ld", (long) earliest - now);
880 ASSERTCMP(earliest, >, now);
881 rxrpc_queue_delayed_work(&rxrpc_connection_reap,
882 (earliest - now) * HZ);
883 }
884
885 /* then destroy all those pulled out */
886 while (!list_empty(&graveyard)) {
887 conn = list_entry(graveyard.next, struct rxrpc_connection,
888 link);
889 list_del_init(&conn->link);
890
891 ASSERTCMP(atomic_read(&conn->usage), ==, 0);
892 rxrpc_destroy_connection(conn);
893 }
894
895 _leave("");
896}
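The reaper makes a single pass that both moves expired, unreferenced connections to a graveyard and records the earliest future expiry, so the delayed work is rescheduled at most once per pass. A compact model of that bookkeeping with plain integer timestamps; the values are illustrative:

	#include <limits.h>
	#include <stdio.h>

	#define TIMEOUT 600	/* like rxrpc_connection_timeout: 10 min */

	struct conn {
		long put_time;
		int usage;
		int dead;
	};

	static long reap(struct conn *conns, int n, long now)
	{
		long earliest = LONG_MAX, reap_time;
		int i;

		for (i = 0; i < n; i++) {
			if (conns[i].usage > 0 || conns[i].dead)
				continue;
			reap_time = conns[i].put_time + TIMEOUT;
			if (reap_time <= now)
				conns[i].dead = 1;	/* -> graveyard */
			else if (reap_time < earliest)
				earliest = reap_time;	/* next pass due */
		}
		return earliest;	/* LONG_MAX: nothing left to watch */
	}

	int main(void)
	{
		struct conn c[] = { { 100, 0, 0 }, { 900, 0, 0 }, { 0, 1, 0 } };

		/* kills the first, skips the in-use third, reports 1500 */
		printf("next reap at %ld\n", reap(c, 3, 800));
		return 0;
	}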
897
898/*
899 * preemptively destroy all the connection records rather than waiting for them
900 * to time out
901 */
902void __exit rxrpc_destroy_all_connections(void)
903{
904 _enter("");
905
906 rxrpc_connection_timeout = 0;
907 cancel_delayed_work(&rxrpc_connection_reap);
908 rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
909
910 _leave("");
911}
diff --git a/net/rxrpc/ar-connevent.c b/net/rxrpc/ar-connevent.c
new file mode 100644
index 000000000000..1ada43d51165
--- /dev/null
+++ b/net/rxrpc/ar-connevent.c
@@ -0,0 +1,403 @@
1/* connection-level event handling
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/net.h>
14#include <linux/skbuff.h>
15#include <linux/errqueue.h>
16#include <linux/udp.h>
17#include <linux/in.h>
18#include <linux/in6.h>
19#include <linux/icmp.h>
20#include <net/sock.h>
21#include <net/af_rxrpc.h>
22#include <net/ip.h>
23#include "ar-internal.h"
24
25/*
26 * pass a connection-level abort onto all calls on that connection
27 */
28static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
29 u32 abort_code)
30{
31 struct rxrpc_call *call;
32 struct rb_node *p;
33
34 _enter("{%d},%x", conn->debug_id, abort_code);
35
36 read_lock_bh(&conn->lock);
37
38 for (p = rb_first(&conn->calls); p; p = rb_next(p)) {
39 call = rb_entry(p, struct rxrpc_call, conn_node);
40 write_lock(&call->state_lock);
41 if (call->state <= RXRPC_CALL_COMPLETE) {
42 call->state = state;
43 call->abort_code = abort_code;
44 if (state == RXRPC_CALL_LOCALLY_ABORTED)
45 set_bit(RXRPC_CALL_CONN_ABORT, &call->events);
46 else
47 set_bit(RXRPC_CALL_RCVD_ABORT, &call->events);
48 rxrpc_queue_call(call);
49 }
50 write_unlock(&call->state_lock);
51 }
52
53 read_unlock_bh(&conn->lock);
54 _leave("");
55}
56
57/*
58 * generate a connection-level abort
59 */
60static int rxrpc_abort_connection(struct rxrpc_connection *conn,
61 u32 error, u32 abort_code)
62{
63 struct rxrpc_header hdr;
64 struct msghdr msg;
65 struct kvec iov[2];
66 __be32 word;
67 size_t len;
68 int ret;
69
70 _enter("%d,,%u,%u", conn->debug_id, error, abort_code);
71
72 /* generate a connection-level abort */
73 spin_lock_bh(&conn->state_lock);
74 if (conn->state < RXRPC_CONN_REMOTELY_ABORTED) {
75 conn->state = RXRPC_CONN_LOCALLY_ABORTED;
76 conn->error = error;
77 spin_unlock_bh(&conn->state_lock);
78 } else {
79 spin_unlock_bh(&conn->state_lock);
80 _leave(" = 0 [already dead]");
81 return 0;
82 }
83
84 rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code);
85
86 msg.msg_name = &conn->trans->peer->srx.transport.sin;
87 msg.msg_namelen = sizeof(conn->trans->peer->srx.transport.sin);
88 msg.msg_control = NULL;
89 msg.msg_controllen = 0;
90 msg.msg_flags = 0;
91
92 hdr.epoch = conn->epoch;
93 hdr.cid = conn->cid;
94 hdr.callNumber = 0;
95 hdr.seq = 0;
96 hdr.type = RXRPC_PACKET_TYPE_ABORT;
97 hdr.flags = conn->out_clientflag;
98 hdr.userStatus = 0;
99 hdr.securityIndex = conn->security_ix;
100 hdr._rsvd = 0;
101 hdr.serviceId = conn->service_id;
102
103 word = htonl(abort_code);
104
105 iov[0].iov_base = &hdr;
106 iov[0].iov_len = sizeof(hdr);
107 iov[1].iov_base = &word;
108 iov[1].iov_len = sizeof(word);
109
110 len = iov[0].iov_len + iov[1].iov_len;
111
112 hdr.serial = htonl(atomic_inc_return(&conn->serial));
113 _proto("Tx CONN ABORT %%%u { %d }", ntohl(hdr.serial), abort_code);
114
115 ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 2, len);
116 if (ret < 0) {
117 _debug("sendmsg failed: %d", ret);
118 return -EAGAIN;
119 }
120
121 _leave(" = 0");
122 return 0;
123}
124
125/*
126 * mark a call as being on a now-secured channel
127 * - must be called with softirqs disabled
128 */
129void rxrpc_call_is_secure(struct rxrpc_call *call)
130{
131 _enter("%p", call);
132 if (call) {
133 read_lock(&call->state_lock);
134 if (call->state < RXRPC_CALL_COMPLETE &&
135 !test_and_set_bit(RXRPC_CALL_SECURED, &call->events))
136 rxrpc_queue_call(call);
137 read_unlock(&call->state_lock);
138 }
139}
140
141/*
142 * connection-level Rx packet processor
143 */
144static int rxrpc_process_event(struct rxrpc_connection *conn,
145 struct sk_buff *skb,
146 u32 *_abort_code)
147{
148 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
149 __be32 tmp;
150 u32 serial;
151 int loop, ret;
152
153 if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED)
154 return -ECONNABORTED;
155
156 serial = ntohl(sp->hdr.serial);
157
158 switch (sp->hdr.type) {
159 case RXRPC_PACKET_TYPE_ABORT:
160 if (skb_copy_bits(skb, 0, &tmp, sizeof(tmp)) < 0)
161 return -EPROTO;
162 _proto("Rx ABORT %%%u { ac=%d }", serial, ntohl(tmp));
163
164 conn->state = RXRPC_CONN_REMOTELY_ABORTED;
165 rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED,
166 ntohl(tmp));
167 return -ECONNABORTED;
168
169 case RXRPC_PACKET_TYPE_CHALLENGE:
170 if (conn->security)
171 return conn->security->respond_to_challenge(
172 conn, skb, _abort_code);
173 return -EPROTO;
174
175 case RXRPC_PACKET_TYPE_RESPONSE:
176 if (!conn->security)
177 return -EPROTO;
178
179 ret = conn->security->verify_response(conn, skb, _abort_code);
180 if (ret < 0)
181 return ret;
182
183 ret = conn->security->init_connection_security(conn);
184 if (ret < 0)
185 return ret;
186
187 conn->security->prime_packet_security(conn);
188 read_lock_bh(&conn->lock);
189 spin_lock(&conn->state_lock);
190
191 if (conn->state == RXRPC_CONN_SERVER_CHALLENGING) {
192 conn->state = RXRPC_CONN_SERVER;
193 for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
194 rxrpc_call_is_secure(conn->channels[loop]);
195 }
196
197 spin_unlock(&conn->state_lock);
198 read_unlock_bh(&conn->lock);
199 return 0;
200
201 default:
202 return -EPROTO;
203 }
204}
205
206/*
207 * set up security and issue a challenge
208 */
209static void rxrpc_secure_connection(struct rxrpc_connection *conn)
210{
211 u32 abort_code;
212 int ret;
213
214 _enter("{%d}", conn->debug_id);
215
216 ASSERT(conn->security_ix != 0);
217
218 if (!conn->key) {
219 _debug("set up security");
220 ret = rxrpc_init_server_conn_security(conn);
221 switch (ret) {
222 case 0:
223 break;
224 case -ENOENT:
225 abort_code = RX_CALL_DEAD;
226 goto abort;
227 default:
228 abort_code = RXKADNOAUTH;
229 goto abort;
230 }
231 }
232
233 ASSERT(conn->security != NULL);
234
235 if (conn->security->issue_challenge(conn) < 0) {
236 abort_code = RX_CALL_DEAD;
237 ret = -ENOMEM;
238 goto abort;
239 }
240
241 _leave("");
242 return;
243
244abort:
245 _debug("abort %d, %d", ret, abort_code);
246 rxrpc_abort_connection(conn, -ret, abort_code);
247 _leave(" [aborted]");
248}
249
250/*
251 * connection-level event processor
252 */
253void rxrpc_process_connection(struct work_struct *work)
254{
255 struct rxrpc_connection *conn =
256 container_of(work, struct rxrpc_connection, processor);
257 struct rxrpc_skb_priv *sp;
258 struct sk_buff *skb;
259 u32 abort_code = RX_PROTOCOL_ERROR;
260 int ret;
261
262 _enter("{%d}", conn->debug_id);
263
264 atomic_inc(&conn->usage);
265
266 if (test_and_clear_bit(RXRPC_CONN_CHALLENGE, &conn->events)) {
267 rxrpc_secure_connection(conn);
268 rxrpc_put_connection(conn);
269 }
270
271 /* go through the conn-level event packets, releasing the ref on this
272 * connection that each one has when we've finished with it */
273 while ((skb = skb_dequeue(&conn->rx_queue))) {
274 sp = rxrpc_skb(skb);
275
276 ret = rxrpc_process_event(conn, skb, &abort_code);
277 switch (ret) {
278 case -EPROTO:
279 case -EKEYEXPIRED:
280 case -EKEYREJECTED:
281 goto protocol_error;
282 case -EAGAIN:
283 goto requeue_and_leave;
284 case -ECONNABORTED:
285 default:
286 rxrpc_put_connection(conn);
287 rxrpc_free_skb(skb);
288 break;
289 }
290 }
291
292out:
293 rxrpc_put_connection(conn);
294 _leave("");
295 return;
296
297requeue_and_leave:
298 skb_queue_head(&conn->rx_queue, skb);
299 goto out;
300
301protocol_error:
302 if (rxrpc_abort_connection(conn, -ret, abort_code) < 0)
303 goto requeue_and_leave;
304 rxrpc_put_connection(conn);
305 rxrpc_free_skb(skb);
306 _leave(" [EPROTO]");
307 goto out;
308}
309
310/*
311 * put a packet up for transport-level abort
312 */
313void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
314{
315 CHECK_SLAB_OKAY(&local->usage);
316
317 if (!atomic_inc_not_zero(&local->usage)) {
318 printk("resurrected on reject\n");
319 BUG();
320 }
321
322 skb_queue_tail(&local->reject_queue, skb);
323 rxrpc_queue_work(&local->rejecter);
324}
325
326/*
327 * reject packets through the local endpoint
328 */
329void rxrpc_reject_packets(struct work_struct *work)
330{
331 union {
332 struct sockaddr sa;
333 struct sockaddr_in sin;
334 } sa;
335 struct rxrpc_skb_priv *sp;
336 struct rxrpc_header hdr;
337 struct rxrpc_local *local;
338 struct sk_buff *skb;
339 struct msghdr msg;
340 struct kvec iov[2];
341 size_t size;
342 __be32 code;
343
344 local = container_of(work, struct rxrpc_local, rejecter);
345 rxrpc_get_local(local);
346
347 _enter("%d", local->debug_id);
348
349 iov[0].iov_base = &hdr;
350 iov[0].iov_len = sizeof(hdr);
351 iov[1].iov_base = &code;
352 iov[1].iov_len = sizeof(code);
353 size = sizeof(hdr) + sizeof(code);
354
355 msg.msg_name = &sa;
356 msg.msg_control = NULL;
357 msg.msg_controllen = 0;
358 msg.msg_flags = 0;
359
360 memset(&sa, 0, sizeof(sa));
361 sa.sa.sa_family = local->srx.transport.family;
362 switch (sa.sa.sa_family) {
363 case AF_INET:
364 msg.msg_namelen = sizeof(sa.sin);
365 break;
366 default:
367 msg.msg_namelen = 0;
368 break;
369 }
370
371 memset(&hdr, 0, sizeof(hdr));
372 hdr.type = RXRPC_PACKET_TYPE_ABORT;
373
374 while ((skb = skb_dequeue(&local->reject_queue))) {
375 sp = rxrpc_skb(skb);
376 switch (sa.sa.sa_family) {
377 case AF_INET:
378 sa.sin.sin_port = udp_hdr(skb)->source;
379 sa.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
380 code = htonl(skb->priority);
381
382 hdr.epoch = sp->hdr.epoch;
383 hdr.cid = sp->hdr.cid;
384 hdr.callNumber = sp->hdr.callNumber;
385 hdr.serviceId = sp->hdr.serviceId;
386 hdr.flags = sp->hdr.flags;
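			/* the reply travels in the opposite direction, so
			 * flip the client-initiated bit and clear all the
			 * other flags */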
387 hdr.flags ^= RXRPC_CLIENT_INITIATED;
388 hdr.flags &= RXRPC_CLIENT_INITIATED;
389
390 kernel_sendmsg(local->socket, &msg, iov, 2, size);
391 break;
392
393 default:
394 break;
395 }
396
397 rxrpc_free_skb(skb);
398 rxrpc_put_local(local);
399 }
400
401 rxrpc_put_local(local);
402 _leave("");
403}
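
Both rxrpc_abort_connection() and rxrpc_reject_packets() above serialise a
struct rxrpc_header ahead of the abort code. For reference, the wire layout
implied by the fields they fill in is roughly the following (a sketch
reconstructed from this file; the authoritative definition lives in
rxrpc/packet.h):

	struct rxrpc_header {
		__be32	epoch;		/* client boot timestamp */
		__be32	cid;		/* connection ID + channel number */
		__be32	callNumber;	/* call ID (0 for conn-level packet) */
		__be32	seq;		/* packet sequence number */
		__be32	serial;		/* packet serial number */
		uint8_t	type;		/* RXRPC_PACKET_TYPE_xxx */
		uint8_t	flags;		/* RXRPC_xxx flags */
		uint8_t	userStatus;	/* app-layer defined status */
		uint8_t	securityIndex;	/* security protocol ID */
		__be16	_rsvd;		/* reserved */
		__be16	serviceId;	/* service ID */
	} __attribute__((packed));
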
diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c
new file mode 100644
index 000000000000..2c27df1ffa17
--- /dev/null
+++ b/net/rxrpc/ar-error.c
@@ -0,0 +1,253 @@
1/* Error message handling (ICMP)
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/net.h>
14#include <linux/skbuff.h>
15#include <linux/errqueue.h>
16#include <linux/udp.h>
17#include <linux/in.h>
18#include <linux/in6.h>
19#include <linux/icmp.h>
20#include <net/sock.h>
21#include <net/af_rxrpc.h>
22#include <net/ip.h>
23#include "ar-internal.h"
24
25/*
26 * handle an error received on the local endpoint
27 */
28void rxrpc_UDP_error_report(struct sock *sk)
29{
30 struct sock_exterr_skb *serr;
31 struct rxrpc_transport *trans;
32 struct rxrpc_local *local = sk->sk_user_data;
33 struct rxrpc_peer *peer;
34 struct sk_buff *skb;
35 __be32 addr;
36 __be16 port;
37
38 _enter("%p{%d}", sk, local->debug_id);
39
40 skb = skb_dequeue(&sk->sk_error_queue);
41 if (!skb) {
42 _leave("UDP socket errqueue empty");
43 return;
44 }
45
46 rxrpc_new_skb(skb);
47
48 serr = SKB_EXT_ERR(skb);
49 addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset);
50 port = serr->port;
51
52 _net("Rx UDP Error from "NIPQUAD_FMT":%hu",
53 NIPQUAD(addr), ntohs(port));
54 _debug("Msg l:%d d:%d", skb->len, skb->data_len);
55
56 peer = rxrpc_find_peer(local, addr, port);
57 if (IS_ERR(peer)) {
58 rxrpc_free_skb(skb);
59 _leave(" [no peer]");
60 return;
61 }
62
63 trans = rxrpc_find_transport(local, peer);
64 if (!trans) {
65 rxrpc_put_peer(peer);
66 rxrpc_free_skb(skb);
67 _leave(" [no trans]");
68 return;
69 }
70
71 if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP &&
72 serr->ee.ee_type == ICMP_DEST_UNREACH &&
73 serr->ee.ee_code == ICMP_FRAG_NEEDED
74 ) {
75 u32 mtu = serr->ee.ee_info;
76
77 _net("Rx Received ICMP Fragmentation Needed (%d)", mtu);
78
79 /* wind down the local interface MTU */
80 if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu) {
81 peer->if_mtu = mtu;
82 _net("I/F MTU %u", mtu);
83 }
84
85 /* ip_rt_frag_needed() may have eaten the info */
86 if (mtu == 0)
87 mtu = ntohs(icmp_hdr(skb)->un.frag.mtu);
88
 89		if (mtu == 0) {
 90			/* they didn't give us a size, estimate one */
			mtu = peer->if_mtu;
 91			if (mtu > 1500) {
92 mtu >>= 1;
93 if (mtu < 1500)
94 mtu = 1500;
95 } else {
96 mtu -= 100;
97 if (mtu < peer->hdrsize)
98 mtu = peer->hdrsize + 4;
99 }
100 }
101
102 if (mtu < peer->mtu) {
103 peer->mtu = mtu;
104 peer->maxdata = peer->mtu - peer->hdrsize;
105 _net("Net MTU %u (maxdata %u)",
106 peer->mtu, peer->maxdata);
107 }
108 }
109
110 rxrpc_put_peer(peer);
111
112 /* pass the transport ref to error_handler to release */
113 skb_queue_tail(&trans->error_queue, skb);
114 rxrpc_queue_work(&trans->error_handler);
115
116 /* reset and regenerate socket error */
117 spin_lock_bh(&sk->sk_error_queue.lock);
118 sk->sk_err = 0;
119 skb = skb_peek(&sk->sk_error_queue);
120 if (skb) {
121 sk->sk_err = SKB_EXT_ERR(skb)->ee.ee_errno;
122 spin_unlock_bh(&sk->sk_error_queue.lock);
123 sk->sk_error_report(sk);
124 } else {
125 spin_unlock_bh(&sk->sk_error_queue.lock);
126 }
127
128 _leave("");
129}
130
131/*
132 * deal with UDP error messages
133 */
134void rxrpc_UDP_error_handler(struct work_struct *work)
135{
136 struct sock_extended_err *ee;
137 struct sock_exterr_skb *serr;
138 struct rxrpc_transport *trans =
139 container_of(work, struct rxrpc_transport, error_handler);
140 struct sk_buff *skb;
141 int local, err;
142
143 _enter("");
144
145 skb = skb_dequeue(&trans->error_queue);
146 if (!skb)
147 return;
148
149 serr = SKB_EXT_ERR(skb);
150 ee = &serr->ee;
151
152 _net("Rx Error o=%d t=%d c=%d e=%d",
153 ee->ee_origin, ee->ee_type, ee->ee_code, ee->ee_errno);
154
155 err = ee->ee_errno;
156
157 switch (ee->ee_origin) {
158 case SO_EE_ORIGIN_ICMP:
159 local = 0;
160 switch (ee->ee_type) {
161 case ICMP_DEST_UNREACH:
162 switch (ee->ee_code) {
163 case ICMP_NET_UNREACH:
164 _net("Rx Received ICMP Network Unreachable");
165 err = ENETUNREACH;
166 break;
167 case ICMP_HOST_UNREACH:
168 _net("Rx Received ICMP Host Unreachable");
169 err = EHOSTUNREACH;
170 break;
171 case ICMP_PORT_UNREACH:
172 _net("Rx Received ICMP Port Unreachable");
173 err = ECONNREFUSED;
174 break;
175 case ICMP_FRAG_NEEDED:
176 _net("Rx Received ICMP Fragmentation Needed (%d)",
177 ee->ee_info);
178 err = 0; /* dealt with elsewhere */
179 break;
180 case ICMP_NET_UNKNOWN:
181 _net("Rx Received ICMP Unknown Network");
182 err = ENETUNREACH;
183 break;
184 case ICMP_HOST_UNKNOWN:
185 _net("Rx Received ICMP Unknown Host");
186 err = EHOSTUNREACH;
187 break;
188 default:
189 _net("Rx Received ICMP DestUnreach code=%u",
190 ee->ee_code);
191 break;
192 }
193 break;
194
195 case ICMP_TIME_EXCEEDED:
196 _net("Rx Received ICMP TTL Exceeded");
197 break;
198
199 default:
200 _proto("Rx Received ICMP error { type=%u code=%u }",
201 ee->ee_type, ee->ee_code);
202 break;
203 }
204 break;
205
206 case SO_EE_ORIGIN_LOCAL:
207 _proto("Rx Received local error { error=%d }",
208 ee->ee_errno);
209 local = 1;
210 break;
211
212 case SO_EE_ORIGIN_NONE:
213 case SO_EE_ORIGIN_ICMP6:
214 default:
215 _proto("Rx Received error report { orig=%u }",
216 ee->ee_origin);
217 local = 0;
218 break;
219 }
220
221 /* terminate all the affected calls if there's an unrecoverable
222 * error */
223 if (err) {
224 struct rxrpc_call *call, *_n;
225
226 _debug("ISSUE ERROR %d", err);
227
228 spin_lock_bh(&trans->peer->lock);
229 trans->peer->net_error = err;
230
231 list_for_each_entry_safe(call, _n, &trans->peer->error_targets,
232 error_link) {
233 write_lock(&call->state_lock);
234 if (call->state != RXRPC_CALL_COMPLETE &&
235 call->state < RXRPC_CALL_NETWORK_ERROR) {
236 call->state = RXRPC_CALL_NETWORK_ERROR;
237 set_bit(RXRPC_CALL_RCVD_ERROR, &call->events);
238 rxrpc_queue_call(call);
239 }
240 write_unlock(&call->state_lock);
241 list_del_init(&call->error_link);
242 }
243
244 spin_unlock_bh(&trans->peer->lock);
245 }
246
247 if (!skb_queue_empty(&trans->error_queue))
248 rxrpc_queue_work(&trans->error_handler);
249
250 rxrpc_free_skb(skb);
251 rxrpc_put_transport(trans);
252 _leave("");
253}
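
The records consumed by rxrpc_UDP_error_report() above are the same
sock_extended_err structures that a user-space program would read from a
UDP socket's error queue. A minimal user-space sketch, assuming a socket
with IP_RECVERR enabled (drain_one_error() is hypothetical and only
illustrates the API):

	#include <netinet/in.h>
	#include <linux/errqueue.h>
	#include <sys/socket.h>
	#include <sys/uio.h>
	#include <string.h>

	/* pull one queued error off the socket and copy out the extended
	 * error record; returns 0 on success, -1 if nothing usable */
	static int drain_one_error(int fd, struct sock_extended_err *out)
	{
		char cbuf[512], dbuf[64];
		struct iovec iov = { .iov_base = dbuf, .iov_len = sizeof(dbuf) };
		struct msghdr msg;
		struct cmsghdr *cmsg;

		memset(&msg, 0, sizeof(msg));
		msg.msg_iov = &iov;
		msg.msg_iovlen = 1;
		msg.msg_control = cbuf;
		msg.msg_controllen = sizeof(cbuf);

		if (recvmsg(fd, &msg, MSG_ERRQUEUE) < 0)
			return -1;

		for (cmsg = CMSG_FIRSTHDR(&msg); cmsg;
		     cmsg = CMSG_NXTHDR(&msg, cmsg)) {
			if (cmsg->cmsg_level == IPPROTO_IP &&
			    cmsg->cmsg_type == IP_RECVERR) {
				memcpy(out, CMSG_DATA(cmsg), sizeof(*out));
				return 0;
			}
		}
		return -1;
	}
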
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
new file mode 100644
index 000000000000..91b5bbb003e2
--- /dev/null
+++ b/net/rxrpc/ar-input.c
@@ -0,0 +1,797 @@
1/* RxRPC packet reception
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/net.h>
14#include <linux/skbuff.h>
15#include <linux/errqueue.h>
16#include <linux/udp.h>
17#include <linux/in.h>
18#include <linux/in6.h>
19#include <linux/icmp.h>
20#include <net/sock.h>
21#include <net/af_rxrpc.h>
22#include <net/ip.h>
23#include "ar-internal.h"
24
25unsigned long rxrpc_ack_timeout = 1;
26
27const char *rxrpc_pkts[] = {
28 "?00",
29 "DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG",
30 "?09", "?10", "?11", "?12", "?13", "?14", "?15"
31};
32
33/*
34 * queue a packet for recvmsg to pass to userspace
35 * - the caller must hold a lock on call->lock
36 * - must not be called with interrupts disabled (sk_filter() disables BH's)
37 * - eats the packet whether successful or not
38 * - there must be just one reference to the packet, which the caller passes to
39 * this function
40 */
41int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb,
42 bool force, bool terminal)
43{
44 struct rxrpc_skb_priv *sp;
45 struct rxrpc_sock *rx = call->socket;
46 struct sock *sk;
47 int skb_len, ret;
48
49 _enter(",,%d,%d", force, terminal);
50
51 ASSERT(!irqs_disabled());
52
53 sp = rxrpc_skb(skb);
54 ASSERTCMP(sp->call, ==, call);
55
56 /* if we've already posted the terminal message for a call, then we
57 * don't post any more */
58 if (test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) {
59 _debug("already terminated");
60 ASSERTCMP(call->state, >=, RXRPC_CALL_COMPLETE);
61 skb->destructor = NULL;
62 sp->call = NULL;
63 rxrpc_put_call(call);
64 rxrpc_free_skb(skb);
65 return 0;
66 }
67
68 sk = &rx->sk;
69
70 if (!force) {
71 /* cast skb->rcvbuf to unsigned... It's pointless, but
72 * reduces number of warnings when compiling with -W
73 * --ANK */
74// ret = -ENOBUFS;
75// if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
76// (unsigned) sk->sk_rcvbuf)
77// goto out;
78
79 ret = sk_filter(sk, skb);
80 if (ret < 0)
81 goto out;
82 }
83
84 spin_lock_bh(&sk->sk_receive_queue.lock);
85 if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags) &&
86 !test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
87 call->socket->sk.sk_state != RXRPC_CLOSE) {
88 skb->destructor = rxrpc_packet_destructor;
89 skb->dev = NULL;
90 skb->sk = sk;
91 atomic_add(skb->truesize, &sk->sk_rmem_alloc);
92
93 if (terminal) {
94 _debug("<<<< TERMINAL MESSAGE >>>>");
95 set_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags);
96 }
97
98 /* allow interception by a kernel service */
99 if (rx->interceptor) {
100 rx->interceptor(sk, call->user_call_ID, skb);
101 spin_unlock_bh(&sk->sk_receive_queue.lock);
102 } else {
103
104 /* Cache the SKB length before we tack it onto the
105 * receive queue. Once it is added it no longer
106 * belongs to us and may be freed by other threads of
107 * control pulling packets from the queue */
108 skb_len = skb->len;
109
110 _net("post skb %p", skb);
111 __skb_queue_tail(&sk->sk_receive_queue, skb);
112 spin_unlock_bh(&sk->sk_receive_queue.lock);
113
114 if (!sock_flag(sk, SOCK_DEAD))
115 sk->sk_data_ready(sk, skb_len);
116 }
117 skb = NULL;
118 } else {
119 spin_unlock_bh(&sk->sk_receive_queue.lock);
120 }
121 ret = 0;
122
123out:
124 /* release the socket buffer */
125 if (skb) {
126 skb->destructor = NULL;
127 sp->call = NULL;
128 rxrpc_put_call(call);
129 rxrpc_free_skb(skb);
130 }
131
132 _leave(" = %d", ret);
133 return ret;
134}
135
136/*
137 * process a DATA packet, posting the packet to the appropriate queue
138 * - eats the packet if successful
139 */
140static int rxrpc_fast_process_data(struct rxrpc_call *call,
141 struct sk_buff *skb, u32 seq)
142{
143 struct rxrpc_skb_priv *sp;
144 bool terminal;
145 int ret, ackbit, ack;
146
147 _enter("{%u,%u},,{%u}", call->rx_data_post, call->rx_first_oos, seq);
148
149 sp = rxrpc_skb(skb);
150 ASSERTCMP(sp->call, ==, NULL);
151
152 spin_lock(&call->lock);
153
154 if (call->state > RXRPC_CALL_COMPLETE)
155 goto discard;
156
157 ASSERTCMP(call->rx_data_expect, >=, call->rx_data_post);
158 ASSERTCMP(call->rx_data_post, >=, call->rx_data_recv);
159 ASSERTCMP(call->rx_data_recv, >=, call->rx_data_eaten);
160
161 if (seq < call->rx_data_post) {
162 _debug("dup #%u [-%u]", seq, call->rx_data_post);
163 ack = RXRPC_ACK_DUPLICATE;
164 ret = -ENOBUFS;
165 goto discard_and_ack;
166 }
167
168 /* we may already have the packet in the out of sequence queue */
169 ackbit = seq - (call->rx_data_eaten + 1);
170 ASSERTCMP(ackbit, >=, 0);
171 if (__test_and_set_bit(ackbit, call->ackr_window)) {
172 _debug("dup oos #%u [%u,%u]",
173 seq, call->rx_data_eaten, call->rx_data_post);
174 ack = RXRPC_ACK_DUPLICATE;
175 goto discard_and_ack;
176 }
177
178 if (seq >= call->ackr_win_top) {
179 _debug("exceed #%u [%u]", seq, call->ackr_win_top);
180 __clear_bit(ackbit, call->ackr_window);
181 ack = RXRPC_ACK_EXCEEDS_WINDOW;
182 goto discard_and_ack;
183 }
184
185 if (seq == call->rx_data_expect) {
186 clear_bit(RXRPC_CALL_EXPECT_OOS, &call->flags);
187 call->rx_data_expect++;
188 } else if (seq > call->rx_data_expect) {
189 _debug("oos #%u [%u]", seq, call->rx_data_expect);
190 call->rx_data_expect = seq + 1;
191 if (test_and_set_bit(RXRPC_CALL_EXPECT_OOS, &call->flags)) {
192 ack = RXRPC_ACK_OUT_OF_SEQUENCE;
193 goto enqueue_and_ack;
194 }
195 goto enqueue_packet;
196 }
197
198 if (seq != call->rx_data_post) {
199 _debug("ahead #%u [%u]", seq, call->rx_data_post);
200 goto enqueue_packet;
201 }
202
203 if (test_bit(RXRPC_CALL_RCVD_LAST, &call->flags))
204 goto protocol_error;
205
 206	/* if the packet needs security processing, then it goes down the
 207	 * slow path */
208 if (call->conn->security)
209 goto enqueue_packet;
210
211 sp->call = call;
212 rxrpc_get_call(call);
213 terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) &&
214 !(sp->hdr.flags & RXRPC_CLIENT_INITIATED));
215 ret = rxrpc_queue_rcv_skb(call, skb, false, terminal);
216 if (ret < 0) {
217 if (ret == -ENOMEM || ret == -ENOBUFS) {
218 __clear_bit(ackbit, call->ackr_window);
219 ack = RXRPC_ACK_NOSPACE;
220 goto discard_and_ack;
221 }
222 goto out;
223 }
224
225 skb = NULL;
226
227 _debug("post #%u", seq);
228 ASSERTCMP(call->rx_data_post, ==, seq);
229 call->rx_data_post++;
230
231 if (sp->hdr.flags & RXRPC_LAST_PACKET)
232 set_bit(RXRPC_CALL_RCVD_LAST, &call->flags);
233
234 /* if we've reached an out of sequence packet then we need to drain
235 * that queue into the socket Rx queue now */
236 if (call->rx_data_post == call->rx_first_oos) {
237 _debug("drain rx oos now");
238 read_lock(&call->state_lock);
239 if (call->state < RXRPC_CALL_COMPLETE &&
240 !test_and_set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events))
241 rxrpc_queue_call(call);
242 read_unlock(&call->state_lock);
243 }
244
245 spin_unlock(&call->lock);
246 atomic_inc(&call->ackr_not_idle);
247 rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, sp->hdr.serial, false);
248 _leave(" = 0 [posted]");
249 return 0;
250
251protocol_error:
252 ret = -EBADMSG;
253out:
254 spin_unlock(&call->lock);
255 _leave(" = %d", ret);
256 return ret;
257
258discard_and_ack:
259 _debug("discard and ACK packet %p", skb);
260 __rxrpc_propose_ACK(call, ack, sp->hdr.serial, true);
261discard:
262 spin_unlock(&call->lock);
263 rxrpc_free_skb(skb);
264 _leave(" = 0 [discarded]");
265 return 0;
266
267enqueue_and_ack:
268 __rxrpc_propose_ACK(call, ack, sp->hdr.serial, true);
269enqueue_packet:
270 _net("defer skb %p", skb);
271 spin_unlock(&call->lock);
272 skb_queue_tail(&call->rx_queue, skb);
273 atomic_inc(&call->ackr_not_idle);
274 read_lock(&call->state_lock);
275 if (call->state < RXRPC_CALL_DEAD)
276 rxrpc_queue_call(call);
277 read_unlock(&call->state_lock);
278 _leave(" = 0 [queued]");
279 return 0;
280}
281
282/*
283 * assume an implicit ACKALL of the transmission phase of a client socket upon
284 * reception of the first reply packet
285 */
286static void rxrpc_assume_implicit_ackall(struct rxrpc_call *call, u32 serial)
287{
288 write_lock_bh(&call->state_lock);
289
290 switch (call->state) {
291 case RXRPC_CALL_CLIENT_AWAIT_REPLY:
292 call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
293 call->acks_latest = serial;
294
295 _debug("implicit ACKALL %%%u", call->acks_latest);
296 set_bit(RXRPC_CALL_RCVD_ACKALL, &call->events);
297 write_unlock_bh(&call->state_lock);
298
299 if (try_to_del_timer_sync(&call->resend_timer) >= 0) {
300 clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
301 clear_bit(RXRPC_CALL_RESEND, &call->events);
302 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
303 }
304 break;
305
306 default:
307 write_unlock_bh(&call->state_lock);
308 break;
309 }
310}
311
312/*
313 * post an incoming packet to the nominated call to deal with
314 * - must get rid of the sk_buff, either by freeing it or by queuing it
315 */
316void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
317{
318 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
319 __be32 _abort_code;
320 u32 serial, hi_serial, seq, abort_code;
321
322 _enter("%p,%p", call, skb);
323
324 ASSERT(!irqs_disabled());
325
326#if 0 // INJECT RX ERROR
327 if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
328 static int skip = 0;
329 if (++skip == 3) {
330 printk("DROPPED 3RD PACKET!!!!!!!!!!!!!\n");
331 skip = 0;
332 goto free_packet;
333 }
334 }
335#endif
336
337 /* track the latest serial number on this connection for ACK packet
338 * information */
339 serial = ntohl(sp->hdr.serial);
340 hi_serial = atomic_read(&call->conn->hi_serial);
341 while (serial > hi_serial)
342 hi_serial = atomic_cmpxchg(&call->conn->hi_serial, hi_serial,
343 serial);
344
345 /* request ACK generation for any ACK or DATA packet that requests
346 * it */
347 if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
348 _proto("ACK Requested on %%%u", serial);
349 rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial,
350 !(sp->hdr.flags & RXRPC_MORE_PACKETS));
351 }
352
353 switch (sp->hdr.type) {
354 case RXRPC_PACKET_TYPE_ABORT:
355 _debug("abort");
356
357 if (skb_copy_bits(skb, 0, &_abort_code,
358 sizeof(_abort_code)) < 0)
359 goto protocol_error;
360
361 abort_code = ntohl(_abort_code);
362 _proto("Rx ABORT %%%u { %x }", serial, abort_code);
363
364 write_lock_bh(&call->state_lock);
365 if (call->state < RXRPC_CALL_COMPLETE) {
366 call->state = RXRPC_CALL_REMOTELY_ABORTED;
367 call->abort_code = abort_code;
368 set_bit(RXRPC_CALL_RCVD_ABORT, &call->events);
369 rxrpc_queue_call(call);
370 }
371 goto free_packet_unlock;
372
373 case RXRPC_PACKET_TYPE_BUSY:
374 _proto("Rx BUSY %%%u", serial);
375
376 if (call->conn->out_clientflag)
377 goto protocol_error;
378
379 write_lock_bh(&call->state_lock);
380 switch (call->state) {
381 case RXRPC_CALL_CLIENT_SEND_REQUEST:
382 call->state = RXRPC_CALL_SERVER_BUSY;
383 set_bit(RXRPC_CALL_RCVD_BUSY, &call->events);
384 rxrpc_queue_call(call);
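			/* fall through */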
385 case RXRPC_CALL_SERVER_BUSY:
386 goto free_packet_unlock;
387 default:
388 goto protocol_error_locked;
389 }
390
391 default:
392 _proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], serial);
393 goto protocol_error;
394
395 case RXRPC_PACKET_TYPE_DATA:
396 seq = ntohl(sp->hdr.seq);
397
398 _proto("Rx DATA %%%u { #%u }", serial, seq);
399
400 if (seq == 0)
401 goto protocol_error;
402
403 call->ackr_prev_seq = sp->hdr.seq;
404
405 /* received data implicitly ACKs all of the request packets we
406 * sent when we're acting as a client */
407 if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY)
408 rxrpc_assume_implicit_ackall(call, serial);
409
410 switch (rxrpc_fast_process_data(call, skb, seq)) {
411 case 0:
412 skb = NULL;
413 goto done;
414
415 default:
416 BUG();
417
418 /* data packet received beyond the last packet */
419 case -EBADMSG:
420 goto protocol_error;
421 }
422
423 case RXRPC_PACKET_TYPE_ACK:
424 /* ACK processing is done in process context */
425 read_lock_bh(&call->state_lock);
426 if (call->state < RXRPC_CALL_DEAD) {
427 skb_queue_tail(&call->rx_queue, skb);
428 rxrpc_queue_call(call);
429 skb = NULL;
430 }
431 read_unlock_bh(&call->state_lock);
432 goto free_packet;
433 }
434
435protocol_error:
436 _debug("protocol error");
437 write_lock_bh(&call->state_lock);
438protocol_error_locked:
439 if (call->state <= RXRPC_CALL_COMPLETE) {
440 call->state = RXRPC_CALL_LOCALLY_ABORTED;
441 call->abort_code = RX_PROTOCOL_ERROR;
442 set_bit(RXRPC_CALL_ABORT, &call->events);
443 rxrpc_queue_call(call);
444 }
445free_packet_unlock:
446 write_unlock_bh(&call->state_lock);
447free_packet:
448 rxrpc_free_skb(skb);
449done:
450 _leave("");
451}
452
453/*
454 * split up a jumbo data packet
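 * - on the wire each non-final subpacket occupies RXRPC_JUMBO_DATALEN bytes
 *   and is followed by a secondary jumbo header carrying the next
 *   subpacket's flags; the final subpacket takes whatever remains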
455 */
456static void rxrpc_process_jumbo_packet(struct rxrpc_call *call,
457 struct sk_buff *jumbo)
458{
459 struct rxrpc_jumbo_header jhdr;
460 struct rxrpc_skb_priv *sp;
461 struct sk_buff *part;
462
463 _enter(",{%u,%u}", jumbo->data_len, jumbo->len);
464
465 sp = rxrpc_skb(jumbo);
466
467 do {
468 sp->hdr.flags &= ~RXRPC_JUMBO_PACKET;
469
470 /* make a clone to represent the first subpacket in what's left
471 * of the jumbo packet */
472 part = skb_clone(jumbo, GFP_ATOMIC);
473 if (!part) {
474 /* simply ditch the tail in the event of ENOMEM */
475 pskb_trim(jumbo, RXRPC_JUMBO_DATALEN);
476 break;
477 }
478 rxrpc_new_skb(part);
479
480 pskb_trim(part, RXRPC_JUMBO_DATALEN);
481
482 if (!pskb_pull(jumbo, RXRPC_JUMBO_DATALEN))
483 goto protocol_error;
484
485 if (skb_copy_bits(jumbo, 0, &jhdr, sizeof(jhdr)) < 0)
486 goto protocol_error;
487 if (!pskb_pull(jumbo, sizeof(jhdr)))
488 BUG();
489
490 sp->hdr.seq = htonl(ntohl(sp->hdr.seq) + 1);
491 sp->hdr.serial = htonl(ntohl(sp->hdr.serial) + 1);
492 sp->hdr.flags = jhdr.flags;
493 sp->hdr._rsvd = jhdr._rsvd;
494
495 _proto("Rx DATA Jumbo %%%u", ntohl(sp->hdr.serial) - 1);
496
497 rxrpc_fast_process_packet(call, part);
498 part = NULL;
499
500 } while (sp->hdr.flags & RXRPC_JUMBO_PACKET);
501
502 rxrpc_fast_process_packet(call, jumbo);
503 _leave("");
504 return;
505
506protocol_error:
507 _debug("protocol error");
508 rxrpc_free_skb(part);
509 rxrpc_free_skb(jumbo);
510 write_lock_bh(&call->state_lock);
511 if (call->state <= RXRPC_CALL_COMPLETE) {
512 call->state = RXRPC_CALL_LOCALLY_ABORTED;
513 call->abort_code = RX_PROTOCOL_ERROR;
514 set_bit(RXRPC_CALL_ABORT, &call->events);
515 rxrpc_queue_call(call);
516 }
517 write_unlock_bh(&call->state_lock);
518 _leave("");
519}
520
521/*
522 * post an incoming packet to the appropriate call/socket to deal with
523 * - must get rid of the sk_buff, either by freeing it or by queuing it
524 */
525static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn,
526 struct sk_buff *skb)
527{
528 struct rxrpc_skb_priv *sp;
529 struct rxrpc_call *call;
530 struct rb_node *p;
531 __be32 call_id;
532
533 _enter("%p,%p", conn, skb);
534
535 read_lock_bh(&conn->lock);
536
537 sp = rxrpc_skb(skb);
538
539 /* look at extant calls by channel number first */
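	/* (the low bits of the CID select one of the connection's
	 *  RXRPC_MAXCALLS channels; the remaining bits identify the
	 *  connection itself) */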
540 call = conn->channels[ntohl(sp->hdr.cid) & RXRPC_CHANNELMASK];
541 if (!call || call->call_id != sp->hdr.callNumber)
542 goto call_not_extant;
543
544 _debug("extant call [%d]", call->state);
545 ASSERTCMP(call->conn, ==, conn);
546
547 read_lock(&call->state_lock);
548 switch (call->state) {
549 case RXRPC_CALL_LOCALLY_ABORTED:
550 if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events))
551 rxrpc_queue_call(call);
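		/* fall through */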
552 case RXRPC_CALL_REMOTELY_ABORTED:
553 case RXRPC_CALL_NETWORK_ERROR:
554 case RXRPC_CALL_DEAD:
555 goto free_unlock;
556 default:
557 break;
558 }
559
560 read_unlock(&call->state_lock);
561 rxrpc_get_call(call);
562 read_unlock_bh(&conn->lock);
563
564 if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
565 sp->hdr.flags & RXRPC_JUMBO_PACKET)
566 rxrpc_process_jumbo_packet(call, skb);
567 else
568 rxrpc_fast_process_packet(call, skb);
569
570 rxrpc_put_call(call);
571 goto done;
572
573call_not_extant:
574 /* search the completed calls in case what we're dealing with is
575 * there */
576 _debug("call not extant");
577
578 call_id = sp->hdr.callNumber;
579 p = conn->calls.rb_node;
580 while (p) {
581 call = rb_entry(p, struct rxrpc_call, conn_node);
582
583 if (call_id < call->call_id)
584 p = p->rb_left;
585 else if (call_id > call->call_id)
586 p = p->rb_right;
587 else
588 goto found_completed_call;
589 }
590
591dead_call:
 592	/* it's either a really old call that we no longer remember or it's a
 593	 * new incoming call */
594 read_unlock_bh(&conn->lock);
595
596 if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
597 sp->hdr.seq == __constant_cpu_to_be32(1)) {
598 _debug("incoming call");
599 skb_queue_tail(&conn->trans->local->accept_queue, skb);
600 rxrpc_queue_work(&conn->trans->local->acceptor);
601 goto done;
602 }
603
604 _debug("dead call");
605 skb->priority = RX_CALL_DEAD;
606 rxrpc_reject_packet(conn->trans->local, skb);
607 goto done;
608
609 /* resend last packet of a completed call
610 * - client calls may have been aborted or ACK'd
611 * - server calls may have been aborted
612 */
613found_completed_call:
614 _debug("completed call");
615
616 if (atomic_read(&call->usage) == 0)
617 goto dead_call;
618
619 /* synchronise any state changes */
620 read_lock(&call->state_lock);
621 ASSERTIFCMP(call->state != RXRPC_CALL_CLIENT_FINAL_ACK,
622 call->state, >=, RXRPC_CALL_COMPLETE);
623
624 if (call->state == RXRPC_CALL_LOCALLY_ABORTED ||
625 call->state == RXRPC_CALL_REMOTELY_ABORTED ||
626 call->state == RXRPC_CALL_DEAD) {
627 read_unlock(&call->state_lock);
628 goto dead_call;
629 }
630
631 if (call->conn->in_clientflag) {
632 read_unlock(&call->state_lock);
633 goto dead_call; /* complete server call */
634 }
635
636 _debug("final ack again");
637 rxrpc_get_call(call);
638 set_bit(RXRPC_CALL_ACK_FINAL, &call->events);
639 rxrpc_queue_call(call);
640
641free_unlock:
642 read_unlock(&call->state_lock);
643 read_unlock_bh(&conn->lock);
644 rxrpc_free_skb(skb);
645done:
646 _leave("");
647}
648
649/*
650 * post connection-level events to the connection
651 * - this includes challenges, responses and some aborts
652 */
653static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
654 struct sk_buff *skb)
655{
656 _enter("%p,%p", conn, skb);
657
658 atomic_inc(&conn->usage);
659 skb_queue_tail(&conn->rx_queue, skb);
660 rxrpc_queue_conn(conn);
661}
662
663/*
664 * handle data received on the local endpoint
665 * - may be called in interrupt context
666 */
667void rxrpc_data_ready(struct sock *sk, int count)
668{
669 struct rxrpc_connection *conn;
670 struct rxrpc_transport *trans;
671 struct rxrpc_skb_priv *sp;
672 struct rxrpc_local *local;
673 struct rxrpc_peer *peer;
674 struct sk_buff *skb;
675 int ret;
676
677 _enter("%p, %d", sk, count);
678
679 ASSERT(!irqs_disabled());
680
681 read_lock_bh(&rxrpc_local_lock);
682 local = sk->sk_user_data;
683 if (local && atomic_read(&local->usage) > 0)
684 rxrpc_get_local(local);
685 else
686 local = NULL;
687 read_unlock_bh(&rxrpc_local_lock);
688 if (!local) {
689 _leave(" [local dead]");
690 return;
691 }
692
693 skb = skb_recv_datagram(sk, 0, 1, &ret);
694 if (!skb) {
695 rxrpc_put_local(local);
696 if (ret == -EAGAIN)
697 return;
698 _debug("UDP socket error %d", ret);
699 return;
700 }
701
702 rxrpc_new_skb(skb);
703
704 _net("recv skb %p", skb);
705
706 /* we'll probably need to checksum it (didn't call sock_recvmsg) */
707 if (skb_checksum_complete(skb)) {
708 rxrpc_free_skb(skb);
709 rxrpc_put_local(local);
710 _leave(" [CSUM failed]");
711 return;
712 }
713
714 /* the socket buffer we have is owned by UDP, with UDP's data all over
715 * it, but we really want our own */
716 skb_orphan(skb);
717 sp = rxrpc_skb(skb);
718 memset(sp, 0, sizeof(*sp));
719
720 _net("Rx UDP packet from %08x:%04hu",
721 ntohl(ip_hdr(skb)->saddr), ntohs(udp_hdr(skb)->source));
722
723 /* dig out the RxRPC connection details */
724 if (skb_copy_bits(skb, sizeof(struct udphdr), &sp->hdr,
725 sizeof(sp->hdr)) < 0)
726 goto bad_message;
727 if (!pskb_pull(skb, sizeof(struct udphdr) + sizeof(sp->hdr)))
728 BUG();
729
730 _net("Rx RxRPC %s ep=%x call=%x:%x",
731 sp->hdr.flags & RXRPC_CLIENT_INITIATED ? "ToServer" : "ToClient",
732 ntohl(sp->hdr.epoch),
733 ntohl(sp->hdr.cid),
734 ntohl(sp->hdr.callNumber));
735
736 if (sp->hdr.type == 0 || sp->hdr.type >= RXRPC_N_PACKET_TYPES) {
737 _proto("Rx Bad Packet Type %u", sp->hdr.type);
738 goto bad_message;
739 }
740
741 if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
742 (sp->hdr.callNumber == 0 || sp->hdr.seq == 0))
743 goto bad_message;
744
745 peer = rxrpc_find_peer(local, ip_hdr(skb)->saddr, udp_hdr(skb)->source);
746 if (IS_ERR(peer))
747 goto cant_route_call;
748
749 trans = rxrpc_find_transport(local, peer);
750 rxrpc_put_peer(peer);
751 if (!trans)
752 goto cant_route_call;
753
754 conn = rxrpc_find_connection(trans, &sp->hdr);
755 rxrpc_put_transport(trans);
756 if (!conn)
757 goto cant_route_call;
758
759 _debug("CONN %p {%d}", conn, conn->debug_id);
760
761 if (sp->hdr.callNumber == 0)
762 rxrpc_post_packet_to_conn(conn, skb);
763 else
764 rxrpc_post_packet_to_call(conn, skb);
765 rxrpc_put_connection(conn);
766 rxrpc_put_local(local);
767 return;
768
769cant_route_call:
770 _debug("can't route call");
771 if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
772 sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
773 if (sp->hdr.seq == __constant_cpu_to_be32(1)) {
774 _debug("first packet");
775 skb_queue_tail(&local->accept_queue, skb);
776 rxrpc_queue_work(&local->acceptor);
777 rxrpc_put_local(local);
778 _leave(" [incoming]");
779 return;
780 }
781 skb->priority = RX_INVALID_OPERATION;
782 } else {
783 skb->priority = RX_CALL_DEAD;
784 }
785
786 _debug("reject");
787 rxrpc_reject_packet(local, skb);
788 rxrpc_put_local(local);
789 _leave(" [no call]");
790 return;
791
792bad_message:
793 skb->priority = RX_PROTOCOL_ERROR;
794 rxrpc_reject_packet(local, skb);
795 rxrpc_put_local(local);
796 _leave(" [badmsg]");
797}
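
The duplicate and window checks in rxrpc_fast_process_data() above can be
summarised by the following simplified model (window_check() is a
hypothetical helper; the real function additionally tracks rx_data_post and
the out-of-sequence queue):

	/* one bit per packet beyond the last sequence number consumed by
	 * recvmsg; returns 1 to accept, 0 for a duplicate, -1 for a packet
	 * beyond the ACK window */
	static int window_check(unsigned long *ackr_window, u32 seq,
				u32 rx_data_eaten, u32 ackr_win_top)
	{
		int ackbit = seq - (rx_data_eaten + 1);

		if (seq <= rx_data_eaten ||
		    __test_and_set_bit(ackbit, ackr_window))
			return 0;	/* ACK as RXRPC_ACK_DUPLICATE */

		if (seq >= ackr_win_top) {
			__clear_bit(ackbit, ackr_window);
			return -1;	/* ACK as RXRPC_ACK_EXCEEDS_WINDOW */
		}

		return 1;		/* fresh, in-window packet */
	}
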
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
new file mode 100644
index 000000000000..58aaf892238e
--- /dev/null
+++ b/net/rxrpc/ar-internal.h
@@ -0,0 +1,808 @@
1/* AF_RXRPC internal definitions
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <rxrpc/packet.h>
13
14#if 0
15#define CHECK_SLAB_OKAY(X) \
16 BUG_ON(atomic_read((X)) >> (sizeof(atomic_t) - 2) == \
17 (POISON_FREE << 8 | POISON_FREE))
18#else
19#define CHECK_SLAB_OKAY(X) do {} while(0)
20#endif
21
22#define FCRYPT_BSIZE 8
23struct rxrpc_crypt {
24 union {
25 u8 x[FCRYPT_BSIZE];
26 u32 n[2];
27 };
28} __attribute__((aligned(8)));
29
30#define rxrpc_queue_work(WS) queue_work(rxrpc_workqueue, (WS))
31#define rxrpc_queue_delayed_work(WS,D) \
32 queue_delayed_work(rxrpc_workqueue, (WS), (D))
33
34#define rxrpc_queue_call(CALL) rxrpc_queue_work(&(CALL)->processor)
35#define rxrpc_queue_conn(CONN) rxrpc_queue_work(&(CONN)->processor)
36
37/*
38 * sk_state for RxRPC sockets
39 */
40enum {
41 RXRPC_UNCONNECTED = 0,
42 RXRPC_CLIENT_BOUND, /* client local address bound */
43 RXRPC_CLIENT_CONNECTED, /* client is connected */
44 RXRPC_SERVER_BOUND, /* server local address bound */
45 RXRPC_SERVER_LISTENING, /* server listening for connections */
46 RXRPC_CLOSE, /* socket is being closed */
47};
48
49/*
50 * RxRPC socket definition
51 */
52struct rxrpc_sock {
53 /* WARNING: sk has to be the first member */
54 struct sock sk;
55 rxrpc_interceptor_t interceptor; /* kernel service Rx interceptor function */
56 struct rxrpc_local *local; /* local endpoint */
57 struct rxrpc_transport *trans; /* transport handler */
58 struct rxrpc_conn_bundle *bundle; /* virtual connection bundle */
59 struct rxrpc_connection *conn; /* exclusive virtual connection */
60 struct list_head listen_link; /* link in the local endpoint's listen list */
61 struct list_head secureq; /* calls awaiting connection security clearance */
62 struct list_head acceptq; /* calls awaiting acceptance */
63 struct key *key; /* security for this socket */
64 struct key *securities; /* list of server security descriptors */
65 struct rb_root calls; /* outstanding calls on this socket */
66 unsigned long flags;
67#define RXRPC_SOCK_EXCLUSIVE_CONN 1 /* exclusive connection for a client socket */
68 rwlock_t call_lock; /* lock for calls */
69 u32 min_sec_level; /* minimum security level */
70#define RXRPC_SECURITY_MAX RXRPC_SECURITY_ENCRYPT
71 struct sockaddr_rxrpc srx; /* local address */
72 sa_family_t proto; /* protocol created with */
73 __be16 service_id; /* service ID of local/remote service */
74};
75
76#define rxrpc_sk(__sk) container_of((__sk), struct rxrpc_sock, sk)
77
78/*
79 * RxRPC socket buffer private variables
80 * - max 48 bytes (struct sk_buff::cb)
81 */
82struct rxrpc_skb_priv {
83 struct rxrpc_call *call; /* call with which associated */
84 unsigned long resend_at; /* time in jiffies at which to resend */
85 union {
86 unsigned offset; /* offset into buffer of next read */
87 int remain; /* amount of space remaining for next write */
88 u32 error; /* network error code */
89 bool need_resend; /* T if needs resending */
90 };
91
92 struct rxrpc_header hdr; /* RxRPC packet header from this packet */
93};
94
95#define rxrpc_skb(__skb) ((struct rxrpc_skb_priv *) &(__skb)->cb)
96
97enum rxrpc_command {
98 RXRPC_CMD_SEND_DATA, /* send data message */
99 RXRPC_CMD_SEND_ABORT, /* request abort generation */
100 RXRPC_CMD_ACCEPT, /* [server] accept incoming call */
101 RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */
102};
103
104/*
105 * RxRPC security module interface
106 */
107struct rxrpc_security {
108 struct module *owner; /* providing module */
109 struct list_head link; /* link in master list */
110 const char *name; /* name of this service */
111 u8 security_index; /* security type provided */
112
113 /* initialise a connection's security */
114 int (*init_connection_security)(struct rxrpc_connection *);
115
116 /* prime a connection's packet security */
117 void (*prime_packet_security)(struct rxrpc_connection *);
118
119 /* impose security on a packet */
120 int (*secure_packet)(const struct rxrpc_call *,
121 struct sk_buff *,
122 size_t,
123 void *);
124
125 /* verify the security on a received packet */
126 int (*verify_packet)(const struct rxrpc_call *, struct sk_buff *,
127 u32 *);
128
129 /* issue a challenge */
130 int (*issue_challenge)(struct rxrpc_connection *);
131
132 /* respond to a challenge */
133 int (*respond_to_challenge)(struct rxrpc_connection *,
134 struct sk_buff *,
135 u32 *);
136
137 /* verify a response */
138 int (*verify_response)(struct rxrpc_connection *,
139 struct sk_buff *,
140 u32 *);
141
142 /* clear connection security */
143 void (*clear)(struct rxrpc_connection *);
144};
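
/* Illustration only -- a hypothetical minimal provider (none of these
 * symbols exist in this patch) would plug into the ops table above
 * something like this, with the remaining operations filled in the same
 * way before the provider is registered with the core:
 *
 *	static int none_init_connection_security(struct rxrpc_connection *conn)
 *	{
 *		return 0;
 *	}
 *
 *	static void none_prime_packet_security(struct rxrpc_connection *conn)
 *	{
 *	}
 *
 *	static struct rxrpc_security rxrpc_none_security = {
 *		.owner			= THIS_MODULE,
 *		.name			= "none",
 *		.security_index		= 0,
 *		.init_connection_security = none_init_connection_security,
 *		.prime_packet_security	= none_prime_packet_security,
 *	};
 */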
145
146/*
147 * RxRPC local transport endpoint definition
148 * - matched by local port, address and protocol type
149 */
150struct rxrpc_local {
151 struct socket *socket; /* my UDP socket */
152 struct work_struct destroyer; /* endpoint destroyer */
153 struct work_struct acceptor; /* incoming call processor */
154 struct work_struct rejecter; /* packet reject writer */
155 struct list_head services; /* services listening on this endpoint */
156 struct list_head link; /* link in endpoint list */
157 struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */
158 struct sk_buff_head accept_queue; /* incoming calls awaiting acceptance */
159 struct sk_buff_head reject_queue; /* packets awaiting rejection */
160 spinlock_t lock; /* access lock */
161 rwlock_t services_lock; /* lock for services list */
162 atomic_t usage;
163 int debug_id; /* debug ID for printks */
164 volatile char error_rcvd; /* T if received ICMP error outstanding */
165 struct sockaddr_rxrpc srx; /* local address */
166};
167
168/*
169 * RxRPC remote transport endpoint definition
170 * - matched by remote port, address and protocol type
171 * - holds the connection ID counter for connections between the two endpoints
172 */
173struct rxrpc_peer {
174 struct work_struct destroyer; /* peer destroyer */
175 struct list_head link; /* link in master peer list */
176 struct list_head error_targets; /* targets for net error distribution */
177 spinlock_t lock; /* access lock */
178 atomic_t usage;
179 unsigned if_mtu; /* interface MTU for this peer */
180 unsigned mtu; /* network MTU for this peer */
181 unsigned maxdata; /* data size (MTU - hdrsize) */
182 unsigned short hdrsize; /* header size (IP + UDP + RxRPC) */
183 int debug_id; /* debug ID for printks */
184 int net_error; /* network error distributed */
185 struct sockaddr_rxrpc srx; /* remote address */
186
187 /* calculated RTT cache */
188#define RXRPC_RTT_CACHE_SIZE 32
189 suseconds_t rtt; /* current RTT estimate (in uS) */
190 unsigned rtt_point; /* next entry at which to insert */
191 unsigned rtt_usage; /* amount of cache actually used */
192 suseconds_t rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* calculated RTT cache */
193};
194
195/*
196 * RxRPC point-to-point transport / connection manager definition
197 * - handles a bundle of connections between two endpoints
198 * - matched by { local, peer }
199 */
200struct rxrpc_transport {
201 struct rxrpc_local *local; /* local transport endpoint */
202 struct rxrpc_peer *peer; /* remote transport endpoint */
203 struct work_struct error_handler; /* network error distributor */
204 struct rb_root bundles; /* client connection bundles on this transport */
205 struct rb_root client_conns; /* client connections on this transport */
206 struct rb_root server_conns; /* server connections on this transport */
207 struct list_head link; /* link in master session list */
208 struct sk_buff_head error_queue; /* error packets awaiting processing */
209 time_t put_time; /* time at which to reap */
210 spinlock_t client_lock; /* client connection allocation lock */
211 rwlock_t conn_lock; /* lock for active/dead connections */
212 atomic_t usage;
213 int debug_id; /* debug ID for printks */
214 unsigned int conn_idcounter; /* connection ID counter (client) */
215};
216
217/*
218 * RxRPC client connection bundle
219 * - matched by { transport, service_id, key }
220 */
221struct rxrpc_conn_bundle {
222 struct rb_node node; /* node in transport's lookup tree */
223 struct list_head unused_conns; /* unused connections in this bundle */
224 struct list_head avail_conns; /* available connections in this bundle */
225 struct list_head busy_conns; /* busy connections in this bundle */
226 struct key *key; /* security for this bundle */
227 wait_queue_head_t chanwait; /* wait for channel to become available */
228 atomic_t usage;
229 int debug_id; /* debug ID for printks */
230 unsigned short num_conns; /* number of connections in this bundle */
231 __be16 service_id; /* service ID */
232 uint8_t security_ix; /* security type */
233};
234
235/*
236 * RxRPC connection definition
237 * - matched by { transport, service_id, conn_id, direction, key }
238 * - each connection can only handle four simultaneous calls
239 */
240struct rxrpc_connection {
241 struct rxrpc_transport *trans; /* transport session */
242 struct rxrpc_conn_bundle *bundle; /* connection bundle (client) */
243 struct work_struct processor; /* connection event processor */
244 struct rb_node node; /* node in transport's lookup tree */
245 struct list_head link; /* link in master connection list */
246 struct list_head bundle_link; /* link in bundle */
247 struct rb_root calls; /* calls on this connection */
248 struct sk_buff_head rx_queue; /* received conn-level packets */
249 struct rxrpc_call *channels[RXRPC_MAXCALLS]; /* channels (active calls) */
250 struct rxrpc_security *security; /* applied security module */
251 struct key *key; /* security for this connection (client) */
252 struct key *server_key; /* security for this service */
253 struct crypto_blkcipher *cipher; /* encryption handle */
254 struct rxrpc_crypt csum_iv; /* packet checksum base */
255 unsigned long events;
256#define RXRPC_CONN_CHALLENGE 0 /* send challenge packet */
257 time_t put_time; /* time at which to reap */
258 rwlock_t lock; /* access lock */
259 spinlock_t state_lock; /* state-change lock */
260 atomic_t usage;
261 u32 real_conn_id; /* connection ID (host-endian) */
262 enum { /* current state of connection */
263 RXRPC_CONN_UNUSED, /* - connection not yet attempted */
264 RXRPC_CONN_CLIENT, /* - client connection */
265 RXRPC_CONN_SERVER_UNSECURED, /* - server unsecured connection */
266 RXRPC_CONN_SERVER_CHALLENGING, /* - server challenging for security */
267 RXRPC_CONN_SERVER, /* - server secured connection */
268 RXRPC_CONN_REMOTELY_ABORTED, /* - conn aborted by peer */
269 RXRPC_CONN_LOCALLY_ABORTED, /* - conn aborted locally */
270 RXRPC_CONN_NETWORK_ERROR, /* - conn terminated by network error */
271 } state;
272 int error; /* error code for local abort */
273 int debug_id; /* debug ID for printks */
274 unsigned call_counter; /* call ID counter */
275 atomic_t serial; /* packet serial number counter */
276 atomic_t hi_serial; /* highest serial number received */
277 u8 avail_calls; /* number of calls available */
278 u8 size_align; /* data size alignment (for security) */
279 u8 header_size; /* rxrpc + security header size */
280 u8 security_size; /* security header size */
281 u32 security_level; /* security level negotiated */
282 u32 security_nonce; /* response re-use preventer */
283
284 /* the following are all in net order */
285 __be32 epoch; /* epoch of this connection */
286 __be32 cid; /* connection ID */
287 __be16 service_id; /* service ID */
288 u8 security_ix; /* security type */
289 u8 in_clientflag; /* RXRPC_CLIENT_INITIATED if we are server */
290 u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */
291};
292
293/*
294 * RxRPC call definition
295 * - matched by { connection, call_id }
296 */
297struct rxrpc_call {
298 struct rxrpc_connection *conn; /* connection carrying call */
299 struct rxrpc_sock *socket; /* socket responsible */
300 struct timer_list lifetimer; /* lifetime remaining on call */
301 struct timer_list deadspan; /* reap timer for re-ACK'ing, etc */
302 struct timer_list ack_timer; /* ACK generation timer */
303 struct timer_list resend_timer; /* Tx resend timer */
304 struct work_struct destroyer; /* call destroyer */
305 struct work_struct processor; /* packet processor and ACK generator */
306 struct list_head link; /* link in master call list */
307 struct list_head error_link; /* link in error distribution list */
308 struct list_head accept_link; /* calls awaiting acceptance */
309 struct rb_node sock_node; /* node in socket call tree */
310 struct rb_node conn_node; /* node in connection call tree */
311 struct sk_buff_head rx_queue; /* received packets */
312 struct sk_buff_head rx_oos_queue; /* packets received out of sequence */
313 struct sk_buff *tx_pending; /* Tx socket buffer being filled */
314 wait_queue_head_t tx_waitq; /* wait for Tx window space to become available */
315 unsigned long user_call_ID; /* user-defined call ID */
316 unsigned long creation_jif; /* time of call creation */
317 unsigned long flags;
318#define RXRPC_CALL_RELEASED 0 /* call has been released - no more message to userspace */
319#define RXRPC_CALL_TERMINAL_MSG 1 /* call has given the socket its final message */
320#define RXRPC_CALL_RCVD_LAST 2 /* all packets received */
321#define RXRPC_CALL_RUN_RTIMER 3 /* Tx resend timer started */
322#define RXRPC_CALL_TX_SOFT_ACK 4 /* sent some soft ACKs */
323#define RXRPC_CALL_PROC_BUSY 5 /* the processor is busy */
324#define RXRPC_CALL_INIT_ACCEPT 6 /* acceptance was initiated */
325#define RXRPC_CALL_HAS_USERID 7 /* has a user ID attached */
326#define RXRPC_CALL_EXPECT_OOS 8 /* expect out of sequence packets */
327 unsigned long events;
328#define RXRPC_CALL_RCVD_ACKALL 0 /* ACKALL or reply received */
329#define RXRPC_CALL_RCVD_BUSY 1 /* busy packet received */
330#define RXRPC_CALL_RCVD_ABORT 2 /* abort packet received */
331#define RXRPC_CALL_RCVD_ERROR 3 /* network error received */
332#define RXRPC_CALL_ACK_FINAL 4 /* need to generate final ACK (and release call) */
333#define RXRPC_CALL_ACK 5 /* need to generate ACK */
334#define RXRPC_CALL_REJECT_BUSY 6 /* need to generate busy message */
335#define RXRPC_CALL_ABORT 7 /* need to generate abort */
336#define RXRPC_CALL_CONN_ABORT 8 /* local connection abort generated */
337#define RXRPC_CALL_RESEND_TIMER 9 /* Tx resend timer expired */
338#define RXRPC_CALL_RESEND 10 /* Tx resend required */
339#define RXRPC_CALL_DRAIN_RX_OOS 11 /* drain the Rx out of sequence queue */
340#define RXRPC_CALL_LIFE_TIMER 12 /* call's lifetimer ran out */
341#define RXRPC_CALL_ACCEPTED 13 /* incoming call accepted by userspace app */
342#define RXRPC_CALL_SECURED 14 /* incoming call's connection is now secure */
343#define RXRPC_CALL_POST_ACCEPT 15 /* need to post an "accept?" message to the app */
344#define RXRPC_CALL_RELEASE 16 /* need to release the call's resources */
345
346 spinlock_t lock;
347 rwlock_t state_lock; /* lock for state transition */
348 atomic_t usage;
349 atomic_t sequence; /* Tx data packet sequence counter */
350 u32 abort_code; /* local/remote abort code */
351 enum { /* current state of call */
352 RXRPC_CALL_CLIENT_SEND_REQUEST, /* - client sending request phase */
353 RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */
354 RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */
355 RXRPC_CALL_CLIENT_FINAL_ACK, /* - client sending final ACK phase */
356 RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */
357 RXRPC_CALL_SERVER_ACCEPTING, /* - server accepting request */
358 RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */
359 RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */
360 RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */
361 RXRPC_CALL_SERVER_AWAIT_ACK, /* - server awaiting final ACK */
362 RXRPC_CALL_COMPLETE, /* - call completed */
363 RXRPC_CALL_SERVER_BUSY, /* - call rejected by busy server */
364 RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */
365 RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */
366 RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */
367 RXRPC_CALL_DEAD, /* - call is dead */
368 } state;
369 int debug_id; /* debug ID for printks */
370 u8 channel; /* connection channel occupied by this call */
371
372 /* transmission-phase ACK management */
373 uint8_t acks_head; /* offset into window of first entry */
374 uint8_t acks_tail; /* offset into window of last entry */
375 uint8_t acks_winsz; /* size of un-ACK'd window */
376 uint8_t acks_unacked; /* lowest unacked packet in last ACK received */
377 int acks_latest; /* serial number of latest ACK received */
378 rxrpc_seq_t acks_hard; /* highest definitively ACK'd msg seq */
379 unsigned long *acks_window; /* sent packet window
380 * - elements are pointers with LSB set if ACK'd
381 */
382
383 /* receive-phase ACK management */
384 rxrpc_seq_t rx_data_expect; /* next data seq ID expected to be received */
385 rxrpc_seq_t rx_data_post; /* next data seq ID expected to be posted */
386 rxrpc_seq_t rx_data_recv; /* last data seq ID encountered by recvmsg */
387 rxrpc_seq_t rx_data_eaten; /* last data seq ID consumed by recvmsg */
388 rxrpc_seq_t rx_first_oos; /* first packet in rx_oos_queue (or 0) */
389 rxrpc_seq_t ackr_win_top; /* top of ACK window (rx_data_eaten is bottom) */
390 rxrpc_seq_net_t ackr_prev_seq; /* previous sequence number received */
391 uint8_t ackr_reason; /* reason to ACK */
392 __be32 ackr_serial; /* serial of packet being ACK'd */
393 atomic_t ackr_not_idle; /* number of packets in Rx queue */
394
395 /* received packet records, 1 bit per record */
396#define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG)
397 unsigned long ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1];
398
399 /* the following should all be in net order */
400 __be32 cid; /* connection ID + channel index */
401 __be32 call_id; /* call ID on connection */
402};
403
404/*
405 * RxRPC key for Kerberos (type-2 security)
406 */
407struct rxkad_key {
408 u16 security_index; /* RxRPC header security index */
409 u16 ticket_len; /* length of ticket[] */
410 u32 expiry; /* time at which expires */
411 u32 kvno; /* key version number */
412 u8 session_key[8]; /* DES session key */
413 u8 ticket[0]; /* the encrypted ticket */
414};
415
416struct rxrpc_key_payload {
417 struct rxkad_key k;
418};
419
420/*
421 * locally abort an RxRPC call
422 */
423static inline void rxrpc_abort_call(struct rxrpc_call *call, u32 abort_code)
424{
425 write_lock_bh(&call->state_lock);
426 if (call->state < RXRPC_CALL_COMPLETE) {
427 call->abort_code = abort_code;
428 call->state = RXRPC_CALL_LOCALLY_ABORTED;
429 set_bit(RXRPC_CALL_ABORT, &call->events);
430 }
431 write_unlock_bh(&call->state_lock);
432}
433
434/*
435 * af_rxrpc.c
436 */
437extern atomic_t rxrpc_n_skbs;
438extern __be32 rxrpc_epoch;
439extern atomic_t rxrpc_debug_id;
440extern struct workqueue_struct *rxrpc_workqueue;
441
442/*
443 * ar-accept.c
444 */
445extern void rxrpc_accept_incoming_calls(struct work_struct *);
446extern struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *,
447 unsigned long);
448extern int rxrpc_reject_call(struct rxrpc_sock *);
449
450/*
451 * ar-ack.c
452 */
453extern void __rxrpc_propose_ACK(struct rxrpc_call *, uint8_t, __be32, bool);
454extern void rxrpc_propose_ACK(struct rxrpc_call *, uint8_t, __be32, bool);
455extern void rxrpc_process_call(struct work_struct *);
456
457/*
458 * ar-call.c
459 */
460extern struct kmem_cache *rxrpc_call_jar;
461extern struct list_head rxrpc_calls;
462extern rwlock_t rxrpc_call_lock;
463
464extern struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *,
465 struct rxrpc_transport *,
466 struct rxrpc_conn_bundle *,
467 unsigned long, int, gfp_t);
468extern struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *,
469 struct rxrpc_connection *,
470 struct rxrpc_header *, gfp_t);
471extern struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *,
472 unsigned long);
473extern void rxrpc_release_call(struct rxrpc_call *);
474extern void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
475extern void __rxrpc_put_call(struct rxrpc_call *);
476extern void __exit rxrpc_destroy_all_calls(void);
477
478/*
479 * ar-connection.c
480 */
481extern struct list_head rxrpc_connections;
482extern rwlock_t rxrpc_connection_lock;
483
484extern struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *,
485 struct rxrpc_transport *,
486 struct key *,
487 __be16, gfp_t);
488extern void rxrpc_put_bundle(struct rxrpc_transport *,
489 struct rxrpc_conn_bundle *);
490extern int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_transport *,
491 struct rxrpc_conn_bundle *, struct rxrpc_call *,
492 gfp_t);
493extern void rxrpc_put_connection(struct rxrpc_connection *);
494extern void __exit rxrpc_destroy_all_connections(void);
495extern struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *,
496 struct rxrpc_header *);
497extern struct rxrpc_connection *
498rxrpc_incoming_connection(struct rxrpc_transport *, struct rxrpc_header *,
499 gfp_t);
500
501/*
502 * ar-connevent.c
503 */
504extern void rxrpc_process_connection(struct work_struct *);
505extern void rxrpc_reject_packet(struct rxrpc_local *, struct sk_buff *);
506extern void rxrpc_reject_packets(struct work_struct *);
507
508/*
509 * ar-error.c
510 */
511extern void rxrpc_UDP_error_report(struct sock *);
512extern void rxrpc_UDP_error_handler(struct work_struct *);
513
514/*
515 * ar-input.c
516 */
517extern unsigned long rxrpc_ack_timeout;
518extern const char *rxrpc_pkts[];
519
520extern void rxrpc_data_ready(struct sock *, int);
521extern int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct sk_buff *, bool,
522 bool);
523extern void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *);
524
525/*
526 * ar-local.c
527 */
528extern rwlock_t rxrpc_local_lock;
529extern struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *);
530extern void rxrpc_put_local(struct rxrpc_local *);
531extern void __exit rxrpc_destroy_all_locals(void);
532
533/*
534 * ar-key.c
535 */
536extern struct key_type key_type_rxrpc;
537extern struct key_type key_type_rxrpc_s;
538
539extern int rxrpc_request_key(struct rxrpc_sock *, char __user *, int);
540extern int rxrpc_server_keyring(struct rxrpc_sock *, char __user *, int);
541extern int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *,
542 time_t, u32);
543
544/*
545 * ar-output.c
546 */
547extern int rxrpc_resend_timeout;
548
549extern int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *);
550extern int rxrpc_client_sendmsg(struct kiocb *, struct rxrpc_sock *,
551 struct rxrpc_transport *, struct msghdr *,
552 size_t);
553extern int rxrpc_server_sendmsg(struct kiocb *, struct rxrpc_sock *,
554 struct msghdr *, size_t);
555
556/*
557 * ar-peer.c
558 */
559extern struct rxrpc_peer *rxrpc_get_peer(struct sockaddr_rxrpc *, gfp_t);
560extern void rxrpc_put_peer(struct rxrpc_peer *);
561extern struct rxrpc_peer *rxrpc_find_peer(struct rxrpc_local *,
562 __be32, __be16);
563extern void __exit rxrpc_destroy_all_peers(void);
564
565/*
566 * ar-proc.c
567 */
568extern const char *rxrpc_call_states[];
569extern struct file_operations rxrpc_call_seq_fops;
570extern struct file_operations rxrpc_connection_seq_fops;
571
572/*
573 * ar-recvmsg.c
574 */
575extern void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *);
576extern int rxrpc_recvmsg(struct kiocb *, struct socket *, struct msghdr *,
577 size_t, int);
578
579/*
580 * ar-security.c
581 */
582extern int rxrpc_register_security(struct rxrpc_security *);
583extern void rxrpc_unregister_security(struct rxrpc_security *);
584extern int rxrpc_init_client_conn_security(struct rxrpc_connection *);
585extern int rxrpc_init_server_conn_security(struct rxrpc_connection *);
586extern int rxrpc_secure_packet(const struct rxrpc_call *, struct sk_buff *,
587 size_t, void *);
588extern int rxrpc_verify_packet(const struct rxrpc_call *, struct sk_buff *,
589 u32 *);
590extern void rxrpc_clear_conn_security(struct rxrpc_connection *);
591
592/*
593 * ar-skbuff.c
594 */
595extern void rxrpc_packet_destructor(struct sk_buff *);
596
597/*
598 * ar-transport.c
599 */
600extern struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *,
601 struct rxrpc_peer *,
602 gfp_t);
603extern void rxrpc_put_transport(struct rxrpc_transport *);
604extern void __exit rxrpc_destroy_all_transports(void);
605extern struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *,
606 struct rxrpc_peer *);
607
608/*
609 * debug tracing
610 */
611extern unsigned rxrpc_debug;
612
613#define dbgprintk(FMT,...) \
614 printk("[%x%-6.6s] "FMT"\n", smp_processor_id(), current->comm ,##__VA_ARGS__)
615
616/* make sure we maintain the format strings, even when debugging is disabled */
617static inline __attribute__((format(printf,1,2)))
618void _dbprintk(const char *fmt, ...)
619{
620}
621
622#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__)
623#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__)
624#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__)
625#define kproto(FMT,...) dbgprintk("### "FMT ,##__VA_ARGS__)
626#define knet(FMT,...) dbgprintk("@@@ "FMT ,##__VA_ARGS__)
627
628
629#if defined(__KDEBUG)
630#define _enter(FMT,...) kenter(FMT,##__VA_ARGS__)
631#define _leave(FMT,...) kleave(FMT,##__VA_ARGS__)
632#define _debug(FMT,...) kdebug(FMT,##__VA_ARGS__)
633#define _proto(FMT,...) kproto(FMT,##__VA_ARGS__)
634#define _net(FMT,...) knet(FMT,##__VA_ARGS__)
635
636#elif defined(CONFIG_AF_RXRPC_DEBUG)
637#define RXRPC_DEBUG_KENTER 0x01
638#define RXRPC_DEBUG_KLEAVE 0x02
639#define RXRPC_DEBUG_KDEBUG 0x04
640#define RXRPC_DEBUG_KPROTO 0x08
641#define RXRPC_DEBUG_KNET 0x10
642
643#define _enter(FMT,...) \
644do { \
645 if (unlikely(rxrpc_debug & RXRPC_DEBUG_KENTER)) \
646 kenter(FMT,##__VA_ARGS__); \
647} while (0)
648
649#define _leave(FMT,...) \
650do { \
651 if (unlikely(rxrpc_debug & RXRPC_DEBUG_KLEAVE)) \
652 kleave(FMT,##__VA_ARGS__); \
653} while (0)
654
655#define _debug(FMT,...) \
656do { \
657 if (unlikely(rxrpc_debug & RXRPC_DEBUG_KDEBUG)) \
658 kdebug(FMT,##__VA_ARGS__); \
659} while (0)
660
661#define _proto(FMT,...) \
662do { \
663 if (unlikely(rxrpc_debug & RXRPC_DEBUG_KPROTO)) \
664 kproto(FMT,##__VA_ARGS__); \
665} while (0)
666
667#define _net(FMT,...) \
668do { \
669 if (unlikely(rxrpc_debug & RXRPC_DEBUG_KNET)) \
670 knet(FMT,##__VA_ARGS__); \
671} while (0)
672
673#else
674#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__)
675#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__)
676#define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__)
677#define _proto(FMT,...) _dbprintk("### "FMT ,##__VA_ARGS__)
678#define _net(FMT,...) _dbprintk("@@@ "FMT ,##__VA_ARGS__)
679#endif
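
Taken together, a function instrumented with these macros and built with __KDEBUG defined, say:

        _enter("%d", conn->debug_id);
        ...
        _leave(" = %d", ret);

would, per the dbgprintk() format above, emit lines of roughly this shape (the CPU ID in hex followed by the first six characters of current->comm; the values here are illustrative):

        [0events] ==> rxrpc_connect_call(4)
        [0events] <== rxrpc_connect_call() = 0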
680
681/*
682 * debug assertion checking
683 */
684#if 1 // defined(__KDEBUGALL)
685
686#define ASSERT(X) \
687do { \
688 if (unlikely(!(X))) { \
689 printk(KERN_ERR "\n"); \
690 printk(KERN_ERR "RxRPC: Assertion failed\n"); \
691 BUG(); \
692 } \
693} while(0)
694
695#define ASSERTCMP(X, OP, Y) \
696do { \
697 if (unlikely(!((X) OP (Y)))) { \
698 printk(KERN_ERR "\n"); \
699 printk(KERN_ERR "RxRPC: Assertion failed\n"); \
700 printk(KERN_ERR "%lu " #OP " %lu is false\n", \
701 (unsigned long)(X), (unsigned long)(Y)); \
702 printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n", \
703 (unsigned long)(X), (unsigned long)(Y)); \
704 BUG(); \
705 } \
706} while(0)
707
708#define ASSERTIF(C, X) \
709do { \
710 if (unlikely((C) && !(X))) { \
711 printk(KERN_ERR "\n"); \
712 printk(KERN_ERR "RxRPC: Assertion failed\n"); \
713 BUG(); \
714 } \
715} while(0)
716
717#define ASSERTIFCMP(C, X, OP, Y) \
718do { \
719 if (unlikely((C) && !((X) OP (Y)))) { \
720 printk(KERN_ERR "\n"); \
721 printk(KERN_ERR "RxRPC: Assertion failed\n"); \
722 printk(KERN_ERR "%lu " #OP " %lu is false\n", \
723 (unsigned long)(X), (unsigned long)(Y)); \
724 printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n", \
725 (unsigned long)(X), (unsigned long)(Y)); \
726 BUG(); \
727 } \
728} while(0)
729
730#else
731
732#define ASSERT(X) \
733do { \
734} while(0)
735
736#define ASSERTCMP(X, OP, Y) \
737do { \
738} while(0)
739
740#define ASSERTIF(C, X) \
741do { \
742} while(0)
743
744#define ASSERTIFCMP(C, X, OP, Y) \
745do { \
746} while(0)
747
748#endif /* __KDEBUGALL */
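
As a usage sketch, guarding a reference-count drop with the comparison form (mirroring calls made elsewhere in this patch):

        ASSERTCMP(atomic_read(&peer->usage), >, 0);

Should the comparison fail, the macro prints the operands both in decimal and in hex before calling BUG():

        RxRPC: Assertion failed
        0 > 0 is false
        0x0 > 0x0 is false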
749
750/*
751 * socket buffer accounting / leak finding
752 */
753static inline void __rxrpc_new_skb(struct sk_buff *skb, const char *fn)
754{
755 //_net("new skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs));
756 //atomic_inc(&rxrpc_n_skbs);
757}
758
759#define rxrpc_new_skb(skb) __rxrpc_new_skb((skb), __func__)
760
761static inline void __rxrpc_kill_skb(struct sk_buff *skb, const char *fn)
762{
763 //_net("kill skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs));
764 //atomic_dec(&rxrpc_n_skbs);
765}
766
767#define rxrpc_kill_skb(skb) __rxrpc_kill_skb((skb), __func__)
768
769static inline void __rxrpc_free_skb(struct sk_buff *skb, const char *fn)
770{
771 if (skb) {
772 CHECK_SLAB_OKAY(&skb->users);
773 //_net("free skb %p %s [%d]",
774 // skb, fn, atomic_read(&rxrpc_n_skbs));
775 //atomic_dec(&rxrpc_n_skbs);
776 kfree_skb(skb);
777 }
778}
779
780#define rxrpc_free_skb(skb) __rxrpc_free_skb((skb), __func__)
781
782static inline void rxrpc_purge_queue(struct sk_buff_head *list)
783{
784 struct sk_buff *skb;
785 while ((skb = skb_dequeue((list))) != NULL)
786 rxrpc_free_skb(skb);
787}
788
789static inline void __rxrpc_get_local(struct rxrpc_local *local, const char *f)
790{
791 CHECK_SLAB_OKAY(&local->usage);
792 if (atomic_inc_return(&local->usage) == 1)
793 printk("resurrected (%s)\n", f);
794}
795
796#define rxrpc_get_local(LOCAL) __rxrpc_get_local((LOCAL), __func__)
797
798#define rxrpc_get_call(CALL) \
799do { \
800 CHECK_SLAB_OKAY(&(CALL)->usage); \
801 if (atomic_inc_return(&(CALL)->usage) == 1) \
802 BUG(); \
803} while(0)
804
805#define rxrpc_put_call(CALL) \
806do { \
807 __rxrpc_put_call(CALL); \
808} while(0)
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
new file mode 100644
index 000000000000..7e049ff6ae60
--- /dev/null
+++ b/net/rxrpc/ar-key.c
@@ -0,0 +1,334 @@
1/* RxRPC key management
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
 11 * RxRPC keys should have a description describing their purpose:
 12 * "afs@CAMBRIDGE.REDHAT.COM"
13 */
14
15#include <linux/module.h>
16#include <linux/net.h>
17#include <linux/skbuff.h>
18#include <linux/key.h>
19#include <linux/crypto.h>
20#include <net/sock.h>
21#include <net/af_rxrpc.h>
22#include <keys/rxrpc-type.h>
23#include <keys/user-type.h>
24#include "ar-internal.h"
25
26static int rxrpc_instantiate(struct key *, const void *, size_t);
27static int rxrpc_instantiate_s(struct key *, const void *, size_t);
28static void rxrpc_destroy(struct key *);
29static void rxrpc_destroy_s(struct key *);
30static void rxrpc_describe(const struct key *, struct seq_file *);
31
32/*
33 * rxrpc defined keys take an arbitrary string as the description and an
34 * arbitrary blob of data as the payload
35 */
36struct key_type key_type_rxrpc = {
37 .name = "rxrpc",
38 .instantiate = rxrpc_instantiate,
39 .match = user_match,
40 .destroy = rxrpc_destroy,
41 .describe = rxrpc_describe,
42};
43
44EXPORT_SYMBOL(key_type_rxrpc);
45
46/*
47 * rxrpc server defined keys take "<serviceId>:<securityIndex>" as the
48 * description and an 8-byte decryption key as the payload
49 */
50struct key_type key_type_rxrpc_s = {
51 .name = "rxrpc_s",
52 .instantiate = rxrpc_instantiate_s,
53 .match = user_match,
54 .destroy = rxrpc_destroy_s,
55 .describe = rxrpc_describe,
56};
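
A userspace sketch of provisioning such a server key with the keyutils add_key(2) wrapper; the service ID (52) and the choice of keyring are illustrative, and secret_key is assumed to point at the raw 8-byte DES secret:

        #include <keyutils.h>

        key_serial_t k = add_key("rxrpc_s", "52:2",     /* <serviceId>:<securityIndex> */
                                 secret_key, 8,         /* 8-byte decryption key */
                                 KEY_SPEC_PROCESS_KEYRING);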
57
58/*
59 * instantiate an rxrpc defined key
60 * data should be of the form:
61 * OFFSET LEN CONTENT
62 * 0 4 key interface version number
63 * 4 2 security index (type)
64 * 6 2 ticket length
65 * 8 4 key expiry time (time_t)
66 * 12 4 kvno
67 * 16 8 session key
68 * 24 [len] ticket
69 *
70 * if no data is provided, then a no-security key is made
71 */
72static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen)
73{
74 const struct rxkad_key *tsec;
75 struct rxrpc_key_payload *upayload;
76 size_t plen;
77 u32 kver;
78 int ret;
79
80 _enter("{%x},,%zu", key_serial(key), datalen);
81
82 /* handle a no-security key */
83 if (!data && datalen == 0)
84 return 0;
85
86 /* get the key interface version number */
87 ret = -EINVAL;
88 if (datalen <= 4 || !data)
89 goto error;
90 memcpy(&kver, data, sizeof(kver));
91 data += sizeof(kver);
92 datalen -= sizeof(kver);
93
94 _debug("KEY I/F VERSION: %u", kver);
95
96 ret = -EKEYREJECTED;
97 if (kver != 1)
98 goto error;
99
100 /* deal with a version 1 key */
101 ret = -EINVAL;
102 if (datalen < sizeof(*tsec))
103 goto error;
104
105 tsec = data;
106 if (datalen != sizeof(*tsec) + tsec->ticket_len)
107 goto error;
108
109 _debug("SCIX: %u", tsec->security_index);
110 _debug("TLEN: %u", tsec->ticket_len);
111 _debug("EXPY: %x", tsec->expiry);
112 _debug("KVNO: %u", tsec->kvno);
113 _debug("SKEY: %02x%02x%02x%02x%02x%02x%02x%02x",
114 tsec->session_key[0], tsec->session_key[1],
115 tsec->session_key[2], tsec->session_key[3],
116 tsec->session_key[4], tsec->session_key[5],
117 tsec->session_key[6], tsec->session_key[7]);
118 if (tsec->ticket_len >= 8)
119 _debug("TCKT: %02x%02x%02x%02x%02x%02x%02x%02x",
120 tsec->ticket[0], tsec->ticket[1],
121 tsec->ticket[2], tsec->ticket[3],
122 tsec->ticket[4], tsec->ticket[5],
123 tsec->ticket[6], tsec->ticket[7]);
124
125 ret = -EPROTONOSUPPORT;
126 if (tsec->security_index != 2)
127 goto error;
128
129 key->type_data.x[0] = tsec->security_index;
130
131 plen = sizeof(*upayload) + tsec->ticket_len;
132 ret = key_payload_reserve(key, plen);
133 if (ret < 0)
134 goto error;
135
136 ret = -ENOMEM;
137 upayload = kmalloc(plen, GFP_KERNEL);
138 if (!upayload)
139 goto error;
140
141 /* attach the data */
142 memcpy(&upayload->k, tsec, sizeof(*tsec));
143 memcpy(&upayload->k.ticket, (void *)tsec + sizeof(*tsec),
144 tsec->ticket_len);
145 key->payload.data = upayload;
146 key->expiry = tsec->expiry;
147 ret = 0;
148
149error:
150 return ret;
151}
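
A hedged userspace sketch of packing the version-1 blob this parser expects, following the offset table above; the struct name is hypothetical, and it assumes host byte order and no padding, since the kernel memcpy()s the payload straight into struct rxkad_key:

        #include <stdint.h>
        #include <keyutils.h>

        struct rxrpc_v1_key_blob {
                uint32_t kver;                  /* 0: key interface version = 1 */
                uint16_t security_index;        /* 4: 2 = rxkad */
                uint16_t ticket_len;            /* 6 */
                uint32_t expiry;                /* 8: expiry as a time_t */
                uint32_t kvno;                  /* 12 */
                uint8_t  session_key[8];        /* 16: DES session key */
                uint8_t  ticket[];              /* 24: ticket_len bytes follow */
        };

        /* then, with blob pointing at a populated instance, roughly: */
        add_key("rxrpc", "afs@EXAMPLE.COM", blob,
                sizeof(struct rxrpc_v1_key_blob) + blob->ticket_len,
                KEY_SPEC_SESSION_KEYRING);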
152
153/*
154 * instantiate a server secret key
155 * data should be a pointer to the 8-byte secret key
156 */
157static int rxrpc_instantiate_s(struct key *key, const void *data,
158 size_t datalen)
159{
160 struct crypto_blkcipher *ci;
161
162 _enter("{%x},,%zu", key_serial(key), datalen);
163
164 if (datalen != 8)
165 return -EINVAL;
166
167 memcpy(&key->type_data, data, 8);
168
169 ci = crypto_alloc_blkcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC);
170 if (IS_ERR(ci)) {
171 _leave(" = %ld", PTR_ERR(ci));
172 return PTR_ERR(ci);
173 }
174
175 if (crypto_blkcipher_setkey(ci, data, 8) < 0)
176 BUG();
177
178 key->payload.data = ci;
179 _leave(" = 0");
180 return 0;
181}
182
183/*
184 * dispose of the data dangling from the corpse of a rxrpc key
185 */
186static void rxrpc_destroy(struct key *key)
187{
188 kfree(key->payload.data);
189}
190
191/*
192 * dispose of the data dangling from the corpse of a rxrpc key
193 */
194static void rxrpc_destroy_s(struct key *key)
195{
196 if (key->payload.data) {
197 crypto_free_blkcipher(key->payload.data);
198 key->payload.data = NULL;
199 }
200}
201
202/*
203 * describe the rxrpc key
204 */
205static void rxrpc_describe(const struct key *key, struct seq_file *m)
206{
207 seq_puts(m, key->description);
208}
209
210/*
211 * grab the security key for a socket
212 */
213int rxrpc_request_key(struct rxrpc_sock *rx, char __user *optval, int optlen)
214{
215 struct key *key;
216 char *description;
217
218 _enter("");
219
220 if (optlen <= 0 || optlen > PAGE_SIZE - 1)
221 return -EINVAL;
222
223 description = kmalloc(optlen + 1, GFP_KERNEL);
224 if (!description)
225 return -ENOMEM;
226
227 if (copy_from_user(description, optval, optlen)) {
228 kfree(description);
229 return -EFAULT;
230 }
231 description[optlen] = 0;
232
233 key = request_key(&key_type_rxrpc, description, NULL);
234 if (IS_ERR(key)) {
235 kfree(description);
236 _leave(" = %ld", PTR_ERR(key));
237 return PTR_ERR(key);
238 }
239
240 rx->key = key;
241 kfree(description);
242 _leave(" = 0 [key %x]", key->serial);
243 return 0;
244}
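
The userspace counterpart is a plain setsockopt() naming the key by its description; SOL_RXRPC and RXRPC_SECURITY_KEY are the AF_RXRPC socket-option constants introduced by this merge:

        const char desc[] = "afs@CAMBRIDGE.REDHAT.COM";

        if (setsockopt(fd, SOL_RXRPC, RXRPC_SECURITY_KEY,
                       desc, sizeof(desc) - 1) < 0)
                perror("setsockopt");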
245
246/*
247 * grab the security keyring for a server socket
248 */
249int rxrpc_server_keyring(struct rxrpc_sock *rx, char __user *optval,
250 int optlen)
251{
252 struct key *key;
253 char *description;
254
255 _enter("");
256
257 if (optlen <= 0 || optlen > PAGE_SIZE - 1)
258 return -EINVAL;
259
260 description = kmalloc(optlen + 1, GFP_KERNEL);
261 if (!description)
262 return -ENOMEM;
263
264 if (copy_from_user(description, optval, optlen)) {
265 kfree(description);
266 return -EFAULT;
267 }
268 description[optlen] = 0;
269
270 key = request_key(&key_type_keyring, description, NULL);
271 if (IS_ERR(key)) {
272 kfree(description);
273 _leave(" = %ld", PTR_ERR(key));
274 return PTR_ERR(key);
275 }
276
277 rx->securities = key;
278 kfree(description);
279 _leave(" = 0 [key %x]", key->serial);
280 return 0;
281}
282
283/*
284 * generate a server data key
285 */
286int rxrpc_get_server_data_key(struct rxrpc_connection *conn,
287 const void *session_key,
288 time_t expiry,
289 u32 kvno)
290{
291 struct key *key;
292 int ret;
293
294 struct {
295 u32 kver;
296 struct rxkad_key tsec;
297 } data;
298
299 _enter("");
300
301 key = key_alloc(&key_type_rxrpc, "x", 0, 0, current, 0,
302 KEY_ALLOC_NOT_IN_QUOTA);
303 if (IS_ERR(key)) {
304 _leave(" = -ENOMEM [alloc %ld]", PTR_ERR(key));
305 return -ENOMEM;
306 }
307
308 _debug("key %d", key_serial(key));
309
310 data.kver = 1;
311 data.tsec.security_index = 2;
312 data.tsec.ticket_len = 0;
313 data.tsec.expiry = expiry;
314 data.tsec.kvno = 0;
315
316 memcpy(&data.tsec.session_key, session_key,
317 sizeof(data.tsec.session_key));
318
319 ret = key_instantiate_and_link(key, &data, sizeof(data), NULL, NULL);
320 if (ret < 0)
321 goto error;
322
323 conn->key = key;
324 _leave(" = 0 [%d]", key_serial(key));
325 return 0;
326
327error:
328 key_revoke(key);
329 key_put(key);
330 _leave(" = -ENOMEM [ins %d]", ret);
331 return -ENOMEM;
332}
333
334EXPORT_SYMBOL(rxrpc_get_server_data_key);
diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c
new file mode 100644
index 000000000000..fe03f71f17da
--- /dev/null
+++ b/net/rxrpc/ar-local.c
@@ -0,0 +1,309 @@
1/* AF_RXRPC local endpoint management
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/net.h>
14#include <linux/skbuff.h>
15#include <net/sock.h>
16#include <net/af_rxrpc.h>
17#include "ar-internal.h"
18
19static LIST_HEAD(rxrpc_locals);
20DEFINE_RWLOCK(rxrpc_local_lock);
21static DECLARE_RWSEM(rxrpc_local_sem);
22static DECLARE_WAIT_QUEUE_HEAD(rxrpc_local_wq);
23
24static void rxrpc_destroy_local(struct work_struct *work);
25
26/*
27 * allocate a new local
28 */
29static
30struct rxrpc_local *rxrpc_alloc_local(struct sockaddr_rxrpc *srx)
31{
32 struct rxrpc_local *local;
33
34 local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL);
35 if (local) {
36 INIT_WORK(&local->destroyer, &rxrpc_destroy_local);
37 INIT_WORK(&local->acceptor, &rxrpc_accept_incoming_calls);
38 INIT_WORK(&local->rejecter, &rxrpc_reject_packets);
39 INIT_LIST_HEAD(&local->services);
40 INIT_LIST_HEAD(&local->link);
41 init_rwsem(&local->defrag_sem);
42 skb_queue_head_init(&local->accept_queue);
43 skb_queue_head_init(&local->reject_queue);
44 spin_lock_init(&local->lock);
45 rwlock_init(&local->services_lock);
46 atomic_set(&local->usage, 1);
47 local->debug_id = atomic_inc_return(&rxrpc_debug_id);
48 memcpy(&local->srx, srx, sizeof(*srx));
49 }
50
51 _leave(" = %p", local);
52 return local;
53}
54
55/*
56 * create the local socket
57 * - must be called with rxrpc_local_sem writelocked
58 */
59static int rxrpc_create_local(struct rxrpc_local *local)
60{
61 struct sock *sock;
62 int ret, opt;
63
64 _enter("%p{%d}", local, local->srx.transport_type);
65
66 /* create a socket to represent the local endpoint */
67 ret = sock_create_kern(PF_INET, local->srx.transport_type, IPPROTO_UDP,
68 &local->socket);
69 if (ret < 0) {
70 _leave(" = %d [socket]", ret);
71 return ret;
72 }
73
74 /* if a local address was supplied then bind it */
75 if (local->srx.transport_len > sizeof(sa_family_t)) {
76 _debug("bind");
77 ret = kernel_bind(local->socket,
78 (struct sockaddr *) &local->srx.transport,
79 local->srx.transport_len);
80 if (ret < 0) {
81 _debug("bind failed");
82 goto error;
83 }
84 }
85
86 /* we want to receive ICMP errors */
87 opt = 1;
88 ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR,
89 (char *) &opt, sizeof(opt));
90 if (ret < 0) {
91 _debug("setsockopt failed");
92 goto error;
93 }
94
95 /* we want to set the don't fragment bit */
96 opt = IP_PMTUDISC_DO;
97 ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER,
98 (char *) &opt, sizeof(opt));
99 if (ret < 0) {
100 _debug("setsockopt failed");
101 goto error;
102 }
103
104 write_lock_bh(&rxrpc_local_lock);
105 list_add(&local->link, &rxrpc_locals);
106 write_unlock_bh(&rxrpc_local_lock);
107
108 /* set the socket up */
109 sock = local->socket->sk;
110 sock->sk_user_data = local;
111 sock->sk_data_ready = rxrpc_data_ready;
112 sock->sk_error_report = rxrpc_UDP_error_report;
113 _leave(" = 0");
114 return 0;
115
116error:
117 local->socket->ops->shutdown(local->socket, 2);
118 local->socket->sk->sk_user_data = NULL;
119 sock_release(local->socket);
120 local->socket = NULL;
121
122 _leave(" = %d", ret);
123 return ret;
124}
125
126/*
127 * create a new local endpoint using the specified UDP address
128 */
129struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *srx)
130{
131 struct rxrpc_local *local;
132 int ret;
133
134 _enter("{%d,%u,%u.%u.%u.%u+%hu}",
135 srx->transport_type,
136 srx->transport.family,
137 NIPQUAD(srx->transport.sin.sin_addr),
138 ntohs(srx->transport.sin.sin_port));
139
140 down_write(&rxrpc_local_sem);
141
142 /* see if we have a suitable local endpoint already */
143 read_lock_bh(&rxrpc_local_lock);
144
145 list_for_each_entry(local, &rxrpc_locals, link) {
146 _debug("CMP {%d,%u,%u.%u.%u.%u+%hu}",
147 local->srx.transport_type,
148 local->srx.transport.family,
149 NIPQUAD(local->srx.transport.sin.sin_addr),
150 ntohs(local->srx.transport.sin.sin_port));
151
152 if (local->srx.transport_type != srx->transport_type ||
153 local->srx.transport.family != srx->transport.family)
154 continue;
155
156 switch (srx->transport.family) {
157 case AF_INET:
158 if (local->srx.transport.sin.sin_port !=
159 srx->transport.sin.sin_port)
160 continue;
161 if (memcmp(&local->srx.transport.sin.sin_addr,
162 &srx->transport.sin.sin_addr,
163 sizeof(struct in_addr)) != 0)
164 continue;
165 goto found_local;
166
167 default:
168 BUG();
169 }
170 }
171
172 read_unlock_bh(&rxrpc_local_lock);
173
174 /* we didn't find one, so we need to create one */
175 local = rxrpc_alloc_local(srx);
176 if (!local) {
177 up_write(&rxrpc_local_sem);
178 return ERR_PTR(-ENOMEM);
179 }
180
181 ret = rxrpc_create_local(local);
182 if (ret < 0) {
183 up_write(&rxrpc_local_sem);
184 kfree(local);
185 _leave(" = %d", ret);
186 return ERR_PTR(ret);
187 }
188
189 up_write(&rxrpc_local_sem);
190
191 _net("LOCAL new %d {%d,%u,%u.%u.%u.%u+%hu}",
192 local->debug_id,
193 local->srx.transport_type,
194 local->srx.transport.family,
195 NIPQUAD(local->srx.transport.sin.sin_addr),
196 ntohs(local->srx.transport.sin.sin_port));
197
198 _leave(" = %p [new]", local);
199 return local;
200
201found_local:
202 rxrpc_get_local(local);
203 read_unlock_bh(&rxrpc_local_lock);
204 up_write(&rxrpc_local_sem);
205
206 _net("LOCAL old %d {%d,%u,%u.%u.%u.%u+%hu}",
207 local->debug_id,
208 local->srx.transport_type,
209 local->srx.transport.family,
210 NIPQUAD(local->srx.transport.sin.sin_addr),
211 ntohs(local->srx.transport.sin.sin_port));
212
213 _leave(" = %p [reuse]", local);
214 return local;
215}
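
For reference, the sort of sockaddr_rxrpc a caller might hand to this lookup to obtain a UDP-transported endpoint; the field names follow the uses above (srx_service as referenced in ar-output.c), the srx_family field is assumed from the AF_RXRPC header, and the port value is purely illustrative:

        struct sockaddr_rxrpc srx = {
                .srx_family     = AF_RXRPC,
                .srx_service    = 0,            /* 0 = client-only endpoint */
                .transport_type = SOCK_DGRAM,
                .transport_len  = sizeof(srx.transport.sin),
        };

        srx.transport.sin.sin_family = AF_INET;
        srx.transport.sin.sin_port = htons(7001);       /* illustrative */
        srx.transport.sin.sin_addr.s_addr = htonl(INADDR_ANY);

        local = rxrpc_lookup_local(&srx);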
216
217/*
218 * release a local endpoint
219 */
220void rxrpc_put_local(struct rxrpc_local *local)
221{
222 _enter("%p{u=%d}", local, atomic_read(&local->usage));
223
224 ASSERTCMP(atomic_read(&local->usage), >, 0);
225
226 /* to prevent a race, the decrement and the dequeue must be effectively
227 * atomic */
228 write_lock_bh(&rxrpc_local_lock);
229 if (unlikely(atomic_dec_and_test(&local->usage))) {
230 _debug("destroy local");
231 rxrpc_queue_work(&local->destroyer);
232 }
233 write_unlock_bh(&rxrpc_local_lock);
234 _leave("");
235}
236
237/*
238 * destroy a local endpoint
239 */
240static void rxrpc_destroy_local(struct work_struct *work)
241{
242 struct rxrpc_local *local =
243 container_of(work, struct rxrpc_local, destroyer);
244
245 _enter("%p{%d}", local, atomic_read(&local->usage));
246
247 down_write(&rxrpc_local_sem);
248
249 write_lock_bh(&rxrpc_local_lock);
250 if (atomic_read(&local->usage) > 0) {
251 write_unlock_bh(&rxrpc_local_lock);
252 up_write(&rxrpc_local_sem);
253 _leave(" [resurrected]");
254 return;
255 }
256
257 list_del(&local->link);
258 local->socket->sk->sk_user_data = NULL;
259 write_unlock_bh(&rxrpc_local_lock);
260
261 downgrade_write(&rxrpc_local_sem);
262
263 ASSERT(list_empty(&local->services));
264 ASSERT(!work_pending(&local->acceptor));
265 ASSERT(!work_pending(&local->rejecter));
266
267 /* finish cleaning up the local descriptor */
268 rxrpc_purge_queue(&local->accept_queue);
269 rxrpc_purge_queue(&local->reject_queue);
270 local->socket->ops->shutdown(local->socket, 2);
271 sock_release(local->socket);
272
273 up_read(&rxrpc_local_sem);
274
275 _net("DESTROY LOCAL %d", local->debug_id);
276 kfree(local);
277
278 if (list_empty(&rxrpc_locals))
279 wake_up_all(&rxrpc_local_wq);
280
281 _leave("");
282}
283
284/*
285 * preemptively destroy all local endpoints rather than waiting for
286 * them to be destroyed
287 */
288void __exit rxrpc_destroy_all_locals(void)
289{
290 DECLARE_WAITQUEUE(myself, current);
291
292 _enter("");
293
294 /* we simply have to wait for them to go away */
295 if (!list_empty(&rxrpc_locals)) {
296 set_current_state(TASK_UNINTERRUPTIBLE);
297 add_wait_queue(&rxrpc_local_wq, &myself);
298
299 while (!list_empty(&rxrpc_locals)) {
300 schedule();
301 set_current_state(TASK_UNINTERRUPTIBLE);
302 }
303
304 remove_wait_queue(&rxrpc_local_wq, &myself);
305 set_current_state(TASK_RUNNING);
306 }
307
308 _leave("");
309}
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
new file mode 100644
index 000000000000..5cdde4a48ed1
--- /dev/null
+++ b/net/rxrpc/ar-output.c
@@ -0,0 +1,734 @@
1/* RxRPC packet transmission
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/net.h>
13#include <linux/skbuff.h>
14#include <linux/circ_buf.h>
15#include <net/sock.h>
16#include <net/af_rxrpc.h>
17#include "ar-internal.h"
18
19int rxrpc_resend_timeout = 4;
20
21static int rxrpc_send_data(struct kiocb *iocb,
22 struct rxrpc_sock *rx,
23 struct rxrpc_call *call,
24 struct msghdr *msg, size_t len);
25
26/*
27 * extract control messages from the sendmsg() control buffer
28 */
29static int rxrpc_sendmsg_cmsg(struct rxrpc_sock *rx, struct msghdr *msg,
30 unsigned long *user_call_ID,
31 enum rxrpc_command *command,
32 u32 *abort_code,
33 bool server)
34{
35 struct cmsghdr *cmsg;
36 int len;
37
38 *command = RXRPC_CMD_SEND_DATA;
39
40 if (msg->msg_controllen == 0)
41 return -EINVAL;
42
43 for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
44 if (!CMSG_OK(msg, cmsg))
45 return -EINVAL;
46
47 len = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
48 _debug("CMSG %d, %d, %d",
49 cmsg->cmsg_level, cmsg->cmsg_type, len);
50
51 if (cmsg->cmsg_level != SOL_RXRPC)
52 continue;
53
54 switch (cmsg->cmsg_type) {
55 case RXRPC_USER_CALL_ID:
56 if (msg->msg_flags & MSG_CMSG_COMPAT) {
57 if (len != sizeof(u32))
58 return -EINVAL;
59 *user_call_ID = *(u32 *) CMSG_DATA(cmsg);
60 } else {
61 if (len != sizeof(unsigned long))
62 return -EINVAL;
63 *user_call_ID = *(unsigned long *)
64 CMSG_DATA(cmsg);
65 }
66 _debug("User Call ID %lx", *user_call_ID);
67 break;
68
69 case RXRPC_ABORT:
70 if (*command != RXRPC_CMD_SEND_DATA)
71 return -EINVAL;
72 *command = RXRPC_CMD_SEND_ABORT;
73 if (len != sizeof(*abort_code))
74 return -EINVAL;
75 *abort_code = *(unsigned int *) CMSG_DATA(cmsg);
76 _debug("Abort %x", *abort_code);
77 if (*abort_code == 0)
78 return -EINVAL;
79 break;
80
81 case RXRPC_ACCEPT:
82 if (*command != RXRPC_CMD_SEND_DATA)
83 return -EINVAL;
84 *command = RXRPC_CMD_ACCEPT;
85 if (len != 0)
86 return -EINVAL;
87 if (!server)
88 return -EISCONN;
89 break;
90
91 default:
92 return -EINVAL;
93 }
94 }
95
96 _leave(" = 0");
97 return 0;
98}
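
From userspace, the control message this parser consumes would be built along these lines (a fragment; the call ID value is an arbitrary caller-chosen tag):

        union {
                struct cmsghdr hdr;
                char buf[CMSG_SPACE(sizeof(unsigned long))];
        } ctrl;
        struct msghdr msg;
        struct cmsghdr *cm;
        unsigned long call_id = 1;      /* caller's tag for this call */

        memset(&msg, 0, sizeof(msg));
        memset(&ctrl, 0, sizeof(ctrl));
        msg.msg_control = ctrl.buf;
        msg.msg_controllen = sizeof(ctrl.buf);

        cm = CMSG_FIRSTHDR(&msg);
        cm->cmsg_level = SOL_RXRPC;
        cm->cmsg_type = RXRPC_USER_CALL_ID;
        cm->cmsg_len = CMSG_LEN(sizeof(call_id));
        memcpy(CMSG_DATA(cm), &call_id, sizeof(call_id));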
99
100/*
101 * abort a call, sending an ABORT packet to the peer
102 */
103static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code)
104{
105 write_lock_bh(&call->state_lock);
106
107 if (call->state < RXRPC_CALL_COMPLETE) {
108 call->state = RXRPC_CALL_LOCALLY_ABORTED;
109 call->abort_code = abort_code;
110 set_bit(RXRPC_CALL_ABORT, &call->events);
111 del_timer_sync(&call->resend_timer);
112 del_timer_sync(&call->ack_timer);
113 clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
114 clear_bit(RXRPC_CALL_ACK, &call->events);
115 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
116 rxrpc_queue_call(call);
117 }
118
119 write_unlock_bh(&call->state_lock);
120}
121
122/*
123 * send a message forming part of a client call through an RxRPC socket
124 * - caller holds the socket locked
125 * - the socket may be either a client socket or a server socket
126 */
127int rxrpc_client_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx,
128 struct rxrpc_transport *trans, struct msghdr *msg,
129 size_t len)
130{
131 struct rxrpc_conn_bundle *bundle;
132 enum rxrpc_command cmd;
133 struct rxrpc_call *call;
134 unsigned long user_call_ID = 0;
135 struct key *key;
136 __be16 service_id;
137 u32 abort_code = 0;
138 int ret;
139
140 _enter("");
141
142 ASSERT(trans != NULL);
143
144 ret = rxrpc_sendmsg_cmsg(rx, msg, &user_call_ID, &cmd, &abort_code,
145 false);
146 if (ret < 0)
147 return ret;
148
149 bundle = NULL;
150 if (trans) {
151 service_id = rx->service_id;
152 if (msg->msg_name) {
153 struct sockaddr_rxrpc *srx =
154 (struct sockaddr_rxrpc *) msg->msg_name;
155 service_id = htons(srx->srx_service);
156 }
157 key = rx->key;
158 if (key && !rx->key->payload.data)
159 key = NULL;
160 bundle = rxrpc_get_bundle(rx, trans, key, service_id,
161 GFP_KERNEL);
162 if (IS_ERR(bundle))
163 return PTR_ERR(bundle);
164 }
165
166 call = rxrpc_get_client_call(rx, trans, bundle, user_call_ID,
167 abort_code == 0, GFP_KERNEL);
168 if (trans)
169 rxrpc_put_bundle(trans, bundle);
170 if (IS_ERR(call)) {
171 _leave(" = %ld", PTR_ERR(call));
172 return PTR_ERR(call);
173 }
174
175 _debug("CALL %d USR %lx ST %d on CONN %p",
176 call->debug_id, call->user_call_ID, call->state, call->conn);
177
178 if (call->state >= RXRPC_CALL_COMPLETE) {
179 /* it's too late for this call */
180 ret = -ESHUTDOWN;
181 } else if (cmd == RXRPC_CMD_SEND_ABORT) {
182 rxrpc_send_abort(call, abort_code);
183 } else if (cmd != RXRPC_CMD_SEND_DATA) {
184 ret = -EINVAL;
185 } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
186 /* request phase complete for this client call */
187 ret = -EPROTO;
188 } else {
189 ret = rxrpc_send_data(iocb, rx, call, msg, len);
190 }
191
192 rxrpc_put_call(call);
193 _leave(" = %d", ret);
194 return ret;
195}
196
197/**
198 * rxrpc_kernel_send_data - Allow a kernel service to send data on a call
199 * @call: The call to send data through
200 * @msg: The data to send
201 * @len: The amount of data to send
202 *
203 * Allow a kernel service to send data on a call. The call must be in a state
204 * appropriate to sending data. No control data should be supplied in @msg,
205 * nor should an address be supplied. MSG_MORE should be flagged if there's
206 * more data to come, otherwise this data will end the transmission phase.
207 */
208int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg,
209 size_t len)
210{
211 int ret;
212
213 _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
214
215 ASSERTCMP(msg->msg_name, ==, NULL);
216 ASSERTCMP(msg->msg_control, ==, NULL);
217
218 lock_sock(&call->socket->sk);
219
220 _debug("CALL %d USR %lx ST %d on CONN %p",
221 call->debug_id, call->user_call_ID, call->state, call->conn);
222
223 if (call->state >= RXRPC_CALL_COMPLETE) {
224 ret = -ESHUTDOWN; /* it's too late for this call */
225 } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
226 call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
227 call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
228 ret = -EPROTO; /* request phase complete for this client call */
229 } else {
230 mm_segment_t oldfs = get_fs();
231 set_fs(KERNEL_DS);
232 ret = rxrpc_send_data(NULL, call->socket, call, msg, len);
233 set_fs(oldfs);
234 }
235
236 release_sock(&call->socket->sk);
237 _leave(" = %d", ret);
238 return ret;
239}
240
241EXPORT_SYMBOL(rxrpc_kernel_send_data);
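
A sketch of an in-kernel caller (fs/afs is the intended consumer); request, reqsz and call are assumed context here, and leaving MSG_MORE clear marks this as the final buffer of the Tx phase:

        struct msghdr msg;
        struct iovec iov;
        int ret;

        iov.iov_base = request;
        iov.iov_len = reqsz;
        msg.msg_name = NULL;
        msg.msg_namelen = 0;
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = NULL;
        msg.msg_controllen = 0;
        msg.msg_flags = 0;              /* no MSG_MORE: this ends the Tx phase */

        ret = rxrpc_kernel_send_data(call, &msg, reqsz);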
242
243/**
244 * rxrpc_kernel_abort_call - Allow a kernel service to abort a call
245 * @call: The call to be aborted
246 * @abort_code: The abort code to stick into the ABORT packet
247 *
248 * Allow a kernel service to abort a call, if it's still in an abortable state.
249 */
250void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code)
251{
252 _enter("{%d},%d", call->debug_id, abort_code);
253
254 lock_sock(&call->socket->sk);
255
256 _debug("CALL %d USR %lx ST %d on CONN %p",
257 call->debug_id, call->user_call_ID, call->state, call->conn);
258
259 if (call->state < RXRPC_CALL_COMPLETE)
260 rxrpc_send_abort(call, abort_code);
261
262 release_sock(&call->socket->sk);
263 _leave("");
264}
265
266EXPORT_SYMBOL(rxrpc_kernel_abort_call);
267
268/*
269 * send a message through a server socket
270 * - caller holds the socket locked
271 */
272int rxrpc_server_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx,
273 struct msghdr *msg, size_t len)
274{
275 enum rxrpc_command cmd;
276 struct rxrpc_call *call;
277 unsigned long user_call_ID = 0;
278 u32 abort_code = 0;
279 int ret;
280
281 _enter("");
282
283 ret = rxrpc_sendmsg_cmsg(rx, msg, &user_call_ID, &cmd, &abort_code,
284 true);
285 if (ret < 0)
286 return ret;
287
288 if (cmd == RXRPC_CMD_ACCEPT) {
289 call = rxrpc_accept_call(rx, user_call_ID);
290 if (IS_ERR(call))
291 return PTR_ERR(call);
292 rxrpc_put_call(call);
293 return 0;
294 }
295
296 call = rxrpc_find_server_call(rx, user_call_ID);
297 if (!call)
298 return -EBADSLT;
299 if (call->state >= RXRPC_CALL_COMPLETE) {
300 ret = -ESHUTDOWN;
301 goto out;
302 }
303
304 switch (cmd) {
305 case RXRPC_CMD_SEND_DATA:
306 if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
307 call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
308 call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
309 /* Tx phase not yet begun for this call */
310 ret = -EPROTO;
311 break;
312 }
313
314 ret = rxrpc_send_data(iocb, rx, call, msg, len);
315 break;
316
317 case RXRPC_CMD_SEND_ABORT:
318 rxrpc_send_abort(call, abort_code);
319 break;
320 default:
321 BUG();
322 }
323
324 out:
325 rxrpc_put_call(call);
326 _leave(" = %d", ret);
327 return ret;
328}
329
330/*
331 * send a packet through the transport endpoint
332 */
333int rxrpc_send_packet(struct rxrpc_transport *trans, struct sk_buff *skb)
334{
335 struct kvec iov[1];
336 struct msghdr msg;
337 int ret, opt;
338
339 _enter(",{%d}", skb->len);
340
341 iov[0].iov_base = skb->head;
342 iov[0].iov_len = skb->len;
343
344 msg.msg_name = &trans->peer->srx.transport.sin;
345 msg.msg_namelen = sizeof(trans->peer->srx.transport.sin);
346 msg.msg_control = NULL;
347 msg.msg_controllen = 0;
348 msg.msg_flags = 0;
349
350 /* send the packet with the don't fragment bit set if we currently
351 * think it's small enough */
352 if (skb->len - sizeof(struct rxrpc_header) < trans->peer->maxdata) {
353 down_read(&trans->local->defrag_sem);
354 /* send the packet by UDP
355 * - returns -EMSGSIZE if UDP would have to fragment the packet
356 * to go out of the interface
357 * - in which case, we'll have processed the ICMP error
358 * message and updated the peer record
359 */
360 ret = kernel_sendmsg(trans->local->socket, &msg, iov, 1,
361 iov[0].iov_len);
362
363 up_read(&trans->local->defrag_sem);
364 if (ret == -EMSGSIZE)
365 goto send_fragmentable;
366
367 _leave(" = %d [%u]", ret, trans->peer->maxdata);
368 return ret;
369 }
370
371send_fragmentable:
372 /* attempt to send this message with fragmentation enabled */
373 _debug("send fragment");
374
375 down_write(&trans->local->defrag_sem);
376 opt = IP_PMTUDISC_DONT;
377 ret = kernel_setsockopt(trans->local->socket, SOL_IP, IP_MTU_DISCOVER,
378 (char *) &opt, sizeof(opt));
379 if (ret == 0) {
380 ret = kernel_sendmsg(trans->local->socket, &msg, iov, 1,
381 iov[0].iov_len);
382
383 opt = IP_PMTUDISC_DO;
384 kernel_setsockopt(trans->local->socket, SOL_IP,
385 IP_MTU_DISCOVER, (char *) &opt, sizeof(opt));
386 }
387
388 up_write(&trans->local->defrag_sem);
389 _leave(" = %d [frag %u]", ret, trans->peer->maxdata);
390 return ret;
391}
392
393/*
394 * wait for space to appear in the transmit/ACK window
395 * - caller holds the socket locked
396 */
397static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
398 struct rxrpc_call *call,
399 long *timeo)
400{
401 DECLARE_WAITQUEUE(myself, current);
402 int ret;
403
404 _enter(",{%d},%ld",
405 CIRC_SPACE(call->acks_head, call->acks_tail, call->acks_winsz),
406 *timeo);
407
408 add_wait_queue(&call->tx_waitq, &myself);
409
410 for (;;) {
411 set_current_state(TASK_INTERRUPTIBLE);
412 ret = 0;
413 if (CIRC_SPACE(call->acks_head, call->acks_tail,
414 call->acks_winsz) > 0)
415 break;
416 if (signal_pending(current)) {
417 ret = sock_intr_errno(*timeo);
418 break;
419 }
420
421 release_sock(&rx->sk);
422 *timeo = schedule_timeout(*timeo);
423 lock_sock(&rx->sk);
424 }
425
426 remove_wait_queue(&call->tx_waitq, &myself);
427 set_current_state(TASK_RUNNING);
428 _leave(" = %d", ret);
429 return ret;
430}
431
432/*
433 * attempt to schedule an instant Tx resend
434 */
435static inline void rxrpc_instant_resend(struct rxrpc_call *call)
436{
437 read_lock_bh(&call->state_lock);
438 if (try_to_del_timer_sync(&call->resend_timer) >= 0) {
439 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
440 if (call->state < RXRPC_CALL_COMPLETE &&
441 !test_and_set_bit(RXRPC_CALL_RESEND_TIMER, &call->events))
442 rxrpc_queue_call(call);
443 }
444 read_unlock_bh(&call->state_lock);
445}
446
447/*
448 * queue a packet for transmission, set the resend timer and attempt
449 * to send the packet immediately
450 */
451static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
452 bool last)
453{
454 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
455 int ret;
456
457 _net("queue skb %p [%d]", skb, call->acks_head);
458
459 ASSERT(call->acks_window != NULL);
460 call->acks_window[call->acks_head] = (unsigned long) skb;
461 smp_wmb();
462 call->acks_head = (call->acks_head + 1) & (call->acks_winsz - 1);
463
464 if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) {
465 _debug("________awaiting reply/ACK__________");
466 write_lock_bh(&call->state_lock);
467 switch (call->state) {
468 case RXRPC_CALL_CLIENT_SEND_REQUEST:
469 call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
470 break;
471 case RXRPC_CALL_SERVER_ACK_REQUEST:
472 call->state = RXRPC_CALL_SERVER_SEND_REPLY;
473 if (!last)
474 break;
475 case RXRPC_CALL_SERVER_SEND_REPLY: /* falls through from above when last */
476 call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
477 break;
478 default:
479 break;
480 }
481 write_unlock_bh(&call->state_lock);
482 }
483
484 _proto("Tx DATA %%%u { #%u }",
485 ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
486
487 sp->need_resend = 0;
488 sp->resend_at = jiffies + rxrpc_resend_timeout * HZ;
489 if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) {
490 _debug("run timer");
491 call->resend_timer.expires = sp->resend_at;
492 add_timer(&call->resend_timer);
493 }
494
495 /* attempt to cancel the rx-ACK timer, deferring reply transmission if
496 * we're ACK'ing the request phase of an incoming call */
497 ret = -EAGAIN;
498 if (try_to_del_timer_sync(&call->ack_timer) >= 0) {
499 /* the packet may be freed by rxrpc_process_call() before this
500 * returns */
501 ret = rxrpc_send_packet(call->conn->trans, skb);
502 _net("sent skb %p", skb);
503 } else {
504 _debug("failed to delete ACK timer");
505 }
506
507 if (ret < 0) {
508 _debug("need instant resend %d", ret);
509 sp->need_resend = 1;
510 rxrpc_instant_resend(call);
511 }
512
513 _leave("");
514}
515
516/*
517 * send data through a socket
518 * - must be called in process context
519 * - caller holds the socket locked
520 */
521static int rxrpc_send_data(struct kiocb *iocb,
522 struct rxrpc_sock *rx,
523 struct rxrpc_call *call,
524 struct msghdr *msg, size_t len)
525{
526 struct rxrpc_skb_priv *sp;
527 unsigned char __user *from;
528 struct sk_buff *skb;
529 struct iovec *iov;
530 struct sock *sk = &rx->sk;
531 long timeo;
532 bool more;
533 int ret, ioc, segment, copied;
534
535 _enter(",,,{%zu},%zu", msg->msg_iovlen, len);
536
537 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
538
539 /* this should be in poll */
540 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
541
542 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
543 return -EPIPE;
544
545 iov = msg->msg_iov;
546 ioc = msg->msg_iovlen - 1;
547 from = iov->iov_base;
548 segment = iov->iov_len;
549 iov++;
550 more = msg->msg_flags & MSG_MORE;
551
552 skb = call->tx_pending;
553 call->tx_pending = NULL;
554
555 copied = 0;
556 do {
557 int copy;
558
559 if (segment > len)
560 segment = len;
561
562 _debug("SEGMENT %d @%p", segment, from);
563
564 if (!skb) {
565 size_t size, chunk, max, space;
566
567 _debug("alloc");
568
569 if (CIRC_SPACE(call->acks_head, call->acks_tail,
570 call->acks_winsz) <= 0) {
571 ret = -EAGAIN;
572 if (msg->msg_flags & MSG_DONTWAIT)
573 goto maybe_error;
574 ret = rxrpc_wait_for_tx_window(rx, call,
575 &timeo);
576 if (ret < 0)
577 goto maybe_error;
578 }
579
580 max = call->conn->trans->peer->maxdata;
581 max -= call->conn->security_size;
582 max &= ~(call->conn->size_align - 1UL);
583
584 chunk = max;
585 if (chunk > len)
586 chunk = len;
587
588 space = chunk + call->conn->size_align;
589 space &= ~(call->conn->size_align - 1UL);
590
591 size = space + call->conn->header_size;
592
593 _debug("SIZE: %zu/%zu/%zu", chunk, space, size);
594
595 /* create a buffer that we can retain until it's ACK'd */
596 skb = sock_alloc_send_skb(
597 sk, size, msg->msg_flags & MSG_DONTWAIT, &ret);
598 if (!skb)
599 goto maybe_error;
600
601 rxrpc_new_skb(skb);
602
603 _debug("ALLOC SEND %p", skb);
604
605 ASSERTCMP(skb->mark, ==, 0);
606
607 _debug("HS: %u", call->conn->header_size);
608 skb_reserve(skb, call->conn->header_size);
609 skb->len += call->conn->header_size;
610
611 sp = rxrpc_skb(skb);
612 sp->remain = chunk;
613 if (sp->remain > skb_tailroom(skb))
614 sp->remain = skb_tailroom(skb);
615
616 _net("skb: hr %d, tr %d, hl %d, rm %d",
617 skb_headroom(skb),
618 skb_tailroom(skb),
619 skb_headlen(skb),
620 sp->remain);
621
622 skb->ip_summed = CHECKSUM_UNNECESSARY;
623 }
624
625 _debug("append");
626 sp = rxrpc_skb(skb);
627
628 /* append next segment of data to the current buffer */
629 copy = skb_tailroom(skb);
630 ASSERTCMP(copy, >, 0);
631 if (copy > segment)
632 copy = segment;
633 if (copy > sp->remain)
634 copy = sp->remain;
635
636 _debug("add");
637 ret = skb_add_data(skb, from, copy);
638 _debug("added");
639 if (ret < 0)
640 goto efault;
641 sp->remain -= copy;
642 skb->mark += copy;
643 copied += copy;
643
644 len -= copy;
645 segment -= copy;
646 from += copy;
647 while (segment == 0 && ioc > 0) {
648 from = iov->iov_base;
649 segment = iov->iov_len;
650 iov++;
651 ioc--;
652 }
653 if (len == 0) {
654 segment = 0;
655 ioc = 0;
656 }
657
658 /* check for the far side aborting the call or a network error
659 * occurring */
660 if (call->state > RXRPC_CALL_COMPLETE)
661 goto call_aborted;
662
663 /* add the packet to the send queue if it's now full */
664 if (sp->remain <= 0 || (segment == 0 && !more)) {
665 struct rxrpc_connection *conn = call->conn;
666 size_t pad;
667
668 /* pad out if we're using security */
669 if (conn->security) {
670 pad = conn->security_size + skb->mark;
671 pad = conn->size_align - pad;
672 pad &= conn->size_align - 1;
673 _debug("pad %zu", pad);
674 if (pad)
675 memset(skb_put(skb, pad), 0, pad);
676 }
677
678 sp->hdr.epoch = conn->epoch;
679 sp->hdr.cid = call->cid;
680 sp->hdr.callNumber = call->call_id;
681 sp->hdr.seq =
682 htonl(atomic_inc_return(&call->sequence));
683 sp->hdr.serial =
684 htonl(atomic_inc_return(&conn->serial));
685 sp->hdr.type = RXRPC_PACKET_TYPE_DATA;
686 sp->hdr.userStatus = 0;
687 sp->hdr.securityIndex = conn->security_ix;
688 sp->hdr._rsvd = 0;
689 sp->hdr.serviceId = conn->service_id;
690
691 sp->hdr.flags = conn->out_clientflag;
692 if (len == 0 && !more)
693 sp->hdr.flags |= RXRPC_LAST_PACKET;
694 else if (CIRC_SPACE(call->acks_head, call->acks_tail,
695 call->acks_winsz) > 1)
696 sp->hdr.flags |= RXRPC_MORE_PACKETS;
697
698 ret = rxrpc_secure_packet(
699 call, skb, skb->mark,
700 skb->head + sizeof(struct rxrpc_header));
701 if (ret < 0)
702 goto out;
703
704 memcpy(skb->head, &sp->hdr,
705 sizeof(struct rxrpc_header));
706 rxrpc_queue_packet(call, skb, segment == 0 && !more);
707 skb = NULL;
708 }
709
710 } while (segment > 0);
711
712out:
713 call->tx_pending = skb;
714 _leave(" = %d", ret);
715 return ret;
716
717call_aborted:
718 rxrpc_free_skb(skb);
719 if (call->state == RXRPC_CALL_NETWORK_ERROR)
720 ret = call->conn->trans->peer->net_error;
721 else
722 ret = -ECONNABORTED;
723 _leave(" = %d", ret);
724 return ret;
725
726maybe_error:
727 if (copied)
728 ret = copied;
729 goto out;
730
731efault:
732 ret = -EFAULT;
733 goto out;
734}
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
new file mode 100644
index 000000000000..d399de4a7fe2
--- /dev/null
+++ b/net/rxrpc/ar-peer.c
@@ -0,0 +1,273 @@
1/* RxRPC remote transport endpoint management
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/net.h>
14#include <linux/skbuff.h>
15#include <linux/udp.h>
16#include <linux/in.h>
17#include <linux/in6.h>
18#include <linux/icmp.h>
19#include <net/sock.h>
20#include <net/af_rxrpc.h>
21#include <net/ip.h>
22#include "ar-internal.h"
23
24static LIST_HEAD(rxrpc_peers);
25static DEFINE_RWLOCK(rxrpc_peer_lock);
26static DECLARE_WAIT_QUEUE_HEAD(rxrpc_peer_wq);
27
28static void rxrpc_destroy_peer(struct work_struct *work);
29
30/*
31 * allocate a new peer
32 */
33static struct rxrpc_peer *rxrpc_alloc_peer(struct sockaddr_rxrpc *srx,
34 gfp_t gfp)
35{
36 struct rxrpc_peer *peer;
37
38 _enter("");
39
40 peer = kzalloc(sizeof(struct rxrpc_peer), gfp);
41 if (peer) {
42 INIT_WORK(&peer->destroyer, &rxrpc_destroy_peer);
43 INIT_LIST_HEAD(&peer->link);
44 INIT_LIST_HEAD(&peer->error_targets);
45 spin_lock_init(&peer->lock);
46 atomic_set(&peer->usage, 1);
47 peer->debug_id = atomic_inc_return(&rxrpc_debug_id);
48 memcpy(&peer->srx, srx, sizeof(*srx));
49
50 peer->mtu = peer->if_mtu = 65535;
51
52 if (srx->transport.family == AF_INET) {
53 peer->hdrsize = sizeof(struct iphdr);
54 switch (srx->transport_type) {
55 case SOCK_DGRAM:
56 peer->hdrsize += sizeof(struct udphdr);
57 break;
58 default:
59 BUG();
60 break;
61 }
62 } else {
63 BUG();
64 }
65
66 peer->hdrsize += sizeof(struct rxrpc_header);
67 peer->maxdata = peer->mtu - peer->hdrsize;
68 }
69
70 _leave(" = %p", peer);
71 return peer;
72}
73
74/*
75 * obtain a remote transport endpoint for the specified address
76 */
77struct rxrpc_peer *rxrpc_get_peer(struct sockaddr_rxrpc *srx, gfp_t gfp)
78{
79 struct rxrpc_peer *peer, *candidate;
80 const char *new = "old";
81 int usage;
82
83 _enter("{%d,%d,%u.%u.%u.%u+%hu}",
84 srx->transport_type,
85 srx->transport_len,
86 NIPQUAD(srx->transport.sin.sin_addr),
87 ntohs(srx->transport.sin.sin_port));
88
89 /* search the peer list first */
90 read_lock_bh(&rxrpc_peer_lock);
91 list_for_each_entry(peer, &rxrpc_peers, link) {
92 _debug("check PEER %d { u=%d t=%d l=%d }",
93 peer->debug_id,
94 atomic_read(&peer->usage),
95 peer->srx.transport_type,
96 peer->srx.transport_len);
97
98 if (atomic_read(&peer->usage) > 0 &&
99 peer->srx.transport_type == srx->transport_type &&
100 peer->srx.transport_len == srx->transport_len &&
101 memcmp(&peer->srx.transport,
102 &srx->transport,
103 srx->transport_len) == 0)
104 goto found_extant_peer;
105 }
106 read_unlock_bh(&rxrpc_peer_lock);
107
108 /* not yet present - create a candidate for a new record and then
109 * redo the search */
110 candidate = rxrpc_alloc_peer(srx, gfp);
111 if (!candidate) {
112 _leave(" = -ENOMEM");
113 return ERR_PTR(-ENOMEM);
114 }
115
116 write_lock_bh(&rxrpc_peer_lock);
117
118 list_for_each_entry(peer, &rxrpc_peers, link) {
119 if (atomic_read(&peer->usage) > 0 &&
120 peer->srx.transport_type == srx->transport_type &&
121 peer->srx.transport_len == srx->transport_len &&
122 memcmp(&peer->srx.transport,
123 &srx->transport,
124 srx->transport_len) == 0)
125 goto found_extant_second;
126 }
127
128 /* we can now add the new candidate to the list */
129 peer = candidate;
130 candidate = NULL;
131
132 list_add_tail(&peer->link, &rxrpc_peers);
133 write_unlock_bh(&rxrpc_peer_lock);
134 new = "new";
135
136success:
137 _net("PEER %s %d {%d,%u,%u.%u.%u.%u+%hu}",
138 new,
139 peer->debug_id,
140 peer->srx.transport_type,
141 peer->srx.transport.family,
142 NIPQUAD(peer->srx.transport.sin.sin_addr),
143 ntohs(peer->srx.transport.sin.sin_port));
144
145 _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage));
146 return peer;
147
148 /* we found the peer in the list immediately */
149found_extant_peer:
150 usage = atomic_inc_return(&peer->usage);
151 read_unlock_bh(&rxrpc_peer_lock);
152 goto success;
153
154 /* we found the peer on the second time through the list */
155found_extant_second:
156 usage = atomic_inc_return(&peer->usage);
157 write_unlock_bh(&rxrpc_peer_lock);
158 kfree(candidate);
159 goto success;
160}
161
162/*
163 * find the peer associated with a packet
164 */
165struct rxrpc_peer *rxrpc_find_peer(struct rxrpc_local *local,
166 __be32 addr, __be16 port)
167{
168 struct rxrpc_peer *peer;
169
170 _enter("");
171
172 /* search the peer list */
173 read_lock_bh(&rxrpc_peer_lock);
174
175 if (local->srx.transport.family == AF_INET &&
176 local->srx.transport_type == SOCK_DGRAM
177 ) {
178 list_for_each_entry(peer, &rxrpc_peers, link) {
179 if (atomic_read(&peer->usage) > 0 &&
180 peer->srx.transport_type == SOCK_DGRAM &&
181 peer->srx.transport.family == AF_INET &&
182 peer->srx.transport.sin.sin_port == port &&
183 peer->srx.transport.sin.sin_addr.s_addr == addr)
184 goto found_UDP_peer;
185 }
186
187 goto new_UDP_peer;
188 }
189
190 read_unlock_bh(&rxrpc_peer_lock);
191 _leave(" = -EAFNOSUPPORT");
192 return ERR_PTR(-EAFNOSUPPORT);
193
194found_UDP_peer:
195 _net("Rx UDP DGRAM from peer %d", peer->debug_id);
196 atomic_inc(&peer->usage);
197 read_unlock_bh(&rxrpc_peer_lock);
198 _leave(" = %p", peer);
199 return peer;
200
201new_UDP_peer:
202 _net("Rx UDP DGRAM from NEW peer %d", peer->debug_id);
203 read_unlock_bh(&rxrpc_peer_lock);
204 _leave(" = -EBUSY [new]");
205 return ERR_PTR(-EBUSY);
206}
207
208/*
209 * release a remote transport endpoint
210 */
211void rxrpc_put_peer(struct rxrpc_peer *peer)
212{
213 _enter("%p{u=%d}", peer, atomic_read(&peer->usage));
214
215 ASSERTCMP(atomic_read(&peer->usage), >, 0);
216
217 if (likely(!atomic_dec_and_test(&peer->usage))) {
218 _leave(" [in use]");
219 return;
220 }
221
222 rxrpc_queue_work(&peer->destroyer);
223 _leave("");
224}
225
226/*
227 * destroy a remote transport endpoint
228 */
229static void rxrpc_destroy_peer(struct work_struct *work)
230{
231 struct rxrpc_peer *peer =
232 container_of(work, struct rxrpc_peer, destroyer);
233
234 _enter("%p{%d}", peer, atomic_read(&peer->usage));
235
236 write_lock_bh(&rxrpc_peer_lock);
237 list_del(&peer->link);
238 write_unlock_bh(&rxrpc_peer_lock);
239
240 _net("DESTROY PEER %d", peer->debug_id);
241 kfree(peer);
242
243 if (list_empty(&rxrpc_peers))
244 wake_up_all(&rxrpc_peer_wq);
245 _leave("");
246}
247
248/*
249 * preemptively destroy all the peer records from a transport endpoint rather
250 * than waiting for them to time out
251 */
252void __exit rxrpc_destroy_all_peers(void)
253{
254 DECLARE_WAITQUEUE(myself, current);
255
256 _enter("");
257
258 /* we simply have to wait for them to go away */
259 if (!list_empty(&rxrpc_peers)) {
260 set_current_state(TASK_UNINTERRUPTIBLE);
261 add_wait_queue(&rxrpc_peer_wq, &myself);
262
263 while (!list_empty(&rxrpc_peers)) {
264 schedule();
265 set_current_state(TASK_UNINTERRUPTIBLE);
266 }
267
268 remove_wait_queue(&rxrpc_peer_wq, &myself);
269 set_current_state(TASK_RUNNING);
270 }
271
272 _leave("");
273}
diff --git a/net/rxrpc/ar-proc.c b/net/rxrpc/ar-proc.c
new file mode 100644
index 000000000000..58f4b4e5cece
--- /dev/null
+++ b/net/rxrpc/ar-proc.c
@@ -0,0 +1,247 @@
1/* /proc/net/ support for AF_RXRPC
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <net/sock.h>
14#include <net/af_rxrpc.h>
15#include "ar-internal.h"
16
17static const char *rxrpc_conn_states[] = {
18 [RXRPC_CONN_UNUSED] = "Unused ",
19 [RXRPC_CONN_CLIENT] = "Client ",
20 [RXRPC_CONN_SERVER_UNSECURED] = "SvUnsec ",
21 [RXRPC_CONN_SERVER_CHALLENGING] = "SvChall ",
22 [RXRPC_CONN_SERVER] = "SvSecure",
23 [RXRPC_CONN_REMOTELY_ABORTED] = "RmtAbort",
24 [RXRPC_CONN_LOCALLY_ABORTED] = "LocAbort",
25 [RXRPC_CONN_NETWORK_ERROR] = "NetError",
26};
27
28const char *rxrpc_call_states[] = {
29 [RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq",
30 [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl",
31 [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl",
32 [RXRPC_CALL_CLIENT_FINAL_ACK] = "ClFnlACK",
33 [RXRPC_CALL_SERVER_SECURING] = "SvSecure",
34 [RXRPC_CALL_SERVER_ACCEPTING] = "SvAccept",
35 [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq",
36 [RXRPC_CALL_SERVER_ACK_REQUEST] = "SvAckReq",
37 [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl",
38 [RXRPC_CALL_SERVER_AWAIT_ACK] = "SvAwtACK",
39 [RXRPC_CALL_COMPLETE] = "Complete",
40 [RXRPC_CALL_SERVER_BUSY] = "SvBusy ",
41 [RXRPC_CALL_REMOTELY_ABORTED] = "RmtAbort",
42 [RXRPC_CALL_LOCALLY_ABORTED] = "LocAbort",
43 [RXRPC_CALL_NETWORK_ERROR] = "NetError",
44 [RXRPC_CALL_DEAD] = "Dead ",
45};
46
47/*
48 * generate a list of extant and dead calls in /proc/net/rxrpc_calls
49 */
50static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos)
51{
52 struct list_head *_p;
53 loff_t pos = *_pos;
54
55 read_lock(&rxrpc_call_lock);
56 if (!pos)
57 return SEQ_START_TOKEN;
58 pos--;
59
60 list_for_each(_p, &rxrpc_calls)
61 if (!pos--)
62 break;
63
64 return _p != &rxrpc_calls ? _p : NULL;
65}
66
67static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos)
68{
69 struct list_head *_p;
70
71 (*pos)++;
72
73 _p = v;
74 _p = (v == SEQ_START_TOKEN) ? rxrpc_calls.next : _p->next;
75
76 return _p != &rxrpc_calls ? _p : NULL;
77}
78
79static void rxrpc_call_seq_stop(struct seq_file *seq, void *v)
80{
81 read_unlock(&rxrpc_call_lock);
82}
83
84static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
85{
86 struct rxrpc_transport *trans;
87 struct rxrpc_call *call;
88 char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1];
89
90 if (v == SEQ_START_TOKEN) {
91 seq_puts(seq,
92 "Proto Local Remote "
93 " SvID ConnID CallID End Use State Abort "
94 " UserID\n");
95 return 0;
96 }
97
98 call = list_entry(v, struct rxrpc_call, link);
99 trans = call->conn->trans;
100
101 sprintf(lbuff, NIPQUAD_FMT":%u",
102 NIPQUAD(trans->local->srx.transport.sin.sin_addr),
103 ntohs(trans->local->srx.transport.sin.sin_port));
104
105 sprintf(rbuff, NIPQUAD_FMT":%u",
106 NIPQUAD(trans->peer->srx.transport.sin.sin_addr),
107 ntohs(trans->peer->srx.transport.sin.sin_port));
108
109 seq_printf(seq,
110 "UDP %-22.22s %-22.22s %4x %08x %08x %s %3u"
111 " %-8.8s %08x %lx\n",
112 lbuff,
113 rbuff,
114 ntohs(call->conn->service_id),
115 ntohl(call->conn->cid),
116 ntohl(call->call_id),
117 call->conn->in_clientflag ? "Svc" : "Clt",
118 atomic_read(&call->usage),
119 rxrpc_call_states[call->state],
120 call->abort_code,
121 call->user_call_ID);
122
123 return 0;
124}
125
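
For reference, given the header and format string above, one record in /proc/net/rxrpc_calls comes out roughly as below; every address and identifier here is invented for illustration:

Proto Local                  Remote                 SvID ConnID   CallID   End Use State    Abort    UserID
UDP   172.16.18.91:7001      172.16.18.1:7000       0034 6e26300c 00000001 Clt   2 ClAwtRpl 00000000 bc614e
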
126static struct seq_operations rxrpc_call_seq_ops = {
127 .start = rxrpc_call_seq_start,
128 .next = rxrpc_call_seq_next,
129 .stop = rxrpc_call_seq_stop,
130 .show = rxrpc_call_seq_show,
131};
132
133static int rxrpc_call_seq_open(struct inode *inode, struct file *file)
134{
135 return seq_open(file, &rxrpc_call_seq_ops);
136}
137
138struct file_operations rxrpc_call_seq_fops = {
139 .owner = THIS_MODULE,
140 .open = rxrpc_call_seq_open,
141 .read = seq_read,
142 .llseek = seq_lseek,
143 .release = seq_release_private,
144};
145
146/*
147 * generate a list of extant virtual connections in /proc/net/rxrpc_conns
148 */
149static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos)
150{
151 struct list_head *_p;
152 loff_t pos = *_pos;
153
154 read_lock(&rxrpc_connection_lock);
155 if (!pos)
156 return SEQ_START_TOKEN;
157 pos--;
158
159 list_for_each(_p, &rxrpc_connections)
160 if (!pos--)
161 break;
162
163 return _p != &rxrpc_connections ? _p : NULL;
164}
165
166static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v,
167 loff_t *pos)
168{
169 struct list_head *_p;
170
171 (*pos)++;
172
173 _p = v;
174 _p = (v == SEQ_START_TOKEN) ? rxrpc_connections.next : _p->next;
175
176 return _p != &rxrpc_connections ? _p : NULL;
177}
178
179static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v)
180{
181 read_unlock(&rxrpc_connection_lock);
182}
183
184static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
185{
186 struct rxrpc_connection *conn;
187 struct rxrpc_transport *trans;
188 char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1];
189
190 if (v == SEQ_START_TOKEN) {
191 seq_puts(seq,
192 "Proto Local Remote "
193 " SvID ConnID Calls End Use State Key "
194 " Serial ISerial\n"
195 );
196 return 0;
197 }
198
199 conn = list_entry(v, struct rxrpc_connection, link);
200 trans = conn->trans;
201
202 sprintf(lbuff, NIPQUAD_FMT":%u",
203 NIPQUAD(trans->local->srx.transport.sin.sin_addr),
204 ntohs(trans->local->srx.transport.sin.sin_port));
205
206 sprintf(rbuff, NIPQUAD_FMT":%u",
207 NIPQUAD(trans->peer->srx.transport.sin.sin_addr),
208 ntohs(trans->peer->srx.transport.sin.sin_port));
209
210 seq_printf(seq,
211 "UDP %-22.22s %-22.22s %4x %08x %08x %s %3u"
212 " %s %08x %08x %08x\n",
213 lbuff,
214 rbuff,
215 ntohs(conn->service_id),
216 ntohl(conn->cid),
217 conn->call_counter,
218 conn->in_clientflag ? "Svc" : "Clt",
219 atomic_read(&conn->usage),
220 rxrpc_conn_states[conn->state],
221 key_serial(conn->key),
222 atomic_read(&conn->serial),
223 atomic_read(&conn->hi_serial));
224
225 return 0;
226}
227
228static struct seq_operations rxrpc_connection_seq_ops = {
229 .start = rxrpc_connection_seq_start,
230 .next = rxrpc_connection_seq_next,
231 .stop = rxrpc_connection_seq_stop,
232 .show = rxrpc_connection_seq_show,
233};
234
235
236static int rxrpc_connection_seq_open(struct inode *inode, struct file *file)
237{
238 return seq_open(file, &rxrpc_connection_seq_ops);
239}
240
241struct file_operations rxrpc_connection_seq_fops = {
242 .owner = THIS_MODULE,
243 .open = rxrpc_connection_seq_open,
244 .read = seq_read,
245 .llseek = seq_lseek,
246 .release = seq_release_private,
247};
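
Both fops tables are non-static, so they are registered elsewhere in the series; the module init path is not part of this hunk. On a kernel of this vintage the wiring would presumably use the pre-namespace /proc helpers, roughly as in this sketch (the function name rxrpc_proc_init is invented):

#include <linux/proc_fs.h>

/* hypothetical init-time registration of the two seq_file interfaces
 * above; the real call sites live in af_rxrpc.c, outside this hunk */
static int __init rxrpc_proc_init(void)
{
	if (!proc_net_fops_create("rxrpc_calls", 0, &rxrpc_call_seq_fops))
		return -ENOMEM;
	if (!proc_net_fops_create("rxrpc_conns", 0,
				  &rxrpc_connection_seq_fops)) {
		proc_net_remove("rxrpc_calls");
		return -ENOMEM;
	}
	return 0;
}
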
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
new file mode 100644
index 000000000000..f19121d4795b
--- /dev/null
+++ b/net/rxrpc/ar-recvmsg.c
@@ -0,0 +1,437 @@
1/* RxRPC recvmsg() implementation
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/net.h>
13#include <linux/skbuff.h>
14#include <net/sock.h>
15#include <net/af_rxrpc.h>
16#include "ar-internal.h"
17
18/*
19 * remove a call's user ID from the socket tree to make the user ID available
20 * again and so that it won't be seen again in association with that call
21 */
22void rxrpc_remove_user_ID(struct rxrpc_sock *rx, struct rxrpc_call *call)
23{
24 _debug("RELEASE CALL %d", call->debug_id);
25
26 if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
27 write_lock_bh(&rx->call_lock);
28 rb_erase(&call->sock_node, &call->socket->calls);
29 clear_bit(RXRPC_CALL_HAS_USERID, &call->flags);
30 write_unlock_bh(&rx->call_lock);
31 }
32
33 read_lock_bh(&call->state_lock);
34 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
35 !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
36 rxrpc_queue_call(call);
37 read_unlock_bh(&call->state_lock);
38}
39
40/*
41 * receive a message from an RxRPC socket
42 * - we need to be careful about two or more threads calling recvmsg
43 * simultaneously
44 */
45int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
46 struct msghdr *msg, size_t len, int flags)
47{
48 struct rxrpc_skb_priv *sp;
49 struct rxrpc_call *call = NULL, *continue_call = NULL;
50 struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
51 struct sk_buff *skb;
52 long timeo;
53 int copy, ret, ullen, offset, copied = 0;
54 u32 abort_code;
55
56 DEFINE_WAIT(wait);
57
58 _enter(",,,%zu,%d", len, flags);
59
60 if (flags & (MSG_OOB | MSG_TRUNC))
61 return -EOPNOTSUPP;
62
63 ullen = msg->msg_flags & MSG_CMSG_COMPAT ? 4 : sizeof(unsigned long);
64
65 timeo = sock_rcvtimeo(&rx->sk, flags & MSG_DONTWAIT);
66 msg->msg_flags |= MSG_MORE;
67
68 lock_sock(&rx->sk);
69
70 for (;;) {
71 /* return immediately if a client socket has no outstanding
72 * calls */
73 if (RB_EMPTY_ROOT(&rx->calls)) {
74 if (copied)
75 goto out;
76 if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) {
77 release_sock(&rx->sk);
78 if (continue_call)
79 rxrpc_put_call(continue_call);
80 return -ENODATA;
81 }
82 }
83
84 /* get the next message on the Rx queue */
85 skb = skb_peek(&rx->sk.sk_receive_queue);
86 if (!skb) {
87 /* nothing remains on the queue */
88 if (copied &&
89 (msg->msg_flags & MSG_PEEK || timeo == 0))
90 goto out;
91
92 /* wait for a message to turn up */
93 release_sock(&rx->sk);
94 prepare_to_wait_exclusive(rx->sk.sk_sleep, &wait,
95 TASK_INTERRUPTIBLE);
96 ret = sock_error(&rx->sk);
97 if (ret)
98 goto wait_error;
99
100 if (skb_queue_empty(&rx->sk.sk_receive_queue)) {
101 if (signal_pending(current))
102 goto wait_interrupted;
103 timeo = schedule_timeout(timeo);
104 }
105 finish_wait(rx->sk.sk_sleep, &wait);
106 lock_sock(&rx->sk);
107 continue;
108 }
109
110 peek_next_packet:
111 sp = rxrpc_skb(skb);
112 call = sp->call;
113 ASSERT(call != NULL);
114
115 _debug("next pkt %s", rxrpc_pkts[sp->hdr.type]);
116
117 /* make sure we wait for the state to be updated in this call */
118 spin_lock_bh(&call->lock);
119 spin_unlock_bh(&call->lock);
120
121 if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) {
122 _debug("packet from released call");
123 if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
124 BUG();
125 rxrpc_free_skb(skb);
126 continue;
127 }
128
129 /* determine whether to continue last data receive */
130 if (continue_call) {
131 _debug("maybe cont");
132 if (call != continue_call ||
133 skb->mark != RXRPC_SKB_MARK_DATA) {
134 release_sock(&rx->sk);
135 rxrpc_put_call(continue_call);
136 _leave(" = %d [noncont]", copied);
137 return copied;
138 }
139 }
140
141 rxrpc_get_call(call);
142
143 /* copy the peer address and timestamp */
144 if (!continue_call) {
145 if (msg->msg_name && msg->msg_namelen > 0)
146 memcpy(&msg->msg_name, &call->conn->trans->peer->srx,
147 sizeof(call->conn->trans->peer->srx));
148 sock_recv_timestamp(msg, &rx->sk, skb);
149 }
150
151 /* receive the message */
152 if (skb->mark != RXRPC_SKB_MARK_DATA)
153 goto receive_non_data_message;
154
155 _debug("recvmsg DATA #%u { %d, %d }",
156 ntohl(sp->hdr.seq), skb->len, sp->offset);
157
158 if (!continue_call) {
159 /* only set the control data once per recvmsg() */
160 ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
161 ullen, &call->user_call_ID);
162 if (ret < 0)
163 goto copy_error;
164 ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
165 }
166
167 ASSERTCMP(ntohl(sp->hdr.seq), >=, call->rx_data_recv);
168 ASSERTCMP(ntohl(sp->hdr.seq), <=, call->rx_data_recv + 1);
169 call->rx_data_recv = ntohl(sp->hdr.seq);
170
171 ASSERTCMP(ntohl(sp->hdr.seq), >, call->rx_data_eaten);
172
173 offset = sp->offset;
174 copy = skb->len - offset;
175 if (copy > len - copied)
176 copy = len - copied;
177
178 if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
179 ret = skb_copy_datagram_iovec(skb, offset,
180 msg->msg_iov, copy);
181 } else {
182 ret = skb_copy_and_csum_datagram_iovec(skb, offset,
183 msg->msg_iov);
184 if (ret == -EINVAL)
185 goto csum_copy_error;
186 }
187
188 if (ret < 0)
189 goto copy_error;
190
191 /* handle piecemeal consumption of data packets */
192 _debug("copied %d+%d", copy, copied);
193
194 offset += copy;
195 copied += copy;
196
197 if (!(flags & MSG_PEEK))
198 sp->offset = offset;
199
200 if (sp->offset < skb->len) {
201 _debug("buffer full");
202 ASSERTCMP(copied, ==, len);
203 break;
204 }
205
206 /* we transferred the whole data packet */
207 if (sp->hdr.flags & RXRPC_LAST_PACKET) {
208 _debug("last");
209 if (call->conn->out_clientflag) {
210 /* last byte of reply received */
211 ret = copied;
212 goto terminal_message;
213 }
214
215 /* last bit of request received */
216 if (!(flags & MSG_PEEK)) {
217 _debug("eat packet");
218 if (skb_dequeue(&rx->sk.sk_receive_queue) !=
219 skb)
220 BUG();
221 rxrpc_free_skb(skb);
222 }
223 msg->msg_flags &= ~MSG_MORE;
224 break;
225 }
226
227 /* move on to the next data message */
228 _debug("next");
229 if (!continue_call)
230 continue_call = sp->call;
231 else
232 rxrpc_put_call(call);
233 call = NULL;
234
235 if (flags & MSG_PEEK) {
236 _debug("peek next");
237 skb = skb->next;
238 if (skb == (struct sk_buff *) &rx->sk.sk_receive_queue)
239 break;
240 goto peek_next_packet;
241 }
242
243 _debug("eat packet");
244 if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
245 BUG();
246 rxrpc_free_skb(skb);
247 }
248
249 /* end of non-terminal data packet reception for the moment */
250 _debug("end rcv data");
251out:
252 release_sock(&rx->sk);
253 if (call)
254 rxrpc_put_call(call);
255 if (continue_call)
256 rxrpc_put_call(continue_call);
257 _leave(" = %d [data]", copied);
258 return copied;
259
260 /* handle non-DATA messages such as aborts, incoming connections and
261 * final ACKs */
262receive_non_data_message:
263 _debug("non-data");
264
265 if (skb->mark == RXRPC_SKB_MARK_NEW_CALL) {
266 _debug("RECV NEW CALL");
267 ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &abort_code);
268 if (ret < 0)
269 goto copy_error;
270 if (!(flags & MSG_PEEK)) {
271 if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
272 BUG();
273 rxrpc_free_skb(skb);
274 }
275 goto out;
276 }
277
278 ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
279 ullen, &call->user_call_ID);
280 if (ret < 0)
281 goto copy_error;
282 ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
283
284 switch (skb->mark) {
285 case RXRPC_SKB_MARK_DATA:
286 BUG();
287 case RXRPC_SKB_MARK_FINAL_ACK:
288 ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &abort_code);
289 break;
290 case RXRPC_SKB_MARK_BUSY:
291 ret = put_cmsg(msg, SOL_RXRPC, RXRPC_BUSY, 0, &abort_code);
292 break;
293 case RXRPC_SKB_MARK_REMOTE_ABORT:
294 abort_code = call->abort_code;
295 ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code);
296 break;
297 case RXRPC_SKB_MARK_NET_ERROR:
298 _debug("RECV NET ERROR %d", sp->error);
299 abort_code = sp->error;
300 ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &abort_code);
301 break;
302 case RXRPC_SKB_MARK_LOCAL_ERROR:
303 _debug("RECV LOCAL ERROR %d", sp->error);
304 abort_code = sp->error;
305 ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4,
306 &abort_code);
307 break;
308 default:
309 BUG();
310 break;
311 }
312
313 if (ret < 0)
314 goto copy_error;
315
316terminal_message:
317 _debug("terminal");
318 msg->msg_flags &= ~MSG_MORE;
319 msg->msg_flags |= MSG_EOR;
320
321 if (!(flags & MSG_PEEK)) {
322 _net("free terminal skb %p", skb);
323 if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
324 BUG();
325 rxrpc_free_skb(skb);
326 rxrpc_remove_user_ID(rx, call);
327 }
328
329 release_sock(&rx->sk);
330 rxrpc_put_call(call);
331 if (continue_call)
332 rxrpc_put_call(continue_call);
333 _leave(" = %d", ret);
334 return ret;
335
336copy_error:
337 _debug("copy error");
338 release_sock(&rx->sk);
339 rxrpc_put_call(call);
340 if (continue_call)
341 rxrpc_put_call(continue_call);
342 _leave(" = %d", ret);
343 return ret;
344
345csum_copy_error:
346 _debug("csum error");
347 release_sock(&rx->sk);
348 if (continue_call)
349 rxrpc_put_call(continue_call);
350 rxrpc_kill_skb(skb);
351 skb_kill_datagram(&rx->sk, skb, flags);
352 rxrpc_put_call(call);
353 return -EAGAIN;
354
355wait_interrupted:
356 ret = sock_intr_errno(timeo);
357wait_error:
358 finish_wait(rx->sk.sk_sleep, &wait);
359 if (continue_call)
360 rxrpc_put_call(continue_call);
361 if (copied)
362 copied = ret;
363 _leave(" = %d [waitfail %d]", copied, ret);
364 return copied;
365
366}
367
368/**
369 * rxrpc_kernel_data_delivered - Record delivery of data message
370 * @skb: Message holding data
371 *
372 * Record the delivery of a data message. This permits RxRPC to keep its
373 * tracking correct. The socket buffer will be deleted.
374 */
375void rxrpc_kernel_data_delivered(struct sk_buff *skb)
376{
377 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
378 struct rxrpc_call *call = sp->call;
379
380 ASSERTCMP(ntohl(sp->hdr.seq), >=, call->rx_data_recv);
381 ASSERTCMP(ntohl(sp->hdr.seq), <=, call->rx_data_recv + 1);
382 call->rx_data_recv = ntohl(sp->hdr.seq);
383
384 ASSERTCMP(ntohl(sp->hdr.seq), >, call->rx_data_eaten);
385 rxrpc_free_skb(skb);
386}
387
388EXPORT_SYMBOL(rxrpc_kernel_data_delivered);
389
390/**
391 * rxrpc_kernel_is_data_last - Determine if data message is last one
392 * @skb: Message holding data
393 *
394 * Determine if data message is last one for the parent call.
395 */
396bool rxrpc_kernel_is_data_last(struct sk_buff *skb)
397{
398 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
399
400 ASSERTCMP(skb->mark, ==, RXRPC_SKB_MARK_DATA);
401
402 return sp->hdr.flags & RXRPC_LAST_PACKET;
403}
404
405EXPORT_SYMBOL(rxrpc_kernel_is_data_last);
406
407/**
408 * rxrpc_kernel_get_abort_code - Get the abort code from an RxRPC abort message
409 * @skb: Message indicating an abort
410 *
411 * Get the abort code from an RxRPC abort message.
412 */
413u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb)
414{
415 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
416
417 ASSERTCMP(skb->mark, ==, RXRPC_SKB_MARK_REMOTE_ABORT);
418
419 return sp->call->abort_code;
420}
421
422EXPORT_SYMBOL(rxrpc_kernel_get_abort_code);
423
424/**
425 * rxrpc_kernel_get_error_number - Get the error number from an RxRPC error message
426 * @skb: Message indicating an error
427 *
428 * Get the error number from an RxRPC error message.
429 */
430int rxrpc_kernel_get_error_number(struct sk_buff *skb)
431{
432 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
433
434 return sp->error;
435}
436
437EXPORT_SYMBOL(rxrpc_kernel_get_error_number);
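
On the userspace side, the control messages attached above come back through the standard CMSG machinery, and MSG_EOR in msg_flags marks the terminal message of a call (the terminal_message path). A hedged sketch of a receive helper, assuming SOL_RXRPC and RXRPC_USER_CALL_ID are the constants exported by the series' new header:

#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

/* hypothetical userspace helper: read one message from an AF_RXRPC
 * socket and extract the user call ID control message */
static ssize_t rxrpc_recv_one(int fd, void *buf, size_t len,
			      unsigned long *call_id, int *terminal)
{
	unsigned char control[128];
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct msghdr msg;
	struct cmsghdr *cmsg;
	ssize_t n;

	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = control;
	msg.msg_controllen = sizeof(control);

	n = recvmsg(fd, &msg, 0);
	if (n < 0)
		return n;

	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
		if (cmsg->cmsg_level == SOL_RXRPC &&
		    cmsg->cmsg_type == RXRPC_USER_CALL_ID)
			memcpy(call_id, CMSG_DATA(cmsg), sizeof(*call_id));

	*terminal = !!(msg.msg_flags & MSG_EOR);	/* end of this call */
	return n;
}

A caller would loop on this until *terminal is set, accumulating the reply in buf.
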
diff --git a/net/rxrpc/ar-security.c b/net/rxrpc/ar-security.c
new file mode 100644
index 000000000000..60d1d364430a
--- /dev/null
+++ b/net/rxrpc/ar-security.c
@@ -0,0 +1,258 @@
1/* RxRPC security handling
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/net.h>
14#include <linux/skbuff.h>
15#include <linux/udp.h>
16#include <linux/crypto.h>
17#include <net/sock.h>
18#include <net/af_rxrpc.h>
19#include "ar-internal.h"
20
21static LIST_HEAD(rxrpc_security_methods);
22static DECLARE_RWSEM(rxrpc_security_sem);
23
24/*
25 * get an RxRPC security module
26 */
27static struct rxrpc_security *rxrpc_security_get(struct rxrpc_security *sec)
28{
29 return try_module_get(sec->owner) ? sec : NULL;
30}
31
32/*
33 * release an RxRPC security module
34 */
35static void rxrpc_security_put(struct rxrpc_security *sec)
36{
37 module_put(sec->owner);
38}
39
40/*
41 * look up an rxrpc security module
42 */
43struct rxrpc_security *rxrpc_security_lookup(u8 security_index)
44{
45 struct rxrpc_security *sec = NULL;
46
47 _enter("");
48
49 down_read(&rxrpc_security_sem);
50
51 list_for_each_entry(sec, &rxrpc_security_methods, link) {
52 if (sec->security_index == security_index) {
53 if (unlikely(!rxrpc_security_get(sec)))
54 break;
55 goto out;
56 }
57 }
58
59 sec = NULL;
60out:
61 up_read(&rxrpc_security_sem);
62 _leave(" = %p [%s]", sec, sec ? sec->name : "");
63 return sec;
64}
65
66/**
67 * rxrpc_register_security - register an RxRPC security handler
68 * @sec: security module
69 *
70 * register an RxRPC security handler for use by RxRPC
71 */
72int rxrpc_register_security(struct rxrpc_security *sec)
73{
74 struct rxrpc_security *psec;
75 int ret;
76
77 _enter("");
78 down_write(&rxrpc_security_sem);
79
80 ret = -EEXIST;
81 list_for_each_entry(psec, &rxrpc_security_methods, link) {
82 if (psec->security_index == sec->security_index)
83 goto out;
84 }
85
86 list_add(&sec->link, &rxrpc_security_methods);
87
88 printk(KERN_NOTICE "RxRPC: Registered security type %d '%s'\n",
89 sec->security_index, sec->name);
90 ret = 0;
91
92out:
93 up_write(&rxrpc_security_sem);
94 _leave(" = %d", ret);
95 return ret;
96}
97
98EXPORT_SYMBOL_GPL(rxrpc_register_security);
99
100/**
101 * rxrpc_unregister_security - unregister an RxRPC security handler
102 * @sec: security module
103 *
104 * unregister an RxRPC security handler
105 */
106void rxrpc_unregister_security(struct rxrpc_security *sec)
107{
108
109 _enter("");
110 down_write(&rxrpc_security_sem);
111 list_del_init(&sec->link);
112 up_write(&rxrpc_security_sem);
113
114 printk(KERN_NOTICE "RxRPC: Unregistered security type %d '%s'\n",
115 sec->security_index, sec->name);
116}
117
118EXPORT_SYMBOL_GPL(rxrpc_unregister_security);
119
120/*
121 * initialise the security on a client connection
122 */
123int rxrpc_init_client_conn_security(struct rxrpc_connection *conn)
124{
125 struct rxrpc_security *sec;
126 struct key *key = conn->key;
127 int ret;
128
129 _enter("{%d},{%x}", conn->debug_id, key_serial(key));
130
131 if (!key)
132 return 0;
133
134 ret = key_validate(key);
135 if (ret < 0)
136 return ret;
137
138 sec = rxrpc_security_lookup(key->type_data.x[0]);
139 if (!sec)
140 return -EKEYREJECTED;
141 conn->security = sec;
142
143 ret = conn->security->init_connection_security(conn);
144 if (ret < 0) {
145 rxrpc_security_put(conn->security);
146 conn->security = NULL;
147 return ret;
148 }
149
150 _leave(" = 0");
151 return 0;
152}
153
154/*
155 * initialise the security on a server connection
156 */
157int rxrpc_init_server_conn_security(struct rxrpc_connection *conn)
158{
159 struct rxrpc_security *sec;
160 struct rxrpc_local *local = conn->trans->local;
161 struct rxrpc_sock *rx;
162 struct key *key;
163 key_ref_t kref;
164 char kdesc[5+1+3+1];
165
166 _enter("");
167
168 sprintf(kdesc, "%u:%u", ntohs(conn->service_id), conn->security_ix);
169
170 sec = rxrpc_security_lookup(conn->security_ix);
171 if (!sec) {
172 _leave(" = -ENOKEY [lookup]");
173 return -ENOKEY;
174 }
175
176 /* find the service */
177 read_lock_bh(&local->services_lock);
178 list_for_each_entry(rx, &local->services, listen_link) {
179 if (rx->service_id == conn->service_id)
180 goto found_service;
181 }
182
183 /* the service appears to have died */
184 read_unlock_bh(&local->services_lock);
185 rxrpc_security_put(sec);
186 _leave(" = -ENOENT");
187 return -ENOENT;
188
189found_service:
190 if (!rx->securities) {
191 read_unlock_bh(&local->services_lock);
192 rxrpc_security_put(sec);
193 _leave(" = -ENOKEY");
194 return -ENOKEY;
195 }
196
197 /* look through the service's keyring */
198 kref = keyring_search(make_key_ref(rx->securities, 1UL),
199 &key_type_rxrpc_s, kdesc);
200 if (IS_ERR(kref)) {
201 read_unlock_bh(&local->services_lock);
202 rxrpc_security_put(sec);
203 _leave(" = %ld [search]", PTR_ERR(kref));
204 return PTR_ERR(kref);
205 }
206
207 key = key_ref_to_ptr(kref);
208 read_unlock_bh(&local->services_lock);
209
210 conn->server_key = key;
211 conn->security = sec;
212
213 _leave(" = 0");
214 return 0;
215}
216
217/*
218 * secure a packet prior to transmission
219 */
220int rxrpc_secure_packet(const struct rxrpc_call *call,
221 struct sk_buff *skb,
222 size_t data_size,
223 void *sechdr)
224{
225 if (call->conn->security)
226 return call->conn->security->secure_packet(
227 call, skb, data_size, sechdr);
228 return 0;
229}
230
231/*
232 * verify the security on a received packet
233 */
234int rxrpc_verify_packet(const struct rxrpc_call *call, struct sk_buff *skb,
235 u32 *_abort_code)
236{
237 if (call->conn->security)
238 return call->conn->security->verify_packet(
239 call, skb, _abort_code);
240 return 0;
241}
242
243/*
244 * clear connection security
245 */
246void rxrpc_clear_conn_security(struct rxrpc_connection *conn)
247{
248 _enter("{%d}", conn->debug_id);
249
250 if (conn->security) {
251 conn->security->clear(conn);
252 rxrpc_security_put(conn->security);
253 conn->security = NULL;
254 }
255
256 key_put(conn->key);
257 key_put(conn->server_key);
258}
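
A security class plugs itself in through rxrpc_register_security(). The field names and handler signatures below are inferred from their uses in this file (owner, name, security_index, and the init/secure/verify/clear operations); the index value and everything prefixed my_ are invented:

#include <linux/module.h>
#include "ar-internal.h"

static int my_init_connection_security(struct rxrpc_connection *conn)
{
	return 0;	/* no per-connection state in this sketch */
}

static int my_secure_packet(const struct rxrpc_call *call,
			    struct sk_buff *skb, size_t data_size,
			    void *sechdr)
{
	return 0;	/* transmit packets unmodified */
}

static int my_verify_packet(const struct rxrpc_call *call,
			    struct sk_buff *skb, u32 *_abort_code)
{
	return 0;	/* accept everything */
}

static void my_clear(struct rxrpc_connection *conn)
{
}

static struct rxrpc_security my_security = {
	.owner				= THIS_MODULE,
	.name				= "null-sec",	/* invented */
	.security_index			= 42,		/* invented */
	.init_connection_security	= my_init_connection_security,
	.secure_packet			= my_secure_packet,
	.verify_packet			= my_verify_packet,
	.clear				= my_clear,
};

static int __init my_sec_init(void)
{
	return rxrpc_register_security(&my_security);
}

static void __exit my_sec_exit(void)
{
	rxrpc_unregister_security(&my_security);
}

module_init(my_sec_init);
module_exit(my_sec_exit);

rxrpc_register_security() returns -EEXIST if the index is already claimed, so a real class would need an index matching the security index carried on the wire.
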
diff --git a/net/rxrpc/ar-skbuff.c b/net/rxrpc/ar-skbuff.c
new file mode 100644
index 000000000000..de755e04d29c
--- /dev/null
+++ b/net/rxrpc/ar-skbuff.c
@@ -0,0 +1,132 @@
1/* ar-skbuff.c: socket buffer destruction handling
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/net.h>
14#include <linux/skbuff.h>
15#include <net/sock.h>
16#include <net/af_rxrpc.h>
17#include "ar-internal.h"
18
19/*
20 * set up for the ACK at the end of the receive phase when we discard the final
21 * receive phase data packet
22 * - called with softirqs disabled
23 */
24static void rxrpc_request_final_ACK(struct rxrpc_call *call)
25{
26 /* the call may be aborted before we have a chance to ACK it */
27 write_lock(&call->state_lock);
28
29 switch (call->state) {
30 case RXRPC_CALL_CLIENT_RECV_REPLY:
31 call->state = RXRPC_CALL_CLIENT_FINAL_ACK;
32 _debug("request final ACK");
33
34 /* get an extra ref on the call for the final-ACK generator to
35 * release */
36 rxrpc_get_call(call);
37 set_bit(RXRPC_CALL_ACK_FINAL, &call->events);
38 if (try_to_del_timer_sync(&call->ack_timer) >= 0)
39 rxrpc_queue_call(call);
40 break;
41
42 case RXRPC_CALL_SERVER_RECV_REQUEST:
43 call->state = RXRPC_CALL_SERVER_ACK_REQUEST;
44 default:
45 break;
46 }
47
48 write_unlock(&call->state_lock);
49}
50
51/*
52 * drop the bottom ACK off of the call ACK window and advance the window
53 */
54static void rxrpc_hard_ACK_data(struct rxrpc_call *call,
55 struct rxrpc_skb_priv *sp)
56{
57 int loop;
58 u32 seq;
59
60 spin_lock_bh(&call->lock);
61
62 _debug("hard ACK #%u", ntohl(sp->hdr.seq));
63
64 for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) {
65 call->ackr_window[loop] >>= 1;
66 call->ackr_window[loop] |=
67 call->ackr_window[loop + 1] << (BITS_PER_LONG - 1);
68 }
69
70 seq = ntohl(sp->hdr.seq);
71 ASSERTCMP(seq, ==, call->rx_data_eaten + 1);
72 call->rx_data_eaten = seq;
73
74 if (call->ackr_win_top < UINT_MAX)
75 call->ackr_win_top++;
76
77 ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE,
78 call->rx_data_post, >=, call->rx_data_recv);
79 ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE,
80 call->rx_data_recv, >=, call->rx_data_eaten);
81
82 if (sp->hdr.flags & RXRPC_LAST_PACKET) {
83 rxrpc_request_final_ACK(call);
84 } else if (atomic_dec_and_test(&call->ackr_not_idle) &&
85 test_and_clear_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags)) {
86 _debug("send Rx idle ACK");
87 __rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, sp->hdr.serial,
88 true);
89 }
90
91 spin_unlock_bh(&call->lock);
92}
93
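
The shift loop above treats ackr_window as a multiword bitmap: each word is shifted down one bit and takes its top bit from the word after it, which is presumably why the array is sized one word beyond RXRPC_ACKR_WINDOW_ASZ. A standalone demonstration of the same shift, with invented sizes:

/* hypothetical userspace demo of the multiword right-shift used in
 * rxrpc_hard_ACK_data(); after the shift, bit 0 of word 0 tracks the
 * next un-eaten sequence number */
#include <stdio.h>

#define WINDOW_WORDS 2	/* invented; the kernel uses RXRPC_ACKR_WINDOW_ASZ */

static void shift_window(unsigned long w[WINDOW_WORDS + 1])
{
	int i;

	for (i = 0; i < WINDOW_WORDS; i++) {
		w[i] >>= 1;
		w[i] |= w[i + 1] << (sizeof(unsigned long) * 8 - 1);
	}
}

int main(void)
{
	unsigned long w[WINDOW_WORDS + 1] = { 0x5UL, 0x1UL, 0UL };

	shift_window(w);
	/* prints 0x8000000000000002 0 on a 64-bit machine */
	printf("%#lx %#lx\n", w[0], w[1]);
	return 0;
}
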
94/*
95 * destroy a packet that has an RxRPC control buffer
96 * - advance the hard-ACK state of the parent call (done here in case something
97 * in the kernel bypasses recvmsg() and steals the packet directly off of the
98 * socket receive queue)
99 */
100void rxrpc_packet_destructor(struct sk_buff *skb)
101{
102 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
103 struct rxrpc_call *call = sp->call;
104
105 _enter("%p{%p}", skb, call);
106
107 if (call) {
108 /* send the final ACK on a client call */
109 if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA)
110 rxrpc_hard_ACK_data(call, sp);
111 rxrpc_put_call(call);
112 sp->call = NULL;
113 }
114
115 if (skb->sk)
116 sock_rfree(skb);
117 _leave("");
118}
119
120/**
121 * rxrpc_kernel_free_skb - Free an RxRPC socket buffer
122 * @skb: The socket buffer to be freed
123 *
124 * Let RxRPC free its own socket buffer, permitting it to maintain debug
125 * accounting.
126 */
127void rxrpc_kernel_free_skb(struct sk_buff *skb)
128{
129 rxrpc_free_skb(skb);
130}
131
132EXPORT_SYMBOL(rxrpc_kernel_free_skb);
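
Taken together, the destructor and helpers above mean an in-kernel user (AFS is the in-tree one) must dispose of every packet through RxRPC so the hard-ACK state advances. A hedged sketch of such a consumer, assuming the mark constants from the new <net/af_rxrpc.h>:

/* hypothetical in-kernel consumer: after copying out a DATA packet,
 * tell RxRPC it was delivered so the hard-ACK window moves on;
 * non-data packets are just freed; returns true when the reply is
 * complete */
static bool my_consume_skb(struct sk_buff *skb)
{
	if (skb->mark == RXRPC_SKB_MARK_DATA) {
		bool last = rxrpc_kernel_is_data_last(skb);

		/* ... copy the payload out ... */
		rxrpc_kernel_data_delivered(skb);	/* also frees skb */
		return last;
	}

	rxrpc_kernel_free_skb(skb);
	return false;
}
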
diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c
new file mode 100644
index 000000000000..d43d78f19302
--- /dev/null
+++ b/net/rxrpc/ar-transport.c
@@ -0,0 +1,276 @@
1/* RxRPC point-to-point transport session management
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/net.h>
14#include <linux/skbuff.h>
15#include <net/sock.h>
16#include <net/af_rxrpc.h>
17#include "ar-internal.h"
18
19static void rxrpc_transport_reaper(struct work_struct *work);
20
21static LIST_HEAD(rxrpc_transports);
22static DEFINE_RWLOCK(rxrpc_transport_lock);
23static unsigned long rxrpc_transport_timeout = 3600 * 24;
24static DECLARE_DELAYED_WORK(rxrpc_transport_reap, rxrpc_transport_reaper);
25
26/*
27 * allocate a new transport session manager
28 */
29static struct rxrpc_transport *rxrpc_alloc_transport(struct rxrpc_local *local,
30 struct rxrpc_peer *peer,
31 gfp_t gfp)
32{
33 struct rxrpc_transport *trans;
34
35 _enter("");
36
37 trans = kzalloc(sizeof(struct rxrpc_transport), gfp);
38 if (trans) {
39 trans->local = local;
40 trans->peer = peer;
41 INIT_LIST_HEAD(&trans->link);
42 trans->bundles = RB_ROOT;
43 trans->client_conns = RB_ROOT;
44 trans->server_conns = RB_ROOT;
45 skb_queue_head_init(&trans->error_queue);
46 spin_lock_init(&trans->client_lock);
47 rwlock_init(&trans->conn_lock);
48 atomic_set(&trans->usage, 1);
49 trans->debug_id = atomic_inc_return(&rxrpc_debug_id);
50
51 if (peer->srx.transport.family == AF_INET) {
52 switch (peer->srx.transport_type) {
53 case SOCK_DGRAM:
54 INIT_WORK(&trans->error_handler,
55 rxrpc_UDP_error_handler);
56 break;
57 default:
58 BUG();
59 break;
60 }
61 } else {
62 BUG();
63 }
64 }
65
66 _leave(" = %p", trans);
67 return trans;
68}
69
70/*
71 * obtain a transport session for the nominated endpoints
72 */
73struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *local,
74 struct rxrpc_peer *peer,
75 gfp_t gfp)
76{
77 struct rxrpc_transport *trans, *candidate;
78 const char *new = "old";
79 int usage;
80
81 _enter("{%u.%u.%u.%u+%hu},{%u.%u.%u.%u+%hu},",
82 NIPQUAD(local->srx.transport.sin.sin_addr),
83 ntohs(local->srx.transport.sin.sin_port),
84 NIPQUAD(peer->srx.transport.sin.sin_addr),
85 ntohs(peer->srx.transport.sin.sin_port));
86
87 /* search the transport list first */
88 read_lock_bh(&rxrpc_transport_lock);
89 list_for_each_entry(trans, &rxrpc_transports, link) {
90 if (trans->local == local && trans->peer == peer)
91 goto found_extant_transport;
92 }
93 read_unlock_bh(&rxrpc_transport_lock);
94
95 /* not yet present - create a candidate for a new record and then
96 * redo the search */
97 candidate = rxrpc_alloc_transport(local, peer, gfp);
98 if (!candidate) {
99 _leave(" = -ENOMEM");
100 return ERR_PTR(-ENOMEM);
101 }
102
103 write_lock_bh(&rxrpc_transport_lock);
104
105 list_for_each_entry(trans, &rxrpc_transports, link) {
106 if (trans->local == local && trans->peer == peer)
107 goto found_extant_second;
108 }
109
110 /* we can now add the new candidate to the list */
111 trans = candidate;
112 candidate = NULL;
113
114 rxrpc_get_local(trans->local);
115 atomic_inc(&trans->peer->usage);
116 list_add_tail(&trans->link, &rxrpc_transports);
117 write_unlock_bh(&rxrpc_transport_lock);
118 new = "new";
119
120success:
121 _net("TRANSPORT %s %d local %d -> peer %d",
122 new,
123 trans->debug_id,
124 trans->local->debug_id,
125 trans->peer->debug_id);
126
127 _leave(" = %p {u=%d}", trans, atomic_read(&trans->usage));
128 return trans;
129
130 /* we found the transport in the list immediately */
131found_extant_transport:
132 usage = atomic_inc_return(&trans->usage);
133 read_unlock_bh(&rxrpc_transport_lock);
134 goto success;
135
136 /* we found the transport on the second time through the list */
137found_extant_second:
138 usage = atomic_inc_return(&trans->usage);
139 write_unlock_bh(&rxrpc_transport_lock);
140 kfree(candidate);
141 goto success;
142}
143
144/*
145 * find the transport connecting two endpoints
146 */
147struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *local,
148 struct rxrpc_peer *peer)
149{
150 struct rxrpc_transport *trans;
151
152 _enter("{%u.%u.%u.%u+%hu},{%u.%u.%u.%u+%hu},",
153 NIPQUAD(local->srx.transport.sin.sin_addr),
154 ntohs(local->srx.transport.sin.sin_port),
155 NIPQUAD(peer->srx.transport.sin.sin_addr),
156 ntohs(peer->srx.transport.sin.sin_port));
157
158 /* search the transport list */
159 read_lock_bh(&rxrpc_transport_lock);
160
161 list_for_each_entry(trans, &rxrpc_transports, link) {
162 if (trans->local == local && trans->peer == peer)
163 goto found_extant_transport;
164 }
165
166 read_unlock_bh(&rxrpc_transport_lock);
167 _leave(" = NULL");
168 return NULL;
169
170found_extant_transport:
171 atomic_inc(&trans->usage);
172 read_unlock_bh(&rxrpc_transport_lock);
173 _leave(" = %p", trans);
174 return trans;
175}
176
177/*
178 * release a transport session
179 */
180void rxrpc_put_transport(struct rxrpc_transport *trans)
181{
182 _enter("%p{u=%d}", trans, atomic_read(&trans->usage));
183
184 ASSERTCMP(atomic_read(&trans->usage), >, 0);
185
186 trans->put_time = xtime.tv_sec;
187 if (unlikely(atomic_dec_and_test(&trans->usage)))
188 _debug("zombie");
189	/* let the reaper determine the timeout, so that we don't race
190	 * with it and accidentally extend the timeout while it is
191	 * running */
192 rxrpc_queue_delayed_work(&rxrpc_transport_reap, 0);
193 _leave("");
194}
195
196/*
197 * clean up a transport session
198 */
199static void rxrpc_cleanup_transport(struct rxrpc_transport *trans)
200{
201 _net("DESTROY TRANS %d", trans->debug_id);
202
203 rxrpc_purge_queue(&trans->error_queue);
204
205 rxrpc_put_local(trans->local);
206 rxrpc_put_peer(trans->peer);
207 kfree(trans);
208}
209
210/*
211 * reap dead transports that have passed their expiry date
212 */
213static void rxrpc_transport_reaper(struct work_struct *work)
214{
215 struct rxrpc_transport *trans, *_p;
216 unsigned long now, earliest, reap_time;
217
218 LIST_HEAD(graveyard);
219
220 _enter("");
221
222 now = xtime.tv_sec;
223 earliest = ULONG_MAX;
224
225 /* extract all the transports that have been dead too long */
226 write_lock_bh(&rxrpc_transport_lock);
227 list_for_each_entry_safe(trans, _p, &rxrpc_transports, link) {
228 _debug("reap TRANS %d { u=%d t=%ld }",
229 trans->debug_id, atomic_read(&trans->usage),
230 (long) now - (long) trans->put_time);
231
232 if (likely(atomic_read(&trans->usage) > 0))
233 continue;
234
235 reap_time = trans->put_time + rxrpc_transport_timeout;
236 if (reap_time <= now)
237 list_move_tail(&trans->link, &graveyard);
238 else if (reap_time < earliest)
239 earliest = reap_time;
240 }
241 write_unlock_bh(&rxrpc_transport_lock);
242
243 if (earliest != ULONG_MAX) {
244 _debug("reschedule reaper %ld", (long) earliest - now);
245 ASSERTCMP(earliest, >, now);
246 rxrpc_queue_delayed_work(&rxrpc_transport_reap,
247 (earliest - now) * HZ);
248 }
249
250 /* then destroy all those pulled out */
251 while (!list_empty(&graveyard)) {
252 trans = list_entry(graveyard.next, struct rxrpc_transport,
253 link);
254 list_del_init(&trans->link);
255
256 ASSERTCMP(atomic_read(&trans->usage), ==, 0);
257 rxrpc_cleanup_transport(trans);
258 }
259
260 _leave("");
261}
262
263/*
264 * preemptively destroy all the transport session records rather than waiting
265 * for them to time out
266 */
267void __exit rxrpc_destroy_all_transports(void)
268{
269 _enter("");
270
271 rxrpc_transport_timeout = 0;
272 cancel_delayed_work(&rxrpc_transport_reap);
273 rxrpc_queue_delayed_work(&rxrpc_transport_reap, 0);
274
275 _leave("");
276}
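
Note the lifetime rule in rxrpc_put_transport(): the final put does not free anything, it just stamps put_time and pokes the reaper, which frees the transport only after rxrpc_transport_timeout seconds (a day, above). A hedged usage sketch:

/* hypothetical caller pairing rxrpc_get_transport() with
 * rxrpc_put_transport(); the transport lingers after the last put,
 * so an immediate re-get finds it again cheaply */
static int my_use_transport(struct rxrpc_local *local,
			    struct rxrpc_peer *peer)
{
	struct rxrpc_transport *trans;

	trans = rxrpc_get_transport(local, peer, GFP_KERNEL);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/* ... hang connections and bundles off trans ... */

	rxrpc_put_transport(trans);
	return 0;
}
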
diff --git a/net/rxrpc/call.c b/net/rxrpc/call.c
deleted file mode 100644
index d07122b57e0d..000000000000
--- a/net/rxrpc/call.c
+++ /dev/null
@@ -1,2277 +0,0 @@
1/* call.c: Rx call routines
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/sched.h>
13#include <linux/slab.h>
14#include <linux/module.h>
15#include <rxrpc/rxrpc.h>
16#include <rxrpc/transport.h>
17#include <rxrpc/peer.h>
18#include <rxrpc/connection.h>
19#include <rxrpc/call.h>
20#include <rxrpc/message.h>
21#include "internal.h"
22
23__RXACCT_DECL(atomic_t rxrpc_call_count);
24__RXACCT_DECL(atomic_t rxrpc_message_count);
25
26LIST_HEAD(rxrpc_calls);
27DECLARE_RWSEM(rxrpc_calls_sem);
28
29unsigned rxrpc_call_rcv_timeout = HZ/3;
30static unsigned rxrpc_call_acks_timeout = HZ/3;
31static unsigned rxrpc_call_dfr_ack_timeout = HZ/20;
32static unsigned short rxrpc_call_max_resend = HZ/10;
33
34const char *rxrpc_call_states[] = {
35 "COMPLETE",
36 "ERROR",
37 "SRVR_RCV_OPID",
38 "SRVR_RCV_ARGS",
39 "SRVR_GOT_ARGS",
40 "SRVR_SND_REPLY",
41 "SRVR_RCV_FINAL_ACK",
42 "CLNT_SND_ARGS",
43 "CLNT_RCV_REPLY",
44 "CLNT_GOT_REPLY"
45};
46
47const char *rxrpc_call_error_states[] = {
48 "NO_ERROR",
49 "LOCAL_ABORT",
50 "PEER_ABORT",
51 "LOCAL_ERROR",
52 "REMOTE_ERROR"
53};
54
55const char *rxrpc_pkts[] = {
56 "?00",
57 "data", "ack", "busy", "abort", "ackall", "chall", "resp", "debug",
58 "?09", "?10", "?11", "?12", "?13", "?14", "?15"
59};
60
61static const char *rxrpc_acks[] = {
62 "---", "REQ", "DUP", "SEQ", "WIN", "MEM", "PNG", "PNR", "DLY", "IDL",
63 "-?-"
64};
65
66static const char _acktype[] = "NA-";
67
68static void rxrpc_call_receive_packet(struct rxrpc_call *call);
69static void rxrpc_call_receive_data_packet(struct rxrpc_call *call,
70 struct rxrpc_message *msg);
71static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call,
72 struct rxrpc_message *msg);
73static void rxrpc_call_definitively_ACK(struct rxrpc_call *call,
74					rxrpc_seq_t highest);
75static void rxrpc_call_resend(struct rxrpc_call *call, rxrpc_seq_t highest);
76static int __rxrpc_call_read_data(struct rxrpc_call *call);
77
78static int rxrpc_call_record_ACK(struct rxrpc_call *call,
79 struct rxrpc_message *msg,
80 rxrpc_seq_t seq,
81 size_t count);
82
83static int rxrpc_call_flush(struct rxrpc_call *call);
84
85#define _state(call) \
86 _debug("[[[ state %s ]]]", rxrpc_call_states[call->app_call_state]);
87
88static void rxrpc_call_default_attn_func(struct rxrpc_call *call)
89{
90 wake_up(&call->waitq);
91}
92
93static void rxrpc_call_default_error_func(struct rxrpc_call *call)
94{
95 wake_up(&call->waitq);
96}
97
98static void rxrpc_call_default_aemap_func(struct rxrpc_call *call)
99{
100 switch (call->app_err_state) {
101 case RXRPC_ESTATE_LOCAL_ABORT:
102 call->app_abort_code = -call->app_errno;
103 case RXRPC_ESTATE_PEER_ABORT:
104 call->app_errno = -ECONNABORTED;
105 default:
106 break;
107 }
108}
109
110static void __rxrpc_call_acks_timeout(unsigned long _call)
111{
112 struct rxrpc_call *call = (struct rxrpc_call *) _call;
113
114 _debug("ACKS TIMEOUT %05lu", jiffies - call->cjif);
115
116 call->flags |= RXRPC_CALL_ACKS_TIMO;
117 rxrpc_krxiod_queue_call(call);
118}
119
120static void __rxrpc_call_rcv_timeout(unsigned long _call)
121{
122 struct rxrpc_call *call = (struct rxrpc_call *) _call;
123
124 _debug("RCV TIMEOUT %05lu", jiffies - call->cjif);
125
126 call->flags |= RXRPC_CALL_RCV_TIMO;
127 rxrpc_krxiod_queue_call(call);
128}
129
130static void __rxrpc_call_ackr_timeout(unsigned long _call)
131{
132 struct rxrpc_call *call = (struct rxrpc_call *) _call;
133
134 _debug("ACKR TIMEOUT %05lu",jiffies - call->cjif);
135
136 call->flags |= RXRPC_CALL_ACKR_TIMO;
137 rxrpc_krxiod_queue_call(call);
138}
139
140/*****************************************************************************/
141/*
142 * calculate a timeout based on an RTT value
143 */
144static inline unsigned long __rxrpc_rtt_based_timeout(struct rxrpc_call *call,
145 unsigned long val)
146{
147 unsigned long expiry = call->conn->peer->rtt / (1000000 / HZ);
148
149 expiry += 10;
150 if (expiry < HZ / 25)
151 expiry = HZ / 25;
152 if (expiry > HZ)
153 expiry = HZ;
154
155 _leave(" = %lu jiffies", expiry);
156 return jiffies + expiry;
157} /* end __rxrpc_rtt_based_timeout() */
158
159/*****************************************************************************/
160/*
161 * create a new call record
162 */
163static inline int __rxrpc_create_call(struct rxrpc_connection *conn,
164 struct rxrpc_call **_call)
165{
166 struct rxrpc_call *call;
167
168 _enter("%p", conn);
169
170 /* allocate and initialise a call record */
171 call = (struct rxrpc_call *) get_zeroed_page(GFP_KERNEL);
172 if (!call) {
173 _leave(" ENOMEM");
174 return -ENOMEM;
175 }
176
177 atomic_set(&call->usage, 1);
178
179 init_waitqueue_head(&call->waitq);
180 spin_lock_init(&call->lock);
181 INIT_LIST_HEAD(&call->link);
182 INIT_LIST_HEAD(&call->acks_pendq);
183 INIT_LIST_HEAD(&call->rcv_receiveq);
184 INIT_LIST_HEAD(&call->rcv_krxiodq_lk);
185 INIT_LIST_HEAD(&call->app_readyq);
186 INIT_LIST_HEAD(&call->app_unreadyq);
187 INIT_LIST_HEAD(&call->app_link);
188 INIT_LIST_HEAD(&call->app_attn_link);
189
190 init_timer(&call->acks_timeout);
191 call->acks_timeout.data = (unsigned long) call;
192 call->acks_timeout.function = __rxrpc_call_acks_timeout;
193
194 init_timer(&call->rcv_timeout);
195 call->rcv_timeout.data = (unsigned long) call;
196 call->rcv_timeout.function = __rxrpc_call_rcv_timeout;
197
198 init_timer(&call->ackr_dfr_timo);
199 call->ackr_dfr_timo.data = (unsigned long) call;
200 call->ackr_dfr_timo.function = __rxrpc_call_ackr_timeout;
201
202 call->conn = conn;
203 call->ackr_win_bot = 1;
204 call->ackr_win_top = call->ackr_win_bot + RXRPC_CALL_ACK_WINDOW_SIZE - 1;
205 call->ackr_prev_seq = 0;
206 call->app_mark = RXRPC_APP_MARK_EOF;
207 call->app_attn_func = rxrpc_call_default_attn_func;
208 call->app_error_func = rxrpc_call_default_error_func;
209 call->app_aemap_func = rxrpc_call_default_aemap_func;
210 call->app_scr_alloc = call->app_scratch;
211
212 call->cjif = jiffies;
213
214 _leave(" = 0 (%p)", call);
215
216 *_call = call;
217
218 return 0;
219} /* end __rxrpc_create_call() */
220
221/*****************************************************************************/
222/*
223 * create a new call record for outgoing calls
224 */
225int rxrpc_create_call(struct rxrpc_connection *conn,
226 rxrpc_call_attn_func_t attn,
227 rxrpc_call_error_func_t error,
228 rxrpc_call_aemap_func_t aemap,
229 struct rxrpc_call **_call)
230{
231 DECLARE_WAITQUEUE(myself, current);
232
233 struct rxrpc_call *call;
234 int ret, cix, loop;
235
236 _enter("%p", conn);
237
238 /* allocate and initialise a call record */
239 ret = __rxrpc_create_call(conn, &call);
240 if (ret < 0) {
241 _leave(" = %d", ret);
242 return ret;
243 }
244
245 call->app_call_state = RXRPC_CSTATE_CLNT_SND_ARGS;
246 if (attn)
247 call->app_attn_func = attn;
248 if (error)
249 call->app_error_func = error;
250 if (aemap)
251 call->app_aemap_func = aemap;
252
253 _state(call);
254
255 spin_lock(&conn->lock);
256 set_current_state(TASK_INTERRUPTIBLE);
257 add_wait_queue(&conn->chanwait, &myself);
258
259 try_again:
260 /* try to find an unused channel */
261 for (cix = 0; cix < 4; cix++)
262 if (!conn->channels[cix])
263 goto obtained_chan;
264
265 /* no free channels - wait for one to become available */
266 ret = -EINTR;
267 if (signal_pending(current))
268 goto error_unwait;
269
270 spin_unlock(&conn->lock);
271
272 schedule();
273 set_current_state(TASK_INTERRUPTIBLE);
274
275 spin_lock(&conn->lock);
276 goto try_again;
277
278 /* got a channel - now attach to the connection */
279 obtained_chan:
280 remove_wait_queue(&conn->chanwait, &myself);
281 set_current_state(TASK_RUNNING);
282
283 /* concoct a unique call number */
284 next_callid:
285 call->call_id = htonl(++conn->call_counter);
286 for (loop = 0; loop < 4; loop++)
287 if (conn->channels[loop] &&
288 conn->channels[loop]->call_id == call->call_id)
289 goto next_callid;
290
291 rxrpc_get_connection(conn);
292 conn->channels[cix] = call; /* assign _after_ done callid check loop */
293 do_gettimeofday(&conn->atime);
294 call->chan_ix = htonl(cix);
295
296 spin_unlock(&conn->lock);
297
298 down_write(&rxrpc_calls_sem);
299 list_add_tail(&call->call_link, &rxrpc_calls);
300 up_write(&rxrpc_calls_sem);
301
302 __RXACCT(atomic_inc(&rxrpc_call_count));
303 *_call = call;
304
305 _leave(" = 0 (call=%p cix=%u)", call, cix);
306 return 0;
307
308 error_unwait:
309 remove_wait_queue(&conn->chanwait, &myself);
310 set_current_state(TASK_RUNNING);
311 spin_unlock(&conn->lock);
312
313 free_page((unsigned long) call);
314 _leave(" = %d", ret);
315 return ret;
316} /* end rxrpc_create_call() */
317
318/*****************************************************************************/
319/*
320 * create a new call record for incoming calls
321 */
322int rxrpc_incoming_call(struct rxrpc_connection *conn,
323 struct rxrpc_message *msg,
324 struct rxrpc_call **_call)
325{
326 struct rxrpc_call *call;
327 unsigned cix;
328 int ret;
329
330 cix = ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK;
331
332 _enter("%p,%u,%u", conn, ntohl(msg->hdr.callNumber), cix);
333
334 /* allocate and initialise a call record */
335 ret = __rxrpc_create_call(conn, &call);
336 if (ret < 0) {
337 _leave(" = %d", ret);
338 return ret;
339 }
340
341 call->pkt_rcv_count = 1;
342 call->app_call_state = RXRPC_CSTATE_SRVR_RCV_OPID;
343 call->app_mark = sizeof(uint32_t);
344
345 _state(call);
346
347 /* attach to the connection */
348 ret = -EBUSY;
349 call->chan_ix = htonl(cix);
350 call->call_id = msg->hdr.callNumber;
351
352 spin_lock(&conn->lock);
353
354 if (!conn->channels[cix] ||
355 conn->channels[cix]->app_call_state == RXRPC_CSTATE_COMPLETE ||
356 conn->channels[cix]->app_call_state == RXRPC_CSTATE_ERROR
357 ) {
358 conn->channels[cix] = call;
359 rxrpc_get_connection(conn);
360 ret = 0;
361 }
362
363 spin_unlock(&conn->lock);
364
365 if (ret < 0) {
366 free_page((unsigned long) call);
367 call = NULL;
368 }
369
370 if (ret == 0) {
371 down_write(&rxrpc_calls_sem);
372 list_add_tail(&call->call_link, &rxrpc_calls);
373 up_write(&rxrpc_calls_sem);
374 __RXACCT(atomic_inc(&rxrpc_call_count));
375 *_call = call;
376 }
377
378 _leave(" = %d [%p]", ret, call);
379 return ret;
380} /* end rxrpc_incoming_call() */
381
382/*****************************************************************************/
383/*
384 * free a call record
385 */
386void rxrpc_put_call(struct rxrpc_call *call)
387{
388 struct rxrpc_connection *conn = call->conn;
389 struct rxrpc_message *msg;
390
391 _enter("%p{u=%d}",call,atomic_read(&call->usage));
392
393 /* sanity check */
394 if (atomic_read(&call->usage) <= 0)
395 BUG();
396
397 /* to prevent a race, the decrement and the de-list must be effectively
398 * atomic */
399 spin_lock(&conn->lock);
400 if (likely(!atomic_dec_and_test(&call->usage))) {
401 spin_unlock(&conn->lock);
402 _leave("");
403 return;
404 }
405
406 if (conn->channels[ntohl(call->chan_ix)] == call)
407 conn->channels[ntohl(call->chan_ix)] = NULL;
408
409 spin_unlock(&conn->lock);
410
411 wake_up(&conn->chanwait);
412
413 rxrpc_put_connection(conn);
414
415 /* clear the timers and dequeue from krxiod */
416 del_timer_sync(&call->acks_timeout);
417 del_timer_sync(&call->rcv_timeout);
418 del_timer_sync(&call->ackr_dfr_timo);
419
420 rxrpc_krxiod_dequeue_call(call);
421
422 /* clean up the contents of the struct */
423 if (call->snd_nextmsg)
424 rxrpc_put_message(call->snd_nextmsg);
425
426 if (call->snd_ping)
427 rxrpc_put_message(call->snd_ping);
428
429 while (!list_empty(&call->acks_pendq)) {
430 msg = list_entry(call->acks_pendq.next,
431 struct rxrpc_message, link);
432 list_del(&msg->link);
433 rxrpc_put_message(msg);
434 }
435
436 while (!list_empty(&call->rcv_receiveq)) {
437 msg = list_entry(call->rcv_receiveq.next,
438 struct rxrpc_message, link);
439 list_del(&msg->link);
440 rxrpc_put_message(msg);
441 }
442
443 while (!list_empty(&call->app_readyq)) {
444 msg = list_entry(call->app_readyq.next,
445 struct rxrpc_message, link);
446 list_del(&msg->link);
447 rxrpc_put_message(msg);
448 }
449
450 while (!list_empty(&call->app_unreadyq)) {
451 msg = list_entry(call->app_unreadyq.next,
452 struct rxrpc_message, link);
453 list_del(&msg->link);
454 rxrpc_put_message(msg);
455 }
456
457 module_put(call->owner);
458
459 down_write(&rxrpc_calls_sem);
460 list_del(&call->call_link);
461 up_write(&rxrpc_calls_sem);
462
463 __RXACCT(atomic_dec(&rxrpc_call_count));
464 free_page((unsigned long) call);
465
466 _leave(" [destroyed]");
467} /* end rxrpc_put_call() */
468
469/*****************************************************************************/
470/*
471 * actually generate a normal ACK
472 */
473static inline int __rxrpc_call_gen_normal_ACK(struct rxrpc_call *call,
474 rxrpc_seq_t seq)
475{
476 struct rxrpc_message *msg;
477 struct kvec diov[3];
478 __be32 aux[4];
479 int delta, ret;
480
481 /* ACKs default to DELAY */
482 if (!call->ackr.reason)
483 call->ackr.reason = RXRPC_ACK_DELAY;
484
485 _proto("Rx %05lu Sending ACK { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
486 jiffies - call->cjif,
487 ntohs(call->ackr.maxSkew),
488 ntohl(call->ackr.firstPacket),
489 ntohl(call->ackr.previousPacket),
490 ntohl(call->ackr.serial),
491 rxrpc_acks[call->ackr.reason],
492 call->ackr.nAcks);
493
494 aux[0] = htonl(call->conn->peer->if_mtu); /* interface MTU */
495 aux[1] = htonl(1444); /* max MTU */
496 aux[2] = htonl(16); /* rwind */
497 aux[3] = htonl(4); /* max packets */
498
499 diov[0].iov_len = sizeof(struct rxrpc_ackpacket);
500 diov[0].iov_base = &call->ackr;
501 diov[1].iov_len = call->ackr_pend_cnt + 3;
502 diov[1].iov_base = call->ackr_array;
503 diov[2].iov_len = sizeof(aux);
504 diov[2].iov_base = &aux;
505
506 /* build and send the message */
507 ret = rxrpc_conn_newmsg(call->conn,call, RXRPC_PACKET_TYPE_ACK,
508 3, diov, GFP_KERNEL, &msg);
509 if (ret < 0)
510 goto out;
511
512 msg->seq = seq;
513 msg->hdr.seq = htonl(seq);
514 msg->hdr.flags |= RXRPC_SLOW_START_OK;
515
516 ret = rxrpc_conn_sendmsg(call->conn, msg);
517 rxrpc_put_message(msg);
518 if (ret < 0)
519 goto out;
520 call->pkt_snd_count++;
521
522 /* count how many actual ACKs there were at the front */
523 for (delta = 0; delta < call->ackr_pend_cnt; delta++)
524 if (call->ackr_array[delta] != RXRPC_ACK_TYPE_ACK)
525 break;
526
527 call->ackr_pend_cnt -= delta; /* all ACK'd to this point */
528
529 /* crank the ACK window around */
530 if (delta == 0) {
531 /* un-ACK'd window */
532 }
533 else if (delta < RXRPC_CALL_ACK_WINDOW_SIZE) {
534 /* partially ACK'd window
535 * - shuffle down to avoid losing out-of-sequence packets
536 */
537 call->ackr_win_bot += delta;
538 call->ackr_win_top += delta;
539
540 memmove(&call->ackr_array[0],
541 &call->ackr_array[delta],
542 call->ackr_pend_cnt);
543
544 memset(&call->ackr_array[call->ackr_pend_cnt],
545 RXRPC_ACK_TYPE_NACK,
546 sizeof(call->ackr_array) - call->ackr_pend_cnt);
547 }
548 else {
549 /* fully ACK'd window
550 * - just clear the whole thing
551 */
552 memset(&call->ackr_array,
553 RXRPC_ACK_TYPE_NACK,
554 sizeof(call->ackr_array));
555 }
556
557 /* clear this ACK */
558 memset(&call->ackr, 0, sizeof(call->ackr));
559
560 out:
561 if (!call->app_call_state)
562 printk("___ STATE 0 ___\n");
563 return ret;
564} /* end __rxrpc_call_gen_normal_ACK() */
565
566/*****************************************************************************/
567/*
568 * note the reception of a packet in the call's ACK records and generate an
569 * appropriate ACK packet if necessary
570 * - returns 0 if packet should be processed, 1 if packet should be ignored
571 * and -ve on an error
572 */
573static int rxrpc_call_generate_ACK(struct rxrpc_call *call,
574 struct rxrpc_header *hdr,
575 struct rxrpc_ackpacket *ack)
576{
577 struct rxrpc_message *msg;
578 rxrpc_seq_t seq;
579 unsigned offset;
580 int ret = 0, err;
581 u8 special_ACK, do_ACK, force;
582
583 _enter("%p,%p { seq=%d tp=%d fl=%02x }",
584 call, hdr, ntohl(hdr->seq), hdr->type, hdr->flags);
585
586 seq = ntohl(hdr->seq);
587 offset = seq - call->ackr_win_bot;
588 do_ACK = RXRPC_ACK_DELAY;
589 special_ACK = 0;
590 force = (seq == 1);
591
592 if (call->ackr_high_seq < seq)
593 call->ackr_high_seq = seq;
594
595 /* deal with generation of obvious special ACKs first */
596 if (ack && ack->reason == RXRPC_ACK_PING) {
597 special_ACK = RXRPC_ACK_PING_RESPONSE;
598 ret = 1;
599 goto gen_ACK;
600 }
601
602 if (seq < call->ackr_win_bot) {
603 special_ACK = RXRPC_ACK_DUPLICATE;
604 ret = 1;
605 goto gen_ACK;
606 }
607
608 if (seq >= call->ackr_win_top) {
609 special_ACK = RXRPC_ACK_EXCEEDS_WINDOW;
610 ret = 1;
611 goto gen_ACK;
612 }
613
614 if (call->ackr_array[offset] != RXRPC_ACK_TYPE_NACK) {
615 special_ACK = RXRPC_ACK_DUPLICATE;
616 ret = 1;
617 goto gen_ACK;
618 }
619
620 /* okay... it's a normal data packet inside the ACK window */
621 call->ackr_array[offset] = RXRPC_ACK_TYPE_ACK;
622
623 if (offset < call->ackr_pend_cnt) {
624 }
625 else if (offset > call->ackr_pend_cnt) {
626 do_ACK = RXRPC_ACK_OUT_OF_SEQUENCE;
627 call->ackr_pend_cnt = offset;
628 goto gen_ACK;
629 }
630
631 if (hdr->flags & RXRPC_REQUEST_ACK) {
632 do_ACK = RXRPC_ACK_REQUESTED;
633 }
634
635 /* generate an ACK on the final packet of a reply just received */
636 if (hdr->flags & RXRPC_LAST_PACKET) {
637 if (call->conn->out_clientflag)
638 force = 1;
639 }
640 else if (!(hdr->flags & RXRPC_MORE_PACKETS)) {
641 do_ACK = RXRPC_ACK_REQUESTED;
642 }
643
644 /* re-ACK packets previously received out-of-order */
645 for (offset++; offset < RXRPC_CALL_ACK_WINDOW_SIZE; offset++)
646 if (call->ackr_array[offset] != RXRPC_ACK_TYPE_ACK)
647 break;
648
649 call->ackr_pend_cnt = offset;
650
651 /* generate an ACK if we fill up the window */
652 if (call->ackr_pend_cnt >= RXRPC_CALL_ACK_WINDOW_SIZE)
653 force = 1;
654
655 gen_ACK:
656 _debug("%05lu ACKs pend=%u norm=%s special=%s%s",
657 jiffies - call->cjif,
658 call->ackr_pend_cnt,
659 rxrpc_acks[do_ACK],
660 rxrpc_acks[special_ACK],
661 force ? " immediate" :
662 do_ACK == RXRPC_ACK_REQUESTED ? " merge-req" :
663 hdr->flags & RXRPC_LAST_PACKET ? " finalise" :
664 " defer"
665 );
666
667 /* send any pending normal ACKs if need be */
668 if (call->ackr_pend_cnt > 0) {
669 /* fill out the appropriate form */
670 call->ackr.bufferSpace = htons(RXRPC_CALL_ACK_WINDOW_SIZE);
671 call->ackr.maxSkew = htons(min(call->ackr_high_seq - seq,
672 65535U));
673 call->ackr.firstPacket = htonl(call->ackr_win_bot);
674 call->ackr.previousPacket = call->ackr_prev_seq;
675 call->ackr.serial = hdr->serial;
676 call->ackr.nAcks = call->ackr_pend_cnt;
677
678 if (do_ACK == RXRPC_ACK_REQUESTED)
679 call->ackr.reason = do_ACK;
680
681 /* generate the ACK immediately if necessary */
682 if (special_ACK || force) {
683 err = __rxrpc_call_gen_normal_ACK(
684 call, do_ACK == RXRPC_ACK_DELAY ? 0 : seq);
685 if (err < 0) {
686 ret = err;
687 goto out;
688 }
689 }
690 }
691
692 if (call->ackr.reason == RXRPC_ACK_REQUESTED)
693 call->ackr_dfr_seq = seq;
694
695	/* start the ACK timer, if it's not already running, if there are any
696	 * pending deferred ACKs */
697 if (call->ackr_pend_cnt > 0 &&
698 call->ackr.reason != RXRPC_ACK_REQUESTED &&
699 !timer_pending(&call->ackr_dfr_timo)
700 ) {
701 unsigned long timo;
702
703 timo = rxrpc_call_dfr_ack_timeout + jiffies;
704
705 _debug("START ACKR TIMER for cj=%lu", timo - call->cjif);
706
707 spin_lock(&call->lock);
708 mod_timer(&call->ackr_dfr_timo, timo);
709 spin_unlock(&call->lock);
710 }
711 else if ((call->ackr_pend_cnt == 0 ||
712 call->ackr.reason == RXRPC_ACK_REQUESTED) &&
713 timer_pending(&call->ackr_dfr_timo)
714 ) {
715 /* stop timer if no pending ACKs */
716 _debug("CLEAR ACKR TIMER");
717 del_timer_sync(&call->ackr_dfr_timo);
718 }
719
720 /* send a special ACK if one is required */
721 if (special_ACK) {
722 struct rxrpc_ackpacket ack;
723 struct kvec diov[2];
724 uint8_t acks[1] = { RXRPC_ACK_TYPE_ACK };
725
726 /* fill out the appropriate form */
727 ack.bufferSpace = htons(RXRPC_CALL_ACK_WINDOW_SIZE);
728 ack.maxSkew = htons(min(call->ackr_high_seq - seq,
729 65535U));
730 ack.firstPacket = htonl(call->ackr_win_bot);
731 ack.previousPacket = call->ackr_prev_seq;
732 ack.serial = hdr->serial;
733 ack.reason = special_ACK;
734 ack.nAcks = 0;
735
736 _proto("Rx Sending s-ACK"
737 " { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
738 ntohs(ack.maxSkew),
739 ntohl(ack.firstPacket),
740 ntohl(ack.previousPacket),
741 ntohl(ack.serial),
742 rxrpc_acks[ack.reason],
743 ack.nAcks);
744
745 diov[0].iov_len = sizeof(struct rxrpc_ackpacket);
746 diov[0].iov_base = &ack;
747 diov[1].iov_len = sizeof(acks);
748 diov[1].iov_base = acks;
749
750 /* build and send the message */
751 err = rxrpc_conn_newmsg(call->conn,call, RXRPC_PACKET_TYPE_ACK,
752 hdr->seq ? 2 : 1, diov,
753 GFP_KERNEL,
754 &msg);
755 if (err < 0) {
756 ret = err;
757 goto out;
758 }
759
760 msg->seq = seq;
761 msg->hdr.seq = htonl(seq);
762 msg->hdr.flags |= RXRPC_SLOW_START_OK;
763
764 err = rxrpc_conn_sendmsg(call->conn, msg);
765 rxrpc_put_message(msg);
766 if (err < 0) {
767 ret = err;
768 goto out;
769 }
770 call->pkt_snd_count++;
771 }
772
773 out:
774 if (hdr->seq)
775 call->ackr_prev_seq = hdr->seq;
776
777 _leave(" = %d", ret);
778 return ret;
779} /* end rxrpc_call_generate_ACK() */
780
781/*****************************************************************************/
782/*
783 * handle work to be done on a call
784 * - includes packet reception and timeout processing
785 */
786void rxrpc_call_do_stuff(struct rxrpc_call *call)
787{
788 _enter("%p{flags=%lx}", call, call->flags);
789
790 /* handle packet reception */
791 if (call->flags & RXRPC_CALL_RCV_PKT) {
792 _debug("- receive packet");
793 call->flags &= ~RXRPC_CALL_RCV_PKT;
794 rxrpc_call_receive_packet(call);
795 }
796
797 /* handle overdue ACKs */
798 if (call->flags & RXRPC_CALL_ACKS_TIMO) {
799 _debug("- overdue ACK timeout");
800 call->flags &= ~RXRPC_CALL_ACKS_TIMO;
801 rxrpc_call_resend(call, call->snd_seq_count);
802 }
803
804 /* handle lack of reception */
805 if (call->flags & RXRPC_CALL_RCV_TIMO) {
806 _debug("- reception timeout");
807 call->flags &= ~RXRPC_CALL_RCV_TIMO;
808 rxrpc_call_abort(call, -EIO);
809 }
810
811 /* handle deferred ACKs */
812 if (call->flags & RXRPC_CALL_ACKR_TIMO ||
813 (call->ackr.nAcks > 0 && call->ackr.reason == RXRPC_ACK_REQUESTED)
814 ) {
815 _debug("- deferred ACK timeout: cj=%05lu r=%s n=%u",
816 jiffies - call->cjif,
817 rxrpc_acks[call->ackr.reason],
818 call->ackr.nAcks);
819
820 call->flags &= ~RXRPC_CALL_ACKR_TIMO;
821
822 if (call->ackr.nAcks > 0 &&
823 call->app_call_state != RXRPC_CSTATE_ERROR) {
824 /* generate ACK */
825 __rxrpc_call_gen_normal_ACK(call, call->ackr_dfr_seq);
826 call->ackr_dfr_seq = 0;
827 }
828 }
829
830 _leave("");
831
832} /* end rxrpc_call_do_stuff() */
833
834/*****************************************************************************/
835/*
836 * send an abort message at call or connection level
837 * - must be called with call->lock held
838 * - the supplied error code is sent as the packet data
839 */
840static int __rxrpc_call_abort(struct rxrpc_call *call, int errno)
841{
842 struct rxrpc_connection *conn = call->conn;
843 struct rxrpc_message *msg;
844 struct kvec diov[1];
845 int ret;
846 __be32 _error;
847
848 _enter("%p{%08x},%p{%d},%d",
849 conn, ntohl(conn->conn_id), call, ntohl(call->call_id), errno);
850
851 /* if this call is already aborted, then just wake up any waiters */
852 if (call->app_call_state == RXRPC_CSTATE_ERROR) {
853 spin_unlock(&call->lock);
854 call->app_error_func(call);
855 _leave(" = 0");
856 return 0;
857 }
858
859 rxrpc_get_call(call);
860
861 /* change the state _with_ the lock still held */
862 call->app_call_state = RXRPC_CSTATE_ERROR;
863 call->app_err_state = RXRPC_ESTATE_LOCAL_ABORT;
864 call->app_errno = errno;
865 call->app_mark = RXRPC_APP_MARK_EOF;
866 call->app_read_buf = NULL;
867 call->app_async_read = 0;
868
869 _state(call);
870
871 /* ask the app to translate the error code */
872 call->app_aemap_func(call);
873
874 spin_unlock(&call->lock);
875
876 /* flush any outstanding ACKs */
877 del_timer_sync(&call->acks_timeout);
878 del_timer_sync(&call->rcv_timeout);
879 del_timer_sync(&call->ackr_dfr_timo);
880
881 if (rxrpc_call_is_ack_pending(call))
882 __rxrpc_call_gen_normal_ACK(call, 0);
883
884 /* send the abort packet only if we actually traded some other
885 * packets */
886 ret = 0;
887 if (call->pkt_snd_count || call->pkt_rcv_count) {
888 /* actually send the abort */
889 _proto("Rx Sending Call ABORT { data=%d }",
890 call->app_abort_code);
891
892 _error = htonl(call->app_abort_code);
893
894 diov[0].iov_len = sizeof(_error);
895 diov[0].iov_base = &_error;
896
897 ret = rxrpc_conn_newmsg(conn, call, RXRPC_PACKET_TYPE_ABORT,
898 1, diov, GFP_KERNEL, &msg);
899 if (ret == 0) {
900 ret = rxrpc_conn_sendmsg(conn, msg);
901 rxrpc_put_message(msg);
902 }
903 }
904
905 /* tell the app layer to let go */
906 call->app_error_func(call);
907
908 rxrpc_put_call(call);
909
910 _leave(" = %d", ret);
911 return ret;
912} /* end __rxrpc_call_abort() */
913
914/*****************************************************************************/
915/*
916 * send an abort message at call or connection level
917 * - the supplied error code is sent as the packet data
918 */
919int rxrpc_call_abort(struct rxrpc_call *call, int error)
920{
921 spin_lock(&call->lock);
922
923 return __rxrpc_call_abort(call, error);
924
925} /* end rxrpc_call_abort() */
926
927/*****************************************************************************/
928/*
929 * process packets waiting for this call
930 */
931static void rxrpc_call_receive_packet(struct rxrpc_call *call)
932{
933 struct rxrpc_message *msg;
934 struct list_head *_p;
935
936 _enter("%p", call);
937
938 rxrpc_get_call(call); /* must not go away too soon if aborted by
939 * app-layer */
940
941 while (!list_empty(&call->rcv_receiveq)) {
942 /* try to get next packet */
943 _p = NULL;
944 spin_lock(&call->lock);
945 if (!list_empty(&call->rcv_receiveq)) {
946 _p = call->rcv_receiveq.next;
947 list_del_init(_p);
948 }
949 spin_unlock(&call->lock);
950
951 if (!_p)
952 break;
953
954 msg = list_entry(_p, struct rxrpc_message, link);
955
956 _proto("Rx %05lu Received %s packet (%%%u,#%u,%c%c%c%c%c)",
957 jiffies - call->cjif,
958 rxrpc_pkts[msg->hdr.type],
959 ntohl(msg->hdr.serial),
960 msg->seq,
961 msg->hdr.flags & RXRPC_JUMBO_PACKET ? 'j' : '-',
962 msg->hdr.flags & RXRPC_MORE_PACKETS ? 'm' : '-',
963 msg->hdr.flags & RXRPC_LAST_PACKET ? 'l' : '-',
964 msg->hdr.flags & RXRPC_REQUEST_ACK ? 'r' : '-',
965 msg->hdr.flags & RXRPC_CLIENT_INITIATED ? 'C' : 'S'
966 );
967
968 switch (msg->hdr.type) {
969 /* deal with data packets */
970 case RXRPC_PACKET_TYPE_DATA:
971 /* ACK the packet if necessary */
972 switch (rxrpc_call_generate_ACK(call, &msg->hdr,
973 NULL)) {
974 case 0: /* useful packet */
975 rxrpc_call_receive_data_packet(call, msg);
976 break;
977 case 1: /* duplicate or out-of-window packet */
978 break;
979 default:
980 rxrpc_put_message(msg);
981 goto out;
982 }
983 break;
984
985 /* deal with ACK packets */
986 case RXRPC_PACKET_TYPE_ACK:
987 rxrpc_call_receive_ack_packet(call, msg);
988 break;
989
990 /* deal with abort packets */
991 case RXRPC_PACKET_TYPE_ABORT: {
992 __be32 _dbuf, *dp;
993
994 dp = skb_header_pointer(msg->pkt, msg->offset,
995 sizeof(_dbuf), &_dbuf);
996 if (dp == NULL)
997 printk("Rx Received short ABORT packet\n");
998
999 _proto("Rx Received Call ABORT { data=%d }",
1000 (dp ? ntohl(*dp) : 0));
1001
1002 spin_lock(&call->lock);
1003 call->app_call_state = RXRPC_CSTATE_ERROR;
1004 call->app_err_state = RXRPC_ESTATE_PEER_ABORT;
1005 call->app_abort_code = (dp ? ntohl(*dp) : 0);
1006 call->app_errno = -ECONNABORTED;
1007 call->app_mark = RXRPC_APP_MARK_EOF;
1008 call->app_read_buf = NULL;
1009 call->app_async_read = 0;
1010
1011 /* ask the app to translate the error code */
1012 call->app_aemap_func(call);
1013 _state(call);
1014 spin_unlock(&call->lock);
1015 call->app_error_func(call);
1016 break;
1017 }
1018 default:
1019 /* deal with other packet types */
1020 _proto("Rx Unsupported packet type %u (#%u)",
1021 msg->hdr.type, msg->seq);
1022 break;
1023 }
1024
1025 rxrpc_put_message(msg);
1026 }
1027
1028 out:
1029 rxrpc_put_call(call);
1030 _leave("");
1031} /* end rxrpc_call_receive_packet() */
1032
1033/*****************************************************************************/
1034/*
1035 * process next data packet
1036 * - as the next data packet arrives:
1037 * - it is queued on app_readyq _if_ it is the next one expected
1038 * (app_ready_seq+1)
1039 * - it is queued on app_unreadyq _if_ it is not the next one expected
1040 * - if a packet placed on app_readyq completely fills a hole leading up to
1041 * the first packet on app_unreadyq, then packets now in sequence are
1042 * transferred to app_readyq
1043 * - the application layer can only see packets on app_readyq
1044 * (app_ready_qty bytes)
1045 * - the application layer is prodded every time a new packet arrives
1046 */
1047static void rxrpc_call_receive_data_packet(struct rxrpc_call *call,
1048 struct rxrpc_message *msg)
1049{
1050 const struct rxrpc_operation *optbl, *op;
1051 struct rxrpc_message *pmsg;
1052 struct list_head *_p;
1053 int ret, lo, hi, rmtimo;
1054 __be32 opid;
1055
1056 _enter("%p{%u},%p{%u}", call, ntohl(call->call_id), msg, msg->seq);
1057
1058 rxrpc_get_message(msg);
1059
1060 /* add to the unready queue if we'd have to create a hole in the ready
1061 * queue otherwise */
1062 if (msg->seq != call->app_ready_seq + 1) {
1063 _debug("Call add packet %d to unreadyq", msg->seq);
1064
1065 /* insert in seq order */
1066 list_for_each(_p, &call->app_unreadyq) {
1067 pmsg = list_entry(_p, struct rxrpc_message, link);
1068 if (pmsg->seq > msg->seq)
1069 break;
1070 }
1071
1072 list_add_tail(&msg->link, _p);
1073
1074 _leave(" [unreadyq]");
1075 return;
1076 }
1077
1078 /* next in sequence - simply append into the call's ready queue */
1079 _debug("Call add packet %d to readyq (+%Zd => %Zd bytes)",
1080 msg->seq, msg->dsize, call->app_ready_qty);
1081
1082 spin_lock(&call->lock);
1083 call->app_ready_seq = msg->seq;
1084 call->app_ready_qty += msg->dsize;
1085 list_add_tail(&msg->link, &call->app_readyq);
1086
1087 /* move unready packets to the readyq if we got rid of a hole */
1088 while (!list_empty(&call->app_unreadyq)) {
1089 pmsg = list_entry(call->app_unreadyq.next,
1090 struct rxrpc_message, link);
1091
1092 if (pmsg->seq != call->app_ready_seq + 1)
1093 break;
1094
1095 /* next in sequence - just move list-to-list */
1096 _debug("Call transfer packet %d to readyq (+%Zd => %Zd bytes)",
1097 pmsg->seq, pmsg->dsize, call->app_ready_qty);
1098
1099 call->app_ready_seq = pmsg->seq;
1100 call->app_ready_qty += pmsg->dsize;
1101 list_move_tail(&pmsg->link, &call->app_readyq);
1102 }
1103
1104 /* see if we've got the last packet yet */
1105 if (!list_empty(&call->app_readyq)) {
1106 pmsg = list_entry(call->app_readyq.prev,
1107 struct rxrpc_message, link);
1108 if (pmsg->hdr.flags & RXRPC_LAST_PACKET) {
1109 call->app_last_rcv = 1;
1110 _debug("Last packet on readyq");
1111 }
1112 }
1113
1114 switch (call->app_call_state) {
1115 /* do nothing if call already aborted */
1116 case RXRPC_CSTATE_ERROR:
1117 spin_unlock(&call->lock);
1118 _leave(" [error]");
1119 return;
1120
1121 /* extract the operation ID from an incoming call if that hasn't
1122 * yet been done */
1123 case RXRPC_CSTATE_SRVR_RCV_OPID:
1124 spin_unlock(&call->lock);
1125
1126 /* handle as yet insufficient data for the operation ID */
1127 if (call->app_ready_qty < 4) {
1128 if (call->app_last_rcv)
1129 /* trouble - last packet seen */
1130 rxrpc_call_abort(call, -EINVAL);
1131
1132 _leave("");
1133 return;
1134 }
1135
1136 /* pull the operation ID out of the buffer */
1137 ret = rxrpc_call_read_data(call, &opid, sizeof(opid), 0);
1138 if (ret < 0) {
1139 printk("Unexpected error from read-data: %d\n", ret);
1140 if (call->app_call_state != RXRPC_CSTATE_ERROR)
1141 rxrpc_call_abort(call, ret);
1142 _leave("");
1143 return;
1144 }
1145 call->app_opcode = ntohl(opid);
1146
1147 /* locate the operation in the available ops table */
1148 optbl = call->conn->service->ops_begin;
1149 lo = 0;
1150 hi = call->conn->service->ops_end - optbl;
1151
1152 while (lo < hi) {
1153 int mid = (hi + lo) / 2;
1154 op = &optbl[mid];
1155 if (call->app_opcode == op->id)
1156 goto found_op;
1157 if (call->app_opcode > op->id)
1158 lo = mid + 1;
1159 else
1160 hi = mid;
1161 }
1162
1163 /* search failed */
1164 kproto("Rx Client requested operation %d from %s service",
1165 call->app_opcode, call->conn->service->name);
1166 rxrpc_call_abort(call, -EINVAL);
1167 _leave(" [inval]");
1168 return;
1169
1170 found_op:
1171 _proto("Rx Client requested operation %s from %s service",
1172 op->name, call->conn->service->name);
1173
1174 /* we're now waiting for the argument block (unless the call
1175 * was aborted) */
1176 spin_lock(&call->lock);
1177 if (call->app_call_state == RXRPC_CSTATE_SRVR_RCV_OPID ||
1178 call->app_call_state == RXRPC_CSTATE_SRVR_SND_REPLY) {
1179 if (!call->app_last_rcv)
1180 call->app_call_state =
1181 RXRPC_CSTATE_SRVR_RCV_ARGS;
1182 else if (call->app_ready_qty > 0)
1183 call->app_call_state =
1184 RXRPC_CSTATE_SRVR_GOT_ARGS;
1185 else
1186 call->app_call_state =
1187 RXRPC_CSTATE_SRVR_SND_REPLY;
1188 call->app_mark = op->asize;
1189 call->app_user = op->user;
1190 }
1191 spin_unlock(&call->lock);
1192
1193 _state(call);
1194 break;
1195
1196 case RXRPC_CSTATE_SRVR_RCV_ARGS:
1197 /* change state if just received last packet of arg block */
1198 if (call->app_last_rcv)
1199 call->app_call_state = RXRPC_CSTATE_SRVR_GOT_ARGS;
1200 spin_unlock(&call->lock);
1201
1202 _state(call);
1203 break;
1204
1205 case RXRPC_CSTATE_CLNT_RCV_REPLY:
1206 /* change state if just received last packet of reply block */
1207 rmtimo = 0;
1208 if (call->app_last_rcv) {
1209 call->app_call_state = RXRPC_CSTATE_CLNT_GOT_REPLY;
1210 rmtimo = 1;
1211 }
1212 spin_unlock(&call->lock);
1213
1214 if (rmtimo) {
1215 del_timer_sync(&call->acks_timeout);
1216 del_timer_sync(&call->rcv_timeout);
1217 del_timer_sync(&call->ackr_dfr_timo);
1218 }
1219
1220 _state(call);
1221 break;
1222
1223 default:
1224 /* deal with data reception in an unexpected state */
1225 printk("Unexpected state [[[ %u ]]]\n", call->app_call_state);
1226 __rxrpc_call_abort(call, -EBADMSG);
1227 _leave("");
1228 return;
1229 }
1230
1231 if (call->app_call_state == RXRPC_CSTATE_CLNT_RCV_REPLY &&
1232 call->app_last_rcv)
1233 BUG();
1234
1235 /* otherwise just invoke the data function whenever we can satisfy
1236 * its desire for more data
1237 */
1238 _proto("Rx Received Op Data: st=%u qty=%Zu mk=%Zu%s",
1239 call->app_call_state, call->app_ready_qty, call->app_mark,
1240 call->app_last_rcv ? " last-rcvd" : "");
1241
1242 spin_lock(&call->lock);
1243
1244 ret = __rxrpc_call_read_data(call);
1245 switch (ret) {
1246 case 0:
1247 spin_unlock(&call->lock);
1248 call->app_attn_func(call);
1249 break;
1250 case -EAGAIN:
1251 spin_unlock(&call->lock);
1252 break;
1253 case -ECONNABORTED:
1254 spin_unlock(&call->lock);
1255 break;
1256 default:
1257 __rxrpc_call_abort(call, ret);
1258 break;
1259 }
1260
1261 _state(call);
1262
1263 _leave("");
1264
1265} /* end rxrpc_call_receive_data_packet() */
1266
1267/*****************************************************************************/
1268/*
1269 * received an ACK packet
1270 */
1271static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call,
1272 struct rxrpc_message *msg)
1273{
1274 struct rxrpc_ackpacket _ack, *ap;
1275 rxrpc_serial_net_t serial;
1276 rxrpc_seq_t seq;
1277 int ret;
1278
1279 _enter("%p{%u},%p{%u}", call, ntohl(call->call_id), msg, msg->seq);
1280
1281 /* extract the basic ACK record */
1282 ap = skb_header_pointer(msg->pkt, msg->offset, sizeof(_ack), &_ack);
1283 if (ap == NULL) {
1284 printk("Rx Received short ACK packet\n");
1285 return;
1286 }
1287 msg->offset += sizeof(_ack);
1288
1289 serial = ap->serial;
1290 seq = ntohl(ap->firstPacket);
1291
1292 _proto("Rx Received ACK %%%d { b=%hu m=%hu f=%u p=%u s=%u r=%s n=%u }",
1293 ntohl(msg->hdr.serial),
1294 ntohs(ap->bufferSpace),
1295 ntohs(ap->maxSkew),
1296 seq,
1297 ntohl(ap->previousPacket),
1298 ntohl(serial),
1299 rxrpc_acks[ap->reason],
1300 ap->nAcks
1301 );
1302
1303 /* check the other side isn't ACK'ing a sequence number I haven't sent
1304 * yet */
1305 if (ap->nAcks > 0 &&
1306 (seq > call->snd_seq_count ||
1307 seq + ap->nAcks - 1 > call->snd_seq_count)) {
1308 printk("Received ACK (#%u-#%u) for unsent packet\n",
1309 seq, seq + ap->nAcks - 1);
1310 rxrpc_call_abort(call, -EINVAL);
1311 _leave("");
1312 return;
1313 }
1314
1315 /* deal with RTT calculation */
1316 if (serial) {
1317 struct rxrpc_message *rttmsg;
1318
1319 /* find the prompting packet */
1320 spin_lock(&call->lock);
1321 if (call->snd_ping && call->snd_ping->hdr.serial == serial) {
1322 /* it was a ping packet */
1323 rttmsg = call->snd_ping;
1324 call->snd_ping = NULL;
1325 spin_unlock(&call->lock);
1326
1327 if (rttmsg) {
1328 rttmsg->rttdone = 1;
1329 rxrpc_peer_calculate_rtt(call->conn->peer,
1330 rttmsg, msg);
1331 rxrpc_put_message(rttmsg);
1332 }
1333 }
1334 else {
1335 struct list_head *_p;
1336
1337 /* it ought to be a data packet - look in the pending
1338 * ACK list */
1339 list_for_each(_p, &call->acks_pendq) {
1340 rttmsg = list_entry(_p, struct rxrpc_message,
1341 link);
1342 if (rttmsg->hdr.serial == serial) {
1343 if (rttmsg->rttdone)
1344 /* never do RTT twice without
1345 * resending */
1346 break;
1347
1348 rttmsg->rttdone = 1;
1349 rxrpc_peer_calculate_rtt(
1350 call->conn->peer, rttmsg, msg);
1351 break;
1352 }
1353 }
1354 spin_unlock(&call->lock);
1355 }
1356 }
1357
1358 switch (ap->reason) {
1359 /* deal with negative/positive acknowledgement of data
1360 * packets */
1361 case RXRPC_ACK_REQUESTED:
1362 case RXRPC_ACK_DELAY:
1363 case RXRPC_ACK_IDLE:
1364 rxrpc_call_definitively_ACK(call, seq - 1);
1365 /* and fall through to record the ACK set */
1366 case RXRPC_ACK_DUPLICATE:
1367 case RXRPC_ACK_OUT_OF_SEQUENCE:
1368 case RXRPC_ACK_EXCEEDS_WINDOW:
1369 call->snd_resend_cnt = 0;
1370 ret = rxrpc_call_record_ACK(call, msg, seq, ap->nAcks);
1371 if (ret < 0)
1372 rxrpc_call_abort(call, ret);
1373 break;
1374
1375 /* respond to ping packets immediately */
1376 case RXRPC_ACK_PING:
1377 rxrpc_call_generate_ACK(call, &msg->hdr, ap);
1378 break;
1379
1380 /* only record RTT on ping response packets */
1381 case RXRPC_ACK_PING_RESPONSE:
1382 if (call->snd_ping) {
1383 struct rxrpc_message *rttmsg;
1384
1385 /* only do RTT stuff if the response matches the
1386 * retained ping */
1387 rttmsg = NULL;
1388 spin_lock(&call->lock);
1389 if (call->snd_ping &&
1390 call->snd_ping->hdr.serial == ap->serial) {
1391 rttmsg = call->snd_ping;
1392 call->snd_ping = NULL;
1393 }
1394 spin_unlock(&call->lock);
1395
1396 if (rttmsg) {
1397 rttmsg->rttdone = 1;
1398 rxrpc_peer_calculate_rtt(call->conn->peer,
1399 rttmsg, msg);
1400 rxrpc_put_message(rttmsg);
1401 }
1402 }
1403 break;
1404
1405 default:
1406 printk("Unsupported ACK reason %u\n", ap->reason);
1407 break;
1408 }
1409
1410 _leave("");
1411} /* end rxrpc_call_receive_ack_packet() */
1412
1413/*****************************************************************************/
1414/*
1415 * record definitive ACKs for all messages up to and including the one with the
1416 * 'highest' seq
1417 */
1418static void rxrpc_call_definitively_ACK(struct rxrpc_call *call,
1419 rxrpc_seq_t highest)
1420{
1421 struct rxrpc_message *msg;
1422 int now_complete;
1423
1424 _enter("%p{ads=%u},%u", call, call->acks_dftv_seq, highest);
1425
1426 while (call->acks_dftv_seq < highest) {
1427 call->acks_dftv_seq++;
1428
1429 _proto("Definitive ACK on packet #%u", call->acks_dftv_seq);
1430
1431 /* discard those at front of queue until message with highest
1432 * ACK is found */
1433 spin_lock(&call->lock);
1434 msg = NULL;
1435 if (!list_empty(&call->acks_pendq)) {
1436 msg = list_entry(call->acks_pendq.next,
1437 struct rxrpc_message, link);
1438 list_del_init(&msg->link); /* dequeue */
1439 if (msg->state == RXRPC_MSG_SENT)
1440 call->acks_pend_cnt--;
1441 }
1442 spin_unlock(&call->lock);
1443
1444 /* insanity check */
1445 if (!msg)
1446 panic("%s(): acks_pendq unexpectedly empty\n",
1447 __FUNCTION__);
1448
1449 if (msg->seq != call->acks_dftv_seq)
1450 panic("%s(): Packet #%u expected at front of acks_pendq"
1451 " (#%u found)\n",
1452 __FUNCTION__, call->acks_dftv_seq, msg->seq);
1453
1454 /* discard the message */
1455 msg->state = RXRPC_MSG_DONE;
1456 rxrpc_put_message(msg);
1457 }
1458
1459 /* if all sent packets are definitively ACK'd then prod any sleepers just in case */
1460 now_complete = 0;
1461 spin_lock(&call->lock);
1462 if (call->acks_dftv_seq == call->snd_seq_count) {
1463 if (call->app_call_state != RXRPC_CSTATE_COMPLETE) {
1464 call->app_call_state = RXRPC_CSTATE_COMPLETE;
1465 _state(call);
1466 now_complete = 1;
1467 }
1468 }
1469 spin_unlock(&call->lock);
1470
1471 if (now_complete) {
1472 del_timer_sync(&call->acks_timeout);
1473 del_timer_sync(&call->rcv_timeout);
1474 del_timer_sync(&call->ackr_dfr_timo);
1475 call->app_attn_func(call);
1476 }
1477
1478 _leave("");
1479} /* end rxrpc_call_definitively_ACK() */
1480
1481/*****************************************************************************/
1482/*
1483 * record the specified amount of ACKs/NAKs
1484 */
1485static int rxrpc_call_record_ACK(struct rxrpc_call *call,
1486 struct rxrpc_message *msg,
1487 rxrpc_seq_t seq,
1488 size_t count)
1489{
1490 struct rxrpc_message *dmsg;
1491 struct list_head *_p;
1492 rxrpc_seq_t highest;
1493 unsigned ix;
1494 size_t chunk;
1495 char resend, now_complete;
1496 u8 acks[16];
1497
1498 _enter("%p{apc=%u ads=%u},%p,%u,%Zu",
1499 call, call->acks_pend_cnt, call->acks_dftv_seq,
1500 msg, seq, count);
1501
1502 /* handle re-ACK'ing of definitively ACK'd packets (may be out-of-order
1503 * ACKs) */
1504 if (seq <= call->acks_dftv_seq) {
1505 unsigned delta = call->acks_dftv_seq - seq;
1506
1507 if (count <= delta) {
1508 _leave(" = 0 [all definitively ACK'd]");
1509 return 0;
1510 }
1511
1512 seq += delta;
1513 count -= delta;
1514 msg->offset += delta;
1515 }
1516
1517 highest = seq + count - 1;
1518 resend = 0;
1519 while (count > 0) {
1520 /* extract up to 16 ACK slots at a time */
1521 chunk = min(count, sizeof(acks));
1522 count -= chunk;
1523
1524 memset(acks, 2, sizeof(acks));
1525
1526 if (skb_copy_bits(msg->pkt, msg->offset, &acks, chunk) < 0) {
1527 printk("Rx Received short ACK packet\n");
1528 _leave(" = -EINVAL");
1529 return -EINVAL;
1530 }
1531 msg->offset += chunk;
1532
1533 /* check that the ACK set is valid */
1534 for (ix = 0; ix < chunk; ix++) {
1535 switch (acks[ix]) {
1536 case RXRPC_ACK_TYPE_ACK:
1537 break;
1538 case RXRPC_ACK_TYPE_NACK:
1539 resend = 1;
1540 break;
1541 default:
1542 printk("Rx Received unsupported ACK state"
1543 " %u\n", acks[ix]);
1544 _leave(" = -EINVAL");
1545 return -EINVAL;
1546 }
1547 }
1548
1549 _proto("Rx ACK of packets #%u-#%u "
1550 "[%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c] (pend=%u)",
1551 seq, (unsigned) (seq + chunk - 1),
1552 _acktype[acks[0x0]],
1553 _acktype[acks[0x1]],
1554 _acktype[acks[0x2]],
1555 _acktype[acks[0x3]],
1556 _acktype[acks[0x4]],
1557 _acktype[acks[0x5]],
1558 _acktype[acks[0x6]],
1559 _acktype[acks[0x7]],
1560 _acktype[acks[0x8]],
1561 _acktype[acks[0x9]],
1562 _acktype[acks[0xA]],
1563 _acktype[acks[0xB]],
1564 _acktype[acks[0xC]],
1565 _acktype[acks[0xD]],
1566 _acktype[acks[0xE]],
1567 _acktype[acks[0xF]],
1568 call->acks_pend_cnt
1569 );
1570
1571 /* mark the packets in the ACK queue as being provisionally
1572 * ACK'd */
1573 ix = 0;
1574 spin_lock(&call->lock);
1575
1576 /* find the first packet ACK'd/NAK'd here */
1577 list_for_each(_p, &call->acks_pendq) {
1578 dmsg = list_entry(_p, struct rxrpc_message, link);
1579 if (dmsg->seq == seq)
1580 goto found_first;
1581 _debug("- %u: skipping #%u", ix, dmsg->seq);
1582 }
1583 goto bad_queue;
1584
1585 found_first:
1586 do {
1587 _debug("- %u: processing #%u (%c) apc=%u",
1588 ix, dmsg->seq, _acktype[acks[ix]],
1589 call->acks_pend_cnt);
1590
1591 if (acks[ix] == RXRPC_ACK_TYPE_ACK) {
1592 if (dmsg->state == RXRPC_MSG_SENT)
1593 call->acks_pend_cnt--;
1594 dmsg->state = RXRPC_MSG_ACKED;
1595 }
1596 else {
1597 if (dmsg->state == RXRPC_MSG_ACKED)
1598 call->acks_pend_cnt++;
1599 dmsg->state = RXRPC_MSG_SENT;
1600 }
1601 ix++;
1602 seq++;
1603
1604 _p = dmsg->link.next;
1605 dmsg = list_entry(_p, struct rxrpc_message, link);
1606 } while (ix < chunk &&
1607 _p != &call->acks_pendq &&
1608 dmsg->seq == seq);
1609
1610 if (ix < chunk)
1611 goto bad_queue;
1612
1613 spin_unlock(&call->lock);
1614 }
1615
1616 if (resend)
1617 rxrpc_call_resend(call, highest);
1618
1619 /* if all packets are provisionally ACK'd, then wake up anyone who's
1620 * waiting for that */
1621 now_complete = 0;
1622 spin_lock(&call->lock);
1623 if (call->acks_pend_cnt == 0) {
1624 if (call->app_call_state == RXRPC_CSTATE_SRVR_RCV_FINAL_ACK) {
1625 call->app_call_state = RXRPC_CSTATE_COMPLETE;
1626 _state(call);
1627 }
1628 now_complete = 1;
1629 }
1630 spin_unlock(&call->lock);
1631
1632 if (now_complete) {
1633 _debug("- wake up waiters");
1634 del_timer_sync(&call->acks_timeout);
1635 del_timer_sync(&call->rcv_timeout);
1636 del_timer_sync(&call->ackr_dfr_timo);
1637 call->app_attn_func(call);
1638 }
1639
1640 _leave(" = 0 (apc=%u)", call->acks_pend_cnt);
1641 return 0;
1642
1643 bad_queue:
1644 panic("%s(): acks_pendq in bad state (packet #%u absent)\n",
1645 __FUNCTION__, seq);
1646
1647} /* end rxrpc_call_record_ACK() */
1648
1649/*****************************************************************************/
1650/*
1651 * transfer data from the ready packet queue to the asynchronous read buffer
1652 * - since this func is the only one going to look at packets queued on
1653 * app_readyq, we don't need a lock to modify or access them, only to modify
1654 * the queue pointers
1655 * - called with call->lock held
1656 * - the buffer must be in kernel space
1657 * - returns:
1658 * 0 if buffer filled
1659 * -EAGAIN if buffer not filled and more data to come
1660 * -EBADMSG if last packet received and insufficient data left
1661 * -ECONNABORTED if the call is in an error state
1662 */
1663static int __rxrpc_call_read_data(struct rxrpc_call *call)
1664{
1665 struct rxrpc_message *msg;
1666 size_t qty;
1667 int ret;
1668
1669 _enter("%p{as=%d buf=%p qty=%Zu/%Zu}",
1670 call,
1671 call->app_async_read, call->app_read_buf,
1672 call->app_ready_qty, call->app_mark);
1673
1674 /* check the state */
1675 switch (call->app_call_state) {
1676 case RXRPC_CSTATE_SRVR_RCV_ARGS:
1677 case RXRPC_CSTATE_CLNT_RCV_REPLY:
1678 if (call->app_last_rcv) {
1679 printk("%s(%p,%p,%Zd):"
1680 " Inconsistent call state (%s, last pkt)",
1681 __FUNCTION__,
1682 call, call->app_read_buf, call->app_mark,
1683 rxrpc_call_states[call->app_call_state]);
1684 BUG();
1685 }
1686 break;
1687
1688 case RXRPC_CSTATE_SRVR_RCV_OPID:
1689 case RXRPC_CSTATE_SRVR_GOT_ARGS:
1690 case RXRPC_CSTATE_CLNT_GOT_REPLY:
1691 break;
1692
1693 case RXRPC_CSTATE_SRVR_SND_REPLY:
1694 if (!call->app_last_rcv) {
1695 printk("%s(%p,%p,%Zd):"
1696 " Inconsistent call state (%s, not last pkt)",
1697 __FUNCTION__,
1698 call, call->app_read_buf, call->app_mark,
1699 rxrpc_call_states[call->app_call_state]);
1700 BUG();
1701 }
1702 _debug("Trying to read data from call in SND_REPLY state");
1703 break;
1704
1705 case RXRPC_CSTATE_ERROR:
1706 _leave(" = -ECONNABORTED");
1707 return -ECONNABORTED;
1708
1709 default:
1710 printk("reading in unexpected state [[[ %u ]]]\n",
1711 call->app_call_state);
1712 BUG();
1713 }
1714
1715 /* handle the case of not having an async buffer */
1716 if (!call->app_async_read) {
1717 if (call->app_mark == RXRPC_APP_MARK_EOF) {
1718 ret = call->app_last_rcv ? 0 : -EAGAIN;
1719 }
1720 else {
1721 if (call->app_mark >= call->app_ready_qty) {
1722 call->app_mark = RXRPC_APP_MARK_EOF;
1723 ret = 0;
1724 }
1725 else {
1726 ret = call->app_last_rcv ? -EBADMSG : -EAGAIN;
1727 }
1728 }
1729
1730 _leave(" = %d [no buf]", ret);
1731 return ret;
1732 }
1733
1734 while (!list_empty(&call->app_readyq) && call->app_mark > 0) {
1735 msg = list_entry(call->app_readyq.next,
1736 struct rxrpc_message, link);
1737
1738 /* drag as much data as we need out of this packet */
1739 qty = min(call->app_mark, msg->dsize);
1740
1741 _debug("reading %Zu from skb=%p off=%lu",
1742 qty, msg->pkt, msg->offset);
1743
1744 if (call->app_read_buf)
1745 if (skb_copy_bits(msg->pkt, msg->offset,
1746 call->app_read_buf, qty) < 0)
1747 panic("%s: Failed to copy data from packet:"
1748 " (%p,%p,%Zd)",
1749 __FUNCTION__,
1750 call, call->app_read_buf, qty);
1751
1752 /* if that packet is now empty, discard it */
1753 call->app_ready_qty -= qty;
1754 msg->dsize -= qty;
1755
1756 if (msg->dsize == 0) {
1757 list_del_init(&msg->link);
1758 rxrpc_put_message(msg);
1759 }
1760 else {
1761 msg->offset += qty;
1762 }
1763
1764 call->app_mark -= qty;
1765 if (call->app_read_buf)
1766 call->app_read_buf += qty;
1767 }
1768
1769 if (call->app_mark == 0) {
1770 call->app_async_read = 0;
1771 call->app_mark = RXRPC_APP_MARK_EOF;
1772 call->app_read_buf = NULL;
1773
1774 /* adjust the state if used up all packets */
1775 if (list_empty(&call->app_readyq) && call->app_last_rcv) {
1776 switch (call->app_call_state) {
1777 case RXRPC_CSTATE_SRVR_RCV_OPID:
1778 call->app_call_state = RXRPC_CSTATE_SRVR_SND_REPLY;
1779 call->app_mark = RXRPC_APP_MARK_EOF;
1780 _state(call);
1781 del_timer_sync(&call->rcv_timeout);
1782 break;
1783 case RXRPC_CSTATE_SRVR_GOT_ARGS:
1784 call->app_call_state = RXRPC_CSTATE_SRVR_SND_REPLY;
1785 _state(call);
1786 del_timer_sync(&call->rcv_timeout);
1787 break;
1788 default:
1789 call->app_call_state = RXRPC_CSTATE_COMPLETE;
1790 _state(call);
1791 del_timer_sync(&call->acks_timeout);
1792 del_timer_sync(&call->ackr_dfr_timo);
1793 del_timer_sync(&call->rcv_timeout);
1794 break;
1795 }
1796 }
1797
1798 _leave(" = 0");
1799 return 0;
1800 }
1801
1802 if (call->app_last_rcv) {
1803 _debug("Insufficient data (%Zu/%Zu)",
1804 call->app_ready_qty, call->app_mark);
1805 call->app_async_read = 0;
1806 call->app_mark = RXRPC_APP_MARK_EOF;
1807 call->app_read_buf = NULL;
1808
1809 _leave(" = -EBADMSG");
1810 return -EBADMSG;
1811 }
1812
1813 _leave(" = -EAGAIN");
1814 return -EAGAIN;
1815} /* end __rxrpc_call_read_data() */
1816
1817/*****************************************************************************/
1818/*
1819 * attempt to read the specified amount of data from the call's ready queue
1820 * into the buffer provided
1821 * - since this func is the only one going to look at packets queued on
1822 * app_readyq, we don't need a lock to modify or access them, only to modify
1823 * the queue pointers
1824 * - if the buffer pointer is NULL, then data is merely drained, not copied
1825 * - if flags&RXRPC_CALL_READ_BLOCK, then the function will wait until there
1826 * is enough data or until an error occurs
1827 * - note that the caller must have added the calling task to the call's wait
1828 * queue beforehand
1829 * - if flags&RXRPC_CALL_READ_ALL, then an error will be generated if this
1830 * function doesn't read all available data
1831 */
1832int rxrpc_call_read_data(struct rxrpc_call *call,
1833 void *buffer, size_t size, int flags)
1834{
1835 int ret;
1836
1837 _enter("%p{arq=%Zu},%p,%Zd,%x",
1838 call, call->app_ready_qty, buffer, size, flags);
1839
1840 spin_lock(&call->lock);
1841
1842 if (unlikely(!!call->app_read_buf)) {
1843 spin_unlock(&call->lock);
1844 _leave(" = -EBUSY");
1845 return -EBUSY;
1846 }
1847
1848 call->app_mark = size;
1849 call->app_read_buf = buffer;
1850 call->app_async_read = 1;
1851 call->app_read_count++;
1852
1853 /* read as much data as possible */
1854 ret = __rxrpc_call_read_data(call);
1855 switch (ret) {
1856 case 0:
1857 if (flags & RXRPC_CALL_READ_ALL &&
1858 (!call->app_last_rcv || call->app_ready_qty > 0)) {
1859 _leave(" = -EBADMSG");
1860 __rxrpc_call_abort(call, -EBADMSG);
1861 return -EBADMSG;
1862 }
1863
1864 spin_unlock(&call->lock);
1865 call->app_attn_func(call);
1866 _leave(" = 0");
1867 return ret;
1868
1869 case -ECONNABORTED:
1870 spin_unlock(&call->lock);
1871 _leave(" = %d [aborted]", ret);
1872 return ret;
1873
1874 default:
1875 __rxrpc_call_abort(call, ret);
1876 _leave(" = %d", ret);
1877 return ret;
1878
1879 case -EAGAIN:
1880 spin_unlock(&call->lock);
1881
1882 if (!(flags & RXRPC_CALL_READ_BLOCK)) {
1883 _leave(" = -EAGAIN");
1884 return -EAGAIN;
1885 }
1886
1887 /* wait for the data to arrive */
1888 _debug("blocking for data arrival");
1889
1890 for (;;) {
1891 set_current_state(TASK_INTERRUPTIBLE);
1892 if (!call->app_async_read || signal_pending(current))
1893 break;
1894 schedule();
1895 }
1896 set_current_state(TASK_RUNNING);
1897
1898 if (signal_pending(current)) {
1899 _leave(" = -EINTR");
1900 return -EINTR;
1901 }
1902
1903 if (call->app_call_state == RXRPC_CSTATE_ERROR) {
1904 _leave(" = -ECONNABORTED");
1905 return -ECONNABORTED;
1906 }
1907
1908 _leave(" = 0");
1909 return 0;
1910 }
1911
1912} /* end rxrpc_call_read_data() */
1913
1914/*****************************************************************************/
1915/*
1916 * write data to a call
1917 * - the data may not be sent immediately if it doesn't fill a buffer
1918 * - if we can't queue all the data for buffering now, siov[] will have been
1919 * adjusted to take account of what has been sent
1920 */
1921int rxrpc_call_write_data(struct rxrpc_call *call,
1922 size_t sioc,
1923 struct kvec *siov,
1924 u8 rxhdr_flags,
1925 gfp_t alloc_flags,
1926 int dup_data,
1927 size_t *size_sent)
1928{
1929 struct rxrpc_message *msg;
1930 struct kvec *sptr;
1931 size_t space, size, chunk, tmp;
1932 char *buf;
1933 int ret;
1934
1935 _enter("%p,%Zu,%p,%02x,%x,%d,%p",
1936 call, sioc, siov, rxhdr_flags, alloc_flags, dup_data,
1937 size_sent);
1938
1939 *size_sent = 0;
1940 size = 0;
1941 ret = -EINVAL;
1942
1943 /* can't send more if we've sent last packet from this end */
1944 switch (call->app_call_state) {
1945 case RXRPC_CSTATE_SRVR_SND_REPLY:
1946 case RXRPC_CSTATE_CLNT_SND_ARGS:
1947 break;
1948 case RXRPC_CSTATE_ERROR:
1949 ret = call->app_errno;
1950 default:
1951 goto out;
1952 }
1953
1954 /* calculate how much data we've been given */
1955 sptr = siov;
1956 for (; sioc > 0; sptr++, sioc--) {
1957 if (!sptr->iov_len)
1958 continue;
1959
1960 if (!sptr->iov_base)
1961 goto out;
1962
1963 size += sptr->iov_len;
1964 }
1965
1966 _debug("- size=%Zu mtu=%Zu", size, call->conn->mtu_size);
1967
1968 do {
1969 /* make sure there's a message under construction */
1970 if (!call->snd_nextmsg) {
1971 /* no - allocate a message with no data yet attached */
1972 ret = rxrpc_conn_newmsg(call->conn, call,
1973 RXRPC_PACKET_TYPE_DATA,
1974 0, NULL, alloc_flags,
1975 &call->snd_nextmsg);
1976 if (ret < 0)
1977 goto out;
1978 _debug("- allocated new message [ds=%Zu]",
1979 call->snd_nextmsg->dsize);
1980 }
1981
1982 msg = call->snd_nextmsg;
1983 msg->hdr.flags |= rxhdr_flags;
1984
1985 /* deal with zero-length terminal packet */
1986 if (size == 0) {
1987 if (rxhdr_flags & RXRPC_LAST_PACKET) {
1988 ret = rxrpc_call_flush(call);
1989 if (ret < 0)
1990 goto out;
1991 }
1992 break;
1993 }
1994
1995 /* work out how much space current packet has available */
1996 space = call->conn->mtu_size - msg->dsize;
1997 chunk = min(space, size);
1998
1999 _debug("- [before] space=%Zu chunk=%Zu", space, chunk);
2000
2001 while (!siov->iov_len)
2002 siov++;
2003
2004 /* if we are going to have to duplicate the data then coalesce
2005 * it too */
2006 if (dup_data) {
2007 /* don't allocate more than 1 page at a time */
2008 if (chunk > PAGE_SIZE)
2009 chunk = PAGE_SIZE;
2010
2011 /* allocate a data buffer and attach to the message */
2012 buf = kmalloc(chunk, alloc_flags);
2013 if (unlikely(!buf)) {
2014 if (msg->dsize ==
2015 sizeof(struct rxrpc_header)) {
2016 /* discard an empty msg and wind back
2017 * the seq counter */
2018 rxrpc_put_message(msg);
2019 call->snd_nextmsg = NULL;
2020 call->snd_seq_count--;
2021 }
2022
2023 ret = -ENOMEM;
2024 goto out;
2025 }
2026
2027 tmp = msg->dcount++;
2028 set_bit(tmp, &msg->dfree);
2029 msg->data[tmp].iov_base = buf;
2030 msg->data[tmp].iov_len = chunk;
2031 msg->dsize += chunk;
2032 *size_sent += chunk;
2033 size -= chunk;
2034
2035 /* load the buffer with data */
2036 while (chunk > 0) {
2037 tmp = min(chunk, siov->iov_len);
2038 memcpy(buf, siov->iov_base, tmp);
2039 buf += tmp;
2040 siov->iov_base += tmp;
2041 siov->iov_len -= tmp;
2042 if (!siov->iov_len)
2043 siov++;
2044 chunk -= tmp;
2045 }
2046 }
2047 else {
2048 /* we want to attach the supplied buffers directly */
2049 while (chunk > 0 &&
2050 msg->dcount < RXRPC_MSG_MAX_IOCS) {
2051 tmp = msg->dcount++;
2052 msg->data[tmp].iov_base = siov->iov_base;
2053 msg->data[tmp].iov_len = siov->iov_len;
2054 msg->dsize += siov->iov_len;
2055 *size_sent += siov->iov_len;
2056 size -= siov->iov_len;
2057 chunk -= siov->iov_len;
2058 siov++;
2059 }
2060 }
2061
2062 _debug("- [loaded] chunk=%Zu size=%Zu", chunk, size);
2063
2064 /* dispatch the message when full, final or requesting ACK */
2065 if (msg->dsize >= call->conn->mtu_size || rxhdr_flags) {
2066 ret = rxrpc_call_flush(call);
2067 if (ret < 0)
2068 goto out;
2069 }
2070
2071 } while (size > 0);
2072
2073 ret = 0;
2074 out:
2075 _leave(" = %d (%Zd queued, %Zd rem)", ret, *size_sent, size);
2076 return ret;
2077
2078} /* end rxrpc_call_write_data() */
2079
2080/*****************************************************************************/
2081/*
2082 * flush outstanding packets to the network
2083 */
2084static int rxrpc_call_flush(struct rxrpc_call *call)
2085{
2086 struct rxrpc_message *msg;
2087 int ret = 0;
2088
2089 _enter("%p", call);
2090
2091 rxrpc_get_call(call);
2092
2093 /* if there's a packet under construction, then dispatch it now */
2094 if (call->snd_nextmsg) {
2095 msg = call->snd_nextmsg;
2096 call->snd_nextmsg = NULL;
2097
2098 if (msg->hdr.flags & RXRPC_LAST_PACKET) {
2099 msg->hdr.flags &= ~RXRPC_MORE_PACKETS;
2100 if (call->app_call_state != RXRPC_CSTATE_CLNT_SND_ARGS)
2101 msg->hdr.flags |= RXRPC_REQUEST_ACK;
2102 }
2103 else {
2104 msg->hdr.flags |= RXRPC_MORE_PACKETS;
2105 }
2106
2107 _proto("Sending DATA message { ds=%Zu dc=%u df=%02lu }",
2108 msg->dsize, msg->dcount, msg->dfree);
2109
2110 /* queue and adjust call state */
2111 spin_lock(&call->lock);
2112 list_add_tail(&msg->link, &call->acks_pendq);
2113
2114 /* decide what to do depending on current state and if this is
2115 * the last packet */
2116 ret = -EINVAL;
2117 switch (call->app_call_state) {
2118 case RXRPC_CSTATE_SRVR_SND_REPLY:
2119 if (msg->hdr.flags & RXRPC_LAST_PACKET) {
2120 call->app_call_state =
2121 RXRPC_CSTATE_SRVR_RCV_FINAL_ACK;
2122 _state(call);
2123 }
2124 break;
2125
2126 case RXRPC_CSTATE_CLNT_SND_ARGS:
2127 if (msg->hdr.flags & RXRPC_LAST_PACKET) {
2128 call->app_call_state =
2129 RXRPC_CSTATE_CLNT_RCV_REPLY;
2130 _state(call);
2131 }
2132 break;
2133
2134 case RXRPC_CSTATE_ERROR:
2135 ret = call->app_errno;
2136 default:
2137 spin_unlock(&call->lock);
2138 goto out;
2139 }
2140
2141 call->acks_pend_cnt++;
2142
2143 mod_timer(&call->acks_timeout,
2144 __rxrpc_rtt_based_timeout(call,
2145 rxrpc_call_acks_timeout));
2146
2147 spin_unlock(&call->lock);
2148
2149 ret = rxrpc_conn_sendmsg(call->conn, msg);
2150 if (ret == 0)
2151 call->pkt_snd_count++;
2152 }
2153
2154 out:
2155 rxrpc_put_call(call);
2156
2157 _leave(" = %d", ret);
2158 return ret;
2159
2160} /* end rxrpc_call_flush() */
2161
2162/*****************************************************************************/
2163/*
2164 * resend NAK'd or unacknowledged packets up to the highest one specified
2165 */
2166static void rxrpc_call_resend(struct rxrpc_call *call, rxrpc_seq_t highest)
2167{
2168 struct rxrpc_message *msg;
2169 struct list_head *_p;
2170 rxrpc_seq_t seq = 0;
2171
2172 _enter("%p,%u", call, highest);
2173
2174 _proto("Rx Resend required");
2175
2176 /* handle too many resends */
2177 if (call->snd_resend_cnt >= rxrpc_call_max_resend) {
2178 _debug("Aborting due to too many resends (rcv=%d)",
2179 call->pkt_rcv_count);
2180 rxrpc_call_abort(call,
2181 call->pkt_rcv_count > 0 ? -EIO : -ETIMEDOUT);
2182 _leave("");
2183 return;
2184 }
2185
2186 spin_lock(&call->lock);
2187 call->snd_resend_cnt++;
2188 for (;;) {
2189 /* determine the next packet that we might need to resend */
2190 if (seq <= call->acks_dftv_seq)
2191 seq = call->acks_dftv_seq;
2192 seq++;
2193
2194 if (seq > highest)
2195 break;
2196
2197 /* look for the packet in the pending-ACK queue */
2198 list_for_each(_p, &call->acks_pendq) {
2199 msg = list_entry(_p, struct rxrpc_message, link);
2200 if (msg->seq == seq)
2201 goto found_msg;
2202 }
2203
2204 panic("%s(%p,%d):"
2205 " Inconsistent pending-ACK queue (ds=%u sc=%u sq=%u)\n",
2206 __FUNCTION__, call, highest,
2207 call->acks_dftv_seq, call->snd_seq_count, seq);
2208
2209 found_msg:
2210 if (msg->state != RXRPC_MSG_SENT)
2211 continue; /* only un-ACK'd packets */
2212
2213 rxrpc_get_message(msg);
2214 spin_unlock(&call->lock);
2215
2216 /* send each message again (and ignore any errors we might
2217 * incur) */
2218 _proto("Resending DATA message { ds=%Zu dc=%u df=%02lu }",
2219 msg->dsize, msg->dcount, msg->dfree);
2220
2221 if (rxrpc_conn_sendmsg(call->conn, msg) == 0)
2222 call->pkt_snd_count++;
2223
2224 rxrpc_put_message(msg);
2225
2226 spin_lock(&call->lock);
2227 }
2228
2229 /* reset the timeout */
2230 mod_timer(&call->acks_timeout,
2231 __rxrpc_rtt_based_timeout(call, rxrpc_call_acks_timeout));
2232
2233 spin_unlock(&call->lock);
2234
2235 _leave("");
2236} /* end rxrpc_call_resend() */
2237
2238/*****************************************************************************/
2239/*
2240 * handle an ICMP error being applied to a call
2241 */
2242void rxrpc_call_handle_error(struct rxrpc_call *call, int local, int errno)
2243{
2244 _enter("%p{%u},%d", call, ntohl(call->call_id), errno);
2245
2246 /* if this call is already aborted, then just wake up any waiters */
2247 if (call->app_call_state == RXRPC_CSTATE_ERROR) {
2248 call->app_error_func(call);
2249 }
2250 else {
2251 /* tell the app layer what happened */
2252 spin_lock(&call->lock);
2253 call->app_call_state = RXRPC_CSTATE_ERROR;
2254 _state(call);
2255 if (local)
2256 call->app_err_state = RXRPC_ESTATE_LOCAL_ERROR;
2257 else
2258 call->app_err_state = RXRPC_ESTATE_REMOTE_ERROR;
2259 call->app_errno = errno;
2260 call->app_mark = RXRPC_APP_MARK_EOF;
2261 call->app_read_buf = NULL;
2262 call->app_async_read = 0;
2263
2264 /* map the error */
2265 call->app_aemap_func(call);
2266
2267 del_timer_sync(&call->acks_timeout);
2268 del_timer_sync(&call->rcv_timeout);
2269 del_timer_sync(&call->ackr_dfr_timo);
2270
2271 spin_unlock(&call->lock);
2272
2273 call->app_error_func(call);
2274 }
2275
2276 _leave("");
2277} /* end rxrpc_call_handle_error() */
diff --git a/net/rxrpc/connection.c b/net/rxrpc/connection.c
deleted file mode 100644
index a7c929a9fdca..000000000000
--- a/net/rxrpc/connection.c
+++ /dev/null
@@ -1,777 +0,0 @@
1/* connection.c: Rx connection routines
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/sched.h>
13#include <linux/slab.h>
14#include <linux/module.h>
15#include <rxrpc/rxrpc.h>
16#include <rxrpc/transport.h>
17#include <rxrpc/peer.h>
18#include <rxrpc/connection.h>
19#include <rxrpc/call.h>
20#include <rxrpc/message.h>
21#include <linux/udp.h>
22#include <linux/ip.h>
23#include <net/sock.h>
24#include <asm/uaccess.h>
25#include "internal.h"
26
27__RXACCT_DECL(atomic_t rxrpc_connection_count);
28
29LIST_HEAD(rxrpc_conns);
30DECLARE_RWSEM(rxrpc_conns_sem);
31unsigned long rxrpc_conn_timeout = 60 * 60;
32
33static void rxrpc_conn_do_timeout(struct rxrpc_connection *conn);
34
35static void __rxrpc_conn_timeout(rxrpc_timer_t *timer)
36{
37 struct rxrpc_connection *conn =
38 list_entry(timer, struct rxrpc_connection, timeout);
39
40 _debug("Rx CONN TIMEOUT [%p{u=%d}]", conn, atomic_read(&conn->usage));
41
42 rxrpc_conn_do_timeout(conn);
43}
44
45static const struct rxrpc_timer_ops rxrpc_conn_timer_ops = {
46 .timed_out = __rxrpc_conn_timeout,
47};
48
49/*****************************************************************************/
50/*
51 * create a new connection record
52 */
53static inline int __rxrpc_create_connection(struct rxrpc_peer *peer,
54 struct rxrpc_connection **_conn)
55{
56 struct rxrpc_connection *conn;
57
58 _enter("%p", peer);
59
60 /* allocate and initialise a connection record */
61 conn = kzalloc(sizeof(struct rxrpc_connection), GFP_KERNEL);
62 if (!conn) {
63 _leave(" = -ENOMEM");
64 return -ENOMEM;
65 }
66
67 atomic_set(&conn->usage, 1);
68
69 INIT_LIST_HEAD(&conn->link);
70 INIT_LIST_HEAD(&conn->id_link);
71 init_waitqueue_head(&conn->chanwait);
72 spin_lock_init(&conn->lock);
73 rxrpc_timer_init(&conn->timeout, &rxrpc_conn_timer_ops);
74
75 do_gettimeofday(&conn->atime);
76 conn->mtu_size = 1024;
77 conn->peer = peer;
78 conn->trans = peer->trans;
79
80 __RXACCT(atomic_inc(&rxrpc_connection_count));
81 *_conn = conn;
82 _leave(" = 0 (%p)", conn);
83
84 return 0;
85} /* end __rxrpc_create_connection() */
86
87/*****************************************************************************/
88/*
89 * create a new connection record for outgoing connections
90 */
91int rxrpc_create_connection(struct rxrpc_transport *trans,
92 __be16 port,
93 __be32 addr,
94 uint16_t service_id,
95 void *security,
96 struct rxrpc_connection **_conn)
97{
98 struct rxrpc_connection *candidate, *conn;
99 struct rxrpc_peer *peer;
100 struct list_head *_p;
101 __be32 connid;
102 int ret;
103
104 _enter("%p{%hu},%u,%hu", trans, trans->port, ntohs(port), service_id);
105
106 /* get a peer record */
107 ret = rxrpc_peer_lookup(trans, addr, &peer);
108 if (ret < 0) {
109 _leave(" = %d", ret);
110 return ret;
111 }
112
113 /* allocate and initialise a connection record */
114 ret = __rxrpc_create_connection(peer, &candidate);
115 if (ret < 0) {
116 rxrpc_put_peer(peer);
117 _leave(" = %d", ret);
118 return ret;
119 }
120
121 /* fill in the specific bits */
122 candidate->addr.sin_family = AF_INET;
123 candidate->addr.sin_port = port;
124 candidate->addr.sin_addr.s_addr = addr;
125
126 candidate->in_epoch = rxrpc_epoch;
127 candidate->out_epoch = rxrpc_epoch;
128 candidate->in_clientflag = 0;
129 candidate->out_clientflag = RXRPC_CLIENT_INITIATED;
130 candidate->service_id = htons(service_id);
131
132 /* invent a unique connection ID */
133 write_lock(&peer->conn_idlock);
134
135 try_next_id:
136 connid = htonl(peer->conn_idcounter & RXRPC_CIDMASK);
137 peer->conn_idcounter += RXRPC_MAXCALLS;
138
139 list_for_each(_p, &peer->conn_idlist) {
140 conn = list_entry(_p, struct rxrpc_connection, id_link);
141 if (connid == conn->conn_id)
142 goto try_next_id;
143 if (connid > conn->conn_id)
144 break;
145 }
146
147 _debug("selected candidate conn ID %x.%u",
148 ntohl(peer->addr.s_addr), ntohl(connid));
149
150 candidate->conn_id = connid;
151 list_add_tail(&candidate->id_link, _p);
152
153 write_unlock(&peer->conn_idlock);
154
155 /* attach to peer */
156 candidate->peer = peer;
157
158 write_lock(&peer->conn_lock);
159
160 /* search the peer's transport graveyard list */
161 spin_lock(&peer->conn_gylock);
162 list_for_each(_p, &peer->conn_graveyard) {
163 conn = list_entry(_p, struct rxrpc_connection, link);
164 if (conn->addr.sin_port == candidate->addr.sin_port &&
165 conn->security_ix == candidate->security_ix &&
166 conn->service_id == candidate->service_id &&
167 conn->in_clientflag == 0)
168 goto found_in_graveyard;
169 }
170 spin_unlock(&peer->conn_gylock);
171
172 /* pick the new candidate */
173 _debug("created connection: {%08x} [out]", ntohl(candidate->conn_id));
174 atomic_inc(&peer->conn_count);
175 conn = candidate;
176 candidate = NULL;
177
178 make_active:
179 list_add_tail(&conn->link, &peer->conn_active);
180 write_unlock(&peer->conn_lock);
181
182 if (candidate) {
183 write_lock(&peer->conn_idlock);
184 list_del(&candidate->id_link);
185 write_unlock(&peer->conn_idlock);
186
187 __RXACCT(atomic_dec(&rxrpc_connection_count));
188 kfree(candidate);
189 }
190 else {
191 down_write(&rxrpc_conns_sem);
192 list_add_tail(&conn->proc_link, &rxrpc_conns);
193 up_write(&rxrpc_conns_sem);
194 }
195
196 *_conn = conn;
197 _leave(" = 0 (%p)", conn);
198
199 return 0;
200
201 /* handle resurrecting a connection from the graveyard */
202 found_in_graveyard:
203 _debug("resurrecting connection: {%08x} [out]", ntohl(conn->conn_id));
204 rxrpc_get_connection(conn);
205 rxrpc_krxtimod_del_timer(&conn->timeout);
206 list_del_init(&conn->link);
207 spin_unlock(&peer->conn_gylock);
208 goto make_active;
209} /* end rxrpc_create_connection() */
210
211/*****************************************************************************/
212/*
213 * lookup the connection for an incoming packet
214 * - create a new connection record for unrecorded incoming connections
215 */
216int rxrpc_connection_lookup(struct rxrpc_peer *peer,
217 struct rxrpc_message *msg,
218 struct rxrpc_connection **_conn)
219{
220 struct rxrpc_connection *conn, *candidate = NULL;
221 struct list_head *_p;
222 struct sk_buff *pkt = msg->pkt;
223 int ret, fresh = 0;
224 __be32 x_epoch, x_connid;
225 __be16 x_port, x_servid;
226 __u32 x_secix;
227 u8 x_clflag;
228
229 _enter("%p{{%hu}},%u,%hu",
230 peer,
231 peer->trans->port,
232 ntohs(pkt->h.uh->source),
233 ntohs(msg->hdr.serviceId));
234
235 x_port = pkt->h.uh->source;
236 x_epoch = msg->hdr.epoch;
237 x_clflag = msg->hdr.flags & RXRPC_CLIENT_INITIATED;
238 x_connid = htonl(ntohl(msg->hdr.cid) & RXRPC_CIDMASK);
239 x_servid = msg->hdr.serviceId;
240 x_secix = msg->hdr.securityIndex;
241
242 /* [common case] search the transport's active list first */
243 read_lock(&peer->conn_lock);
244 list_for_each(_p, &peer->conn_active) {
245 conn = list_entry(_p, struct rxrpc_connection, link);
246 if (conn->addr.sin_port == x_port &&
247 conn->in_epoch == x_epoch &&
248 conn->conn_id == x_connid &&
249 conn->security_ix == x_secix &&
250 conn->service_id == x_servid &&
251 conn->in_clientflag == x_clflag)
252 goto found_active;
253 }
254 read_unlock(&peer->conn_lock);
255
256 /* [uncommon case] not active
257 * - create a candidate for a new record if an inbound connection
258 * - only examine the graveyard for an outbound connection
259 */
260 if (x_clflag) {
261 ret = __rxrpc_create_connection(peer, &candidate);
262 if (ret < 0) {
263 _leave(" = %d", ret);
264 return ret;
265 }
266
267 /* fill in the specifics */
268 candidate->addr.sin_family = AF_INET;
269 candidate->addr.sin_port = x_port;
270 candidate->addr.sin_addr.s_addr = pkt->nh.iph->saddr;
271 candidate->in_epoch = x_epoch;
272 candidate->out_epoch = x_epoch;
273 candidate->in_clientflag = RXRPC_CLIENT_INITIATED;
274 candidate->out_clientflag = 0;
275 candidate->conn_id = x_connid;
276 candidate->service_id = x_servid;
277 candidate->security_ix = x_secix;
278 }
279
280 /* search the active list again, just in case it appeared whilst we
281 * were busy */
282 write_lock(&peer->conn_lock);
283 list_for_each(_p, &peer->conn_active) {
284 conn = list_entry(_p, struct rxrpc_connection, link);
285 if (conn->addr.sin_port == x_port &&
286 conn->in_epoch == x_epoch &&
287 conn->conn_id == x_connid &&
288 conn->security_ix == x_secix &&
289 conn->service_id == x_servid &&
290 conn->in_clientflag == x_clflag)
291 goto found_active_second_chance;
292 }
293
294 /* search the transport's graveyard list */
295 spin_lock(&peer->conn_gylock);
296 list_for_each(_p, &peer->conn_graveyard) {
297 conn = list_entry(_p, struct rxrpc_connection, link);
298 if (conn->addr.sin_port == x_port &&
299 conn->in_epoch == x_epoch &&
300 conn->conn_id == x_connid &&
301 conn->security_ix == x_secix &&
302 conn->service_id == x_servid &&
303 conn->in_clientflag == x_clflag)
304 goto found_in_graveyard;
305 }
306 spin_unlock(&peer->conn_gylock);
307
308 /* outbound connections aren't created here */
309 if (!x_clflag) {
310 write_unlock(&peer->conn_lock);
311 _leave(" = -ENOENT");
312 return -ENOENT;
313 }
314
315 /* we can now add the new candidate to the list */
316 _debug("created connection: {%08x} [in]", ntohl(candidate->conn_id));
317 rxrpc_get_peer(peer);
318 conn = candidate;
319 candidate = NULL;
320 atomic_inc(&peer->conn_count);
321 fresh = 1;
322
323 make_active:
324 list_add_tail(&conn->link, &peer->conn_active);
325
326 success_uwfree:
327 write_unlock(&peer->conn_lock);
328
329 if (candidate) {
330 write_lock(&peer->conn_idlock);
331 list_del(&candidate->id_link);
332 write_unlock(&peer->conn_idlock);
333
334 __RXACCT(atomic_dec(&rxrpc_connection_count));
335 kfree(candidate);
336 }
337
338 if (fresh) {
339 down_write(&rxrpc_conns_sem);
340 list_add_tail(&conn->proc_link, &rxrpc_conns);
341 up_write(&rxrpc_conns_sem);
342 }
343
344 success:
345 *_conn = conn;
346 _leave(" = 0 (%p)", conn);
347 return 0;
348
349 /* handle the connection being found in the active list straight off */
350 found_active:
351 rxrpc_get_connection(conn);
352 read_unlock(&peer->conn_lock);
353 goto success;
354
355 /* handle resurrecting a connection from the graveyard */
356 found_in_graveyard:
357 _debug("resurrecting connection: {%08x} [in]", ntohl(conn->conn_id));
358 rxrpc_get_peer(peer);
359 rxrpc_get_connection(conn);
360 rxrpc_krxtimod_del_timer(&conn->timeout);
361 list_del_init(&conn->link);
362 spin_unlock(&peer->conn_gylock);
363 goto make_active;
364
365 /* handle finding the connection on the second time through the active
366 * list */
367 found_active_second_chance:
368 rxrpc_get_connection(conn);
369 goto success_uwfree;
370
371} /* end rxrpc_connection_lookup() */
372
373/*****************************************************************************/
374/*
375 * finish using a connection record
376 * - it will be transferred to the peer's connection graveyard when refcount
377 * reaches 0
378 */
379void rxrpc_put_connection(struct rxrpc_connection *conn)
380{
381 struct rxrpc_peer *peer;
382
383 if (!conn)
384 return;
385
386 _enter("%p{u=%d p=%hu}",
387 conn, atomic_read(&conn->usage), ntohs(conn->addr.sin_port));
388
389 peer = conn->peer;
390 spin_lock(&peer->conn_gylock);
391
392 /* sanity check */
393 if (atomic_read(&conn->usage) <= 0)
394 BUG();
395
396 if (likely(!atomic_dec_and_test(&conn->usage))) {
397 spin_unlock(&peer->conn_gylock);
398 _leave("");
399 return;
400 }
401
402 /* move to graveyard queue */
403 _debug("burying connection: {%08x}", ntohl(conn->conn_id));
404 list_move_tail(&conn->link, &peer->conn_graveyard);
405
406 rxrpc_krxtimod_add_timer(&conn->timeout, rxrpc_conn_timeout * HZ);
407
408 spin_unlock(&peer->conn_gylock);
409
410 rxrpc_put_peer(conn->peer);
411
412 _leave(" [killed]");
413} /* end rxrpc_put_connection() */
414
415/*****************************************************************************/
416/*
417 * free a connection record
418 */
419static void rxrpc_conn_do_timeout(struct rxrpc_connection *conn)
420{
421 struct rxrpc_peer *peer;
422
423 _enter("%p{u=%d p=%hu}",
424 conn, atomic_read(&conn->usage), ntohs(conn->addr.sin_port));
425
426 peer = conn->peer;
427
428 if (atomic_read(&conn->usage) < 0)
429 BUG();
430
431 /* remove from graveyard if still dead */
432 spin_lock(&peer->conn_gylock);
433 if (atomic_read(&conn->usage) == 0) {
434 list_del_init(&conn->link);
435 }
436 else {
437 conn = NULL;
438 }
439 spin_unlock(&peer->conn_gylock);
440
441 if (!conn) {
442 _leave("");
443 return; /* resurrected */
444 }
445
446 _debug("--- Destroying Connection %p{%08x} ---",
447 conn, ntohl(conn->conn_id));
448
449 down_write(&rxrpc_conns_sem);
450 list_del(&conn->proc_link);
451 up_write(&rxrpc_conns_sem);
452
453 write_lock(&peer->conn_idlock);
454 list_del(&conn->id_link);
455 write_unlock(&peer->conn_idlock);
456
457 __RXACCT(atomic_dec(&rxrpc_connection_count));
458 kfree(conn);
459
460 /* if the graveyard is now empty, wake up anyone waiting for that */
461 if (atomic_dec_and_test(&peer->conn_count))
462 wake_up(&peer->conn_gy_waitq);
463
464 _leave(" [destroyed]");
465} /* end rxrpc_conn_do_timeout() */
466
467/*****************************************************************************/
468/*
469 * clear all connection records from a peer endpoint
470 */
471void rxrpc_conn_clearall(struct rxrpc_peer *peer)
472{
473 DECLARE_WAITQUEUE(myself, current);
474
475 struct rxrpc_connection *conn;
476 int err;
477
478 _enter("%p", peer);
479
480 /* there shouldn't be any active conns remaining */
481 if (!list_empty(&peer->conn_active))
482 BUG();
483
484 /* manually timeout all conns in the graveyard */
485 spin_lock(&peer->conn_gylock);
486 while (!list_empty(&peer->conn_graveyard)) {
487 conn = list_entry(peer->conn_graveyard.next,
488 struct rxrpc_connection, link);
489 err = rxrpc_krxtimod_del_timer(&conn->timeout);
490 spin_unlock(&peer->conn_gylock);
491
492 if (err == 0)
493 rxrpc_conn_do_timeout(conn);
494
495 spin_lock(&peer->conn_gylock);
496 }
497 spin_unlock(&peer->conn_gylock);
498
499 /* wait for the conn graveyard to be completely cleared */
500 set_current_state(TASK_UNINTERRUPTIBLE);
501 add_wait_queue(&peer->conn_gy_waitq, &myself);
502
503 while (atomic_read(&peer->conn_count) != 0) {
504 schedule();
505 set_current_state(TASK_UNINTERRUPTIBLE);
506 }
507
508 remove_wait_queue(&peer->conn_gy_waitq, &myself);
509 set_current_state(TASK_RUNNING);
510
511 _leave("");
512} /* end rxrpc_conn_clearall() */
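
The wait loop just above is the kernel's open-coded condition variable:
declare a wait-queue entry, mark the task sleeping, and re-test the
condition after every wakeup; the waker is the atomic_dec_and_test() /
wake_up() pair in rxrpc_conn_do_timeout(). The same logic sketched in
userspace pthreads (names illustrative):

    #include <pthread.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t empty = PTHREAD_COND_INITIALIZER;
    static int count;                       /* ~ peer->conn_count */

    static void wait_for_empty(void)
    {
            pthread_mutex_lock(&lock);
            while (count != 0)              /* re-check on every wakeup */
                    pthread_cond_wait(&empty, &lock);
            pthread_mutex_unlock(&lock);
    }

    static void put_one(void)
    {
            pthread_mutex_lock(&lock);
            if (--count == 0)
                    pthread_cond_signal(&empty);  /* ~ wake_up(&conn_gy_waitq) */
            pthread_mutex_unlock(&lock);
    }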
513
514/*****************************************************************************/
515/*
516 * allocate and prepare a message for sending out through the transport
517 * endpoint
518 */
519int rxrpc_conn_newmsg(struct rxrpc_connection *conn,
520 struct rxrpc_call *call,
521 uint8_t type,
522 int dcount,
523 struct kvec diov[],
524 gfp_t alloc_flags,
525 struct rxrpc_message **_msg)
526{
527 struct rxrpc_message *msg;
528 int loop;
529
530 _enter("%p{%d},%p,%u", conn, ntohs(conn->addr.sin_port), call, type);
531
532 if (dcount > 3) {
533 _leave(" = -EINVAL");
534 return -EINVAL;
535 }
536
537 msg = kzalloc(sizeof(struct rxrpc_message), alloc_flags);
538 if (!msg) {
539 _leave(" = -ENOMEM");
540 return -ENOMEM;
541 }
542
543 atomic_set(&msg->usage, 1);
544
545 INIT_LIST_HEAD(&msg->link);
546
547 msg->state = RXRPC_MSG_PREPARED;
548
549 msg->hdr.epoch = conn->out_epoch;
550 msg->hdr.cid = conn->conn_id | (call ? call->chan_ix : 0);
551 msg->hdr.callNumber = call ? call->call_id : 0;
552 msg->hdr.type = type;
553 msg->hdr.flags = conn->out_clientflag;
554 msg->hdr.securityIndex = conn->security_ix;
555 msg->hdr.serviceId = conn->service_id;
556
557 /* generate sequence numbers for data packets */
558 if (call) {
559 switch (type) {
560 case RXRPC_PACKET_TYPE_DATA:
561 msg->seq = ++call->snd_seq_count;
562 msg->hdr.seq = htonl(msg->seq);
563 break;
564 case RXRPC_PACKET_TYPE_ACK:
565 /* ACK sequence numbers are complicated. The following
566 * may be wrong:
567 * - jumbo packet ACKs should have a seq number
568 * - normal ACKs should not
569 */
570 default:
571 break;
572 }
573 }
574
575 msg->dcount = dcount + 1;
576 msg->dsize = sizeof(msg->hdr);
577 msg->data[0].iov_len = sizeof(msg->hdr);
578 msg->data[0].iov_base = &msg->hdr;
579
580 for (loop=0; loop < dcount; loop++) {
581 msg->dsize += diov[loop].iov_len;
582 msg->data[loop+1].iov_len = diov[loop].iov_len;
583 msg->data[loop+1].iov_base = diov[loop].iov_base;
584 }
585
586 __RXACCT(atomic_inc(&rxrpc_message_count));
587 *_msg = msg;
588 _leave(" = 0 (%p) #%d", msg, atomic_read(&rxrpc_message_count));
589 return 0;
590} /* end rxrpc_conn_newmsg() */
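
Note the gather layout the function builds: data[0] always carries the wire
header, the caller's kvecs land in slots 1..dcount, and dcount is capped at
3 so everything fits the fixed-size data[] array. A hypothetical caller
fragment - not compilable outside the (now removed) module, and conn, call,
payload and payload_len are assumptions:

    struct kvec iov[1];
    struct rxrpc_message *msg;
    int ret;

    iov[0].iov_base = payload;              /* assumed caller-owned buffer */
    iov[0].iov_len  = payload_len;

    ret = rxrpc_conn_newmsg(conn, call, RXRPC_PACKET_TYPE_DATA,
                            1, iov, GFP_KERNEL, &msg);
    if (ret == 0) {
            /* msg->data[0] points at the wire header, msg->data[1]
             * aliases the payload, and msg->dsize covers both */
    }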
591
592/*****************************************************************************/
593/*
594 * free a message
595 */
596void __rxrpc_put_message(struct rxrpc_message *msg)
597{
598 int loop;
599
600 _enter("%p #%d", msg, atomic_read(&rxrpc_message_count));
601
602 if (msg->pkt)
603 kfree_skb(msg->pkt);
604 rxrpc_put_connection(msg->conn);
605
606 for (loop = 0; loop < 8; loop++)
607 if (test_bit(loop, &msg->dfree))
608 kfree(msg->data[loop].iov_base);
609
610 __RXACCT(atomic_dec(&rxrpc_message_count));
611 kfree(msg);
612
613 _leave("");
614} /* end __rxrpc_put_message() */
615
616/*****************************************************************************/
617/*
618 * send a message out through the transport endpoint
619 */
620int rxrpc_conn_sendmsg(struct rxrpc_connection *conn,
621 struct rxrpc_message *msg)
622{
623 struct msghdr msghdr;
624 int ret;
625
626 _enter("%p{%d}", conn, ntohs(conn->addr.sin_port));
627
628 /* fill in some fields in the header */
629 spin_lock(&conn->lock);
630 msg->hdr.serial = htonl(++conn->serial_counter);
631 msg->rttdone = 0;
632 spin_unlock(&conn->lock);
633
634 /* set up the message to be transmitted */
635 msghdr.msg_name = &conn->addr;
636 msghdr.msg_namelen = sizeof(conn->addr);
637 msghdr.msg_control = NULL;
638 msghdr.msg_controllen = 0;
639 msghdr.msg_flags = MSG_CONFIRM | MSG_DONTWAIT;
640
641 _net("Sending message type %d of %Zd bytes to %08x:%d",
642 msg->hdr.type,
643 msg->dsize,
644 ntohl(conn->addr.sin_addr.s_addr),
645 ntohs(conn->addr.sin_port));
646
647 /* send the message */
648 ret = kernel_sendmsg(conn->trans->socket, &msghdr,
649 msg->data, msg->dcount, msg->dsize);
650 if (ret < 0) {
651 msg->state = RXRPC_MSG_ERROR;
652 } else {
653 msg->state = RXRPC_MSG_SENT;
654 ret = 0;
655
656 spin_lock(&conn->lock);
657 do_gettimeofday(&conn->atime);
658 msg->stamp = conn->atime;
659 spin_unlock(&conn->lock);
660 }
661
662 _leave(" = %d", ret);
663
664 return ret;
665} /* end rxrpc_conn_sendmsg() */
666
667/*****************************************************************************/
668/*
669 * deal with a subsequent call packet
670 */
671int rxrpc_conn_receive_call_packet(struct rxrpc_connection *conn,
672 struct rxrpc_call *call,
673 struct rxrpc_message *msg)
674{
675 struct rxrpc_message *pmsg;
676 struct dst_entry *dst;
677 struct list_head *_p;
678 unsigned cix, seq;
679 int ret = 0;
680
681 _enter("%p,%p,%p", conn, call, msg);
682
683 if (!call) {
684 cix = ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK;
685
686 spin_lock(&conn->lock);
687 call = conn->channels[cix];
688
689 if (!call || call->call_id != msg->hdr.callNumber) {
690 spin_unlock(&conn->lock);
691 rxrpc_trans_immediate_abort(conn->trans, msg, -ENOENT);
692 goto out;
693 }
694 else {
695 rxrpc_get_call(call);
696 spin_unlock(&conn->lock);
697 }
698 }
699 else {
700 rxrpc_get_call(call);
701 }
702
703 _proto("Received packet %%%u [%u] on call %hu:%u:%u",
704 ntohl(msg->hdr.serial),
705 ntohl(msg->hdr.seq),
706 ntohs(msg->hdr.serviceId),
707 ntohl(conn->conn_id),
708 ntohl(call->call_id));
709
710 call->pkt_rcv_count++;
711
712 dst = msg->pkt->dst;
713 if (dst && dst->dev)
714 conn->peer->if_mtu =
715 dst->dev->mtu - dst->dev->hard_header_len;
716
717 /* queue on the call in seq order */
718 rxrpc_get_message(msg);
719 seq = msg->seq;
720
721 spin_lock(&call->lock);
722 list_for_each(_p, &call->rcv_receiveq) {
723 pmsg = list_entry(_p, struct rxrpc_message, link);
724 if (pmsg->seq > seq)
725 break;
726 }
727 list_add_tail(&msg->link, _p);
728
729 /* reset the activity timeout */
730 call->flags |= RXRPC_CALL_RCV_PKT;
731	mod_timer(&call->rcv_timeout, jiffies + rxrpc_call_rcv_timeout * HZ);
732
733 spin_unlock(&call->lock);
734
735 rxrpc_krxiod_queue_call(call);
736
737 rxrpc_put_call(call);
738 out:
739 _leave(" = %d", ret);
740 return ret;
741} /* end rxrpc_conn_receive_call_packet() */
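
The queueing step above keeps the call's receive queue in sequence order:
scan for the first entry with a higher seq and insert in front of it, which
also places duplicates after their originals. The same insert on a bare
circular doubly linked list (a sketch; types illustrative, head assumed to
be an initialised sentinel):

    struct node {
            unsigned int seq;
            struct node *prev, *next;
    };

    static void insert_sorted(struct node *head, struct node *n)
    {
            struct node *p = head->next;

            /* find the first entry with a higher sequence number... */
            while (p != head && p->seq <= n->seq)
                    p = p->next;

            /* ...and link n in just before it */
            n->prev = p->prev;
            n->next = p;
            p->prev->next = n;
            p->prev = n;
    }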
742
743/*****************************************************************************/
744/*
745 * handle an ICMP error being applied to a connection
746 */
747void rxrpc_conn_handle_error(struct rxrpc_connection *conn,
748 int local, int errno)
749{
750 struct rxrpc_call *calls[4];
751 int loop;
752
753 _enter("%p{%d},%d", conn, ntohs(conn->addr.sin_port), errno);
754
755 /* get a ref to all my calls in one go */
756 memset(calls, 0, sizeof(calls));
757 spin_lock(&conn->lock);
758
759 for (loop = 3; loop >= 0; loop--) {
760 if (conn->channels[loop]) {
761 calls[loop] = conn->channels[loop];
762 rxrpc_get_call(calls[loop]);
763 }
764 }
765
766 spin_unlock(&conn->lock);
767
768 /* now kick them all */
769 for (loop = 3; loop >= 0; loop--) {
770 if (calls[loop]) {
771 rxrpc_call_handle_error(calls[loop], local, errno);
772 rxrpc_put_call(calls[loop]);
773 }
774 }
775
776 _leave("");
777} /* end rxrpc_conn_handle_error() */
diff --git a/net/rxrpc/internal.h b/net/rxrpc/internal.h
deleted file mode 100644
index cc0c5795a103..000000000000
--- a/net/rxrpc/internal.h
+++ /dev/null
@@ -1,106 +0,0 @@
1/* internal.h: internal Rx RPC stuff
2 *
3 * Copyright (c) 2002 David Howells (dhowells@redhat.com).
4 */
5
6#ifndef RXRPC_INTERNAL_H
7#define RXRPC_INTERNAL_H
8
9#include <linux/compiler.h>
10#include <linux/kernel.h>
11
12/*
13 * debug accounting
14 */
15#if 1
16#define __RXACCT_DECL(X) X
17#define __RXACCT(X) do { X; } while(0)
18#else
19#define __RXACCT_DECL(X)
20#define __RXACCT(X) do { } while(0)
21#endif
22
23__RXACCT_DECL(extern atomic_t rxrpc_transport_count);
24__RXACCT_DECL(extern atomic_t rxrpc_peer_count);
25__RXACCT_DECL(extern atomic_t rxrpc_connection_count);
26__RXACCT_DECL(extern atomic_t rxrpc_call_count);
27__RXACCT_DECL(extern atomic_t rxrpc_message_count);
28
29/*
30 * debug tracing
31 */
32#define kenter(FMT, a...) printk("==> %s("FMT")\n",__FUNCTION__ , ##a)
33#define kleave(FMT, a...) printk("<== %s()"FMT"\n",__FUNCTION__ , ##a)
34#define kdebug(FMT, a...) printk(" "FMT"\n" , ##a)
35#define kproto(FMT, a...) printk("### "FMT"\n" , ##a)
36#define knet(FMT, a...) printk(" "FMT"\n" , ##a)
37
38#if 0
39#define _enter(FMT, a...) kenter(FMT , ##a)
40#define _leave(FMT, a...) kleave(FMT , ##a)
41#define _debug(FMT, a...) kdebug(FMT , ##a)
42#define _proto(FMT, a...) kproto(FMT , ##a)
43#define _net(FMT, a...) knet(FMT , ##a)
44#else
45#define _enter(FMT, a...) do { if (rxrpc_ktrace) kenter(FMT , ##a); } while(0)
46#define _leave(FMT, a...) do { if (rxrpc_ktrace) kleave(FMT , ##a); } while(0)
47#define _debug(FMT, a...) do { if (rxrpc_kdebug) kdebug(FMT , ##a); } while(0)
48#define _proto(FMT, a...) do { if (rxrpc_kproto) kproto(FMT , ##a); } while(0)
49#define _net(FMT, a...) do { if (rxrpc_knet) knet (FMT , ##a); } while(0)
50#endif
51
52static inline void rxrpc_discard_my_signals(void)
53{
54 while (signal_pending(current)) {
55 siginfo_t sinfo;
56
57 spin_lock_irq(&current->sighand->siglock);
58 dequeue_signal(current, &current->blocked, &sinfo);
59 spin_unlock_irq(&current->sighand->siglock);
60 }
61}
62
63/*
64 * call.c
65 */
66extern struct list_head rxrpc_calls;
67extern struct rw_semaphore rxrpc_calls_sem;
68
69/*
70 * connection.c
71 */
72extern struct list_head rxrpc_conns;
73extern struct rw_semaphore rxrpc_conns_sem;
74extern unsigned long rxrpc_conn_timeout;
75
76extern void rxrpc_conn_clearall(struct rxrpc_peer *peer);
77
78/*
79 * peer.c
80 */
81extern struct list_head rxrpc_peers;
82extern struct rw_semaphore rxrpc_peers_sem;
83extern unsigned long rxrpc_peer_timeout;
84
85extern void rxrpc_peer_calculate_rtt(struct rxrpc_peer *peer,
86 struct rxrpc_message *msg,
87 struct rxrpc_message *resp);
88
89extern void rxrpc_peer_clearall(struct rxrpc_transport *trans);
90
91
92/*
93 * proc.c
94 */
95#ifdef CONFIG_PROC_FS
96extern int rxrpc_proc_init(void);
97extern void rxrpc_proc_cleanup(void);
98#endif
99
100/*
101 * transport.c
102 */
103extern struct list_head rxrpc_proc_transports;
104extern struct rw_semaphore rxrpc_proc_transports_sem;
105
106#endif /* RXRPC_INTERNAL_H */
diff --git a/net/rxrpc/krxiod.c b/net/rxrpc/krxiod.c
deleted file mode 100644
index bbbcd6c24048..000000000000
--- a/net/rxrpc/krxiod.c
+++ /dev/null
@@ -1,262 +0,0 @@
1/* krxiod.c: Rx I/O daemon
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/sched.h>
13#include <linux/completion.h>
14#include <linux/spinlock.h>
15#include <linux/init.h>
16#include <linux/freezer.h>
17#include <rxrpc/krxiod.h>
18#include <rxrpc/transport.h>
19#include <rxrpc/peer.h>
20#include <rxrpc/call.h>
21#include "internal.h"
22
23static DECLARE_WAIT_QUEUE_HEAD(rxrpc_krxiod_sleepq);
24static DECLARE_COMPLETION(rxrpc_krxiod_dead);
25
26static atomic_t rxrpc_krxiod_qcount = ATOMIC_INIT(0);
27
28static LIST_HEAD(rxrpc_krxiod_transportq);
29static DEFINE_SPINLOCK(rxrpc_krxiod_transportq_lock);
30
31static LIST_HEAD(rxrpc_krxiod_callq);
32static DEFINE_SPINLOCK(rxrpc_krxiod_callq_lock);
33
34static volatile int rxrpc_krxiod_die;
35
36/*****************************************************************************/
37/*
38 * Rx I/O daemon
39 */
40static int rxrpc_krxiod(void *arg)
41{
42	DECLARE_WAITQUEUE(krxiod, current);
43
44 printk("Started krxiod %d\n",current->pid);
45
46 daemonize("krxiod");
47
48 /* loop around waiting for work to do */
49 do {
50 /* wait for work or to be told to exit */
51 _debug("### Begin Wait");
52 if (!atomic_read(&rxrpc_krxiod_qcount)) {
53 set_current_state(TASK_INTERRUPTIBLE);
54
55 add_wait_queue(&rxrpc_krxiod_sleepq, &krxiod);
56
57 for (;;) {
58 set_current_state(TASK_INTERRUPTIBLE);
59 if (atomic_read(&rxrpc_krxiod_qcount) ||
60 rxrpc_krxiod_die ||
61 signal_pending(current))
62 break;
63
64 schedule();
65 }
66
67 remove_wait_queue(&rxrpc_krxiod_sleepq, &krxiod);
68 set_current_state(TASK_RUNNING);
69 }
70 _debug("### End Wait");
71
72		/* do work if we've been given some to do */
73 _debug("### Begin Work");
74
75 /* see if there's a transport in need of attention */
76 if (!list_empty(&rxrpc_krxiod_transportq)) {
77 struct rxrpc_transport *trans = NULL;
78
79 spin_lock_irq(&rxrpc_krxiod_transportq_lock);
80
81 if (!list_empty(&rxrpc_krxiod_transportq)) {
82 trans = list_entry(
83 rxrpc_krxiod_transportq.next,
84 struct rxrpc_transport,
85 krxiodq_link);
86
87 list_del_init(&trans->krxiodq_link);
88 atomic_dec(&rxrpc_krxiod_qcount);
89
90 /* make sure it hasn't gone away and doesn't go
91 * away */
92 if (atomic_read(&trans->usage)>0)
93 rxrpc_get_transport(trans);
94 else
95 trans = NULL;
96 }
97
98 spin_unlock_irq(&rxrpc_krxiod_transportq_lock);
99
100 if (trans) {
101 rxrpc_trans_receive_packet(trans);
102 rxrpc_put_transport(trans);
103 }
104 }
105
106 /* see if there's a call in need of attention */
107 if (!list_empty(&rxrpc_krxiod_callq)) {
108 struct rxrpc_call *call = NULL;
109
110 spin_lock_irq(&rxrpc_krxiod_callq_lock);
111
112 if (!list_empty(&rxrpc_krxiod_callq)) {
113 call = list_entry(rxrpc_krxiod_callq.next,
114 struct rxrpc_call,
115 rcv_krxiodq_lk);
116 list_del_init(&call->rcv_krxiodq_lk);
117 atomic_dec(&rxrpc_krxiod_qcount);
118
119 /* make sure it hasn't gone away and doesn't go
120 * away */
121 if (atomic_read(&call->usage) > 0) {
122 _debug("@@@ KRXIOD"
123 " Begin Attend Call %p", call);
124 rxrpc_get_call(call);
125 }
126 else {
127 call = NULL;
128 }
129 }
130
131 spin_unlock_irq(&rxrpc_krxiod_callq_lock);
132
133 if (call) {
134 rxrpc_call_do_stuff(call);
135 rxrpc_put_call(call);
136 _debug("@@@ KRXIOD End Attend Call %p", call);
137 }
138 }
139
140 _debug("### End Work");
141
142 try_to_freeze();
143
144 /* discard pending signals */
145 rxrpc_discard_my_signals();
146
147 } while (!rxrpc_krxiod_die);
148
149 /* and that's all */
150 complete_and_exit(&rxrpc_krxiod_dead, 0);
151
152} /* end rxrpc_krxiod() */
153
154/*****************************************************************************/
155/*
156 * start up a krxiod daemon
157 */
158int __init rxrpc_krxiod_init(void)
159{
160 return kernel_thread(rxrpc_krxiod, NULL, 0);
161
162} /* end rxrpc_krxiod_init() */
163
164/*****************************************************************************/
165/*
166 * kill the krxiod daemon and wait for it to complete
167 */
168void rxrpc_krxiod_kill(void)
169{
170 rxrpc_krxiod_die = 1;
171 wake_up_all(&rxrpc_krxiod_sleepq);
172 wait_for_completion(&rxrpc_krxiod_dead);
173
174} /* end rxrpc_krxiod_kill() */
175
176/*****************************************************************************/
177/*
178 * queue a transport for attention by krxiod
179 */
180void rxrpc_krxiod_queue_transport(struct rxrpc_transport *trans)
181{
182 unsigned long flags;
183
184 _enter("");
185
186 if (list_empty(&trans->krxiodq_link)) {
187 spin_lock_irqsave(&rxrpc_krxiod_transportq_lock, flags);
188
189 if (list_empty(&trans->krxiodq_link)) {
190 if (atomic_read(&trans->usage) > 0) {
191 list_add_tail(&trans->krxiodq_link,
192 &rxrpc_krxiod_transportq);
193 atomic_inc(&rxrpc_krxiod_qcount);
194 }
195 }
196
197 spin_unlock_irqrestore(&rxrpc_krxiod_transportq_lock, flags);
198 wake_up_all(&rxrpc_krxiod_sleepq);
199 }
200
201 _leave("");
202
203} /* end rxrpc_krxiod_queue_transport() */
204
205/*****************************************************************************/
206/*
207 * dequeue a transport from krxiod's attention queue
208 */
209void rxrpc_krxiod_dequeue_transport(struct rxrpc_transport *trans)
210{
211 unsigned long flags;
212
213 _enter("");
214
215 spin_lock_irqsave(&rxrpc_krxiod_transportq_lock, flags);
216 if (!list_empty(&trans->krxiodq_link)) {
217 list_del_init(&trans->krxiodq_link);
218 atomic_dec(&rxrpc_krxiod_qcount);
219 }
220 spin_unlock_irqrestore(&rxrpc_krxiod_transportq_lock, flags);
221
222 _leave("");
223
224} /* end rxrpc_krxiod_dequeue_transport() */
225
226/*****************************************************************************/
227/*
228 * queue a call for attention by krxiod
229 */
230void rxrpc_krxiod_queue_call(struct rxrpc_call *call)
231{
232 unsigned long flags;
233
234 if (list_empty(&call->rcv_krxiodq_lk)) {
235 spin_lock_irqsave(&rxrpc_krxiod_callq_lock, flags);
236 if (atomic_read(&call->usage) > 0) {
237 list_add_tail(&call->rcv_krxiodq_lk,
238 &rxrpc_krxiod_callq);
239 atomic_inc(&rxrpc_krxiod_qcount);
240 }
241 spin_unlock_irqrestore(&rxrpc_krxiod_callq_lock, flags);
242 }
243 wake_up_all(&rxrpc_krxiod_sleepq);
244
245} /* end rxrpc_krxiod_queue_call() */
246
247/*****************************************************************************/
248/*
249 * dequeue a call from krxiod's attention queue
250 */
251void rxrpc_krxiod_dequeue_call(struct rxrpc_call *call)
252{
253 unsigned long flags;
254
255 spin_lock_irqsave(&rxrpc_krxiod_callq_lock, flags);
256 if (!list_empty(&call->rcv_krxiodq_lk)) {
257 list_del_init(&call->rcv_krxiodq_lk);
258 atomic_dec(&rxrpc_krxiod_qcount);
259 }
260 spin_unlock_irqrestore(&rxrpc_krxiod_callq_lock, flags);
261
262} /* end rxrpc_krxiod_dequeue_call() */
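
Both krxiod above and krxsecd below share one consumer shape: sleep until
the queue count goes non-zero or a kill is requested, detach a single item
under the lock, then act on it with the lock dropped. A userspace sketch of
that shape, with a pthread condition variable standing in for the wait
queue (names illustrative):

    #include <pthread.h>
    #include <stddef.h>

    struct work { struct work *next; };

    static pthread_mutex_t qlock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t qwait = PTHREAD_COND_INITIALIZER;
    static struct work *queue;
    static int dying;

    static void process(struct work *w)
    {
            (void)w;                        /* attend to the item */
    }

    static void *consumer(void *unused)
    {
            (void)unused;
            for (;;) {
                    struct work *w;

                    pthread_mutex_lock(&qlock);
                    while (!queue && !dying)
                            pthread_cond_wait(&qwait, &qlock);
                    if (!queue) {           /* dying and fully drained */
                            pthread_mutex_unlock(&qlock);
                            return NULL;
                    }
                    w = queue;              /* detach one item */
                    queue = w->next;
                    pthread_mutex_unlock(&qlock);

                    process(w);             /* outside the lock, as krxiod does */
            }
    }
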
diff --git a/net/rxrpc/krxsecd.c b/net/rxrpc/krxsecd.c
deleted file mode 100644
index 9a1e7f5e034c..000000000000
--- a/net/rxrpc/krxsecd.c
+++ /dev/null
@@ -1,270 +0,0 @@
1/* krxsecd.c: Rx security daemon
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 * This daemon deals with:
12 * - consulting the application as to whether inbound peers and calls should be authorised
13 * - generating security challenges for inbound connections
14 * - responding to security challenges on outbound connections
15 */
16
17#include <linux/module.h>
18#include <linux/sched.h>
19#include <linux/completion.h>
20#include <linux/spinlock.h>
21#include <linux/init.h>
22#include <rxrpc/krxsecd.h>
23#include <rxrpc/transport.h>
24#include <rxrpc/connection.h>
25#include <rxrpc/message.h>
26#include <rxrpc/peer.h>
27#include <rxrpc/call.h>
28#include <linux/udp.h>
29#include <linux/ip.h>
30#include <linux/freezer.h>
31#include <net/sock.h>
32#include "internal.h"
33
34static DECLARE_WAIT_QUEUE_HEAD(rxrpc_krxsecd_sleepq);
35static DECLARE_COMPLETION(rxrpc_krxsecd_dead);
36static volatile int rxrpc_krxsecd_die;
37
38static atomic_t rxrpc_krxsecd_qcount;
39
40/* queue of unprocessed inbound messages with seqno #1 and
41 * RXRPC_CLIENT_INITIATED flag set */
42static LIST_HEAD(rxrpc_krxsecd_initmsgq);
43static DEFINE_SPINLOCK(rxrpc_krxsecd_initmsgq_lock);
44
45static void rxrpc_krxsecd_process_incoming_call(struct rxrpc_message *msg);
46
47/*****************************************************************************/
48/*
49 * Rx security daemon
50 */
51static int rxrpc_krxsecd(void *arg)
52{
53 DECLARE_WAITQUEUE(krxsecd, current);
54
55 int die;
56
57 printk("Started krxsecd %d\n", current->pid);
58
59 daemonize("krxsecd");
60
61 /* loop around waiting for work to do */
62 do {
63 /* wait for work or to be told to exit */
64 _debug("### Begin Wait");
65 if (!atomic_read(&rxrpc_krxsecd_qcount)) {
66 set_current_state(TASK_INTERRUPTIBLE);
67
68 add_wait_queue(&rxrpc_krxsecd_sleepq, &krxsecd);
69
70 for (;;) {
71 set_current_state(TASK_INTERRUPTIBLE);
72 if (atomic_read(&rxrpc_krxsecd_qcount) ||
73 rxrpc_krxsecd_die ||
74 signal_pending(current))
75 break;
76
77 schedule();
78 }
79
80 remove_wait_queue(&rxrpc_krxsecd_sleepq, &krxsecd);
81 set_current_state(TASK_RUNNING);
82 }
83 die = rxrpc_krxsecd_die;
84 _debug("### End Wait");
85
86		/* see if there are incoming calls in need of authentication */
87 _debug("### Begin Inbound Calls");
88
89 if (!list_empty(&rxrpc_krxsecd_initmsgq)) {
90 struct rxrpc_message *msg = NULL;
91
92 spin_lock(&rxrpc_krxsecd_initmsgq_lock);
93
94 if (!list_empty(&rxrpc_krxsecd_initmsgq)) {
95 msg = list_entry(rxrpc_krxsecd_initmsgq.next,
96 struct rxrpc_message, link);
97 list_del_init(&msg->link);
98 atomic_dec(&rxrpc_krxsecd_qcount);
99 }
100
101 spin_unlock(&rxrpc_krxsecd_initmsgq_lock);
102
103 if (msg) {
104 rxrpc_krxsecd_process_incoming_call(msg);
105 rxrpc_put_message(msg);
106 }
107 }
108
109 _debug("### End Inbound Calls");
110
111 try_to_freeze();
112
113 /* discard pending signals */
114 rxrpc_discard_my_signals();
115
116 } while (!die);
117
118 /* and that's all */
119 complete_and_exit(&rxrpc_krxsecd_dead, 0);
120
121} /* end rxrpc_krxsecd() */
122
123/*****************************************************************************/
124/*
125 * start up a krxsecd daemon
126 */
127int __init rxrpc_krxsecd_init(void)
128{
129 return kernel_thread(rxrpc_krxsecd, NULL, 0);
130
131} /* end rxrpc_krxsecd_init() */
132
133/*****************************************************************************/
134/*
135 * kill the krxsecd daemon and wait for it to complete
136 */
137void rxrpc_krxsecd_kill(void)
138{
139 rxrpc_krxsecd_die = 1;
140 wake_up_all(&rxrpc_krxsecd_sleepq);
141 wait_for_completion(&rxrpc_krxsecd_dead);
142
143} /* end rxrpc_krxsecd_kill() */
144
145/*****************************************************************************/
146/*
147 * clear all pending incoming calls for the specified transport
148 */
149void rxrpc_krxsecd_clear_transport(struct rxrpc_transport *trans)
150{
151 LIST_HEAD(tmp);
152
153 struct rxrpc_message *msg;
154 struct list_head *_p, *_n;
155
156	_enter("%p", trans);
157
158 /* move all the messages for this transport onto a temp list */
159 spin_lock(&rxrpc_krxsecd_initmsgq_lock);
160
161 list_for_each_safe(_p, _n, &rxrpc_krxsecd_initmsgq) {
162 msg = list_entry(_p, struct rxrpc_message, link);
163 if (msg->trans == trans) {
164 list_move_tail(&msg->link, &tmp);
165 atomic_dec(&rxrpc_krxsecd_qcount);
166 }
167 }
168
169 spin_unlock(&rxrpc_krxsecd_initmsgq_lock);
170
171 /* zap all messages on the temp list */
172 while (!list_empty(&tmp)) {
173 msg = list_entry(tmp.next, struct rxrpc_message, link);
174 list_del_init(&msg->link);
175 rxrpc_put_message(msg);
176 }
177
178 _leave("");
179} /* end rxrpc_krxsecd_clear_transport() */
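
The clear function above is the usual two-phase drain: matching messages
are moved onto a private list while the queue lock is held, and only
released once the lock is dropped, so rxrpc_put_message() never runs under
the spinlock. The same shape in a userspace sketch (types illustrative):

    #include <pthread.h>
    #include <stdlib.h>

    struct item { int key; struct item *next; };

    static pthread_mutex_t qlock = PTHREAD_MUTEX_INITIALIZER;
    static struct item *queue;

    static void clear_matching(int key)
    {
            struct item *tmp = NULL, *it, **pp;

            pthread_mutex_lock(&qlock);
            pp = &queue;
            while ((it = *pp) != NULL) {
                    if (it->key == key) {
                            *pp = it->next; /* unlink under the lock... */
                            it->next = tmp;
                            tmp = it;
                    } else {
                            pp = &it->next;
                    }
            }
            pthread_mutex_unlock(&qlock);

            while ((it = tmp) != NULL) {    /* ...free outside it */
                    tmp = it->next;
                    free(it);
            }
    }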
180
181/*****************************************************************************/
182/*
183 * queue a message on the incoming calls list
184 */
185void rxrpc_krxsecd_queue_incoming_call(struct rxrpc_message *msg)
186{
187 _enter("%p", msg);
188
189 /* queue for processing by krxsecd */
190 spin_lock(&rxrpc_krxsecd_initmsgq_lock);
191
192 if (!rxrpc_krxsecd_die) {
193 rxrpc_get_message(msg);
194 list_add_tail(&msg->link, &rxrpc_krxsecd_initmsgq);
195 atomic_inc(&rxrpc_krxsecd_qcount);
196 }
197
198 spin_unlock(&rxrpc_krxsecd_initmsgq_lock);
199
200 wake_up(&rxrpc_krxsecd_sleepq);
201
202 _leave("");
203} /* end rxrpc_krxsecd_queue_incoming_call() */
204
205/*****************************************************************************/
206/*
207 * process the initial message of an incoming call
208 */
209void rxrpc_krxsecd_process_incoming_call(struct rxrpc_message *msg)
210{
211 struct rxrpc_transport *trans = msg->trans;
212 struct rxrpc_service *srv;
213 struct rxrpc_call *call;
214 struct list_head *_p;
215 unsigned short sid;
216 int ret;
217
218 _enter("%p{tr=%p}", msg, trans);
219
220 ret = rxrpc_incoming_call(msg->conn, msg, &call);
221 if (ret < 0)
222 goto out;
223
224 /* find the matching service on the transport */
225 sid = ntohs(msg->hdr.serviceId);
226 srv = NULL;
227
228 spin_lock(&trans->lock);
229 list_for_each(_p, &trans->services) {
230 srv = list_entry(_p, struct rxrpc_service, link);
231 if (srv->service_id == sid && try_module_get(srv->owner)) {
232 /* found a match (made sure it won't vanish) */
233 _debug("found service '%s'", srv->name);
234 call->owner = srv->owner;
235 break;
236 }
237 }
238 spin_unlock(&trans->lock);
239
240 /* report the new connection
241	 * - the func must inc the call's usage count to keep it alive
242 */
243 ret = -ENOENT;
244 if (_p != &trans->services) {
245 /* attempt to accept the call */
246 call->conn->service = srv;
247 call->app_attn_func = srv->attn_func;
248 call->app_error_func = srv->error_func;
249 call->app_aemap_func = srv->aemap_func;
250
251 ret = srv->new_call(call);
252
253 /* send an abort if an error occurred */
254 if (ret < 0) {
255 rxrpc_call_abort(call, ret);
256 }
257 else {
258 /* formally receive and ACK the new packet */
259 ret = rxrpc_conn_receive_call_packet(call->conn,
260 call, msg);
261 }
262 }
263
264 rxrpc_put_call(call);
265 out:
266 if (ret < 0)
267 rxrpc_trans_immediate_abort(trans, msg, ret);
268
269 _leave(" (%d)", ret);
270} /* end rxrpc_krxsecd_process_incoming_call() */
diff --git a/net/rxrpc/krxtimod.c b/net/rxrpc/krxtimod.c
deleted file mode 100644
index 9a9b6132dba4..000000000000
--- a/net/rxrpc/krxtimod.c
+++ /dev/null
@@ -1,204 +0,0 @@
1/* krxtimod.c: RXRPC timeout daemon
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/sched.h>
15#include <linux/completion.h>
16#include <linux/freezer.h>
17#include <rxrpc/rxrpc.h>
18#include <rxrpc/krxtimod.h>
19#include <asm/errno.h>
20#include "internal.h"
21
22static DECLARE_COMPLETION(krxtimod_alive);
23static DECLARE_COMPLETION(krxtimod_dead);
24static DECLARE_WAIT_QUEUE_HEAD(krxtimod_sleepq);
25static int krxtimod_die;
26
27static LIST_HEAD(krxtimod_list);
28static DEFINE_SPINLOCK(krxtimod_lock);
29
30static int krxtimod(void *arg);
31
32/*****************************************************************************/
33/*
34 * start the timeout daemon
35 */
36int rxrpc_krxtimod_start(void)
37{
38 int ret;
39
40 ret = kernel_thread(krxtimod, NULL, 0);
41 if (ret < 0)
42 return ret;
43
44 wait_for_completion(&krxtimod_alive);
45
46 return ret;
47} /* end rxrpc_krxtimod_start() */
48
49/*****************************************************************************/
50/*
51 * stop the timeout daemon
52 */
53void rxrpc_krxtimod_kill(void)
54{
55 /* get rid of my daemon */
56 krxtimod_die = 1;
57 wake_up(&krxtimod_sleepq);
58 wait_for_completion(&krxtimod_dead);
59
60} /* end rxrpc_krxtimod_kill() */
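
The start/kill pair above (krxiod and krxsecd have simpler twins of it) is
a completion handshake: the starter blocks until the daemon body (below)
signals it is alive, and the killer raises a flag, wakes the daemon and
blocks on its dead completion. Sketched with POSIX semaphores standing in
for completions (names illustrative):

    #include <pthread.h>
    #include <sched.h>
    #include <semaphore.h>
    #include <stddef.h>

    static sem_t alive, dead;
    static volatile int die;

    static void *daemon_body(void *unused)
    {
            (void)unused;
            sem_post(&alive);               /* ~ complete(&krxtimod_alive) */
            while (!die)
                    sched_yield();          /* real work loop elided */
            sem_post(&dead);                /* ~ complete_and_exit(&dead, 0) */
            return NULL;
    }

    static int daemon_start(void)
    {
            pthread_t t;

            sem_init(&alive, 0, 0);
            sem_init(&dead, 0, 0);
            if (pthread_create(&t, NULL, daemon_body, NULL))
                    return -1;
            pthread_detach(t);
            sem_wait(&alive);               /* don't return until it runs */
            return 0;
    }

    static void daemon_kill(void)
    {
            die = 1;                        /* ~ krxtimod_die = 1, then wake */
            sem_wait(&dead);                /* ~ wait_for_completion(&dead) */
    }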
61
62/*****************************************************************************/
63/*
64 * timeout processing daemon
65 */
66static int krxtimod(void *arg)
67{
68 DECLARE_WAITQUEUE(myself, current);
69
70 rxrpc_timer_t *timer;
71
72 printk("Started krxtimod %d\n", current->pid);
73
74 daemonize("krxtimod");
75
76 complete(&krxtimod_alive);
77
78 /* loop around looking for things to attend to */
79 loop:
80 set_current_state(TASK_INTERRUPTIBLE);
81 add_wait_queue(&krxtimod_sleepq, &myself);
82
83 for (;;) {
84 unsigned long jif;
85 long timeout;
86
87 /* deal with the server being asked to die */
88 if (krxtimod_die) {
89 remove_wait_queue(&krxtimod_sleepq, &myself);
90 _leave("");
91 complete_and_exit(&krxtimod_dead, 0);
92 }
93
94 try_to_freeze();
95
96 /* discard pending signals */
97 rxrpc_discard_my_signals();
98
99 /* work out the time to elapse before the next event */
100 spin_lock(&krxtimod_lock);
101 if (list_empty(&krxtimod_list)) {
102 timeout = MAX_SCHEDULE_TIMEOUT;
103 }
104 else {
105 timer = list_entry(krxtimod_list.next,
106 rxrpc_timer_t, link);
107 timeout = timer->timo_jif;
108 jif = jiffies;
109
110 if (time_before_eq((unsigned long) timeout, jif))
111 goto immediate;
112
113 else {
114 timeout = (long) timeout - (long) jiffies;
115 }
116 }
117 spin_unlock(&krxtimod_lock);
118
119 schedule_timeout(timeout);
120
121 set_current_state(TASK_INTERRUPTIBLE);
122 }
123
124 /* the thing on the front of the queue needs processing
125 * - we come here with the lock held and timer pointing to the expired
126 * entry
127 */
128 immediate:
129 remove_wait_queue(&krxtimod_sleepq, &myself);
130 set_current_state(TASK_RUNNING);
131
132 _debug("@@@ Begin Timeout of %p", timer);
133
134 /* dequeue the timer */
135 list_del_init(&timer->link);
136 spin_unlock(&krxtimod_lock);
137
138 /* call the timeout function */
139 timer->ops->timed_out(timer);
140
141 _debug("@@@ End Timeout");
142 goto loop;
143
144} /* end krxtimod() */
145
146/*****************************************************************************/
147/*
148 * (re-)queue a timer
149 */
150void rxrpc_krxtimod_add_timer(rxrpc_timer_t *timer, unsigned long timeout)
151{
152 struct list_head *_p;
153 rxrpc_timer_t *ptimer;
154
155 _enter("%p,%lu", timer, timeout);
156
157 spin_lock(&krxtimod_lock);
158
159 list_del(&timer->link);
160
161 /* the timer was deferred or reset - put it back in the queue at the
162 * right place */
163 timer->timo_jif = jiffies + timeout;
164
165 list_for_each(_p, &krxtimod_list) {
166 ptimer = list_entry(_p, rxrpc_timer_t, link);
167 if (time_before(timer->timo_jif, ptimer->timo_jif))
168 break;
169 }
170
171 list_add_tail(&timer->link, _p); /* insert before stopping point */
172
173 spin_unlock(&krxtimod_lock);
174
175 wake_up(&krxtimod_sleepq);
176
177 _leave("");
178} /* end rxrpc_krxtimod_add_timer() */
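
The expiry tests here - time_before_eq() in the daemon loop and
time_before() in the sorted insert above - must be wrap-safe because
jiffies overflows; both compare via a signed difference rather than a plain
'<'. A self-contained demonstration of why, using an illustrative 32-bit
tick counter:

    #include <stdint.h>
    #include <stdio.h>

    /* signed-difference comparison, as the kernel's time_before() does */
    static int ticks_before(uint32_t a, uint32_t b)
    {
            return (int32_t)(a - b) < 0;
    }

    int main(void)
    {
            uint32_t now = 0xfffffff0u;     /* counter close to wrapping */
            uint32_t expiry = now + 32;     /* wraps past zero */

            /* naive '<' calls the timer expired; the signed difference
             * correctly reports that it still lies in the future */
            printf("naive=%d wrap-safe=%d\n",
                   expiry < now, !ticks_before(expiry, now));
            return 0;
    }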
179
180/*****************************************************************************/
181/*
182 * dequeue a timer
183 * - returns 0 if the timer was deleted or -ENOENT if it wasn't queued
184 */
185int rxrpc_krxtimod_del_timer(rxrpc_timer_t *timer)
186{
187 int ret = 0;
188
189 _enter("%p", timer);
190
191 spin_lock(&krxtimod_lock);
192
193 if (list_empty(&timer->link))
194 ret = -ENOENT;
195 else
196 list_del_init(&timer->link);
197
198 spin_unlock(&krxtimod_lock);
199
200 wake_up(&krxtimod_sleepq);
201
202 _leave(" = %d", ret);
203 return ret;
204} /* end rxrpc_krxtimod_del_timer() */
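
The 0 / -ENOENT convention matters to callers: rxrpc_conn_clearall() and
rxrpc_peer_clearall() earlier in this diff only run the timeout handler by
hand when del_timer returns 0, i.e. when they - not krxtimod - won the race
to dequeue the timer. The caller-side shape, mirroring those call sites:

    if (rxrpc_krxtimod_del_timer(&conn->timeout) == 0)
            rxrpc_conn_do_timeout(conn);    /* we dequeued it: expire by hand */
    /* -ENOENT: the timer was no longer queued, so krxtimod has already
     * claimed it (or it was never armed) and no manual expiry is needed */
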
diff --git a/net/rxrpc/main.c b/net/rxrpc/main.c
deleted file mode 100644
index baec1f7fd8b9..000000000000
--- a/net/rxrpc/main.c
+++ /dev/null
@@ -1,180 +0,0 @@
1/* main.c: Rx RPC interface
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/sched.h>
15#include <rxrpc/rxrpc.h>
16#include <rxrpc/krxiod.h>
17#include <rxrpc/krxsecd.h>
18#include <rxrpc/krxtimod.h>
19#include <rxrpc/transport.h>
20#include <rxrpc/connection.h>
21#include <rxrpc/call.h>
22#include <rxrpc/message.h>
23#include "internal.h"
24
25MODULE_DESCRIPTION("Rx RPC implementation");
26MODULE_AUTHOR("Red Hat, Inc.");
27MODULE_LICENSE("GPL");
28
29__be32 rxrpc_epoch;
30
31/*****************************************************************************/
32/*
33 * initialise the Rx module
34 */
35static int __init rxrpc_initialise(void)
36{
37 int ret;
38
39 /* my epoch value */
40 rxrpc_epoch = htonl(xtime.tv_sec);
41
42 /* register the /proc interface */
43#ifdef CONFIG_PROC_FS
44 ret = rxrpc_proc_init();
45 if (ret<0)
46 return ret;
47#endif
48
49 /* register the sysctl files */
50#ifdef CONFIG_SYSCTL
51 ret = rxrpc_sysctl_init();
52 if (ret<0)
53 goto error_proc;
54#endif
55
56 /* start the krxtimod daemon */
57 ret = rxrpc_krxtimod_start();
58 if (ret<0)
59 goto error_sysctl;
60
61 /* start the krxiod daemon */
62 ret = rxrpc_krxiod_init();
63 if (ret<0)
64 goto error_krxtimod;
65
66 /* start the krxsecd daemon */
67 ret = rxrpc_krxsecd_init();
68 if (ret<0)
69 goto error_krxiod;
70
71 kdebug("\n\n");
72
73 return 0;
74
75 error_krxiod:
76 rxrpc_krxiod_kill();
77 error_krxtimod:
78 rxrpc_krxtimod_kill();
79 error_sysctl:
80#ifdef CONFIG_SYSCTL
81 rxrpc_sysctl_cleanup();
82 error_proc:
83#endif
84#ifdef CONFIG_PROC_FS
85 rxrpc_proc_cleanup();
86#endif
87 return ret;
88} /* end rxrpc_initialise() */
89
90module_init(rxrpc_initialise);
91
92/*****************************************************************************/
93/*
94 * clean up the Rx module
95 */
96static void __exit rxrpc_cleanup(void)
97{
98 kenter("");
99
100 __RXACCT(printk("Outstanding Messages : %d\n",
101 atomic_read(&rxrpc_message_count)));
102 __RXACCT(printk("Outstanding Calls : %d\n",
103 atomic_read(&rxrpc_call_count)));
104 __RXACCT(printk("Outstanding Connections: %d\n",
105 atomic_read(&rxrpc_connection_count)));
106 __RXACCT(printk("Outstanding Peers : %d\n",
107 atomic_read(&rxrpc_peer_count)));
108 __RXACCT(printk("Outstanding Transports : %d\n",
109 atomic_read(&rxrpc_transport_count)));
110
111 rxrpc_krxsecd_kill();
112 rxrpc_krxiod_kill();
113 rxrpc_krxtimod_kill();
114#ifdef CONFIG_SYSCTL
115 rxrpc_sysctl_cleanup();
116#endif
117#ifdef CONFIG_PROC_FS
118 rxrpc_proc_cleanup();
119#endif
120
121 __RXACCT(printk("Outstanding Messages : %d\n",
122 atomic_read(&rxrpc_message_count)));
123 __RXACCT(printk("Outstanding Calls : %d\n",
124 atomic_read(&rxrpc_call_count)));
125 __RXACCT(printk("Outstanding Connections: %d\n",
126 atomic_read(&rxrpc_connection_count)));
127 __RXACCT(printk("Outstanding Peers : %d\n",
128 atomic_read(&rxrpc_peer_count)));
129 __RXACCT(printk("Outstanding Transports : %d\n",
130 atomic_read(&rxrpc_transport_count)));
131
132 kleave("");
133} /* end rxrpc_cleanup() */
134
135module_exit(rxrpc_cleanup);
136
137/*****************************************************************************/
138/*
139 * clear the dead space between task_struct and kernel stack
140 * - called by supplying -finstrument-functions to gcc
141 */
142#if 0
143void __cyg_profile_func_enter (void *this_fn, void *call_site)
144__attribute__((no_instrument_function));
145
146void __cyg_profile_func_enter (void *this_fn, void *call_site)
147{
148 asm volatile(" movl %%esp,%%edi \n"
149 " andl %0,%%edi \n"
150 " addl %1,%%edi \n"
151 " movl %%esp,%%ecx \n"
152 " subl %%edi,%%ecx \n"
153 " shrl $2,%%ecx \n"
154 " movl $0xedededed,%%eax \n"
155 " rep stosl \n"
156 :
157 : "i"(~(THREAD_SIZE-1)), "i"(sizeof(struct thread_info))
158 : "eax", "ecx", "edi", "memory", "cc"
159 );
160}
161
162void __cyg_profile_func_exit(void *this_fn, void *call_site)
163__attribute__((no_instrument_function));
164
165void __cyg_profile_func_exit(void *this_fn, void *call_site)
166{
167 asm volatile(" movl %%esp,%%edi \n"
168 " andl %0,%%edi \n"
169 " addl %1,%%edi \n"
170 " movl %%esp,%%ecx \n"
171 " subl %%edi,%%ecx \n"
172 " shrl $2,%%ecx \n"
173 " movl $0xdadadada,%%eax \n"
174 " rep stosl \n"
175 :
176 : "i"(~(THREAD_SIZE-1)), "i"(sizeof(struct thread_info))
177 : "eax", "ecx", "edi", "memory", "cc"
178 );
179}
180#endif
diff --git a/net/rxrpc/peer.c b/net/rxrpc/peer.c
deleted file mode 100644
index 8a275157a3bb..000000000000
--- a/net/rxrpc/peer.c
+++ /dev/null
@@ -1,398 +0,0 @@
1/* peer.c: Rx RPC peer management
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/sched.h>
13#include <linux/slab.h>
14#include <linux/module.h>
15#include <rxrpc/rxrpc.h>
16#include <rxrpc/transport.h>
17#include <rxrpc/peer.h>
18#include <rxrpc/connection.h>
19#include <rxrpc/call.h>
20#include <rxrpc/message.h>
21#include <linux/udp.h>
22#include <linux/ip.h>
23#include <net/sock.h>
24#include <asm/uaccess.h>
25#include <asm/div64.h>
26#include "internal.h"
27
28__RXACCT_DECL(atomic_t rxrpc_peer_count);
29LIST_HEAD(rxrpc_peers);
30DECLARE_RWSEM(rxrpc_peers_sem);
31unsigned long rxrpc_peer_timeout = 12 * 60 * 60;
32
33static void rxrpc_peer_do_timeout(struct rxrpc_peer *peer);
34
35static void __rxrpc_peer_timeout(rxrpc_timer_t *timer)
36{
37 struct rxrpc_peer *peer =
38 list_entry(timer, struct rxrpc_peer, timeout);
39
40 _debug("Rx PEER TIMEOUT [%p{u=%d}]", peer, atomic_read(&peer->usage));
41
42 rxrpc_peer_do_timeout(peer);
43}
44
45static const struct rxrpc_timer_ops rxrpc_peer_timer_ops = {
46 .timed_out = __rxrpc_peer_timeout,
47};
48
49/*****************************************************************************/
50/*
51 * create a peer record
52 */
53static int __rxrpc_create_peer(struct rxrpc_transport *trans, __be32 addr,
54 struct rxrpc_peer **_peer)
55{
56 struct rxrpc_peer *peer;
57
58 _enter("%p,%08x", trans, ntohl(addr));
59
60 /* allocate and initialise a peer record */
61 peer = kzalloc(sizeof(struct rxrpc_peer), GFP_KERNEL);
62 if (!peer) {
63 _leave(" = -ENOMEM");
64 return -ENOMEM;
65 }
66
67 atomic_set(&peer->usage, 1);
68
69 INIT_LIST_HEAD(&peer->link);
70 INIT_LIST_HEAD(&peer->proc_link);
71 INIT_LIST_HEAD(&peer->conn_idlist);
72 INIT_LIST_HEAD(&peer->conn_active);
73 INIT_LIST_HEAD(&peer->conn_graveyard);
74 spin_lock_init(&peer->conn_gylock);
75 init_waitqueue_head(&peer->conn_gy_waitq);
76 rwlock_init(&peer->conn_idlock);
77 rwlock_init(&peer->conn_lock);
78 atomic_set(&peer->conn_count, 0);
79 spin_lock_init(&peer->lock);
80 rxrpc_timer_init(&peer->timeout, &rxrpc_peer_timer_ops);
81
82 peer->addr.s_addr = addr;
83
84 peer->trans = trans;
85 peer->ops = trans->peer_ops;
86
87 __RXACCT(atomic_inc(&rxrpc_peer_count));
88 *_peer = peer;
89 _leave(" = 0 (%p)", peer);
90
91 return 0;
92} /* end __rxrpc_create_peer() */
93
94/*****************************************************************************/
95/*
96 * find a peer record on the specified transport
97 * - returns (if successful) with peer record usage incremented
98 * - resurrects it from the graveyard if found there
99 */
100int rxrpc_peer_lookup(struct rxrpc_transport *trans, __be32 addr,
101 struct rxrpc_peer **_peer)
102{
103 struct rxrpc_peer *peer, *candidate = NULL;
104 struct list_head *_p;
105 int ret;
106
107 _enter("%p{%hu},%08x", trans, trans->port, ntohl(addr));
108
109 /* [common case] search the transport's active list first */
110 read_lock(&trans->peer_lock);
111 list_for_each(_p, &trans->peer_active) {
112 peer = list_entry(_p, struct rxrpc_peer, link);
113 if (peer->addr.s_addr == addr)
114 goto found_active;
115 }
116 read_unlock(&trans->peer_lock);
117
118 /* [uncommon case] not active - create a candidate for a new record */
119 ret = __rxrpc_create_peer(trans, addr, &candidate);
120 if (ret < 0) {
121 _leave(" = %d", ret);
122 return ret;
123 }
124
125 /* search the active list again, just in case it appeared whilst we
126 * were busy */
127 write_lock(&trans->peer_lock);
128 list_for_each(_p, &trans->peer_active) {
129 peer = list_entry(_p, struct rxrpc_peer, link);
130 if (peer->addr.s_addr == addr)
131 goto found_active_second_chance;
132 }
133
134 /* search the transport's graveyard list */
135 spin_lock(&trans->peer_gylock);
136 list_for_each(_p, &trans->peer_graveyard) {
137 peer = list_entry(_p, struct rxrpc_peer, link);
138 if (peer->addr.s_addr == addr)
139 goto found_in_graveyard;
140 }
141 spin_unlock(&trans->peer_gylock);
142
143 /* we can now add the new candidate to the list
144 * - tell the application layer that this peer has been added
145 */
146 rxrpc_get_transport(trans);
147 peer = candidate;
148 candidate = NULL;
149
150 if (peer->ops && peer->ops->adding) {
151 ret = peer->ops->adding(peer);
152 if (ret < 0) {
153 write_unlock(&trans->peer_lock);
154 __RXACCT(atomic_dec(&rxrpc_peer_count));
155 kfree(peer);
156 rxrpc_put_transport(trans);
157 _leave(" = %d", ret);
158 return ret;
159 }
160 }
161
162 atomic_inc(&trans->peer_count);
163
164 make_active:
165 list_add_tail(&peer->link, &trans->peer_active);
166
167 success_uwfree:
168 write_unlock(&trans->peer_lock);
169
170 if (candidate) {
171 __RXACCT(atomic_dec(&rxrpc_peer_count));
172 kfree(candidate);
173 }
174
175 if (list_empty(&peer->proc_link)) {
176 down_write(&rxrpc_peers_sem);
177 list_add_tail(&peer->proc_link, &rxrpc_peers);
178 up_write(&rxrpc_peers_sem);
179 }
180
181 success:
182 *_peer = peer;
183
184 _leave(" = 0 (%p{u=%d cc=%d})",
185 peer,
186 atomic_read(&peer->usage),
187 atomic_read(&peer->conn_count));
188 return 0;
189
190 /* handle the peer being found in the active list straight off */
191 found_active:
192 rxrpc_get_peer(peer);
193 read_unlock(&trans->peer_lock);
194 goto success;
195
196 /* handle resurrecting a peer from the graveyard */
197 found_in_graveyard:
198 rxrpc_get_peer(peer);
199 rxrpc_get_transport(peer->trans);
200 rxrpc_krxtimod_del_timer(&peer->timeout);
201 list_del_init(&peer->link);
202 spin_unlock(&trans->peer_gylock);
203 goto make_active;
204
205 /* handle finding the peer on the second time through the active
206 * list */
207 found_active_second_chance:
208 rxrpc_get_peer(peer);
209 goto success_uwfree;
210
211} /* end rxrpc_peer_lookup() */
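
rxrpc_peer_lookup() is a classic optimistic lookup-or-create: search under
the read lock, allocate a candidate with no lock held, search again under
the write lock, and discard the candidate if the record appeared in the
meantime. Reduced to a userspace sketch with one mutex standing in for the
read/write lock, and without the graveyard leg (names illustrative):

    #include <pthread.h>
    #include <stdint.h>
    #include <stdlib.h>

    struct peer { uint32_t addr; int usage; struct peer *next; };

    static pthread_mutex_t peers_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct peer *peers;

    static struct peer *find_locked(uint32_t addr)
    {
            struct peer *p;

            for (p = peers; p; p = p->next)
                    if (p->addr == addr)
                            return p;
            return NULL;
    }

    static struct peer *lookup_or_create(uint32_t addr)
    {
            struct peer *p, *candidate;

            /* [common case] search the active records first */
            pthread_mutex_lock(&peers_lock);
            p = find_locked(addr);
            if (p) {
                    p->usage++;
                    pthread_mutex_unlock(&peers_lock);
                    return p;
            }
            pthread_mutex_unlock(&peers_lock);

            /* not there - build a candidate with no lock held */
            candidate = calloc(1, sizeof(*candidate));
            if (!candidate)
                    return NULL;
            candidate->addr = addr;
            candidate->usage = 1;

            /* search again in case it appeared whilst we were busy */
            pthread_mutex_lock(&peers_lock);
            p = find_locked(addr);
            if (p) {
                    p->usage++;
                    pthread_mutex_unlock(&peers_lock);
                    free(candidate);        /* lost the race */
                    return p;
            }
            candidate->next = peers;        /* won the race: publish */
            peers = candidate;
            pthread_mutex_unlock(&peers_lock);
            return candidate;
    }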
212
213/*****************************************************************************/
214/*
215 * finish with a peer record
216 * - it gets sent to the graveyard from where it can be resurrected or timed
217 * out
218 */
219void rxrpc_put_peer(struct rxrpc_peer *peer)
220{
221 struct rxrpc_transport *trans = peer->trans;
222
223 _enter("%p{cc=%d a=%08x}",
224 peer,
225 atomic_read(&peer->conn_count),
226 ntohl(peer->addr.s_addr));
227
228 /* sanity check */
229 if (atomic_read(&peer->usage) <= 0)
230 BUG();
231
232 write_lock(&trans->peer_lock);
233 spin_lock(&trans->peer_gylock);
234 if (likely(!atomic_dec_and_test(&peer->usage))) {
235 spin_unlock(&trans->peer_gylock);
236 write_unlock(&trans->peer_lock);
237 _leave("");
238 return;
239 }
240
241 /* move to graveyard queue */
242 list_del(&peer->link);
243 write_unlock(&trans->peer_lock);
244
245 list_add_tail(&peer->link, &trans->peer_graveyard);
246
247 BUG_ON(!list_empty(&peer->conn_active));
248
249 rxrpc_krxtimod_add_timer(&peer->timeout, rxrpc_peer_timeout * HZ);
250
251 spin_unlock(&trans->peer_gylock);
252
253 rxrpc_put_transport(trans);
254
255 _leave(" [killed]");
256} /* end rxrpc_put_peer() */
257
258/*****************************************************************************/
259/*
260 * handle a peer timing out in the graveyard
261 * - called from krxtimod
262 */
263static void rxrpc_peer_do_timeout(struct rxrpc_peer *peer)
264{
265 struct rxrpc_transport *trans = peer->trans;
266
267 _enter("%p{u=%d cc=%d a=%08x}",
268 peer,
269 atomic_read(&peer->usage),
270 atomic_read(&peer->conn_count),
271 ntohl(peer->addr.s_addr));
272
273 BUG_ON(atomic_read(&peer->usage) < 0);
274
275 /* remove from graveyard if still dead */
276 spin_lock(&trans->peer_gylock);
277 if (atomic_read(&peer->usage) == 0)
278 list_del_init(&peer->link);
279 else
280 peer = NULL;
281 spin_unlock(&trans->peer_gylock);
282
283 if (!peer) {
284 _leave("");
285 return; /* resurrected */
286 }
287
288 /* clear all connections on this peer */
289 rxrpc_conn_clearall(peer);
290
291 BUG_ON(!list_empty(&peer->conn_active));
292 BUG_ON(!list_empty(&peer->conn_graveyard));
293
294 /* inform the application layer */
295 if (peer->ops && peer->ops->discarding)
296 peer->ops->discarding(peer);
297
298 if (!list_empty(&peer->proc_link)) {
299 down_write(&rxrpc_peers_sem);
300 list_del(&peer->proc_link);
301 up_write(&rxrpc_peers_sem);
302 }
303
304 __RXACCT(atomic_dec(&rxrpc_peer_count));
305 kfree(peer);
306
307 /* if the graveyard is now empty, wake up anyone waiting for that */
308 if (atomic_dec_and_test(&trans->peer_count))
309 wake_up(&trans->peer_gy_waitq);
310
311 _leave(" [destroyed]");
312} /* end rxrpc_peer_do_timeout() */
313
314/*****************************************************************************/
315/*
316 * clear all peer records from a transport endpoint
317 */
318void rxrpc_peer_clearall(struct rxrpc_transport *trans)
319{
320	DECLARE_WAITQUEUE(myself, current);
321
322 struct rxrpc_peer *peer;
323 int err;
324
325	_enter("%p", trans);
326
327 /* there shouldn't be any active peers remaining */
328 BUG_ON(!list_empty(&trans->peer_active));
329
330 /* manually timeout all peers in the graveyard */
331 spin_lock(&trans->peer_gylock);
332 while (!list_empty(&trans->peer_graveyard)) {
333 peer = list_entry(trans->peer_graveyard.next,
334 struct rxrpc_peer, link);
335		_debug("Clearing peer %p", peer);
336 err = rxrpc_krxtimod_del_timer(&peer->timeout);
337 spin_unlock(&trans->peer_gylock);
338
339 if (err == 0)
340 rxrpc_peer_do_timeout(peer);
341
342 spin_lock(&trans->peer_gylock);
343 }
344 spin_unlock(&trans->peer_gylock);
345
346	/* wait for the peer graveyard to be completely cleared */
347 set_current_state(TASK_UNINTERRUPTIBLE);
348 add_wait_queue(&trans->peer_gy_waitq, &myself);
349
350 while (atomic_read(&trans->peer_count) != 0) {
351 schedule();
352 set_current_state(TASK_UNINTERRUPTIBLE);
353 }
354
355 remove_wait_queue(&trans->peer_gy_waitq, &myself);
356 set_current_state(TASK_RUNNING);
357
358 _leave("");
359} /* end rxrpc_peer_clearall() */
360
361/*****************************************************************************/
362/*
363 * calculate and cache the Round-Trip-Time for a message and its response
364 */
365void rxrpc_peer_calculate_rtt(struct rxrpc_peer *peer,
366 struct rxrpc_message *msg,
367 struct rxrpc_message *resp)
368{
369 unsigned long long rtt;
370 int loop;
371
372 _enter("%p,%p,%p", peer, msg, resp);
373
374 /* calculate the latest RTT */
375 rtt = resp->stamp.tv_sec - msg->stamp.tv_sec;
376 rtt *= 1000000UL;
377 rtt += resp->stamp.tv_usec - msg->stamp.tv_usec;
378
379 /* add to cache */
380 peer->rtt_cache[peer->rtt_point] = rtt;
381 peer->rtt_point++;
382 peer->rtt_point %= RXRPC_RTT_CACHE_SIZE;
383
384 if (peer->rtt_usage < RXRPC_RTT_CACHE_SIZE)
385 peer->rtt_usage++;
386
387 /* recalculate RTT */
388 rtt = 0;
389 for (loop = peer->rtt_usage - 1; loop >= 0; loop--)
390 rtt += peer->rtt_cache[loop];
391
392 do_div(rtt, peer->rtt_usage);
393 peer->rtt = rtt;
394
395 _leave(" RTT=%lu.%lums",
396 (long) (peer->rtt / 1000), (long) (peer->rtt % 1000));
397
398} /* end rxrpc_peer_calculate_rtt() */
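
The bookkeeping above is a sliding mean over a ring of up to
RXRPC_RTT_CACHE_SIZE samples, each sample being the microsecond delta
between a request's send stamp and its response's stamp. A self-contained
check of the arithmetic with illustrative values:

    #include <stdio.h>

    int main(void)
    {
            /* three cached samples, in microseconds */
            unsigned long long cache[3] = { 500, 700, 600 };
            unsigned long long rtt = 0;
            int i;

            for (i = 0; i < 3; i++)
                    rtt += cache[i];
            rtt /= 3;       /* the kernel uses do_div() for 64/32 division */

            /* matches the _leave() format above: prints RTT=0.600ms */
            printf("RTT=%llu.%03llums\n", rtt / 1000, rtt % 1000);
            return 0;
    }
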
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
deleted file mode 100644
index 8551c879e456..000000000000
--- a/net/rxrpc/proc.c
+++ /dev/null
@@ -1,617 +0,0 @@
1/* proc.c: /proc interface for RxRPC
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/sched.h>
13#include <linux/slab.h>
14#include <linux/module.h>
15#include <linux/proc_fs.h>
16#include <linux/seq_file.h>
17#include <rxrpc/rxrpc.h>
18#include <rxrpc/transport.h>
19#include <rxrpc/peer.h>
20#include <rxrpc/connection.h>
21#include <rxrpc/call.h>
22#include <rxrpc/message.h>
23#include "internal.h"
24
25static struct proc_dir_entry *proc_rxrpc;
26
27static int rxrpc_proc_transports_open(struct inode *inode, struct file *file);
28static void *rxrpc_proc_transports_start(struct seq_file *p, loff_t *pos);
29static void *rxrpc_proc_transports_next(struct seq_file *p, void *v, loff_t *pos);
30static void rxrpc_proc_transports_stop(struct seq_file *p, void *v);
31static int rxrpc_proc_transports_show(struct seq_file *m, void *v);
32
33static struct seq_operations rxrpc_proc_transports_ops = {
34 .start = rxrpc_proc_transports_start,
35 .next = rxrpc_proc_transports_next,
36 .stop = rxrpc_proc_transports_stop,
37 .show = rxrpc_proc_transports_show,
38};
39
40static const struct file_operations rxrpc_proc_transports_fops = {
41 .open = rxrpc_proc_transports_open,
42 .read = seq_read,
43 .llseek = seq_lseek,
44 .release = seq_release,
45};
46
47static int rxrpc_proc_peers_open(struct inode *inode, struct file *file);
48static void *rxrpc_proc_peers_start(struct seq_file *p, loff_t *pos);
49static void *rxrpc_proc_peers_next(struct seq_file *p, void *v, loff_t *pos);
50static void rxrpc_proc_peers_stop(struct seq_file *p, void *v);
51static int rxrpc_proc_peers_show(struct seq_file *m, void *v);
52
53static struct seq_operations rxrpc_proc_peers_ops = {
54 .start = rxrpc_proc_peers_start,
55 .next = rxrpc_proc_peers_next,
56 .stop = rxrpc_proc_peers_stop,
57 .show = rxrpc_proc_peers_show,
58};
59
60static const struct file_operations rxrpc_proc_peers_fops = {
61 .open = rxrpc_proc_peers_open,
62 .read = seq_read,
63 .llseek = seq_lseek,
64 .release = seq_release,
65};
66
67static int rxrpc_proc_conns_open(struct inode *inode, struct file *file);
68static void *rxrpc_proc_conns_start(struct seq_file *p, loff_t *pos);
69static void *rxrpc_proc_conns_next(struct seq_file *p, void *v, loff_t *pos);
70static void rxrpc_proc_conns_stop(struct seq_file *p, void *v);
71static int rxrpc_proc_conns_show(struct seq_file *m, void *v);
72
73static struct seq_operations rxrpc_proc_conns_ops = {
74 .start = rxrpc_proc_conns_start,
75 .next = rxrpc_proc_conns_next,
76 .stop = rxrpc_proc_conns_stop,
77 .show = rxrpc_proc_conns_show,
78};
79
80static const struct file_operations rxrpc_proc_conns_fops = {
81 .open = rxrpc_proc_conns_open,
82 .read = seq_read,
83 .llseek = seq_lseek,
84 .release = seq_release,
85};
86
87static int rxrpc_proc_calls_open(struct inode *inode, struct file *file);
88static void *rxrpc_proc_calls_start(struct seq_file *p, loff_t *pos);
89static void *rxrpc_proc_calls_next(struct seq_file *p, void *v, loff_t *pos);
90static void rxrpc_proc_calls_stop(struct seq_file *p, void *v);
91static int rxrpc_proc_calls_show(struct seq_file *m, void *v);
92
93static struct seq_operations rxrpc_proc_calls_ops = {
94 .start = rxrpc_proc_calls_start,
95 .next = rxrpc_proc_calls_next,
96 .stop = rxrpc_proc_calls_stop,
97 .show = rxrpc_proc_calls_show,
98};
99
100static const struct file_operations rxrpc_proc_calls_fops = {
101 .open = rxrpc_proc_calls_open,
102 .read = seq_read,
103 .llseek = seq_lseek,
104 .release = seq_release,
105};
106
107static const char *rxrpc_call_states7[] = {
108 "complet",
109 "error ",
110 "rcv_op ",
111 "rcv_arg",
112 "got_arg",
113 "snd_rpl",
114 "fin_ack",
115 "snd_arg",
116 "rcv_rpl",
117 "got_rpl"
118};
119
120static const char *rxrpc_call_error_states7[] = {
121 "no_err ",
122 "loc_abt",
123 "rmt_abt",
124 "loc_err",
125 "rmt_err"
126};
127
128/*****************************************************************************/
129/*
130 * initialise the /proc/net/rxrpc/ directory
131 */
132int rxrpc_proc_init(void)
133{
134 struct proc_dir_entry *p;
135
136 proc_rxrpc = proc_mkdir("rxrpc", proc_net);
137 if (!proc_rxrpc)
138 goto error;
139 proc_rxrpc->owner = THIS_MODULE;
140
141 p = create_proc_entry("calls", 0, proc_rxrpc);
142 if (!p)
143 goto error_proc;
144 p->proc_fops = &rxrpc_proc_calls_fops;
145 p->owner = THIS_MODULE;
146
147 p = create_proc_entry("connections", 0, proc_rxrpc);
148 if (!p)
149 goto error_calls;
150 p->proc_fops = &rxrpc_proc_conns_fops;
151 p->owner = THIS_MODULE;
152
153 p = create_proc_entry("peers", 0, proc_rxrpc);
154 if (!p)
155 goto error_calls;
156 p->proc_fops = &rxrpc_proc_peers_fops;
157 p->owner = THIS_MODULE;
158
159 p = create_proc_entry("transports", 0, proc_rxrpc);
160 if (!p)
161 goto error_conns;
162 p->proc_fops = &rxrpc_proc_transports_fops;
163 p->owner = THIS_MODULE;
164
165 return 0;
166
167 error_conns:
168 remove_proc_entry("connections", proc_rxrpc);
169 error_calls:
170 remove_proc_entry("calls", proc_rxrpc);
171 error_proc:
172 remove_proc_entry("rxrpc", proc_net);
173 error:
174 return -ENOMEM;
175} /* end rxrpc_proc_init() */
176
177/*****************************************************************************/
178/*
179 * clean up the /proc/net/rxrpc/ directory
180 */
181void rxrpc_proc_cleanup(void)
182{
183 remove_proc_entry("transports", proc_rxrpc);
184 remove_proc_entry("peers", proc_rxrpc);
185 remove_proc_entry("connections", proc_rxrpc);
186 remove_proc_entry("calls", proc_rxrpc);
187
188 remove_proc_entry("rxrpc", proc_net);
189
190} /* end rxrpc_proc_cleanup() */
191
192/*****************************************************************************/
193/*
194 * open "/proc/net/rxrpc/transports" which provides a summary of extant transports
195 */
196static int rxrpc_proc_transports_open(struct inode *inode, struct file *file)
197{
198 struct seq_file *m;
199 int ret;
200
201 ret = seq_open(file, &rxrpc_proc_transports_ops);
202 if (ret < 0)
203 return ret;
204
205 m = file->private_data;
206 m->private = PDE(inode)->data;
207
208 return 0;
209} /* end rxrpc_proc_transports_open() */
210
211/*****************************************************************************/
212/*
213 * set up the iterator to start reading from the transports list and return the first item
214 */
215static void *rxrpc_proc_transports_start(struct seq_file *m, loff_t *_pos)
216{
217 struct list_head *_p;
218 loff_t pos = *_pos;
219
220 /* lock the list against modification */
221 down_read(&rxrpc_proc_transports_sem);
222
223 /* allow for the header line */
224 if (!pos)
225 return SEQ_START_TOKEN;
226 pos--;
227
228 /* find the n'th element in the list */
229 list_for_each(_p, &rxrpc_proc_transports)
230 if (!pos--)
231 break;
232
233 return _p != &rxrpc_proc_transports ? _p : NULL;
234} /* end rxrpc_proc_transports_start() */
235
236/*****************************************************************************/
237/*
238 * move to next call in transports list
239 */
240static void *rxrpc_proc_transports_next(struct seq_file *p, void *v, loff_t *pos)
241{
242 struct list_head *_p;
243
244 (*pos)++;
245
246 _p = v;
247 _p = (v == SEQ_START_TOKEN) ? rxrpc_proc_transports.next : _p->next;
248
249 return _p != &rxrpc_proc_transports ? _p : NULL;
250} /* end rxrpc_proc_transports_next() */
251
252/*****************************************************************************/
253/*
254 * clean up after reading from the transports list
255 */
256static void rxrpc_proc_transports_stop(struct seq_file *p, void *v)
257{
258 up_read(&rxrpc_proc_transports_sem);
259
260} /* end rxrpc_proc_transports_stop() */
261
262/*****************************************************************************/
263/*
264 * display a header line followed by a load of call lines
265 */
266static int rxrpc_proc_transports_show(struct seq_file *m, void *v)
267{
268 struct rxrpc_transport *trans =
269 list_entry(v, struct rxrpc_transport, proc_link);
270
271 /* display header on line 1 */
272 if (v == SEQ_START_TOKEN) {
273 seq_puts(m, "LOCAL USE\n");
274 return 0;
275 }
276
277 /* display one transport per line on subsequent lines */
278 seq_printf(m, "%5hu %3d\n",
279 trans->port,
280 atomic_read(&trans->usage)
281 );
282
283 return 0;
284} /* end rxrpc_proc_transports_show() */
285
286/*****************************************************************************/
287/*
288 * open "/proc/net/rxrpc/peers" which provides a summary of extant peers
289 */
290static int rxrpc_proc_peers_open(struct inode *inode, struct file *file)
291{
292 struct seq_file *m;
293 int ret;
294
295 ret = seq_open(file, &rxrpc_proc_peers_ops);
296 if (ret < 0)
297 return ret;
298
299 m = file->private_data;
300 m->private = PDE(inode)->data;
301
302 return 0;
303} /* end rxrpc_proc_peers_open() */
304
305/*****************************************************************************/
306/*
307 * set up the iterator to start reading from the peers list and return the
308 * first item
309 */
310static void *rxrpc_proc_peers_start(struct seq_file *m, loff_t *_pos)
311{
312 struct list_head *_p;
313 loff_t pos = *_pos;
314
315 /* lock the list against modification */
316 down_read(&rxrpc_peers_sem);
317
318 /* allow for the header line */
319 if (!pos)
320 return SEQ_START_TOKEN;
321 pos--;
322
323 /* find the n'th element in the list */
324 list_for_each(_p, &rxrpc_peers)
325 if (!pos--)
326 break;
327
328 return _p != &rxrpc_peers ? _p : NULL;
329} /* end rxrpc_proc_peers_start() */
330
331/*****************************************************************************/
332/*
333 * move to next conn in peers list
334 */
335static void *rxrpc_proc_peers_next(struct seq_file *p, void *v, loff_t *pos)
336{
337 struct list_head *_p;
338
339 (*pos)++;
340
341 _p = v;
342 _p = (v == SEQ_START_TOKEN) ? rxrpc_peers.next : _p->next;
343
344 return _p != &rxrpc_peers ? _p : NULL;
345} /* end rxrpc_proc_peers_next() */
346
347/*****************************************************************************/
348/*
349 * clean up after reading from the peers list
350 */
351static void rxrpc_proc_peers_stop(struct seq_file *p, void *v)
352{
353 up_read(&rxrpc_peers_sem);
354
355} /* end rxrpc_proc_peers_stop() */
356
357/*****************************************************************************/
358/*
359 * display a header line followed by a load of peer lines
360 */
361static int rxrpc_proc_peers_show(struct seq_file *m, void *v)
362{
363 struct rxrpc_peer *peer = list_entry(v, struct rxrpc_peer, proc_link);
364 long timeout;
365
366 /* display header on line 1 */
367 if (v == SEQ_START_TOKEN) {
368 seq_puts(m, "LOCAL REMOTE USAGE CONNS TIMEOUT"
369 " MTU RTT(uS)\n");
370 return 0;
371 }
372
373 /* display one peer per line on subsequent lines */
374 timeout = 0;
375 if (!list_empty(&peer->timeout.link))
376 timeout = (long) peer->timeout.timo_jif -
377 (long) jiffies;
378
379 seq_printf(m, "%5hu %08x %5d %5d %8ld %5Zu %7lu\n",
380 peer->trans->port,
381 ntohl(peer->addr.s_addr),
382 atomic_read(&peer->usage),
383 atomic_read(&peer->conn_count),
384 timeout,
385 peer->if_mtu,
386 (long) peer->rtt
387 );
388
389 return 0;
390} /* end rxrpc_proc_peers_show() */
391
392/*****************************************************************************/
393/*
394 * open "/proc/net/rxrpc/connections" which provides a summary of extant
395 * connections
396 */
397static int rxrpc_proc_conns_open(struct inode *inode, struct file *file)
398{
399 struct seq_file *m;
400 int ret;
401
402 ret = seq_open(file, &rxrpc_proc_conns_ops);
403 if (ret < 0)
404 return ret;
405
406 m = file->private_data;
407 m->private = PDE(inode)->data;
408
409 return 0;
410} /* end rxrpc_proc_conns_open() */
411
412/*****************************************************************************/
413/*
414 * set up the iterator to start reading from the conns list and return the
415 * first item
416 */
417static void *rxrpc_proc_conns_start(struct seq_file *m, loff_t *_pos)
418{
419 struct list_head *_p;
420 loff_t pos = *_pos;
421
422 /* lock the list against modification */
423 down_read(&rxrpc_conns_sem);
424
425 /* allow for the header line */
426 if (!pos)
427 return SEQ_START_TOKEN;
428 pos--;
429
430 /* find the n'th element in the list */
431 list_for_each(_p, &rxrpc_conns)
432 if (!pos--)
433 break;
434
435 return _p != &rxrpc_conns ? _p : NULL;
436} /* end rxrpc_proc_conns_start() */
437
438/*****************************************************************************/
439/*
440 * move to next conn in conns list
441 */
442static void *rxrpc_proc_conns_next(struct seq_file *p, void *v, loff_t *pos)
443{
444 struct list_head *_p;
445
446 (*pos)++;
447
448 _p = v;
449 _p = (v == SEQ_START_TOKEN) ? rxrpc_conns.next : _p->next;
450
451 return _p != &rxrpc_conns ? _p : NULL;
452} /* end rxrpc_proc_conns_next() */
453
454/*****************************************************************************/
455/*
456 * clean up after reading from the conns list
457 */
458static void rxrpc_proc_conns_stop(struct seq_file *p, void *v)
459{
460 up_read(&rxrpc_conns_sem);
461
462} /* end rxrpc_proc_conns_stop() */
463
464/*****************************************************************************/
465/*
466 * display a header line followed by a load of conn lines
467 */
468static int rxrpc_proc_conns_show(struct seq_file *m, void *v)
469{
470 struct rxrpc_connection *conn;
471 long timeout;
472
473 conn = list_entry(v, struct rxrpc_connection, proc_link);
474
475 /* display header on line 1 */
476 if (v == SEQ_START_TOKEN) {
477 seq_puts(m,
478 "LOCAL REMOTE RPORT SRVC CONN END SERIALNO "
479 "CALLNO MTU TIMEOUT"
480 "\n");
481 return 0;
482 }
483
484 /* display one conn per line on subsequent lines */
485 timeout = 0;
486 if (!list_empty(&conn->timeout.link))
487 timeout = (long) conn->timeout.timo_jif -
488 (long) jiffies;
489
490 seq_printf(m,
491 "%5hu %08x %5hu %04hx %08x %-3.3s %08x %08x %5Zu %8ld\n",
492 conn->trans->port,
493 ntohl(conn->addr.sin_addr.s_addr),
494 ntohs(conn->addr.sin_port),
495 ntohs(conn->service_id),
496 ntohl(conn->conn_id),
497 conn->out_clientflag ? "CLT" : "SRV",
498 conn->serial_counter,
499 conn->call_counter,
500 conn->mtu_size,
501 timeout
502 );
503
504 return 0;
505} /* end rxrpc_proc_conns_show() */
506
507/*****************************************************************************/
508/*
509 * open "/proc/net/rxrpc/calls" which provides a summary of extant calls
510 */
511static int rxrpc_proc_calls_open(struct inode *inode, struct file *file)
512{
513 struct seq_file *m;
514 int ret;
515
516 ret = seq_open(file, &rxrpc_proc_calls_ops);
517 if (ret < 0)
518 return ret;
519
520 m = file->private_data;
521 m->private = PDE(inode)->data;
522
523 return 0;
524} /* end rxrpc_proc_calls_open() */
525
526/*****************************************************************************/
527/*
528 * set up the iterator to start reading from the calls list and return the
529 * first item
530 */
531static void *rxrpc_proc_calls_start(struct seq_file *m, loff_t *_pos)
532{
533 struct list_head *_p;
534 loff_t pos = *_pos;
535
536 /* lock the list against modification */
537 down_read(&rxrpc_calls_sem);
538
539 /* allow for the header line */
540 if (!pos)
541 return SEQ_START_TOKEN;
542 pos--;
543
544 /* find the n'th element in the list */
545 list_for_each(_p, &rxrpc_calls)
546 if (!pos--)
547 break;
548
549 return _p != &rxrpc_calls ? _p : NULL;
550} /* end rxrpc_proc_calls_start() */
551
552/*****************************************************************************/
553/*
554 * move to next call in calls list
555 */
556static void *rxrpc_proc_calls_next(struct seq_file *p, void *v, loff_t *pos)
557{
558 struct list_head *_p;
559
560 (*pos)++;
561
562 _p = v;
563 _p = (v == SEQ_START_TOKEN) ? rxrpc_calls.next : _p->next;
564
565 return _p != &rxrpc_calls ? _p : NULL;
566} /* end rxrpc_proc_calls_next() */
567
568/*****************************************************************************/
569/*
570 * clean up after reading from the calls list
571 */
572static void rxrpc_proc_calls_stop(struct seq_file *p, void *v)
573{
574 up_read(&rxrpc_calls_sem);
575
576} /* end rxrpc_proc_calls_stop() */
577
578/*****************************************************************************/
579/*
580 * display a header line followed by a load of call lines
581 */
582static int rxrpc_proc_calls_show(struct seq_file *m, void *v)
583{
584 struct rxrpc_call *call = list_entry(v, struct rxrpc_call, call_link);
585
586 /* display header on line 1 */
587 if (v == SEQ_START_TOKEN) {
588 seq_puts(m,
589 "LOCAL REMOT SRVC CONN CALL DIR USE "
590 " L STATE OPCODE ABORT ERRNO\n"
591 );
592 return 0;
593 }
594
595 /* display one call per line on subsequent lines */
596 seq_printf(m,
597 "%5hu %5hu %04hx %08x %08x %s %3u%c"
598 " %c %-7.7s %6d %08x %5d\n",
599 call->conn->trans->port,
600 ntohs(call->conn->addr.sin_port),
601 ntohs(call->conn->service_id),
602 ntohl(call->conn->conn_id),
603 ntohl(call->call_id),
604 call->conn->service ? "SVC" : "CLT",
605 atomic_read(&call->usage),
606 waitqueue_active(&call->waitq) ? 'w' : ' ',
607 call->app_last_rcv ? 'Y' : '-',
608 (call->app_call_state != RXRPC_CSTATE_ERROR ?
609 rxrpc_call_states7[call->app_call_state] :
610 rxrpc_call_error_states7[call->app_err_state]),
611 call->app_opcode,
612 call->app_abort_code,
613 call->app_errno
614 );
615
616 return 0;
617} /* end rxrpc_proc_calls_show() */
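
From userspace, each of these proc files is just line-oriented text with a header row first. A minimal (hypothetical) reader, shown as a runnable sketch:

/* sketch: dump /proc/net/rxrpc/calls from userspace */
#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/net/rxrpc/calls", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* first line is the header row */
	fclose(f);
	return 0;
}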
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
new file mode 100644
index 000000000000..1eaf529efac1
--- /dev/null
+++ b/net/rxrpc/rxkad.c
@@ -0,0 +1,1153 @@
1/* Kerberos-based RxRPC security
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/net.h>
14#include <linux/skbuff.h>
15#include <linux/udp.h>
16#include <linux/crypto.h>
17#include <linux/scatterlist.h>
18#include <linux/ctype.h>
19#include <net/sock.h>
20#include <net/af_rxrpc.h>
21#include "ar-internal.h"
22
23#define RXKAD_VERSION 2
24#define MAXKRB5TICKETLEN 1024
25#define RXKAD_TKT_TYPE_KERBEROS_V5 256
26#define ANAME_SZ 40 /* size of authentication name */
27#define INST_SZ 40 /* size of principal's instance */
28#define REALM_SZ 40 /* size of principal's auth domain */
29#define SNAME_SZ 40 /* size of service name */
30
31unsigned rxrpc_debug;
32module_param_named(debug, rxrpc_debug, uint, S_IWUSR | S_IRUGO);
33MODULE_PARM_DESC(debug, "rxkad debugging mask");
34
35struct rxkad_level1_hdr {
36 __be32 data_size; /* true data size (excluding padding) */
37};
38
39struct rxkad_level2_hdr {
40 __be32 data_size; /* true data size (excluding padding) */
41 __be32 checksum; /* decrypted data checksum */
42};
43
44MODULE_DESCRIPTION("RxRPC network protocol type-2 security (Kerberos)");
45MODULE_AUTHOR("Red Hat, Inc.");
46MODULE_LICENSE("GPL");
47
48/*
49 * this holds a pinned cipher so that keventd doesn't get called by the cipher
50 * alloc routine, but since we have it to hand, we use it to decrypt RESPONSE
51 * packets
52 */
53static struct crypto_blkcipher *rxkad_ci;
54static DEFINE_MUTEX(rxkad_ci_mutex);
55
56/*
57 * initialise connection security
58 */
59static int rxkad_init_connection_security(struct rxrpc_connection *conn)
60{
61 struct rxrpc_key_payload *payload;
62 struct crypto_blkcipher *ci;
63 int ret;
64
65 _enter("{%d},{%x}", conn->debug_id, key_serial(conn->key));
66
67 payload = conn->key->payload.data;
68 conn->security_ix = payload->k.security_index;
69
70 ci = crypto_alloc_blkcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
71 if (IS_ERR(ci)) {
72 _debug("no cipher");
73 ret = PTR_ERR(ci);
74 goto error;
75 }
76
77 if (crypto_blkcipher_setkey(ci, payload->k.session_key,
78 sizeof(payload->k.session_key)) < 0)
79 BUG();
80
81 switch (conn->security_level) {
82 case RXRPC_SECURITY_PLAIN:
83 break;
84 case RXRPC_SECURITY_AUTH:
85 conn->size_align = 8;
86 conn->security_size = sizeof(struct rxkad_level1_hdr);
87 conn->header_size += sizeof(struct rxkad_level1_hdr);
88 break;
89 case RXRPC_SECURITY_ENCRYPT:
90 conn->size_align = 8;
91 conn->security_size = sizeof(struct rxkad_level2_hdr);
92 conn->header_size += sizeof(struct rxkad_level2_hdr);
93 break;
94 default:
95 ret = -EKEYREJECTED;
96 goto error;
97 }
98
99 conn->cipher = ci;
100 ret = 0;
101error:
102 _leave(" = %d", ret);
103 return ret;
104}
105
106/*
107 * prime the encryption state with the invariant parts of a connection's
108 * description
109 */
110static void rxkad_prime_packet_security(struct rxrpc_connection *conn)
111{
112 struct rxrpc_key_payload *payload;
113 struct blkcipher_desc desc;
114 struct scatterlist sg[2];
115 struct rxrpc_crypt iv;
116 struct {
117 __be32 x[4];
118 } tmpbuf __attribute__((aligned(16))); /* must all be in same page */
119
120 _enter("");
121
122 if (!conn->key)
123 return;
124
125 payload = conn->key->payload.data;
126 memcpy(&iv, payload->k.session_key, sizeof(iv));
127
128 desc.tfm = conn->cipher;
129 desc.info = iv.x;
130 desc.flags = 0;
131
132 tmpbuf.x[0] = conn->epoch;
133 tmpbuf.x[1] = conn->cid;
134 tmpbuf.x[2] = 0;
135 tmpbuf.x[3] = htonl(conn->security_ix);
136
137 memset(sg, 0, sizeof(sg));
138 sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf));
139 sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf));
140 crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
141
142 memcpy(&conn->csum_iv, &tmpbuf.x[2], sizeof(conn->csum_iv));
143 ASSERTCMP(conn->csum_iv.n[0], ==, tmpbuf.x[2]);
144
145 _leave("");
146}
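
In effect, the priming step encrypts a 16-byte block built from the connection's invariants (epoch, cid, security index) and keeps the last eight bytes of ciphertext as the per-connection checksum IV. A runnable layout sketch, where a simple XOR stands in for pcbc(fcrypt), which is not reproduced here:

/* sketch: derive a checksum IV from connection invariants */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

int main(void)
{
	uint32_t x[4] = { 0x12345678 /* epoch */, 0x9abcdef0 /* cid */,
			  0, 2 /* security index */ };
	uint32_t key = 0x5ec7e7aa;	/* stand-in for the session key */
	uint8_t csum_iv[8];
	int i;

	for (i = 0; i < 4; i++)		/* stand-in for pcbc(fcrypt) */
		x[i] ^= key;

	memcpy(csum_iv, &x[2], sizeof(csum_iv)); /* last 8 ciphertext bytes */
	for (i = 0; i < 8; i++)
		printf("%02x", csum_iv[i]);
	putchar('\n');
	return 0;
}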
147
148/*
149 * partially encrypt a packet (level 1 security)
150 */
151static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
152 struct sk_buff *skb,
153 u32 data_size,
154 void *sechdr)
155{
156 struct rxrpc_skb_priv *sp;
157 struct blkcipher_desc desc;
158 struct rxrpc_crypt iv;
159 struct scatterlist sg[2];
160 struct {
161 struct rxkad_level1_hdr hdr;
162 __be32 first; /* first four bytes of data and padding */
163 } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
164 u16 check;
165
166 sp = rxrpc_skb(skb);
167
168 _enter("");
169
170 check = ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
171 data_size |= (u32) check << 16;
172
173 tmpbuf.hdr.data_size = htonl(data_size);
174 memcpy(&tmpbuf.first, sechdr + 4, sizeof(tmpbuf.first));
175
176 /* start the encryption afresh */
177 memset(&iv, 0, sizeof(iv));
178 desc.tfm = call->conn->cipher;
179 desc.info = iv.x;
180 desc.flags = 0;
181
182 memset(sg, 0, sizeof(sg));
183 sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf));
184 sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf));
185 crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
186
187 memcpy(sechdr, &tmpbuf, sizeof(tmpbuf));
188
189 _leave(" = 0");
190 return 0;
191}
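
The level-1 header folds a 16-bit integrity check (derived from seq ^ callNumber) into the top half of the 32-bit data_size word; the receiver reverses the split and expects the check to cancel out. A self-contained round trip of just that word arithmetic (byte-order conversions omitted):

/* sketch: pack/unpack the rxkad level-1 data_size word */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t seq = 5, call_number = 3;
	uint16_t check = (uint16_t)(seq ^ call_number);
	uint32_t data_size = 1000;
	uint32_t word = data_size | ((uint32_t)check << 16);

	/* receiver side */
	uint32_t rx_size = word & 0xffff;
	uint16_t rx_check = (uint16_t)(word >> 16);

	assert(rx_size == data_size);
	assert((uint16_t)(rx_check ^ seq ^ call_number) == 0);
	printf("word=%08x size=%u\n", word, rx_size);
	return 0;
}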
192
193/*
194 * wholly encrypt a packet (level 2 security)
195 */
196static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
197 struct sk_buff *skb,
198 u32 data_size,
199 void *sechdr)
200{
201 const struct rxrpc_key_payload *payload;
202 struct rxkad_level2_hdr rxkhdr
203 __attribute__((aligned(8))); /* must be all on one page */
204 struct rxrpc_skb_priv *sp;
205 struct blkcipher_desc desc;
206 struct rxrpc_crypt iv;
207 struct scatterlist sg[16];
208 struct sk_buff *trailer;
209 unsigned len;
210 u16 check;
211 int nsg;
212
213 sp = rxrpc_skb(skb);
214
215 _enter("");
216
217 check = ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
218
219 rxkhdr.data_size = htonl(data_size | (u32) check << 16);
220 rxkhdr.checksum = 0;
221
222 /* encrypt from the session key */
223 payload = call->conn->key->payload.data;
224 memcpy(&iv, payload->k.session_key, sizeof(iv));
225 desc.tfm = call->conn->cipher;
226 desc.info = iv.x;
227 desc.flags = 0;
228
229 memset(sg, 0, sizeof(sg[0]) * 2);
230 sg_set_buf(&sg[0], sechdr, sizeof(rxkhdr));
231 sg_set_buf(&sg[1], &rxkhdr, sizeof(rxkhdr));
232 crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(rxkhdr));
233
234 /* we want to encrypt the skbuff in-place */
235 nsg = skb_cow_data(skb, 0, &trailer);
236 if (nsg < 0 || nsg > 16)
237 return -ENOMEM;
238
239 len = data_size + call->conn->size_align - 1;
240 len &= ~(call->conn->size_align - 1);
241
242 skb_to_sgvec(skb, sg, 0, len);
243 crypto_blkcipher_encrypt_iv(&desc, sg, sg, len);
244
245 _leave(" = 0");
246 return 0;
247}
248
249/*
250 * checksum an RxRPC packet header and secure the payload to the
251 * connection's security level
252 */
252static int rxkad_secure_packet(const struct rxrpc_call *call,
253 struct sk_buff *skb,
254 size_t data_size,
255 void *sechdr)
256{
257 struct rxrpc_skb_priv *sp;
258 struct blkcipher_desc desc;
259 struct rxrpc_crypt iv;
260 struct scatterlist sg[2];
261 struct {
262 __be32 x[2];
263 } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
264 __be32 x;
265 int ret;
266
267 sp = rxrpc_skb(skb);
268
269 _enter("{%d{%x}},{#%u},%zu,",
270 call->debug_id, key_serial(call->conn->key), ntohl(sp->hdr.seq),
271 data_size);
272
273 if (!call->conn->cipher)
274 return 0;
275
276 ret = key_validate(call->conn->key);
277 if (ret < 0)
278 return ret;
279
280 /* continue encrypting from where we left off */
281 memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
282 desc.tfm = call->conn->cipher;
283 desc.info = iv.x;
284 desc.flags = 0;
285
286 /* calculate the security checksum */
287 x = htonl(call->channel << (32 - RXRPC_CIDSHIFT));
288 x |= sp->hdr.seq & __constant_cpu_to_be32(0x3fffffff);
289 tmpbuf.x[0] = sp->hdr.callNumber;
290 tmpbuf.x[1] = x;
291
292 memset(&sg, 0, sizeof(sg));
293 sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf));
294 sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf));
295 crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
296
297 x = ntohl(tmpbuf.x[1]);
298 x = (x >> 16) & 0xffff;
299 if (x == 0)
300 x = 1; /* zero checksums are not permitted */
301 sp->hdr.cksum = htons(x);
302
303 switch (call->conn->security_level) {
304 case RXRPC_SECURITY_PLAIN:
305 ret = 0;
306 break;
307 case RXRPC_SECURITY_AUTH:
308 ret = rxkad_secure_packet_auth(call, skb, data_size, sechdr);
309 break;
310 case RXRPC_SECURITY_ENCRYPT:
311 ret = rxkad_secure_packet_encrypt(call, skb, data_size,
312 sechdr);
313 break;
314 default:
315 ret = -EPERM;
316 break;
317 }
318
319 _leave(" = %d [set %hx]", ret, x);
320 return ret;
321}
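
The header checksum is the top 16 bits of the second encrypted word, with zero remapped to one so that an all-zero checksum never appears on the wire. The fold in isolation, as a runnable sketch:

/* sketch: fold an encrypted word into the 16-bit header checksum */
#include <stdint.h>
#include <stdio.h>

static uint16_t rxkad_fold_cksum(uint32_t encrypted_word)
{
	uint16_t x = (uint16_t)((encrypted_word >> 16) & 0xffff);

	return x ? x : 1;	/* zero checksums are not permitted */
}

int main(void)
{
	printf("%04x\n", rxkad_fold_cksum(0xdeadbeef));	/* dead */
	printf("%04x\n", rxkad_fold_cksum(0x0000ffff));	/* 0001 */
	return 0;
}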
322
323/*
324 * decrypt partial encryption on a packet (level 1 security)
325 */
326static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
327 struct sk_buff *skb,
328 u32 *_abort_code)
329{
330 struct rxkad_level1_hdr sechdr;
331 struct rxrpc_skb_priv *sp;
332 struct blkcipher_desc desc;
333 struct rxrpc_crypt iv;
334 struct scatterlist sg[2];
335 struct sk_buff *trailer;
336 u32 data_size, buf;
337 u16 check;
338
339 _enter("");
340
341 sp = rxrpc_skb(skb);
342
343 /* we want to decrypt the skbuff in-place */
344 if (skb_cow_data(skb, 0, &trailer) < 0)
345 goto nomem;
346
347 skb_to_sgvec(skb, sg, 0, 8);
348
349 /* start the decryption afresh */
350 memset(&iv, 0, sizeof(iv));
351 desc.tfm = call->conn->cipher;
352 desc.info = iv.x;
353 desc.flags = 0;
354
355 crypto_blkcipher_decrypt_iv(&desc, sg, sg, 8);
356
357 /* remove the decrypted packet length */
358 if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0)
359 goto datalen_error;
360 if (!skb_pull(skb, sizeof(sechdr)))
361 BUG();
362
363 buf = ntohl(sechdr.data_size);
364 data_size = buf & 0xffff;
365
366 check = buf >> 16;
367 check ^= ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
368 check &= 0xffff;
369 if (check != 0) {
370 *_abort_code = RXKADSEALEDINCON;
371 goto protocol_error;
372 }
373
374 /* shorten the packet to remove the padding */
375 if (data_size > skb->len)
376 goto datalen_error;
377 else if (data_size < skb->len)
378 skb->len = data_size;
379
380 _leave(" = 0 [dlen=%x]", data_size);
381 return 0;
382
383datalen_error:
384 *_abort_code = RXKADDATALEN;
385protocol_error:
386 _leave(" = -EPROTO");
387 return -EPROTO;
388
389nomem:
390 _leave(" = -ENOMEM");
391 return -ENOMEM;
392}
393
394/*
395 * wholly decrypt a packet (level 2 security)
396 */
397static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
398 struct sk_buff *skb,
399 u32 *_abort_code)
400{
401 const struct rxrpc_key_payload *payload;
402 struct rxkad_level2_hdr sechdr;
403 struct rxrpc_skb_priv *sp;
404 struct blkcipher_desc desc;
405 struct rxrpc_crypt iv;
406 struct scatterlist _sg[4], *sg;
407 struct sk_buff *trailer;
408 u32 data_size, buf;
409 u16 check;
410 int nsg;
411
412 _enter(",{%d}", skb->len);
413
414 sp = rxrpc_skb(skb);
415
416 /* we want to decrypt the skbuff in-place */
417 nsg = skb_cow_data(skb, 0, &trailer);
418 if (nsg < 0)
419 goto nomem;
420
421 sg = _sg;
422 if (unlikely(nsg > 4)) {
423 sg = kmalloc(sizeof(*sg) * nsg, GFP_NOIO);
424 if (!sg)
425 goto nomem;
426 }
427
428 skb_to_sgvec(skb, sg, 0, skb->len);
429
430 /* decrypt from the session key */
431 payload = call->conn->key->payload.data;
432 memcpy(&iv, payload->k.session_key, sizeof(iv));
433 desc.tfm = call->conn->cipher;
434 desc.info = iv.x;
435 desc.flags = 0;
436
437 crypto_blkcipher_decrypt_iv(&desc, sg, sg, skb->len);
438 if (sg != _sg)
439 kfree(sg);
440
441 /* remove the decrypted packet length */
442 if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0)
443 goto datalen_error;
444 if (!skb_pull(skb, sizeof(sechdr)))
445 BUG();
446
447 buf = ntohl(sechdr.data_size);
448 data_size = buf & 0xffff;
449
450 check = buf >> 16;
451 check ^= ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
452 check &= 0xffff;
453 if (check != 0) {
454 *_abort_code = RXKADSEALEDINCON;
455 goto protocol_error;
456 }
457
458 /* shorten the packet to remove the padding */
459 if (data_size > skb->len)
460 goto datalen_error;
461 else if (data_size < skb->len)
462 skb->len = data_size;
463
464 _leave(" = 0 [dlen=%x]", data_size);
465 return 0;
466
467datalen_error:
468 *_abort_code = RXKADDATALEN;
469protocol_error:
470 _leave(" = -EPROTO");
471 return -EPROTO;
472
473nomem:
474 _leave(" = -ENOMEM");
475 return -ENOMEM;
476}
477
478/*
479 * verify the security on a received packet
480 */
481static int rxkad_verify_packet(const struct rxrpc_call *call,
482 struct sk_buff *skb,
483 u32 *_abort_code)
484{
485 struct blkcipher_desc desc;
486 struct rxrpc_skb_priv *sp;
487 struct rxrpc_crypt iv;
488 struct scatterlist sg[2];
489 struct {
490 __be32 x[2];
491 } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
492 __be32 x;
493 __be16 cksum;
494 int ret;
495
496 sp = rxrpc_skb(skb);
497
498 _enter("{%d{%x}},{#%u}",
499 call->debug_id, key_serial(call->conn->key),
500 ntohl(sp->hdr.seq));
501
502 if (!call->conn->cipher)
503 return 0;
504
505 if (sp->hdr.securityIndex != 2) {
506 *_abort_code = RXKADINCONSISTENCY;
507 _leave(" = -EPROTO [not rxkad]");
508 return -EPROTO;
509 }
510
511 /* continue encrypting from where we left off */
512 memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
513 desc.tfm = call->conn->cipher;
514 desc.info = iv.x;
515 desc.flags = 0;
516
517 /* validate the security checksum */
518 x = htonl(call->channel << (32 - RXRPC_CIDSHIFT));
519 x |= sp->hdr.seq & __constant_cpu_to_be32(0x3fffffff);
520 tmpbuf.x[0] = call->call_id;
521 tmpbuf.x[1] = x;
522
523 memset(&sg, 0, sizeof(sg));
524 sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf));
525 sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf));
526 crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
527
528 x = ntohl(tmpbuf.x[1]);
529 x = (x >> 16) & 0xffff;
530 if (x == 0)
531 x = 1; /* zero checksums are not permitted */
532
533 cksum = htons(x);
534 if (sp->hdr.cksum != cksum) {
535 *_abort_code = RXKADSEALEDINCON;
536 _leave(" = -EPROTO [csum failed]");
537 return -EPROTO;
538 }
539
540 switch (call->conn->security_level) {
541 case RXRPC_SECURITY_PLAIN:
542 ret = 0;
543 break;
544 case RXRPC_SECURITY_AUTH:
545 ret = rxkad_verify_packet_auth(call, skb, _abort_code);
546 break;
547 case RXRPC_SECURITY_ENCRYPT:
548 ret = rxkad_verify_packet_encrypt(call, skb, _abort_code);
549 break;
550 default:
551 ret = -ENOANO;
552 break;
553 }
554
555 _leave(" = %d", ret);
556 return ret;
557}
558
559/*
560 * issue a challenge
561 */
562static int rxkad_issue_challenge(struct rxrpc_connection *conn)
563{
564 struct rxkad_challenge challenge;
565 struct rxrpc_header hdr;
566 struct msghdr msg;
567 struct kvec iov[2];
568 size_t len;
569 int ret;
570
571 _enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
572
573 ret = key_validate(conn->key);
574 if (ret < 0)
575 return ret;
576
577 get_random_bytes(&conn->security_nonce, sizeof(conn->security_nonce));
578
579 challenge.version = htonl(2);
580 challenge.nonce = htonl(conn->security_nonce);
581 challenge.min_level = htonl(0);
582 challenge.__padding = 0;
583
584 msg.msg_name = &conn->trans->peer->srx.transport.sin;
585 msg.msg_namelen = sizeof(conn->trans->peer->srx.transport.sin);
586 msg.msg_control = NULL;
587 msg.msg_controllen = 0;
588 msg.msg_flags = 0;
589
590 hdr.epoch = conn->epoch;
591 hdr.cid = conn->cid;
592 hdr.callNumber = 0;
593 hdr.seq = 0;
594 hdr.type = RXRPC_PACKET_TYPE_CHALLENGE;
595 hdr.flags = conn->out_clientflag;
596 hdr.userStatus = 0;
597 hdr.securityIndex = conn->security_ix;
598 hdr._rsvd = 0;
599 hdr.serviceId = conn->service_id;
600
601 iov[0].iov_base = &hdr;
602 iov[0].iov_len = sizeof(hdr);
603 iov[1].iov_base = &challenge;
604 iov[1].iov_len = sizeof(challenge);
605
606 len = iov[0].iov_len + iov[1].iov_len;
607
608 hdr.serial = htonl(atomic_inc_return(&conn->serial));
609 _proto("Tx CHALLENGE %%%u", ntohl(hdr.serial));
610
611 ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 2, len);
612 if (ret < 0) {
613 _debug("sendmsg failed: %d", ret);
614 return -EAGAIN;
615 }
616
617 _leave(" = 0");
618 return 0;
619}
620
621/*
622 * send a Kerberos security response
623 */
624static int rxkad_send_response(struct rxrpc_connection *conn,
625 struct rxrpc_header *hdr,
626 struct rxkad_response *resp,
627 const struct rxkad_key *s2)
628{
629 struct msghdr msg;
630 struct kvec iov[3];
631 size_t len;
632 int ret;
633
634 _enter("");
635
636 msg.msg_name = &conn->trans->peer->srx.transport.sin;
637 msg.msg_namelen = sizeof(conn->trans->peer->srx.transport.sin);
638 msg.msg_control = NULL;
639 msg.msg_controllen = 0;
640 msg.msg_flags = 0;
641
642 hdr->epoch = conn->epoch;
643 hdr->seq = 0;
644 hdr->type = RXRPC_PACKET_TYPE_RESPONSE;
645 hdr->flags = conn->out_clientflag;
646 hdr->userStatus = 0;
647 hdr->_rsvd = 0;
648
649 iov[0].iov_base = hdr;
650 iov[0].iov_len = sizeof(*hdr);
651 iov[1].iov_base = resp;
652 iov[1].iov_len = sizeof(*resp);
653 iov[2].iov_base = (void *) s2->ticket;
654 iov[2].iov_len = s2->ticket_len;
655
656 len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len;
657
658 hdr->serial = htonl(atomic_inc_return(&conn->serial));
659 _proto("Tx RESPONSE %%%u", ntohl(hdr->serial));
660
661 ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 3, len);
662 if (ret < 0) {
663 _debug("sendmsg failed: %d", ret);
664 return -EAGAIN;
665 }
666
667 _leave(" = 0");
668 return 0;
669}
670
671/*
672 * calculate the response checksum
673 */
674static void rxkad_calc_response_checksum(struct rxkad_response *response)
675{
676 u32 csum = 1000003;
677 int loop;
678 u8 *p = (u8 *) response;
679
680 for (loop = sizeof(*response); loop > 0; loop--)
681 csum = csum * 0x10204081 + *p++;
682
683 response->encrypted.checksum = htonl(csum);
684}
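
The response checksum is a simple multiplicative hash over the whole response structure, seeded with 1000003 and stepped by the odd multiplier 0x10204081. The same loop over an arbitrary buffer, as a standalone sketch:

/* sketch: the rxkad response checksum over a byte buffer */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t rxkad_csum(const void *buf, size_t len)
{
	const uint8_t *p = buf;
	uint32_t csum = 1000003;

	while (len--)
		csum = csum * 0x10204081 + *p++;
	return csum;
}

int main(void)
{
	const char sample[] = "rxkad response";

	printf("%08x\n", rxkad_csum(sample, strlen(sample)));
	return 0;
}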
685
686/*
687 * load a scatterlist with a potentially split-page buffer
688 */
689static void rxkad_sg_set_buf2(struct scatterlist sg[2],
690 void *buf, size_t buflen)
691{
692
693 memset(sg, 0, sizeof(*sg) * 2);
694
695 sg_set_buf(&sg[0], buf, buflen);
696 if (sg[0].offset + buflen > PAGE_SIZE) {
697 /* the buffer was split over two pages */
698 sg[0].length = PAGE_SIZE - sg[0].offset;
699 sg_set_buf(&sg[1], buf + sg[0].length, buflen - sg[0].length);
700 }
701
702 ASSERTCMP(sg[0].length + sg[1].length, ==, buflen);
703}
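
The helper guards against a buffer that straddles a page boundary: if offset + buflen would overrun PAGE_SIZE, the first segment is clipped to the end of the page and the remainder goes in the second scatterlist entry. The arithmetic in isolation (PAGE_SIZE assumed to be 4096 for the sketch):

/* sketch: splitting a buffer across a page boundary */
#include <stdio.h>

#define PAGE_SIZE 4096

int main(void)
{
	unsigned offset = 4000, buflen = 200;	/* crosses the boundary */
	unsigned len0 = buflen, len1 = 0;

	if (offset + buflen > PAGE_SIZE) {
		len0 = PAGE_SIZE - offset;	/* tail of the first page */
		len1 = buflen - len0;		/* head of the second page */
	}
	printf("seg0=%u seg1=%u total=%u\n", len0, len1, len0 + len1);
	return 0;
}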
704
705/*
706 * encrypt the response packet
707 */
708static void rxkad_encrypt_response(struct rxrpc_connection *conn,
709 struct rxkad_response *resp,
710 const struct rxkad_key *s2)
711{
712 struct blkcipher_desc desc;
713 struct rxrpc_crypt iv;
714 struct scatterlist ssg[2], dsg[2];
715
716 /* continue encrypting from where we left off */
717 memcpy(&iv, s2->session_key, sizeof(iv));
718 desc.tfm = conn->cipher;
719 desc.info = iv.x;
720 desc.flags = 0;
721
722 rxkad_sg_set_buf2(ssg, &resp->encrypted, sizeof(resp->encrypted));
723 memcpy(dsg, ssg, sizeof(dsg));
724 crypto_blkcipher_encrypt_iv(&desc, dsg, ssg, sizeof(resp->encrypted));
725}
726
727/*
728 * respond to a challenge packet
729 */
730static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
731 struct sk_buff *skb,
732 u32 *_abort_code)
733{
734 const struct rxrpc_key_payload *payload;
735 struct rxkad_challenge challenge;
736 struct rxkad_response resp
737 __attribute__((aligned(8))); /* must be aligned for crypto */
738 struct rxrpc_skb_priv *sp;
739 u32 version, nonce, min_level, abort_code;
740 int ret;
741
742 _enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
743
744 if (!conn->key) {
745 _leave(" = -EPROTO [no key]");
746 return -EPROTO;
747 }
748
749 ret = key_validate(conn->key);
750 if (ret < 0) {
751 *_abort_code = RXKADEXPIRED;
752 return ret;
753 }
754
755 abort_code = RXKADPACKETSHORT;
756 sp = rxrpc_skb(skb);
757 if (skb_copy_bits(skb, 0, &challenge, sizeof(challenge)) < 0)
758 goto protocol_error;
759
760 version = ntohl(challenge.version);
761 nonce = ntohl(challenge.nonce);
762 min_level = ntohl(challenge.min_level);
763
764 _proto("Rx CHALLENGE %%%u { v=%u n=%u ml=%u }",
765 ntohl(sp->hdr.serial), version, nonce, min_level);
766
767 abort_code = RXKADINCONSISTENCY;
768 if (version != RXKAD_VERSION)
769 goto protocol_error;
770
771 abort_code = RXKADLEVELFAIL;
772 if (conn->security_level < min_level)
773 goto protocol_error;
774
775 payload = conn->key->payload.data;
776
777 /* build the response packet */
778 memset(&resp, 0, sizeof(resp));
779
780 resp.version = RXKAD_VERSION;
781 resp.encrypted.epoch = conn->epoch;
782 resp.encrypted.cid = conn->cid;
783 resp.encrypted.securityIndex = htonl(conn->security_ix);
784 resp.encrypted.call_id[0] =
785 (conn->channels[0] ? conn->channels[0]->call_id : 0);
786 resp.encrypted.call_id[1] =
787 (conn->channels[1] ? conn->channels[1]->call_id : 0);
788 resp.encrypted.call_id[2] =
789 (conn->channels[2] ? conn->channels[2]->call_id : 0);
790 resp.encrypted.call_id[3] =
791 (conn->channels[3] ? conn->channels[3]->call_id : 0);
792 resp.encrypted.inc_nonce = htonl(nonce + 1);
793 resp.encrypted.level = htonl(conn->security_level);
794 resp.kvno = htonl(payload->k.kvno);
795 resp.ticket_len = htonl(payload->k.ticket_len);
796
797 /* calculate the response checksum and then do the encryption */
798 rxkad_calc_response_checksum(&resp);
799 rxkad_encrypt_response(conn, &resp, &payload->k);
800 return rxkad_send_response(conn, &sp->hdr, &resp, &payload->k);
801
802protocol_error:
803 *_abort_code = abort_code;
804 _leave(" = -EPROTO [%d]", abort_code);
805 return -EPROTO;
806}
807
808/*
809 * decrypt the kerberos IV ticket in the response
810 */
811static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
812 void *ticket, size_t ticket_len,
813 struct rxrpc_crypt *_session_key,
814 time_t *_expiry,
815 u32 *_abort_code)
816{
817 struct blkcipher_desc desc;
818 struct rxrpc_crypt iv, key;
819 struct scatterlist ssg[1], dsg[1];
820 struct in_addr addr;
821 unsigned life;
822 time_t issue, now;
823 bool little_endian;
824 int ret;
825 u8 *p, *q, *name, *end;
826
827 _enter("{%d},{%x}", conn->debug_id, key_serial(conn->server_key));
828
829 *_expiry = 0;
830
831 ret = key_validate(conn->server_key);
832 if (ret < 0) {
833 switch (ret) {
834 case -EKEYEXPIRED:
835 *_abort_code = RXKADEXPIRED;
836 goto error;
837 default:
838 *_abort_code = RXKADNOAUTH;
839 goto error;
840 }
841 }
842
843 ASSERT(conn->server_key->payload.data != NULL);
844 ASSERTCMP((unsigned long) ticket & 7UL, ==, 0);
845
846 memcpy(&iv, &conn->server_key->type_data, sizeof(iv));
847
848 desc.tfm = conn->server_key->payload.data;
849 desc.info = iv.x;
850 desc.flags = 0;
851
852 sg_init_one(&ssg[0], ticket, ticket_len);
853 memcpy(dsg, ssg, sizeof(dsg));
854 crypto_blkcipher_decrypt_iv(&desc, dsg, ssg, ticket_len);
855
856 p = ticket;
857 end = p + ticket_len;
858
859#define Z(size) \
860 ({ \
861 u8 *__str = p; \
862 q = memchr(p, 0, end - p); \
863 if (!q || q - p > (size)) \
864 goto bad_ticket; \
865 for (; p < q; p++) \
866 if (!isprint(*p)) \
867 goto bad_ticket; \
868 p++; \
869 __str; \
870 })
871
872 /* extract the ticket flags */
873 _debug("KIV FLAGS: %x", *p);
874 little_endian = *p & 1;
875 p++;
876
877 /* extract the authentication name */
878 name = Z(ANAME_SZ);
879 _debug("KIV ANAME: %s", name);
880
881 /* extract the principal's instance */
882 name = Z(INST_SZ);
883 _debug("KIV INST : %s", name);
884
885 /* extract the principal's authentication domain */
886 name = Z(REALM_SZ);
887 _debug("KIV REALM: %s", name);
888
889 if (end - p < 4 + 8 + 4 + 2)
890 goto bad_ticket;
891
892 /* get the IPv4 address of the entity that requested the ticket */
893 memcpy(&addr, p, sizeof(addr));
894 p += 4;
895 _debug("KIV ADDR : "NIPQUAD_FMT, NIPQUAD(addr));
896
897 /* get the session key from the ticket */
898 memcpy(&key, p, sizeof(key));
899 p += 8;
900 _debug("KIV KEY : %08x %08x", ntohl(key.n[0]), ntohl(key.n[1]));
901 memcpy(_session_key, &key, sizeof(key));
902
903 /* get the ticket's lifetime */
904 life = *p++ * 5 * 60;
905 _debug("KIV LIFE : %u", life);
906
907 /* get the issue time of the ticket */
908 if (little_endian) {
909 __le32 stamp;
910 memcpy(&stamp, p, 4);
911 issue = le32_to_cpu(stamp);
912 } else {
913 __be32 stamp;
914 memcpy(&stamp, p, 4);
915 issue = be32_to_cpu(stamp);
916 }
917 p += 4;
918 now = xtime.tv_sec;
919 _debug("KIV ISSUE: %lx [%lx]", issue, now);
920
921 /* check the ticket is in date */
922 if (issue > now) {
923 *_abort_code = RXKADNOAUTH;
924 ret = -EKEYREJECTED;
925 goto error;
926 }
927
928 if (issue < now - life) {
929 *_abort_code = RXKADEXPIRED;
930 ret = -EKEYEXPIRED;
931 goto error;
932 }
933
934 *_expiry = issue + life;
935
936 /* get the service name */
937 name = Z(SNAME_SZ);
938 _debug("KIV SNAME: %s", name);
939
940 /* get the service instance name */
941 name = Z(INST_SZ);
942 _debug("KIV SINST: %s", name);
943
944 ret = 0;
945error:
946 _leave(" = %d", ret);
947 return ret;
948
949bad_ticket:
950 *_abort_code = RXKADBADTICKET;
951 ret = -EBADMSG;
952 goto error;
953}
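
A Kerberos IV ticket carries its lifetime as a single byte counting five-minute units, so the validity window is [issue, issue + life * 300]. The window check in isolation, as a runnable sketch:

/* sketch: the Kerberos IV ticket validity window */
#include <stdio.h>
#include <time.h>

int main(void)
{
	unsigned char life_units = 96;		/* 96 * 5 min = 8 hours */
	time_t life = (time_t)life_units * 5 * 60;
	time_t now = time(NULL);
	time_t issue = now - 3600;		/* issued an hour ago */

	if (issue > now)
		puts("RXKADNOAUTH: issued in the future");
	else if (issue < now - life)
		puts("RXKADEXPIRED: past its lifetime");
	else
		printf("valid, expires at %ld\n", (long)(issue + life));
	return 0;
}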
954
955/*
956 * decrypt the response packet
957 */
958static void rxkad_decrypt_response(struct rxrpc_connection *conn,
959 struct rxkad_response *resp,
960 const struct rxrpc_crypt *session_key)
961{
962 struct blkcipher_desc desc;
963 struct scatterlist ssg[2], dsg[2];
964 struct rxrpc_crypt iv;
965
966 _enter(",,%08x%08x",
967 ntohl(session_key->n[0]), ntohl(session_key->n[1]));
968
969 ASSERT(rxkad_ci != NULL);
970
971 mutex_lock(&rxkad_ci_mutex);
972 if (crypto_blkcipher_setkey(rxkad_ci, session_key->x,
973 sizeof(*session_key)) < 0)
974 BUG();
975
976 memcpy(&iv, session_key, sizeof(iv));
977 desc.tfm = rxkad_ci;
978 desc.info = iv.x;
979 desc.flags = 0;
980
981 rxkad_sg_set_buf2(ssg, &resp->encrypted, sizeof(resp->encrypted));
982 memcpy(dsg, ssg, sizeof(dsg));
983 crypto_blkcipher_decrypt_iv(&desc, dsg, ssg, sizeof(resp->encrypted));
984 mutex_unlock(&rxkad_ci_mutex);
985
986 _leave("");
987}
988
989/*
990 * verify a response
991 */
992static int rxkad_verify_response(struct rxrpc_connection *conn,
993 struct sk_buff *skb,
994 u32 *_abort_code)
995{
996 struct rxkad_response response
997 __attribute__((aligned(8))); /* must be aligned for crypto */
998 struct rxrpc_skb_priv *sp;
999 struct rxrpc_crypt session_key;
1000 time_t expiry;
1001 void *ticket;
1002 u32 abort_code, version, kvno, ticket_len, csum, level;
1003 int ret;
1004
1005 _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key));
1006
1007 abort_code = RXKADPACKETSHORT;
1008 if (skb_copy_bits(skb, 0, &response, sizeof(response)) < 0)
1009 goto protocol_error;
1010 if (!pskb_pull(skb, sizeof(response)))
1011 BUG();
1012
1013 version = ntohl(response.version);
1014 ticket_len = ntohl(response.ticket_len);
1015 kvno = ntohl(response.kvno);
1016 sp = rxrpc_skb(skb);
1017 _proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }",
1018 ntohl(sp->hdr.serial), version, kvno, ticket_len);
1019
1020 abort_code = RXKADINCONSISTENCY;
1021 if (version != RXKAD_VERSION)
1022 goto protocol_error;
1023 abort_code = RXKADTICKETLEN;
1024 if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN)
1025 goto protocol_error;
1026
1027 abort_code = RXKADUNKNOWNKEY;
1028 if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5)
1029 goto protocol_error;
1030
1031 /* extract the kerberos ticket and decrypt and decode it */
1032 ticket = kmalloc(ticket_len, GFP_NOFS);
1033 if (!ticket)
1034 return -ENOMEM;
1035
1036 abort_code = RXKADPACKETSHORT;
1037 if (skb_copy_bits(skb, 0, ticket, ticket_len) < 0)
1038 goto protocol_error_free;
1039
1040 ret = rxkad_decrypt_ticket(conn, ticket, ticket_len, &session_key,
1041 &expiry, &abort_code);
1042 if (ret < 0) {
1043 *_abort_code = abort_code;
1044 kfree(ticket);
1045 return ret;
1046 }
1047
1048 /* use the session key from inside the ticket to decrypt the
1049 * response */
1050 rxkad_decrypt_response(conn, &response, &session_key);
1051
1052 abort_code = RXKADSEALEDINCON;
1053 if (response.encrypted.epoch != conn->epoch)
1054 goto protocol_error_free;
1055 if (response.encrypted.cid != conn->cid)
1056 goto protocol_error_free;
1057 if (ntohl(response.encrypted.securityIndex) != conn->security_ix)
1058 goto protocol_error_free;
1059 csum = response.encrypted.checksum;
1060 response.encrypted.checksum = 0;
1061 rxkad_calc_response_checksum(&response);
1062 if (response.encrypted.checksum != csum)
1063 goto protocol_error_free;
1064
1065 if (ntohl(response.encrypted.call_id[0]) > INT_MAX ||
1066 ntohl(response.encrypted.call_id[1]) > INT_MAX ||
1067 ntohl(response.encrypted.call_id[2]) > INT_MAX ||
1068 ntohl(response.encrypted.call_id[3]) > INT_MAX)
1069 goto protocol_error_free;
1070
1071 abort_code = RXKADOUTOFSEQUENCE;
1072 if (response.encrypted.inc_nonce != htonl(conn->security_nonce + 1))
1073 goto protocol_error_free;
1074
1075 abort_code = RXKADLEVELFAIL;
1076 level = ntohl(response.encrypted.level);
1077 if (level > RXRPC_SECURITY_ENCRYPT)
1078 goto protocol_error_free;
1079 conn->security_level = level;
1080
1081 /* create a key to hold the security data and expiration time - after
1082 * this the connection security can be handled in exactly the same way
1083 * as for a client connection */
1084 ret = rxrpc_get_server_data_key(conn, &session_key, expiry, kvno);
1085 if (ret < 0) {
1086 kfree(ticket);
1087 return ret;
1088 }
1089
1090 kfree(ticket);
1091 _leave(" = 0");
1092 return 0;
1093
1094protocol_error_free:
1095 kfree(ticket);
1096protocol_error:
1097 *_abort_code = abort_code;
1098 _leave(" = -EPROTO [%d]", abort_code);
1099 return -EPROTO;
1100}
1101
1102/*
1103 * clear the connection security
1104 */
1105static void rxkad_clear(struct rxrpc_connection *conn)
1106{
1107 _enter("");
1108
1109 if (conn->cipher)
1110 crypto_free_blkcipher(conn->cipher);
1111}
1112
1113/*
1114 * RxRPC Kerberos-based security
1115 */
1116static struct rxrpc_security rxkad = {
1117 .owner = THIS_MODULE,
1118 .name = "rxkad",
1119 .security_index = RXKAD_VERSION,
1120 .init_connection_security = rxkad_init_connection_security,
1121 .prime_packet_security = rxkad_prime_packet_security,
1122 .secure_packet = rxkad_secure_packet,
1123 .verify_packet = rxkad_verify_packet,
1124 .issue_challenge = rxkad_issue_challenge,
1125 .respond_to_challenge = rxkad_respond_to_challenge,
1126 .verify_response = rxkad_verify_response,
1127 .clear = rxkad_clear,
1128};
1129
1130static __init int rxkad_init(void)
1131{
1132 _enter("");
1133
1134 /* pin the cipher we need so that the crypto layer doesn't invoke
1135 * keventd to go get it */
1136 rxkad_ci = crypto_alloc_blkcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
1137 if (IS_ERR(rxkad_ci))
1138 return PTR_ERR(rxkad_ci);
1139
1140 return rxrpc_register_security(&rxkad);
1141}
1142
1143module_init(rxkad_init);
1144
1145static __exit void rxkad_exit(void)
1146{
1147 _enter("");
1148
1149 rxrpc_unregister_security(&rxkad);
1150 crypto_free_blkcipher(rxkad_ci);
1151}
1152
1153module_exit(rxkad_exit);
diff --git a/net/rxrpc/rxrpc_syms.c b/net/rxrpc/rxrpc_syms.c
deleted file mode 100644
index 9896fd87a4d4..000000000000
--- a/net/rxrpc/rxrpc_syms.c
+++ /dev/null
@@ -1,34 +0,0 @@
1/* rxrpc_syms.c: exported Rx RPC layer interface symbols
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13
14#include <rxrpc/transport.h>
15#include <rxrpc/connection.h>
16#include <rxrpc/call.h>
17#include <rxrpc/krxiod.h>
18
19/* call.c */
20EXPORT_SYMBOL(rxrpc_create_call);
21EXPORT_SYMBOL(rxrpc_put_call);
22EXPORT_SYMBOL(rxrpc_call_abort);
23EXPORT_SYMBOL(rxrpc_call_read_data);
24EXPORT_SYMBOL(rxrpc_call_write_data);
25
26/* connection.c */
27EXPORT_SYMBOL(rxrpc_create_connection);
28EXPORT_SYMBOL(rxrpc_put_connection);
29
30/* transport.c */
31EXPORT_SYMBOL(rxrpc_create_transport);
32EXPORT_SYMBOL(rxrpc_put_transport);
33EXPORT_SYMBOL(rxrpc_add_service);
34EXPORT_SYMBOL(rxrpc_del_service);
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
deleted file mode 100644
index 884290754af7..000000000000
--- a/net/rxrpc/sysctl.c
+++ /dev/null
@@ -1,121 +0,0 @@
1/* sysctl.c: Rx RPC control
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/sched.h>
13#include <linux/slab.h>
14#include <linux/module.h>
15#include <linux/sysctl.h>
16#include <rxrpc/types.h>
17#include <rxrpc/rxrpc.h>
18#include <asm/errno.h>
19#include "internal.h"
20
21int rxrpc_ktrace;
22int rxrpc_kdebug;
23int rxrpc_kproto;
24int rxrpc_knet;
25
26#ifdef CONFIG_SYSCTL
27static struct ctl_table_header *rxrpc_sysctl = NULL;
28
29static ctl_table rxrpc_sysctl_table[] = {
30 {
31 .ctl_name = 1,
32 .procname = "kdebug",
33 .data = &rxrpc_kdebug,
34 .maxlen = sizeof(int),
35 .mode = 0644,
36 .proc_handler = &proc_dointvec
37 },
38 {
39 .ctl_name = 2,
40 .procname = "ktrace",
41 .data = &rxrpc_ktrace,
42 .maxlen = sizeof(int),
43 .mode = 0644,
44 .proc_handler = &proc_dointvec
45 },
46 {
47 .ctl_name = 3,
48 .procname = "kproto",
49 .data = &rxrpc_kproto,
50 .maxlen = sizeof(int),
51 .mode = 0644,
52 .proc_handler = &proc_dointvec
53 },
54 {
55 .ctl_name = 4,
56 .procname = "knet",
57 .data = &rxrpc_knet,
58 .maxlen = sizeof(int),
59 .mode = 0644,
60 .proc_handler = &proc_dointvec
61 },
62 {
63 .ctl_name = 5,
64 .procname = "peertimo",
65 .data = &rxrpc_peer_timeout,
66 .maxlen = sizeof(unsigned long),
67 .mode = 0644,
68 .proc_handler = &proc_doulongvec_minmax
69 },
70 {
71 .ctl_name = 6,
72 .procname = "conntimo",
73 .data = &rxrpc_conn_timeout,
74 .maxlen = sizeof(unsigned long),
75 .mode = 0644,
76 .proc_handler = &proc_doulongvec_minmax
77 },
78 { .ctl_name = 0 }
79};
80
81static ctl_table rxrpc_dir_sysctl_table[] = {
82 {
83 .ctl_name = 1,
84 .procname = "rxrpc",
85 .maxlen = 0,
86 .mode = 0555,
87 .child = rxrpc_sysctl_table
88 },
89 { .ctl_name = 0 }
90};
91#endif /* CONFIG_SYSCTL */
92
93/*****************************************************************************/
94/*
95 * initialise the sysctl stuff for Rx RPC
96 */
97int rxrpc_sysctl_init(void)
98{
99#ifdef CONFIG_SYSCTL
100 rxrpc_sysctl = register_sysctl_table(rxrpc_dir_sysctl_table);
101 if (!rxrpc_sysctl)
102 return -ENOMEM;
103#endif /* CONFIG_SYSCTL */
104
105 return 0;
106} /* end rxrpc_sysctl_init() */
107
108/*****************************************************************************/
109/*
110 * clean up the sysctl stuff for Rx RPC
111 */
112void rxrpc_sysctl_cleanup(void)
113{
114#ifdef CONFIG_SYSCTL
115 if (rxrpc_sysctl) {
116 unregister_sysctl_table(rxrpc_sysctl);
117 rxrpc_sysctl = NULL;
118 }
119#endif /* CONFIG_SYSCTL */
120
121} /* end rxrpc_sysctl_cleanup() */
diff --git a/net/rxrpc/transport.c b/net/rxrpc/transport.c
deleted file mode 100644
index 8e57be2df936..000000000000
--- a/net/rxrpc/transport.c
+++ /dev/null
@@ -1,846 +0,0 @@
1/* transport.c: Rx Transport routines
2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/slab.h>
13#include <linux/module.h>
14#include <rxrpc/transport.h>
15#include <rxrpc/peer.h>
16#include <rxrpc/connection.h>
17#include <rxrpc/call.h>
18#include <rxrpc/message.h>
19#include <rxrpc/krxiod.h>
20#include <rxrpc/krxsecd.h>
21#include <linux/udp.h>
22#include <linux/in.h>
23#include <linux/in6.h>
24#include <linux/icmp.h>
25#include <linux/skbuff.h>
26#include <net/sock.h>
27#include <net/ip.h>
28#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
29#include <linux/ipv6.h> /* this should _really_ be in errqueue.h.. */
30#endif
31#include <linux/errqueue.h>
32#include <asm/uaccess.h>
33#include "internal.h"
34
35struct errormsg {
36 struct cmsghdr cmsg; /* control message header */
37 struct sock_extended_err ee; /* extended error information */
38 struct sockaddr_in icmp_src; /* ICMP packet source address */
39};
40
41static DEFINE_SPINLOCK(rxrpc_transports_lock);
42static struct list_head rxrpc_transports = LIST_HEAD_INIT(rxrpc_transports);
43
44__RXACCT_DECL(atomic_t rxrpc_transport_count);
45LIST_HEAD(rxrpc_proc_transports);
46DECLARE_RWSEM(rxrpc_proc_transports_sem);
47
48static void rxrpc_data_ready(struct sock *sk, int count);
49static void rxrpc_error_report(struct sock *sk);
50static int rxrpc_trans_receive_new_call(struct rxrpc_transport *trans,
51 struct list_head *msgq);
52static void rxrpc_trans_receive_error_report(struct rxrpc_transport *trans);
53
54/*****************************************************************************/
55/*
56 * create a new transport endpoint using the specified UDP port
57 */
58int rxrpc_create_transport(unsigned short port,
59 struct rxrpc_transport **_trans)
60{
61 struct rxrpc_transport *trans;
62 struct sockaddr_in sin;
63 mm_segment_t oldfs;
64 struct sock *sock;
65 int ret, opt;
66
67 _enter("%hu", port);
68
69 trans = kzalloc(sizeof(struct rxrpc_transport), GFP_KERNEL);
70 if (!trans)
71 return -ENOMEM;
72
73 atomic_set(&trans->usage, 1);
74 INIT_LIST_HEAD(&trans->services);
75 INIT_LIST_HEAD(&trans->link);
76 INIT_LIST_HEAD(&trans->krxiodq_link);
77 spin_lock_init(&trans->lock);
78 INIT_LIST_HEAD(&trans->peer_active);
79 INIT_LIST_HEAD(&trans->peer_graveyard);
80 spin_lock_init(&trans->peer_gylock);
81 init_waitqueue_head(&trans->peer_gy_waitq);
82 rwlock_init(&trans->peer_lock);
83 atomic_set(&trans->peer_count, 0);
84 trans->port = port;
85
86 /* create a UDP socket to be my actual transport endpoint */
87 ret = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &trans->socket);
88 if (ret < 0)
89 goto error;
90
91 /* use the specified port */
92 if (port) {
93 memset(&sin, 0, sizeof(sin));
94 sin.sin_family = AF_INET;
95 sin.sin_port = htons(port);
96 ret = trans->socket->ops->bind(trans->socket,
97 (struct sockaddr *) &sin,
98 sizeof(sin));
99 if (ret < 0)
100 goto error;
101 }
102
103 opt = 1;
104 oldfs = get_fs();
105 set_fs(KERNEL_DS);
106 ret = trans->socket->ops->setsockopt(trans->socket, SOL_IP, IP_RECVERR,
107 (char *) &opt, sizeof(opt));
108 set_fs(oldfs);
109
110 spin_lock(&rxrpc_transports_lock);
111 list_add(&trans->link, &rxrpc_transports);
112 spin_unlock(&rxrpc_transports_lock);
113
114 /* set the socket up */
115 sock = trans->socket->sk;
116 sock->sk_user_data = trans;
117 sock->sk_data_ready = rxrpc_data_ready;
118 sock->sk_error_report = rxrpc_error_report;
119
120 down_write(&rxrpc_proc_transports_sem);
121 list_add_tail(&trans->proc_link, &rxrpc_proc_transports);
122 up_write(&rxrpc_proc_transports_sem);
123
124 __RXACCT(atomic_inc(&rxrpc_transport_count));
125
126 *_trans = trans;
127 _leave(" = 0 (%p)", trans);
128 return 0;
129
130 error:
131 /* finish cleaning up the transport (not really needed here, but...) */
132 if (trans->socket)
133 trans->socket->ops->shutdown(trans->socket, 2);
134
135 /* close the socket */
136 if (trans->socket) {
137 trans->socket->sk->sk_user_data = NULL;
138 sock_release(trans->socket);
139 trans->socket = NULL;
140 }
141
142 kfree(trans);
143
144
145 _leave(" = %d", ret);
146 return ret;
147} /* end rxrpc_create_transport() */
148
149/*****************************************************************************/
150/*
151 * destroy a transport endpoint
152 */
153void rxrpc_put_transport(struct rxrpc_transport *trans)
154{
155 _enter("%p{u=%d p=%hu}",
156 trans, atomic_read(&trans->usage), trans->port);
157
158 BUG_ON(atomic_read(&trans->usage) <= 0);
159
160 /* to prevent a race, the decrement and the dequeue must be
161 * effectively atomic */
162 spin_lock(&rxrpc_transports_lock);
163 if (likely(!atomic_dec_and_test(&trans->usage))) {
164 spin_unlock(&rxrpc_transports_lock);
165 _leave("");
166 return;
167 }
168
169 list_del(&trans->link);
170 spin_unlock(&rxrpc_transports_lock);
171
172 /* finish cleaning up the transport */
173 if (trans->socket)
174 trans->socket->ops->shutdown(trans->socket, 2);
175
176 rxrpc_krxsecd_clear_transport(trans);
177 rxrpc_krxiod_dequeue_transport(trans);
178
179 /* discard all peer information */
180 rxrpc_peer_clearall(trans);
181
182 down_write(&rxrpc_proc_transports_sem);
183 list_del(&trans->proc_link);
184 up_write(&rxrpc_proc_transports_sem);
185 __RXACCT(atomic_dec(&rxrpc_transport_count));
186
187 /* close the socket */
188 if (trans->socket) {
189 trans->socket->sk->sk_user_data = NULL;
190 sock_release(trans->socket);
191 trans->socket = NULL;
192 }
193
194 kfree(trans);
195
196 _leave("");
197} /* end rxrpc_put_transport() */
198
199/*****************************************************************************/
200/*
201 * add a service to a transport to be listened upon
202 */
203int rxrpc_add_service(struct rxrpc_transport *trans,
204 struct rxrpc_service *newsrv)
205{
206 struct rxrpc_service *srv;
207 struct list_head *_p;
208 int ret = -EEXIST;
209
210 _enter("%p{%hu},%p{%hu}",
211 trans, trans->port, newsrv, newsrv->service_id);
212
213 /* verify that the service ID is not already present */
214 spin_lock(&trans->lock);
215
216 list_for_each(_p, &trans->services) {
217 srv = list_entry(_p, struct rxrpc_service, link);
218 if (srv->service_id == newsrv->service_id)
219 goto out;
220 }
221
222 /* okay - add the transport to the list */
223 list_add_tail(&newsrv->link, &trans->services);
224 rxrpc_get_transport(trans);
225 ret = 0;
226
227 out:
228 spin_unlock(&trans->lock);
229
230 _leave("= %d", ret);
231 return ret;
232} /* end rxrpc_add_service() */
233
234/*****************************************************************************/
235/*
236 * remove a service from a transport
237 */
238void rxrpc_del_service(struct rxrpc_transport *trans, struct rxrpc_service *srv)
239{
240 _enter("%p{%hu},%p{%hu}", trans, trans->port, srv, srv->service_id);
241
242 spin_lock(&trans->lock);
243 list_del(&srv->link);
244 spin_unlock(&trans->lock);
245
246 rxrpc_put_transport(trans);
247
248 _leave("");
249} /* end rxrpc_del_service() */
250
251/*****************************************************************************/
252/*
253 * INET callback when data has been received on the socket.
254 */
255static void rxrpc_data_ready(struct sock *sk, int count)
256{
257 struct rxrpc_transport *trans;
258
259 _enter("%p{t=%p},%d", sk, sk->sk_user_data, count);
260
261 /* queue the transport for attention by krxiod */
262 trans = (struct rxrpc_transport *) sk->sk_user_data;
263 if (trans)
264 rxrpc_krxiod_queue_transport(trans);
265
266 /* wake up anyone waiting on the socket */
267 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
268 wake_up_interruptible(sk->sk_sleep);
269
270 _leave("");
271} /* end rxrpc_data_ready() */
272
273/*****************************************************************************/
274/*
275 * INET callback when an ICMP error packet is received
276 * - sk->err is error (EHOSTUNREACH, EPROTO or EMSGSIZE)
277 */
278static void rxrpc_error_report(struct sock *sk)
279{
280 struct rxrpc_transport *trans;
281
282 _enter("%p{t=%p}", sk, sk->sk_user_data);
283
284 /* queue the transport for attention by krxiod */
285 trans = (struct rxrpc_transport *) sk->sk_user_data;
286 if (trans) {
287 trans->error_rcvd = 1;
288 rxrpc_krxiod_queue_transport(trans);
289 }
290
291 /* wake up anyone waiting on the socket */
292 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
293 wake_up_interruptible(sk->sk_sleep);
294
295 _leave("");
296} /* end rxrpc_error_report() */
297
298/*****************************************************************************/
299/*
300 * split a message up, allocating message records and filling them in
301 * from the contents of a socket buffer
302 */
303static int rxrpc_incoming_msg(struct rxrpc_transport *trans,
304 struct sk_buff *pkt,
305 struct list_head *msgq)
306{
307 struct rxrpc_message *msg;
308 int ret;
309
310 _enter("");
311
312 msg = kzalloc(sizeof(struct rxrpc_message), GFP_KERNEL);
313 if (!msg) {
314 _leave(" = -ENOMEM");
315 return -ENOMEM;
316 }
317
318 atomic_set(&msg->usage, 1);
319 list_add_tail(&msg->link,msgq);
320
321 /* dig out the Rx routing parameters */
322 if (skb_copy_bits(pkt, sizeof(struct udphdr),
323 &msg->hdr, sizeof(msg->hdr)) < 0) {
324 ret = -EBADMSG;
325 goto error;
326 }
327
328 msg->trans = trans;
329 msg->state = RXRPC_MSG_RECEIVED;
330 skb_get_timestamp(pkt, &msg->stamp);
331 if (msg->stamp.tv_sec == 0) {
332 do_gettimeofday(&msg->stamp);
333 if (pkt->sk)
334 sock_enable_timestamp(pkt->sk);
335 }
336 msg->seq = ntohl(msg->hdr.seq);
337
338 /* attach the packet */
339 skb_get(pkt);
340 msg->pkt = pkt;
341
342 msg->offset = sizeof(struct udphdr) + sizeof(struct rxrpc_header);
343 msg->dsize = msg->pkt->len - msg->offset;
344
345 _net("Rx Received packet from %s (%08x;%08x,%1x,%d,%s,%02x,%d,%d)",
346 msg->hdr.flags & RXRPC_CLIENT_INITIATED ? "client" : "server",
347 ntohl(msg->hdr.epoch),
348 (ntohl(msg->hdr.cid) & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT,
349 ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK,
350 ntohl(msg->hdr.callNumber),
351 rxrpc_pkts[msg->hdr.type],
352 msg->hdr.flags,
353 ntohs(msg->hdr.serviceId),
354 msg->hdr.securityIndex);
355
356 __RXACCT(atomic_inc(&rxrpc_message_count));
357
358 /* split off jumbo packets */
359 while (msg->hdr.type == RXRPC_PACKET_TYPE_DATA &&
360 msg->hdr.flags & RXRPC_JUMBO_PACKET
361 ) {
362 struct rxrpc_jumbo_header jumbo;
363 struct rxrpc_message *jumbomsg = msg;
364
365 _debug("split jumbo packet");
366
367 /* quick sanity check */
368 ret = -EBADMSG;
369 if (msg->dsize <
370 RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header))
371 goto error;
372 if (msg->hdr.flags & RXRPC_LAST_PACKET)
373 goto error;
374
375 /* dig out the secondary header */
376 if (skb_copy_bits(pkt, msg->offset + RXRPC_JUMBO_DATALEN,
377 &jumbo, sizeof(jumbo)) < 0)
378 goto error;
379
380 /* allocate a new message record */
381 ret = -ENOMEM;
382 msg = kmemdup(jumbomsg, sizeof(struct rxrpc_message), GFP_KERNEL);
383 if (!msg)
384 goto error;
385
386 list_add_tail(&msg->link, msgq);
387
388 /* adjust the jumbo packet */
389 jumbomsg->dsize = RXRPC_JUMBO_DATALEN;
390
391 /* attach the packet here too */
392 skb_get(pkt);
393
394 /* adjust the parameters */
395 msg->seq++;
396 msg->hdr.seq = htonl(msg->seq);
397 msg->hdr.serial = htonl(ntohl(msg->hdr.serial) + 1);
398 msg->offset += RXRPC_JUMBO_DATALEN +
399 sizeof(struct rxrpc_jumbo_header);
400 msg->dsize -= RXRPC_JUMBO_DATALEN +
401 sizeof(struct rxrpc_jumbo_header);
402 msg->hdr.flags = jumbo.flags;
403 msg->hdr._rsvd = jumbo._rsvd;
404
405 _net("Rx Split jumbo packet from %s"
406 " (%08x;%08x,%1x,%d,%s,%02x,%d,%d)",
407 msg->hdr.flags & RXRPC_CLIENT_INITIATED ? "client" : "server",
408 ntohl(msg->hdr.epoch),
409 (ntohl(msg->hdr.cid) & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT,
410 ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK,
411 ntohl(msg->hdr.callNumber),
412 rxrpc_pkts[msg->hdr.type],
413 msg->hdr.flags,
414 ntohs(msg->hdr.serviceId),
415 msg->hdr.securityIndex);
416
417 __RXACCT(atomic_inc(&rxrpc_message_count));
418 }
419
420 _leave(" = 0 #%d", atomic_read(&rxrpc_message_count));
421 return 0;
422
423 error:
424 while (!list_empty(msgq)) {
425 msg = list_entry(msgq->next, struct rxrpc_message, link);
426 list_del_init(&msg->link);
427
428 rxrpc_put_message(msg);
429 }
430
431 _leave(" = %d", ret);
432 return ret;
433} /* end rxrpc_incoming_msg() */
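
Each sub-packet of a jumbo datagram occupies RXRPC_JUMBO_DATALEN bytes plus a small secondary header, so the parser just steps the offset forward and shrinks the remaining size as it clones message records. A worked walk of that stepping, where the data length and header size are assumed values for illustration:

/* sketch: stepping through the sub-packets of a jumbo datagram */
#include <stdio.h>

#define RXRPC_JUMBO_DATALEN	1412	/* assumed value */
#define JUMBO_HDR_SIZE		4	/* assumed secondary header size */

int main(void)
{
	unsigned offset = 36;		/* UDP + RxRPC headers */
	unsigned dsize = 3 * (RXRPC_JUMBO_DATALEN + JUMBO_HDR_SIZE) + 500;
	unsigned seq = 1;

	while (dsize >= RXRPC_JUMBO_DATALEN + JUMBO_HDR_SIZE) {
		printf("seq=%u offset=%u len=%u\n",
		       seq, offset, RXRPC_JUMBO_DATALEN);
		offset += RXRPC_JUMBO_DATALEN + JUMBO_HDR_SIZE;
		dsize  -= RXRPC_JUMBO_DATALEN + JUMBO_HDR_SIZE;
		seq++;
	}
	printf("seq=%u offset=%u len=%u (final)\n", seq, offset, dsize);
	return 0;
}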
434
435/*****************************************************************************/
436/*
437 * accept a new call
438 * - called from krxiod in process context
439 */
440void rxrpc_trans_receive_packet(struct rxrpc_transport *trans)
441{
442 struct rxrpc_message *msg;
443 struct rxrpc_peer *peer;
444 struct sk_buff *pkt;
445 int ret;
446 __be32 addr;
447 __be16 port;
448
449 LIST_HEAD(msgq);
450
451 _enter("%p{%d}", trans, trans->port);
452
453 for (;;) {
454 /* deal with outstanding errors first */
455 if (trans->error_rcvd)
456 rxrpc_trans_receive_error_report(trans);
457
458 /* attempt to receive a packet */
459 pkt = skb_recv_datagram(trans->socket->sk, 0, 1, &ret);
460 if (!pkt) {
461 if (ret == -EAGAIN) {
462 _leave(" EAGAIN");
463 return;
464 }
465
466 /* an icmp error may have occurred */
467 rxrpc_krxiod_queue_transport(trans);
468 _leave(" error %d\n", ret);
469 return;
470 }
471
472 /* we'll probably need to checksum it (didn't call
473 * sock_recvmsg) */
474 if (skb_checksum_complete(pkt)) {
475 kfree_skb(pkt);
476 rxrpc_krxiod_queue_transport(trans);
477 _leave(" CSUM failed");
478 return;
479 }
480
481 addr = pkt->nh.iph->saddr;
482 port = pkt->h.uh->source;
483
484 _net("Rx Received UDP packet from %08x:%04hu",
485 ntohl(addr), ntohs(port));
486
487 /* unmarshall the Rx parameters and split jumbo packets */
488 ret = rxrpc_incoming_msg(trans, pkt, &msgq);
489 if (ret < 0) {
490 kfree_skb(pkt);
491 rxrpc_krxiod_queue_transport(trans);
492 _leave(" bad packet");
493 return;
494 }
495
496 BUG_ON(list_empty(&msgq));
497
498 msg = list_entry(msgq.next, struct rxrpc_message, link);
499
500 /* locate the record for the peer from which it
501 * originated */
502 ret = rxrpc_peer_lookup(trans, addr, &peer);
503 if (ret < 0) {
504 kdebug("Rx No connections from that peer");
505 rxrpc_trans_immediate_abort(trans, msg, -EINVAL);
506 goto finished_msg;
507 }
508
509 /* try and find a matching connection */
510 ret = rxrpc_connection_lookup(peer, msg, &msg->conn);
511 if (ret < 0) {
512 kdebug("Rx Unknown Connection");
513 rxrpc_trans_immediate_abort(trans, msg, -EINVAL);
514 rxrpc_put_peer(peer);
515 goto finished_msg;
516 }
517 rxrpc_put_peer(peer);
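/* (the connection found above presumably holds its own reference on the
 *  peer, so the lookup reference can be dropped here) */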
518
519 /* deal with the first packet of a new call */
520 if (msg->hdr.flags & RXRPC_CLIENT_INITIATED &&
521 msg->hdr.type == RXRPC_PACKET_TYPE_DATA &&
522 ntohl(msg->hdr.seq) == 1
523 ) {
524 _debug("Rx New server call");
525 rxrpc_trans_receive_new_call(trans, &msgq);
526 goto finished_msg;
527 }
528
529 /* deal with subsequent packet(s) of call */
530 _debug("Rx Call packet");
531 while (!list_empty(&msgq)) {
532 msg = list_entry(msgq.next, struct rxrpc_message, link);
533 list_del_init(&msg->link);
534
535 ret = rxrpc_conn_receive_call_packet(msg->conn, NULL, msg);
536 if (ret < 0) {
537 rxrpc_trans_immediate_abort(trans, msg, ret);
538 rxrpc_put_message(msg);
539 goto finished_msg;
540 }
541
542 rxrpc_put_message(msg);
543 }
544
545 goto finished_msg;
546
547 /* dispose of the packets */
548 finished_msg:
549 while (!list_empty(&msgq)) {
550 msg = list_entry(msgq.next, struct rxrpc_message, link);
551 list_del_init(&msg->link);
552
553 rxrpc_put_message(msg);
554 }
555 kfree_skb(pkt);
556 }
557
558 _leave("");
559
560} /* end rxrpc_trans_receive_packet() */
561
562/*****************************************************************************/
563/*
564 * accept a new call from a client trying to connect to one of my services
565 * - called in process context
566 */
567static int rxrpc_trans_receive_new_call(struct rxrpc_transport *trans,
568 struct list_head *msgq)
569{
570 struct rxrpc_message *msg;
571
572 _enter("");
573
574 /* only bother with the first packet */
575 msg = list_entry(msgq->next, struct rxrpc_message, link);
576 list_del_init(&msg->link);
577 rxrpc_krxsecd_queue_incoming_call(msg);
578 rxrpc_put_message(msg);
579
580 _leave(" = 0");
581
582 return 0;
583} /* end rxrpc_trans_receive_new_call() */
584
585/*****************************************************************************/
586/*
587 * perform an immediate abort without connection or call structures
588 */
589int rxrpc_trans_immediate_abort(struct rxrpc_transport *trans,
590 struct rxrpc_message *msg,
591 int error)
592{
593 struct rxrpc_header ahdr;
594 struct sockaddr_in sin;
595 struct msghdr msghdr;
596 struct kvec iov[2];
597 __be32 _error;
598 int len, ret;
599
600 _enter("%p,%p,%d", trans, msg, error);
601
602 /* don't abort an abort packet */
603 if (msg->hdr.type == RXRPC_PACKET_TYPE_ABORT) {
604 _leave(" = 0");
605 return 0;
606 }
607
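/* the abort code put on the wire is the positive errno value, in network
 * byte order */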
608 _error = htonl(-error);
609
610 /* set up the message to be transmitted */
611 memcpy(&ahdr, &msg->hdr, sizeof(ahdr));
612 ahdr.epoch = msg->hdr.epoch;
613 ahdr.serial = htonl(1);
614 ahdr.seq = 0;
615 ahdr.type = RXRPC_PACKET_TYPE_ABORT;
616 ahdr.flags = RXRPC_LAST_PACKET;
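/* the abort travels in the opposite direction to the offending packet, so
 * the client-initiated flag is inverted relative to it */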
617 ahdr.flags |= ~msg->hdr.flags & RXRPC_CLIENT_INITIATED;
618
619 iov[0].iov_len = sizeof(ahdr);
620 iov[0].iov_base = &ahdr;
621 iov[1].iov_len = sizeof(_error);
622 iov[1].iov_base = &_error;
623
624 len = sizeof(ahdr) + sizeof(_error);
625
626 memset(&sin,0,sizeof(sin));
627 sin.sin_family = AF_INET;
628 sin.sin_port = msg->pkt->h.uh->source;
629 sin.sin_addr.s_addr = msg->pkt->nh.iph->saddr;
630
631 msghdr.msg_name = &sin;
632 msghdr.msg_namelen = sizeof(sin);
633 msghdr.msg_control = NULL;
634 msghdr.msg_controllen = 0;
635 msghdr.msg_flags = MSG_DONTWAIT;
636
637 _net("Sending message type %d of %d bytes to %08x:%d",
638 ahdr.type,
639 len,
640 ntohl(sin.sin_addr.s_addr),
641 ntohs(sin.sin_port));
642
643 /* send the message */
644 ret = kernel_sendmsg(trans->socket, &msghdr, iov, 2, len);
645
646 _leave(" = %d", ret);
647 return ret;
648} /* end rxrpc_trans_immediate_abort() */
649
650/*****************************************************************************/
651/*
652 * receive an ICMP error report and percolate it to all connections
653 * heading to the affected host or port
654 */
655static void rxrpc_trans_receive_error_report(struct rxrpc_transport *trans)
656{
657 struct rxrpc_connection *conn;
658 struct sockaddr_in sin;
659 struct rxrpc_peer *peer;
660 struct list_head connq, *_p;
661 struct errormsg emsg;
662 struct msghdr msg;
663 __be16 port;
664 int local, err;
665
666 _enter("%p", trans);
667
668 for (;;) {
669 trans->error_rcvd = 0;
670
671 /* try and receive an error message */
672 msg.msg_name = &sin;
673 msg.msg_namelen = sizeof(sin);
674 msg.msg_control = &emsg;
675 msg.msg_controllen = sizeof(emsg);
676 msg.msg_flags = 0;
677
678 err = kernel_recvmsg(trans->socket, &msg, NULL, 0, 0,
679 MSG_ERRQUEUE | MSG_DONTWAIT | MSG_TRUNC);
680
681 if (err == -EAGAIN) {
682 _leave("");
683 return;
684 }
685
686 if (err < 0) {
687 printk("%s: unable to recv an error report: %d\n",
688 __FUNCTION__, err);
689 _leave("");
690 return;
691 }
692
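/* put_cmsg() advances msg_control past each cmsg it appends, so the
 * distance from the start of the buffer gives the amount of control data
 * actually received */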
693 msg.msg_controllen = (char *) msg.msg_control - (char *) &emsg;
694
695 if (msg.msg_controllen < sizeof(emsg.cmsg) ||
696 msg.msg_namelen < sizeof(sin)) {
697 printk("%s: short control message"
698 " (nlen=%u clen=%Zu fl=%x)\n",
699 __FUNCTION__,
700 msg.msg_namelen,
701 msg.msg_controllen,
702 msg.msg_flags);
703 continue;
704 }
705
706 _net("Rx Received control message"
707 " { len=%Zu level=%u type=%u }",
708 emsg.cmsg.cmsg_len,
709 emsg.cmsg.cmsg_level,
710 emsg.cmsg.cmsg_type);
711
712 if (sin.sin_family != AF_INET) {
713 printk("Rx Ignoring error report with non-INET address"
714 " (fam=%u)",
715 sin.sin_family);
716 continue;
717 }
718
719 _net("Rx Received message pertaining to host addr=%x port=%hu",
720 ntohl(sin.sin_addr.s_addr), ntohs(sin.sin_port));
721
722 if (emsg.cmsg.cmsg_level != SOL_IP ||
723 emsg.cmsg.cmsg_type != IP_RECVERR) {
724 printk("Rx Ignoring unknown error report"
725 " { level=%u type=%u }",
726 emsg.cmsg.cmsg_level,
727 emsg.cmsg.cmsg_type);
728 continue;
729 }
730
731 if (msg.msg_controllen < sizeof(emsg.cmsg) + sizeof(emsg.ee)) {
732 printk("%s: short error message (%Zu)\n",
733 __FUNCTION__, msg.msg_controllen);
734 _leave("");
735 return;
736 }
737
738 port = sin.sin_port;
739
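/* classify the error; the port is cleared for errors that affect the whole
 * host so that every connection to that host - not just those on one
 * port - gets notified below */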
740 switch (emsg.ee.ee_origin) {
741 case SO_EE_ORIGIN_ICMP:
742 local = 0;
743 switch (emsg.ee.ee_type) {
744 case ICMP_DEST_UNREACH:
745 switch (emsg.ee.ee_code) {
746 case ICMP_NET_UNREACH:
747 _net("Rx Received ICMP Network Unreachable");
748 port = 0;
749 err = -ENETUNREACH;
750 break;
751 case ICMP_HOST_UNREACH:
752 _net("Rx Received ICMP Host Unreachable");
753 port = 0;
754 err = -EHOSTUNREACH;
755 break;
756 case ICMP_PORT_UNREACH:
757 _net("Rx Received ICMP Port Unreachable");
758 err = -ECONNREFUSED;
759 break;
760 case ICMP_NET_UNKNOWN:
761 _net("Rx Received ICMP Unknown Network");
762 port = 0;
763 err = -ENETUNREACH;
764 break;
765 case ICMP_HOST_UNKNOWN:
766 _net("Rx Received ICMP Unknown Host");
767 port = 0;
768 err = -EHOSTUNREACH;
769 break;
770 default:
771 _net("Rx Received ICMP DestUnreach { code=%u }",
772 emsg.ee.ee_code);
773 err = emsg.ee.ee_errno;
774 break;
775 }
776 break;
777
778 case ICMP_TIME_EXCEEDED:
779 _net("Rx Received ICMP TTL Exceeded");
780 err = emsg.ee.ee_errno;
781 break;
782
783 default:
784 _proto("Rx Received ICMP error { type=%u code=%u }",
785 emsg.ee.ee_type, emsg.ee.ee_code);
786 err = emsg.ee.ee_errno;
787 break;
788 }
789 break;
790
791 case SO_EE_ORIGIN_LOCAL:
792 _proto("Rx Received local error { error=%d }",
793 emsg.ee.ee_errno);
794 local = 1;
795 err = emsg.ee.ee_errno;
796 break;
797
798 case SO_EE_ORIGIN_NONE:
799 case SO_EE_ORIGIN_ICMP6:
800 default:
801 _proto("Rx Received error report { orig=%u }",
802 emsg.ee.ee_origin);
803 local = 0;
804 err = emsg.ee.ee_errno;
805 break;
806 }
807
808 /* find all the connections between this transport and the
809 * affected destination */
810 INIT_LIST_HEAD(&connq);
811
812 if (rxrpc_peer_lookup(trans, sin.sin_addr.s_addr,
813 &peer) == 0) {
814 read_lock(&peer->conn_lock);
815 list_for_each(_p, &peer->conn_active) {
816 conn = list_entry(_p, struct rxrpc_connection,
817 link);
818 if (port && conn->addr.sin_port != port)
819 continue;
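/* skip connections already queued for error handling */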
820 if (!list_empty(&conn->err_link))
821 continue;
822
823 rxrpc_get_connection(conn);
824 list_add_tail(&conn->err_link, &connq);
825 }
826 read_unlock(&peer->conn_lock);
827
828 /* service all those connections */
829 while (!list_empty(&connq)) {
830 conn = list_entry(connq.next,
831 struct rxrpc_connection,
832 err_link);
833 list_del(&conn->err_link);
834
835 rxrpc_conn_handle_error(conn, local, err);
836
837 rxrpc_put_connection(conn);
838 }
839
840 rxrpc_put_peer(peer);
841 }
842 }
843
844 _leave("");
845 return;
846} /* end rxrpc_trans_receive_error_report() */
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index f4544dd86476..475df8449be9 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -46,62 +46,6 @@ config NET_SCH_FIFO
46 46
47if NET_SCHED 47if NET_SCHED
48 48
49choice
50 prompt "Packet scheduler clock source"
51 default NET_SCH_CLK_GETTIMEOFDAY
52 ---help---
53 Packet schedulers need a monotonic clock that increments at a static
54 rate. The kernel provides several suitable interfaces, each with
55 different properties:
56
57 - high resolution (us or better)
58 - fast to read (minimal locking, no i/o access)
59 - synchronized on all processors
60 - handles cpu clock frequency changes
61
62 but nothing provides all of the above.
63
64config NET_SCH_CLK_JIFFIES
65 bool "Timer interrupt"
66 ---help---
67 Say Y here if you want to use the timer interrupt (jiffies) as clock
68 source. This clock source is fast, synchronized on all processors and
69 handles cpu clock frequency changes, but its resolution is too low
70 for accurate shaping except at very low speed.
71
72config NET_SCH_CLK_GETTIMEOFDAY
73 bool "gettimeofday"
74 ---help---
75 Say Y here if you want to use gettimeofday as clock source. This clock
76 source has high resolution, is synchronized on all processors and
77 handles cpu clock frequency changes, but it is slow.
78
79 Choose this if you need a high resolution clock source but can't use
80 the CPU's cycle counter.
81
82# don't allow on SMP x86 because they can have unsynchronized TSCs.
83# gettimeofday is a good alternative
84config NET_SCH_CLK_CPU
85 bool "CPU cycle counter"
86 depends on ((X86_TSC || X86_64) && !SMP) || ALPHA || SPARC64 || PPC64 || IA64
87 ---help---
88 Say Y here if you want to use the CPU's cycle counter as clock source.
89 This is a cheap and high resolution clock source, but on some
90 architectures it is not synchronized on all processors and doesn't
91 handle cpu clock frequency changes.
92
93 The useable cycle counters are:
94
95 x86/x86_64 - Timestamp Counter
96 alpha - Cycle Counter
97 sparc64 - %ticks register
98 ppc64 - Time base
99 ia64 - Interval Time Counter
100
101 Choose this if your CPU's cycle counter is working properly.
102
103endchoice
104
105comment "Queueing/Scheduling" 49comment "Queueing/Scheduling"
106 50
107config NET_SCH_CBQ 51config NET_SCH_CBQ
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index cb21617a5670..711dd26c95c3 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -25,12 +25,12 @@
25#include <linux/interrupt.h> 25#include <linux/interrupt.h>
26#include <linux/netdevice.h> 26#include <linux/netdevice.h>
27#include <linux/skbuff.h> 27#include <linux/skbuff.h>
28#include <linux/rtnetlink.h>
29#include <linux/init.h> 28#include <linux/init.h>
30#include <linux/kmod.h> 29#include <linux/kmod.h>
31#include <net/sock.h> 30#include <net/sock.h>
32#include <net/sch_generic.h> 31#include <net/sch_generic.h>
33#include <net/act_api.h> 32#include <net/act_api.h>
33#include <net/netlink.h>
34 34
35void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) 35void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
36{ 36{
@@ -93,15 +93,15 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
93 continue; 93 continue;
94 a->priv = p; 94 a->priv = p;
95 a->order = n_i; 95 a->order = n_i;
96 r = (struct rtattr*) skb->tail; 96 r = (struct rtattr *)skb_tail_pointer(skb);
97 RTA_PUT(skb, a->order, 0, NULL); 97 RTA_PUT(skb, a->order, 0, NULL);
98 err = tcf_action_dump_1(skb, a, 0, 0); 98 err = tcf_action_dump_1(skb, a, 0, 0);
99 if (err < 0) { 99 if (err < 0) {
100 index--; 100 index--;
101 skb_trim(skb, (u8*)r - skb->data); 101 nlmsg_trim(skb, r);
102 goto done; 102 goto done;
103 } 103 }
104 r->rta_len = skb->tail - (u8*)r; 104 r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
105 n_i++; 105 n_i++;
106 if (n_i >= TCA_ACT_MAX_PRIO) 106 if (n_i >= TCA_ACT_MAX_PRIO)
107 goto done; 107 goto done;
@@ -114,7 +114,7 @@ done:
114 return n_i; 114 return n_i;
115 115
116rtattr_failure: 116rtattr_failure:
117 skb_trim(skb, (u8*)r - skb->data); 117 nlmsg_trim(skb, r);
118 goto done; 118 goto done;
119} 119}
120 120
@@ -125,7 +125,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
125 struct rtattr *r ; 125 struct rtattr *r ;
126 int i= 0, n_i = 0; 126 int i= 0, n_i = 0;
127 127
128 r = (struct rtattr*) skb->tail; 128 r = (struct rtattr *)skb_tail_pointer(skb);
129 RTA_PUT(skb, a->order, 0, NULL); 129 RTA_PUT(skb, a->order, 0, NULL);
130 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind); 130 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
131 for (i = 0; i < (hinfo->hmask + 1); i++) { 131 for (i = 0; i < (hinfo->hmask + 1); i++) {
@@ -140,11 +140,11 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
140 } 140 }
141 } 141 }
142 RTA_PUT(skb, TCA_FCNT, 4, &n_i); 142 RTA_PUT(skb, TCA_FCNT, 4, &n_i);
143 r->rta_len = skb->tail - (u8*)r; 143 r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
144 144
145 return n_i; 145 return n_i;
146rtattr_failure: 146rtattr_failure:
147 skb_trim(skb, (u8*)r - skb->data); 147 nlmsg_trim(skb, r);
148 return -EINVAL; 148 return -EINVAL;
149} 149}
150 150
@@ -423,7 +423,7 @@ int
423tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) 423tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
424{ 424{
425 int err = -EINVAL; 425 int err = -EINVAL;
426 unsigned char *b = skb->tail; 426 unsigned char *b = skb_tail_pointer(skb);
427 struct rtattr *r; 427 struct rtattr *r;
428 428
429 if (a->ops == NULL || a->ops->dump == NULL) 429 if (a->ops == NULL || a->ops->dump == NULL)
@@ -432,15 +432,15 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
432 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind); 432 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
433 if (tcf_action_copy_stats(skb, a, 0)) 433 if (tcf_action_copy_stats(skb, a, 0))
434 goto rtattr_failure; 434 goto rtattr_failure;
435 r = (struct rtattr*) skb->tail; 435 r = (struct rtattr *)skb_tail_pointer(skb);
436 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 436 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
437 if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) { 437 if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) {
438 r->rta_len = skb->tail - (u8*)r; 438 r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
439 return err; 439 return err;
440 } 440 }
441 441
442rtattr_failure: 442rtattr_failure:
443 skb_trim(skb, b - skb->data); 443 nlmsg_trim(skb, b);
444 return -1; 444 return -1;
445} 445}
446 446
@@ -449,17 +449,17 @@ tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
449{ 449{
450 struct tc_action *a; 450 struct tc_action *a;
451 int err = -EINVAL; 451 int err = -EINVAL;
452 unsigned char *b = skb->tail; 452 unsigned char *b = skb_tail_pointer(skb);
453 struct rtattr *r ; 453 struct rtattr *r ;
454 454
455 while ((a = act) != NULL) { 455 while ((a = act) != NULL) {
456 r = (struct rtattr*) skb->tail; 456 r = (struct rtattr *)skb_tail_pointer(skb);
457 act = a->next; 457 act = a->next;
458 RTA_PUT(skb, a->order, 0, NULL); 458 RTA_PUT(skb, a->order, 0, NULL);
459 err = tcf_action_dump_1(skb, a, bind, ref); 459 err = tcf_action_dump_1(skb, a, bind, ref);
460 if (err < 0) 460 if (err < 0)
461 goto errout; 461 goto errout;
462 r->rta_len = skb->tail - (u8*)r; 462 r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
463 } 463 }
464 464
465 return 0; 465 return 0;
@@ -467,7 +467,7 @@ tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
467rtattr_failure: 467rtattr_failure:
468 err = -EINVAL; 468 err = -EINVAL;
469errout: 469errout:
470 skb_trim(skb, b - skb->data); 470 nlmsg_trim(skb, b);
471 return err; 471 return err;
472} 472}
473 473
@@ -635,7 +635,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
635{ 635{
636 struct tcamsg *t; 636 struct tcamsg *t;
637 struct nlmsghdr *nlh; 637 struct nlmsghdr *nlh;
638 unsigned char *b = skb->tail; 638 unsigned char *b = skb_tail_pointer(skb);
639 struct rtattr *x; 639 struct rtattr *x;
640 640
641 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags); 641 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
@@ -645,20 +645,20 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
645 t->tca__pad1 = 0; 645 t->tca__pad1 = 0;
646 t->tca__pad2 = 0; 646 t->tca__pad2 = 0;
647 647
648 x = (struct rtattr*) skb->tail; 648 x = (struct rtattr *)skb_tail_pointer(skb);
649 RTA_PUT(skb, TCA_ACT_TAB, 0, NULL); 649 RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
650 650
651 if (tcf_action_dump(skb, a, bind, ref) < 0) 651 if (tcf_action_dump(skb, a, bind, ref) < 0)
652 goto rtattr_failure; 652 goto rtattr_failure;
653 653
654 x->rta_len = skb->tail - (u8*)x; 654 x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
655 655
656 nlh->nlmsg_len = skb->tail - b; 656 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
657 return skb->len; 657 return skb->len;
658 658
659rtattr_failure: 659rtattr_failure:
660nlmsg_failure: 660nlmsg_failure:
661 skb_trim(skb, b - skb->data); 661 nlmsg_trim(skb, b);
662 return -1; 662 return -1;
663} 663}
664 664
@@ -767,7 +767,7 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
767 return -ENOBUFS; 767 return -ENOBUFS;
768 } 768 }
769 769
770 b = (unsigned char *)skb->tail; 770 b = skb_tail_pointer(skb);
771 771
772 if (rtattr_parse_nested(tb, TCA_ACT_MAX, rta) < 0) 772 if (rtattr_parse_nested(tb, TCA_ACT_MAX, rta) < 0)
773 goto err_out; 773 goto err_out;
@@ -783,16 +783,16 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
783 t->tca__pad1 = 0; 783 t->tca__pad1 = 0;
784 t->tca__pad2 = 0; 784 t->tca__pad2 = 0;
785 785
786 x = (struct rtattr *) skb->tail; 786 x = (struct rtattr *)skb_tail_pointer(skb);
787 RTA_PUT(skb, TCA_ACT_TAB, 0, NULL); 787 RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
788 788
789 err = a->ops->walk(skb, &dcb, RTM_DELACTION, a); 789 err = a->ops->walk(skb, &dcb, RTM_DELACTION, a);
790 if (err < 0) 790 if (err < 0)
791 goto rtattr_failure; 791 goto rtattr_failure;
792 792
793 x->rta_len = skb->tail - (u8 *) x; 793 x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
794 794
795 nlh->nlmsg_len = skb->tail - b; 795 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
796 nlh->nlmsg_flags |= NLM_F_ROOT; 796 nlh->nlmsg_flags |= NLM_F_ROOT;
797 module_put(a->ops->owner); 797 module_put(a->ops->owner);
798 kfree(a); 798 kfree(a);
@@ -884,7 +884,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
884 if (!skb) 884 if (!skb)
885 return -ENOBUFS; 885 return -ENOBUFS;
886 886
887 b = (unsigned char *)skb->tail; 887 b = skb_tail_pointer(skb);
888 888
889 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags); 889 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
890 t = NLMSG_DATA(nlh); 890 t = NLMSG_DATA(nlh);
@@ -892,15 +892,15 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
892 t->tca__pad1 = 0; 892 t->tca__pad1 = 0;
893 t->tca__pad2 = 0; 893 t->tca__pad2 = 0;
894 894
895 x = (struct rtattr*) skb->tail; 895 x = (struct rtattr *)skb_tail_pointer(skb);
896 RTA_PUT(skb, TCA_ACT_TAB, 0, NULL); 896 RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
897 897
898 if (tcf_action_dump(skb, a, 0, 0) < 0) 898 if (tcf_action_dump(skb, a, 0, 0) < 0)
899 goto rtattr_failure; 899 goto rtattr_failure;
900 900
901 x->rta_len = skb->tail - (u8*)x; 901 x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
902 902
903 nlh->nlmsg_len = skb->tail - b; 903 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
904 NETLINK_CB(skb).dst_group = RTNLGRP_TC; 904 NETLINK_CB(skb).dst_group = RTNLGRP_TC;
905 905
906 err = rtnetlink_send(skb, pid, RTNLGRP_TC, flags&NLM_F_ECHO); 906 err = rtnetlink_send(skb, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
@@ -1015,7 +1015,7 @@ static int
1015tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) 1015tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
1016{ 1016{
1017 struct nlmsghdr *nlh; 1017 struct nlmsghdr *nlh;
1018 unsigned char *b = skb->tail; 1018 unsigned char *b = skb_tail_pointer(skb);
1019 struct rtattr *x; 1019 struct rtattr *x;
1020 struct tc_action_ops *a_o; 1020 struct tc_action_ops *a_o;
1021 struct tc_action a; 1021 struct tc_action a;
@@ -1048,7 +1048,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
1048 t->tca__pad1 = 0; 1048 t->tca__pad1 = 0;
1049 t->tca__pad2 = 0; 1049 t->tca__pad2 = 0;
1050 1050
1051 x = (struct rtattr *) skb->tail; 1051 x = (struct rtattr *)skb_tail_pointer(skb);
1052 RTA_PUT(skb, TCA_ACT_TAB, 0, NULL); 1052 RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
1053 1053
1054 ret = a_o->walk(skb, cb, RTM_GETACTION, &a); 1054 ret = a_o->walk(skb, cb, RTM_GETACTION, &a);
@@ -1056,12 +1056,12 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
1056 goto rtattr_failure; 1056 goto rtattr_failure;
1057 1057
1058 if (ret > 0) { 1058 if (ret > 0) {
1059 x->rta_len = skb->tail - (u8 *) x; 1059 x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
1060 ret = skb->len; 1060 ret = skb->len;
1061 } else 1061 } else
1062 skb_trim(skb, (u8*)x - skb->data); 1062 nlmsg_trim(skb, x);
1063 1063
1064 nlh->nlmsg_len = skb->tail - b; 1064 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1065 if (NETLINK_CB(cb->skb).pid && ret) 1065 if (NETLINK_CB(cb->skb).pid && ret)
1066 nlh->nlmsg_flags |= NLM_F_MULTI; 1066 nlh->nlmsg_flags |= NLM_F_MULTI;
1067 module_put(a_o->owner); 1067 module_put(a_o->owner);
@@ -1070,20 +1070,15 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
1070rtattr_failure: 1070rtattr_failure:
1071nlmsg_failure: 1071nlmsg_failure:
1072 module_put(a_o->owner); 1072 module_put(a_o->owner);
1073 skb_trim(skb, b - skb->data); 1073 nlmsg_trim(skb, b);
1074 return skb->len; 1074 return skb->len;
1075} 1075}
1076 1076
1077static int __init tc_action_init(void) 1077static int __init tc_action_init(void)
1078{ 1078{
1079 struct rtnetlink_link *link_p = rtnetlink_links[PF_UNSPEC]; 1079 rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL);
1080 1080 rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL);
1081 if (link_p) { 1081 rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action);
1082 link_p[RTM_NEWACTION-RTM_BASE].doit = tc_ctl_action;
1083 link_p[RTM_DELACTION-RTM_BASE].doit = tc_ctl_action;
1084 link_p[RTM_GETACTION-RTM_BASE].doit = tc_ctl_action;
1085 link_p[RTM_GETACTION-RTM_BASE].dumpit = tc_dump_action;
1086 }
1087 1082
1088 return 0; 1083 return 0;
1089} 1084}
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 87d0faf32867..7517f3791541 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -28,6 +28,7 @@
28#include <linux/module.h> 28#include <linux/module.h>
29#include <linux/init.h> 29#include <linux/init.h>
30#include <linux/proc_fs.h> 30#include <linux/proc_fs.h>
31#include <net/netlink.h>
31#include <net/sock.h> 32#include <net/sock.h>
32#include <net/pkt_sched.h> 33#include <net/pkt_sched.h>
33#include <linux/tc_act/tc_gact.h> 34#include <linux/tc_act/tc_gact.h>
@@ -155,7 +156,7 @@ static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result
155 156
156static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) 157static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
157{ 158{
158 unsigned char *b = skb->tail; 159 unsigned char *b = skb_tail_pointer(skb);
159 struct tc_gact opt; 160 struct tc_gact opt;
160 struct tcf_gact *gact = a->priv; 161 struct tcf_gact *gact = a->priv;
161 struct tcf_t t; 162 struct tcf_t t;
@@ -181,7 +182,7 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
181 return skb->len; 182 return skb->len;
182 183
183rtattr_failure: 184rtattr_failure:
184 skb_trim(skb, b - skb->data); 185 nlmsg_trim(skb, b);
185 return -1; 186 return -1;
186} 187}
187 188
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 47f0b1324239..00b05f422d45 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -30,6 +30,7 @@
30#include <linux/init.h> 30#include <linux/init.h>
31#include <linux/proc_fs.h> 31#include <linux/proc_fs.h>
32#include <linux/kmod.h> 32#include <linux/kmod.h>
33#include <net/netlink.h>
33#include <net/sock.h> 34#include <net/sock.h>
34#include <net/pkt_sched.h> 35#include <net/pkt_sched.h>
35#include <linux/tc_act/tc_ipt.h> 36#include <linux/tc_act/tc_ipt.h>
@@ -245,7 +246,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
245 246
246static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) 247static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
247{ 248{
248 unsigned char *b = skb->tail; 249 unsigned char *b = skb_tail_pointer(skb);
249 struct tcf_ipt *ipt = a->priv; 250 struct tcf_ipt *ipt = a->priv;
250 struct ipt_entry_target *t; 251 struct ipt_entry_target *t;
251 struct tcf_t tm; 252 struct tcf_t tm;
@@ -277,7 +278,7 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
277 return skb->len; 278 return skb->len;
278 279
279rtattr_failure: 280rtattr_failure:
280 skb_trim(skb, b - skb->data); 281 nlmsg_trim(skb, b);
281 kfree(t); 282 kfree(t);
282 return -1; 283 return -1;
283} 284}
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 3e93683e9ab3..de21c92faaa2 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -30,6 +30,7 @@
30#include <linux/module.h> 30#include <linux/module.h>
31#include <linux/init.h> 31#include <linux/init.h>
32#include <linux/proc_fs.h> 32#include <linux/proc_fs.h>
33#include <net/netlink.h>
33#include <net/sock.h> 34#include <net/sock.h>
34#include <net/pkt_sched.h> 35#include <net/pkt_sched.h>
35#include <linux/tc_act/tc_mirred.h> 36#include <linux/tc_act/tc_mirred.h>
@@ -206,7 +207,7 @@ bad_mirred:
206 207
207static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) 208static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
208{ 209{
209 unsigned char *b = skb->tail; 210 unsigned char *b = skb_tail_pointer(skb);
210 struct tcf_mirred *m = a->priv; 211 struct tcf_mirred *m = a->priv;
211 struct tc_mirred opt; 212 struct tc_mirred opt;
212 struct tcf_t t; 213 struct tcf_t t;
@@ -225,7 +226,7 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i
225 return skb->len; 226 return skb->len;
226 227
227rtattr_failure: 228rtattr_failure:
228 skb_trim(skb, b - skb->data); 229 nlmsg_trim(skb, b);
229 return -1; 230 return -1;
230} 231}
231 232
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 3d6a2fcc9ce4..45b3cda86a21 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -27,6 +27,7 @@
27#include <linux/module.h> 27#include <linux/module.h>
28#include <linux/init.h> 28#include <linux/init.h>
29#include <linux/proc_fs.h> 29#include <linux/proc_fs.h>
30#include <net/netlink.h>
30#include <net/sock.h> 31#include <net/sock.h>
31#include <net/pkt_sched.h> 32#include <net/pkt_sched.h>
32#include <linux/tc_act/tc_pedit.h> 33#include <linux/tc_act/tc_pedit.h>
@@ -136,7 +137,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
136 } 137 }
137 } 138 }
138 139
139 pptr = skb->nh.raw; 140 pptr = skb_network_header(skb);
140 141
141 spin_lock(&p->tcf_lock); 142 spin_lock(&p->tcf_lock);
142 143
@@ -195,7 +196,7 @@ done:
195static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, 196static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
196 int bind, int ref) 197 int bind, int ref)
197{ 198{
198 unsigned char *b = skb->tail; 199 unsigned char *b = skb_tail_pointer(skb);
199 struct tcf_pedit *p = a->priv; 200 struct tcf_pedit *p = a->priv;
200 struct tc_pedit *opt; 201 struct tc_pedit *opt;
201 struct tcf_t t; 202 struct tcf_t t;
@@ -226,7 +227,7 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
226 return skb->len; 227 return skb->len;
227 228
228rtattr_failure: 229rtattr_failure:
229 skb_trim(skb, b - skb->data); 230 nlmsg_trim(skb, b);
230 kfree(opt); 231 kfree(opt);
231 return -1; 232 return -1;
232} 233}
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 10a5a5c36f76..616f465f407e 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -30,6 +30,7 @@
30#include <linux/init.h> 30#include <linux/init.h>
31#include <net/sock.h> 31#include <net/sock.h>
32#include <net/act_api.h> 32#include <net/act_api.h>
33#include <net/netlink.h>
33 34
34#define L2T(p,L) ((p)->tcfp_R_tab->data[(L)>>(p)->tcfp_R_tab->rate.cell_log]) 35#define L2T(p,L) ((p)->tcfp_R_tab->data[(L)>>(p)->tcfp_R_tab->rate.cell_log])
35#define L2T_P(p,L) ((p)->tcfp_P_tab->data[(L)>>(p)->tcfp_P_tab->rate.cell_log]) 36#define L2T_P(p,L) ((p)->tcfp_P_tab->data[(L)>>(p)->tcfp_P_tab->rate.cell_log])
@@ -80,7 +81,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
80 continue; 81 continue;
81 a->priv = p; 82 a->priv = p;
82 a->order = index; 83 a->order = index;
83 r = (struct rtattr*) skb->tail; 84 r = (struct rtattr *)skb_tail_pointer(skb);
84 RTA_PUT(skb, a->order, 0, NULL); 85 RTA_PUT(skb, a->order, 0, NULL);
85 if (type == RTM_DELACTION) 86 if (type == RTM_DELACTION)
86 err = tcf_action_dump_1(skb, a, 0, 1); 87 err = tcf_action_dump_1(skb, a, 0, 1);
@@ -88,10 +89,10 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
88 err = tcf_action_dump_1(skb, a, 0, 0); 89 err = tcf_action_dump_1(skb, a, 0, 0);
89 if (err < 0) { 90 if (err < 0) {
90 index--; 91 index--;
91 skb_trim(skb, (u8*)r - skb->data); 92 nlmsg_trim(skb, r);
92 goto done; 93 goto done;
93 } 94 }
94 r->rta_len = skb->tail - (u8*)r; 95 r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
95 n_i++; 96 n_i++;
96 } 97 }
97 } 98 }
@@ -102,7 +103,7 @@ done:
102 return n_i; 103 return n_i;
103 104
104rtattr_failure: 105rtattr_failure:
105 skb_trim(skb, (u8*)r - skb->data); 106 nlmsg_trim(skb, r);
106 goto done; 107 goto done;
107} 108}
108#endif 109#endif
@@ -240,7 +241,7 @@ override:
240 if (ret != ACT_P_CREATED) 241 if (ret != ACT_P_CREATED)
241 return ret; 242 return ret;
242 243
243 PSCHED_GET_TIME(police->tcfp_t_c); 244 police->tcfp_t_c = psched_get_time();
244 police->tcf_index = parm->index ? parm->index : 245 police->tcf_index = parm->index ? parm->index :
245 tcf_hash_new_index(&police_idx_gen, &police_hash_info); 246 tcf_hash_new_index(&police_idx_gen, &police_hash_info);
246 h = tcf_hash(police->tcf_index, POL_TAB_MASK); 247 h = tcf_hash(police->tcf_index, POL_TAB_MASK);
@@ -295,10 +296,9 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
295 return police->tcfp_result; 296 return police->tcfp_result;
296 } 297 }
297 298
298 PSCHED_GET_TIME(now); 299 now = psched_get_time();
299 300 toks = psched_tdiff_bounded(now, police->tcfp_t_c,
300 toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c, 301 police->tcfp_burst);
301 police->tcfp_burst);
302 if (police->tcfp_P_tab) { 302 if (police->tcfp_P_tab) {
303 ptoks = toks + police->tcfp_ptoks; 303 ptoks = toks + police->tcfp_ptoks;
304 if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) 304 if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
@@ -326,7 +326,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
326static int 326static int
327tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) 327tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
328{ 328{
329 unsigned char *b = skb->tail; 329 unsigned char *b = skb_tail_pointer(skb);
330 struct tcf_police *police = a->priv; 330 struct tcf_police *police = a->priv;
331 struct tc_police opt; 331 struct tc_police opt;
332 332
@@ -355,7 +355,7 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
355 return skb->len; 355 return skb->len;
356 356
357rtattr_failure: 357rtattr_failure:
358 skb_trim(skb, b - skb->data); 358 nlmsg_trim(skb, b);
359 return -1; 359 return -1;
360} 360}
361 361
@@ -494,7 +494,7 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est)
494 } 494 }
495 if (police->tcfp_P_tab) 495 if (police->tcfp_P_tab)
496 police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); 496 police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
497 PSCHED_GET_TIME(police->tcfp_t_c); 497 police->tcfp_t_c = psched_get_time();
498 police->tcf_index = parm->index ? parm->index : 498 police->tcf_index = parm->index ? parm->index :
499 tcf_police_new_index(); 499 tcf_police_new_index();
500 police->tcf_action = parm->action; 500 police->tcf_action = parm->action;
@@ -542,9 +542,9 @@ int tcf_police(struct sk_buff *skb, struct tcf_police *police)
542 return police->tcfp_result; 542 return police->tcfp_result;
543 } 543 }
544 544
545 PSCHED_GET_TIME(now); 545 now = psched_get_time();
546 toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c, 546 toks = psched_tdiff_bounded(now, police->tcfp_t_c,
547 police->tcfp_burst); 547 police->tcfp_burst);
548 if (police->tcfp_P_tab) { 548 if (police->tcfp_P_tab) {
549 ptoks = toks + police->tcfp_ptoks; 549 ptoks = toks + police->tcfp_ptoks;
550 if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) 550 if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
@@ -572,7 +572,7 @@ EXPORT_SYMBOL(tcf_police);
572 572
573int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police) 573int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police)
574{ 574{
575 unsigned char *b = skb->tail; 575 unsigned char *b = skb_tail_pointer(skb);
576 struct tc_police opt; 576 struct tc_police opt;
577 577
578 opt.index = police->tcf_index; 578 opt.index = police->tcf_index;
@@ -598,7 +598,7 @@ int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police)
598 return skb->len; 598 return skb->len;
599 599
600rtattr_failure: 600rtattr_failure:
601 skb_trim(skb, b - skb->data); 601 nlmsg_trim(skb, b);
602 return -1; 602 return -1;
603} 603}
604 604
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index c7971182af07..36e1edad5990 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -16,6 +16,7 @@
16#include <linux/netdevice.h> 16#include <linux/netdevice.h>
17#include <linux/skbuff.h> 17#include <linux/skbuff.h>
18#include <linux/rtnetlink.h> 18#include <linux/rtnetlink.h>
19#include <net/netlink.h>
19#include <net/pkt_sched.h> 20#include <net/pkt_sched.h>
20 21
21#define TCA_ACT_SIMP 22 22#define TCA_ACT_SIMP 22
@@ -155,7 +156,7 @@ static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
155static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, 156static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
156 int bind, int ref) 157 int bind, int ref)
157{ 158{
158 unsigned char *b = skb->tail; 159 unsigned char *b = skb_tail_pointer(skb);
159 struct tcf_defact *d = a->priv; 160 struct tcf_defact *d = a->priv;
160 struct tc_defact opt; 161 struct tc_defact opt;
161 struct tcf_t t; 162 struct tcf_t t;
@@ -173,7 +174,7 @@ static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
173 return skb->len; 174 return skb->len;
174 175
175rtattr_failure: 176rtattr_failure:
176 skb_trim(skb, b - skb->data); 177 nlmsg_trim(skb, b);
177 return -1; 178 return -1;
178} 179}
179 180
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 5c6ffdb77d2d..ebf94edf0478 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -29,9 +29,10 @@
29#include <linux/interrupt.h> 29#include <linux/interrupt.h>
30#include <linux/netdevice.h> 30#include <linux/netdevice.h>
31#include <linux/skbuff.h> 31#include <linux/skbuff.h>
32#include <linux/rtnetlink.h>
33#include <linux/init.h> 32#include <linux/init.h>
34#include <linux/kmod.h> 33#include <linux/kmod.h>
34#include <linux/netlink.h>
35#include <net/netlink.h>
35#include <net/sock.h> 36#include <net/sock.h>
36#include <net/pkt_sched.h> 37#include <net/pkt_sched.h>
37#include <net/pkt_cls.h> 38#include <net/pkt_cls.h>
@@ -323,7 +324,7 @@ tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh,
323{ 324{
324 struct tcmsg *tcm; 325 struct tcmsg *tcm;
325 struct nlmsghdr *nlh; 326 struct nlmsghdr *nlh;
326 unsigned char *b = skb->tail; 327 unsigned char *b = skb_tail_pointer(skb);
327 328
328 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); 329 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
329 tcm = NLMSG_DATA(nlh); 330 tcm = NLMSG_DATA(nlh);
@@ -340,12 +341,12 @@ tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh,
340 if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0) 341 if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0)
341 goto rtattr_failure; 342 goto rtattr_failure;
342 } 343 }
343 nlh->nlmsg_len = skb->tail - b; 344 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
344 return skb->len; 345 return skb->len;
345 346
346nlmsg_failure: 347nlmsg_failure:
347rtattr_failure: 348rtattr_failure:
348 skb_trim(skb, b - skb->data); 349 nlmsg_trim(skb, b);
349 return -1; 350 return -1;
350} 351}
351 352
@@ -399,7 +400,6 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
399 if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) 400 if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
400 return skb->len; 401 return skb->len;
401 402
402 read_lock(&qdisc_tree_lock);
403 if (!tcm->tcm_parent) 403 if (!tcm->tcm_parent)
404 q = dev->qdisc_sleeping; 404 q = dev->qdisc_sleeping;
405 else 405 else
@@ -456,7 +456,6 @@ errout:
456 if (cl) 456 if (cl)
457 cops->put(q, cl); 457 cops->put(q, cl);
458out: 458out:
459 read_unlock(&qdisc_tree_lock);
460 dev_put(dev); 459 dev_put(dev);
461 return skb->len; 460 return skb->len;
462} 461}
@@ -563,30 +562,30 @@ tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts,
563 * to work with both old and new modes of entering 562 * to work with both old and new modes of entering
564 * tc data even if iproute2 was newer - jhs 563 * tc data even if iproute2 was newer - jhs
565 */ 564 */
566 struct rtattr * p_rta = (struct rtattr*) skb->tail; 565 struct rtattr *p_rta = (struct rtattr *)skb_tail_pointer(skb);
567 566
568 if (exts->action->type != TCA_OLD_COMPAT) { 567 if (exts->action->type != TCA_OLD_COMPAT) {
569 RTA_PUT(skb, map->action, 0, NULL); 568 RTA_PUT(skb, map->action, 0, NULL);
570 if (tcf_action_dump(skb, exts->action, 0, 0) < 0) 569 if (tcf_action_dump(skb, exts->action, 0, 0) < 0)
571 goto rtattr_failure; 570 goto rtattr_failure;
572 p_rta->rta_len = skb->tail - (u8*)p_rta; 571 p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta;
573 } else if (map->police) { 572 } else if (map->police) {
574 RTA_PUT(skb, map->police, 0, NULL); 573 RTA_PUT(skb, map->police, 0, NULL);
575 if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0) 574 if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0)
576 goto rtattr_failure; 575 goto rtattr_failure;
577 p_rta->rta_len = skb->tail - (u8*)p_rta; 576 p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta;
578 } 577 }
579 } 578 }
580#elif defined CONFIG_NET_CLS_POLICE 579#elif defined CONFIG_NET_CLS_POLICE
581 if (map->police && exts->police) { 580 if (map->police && exts->police) {
582 struct rtattr * p_rta = (struct rtattr*) skb->tail; 581 struct rtattr *p_rta = (struct rtattr *)skb_tail_pointer(skb);
583 582
584 RTA_PUT(skb, map->police, 0, NULL); 583 RTA_PUT(skb, map->police, 0, NULL);
585 584
586 if (tcf_police_dump(skb, exts->police) < 0) 585 if (tcf_police_dump(skb, exts->police) < 0)
587 goto rtattr_failure; 586 goto rtattr_failure;
588 587
589 p_rta->rta_len = skb->tail - (u8*)p_rta; 588 p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta;
590 } 589 }
591#endif 590#endif
592 return 0; 591 return 0;
@@ -614,18 +613,11 @@ rtattr_failure: __attribute__ ((unused))
614 613
615static int __init tc_filter_init(void) 614static int __init tc_filter_init(void)
616{ 615{
617 struct rtnetlink_link *link_p = rtnetlink_links[PF_UNSPEC]; 616 rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL);
617 rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL);
618 rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
619 tc_dump_tfilter);
618 620
619 /* Setup rtnetlink links. It is made here to avoid
620 exporting large number of public symbols.
621 */
622
623 if (link_p) {
624 link_p[RTM_NEWTFILTER-RTM_BASE].doit = tc_ctl_tfilter;
625 link_p[RTM_DELTFILTER-RTM_BASE].doit = tc_ctl_tfilter;
626 link_p[RTM_GETTFILTER-RTM_BASE].doit = tc_ctl_tfilter;
627 link_p[RTM_GETTFILTER-RTM_BASE].dumpit = tc_dump_tfilter;
628 }
629 return 0; 621 return 0;
630} 622}
631 623
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 4a91f082a81d..c885412d79d5 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -17,6 +17,7 @@
17#include <linux/errno.h> 17#include <linux/errno.h>
18#include <linux/rtnetlink.h> 18#include <linux/rtnetlink.h>
19#include <linux/skbuff.h> 19#include <linux/skbuff.h>
20#include <net/netlink.h>
20#include <net/act_api.h> 21#include <net/act_api.h>
21#include <net/pkt_cls.h> 22#include <net/pkt_cls.h>
22 23
@@ -245,7 +246,7 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
245 struct sk_buff *skb, struct tcmsg *t) 246 struct sk_buff *skb, struct tcmsg *t)
246{ 247{
247 struct basic_filter *f = (struct basic_filter *) fh; 248 struct basic_filter *f = (struct basic_filter *) fh;
248 unsigned char *b = skb->tail; 249 unsigned char *b = skb_tail_pointer(skb);
249 struct rtattr *rta; 250 struct rtattr *rta;
250 251
251 if (f == NULL) 252 if (f == NULL)
@@ -263,11 +264,11 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
263 tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0) 264 tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
264 goto rtattr_failure; 265 goto rtattr_failure;
265 266
266 rta->rta_len = (skb->tail - b); 267 rta->rta_len = skb_tail_pointer(skb) - b;
267 return skb->len; 268 return skb->len;
268 269
269rtattr_failure: 270rtattr_failure:
270 skb_trim(skb, b - skb->data); 271 nlmsg_trim(skb, b);
271 return -1; 272 return -1;
272} 273}
273 274
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 5dbb9d451f73..bbec4a0d4dcb 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -38,6 +38,7 @@
38#include <linux/notifier.h> 38#include <linux/notifier.h>
39#include <linux/netfilter.h> 39#include <linux/netfilter.h>
40#include <net/ip.h> 40#include <net/ip.h>
41#include <net/netlink.h>
41#include <net/route.h> 42#include <net/route.h>
42#include <linux/skbuff.h> 43#include <linux/skbuff.h>
43#include <net/sock.h> 44#include <net/sock.h>
@@ -348,7 +349,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
348{ 349{
349 struct fw_head *head = (struct fw_head *)tp->root; 350 struct fw_head *head = (struct fw_head *)tp->root;
350 struct fw_filter *f = (struct fw_filter*)fh; 351 struct fw_filter *f = (struct fw_filter*)fh;
351 unsigned char *b = skb->tail; 352 unsigned char *b = skb_tail_pointer(skb);
352 struct rtattr *rta; 353 struct rtattr *rta;
353 354
354 if (f == NULL) 355 if (f == NULL)
@@ -374,7 +375,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
374 if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0) 375 if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0)
375 goto rtattr_failure; 376 goto rtattr_failure;
376 377
377 rta->rta_len = skb->tail - b; 378 rta->rta_len = skb_tail_pointer(skb) - b;
378 379
379 if (tcf_exts_dump_stats(skb, &f->exts, &fw_ext_map) < 0) 380 if (tcf_exts_dump_stats(skb, &f->exts, &fw_ext_map) < 0)
380 goto rtattr_failure; 381 goto rtattr_failure;
@@ -382,7 +383,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
382 return skb->len; 383 return skb->len;
383 384
384rtattr_failure: 385rtattr_failure:
385 skb_trim(skb, b - skb->data); 386 nlmsg_trim(skb, b);
386 return -1; 387 return -1;
387} 388}
388 389
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index abc47cc48ad0..cc941d0ee3a5 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -28,6 +28,7 @@
28#include <linux/etherdevice.h> 28#include <linux/etherdevice.h>
29#include <linux/notifier.h> 29#include <linux/notifier.h>
30#include <net/ip.h> 30#include <net/ip.h>
31#include <net/netlink.h>
31#include <net/route.h> 32#include <net/route.h>
32#include <linux/skbuff.h> 33#include <linux/skbuff.h>
33#include <net/sock.h> 34#include <net/sock.h>
@@ -88,9 +89,9 @@ static __inline__ int route4_fastmap_hash(u32 id, int iif)
88static inline 89static inline
89void route4_reset_fastmap(struct net_device *dev, struct route4_head *head, u32 id) 90void route4_reset_fastmap(struct net_device *dev, struct route4_head *head, u32 id)
90{ 91{
91 spin_lock_bh(&dev->queue_lock); 92 qdisc_lock_tree(dev);
92 memset(head->fastmap, 0, sizeof(head->fastmap)); 93 memset(head->fastmap, 0, sizeof(head->fastmap));
93 spin_unlock_bh(&dev->queue_lock); 94 qdisc_unlock_tree(dev);
94} 95}
95 96
96static inline void 97static inline void
@@ -562,7 +563,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
562 struct sk_buff *skb, struct tcmsg *t) 563 struct sk_buff *skb, struct tcmsg *t)
563{ 564{
564 struct route4_filter *f = (struct route4_filter*)fh; 565 struct route4_filter *f = (struct route4_filter*)fh;
565 unsigned char *b = skb->tail; 566 unsigned char *b = skb_tail_pointer(skb);
566 struct rtattr *rta; 567 struct rtattr *rta;
567 u32 id; 568 u32 id;
568 569
@@ -591,7 +592,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
591 if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0) 592 if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0)
592 goto rtattr_failure; 593 goto rtattr_failure;
593 594
594 rta->rta_len = skb->tail - b; 595 rta->rta_len = skb_tail_pointer(skb) - b;
595 596
596 if (tcf_exts_dump_stats(skb, &f->exts, &route_ext_map) < 0) 597 if (tcf_exts_dump_stats(skb, &f->exts, &route_ext_map) < 0)
597 goto rtattr_failure; 598 goto rtattr_failure;
@@ -599,7 +600,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
599 return skb->len; 600 return skb->len;
600 601
601rtattr_failure: 602rtattr_failure:
602 skb_trim(skb, b - skb->data); 603 nlmsg_trim(skb, b);
603 return -1; 604 return -1;
604} 605}
605 606
diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c
index 1d4a1fb17608..0a683c07c648 100644
--- a/net/sched/cls_rsvp.c
+++ b/net/sched/cls_rsvp.c
@@ -31,6 +31,7 @@
31#include <net/route.h> 31#include <net/route.h>
32#include <linux/skbuff.h> 32#include <linux/skbuff.h>
33#include <net/sock.h> 33#include <net/sock.h>
34#include <net/netlink.h>
34#include <net/act_api.h> 35#include <net/act_api.h>
35#include <net/pkt_cls.h> 36#include <net/pkt_cls.h>
36 37
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 7853621a04cc..22f9ede70e8f 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -143,9 +143,9 @@ static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
143 u8 tunnelid = 0; 143 u8 tunnelid = 0;
144 u8 *xprt; 144 u8 *xprt;
145#if RSVP_DST_LEN == 4 145#if RSVP_DST_LEN == 4
146 struct ipv6hdr *nhptr = skb->nh.ipv6h; 146 struct ipv6hdr *nhptr = ipv6_hdr(skb);
147#else 147#else
148 struct iphdr *nhptr = skb->nh.iph; 148 struct iphdr *nhptr = ip_hdr(skb);
149#endif 149#endif
150 150
151restart: 151restart:
@@ -160,7 +160,7 @@ restart:
160 dst = &nhptr->daddr; 160 dst = &nhptr->daddr;
161 protocol = nhptr->protocol; 161 protocol = nhptr->protocol;
162 xprt = ((u8*)nhptr) + (nhptr->ihl<<2); 162 xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
163 if (nhptr->frag_off&__constant_htons(IP_MF|IP_OFFSET)) 163 if (nhptr->frag_off & htons(IP_MF|IP_OFFSET))
164 return -1; 164 return -1;
165#endif 165#endif
166 166
@@ -593,7 +593,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
593{ 593{
594 struct rsvp_filter *f = (struct rsvp_filter*)fh; 594 struct rsvp_filter *f = (struct rsvp_filter*)fh;
595 struct rsvp_session *s; 595 struct rsvp_session *s;
596 unsigned char *b = skb->tail; 596 unsigned char *b = skb_tail_pointer(skb);
597 struct rtattr *rta; 597 struct rtattr *rta;
598 struct tc_rsvp_pinfo pinfo; 598 struct tc_rsvp_pinfo pinfo;
599 599
@@ -623,14 +623,14 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
623 if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0) 623 if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
624 goto rtattr_failure; 624 goto rtattr_failure;
625 625
626 rta->rta_len = skb->tail - b; 626 rta->rta_len = skb_tail_pointer(skb) - b;
627 627
628 if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0) 628 if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
629 goto rtattr_failure; 629 goto rtattr_failure;
630 return skb->len; 630 return skb->len;
631 631
632rtattr_failure: 632rtattr_failure:
633 skb_trim(skb, b - skb->data); 633 nlmsg_trim(skb, b);
634 return -1; 634 return -1;
635} 635}
636 636
diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c
index a2979d89798f..93b6abed57db 100644
--- a/net/sched/cls_rsvp6.c
+++ b/net/sched/cls_rsvp6.c
@@ -34,6 +34,7 @@
34#include <net/sock.h> 34#include <net/sock.h>
35#include <net/act_api.h> 35#include <net/act_api.h>
36#include <net/pkt_cls.h> 36#include <net/pkt_cls.h>
37#include <net/netlink.h>
37 38
38#define RSVP_DST_LEN 4 39#define RSVP_DST_LEN 4
39#define RSVP_ID "rsvp6" 40#define RSVP_ID "rsvp6"
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 7563fdcef4b7..47ac0c556429 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -12,6 +12,7 @@
12#include <linux/netdevice.h> 12#include <linux/netdevice.h>
13#include <net/ip.h> 13#include <net/ip.h>
14#include <net/act_api.h> 14#include <net/act_api.h>
15#include <net/netlink.h>
15#include <net/pkt_cls.h> 16#include <net/pkt_cls.h>
16#include <net/route.h> 17#include <net/route.h>
17 18
@@ -448,7 +449,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
448{ 449{
449 struct tcindex_data *p = PRIV(tp); 450 struct tcindex_data *p = PRIV(tp);
450 struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh; 451 struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
451 unsigned char *b = skb->tail; 452 unsigned char *b = skb_tail_pointer(skb);
452 struct rtattr *rta; 453 struct rtattr *rta;
453 454
454 DPRINTK("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n", 455 DPRINTK("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n",
@@ -463,7 +464,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
463 RTA_PUT(skb,TCA_TCINDEX_SHIFT,sizeof(p->shift),&p->shift); 464 RTA_PUT(skb,TCA_TCINDEX_SHIFT,sizeof(p->shift),&p->shift);
464 RTA_PUT(skb,TCA_TCINDEX_FALL_THROUGH,sizeof(p->fall_through), 465 RTA_PUT(skb,TCA_TCINDEX_FALL_THROUGH,sizeof(p->fall_through),
465 &p->fall_through); 466 &p->fall_through);
466 rta->rta_len = skb->tail-b; 467 rta->rta_len = skb_tail_pointer(skb) - b;
467 } else { 468 } else {
468 if (p->perfect) { 469 if (p->perfect) {
469 t->tcm_handle = r-p->perfect; 470 t->tcm_handle = r-p->perfect;
@@ -486,7 +487,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
486 487
487 if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0) 488 if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0)
488 goto rtattr_failure; 489 goto rtattr_failure;
489 rta->rta_len = skb->tail-b; 490 rta->rta_len = skb_tail_pointer(skb) - b;
490 491
491 if (tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0) 492 if (tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0)
492 goto rtattr_failure; 493 goto rtattr_failure;
@@ -495,7 +496,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
495 return skb->len; 496 return skb->len;
496 497
497rtattr_failure: 498rtattr_failure:
498 skb_trim(skb, b - skb->data); 499 nlmsg_trim(skb, b);
499 return -1; 500 return -1;
500} 501}
501 502
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 0bcb16928d25..c7a347bd6d70 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -50,6 +50,7 @@
50#include <linux/notifier.h> 50#include <linux/notifier.h>
51#include <linux/rtnetlink.h> 51#include <linux/rtnetlink.h>
52#include <net/ip.h> 52#include <net/ip.h>
53#include <net/netlink.h>
53#include <net/route.h> 54#include <net/route.h>
54#include <linux/skbuff.h> 55#include <linux/skbuff.h>
55#include <net/sock.h> 56#include <net/sock.h>
@@ -119,7 +120,7 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re
119 } stack[TC_U32_MAXDEPTH]; 120 } stack[TC_U32_MAXDEPTH];
120 121
121 struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root; 122 struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root;
122 u8 *ptr = skb->nh.raw; 123 u8 *ptr = skb_network_header(skb);
123 struct tc_u_knode *n; 124 struct tc_u_knode *n;
124 int sdepth = 0; 125 int sdepth = 0;
125 int off2 = 0; 126 int off2 = 0;
@@ -213,7 +214,7 @@ check_terminal:
213 off2 = 0; 214 off2 = 0;
214 } 215 }
215 216
216 if (ptr < skb->tail) 217 if (ptr < skb_tail_pointer(skb))
217 goto next_ht; 218 goto next_ht;
218 } 219 }
219 220
@@ -435,7 +436,7 @@ static void u32_destroy(struct tcf_proto *tp)
435 BUG_TRAP(ht->refcnt == 0); 436 BUG_TRAP(ht->refcnt == 0);
436 437
437 kfree(ht); 438 kfree(ht);
438 }; 439 }
439 440
440 kfree(tp_c); 441 kfree(tp_c);
441 } 442 }
@@ -718,7 +719,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
718 struct sk_buff *skb, struct tcmsg *t) 719 struct sk_buff *skb, struct tcmsg *t)
719{ 720{
720 struct tc_u_knode *n = (struct tc_u_knode*)fh; 721 struct tc_u_knode *n = (struct tc_u_knode*)fh;
721 unsigned char *b = skb->tail; 722 unsigned char *b = skb_tail_pointer(skb);
722 struct rtattr *rta; 723 struct rtattr *rta;
723 724
724 if (n == NULL) 725 if (n == NULL)
@@ -765,14 +766,14 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
765#endif 766#endif
766 } 767 }
767 768
768 rta->rta_len = skb->tail - b; 769 rta->rta_len = skb_tail_pointer(skb) - b;
769 if (TC_U32_KEY(n->handle)) 770 if (TC_U32_KEY(n->handle))
770 if (tcf_exts_dump_stats(skb, &n->exts, &u32_ext_map) < 0) 771 if (tcf_exts_dump_stats(skb, &n->exts, &u32_ext_map) < 0)
771 goto rtattr_failure; 772 goto rtattr_failure;
772 return skb->len; 773 return skb->len;
773 774
774rtattr_failure: 775rtattr_failure:
775 skb_trim(skb, b - skb->data); 776 nlmsg_trim(skb, b);
776 return -1; 777 return -1;
777} 778}
778 779
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c
index cd0600c67969..0a2a7fe08de3 100644
--- a/net/sched/em_u32.c
+++ b/net/sched/em_u32.c
@@ -22,7 +22,7 @@ static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em,
22 struct tcf_pkt_info *info) 22 struct tcf_pkt_info *info)
23{ 23{
24 struct tc_u32_key *key = (struct tc_u32_key *) em->data; 24 struct tc_u32_key *key = (struct tc_u32_key *) em->data;
25 unsigned char *ptr = skb->nh.raw; 25 const unsigned char *ptr = skb_network_header(skb);
26 26
27 if (info) { 27 if (info) {
28 if (info->ptr) 28 if (info->ptr)
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 959c306c5714..63146d339d81 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -418,17 +418,19 @@ void tcf_em_tree_destroy(struct tcf_proto *tp, struct tcf_ematch_tree *tree)
 int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
 {
 	int i;
-	struct rtattr * top_start = (struct rtattr*) skb->tail;
-	struct rtattr * list_start;
+	u8 *tail;
+	struct rtattr *top_start = (struct rtattr *)skb_tail_pointer(skb);
+	struct rtattr *list_start;

 	RTA_PUT(skb, tlv, 0, NULL);
 	RTA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr);

-	list_start = (struct rtattr *) skb->tail;
+	list_start = (struct rtattr *)skb_tail_pointer(skb);
 	RTA_PUT(skb, TCA_EMATCH_TREE_LIST, 0, NULL);

+	tail = skb_tail_pointer(skb);
 	for (i = 0; i < tree->hdr.nmatches; i++) {
-		struct rtattr *match_start = (struct rtattr*) skb->tail;
+		struct rtattr *match_start = (struct rtattr *)tail;
 		struct tcf_ematch *em = tcf_em_get_match(tree, i);
 		struct tcf_ematch_hdr em_hdr = {
 			.kind = em->ops ? em->ops->kind : TCF_EM_CONTAINER,
@@ -447,11 +449,12 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
 		} else if (em->datalen > 0)
 			RTA_PUT_NOHDR(skb, em->datalen, (void *) em->data);

-		match_start->rta_len = skb->tail - (u8*) match_start;
+		tail = skb_tail_pointer(skb);
+		match_start->rta_len = tail - (u8 *)match_start;
 	}

-	list_start->rta_len = skb->tail - (u8 *) list_start;
-	top_start->rta_len = skb->tail - (u8 *) top_start;
+	list_start->rta_len = tail - (u8 *)list_start;
+	top_start->rta_len = tail - (u8 *)top_start;

 	return 0;

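The tcf_em_tree_dump() change is a micro-optimization on top of the accessor conversion: the tail pointer is read once per iteration into a local u8 *tail and reused for all three rta_len fix-ups. A compressed sketch of the idiom with hypothetical attribute names, valid only because nothing but the RTA_PUTs moves the tail between the read and the fix-up:

    /* Sketch of the cached-tail idiom; MY_NESTED and the payload
     * arguments are hypothetical. */
    static int my_put_nested(struct sk_buff *skb, int n, void *payload, int len)
    {
    	u8 *tail = skb_tail_pointer(skb);
    	int i;

    	for (i = 0; i < n; i++) {
    		struct rtattr *start = (struct rtattr *)tail;

    		RTA_PUT(skb, MY_NESTED, len, payload);
    		tail = skb_tail_pointer(skb);		/* re-read once */
    		start->rta_len = tail - (u8 *)start;	/* fix up length */
    	}
    	return 0;

    rtattr_failure:
    	return -1;
    }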
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index ecc988af4a9a..8699e7006d80 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -27,14 +27,15 @@
 #include <linux/interrupt.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
 #include <linux/init.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/kmod.h>
 #include <linux/list.h>
 #include <linux/bitops.h>
+#include <linux/hrtimer.h>

+#include <net/netlink.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>

@@ -190,7 +191,7 @@ int unregister_qdisc(struct Qdisc_ops *qops)
    (root qdisc, all its children, children of children etc.)
 */

-static struct Qdisc *__qdisc_lookup(struct net_device *dev, u32 handle)
+struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
 {
 	struct Qdisc *q;

@@ -201,16 +202,6 @@ static struct Qdisc *__qdisc_lookup(struct net_device *dev, u32 handle)
 	return NULL;
 }

-struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
-{
-	struct Qdisc *q;
-
-	read_lock(&qdisc_tree_lock);
-	q = __qdisc_lookup(dev, handle);
-	read_unlock(&qdisc_tree_lock);
-	return q;
-}
-
 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
 {
 	unsigned long cl;
@@ -291,6 +282,48 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
 	}
 }

+static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
+{
+	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
+						 timer);
+	struct net_device *dev = wd->qdisc->dev;
+
+	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+	smp_wmb();
+	if (spin_trylock(&dev->queue_lock)) {
+		qdisc_run(dev);
+		spin_unlock(&dev->queue_lock);
+	} else
+		netif_schedule(dev);
+
+	return HRTIMER_NORESTART;
+}
+
+void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
+{
+	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	wd->timer.function = qdisc_watchdog;
+	wd->qdisc = qdisc;
+}
+EXPORT_SYMBOL(qdisc_watchdog_init);
+
+void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
+{
+	ktime_t time;
+
+	wd->qdisc->flags |= TCQ_F_THROTTLED;
+	time = ktime_set(0, 0);
+	time = ktime_add_ns(time, PSCHED_US2NS(expires));
+	hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
+}
+EXPORT_SYMBOL(qdisc_watchdog_schedule);
+
+void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
+{
+	hrtimer_cancel(&wd->timer);
+	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+}
+EXPORT_SYMBOL(qdisc_watchdog_cancel);

 /* Allocate an unique handle from space managed by kernel */

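The new qdisc_watchdog replaces the per-qdisc timer_list boilerplate: a qdisc embeds a struct qdisc_watchdog, initializes it once, and arms an absolute psched_time_t expiry whenever it has to throttle itself. A minimal sketch of how a qdisc might use it; my_sched_data and the dequeue logic are hypothetical, the three watchdog calls are the API added above:

    /* Hypothetical qdisc using the qdisc_watchdog API introduced above. */
    struct my_sched_data {
    	struct qdisc_watchdog watchdog;
    	psched_time_t next_send;	/* absolute psched time */
    };

    static int my_init(struct Qdisc *sch, struct rtattr *opt)
    {
    	struct my_sched_data *q = qdisc_priv(sch);

    	qdisc_watchdog_init(&q->watchdog, sch);	/* once, at init */
    	return 0;
    }

    static struct sk_buff *my_dequeue(struct Qdisc *sch)
    {
    	struct my_sched_data *q = qdisc_priv(sch);
    	psched_time_t now = psched_get_time();

    	if (now < q->next_send) {
    		/* Throttled: arm the hrtimer-backed watchdog and give up.
    		 * It sets TCQ_F_THROTTLED and reschedules the device when
    		 * the timer fires. */
    		qdisc_watchdog_schedule(&q->watchdog, q->next_send);
    		return NULL;
    	}
    	/* ... hand out a packet ... */
    	return NULL;
    }

    static void my_reset(struct Qdisc *sch)
    {
    	struct my_sched_data *q = qdisc_priv(sch);

    	qdisc_watchdog_cancel(&q->watchdog);	/* clears throttling too */
    }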
@@ -362,7 +395,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
 	if (n == 0)
 		return;
 	while ((parentid = sch->parent)) {
-		sch = __qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
+		sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
 		cops = sch->ops->cl_ops;
 		if (cops->qlen_notify) {
 			cl = cops->get(sch, parentid);
@@ -467,12 +500,16 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)

 	if (handle == TC_H_INGRESS) {
 		sch->flags |= TCQ_F_INGRESS;
+		sch->stats_lock = &dev->ingress_lock;
 		handle = TC_H_MAKE(TC_H_INGRESS, 0);
-	} else if (handle == 0) {
-		handle = qdisc_alloc_handle(dev);
-		err = -ENOMEM;
-		if (handle == 0)
-			goto err_out3;
+	} else {
+		sch->stats_lock = &dev->queue_lock;
+		if (handle == 0) {
+			handle = qdisc_alloc_handle(dev);
+			err = -ENOMEM;
+			if (handle == 0)
+				goto err_out3;
+		}
 	}

 	sch->handle = handle;
@@ -621,9 +658,9 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 			return err;
 		if (q) {
 			qdisc_notify(skb, n, clid, q, NULL);
-			spin_lock_bh(&dev->queue_lock);
+			qdisc_lock_tree(dev);
 			qdisc_destroy(q);
-			spin_unlock_bh(&dev->queue_lock);
+			qdisc_unlock_tree(dev);
 		}
 	} else {
 		qdisc_notify(skb, n, clid, NULL, q);
@@ -756,17 +793,17 @@ graft:
 		err = qdisc_graft(dev, p, clid, q, &old_q);
 		if (err) {
 			if (q) {
-				spin_lock_bh(&dev->queue_lock);
+				qdisc_lock_tree(dev);
 				qdisc_destroy(q);
-				spin_unlock_bh(&dev->queue_lock);
+				qdisc_unlock_tree(dev);
 			}
 			return err;
 		}
 		qdisc_notify(skb, n, clid, old_q, q);
 		if (old_q) {
-			spin_lock_bh(&dev->queue_lock);
+			qdisc_lock_tree(dev);
 			qdisc_destroy(old_q);
-			spin_unlock_bh(&dev->queue_lock);
+			qdisc_unlock_tree(dev);
 		}
 	}
 	return 0;
@@ -777,7 +814,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 {
 	struct tcmsg *tcm;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct gnet_dump d;

 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
@@ -811,12 +848,12 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 	if (gnet_stats_finish_copy(&d) < 0)
 		goto rtattr_failure;

-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;

nlmsg_failure:
rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }

@@ -862,7 +899,6 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 			continue;
 		if (idx > s_idx)
 			s_q_idx = 0;
-		read_lock(&qdisc_tree_lock);
 		q_idx = 0;
 		list_for_each_entry(q, &dev->qdisc_list, list) {
 			if (q_idx < s_q_idx) {
@@ -870,13 +906,10 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 				continue;
 			}
 			if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
-					  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
-				read_unlock(&qdisc_tree_lock);
+					  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
 				goto done;
-			}
 			q_idx++;
 		}
-		read_unlock(&qdisc_tree_lock);
 	}

done:
@@ -1015,7 +1048,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
 {
 	struct tcmsg *tcm;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct gnet_dump d;
 	struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

@@ -1040,12 +1073,12 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
 	if (gnet_stats_finish_copy(&d) < 0)
 		goto rtattr_failure;

-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;

nlmsg_failure:
rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }

@@ -1099,7 +1132,6 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 	s_t = cb->args[0];
 	t = 0;

-	read_lock(&qdisc_tree_lock);
 	list_for_each_entry(q, &dev->qdisc_list, list) {
 		if (t < s_t || !q->ops->cl_ops ||
 		    (tcm->tcm_parent &&
@@ -1121,7 +1153,6 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 			break;
 		t++;
 	}
-	read_unlock(&qdisc_tree_lock);

 	cb->args[0] = t;

@@ -1146,7 +1177,7 @@ reclassify:

 	for ( ; tp; tp = tp->next) {
 		if ((tp->protocol == protocol ||
-		     tp->protocol == __constant_htons(ETH_P_ALL)) &&
+		     tp->protocol == htons(ETH_P_ALL)) &&
 		    (err = tp->classify(skb, tp, res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
 			if ( TC_ACT_RECLASSIFY == err) {
@@ -1175,15 +1206,31 @@ reclassify:
 	return -1;
 }

-static int psched_us_per_tick = 1;
-static int psched_tick_per_us = 1;
+void tcf_destroy(struct tcf_proto *tp)
+{
+	tp->ops->destroy(tp);
+	module_put(tp->ops->owner);
+	kfree(tp);
+}
+
+void tcf_destroy_chain(struct tcf_proto *fl)
+{
+	struct tcf_proto *tp;
+
+	while ((tp = fl) != NULL) {
+		fl = tp->next;
+		tcf_destroy(tp);
+	}
+}
+EXPORT_SYMBOL(tcf_destroy_chain);

#ifdef CONFIG_PROC_FS
 static int psched_show(struct seq_file *seq, void *v)
 {
 	seq_printf(seq, "%08x %08x %08x %08x\n",
-		   psched_tick_per_us, psched_us_per_tick,
-		   1000000, HZ);
+		   (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
+		   1000000,
+		   (u32)NSEC_PER_SEC/(u32)ktime_to_ns(KTIME_MONOTONIC_RES));

 	return 0;
 }
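tcf_destroy_chain() folds into one helper the filter-list teardown loop that several qdiscs carried privately (destroy_filters in sch_atm, cbq_destroy_filters, hfsc_destroy_filters, and the open-coded loop in sch_dsmark, all removed further down). A sketch of the call site it enables, assuming a hypothetical qdisc private struct holding a filter_list:

    /* Hypothetical destroy path; tcf_destroy_chain() is the helper above. */
    static void my_destroy(struct Qdisc *sch)
    {
    	struct my_sched_data *q = qdisc_priv(sch);

    	/* Walks q->filter_list, calling tp->ops->destroy(), dropping the
    	 * module reference and freeing each tcf_proto in turn. */
    	tcf_destroy_chain(q->filter_list);
    	q->filter_list = NULL;
    }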
@@ -1202,101 +1249,19 @@ static const struct file_operations psched_fops = {
 };
#endif

-#ifdef CONFIG_NET_SCH_CLK_CPU
-psched_tdiff_t psched_clock_per_hz;
-int psched_clock_scale;
-EXPORT_SYMBOL(psched_clock_per_hz);
-EXPORT_SYMBOL(psched_clock_scale);
-
-psched_time_t psched_time_base;
-cycles_t psched_time_mark;
-EXPORT_SYMBOL(psched_time_mark);
-EXPORT_SYMBOL(psched_time_base);
-
-/*
- * Periodically adjust psched_time_base to avoid overflow
- * with 32-bit get_cycles(). Safe up to 4GHz CPU.
- */
-static void psched_tick(unsigned long);
-static DEFINE_TIMER(psched_timer, psched_tick, 0, 0);
-
-static void psched_tick(unsigned long dummy)
-{
-	if (sizeof(cycles_t) == sizeof(u32)) {
-		psched_time_t dummy_stamp;
-		PSCHED_GET_TIME(dummy_stamp);
-		psched_timer.expires = jiffies + 1*HZ;
-		add_timer(&psched_timer);
-	}
-}
-
-int __init psched_calibrate_clock(void)
-{
-	psched_time_t stamp, stamp1;
-	struct timeval tv, tv1;
-	psched_tdiff_t delay;
-	long rdelay;
-	unsigned long stop;
-
-	psched_tick(0);
-	stop = jiffies + HZ/10;
-	PSCHED_GET_TIME(stamp);
-	do_gettimeofday(&tv);
-	while (time_before(jiffies, stop)) {
-		barrier();
-		cpu_relax();
-	}
-	PSCHED_GET_TIME(stamp1);
-	do_gettimeofday(&tv1);
-
-	delay = PSCHED_TDIFF(stamp1, stamp);
-	rdelay = tv1.tv_usec - tv.tv_usec;
-	rdelay += (tv1.tv_sec - tv.tv_sec)*1000000;
-	if (rdelay > delay)
-		return -1;
-	delay /= rdelay;
-	psched_tick_per_us = delay;
-	while ((delay>>=1) != 0)
-		psched_clock_scale++;
-	psched_us_per_tick = 1<<psched_clock_scale;
-	psched_clock_per_hz = (psched_tick_per_us*(1000000/HZ))>>psched_clock_scale;
-	return 0;
-}
-#endif
-
 static int __init pktsched_init(void)
 {
-	struct rtnetlink_link *link_p;
-
-#ifdef CONFIG_NET_SCH_CLK_CPU
-	if (psched_calibrate_clock() < 0)
-		return -1;
-#elif defined(CONFIG_NET_SCH_CLK_JIFFIES)
-	psched_tick_per_us = HZ<<PSCHED_JSCALE;
-	psched_us_per_tick = 1000000;
-#endif
-
-	link_p = rtnetlink_links[PF_UNSPEC];
-
-	/* Setup rtnetlink links. It is made here to avoid
-	   exporting large number of public symbols.
-	 */
-
-	if (link_p) {
-		link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_modify_qdisc;
-		link_p[RTM_DELQDISC-RTM_BASE].doit = tc_get_qdisc;
-		link_p[RTM_GETQDISC-RTM_BASE].doit = tc_get_qdisc;
-		link_p[RTM_GETQDISC-RTM_BASE].dumpit = tc_dump_qdisc;
-		link_p[RTM_NEWTCLASS-RTM_BASE].doit = tc_ctl_tclass;
-		link_p[RTM_DELTCLASS-RTM_BASE].doit = tc_ctl_tclass;
-		link_p[RTM_GETTCLASS-RTM_BASE].doit = tc_ctl_tclass;
-		link_p[RTM_GETTCLASS-RTM_BASE].dumpit = tc_dump_tclass;
-	}
-
 	register_qdisc(&pfifo_qdisc_ops);
 	register_qdisc(&bfifo_qdisc_ops);
 	proc_net_fops_create("psched", 0, &psched_fops);

+	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
+	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);
+
 	return 0;
 }

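pktsched_init() no longer pokes doit/dumpit pointers into the rtnetlink_links[PF_UNSPEC] array; each message type is registered through rtnl_register(). A sketch of the registration pattern, mirroring the calls above with a hypothetical handler pair:

    /* Sketch: rtnl_register() takes (family, msgtype, doit, dumpit),
     * with NULL for an unused slot. my_doit/my_dumpit are hypothetical
     * handlers with the signatures this interface expects. */
    static int my_doit(struct sk_buff *skb, struct nlmsghdr *n, void *arg);
    static int my_dumpit(struct sk_buff *skb, struct netlink_callback *cb);

    static int __init my_module_init(void)
    {
    	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, my_doit, NULL);
    	rtnl_register(PF_UNSPEC, RTM_GETQDISC, my_doit, my_dumpit);
    	return 0;
    }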
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index afb3bbd571f2..be7d299acd73 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -14,6 +14,7 @@
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
 #include <linux/file.h> /* for fput */
+#include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/sock.h>

@@ -157,19 +158,6 @@ static unsigned long atm_tc_bind_filter(struct Qdisc *sch,
 	return atm_tc_get(sch,classid);
 }

-
-static void destroy_filters(struct atm_flow_data *flow)
-{
-	struct tcf_proto *filter;
-
-	while ((filter = flow->filter_list)) {
-		DPRINTK("destroy_filters: destroying filter %p\n",filter);
-		flow->filter_list = filter->next;
-		tcf_destroy(filter);
-	}
-}
-
-
 /*
  * atm_tc_put handles all destructions, including the ones that are explicitly
  * requested (atm_tc_destroy, etc.). The assumption here is that we never drop
@@ -194,7 +182,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
 	*prev = flow->next;
 	DPRINTK("atm_tc_put: qdisc %p\n",flow->q);
 	qdisc_destroy(flow->q);
-	destroy_filters(flow);
+	tcf_destroy_chain(flow->filter_list);
 	if (flow->sock) {
 		DPRINTK("atm_tc_put: f_count %d\n",
 		        file_count(flow->sock->file));
@@ -503,7 +491,7 @@ static void sch_atm_dequeue(unsigned long data)
 			}
 			D2PRINTK("atm_tc_dequeue: sending on class %p\n",flow);
 			/* remove any LL header somebody else has attached */
-			skb_pull(skb,(char *) skb->nh.iph-(char *) skb->data);
+			skb_pull(skb, skb_network_offset(skb));
 			if (skb_headroom(skb) < flow->hdr_len) {
 				struct sk_buff *new;

@@ -513,7 +501,7 @@ static void sch_atm_dequeue(unsigned long data)
 				skb = new;
 			}
 			D2PRINTK("sch_atm_dequeue: ip %p, data %p\n",
-			    skb->nh.iph,skb->data);
+				 skb_network_header(skb), skb->data);
 			ATM_SKB(skb)->vcc = flow->vcc;
 			memcpy(skb_push(skb,flow->hdr_len),flow->hdr,
 			    flow->hdr_len);
@@ -610,7 +598,7 @@ static void atm_tc_destroy(struct Qdisc *sch)
 	DPRINTK("atm_tc_destroy(sch %p,[qdisc %p])\n",sch,p);
 	/* races ? */
 	while ((flow = p->flows)) {
-		destroy_filters(flow);
+		tcf_destroy_chain(flow->filter_list);
 		if (flow->ref > 1)
 			printk(KERN_ERR "atm_destroy: %p->ref = %d\n",flow,
 			       flow->ref);
@@ -631,7 +619,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
 {
 	struct atm_qdisc_data *p = PRIV(sch);
 	struct atm_flow_data *flow = (struct atm_flow_data *) cl;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;

 	DPRINTK("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
@@ -661,11 +649,11 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,

 		RTA_PUT(skb,TCA_ATM_EXCESS,sizeof(zero),&zero);
 	}
-	rta->rta_len = skb->tail-b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 	return skb->len;

rtattr_failure:
-	skb_trim(skb,b-skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 static int
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 76c92e710a33..a294542cb8e4 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -29,6 +29,7 @@
 #include <linux/etherdevice.h>
 #include <linux/notifier.h>
 #include <net/ip.h>
+#include <net/netlink.h>
 #include <net/route.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
@@ -112,7 +113,7 @@ struct cbq_class

 	/* Overlimit strategy parameters */
 	void (*overlimit)(struct cbq_class *cl);
-	long penalty;
+	psched_tdiff_t penalty;

 	/* General scheduler (WRR) parameters */
 	long allot;
@@ -143,7 +144,7 @@ struct cbq_class
 	psched_time_t undertime;
 	long avgidle;
 	long deficit; /* Saved deficit for WRR */
-	unsigned long penalized;
+	psched_time_t penalized;
 	struct gnet_stats_basic bstats;
 	struct gnet_stats_queue qstats;
 	struct gnet_stats_rate_est rate_est;
@@ -180,12 +181,12 @@ struct cbq_sched_data
 	psched_time_t now_rt; /* Cached real time */
 	unsigned pmask;

-	struct timer_list delay_timer;
-	struct timer_list wd_timer; /* Watchdog timer,
+	struct hrtimer delay_timer;
+	struct qdisc_watchdog watchdog; /* Watchdog timer,
 				       started when CBQ has
 				       backlog, but cannot
 				       transmit just now */
-	long wd_expires;
+	psched_tdiff_t wd_expires;
 	int toplevel;
 	u32 hgenerator;
 };
@@ -384,12 +385,12 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 		psched_time_t now;
 		psched_tdiff_t incr;

-		PSCHED_GET_TIME(now);
-		incr = PSCHED_TDIFF(now, q->now_rt);
-		PSCHED_TADD2(q->now, incr, now);
+		now = psched_get_time();
+		incr = now - q->now_rt;
+		now = q->now + incr;

 		do {
-			if (PSCHED_TLESS(cl->undertime, now)) {
+			if (cl->undertime < now) {
 				q->toplevel = cl->level;
 				return;
 			}
@@ -473,7 +474,7 @@ cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
 static void cbq_ovl_classic(struct cbq_class *cl)
 {
 	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
-	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
+	psched_tdiff_t delay = cl->undertime - q->now;

 	if (!cl->delayed) {
 		delay += cl->offtime;
@@ -491,7 +492,7 @@ static void cbq_ovl_classic(struct cbq_class *cl)
 			cl->avgidle = cl->minidle;
 		if (delay <= 0)
 			delay = 1;
-		PSCHED_TADD2(q->now, delay, cl->undertime);
+		cl->undertime = q->now + delay;

 		cl->xstats.overactions++;
 		cl->delayed = 1;
@@ -508,7 +509,7 @@ static void cbq_ovl_classic(struct cbq_class *cl)
 		psched_tdiff_t base_delay = q->wd_expires;

 		for (b = cl->borrow; b; b = b->borrow) {
-			delay = PSCHED_TDIFF(b->undertime, q->now);
+			delay = b->undertime - q->now;
 			if (delay < base_delay) {
 				if (delay <= 0)
 					delay = 1;
@@ -546,27 +547,32 @@ static void cbq_ovl_rclassic(struct cbq_class *cl)
 static void cbq_ovl_delay(struct cbq_class *cl)
 {
 	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
-	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
+	psched_tdiff_t delay = cl->undertime - q->now;

 	if (!cl->delayed) {
-		unsigned long sched = jiffies;
+		psched_time_t sched = q->now;
+		ktime_t expires;

 		delay += cl->offtime;
 		if (cl->avgidle < 0)
 			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 		if (cl->avgidle < cl->minidle)
 			cl->avgidle = cl->minidle;
-		PSCHED_TADD2(q->now, delay, cl->undertime);
+		cl->undertime = q->now + delay;

 		if (delay > 0) {
-			sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
+			sched += delay + cl->penalty;
 			cl->penalized = sched;
 			cl->cpriority = TC_CBQ_MAXPRIO;
 			q->pmask |= (1<<TC_CBQ_MAXPRIO);
-			if (del_timer(&q->delay_timer) &&
-			    (long)(q->delay_timer.expires - sched) > 0)
-				q->delay_timer.expires = sched;
-			add_timer(&q->delay_timer);
+
+			expires = ktime_set(0, 0);
+			expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
+			if (hrtimer_try_to_cancel(&q->delay_timer) &&
+			    ktime_to_ns(ktime_sub(q->delay_timer.expires,
+						  expires)) > 0)
+				q->delay_timer.expires = expires;
+			hrtimer_restart(&q->delay_timer);
 			cl->delayed = 1;
 			cl->xstats.overactions++;
 			return;
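CBQ's delay bookkeeping now stays entirely in psched ticks (psched_time_t/psched_tdiff_t) and is converted to nanoseconds only at the hrtimer boundary, via PSCHED_US2NS(). A worked sketch of that boundary, assuming the definition in this tree, PSCHED_US2NS(x) == ((s64)(x) << 10), i.e. one tick is 2^10 ns = 1.024us:

    /* Sketch: arming an hrtimer from an absolute psched timestamp.
     * Assumes PSCHED_US2NS(x) == ((s64)(x) << 10) as in this tree. */
    static void arm_at(struct hrtimer *timer, psched_time_t sched)
    {
    	ktime_t expires = ktime_set(0, 0);

    	/* e.g. sched == 976563 ticks is roughly 1.000s of wall time,
    	 * since 976563 * 1024ns ~= 10^9 ns */
    	expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
    	hrtimer_start(timer, expires, HRTIMER_MODE_ABS);
    }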
@@ -583,7 +589,7 @@ static void cbq_ovl_lowprio(struct cbq_class *cl)
 {
 	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

-	cl->penalized = jiffies + cl->penalty;
+	cl->penalized = q->now + cl->penalty;

 	if (cl->cpriority != cl->priority2) {
 		cl->cpriority = cl->priority2;
@@ -604,27 +610,19 @@ static void cbq_ovl_drop(struct cbq_class *cl)
 	cbq_ovl_classic(cl);
 }

-static void cbq_watchdog(unsigned long arg)
-{
-	struct Qdisc *sch = (struct Qdisc*)arg;
-
-	sch->flags &= ~TCQ_F_THROTTLED;
-	netif_schedule(sch->dev);
-}
-
-static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
+static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
+				       psched_time_t now)
 {
 	struct cbq_class *cl;
 	struct cbq_class *cl_prev = q->active[prio];
-	unsigned long now = jiffies;
-	unsigned long sched = now;
+	psched_time_t sched = now;

 	if (cl_prev == NULL)
-		return now;
+		return 0;

 	do {
 		cl = cl_prev->next_alive;
-		if ((long)(now - cl->penalized) > 0) {
+		if (now - cl->penalized > 0) {
 			cl_prev->next_alive = cl->next_alive;
 			cl->next_alive = NULL;
 			cl->cpriority = cl->priority;
@@ -640,30 +638,34 @@ static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
 			}

 			cl = cl_prev->next_alive;
-		} else if ((long)(sched - cl->penalized) > 0)
+		} else if (sched - cl->penalized > 0)
 			sched = cl->penalized;
 	} while ((cl_prev = cl) != q->active[prio]);

-	return (long)(sched - now);
+	return sched - now;
 }

-static void cbq_undelay(unsigned long arg)
+static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 {
-	struct Qdisc *sch = (struct Qdisc*)arg;
-	struct cbq_sched_data *q = qdisc_priv(sch);
-	long delay = 0;
+	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
+						delay_timer);
+	struct Qdisc *sch = q->watchdog.qdisc;
+	psched_time_t now;
+	psched_tdiff_t delay = 0;
 	unsigned pmask;

+	now = psched_get_time();
+
 	pmask = q->pmask;
 	q->pmask = 0;

 	while (pmask) {
 		int prio = ffz(~pmask);
-		long tmp;
+		psched_tdiff_t tmp;

 		pmask &= ~(1<<prio);

-		tmp = cbq_undelay_prio(q, prio);
+		tmp = cbq_undelay_prio(q, prio, now);
 		if (tmp > 0) {
 			q->pmask |= 1<<prio;
 			if (tmp < delay || delay == 0)
@@ -672,12 +674,16 @@ static void cbq_undelay(unsigned long arg)
 	}

 	if (delay) {
-		q->delay_timer.expires = jiffies + delay;
-		add_timer(&q->delay_timer);
+		ktime_t time;
+
+		time = ktime_set(0, 0);
+		time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
+		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
 	}

 	sch->flags &= ~TCQ_F_THROTTLED;
 	netif_schedule(sch->dev);
+	return HRTIMER_NORESTART;
 }


@@ -732,7 +738,7 @@ cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
 	if (cl && q->toplevel >= borrowed->level) {
 		if (cl->q->q.qlen > 1) {
 			do {
-				if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
+				if (borrowed->undertime == PSCHED_PASTPERFECT) {
 					q->toplevel = borrowed->level;
 					return;
 				}
@@ -770,7 +776,7 @@ cbq_update(struct cbq_sched_data *q)
 		   idle = (now - last) - last_pktlen/rate
 		 */

-		idle = PSCHED_TDIFF(q->now, cl->last);
+		idle = q->now - cl->last;
 		if ((unsigned long)idle > 128*1024*1024) {
 			avgidle = cl->maxidle;
 		} else {
@@ -814,13 +820,11 @@ cbq_update(struct cbq_sched_data *q)
 			idle -= L2T(&q->link, len);
 			idle += L2T(cl, len);

-			PSCHED_AUDIT_TDIFF(idle);
-
-			PSCHED_TADD2(q->now, idle, cl->undertime);
+			cl->undertime = q->now + idle;
 		} else {
 			/* Underlimit */

-			PSCHED_SET_PASTPERFECT(cl->undertime);
+			cl->undertime = PSCHED_PASTPERFECT;
 			if (avgidle > cl->maxidle)
 				cl->avgidle = cl->maxidle;
 			else
@@ -841,8 +845,7 @@ cbq_under_limit(struct cbq_class *cl)
 	if (cl->tparent == NULL)
 		return cl;

-	if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
-	    !PSCHED_TLESS(q->now, cl->undertime)) {
+	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
 		cl->delayed = 0;
 		return cl;
 	}
@@ -865,8 +868,7 @@ cbq_under_limit(struct cbq_class *cl)
 		}
 		if (cl->level > q->toplevel)
 			return NULL;
-	} while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
-		 PSCHED_TLESS(q->now, cl->undertime));
+	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);

 	cl->delayed = 0;
 	return cl;
@@ -1001,8 +1003,8 @@ cbq_dequeue(struct Qdisc *sch)
 	psched_time_t now;
 	psched_tdiff_t incr;

-	PSCHED_GET_TIME(now);
-	incr = PSCHED_TDIFF(now, q->now_rt);
+	now = psched_get_time();
+	incr = now - q->now_rt;

 	if (q->tx_class) {
 		psched_tdiff_t incr2;
@@ -1014,12 +1016,12 @@ cbq_dequeue(struct Qdisc *sch)
 		   cbq_time = max(real_time, work);
 		 */
 		incr2 = L2T(&q->link, q->tx_len);
-		PSCHED_TADD(q->now, incr2);
+		q->now += incr2;
 		cbq_update(q);
 		if ((incr -= incr2) < 0)
 			incr = 0;
 	}
-	PSCHED_TADD(q->now, incr);
+	q->now += incr;
 	q->now_rt = now;

 	for (;;) {
@@ -1051,11 +1053,11 @@ cbq_dequeue(struct Qdisc *sch)
 		 */

 		if (q->toplevel == TC_CBQ_MAXLEVEL &&
-		    PSCHED_IS_PASTPERFECT(q->link.undertime))
+		    q->link.undertime == PSCHED_PASTPERFECT)
 			break;

 		q->toplevel = TC_CBQ_MAXLEVEL;
-		PSCHED_SET_PASTPERFECT(q->link.undertime);
+		q->link.undertime = PSCHED_PASTPERFECT;
 	}

 	/* No packets in scheduler or nobody wants to give them to us :-(
@@ -1063,13 +1065,9 @@ cbq_dequeue(struct Qdisc *sch)

 	if (sch->q.qlen) {
 		sch->qstats.overlimits++;
-		if (q->wd_expires) {
-			long delay = PSCHED_US2JIFFIE(q->wd_expires);
-			if (delay <= 0)
-				delay = 1;
-			mod_timer(&q->wd_timer, jiffies + delay);
-			sch->flags |= TCQ_F_THROTTLED;
-		}
+		if (q->wd_expires)
+			qdisc_watchdog_schedule(&q->watchdog,
+						now + q->wd_expires);
 	}
 	return NULL;
 }
@@ -1276,10 +1274,10 @@ cbq_reset(struct Qdisc* sch)
 	q->pmask = 0;
 	q->tx_class = NULL;
 	q->tx_borrowed = NULL;
-	del_timer(&q->wd_timer);
-	del_timer(&q->delay_timer);
+	qdisc_watchdog_cancel(&q->watchdog);
+	hrtimer_cancel(&q->delay_timer);
 	q->toplevel = TC_CBQ_MAXLEVEL;
-	PSCHED_GET_TIME(q->now);
+	q->now = psched_get_time();
 	q->now_rt = q->now;

 	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
@@ -1290,7 +1288,7 @@ cbq_reset(struct Qdisc* sch)
 			qdisc_reset(cl->q);

 			cl->next_alive = NULL;
-			PSCHED_SET_PASTPERFECT(cl->undertime);
+			cl->undertime = PSCHED_PASTPERFECT;
 			cl->avgidle = cl->maxidle;
 			cl->deficit = cl->quantum;
 			cl->cpriority = cl->priority;
@@ -1379,7 +1377,7 @@ static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
 	default:
 		return -EINVAL;
 	}
-	cl->penalty = (ovl->penalty*HZ)/1000;
+	cl->penalty = ovl->penalty;
 	return 0;
 }

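Note the unit change hidden in cbq_set_overlimit(): penalty used to be converted from milliseconds to jiffies on the way in and back out again in cbq_dump_ovl() further down; it is now carried in psched ticks end to end, so both conversions disappear. Purely illustrative, not kernel code:

    /* Before: userspace value treated as ms, stored as jiffies. */
    cl->penalty = (ovl->penalty * HZ) / 1000;	/* set */
    opt.penalty = (cl->penalty * 1000) / HZ;	/* dump */
    /* After: stored and dumped in psched ticks, no scaling. */
    cl->penalty = ovl->penalty;			/* set */
    opt.penalty = cl->penalty;			/* dump */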
@@ -1446,14 +1444,11 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
 	q->link.minidle = -0x7FFFFFFF;
 	q->link.stats_lock = &sch->dev->queue_lock;

-	init_timer(&q->wd_timer);
-	q->wd_timer.data = (unsigned long)sch;
-	q->wd_timer.function = cbq_watchdog;
-	init_timer(&q->delay_timer);
-	q->delay_timer.data = (unsigned long)sch;
+	qdisc_watchdog_init(&q->watchdog, sch);
+	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	q->delay_timer.function = cbq_undelay;
 	q->toplevel = TC_CBQ_MAXLEVEL;
-	PSCHED_GET_TIME(q->now);
+	q->now = psched_get_time();
 	q->now_rt = q->now;

 	cbq_link_class(&q->link);
@@ -1467,19 +1462,19 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt)

 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
 {
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);

 	RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
 	return skb->len;

rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }

 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
 {
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_lssopt opt;

 	opt.flags = 0;
@@ -1498,13 +1493,13 @@ static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
 	return skb->len;

rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }

 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
 {
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_wrropt opt;

 	opt.flags = 0;
@@ -1516,30 +1511,30 @@ static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
 	return skb->len;

rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }

 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
 {
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_ovl opt;

 	opt.strategy = cl->ovl_strategy;
 	opt.priority2 = cl->priority2+1;
 	opt.pad = 0;
-	opt.penalty = (cl->penalty*1000)/HZ;
+	opt.penalty = cl->penalty;
 	RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
 	return skb->len;

rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }

 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
 {
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_fopt opt;

 	if (cl->split || cl->defmap) {
@@ -1551,14 +1546,14 @@ static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
 	return skb->len;

rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }

#ifdef CONFIG_NET_CLS_POLICE
 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
 {
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_police opt;

 	if (cl->police) {
@@ -1570,7 +1565,7 @@ static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
 	return skb->len;

rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
#endif
@@ -1592,18 +1587,18 @@ static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;

 	rta = (struct rtattr*)b;
 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	if (cbq_dump_attr(skb, &q->link) < 0)
 		goto rtattr_failure;
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 	return skb->len;

rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }

@@ -1621,7 +1616,7 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
 	       struct sk_buff *skb, struct tcmsg *tcm)
 {
 	struct cbq_class *cl = (struct cbq_class*)arg;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;

 	if (cl->tparent)
@@ -1635,11 +1630,11 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	if (cbq_dump_attr(skb, cl) < 0)
 		goto rtattr_failure;
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 	return skb->len;

rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }

@@ -1654,8 +1649,8 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 	cl->xstats.avgidle = cl->avgidle;
 	cl->xstats.undertime = 0;

-	if (!PSCHED_IS_PASTPERFECT(cl->undertime))
-		cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
+	if (cl->undertime != PSCHED_PASTPERFECT)
+		cl->xstats.undertime = cl->undertime - q->now;

 	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
#ifdef CONFIG_NET_ESTIMATOR
@@ -1722,23 +1717,13 @@ static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
 	return 0;
 }

-static void cbq_destroy_filters(struct cbq_class *cl)
-{
-	struct tcf_proto *tp;
-
-	while ((tp = cl->filter_list) != NULL) {
-		cl->filter_list = tp->next;
-		tcf_destroy(tp);
-	}
-}
-
 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);

 	BUG_TRAP(!cl->filters);

-	cbq_destroy_filters(cl);
+	tcf_destroy_chain(cl->filter_list);
 	qdisc_destroy(cl->q);
 	qdisc_put_rtab(cl->R_tab);
#ifdef CONFIG_NET_ESTIMATOR
@@ -1765,7 +1750,7 @@ cbq_destroy(struct Qdisc* sch)
 	 */
 	for (h = 0; h < 16; h++)
 		for (cl = q->classes[h]; cl; cl = cl->next)
-			cbq_destroy_filters(cl);
+			tcf_destroy_chain(cl->filter_list);

 	for (h = 0; h < 16; h++) {
 		struct cbq_class *next;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 96324cf4e6a9..3c6fd181263f 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -216,17 +216,17 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 		/* FIXME: Safe with non-linear skbs? --RR */
 		switch (skb->protocol) {
 			case __constant_htons(ETH_P_IP):
-				skb->tc_index = ipv4_get_dsfield(skb->nh.iph)
+				skb->tc_index = ipv4_get_dsfield(ip_hdr(skb))
 					& ~INET_ECN_MASK;
 				break;
 			case __constant_htons(ETH_P_IPV6):
-				skb->tc_index = ipv6_get_dsfield(skb->nh.ipv6h)
+				skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb))
 					& ~INET_ECN_MASK;
 				break;
 			default:
 				skb->tc_index = 0;
 				break;
-		};
+		}
 	}

 	if (TC_H_MAJ(skb->priority) == sch->handle)
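dsmark now reads the DS field through the typed header accessors ip_hdr()/ipv6_hdr() rather than the old skb->nh union. A self-contained sketch of the same classification step; the dsfield helpers come from <net/dsfield.h>, INET_ECN_MASK from <net/inet_ecn.h>:

    #include <linux/if_ether.h>
    #include <linux/ip.h>
    #include <linux/ipv6.h>
    #include <linux/skbuff.h>
    #include <net/dsfield.h>
    #include <net/inet_ecn.h>

    /* Sketch: extract the DSCP (DS field minus the two ECN bits),
     * mirroring the dsmark_enqueue() hunk above. */
    static u8 dscp_of(const struct sk_buff *skb)
    {
    	switch (skb->protocol) {
    	case __constant_htons(ETH_P_IP):
    		return ipv4_get_dsfield(ip_hdr(skb)) & ~INET_ECN_MASK;
    	case __constant_htons(ETH_P_IPV6):
    		return ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK;
    	default:
    		return 0;
    	}
    }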
@@ -257,7 +257,7 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 			if (p->default_index != NO_DEFAULT_INDEX)
 				skb->tc_index = p->default_index;
 			break;
-		};
+		}
 	}

 	err = p->q->enqueue(skb,p->q);
@@ -292,11 +292,11 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)

 	switch (skb->protocol) {
 		case __constant_htons(ETH_P_IP):
-			ipv4_change_dsfield(skb->nh.iph, p->mask[index],
+			ipv4_change_dsfield(ip_hdr(skb), p->mask[index],
 					    p->value[index]);
 			break;
 		case __constant_htons(ETH_P_IPV6):
-			ipv6_change_dsfield(skb->nh.ipv6h, p->mask[index],
+			ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index],
 					    p->value[index]);
 			break;
 		default:
@@ -310,7 +310,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
 				       "unsupported protocol %d\n",
 				       ntohs(skb->protocol));
 			break;
-	};
+	}

 	return skb;
 }
@@ -412,16 +412,10 @@ static void dsmark_reset(struct Qdisc *sch)
 static void dsmark_destroy(struct Qdisc *sch)
 {
 	struct dsmark_qdisc_data *p = PRIV(sch);
-	struct tcf_proto *tp;

 	DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p);

-	while (p->filter_list) {
-		tp = p->filter_list;
-		p->filter_list = tp->next;
-		tcf_destroy(tp);
-	}
-
+	tcf_destroy_chain(p->filter_list);
 	qdisc_destroy(p->q);
 	kfree(p->mask);
 }
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 52eb3439d7c6..3385ee592541 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -36,34 +36,27 @@

 /* Main transmission queue. */

-/* Main qdisc structure lock.
-
-   However, modifications
-   to data, participating in scheduling must be additionally
-   protected with dev->queue_lock spinlock.
-
-   The idea is the following:
-   - enqueue, dequeue are serialized via top level device
-     spinlock dev->queue_lock.
-   - tree walking is protected by read_lock(qdisc_tree_lock)
-     and this lock is used only in process context.
-   - updates to tree are made only under rtnl semaphore,
-     hence this lock may be made without local bh disabling.
-
-   qdisc_tree_lock must be grabbed BEFORE dev->queue_lock!
- */
-DEFINE_RWLOCK(qdisc_tree_lock);
+/* Modifications to data participating in scheduling must be protected with
+ * dev->queue_lock spinlock.
+ *
+ * The idea is the following:
+ * - enqueue, dequeue are serialized via top level device
+ *   spinlock dev->queue_lock.
+ * - ingress filtering is serialized via top level device
+ *   spinlock dev->ingress_lock.
+ * - updates to tree and tree walking are only done under the rtnl mutex.
+ */

 void qdisc_lock_tree(struct net_device *dev)
 {
-	write_lock(&qdisc_tree_lock);
 	spin_lock_bh(&dev->queue_lock);
+	spin_lock(&dev->ingress_lock);
 }

 void qdisc_unlock_tree(struct net_device *dev)
 {
+	spin_unlock(&dev->ingress_lock);
 	spin_unlock_bh(&dev->queue_lock);
-	write_unlock(&qdisc_tree_lock);
 }

 /*
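With qdisc_tree_lock gone, the rules reduce to: hold RTNL to change or walk the tree, and take qdisc_lock_tree() (queue_lock, then ingress_lock, in that order) around anything that must exclude both the egress and ingress fast paths. A sketch of a tree modification under the new rules; change_root() is hypothetical, the primitives are the ones used in the hunks above:

    /* Sketch: replacing a device's root qdisc under the new locking
     * rules. change_root() is hypothetical; ASSERT_RTNL(),
     * qdisc_lock_tree() and qdisc_destroy() are the real primitives. */
    static void change_root(struct net_device *dev, struct Qdisc *new)
    {
    	struct Qdisc *old;

    	ASSERT_RTNL();			/* tree updates: rtnl only */

    	qdisc_lock_tree(dev);		/* queue_lock, then ingress_lock */
    	old = dev->qdisc_sleeping;
    	dev->qdisc_sleeping = new;
    	qdisc_destroy(old);		/* safe: both fast paths excluded */
    	qdisc_unlock_tree(dev);
    }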
@@ -442,7 +435,6 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops)
 	sch->dequeue = ops->dequeue;
 	sch->dev = dev;
 	dev_hold(dev);
-	sch->stats_lock = &dev->queue_lock;
 	atomic_set(&sch->refcnt, 1);

 	return sch;
@@ -458,6 +450,7 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops,
 	sch = qdisc_alloc(dev, ops);
 	if (IS_ERR(sch))
 		goto errout;
+	sch->stats_lock = &dev->queue_lock;
 	sch->parent = parentid;

 	if (!ops->init || ops->init(sch, NULL) == 0)
@@ -528,15 +521,11 @@ void dev_activate(struct net_device *dev)
 				printk(KERN_INFO "%s: activation failed\n", dev->name);
 				return;
 			}
-			write_lock(&qdisc_tree_lock);
 			list_add_tail(&qdisc->list, &dev->qdisc_list);
-			write_unlock(&qdisc_tree_lock);
 		} else {
 			qdisc = &noqueue_qdisc;
 		}
-		write_lock(&qdisc_tree_lock);
 		dev->qdisc_sleeping = qdisc;
-		write_unlock(&qdisc_tree_lock);
 	}

 	if (!netif_carrier_ok(dev))
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 407c6fb1ba14..9d124c4ee3a7 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -59,13 +59,13 @@
 #include <linux/skbuff.h>
 #include <linux/string.h>
 #include <linux/slab.h>
-#include <linux/timer.h>
 #include <linux/list.h>
 #include <linux/rbtree.h>
 #include <linux/init.h>
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
 #include <linux/pkt_sched.h>
+#include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
 #include <asm/system.h>
@@ -192,23 +192,9 @@ struct hfsc_sched
 	struct list_head droplist;	/* active leaf class list (for
 					   dropping) */
 	struct sk_buff_head requeue;	/* requeued packet */
-	struct timer_list wd_timer;	/* watchdog timer */
+	struct qdisc_watchdog watchdog;	/* watchdog timer */
 };

-/*
- * macros
- */
-#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
-#include <linux/time.h>
-#undef PSCHED_GET_TIME
-#define PSCHED_GET_TIME(stamp)						\
-do {									\
-	struct timeval tv;						\
-	do_gettimeofday(&tv);						\
-	(stamp) = 1ULL * USEC_PER_SEC * tv.tv_sec + tv.tv_usec;	\
-} while (0)
-#endif
-
 #define	HT_INFINITY	0xffffffffffffffffULL	/* infinite time value */


@@ -394,28 +380,17 @@ cftree_update(struct hfsc_class *cl)
  *	ism: (psched_us/byte) << ISM_SHIFT
  *	dx: psched_us
  *
- * Clock source resolution (CONFIG_NET_SCH_CLK_*)
- *  JIFFIES: for 48<=HZ<=1534 resolution is between 0.63us and 1.27us.
- *  CPU: resolution is between 0.5us and 1us.
- *  GETTIMEOFDAY: resolution is exactly 1us.
+ * The clock source resolution with ktime is 1.024us.
  *
  * sm and ism are scaled in order to keep effective digits.
  * SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective
 * digits in decimal using the following table.
  *
- * Note: We can afford the additional accuracy (altq hfsc keeps at most
- * 3 effective digits) thanks to the fact that linux clock is bounded
- * much more tightly.
- *
  *  bits/sec      100Kbps     1Mbps     10Mbps   100Mbps    1Gbps
  * ------------+-------------------------------------------------------
- *  bytes/0.5us   6.25e-3    62.5e-3    625e-3   6250e-e    62500e-3
- *  bytes/us      12.5e-3    125e-3    1250e-3  12500e-3   125000e-3
- *  bytes/1.27us  15.875e-3  158.75e-3 1587.5e-3 15875e-3  158750e-3
+ *  bytes/1.024us 12.8e-3    128e-3    1280e-3  12800e-3   128000e-3
  *
- *  0.5us/byte    160        16         1.6      0.16       0.016
- *  us/byte       80         8          0.8      0.08       0.008
- *  1.27us/byte   63         6.3        0.63     0.063      0.0063
+ *  1.024us/byte  78.125     7.8125    0.78125  0.078125    0.0078125
  */
 #define	SM_SHIFT	20
 #define	ISM_SHIFT	18
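To see why the table now reads 12.8e-3 bytes per tick at 100Kbps: with ktime-backed psched time one tick is 2^10 ns = 1.024us, so PSCHED_TICKS_PER_SEC = 10^9 >> 10 = 976562, and 12500 bytes/s * 1.024us = 12.8e-3 bytes/tick. A hedged, userspace-runnable sketch of m2sm()'s scaling at 1 Mbit/s, assuming those constants (the real function lives in this file):

    /* Worked example (sketch, not kernel code): m2sm() scaling at
     * 1 Mbit/s, assuming one psched tick = 2^10 ns as in this tree. */
    #include <stdio.h>
    #include <stdint.h>

    #define SM_SHIFT		20
    #define PSCHED_TICKS_PER_SEC	(1000000000 >> 10)	/* 1.024us ticks */

    int main(void)
    {
    	uint64_t m = 1000000 / 8;	/* 1 Mbit/s = 125000 bytes/s */
    	uint64_t sm;

    	sm = m << SM_SHIFT;
    	sm += PSCHED_TICKS_PER_SEC - 1;	/* round up, as m2sm() does */
    	sm /= PSCHED_TICKS_PER_SEC;

    	/* sm / 2^SM_SHIFT ~= 0.128 bytes per 1.024us tick, the
    	 * "128e-3" entry in the table above. */
    	printf("sm = %llu (%.6f bytes/tick)\n",
    	       (unsigned long long)sm, (double)sm / (1 << SM_SHIFT));
    	return 0;
    }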
@@ -460,8 +435,8 @@ m2sm(u32 m)
460 u64 sm; 435 u64 sm;
461 436
462 sm = ((u64)m << SM_SHIFT); 437 sm = ((u64)m << SM_SHIFT);
463 sm += PSCHED_JIFFIE2US(HZ) - 1; 438 sm += PSCHED_TICKS_PER_SEC - 1;
464 do_div(sm, PSCHED_JIFFIE2US(HZ)); 439 do_div(sm, PSCHED_TICKS_PER_SEC);
465 return sm; 440 return sm;
466} 441}
467 442
@@ -474,7 +449,7 @@ m2ism(u32 m)
474 if (m == 0) 449 if (m == 0)
475 ism = HT_INFINITY; 450 ism = HT_INFINITY;
476 else { 451 else {
477 ism = ((u64)PSCHED_JIFFIE2US(HZ) << ISM_SHIFT); 452 ism = ((u64)PSCHED_TICKS_PER_SEC << ISM_SHIFT);
478 ism += m - 1; 453 ism += m - 1;
479 do_div(ism, m); 454 do_div(ism, m);
480 } 455 }
@@ -487,7 +462,7 @@ d2dx(u32 d)
487{ 462{
488 u64 dx; 463 u64 dx;
489 464
490 dx = ((u64)d * PSCHED_JIFFIE2US(HZ)); 465 dx = ((u64)d * PSCHED_TICKS_PER_SEC);
491 dx += USEC_PER_SEC - 1; 466 dx += USEC_PER_SEC - 1;
492 do_div(dx, USEC_PER_SEC); 467 do_div(dx, USEC_PER_SEC);
493 return dx; 468 return dx;
@@ -499,7 +474,7 @@ sm2m(u64 sm)
499{ 474{
500 u64 m; 475 u64 m;
501 476
502 m = (sm * PSCHED_JIFFIE2US(HZ)) >> SM_SHIFT; 477 m = (sm * PSCHED_TICKS_PER_SEC) >> SM_SHIFT;
503 return (u32)m; 478 return (u32)m;
504} 479}
505 480
@@ -510,7 +485,7 @@ dx2d(u64 dx)
510 u64 d; 485 u64 d;
511 486
512 d = dx * USEC_PER_SEC; 487 d = dx * USEC_PER_SEC;
513 do_div(d, PSCHED_JIFFIE2US(HZ)); 488 do_div(d, PSCHED_TICKS_PER_SEC);
514 return (u32)d; 489 return (u32)d;
515} 490}
516 491
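The converters above are pure fixed-point arithmetic. A minimal userspace sketch of the m2sm()/sm2m() round trip, assuming the 1.024us tick mentioned in the comment (PSCHED_TICKS_PER_SEC = 10^9 >> 10 = 976562; the rate value is illustrative):

#include <stdio.h>
#include <stdint.h>

#define SM_SHIFT             20
#define PSCHED_TICKS_PER_SEC 976562ULL  /* assumed: 1e9 ns >> 10 */

/* bytes/sec -> (bytes/tick) << SM_SHIFT, rounded up as in m2sm() */
static uint64_t m2sm(uint32_t m)
{
        uint64_t sm = (uint64_t)m << SM_SHIFT;

        sm += PSCHED_TICKS_PER_SEC - 1;
        return sm / PSCHED_TICKS_PER_SEC;
}

/* the inverse mapping, as in sm2m() */
static uint32_t sm2m(uint64_t sm)
{
        return (uint32_t)((sm * PSCHED_TICKS_PER_SEC) >> SM_SHIFT);
}

int main(void)
{
        uint32_t m = 12500;     /* 100 Kbit/s expressed in bytes/sec */
        uint64_t sm = m2sm(m);

        /* prints sm=13422 back=12500: SM_SHIFT=20 preserves well over
         * the 4 effective digits the table above calls for */
        printf("sm=%llu back=%u\n", (unsigned long long)sm, sm2m(sm));
        return 0;
}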
@@ -654,9 +629,7 @@ rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y)
654static void 629static void
655init_ed(struct hfsc_class *cl, unsigned int next_len) 630init_ed(struct hfsc_class *cl, unsigned int next_len)
656{ 631{
657 u64 cur_time; 632 u64 cur_time = psched_get_time();
658
659 PSCHED_GET_TIME(cur_time);
660 633
661 /* update the deadline curve */ 634 /* update the deadline curve */
662 rtsc_min(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul); 635 rtsc_min(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul);
@@ -779,7 +752,7 @@ init_vf(struct hfsc_class *cl, unsigned int len)
779 if (cl->cl_flags & HFSC_USC) { 752 if (cl->cl_flags & HFSC_USC) {
780 /* class has upper limit curve */ 753 /* class has upper limit curve */
781 if (cur_time == 0) 754 if (cur_time == 0)
782 PSCHED_GET_TIME(cur_time); 755 cur_time = psched_get_time();
783 756
784 /* update the ulimit curve */ 757 /* update the ulimit curve */
785 rtsc_min(&cl->cl_ulimit, &cl->cl_usc, cur_time, 758 rtsc_min(&cl->cl_ulimit, &cl->cl_usc, cur_time,
@@ -1063,7 +1036,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
1063 if (cl->cl_parent == NULL && parentid != TC_H_ROOT) 1036 if (cl->cl_parent == NULL && parentid != TC_H_ROOT)
1064 return -EINVAL; 1037 return -EINVAL;
1065 } 1038 }
1066 PSCHED_GET_TIME(cur_time); 1039 cur_time = psched_get_time();
1067 1040
1068 sch_tree_lock(sch); 1041 sch_tree_lock(sch);
1069 if (rsc != NULL) 1042 if (rsc != NULL)
@@ -1149,22 +1122,11 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
1149} 1122}
1150 1123
1151static void 1124static void
1152hfsc_destroy_filters(struct tcf_proto **fl)
1153{
1154 struct tcf_proto *tp;
1155
1156 while ((tp = *fl) != NULL) {
1157 *fl = tp->next;
1158 tcf_destroy(tp);
1159 }
1160}
1161
1162static void
1163hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) 1125hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
1164{ 1126{
1165 struct hfsc_sched *q = qdisc_priv(sch); 1127 struct hfsc_sched *q = qdisc_priv(sch);
1166 1128
1167 hfsc_destroy_filters(&cl->filter_list); 1129 tcf_destroy_chain(cl->filter_list);
1168 qdisc_destroy(cl->qdisc); 1130 qdisc_destroy(cl->qdisc);
1169#ifdef CONFIG_NET_ESTIMATOR 1131#ifdef CONFIG_NET_ESTIMATOR
1170 gen_kill_estimator(&cl->bstats, &cl->rate_est); 1132 gen_kill_estimator(&cl->bstats, &cl->rate_est);
@@ -1389,7 +1351,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
1389 struct tcmsg *tcm) 1351 struct tcmsg *tcm)
1390{ 1352{
1391 struct hfsc_class *cl = (struct hfsc_class *)arg; 1353 struct hfsc_class *cl = (struct hfsc_class *)arg;
1392 unsigned char *b = skb->tail; 1354 unsigned char *b = skb_tail_pointer(skb);
1393 struct rtattr *rta = (struct rtattr *)b; 1355 struct rtattr *rta = (struct rtattr *)b;
1394 1356
1395 tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT; 1357 tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT;
@@ -1400,11 +1362,11 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
1400 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 1362 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1401 if (hfsc_dump_curves(skb, cl) < 0) 1363 if (hfsc_dump_curves(skb, cl) < 0)
1402 goto rtattr_failure; 1364 goto rtattr_failure;
1403 rta->rta_len = skb->tail - b; 1365 rta->rta_len = skb_tail_pointer(skb) - b;
1404 return skb->len; 1366 return skb->len;
1405 1367
1406 rtattr_failure: 1368 rtattr_failure:
1407 skb_trim(skb, b - skb->data); 1369 nlmsg_trim(skb, b);
1408 return -1; 1370 return -1;
1409} 1371}
1410 1372
@@ -1459,21 +1421,11 @@ hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1459} 1421}
1460 1422
1461static void 1423static void
1462hfsc_watchdog(unsigned long arg) 1424hfsc_schedule_watchdog(struct Qdisc *sch)
1463{
1464 struct Qdisc *sch = (struct Qdisc *)arg;
1465
1466 sch->flags &= ~TCQ_F_THROTTLED;
1467 netif_schedule(sch->dev);
1468}
1469
1470static void
1471hfsc_schedule_watchdog(struct Qdisc *sch, u64 cur_time)
1472{ 1425{
1473 struct hfsc_sched *q = qdisc_priv(sch); 1426 struct hfsc_sched *q = qdisc_priv(sch);
1474 struct hfsc_class *cl; 1427 struct hfsc_class *cl;
1475 u64 next_time = 0; 1428 u64 next_time = 0;
1476 long delay;
1477 1429
1478 if ((cl = eltree_get_minel(q)) != NULL) 1430 if ((cl = eltree_get_minel(q)) != NULL)
1479 next_time = cl->cl_e; 1431 next_time = cl->cl_e;
@@ -1482,11 +1434,7 @@ hfsc_schedule_watchdog(struct Qdisc *sch, u64 cur_time)
1482 next_time = q->root.cl_cfmin; 1434 next_time = q->root.cl_cfmin;
1483 } 1435 }
1484 WARN_ON(next_time == 0); 1436 WARN_ON(next_time == 0);
1485 delay = next_time - cur_time; 1437 qdisc_watchdog_schedule(&q->watchdog, next_time);
1486 delay = PSCHED_US2JIFFIE(delay);
1487
1488 sch->flags |= TCQ_F_THROTTLED;
1489 mod_timer(&q->wd_timer, jiffies + delay);
1490} 1438}
1491 1439
1492static int 1440static int
@@ -1523,9 +1471,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt)
1523 1471
1524 list_add(&q->root.hlist, &q->clhash[hfsc_hash(q->root.classid)]); 1472 list_add(&q->root.hlist, &q->clhash[hfsc_hash(q->root.classid)]);
1525 1473
1526 init_timer(&q->wd_timer); 1474 qdisc_watchdog_init(&q->watchdog, sch);
1527 q->wd_timer.function = hfsc_watchdog;
1528 q->wd_timer.data = (unsigned long)sch;
1529 1475
1530 return 0; 1476 return 0;
1531} 1477}
@@ -1595,8 +1541,7 @@ hfsc_reset_qdisc(struct Qdisc *sch)
1595 __skb_queue_purge(&q->requeue); 1541 __skb_queue_purge(&q->requeue);
1596 q->eligible = RB_ROOT; 1542 q->eligible = RB_ROOT;
1597 INIT_LIST_HEAD(&q->droplist); 1543 INIT_LIST_HEAD(&q->droplist);
1598 del_timer(&q->wd_timer); 1544 qdisc_watchdog_cancel(&q->watchdog);
1599 sch->flags &= ~TCQ_F_THROTTLED;
1600 sch->q.qlen = 0; 1545 sch->q.qlen = 0;
1601} 1546}
1602 1547
@@ -1612,14 +1557,14 @@ hfsc_destroy_qdisc(struct Qdisc *sch)
1612 hfsc_destroy_class(sch, cl); 1557 hfsc_destroy_class(sch, cl);
1613 } 1558 }
1614 __skb_queue_purge(&q->requeue); 1559 __skb_queue_purge(&q->requeue);
1615 del_timer(&q->wd_timer); 1560 qdisc_watchdog_cancel(&q->watchdog);
1616} 1561}
1617 1562
1618static int 1563static int
1619hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) 1564hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
1620{ 1565{
1621 struct hfsc_sched *q = qdisc_priv(sch); 1566 struct hfsc_sched *q = qdisc_priv(sch);
1622 unsigned char *b = skb->tail; 1567 unsigned char *b = skb_tail_pointer(skb);
1623 struct tc_hfsc_qopt qopt; 1568 struct tc_hfsc_qopt qopt;
1624 1569
1625 qopt.defcls = q->defcls; 1570 qopt.defcls = q->defcls;
@@ -1627,7 +1572,7 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
1627 return skb->len; 1572 return skb->len;
1628 1573
1629 rtattr_failure: 1574 rtattr_failure:
1630 skb_trim(skb, b - skb->data); 1575 nlmsg_trim(skb, b);
1631 return -1; 1576 return -1;
1632} 1577}
1633 1578
@@ -1681,7 +1626,7 @@ hfsc_dequeue(struct Qdisc *sch)
1681 if ((skb = __skb_dequeue(&q->requeue))) 1626 if ((skb = __skb_dequeue(&q->requeue)))
1682 goto out; 1627 goto out;
1683 1628
1684 PSCHED_GET_TIME(cur_time); 1629 cur_time = psched_get_time();
1685 1630
1686 /* 1631 /*
1687 * if there are eligible classes, use real-time criteria. 1632 * if there are eligible classes, use real-time criteria.
@@ -1698,7 +1643,7 @@ hfsc_dequeue(struct Qdisc *sch)
1698 cl = vttree_get_minvt(&q->root, cur_time); 1643 cl = vttree_get_minvt(&q->root, cur_time);
1699 if (cl == NULL) { 1644 if (cl == NULL) {
1700 sch->qstats.overlimits++; 1645 sch->qstats.overlimits++;
1701 hfsc_schedule_watchdog(sch, cur_time); 1646 hfsc_schedule_watchdog(sch);
1702 return NULL; 1647 return NULL;
1703 } 1648 }
1704 } 1649 }
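Every scheduler converted by this patch follows the same recipe: the private struct timer_list plus manual TCQ_F_THROTTLED handling becomes a struct qdisc_watchdog driven by the three helpers used throughout the hunks above. A condensed sketch of the pattern (the foo_* names are placeholders, and the snippet assumes kernel context, so it is not standalone-compilable):

struct foo_sched {
        /* ... private scheduler state ... */
        struct qdisc_watchdog watchdog;         /* was: struct timer_list */
};

static int foo_init(struct Qdisc *sch, struct rtattr *opt)
{
        struct foo_sched *q = qdisc_priv(sch);

        /* was: init_timer() plus .function/.data assignments */
        qdisc_watchdog_init(&q->watchdog, sch);
        return 0;
}

static struct sk_buff *foo_dequeue(struct Qdisc *sch)
{
        struct foo_sched *q = qdisc_priv(sch);
        psched_time_t next_event = 0;   /* earliest send time, scheduler-specific */

        /* was: PSCHED_US2JIFFIE() + mod_timer() + setting TCQ_F_THROTTLED;
         * the watchdog takes an absolute psched_time_t expiry instead */
        qdisc_watchdog_schedule(&q->watchdog, next_event);
        return NULL;            /* nothing sendable yet in this sketch */
}

static void foo_reset(struct Qdisc *sch)
{
        struct foo_sched *q = qdisc_priv(sch);

        /* was: del_timer() + clearing TCQ_F_THROTTLED */
        qdisc_watchdog_cancel(&q->watchdog);
}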
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 3c3294d01041..99bcec8dd04c 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -50,6 +50,7 @@
50#include <linux/skbuff.h> 50#include <linux/skbuff.h>
51#include <linux/list.h> 51#include <linux/list.h>
52#include <linux/compiler.h> 52#include <linux/compiler.h>
53#include <net/netlink.h>
53#include <net/sock.h> 54#include <net/sock.h>
54#include <net/pkt_sched.h> 55#include <net/pkt_sched.h>
55#include <linux/rbtree.h> 56#include <linux/rbtree.h>
@@ -128,7 +129,7 @@ struct htb_class {
128 } un; 129 } un;
129 struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ 130 struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
130 struct rb_node pq_node; /* node for event queue */ 131 struct rb_node pq_node; /* node for event queue */
131 unsigned long pq_key; /* the same type as jiffies global */ 132 psched_time_t pq_key;
132 133
133 int prio_activity; /* for which prios are we active */ 134 int prio_activity; /* for which prios are we active */
134 enum htb_cmode cmode; /* current mode of the class */ 135 enum htb_cmode cmode; /* current mode of the class */
@@ -179,10 +180,7 @@ struct htb_sched {
179 struct rb_root wait_pq[TC_HTB_MAXDEPTH]; 180 struct rb_root wait_pq[TC_HTB_MAXDEPTH];
180 181
181 /* time of nearest event per level (row) */ 182 /* time of nearest event per level (row) */
182 unsigned long near_ev_cache[TC_HTB_MAXDEPTH]; 183 psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
183
184 /* cached value of jiffies in dequeue */
185 unsigned long jiffies;
186 184
187 /* whether we hit non-work conserving class during this dequeue; we use */ 185 /* whether we hit non-work conserving class during this dequeue; we use */
188 int nwc_hit; /* this to disable mindelay complaint in dequeue */ 186 int nwc_hit; /* this to disable mindelay complaint in dequeue */
@@ -195,7 +193,7 @@ struct htb_sched {
195 193
196 int rate2quantum; /* quant = rate / rate2quantum */ 194 int rate2quantum; /* quant = rate / rate2quantum */
197 psched_time_t now; /* cached dequeue time */ 195 psched_time_t now; /* cached dequeue time */
198 struct timer_list timer; /* send delay timer */ 196 struct qdisc_watchdog watchdog;
199#ifdef HTB_RATECM 197#ifdef HTB_RATECM
200 struct timer_list rttim; /* rate computer timer */ 198 struct timer_list rttim; /* rate computer timer */
201 int recmp_bucket; /* which hash bucket to recompute next */ 199 int recmp_bucket; /* which hash bucket to recompute next */
@@ -342,19 +340,19 @@ static void htb_add_to_wait_tree(struct htb_sched *q,
342{ 340{
343 struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL; 341 struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
344 342
345 cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay); 343 cl->pq_key = q->now + delay;
346 if (cl->pq_key == q->jiffies) 344 if (cl->pq_key == q->now)
347 cl->pq_key++; 345 cl->pq_key++;
348 346
349 /* update the nearest event cache */ 347 /* update the nearest event cache */
350 if (time_after(q->near_ev_cache[cl->level], cl->pq_key)) 348 if (q->near_ev_cache[cl->level] > cl->pq_key)
351 q->near_ev_cache[cl->level] = cl->pq_key; 349 q->near_ev_cache[cl->level] = cl->pq_key;
352 350
353 while (*p) { 351 while (*p) {
354 struct htb_class *c; 352 struct htb_class *c;
355 parent = *p; 353 parent = *p;
356 c = rb_entry(parent, struct htb_class, pq_node); 354 c = rb_entry(parent, struct htb_class, pq_node);
357 if (time_after_eq(cl->pq_key, c->pq_key)) 355 if (cl->pq_key >= c->pq_key)
358 p = &parent->rb_right; 356 p = &parent->rb_right;
359 else 357 else
360 p = &parent->rb_left; 358 p = &parent->rb_left;
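With pq_key now a psched_time_t, the ordering tests become plain integer compares; the surrounding walk is the stock kernel rbtree insertion pattern. The ordering decision alone, as a standalone sketch on an unbalanced BST (u64 stands in for psched_time_t; the kernel additionally calls rb_link_node()/rb_insert_color() to keep the tree balanced):

#include <stdint.h>
#include <stddef.h>

struct node {
        uint64_t key;                   /* plays the role of cl->pq_key */
        struct node *left, *right;
};

/* Mirrors htb_add_to_wait_tree(): equal keys descend to the right,
 * so classes with the same event time fire in insertion order. */
static void insert(struct node **p, struct node *n)
{
        while (*p) {
                if (n->key >= (*p)->key)
                        p = &(*p)->right;
                else
                        p = &(*p)->left;
        }
        n->left = n->right = NULL;
        *p = n;
}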
@@ -679,14 +677,6 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
679 return NET_XMIT_SUCCESS; 677 return NET_XMIT_SUCCESS;
680} 678}
681 679
682static void htb_timer(unsigned long arg)
683{
684 struct Qdisc *sch = (struct Qdisc *)arg;
685 sch->flags &= ~TCQ_F_THROTTLED;
686 wmb();
687 netif_schedule(sch->dev);
688}
689
690#ifdef HTB_RATECM 680#ifdef HTB_RATECM
691#define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0 681#define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0
692static void htb_rate_timer(unsigned long arg) 682static void htb_rate_timer(unsigned long arg)
@@ -739,7 +729,7 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
739 cl->T = toks 729 cl->T = toks
740 730
741 while (cl) { 731 while (cl) {
742 diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer); 732 diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
743 if (cl->level >= level) { 733 if (cl->level >= level) {
744 if (cl->level == level) 734 if (cl->level == level)
745 cl->xstats.lends++; 735 cl->xstats.lends++;
@@ -778,11 +768,11 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
778/** 768/**
779 * htb_do_events - make mode changes to classes at the level 769 * htb_do_events - make mode changes to classes at the level
780 * 770 *
781 * Scans event queue for pending events and applies them. Returns jiffies to 771 * Scans event queue for pending events and applies them. Returns time of
782 * next pending event (0 for no event in pq). 772 * next pending event (0 for no event in pq).
783 * Note: Aplied are events whose have cl->pq_key <= jiffies. 773 * Note: Applied are events that have cl->pq_key <= q->now.
784 */ 774 */
785static long htb_do_events(struct htb_sched *q, int level) 775static psched_time_t htb_do_events(struct htb_sched *q, int level)
786{ 776{
787 int i; 777 int i;
788 778
@@ -795,18 +785,18 @@ static long htb_do_events(struct htb_sched *q, int level)
795 return 0; 785 return 0;
796 786
797 cl = rb_entry(p, struct htb_class, pq_node); 787 cl = rb_entry(p, struct htb_class, pq_node);
798 if (time_after(cl->pq_key, q->jiffies)) { 788 if (cl->pq_key > q->now)
799 return cl->pq_key - q->jiffies; 789 return cl->pq_key;
800 } 790
801 htb_safe_rb_erase(p, q->wait_pq + level); 791 htb_safe_rb_erase(p, q->wait_pq + level);
802 diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer); 792 diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
803 htb_change_class_mode(q, cl, &diff); 793 htb_change_class_mode(q, cl, &diff);
804 if (cl->cmode != HTB_CAN_SEND) 794 if (cl->cmode != HTB_CAN_SEND)
805 htb_add_to_wait_tree(q, cl, diff); 795 htb_add_to_wait_tree(q, cl, diff);
806 } 796 }
807 if (net_ratelimit()) 797 if (net_ratelimit())
808 printk(KERN_WARNING "htb: too many events !\n"); 798 printk(KERN_WARNING "htb: too many events !\n");
809 return HZ / 10; 799 return q->now + PSCHED_TICKS_PER_SEC / 10;
810} 800}
811 801
812/* Returns class->node+prio from id-tree where class's id is >= id. NULL 802/* Returns class->node+prio from id-tree where class's id is >= id. NULL
@@ -958,30 +948,12 @@ next:
958 return skb; 948 return skb;
959} 949}
960 950
961static void htb_delay_by(struct Qdisc *sch, long delay)
962{
963 struct htb_sched *q = qdisc_priv(sch);
964 if (delay <= 0)
965 delay = 1;
966 if (unlikely(delay > 5 * HZ)) {
967 if (net_ratelimit())
968 printk(KERN_INFO "HTB delay %ld > 5sec\n", delay);
969 delay = 5 * HZ;
970 }
971 /* why don't use jiffies here ? because expires can be in past */
972 mod_timer(&q->timer, q->jiffies + delay);
973 sch->flags |= TCQ_F_THROTTLED;
974 sch->qstats.overlimits++;
975}
976
977static struct sk_buff *htb_dequeue(struct Qdisc *sch) 951static struct sk_buff *htb_dequeue(struct Qdisc *sch)
978{ 952{
979 struct sk_buff *skb = NULL; 953 struct sk_buff *skb = NULL;
980 struct htb_sched *q = qdisc_priv(sch); 954 struct htb_sched *q = qdisc_priv(sch);
981 int level; 955 int level;
982 long min_delay; 956 psched_time_t next_event;
983
984 q->jiffies = jiffies;
985 957
986 /* try to dequeue direct packets as high prio (!) to minimize cpu work */ 958 /* try to dequeue direct packets as high prio (!) to minimize cpu work */
987 skb = __skb_dequeue(&q->direct_queue); 959 skb = __skb_dequeue(&q->direct_queue);
@@ -993,23 +965,25 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
993 965
994 if (!sch->q.qlen) 966 if (!sch->q.qlen)
995 goto fin; 967 goto fin;
996 PSCHED_GET_TIME(q->now); 968 q->now = psched_get_time();
997 969
998 min_delay = LONG_MAX; 970 next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
999 q->nwc_hit = 0; 971 q->nwc_hit = 0;
1000 for (level = 0; level < TC_HTB_MAXDEPTH; level++) { 972 for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
1001 /* common case optimization - skip event handler quickly */ 973 /* common case optimization - skip event handler quickly */
1002 int m; 974 int m;
1003 long delay; 975 psched_time_t event;
1004 if (time_after_eq(q->jiffies, q->near_ev_cache[level])) { 976
1005 delay = htb_do_events(q, level); 977 if (q->now >= q->near_ev_cache[level]) {
1006 q->near_ev_cache[level] = 978 event = htb_do_events(q, level);
1007 q->jiffies + (delay ? delay : HZ); 979 q->near_ev_cache[level] = event ? event :
980 PSCHED_TICKS_PER_SEC;
1008 } else 981 } else
1009 delay = q->near_ev_cache[level] - q->jiffies; 982 event = q->near_ev_cache[level];
983
984 if (event && next_event > event)
985 next_event = event;
1010 986
1011 if (delay && min_delay > delay)
1012 min_delay = delay;
1013 m = ~q->row_mask[level]; 987 m = ~q->row_mask[level];
1014 while (m != (int)(-1)) { 988 while (m != (int)(-1)) {
1015 int prio = ffz(m); 989 int prio = ffz(m);
@@ -1022,7 +996,8 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
1022 } 996 }
1023 } 997 }
1024 } 998 }
1025 htb_delay_by(sch, min_delay > 5 * HZ ? 5 * HZ : min_delay); 999 sch->qstats.overlimits++;
1000 qdisc_watchdog_schedule(&q->watchdog, next_event);
1026fin: 1001fin:
1027 return skb; 1002 return skb;
1028} 1003}
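In absolute time the per-level bookkeeping reduces to a running minimum over the event cache, seeded with the same 5 second cap the old htb_delay_by() enforced. The loop's arithmetic as a freestanding sketch (u64 stands in for psched_time_t, and the tick rate is the assumed 1.024us resolution):

#include <stdint.h>

#define LEVELS          8               /* stands in for TC_HTB_MAXDEPTH */
#define TICKS_PER_SEC   976562ULL       /* assumed 1.024us psched ticks */

/* Earliest pending event across all levels, starting from the 5s cap,
 * as htb_dequeue() computes next_event before scheduling the watchdog.
 * A zero entry means "no event pending on this level". */
static uint64_t next_event_time(uint64_t now, const uint64_t ev[LEVELS])
{
        uint64_t next = now + 5 * TICKS_PER_SEC;
        int level;

        for (level = 0; level < LEVELS; level++)
                if (ev[level] && next > ev[level])
                        next = ev[level];
        return next;
}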
@@ -1075,8 +1050,7 @@ static void htb_reset(struct Qdisc *sch)
1075 1050
1076 } 1051 }
1077 } 1052 }
1078 sch->flags &= ~TCQ_F_THROTTLED; 1053 qdisc_watchdog_cancel(&q->watchdog);
1079 del_timer(&q->timer);
1080 __skb_queue_purge(&q->direct_queue); 1054 __skb_queue_purge(&q->direct_queue);
1081 sch->q.qlen = 0; 1055 sch->q.qlen = 0;
1082 memset(q->row, 0, sizeof(q->row)); 1056 memset(q->row, 0, sizeof(q->row));
@@ -1113,14 +1087,12 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
1113 for (i = 0; i < TC_HTB_NUMPRIO; i++) 1087 for (i = 0; i < TC_HTB_NUMPRIO; i++)
1114 INIT_LIST_HEAD(q->drops + i); 1088 INIT_LIST_HEAD(q->drops + i);
1115 1089
1116 init_timer(&q->timer); 1090 qdisc_watchdog_init(&q->watchdog, sch);
1117 skb_queue_head_init(&q->direct_queue); 1091 skb_queue_head_init(&q->direct_queue);
1118 1092
1119 q->direct_qlen = sch->dev->tx_queue_len; 1093 q->direct_qlen = sch->dev->tx_queue_len;
1120 if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ 1094 if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
1121 q->direct_qlen = 2; 1095 q->direct_qlen = 2;
1122 q->timer.function = htb_timer;
1123 q->timer.data = (unsigned long)sch;
1124 1096
1125#ifdef HTB_RATECM 1097#ifdef HTB_RATECM
1126 init_timer(&q->rttim); 1098 init_timer(&q->rttim);
@@ -1139,7 +1111,7 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
1139static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) 1111static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1140{ 1112{
1141 struct htb_sched *q = qdisc_priv(sch); 1113 struct htb_sched *q = qdisc_priv(sch);
1142 unsigned char *b = skb->tail; 1114 unsigned char *b = skb_tail_pointer(skb);
1143 struct rtattr *rta; 1115 struct rtattr *rta;
1144 struct tc_htb_glob gopt; 1116 struct tc_htb_glob gopt;
1145 spin_lock_bh(&sch->dev->queue_lock); 1117 spin_lock_bh(&sch->dev->queue_lock);
@@ -1152,12 +1124,12 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1152 rta = (struct rtattr *)b; 1124 rta = (struct rtattr *)b;
1153 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 1125 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1154 RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt); 1126 RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
1155 rta->rta_len = skb->tail - b; 1127 rta->rta_len = skb_tail_pointer(skb) - b;
1156 spin_unlock_bh(&sch->dev->queue_lock); 1128 spin_unlock_bh(&sch->dev->queue_lock);
1157 return skb->len; 1129 return skb->len;
1158rtattr_failure: 1130rtattr_failure:
1159 spin_unlock_bh(&sch->dev->queue_lock); 1131 spin_unlock_bh(&sch->dev->queue_lock);
1160 skb_trim(skb, skb->tail - skb->data); 1132 nlmsg_trim(skb, skb_tail_pointer(skb));
1161 return -1; 1133 return -1;
1162} 1134}
1163 1135
@@ -1165,7 +1137,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1165 struct sk_buff *skb, struct tcmsg *tcm) 1137 struct sk_buff *skb, struct tcmsg *tcm)
1166{ 1138{
1167 struct htb_class *cl = (struct htb_class *)arg; 1139 struct htb_class *cl = (struct htb_class *)arg;
1168 unsigned char *b = skb->tail; 1140 unsigned char *b = skb_tail_pointer(skb);
1169 struct rtattr *rta; 1141 struct rtattr *rta;
1170 struct tc_htb_opt opt; 1142 struct tc_htb_opt opt;
1171 1143
@@ -1188,12 +1160,12 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1188 opt.prio = cl->un.leaf.prio; 1160 opt.prio = cl->un.leaf.prio;
1189 opt.level = cl->level; 1161 opt.level = cl->level;
1190 RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt); 1162 RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
1191 rta->rta_len = skb->tail - b; 1163 rta->rta_len = skb_tail_pointer(skb) - b;
1192 spin_unlock_bh(&sch->dev->queue_lock); 1164 spin_unlock_bh(&sch->dev->queue_lock);
1193 return skb->len; 1165 return skb->len;
1194rtattr_failure: 1166rtattr_failure:
1195 spin_unlock_bh(&sch->dev->queue_lock); 1167 spin_unlock_bh(&sch->dev->queue_lock);
1196 skb_trim(skb, b - skb->data); 1168 nlmsg_trim(skb, b);
1197 return -1; 1169 return -1;
1198} 1170}
1199 1171
@@ -1264,16 +1236,6 @@ static unsigned long htb_get(struct Qdisc *sch, u32 classid)
1264 return (unsigned long)cl; 1236 return (unsigned long)cl;
1265} 1237}
1266 1238
1267static void htb_destroy_filters(struct tcf_proto **fl)
1268{
1269 struct tcf_proto *tp;
1270
1271 while ((tp = *fl) != NULL) {
1272 *fl = tp->next;
1273 tcf_destroy(tp);
1274 }
1275}
1276
1277static inline int htb_parent_last_child(struct htb_class *cl) 1239static inline int htb_parent_last_child(struct htb_class *cl)
1278{ 1240{
1279 if (!cl->parent) 1241 if (!cl->parent)
@@ -1302,7 +1264,7 @@ static void htb_parent_to_leaf(struct htb_class *cl, struct Qdisc *new_q)
1302 parent->un.leaf.prio = parent->prio; 1264 parent->un.leaf.prio = parent->prio;
1303 parent->tokens = parent->buffer; 1265 parent->tokens = parent->buffer;
1304 parent->ctokens = parent->cbuffer; 1266 parent->ctokens = parent->cbuffer;
1305 PSCHED_GET_TIME(parent->t_c); 1267 parent->t_c = psched_get_time();
1306 parent->cmode = HTB_CAN_SEND; 1268 parent->cmode = HTB_CAN_SEND;
1307} 1269}
1308 1270
@@ -1317,7 +1279,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1317 qdisc_put_rtab(cl->rate); 1279 qdisc_put_rtab(cl->rate);
1318 qdisc_put_rtab(cl->ceil); 1280 qdisc_put_rtab(cl->ceil);
1319 1281
1320 htb_destroy_filters(&cl->filter_list); 1282 tcf_destroy_chain(cl->filter_list);
1321 1283
1322 while (!list_empty(&cl->children)) 1284 while (!list_empty(&cl->children))
1323 htb_destroy_class(sch, list_entry(cl->children.next, 1285 htb_destroy_class(sch, list_entry(cl->children.next,
@@ -1341,7 +1303,7 @@ static void htb_destroy(struct Qdisc *sch)
1341{ 1303{
1342 struct htb_sched *q = qdisc_priv(sch); 1304 struct htb_sched *q = qdisc_priv(sch);
1343 1305
1344 del_timer_sync(&q->timer); 1306 qdisc_watchdog_cancel(&q->watchdog);
1345#ifdef HTB_RATECM 1307#ifdef HTB_RATECM
1346 del_timer_sync(&q->rttim); 1308 del_timer_sync(&q->rttim);
1347#endif 1309#endif
@@ -1349,7 +1311,7 @@ static void htb_destroy(struct Qdisc *sch)
1349 and surprisingly it worked in 2.4. But it must precede it 1311 and surprisingly it worked in 2.4. But it must precede it
1350 because the filter needs its target class alive to be able to call 1312 because the filter needs its target class alive to be able to call
1351 unbind_filter on it (without Oops). */ 1313 unbind_filter on it (without Oops). */
1352 htb_destroy_filters(&q->filter_list); 1314 tcf_destroy_chain(q->filter_list);
1353 1315
1354 while (!list_empty(&q->root)) 1316 while (!list_empty(&q->root))
1355 htb_destroy_class(sch, list_entry(q->root.next, 1317 htb_destroy_class(sch, list_entry(q->root.next,
@@ -1498,8 +1460,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1498 /* set class to be in HTB_CAN_SEND state */ 1460 /* set class to be in HTB_CAN_SEND state */
1499 cl->tokens = hopt->buffer; 1461 cl->tokens = hopt->buffer;
1500 cl->ctokens = hopt->cbuffer; 1462 cl->ctokens = hopt->cbuffer;
1501 cl->mbuffer = PSCHED_JIFFIE2US(HZ * 60); /* 1min */ 1463 cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC; /* 1min */
1502 PSCHED_GET_TIME(cl->t_c); 1464 cl->t_c = psched_get_time();
1503 cl->cmode = HTB_CAN_SEND; 1465 cl->cmode = HTB_CAN_SEND;
1504 1466
1505 /* attach to the hash list and parent's family */ 1467 /* attach to the hash list and parent's family */
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index cfe070ee6ee3..f8b9f1cdf738 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -16,6 +16,7 @@
16#include <linux/netfilter_ipv6.h> 16#include <linux/netfilter_ipv6.h>
17#include <linux/netfilter.h> 17#include <linux/netfilter.h>
18#include <linux/smp.h> 18#include <linux/smp.h>
19#include <net/netlink.h>
19#include <net/pkt_sched.h> 20#include <net/pkt_sched.h>
20#include <asm/byteorder.h> 21#include <asm/byteorder.h>
21#include <asm/uaccess.h> 22#include <asm/uaccess.h>
@@ -169,7 +170,7 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
169 skb->tc_index = TC_H_MIN(res.classid); 170 skb->tc_index = TC_H_MIN(res.classid);
170 result = TC_ACT_OK; 171 result = TC_ACT_OK;
171 break; 172 break;
172 }; 173 }
173/* backward compat */ 174/* backward compat */
174#else 175#else
175#ifdef CONFIG_NET_CLS_POLICE 176#ifdef CONFIG_NET_CLS_POLICE
@@ -186,7 +187,7 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
186 sch->bstats.bytes += skb->len; 187 sch->bstats.bytes += skb->len;
187 result = NF_ACCEPT; 188 result = NF_ACCEPT;
188 break; 189 break;
189 }; 190 }
190 191
191#else 192#else
192 D2PRINTK("Overriding result to ACCEPT\n"); 193 D2PRINTK("Overriding result to ACCEPT\n");
@@ -247,16 +248,11 @@ ing_hook(unsigned int hook, struct sk_buff **pskb,
247 skb->dev ? (*pskb)->dev->name : "(no dev)", 248 skb->dev ? (*pskb)->dev->name : "(no dev)",
248 skb->len); 249 skb->len);
249 250
250/*
251revisit later: Use a private since lock dev->queue_lock is also
252used on the egress (might slow things for an iota)
253*/
254
255 if (dev->qdisc_ingress) { 251 if (dev->qdisc_ingress) {
256 spin_lock(&dev->queue_lock); 252 spin_lock(&dev->ingress_lock);
257 if ((q = dev->qdisc_ingress) != NULL) 253 if ((q = dev->qdisc_ingress) != NULL)
258 fwres = q->enqueue(skb, q); 254 fwres = q->enqueue(skb, q);
259 spin_unlock(&dev->queue_lock); 255 spin_unlock(&dev->ingress_lock);
260 } 256 }
261 257
262 return fwres; 258 return fwres;
@@ -345,14 +341,9 @@ static void ingress_reset(struct Qdisc *sch)
345static void ingress_destroy(struct Qdisc *sch) 341static void ingress_destroy(struct Qdisc *sch)
346{ 342{
347 struct ingress_qdisc_data *p = PRIV(sch); 343 struct ingress_qdisc_data *p = PRIV(sch);
348 struct tcf_proto *tp;
349 344
350 DPRINTK("ingress_destroy(sch %p,[qdisc %p])\n", sch, p); 345 DPRINTK("ingress_destroy(sch %p,[qdisc %p])\n", sch, p);
351 while (p->filter_list) { 346 tcf_destroy_chain(p->filter_list);
352 tp = p->filter_list;
353 p->filter_list = tp->next;
354 tcf_destroy(tp);
355 }
356#if 0 347#if 0
357/* for future use */ 348/* for future use */
358 qdisc_destroy(p->q); 349 qdisc_destroy(p->q);
@@ -362,16 +353,16 @@ static void ingress_destroy(struct Qdisc *sch)
362 353
363static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb) 354static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
364{ 355{
365 unsigned char *b = skb->tail; 356 unsigned char *b = skb_tail_pointer(skb);
366 struct rtattr *rta; 357 struct rtattr *rta;
367 358
368 rta = (struct rtattr *) b; 359 rta = (struct rtattr *) b;
369 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 360 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
370 rta->rta_len = skb->tail - b; 361 rta->rta_len = skb_tail_pointer(skb) - b;
371 return skb->len; 362 return skb->len;
372 363
373rtattr_failure: 364rtattr_failure:
374 skb_trim(skb, b - skb->data); 365 nlmsg_trim(skb, b);
375 return -1; 366 return -1;
376} 367}
377 368
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 1ccbfb55b0b8..5d9d8bc9cc3a 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -22,6 +22,7 @@
22#include <linux/skbuff.h> 22#include <linux/skbuff.h>
23#include <linux/rtnetlink.h> 23#include <linux/rtnetlink.h>
24 24
25#include <net/netlink.h>
25#include <net/pkt_sched.h> 26#include <net/pkt_sched.h>
26 27
27#define VERSION "1.2" 28#define VERSION "1.2"
@@ -54,21 +55,22 @@
54 55
55struct netem_sched_data { 56struct netem_sched_data {
56 struct Qdisc *qdisc; 57 struct Qdisc *qdisc;
57 struct timer_list timer; 58 struct qdisc_watchdog watchdog;
59
60 psched_tdiff_t latency;
61 psched_tdiff_t jitter;
58 62
59 u32 latency;
60 u32 loss; 63 u32 loss;
61 u32 limit; 64 u32 limit;
62 u32 counter; 65 u32 counter;
63 u32 gap; 66 u32 gap;
64 u32 jitter;
65 u32 duplicate; 67 u32 duplicate;
66 u32 reorder; 68 u32 reorder;
67 u32 corrupt; 69 u32 corrupt;
68 70
69 struct crndstate { 71 struct crndstate {
70 unsigned long last; 72 u32 last;
71 unsigned long rho; 73 u32 rho;
72 } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor; 74 } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
73 75
74 struct disttable { 76 struct disttable {
@@ -95,12 +97,12 @@ static void init_crandom(struct crndstate *state, unsigned long rho)
95 * Next number depends on last value. 97 * Next number depends on last value.
96 * rho is scaled to avoid floating point. 98 * rho is scaled to avoid floating point.
97 */ 99 */
98static unsigned long get_crandom(struct crndstate *state) 100static u32 get_crandom(struct crndstate *state)
99{ 101{
100 u64 value, rho; 102 u64 value, rho;
101 unsigned long answer; 103 unsigned long answer;
102 104
103 if (state->rho == 0) /* no correllation */ 105 if (state->rho == 0) /* no correlation */
104 return net_random(); 106 return net_random();
105 107
106 value = net_random(); 108 value = net_random();
@@ -114,11 +116,13 @@ static unsigned long get_crandom(struct crndstate *state)
114 * std deviation sigma. Uses table lookup to approximate the desired 116 * std deviation sigma. Uses table lookup to approximate the desired
115 * distribution, and a uniformly-distributed pseudo-random source. 117 * distribution, and a uniformly-distributed pseudo-random source.
116 */ 118 */
117static long tabledist(unsigned long mu, long sigma, 119static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
118 struct crndstate *state, const struct disttable *dist) 120 struct crndstate *state,
121 const struct disttable *dist)
119{ 122{
120 long t, x; 123 psched_tdiff_t x;
121 unsigned long rnd; 124 long t;
125 u32 rnd;
122 126
123 if (sigma == 0) 127 if (sigma == 0)
124 return mu; 128 return mu;
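tabledist() maps a uniform random source through a signed lookup table (an inverse-CDF approximation) so that mu and sigma, now psched_tdiff_t ticks, parameterize an arbitrary pre-tabulated delay distribution. The essence as a userspace sketch; the fixed-point scale and rounding only loosely follow the real NETEM_DIST_SCALE handling:

#include <stdint.h>
#include <stdlib.h>

#define DIST_SCALE 8192         /* assumed fixed-point scale of the table */

/* delay = mu + sigma * T(u): T is a signed table approximating the
 * inverse CDF of the target distribution, u uniform. rand() stands
 * in for the correlated generator get_crandom(). */
static int64_t tabledist(int64_t mu, int64_t sigma,
                         const int16_t *table, unsigned int size)
{
        uint32_t rnd;

        if (sigma == 0)
                return mu;

        rnd = (uint32_t)rand();
        if (table == NULL)      /* no table: fall back to uniform jitter */
                return (int64_t)(rnd % (2 * sigma)) - sigma + mu;

        return mu + sigma * table[rnd % size] / DIST_SCALE;
}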
@@ -213,8 +217,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
213 delay = tabledist(q->latency, q->jitter, 217 delay = tabledist(q->latency, q->jitter,
214 &q->delay_cor, q->delay_dist); 218 &q->delay_cor, q->delay_dist);
215 219
216 PSCHED_GET_TIME(now); 220 now = psched_get_time();
217 PSCHED_TADD2(now, delay, cb->time_to_send); 221 cb->time_to_send = now + delay;
218 ++q->counter; 222 ++q->counter;
219 ret = q->qdisc->enqueue(skb, q->qdisc); 223 ret = q->qdisc->enqueue(skb, q->qdisc);
220 } else { 224 } else {
@@ -222,7 +226,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
222 * Do re-ordering by putting one out of N packets at the front 226 * Do re-ordering by putting one out of N packets at the front
223 * of the queue. 227 * of the queue.
224 */ 228 */
225 PSCHED_GET_TIME(cb->time_to_send); 229 cb->time_to_send = psched_get_time();
226 q->counter = 0; 230 q->counter = 0;
227 ret = q->qdisc->ops->requeue(skb, q->qdisc); 231 ret = q->qdisc->ops->requeue(skb, q->qdisc);
228 } 232 }
@@ -269,55 +273,43 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
269 struct netem_sched_data *q = qdisc_priv(sch); 273 struct netem_sched_data *q = qdisc_priv(sch);
270 struct sk_buff *skb; 274 struct sk_buff *skb;
271 275
276 smp_mb();
277 if (sch->flags & TCQ_F_THROTTLED)
278 return NULL;
279
272 skb = q->qdisc->dequeue(q->qdisc); 280 skb = q->qdisc->dequeue(q->qdisc);
273 if (skb) { 281 if (skb) {
274 const struct netem_skb_cb *cb 282 const struct netem_skb_cb *cb
275 = (const struct netem_skb_cb *)skb->cb; 283 = (const struct netem_skb_cb *)skb->cb;
276 psched_time_t now; 284 psched_time_t now = psched_get_time();
277 285
278 /* if more time remaining? */ 286 /* if more time remaining? */
279 PSCHED_GET_TIME(now); 287 if (cb->time_to_send <= now) {
280
281 if (PSCHED_TLESS(cb->time_to_send, now)) {
282 pr_debug("netem_dequeue: return skb=%p\n", skb); 288 pr_debug("netem_dequeue: return skb=%p\n", skb);
283 sch->q.qlen--; 289 sch->q.qlen--;
284 sch->flags &= ~TCQ_F_THROTTLED;
285 return skb; 290 return skb;
286 } else { 291 }
287 psched_tdiff_t delay = PSCHED_TDIFF(cb->time_to_send, now);
288
289 if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
290 qdisc_tree_decrease_qlen(q->qdisc, 1);
291 sch->qstats.drops++;
292 printk(KERN_ERR "netem: queue discpline %s could not requeue\n",
293 q->qdisc->ops->id);
294 }
295 292
296 mod_timer(&q->timer, jiffies + PSCHED_US2JIFFIE(delay)); 293 if (unlikely(q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS)) {
297 sch->flags |= TCQ_F_THROTTLED; 294 qdisc_tree_decrease_qlen(q->qdisc, 1);
295 sch->qstats.drops++;
296 printk(KERN_ERR "netem: %s could not requeue\n",
297 q->qdisc->ops->id);
298 } 298 }
299
300 qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
299 } 301 }
300 302
301 return NULL; 303 return NULL;
302} 304}
303 305
304static void netem_watchdog(unsigned long arg)
305{
306 struct Qdisc *sch = (struct Qdisc *)arg;
307
308 pr_debug("netem_watchdog qlen=%d\n", sch->q.qlen);
309 sch->flags &= ~TCQ_F_THROTTLED;
310 netif_schedule(sch->dev);
311}
312
313static void netem_reset(struct Qdisc *sch) 306static void netem_reset(struct Qdisc *sch)
314{ 307{
315 struct netem_sched_data *q = qdisc_priv(sch); 308 struct netem_sched_data *q = qdisc_priv(sch);
316 309
317 qdisc_reset(q->qdisc); 310 qdisc_reset(q->qdisc);
318 sch->q.qlen = 0; 311 sch->q.qlen = 0;
319 sch->flags &= ~TCQ_F_THROTTLED; 312 qdisc_watchdog_cancel(&q->watchdog);
320 del_timer_sync(&q->timer);
321} 313}
322 314
323/* Pass size change message down to embedded FIFO */ 315/* Pass size change message down to embedded FIFO */
@@ -438,10 +430,11 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
438 q->loss = qopt->loss; 430 q->loss = qopt->loss;
439 q->duplicate = qopt->duplicate; 431 q->duplicate = qopt->duplicate;
440 432
441 /* for compatiablity with earlier versions. 433 /* for compatibility with earlier versions.
442 * if gap is set, need to assume 100% probablity 434 * if gap is set, need to assume 100% probability
443 */ 435 */
444 q->reorder = ~0; 436 if (q->gap)
437 q->reorder = ~0;
445 438
446 /* Handle nested options after initial queue options. 439 /* Handle nested options after initial queue options.
447 * Should have put all options in nested format but too late now. 440 * Should have put all options in nested format but too late now.
@@ -487,22 +480,28 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
487 */ 480 */
488struct fifo_sched_data { 481struct fifo_sched_data {
489 u32 limit; 482 u32 limit;
483 psched_time_t oldest;
490}; 484};
491 485
492static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) 486static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
493{ 487{
494 struct fifo_sched_data *q = qdisc_priv(sch); 488 struct fifo_sched_data *q = qdisc_priv(sch);
495 struct sk_buff_head *list = &sch->q; 489 struct sk_buff_head *list = &sch->q;
496 const struct netem_skb_cb *ncb 490 psched_time_t tnext = ((struct netem_skb_cb *)nskb->cb)->time_to_send;
497 = (const struct netem_skb_cb *)nskb->cb;
498 struct sk_buff *skb; 491 struct sk_buff *skb;
499 492
500 if (likely(skb_queue_len(list) < q->limit)) { 493 if (likely(skb_queue_len(list) < q->limit)) {
494 /* Optimize for add at tail */
495 if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
496 q->oldest = tnext;
497 return qdisc_enqueue_tail(nskb, sch);
498 }
499
501 skb_queue_reverse_walk(list, skb) { 500 skb_queue_reverse_walk(list, skb) {
502 const struct netem_skb_cb *cb 501 const struct netem_skb_cb *cb
503 = (const struct netem_skb_cb *)skb->cb; 502 = (const struct netem_skb_cb *)skb->cb;
504 503
505 if (!PSCHED_TLESS(ncb->time_to_send, cb->time_to_send)) 504 if (tnext >= cb->time_to_send)
506 break; 505 break;
507 } 506 }
508 507
@@ -515,7 +514,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
515 return NET_XMIT_SUCCESS; 514 return NET_XMIT_SUCCESS;
516 } 515 }
517 516
518 return qdisc_drop(nskb, sch); 517 return qdisc_reshape_fail(nskb, sch);
519} 518}
520 519
521static int tfifo_init(struct Qdisc *sch, struct rtattr *opt) 520static int tfifo_init(struct Qdisc *sch, struct rtattr *opt)
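tfifo keeps the inner queue sorted by time_to_send; the new q->oldest cache makes the common in-order arrival an O(1) tail append, and only reordered packets pay for the reverse walk. The same logic on a plain array, as a standalone sketch (LIMIT and the array stand in for q->limit and the sk_buff list):

#include <stdint.h>

#define LIMIT 64

struct tq {
        uint64_t t[LIMIT];      /* time_to_send of queued packets, ascending */
        unsigned int len;
        uint64_t oldest;        /* largest timestamp seen, like q->oldest */
};

/* 0 on success, -1 when full (the qdisc_reshape_fail() case). */
static int tfifo_enqueue(struct tq *q, uint64_t tnext)
{
        unsigned int i;

        if (q->len >= LIMIT)
                return -1;

        /* fast path: in-order arrival appends at the tail */
        if (q->len == 0 || tnext >= q->oldest) {
                q->oldest = tnext;
                q->t[q->len++] = tnext;
                return 0;
        }

        /* slow path: walk back from the tail to the insertion point;
         * stopping on t[i-1] <= tnext keeps equal timestamps stable,
         * matching the tnext >= cb->time_to_send break above */
        for (i = q->len; i > 0 && q->t[i - 1] > tnext; i--)
                q->t[i] = q->t[i - 1];
        q->t[i] = tnext;
        q->len++;
        return 0;
}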
@@ -531,6 +530,7 @@ static int tfifo_init(struct Qdisc *sch, struct rtattr *opt)
531 } else 530 } else
532 q->limit = max_t(u32, sch->dev->tx_queue_len, 1); 531 q->limit = max_t(u32, sch->dev->tx_queue_len, 1);
533 532
533 q->oldest = PSCHED_PASTPERFECT;
534 return 0; 534 return 0;
535} 535}
536 536
@@ -567,9 +567,7 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt)
567 if (!opt) 567 if (!opt)
568 return -EINVAL; 568 return -EINVAL;
569 569
570 init_timer(&q->timer); 570 qdisc_watchdog_init(&q->watchdog, sch);
571 q->timer.function = netem_watchdog;
572 q->timer.data = (unsigned long) sch;
573 571
574 q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops, 572 q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops,
575 TC_H_MAKE(sch->handle, 1)); 573 TC_H_MAKE(sch->handle, 1));
@@ -590,7 +588,7 @@ static void netem_destroy(struct Qdisc *sch)
590{ 588{
591 struct netem_sched_data *q = qdisc_priv(sch); 589 struct netem_sched_data *q = qdisc_priv(sch);
592 590
593 del_timer_sync(&q->timer); 591 qdisc_watchdog_cancel(&q->watchdog);
594 qdisc_destroy(q->qdisc); 592 qdisc_destroy(q->qdisc);
595 kfree(q->delay_dist); 593 kfree(q->delay_dist);
596} 594}
@@ -598,7 +596,7 @@ static void netem_destroy(struct Qdisc *sch)
598static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) 596static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
599{ 597{
600 const struct netem_sched_data *q = qdisc_priv(sch); 598 const struct netem_sched_data *q = qdisc_priv(sch);
601 unsigned char *b = skb->tail; 599 unsigned char *b = skb_tail_pointer(skb);
602 struct rtattr *rta = (struct rtattr *) b; 600 struct rtattr *rta = (struct rtattr *) b;
603 struct tc_netem_qopt qopt; 601 struct tc_netem_qopt qopt;
604 struct tc_netem_corr cor; 602 struct tc_netem_corr cor;
@@ -626,12 +624,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
626 corrupt.correlation = q->corrupt_cor.rho; 624 corrupt.correlation = q->corrupt_cor.rho;
627 RTA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt); 625 RTA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
628 626
629 rta->rta_len = skb->tail - b; 627 rta->rta_len = skb_tail_pointer(skb) - b;
630 628
631 return skb->len; 629 return skb->len;
632 630
633rtattr_failure: 631rtattr_failure:
634 skb_trim(skb, b - skb->data); 632 nlmsg_trim(skb, b);
635 return -1; 633 return -1;
636} 634}
637 635
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index de889f23f22a..269a6e17c6c4 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -32,6 +32,7 @@
32#include <net/ip.h> 32#include <net/ip.h>
33#include <net/route.h> 33#include <net/route.h>
34#include <linux/skbuff.h> 34#include <linux/skbuff.h>
35#include <net/netlink.h>
35#include <net/sock.h> 36#include <net/sock.h>
36#include <net/pkt_sched.h> 37#include <net/pkt_sched.h>
37 38
@@ -61,7 +62,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
61 *qerr = NET_XMIT_SUCCESS; 62 *qerr = NET_XMIT_SUCCESS;
62 case TC_ACT_SHOT: 63 case TC_ACT_SHOT:
63 return NULL; 64 return NULL;
64 }; 65 }
65 66
66 if (!q->filter_list ) { 67 if (!q->filter_list ) {
67#else 68#else
@@ -188,13 +189,8 @@ prio_destroy(struct Qdisc* sch)
188{ 189{
189 int prio; 190 int prio;
190 struct prio_sched_data *q = qdisc_priv(sch); 191 struct prio_sched_data *q = qdisc_priv(sch);
191 struct tcf_proto *tp;
192
193 while ((tp = q->filter_list) != NULL) {
194 q->filter_list = tp->next;
195 tcf_destroy(tp);
196 }
197 192
193 tcf_destroy_chain(q->filter_list);
198 for (prio=0; prio<q->bands; prio++) 194 for (prio=0; prio<q->bands; prio++)
199 qdisc_destroy(q->queues[prio]); 195 qdisc_destroy(q->queues[prio]);
200} 196}
@@ -271,7 +267,7 @@ static int prio_init(struct Qdisc *sch, struct rtattr *opt)
271static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) 267static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
272{ 268{
273 struct prio_sched_data *q = qdisc_priv(sch); 269 struct prio_sched_data *q = qdisc_priv(sch);
274 unsigned char *b = skb->tail; 270 unsigned char *b = skb_tail_pointer(skb);
275 struct tc_prio_qopt opt; 271 struct tc_prio_qopt opt;
276 272
277 opt.bands = q->bands; 273 opt.bands = q->bands;
@@ -280,7 +276,7 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
280 return skb->len; 276 return skb->len;
281 277
282rtattr_failure: 278rtattr_failure:
283 skb_trim(skb, b - skb->data); 279 nlmsg_trim(skb, b);
284 return -1; 280 return -1;
285} 281}
286 282
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 66f32051a99b..96dfdf78d32c 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -30,6 +30,7 @@
30#include <linux/notifier.h> 30#include <linux/notifier.h>
31#include <linux/init.h> 31#include <linux/init.h>
32#include <net/ip.h> 32#include <net/ip.h>
33#include <net/netlink.h>
33#include <linux/ipv6.h> 34#include <linux/ipv6.h>
34#include <net/route.h> 35#include <net/route.h>
35#include <linux/skbuff.h> 36#include <linux/skbuff.h>
@@ -137,7 +138,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
137 switch (skb->protocol) { 138 switch (skb->protocol) {
138 case __constant_htons(ETH_P_IP): 139 case __constant_htons(ETH_P_IP):
139 { 140 {
140 struct iphdr *iph = skb->nh.iph; 141 const struct iphdr *iph = ip_hdr(skb);
141 h = iph->daddr; 142 h = iph->daddr;
142 h2 = iph->saddr^iph->protocol; 143 h2 = iph->saddr^iph->protocol;
143 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 144 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
@@ -152,7 +153,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
152 } 153 }
153 case __constant_htons(ETH_P_IPV6): 154 case __constant_htons(ETH_P_IPV6):
154 { 155 {
155 struct ipv6hdr *iph = skb->nh.ipv6h; 156 struct ipv6hdr *iph = ipv6_hdr(skb);
156 h = iph->daddr.s6_addr32[3]; 157 h = iph->daddr.s6_addr32[3];
157 h2 = iph->saddr.s6_addr32[3]^iph->nexthdr; 158 h2 = iph->saddr.s6_addr32[3]^iph->nexthdr;
158 if (iph->nexthdr == IPPROTO_TCP || 159 if (iph->nexthdr == IPPROTO_TCP ||
@@ -461,7 +462,7 @@ static void sfq_destroy(struct Qdisc *sch)
461static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) 462static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
462{ 463{
463 struct sfq_sched_data *q = qdisc_priv(sch); 464 struct sfq_sched_data *q = qdisc_priv(sch);
464 unsigned char *b = skb->tail; 465 unsigned char *b = skb_tail_pointer(skb);
465 struct tc_sfq_qopt opt; 466 struct tc_sfq_qopt opt;
466 467
467 opt.quantum = q->quantum; 468 opt.quantum = q->quantum;
@@ -476,7 +477,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
476 return skb->len; 477 return skb->len;
477 478
478rtattr_failure: 479rtattr_failure:
479 skb_trim(skb, b - skb->data); 480 nlmsg_trim(skb, b);
480 return -1; 481 return -1;
481} 482}
482 483
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 85da8daa61d2..53862953baaf 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -32,6 +32,7 @@
32#include <linux/etherdevice.h> 32#include <linux/etherdevice.h>
33#include <linux/notifier.h> 33#include <linux/notifier.h>
34#include <net/ip.h> 34#include <net/ip.h>
35#include <net/netlink.h>
35#include <net/route.h> 36#include <net/route.h>
36#include <linux/skbuff.h> 37#include <linux/skbuff.h>
37#include <net/sock.h> 38#include <net/sock.h>
@@ -127,8 +128,8 @@ struct tbf_sched_data
127 long tokens; /* Current number of B tokens */ 128 long tokens; /* Current number of B tokens */
128 long ptokens; /* Current number of P tokens */ 129 long ptokens; /* Current number of P tokens */
129 psched_time_t t_c; /* Time check-point */ 130 psched_time_t t_c; /* Time check-point */
130 struct timer_list wd_timer; /* Watchdog timer */
131 struct Qdisc *qdisc; /* Inner qdisc, default - bfifo queue */ 131 struct Qdisc *qdisc; /* Inner qdisc, default - bfifo queue */
132 struct qdisc_watchdog watchdog; /* Watchdog timer */
132}; 133};
133 134
134#define L2T(q,L) ((q)->R_tab->data[(L)>>(q)->R_tab->rate.cell_log]) 135#define L2T(q,L) ((q)->R_tab->data[(L)>>(q)->R_tab->rate.cell_log])
@@ -185,14 +186,6 @@ static unsigned int tbf_drop(struct Qdisc* sch)
185 return len; 186 return len;
186} 187}
187 188
188static void tbf_watchdog(unsigned long arg)
189{
190 struct Qdisc *sch = (struct Qdisc*)arg;
191
192 sch->flags &= ~TCQ_F_THROTTLED;
193 netif_schedule(sch->dev);
194}
195
196static struct sk_buff *tbf_dequeue(struct Qdisc* sch) 189static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
197{ 190{
198 struct tbf_sched_data *q = qdisc_priv(sch); 191 struct tbf_sched_data *q = qdisc_priv(sch);
@@ -202,13 +195,12 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
202 195
203 if (skb) { 196 if (skb) {
204 psched_time_t now; 197 psched_time_t now;
205 long toks, delay; 198 long toks;
206 long ptoks = 0; 199 long ptoks = 0;
207 unsigned int len = skb->len; 200 unsigned int len = skb->len;
208 201
209 PSCHED_GET_TIME(now); 202 now = psched_get_time();
210 203 toks = psched_tdiff_bounded(now, q->t_c, q->buffer);
211 toks = PSCHED_TDIFF_SAFE(now, q->t_c, q->buffer);
212 204
213 if (q->P_tab) { 205 if (q->P_tab) {
214 ptoks = toks + q->ptokens; 206 ptoks = toks + q->ptokens;
@@ -230,12 +222,8 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
230 return skb; 222 return skb;
231 } 223 }
232 224
233 delay = PSCHED_US2JIFFIE(max_t(long, -toks, -ptoks)); 225 qdisc_watchdog_schedule(&q->watchdog,
234 226 now + max_t(long, -toks, -ptoks));
235 if (delay == 0)
236 delay = 1;
237
238 mod_timer(&q->wd_timer, jiffies+delay);
239 227
240 /* Maybe we have a shorter packet in the queue, 228 /* Maybe we have a shorter packet in the queue,
241 which can be sent now. It sounds cool, 229 which can be sent now. It sounds cool,
@@ -254,7 +242,6 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
254 sch->qstats.drops++; 242 sch->qstats.drops++;
255 } 243 }
256 244
257 sch->flags |= TCQ_F_THROTTLED;
258 sch->qstats.overlimits++; 245 sch->qstats.overlimits++;
259 } 246 }
260 return NULL; 247 return NULL;
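With both tokens and timestamps in psched ticks, the token bucket needs no jiffies conversion: elapsed time is directly spendable budget, bounded by the bucket depth, and a deficit converts directly into the watchdog expiry. A freestanding sketch of the single-rate decision (the byte-to-tick cost is a plain divide standing in for the L2T() rate-table lookup; field widths are illustrative):

#include <stdint.h>

struct tbf {
        int64_t  tokens;        /* current budget, in ticks */
        int64_t  buffer;        /* bucket depth, in ticks */
        uint64_t t_c;           /* last checkpoint, like q->t_c */
        uint32_t bytes_per_tick;/* stands in for the L2T() rate table */
};

/* 1 if 'len' bytes may go out at 'now', else 0 with *wakeup set to the
 * absolute expiry passed to qdisc_watchdog_schedule() in tbf_dequeue(). */
static int tbf_may_send(struct tbf *q, uint64_t now, uint32_t len,
                        uint64_t *wakeup)
{
        int64_t toks = (int64_t)(now - q->t_c); /* psched_tdiff_bounded() */

        if (toks > q->buffer)
                toks = q->buffer;
        toks += q->tokens;
        if (toks > q->buffer)
                toks = q->buffer;
        toks -= len / q->bytes_per_tick;        /* cost of this packet */

        if (toks >= 0) {
                q->t_c = now;
                q->tokens = toks;
                return 1;
        }
        *wakeup = now + (uint64_t)(-toks);
        return 0;
}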
@@ -266,11 +253,10 @@ static void tbf_reset(struct Qdisc* sch)
266 253
267 qdisc_reset(q->qdisc); 254 qdisc_reset(q->qdisc);
268 sch->q.qlen = 0; 255 sch->q.qlen = 0;
269 PSCHED_GET_TIME(q->t_c); 256 q->t_c = psched_get_time();
270 q->tokens = q->buffer; 257 q->tokens = q->buffer;
271 q->ptokens = q->mtu; 258 q->ptokens = q->mtu;
272 sch->flags &= ~TCQ_F_THROTTLED; 259 qdisc_watchdog_cancel(&q->watchdog);
273 del_timer(&q->wd_timer);
274} 260}
275 261
276static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit) 262static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit)
@@ -377,11 +363,8 @@ static int tbf_init(struct Qdisc* sch, struct rtattr *opt)
377 if (opt == NULL) 363 if (opt == NULL)
378 return -EINVAL; 364 return -EINVAL;
379 365
380 PSCHED_GET_TIME(q->t_c); 366 q->t_c = psched_get_time();
381 init_timer(&q->wd_timer); 367 qdisc_watchdog_init(&q->watchdog, sch);
382 q->wd_timer.function = tbf_watchdog;
383 q->wd_timer.data = (unsigned long)sch;
384
385 q->qdisc = &noop_qdisc; 368 q->qdisc = &noop_qdisc;
386 369
387 return tbf_change(sch, opt); 370 return tbf_change(sch, opt);
@@ -391,7 +374,7 @@ static void tbf_destroy(struct Qdisc *sch)
391{ 374{
392 struct tbf_sched_data *q = qdisc_priv(sch); 375 struct tbf_sched_data *q = qdisc_priv(sch);
393 376
394 del_timer(&q->wd_timer); 377 qdisc_watchdog_cancel(&q->watchdog);
395 378
396 if (q->P_tab) 379 if (q->P_tab)
397 qdisc_put_rtab(q->P_tab); 380 qdisc_put_rtab(q->P_tab);
@@ -404,7 +387,7 @@ static void tbf_destroy(struct Qdisc *sch)
404static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) 387static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
405{ 388{
406 struct tbf_sched_data *q = qdisc_priv(sch); 389 struct tbf_sched_data *q = qdisc_priv(sch);
407 unsigned char *b = skb->tail; 390 unsigned char *b = skb_tail_pointer(skb);
408 struct rtattr *rta; 391 struct rtattr *rta;
409 struct tc_tbf_qopt opt; 392 struct tc_tbf_qopt opt;
410 393
@@ -420,12 +403,12 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
420 opt.mtu = q->mtu; 403 opt.mtu = q->mtu;
421 opt.buffer = q->buffer; 404 opt.buffer = q->buffer;
422 RTA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt); 405 RTA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);
423 rta->rta_len = skb->tail - b; 406 rta->rta_len = skb_tail_pointer(skb) - b;
424 407
425 return skb->len; 408 return skb->len;
426 409
427rtattr_failure: 410rtattr_failure:
428 skb_trim(skb, b - skb->data); 411 nlmsg_trim(skb, b);
429 return -1; 412 return -1;
430} 413}
431 414
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 587123c61af9..d24914db7861 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -323,7 +323,7 @@ restart:
323 nores = 1; 323 nores = 1;
324 break; 324 break;
325 } 325 }
326 __skb_pull(skb, skb->nh.raw - skb->data); 326 __skb_pull(skb, skb_network_offset(skb));
327 } while ((q = NEXT_SLAVE(q)) != start); 327 } while ((q = NEXT_SLAVE(q)) != start);
328 328
329 if (nores && skb_res == NULL) { 329 if (nores && skb_res == NULL) {
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 78d2ddb5ca18..db73ef97485a 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -143,7 +143,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
143 /* Initialize the maximum number of new data packets that can be sent 143 /* Initialize the maximum number of new data packets that can be sent
144 * in a burst. 144 * in a burst.
145 */ 145 */
146 asoc->max_burst = sctp_max_burst; 146 asoc->max_burst = sp->max_burst;
147 147
148 /* initialize association timers */ 148 /* initialize association timers */
149 asoc->timeouts[SCTP_EVENT_TIMEOUT_NONE] = 0; 149 asoc->timeouts[SCTP_EVENT_TIMEOUT_NONE] = 0;
@@ -714,8 +714,16 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
714 /* Record the transition on the transport. */ 714 /* Record the transition on the transport. */
715 switch (command) { 715 switch (command) {
716 case SCTP_TRANSPORT_UP: 716 case SCTP_TRANSPORT_UP:
717 /* If we are moving from UNCONFIRMED state due
718 * to heartbeat success, report the SCTP_ADDR_CONFIRMED
719 * state to the user, otherwise report SCTP_ADDR_AVAILABLE.
720 */
721 if (SCTP_UNCONFIRMED == transport->state &&
722 SCTP_HEARTBEAT_SUCCESS == error)
723 spc_state = SCTP_ADDR_CONFIRMED;
724 else
725 spc_state = SCTP_ADDR_AVAILABLE;
717 transport->state = SCTP_ACTIVE; 726 transport->state = SCTP_ACTIVE;
718 spc_state = SCTP_ADDR_AVAILABLE;
719 break; 727 break;
720 728
721 case SCTP_TRANSPORT_DOWN: 729 case SCTP_TRANSPORT_DOWN:
@@ -725,7 +733,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
725 733
726 default: 734 default:
727 return; 735 return;
728 }; 736 }
729 737
730 /* Generate and send a SCTP_PEER_ADDR_CHANGE notification to the 738 /* Generate and send a SCTP_PEER_ADDR_CHANGE notification to the
731 * user. 739 * user.
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 5f5ab28977c9..e8c0f7435d7f 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -93,8 +93,9 @@ const char *sctp_cname(const sctp_subtype_t cid)
93 return "FWD_TSN"; 93 return "FWD_TSN";
94 94
95 default: 95 default:
96 return "unknown chunk"; 96 break;
97 }; 97 }
98
98 return "unknown chunk"; 99 return "unknown chunk";
99} 100}
100 101
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 71db66873695..885109fb3dda 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -79,14 +79,10 @@ static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb);
79/* Calculate the SCTP checksum of an SCTP packet. */ 79/* Calculate the SCTP checksum of an SCTP packet. */
80static inline int sctp_rcv_checksum(struct sk_buff *skb) 80static inline int sctp_rcv_checksum(struct sk_buff *skb)
81{ 81{
82 struct sctphdr *sh;
83 __u32 cmp, val;
84 struct sk_buff *list = skb_shinfo(skb)->frag_list; 82 struct sk_buff *list = skb_shinfo(skb)->frag_list;
85 83 struct sctphdr *sh = sctp_hdr(skb);
86 sh = (struct sctphdr *) skb->h.raw; 84 __u32 cmp = ntohl(sh->checksum);
87 cmp = ntohl(sh->checksum); 85 __u32 val = sctp_start_cksum((__u8 *)sh, skb_headlen(skb));
88
89 val = sctp_start_cksum((__u8 *)sh, skb_headlen(skb));
90 86
91 for (; list; list = list->next) 87 for (; list; list = list->next)
92 val = sctp_update_cksum((__u8 *)list->data, skb_headlen(list), 88 val = sctp_update_cksum((__u8 *)list->data, skb_headlen(list),
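The checksum covers the linear head plus every fragment hanging off frag_list. The traversal shape, as a generic standalone sketch (the fold function is only a placeholder for the real sctp_start_cksum()/sctp_update_cksum() helpers; just the walk mirrors the code above):

#include <stdint.h>
#include <stddef.h>

struct frag {
        const uint8_t *data;
        size_t len;
        struct frag *next;
};

/* placeholder accumulator: not the actual SCTP checksum algorithm */
static uint32_t fold(uint32_t acc, const uint8_t *p, size_t len)
{
        while (len--)
                acc = acc * 31 + *p++;
        return acc;
}

/* Checksum head then each fragment, as sctp_rcv_checksum() does;
 * a mismatch against the value carried in the header yields -1. */
static int rcv_checksum(const uint8_t *head, size_t headlen,
                        const struct frag *list, uint32_t expect)
{
        uint32_t val = fold(0, head, headlen);

        for (; list; list = list->next)
                val = fold(val, list->data, list->len);
        return val == expect ? 0 : -1;
}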
@@ -138,14 +134,13 @@ int sctp_rcv(struct sk_buff *skb)
138 if (skb_linearize(skb)) 134 if (skb_linearize(skb))
139 goto discard_it; 135 goto discard_it;
140 136
141 sh = (struct sctphdr *) skb->h.raw; 137 sh = sctp_hdr(skb);
142 138
143 /* Pull up the IP and SCTP headers. */ 139 /* Pull up the IP and SCTP headers. */
144 __skb_pull(skb, skb->h.raw - skb->data); 140 __skb_pull(skb, skb_transport_offset(skb));
145 if (skb->len < sizeof(struct sctphdr)) 141 if (skb->len < sizeof(struct sctphdr))
146 goto discard_it; 142 goto discard_it;
147 if ((skb->ip_summed != CHECKSUM_UNNECESSARY) && 143 if (!skb_csum_unnecessary(skb) && sctp_rcv_checksum(skb) < 0)
148 (sctp_rcv_checksum(skb) < 0))
149 goto discard_it; 144 goto discard_it;
150 145
151 skb_pull(skb, sizeof(struct sctphdr)); 146 skb_pull(skb, sizeof(struct sctphdr));
@@ -154,7 +149,7 @@ int sctp_rcv(struct sk_buff *skb)
154 if (skb->len < sizeof(struct sctp_chunkhdr)) 149 if (skb->len < sizeof(struct sctp_chunkhdr))
155 goto discard_it; 150 goto discard_it;
156 151
157 family = ipver2af(skb->nh.iph->version); 152 family = ipver2af(ip_hdr(skb)->version);
158 af = sctp_get_af_specific(family); 153 af = sctp_get_af_specific(family);
159 if (unlikely(!af)) 154 if (unlikely(!af))
160 goto discard_it; 155 goto discard_it;
@@ -510,30 +505,30 @@ void sctp_err_finish(struct sock *sk, struct sctp_association *asoc)
510void sctp_v4_err(struct sk_buff *skb, __u32 info) 505void sctp_v4_err(struct sk_buff *skb, __u32 info)
511{ 506{
512 struct iphdr *iph = (struct iphdr *)skb->data; 507 struct iphdr *iph = (struct iphdr *)skb->data;
513 struct sctphdr *sh = (struct sctphdr *)(skb->data + (iph->ihl <<2)); 508 const int ihlen = iph->ihl * 4;
514 int type = skb->h.icmph->type; 509 const int type = icmp_hdr(skb)->type;
515 int code = skb->h.icmph->code; 510 const int code = icmp_hdr(skb)->code;
516 struct sock *sk; 511 struct sock *sk;
517 struct sctp_association *asoc = NULL; 512 struct sctp_association *asoc = NULL;
518 struct sctp_transport *transport; 513 struct sctp_transport *transport;
519 struct inet_sock *inet; 514 struct inet_sock *inet;
520 char *saveip, *savesctp; 515 sk_buff_data_t saveip, savesctp;
521 int err; 516 int err;
522 517
523 if (skb->len < ((iph->ihl << 2) + 8)) { 518 if (skb->len < ihlen + 8) {
524 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 519 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
525 return; 520 return;
526 } 521 }
527 522
528 /* Fix up skb to look at the embedded net header. */ 523 /* Fix up skb to look at the embedded net header. */
529 saveip = skb->nh.raw; 524 saveip = skb->network_header;
530 savesctp = skb->h.raw; 525 savesctp = skb->transport_header;
531 skb->nh.iph = iph; 526 skb_reset_network_header(skb);
532 skb->h.raw = (char *)sh; 527 skb_set_transport_header(skb, ihlen);
533 sk = sctp_err_lookup(AF_INET, skb, sh, &asoc, &transport); 528 sk = sctp_err_lookup(AF_INET, skb, sctp_hdr(skb), &asoc, &transport);
534 /* Put back the original pointers. */ 529 /* Put back the original values. */
535 skb->nh.raw = saveip; 530 skb->network_header = saveip;
536 skb->h.raw = savesctp; 531 skb->transport_header = savesctp;
537 if (!sk) { 532 if (!sk) {
538 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 533 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
539 return; 534 return;
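
sctp_v4_err() follows a common ICMP error-handling pattern: the skb's header offsets are saved, repointed at the packet embedded in the ICMP payload so the normal lookup helpers can operate on it, and then restored. A hedged sketch of that save/point/restore shape, with sk_buff reduced to two offsets (pseudo_skb and with_embedded_headers are illustrative, not kernel code):

    #include <stddef.h>

    struct pseudo_skb { size_t net_off, trans_off; };

    static void with_embedded_headers(struct pseudo_skb *skb,
                                      size_t inner_net, size_t inner_trans,
                                      void (*lookup)(struct pseudo_skb *))
    {
            size_t save_net = skb->net_off;      /* save the originals   */
            size_t save_trans = skb->trans_off;

            skb->net_off = inner_net;            /* look at the embedded */
            skb->trans_off = inner_trans;        /* packet's headers     */
            lookup(skb);                         /* e.g. sctp_err_lookup */

            skb->net_off = save_net;             /* put the originals    */
            skb->trans_off = save_trans;         /* back                 */
    }
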
@@ -616,7 +611,7 @@ int sctp_rcv_ootb(struct sk_buff *skb)
616 break; 611 break;
617 612
618 ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); 613 ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
619 if (ch_end > skb->tail) 614 if (ch_end > skb_tail_pointer(skb))
620 break; 615 break;
621 616
622 /* RFC 8.4, 2) If the OOTB packet contains an ABORT chunk, the 617 /* RFC 8.4, 2) If the OOTB packet contains an ABORT chunk, the
@@ -648,7 +643,7 @@ int sctp_rcv_ootb(struct sk_buff *skb)
648 } 643 }
649 644
650 ch = (sctp_chunkhdr_t *) ch_end; 645 ch = (sctp_chunkhdr_t *) ch_end;
651 } while (ch_end < skb->tail); 646 } while (ch_end < skb_tail_pointer(skb));
652 647
653 return 0; 648 return 0;
654 649
@@ -905,7 +900,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
905 struct sctp_association *asoc; 900 struct sctp_association *asoc;
906 union sctp_addr addr; 901 union sctp_addr addr;
907 union sctp_addr *paddr = &addr; 902 union sctp_addr *paddr = &addr;
908 struct sctphdr *sh = (struct sctphdr *) skb->h.raw; 903 struct sctphdr *sh = sctp_hdr(skb);
909 sctp_chunkhdr_t *ch; 904 sctp_chunkhdr_t *ch;
910 union sctp_params params; 905 union sctp_params params;
911 sctp_init_chunk_t *init; 906 sctp_init_chunk_t *init;
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index c30629e17781..88aa22407549 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -159,16 +159,16 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
159 * the skb->tail. 159 * the skb->tail.
160 */ 160 */
161 if (unlikely(skb_is_nonlinear(chunk->skb))) { 161 if (unlikely(skb_is_nonlinear(chunk->skb))) {
162 if (chunk->chunk_end > chunk->skb->tail) 162 if (chunk->chunk_end > skb_tail_pointer(chunk->skb))
163 chunk->chunk_end = chunk->skb->tail; 163 chunk->chunk_end = skb_tail_pointer(chunk->skb);
164 } 164 }
165 skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t)); 165 skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
166 chunk->subh.v = NULL; /* Subheader is no longer valid. */ 166 chunk->subh.v = NULL; /* Subheader is no longer valid. */
167 167
168 if (chunk->chunk_end < chunk->skb->tail) { 168 if (chunk->chunk_end < skb_tail_pointer(chunk->skb)) {
169 /* This is not a singleton */ 169 /* This is not a singleton */
170 chunk->singleton = 0; 170 chunk->singleton = 0;
171 } else if (chunk->chunk_end > chunk->skb->tail) { 171 } else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) {
172 /* RFC 2960, Section 6.10 Bundling 172 /* RFC 2960, Section 6.10 Bundling
173 * 173 *
174 * Partial chunks MUST NOT be placed in an SCTP packet. 174 * Partial chunks MUST NOT be placed in an SCTP packet.
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 0b9c49b3a100..ca527a27dd05 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -122,26 +122,24 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
122 int type, int code, int offset, __be32 info) 122 int type, int code, int offset, __be32 info)
123{ 123{
124 struct inet6_dev *idev; 124 struct inet6_dev *idev;
125 struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
126 struct sctphdr *sh = (struct sctphdr *)(skb->data + offset);
127 struct sock *sk; 125 struct sock *sk;
128 struct sctp_association *asoc; 126 struct sctp_association *asoc;
129 struct sctp_transport *transport; 127 struct sctp_transport *transport;
130 struct ipv6_pinfo *np; 128 struct ipv6_pinfo *np;
131 char *saveip, *savesctp; 129 sk_buff_data_t saveip, savesctp;
132 int err; 130 int err;
133 131
134 idev = in6_dev_get(skb->dev); 132 idev = in6_dev_get(skb->dev);
135 133
136 /* Fix up skb to look at the embedded net header. */ 134 /* Fix up skb to look at the embedded net header. */
137 saveip = skb->nh.raw; 135 saveip = skb->network_header;
138 savesctp = skb->h.raw; 136 savesctp = skb->transport_header;
139 skb->nh.ipv6h = iph; 137 skb_reset_network_header(skb);
140 skb->h.raw = (char *)sh; 138 skb_set_transport_header(skb, offset);
141 sk = sctp_err_lookup(AF_INET6, skb, sh, &asoc, &transport); 139 sk = sctp_err_lookup(AF_INET6, skb, sctp_hdr(skb), &asoc, &transport);
142 /* Put back the original pointers. */ 140 /* Put back the original pointers. */
143 skb->nh.raw = saveip; 141 skb->network_header = saveip;
144 skb->h.raw = savesctp; 142 skb->transport_header = savesctp;
145 if (!sk) { 143 if (!sk) {
146 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS); 144 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
147 goto out; 145 goto out;
@@ -391,13 +389,13 @@ static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb,
391 addr->v6.sin6_flowinfo = 0; /* FIXME */ 389 addr->v6.sin6_flowinfo = 0; /* FIXME */
392 addr->v6.sin6_scope_id = ((struct inet6_skb_parm *)skb->cb)->iif; 390 addr->v6.sin6_scope_id = ((struct inet6_skb_parm *)skb->cb)->iif;
393 391
394 sh = (struct sctphdr *) skb->h.raw; 392 sh = sctp_hdr(skb);
395 if (is_saddr) { 393 if (is_saddr) {
396 *port = sh->source; 394 *port = sh->source;
397 from = &skb->nh.ipv6h->saddr; 395 from = &ipv6_hdr(skb)->saddr;
398 } else { 396 } else {
399 *port = sh->dest; 397 *port = sh->dest;
400 from = &skb->nh.ipv6h->daddr; 398 from = &ipv6_hdr(skb)->daddr;
401 } 399 }
402 ipv6_addr_copy(&addr->v6.sin6_addr, from); 400 ipv6_addr_copy(&addr->v6.sin6_addr, from);
403} 401}
@@ -606,7 +604,7 @@ static sctp_scope_t sctp_v6_scope(union sctp_addr *addr)
606 default: 604 default:
607 retval = SCTP_SCOPE_GLOBAL; 605 retval = SCTP_SCOPE_GLOBAL;
608 break; 606 break;
609 }; 607 }
610 608
611 return retval; 609 return retval;
612} 610}
@@ -699,7 +697,7 @@ static int sctp_v6_skb_iif(const struct sk_buff *skb)
699/* Was this packet marked by Explicit Congestion Notification? */ 697/* Was this packet marked by Explicit Congestion Notification? */
700static int sctp_v6_is_ce(const struct sk_buff *skb) 698static int sctp_v6_is_ce(const struct sk_buff *skb)
701{ 699{
702 return *((__u32 *)(skb->nh.ipv6h)) & htonl(1<<20); 700 return *((__u32 *)(ipv6_hdr(skb))) & htonl(1 << 20);
703} 701}
704 702
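
For reference, sctp_v6_is_ce() relies on the layout of the first 32-bit word of the IPv6 header (per RFC 2460): version in bits 31-28, traffic class in bits 27-20, flow label in bits 19-0. The ECN field is the low two bits of the traffic class, i.e. bits 21 and 20 of the word, so htonl(1 << 20) masks the low-order ECN bit when the word is read in network byte order:

     31    28 27        20 19                        0
    +--------+------------+---------------------------+
    | version| traf class |         flow label        |
    +--------+------------+---------------------------+
                       ^^-- ECN bits (21, 20)
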
705/* Dump the v6 addr to the seq file. */ 703/* Dump the v6 addr to the seq file. */
@@ -766,19 +764,19 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
766 if (msgname) { 764 if (msgname) {
767 sctp_inet6_msgname(msgname, addr_len); 765 sctp_inet6_msgname(msgname, addr_len);
768 sin6 = (struct sockaddr_in6 *)msgname; 766 sin6 = (struct sockaddr_in6 *)msgname;
769 sh = (struct sctphdr *)skb->h.raw; 767 sh = sctp_hdr(skb);
770 sin6->sin6_port = sh->source; 768 sin6->sin6_port = sh->source;
771 769
772 /* Map ipv4 address into v4-mapped-on-v6 address. */ 770 /* Map ipv4 address into v4-mapped-on-v6 address. */
773 if (sctp_sk(skb->sk)->v4mapped && 771 if (sctp_sk(skb->sk)->v4mapped &&
774 skb->nh.iph->version == 4) { 772 ip_hdr(skb)->version == 4) {
775 sctp_v4_map_v6((union sctp_addr *)sin6); 773 sctp_v4_map_v6((union sctp_addr *)sin6);
776 sin6->sin6_addr.s6_addr32[3] = skb->nh.iph->saddr; 774 sin6->sin6_addr.s6_addr32[3] = ip_hdr(skb)->saddr;
777 return; 775 return;
778 } 776 }
779 777
780 /* Otherwise, just copy the v6 address. */ 778 /* Otherwise, just copy the v6 address. */
781 ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr); 779 ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr);
782 if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) { 780 if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) {
783 struct sctp_ulpevent *ev = sctp_skb2event(skb); 781 struct sctp_ulpevent *ev = sctp_skb2event(skb);
784 sin6->sin6_scope_id = ev->iif; 782 sin6->sin6_scope_id = ev->iif;
diff --git a/net/sctp/output.c b/net/sctp/output.c
index f875fc3ced54..d85543def754 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -176,7 +176,7 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
176 case SCTP_XMIT_OK: 176 case SCTP_XMIT_OK:
177 case SCTP_XMIT_NAGLE_DELAY: 177 case SCTP_XMIT_NAGLE_DELAY:
178 break; 178 break;
179 }; 179 }
180 180
181 return retval; 181 return retval;
182} 182}
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 41abfd17627e..992f361084b7 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -338,7 +338,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
338 SCTP_INC_STATS(SCTP_MIB_OUTORDERCHUNKS); 338 SCTP_INC_STATS(SCTP_MIB_OUTORDERCHUNKS);
339 q->empty = 0; 339 q->empty = 0;
340 break; 340 break;
341 }; 341 }
342 } else { 342 } else {
343 list_add_tail(&chunk->list, &q->control_chunk_list); 343 list_add_tail(&chunk->list, &q->control_chunk_list);
344 SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); 344 SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
@@ -630,7 +630,7 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
630 /* Retrieve a new chunk to bundle. */ 630 /* Retrieve a new chunk to bundle. */
631 lchunk = sctp_list_dequeue(lqueue); 631 lchunk = sctp_list_dequeue(lqueue);
632 break; 632 break;
633 }; 633 }
634 634
635 /* If we are here due to a retransmit timeout or a fast 635 /* If we are here due to a retransmit timeout or a fast
636 * retransmit and if there are any chunks left in the retransmit 636 * retransmit and if there are any chunks left in the retransmit
@@ -779,7 +779,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
779 default: 779 default:
780 /* We built a chunk with an illegal type! */ 780 /* We built a chunk with an illegal type! */
781 BUG(); 781 BUG();
782 }; 782 }
783 } 783 }
784 784
785 /* Is it OK to send data chunks? */ 785 /* Is it OK to send data chunks? */
@@ -1397,7 +1397,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1397 SCTP_DEBUG_PRINTK("ACKed: %08x", tsn); 1397 SCTP_DEBUG_PRINTK("ACKed: %08x", tsn);
1398 dbg_prt_state = 0; 1398 dbg_prt_state = 0;
1399 dbg_ack_tsn = tsn; 1399 dbg_ack_tsn = tsn;
1400 }; 1400 }
1401 1401
1402 dbg_last_ack_tsn = tsn; 1402 dbg_last_ack_tsn = tsn;
1403#endif /* SCTP_DEBUG */ 1403#endif /* SCTP_DEBUG */
@@ -1452,7 +1452,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1452 SCTP_DEBUG_PRINTK("KEPT: %08x",tsn); 1452 SCTP_DEBUG_PRINTK("KEPT: %08x",tsn);
1453 dbg_prt_state = 1; 1453 dbg_prt_state = 1;
1454 dbg_kept_tsn = tsn; 1454 dbg_kept_tsn = tsn;
1455 }; 1455 }
1456 1456
1457 dbg_last_kept_tsn = tsn; 1457 dbg_last_kept_tsn = tsn;
1458#endif /* SCTP_DEBUG */ 1458#endif /* SCTP_DEBUG */
@@ -1476,7 +1476,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1476 } else { 1476 } else {
1477 SCTP_DEBUG_PRINTK("\n"); 1477 SCTP_DEBUG_PRINTK("\n");
1478 } 1478 }
1479 }; 1479 }
1480#endif /* SCTP_DEBUG */ 1480#endif /* SCTP_DEBUG */
1481 if (transport) { 1481 if (transport) {
1482 if (bytes_acked) { 1482 if (bytes_acked) {
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index e17a823ca90f..c361deb6cea9 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -235,13 +235,13 @@ static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb,
235 port = &addr->v4.sin_port; 235 port = &addr->v4.sin_port;
236 addr->v4.sin_family = AF_INET; 236 addr->v4.sin_family = AF_INET;
237 237
238 sh = (struct sctphdr *) skb->h.raw; 238 sh = sctp_hdr(skb);
239 if (is_saddr) { 239 if (is_saddr) {
240 *port = sh->source; 240 *port = sh->source;
241 from = &skb->nh.iph->saddr; 241 from = &ip_hdr(skb)->saddr;
242 } else { 242 } else {
243 *port = sh->dest; 243 *port = sh->dest;
244 from = &skb->nh.iph->daddr; 244 from = &ip_hdr(skb)->daddr;
245 } 245 }
246 memcpy(&addr->v4.sin_addr.s_addr, from, sizeof(struct in_addr)); 246 memcpy(&addr->v4.sin_addr.s_addr, from, sizeof(struct in_addr));
247} 247}
@@ -530,7 +530,7 @@ static int sctp_v4_skb_iif(const struct sk_buff *skb)
530/* Was this packet marked by Explicit Congestion Notification? */ 530/* Was this packet marked by Explicit Congestion Notification? */
531static int sctp_v4_is_ce(const struct sk_buff *skb) 531static int sctp_v4_is_ce(const struct sk_buff *skb)
532{ 532{
533 return INET_ECN_is_ce(skb->nh.iph->tos); 533 return INET_ECN_is_ce(ip_hdr(skb)->tos);
534} 534}
535 535
536/* Create and initialize a new sk for the socket returned by accept(). */ 536/* Create and initialize a new sk for the socket returned by accept(). */
@@ -731,15 +731,13 @@ static void sctp_inet_event_msgname(struct sctp_ulpevent *event, char *msgname,
731/* Initialize and copy out a msgname from an inbound skb. */ 731/* Initialize and copy out a msgname from an inbound skb. */
732static void sctp_inet_skb_msgname(struct sk_buff *skb, char *msgname, int *len) 732static void sctp_inet_skb_msgname(struct sk_buff *skb, char *msgname, int *len)
733{ 733{
734 struct sctphdr *sh;
735 struct sockaddr_in *sin;
736
737 if (msgname) { 734 if (msgname) {
735 struct sctphdr *sh = sctp_hdr(skb);
736 struct sockaddr_in *sin = (struct sockaddr_in *)msgname;
737
738 sctp_inet_msgname(msgname, len); 738 sctp_inet_msgname(msgname, len);
739 sin = (struct sockaddr_in *)msgname;
740 sh = (struct sctphdr *)skb->h.raw;
741 sin->sin_port = sh->source; 739 sin->sin_port = sh->source;
742 sin->sin_addr.s_addr = skb->nh.iph->saddr; 740 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
743 } 741 }
744} 742}
745 743
@@ -1044,7 +1042,7 @@ SCTP_STATIC __init int sctp_init(void)
1044 sctp_cookie_preserve_enable = 1; 1042 sctp_cookie_preserve_enable = 1;
1045 1043
1046 /* Max.Burst - 4 */ 1044 /* Max.Burst - 4 */
1047 sctp_max_burst = SCTP_MAX_BURST; 1045 sctp_max_burst = SCTP_DEFAULT_MAX_BURST;
1048 1046
1049 /* Association.Max.Retrans - 10 attempts 1047 /* Association.Max.Retrans - 10 attempts
1050 * Path.Max.Retrans - 5 attempts (per destination address) 1048 * Path.Max.Retrans - 5 attempts (per destination address)
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index f7fb29d5a0c7..be783a3761c4 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -86,7 +86,7 @@ int sctp_chunk_iif(const struct sctp_chunk *chunk)
86 struct sctp_af *af; 86 struct sctp_af *af;
87 int iif = 0; 87 int iif = 0;
88 88
89 af = sctp_get_af_specific(ipver2af(chunk->skb->nh.iph->version)); 89 af = sctp_get_af_specific(ipver2af(ip_hdr(chunk->skb)->version));
90 if (af) 90 if (af)
91 iif = af->skb_iif(chunk->skb); 91 iif = af->skb_iif(chunk->skb);
92 92
@@ -1143,7 +1143,7 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data)
1143 1143
1144 /* Adjust the chunk length field. */ 1144 /* Adjust the chunk length field. */
1145 chunk->chunk_hdr->length = htons(chunklen + padlen + len); 1145 chunk->chunk_hdr->length = htons(chunklen + padlen + len);
1146 chunk->chunk_end = chunk->skb->tail; 1146 chunk->chunk_end = skb_tail_pointer(chunk->skb);
1147 1147
1148 return target; 1148 return target;
1149} 1149}
@@ -1168,7 +1168,7 @@ int sctp_user_addto_chunk(struct sctp_chunk *chunk, int off, int len,
1168 /* Adjust the chunk length field. */ 1168 /* Adjust the chunk length field. */
1169 chunk->chunk_hdr->length = 1169 chunk->chunk_hdr->length =
1170 htons(ntohs(chunk->chunk_hdr->length) + len); 1170 htons(ntohs(chunk->chunk_hdr->length) + len);
1171 chunk->chunk_end = chunk->skb->tail; 1171 chunk->chunk_end = skb_tail_pointer(chunk->skb);
1172 1172
1173out: 1173out:
1174 return err; 1174 return err;
@@ -1233,7 +1233,7 @@ struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep,
1233 asoc->temp = 1; 1233 asoc->temp = 1;
1234 skb = chunk->skb; 1234 skb = chunk->skb;
1235 /* Create an entry for the source address of the packet. */ 1235 /* Create an entry for the source address of the packet. */
1236 af = sctp_get_af_specific(ipver2af(skb->nh.iph->version)); 1236 af = sctp_get_af_specific(ipver2af(ip_hdr(skb)->version));
1237 if (unlikely(!af)) 1237 if (unlikely(!af))
1238 goto fail; 1238 goto fail;
1239 af->from_skb(&asoc->c.peer_addr, skb, 1); 1239 af->from_skb(&asoc->c.peer_addr, skb, 1);
@@ -2077,7 +2077,7 @@ static int sctp_process_param(struct sctp_association *asoc,
2077 2077
2078 default: /* Just ignore anything else. */ 2078 default: /* Just ignore anything else. */
2079 break; 2079 break;
2080 }; 2080 }
2081 } 2081 }
2082 break; 2082 break;
2083 2083
@@ -2118,7 +2118,7 @@ static int sctp_process_param(struct sctp_association *asoc,
2118 SCTP_DEBUG_PRINTK("Ignoring param: %d for association %p.\n", 2118 SCTP_DEBUG_PRINTK("Ignoring param: %d for association %p.\n",
2119 ntohs(param.p->type), asoc); 2119 ntohs(param.p->type), asoc);
2120 break; 2120 break;
2121 }; 2121 }
2122 2122
2123 return retval; 2123 return retval;
2124} 2124}
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 135567493119..b37a7adeb150 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -464,7 +464,7 @@ static void sctp_cmd_init_failed(sctp_cmd_seq_t *commands,
464 struct sctp_ulpevent *event; 464 struct sctp_ulpevent *event;
465 465
466 event = sctp_ulpevent_make_assoc_change(asoc,0, SCTP_CANT_STR_ASSOC, 466 event = sctp_ulpevent_make_assoc_change(asoc,0, SCTP_CANT_STR_ASSOC,
467 (__u16)error, 0, 0, 467 (__u16)error, 0, 0, NULL,
468 GFP_ATOMIC); 468 GFP_ATOMIC);
469 469
470 if (event) 470 if (event)
@@ -492,8 +492,13 @@ static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
492 /* Cancel any partial delivery in progress. */ 492 /* Cancel any partial delivery in progress. */
493 sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC); 493 sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
494 494
495 event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_LOST, 495 if (event_type == SCTP_EVENT_T_CHUNK && subtype.chunk == SCTP_CID_ABORT)
496 (__u16)error, 0, 0, 496 event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_LOST,
497 (__u16)error, 0, 0, chunk,
498 GFP_ATOMIC);
499 else
500 event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_LOST,
501 (__u16)error, 0, 0, NULL,
497 GFP_ATOMIC); 502 GFP_ATOMIC);
498 if (event) 503 if (event)
499 sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, 504 sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
@@ -1004,7 +1009,7 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
1004 status, state, event_type, subtype.chunk); 1009 status, state, event_type, subtype.chunk);
1005 BUG(); 1010 BUG();
1006 break; 1011 break;
1007 }; 1012 }
1008 1013
1009bail: 1014bail:
1010 return error; 1015 return error;
@@ -1484,7 +1489,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1484 printk(KERN_WARNING "Impossible command: %u, %p\n", 1489 printk(KERN_WARNING "Impossible command: %u, %p\n",
1485 cmd->verb, cmd->obj.ptr); 1490 cmd->verb, cmd->obj.ptr);
1486 break; 1491 break;
1487 }; 1492 }
1493
1488 if (error) 1494 if (error)
1489 break; 1495 break;
1490 } 1496 }
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index e9097cf614ba..9e28a5d51200 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -186,7 +186,7 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
186 * notification is passed to the upper layer. 186 * notification is passed to the upper layer.
187 */ 187 */
188 ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP, 188 ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP,
189 0, 0, 0, GFP_ATOMIC); 189 0, 0, 0, NULL, GFP_ATOMIC);
190 if (ev) 190 if (ev)
191 sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, 191 sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
192 SCTP_ULPEVENT(ev)); 192 SCTP_ULPEVENT(ev));
@@ -629,7 +629,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
629 case -SCTP_IERROR_BAD_SIG: 629 case -SCTP_IERROR_BAD_SIG:
630 default: 630 default:
631 return sctp_sf_pdiscard(ep, asoc, type, arg, commands); 631 return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
632 }; 632 }
633 } 633 }
634 634
635 635
@@ -661,7 +661,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
661 ev = sctp_ulpevent_make_assoc_change(new_asoc, 0, SCTP_COMM_UP, 0, 661 ev = sctp_ulpevent_make_assoc_change(new_asoc, 0, SCTP_COMM_UP, 0,
662 new_asoc->c.sinit_num_ostreams, 662 new_asoc->c.sinit_num_ostreams,
663 new_asoc->c.sinit_max_instreams, 663 new_asoc->c.sinit_max_instreams,
664 GFP_ATOMIC); 664 NULL, GFP_ATOMIC);
665 if (!ev) 665 if (!ev)
666 goto nomem_ev; 666 goto nomem_ev;
667 667
@@ -790,7 +790,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep,
790 ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_UP, 790 ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_UP,
791 0, asoc->c.sinit_num_ostreams, 791 0, asoc->c.sinit_num_ostreams,
792 asoc->c.sinit_max_instreams, 792 asoc->c.sinit_max_instreams,
793 GFP_ATOMIC); 793 NULL, GFP_ATOMIC);
794 794
795 if (!ev) 795 if (!ev)
796 goto nomem; 796 goto nomem;
@@ -1195,7 +1195,7 @@ static void sctp_tietags_populate(struct sctp_association *new_asoc,
1195 new_asoc->c.my_ttag = asoc->c.my_vtag; 1195 new_asoc->c.my_ttag = asoc->c.my_vtag;
1196 new_asoc->c.peer_ttag = asoc->c.peer_vtag; 1196 new_asoc->c.peer_ttag = asoc->c.peer_vtag;
1197 break; 1197 break;
1198 }; 1198 }
1199 1199
1200 /* Other parameters for the endpoint SHOULD be copied from the 1200 /* Other parameters for the endpoint SHOULD be copied from the
1201 * existing parameters of the association (e.g. number of 1201 * existing parameters of the association (e.g. number of
@@ -1625,7 +1625,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep,
1625 ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_RESTART, 0, 1625 ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_RESTART, 0,
1626 new_asoc->c.sinit_num_ostreams, 1626 new_asoc->c.sinit_num_ostreams,
1627 new_asoc->c.sinit_max_instreams, 1627 new_asoc->c.sinit_max_instreams,
1628 GFP_ATOMIC); 1628 NULL, GFP_ATOMIC);
1629 if (!ev) 1629 if (!ev)
1630 goto nomem_ev; 1630 goto nomem_ev;
1631 1631
@@ -1691,7 +1691,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep,
1691 ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_UP, 0, 1691 ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_UP, 0,
1692 new_asoc->c.sinit_num_ostreams, 1692 new_asoc->c.sinit_num_ostreams,
1693 new_asoc->c.sinit_max_instreams, 1693 new_asoc->c.sinit_max_instreams,
1694 GFP_ATOMIC); 1694 NULL, GFP_ATOMIC);
1695 if (!ev) 1695 if (!ev)
1696 goto nomem_ev; 1696 goto nomem_ev;
1697 1697
@@ -1786,7 +1786,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
1786 SCTP_COMM_UP, 0, 1786 SCTP_COMM_UP, 0,
1787 asoc->c.sinit_num_ostreams, 1787 asoc->c.sinit_num_ostreams,
1788 asoc->c.sinit_max_instreams, 1788 asoc->c.sinit_max_instreams,
1789 GFP_ATOMIC); 1789 NULL, GFP_ATOMIC);
1790 if (!ev) 1790 if (!ev)
1791 goto nomem; 1791 goto nomem;
1792 1792
@@ -1904,7 +1904,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
1904 case -SCTP_IERROR_BAD_SIG: 1904 case -SCTP_IERROR_BAD_SIG:
1905 default: 1905 default:
1906 return sctp_sf_pdiscard(ep, asoc, type, arg, commands); 1906 return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
1907 }; 1907 }
1908 } 1908 }
1909 1909
1910 /* Compare the tie_tag in cookie with the verification tag of 1910 /* Compare the tie_tag in cookie with the verification tag of
@@ -1936,7 +1936,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
1936 default: /* Discard packet for all others. */ 1936 default: /* Discard packet for all others. */
1937 retval = sctp_sf_pdiscard(ep, asoc, type, arg, commands); 1937 retval = sctp_sf_pdiscard(ep, asoc, type, arg, commands);
1938 break; 1938 break;
1939 }; 1939 }
1940 1940
1941 /* Delete the temporary new association. */ 1941 /* Delete the temporary new association. */
1942 sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); 1942 sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
@@ -3035,7 +3035,7 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
3035 * notification is passed to the upper layer. 3035 * notification is passed to the upper layer.
3036 */ 3036 */
3037 ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP, 3037 ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP,
3038 0, 0, 0, GFP_ATOMIC); 3038 0, 0, 0, NULL, GFP_ATOMIC);
3039 if (!ev) 3039 if (!ev)
3040 goto nomem; 3040 goto nomem;
3041 3041
@@ -3115,7 +3115,7 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
3115 break; 3115 break;
3116 3116
3117 ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); 3117 ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
3118 if (ch_end > skb->tail) 3118 if (ch_end > skb_tail_pointer(skb))
3119 break; 3119 break;
3120 3120
3121 if (SCTP_CID_SHUTDOWN_ACK == ch->type) 3121 if (SCTP_CID_SHUTDOWN_ACK == ch->type)
@@ -3130,7 +3130,7 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
3130 return sctp_sf_pdiscard(ep, asoc, type, arg, commands); 3130 return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
3131 3131
3132 ch = (sctp_chunkhdr_t *) ch_end; 3132 ch = (sctp_chunkhdr_t *) ch_end;
3133 } while (ch_end < skb->tail); 3133 } while (ch_end < skb_tail_pointer(skb));
3134 3134
3135 if (ootb_shut_ack) 3135 if (ootb_shut_ack)
3136 sctp_sf_shut_8_4_5(ep, asoc, type, arg, commands); 3136 sctp_sf_shut_8_4_5(ep, asoc, type, arg, commands);
@@ -4816,7 +4816,7 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep,
4816 default: 4816 default:
4817 BUG(); 4817 BUG();
4818 break; 4818 break;
4819 }; 4819 }
4820 4820
4821 if (!reply) 4821 if (!reply)
4822 goto nomem; 4822 goto nomem;
@@ -5286,7 +5286,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
5286 chunk->ecn_ce_done = 1; 5286 chunk->ecn_ce_done = 1;
5287 5287
5288 af = sctp_get_af_specific( 5288 af = sctp_get_af_specific(
5289 ipver2af(chunk->skb->nh.iph->version)); 5289 ipver2af(ip_hdr(chunk->skb)->version));
5290 5290
5291 if (af && af->is_ce(chunk->skb) && asoc->peer.ecn_capable) { 5291 if (af && af->is_ce(chunk->skb) && asoc->peer.ecn_capable) {
5292 /* Do real work as a side effect. */ 5292 /* Do real work as a side effect. */
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 5e54b17377f4..523071c7902f 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -101,7 +101,7 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
101 default: 101 default:
102 /* Yikes! We got an illegal event type. */ 102 /* Yikes! We got an illegal event type. */
103 return &bug; 103 return &bug;
104 }; 104 }
105} 105}
106 106
107#define TYPE_SCTP_FUNC(func) {.fn = func, .name = #func} 107#define TYPE_SCTP_FUNC(func) {.fn = func, .name = #func}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index a1d026f12b0e..11938fb20395 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -941,7 +941,7 @@ SCTP_STATIC int sctp_setsockopt_bindx(struct sock* sk,
941 default: 941 default:
942 err = -EINVAL; 942 err = -EINVAL;
943 break; 943 break;
944 }; 944 }
945 945
946out: 946out:
947 kfree(kaddrs); 947 kfree(kaddrs);
@@ -2039,6 +2039,10 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
2039 * SPP_HB_DEMAND - Request a user initiated heartbeat 2039 * SPP_HB_DEMAND - Request a user initiated heartbeat
2040 * to be made immediately. 2040 * to be made immediately.
2041 * 2041 *
2042 * SPP_HB_TIME_IS_ZERO - Specifies that the time for
2043 * heartbeat delay is to be set to the value of 0
2044 * milliseconds.
2045 *
2042 * SPP_PMTUD_ENABLE - This field will enable PMTU 2046 * SPP_PMTUD_ENABLE - This field will enable PMTU
2043 * discovery upon the specified address. Note that 2047 * discovery upon the specified address. Note that
2044 * if the address field is empty then all addresses 2048 * if the address field is empty then all addresses
@@ -2081,13 +2085,30 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
2081 return error; 2085 return error;
2082 } 2086 }
2083 2087
2084 if (params->spp_hbinterval) { 2088 /* Note that unless the spp_flag is set to SPP_HB_ENABLE the value of
2085 if (trans) { 2089 * this field is ignored. Note also that a value of zero indicates
2086 trans->hbinterval = msecs_to_jiffies(params->spp_hbinterval); 2090 * the current setting should be left unchanged.
2087 } else if (asoc) { 2091 */
2088 asoc->hbinterval = msecs_to_jiffies(params->spp_hbinterval); 2092 if (params->spp_flags & SPP_HB_ENABLE) {
2089 } else { 2093
2090 sp->hbinterval = params->spp_hbinterval; 2094 /* Re-zero the interval if SPP_HB_TIME_IS_ZERO is
2095 * set. This lets us use a 0 value when this flag
2096 * is set.
2097 */
2098 if (params->spp_flags & SPP_HB_TIME_IS_ZERO)
2099 params->spp_hbinterval = 0;
2100
2101 if (params->spp_hbinterval ||
2102 (params->spp_flags & SPP_HB_TIME_IS_ZERO)) {
2103 if (trans) {
2104 trans->hbinterval =
2105 msecs_to_jiffies(params->spp_hbinterval);
2106 } else if (asoc) {
2107 asoc->hbinterval =
2108 msecs_to_jiffies(params->spp_hbinterval);
2109 } else {
2110 sp->hbinterval = params->spp_hbinterval;
2111 }
2091 } 2112 }
2092 } 2113 }
2093 2114
@@ -2104,7 +2125,12 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
2104 } 2125 }
2105 } 2126 }
2106 2127
2107 if (params->spp_pathmtu) { 2128 /* When Path MTU discovery is disabled the value specified here will
2129 * be the "fixed" path mtu (i.e. the value of the spp_flags field must
2130 * include the flag SPP_PMTUD_DISABLE for this field to have any
2131 * effect).
2132 */
2133 if ((params->spp_flags & SPP_PMTUD_DISABLE) && params->spp_pathmtu) {
2108 if (trans) { 2134 if (trans) {
2109 trans->pathmtu = params->spp_pathmtu; 2135 trans->pathmtu = params->spp_pathmtu;
2110 sctp_assoc_sync_pmtu(asoc); 2136 sctp_assoc_sync_pmtu(asoc);
@@ -2135,7 +2161,11 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
2135 } 2161 }
2136 } 2162 }
2137 2163
2138 if (params->spp_sackdelay) { 2164 /* Note that unless the spp_flag is set to SPP_SACKDELAY_ENABLE the
2165 * value of this field is ignored. Note also that a value of zero
2166 * indicates the current setting should be left unchanged.
2167 */
2168 if ((params->spp_flags & SPP_SACKDELAY_ENABLE) && params->spp_sackdelay) {
2139 if (trans) { 2169 if (trans) {
2140 trans->sackdelay = 2170 trans->sackdelay =
2141 msecs_to_jiffies(params->spp_sackdelay); 2171 msecs_to_jiffies(params->spp_sackdelay);
@@ -2163,7 +2193,11 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
2163 } 2193 }
2164 } 2194 }
2165 2195
2166 if (params->spp_pathmaxrxt) { 2196 /* Note that unless the spp_flag is set to SPP_PMTUD_ENABLE the value
2197 * of this field is ignored. Note also that a value of zero
2198 * indicates the current setting should be left unchanged.
2199 */
2200 if ((params->spp_flags & SPP_PMTUD_ENABLE) && params->spp_pathmaxrxt) {
2167 if (trans) { 2201 if (trans) {
2168 trans->pathmaxrxt = params->spp_pathmaxrxt; 2202 trans->pathmaxrxt = params->spp_pathmaxrxt;
2169 } else if (asoc) { 2203 } else if (asoc) {
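
The comments added in this hunk make the gating rule explicit: each spp_* value only takes effect when its matching SPP_* flag is set. A hedged userspace sketch of exercising this path via SCTP_PEER_ADDR_PARAMS (assumes the sctp_paddrparams definitions from <netinet/sctp.h> in lksctp-tools; tune_peer_addr_params is an illustrative name):

    #include <string.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <netinet/sctp.h>

    /* Request a 5 s heartbeat and a fixed 1400-byte path MTU as the
     * socket-wide defaults.  The values are honoured only because the
     * matching flags are set, per the gating in the hunk above. */
    static int tune_peer_addr_params(int sd)
    {
            struct sctp_paddrparams p;

            memset(&p, 0, sizeof(p));
            p.spp_assoc_id   = 0;            /* socket-wide defaults    */
            p.spp_hbinterval = 5000;         /* ms; needs SPP_HB_ENABLE */
            p.spp_pathmtu    = 1400;         /* needs SPP_PMTUD_DISABLE */
            p.spp_flags      = SPP_HB_ENABLE | SPP_PMTUD_DISABLE;

            return setsockopt(sd, IPPROTO_SCTP, SCTP_PEER_ADDR_PARAMS,
                              &p, sizeof(p));
    }
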
@@ -2255,7 +2289,7 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk,
2255 return 0; 2289 return 0;
2256} 2290}
2257 2291
2258/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME) 2292/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
2259 * 2293 *
2260 * This option will get or set the delayed ack timer. The time is set 2294 * This option will get or set the delayed ack timer. The time is set
2261 * in milliseconds. If the assoc_id is 0, then this sets or gets the 2295 * in milliseconds. If the assoc_id is 0, then this sets or gets the
@@ -2792,6 +2826,102 @@ static int sctp_setsockopt_context(struct sock *sk, char __user *optval,
2792 return 0; 2826 return 0;
2793} 2827}
2794 2828
2829/*
2830 * 7.1.24. Get or set fragmented interleave (SCTP_FRAGMENT_INTERLEAVE)
2831 *
2832 * This option will at a minimum specify if the implementation is doing
2833 * fragmented interleave. Fragmented interleave, for a one to many
2834 * socket, is when subsequent calls to receive a message may return
2835 * parts of messages from different associations. Some implementations
2836 * may allow you to turn this value on or off. If so, when turned off,
2837 * no fragment interleave will occur (which will cause head-of-line
2838 * blocking amongst multiple associations sharing the same one to many
2839 * socket). When this option is turned on, then each receive call may
2840 * come from a different association (thus the user must receive data
2841 * with the extended calls (e.g. sctp_recvmsg) to keep track of which
2842 * association each receive belongs to.
2843 *
2844 * This option takes a boolean value. A non-zero value indicates that
2845 * fragmented interleave is on. A value of zero indicates that
2846 * fragmented interleave is off.
2847 *
2848 * Note that it is important that an implementation that allows this
2849 * option to be turned on, have it off by default. Otherwise an unaware
2850 * application using the one to many model may become confused and act
2851 * incorrectly.
2852 */
2853static int sctp_setsockopt_fragment_interleave(struct sock *sk,
2854 char __user *optval,
2855 int optlen)
2856{
2857 int val;
2858
2859 if (optlen != sizeof(int))
2860 return -EINVAL;
2861 if (get_user(val, (int __user *)optval))
2862 return -EFAULT;
2863
2864 sctp_sk(sk)->frag_interleave = (val == 0) ? 0 : 1;
2865
2866 return 0;
2867}
2868
2869/*
2870 * 7.1.25. Set or Get the sctp partial delivery point
2871 * (SCTP_PARTIAL_DELIVERY_POINT)
2872 * This option will set or get the SCTP partial delivery point. This
2873 * point is the size of a message where the partial delivery API will be
2874 * invoked to help free up rwnd space for the peer. Setting this to a
2875 * lower value will cause partial deliveries to happen more often. The
2876 * call's argument is an integer that sets or gets the partial delivery
2877 * point.
2878 */
2879static int sctp_setsockopt_partial_delivery_point(struct sock *sk,
2880 char __user *optval,
2881 int optlen)
2882{
2883 u32 val;
2884
2885 if (optlen != sizeof(u32))
2886 return -EINVAL;
2887 if (get_user(val, (int __user *)optval))
2888 return -EFAULT;
2889
2890 sctp_sk(sk)->pd_point = val;
2891
2892 return 0; /* is this the right error code? */
2893}
2894
2895/*
2896 * 7.1.28. Set or Get the maximum burst (SCTP_MAX_BURST)
2897 *
2898 * This option will allow a user to change the maximum burst of packets
2899 * that can be emitted by this association. Note that the default value
2900 * is 4, and some implementations may restrict this setting so that it
2901 * can only be lowered.
2902 *
2903 * NOTE: This text doesn't seem right. Do this on a socket basis with
2904 * future associations inheriting the socket value.
2905 */
2906static int sctp_setsockopt_maxburst(struct sock *sk,
2907 char __user *optval,
2908 int optlen)
2909{
2910 int val;
2911
2912 if (optlen != sizeof(int))
2913 return -EINVAL;
2914 if (get_user(val, (int __user *)optval))
2915 return -EFAULT;
2916
2917 if (val < 0)
2918 return -EINVAL;
2919
2920 sctp_sk(sk)->max_burst = val;
2921
2922 return 0;
2923}
2924
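
Together the three setters above expose SCTP_FRAGMENT_INTERLEAVE, SCTP_PARTIAL_DELIVERY_POINT and SCTP_MAX_BURST as plain integer options, each insisting on an exact sizeof(int)/sizeof(u32) optlen. A hedged userspace sketch (assumes the option constants are present in your <netinet/sctp.h>; tune_delivery is an illustrative name):

    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <netinet/sctp.h>

    /* Enable fragment interleave, ask for partial delivery once 4 KiB
     * of a message has been reassembled, and cap bursts at 8 packets. */
    static int tune_delivery(int sd)
    {
            int interleave = 1;
            unsigned int pd_point = 4096;
            int max_burst = 8;

            if (setsockopt(sd, IPPROTO_SCTP, SCTP_FRAGMENT_INTERLEAVE,
                           &interleave, sizeof(interleave)) < 0)
                    return -1;
            if (setsockopt(sd, IPPROTO_SCTP, SCTP_PARTIAL_DELIVERY_POINT,
                           &pd_point, sizeof(pd_point)) < 0)
                    return -1;
            return setsockopt(sd, IPPROTO_SCTP, SCTP_MAX_BURST,
                              &max_burst, sizeof(max_burst));
    }
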
2795/* API 6.2 setsockopt(), getsockopt() 2925/* API 6.2 setsockopt(), getsockopt()
2796 * 2926 *
2797 * Applications use setsockopt() and getsockopt() to set or retrieve 2927 * Applications use setsockopt() and getsockopt() to set or retrieve
@@ -2871,6 +3001,9 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
2871 case SCTP_DELAYED_ACK_TIME: 3001 case SCTP_DELAYED_ACK_TIME:
2872 retval = sctp_setsockopt_delayed_ack_time(sk, optval, optlen); 3002 retval = sctp_setsockopt_delayed_ack_time(sk, optval, optlen);
2873 break; 3003 break;
3004 case SCTP_PARTIAL_DELIVERY_POINT:
3005 retval = sctp_setsockopt_partial_delivery_point(sk, optval, optlen);
3006 break;
2874 3007
2875 case SCTP_INITMSG: 3008 case SCTP_INITMSG:
2876 retval = sctp_setsockopt_initmsg(sk, optval, optlen); 3009 retval = sctp_setsockopt_initmsg(sk, optval, optlen);
@@ -2906,11 +3039,16 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
2906 case SCTP_CONTEXT: 3039 case SCTP_CONTEXT:
2907 retval = sctp_setsockopt_context(sk, optval, optlen); 3040 retval = sctp_setsockopt_context(sk, optval, optlen);
2908 break; 3041 break;
2909 3042 case SCTP_FRAGMENT_INTERLEAVE:
3043 retval = sctp_setsockopt_fragment_interleave(sk, optval, optlen);
3044 break;
3045 case SCTP_MAX_BURST:
3046 retval = sctp_setsockopt_maxburst(sk, optval, optlen);
3047 break;
2910 default: 3048 default:
2911 retval = -ENOPROTOOPT; 3049 retval = -ENOPROTOOPT;
2912 break; 3050 break;
2913 }; 3051 }
2914 3052
2915 sctp_release_sock(sk); 3053 sctp_release_sock(sk);
2916 3054
@@ -3066,6 +3204,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
3066 sp->default_timetolive = 0; 3204 sp->default_timetolive = 0;
3067 3205
3068 sp->default_rcv_context = 0; 3206 sp->default_rcv_context = 0;
3207 sp->max_burst = sctp_max_burst;
3069 3208
3070 /* Initialize default setup parameters. These parameters 3209 /* Initialize default setup parameters. These parameters
3071 * can be modified with the SCTP_INITMSG socket option or 3210 * can be modified with the SCTP_INITMSG socket option or
@@ -3134,8 +3273,9 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
3134 sp->pf = sctp_get_pf_specific(sk->sk_family); 3273 sp->pf = sctp_get_pf_specific(sk->sk_family);
3135 3274
3136 /* Control variables for partial data delivery. */ 3275 /* Control variables for partial data delivery. */
3137 sp->pd_mode = 0; 3276 atomic_set(&sp->pd_mode, 0);
3138 skb_queue_head_init(&sp->pd_lobby); 3277 skb_queue_head_init(&sp->pd_lobby);
3278 sp->frag_interleave = 0;
3139 3279
3140 /* Create a per socket endpoint structure. Even if we 3280 /* Create a per socket endpoint structure. Even if we
3141 * change the data structure relationships, this may still 3281 * change the data structure relationships, this may still
@@ -3642,7 +3782,7 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
3642 return 0; 3782 return 0;
3643} 3783}
3644 3784
3645/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME) 3785/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
3646 * 3786 *
3647 * This option will get or set the delayed ack timer. The time is set 3787 * This option will get or set the delayed ack timer. The time is set
3648 * in milliseconds. If the assoc_id is 0, then this sets or gets the 3788 * in milliseconds. If the assoc_id is 0, then this sets or gets the
@@ -4536,6 +4676,77 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len,
4536 return 0; 4676 return 0;
4537} 4677}
4538 4678
4679/*
4680 * 7.1.24. Get or set fragmented interleave (SCTP_FRAGMENT_INTERLEAVE)
4681 * (chapter and verse is quoted at sctp_setsockopt_fragment_interleave())
4682 */
4683static int sctp_getsockopt_fragment_interleave(struct sock *sk, int len,
4684 char __user *optval, int __user *optlen)
4685{
4686 int val;
4687
4688 if (len < sizeof(int))
4689 return -EINVAL;
4690
4691 len = sizeof(int);
4692
4693 val = sctp_sk(sk)->frag_interleave;
4694 if (put_user(len, optlen))
4695 return -EFAULT;
4696 if (copy_to_user(optval, &val, len))
4697 return -EFAULT;
4698
4699 return 0;
4700}
4701
4702/*
4703 * 7.1.25. Set or Get the sctp partial delivery point
4704 * (chapter and verse is quoted at sctp_setsockopt_partial_delivery_point())
4705 */
4706static int sctp_getsockopt_partial_delivery_point(struct sock *sk, int len,
4707 char __user *optval,
4708 int __user *optlen)
4709{
4710 u32 val;
4711
4712 if (len < sizeof(u32))
4713 return -EINVAL;
4714
4715 len = sizeof(u32);
4716
4717 val = sctp_sk(sk)->pd_point;
4718 if (put_user(len, optlen))
4719 return -EFAULT;
4720 if (copy_to_user(optval, &val, len))
4721 return -EFAULT;
4722
4723 return -ENOTSUPP;
4724}
4725
4726/*
4727 * 7.1.28. Set or Get the maximum burst (SCTP_MAX_BURST)
4728 * (chapter and verse is quoted at sctp_setsockopt_maxburst())
4729 */
4730static int sctp_getsockopt_maxburst(struct sock *sk, int len,
4731 char __user *optval,
4732 int __user *optlen)
4733{
4734 int val;
4735
4736 if (len < sizeof(int))
4737 return -EINVAL;
4738
4739 len = sizeof(int);
4740
4741 val = sctp_sk(sk)->max_burst;
4742 if (put_user(len, optlen))
4743 return -EFAULT;
4744 if (copy_to_user(optval, &val, len))
4745 return -EFAULT;
4746
4747 return -ENOTSUPP;
4748}
4749
4539SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, 4750SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
4540 char __user *optval, int __user *optlen) 4751 char __user *optval, int __user *optlen)
4541{ 4752{
@@ -4648,10 +4859,21 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
4648 case SCTP_CONTEXT: 4859 case SCTP_CONTEXT:
4649 retval = sctp_getsockopt_context(sk, len, optval, optlen); 4860 retval = sctp_getsockopt_context(sk, len, optval, optlen);
4650 break; 4861 break;
4862 case SCTP_FRAGMENT_INTERLEAVE:
4863 retval = sctp_getsockopt_fragment_interleave(sk, len, optval,
4864 optlen);
4865 break;
4866 case SCTP_PARTIAL_DELIVERY_POINT:
4867 retval = sctp_getsockopt_partial_delivery_point(sk, len, optval,
4868 optlen);
4869 break;
4870 case SCTP_MAX_BURST:
4871 retval = sctp_getsockopt_maxburst(sk, len, optval, optlen);
4872 break;
4651 default: 4873 default:
4652 retval = -ENOPROTOOPT; 4874 retval = -ENOPROTOOPT;
4653 break; 4875 break;
4654 }; 4876 }
4655 4877
4656 sctp_release_sock(sk); 4878 sctp_release_sock(sk);
4657 return retval; 4879 return retval;
@@ -4976,7 +5198,8 @@ int sctp_inet_listen(struct socket *sock, int backlog)
4976 break; 5198 break;
4977 default: 5199 default:
4978 break; 5200 break;
4979 }; 5201 }
5202
4980 if (err) 5203 if (err)
4981 goto cleanup; 5204 goto cleanup;
4982 5205
@@ -5239,7 +5462,7 @@ SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *msg,
5239 5462
5240 default: 5463 default:
5241 return -EINVAL; 5464 return -EINVAL;
5242 }; 5465 }
5243 } 5466 }
5244 return 0; 5467 return 0;
5245} 5468}
@@ -5742,9 +5965,9 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
5742 * 3) Peeling off non-partial delivery; move pd_lobby to receive_queue. 5965 * 3) Peeling off non-partial delivery; move pd_lobby to receive_queue.
5743 */ 5966 */
5744 skb_queue_head_init(&newsp->pd_lobby); 5967 skb_queue_head_init(&newsp->pd_lobby);
5745 sctp_sk(newsk)->pd_mode = assoc->ulpq.pd_mode; 5968 atomic_set(&sctp_sk(newsk)->pd_mode, assoc->ulpq.pd_mode);
5746 5969
5747 if (sctp_sk(oldsk)->pd_mode) { 5970 if (atomic_read(&sctp_sk(oldsk)->pd_mode)) {
5748 struct sk_buff_head *queue; 5971 struct sk_buff_head *queue;
5749 5972
5750 /* Decide which queue to move pd_lobby skbs to. */ 5973 /* Decide which queue to move pd_lobby skbs to. */
@@ -5770,7 +5993,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
5770 * delivery to finish. 5993 * delivery to finish.
5771 */ 5994 */
5772 if (assoc->ulpq.pd_mode) 5995 if (assoc->ulpq.pd_mode)
5773 sctp_clear_pd(oldsk); 5996 sctp_clear_pd(oldsk, NULL);
5774 5997
5775 } 5998 }
5776 5999
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 4d8c2ab864fc..961df275d5b9 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -507,7 +507,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
507 transport->cwnd = max(transport->cwnd/2, 507 transport->cwnd = max(transport->cwnd/2,
508 4*transport->asoc->pathmtu); 508 4*transport->asoc->pathmtu);
509 break; 509 break;
510 }; 510 }
511 511
512 transport->partial_bytes_acked = 0; 512 transport->partial_bytes_acked = 0;
513 SCTP_DEBUG_PRINTK("%s: transport: %p reason: %d cwnd: " 513 SCTP_DEBUG_PRINTK("%s: transport: %p reason: %d cwnd: "
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 2e11bc8d5d35..661ea2dd78ba 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -131,19 +131,54 @@ static inline void sctp_ulpevent_release_owner(struct sctp_ulpevent *event)
131struct sctp_ulpevent *sctp_ulpevent_make_assoc_change( 131struct sctp_ulpevent *sctp_ulpevent_make_assoc_change(
132 const struct sctp_association *asoc, 132 const struct sctp_association *asoc,
133 __u16 flags, __u16 state, __u16 error, __u16 outbound, 133 __u16 flags, __u16 state, __u16 error, __u16 outbound,
134 __u16 inbound, gfp_t gfp) 134 __u16 inbound, struct sctp_chunk *chunk, gfp_t gfp)
135{ 135{
136 struct sctp_ulpevent *event; 136 struct sctp_ulpevent *event;
137 struct sctp_assoc_change *sac; 137 struct sctp_assoc_change *sac;
138 struct sk_buff *skb; 138 struct sk_buff *skb;
139 139
140 event = sctp_ulpevent_new(sizeof(struct sctp_assoc_change), 140 /* If the lower layer passed in the chunk, it will be
141 * an ABORT, so we need to include it in the sac_info.
142 */
143 if (chunk) {
144 /* sctp_inq_pop() has already pulled off the chunk
145 * header. We need to put it back temporarily.
146 */
147 skb_push(chunk->skb, sizeof(sctp_chunkhdr_t));
148
149 /* Copy the chunk data to a new skb and reserve enough
150 * head room to use as notification.
151 */
152 skb = skb_copy_expand(chunk->skb,
153 sizeof(struct sctp_assoc_change), 0, gfp);
154
155 if (!skb)
156 goto fail;
157
158 /* put back the chunk header now that we have a copy */
159 skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
160
161 /* Embed the event fields inside the cloned skb. */
162 event = sctp_skb2event(skb);
163 sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize);
164
165 /* Include the notification structure */
166 sac = (struct sctp_assoc_change *)
167 skb_push(skb, sizeof(struct sctp_assoc_change));
168
169 /* Trim the buffer to the right length. */
170 skb_trim(skb, sizeof(struct sctp_assoc_change) +
171 ntohs(chunk->chunk_hdr->length));
172 } else {
173 event = sctp_ulpevent_new(sizeof(struct sctp_assoc_change),
141 MSG_NOTIFICATION, gfp); 174 MSG_NOTIFICATION, gfp);
142 if (!event) 175 if (!event)
143 goto fail; 176 goto fail;
144 skb = sctp_event2skb(event); 177
145 sac = (struct sctp_assoc_change *) 178 skb = sctp_event2skb(event);
146 skb_put(skb, sizeof(struct sctp_assoc_change)); 179 sac = (struct sctp_assoc_change *) skb_put(skb,
180 sizeof(struct sctp_assoc_change));
181 }
147 182
148 /* Socket Extensions for SCTP 183 /* Socket Extensions for SCTP
149 * 5.3.1.1 SCTP_ASSOC_CHANGE 184 * 5.3.1.1 SCTP_ASSOC_CHANGE
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index b29e3e4b72c9..34eb977a204d 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -138,26 +138,59 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
138/* Clear the partial delivery mode for this socket. Note: This 138/* Clear the partial delivery mode for this socket. Note: This
139 * assumes that no association is currently in partial delivery mode. 139 * assumes that no association is currently in partial delivery mode.
140 */ 140 */
141int sctp_clear_pd(struct sock *sk) 141int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc)
142{ 142{
143 struct sctp_sock *sp = sctp_sk(sk); 143 struct sctp_sock *sp = sctp_sk(sk);
144 144
145 sp->pd_mode = 0; 145 if (atomic_dec_and_test(&sp->pd_mode)) {
146 if (!skb_queue_empty(&sp->pd_lobby)) { 146 /* This means there are no other associations in PD, so
147 struct list_head *list; 147 * we can go ahead and clear out the lobby in one shot
148 sctp_skb_list_tail(&sp->pd_lobby, &sk->sk_receive_queue); 148 */
149 list = (struct list_head *)&sctp_sk(sk)->pd_lobby; 149 if (!skb_queue_empty(&sp->pd_lobby)) {
150 INIT_LIST_HEAD(list); 150 struct list_head *list;
151 return 1; 151 sctp_skb_list_tail(&sp->pd_lobby, &sk->sk_receive_queue);
152 list = (struct list_head *)&sctp_sk(sk)->pd_lobby;
153 INIT_LIST_HEAD(list);
154 return 1;
155 }
156 } else {
157 /* There are other associations in PD, so we only need to
158 * pull stuff out of the lobby that belongs to the
159 * association that is exiting PD (all of its notifications
160 * are posted here).
161 */
162 if (!skb_queue_empty(&sp->pd_lobby) && asoc) {
163 struct sk_buff *skb, *tmp;
164 struct sctp_ulpevent *event;
165
166 sctp_skb_for_each(skb, &sp->pd_lobby, tmp) {
167 event = sctp_skb2event(skb);
168 if (event->asoc == asoc) {
169 __skb_unlink(skb, &sp->pd_lobby);
170 __skb_queue_tail(&sk->sk_receive_queue,
171 skb);
172 }
173 }
174 }
152 } 175 }
176
153 return 0; 177 return 0;
154} 178}
155 179
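The rewrite above changes pd_mode from a per-socket flag into a reference count: every association entering partial delivery increments it, sctp_clear_pd() decrements it, and the lobby is drained wholesale only when the last association leaves. A hedged sketch of just the counting shape in C11 atomics (enter_pd/leave_pd are illustrative names, not the kernel's atomic_t API):

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_int pd_mode;     /* count of associations in PD */

    static void enter_pd(void)
    {
            atomic_fetch_add(&pd_mode, 1);
    }

    /* True when the caller was the last association in PD, i.e. the
     * moment the wholesale lobby drain becomes safe. */
    static bool leave_pd(void)
    {
            return atomic_fetch_sub(&pd_mode, 1) == 1;
    }
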
180/* Set the pd_mode on the socket and ulpq */
181static void sctp_ulpq_set_pd(struct sctp_ulpq *ulpq)
182{
183 struct sctp_sock *sp = sctp_sk(ulpq->asoc->base.sk);
184
185 atomic_inc(&sp->pd_mode);
186 ulpq->pd_mode = 1;
187}
188
156/* Clear the pd_mode and restart any pending messages waiting for delivery. */ 189/* Clear the pd_mode and restart any pending messages waiting for delivery. */
157static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq) 190static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq)
158{ 191{
159 ulpq->pd_mode = 0; 192 ulpq->pd_mode = 0;
160 return sctp_clear_pd(ulpq->asoc->base.sk); 193 return sctp_clear_pd(ulpq->asoc->base.sk, ulpq->asoc);
161} 194}
162 195
163/* If the SKB of 'event' is on a list, it is the first such member 196/* If the SKB of 'event' is on a list, it is the first such member
@@ -187,25 +220,35 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
187 * the association the cause of the partial delivery. 220 * the association the cause of the partial delivery.
188 */ 221 */
189 222
190 if (!sctp_sk(sk)->pd_mode) { 223 if (atomic_read(&sctp_sk(sk)->pd_mode) == 0) {
191 queue = &sk->sk_receive_queue; 224 queue = &sk->sk_receive_queue;
192 } else if (ulpq->pd_mode) { 225 } else {
193 /* If the association is in partial delivery, we 226 if (ulpq->pd_mode) {
194 * need to finish delivering the partially processed 227 /* If the association is in partial delivery, we
195 * packet before passing any other data. This is 228 * need to finish delivering the partially processed
196 * because we don't truly support stream interleaving. 229 * packet before passing any other data. This is
197 */ 230 * because we don't truly support stream interleaving.
198 if ((event->msg_flags & MSG_NOTIFICATION) || 231 */
199 (SCTP_DATA_NOT_FRAG == 232 if ((event->msg_flags & MSG_NOTIFICATION) ||
200 (event->msg_flags & SCTP_DATA_FRAG_MASK))) 233 (SCTP_DATA_NOT_FRAG ==
201 queue = &sctp_sk(sk)->pd_lobby; 234 (event->msg_flags & SCTP_DATA_FRAG_MASK)))
202 else { 235 queue = &sctp_sk(sk)->pd_lobby;
203 clear_pd = event->msg_flags & MSG_EOR; 236 else {
204 queue = &sk->sk_receive_queue; 237 clear_pd = event->msg_flags & MSG_EOR;
238 queue = &sk->sk_receive_queue;
239 }
240 } else {
241 /*
242 * If fragment interleave is enabled, we
243 * can queue this to the receive queue instead
244 * of the lobby.
245 */
246 if (sctp_sk(sk)->frag_interleave)
247 queue = &sk->sk_receive_queue;
248 else
249 queue = &sctp_sk(sk)->pd_lobby;
205 } 250 }
206 } else 251 }
207 queue = &sctp_sk(sk)->pd_lobby;
208
209 252
210 /* If we are harvesting multiple skbs they will be 253 /* If we are harvesting multiple skbs they will be
211 * collected on a list. 254 * collected on a list.
@@ -348,7 +391,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *qu
348 break; 391 break;
349 pos->next = pnext; 392 pos->next = pnext;
350 pos = pnext; 393 pos = pnext;
351 }; 394 }
352 395
353 event = sctp_skb2event(f_frag); 396 event = sctp_skb2event(f_frag);
354 SCTP_INC_STATS(SCTP_MIB_REASMUSRMSGS); 397 SCTP_INC_STATS(SCTP_MIB_REASMUSRMSGS);
@@ -367,6 +410,11 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
367 struct sk_buff *first_frag = NULL; 410 struct sk_buff *first_frag = NULL;
368 __u32 ctsn, next_tsn; 411 __u32 ctsn, next_tsn;
369 struct sctp_ulpevent *retval = NULL; 412 struct sctp_ulpevent *retval = NULL;
413 struct sk_buff *pd_first = NULL;
414 struct sk_buff *pd_last = NULL;
415 size_t pd_len = 0;
416 struct sctp_association *asoc;
417 u32 pd_point;
370 418
371 /* Initialized to 0 just to avoid compiler warning message. Will 419 /* Initialized to 0 just to avoid compiler warning message. Will
372 * never be used with this value. It is referenced only after it 420 * never be used with this value. It is referenced only after it
@@ -382,6 +430,10 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
382 * we expect to find the remaining middle fragments and the last 430 * we expect to find the remaining middle fragments and the last
383 * fragment in order. If not, first_frag is reset to NULL and we 431 * fragment in order. If not, first_frag is reset to NULL and we
384 * start the next pass when we find another first fragment. 432 * start the next pass when we find another first fragment.
433 *
434 * There is a potential to do partial delivery if the user sets
435 * the SCTP_PARTIAL_DELIVERY_POINT option. Let's count some things here
436 * to see if we can do PD.
385 */ 437 */
386 skb_queue_walk(&ulpq->reasm, pos) { 438 skb_queue_walk(&ulpq->reasm, pos) {
387 cevent = sctp_skb2event(pos); 439 cevent = sctp_skb2event(pos);
@@ -389,14 +441,32 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
389 441
390 switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) { 442 switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
391 case SCTP_DATA_FIRST_FRAG: 443 case SCTP_DATA_FIRST_FRAG:
444 /* If this "FIRST_FRAG" is the first
445 * element in the queue, then count it towards
446 * possible PD.
447 */
448 if (pos == ulpq->reasm.next) {
449 pd_first = pos;
450 pd_last = pos;
451 pd_len = pos->len;
452 } else {
453 pd_first = NULL;
454 pd_last = NULL;
455 pd_len = 0;
456 }
457
392 first_frag = pos; 458 first_frag = pos;
393 next_tsn = ctsn + 1; 459 next_tsn = ctsn + 1;
394 break; 460 break;
395 461
396 case SCTP_DATA_MIDDLE_FRAG: 462 case SCTP_DATA_MIDDLE_FRAG:
397 if ((first_frag) && (ctsn == next_tsn)) 463 if ((first_frag) && (ctsn == next_tsn)) {
398 next_tsn++; 464 next_tsn++;
399 else 465 if (pd_first) {
466 pd_last = pos;
467 pd_len += pos->len;
468 }
469 } else
400 first_frag = NULL; 470 first_frag = NULL;
401 break; 471 break;
402 472
@@ -406,8 +476,29 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
406 else 476 else
407 first_frag = NULL; 477 first_frag = NULL;
408 break; 478 break;
409 }; 479 }
480 }
410 481
482 asoc = ulpq->asoc;
483 if (pd_first) {
484 /* Make sure we can enter partial delivery.
485 * We can trigger partial delivery only if fragment
486 * interleave is set, or the socket is not already
487 * in partial delivery.
488 */
489 if (!sctp_sk(asoc->base.sk)->frag_interleave &&
490 atomic_read(&sctp_sk(asoc->base.sk)->pd_mode))
491 goto done;
492
493 cevent = sctp_skb2event(pd_first);
494 pd_point = sctp_sk(asoc->base.sk)->pd_point;
495 if (pd_point && pd_point <= pd_len) {
496 retval = sctp_make_reassembled_event(&ulpq->reasm,
497 pd_first,
498 pd_last);
499 if (retval)
500 sctp_ulpq_set_pd(ulpq);
501 }
411 } 502 }
412done: 503done:
413 return retval; 504 return retval;
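
A worked example of the pd_point test above, with assumed numbers: if SCTP_PARTIAL_DELIVERY_POINT is 4096 and in-order fragments of 1452 bytes arrive, pd_len is 1452 after the FIRST_FRAG, 2904 after one MIDDLE_FRAG, and 4356 after the next, so pd_point <= pd_len fires on the third fragment and a partial event is delivered before any LAST_FRAG shows up.
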
@@ -465,7 +556,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq
465 goto done; 556 goto done;
466 default: 557 default:
467 return NULL; 558 return NULL;
468 }; 559 }
469 } 560 }
470 561
471 /* We have the reassembled event. There is no need to look 562 /* We have the reassembled event. There is no need to look
@@ -557,7 +648,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *u
557 break; 648 break;
558 default: 649 default:
559 return NULL; 650 return NULL;
560 }; 651 }
561 } 652 }
562 653
563 /* We have the reassembled event. There is no need to look 654 /* We have the reassembled event. There is no need to look
@@ -826,19 +917,29 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
826{ 917{
827 struct sctp_ulpevent *event; 918 struct sctp_ulpevent *event;
828 struct sctp_association *asoc; 919 struct sctp_association *asoc;
920 struct sctp_sock *sp;
829 921
830 asoc = ulpq->asoc; 922 asoc = ulpq->asoc;
923 sp = sctp_sk(asoc->base.sk);
831 924
832 /* Are we already in partial delivery mode? */ 925 /* If the association is already in Partial Delivery mode,
833 if (!sctp_sk(asoc->base.sk)->pd_mode) { 926 * we have nothing to do.
927 */
928 if (ulpq->pd_mode)
929 return;
834 930
931 /* If the user enabled the fragment interleave socket option,
932 * multiple associations can enter partial delivery.
933 * Otherwise, we can only enter partial delivery if the
934 * socket is not in partial delivery mode.
935 */
936 if (sp->frag_interleave || atomic_read(&sp->pd_mode) == 0) {
835 /* Is partial delivery possible? */ 937 /* Is partial delivery possible? */
836 event = sctp_ulpq_retrieve_first(ulpq); 938 event = sctp_ulpq_retrieve_first(ulpq);
837 /* Send event to the ULP. */ 939 /* Send event to the ULP. */
838 if (event) { 940 if (event) {
839 sctp_ulpq_tail_event(ulpq, event); 941 sctp_ulpq_tail_event(ulpq, event);
840 sctp_sk(asoc->base.sk)->pd_mode = 1; 942 sctp_ulpq_set_pd(ulpq);
841 ulpq->pd_mode = 1;
842 return; 943 return;
843 } 944 }
844 } 945 }
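
The partial-delivery decision these SCTP hunks introduce reduces to: a contiguous prefix of fragments at the head of the reassembly queue (pd_first..pd_last, pd_len bytes) must reach the user-set pd_point, and either fragment interleave is enabled or no other association on the socket already holds partial-delivery mode. A minimal userspace sketch of that rule, with toy stand-ins for the kernel fields (struct pd_sock here is illustrative, not the kernel's sctp_sock):

#include <stddef.h>

/* Toy model of the fields consulted above -- not the kernel structs. */
struct pd_sock {
	int frag_interleave;   /* socket option: allow interleaved PD   */
	int pd_mode;           /* count of assocs already in PD mode    */
	size_t pd_point;       /* user threshold in bytes; 0 = disabled */
};

/* Nonzero when partial delivery may start for a contiguous prefix of
 * pd_len bytes at the head of the reassembly queue. */
static int can_partial_deliver(const struct pd_sock *sp, size_t pd_len)
{
	if (!sp->frag_interleave && sp->pd_mode)
		return 0;	/* another association already owns PD */
	return sp->pd_point != 0 && sp->pd_point <= pd_len;
}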
diff --git a/net/socket.c b/net/socket.c
index ea8f81abc45c..1ad62c08377b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -585,6 +585,37 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
585 return result; 585 return result;
586} 586}
587 587
588/*
589 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
590 */
591void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
592 struct sk_buff *skb)
593{
594 ktime_t kt = skb->tstamp;
595
596 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
597 struct timeval tv;
598 /* Race occurred between timestamp enabling and packet
599 receiving. Fill in the current time for now. */
600 if (kt.tv64 == 0)
601 kt = ktime_get_real();
602 skb->tstamp = kt;
603 tv = ktime_to_timeval(kt);
604 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
605 } else {
606 struct timespec ts;
607 /* Race occurred between timestamp enabling and packet
608 receiving. Fill in the current time for now. */
609 if (kt.tv64 == 0)
610 kt = ktime_get_real();
611 skb->tstamp = kt;
612 ts = ktime_to_timespec(kt);
613 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
614 }
615}
616
617EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
618
588static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, 619static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
589 struct msghdr *msg, size_t size, int flags) 620 struct msghdr *msg, size_t size, int flags)
590{ 621{
@@ -1292,7 +1323,7 @@ asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1292 int err, fput_needed; 1323 int err, fput_needed;
1293 1324
1294 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1325 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1295 if(sock) { 1326 if (sock) {
1296 err = move_addr_to_kernel(umyaddr, addrlen, address); 1327 err = move_addr_to_kernel(umyaddr, addrlen, address);
1297 if (err >= 0) { 1328 if (err >= 0) {
1298 err = security_socket_bind(sock, 1329 err = security_socket_bind(sock,
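
On the receiving side, __sock_recv_timestamp surfaces as an SCM_TIMESTAMP (struct timeval) or SCM_TIMESTAMPNS (struct timespec) control message, depending on which socket flag is set. A hedged userspace sketch of consuming the timeval variant; it assumes the caller has already enabled SO_TIMESTAMP via setsockopt():

#include <string.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/uio.h>

/* Read one datagram and pull out the SCM_TIMESTAMP control message
 * that the kernel path above fills in. Returns 0 on success. */
static int recv_with_timestamp(int fd, void *buf, size_t len,
			       struct timeval *tv)
{
	char cbuf[CMSG_SPACE(sizeof(struct timeval))];
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cm;

	if (recvmsg(fd, &msg, 0) < 0)
		return -1;
	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
		if (cm->cmsg_level == SOL_SOCKET &&
		    cm->cmsg_type == SCM_TIMESTAMP) {
			memcpy(tv, CMSG_DATA(cm), sizeof(*tv));
			return 0;
		}
	}
	return -1;	/* no timestamp attached */
}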
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index f02f24ae9468..543b085ae2c1 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1237,20 +1237,12 @@ static int content_open(struct inode *inode, struct file *file)
1237 1237
1238 return res; 1238 return res;
1239} 1239}
1240static int content_release(struct inode *inode, struct file *file)
1241{
1242 struct seq_file *m = (struct seq_file *)file->private_data;
1243 struct handle *han = m->private;
1244 kfree(han);
1245 m->private = NULL;
1246 return seq_release(inode, file);
1247}
1248 1240
1249static const struct file_operations content_file_operations = { 1241static const struct file_operations content_file_operations = {
1250 .open = content_open, 1242 .open = content_open,
1251 .read = seq_read, 1243 .read = seq_read,
1252 .llseek = seq_lseek, 1244 .llseek = seq_lseek,
1253 .release = content_release, 1245 .release = seq_release_private,
1254}; 1246};
1255 1247
1256static ssize_t read_flush(struct file *file, char __user *buf, 1248static ssize_t read_flush(struct file *file, char __user *buf,
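
The deleted content_release was an open-coded copy of seq_release_private(), which kfree()s seq_file->private and then does the normal seq_release(). The usual pairing looks roughly like this sketch (struct handle and example_seq_ops stand in for the real cache iterator, they are not the sunrpc code):

#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>

struct handle { int pos; };		/* stand-in per-open cursor type */
static const struct seq_operations example_seq_ops; /* start/next/stop/show elided */

/* Open allocates the per-open cursor; seq_release_private() kfree()s
 * seq_file->private and then releases the seq_file itself. */
static int example_open(struct inode *inode, struct file *file)
{
	struct handle *han = kmalloc(sizeof(*han), GFP_KERNEL);
	int res;

	if (!han)
		return -ENOMEM;
	res = seq_open(file, &example_seq_ops);
	if (res)
		kfree(han);
	else
		((struct seq_file *)file->private_data)->private = han;
	return res;
}

static const struct file_operations example_fops = {
	.open    = example_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};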
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 634885b0c04d..1d377d1ab7f4 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -154,7 +154,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
154 desc.offset = sizeof(struct udphdr); 154 desc.offset = sizeof(struct udphdr);
155 desc.count = skb->len - desc.offset; 155 desc.count = skb->len - desc.offset;
156 156
157 if (skb->ip_summed == CHECKSUM_UNNECESSARY) 157 if (skb_csum_unnecessary(skb))
158 goto no_checksum; 158 goto no_checksum;
159 159
160 desc.csum = csum_partial(skb->data, desc.offset, skb->csum); 160 desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
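
skb_csum_unnecessary() is, at this point, just a named form of the open-coded test it replaces; from memory of the contemporaneous <linux/skbuff.h> (treat the exact form as an assumption):

static inline int skb_csum_unnecessary(const struct sk_buff *skb)
{
	return skb->ip_summed == CHECKSUM_UNNECESSARY;
}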
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 2772fee93881..22f61aee4824 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -798,16 +798,12 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
798 dprintk("svc: recvfrom returned error %d\n", -err); 798 dprintk("svc: recvfrom returned error %d\n", -err);
799 } 799 }
800 rqstp->rq_addrlen = sizeof(rqstp->rq_addr); 800 rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
801 if (skb->tstamp.off_sec == 0) { 801 if (skb->tstamp.tv64 == 0) {
802 struct timeval tv; 802 skb->tstamp = ktime_get_real();
803
804 tv.tv_sec = xtime.tv_sec;
805 tv.tv_usec = xtime.tv_nsec / NSEC_PER_USEC;
806 skb_set_timestamp(skb, &tv);
807 /* Don't enable netstamp, sunrpc doesn't 803 /* Don't enable netstamp, sunrpc doesn't
808 need that much accuracy */ 804 need that much accuracy */
809 } 805 }
810 skb_get_timestamp(skb, &svsk->sk_sk->sk_stamp); 806 svsk->sk_sk->sk_stamp = skb->tstamp;
811 set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */ 807 set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */
812 808
813 /* 809 /*
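
Both this hunk and the net/socket.c one above lean on the same idiom: skb->tstamp is now a ktime_t, a raw value of zero means "never stamped", and the fix-up is to backfill wall-clock time before exporting the stamp. As a sketch:

#include <linux/ktime.h>
#include <linux/skbuff.h>

/* Backfill a missing timestamp; tv64 == 0 is the "unset" sentinel. */
static inline void skb_backfill_tstamp(struct sk_buff *skb)
{
	if (skb->tstamp.tv64 == 0)
		skb->tstamp = ktime_get_real();
}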
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 14789a82de53..c71337a22d33 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -89,7 +89,7 @@ struct sk_buff *tipc_cfg_reply_alloc(int payload_size)
89int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type, 89int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type,
90 void *tlv_data, int tlv_data_size) 90 void *tlv_data, int tlv_data_size)
91{ 91{
92 struct tlv_desc *tlv = (struct tlv_desc *)buf->tail; 92 struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(buf);
93 int new_tlv_space = TLV_SPACE(tlv_data_size); 93 int new_tlv_space = TLV_SPACE(tlv_data_size);
94 94
95 if (skb_tailroom(buf) < new_tlv_space) { 95 if (skb_tailroom(buf) < new_tlv_space) {
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 9be4839e32c5..67bb29b44d1b 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -73,7 +73,7 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
73 73
74 clone = skb_clone(buf, GFP_ATOMIC); 74 clone = skb_clone(buf, GFP_ATOMIC);
75 if (clone) { 75 if (clone) {
76 clone->nh.raw = clone->data; 76 skb_reset_network_header(clone);
77 dev = ((struct eth_bearer *)(tb_ptr->usr_handle))->dev; 77 dev = ((struct eth_bearer *)(tb_ptr->usr_handle))->dev;
78 clone->dev = dev; 78 clone->dev = dev;
79 dev->hard_header(clone, dev, ETH_P_TIPC, 79 dev->hard_header(clone, dev, ETH_P_TIPC,
@@ -99,8 +99,8 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev,
99 99
100 if (likely(eb_ptr->bearer)) { 100 if (likely(eb_ptr->bearer)) {
101 if (likely(!dev->promiscuity) || 101 if (likely(!dev->promiscuity) ||
102 !memcmp(buf->mac.raw,dev->dev_addr,ETH_ALEN) || 102 !memcmp(skb_mac_header(buf), dev->dev_addr, ETH_ALEN) ||
103 !memcmp(buf->mac.raw,dev->broadcast,ETH_ALEN)) { 103 !memcmp(skb_mac_header(buf), dev->broadcast, ETH_ALEN)) {
104 size = msg_size((struct tipc_msg *)buf->data); 104 size = msg_size((struct tipc_msg *)buf->data);
105 skb_trim(buf, size); 105 skb_trim(buf, size);
106 if (likely(buf->len == size)) { 106 if (likely(buf->len == size)) {
@@ -140,7 +140,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
140 return -EDQUOT; 140 return -EDQUOT;
141 if (!eb_ptr->dev) { 141 if (!eb_ptr->dev) {
142 eb_ptr->dev = dev; 142 eb_ptr->dev = dev;
143 eb_ptr->tipc_packet_type.type = __constant_htons(ETH_P_TIPC); 143 eb_ptr->tipc_packet_type.type = htons(ETH_P_TIPC);
144 eb_ptr->tipc_packet_type.dev = dev; 144 eb_ptr->tipc_packet_type.dev = dev;
145 eb_ptr->tipc_packet_type.func = recv_msg; 145 eb_ptr->tipc_packet_type.func = recv_msg;
146 eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr; 146 eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr;
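
The config.c and eth_media.c conversions above (and the similar ones in the TIPC and wanrouter hunks below) all follow one mapping from the removed sk_buff pointer unions to the new accessor helpers; collected in one sketch:

#include <linux/skbuff.h>

static void accessor_examples(struct sk_buff *skb)
{
	skb_reset_network_header(skb);	/* was: skb->nh.raw  = skb->data */
	skb_reset_mac_header(skb);	/* was: skb->mac.raw = skb->data */
	(void)skb_mac_header(skb);	/* was: skb->mac.raw (read)      */
	(void)skb_tail_pointer(skb);	/* was: skb->tail (read)         */
}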
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 71c2f2fd405c..2124f32ef29f 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1001,7 +1001,7 @@ static int link_bundle_buf(struct link *l_ptr,
1001 return 0; 1001 return 0;
1002 1002
1003 skb_put(bundler, pad + size); 1003 skb_put(bundler, pad + size);
1004 memcpy(bundler->data + to_pos, buf->data, size); 1004 skb_copy_to_linear_data_offset(bundler, to_pos, buf->data, size);
1005 msg_set_size(bundler_msg, to_pos + size); 1005 msg_set_size(bundler_msg, to_pos + size);
1006 msg_set_msgcnt(bundler_msg, msg_msgcnt(bundler_msg) + 1); 1006 msg_set_msgcnt(bundler_msg, msg_msgcnt(bundler_msg) + 1);
1007 dbg("Packed msg # %u(%u octets) into pos %u in buf(#%u)\n", 1007 dbg("Packed msg # %u(%u octets) into pos %u in buf(#%u)\n",
@@ -1109,8 +1109,8 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
1109 if (bundler) { 1109 if (bundler) {
1110 msg_init(&bundler_hdr, MSG_BUNDLER, OPEN_MSG, 1110 msg_init(&bundler_hdr, MSG_BUNDLER, OPEN_MSG,
1111 TIPC_OK, INT_H_SIZE, l_ptr->addr); 1111 TIPC_OK, INT_H_SIZE, l_ptr->addr);
1112 memcpy(bundler->data, (unchar *)&bundler_hdr, 1112 skb_copy_to_linear_data(bundler, &bundler_hdr,
1113 INT_H_SIZE); 1113 INT_H_SIZE);
1114 skb_trim(bundler, INT_H_SIZE); 1114 skb_trim(bundler, INT_H_SIZE);
1115 link_bundle_buf(l_ptr, bundler, buf); 1115 link_bundle_buf(l_ptr, bundler, buf);
1116 buf = bundler; 1116 buf = bundler;
@@ -1383,9 +1383,9 @@ again:
1383 if (!buf) 1383 if (!buf)
1384 return -ENOMEM; 1384 return -ENOMEM;
1385 buf->next = NULL; 1385 buf->next = NULL;
1386 memcpy(buf->data, (unchar *)&fragm_hdr, INT_H_SIZE); 1386 skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE);
1387 hsz = msg_hdr_sz(hdr); 1387 hsz = msg_hdr_sz(hdr);
1388 memcpy(buf->data + INT_H_SIZE, (unchar *)hdr, hsz); 1388 skb_copy_to_linear_data_offset(buf, INT_H_SIZE, hdr, hsz);
1389 msg_dbg(buf_msg(buf), ">BUILD>"); 1389 msg_dbg(buf_msg(buf), ">BUILD>");
1390 1390
1391 /* Chop up message: */ 1391 /* Chop up message: */
@@ -1416,8 +1416,8 @@ error:
1416 return -EFAULT; 1416 return -EFAULT;
1417 } 1417 }
1418 } else 1418 } else
1419 memcpy(buf->data + fragm_crs, sect_crs, sz); 1419 skb_copy_to_linear_data_offset(buf, fragm_crs,
1420 1420 sect_crs, sz);
1421 sect_crs += sz; 1421 sect_crs += sz;
1422 sect_rest -= sz; 1422 sect_rest -= sz;
1423 fragm_crs += sz; 1423 fragm_crs += sz;
@@ -1442,7 +1442,7 @@ error:
1442 1442
1443 buf->next = NULL; 1443 buf->next = NULL;
1444 prev->next = buf; 1444 prev->next = buf;
1445 memcpy(buf->data, (unchar *)&fragm_hdr, INT_H_SIZE); 1445 skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE);
1446 fragm_crs = INT_H_SIZE; 1446 fragm_crs = INT_H_SIZE;
1447 fragm_rest = fragm_sz; 1447 fragm_rest = fragm_sz;
1448 msg_dbg(buf_msg(buf)," >BUILD>"); 1448 msg_dbg(buf_msg(buf)," >BUILD>");
@@ -2130,7 +2130,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
2130 buf = l_ptr->proto_msg_queue; 2130 buf = l_ptr->proto_msg_queue;
2131 if (!buf) 2131 if (!buf)
2132 return; 2132 return;
2133 memcpy(buf->data, (unchar *)msg, sizeof(l_ptr->proto_msg)); 2133 skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg));
2134 return; 2134 return;
2135 } 2135 }
2136 msg_set_timestamp(msg, jiffies_to_msecs(jiffies)); 2136 msg_set_timestamp(msg, jiffies_to_msecs(jiffies));
@@ -2143,7 +2143,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
2143 if (!buf) 2143 if (!buf)
2144 return; 2144 return;
2145 2145
2146 memcpy(buf->data, (unchar *)msg, sizeof(l_ptr->proto_msg)); 2146 skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg));
2147 msg_set_size(buf_msg(buf), msg_size); 2147 msg_set_size(buf_msg(buf), msg_size);
2148 2148
2149 if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) { 2149 if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
@@ -2319,8 +2319,8 @@ void tipc_link_tunnel(struct link *l_ptr,
2319 "unable to send tunnel msg\n"); 2319 "unable to send tunnel msg\n");
2320 return; 2320 return;
2321 } 2321 }
2322 memcpy(buf->data, (unchar *)tunnel_hdr, INT_H_SIZE); 2322 skb_copy_to_linear_data(buf, tunnel_hdr, INT_H_SIZE);
2323 memcpy(buf->data + INT_H_SIZE, (unchar *)msg, length); 2323 skb_copy_to_linear_data_offset(buf, INT_H_SIZE, msg, length);
2324 dbg("%c->%c:", l_ptr->b_ptr->net_plane, tunnel->b_ptr->net_plane); 2324 dbg("%c->%c:", l_ptr->b_ptr->net_plane, tunnel->b_ptr->net_plane);
2325 msg_dbg(buf_msg(buf), ">SEND>"); 2325 msg_dbg(buf_msg(buf), ">SEND>");
2326 tipc_link_send_buf(tunnel, buf); 2326 tipc_link_send_buf(tunnel, buf);
@@ -2361,7 +2361,7 @@ void tipc_link_changeover(struct link *l_ptr)
2361 2361
2362 buf = buf_acquire(INT_H_SIZE); 2362 buf = buf_acquire(INT_H_SIZE);
2363 if (buf) { 2363 if (buf) {
2364 memcpy(buf->data, (unchar *)&tunnel_hdr, INT_H_SIZE); 2364 skb_copy_to_linear_data(buf, &tunnel_hdr, INT_H_SIZE);
2365 msg_set_size(&tunnel_hdr, INT_H_SIZE); 2365 msg_set_size(&tunnel_hdr, INT_H_SIZE);
2366 dbg("%c->%c:", l_ptr->b_ptr->net_plane, 2366 dbg("%c->%c:", l_ptr->b_ptr->net_plane,
2367 tunnel->b_ptr->net_plane); 2367 tunnel->b_ptr->net_plane);
@@ -2426,8 +2426,9 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel)
2426 "unable to send duplicate msg\n"); 2426 "unable to send duplicate msg\n");
2427 return; 2427 return;
2428 } 2428 }
2429 memcpy(outbuf->data, (unchar *)&tunnel_hdr, INT_H_SIZE); 2429 skb_copy_to_linear_data(outbuf, &tunnel_hdr, INT_H_SIZE);
2430 memcpy(outbuf->data + INT_H_SIZE, iter->data, length); 2430 skb_copy_to_linear_data_offset(outbuf, INT_H_SIZE, iter->data,
2431 length);
2431 dbg("%c->%c:", l_ptr->b_ptr->net_plane, 2432 dbg("%c->%c:", l_ptr->b_ptr->net_plane,
2432 tunnel->b_ptr->net_plane); 2433 tunnel->b_ptr->net_plane);
2433 msg_dbg(buf_msg(outbuf), ">SEND>"); 2434 msg_dbg(buf_msg(outbuf), ">SEND>");
@@ -2457,7 +2458,7 @@ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos)
2457 2458
2458 eb = buf_acquire(size); 2459 eb = buf_acquire(size);
2459 if (eb) 2460 if (eb)
2460 memcpy(eb->data, (unchar *)msg, size); 2461 skb_copy_to_linear_data(eb, msg, size);
2461 return eb; 2462 return eb;
2462} 2463}
2463 2464
@@ -2569,7 +2570,7 @@ void tipc_link_recv_bundle(struct sk_buff *buf)
2569 if (obuf == NULL) { 2570 if (obuf == NULL) {
2570 warn("Link unable to unbundle message(s)\n"); 2571 warn("Link unable to unbundle message(s)\n");
2571 break; 2572 break;
2572 }; 2573 }
2573 pos += align(msg_size(buf_msg(obuf))); 2574 pos += align(msg_size(buf_msg(obuf)));
2574 msg_dbg(buf_msg(obuf), " /"); 2575 msg_dbg(buf_msg(obuf), " /");
2575 tipc_net_route_msg(obuf); 2576 tipc_net_route_msg(obuf);
@@ -2631,9 +2632,9 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
2631 goto exit; 2632 goto exit;
2632 } 2633 }
2633 msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE); 2634 msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE);
2634 memcpy(fragm->data, (unchar *)&fragm_hdr, INT_H_SIZE); 2635 skb_copy_to_linear_data(fragm, &fragm_hdr, INT_H_SIZE);
2635 memcpy(fragm->data + INT_H_SIZE, crs, fragm_sz); 2636 skb_copy_to_linear_data_offset(fragm, INT_H_SIZE, crs,
2636 2637 fragm_sz);
2637 /* Send queued messages first, if any: */ 2638 /* Send queued messages first, if any: */
2638 2639
2639 l_ptr->stats.sent_fragments++; 2640 l_ptr->stats.sent_fragments++;
@@ -2733,8 +2734,8 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
2733 if (pbuf != NULL) { 2734 if (pbuf != NULL) {
2734 pbuf->next = *pending; 2735 pbuf->next = *pending;
2735 *pending = pbuf; 2736 *pending = pbuf;
2736 memcpy(pbuf->data, (unchar *)imsg, msg_data_sz(fragm)); 2737 skb_copy_to_linear_data(pbuf, imsg,
2737 2738 msg_data_sz(fragm));
2738 /* Prepare buffer for subsequent fragments. */ 2739 /* Prepare buffer for subsequent fragments. */
2739 2740
2740 set_long_msg_seqno(pbuf, long_msg_seq_no); 2741 set_long_msg_seqno(pbuf, long_msg_seq_no);
@@ -2750,7 +2751,8 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
2750 u32 fsz = get_fragm_size(pbuf); 2751 u32 fsz = get_fragm_size(pbuf);
2751 u32 crs = ((msg_fragm_no(fragm) - 1) * fsz); 2752 u32 crs = ((msg_fragm_no(fragm) - 1) * fsz);
2752 u32 exp_frags = get_expected_frags(pbuf) - 1; 2753 u32 exp_frags = get_expected_frags(pbuf) - 1;
2753 memcpy(pbuf->data + crs, msg_data(fragm), dsz); 2754 skb_copy_to_linear_data_offset(pbuf, crs,
2755 msg_data(fragm), dsz);
2754 buf_discard(fbuf); 2756 buf_discard(fbuf);
2755 2757
2756 /* Is message complete? */ 2758 /* Is message complete? */
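
Every memcpy-into-skb conversion in the link.c hunks above uses the same two wrappers. As best I recall the contemporaneous <linux/skbuff.h> they are thin memcpy aliases (exact signatures are an assumption); the payoff is greppable intent, not new behaviour:

#include <linux/skbuff.h>
#include <linux/string.h>

/* Believed shape of skb_copy_to_linear_data{,_offset}, renamed here
 * to mark this as a sketch rather than the real definitions. */
static inline void my_copy_to_linear_data(struct sk_buff *skb,
					  const void *from,
					  const unsigned int len)
{
	memcpy(skb->data, from, len);
}

static inline void my_copy_to_linear_data_offset(struct sk_buff *skb,
						 const int offset,
						 const void *from,
						 const unsigned int len)
{
	memcpy(skb->data + offset, from, len);
}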
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 62d549063604..35d5ba1d4f42 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -1,8 +1,8 @@
1/* 1/*
2 * net/tipc/msg.h: Include file for TIPC message header routines 2 * net/tipc/msg.h: Include file for TIPC message header routines
3 * 3 *
4 * Copyright (c) 2000-2006, Ericsson AB 4 * Copyright (c) 2000-2007, Ericsson AB
5 * Copyright (c) 2005, Wind River Systems 5 * Copyright (c) 2005-2007, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -71,8 +71,11 @@ static inline void msg_set_word(struct tipc_msg *m, u32 w, u32 val)
71static inline void msg_set_bits(struct tipc_msg *m, u32 w, 71static inline void msg_set_bits(struct tipc_msg *m, u32 w,
72 u32 pos, u32 mask, u32 val) 72 u32 pos, u32 mask, u32 val)
73{ 73{
74 u32 word = msg_word(m,w) & ~(mask << pos); 74 val = (val & mask) << pos;
75 msg_set_word(m, w, (word |= (val << pos))); 75 val = htonl(val);
76 mask = htonl(mask << pos);
77 m->hdr[w] &= ~mask;
78 m->hdr[w] |= val;
76} 79}
77 80
78/* 81/*
@@ -786,15 +789,16 @@ static inline int msg_build(struct tipc_msg *hdr,
786 *buf = buf_acquire(sz); 789 *buf = buf_acquire(sz);
787 if (!(*buf)) 790 if (!(*buf))
788 return -ENOMEM; 791 return -ENOMEM;
789 memcpy((*buf)->data, (unchar *)hdr, hsz); 792 skb_copy_to_linear_data(*buf, hdr, hsz);
790 for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) { 793 for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) {
791 if (likely(usrmem)) 794 if (likely(usrmem))
792 res = !copy_from_user((*buf)->data + pos, 795 res = !copy_from_user((*buf)->data + pos,
793 msg_sect[cnt].iov_base, 796 msg_sect[cnt].iov_base,
794 msg_sect[cnt].iov_len); 797 msg_sect[cnt].iov_len);
795 else 798 else
796 memcpy((*buf)->data + pos, msg_sect[cnt].iov_base, 799 skb_copy_to_linear_data_offset(*buf, pos,
797 msg_sect[cnt].iov_len); 800 msg_sect[cnt].iov_base,
801 msg_sect[cnt].iov_len);
798 pos += msg_sect[cnt].iov_len; 802 pos += msg_sect[cnt].iov_len;
799 } 803 }
800 if (likely(res)) 804 if (likely(res))
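
The msg_set_bits() rewrite does two things: it updates the header word in network byte order directly (one htonl of the mask and of the value instead of a read-modify-write through msg_word()/msg_set_word()), and it masks val before shifting so an oversized value can no longer spill into neighbouring fields. A standalone worked example of the same byte-order-safe update:

#include <arpa/inet.h>
#include <assert.h>
#include <stdint.h>

/* Toy model of the new msg_set_bits(): patch a bit field inside a
 * 32-bit header word that is stored in network byte order. */
static void set_bits_be(uint32_t *hdr_word, uint32_t pos, uint32_t mask,
			uint32_t val)
{
	val = htonl((val & mask) << pos);	/* clamp, shift, byte-swap */
	*hdr_word &= ~htonl(mask << pos);	/* clear the field         */
	*hdr_word |= val;			/* set the new value       */
}

int main(void)
{
	uint32_t w = htonl(0x12345678);

	set_bits_be(&w, 8, 0xff, 0xAB);		/* replace bits 8..15 */
	assert(ntohl(w) == 0x1234AB78);
	return 0;
}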
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index b8e1edc2badc..4cdafa2d1d4d 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -57,7 +57,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info)
57 57
58 if (rep_buf) { 58 if (rep_buf) {
59 skb_push(rep_buf, hdr_space); 59 skb_push(rep_buf, hdr_space);
60 rep_nlh = (struct nlmsghdr *)rep_buf->data; 60 rep_nlh = nlmsg_hdr(rep_buf);
61 memcpy(rep_nlh, req_nlh, hdr_space); 61 memcpy(rep_nlh, req_nlh, hdr_space);
62 rep_nlh->nlmsg_len = rep_buf->len; 62 rep_nlh->nlmsg_len = rep_buf->len;
63 genlmsg_unicast(rep_buf, req_nlh->nlmsg_pid); 63 genlmsg_unicast(rep_buf, req_nlh->nlmsg_pid);
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 5f8217d4b452..bcd5da00737b 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -464,7 +464,7 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err)
464 msg_set_size(rmsg, data_sz + hdr_sz); 464 msg_set_size(rmsg, data_sz + hdr_sz);
465 msg_set_nametype(rmsg, msg_nametype(msg)); 465 msg_set_nametype(rmsg, msg_nametype(msg));
466 msg_set_nameinst(rmsg, msg_nameinst(msg)); 466 msg_set_nameinst(rmsg, msg_nameinst(msg));
467 memcpy(rbuf->data + hdr_sz, msg_data(msg), data_sz); 467 skb_copy_to_linear_data_offset(rbuf, hdr_sz, msg_data(msg), data_sz);
468 468
469 /* send self-abort message when rejecting on a connected port */ 469 /* send self-abort message when rejecting on a connected port */
470 if (msg_connected(msg)) { 470 if (msg_connected(msg)) {
@@ -1419,7 +1419,7 @@ int tipc_send_buf(u32 ref, struct sk_buff *buf, unsigned int dsz)
1419 return -ENOMEM; 1419 return -ENOMEM;
1420 1420
1421 skb_push(buf, hsz); 1421 skb_push(buf, hsz);
1422 memcpy(buf->data, (unchar *)msg, hsz); 1422 skb_copy_to_linear_data(buf, msg, hsz);
1423 destnode = msg_destnode(msg); 1423 destnode = msg_destnode(msg);
1424 p_ptr->publ.congested = 1; 1424 p_ptr->publ.congested = 1;
1425 if (!tipc_port_congested(p_ptr)) { 1425 if (!tipc_port_congested(p_ptr)) {
@@ -1555,7 +1555,7 @@ int tipc_forward_buf2name(u32 ref,
1555 if (skb_cow(buf, LONG_H_SIZE)) 1555 if (skb_cow(buf, LONG_H_SIZE))
1556 return -ENOMEM; 1556 return -ENOMEM;
1557 skb_push(buf, LONG_H_SIZE); 1557 skb_push(buf, LONG_H_SIZE);
1558 memcpy(buf->data, (unchar *)msg, LONG_H_SIZE); 1558 skb_copy_to_linear_data(buf, msg, LONG_H_SIZE);
1559 msg_dbg(buf_msg(buf),"PREP:"); 1559 msg_dbg(buf_msg(buf),"PREP:");
1560 if (likely(destport || destnode)) { 1560 if (likely(destport || destnode)) {
1561 p_ptr->sent++; 1561 p_ptr->sent++;
@@ -1679,7 +1679,7 @@ int tipc_forward_buf2port(u32 ref,
1679 return -ENOMEM; 1679 return -ENOMEM;
1680 1680
1681 skb_push(buf, DIR_MSG_H_SIZE); 1681 skb_push(buf, DIR_MSG_H_SIZE);
1682 memcpy(buf->data, (unchar *)msg, DIR_MSG_H_SIZE); 1682 skb_copy_to_linear_data(buf, msg, DIR_MSG_H_SIZE);
1683 msg_dbg(msg, "buf2port: "); 1683 msg_dbg(msg, "buf2port: ");
1684 p_ptr->sent++; 1684 p_ptr->sent++;
1685 if (dest->node == tipc_own_addr) 1685 if (dest->node == tipc_own_addr)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index b71739fbe2c6..45832fb75ea4 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1020,7 +1020,7 @@ restart:
1020 1020
1021 if (!err) { 1021 if (!err) {
1022 buf_crs = (unsigned char *)(TIPC_SKB_CB(buf)->handle); 1022 buf_crs = (unsigned char *)(TIPC_SKB_CB(buf)->handle);
1023 sz = buf->tail - buf_crs; 1023 sz = skb_tail_pointer(buf) - buf_crs;
1024 1024
1025 needed = (buf_len - sz_copied); 1025 needed = (buf_len - sz_copied);
1026 sz_to_copy = (sz <= needed) ? sz : needed; 1026 sz_to_copy = (sz <= needed) ? sz : needed;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 606971645b33..aec8cf165e1a 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1319,7 +1319,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1319 unix_attach_fds(siocb->scm, skb); 1319 unix_attach_fds(siocb->scm, skb);
1320 unix_get_secdata(siocb->scm, skb); 1320 unix_get_secdata(siocb->scm, skb);
1321 1321
1322 skb->h.raw = skb->data; 1322 skb_reset_transport_header(skb);
1323 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); 1323 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1324 if (err) 1324 if (err)
1325 goto out_free; 1325 goto out_free;
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index 5d2d93dc0837..7a19e0ede289 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -277,8 +277,8 @@ int wanrouter_encapsulate(struct sk_buff *skb, struct net_device *dev,
277 skb_push(skb, 7); 277 skb_push(skb, 7);
278 skb->data[0] = 0; 278 skb->data[0] = 0;
279 skb->data[1] = NLPID_SNAP; 279 skb->data[1] = NLPID_SNAP;
280 memcpy(&skb->data[2], wanrouter_oui_ether, 280 skb_copy_to_linear_data_offset(skb, 2, wanrouter_oui_ether,
281 sizeof(wanrouter_oui_ether)); 281 sizeof(wanrouter_oui_ether));
282 *((unsigned short*)&skb->data[5]) = htons(type); 282 *((unsigned short*)&skb->data[5]) = htons(type);
283 break; 283 break;
284 284
@@ -339,7 +339,7 @@ __be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev)
339 skb->protocol = ethertype; 339 skb->protocol = ethertype;
340 skb->pkt_type = PACKET_HOST; /* Physically point to point */ 340 skb->pkt_type = PACKET_HOST; /* Physically point to point */
341 skb_pull(skb, cnt); 341 skb_pull(skb, cnt);
342 skb->mac.raw = skb->data; 342 skb_reset_mac_header(skb);
343 return ethertype; 343 return ethertype;
344} 344}
345 345
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
new file mode 100644
index 000000000000..a228d56a91b8
--- /dev/null
+++ b/net/wireless/Kconfig
@@ -0,0 +1,16 @@
1config CFG80211
2 tristate "Improved wireless configuration API"
3
4config WIRELESS_EXT
5 bool "Wireless extensions"
6 default n
7 ---help---
8 This option enables the legacy wireless extensions
 9 (wireless network interface configuration via ioctls).
10
11 Wireless extensions will be replaced by cfg80211 and
12 will be required only by legacy drivers that implement
13 wireless extension handlers.
14
15 Say N (if you can) unless you know you need wireless
16 extensions for external modules.
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
new file mode 100644
index 000000000000..3a96ae60271c
--- /dev/null
+++ b/net/wireless/Makefile
@@ -0,0 +1,4 @@
1obj-$(CONFIG_WIRELESS_EXT) += wext.o
2obj-$(CONFIG_CFG80211) += cfg80211.o
3
4cfg80211-y += core.o sysfs.o
diff --git a/net/wireless/core.c b/net/wireless/core.c
new file mode 100644
index 000000000000..7eabd55417a5
--- /dev/null
+++ b/net/wireless/core.c
@@ -0,0 +1,224 @@
1/*
2 * This is the linux wireless configuration interface.
3 *
4 * Copyright 2006, 2007 Johannes Berg <johannes@sipsolutions.net>
5 */
6
7#include <linux/if.h>
8#include <linux/module.h>
9#include <linux/err.h>
10#include <linux/mutex.h>
11#include <linux/list.h>
12#include <linux/nl80211.h>
13#include <linux/debugfs.h>
14#include <linux/notifier.h>
15#include <linux/device.h>
16#include <net/genetlink.h>
17#include <net/cfg80211.h>
18#include <net/wireless.h>
19#include "core.h"
20#include "sysfs.h"
21
22/* name for sysfs, %d is appended */
23#define PHY_NAME "phy"
24
25MODULE_AUTHOR("Johannes Berg");
26MODULE_LICENSE("GPL");
27MODULE_DESCRIPTION("wireless configuration support");
28
29/* RCU might be appropriate here since we usually
30 * only read the list, and that can happen quite
31 * often because we need to do it for each command */
32LIST_HEAD(cfg80211_drv_list);
33DEFINE_MUTEX(cfg80211_drv_mutex);
34static int wiphy_counter;
35
36/* for debugfs */
37static struct dentry *ieee80211_debugfs_dir;
38
39/* exported functions */
40
41struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv)
42{
43 struct cfg80211_registered_device *drv;
44 int alloc_size;
45
46 alloc_size = sizeof(*drv) + sizeof_priv;
47
48 drv = kzalloc(alloc_size, GFP_KERNEL);
49 if (!drv)
50 return NULL;
51
52 drv->ops = ops;
53
54 mutex_lock(&cfg80211_drv_mutex);
55
56 drv->idx = wiphy_counter;
57
58 /* now increase counter for the next device unless
59 * it has wrapped previously */
60 if (wiphy_counter >= 0)
61 wiphy_counter++;
62
63 mutex_unlock(&cfg80211_drv_mutex);
64
65 if (unlikely(drv->idx < 0)) {
66 /* ugh, wrapped! */
67 kfree(drv);
68 return NULL;
69 }
70
71 /* give it a proper name */
72 snprintf(drv->wiphy.dev.bus_id, BUS_ID_SIZE,
73 PHY_NAME "%d", drv->idx);
74
75 mutex_init(&drv->mtx);
76 mutex_init(&drv->devlist_mtx);
77 INIT_LIST_HEAD(&drv->netdev_list);
78
79 device_initialize(&drv->wiphy.dev);
80 drv->wiphy.dev.class = &ieee80211_class;
81 drv->wiphy.dev.platform_data = drv;
82
83 return &drv->wiphy;
84}
85EXPORT_SYMBOL(wiphy_new);
86
87int wiphy_register(struct wiphy *wiphy)
88{
89 struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy);
90 int res;
91
92 mutex_lock(&cfg80211_drv_mutex);
93
94 res = device_add(&drv->wiphy.dev);
95 if (res)
96 goto out_unlock;
97
98 list_add(&drv->list, &cfg80211_drv_list);
99
100 /* add to debugfs */
101 drv->wiphy.debugfsdir =
102 debugfs_create_dir(wiphy_name(&drv->wiphy),
103 ieee80211_debugfs_dir);
104
105 res = 0;
106out_unlock:
107 mutex_unlock(&cfg80211_drv_mutex);
108 return res;
109}
110EXPORT_SYMBOL(wiphy_register);
111
112void wiphy_unregister(struct wiphy *wiphy)
113{
114 struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy);
115
116 /* protect the device list */
117 mutex_lock(&cfg80211_drv_mutex);
118
119 BUG_ON(!list_empty(&drv->netdev_list));
120
121 /*
122 * Try to grab drv->mtx. If a command is still in progress,
123 * hopefully the driver will refuse it since it's tearing
124 * down the device already. We wait for this command to complete
125 * before unlinking the item from the list.
126 * Note: as codified by the BUG_ON above we cannot get here if
127 * a virtual interface is still associated. Hence, we can only
128 * get to lock contention here if userspace issues a command
129 * that identified the hardware by wiphy index.
130 */
131 mutex_lock(&drv->mtx);
132 /* unlock again before freeing */
133 mutex_unlock(&drv->mtx);
134
135 list_del(&drv->list);
136 device_del(&drv->wiphy.dev);
137 debugfs_remove(drv->wiphy.debugfsdir);
138
139 mutex_unlock(&cfg80211_drv_mutex);
140}
141EXPORT_SYMBOL(wiphy_unregister);
142
143void cfg80211_dev_free(struct cfg80211_registered_device *drv)
144{
145 mutex_destroy(&drv->mtx);
146 mutex_destroy(&drv->devlist_mtx);
147 kfree(drv);
148}
149
150void wiphy_free(struct wiphy *wiphy)
151{
152 put_device(&wiphy->dev);
153}
154EXPORT_SYMBOL(wiphy_free);
155
156static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
157 unsigned long state,
158 void *ndev)
159{
160 struct net_device *dev = ndev;
161 struct cfg80211_registered_device *rdev;
162
163 if (!dev->ieee80211_ptr)
164 return 0;
165
166 rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy);
167
168 switch (state) {
169 case NETDEV_REGISTER:
170 mutex_lock(&rdev->devlist_mtx);
171 list_add(&dev->ieee80211_ptr->list, &rdev->netdev_list);
172 if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj,
173 "phy80211")) {
174 printk(KERN_ERR "wireless: failed to add phy80211 "
175 "symlink to netdev!\n");
176 }
177 dev->ieee80211_ptr->netdev = dev;
178 mutex_unlock(&rdev->devlist_mtx);
179 break;
180 case NETDEV_UNREGISTER:
181 mutex_lock(&rdev->devlist_mtx);
182 if (!list_empty(&dev->ieee80211_ptr->list)) {
183 sysfs_remove_link(&dev->dev.kobj, "phy80211");
184 list_del_init(&dev->ieee80211_ptr->list);
185 }
186 mutex_unlock(&rdev->devlist_mtx);
187 break;
188 }
189
190 return 0;
191}
192
193static struct notifier_block cfg80211_netdev_notifier = {
194 .notifier_call = cfg80211_netdev_notifier_call,
195};
196
197static int cfg80211_init(void)
198{
199 int err = wiphy_sysfs_init();
200 if (err)
201 goto out_fail_sysfs;
202
203 err = register_netdevice_notifier(&cfg80211_netdev_notifier);
204 if (err)
205 goto out_fail_notifier;
206
207 ieee80211_debugfs_dir = debugfs_create_dir("ieee80211", NULL);
208
209 return 0;
210
211out_fail_notifier:
212 wiphy_sysfs_exit();
213out_fail_sysfs:
214 return err;
215}
216module_init(cfg80211_init);
217
218static void cfg80211_exit(void)
219{
220 debugfs_remove(ieee80211_debugfs_dir);
221 unregister_netdevice_notifier(&cfg80211_netdev_notifier);
222 wiphy_sysfs_exit();
223}
224module_exit(cfg80211_exit);
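
For orientation, the expected driver-side use of this new API is roughly the following; my_ops, my_priv, my_probe and my_remove are hypothetical names, and error handling is trimmed to the essentials:

#include <linux/module.h>
#include <net/cfg80211.h>
#include <net/wireless.h>

struct my_priv { int dummy; };		/* driver-private area          */
static struct cfg80211_ops my_ops;	/* method table, members elided */
static struct wiphy *my_phy;

static int my_probe(void)
{
	my_phy = wiphy_new(&my_ops, sizeof(struct my_priv));
	if (!my_phy)
		return -ENOMEM;
	return wiphy_register(my_phy);	/* appears as /sys/class/ieee80211/phyN */
}

static void my_remove(void)
{
	wiphy_unregister(my_phy);
	wiphy_free(my_phy);	/* drops the struct device reference */
}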
diff --git a/net/wireless/core.h b/net/wireless/core.h
new file mode 100644
index 000000000000..158db1edb92a
--- /dev/null
+++ b/net/wireless/core.h
@@ -0,0 +1,49 @@
1/*
2 * Wireless configuration interface internals.
3 *
4 * Copyright 2006, 2007 Johannes Berg <johannes@sipsolutions.net>
5 */
6#ifndef __NET_WIRELESS_CORE_H
7#define __NET_WIRELESS_CORE_H
8#include <linux/mutex.h>
9#include <linux/list.h>
10#include <linux/netdevice.h>
11#include <net/genetlink.h>
12#include <net/wireless.h>
13#include <net/cfg80211.h>
14
15struct cfg80211_registered_device {
16 struct cfg80211_ops *ops;
17 struct list_head list;
18 /* we hold this mutex during any call so that
19 * we cannot do multiple calls at once, and also
20 * to avoid the deregister call to proceed while
21 * any call is in progress */
22 struct mutex mtx;
23
24 /* wiphy index, internal only */
25 int idx;
26
 27 /* associated netdev list */
28 struct mutex devlist_mtx;
29 struct list_head netdev_list;
30
31 /* must be last because of the way we do wiphy_priv(),
32 * and it should at least be aligned to NETDEV_ALIGN */
33 struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN)));
34};
35
36static inline
37struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy)
38{
39 BUG_ON(!wiphy);
40 return container_of(wiphy, struct cfg80211_registered_device, wiphy);
41}
42
43extern struct mutex cfg80211_drv_mutex;
44extern struct list_head cfg80211_drv_list;
45
46/* free object */
47extern void cfg80211_dev_free(struct cfg80211_registered_device *drv);
48
49#endif /* __NET_WIRELESS_CORE_H */
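
The "must be last" comment is about memory layout: wiphy_new() allocates sizeof(*drv) + sizeof_priv in one block, and the driver-private area is addressed off the end of struct wiphy, so any member placed after wiphy would be overwritten. A sketch of the accessor this implies (the priv[] tail member is an assumption about the contemporaneous struct wiphy, hence the _sketch suffix):

#include <net/wireless.h>

/* Layout produced by wiphy_new(ops, sizeof_priv):
 *
 *   [ cfg80211_registered_device ... | struct wiphy (last member) ]
 *   [ sizeof_priv bytes of driver data, right behind struct wiphy ]
 */
static inline void *wiphy_priv_sketch(struct wiphy *wiphy)
{
	BUG_ON(!wiphy);
	return &wiphy->priv;	/* zero-length array at the struct tail */
}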
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
new file mode 100644
index 000000000000..3ebae1442963
--- /dev/null
+++ b/net/wireless/sysfs.c
@@ -0,0 +1,80 @@
1/*
2 * This file provides /sys/class/ieee80211/<wiphy name>/
3 * and some default attributes.
4 *
5 * Copyright 2005-2006 Jiri Benc <jbenc@suse.cz>
6 * Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
7 *
8 * This file is GPLv2 as found in COPYING.
9 */
10
11#include <linux/device.h>
12#include <linux/module.h>
13#include <linux/netdevice.h>
14#include <linux/nl80211.h>
15#include <linux/rtnetlink.h>
16#include <net/cfg80211.h>
17#include "sysfs.h"
18#include "core.h"
19
20static inline struct cfg80211_registered_device *dev_to_rdev(
21 struct device *dev)
22{
23 return container_of(dev, struct cfg80211_registered_device, wiphy.dev);
24}
25
26static ssize_t _show_index(struct device *dev, struct device_attribute *attr,
27 char *buf)
28{
29 return sprintf(buf, "%d\n", dev_to_rdev(dev)->idx);
30}
31
32static ssize_t _show_permaddr(struct device *dev,
33 struct device_attribute *attr,
34 char *buf)
35{
36 char *addr = dev_to_rdev(dev)->wiphy.perm_addr;
37
38 return sprintf(buf, "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x\n",
39 addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
40}
41
42static struct device_attribute ieee80211_dev_attrs[] = {
43 __ATTR(index, S_IRUGO, _show_index, NULL),
44 __ATTR(macaddress, S_IRUGO, _show_permaddr, NULL),
45 {}
46};
47
48static void wiphy_dev_release(struct device *dev)
49{
50 struct cfg80211_registered_device *rdev = dev_to_rdev(dev);
51
52 cfg80211_dev_free(rdev);
53}
54
55static int wiphy_uevent(struct device *dev, char **envp,
56 int num_envp, char *buf, int size)
57{
58 /* TODO, we probably need stuff here */
59 return 0;
60}
61
62struct class ieee80211_class = {
63 .name = "ieee80211",
64 .owner = THIS_MODULE,
65 .dev_release = wiphy_dev_release,
66 .dev_attrs = ieee80211_dev_attrs,
67#ifdef CONFIG_HOTPLUG
68 .dev_uevent = wiphy_uevent,
69#endif
70};
71
72int wiphy_sysfs_init(void)
73{
74 return class_register(&ieee80211_class);
75}
76
77void wiphy_sysfs_exit(void)
78{
79 class_unregister(&ieee80211_class);
80}
diff --git a/net/wireless/sysfs.h b/net/wireless/sysfs.h
new file mode 100644
index 000000000000..65acbebd3711
--- /dev/null
+++ b/net/wireless/sysfs.h
@@ -0,0 +1,9 @@
1#ifndef __WIRELESS_SYSFS_H
2#define __WIRELESS_SYSFS_H
3
4extern int wiphy_sysfs_init(void);
5extern void wiphy_sysfs_exit(void);
6
7extern struct class ieee80211_class;
8
9#endif /* __WIRELESS_SYSFS_H */
diff --git a/net/core/wireless.c b/net/wireless/wext.c
index b07fe270a508..d6aaf65192e9 100644
--- a/net/core/wireless.c
+++ b/net/wireless/wext.c
@@ -97,22 +97,10 @@
97#include <linux/wireless.h> /* Pretty obvious */ 97#include <linux/wireless.h> /* Pretty obvious */
98#include <net/iw_handler.h> /* New driver API */ 98#include <net/iw_handler.h> /* New driver API */
99#include <net/netlink.h> 99#include <net/netlink.h>
100#include <net/wext.h>
100 101
101#include <asm/uaccess.h> /* copy_to_user() */ 102#include <asm/uaccess.h> /* copy_to_user() */
102 103
103/**************************** CONSTANTS ****************************/
104
105/* Debugging stuff */
106#undef WE_IOCTL_DEBUG /* Debug IOCTL API */
107#undef WE_RTNETLINK_DEBUG /* Debug RtNetlink API */
108#undef WE_EVENT_DEBUG /* Debug Event dispatcher */
109#undef WE_SPY_DEBUG /* Debug enhanced spy support */
110
111/* Options */
112//CONFIG_NET_WIRELESS_RTNETLINK /* Wireless requests over RtNetlink */
113#define WE_EVENT_RTNETLINK /* Propagate events using RtNetlink */
114#define WE_SET_EVENT /* Generate an event on some set commands */
115
116/************************* GLOBAL VARIABLES *************************/ 104/************************* GLOBAL VARIABLES *************************/
117/* 105/*
118 * You should not use global variables, because of re-entrancy. 106 * You should not use global variables, because of re-entrancy.
@@ -349,8 +337,7 @@ static const struct iw_ioctl_description standard_ioctl[] = {
349 .max_tokens = sizeof(struct iw_pmksa), 337 .max_tokens = sizeof(struct iw_pmksa),
350 }, 338 },
351}; 339};
352static const unsigned standard_ioctl_num = (sizeof(standard_ioctl) / 340static const unsigned standard_ioctl_num = ARRAY_SIZE(standard_ioctl);
353 sizeof(struct iw_ioctl_description));
354 341
355/* 342/*
356 * Meta-data about all the additional standard Wireless Extension events 343 * Meta-data about all the additional standard Wireless Extension events
@@ -400,8 +387,7 @@ static const struct iw_ioctl_description standard_event[] = {
400 .max_tokens = sizeof(struct iw_pmkid_cand), 387 .max_tokens = sizeof(struct iw_pmkid_cand),
401 }, 388 },
402}; 389};
403static const unsigned standard_event_num = (sizeof(standard_event) / 390static const unsigned standard_event_num = ARRAY_SIZE(standard_event);
404 sizeof(struct iw_ioctl_description));
405 391
406/* Size (in bytes) of the various private data types */ 392/* Size (in bytes) of the various private data types */
407static const char iw_priv_type_size[] = { 393static const char iw_priv_type_size[] = {
@@ -454,26 +440,24 @@ static const int event_type_pk_size[] = {
454/* ---------------------------------------------------------------- */ 440/* ---------------------------------------------------------------- */
455/* 441/*
456 * Return the driver handler associated with a specific Wireless Extension. 442 * Return the driver handler associated with a specific Wireless Extension.
 457 * Called from various places, so make sure it remains efficient.
458 */ 443 */
459static inline iw_handler get_handler(struct net_device *dev, 444static iw_handler get_handler(struct net_device *dev, unsigned int cmd)
460 unsigned int cmd)
461{ 445{
462 /* Don't "optimise" the following variable, it will crash */ 446 /* Don't "optimise" the following variable, it will crash */
463 unsigned int index; /* *MUST* be unsigned */ 447 unsigned int index; /* *MUST* be unsigned */
464 448
465 /* Check if we have some wireless handlers defined */ 449 /* Check if we have some wireless handlers defined */
466 if(dev->wireless_handlers == NULL) 450 if (dev->wireless_handlers == NULL)
467 return NULL; 451 return NULL;
468 452
469 /* Try as a standard command */ 453 /* Try as a standard command */
470 index = cmd - SIOCIWFIRST; 454 index = cmd - SIOCIWFIRST;
471 if(index < dev->wireless_handlers->num_standard) 455 if (index < dev->wireless_handlers->num_standard)
472 return dev->wireless_handlers->standard[index]; 456 return dev->wireless_handlers->standard[index];
473 457
474 /* Try as a private command */ 458 /* Try as a private command */
475 index = cmd - SIOCIWFIRSTPRIV; 459 index = cmd - SIOCIWFIRSTPRIV;
476 if(index < dev->wireless_handlers->num_private) 460 if (index < dev->wireless_handlers->num_private)
477 return dev->wireless_handlers->private[index]; 461 return dev->wireless_handlers->private[index];
478 462
479 /* Not found */ 463 /* Not found */
@@ -484,15 +468,15 @@ static inline iw_handler get_handler(struct net_device *dev,
484/* 468/*
485 * Get statistics out of the driver 469 * Get statistics out of the driver
486 */ 470 */
487static inline struct iw_statistics *get_wireless_stats(struct net_device *dev) 471static struct iw_statistics *get_wireless_stats(struct net_device *dev)
488{ 472{
489 /* New location */ 473 /* New location */
490 if((dev->wireless_handlers != NULL) && 474 if ((dev->wireless_handlers != NULL) &&
491 (dev->wireless_handlers->get_wireless_stats != NULL)) 475 (dev->wireless_handlers->get_wireless_stats != NULL))
492 return dev->wireless_handlers->get_wireless_stats(dev); 476 return dev->wireless_handlers->get_wireless_stats(dev);
493 477
494 /* Not found */ 478 /* Not found */
495 return (struct iw_statistics *) NULL; 479 return NULL;
496} 480}
497 481
498/* ---------------------------------------------------------------- */ 482/* ---------------------------------------------------------------- */
@@ -514,14 +498,14 @@ static inline struct iw_statistics *get_wireless_stats(struct net_device *dev)
514 * netif_running(dev) test. I'm open on that one... 498 * netif_running(dev) test. I'm open on that one...
515 * Hopefully, the driver will remember to do a commit in "open()" ;-) 499 * Hopefully, the driver will remember to do a commit in "open()" ;-)
516 */ 500 */
517static inline int call_commit_handler(struct net_device * dev) 501static int call_commit_handler(struct net_device *dev)
518{ 502{
519 if((netif_running(dev)) && 503 if ((netif_running(dev)) &&
520 (dev->wireless_handlers->standard[0] != NULL)) { 504 (dev->wireless_handlers->standard[0] != NULL))
521 /* Call the commit handler on the driver */ 505 /* Call the commit handler on the driver */
522 return dev->wireless_handlers->standard[0](dev, NULL, 506 return dev->wireless_handlers->standard[0](dev, NULL,
523 NULL, NULL); 507 NULL, NULL);
524 } else 508 else
525 return 0; /* Command completed successfully */ 509 return 0; /* Command completed successfully */
526} 510}
527 511
@@ -570,14 +554,13 @@ static int iw_handler_get_iwstats(struct net_device * dev,
570 struct iw_statistics *stats; 554 struct iw_statistics *stats;
571 555
572 stats = get_wireless_stats(dev); 556 stats = get_wireless_stats(dev);
573 if (stats != (struct iw_statistics *) NULL) { 557 if (stats) {
574
575 /* Copy statistics to extra */ 558 /* Copy statistics to extra */
576 memcpy(extra, stats, sizeof(struct iw_statistics)); 559 memcpy(extra, stats, sizeof(struct iw_statistics));
577 wrqu->data.length = sizeof(struct iw_statistics); 560 wrqu->data.length = sizeof(struct iw_statistics);
578 561
579 /* Check if we need to clear the updated flag */ 562 /* Check if we need to clear the updated flag */
580 if(wrqu->data.flags != 0) 563 if (wrqu->data.flags != 0)
581 stats->qual.updated &= ~IW_QUAL_ALL_UPDATED; 564 stats->qual.updated &= ~IW_QUAL_ALL_UPDATED;
582 return 0; 565 return 0;
583 } else 566 } else
@@ -596,12 +579,12 @@ static int iw_handler_get_private(struct net_device * dev,
596 char * extra) 579 char * extra)
597{ 580{
598 /* Check if the driver has something to export */ 581 /* Check if the driver has something to export */
599 if((dev->wireless_handlers->num_private_args == 0) || 582 if ((dev->wireless_handlers->num_private_args == 0) ||
600 (dev->wireless_handlers->private_args == NULL)) 583 (dev->wireless_handlers->private_args == NULL))
601 return -EOPNOTSUPP; 584 return -EOPNOTSUPP;
602 585
603 /* Check if there is enough buffer up there */ 586 /* Check if there is enough buffer up there */
604 if(wrqu->data.length < dev->wireless_handlers->num_private_args) { 587 if (wrqu->data.length < dev->wireless_handlers->num_private_args) {
605 /* User space can't know in advance how large the buffer 588 /* User space can't know in advance how large the buffer
606 * needs to be. Give it a hint, so that we can support 589 * needs to be. Give it a hint, so that we can support
607 * any size buffer we want somewhat efficiently... */ 590 * any size buffer we want somewhat efficiently... */
@@ -636,8 +619,8 @@ static int iw_handler_get_private(struct net_device * dev,
636/* 619/*
637 * Print one entry (line) of /proc/net/wireless 620 * Print one entry (line) of /proc/net/wireless
638 */ 621 */
639static __inline__ void wireless_seq_printf_stats(struct seq_file *seq, 622static void wireless_seq_printf_stats(struct seq_file *seq,
640 struct net_device *dev) 623 struct net_device *dev)
641{ 624{
642 /* Get stats from the driver */ 625 /* Get stats from the driver */
643 struct iw_statistics *stats = get_wireless_stats(dev); 626 struct iw_statistics *stats = get_wireless_stats(dev);
@@ -680,7 +663,7 @@ static int wireless_seq_show(struct seq_file *seq, void *v)
680 return 0; 663 return 0;
681} 664}
682 665
683static struct seq_operations wireless_seq_ops = { 666static const struct seq_operations wireless_seq_ops = {
684 .start = dev_seq_start, 667 .start = dev_seq_start,
685 .next = dev_seq_next, 668 .next = dev_seq_next,
686 .stop = dev_seq_stop, 669 .stop = dev_seq_stop,
@@ -700,7 +683,7 @@ static const struct file_operations wireless_seq_fops = {
700 .release = seq_release, 683 .release = seq_release,
701}; 684};
702 685
703int __init wireless_proc_init(void) 686int __init wext_proc_init(void)
704{ 687{
705 /* Create /proc/net/wireless entry */ 688 /* Create /proc/net/wireless entry */
706 if (!proc_net_fops_create("wireless", S_IRUGO, &wireless_seq_fops)) 689 if (!proc_net_fops_create("wireless", S_IRUGO, &wireless_seq_fops))
@@ -735,32 +718,24 @@ static int ioctl_standard_call(struct net_device * dev,
735 int ret = -EINVAL; 718 int ret = -EINVAL;
736 719
737 /* Get the description of the IOCTL */ 720 /* Get the description of the IOCTL */
738 if((cmd - SIOCIWFIRST) >= standard_ioctl_num) 721 if ((cmd - SIOCIWFIRST) >= standard_ioctl_num)
739 return -EOPNOTSUPP; 722 return -EOPNOTSUPP;
740 descr = &(standard_ioctl[cmd - SIOCIWFIRST]); 723 descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
741 724
742#ifdef WE_IOCTL_DEBUG
743 printk(KERN_DEBUG "%s (WE) : Found standard handler for 0x%04X\n",
744 ifr->ifr_name, cmd);
745 printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
746#endif /* WE_IOCTL_DEBUG */
747
748 /* Prepare the call */ 725 /* Prepare the call */
749 info.cmd = cmd; 726 info.cmd = cmd;
750 info.flags = 0; 727 info.flags = 0;
751 728
752 /* Check if we have a pointer to user space data or not */ 729 /* Check if we have a pointer to user space data or not */
753 if(descr->header_type != IW_HEADER_TYPE_POINT) { 730 if (descr->header_type != IW_HEADER_TYPE_POINT) {
754 731
755 /* No extra arguments. Trivial to handle */ 732 /* No extra arguments. Trivial to handle */
756 ret = handler(dev, &info, &(iwr->u), NULL); 733 ret = handler(dev, &info, &(iwr->u), NULL);
757 734
758#ifdef WE_SET_EVENT
759 /* Generate an event to notify listeners of the change */ 735 /* Generate an event to notify listeners of the change */
760 if((descr->flags & IW_DESCR_FLAG_EVENT) && 736 if ((descr->flags & IW_DESCR_FLAG_EVENT) &&
761 ((ret == 0) || (ret == -EIWCOMMIT))) 737 ((ret == 0) || (ret == -EIWCOMMIT)))
762 wireless_send_event(dev, cmd, &(iwr->u), NULL); 738 wireless_send_event(dev, cmd, &(iwr->u), NULL);
763#endif /* WE_SET_EVENT */
764 } else { 739 } else {
765 char * extra; 740 char * extra;
766 int extra_size; 741 int extra_size;
@@ -800,19 +775,19 @@ static int ioctl_standard_call(struct net_device * dev,
800 iwr->u.data.length -= essid_compat; 775 iwr->u.data.length -= essid_compat;
801 776
802 /* Check what user space is giving us */ 777 /* Check what user space is giving us */
803 if(IW_IS_SET(cmd)) { 778 if (IW_IS_SET(cmd)) {
804 /* Check NULL pointer */ 779 /* Check NULL pointer */
805 if((iwr->u.data.pointer == NULL) && 780 if ((iwr->u.data.pointer == NULL) &&
806 (iwr->u.data.length != 0)) 781 (iwr->u.data.length != 0))
807 return -EFAULT; 782 return -EFAULT;
808 /* Check if number of token fits within bounds */ 783 /* Check if number of token fits within bounds */
809 if(iwr->u.data.length > descr->max_tokens) 784 if (iwr->u.data.length > descr->max_tokens)
810 return -E2BIG; 785 return -E2BIG;
811 if(iwr->u.data.length < descr->min_tokens) 786 if (iwr->u.data.length < descr->min_tokens)
812 return -EINVAL; 787 return -EINVAL;
813 } else { 788 } else {
814 /* Check NULL pointer */ 789 /* Check NULL pointer */
815 if(iwr->u.data.pointer == NULL) 790 if (iwr->u.data.pointer == NULL)
816 return -EFAULT; 791 return -EFAULT;
817 /* Save user space buffer size for checking */ 792 /* Save user space buffer size for checking */
818 user_length = iwr->u.data.length; 793 user_length = iwr->u.data.length;
@@ -822,7 +797,7 @@ static int ioctl_standard_call(struct net_device * dev,
822 * implied by the test at the end. */ 797 * implied by the test at the end. */
823 798
824 /* Support for very large requests */ 799 /* Support for very large requests */
825 if((descr->flags & IW_DESCR_FLAG_NOMAX) && 800 if ((descr->flags & IW_DESCR_FLAG_NOMAX) &&
826 (user_length > descr->max_tokens)) { 801 (user_length > descr->max_tokens)) {
827 /* Allow userspace to GET more than max so 802 /* Allow userspace to GET more than max so
828 * we can support any size GET requests. 803 * we can support any size GET requests.
@@ -835,20 +810,14 @@ static int ioctl_standard_call(struct net_device * dev,
835 } 810 }
836 } 811 }
837 812
838#ifdef WE_IOCTL_DEBUG
839 printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n",
840 dev->name, extra_size);
841#endif /* WE_IOCTL_DEBUG */
842
843 /* Create the kernel buffer */ 813 /* Create the kernel buffer */
844 /* kzalloc ensures NULL-termination for essid_compat */ 814 /* kzalloc ensures NULL-termination for essid_compat */
845 extra = kzalloc(extra_size, GFP_KERNEL); 815 extra = kzalloc(extra_size, GFP_KERNEL);
846 if (extra == NULL) { 816 if (extra == NULL)
847 return -ENOMEM; 817 return -ENOMEM;
848 }
849 818
850 /* If it is a SET, get all the extra data in here */ 819 /* If it is a SET, get all the extra data in here */
851 if(IW_IS_SET(cmd) && (iwr->u.data.length != 0)) { 820 if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
852 err = copy_from_user(extra, iwr->u.data.pointer, 821 err = copy_from_user(extra, iwr->u.data.pointer,
853 iwr->u.data.length * 822 iwr->u.data.length *
854 descr->token_size); 823 descr->token_size);
@@ -856,11 +825,6 @@ static int ioctl_standard_call(struct net_device * dev,
856 kfree(extra); 825 kfree(extra);
857 return -EFAULT; 826 return -EFAULT;
858 } 827 }
859#ifdef WE_IOCTL_DEBUG
860 printk(KERN_DEBUG "%s (WE) : Got %d bytes\n",
861 dev->name,
862 iwr->u.data.length * descr->token_size);
863#endif /* WE_IOCTL_DEBUG */
864 } 828 }
865 829
866 /* Call the handler */ 830 /* Call the handler */
@@ -871,7 +835,7 @@ static int ioctl_standard_call(struct net_device * dev,
871 /* If we have something to return to the user */ 835 /* If we have something to return to the user */
872 if (!ret && IW_IS_GET(cmd)) { 836 if (!ret && IW_IS_GET(cmd)) {
873 /* Check if there is enough buffer up there */ 837 /* Check if there is enough buffer up there */
874 if(user_length < iwr->u.data.length) { 838 if (user_length < iwr->u.data.length) {
875 kfree(extra); 839 kfree(extra);
876 return -E2BIG; 840 return -E2BIG;
877 } 841 }
@@ -881,18 +845,12 @@ static int ioctl_standard_call(struct net_device * dev,
881 descr->token_size); 845 descr->token_size);
882 if (err) 846 if (err)
883 ret = -EFAULT; 847 ret = -EFAULT;
884#ifdef WE_IOCTL_DEBUG
885 printk(KERN_DEBUG "%s (WE) : Wrote %d bytes\n",
886 dev->name,
887 iwr->u.data.length * descr->token_size);
888#endif /* WE_IOCTL_DEBUG */
889 } 848 }
890 849
891#ifdef WE_SET_EVENT
892 /* Generate an event to notify listeners of the change */ 850 /* Generate an event to notify listeners of the change */
893 if((descr->flags & IW_DESCR_FLAG_EVENT) && 851 if ((descr->flags & IW_DESCR_FLAG_EVENT) &&
894 ((ret == 0) || (ret == -EIWCOMMIT))) { 852 ((ret == 0) || (ret == -EIWCOMMIT))) {
895 if(descr->flags & IW_DESCR_FLAG_RESTRICT) 853 if (descr->flags & IW_DESCR_FLAG_RESTRICT)
896 /* If the event is restricted, don't 854 /* If the event is restricted, don't
897 * export the payload */ 855 * export the payload */
898 wireless_send_event(dev, cmd, &(iwr->u), NULL); 856 wireless_send_event(dev, cmd, &(iwr->u), NULL);
@@ -900,14 +858,13 @@ static int ioctl_standard_call(struct net_device * dev,
900 wireless_send_event(dev, cmd, &(iwr->u), 858 wireless_send_event(dev, cmd, &(iwr->u),
901 extra); 859 extra);
902 } 860 }
903#endif /* WE_SET_EVENT */
904 861
905 /* Cleanup - I told you it wasn't that long ;-) */ 862 /* Cleanup - I told you it wasn't that long ;-) */
906 kfree(extra); 863 kfree(extra);
907 } 864 }
908 865
909 /* Call commit handler if needed and defined */ 866 /* Call commit handler if needed and defined */
910 if(ret == -EIWCOMMIT) 867 if (ret == -EIWCOMMIT)
911 ret = call_commit_handler(dev); 868 ret = call_commit_handler(dev);
912 869
913 /* Here, we will generate the appropriate event if needed */ 870 /* Here, we will generate the appropriate event if needed */
@@ -931,10 +888,8 @@ static int ioctl_standard_call(struct net_device * dev,
931 * a iw_handler but process it in your ioctl handler (i.e. use the 888 * a iw_handler but process it in your ioctl handler (i.e. use the
932 * old driver API). 889 * old driver API).
933 */ 890 */
934static inline int ioctl_private_call(struct net_device * dev, 891static int ioctl_private_call(struct net_device *dev, struct ifreq *ifr,
935 struct ifreq * ifr, 892 unsigned int cmd, iw_handler handler)
936 unsigned int cmd,
937 iw_handler handler)
938{ 893{
939 struct iwreq * iwr = (struct iwreq *) ifr; 894 struct iwreq * iwr = (struct iwreq *) ifr;
940 const struct iw_priv_args * descr = NULL; 895 const struct iw_priv_args * descr = NULL;
@@ -944,28 +899,18 @@ static inline int ioctl_private_call(struct net_device * dev,
944 int ret = -EINVAL; 899 int ret = -EINVAL;
945 900
946 /* Get the description of the IOCTL */ 901 /* Get the description of the IOCTL */
947 for(i = 0; i < dev->wireless_handlers->num_private_args; i++) 902 for (i = 0; i < dev->wireless_handlers->num_private_args; i++)
948 if(cmd == dev->wireless_handlers->private_args[i].cmd) { 903 if (cmd == dev->wireless_handlers->private_args[i].cmd) {
949 descr = &(dev->wireless_handlers->private_args[i]); 904 descr = &(dev->wireless_handlers->private_args[i]);
950 break; 905 break;
951 } 906 }
952 907
953#ifdef WE_IOCTL_DEBUG
954 printk(KERN_DEBUG "%s (WE) : Found private handler for 0x%04X\n",
955 ifr->ifr_name, cmd);
956 if(descr) {
957 printk(KERN_DEBUG "%s (WE) : Name %s, set %X, get %X\n",
958 dev->name, descr->name,
959 descr->set_args, descr->get_args);
960 }
961#endif /* WE_IOCTL_DEBUG */
962
963 /* Compute the size of the set/get arguments */ 908 /* Compute the size of the set/get arguments */
964 if(descr != NULL) { 909 if (descr != NULL) {
965 if(IW_IS_SET(cmd)) { 910 if (IW_IS_SET(cmd)) {
966 int offset = 0; /* For sub-ioctls */ 911 int offset = 0; /* For sub-ioctls */
967 /* Check for sub-ioctl handler */ 912 /* Check for sub-ioctl handler */
968 if(descr->name[0] == '\0') 913 if (descr->name[0] == '\0')
969 /* Reserve one int for sub-ioctl index */ 914 /* Reserve one int for sub-ioctl index */
970 offset = sizeof(__u32); 915 offset = sizeof(__u32);
971 916
@@ -973,7 +918,7 @@ static inline int ioctl_private_call(struct net_device * dev,
973 extra_size = get_priv_size(descr->set_args); 918 extra_size = get_priv_size(descr->set_args);
974 919
 975 /* Does it fit in iwr? */ 920 /* Does it fit in iwr? */
976 if((descr->set_args & IW_PRIV_SIZE_FIXED) && 921 if ((descr->set_args & IW_PRIV_SIZE_FIXED) &&
977 ((extra_size + offset) <= IFNAMSIZ)) 922 ((extra_size + offset) <= IFNAMSIZ))
978 extra_size = 0; 923 extra_size = 0;
979 } else { 924 } else {
@@ -981,7 +926,7 @@ static inline int ioctl_private_call(struct net_device * dev,
981 extra_size = get_priv_size(descr->get_args); 926 extra_size = get_priv_size(descr->get_args);
982 927
 983 /* Does it fit in iwr? */ 928 /* Does it fit in iwr? */
984 if((descr->get_args & IW_PRIV_SIZE_FIXED) && 929 if ((descr->get_args & IW_PRIV_SIZE_FIXED) &&
985 (extra_size <= IFNAMSIZ)) 930 (extra_size <= IFNAMSIZ))
986 extra_size = 0; 931 extra_size = 0;
987 } 932 }
@@ -992,7 +937,7 @@ static inline int ioctl_private_call(struct net_device * dev,
992 info.flags = 0; 937 info.flags = 0;
993 938
994 /* Check if we have a pointer to user space data or not. */ 939 /* Check if we have a pointer to user space data or not. */
995 if(extra_size == 0) { 940 if (extra_size == 0) {
996 /* No extra arguments. Trivial to handle */ 941 /* No extra arguments. Trivial to handle */
997 ret = handler(dev, &info, &(iwr->u), (char *) &(iwr->u)); 942 ret = handler(dev, &info, &(iwr->u), (char *) &(iwr->u));
998 } else { 943 } else {
@@ -1000,46 +945,33 @@ static inline int ioctl_private_call(struct net_device * dev,
1000 int err; 945 int err;
1001 946
1002 /* Check what user space is giving us */ 947 /* Check what user space is giving us */
1003 if(IW_IS_SET(cmd)) { 948 if (IW_IS_SET(cmd)) {
1004 /* Check NULL pointer */ 949 /* Check NULL pointer */
1005 if((iwr->u.data.pointer == NULL) && 950 if ((iwr->u.data.pointer == NULL) &&
1006 (iwr->u.data.length != 0)) 951 (iwr->u.data.length != 0))
1007 return -EFAULT; 952 return -EFAULT;
1008 953
 1009 /* Does it fit within bounds? */ 954 /* Does it fit within bounds? */
1010 if(iwr->u.data.length > (descr->set_args & 955 if (iwr->u.data.length > (descr->set_args &
1011 IW_PRIV_SIZE_MASK)) 956 IW_PRIV_SIZE_MASK))
1012 return -E2BIG; 957 return -E2BIG;
1013 } else { 958 } else if (iwr->u.data.pointer == NULL)
1014 /* Check NULL pointer */ 959 return -EFAULT;
1015 if(iwr->u.data.pointer == NULL)
1016 return -EFAULT;
1017 }
1018
1019#ifdef WE_IOCTL_DEBUG
1020 printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n",
1021 dev->name, extra_size);
1022#endif /* WE_IOCTL_DEBUG */
1023 960
1024 /* Always allocate for max space. Easier, and won't last 961 /* Always allocate for max space. Easier, and won't last
1025 * long... */ 962 * long... */
1026 extra = kmalloc(extra_size, GFP_KERNEL); 963 extra = kmalloc(extra_size, GFP_KERNEL);
1027 if (extra == NULL) { 964 if (extra == NULL)
1028 return -ENOMEM; 965 return -ENOMEM;
1029 }
1030 966
1031 /* If it is a SET, get all the extra data in here */ 967 /* If it is a SET, get all the extra data in here */
1032 if(IW_IS_SET(cmd) && (iwr->u.data.length != 0)) { 968 if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
1033 err = copy_from_user(extra, iwr->u.data.pointer, 969 err = copy_from_user(extra, iwr->u.data.pointer,
1034 extra_size); 970 extra_size);
1035 if (err) { 971 if (err) {
1036 kfree(extra); 972 kfree(extra);
1037 return -EFAULT; 973 return -EFAULT;
1038 } 974 }
1039#ifdef WE_IOCTL_DEBUG
1040 printk(KERN_DEBUG "%s (WE) : Got %d elem\n",
1041 dev->name, iwr->u.data.length);
1042#endif /* WE_IOCTL_DEBUG */
1043 } 975 }
1044 976
1045 /* Call the handler */ 977 /* Call the handler */
@@ -1059,10 +991,6 @@ static inline int ioctl_private_call(struct net_device * dev,
1059 extra_size); 991 extra_size);
1060 if (err) 992 if (err)
1061 ret = -EFAULT; 993 ret = -EFAULT;
1062#ifdef WE_IOCTL_DEBUG
1063 printk(KERN_DEBUG "%s (WE) : Wrote %d elem\n",
1064 dev->name, iwr->u.data.length);
1065#endif /* WE_IOCTL_DEBUG */
1066 } 994 }
1067 995
1068 /* Cleanup - I told you it wasn't that long ;-) */ 996 /* Cleanup - I told you it wasn't that long ;-) */
@@ -1071,7 +999,7 @@ static inline int ioctl_private_call(struct net_device * dev,
1071 999
1072 1000
1073 /* Call commit handler if needed and defined */ 1001 /* Call commit handler if needed and defined */
1074 if(ret == -EIWCOMMIT) 1002 if (ret == -EIWCOMMIT)
1075 ret = call_commit_handler(dev); 1003 ret = call_commit_handler(dev);
1076 1004
1077 return ret; 1005 return ret;
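A note on the -EIWCOMMIT convention seen just above: a driver's set handler may return -EIWCOMMIT instead of 0 to ask the core to push all pending parameters to the hardware in one go. A minimal sketch of what call_commit_handler() does, assuming the usual layout where SIOCSIWCOMMIT occupies slot 0 of the standard handler table:

	/* Sketch only: slot 0 is (SIOCSIWCOMMIT - SIOCIWFIRST) */
	static int call_commit_handler(struct net_device *dev)
	{
		if (netif_running(dev) &&
		    dev->wireless_handlers->standard[0] != NULL)
			/* Call the commit handler on the driver */
			return dev->wireless_handlers->standard[0](dev, NULL,
								   NULL, NULL);
		return 0;	/* Command completed successfully */
	}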
@@ -1079,11 +1007,10 @@ static inline int ioctl_private_call(struct net_device * dev,
1079 1007
1080/* ---------------------------------------------------------------- */ 1008/* ---------------------------------------------------------------- */
1081/* 1009/*
1082 * Main IOCTL dispatcher. Called from the main networking code 1010 * Main IOCTL dispatcher.
1083 * (dev_ioctl() in net/core/dev.c).
1084 * Check the type of IOCTL and call the appropriate wrapper... 1011 * Check the type of IOCTL and call the appropriate wrapper...
1085 */ 1012 */
1086int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd) 1013static int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
1087{ 1014{
1088 struct net_device *dev; 1015 struct net_device *dev;
1089 iw_handler handler; 1016 iw_handler handler;
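For reference while reading the simplified dispatch below: get_handler() maps a command number to a driver method by indexing the standard or private handler table registered in dev->wireless_handlers. Roughly (a sketch, not the verbatim helper):

	static iw_handler get_handler(struct net_device *dev, unsigned int cmd)
	{
		unsigned int index;	/* *MUST* be unsigned */

		if (dev->wireless_handlers == NULL)
			return NULL;

		/* Try the standard table first, then the private one */
		index = cmd - SIOCIWFIRST;
		if (index < dev->wireless_handlers->num_standard)
			return dev->wireless_handlers->standard[index];
		index = cmd - SIOCIWFIRSTPRIV;
		if (index < dev->wireless_handlers->num_private)
			return dev->wireless_handlers->private[index];
		return NULL;	/* Not found */
	}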
@@ -1098,789 +1025,54 @@ int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
1098 /* A bunch of special cases, then the generic case... 1025 /* A bunch of special cases, then the generic case...
1099 * Note that 'cmd' is already filtered in dev_ioctl() with 1026 * Note that 'cmd' is already filtered in dev_ioctl() with
1100 * (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) */ 1027 * (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) */
1101 switch(cmd) 1028 if (cmd == SIOCGIWSTATS)
1102 { 1029 return ioctl_standard_call(dev, ifr, cmd,
1103 case SIOCGIWSTATS: 1030 &iw_handler_get_iwstats);
1104 /* Get Wireless Stats */
1105 return ioctl_standard_call(dev,
1106 ifr,
1107 cmd,
1108 &iw_handler_get_iwstats);
1109
1110 case SIOCGIWPRIV:
1111 /* Check if we have some wireless handlers defined */
1112 if(dev->wireless_handlers != NULL) {
1113 /* We export to user space the definition of
1114 * the private handler ourselves */
1115 return ioctl_standard_call(dev,
1116 ifr,
1117 cmd,
1118 &iw_handler_get_private);
1119 }
1120 // ## Fall-through for old API ##
1121 default:
1122 /* Generic IOCTL */
1123 /* Basic check */
1124 if (!netif_device_present(dev))
1125 return -ENODEV;
1126 /* New driver API : try to find the handler */
1127 handler = get_handler(dev, cmd);
1128 if(handler != NULL) {
1129 /* Standard and private are not the same */
1130 if(cmd < SIOCIWFIRSTPRIV)
1131 return ioctl_standard_call(dev,
1132 ifr,
1133 cmd,
1134 handler);
1135 else
1136 return ioctl_private_call(dev,
1137 ifr,
1138 cmd,
1139 handler);
1140 }
1141 /* Old driver API : call driver ioctl handler */
1142 if (dev->do_ioctl) {
1143 return dev->do_ioctl(dev, ifr, cmd);
1144 }
1145 return -EOPNOTSUPP;
1146 }
1147 /* Not reached */
1148 return -EINVAL;
1149}
1150
1151/********************** RTNETLINK REQUEST API **********************/
1152/*
1153 * The alternate user space API to configure all those Wireless Extensions
1154 * is through RtNetlink.
1155 * This API supports only the new driver API (iw_handler).
1156 *
1157 * This RtNetlink API uses the same query/reply model as the ioctl API.
1158 * Maximum effort has been made to fit in the RtNetlink model, and
1159 * we support both RtNetlink Set and RtNetlink Get operations.
1160 * On the other hand, we don't offer Dump operations for the
1161 * following reasons :
1162 * o Large number of parameters, most optional
1163 * o Large size of some parameters (> 100 bytes)
1164 * o Each parameter needs to be extracted from hardware
1165 * o Scan requests can take seconds and disable network activity.
1166 * Because of this high cost/overhead, we want to return only the
1167 * parameters the user application is really interested in.
1168 * We could offer partial Dump using the IW_DESCR_FLAG_DUMP flag.
1169 *
1170 * The API uses the standard RtNetlink socket. When the RtNetlink code
1171 * finds an IFLA_WIRELESS field in a RtNetlink SET_LINK request,
1172 * it calls here.
1173 */
1174
1175#ifdef CONFIG_NET_WIRELESS_RTNETLINK
1176/* ---------------------------------------------------------------- */
1177/*
1178 * Wrapper to call a standard Wireless Extension GET handler.
1179 * We do various checks and call the handler with the proper args.
1180 */
1181static int rtnetlink_standard_get(struct net_device * dev,
1182 struct iw_event * request,
1183 int request_len,
1184 iw_handler handler,
1185 char ** p_buf,
1186 int * p_len)
1187{
1188 const struct iw_ioctl_description * descr = NULL;
1189 unsigned int cmd;
1190 union iwreq_data * wrqu;
1191 int hdr_len;
1192 struct iw_request_info info;
1193 char * buffer = NULL;
1194 int buffer_size = 0;
1195 int ret = -EINVAL;
1196
1197 /* Get the description of the Request */
1198 cmd = request->cmd;
1199 if((cmd - SIOCIWFIRST) >= standard_ioctl_num)
1200 return -EOPNOTSUPP;
1201 descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
1202
1203#ifdef WE_RTNETLINK_DEBUG
1204 printk(KERN_DEBUG "%s (WE.r) : Found standard handler for 0x%04X\n",
1205 dev->name, cmd);
1206 printk(KERN_DEBUG "%s (WE.r) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
1207#endif /* WE_RTNETLINK_DEBUG */
1208
1209 /* Check if wrqu is complete */
1210 hdr_len = event_type_size[descr->header_type];
1211 if(request_len < hdr_len) {
1212#ifdef WE_RTNETLINK_DEBUG
1213 printk(KERN_DEBUG
1214 "%s (WE.r) : Wireless request too short (%d)\n",
1215 dev->name, request_len);
1216#endif /* WE_RTNETLINK_DEBUG */
1217 return -EINVAL;
1218 }
1219
1220 /* Prepare the call */
1221 info.cmd = cmd;
1222 info.flags = 0;
1223
1224 /* Check if we have extra data in the reply or not */
1225 if(descr->header_type != IW_HEADER_TYPE_POINT) {
1226
1227 /* Create the kernel buffer that we will return.
1228 * It's at an offset to match the TYPE_POINT case... */
1229 buffer_size = request_len + IW_EV_POINT_OFF;
1230 buffer = kmalloc(buffer_size, GFP_KERNEL);
1231 if (buffer == NULL) {
1232 return -ENOMEM;
1233 }
1234 /* Copy event data */
1235 memcpy(buffer + IW_EV_POINT_OFF, request, request_len);
1236 /* Use our own copy of wrqu */
1237 wrqu = (union iwreq_data *) (buffer + IW_EV_POINT_OFF
1238 + IW_EV_LCP_PK_LEN);
1239
1240 /* No extra arguments. Trivial to handle */
1241 ret = handler(dev, &info, wrqu, NULL);
1242
1243 } else {
1244 union iwreq_data wrqu_point;
1245 char * extra = NULL;
1246 int extra_size = 0;
1247 1031
1248 /* Get a temp copy of wrqu (skip pointer) */ 1032 if (cmd == SIOCGIWPRIV && dev->wireless_handlers)
1249 memcpy(((char *) &wrqu_point) + IW_EV_POINT_OFF, 1033 return ioctl_standard_call(dev, ifr, cmd,
1250 ((char *) request) + IW_EV_LCP_PK_LEN, 1034 &iw_handler_get_private);
1251 IW_EV_POINT_LEN - IW_EV_LCP_PK_LEN);
1252
1253 /* Calculate space needed by arguments. Always allocate
1254 * for max space. Easier, and won't last long... */
1255 extra_size = descr->max_tokens * descr->token_size;
1256 /* Support for very large requests */
1257 if((descr->flags & IW_DESCR_FLAG_NOMAX) &&
1258 (wrqu_point.data.length > descr->max_tokens))
1259 extra_size = (wrqu_point.data.length
1260 * descr->token_size);
1261 buffer_size = extra_size + IW_EV_POINT_PK_LEN + IW_EV_POINT_OFF;
1262#ifdef WE_RTNETLINK_DEBUG
1263 printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes (%d bytes)\n",
1264 dev->name, extra_size, buffer_size);
1265#endif /* WE_RTNETLINK_DEBUG */
1266
1267 /* Create the kernel buffer that we will return */
1268 buffer = kmalloc(buffer_size, GFP_KERNEL);
1269 if (buffer == NULL) {
1270 return -ENOMEM;
1271 }
1272
1273 /* Put wrqu in the right place (just before extra).
1274 * Leave space for IWE header and dummy pointer...
1275 * Note that IW_EV_LCP_PK_LEN==4 bytes, so it's still aligned.
1276 */
1277 memcpy(buffer + IW_EV_LCP_PK_LEN + IW_EV_POINT_OFF,
1278 ((char *) &wrqu_point) + IW_EV_POINT_OFF,
1279 IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN);
1280 wrqu = (union iwreq_data *) (buffer + IW_EV_LCP_PK_LEN);
1281
1282 /* Extra comes logically after that. Offset +12 bytes. */
1283 extra = buffer + IW_EV_POINT_OFF + IW_EV_POINT_PK_LEN;
1284
1285 /* Call the handler */
1286 ret = handler(dev, &info, wrqu, extra);
1287
1288 /* Calculate real returned length */
1289 extra_size = (wrqu->data.length * descr->token_size);
1290 /* Re-adjust reply size */
1291 request->len = extra_size + IW_EV_POINT_PK_LEN;
1292
1293 /* Put the iwe header where it should, i.e. scrap the
1294 * dummy pointer. */
1295 memcpy(buffer + IW_EV_POINT_OFF, request, IW_EV_LCP_PK_LEN);
1296
1297#ifdef WE_RTNETLINK_DEBUG
1298 printk(KERN_DEBUG "%s (WE.r) : Reply 0x%04X, hdr_len %d, tokens %d, extra_size %d, buffer_size %d\n", dev->name, cmd, hdr_len, wrqu->data.length, extra_size, buffer_size);
1299#endif /* WE_RTNETLINK_DEBUG */
1300
1301 /* Check if there is enough buffer up there */
1302 if(wrqu_point.data.length < wrqu->data.length)
1303 ret = -E2BIG;
1304 }
1305
1306 /* Return the buffer to the caller */
1307 if (!ret) {
1308 *p_buf = buffer;
1309 *p_len = request->len;
1310 } else {
1311 /* Cleanup */
1312 if(buffer)
1313 kfree(buffer);
1314 }
1315
1316 return ret;
1317}
1318
1319/* ---------------------------------------------------------------- */
1320/*
1321 * Wrapper to call a standard Wireless Extension SET handler.
1322 * We do various checks and call the handler with the proper args.
1323 */
1324static inline int rtnetlink_standard_set(struct net_device * dev,
1325 struct iw_event * request,
1326 int request_len,
1327 iw_handler handler)
1328{
1329 const struct iw_ioctl_description * descr = NULL;
1330 unsigned int cmd;
1331 union iwreq_data * wrqu;
1332 union iwreq_data wrqu_point;
1333 int hdr_len;
1334 char * extra = NULL;
1335 int extra_size = 0;
1336 struct iw_request_info info;
1337 int ret = -EINVAL;
1338
1339 /* Get the description of the Request */
1340 cmd = request->cmd;
1341 if((cmd - SIOCIWFIRST) >= standard_ioctl_num)
1342 return -EOPNOTSUPP;
1343 descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
1344
1345#ifdef WE_RTNETLINK_DEBUG
1346 printk(KERN_DEBUG "%s (WE.r) : Found standard SET handler for 0x%04X\n",
1347 dev->name, cmd);
1348 printk(KERN_DEBUG "%s (WE.r) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
1349#endif /* WE_RTNETLINK_DEBUG */
1350
1351 /* Extract fixed header from request. This is properly aligned. */
1352 wrqu = (union iwreq_data *) (((char *) request) + IW_EV_LCP_PK_LEN);
1353
1354 /* Check if wrqu is complete */
1355 hdr_len = event_type_pk_size[descr->header_type];
1356 if(request_len < hdr_len) {
1357#ifdef WE_RTNETLINK_DEBUG
1358 printk(KERN_DEBUG
1359 "%s (WE.r) : Wireless request too short (%d)\n",
1360 dev->name, request_len);
1361#endif /* WE_RTNETLINK_DEBUG */
1362 return -EINVAL;
1363 }
1364
1365 /* Prepare the call */
1366 info.cmd = cmd;
1367 info.flags = 0;
1368
1369 /* Check if we have extra data in the request or not */
1370 if(descr->header_type != IW_HEADER_TYPE_POINT) {
1371
1372 /* No extra arguments. Trivial to handle */
1373 ret = handler(dev, &info, wrqu, NULL);
1374
1375 } else {
1376 int extra_len;
1377
1378 /* Put wrqu in the right place (skip pointer) */
1379 memcpy(((char *) &wrqu_point) + IW_EV_POINT_OFF,
1380 wrqu, IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN);
1381 /* Don't forget about the event code... */
1382 wrqu = &wrqu_point;
1383
1384 /* Check if the number of tokens fits within bounds */
1385 if(wrqu_point.data.length > descr->max_tokens)
1386 return -E2BIG;
1387 if(wrqu_point.data.length < descr->min_tokens)
1388 return -EINVAL;
1389
1390 /* Real length of payload */
1391 extra_len = wrqu_point.data.length * descr->token_size;
1392
1393 /* Check if request is self consistent */
1394 if((request_len - hdr_len) < extra_len) {
1395#ifdef WE_RTNETLINK_DEBUG
1396 printk(KERN_DEBUG "%s (WE.r) : Wireless request data too short (%d)\n",
1397 dev->name, extra_size);
1398#endif /* WE_RTNETLINK_DEBUG */
1399 return -EINVAL;
1400 }
1401
1402#ifdef WE_RTNETLINK_DEBUG
1403 printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes\n",
1404 dev->name, extra_size);
1405#endif /* WE_RTNETLINK_DEBUG */
1406
1407 /* Always allocate for max space. Easier, and won't last
1408 * long... */
1409 extra_size = descr->max_tokens * descr->token_size;
1410 extra = kmalloc(extra_size, GFP_KERNEL);
1411 if (extra == NULL)
1412 return -ENOMEM;
1413
1414 /* Copy extra in aligned buffer */
1415 memcpy(extra, ((char *) request) + hdr_len, extra_len);
1416
1417 /* Call the handler */
1418 ret = handler(dev, &info, &wrqu_point, extra);
1419 }
1420
1421#ifdef WE_SET_EVENT
1422 /* Generate an event to notify listeners of the change */
1423 if((descr->flags & IW_DESCR_FLAG_EVENT) &&
1424 ((ret == 0) || (ret == -EIWCOMMIT))) {
1425 if(descr->flags & IW_DESCR_FLAG_RESTRICT)
1426 /* If the event is restricted, don't
1427 * export the payload */
1428 wireless_send_event(dev, cmd, wrqu, NULL);
1429 else
1430 wireless_send_event(dev, cmd, wrqu, extra);
1431 }
1432#endif /* WE_SET_EVENT */
1433
1434 /* Cleanup - I told you it wasn't that long ;-) */
1435 if(extra)
1436 kfree(extra);
1437
1438 /* Call commit handler if needed and defined */
1439 if(ret == -EIWCOMMIT)
1440 ret = call_commit_handler(dev);
1441
1442 return ret;
1443}
1444
1445/* ---------------------------------------------------------------- */
1446/*
1447 * Wrapper to call a private Wireless Extension GET handler.
1448 * Same as above...
1449 * It's not as nice and slimline as the standard wrapper. The cause
1450 * is struct iw_priv_args, which was not really designed for the
1451 * job we are doing here.
1452 *
1453 * IMPORTANT : This function prevents setting and getting data on the same
1454 * IOCTL and enforces the SET/GET convention. Not doing it would be
1455 * far too hairy...
1456 * If you need to set and get data at the same time, please don't use
1457 * an iw_handler but process it in your ioctl handler (i.e. use the
1458 * old driver API).
1459 */
1460static inline int rtnetlink_private_get(struct net_device * dev,
1461 struct iw_event * request,
1462 int request_len,
1463 iw_handler handler,
1464 char ** p_buf,
1465 int * p_len)
1466{
1467 const struct iw_priv_args * descr = NULL;
1468 unsigned int cmd;
1469 union iwreq_data * wrqu;
1470 int hdr_len;
1471 struct iw_request_info info;
1472 int extra_size = 0;
1473 int i;
1474 char * buffer = NULL;
1475 int buffer_size = 0;
1476 int ret = -EINVAL;
1477
1478 /* Get the description of the Request */
1479 cmd = request->cmd;
1480 for(i = 0; i < dev->wireless_handlers->num_private_args; i++)
1481 if(cmd == dev->wireless_handlers->private_args[i].cmd) {
1482 descr = &(dev->wireless_handlers->private_args[i]);
1483 break;
1484 }
1485 if(descr == NULL)
1486 return -EOPNOTSUPP;
1487
1488#ifdef WE_RTNETLINK_DEBUG
1489 printk(KERN_DEBUG "%s (WE.r) : Found private handler for 0x%04X\n",
1490 dev->name, cmd);
1491 printk(KERN_DEBUG "%s (WE.r) : Name %s, set %X, get %X\n",
1492 dev->name, descr->name, descr->set_args, descr->get_args);
1493#endif /* WE_RTNETLINK_DEBUG */
1494
1495 /* Compute the max size of the get arguments */
1496 extra_size = get_priv_size(descr->get_args);
1497
1498 /* Does it fit in wrqu? */
1499 if((descr->get_args & IW_PRIV_SIZE_FIXED) &&
1500 (extra_size <= IFNAMSIZ)) {
1501 hdr_len = extra_size;
1502 extra_size = 0;
1503 } else {
1504 hdr_len = IW_EV_POINT_PK_LEN;
1505 }
1506
1507 /* Check if wrqu is complete */
1508 if(request_len < hdr_len) {
1509#ifdef WE_RTNETLINK_DEBUG
1510 printk(KERN_DEBUG
1511 "%s (WE.r) : Wireless request too short (%d)\n",
1512 dev->name, request_len);
1513#endif /* WE_RTNETLINK_DEBUG */
1514 return -EINVAL;
1515 }
1516
1517 /* Prepare the call */
1518 info.cmd = cmd;
1519 info.flags = 0;
1520
1521 /* Check if we have a pointer to user space data or not. */
1522 if(extra_size == 0) {
1523
1524 /* Create the kernel buffer that we will return.
1525 * It's at an offset to match the TYPE_POINT case... */
1526 buffer_size = request_len + IW_EV_POINT_OFF;
1527 buffer = kmalloc(buffer_size, GFP_KERNEL);
1528 if (buffer == NULL) {
1529 return -ENOMEM;
1530 }
1531 /* Copy event data */
1532 memcpy(buffer + IW_EV_POINT_OFF, request, request_len);
1533 /* Use our own copy of wrqu */
1534 wrqu = (union iwreq_data *) (buffer + IW_EV_POINT_OFF
1535 + IW_EV_LCP_PK_LEN);
1536
1537 /* No extra arguments. Trivial to handle */
1538 ret = handler(dev, &info, wrqu, (char *) wrqu);
1539
1540 } else {
1541 char * extra;
1542
1543 /* Buffer for full reply */
1544 buffer_size = extra_size + IW_EV_POINT_PK_LEN + IW_EV_POINT_OFF;
1545
1546#ifdef WE_RTNETLINK_DEBUG
1547 printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes (%d bytes)\n",
1548 dev->name, extra_size, buffer_size);
1549#endif /* WE_RTNETLINK_DEBUG */
1550
1551 /* Create the kernel buffer that we will return */
1552 buffer = kmalloc(buffer_size, GFP_KERNEL);
1553 if (buffer == NULL) {
1554 return -ENOMEM;
1555 }
1556
1557 /* Put wrqu in the right place (just before extra).
1558 * Leave space for IWE header and dummy pointer...
1559 * Note that IW_EV_LCP_PK_LEN==4 bytes, so it's still aligned.
1560 */
1561 memcpy(buffer + IW_EV_LCP_PK_LEN + IW_EV_POINT_OFF,
1562 ((char *) request) + IW_EV_LCP_PK_LEN,
1563 IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN);
1564 wrqu = (union iwreq_data *) (buffer + IW_EV_LCP_PK_LEN);
1565
1566 /* Extra comes logically after that. Offset +12 bytes. */
1567 extra = buffer + IW_EV_POINT_OFF + IW_EV_POINT_PK_LEN;
1568
1569 /* Call the handler */
1570 ret = handler(dev, &info, wrqu, extra);
1571
1572 /* Adjust for the actual length if it's variable,
1573 * avoid leaking kernel bits outside. */
1574 if (!(descr->get_args & IW_PRIV_SIZE_FIXED))
1575 extra_size = adjust_priv_size(descr->get_args, wrqu);
1576 /* Re-adjust reply size */
1577 request->len = extra_size + IW_EV_POINT_PK_LEN;
1578
1579 /* Put the iwe header where it should, i.e. scrap the
1580 * dummy pointer. */
1581 memcpy(buffer + IW_EV_POINT_OFF, request, IW_EV_LCP_PK_LEN);
1582
1583#ifdef WE_RTNETLINK_DEBUG
1584 printk(KERN_DEBUG "%s (WE.r) : Reply 0x%04X, hdr_len %d, tokens %d, extra_size %d, buffer_size %d\n", dev->name, cmd, hdr_len, wrqu->data.length, extra_size, buffer_size);
1585#endif /* WE_RTNETLINK_DEBUG */
1586 }
1587
1588 /* Return the buffer to the caller */
1589 if (!ret) {
1590 *p_buf = buffer;
1591 *p_len = request->len;
1592 } else {
1593 /* Cleanup */
1594 if(buffer)
1595 kfree(buffer);
1596 }
1597
1598 return ret;
1599}
1600
1601/* ---------------------------------------------------------------- */
1602/*
1603 * Wrapper to call a private Wireless Extension SET handler.
1604 * Same as above...
1605 * It's not as nice and slimline as the standard wrapper. The cause
1606 * is struct iw_priv_args, which was not really designed for the
1607 * job we are doing here.
1608 *
1609 * IMPORTANT : This function prevents setting and getting data on the same
1610 * IOCTL and enforces the SET/GET convention. Not doing it would be
1611 * far too hairy...
1612 * If you need to set and get data at the same time, please don't use
1613 * an iw_handler but process it in your ioctl handler (i.e. use the
1614 * old driver API).
1615 */
1616static inline int rtnetlink_private_set(struct net_device * dev,
1617 struct iw_event * request,
1618 int request_len,
1619 iw_handler handler)
1620{
1621 const struct iw_priv_args * descr = NULL;
1622 unsigned int cmd;
1623 union iwreq_data * wrqu;
1624 union iwreq_data wrqu_point;
1625 int hdr_len;
1626 char * extra = NULL;
1627 int extra_size = 0;
1628 int offset = 0; /* For sub-ioctls */
1629 struct iw_request_info info;
1630 int i;
1631 int ret = -EINVAL;
1632
1633 /* Get the description of the Request */
1634 cmd = request->cmd;
1635 for(i = 0; i < dev->wireless_handlers->num_private_args; i++)
1636 if(cmd == dev->wireless_handlers->private_args[i].cmd) {
1637 descr = &(dev->wireless_handlers->private_args[i]);
1638 break;
1639 }
1640 if(descr == NULL)
1641 return -EOPNOTSUPP;
1642
1643#ifdef WE_RTNETLINK_DEBUG
1644 printk(KERN_DEBUG "%s (WE.r) : Found private handler for 0x%04X\n",
1645 ifr->ifr_name, cmd);
1646 printk(KERN_DEBUG "%s (WE.r) : Name %s, set %X, get %X\n",
1647 dev->name, descr->name, descr->set_args, descr->get_args);
1648#endif /* WE_RTNETLINK_DEBUG */
1649
1650 /* Compute the size of the set arguments */
1651 /* Check for sub-ioctl handler */
1652 if(descr->name[0] == '\0')
1653 /* Reserve one int for sub-ioctl index */
1654 offset = sizeof(__u32);
1655
1656 /* Size of set arguments */
1657 extra_size = get_priv_size(descr->set_args);
1658
1659 /* Does it fit in wrqu? */
1660 if((descr->set_args & IW_PRIV_SIZE_FIXED) &&
1661 (extra_size <= IFNAMSIZ)) {
1662 hdr_len = IW_EV_LCP_PK_LEN + extra_size;
1663 extra_size = 0;
1664 } else {
1665 hdr_len = IW_EV_POINT_PK_LEN;
1666 }
1667
1668 /* Extract fixed header from request. This is properly aligned. */
1669 wrqu = (union iwreq_data *) (((char *) request) + IW_EV_LCP_PK_LEN);
1670
1671 /* Check if wrqu is complete */
1672 if(request_len < hdr_len) {
1673#ifdef WE_RTNETLINK_DEBUG
1674 printk(KERN_DEBUG
1675 "%s (WE.r) : Wireless request too short (%d)\n",
1676 dev->name, request_len);
1677#endif /* WE_RTNETLINK_DEBUG */
1678 return -EINVAL;
1679 }
1680
1681 /* Prepare the call */
1682 info.cmd = cmd;
1683 info.flags = 0;
1684
1685 /* Check if we have a pointer to user space data or not. */
1686 if(extra_size == 0) {
1687
1688 /* No extra arguments. Trivial to handle */
1689 ret = handler(dev, &info, wrqu, (char *) wrqu);
1690
1691 } else {
1692 int extra_len;
1693
1694 /* Put wrqu in the right place (skip pointer) */
1695 memcpy(((char *) &wrqu_point) + IW_EV_POINT_OFF,
1696 wrqu, IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN);
1697
1698 /* Does it fit within bounds? */
1699 if(wrqu_point.data.length > (descr->set_args &
1700 IW_PRIV_SIZE_MASK))
1701 return -E2BIG;
1702
1703 /* Real length of payload */
1704 extra_len = adjust_priv_size(descr->set_args, &wrqu_point);
1705
1706 /* Check if request is self consistent */
1707 if((request_len - hdr_len) < extra_len) {
1708#ifdef WE_RTNETLINK_DEBUG
1709 printk(KERN_DEBUG "%s (WE.r) : Wireless request data too short (%d)\n",
1710 dev->name, extra_size);
1711#endif /* WE_RTNETLINK_DEBUG */
1712 return -EINVAL;
1713 }
1714
1715#ifdef WE_RTNETLINK_DEBUG
1716 printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes\n",
1717 dev->name, extra_size);
1718#endif /* WE_RTNETLINK_DEBUG */
1719
1720 /* Always allocate for max space. Easier, and won't last
1721 * long... */
1722 extra = kmalloc(extra_size, GFP_KERNEL);
1723 if (extra == NULL)
1724 return -ENOMEM;
1725
1726 /* Copy extra in aligned buffer */
1727 memcpy(extra, ((char *) request) + hdr_len, extra_len);
1728
1729 /* Call the handler */
1730 ret = handler(dev, &info, &wrqu_point, extra);
1731
1732 /* Cleanup - I told you it wasn't that long ;-) */
1733 kfree(extra);
1734 }
1735
1736 /* Call commit handler if needed and defined */
1737 if(ret == -EIWCOMMIT)
1738 ret = call_commit_handler(dev);
1739
1740 return ret;
1741}
1742
1743/* ---------------------------------------------------------------- */
1744/*
1745 * Main RtNetlink dispatcher. Called from the main networking code
1746 * (do_getlink() in net/core/rtnetlink.c).
1747 * Check the type of Request and call the appropriate wrapper...
1748 */
1749int wireless_rtnetlink_get(struct net_device * dev,
1750 char * data,
1751 int len,
1752 char ** p_buf,
1753 int * p_len)
1754{
1755 struct iw_event * request = (struct iw_event *) data;
1756 iw_handler handler;
1757
1758 /* Check length */
1759 if(len < IW_EV_LCP_PK_LEN) {
1760 printk(KERN_DEBUG "%s (WE.r) : RtNetlink request too short (%d)\n",
1761 dev->name, len);
1762 return -EINVAL;
1763 }
1764
1765 /* Re-check length (len may have padding) */
1766 if(request->len > len) {
1767 printk(KERN_DEBUG "%s (WE.r) : RtNetlink request len invalid (%d-%d)\n",
1768 dev->name, request->len, len);
1769 return -EINVAL;
1770 }
1771
1772 /* Only accept GET requests in here */
1773 if(!IW_IS_GET(request->cmd))
1774 return -EOPNOTSUPP;
1775
1776 /* If command is `get the encoding parameters', check if
1777 * the user has the right to do it */
1778 if (request->cmd == SIOCGIWENCODE ||
1779 request->cmd == SIOCGIWENCODEEXT) {
1780 if (!capable(CAP_NET_ADMIN))
1781 return -EPERM;
1782 }
1783
1784 /* Special cases */
1785 if(request->cmd == SIOCGIWSTATS)
1786 /* Get Wireless Stats */
1787 return rtnetlink_standard_get(dev,
1788 request,
1789 request->len,
1790 &iw_handler_get_iwstats,
1791 p_buf, p_len);
1792 if(request->cmd == SIOCGIWPRIV) {
1793 /* Check if we have some wireless handlers defined */
1794 if(dev->wireless_handlers == NULL)
1795 return -EOPNOTSUPP;
1796 /* Get Wireless Stats */
1797 return rtnetlink_standard_get(dev,
1798 request,
1799 request->len,
1800 &iw_handler_get_private,
1801 p_buf, p_len);
1802 }
1803 1035
1804 /* Basic check */ 1036 /* Basic check */
1805 if (!netif_device_present(dev)) 1037 if (!netif_device_present(dev))
1806 return -ENODEV; 1038 return -ENODEV;
1807 1039
1808 /* Try to find the handler */ 1040 /* New driver API : try to find the handler */
1809 handler = get_handler(dev, request->cmd); 1041 handler = get_handler(dev, cmd);
1810 if(handler != NULL) { 1042 if (handler) {
1811 /* Standard and private are not the same */ 1043 /* Standard and private are not the same */
1812 if(request->cmd < SIOCIWFIRSTPRIV) 1044 if (cmd < SIOCIWFIRSTPRIV)
1813 return rtnetlink_standard_get(dev, 1045 return ioctl_standard_call(dev, ifr, cmd, handler);
1814 request,
1815 request->len,
1816 handler,
1817 p_buf, p_len);
1818 else 1046 else
1819 return rtnetlink_private_get(dev, 1047 return ioctl_private_call(dev, ifr, cmd, handler);
1820 request,
1821 request->len,
1822 handler,
1823 p_buf, p_len);
1824 } 1048 }
1825 1049 /* Old driver API : call driver ioctl handler */
1050 if (dev->do_ioctl)
1051 return dev->do_ioctl(dev, ifr, cmd);
1826 return -EOPNOTSUPP; 1052 return -EOPNOTSUPP;
1827} 1053}
1828 1054
1829/* ---------------------------------------------------------------- */ 1055/* entry point from dev ioctl */
1830/* 1056int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd,
1831 * Main RtNetlink dispatcher. Called from the main networking code 1057 void __user *arg)
1832 * (do_setlink() in net/core/rtnetlink.c).
1833 * Check the type of Request and call the appropriate wrapper...
1834 */
1835int wireless_rtnetlink_set(struct net_device * dev,
1836 char * data,
1837 int len)
1838{ 1058{
1839 struct iw_event * request = (struct iw_event *) data; 1059 int ret;
1840 iw_handler handler;
1841
1842 /* Check length */
1843 if(len < IW_EV_LCP_PK_LEN) {
1844 printk(KERN_DEBUG "%s (WE.r) : RtNetlink request too short (%d)\n",
1845 dev->name, len);
1846 return -EINVAL;
1847 }
1848
1849 /* Re-check length (len may have padding) */
1850 if(request->len > len) {
1851 printk(KERN_DEBUG "%s (WE.r) : RtNetlink request len invalid (%d-%d)\n",
1852 dev->name, request->len, len);
1853 return -EINVAL;
1854 }
1855
1856 /* Only accept SET requests in here */
1857 if(!IW_IS_SET(request->cmd))
1858 return -EOPNOTSUPP;
1859
1860 /* Basic check */
1861 if (!netif_device_present(dev))
1862 return -ENODEV;
1863 1060
1864 /* New driver API : try to find the handler */ 1061 /* If command is `set a parameter', or
1865 handler = get_handler(dev, request->cmd); 1062 * `get the encoding parameters', check if
1866 if(handler != NULL) { 1063 * the user has the right to do it */
1867 /* Standard and private are not the same */ 1064 if ((IW_IS_SET(cmd) || cmd == SIOCGIWENCODE || cmd == SIOCGIWENCODEEXT)
1868 if(request->cmd < SIOCIWFIRSTPRIV) 1065 && !capable(CAP_NET_ADMIN))
1869 return rtnetlink_standard_set(dev, 1066 return -EPERM;
1870 request, 1067
1871 request->len, 1068 dev_load(ifr->ifr_name);
1872 handler); 1069 rtnl_lock();
1873 else 1070 ret = wireless_process_ioctl(ifr, cmd);
1874 return rtnetlink_private_set(dev, 1071 rtnl_unlock();
1875 request, 1072 if (IW_IS_GET(cmd) && copy_to_user(arg, ifr, sizeof(struct ifreq)))
1876 request->len, 1073 return -EFAULT;
1877 handler); 1074 return ret;
1878 }
1879
1880 return -EOPNOTSUPP;
1881} 1075}
1882#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
1883
1884 1076
1885/************************* EVENT PROCESSING *************************/ 1077/************************* EVENT PROCESSING *************************/
1886/* 1078/*
@@ -1888,7 +1080,6 @@ int wireless_rtnetlink_set(struct net_device * dev,
1888 * Most often, the event will be propagated through rtnetlink 1080 * Most often, the event will be propagated through rtnetlink
1889 */ 1081 */
1890 1082
1891#ifdef WE_EVENT_RTNETLINK
1892/* ---------------------------------------------------------------- */ 1083/* ---------------------------------------------------------------- */
1893/* 1084/*
1894 * Locking... 1085 * Locking...
@@ -1933,15 +1124,12 @@ static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0);
1933 * current wireless config. Dumping the wireless config is far too 1124 * current wireless config. Dumping the wireless config is far too
1934 * expensive (for each parameter, the driver needs to query the hardware). 1125 * expensive (for each parameter, the driver needs to query the hardware).
1935 */ 1126 */
1936static inline int rtnetlink_fill_iwinfo(struct sk_buff * skb, 1127static int rtnetlink_fill_iwinfo(struct sk_buff *skb, struct net_device *dev,
1937 struct net_device * dev, 1128 int type, char *event, int event_len)
1938 int type,
1939 char * event,
1940 int event_len)
1941{ 1129{
1942 struct ifinfomsg *r; 1130 struct ifinfomsg *r;
1943 struct nlmsghdr *nlh; 1131 struct nlmsghdr *nlh;
1944 unsigned char *b = skb->tail; 1132 unsigned char *b = skb_tail_pointer(skb);
1945 1133
1946 nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(*r)); 1134 nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(*r));
1947 r = NLMSG_DATA(nlh); 1135 r = NLMSG_DATA(nlh);
@@ -1955,12 +1143,12 @@ static inline int rtnetlink_fill_iwinfo(struct sk_buff * skb,
1955 /* Add the wireless events in the netlink packet */ 1143 /* Add the wireless events in the netlink packet */
1956 RTA_PUT(skb, IFLA_WIRELESS, event_len, event); 1144 RTA_PUT(skb, IFLA_WIRELESS, event_len, event);
1957 1145
1958 nlh->nlmsg_len = skb->tail - b; 1146 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1959 return skb->len; 1147 return skb->len;
1960 1148
1961nlmsg_failure: 1149nlmsg_failure:
1962rtattr_failure: 1150rtattr_failure:
1963 skb_trim(skb, b - skb->data); 1151 nlmsg_trim(skb, b);
1964 return -1; 1152 return -1;
1965} 1153}
1966 1154
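The open-coded skb->tail arithmetic above had to go because skb->tail may now be stored as an offset from skb->head rather than as a raw pointer (NET_SKBUFF_DATA_USES_OFFSET builds). The accessors hide the representation; a sketch of the two variants:

	#ifdef NET_SKBUFF_DATA_USES_OFFSET
	static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
	{
		return skb->head + skb->tail;	/* tail kept as an offset */
	}
	#else
	static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
	{
		return skb->tail;		/* tail kept as a pointer */
	}
	#endif

nlmsg_trim() plays the same role on the error path that the skb_trim() call used to, taking a tail pointer instead of raw pointer arithmetic against skb->data.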
@@ -1971,9 +1159,7 @@ rtattr_failure:
1971 * Andrzej Krzysztofowicz mandated that I used a IFLA_XXX field 1159 * Andrzej Krzysztofowicz mandated that I used a IFLA_XXX field
1972 * within a RTM_NEWLINK event. 1160 * within a RTM_NEWLINK event.
1973 */ 1161 */
1974static inline void rtmsg_iwinfo(struct net_device * dev, 1162static void rtmsg_iwinfo(struct net_device *dev, char *event, int event_len)
1975 char * event,
1976 int event_len)
1977{ 1163{
1978 struct sk_buff *skb; 1164 struct sk_buff *skb;
1979 int size = NLMSG_GOODSIZE; 1165 int size = NLMSG_GOODSIZE;
@@ -1992,8 +1178,6 @@ static inline void rtmsg_iwinfo(struct net_device * dev,
1992 tasklet_schedule(&wireless_nlevent_tasklet); 1178 tasklet_schedule(&wireless_nlevent_tasklet);
1993} 1179}
1994 1180
1995#endif /* WE_EVENT_RTNETLINK */
1996
1997/* ---------------------------------------------------------------- */ 1181/* ---------------------------------------------------------------- */
1998/* 1182/*
1999 * Main event dispatcher. Called from other parts and drivers. 1183 * Main event dispatcher. Called from other parts and drivers.
@@ -2015,17 +1199,17 @@ void wireless_send_event(struct net_device * dev,
2015 unsigned cmd_index; /* *MUST* be unsigned */ 1199 unsigned cmd_index; /* *MUST* be unsigned */
2016 1200
2017 /* Get the description of the Event */ 1201 /* Get the description of the Event */
2018 if(cmd <= SIOCIWLAST) { 1202 if (cmd <= SIOCIWLAST) {
2019 cmd_index = cmd - SIOCIWFIRST; 1203 cmd_index = cmd - SIOCIWFIRST;
2020 if(cmd_index < standard_ioctl_num) 1204 if (cmd_index < standard_ioctl_num)
2021 descr = &(standard_ioctl[cmd_index]); 1205 descr = &(standard_ioctl[cmd_index]);
2022 } else { 1206 } else {
2023 cmd_index = cmd - IWEVFIRST; 1207 cmd_index = cmd - IWEVFIRST;
2024 if(cmd_index < standard_event_num) 1208 if (cmd_index < standard_event_num)
2025 descr = &(standard_event[cmd_index]); 1209 descr = &(standard_event[cmd_index]);
2026 } 1210 }
2027 /* Don't accept unknown events */ 1211 /* Don't accept unknown events */
2028 if(descr == NULL) { 1212 if (descr == NULL) {
2029 /* Note : we don't return an error to the driver, because 1213 /* Note : we don't return an error to the driver, because
2030 * the driver would not know what to do about it. It can't 1214 * the driver would not know what to do about it. It can't
2031 * return an error to the user, because the event is not 1215 * return an error to the user, because the event is not
@@ -2037,63 +1221,50 @@ void wireless_send_event(struct net_device * dev,
2037 dev->name, cmd); 1221 dev->name, cmd);
2038 return; 1222 return;
2039 } 1223 }
2040#ifdef WE_EVENT_DEBUG
2041 printk(KERN_DEBUG "%s (WE) : Got event 0x%04X\n",
2042 dev->name, cmd);
2043 printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
2044#endif /* WE_EVENT_DEBUG */
2045 1224
2046 /* Check extra parameters and set extra_len */ 1225 /* Check extra parameters and set extra_len */
2047 if(descr->header_type == IW_HEADER_TYPE_POINT) { 1226 if (descr->header_type == IW_HEADER_TYPE_POINT) {
2048 /* Check if the number of tokens fits within bounds */ 1227 /* Check if the number of tokens fits within bounds */
2049 if(wrqu->data.length > descr->max_tokens) { 1228 if (wrqu->data.length > descr->max_tokens) {
2050 printk(KERN_ERR "%s (WE) : Wireless Event too big (%d)\n", dev->name, wrqu->data.length); 1229 printk(KERN_ERR "%s (WE) : Wireless Event too big (%d)\n", dev->name, wrqu->data.length);
2051 return; 1230 return;
2052 } 1231 }
2053 if(wrqu->data.length < descr->min_tokens) { 1232 if (wrqu->data.length < descr->min_tokens) {
2054 printk(KERN_ERR "%s (WE) : Wireless Event too small (%d)\n", dev->name, wrqu->data.length); 1233 printk(KERN_ERR "%s (WE) : Wireless Event too small (%d)\n", dev->name, wrqu->data.length);
2055 return; 1234 return;
2056 } 1235 }
2057 /* Calculate extra_len - extra is NULL for restricted events */ 1236 /* Calculate extra_len - extra is NULL for restricted events */
2058 if(extra != NULL) 1237 if (extra != NULL)
2059 extra_len = wrqu->data.length * descr->token_size; 1238 extra_len = wrqu->data.length * descr->token_size;
2060 /* Always at an offset in wrqu */ 1239 /* Always at an offset in wrqu */
2061 wrqu_off = IW_EV_POINT_OFF; 1240 wrqu_off = IW_EV_POINT_OFF;
2062#ifdef WE_EVENT_DEBUG
2063 printk(KERN_DEBUG "%s (WE) : Event 0x%04X, tokens %d, extra_len %d\n", dev->name, cmd, wrqu->data.length, extra_len);
2064#endif /* WE_EVENT_DEBUG */
2065 } 1241 }
2066 1242
2067 /* Total length of the event */ 1243 /* Total length of the event */
2068 hdr_len = event_type_size[descr->header_type]; 1244 hdr_len = event_type_size[descr->header_type];
2069 event_len = hdr_len + extra_len; 1245 event_len = hdr_len + extra_len;
2070 1246
2071#ifdef WE_EVENT_DEBUG
2072 printk(KERN_DEBUG "%s (WE) : Event 0x%04X, hdr_len %d, wrqu_off %d, event_len %d\n", dev->name, cmd, hdr_len, wrqu_off, event_len);
2073#endif /* WE_EVENT_DEBUG */
2074
2075 /* Create temporary buffer to hold the event */ 1247 /* Create temporary buffer to hold the event */
2076 event = kmalloc(event_len, GFP_ATOMIC); 1248 event = kmalloc(event_len, GFP_ATOMIC);
2077 if(event == NULL) 1249 if (event == NULL)
2078 return; 1250 return;
2079 1251
2080 /* Fill event */ 1252 /* Fill event */
2081 event->len = event_len; 1253 event->len = event_len;
2082 event->cmd = cmd; 1254 event->cmd = cmd;
2083 memcpy(&event->u, ((char *) wrqu) + wrqu_off, hdr_len - IW_EV_LCP_LEN); 1255 memcpy(&event->u, ((char *) wrqu) + wrqu_off, hdr_len - IW_EV_LCP_LEN);
2084 if(extra != NULL) 1256 if (extra)
2085 memcpy(((char *) event) + hdr_len, extra, extra_len); 1257 memcpy(((char *) event) + hdr_len, extra, extra_len);
2086 1258
2087#ifdef WE_EVENT_RTNETLINK
2088 /* Send via the RtNetlink event channel */ 1259 /* Send via the RtNetlink event channel */
2089 rtmsg_iwinfo(dev, (char *) event, event_len); 1260 rtmsg_iwinfo(dev, (char *) event, event_len);
2090#endif /* WE_EVENT_RTNETLINK */
2091 1261
2092 /* Cleanup */ 1262 /* Cleanup */
2093 kfree(event); 1263 kfree(event);
2094 1264
2095 return; /* Always success, I guess ;-) */ 1265 return; /* Always success, I guess ;-) */
2096} 1266}
1267EXPORT_SYMBOL(wireless_send_event);
2097 1268
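With the EXPORT_SYMBOL now next to the definition, wireless_send_event() remains the single entry point drivers use to report asynchronous wireless events. An illustrative call reporting a new association ('bssid' and 'dev' stand in for hypothetical driver state):

	union iwreq_data wrqu;

	memset(&wrqu, 0, sizeof(wrqu));
	wrqu.ap_addr.sa_family = ARPHRD_ETHER;
	memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN);
	wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL);	/* "associated" */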
2098/********************** ENHANCED IWSPY SUPPORT **********************/ 1269/********************** ENHANCED IWSPY SUPPORT **********************/
2099/* 1270/*
@@ -2113,11 +1284,11 @@ void wireless_send_event(struct net_device * dev,
2113 * Because this is called on the Rx path via wireless_spy_update(), 1284 * Because this is called on the Rx path via wireless_spy_update(),
2114 * we want it to be efficient... 1285 * we want it to be efficient...
2115 */ 1286 */
2116static inline struct iw_spy_data * get_spydata(struct net_device *dev) 1287static inline struct iw_spy_data *get_spydata(struct net_device *dev)
2117{ 1288{
2118 /* This is the new way */ 1289 /* This is the new way */
2119 if(dev->wireless_data) 1290 if (dev->wireless_data)
2120 return(dev->wireless_data->spy_data); 1291 return dev->wireless_data->spy_data;
2121 return NULL; 1292 return NULL;
2122} 1293}
2123 1294
@@ -2134,7 +1305,7 @@ int iw_handler_set_spy(struct net_device * dev,
2134 struct sockaddr * address = (struct sockaddr *) extra; 1305 struct sockaddr * address = (struct sockaddr *) extra;
2135 1306
2136 /* Make sure driver is not buggy or using the old API */ 1307 /* Make sure driver is not buggy or using the old API */
2137 if(!spydata) 1308 if (!spydata)
2138 return -EOPNOTSUPP; 1309 return -EOPNOTSUPP;
2139 1310
2140 /* Disable spy collection while we copy the addresses. 1311 /* Disable spy collection while we copy the addresses.
@@ -2151,29 +1322,16 @@ int iw_handler_set_spy(struct net_device * dev,
2151 smp_wmb(); 1322 smp_wmb();
2152 1323
2153 /* Are there any addresses to copy? */ 1324 /* Are there any addresses to copy? */
2154 if(wrqu->data.length > 0) { 1325 if (wrqu->data.length > 0) {
2155 int i; 1326 int i;
2156 1327
2157 /* Copy addresses */ 1328 /* Copy addresses */
2158 for(i = 0; i < wrqu->data.length; i++) 1329 for (i = 0; i < wrqu->data.length; i++)
2159 memcpy(spydata->spy_address[i], address[i].sa_data, 1330 memcpy(spydata->spy_address[i], address[i].sa_data,
2160 ETH_ALEN); 1331 ETH_ALEN);
2161 /* Reset stats */ 1332 /* Reset stats */
2162 memset(spydata->spy_stat, 0, 1333 memset(spydata->spy_stat, 0,
2163 sizeof(struct iw_quality) * IW_MAX_SPY); 1334 sizeof(struct iw_quality) * IW_MAX_SPY);
2164
2165#ifdef WE_SPY_DEBUG
2166 printk(KERN_DEBUG "iw_handler_set_spy() : wireless_data %p, spydata %p, num %d\n", dev->wireless_data, spydata, wrqu->data.length);
2167 for (i = 0; i < wrqu->data.length; i++)
2168 printk(KERN_DEBUG
2169 "%02X:%02X:%02X:%02X:%02X:%02X \n",
2170 spydata->spy_address[i][0],
2171 spydata->spy_address[i][1],
2172 spydata->spy_address[i][2],
2173 spydata->spy_address[i][3],
2174 spydata->spy_address[i][4],
2175 spydata->spy_address[i][5]);
2176#endif /* WE_SPY_DEBUG */
2177 } 1335 }
2178 1336
2179 /* Make sure above is updated before re-enabling */ 1337 /* Make sure above is updated before re-enabling */
@@ -2184,6 +1342,7 @@ int iw_handler_set_spy(struct net_device * dev,
2184 1342
2185 return 0; 1343 return 0;
2186} 1344}
1345EXPORT_SYMBOL(iw_handler_set_spy);
2187 1346
2188/*------------------------------------------------------------------*/ 1347/*------------------------------------------------------------------*/
2189/* 1348/*
@@ -2199,26 +1358,27 @@ int iw_handler_get_spy(struct net_device * dev,
2199 int i; 1358 int i;
2200 1359
2201 /* Make sure driver is not buggy or using the old API */ 1360 /* Make sure driver is not buggy or using the old API */
2202 if(!spydata) 1361 if (!spydata)
2203 return -EOPNOTSUPP; 1362 return -EOPNOTSUPP;
2204 1363
2205 wrqu->data.length = spydata->spy_number; 1364 wrqu->data.length = spydata->spy_number;
2206 1365
2207 /* Copy addresses. */ 1366 /* Copy addresses. */
2208 for(i = 0; i < spydata->spy_number; i++) { 1367 for (i = 0; i < spydata->spy_number; i++) {
2209 memcpy(address[i].sa_data, spydata->spy_address[i], ETH_ALEN); 1368 memcpy(address[i].sa_data, spydata->spy_address[i], ETH_ALEN);
2210 address[i].sa_family = AF_UNIX; 1369 address[i].sa_family = AF_UNIX;
2211 } 1370 }
2212 /* Copy stats to the user buffer (just after). */ 1371 /* Copy stats to the user buffer (just after). */
2213 if(spydata->spy_number > 0) 1372 if (spydata->spy_number > 0)
2214 memcpy(extra + (sizeof(struct sockaddr) *spydata->spy_number), 1373 memcpy(extra + (sizeof(struct sockaddr) *spydata->spy_number),
2215 spydata->spy_stat, 1374 spydata->spy_stat,
2216 sizeof(struct iw_quality) * spydata->spy_number); 1375 sizeof(struct iw_quality) * spydata->spy_number);
2217 /* Reset updated flags. */ 1376 /* Reset updated flags. */
2218 for(i = 0; i < spydata->spy_number; i++) 1377 for (i = 0; i < spydata->spy_number; i++)
2219 spydata->spy_stat[i].updated &= ~IW_QUAL_ALL_UPDATED; 1378 spydata->spy_stat[i].updated &= ~IW_QUAL_ALL_UPDATED;
2220 return 0; 1379 return 0;
2221} 1380}
1381EXPORT_SYMBOL(iw_handler_get_spy);
2222 1382
2223/*------------------------------------------------------------------*/ 1383/*------------------------------------------------------------------*/
2224/* 1384/*
@@ -2233,7 +1393,7 @@ int iw_handler_set_thrspy(struct net_device * dev,
2233 struct iw_thrspy * threshold = (struct iw_thrspy *) extra; 1393 struct iw_thrspy * threshold = (struct iw_thrspy *) extra;
2234 1394
2235 /* Make sure driver is not buggy or using the old API */ 1395 /* Make sure driver is not buggy or using the old API */
2236 if(!spydata) 1396 if (!spydata)
2237 return -EOPNOTSUPP; 1397 return -EOPNOTSUPP;
2238 1398
2239 /* Just do it */ 1399 /* Just do it */
@@ -2243,12 +1403,9 @@ int iw_handler_set_thrspy(struct net_device * dev,
2243 /* Clear flag */ 1403 /* Clear flag */
2244 memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under)); 1404 memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under));
2245 1405
2246#ifdef WE_SPY_DEBUG
2247 printk(KERN_DEBUG "iw_handler_set_thrspy() : low %d ; high %d\n", spydata->spy_thr_low.level, spydata->spy_thr_high.level);
2248#endif /* WE_SPY_DEBUG */
2249
2250 return 0; 1406 return 0;
2251} 1407}
1408EXPORT_SYMBOL(iw_handler_set_thrspy);
2252 1409
2253/*------------------------------------------------------------------*/ 1410/*------------------------------------------------------------------*/
2254/* 1411/*
@@ -2263,7 +1420,7 @@ int iw_handler_get_thrspy(struct net_device * dev,
2263 struct iw_thrspy * threshold = (struct iw_thrspy *) extra; 1420 struct iw_thrspy * threshold = (struct iw_thrspy *) extra;
2264 1421
2265 /* Make sure driver is not buggy or using the old API */ 1422 /* Make sure driver is not buggy or using the old API */
2266 if(!spydata) 1423 if (!spydata)
2267 return -EOPNOTSUPP; 1424 return -EOPNOTSUPP;
2268 1425
2269 /* Just do it */ 1426 /* Just do it */
@@ -2272,6 +1429,7 @@ int iw_handler_get_thrspy(struct net_device * dev,
2272 1429
2273 return 0; 1430 return 0;
2274} 1431}
1432EXPORT_SYMBOL(iw_handler_get_thrspy);
2275 1433
2276/*------------------------------------------------------------------*/ 1434/*------------------------------------------------------------------*/
2277/* 1435/*
@@ -2297,16 +1455,6 @@ static void iw_send_thrspy_event(struct net_device * dev,
2297 memcpy(&(threshold.low), &(spydata->spy_thr_low), 1455 memcpy(&(threshold.low), &(spydata->spy_thr_low),
2298 2 * sizeof(struct iw_quality)); 1456 2 * sizeof(struct iw_quality));
2299 1457
2300#ifdef WE_SPY_DEBUG
2301 printk(KERN_DEBUG "iw_send_thrspy_event() : address %02X:%02X:%02X:%02X:%02X:%02X, level %d, up = %d\n",
2302 threshold.addr.sa_data[0],
2303 threshold.addr.sa_data[1],
2304 threshold.addr.sa_data[2],
2305 threshold.addr.sa_data[3],
2306 threshold.addr.sa_data[4],
2307 threshold.addr.sa_data[5], threshold.qual.level);
2308#endif /* WE_SPY_DEBUG */
2309
2310 /* Send event to user space */ 1458 /* Send event to user space */
2311 wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold); 1459 wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold);
2312} 1460}
@@ -2327,16 +1475,12 @@ void wireless_spy_update(struct net_device * dev,
2327 int match = -1; 1475 int match = -1;
2328 1476
2329 /* Make sure driver is not buggy or using the old API */ 1477 /* Make sure driver is not buggy or using the old API */
2330 if(!spydata) 1478 if (!spydata)
2331 return; 1479 return;
2332 1480
2333#ifdef WE_SPY_DEBUG
2334 printk(KERN_DEBUG "wireless_spy_update() : wireless_data %p, spydata %p, address %02X:%02X:%02X:%02X:%02X:%02X\n", dev->wireless_data, spydata, address[0], address[1], address[2], address[3], address[4], address[5]);
2335#endif /* WE_SPY_DEBUG */
2336
2337 /* Update all records that match */ 1481 /* Update all records that match */
2338 for(i = 0; i < spydata->spy_number; i++) 1482 for (i = 0; i < spydata->spy_number; i++)
2339 if(!compare_ether_addr(address, spydata->spy_address[i])) { 1483 if (!compare_ether_addr(address, spydata->spy_address[i])) {
2340 memcpy(&(spydata->spy_stat[i]), wstats, 1484 memcpy(&(spydata->spy_stat[i]), wstats,
2341 sizeof(struct iw_quality)); 1485 sizeof(struct iw_quality));
2342 match = i; 1486 match = i;
@@ -2346,15 +1490,15 @@ void wireless_spy_update(struct net_device * dev,
2346 * To avoid event storms, we have a simple hysteresis : we generate 1490 * To avoid event storms, we have a simple hysteresis : we generate
2347 * an event only when we go under the low threshold or above the 1491 * an event only when we go under the low threshold or above the
2348 * high threshold. */ 1492 * high threshold. */
2349 if(match >= 0) { 1493 if (match >= 0) {
2350 if(spydata->spy_thr_under[match]) { 1494 if (spydata->spy_thr_under[match]) {
2351 if(wstats->level > spydata->spy_thr_high.level) { 1495 if (wstats->level > spydata->spy_thr_high.level) {
2352 spydata->spy_thr_under[match] = 0; 1496 spydata->spy_thr_under[match] = 0;
2353 iw_send_thrspy_event(dev, spydata, 1497 iw_send_thrspy_event(dev, spydata,
2354 address, wstats); 1498 address, wstats);
2355 } 1499 }
2356 } else { 1500 } else {
2357 if(wstats->level < spydata->spy_thr_low.level) { 1501 if (wstats->level < spydata->spy_thr_low.level) {
2358 spydata->spy_thr_under[match] = 1; 1502 spydata->spy_thr_under[match] = 1;
2359 iw_send_thrspy_event(dev, spydata, 1503 iw_send_thrspy_event(dev, spydata,
2360 address, wstats); 1504 address, wstats);
@@ -2362,10 +1506,4 @@ void wireless_spy_update(struct net_device * dev,
2362 } 1506 }
2363 } 1507 }
2364} 1508}
2365
2366EXPORT_SYMBOL(iw_handler_get_spy);
2367EXPORT_SYMBOL(iw_handler_get_thrspy);
2368EXPORT_SYMBOL(iw_handler_set_spy);
2369EXPORT_SYMBOL(iw_handler_set_thrspy);
2370EXPORT_SYMBOL(wireless_send_event);
2371EXPORT_SYMBOL(wireless_spy_update); 1509EXPORT_SYMBOL(wireless_spy_update);
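That concludes the wireless extensions rework: the ioctl path is now entered through the exported wext_handle_ioctl() above, and wireless_process_ioctl() is private to this file. A hedged sketch of the caller side in dev_ioctl(), with the exact plumbing there outside this hunk:

	/* In net/core/dev.c::dev_ioctl(), for the wireless ioctl range */
	if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
		/* 'ifr' has already been copied in from userspace */
		return wext_handle_ioctl(&ifr, cmd, arg);
	}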
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index e62ba41b05c5..0d6002fc77b2 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -951,7 +951,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
951 * Incoming Call User Data. 951 * Incoming Call User Data.
952 */ 952 */
953 if (skb->len >= 0) { 953 if (skb->len >= 0) {
954 memcpy(makex25->calluserdata.cuddata, skb->data, skb->len); 954 skb_copy_from_linear_data(skb, makex25->calluserdata.cuddata, skb->len);
955 makex25->calluserdata.cudlength = skb->len; 955 makex25->calluserdata.cudlength = skb->len;
956 } 956 }
957 957
@@ -1058,9 +1058,10 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock,
1058 */ 1058 */
1059 SOCK_DEBUG(sk, "x25_sendmsg: Copying user data\n"); 1059 SOCK_DEBUG(sk, "x25_sendmsg: Copying user data\n");
1060 1060
1061 asmptr = skb->h.raw = skb_put(skb, len); 1061 skb_reset_transport_header(skb);
1062 skb_put(skb, len);
1062 1063
1063 rc = memcpy_fromiovec(asmptr, msg->msg_iov, len); 1064 rc = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
1064 if (rc) 1065 if (rc)
1065 goto out_kfree_skb; 1066 goto out_kfree_skb;
1066 1067
@@ -1210,8 +1211,7 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
1210 } 1211 }
1211 } 1212 }
1212 1213
1213 skb->h.raw = skb->data; 1214 skb_reset_transport_header(skb);
1214
1215 copied = skb->len; 1215 copied = skb->len;
1216 1216
1217 if (copied > size) { 1217 if (copied > size) {
@@ -1280,6 +1280,12 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1280 rc = sock_get_timestamp(sk, 1280 rc = sock_get_timestamp(sk,
1281 (struct timeval __user *)argp); 1281 (struct timeval __user *)argp);
1282 break; 1282 break;
1283 case SIOCGSTAMPNS:
1284 rc = -EINVAL;
1285 if (sk)
1286 rc = sock_get_timestampns(sk,
1287 (struct timespec __user *)argp);
1288 break;
1283 case SIOCGIFADDR: 1289 case SIOCGIFADDR:
1284 case SIOCSIFADDR: 1290 case SIOCSIFADDR:
1285 case SIOCGIFDSTADDR: 1291 case SIOCGIFDSTADDR:
@@ -1521,6 +1527,12 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd,
1521 rc = compat_sock_get_timestamp(sk, 1527 rc = compat_sock_get_timestamp(sk,
1522 (struct timeval __user*)argp); 1528 (struct timeval __user*)argp);
1523 break; 1529 break;
1530 case SIOCGSTAMPNS:
1531 rc = -EINVAL;
1532 if (sk)
1533 rc = compat_sock_get_timestampns(sk,
1534 (struct timespec __user*)argp);
1535 break;
1524 case SIOCGIFADDR: 1536 case SIOCGIFADDR:
1525 case SIOCSIFADDR: 1537 case SIOCSIFADDR:
1526 case SIOCGIFDSTADDR: 1538 case SIOCGIFDSTADDR:
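The new SIOCGSTAMPNS cases give X.25 sockets the nanosecond-resolution counterpart of SIOCGSTAMP. From userspace both are used the same way; an illustrative snippet (header names per typical glibc/Linux installs):

	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/sockios.h>	/* SIOCGSTAMPNS */
	#include <time.h>

	/* Print the kernel timestamp of the last packet received on 'fd' */
	static void print_rx_stamp(int fd)
	{
		struct timespec ts;

		if (ioctl(fd, SIOCGSTAMPNS, &ts) == 0)
			printf("last rx at %ld.%09ld\n",
			       (long)ts.tv_sec, ts.tv_nsec);
	}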
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index c7221de98a95..848a6b6f90a6 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -48,7 +48,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb)
48 if ((sk = x25_find_socket(lci, nb)) != NULL) { 48 if ((sk = x25_find_socket(lci, nb)) != NULL) {
49 int queued = 1; 49 int queued = 1;
50 50
51 skb->h.raw = skb->data; 51 skb_reset_transport_header(skb);
52 bh_lock_sock(sk); 52 bh_lock_sock(sk);
53 if (!sock_owned_by_user(sk)) { 53 if (!sock_owned_by_user(sk)) {
54 queued = x25_process_rx_frame(sk, skb); 54 queued = x25_process_rx_frame(sk, skb);
@@ -191,7 +191,7 @@ void x25_send_frame(struct sk_buff *skb, struct x25_neigh *nb)
191{ 191{
192 unsigned char *dptr; 192 unsigned char *dptr;
193 193
194 skb->nh.raw = skb->data; 194 skb_reset_network_header(skb);
195 195
196 switch (nb->dev->type) { 196 switch (nb->dev->type) {
197 case ARPHRD_X25: 197 case ARPHRD_X25:
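The skb->h.raw and skb->nh.raw assignments become helper calls for the same reason skb_tail_pointer() was introduced: the header fields may be offsets rather than pointers depending on the build. The pointer-based variants are essentially (sketch):

	static inline void skb_reset_transport_header(struct sk_buff *skb)
	{
		skb->transport_header = skb->data;	/* was skb->h.raw = skb->data */
	}

	static inline void skb_reset_network_header(struct sk_buff *skb)
	{
		skb->network_header = skb->data;	/* was skb->nh.raw = skb->data */
	}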
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index c5239fcdefa0..1c88762c2794 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -53,17 +53,20 @@ static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
53 53
54 skb_queue_tail(&x25->fragment_queue, skb); 54 skb_queue_tail(&x25->fragment_queue, skb);
55 55
56 skbn->h.raw = skbn->data; 56 skb_reset_transport_header(skbn);
57 57
58 skbo = skb_dequeue(&x25->fragment_queue); 58 skbo = skb_dequeue(&x25->fragment_queue);
59 memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len); 59 skb_copy_from_linear_data(skbo, skb_put(skbn, skbo->len),
60 skbo->len);
60 kfree_skb(skbo); 61 kfree_skb(skbo);
61 62
62 while ((skbo = 63 while ((skbo =
63 skb_dequeue(&x25->fragment_queue)) != NULL) { 64 skb_dequeue(&x25->fragment_queue)) != NULL) {
64 skb_pull(skbo, (x25->neighbour->extended) ? 65 skb_pull(skbo, (x25->neighbour->extended) ?
65 X25_EXT_MIN_LEN : X25_STD_MIN_LEN); 66 X25_EXT_MIN_LEN : X25_STD_MIN_LEN);
66 memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len); 67 skb_copy_from_linear_data(skbo,
68 skb_put(skbn, skbo->len),
69 skbo->len);
67 kfree_skb(skbo); 70 kfree_skb(skbo);
68 } 71 }
69 72
@@ -112,8 +115,9 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp
112 * Copy any Call User Data. 115 * Copy any Call User Data.
113 */ 116 */
114 if (skb->len >= 0) { 117 if (skb->len >= 0) {
115 memcpy(x25->calluserdata.cuddata, skb->data, 118 skb_copy_from_linear_data(skb,
116 skb->len); 119 x25->calluserdata.cuddata,
120 skb->len);
117 x25->calluserdata.cudlength = skb->len; 121 x25->calluserdata.cudlength = skb->len;
118 } 122 }
119 if (!sock_flag(sk, SOCK_DEAD)) 123 if (!sock_flag(sk, SOCK_DEAD))
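skb_copy_from_linear_data() is a thin wrapper around memcpy() on the linear part of the skb, so the conversions above are mechanical. Roughly:

	static inline void skb_copy_from_linear_data(const struct sk_buff *skb,
						     void *to,
						     const unsigned int len)
	{
		memcpy(to, skb->data, len);	/* read from the linear area */
	}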
diff --git a/net/x25/x25_out.c b/net/x25/x25_out.c
index 6f5737853912..2b96b52114d6 100644
--- a/net/x25/x25_out.c
+++ b/net/x25/x25_out.c
@@ -61,7 +61,7 @@ int x25_output(struct sock *sk, struct sk_buff *skb)
61 61
62 if (skb->len - header_len > max_len) { 62 if (skb->len - header_len > max_len) {
63 /* Save a copy of the Header */ 63 /* Save a copy of the Header */
64 memcpy(header, skb->data, header_len); 64 skb_copy_from_linear_data(skb, header, header_len);
65 skb_pull(skb, header_len); 65 skb_pull(skb, header_len);
66 66
67 frontlen = skb_headroom(skb); 67 frontlen = skb_headroom(skb);
@@ -84,12 +84,12 @@ int x25_output(struct sock *sk, struct sk_buff *skb)
84 len = max_len > skb->len ? skb->len : max_len; 84 len = max_len > skb->len ? skb->len : max_len;
85 85
86 /* Copy the user data */ 86 /* Copy the user data */
87 memcpy(skb_put(skbn, len), skb->data, len); 87 skb_copy_from_linear_data(skb, skb_put(skbn, len), len);
88 skb_pull(skb, len); 88 skb_pull(skb, len);
89 89
90 /* Duplicate the Header */ 90 /* Duplicate the Header */
91 skb_push(skbn, header_len); 91 skb_push(skbn, header_len);
92 memcpy(skbn->data, header, header_len); 92 skb_copy_to_linear_data(skbn, header, header_len);
93 93
94 if (skb->len > 0) { 94 if (skb->len > 0) {
95 if (x25->neighbour->extended) 95 if (x25->neighbour->extended)
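The write direction has a matching helper, used above when the saved header is pushed back in front of each fragment. Roughly:

	static inline void skb_copy_to_linear_data(struct sk_buff *skb,
						   const void *from,
						   const unsigned int len)
	{
		memcpy(skb->data, from, len);	/* write into the linear area */
	}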
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index f373a8a7d9c8..be529c4241a6 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -532,8 +532,8 @@ EXPORT_SYMBOL_GPL(xfrm_count_enc_supported);
532int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc, 532int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc,
533 int offset, int len, icv_update_fn_t icv_update) 533 int offset, int len, icv_update_fn_t icv_update)
534{ 534{
535 int start = skb_headlen(skb); 535 int end = skb_headlen(skb);
536 int i, copy = start - offset; 536 int i, copy = end - offset;
537 int err; 537 int err;
538 struct scatterlist sg; 538 struct scatterlist sg;
539 539
@@ -556,11 +556,9 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc,
556 } 556 }
557 557
558 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 558 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
559 int end; 559 BUG_TRAP(len >= 0);
560 560
561 BUG_TRAP(start <= offset + len); 561 end = offset + skb_shinfo(skb)->frags[i].size;
562
563 end = start + skb_shinfo(skb)->frags[i].size;
564 if ((copy = end - offset) > 0) { 562 if ((copy = end - offset) > 0) {
565 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 563 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
566 564
@@ -568,7 +566,7 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc,
568 copy = len; 566 copy = len;
569 567
570 sg.page = frag->page; 568 sg.page = frag->page;
571 sg.offset = frag->page_offset + offset-start; 569 sg.offset = frag->page_offset;
572 sg.length = copy; 570 sg.length = copy;
573 571
574 err = icv_update(desc, &sg, copy); 572 err = icv_update(desc, &sg, copy);
@@ -579,22 +577,19 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc,
579 return 0; 577 return 0;
580 offset += copy; 578 offset += copy;
581 } 579 }
582 start = end;
583 } 580 }
584 581
585 if (skb_shinfo(skb)->frag_list) { 582 if (skb_shinfo(skb)->frag_list) {
586 struct sk_buff *list = skb_shinfo(skb)->frag_list; 583 struct sk_buff *list = skb_shinfo(skb)->frag_list;
587 584
588 for (; list; list = list->next) { 585 for (; list; list = list->next) {
589 int end; 586 BUG_TRAP(len >= 0);
590
591 BUG_TRAP(start <= offset + len);
592 587
593 end = start + list->len; 588 end = offset + list->len;
594 if ((copy = end - offset) > 0) { 589 if ((copy = end - offset) > 0) {
595 if (copy > len) 590 if (copy > len)
596 copy = len; 591 copy = len;
597 err = skb_icv_walk(list, desc, offset-start, 592 err = skb_icv_walk(list, desc, 0,
598 copy, icv_update); 593 copy, icv_update);
599 if (unlikely(err)) 594 if (unlikely(err))
600 return err; 595 return err;
@@ -602,7 +597,6 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc,
602 return 0; 597 return 0;
603 offset += copy; 598 offset += copy;
604 } 599 }
605 start = end;
606 } 600 }
607 } 601 }
608 BUG_ON(len); 602 BUG_ON(len);
@@ -612,175 +606,6 @@ EXPORT_SYMBOL_GPL(skb_icv_walk);
612 606
613#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE) 607#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
614 608
615/* Looking generic it is not used in another places. */
616
617int
618skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
619{
620 int start = skb_headlen(skb);
621 int i, copy = start - offset;
622 int elt = 0;
623
624 if (copy > 0) {
625 if (copy > len)
626 copy = len;
627 sg[elt].page = virt_to_page(skb->data + offset);
628 sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
629 sg[elt].length = copy;
630 elt++;
631 if ((len -= copy) == 0)
632 return elt;
633 offset += copy;
634 }
635
636 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
637 int end;
638
639 BUG_TRAP(start <= offset + len);
640
641 end = start + skb_shinfo(skb)->frags[i].size;
642 if ((copy = end - offset) > 0) {
643 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
644
645 if (copy > len)
646 copy = len;
647 sg[elt].page = frag->page;
648 sg[elt].offset = frag->page_offset+offset-start;
649 sg[elt].length = copy;
650 elt++;
651 if (!(len -= copy))
652 return elt;
653 offset += copy;
654 }
655 start = end;
656 }
657
658 if (skb_shinfo(skb)->frag_list) {
659 struct sk_buff *list = skb_shinfo(skb)->frag_list;
660
661 for (; list; list = list->next) {
662 int end;
663
664 BUG_TRAP(start <= offset + len);
665
666 end = start + list->len;
667 if ((copy = end - offset) > 0) {
668 if (copy > len)
669 copy = len;
670 elt += skb_to_sgvec(list, sg+elt, offset - start, copy);
671 if ((len -= copy) == 0)
672 return elt;
673 offset += copy;
674 }
675 start = end;
676 }
677 }
678 BUG_ON(len);
679 return elt;
680}
681EXPORT_SYMBOL_GPL(skb_to_sgvec);
682
683/* Check that skb data bits are writable. If they are not, copy data
684 * to newly created private area. If "tailbits" is given, make sure that
685 * tailbits bytes beyond current end of skb are writable.
686 *
687 * Returns amount of elements of scatterlist to load for subsequent
688 * transformations and pointer to writable trailer skb.
689 */
690
691int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
692{
693 int copyflag;
694 int elt;
695 struct sk_buff *skb1, **skb_p;
696
697 /* If skb is cloned or its head is paged, reallocate
698 * head pulling out all the pages (pages are considered not writable
699 * at the moment even if they are anonymous).
700 */
701 if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
702 __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
703 return -ENOMEM;
704
705 /* Easy case. Most of packets will go this way. */
706 if (!skb_shinfo(skb)->frag_list) {
707 /* A little of trouble, not enough of space for trailer.
708 * This should not happen, when stack is tuned to generate
709 * good frames. OK, on miss we reallocate and reserve even more
710 * space, 128 bytes is fair. */
711
712 if (skb_tailroom(skb) < tailbits &&
713 pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
714 return -ENOMEM;
715
716 /* Voila! */
717 *trailer = skb;
718 return 1;
719 }
720
721 /* Misery. We are in troubles, going to mincer fragments... */
722
723 elt = 1;
724 skb_p = &skb_shinfo(skb)->frag_list;
725 copyflag = 0;
726
727 while ((skb1 = *skb_p) != NULL) {
728 int ntail = 0;
729
730 /* The fragment is partially pulled by someone,
731 * this can happen on input. Copy it and everything
732 * after it. */
733
734 if (skb_shared(skb1))
735 copyflag = 1;
736
737 /* If the skb is the last, worry about trailer. */
738
739 if (skb1->next == NULL && tailbits) {
740 if (skb_shinfo(skb1)->nr_frags ||
741 skb_shinfo(skb1)->frag_list ||
742 skb_tailroom(skb1) < tailbits)
743 ntail = tailbits + 128;
744 }
745
746 if (copyflag ||
747 skb_cloned(skb1) ||
748 ntail ||
749 skb_shinfo(skb1)->nr_frags ||
750 skb_shinfo(skb1)->frag_list) {
751 struct sk_buff *skb2;
752
753 /* Fuck, we are miserable poor guys... */
754 if (ntail == 0)
755 skb2 = skb_copy(skb1, GFP_ATOMIC);
756 else
757 skb2 = skb_copy_expand(skb1,
758 skb_headroom(skb1),
759 ntail,
760 GFP_ATOMIC);
761 if (unlikely(skb2 == NULL))
762 return -ENOMEM;
763
764 if (skb1->sk)
765 skb_set_owner_w(skb2, skb1->sk);
766
767 /* Looking around. Are we still alive?
768 * OK, link new skb, drop old one */
769
770 skb2->next = skb1->next;
771 *skb_p = skb2;
772 kfree_skb(skb1);
773 skb1 = skb2;
774 }
775 elt++;
776 *trailer = skb1;
777 skb_p = &skb1->next;
778 }
779
780 return elt;
781}
782EXPORT_SYMBOL_GPL(skb_cow_data);
783
784void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len) 609void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
785{ 610{
786 if (tail != skb) { 611 if (tail != skb) {
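Two separate things happen in xfrm_algo.c. First, skb_icv_walk() drops its running start/end bookkeeping: each fragment's extent is now computed from the current offset, the frag_list recursion restarts at offset 0, and the old start-based assertions become BUG_TRAP(len >= 0). Second, skb_to_sgvec() and skb_cow_data() are removed here only because they move to the generic sk_buff code elsewhere in this series, so that new users such as the AF_RXRPC work in this merge can share them. As background for all of these walkers: an skb's payload can live in three places, which is why they share the same three-stage shape. A userspace model of that geometry (every name below is illustrative, not kernel API):

        #include <stddef.h>

        struct mock_frag { const void *data; size_t size; };

        struct mock_skb {
                const void       *linear;     /* skb->data .. skb->tail    */
                size_t            headlen;    /* skb_headlen()             */
                struct mock_frag  frags[4];   /* skb_shinfo(skb)->frags[]  */
                int               nr_frags;
                struct mock_skb  *frag_list;  /* head of chained sub-skbs  */
                struct mock_skb  *next;       /* next sibling in the chain */
        };

        static void walk(const struct mock_skb *skb,
                         void (*update)(const void *p, size_t n))
        {
                const struct mock_skb *p;
                int i;

                update(skb->linear, skb->headlen);        /* 1: linear head */
                for (i = 0; i < skb->nr_frags; i++)       /* 2: page frags  */
                        update(skb->frags[i].data, skb->frags[i].size);
                for (p = skb->frag_list; p; p = p->next)  /* 3: frag list   */
                        walk(p, update);
        }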
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index ee15bdae1419..5c4695840c58 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -62,7 +62,7 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
62 case IPPROTO_COMP: 62 case IPPROTO_COMP:
63 if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr))) 63 if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr)))
64 return -EINVAL; 64 return -EINVAL;
65 *spi = htonl(ntohs(*(__be16*)(skb->h.raw + 2))); 65 *spi = htonl(ntohs(*(__be16*)(skb_transport_header(skb) + 2)));
66 *seq = 0; 66 *seq = 0;
67 return 0; 67 return 0;
68 default: 68 default:
@@ -72,8 +72,8 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
72 if (!pskb_may_pull(skb, 16)) 72 if (!pskb_may_pull(skb, 16))
73 return -EINVAL; 73 return -EINVAL;
74 74
75 *spi = *(__be32*)(skb->h.raw + offset); 75 *spi = *(__be32*)(skb_transport_header(skb) + offset);
76 *seq = *(__be32*)(skb->h.raw + offset_seq); 76 *seq = *(__be32*)(skb_transport_header(skb) + offset_seq);
77 return 0; 77 return 0;
78} 78}
79EXPORT_SYMBOL(xfrm_parse_spi); 79EXPORT_SYMBOL(xfrm_parse_spi);
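xfrm_parse_spi() now reaches the transport header through skb_transport_header() instead of poking at the skb->h.raw union member directly. The accessor exists so the header marks can later be stored as offsets instead of pointers; on pointer-based configurations it is simply (reconstructed, not verbatim):

        static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
        {
                return skb->transport_header;
        }

        /* with CONFIG_NET_SKBUFF_DATA_USES_OFFSET the body becomes
         * skb->head + skb->transport_header instead */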
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 785c3e39f062..762926009c04 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -268,7 +268,7 @@ static inline unsigned long make_jiffies(long secs)
268static void xfrm_policy_timer(unsigned long data) 268static void xfrm_policy_timer(unsigned long data)
269{ 269{
270 struct xfrm_policy *xp = (struct xfrm_policy*)data; 270 struct xfrm_policy *xp = (struct xfrm_policy*)data;
271 unsigned long now = (unsigned long)xtime.tv_sec; 271 unsigned long now = get_seconds();
272 long next = LONG_MAX; 272 long next = LONG_MAX;
273 int warn = 0; 273 int warn = 0;
274 int dir; 274 int dir;
@@ -690,7 +690,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
690 } 690 }
691 policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir); 691 policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
692 hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index)); 692 hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
693 policy->curlft.add_time = (unsigned long)xtime.tv_sec; 693 policy->curlft.add_time = get_seconds();
694 policy->curlft.use_time = 0; 694 policy->curlft.use_time = 0;
695 if (!mod_timer(&policy->timer, jiffies + HZ)) 695 if (!mod_timer(&policy->timer, jiffies + HZ))
696 xfrm_pol_hold(policy); 696 xfrm_pol_hold(policy);
@@ -1049,7 +1049,7 @@ static inline int policy_to_flow_dir(int dir)
1049 return FLOW_DIR_OUT; 1049 return FLOW_DIR_OUT;
1050 case XFRM_POLICY_FWD: 1050 case XFRM_POLICY_FWD:
1051 return FLOW_DIR_FWD; 1051 return FLOW_DIR_FWD;
1052 }; 1052 }
1053} 1053}
1054 1054
1055static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) 1055static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
@@ -1133,7 +1133,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
1133 old_pol = sk->sk_policy[dir]; 1133 old_pol = sk->sk_policy[dir];
1134 sk->sk_policy[dir] = pol; 1134 sk->sk_policy[dir] = pol;
1135 if (pol) { 1135 if (pol) {
1136 pol->curlft.add_time = (unsigned long)xtime.tv_sec; 1136 pol->curlft.add_time = get_seconds();
1137 pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir); 1137 pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
1138 __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); 1138 __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
1139 } 1139 }
@@ -1386,7 +1386,7 @@ restart:
1386 return 0; 1386 return 0;
1387 1387
1388 family = dst_orig->ops->family; 1388 family = dst_orig->ops->family;
1389 policy->curlft.use_time = (unsigned long)xtime.tv_sec; 1389 policy->curlft.use_time = get_seconds();
1390 pols[0] = policy; 1390 pols[0] = policy;
1391 npols ++; 1391 npols ++;
1392 xfrm_nr += pols[0]->xfrm_nr; 1392 xfrm_nr += pols[0]->xfrm_nr;
@@ -1682,7 +1682,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
1682 return 1; 1682 return 1;
1683 } 1683 }
1684 1684
1685 pol->curlft.use_time = (unsigned long)xtime.tv_sec; 1685 pol->curlft.use_time = get_seconds();
1686 1686
1687 pols[0] = pol; 1687 pols[0] = pol;
1688 npols ++; 1688 npols ++;
@@ -1694,7 +1694,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
1694 if (pols[1]) { 1694 if (pols[1]) {
1695 if (IS_ERR(pols[1])) 1695 if (IS_ERR(pols[1]))
1696 return 0; 1696 return 0;
1697 pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec; 1697 pols[1]->curlft.use_time = get_seconds();
1698 npols ++; 1698 npols ++;
1699 } 1699 }
1700 } 1700 }
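All of the xfrm lifetime bookkeeping stops reading xtime.tv_sec directly and calls get_seconds() instead, keeping the wall-clock read behind one accessor. At this point the accessor is little more than a wrapper; roughly, going by the kernel/timer.c of this era:

        unsigned long get_seconds(void)
        {
                return xtime.tv_sec;    /* a single place to change if the
                                           read ever needs the xtime seqlock
                                           or a cached value */
        }

The "};" to "}" hunk in policy_to_flow_dir() is part of a tree-wide cleanup that drops the stray semicolon left after switch blocks; the semicolon is a harmless empty statement, removed purely for tidiness.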
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index e3a0bcfa5df1..f3a61ebd8d65 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -233,7 +233,7 @@ static inline unsigned long make_jiffies(long secs)
233static void xfrm_timer_handler(unsigned long data) 233static void xfrm_timer_handler(unsigned long data)
234{ 234{
235 struct xfrm_state *x = (struct xfrm_state*)data; 235 struct xfrm_state *x = (struct xfrm_state*)data;
236 unsigned long now = (unsigned long)xtime.tv_sec; 236 unsigned long now = get_seconds();
237 long next = LONG_MAX; 237 long next = LONG_MAX;
238 int warn = 0; 238 int warn = 0;
239 int err = 0; 239 int err = 0;
@@ -326,7 +326,7 @@ struct xfrm_state *xfrm_state_alloc(void)
326 init_timer(&x->rtimer); 326 init_timer(&x->rtimer);
327 x->rtimer.function = xfrm_replay_timer_handler; 327 x->rtimer.function = xfrm_replay_timer_handler;
328 x->rtimer.data = (unsigned long)x; 328 x->rtimer.data = (unsigned long)x;
329 x->curlft.add_time = (unsigned long)xtime.tv_sec; 329 x->curlft.add_time = get_seconds();
330 x->lft.soft_byte_limit = XFRM_INF; 330 x->lft.soft_byte_limit = XFRM_INF;
331 x->lft.soft_packet_limit = XFRM_INF; 331 x->lft.soft_packet_limit = XFRM_INF;
332 x->lft.hard_byte_limit = XFRM_INF; 332 x->lft.hard_byte_limit = XFRM_INF;
@@ -421,6 +421,16 @@ restart:
421} 421}
422EXPORT_SYMBOL(xfrm_state_flush); 422EXPORT_SYMBOL(xfrm_state_flush);
423 423
424void xfrm_sad_getinfo(struct xfrm_sadinfo *si)
425{
426 spin_lock_bh(&xfrm_state_lock);
427 si->sadcnt = xfrm_state_num;
428 si->sadhcnt = xfrm_state_hmask;
429 si->sadhmcnt = xfrm_state_hashmax;
430 spin_unlock_bh(&xfrm_state_lock);
431}
432EXPORT_SYMBOL(xfrm_sad_getinfo);
433
424static int 434static int
425xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, 435xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
426 struct xfrm_tmpl *tmpl, 436 struct xfrm_tmpl *tmpl,
@@ -458,7 +468,7 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi,
458 x->id.daddr.a6)) 468 x->id.daddr.a6))
459 continue; 469 continue;
460 break; 470 break;
461 }; 471 }
462 472
463 xfrm_state_hold(x); 473 xfrm_state_hold(x);
464 return x; 474 return x;
@@ -493,7 +503,7 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm
493 x->props.saddr.a6)) 503 x->props.saddr.a6))
494 continue; 504 continue;
495 break; 505 break;
496 }; 506 }
497 507
498 xfrm_state_hold(x); 508 xfrm_state_hold(x);
499 return x; 509 return x;
@@ -722,7 +732,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
722 (struct in6_addr *)saddr)) 732 (struct in6_addr *)saddr))
723 continue; 733 continue;
724 break; 734 break;
725 }; 735 }
726 736
727 xfrm_state_hold(x); 737 xfrm_state_hold(x);
728 return x; 738 return x;
@@ -755,7 +765,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
755 ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6, 765 ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
756 (struct in6_addr *)daddr); 766 (struct in6_addr *)daddr);
757 break; 767 break;
758 }; 768 }
759 769
760 x->km.state = XFRM_STATE_ACQ; 770 x->km.state = XFRM_STATE_ACQ;
761 x->id.proto = proto; 771 x->id.proto = proto;
@@ -1051,7 +1061,7 @@ EXPORT_SYMBOL(xfrm_state_update);
1051int xfrm_state_check_expire(struct xfrm_state *x) 1061int xfrm_state_check_expire(struct xfrm_state *x)
1052{ 1062{
1053 if (!x->curlft.use_time) 1063 if (!x->curlft.use_time)
1054 x->curlft.use_time = (unsigned long)xtime.tv_sec; 1064 x->curlft.use_time = get_seconds();
1055 1065
1056 if (x->km.state != XFRM_STATE_VALID) 1066 if (x->km.state != XFRM_STATE_VALID)
1057 return -EINVAL; 1067 return -EINVAL;
@@ -1667,37 +1677,17 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x)
1667} 1677}
1668EXPORT_SYMBOL(xfrm_state_delete_tunnel); 1678EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1669 1679
1670/*
1671 * This function is NOT optimal. For example, with ESP it will give an
1672 * MTU that's usually two bytes short of being optimal. However, it will
1673 * usually give an answer that's a multiple of 4 provided the input is
1674 * also a multiple of 4.
1675 */
1676int xfrm_state_mtu(struct xfrm_state *x, int mtu) 1680int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1677{ 1681{
1678 int res = mtu; 1682 int res;
1679
1680 res -= x->props.header_len;
1681
1682 for (;;) {
1683 int m = res;
1684
1685 if (m < 68)
1686 return 68;
1687
1688 spin_lock_bh(&x->lock);
1689 if (x->km.state == XFRM_STATE_VALID &&
1690 x->type && x->type->get_max_size)
1691 m = x->type->get_max_size(x, m);
1692 else
1693 m += x->props.header_len;
1694 spin_unlock_bh(&x->lock);
1695
1696 if (m <= mtu)
1697 break;
1698 res -= (m - mtu);
1699 }
1700 1683
1684 spin_lock_bh(&x->lock);
1685 if (x->km.state == XFRM_STATE_VALID &&
1686 x->type && x->type->get_mtu)
1687 res = x->type->get_mtu(x, mtu);
1688 else
1689 res = mtu;
1690 spin_unlock_bh(&x->lock);
1701 return res; 1691 return res;
1702} 1692}
1703 1693
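Besides more get_seconds() and switch-semicolon conversions, xfrm_state.c gains xfrm_sad_getinfo(), which snapshots the SAD counters (entry count, hash mask, hash maximum) under xfrm_state_lock for the new netlink query added in xfrm_user.c below, and xfrm_state_mtu() loses its iterative search: instead of probing candidate sizes against type->get_max_size(), each transform type now answers directly through a get_mtu() callback. A hypothetical (not the kernel's actual ESP implementation) shape of such a callback:

        /* Illustrative only: report the largest payload that still fits in
         * 'mtu' once this transform's fixed overhead is added back.  Real
         * implementations also account for IV, ICV and cipher block-size
         * alignment. */
        static u32 example_get_mtu(struct xfrm_state *x, int mtu)
        {
                return mtu - x->props.header_len - x->props.trailer_len;
        }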
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 816e3690b60f..69110fed64b6 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -71,7 +71,7 @@ static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type)
71 71
72 default: 72 default:
73 return -EINVAL; 73 return -EINVAL;
74 }; 74 }
75 75
76 algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0'; 76 algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0';
77 return 0; 77 return 0;
@@ -152,7 +152,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
152 152
153 default: 153 default:
154 goto out; 154 goto out;
155 }; 155 }
156 156
157 err = -EINVAL; 157 err = -EINVAL;
158 switch (p->id.proto) { 158 switch (p->id.proto) {
@@ -192,7 +192,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
192 192
193 default: 193 default:
194 goto out; 194 goto out;
195 }; 195 }
196 196
197 if ((err = verify_one_alg(xfrma, XFRMA_ALG_AUTH))) 197 if ((err = verify_one_alg(xfrma, XFRMA_ALG_AUTH)))
198 goto out; 198 goto out;
@@ -217,7 +217,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
217 217
218 default: 218 default:
219 goto out; 219 goto out;
220 }; 220 }
221 221
222 err = 0; 222 err = 0;
223 223
@@ -576,7 +576,7 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
576 struct sk_buff *skb = sp->out_skb; 576 struct sk_buff *skb = sp->out_skb;
577 struct xfrm_usersa_info *p; 577 struct xfrm_usersa_info *p;
578 struct nlmsghdr *nlh; 578 struct nlmsghdr *nlh;
579 unsigned char *b = skb->tail; 579 unsigned char *b = skb_tail_pointer(skb);
580 580
581 if (sp->this_idx < sp->start_idx) 581 if (sp->this_idx < sp->start_idx)
582 goto out; 582 goto out;
@@ -621,14 +621,14 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
621 if (x->lastused) 621 if (x->lastused)
622 RTA_PUT(skb, XFRMA_LASTUSED, sizeof(x->lastused), &x->lastused); 622 RTA_PUT(skb, XFRMA_LASTUSED, sizeof(x->lastused), &x->lastused);
623 623
624 nlh->nlmsg_len = skb->tail - b; 624 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
625out: 625out:
626 sp->this_idx++; 626 sp->this_idx++;
627 return 0; 627 return 0;
628 628
629nlmsg_failure: 629nlmsg_failure:
630rtattr_failure: 630rtattr_failure:
631 skb_trim(skb, b - skb->data); 631 nlmsg_trim(skb, b);
632 return -1; 632 return -1;
633} 633}
634 634
@@ -672,6 +672,61 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,
672 return skb; 672 return skb;
673} 673}
674 674
675static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags)
676{
677 struct xfrm_sadinfo si;
678 struct nlmsghdr *nlh;
679 u32 *f;
680
681 nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0);
682	if (nlh == NULL) /* shouldn't really happen ... */
683 return -EMSGSIZE;
684
685 f = nlmsg_data(nlh);
686 *f = flags;
687 xfrm_sad_getinfo(&si);
688
689 if (flags & XFRM_SAD_HMASK)
690 NLA_PUT_U32(skb, XFRMA_SADHMASK, si.sadhcnt);
691 if (flags & XFRM_SAD_HMAX)
692 NLA_PUT_U32(skb, XFRMA_SADHMAX, si.sadhmcnt);
693 if (flags & XFRM_SAD_CNT)
694 NLA_PUT_U32(skb, XFRMA_SADCNT, si.sadcnt);
695
696 return nlmsg_end(skb, nlh);
697
698nla_put_failure:
699 nlmsg_cancel(skb, nlh);
700 return -EMSGSIZE;
701}
702
703static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
704 struct rtattr **xfrma)
705{
706 struct sk_buff *r_skb;
707 u32 *flags = NLMSG_DATA(nlh);
708 u32 spid = NETLINK_CB(skb).pid;
709 u32 seq = nlh->nlmsg_seq;
710 int len = NLMSG_LENGTH(sizeof(u32));
711
712 if (*flags & XFRM_SAD_HMASK)
713 len += RTA_SPACE(sizeof(u32));
714 if (*flags & XFRM_SAD_HMAX)
715 len += RTA_SPACE(sizeof(u32));
716 if (*flags & XFRM_SAD_CNT)
717 len += RTA_SPACE(sizeof(u32));
718
719 r_skb = alloc_skb(len, GFP_ATOMIC);
720
721 if (r_skb == NULL)
722 return -ENOMEM;
723
724 if (build_sadinfo(r_skb, spid, seq, *flags) < 0)
725 BUG();
726
727 return nlmsg_unicast(xfrm_nl, r_skb, spid);
728}
729
675static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, 730static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
676 struct rtattr **xfrma) 731 struct rtattr **xfrma)
677{ 732{
@@ -711,7 +766,7 @@ static int verify_userspi_info(struct xfrm_userspi_info *p)
711 766
712 default: 767 default:
713 return -EINVAL; 768 return -EINVAL;
714 }; 769 }
715 770
716 if (p->min > p->max) 771 if (p->min > p->max)
717 return -EINVAL; 772 return -EINVAL;
@@ -789,7 +844,7 @@ static int verify_policy_dir(u8 dir)
789 844
790 default: 845 default:
791 return -EINVAL; 846 return -EINVAL;
792 }; 847 }
793 848
794 return 0; 849 return 0;
795} 850}
@@ -805,7 +860,7 @@ static int verify_policy_type(u8 type)
805 860
806 default: 861 default:
807 return -EINVAL; 862 return -EINVAL;
808 }; 863 }
809 864
810 return 0; 865 return 0;
811} 866}
@@ -821,7 +876,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
821 876
822 default: 877 default:
823 return -EINVAL; 878 return -EINVAL;
824 }; 879 }
825 880
826 switch (p->action) { 881 switch (p->action) {
827 case XFRM_POLICY_ALLOW: 882 case XFRM_POLICY_ALLOW:
@@ -830,7 +885,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
830 885
831 default: 886 default:
832 return -EINVAL; 887 return -EINVAL;
833 }; 888 }
834 889
835 switch (p->sel.family) { 890 switch (p->sel.family) {
836 case AF_INET: 891 case AF_INET:
@@ -845,7 +900,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
845 900
846 default: 901 default:
847 return -EINVAL; 902 return -EINVAL;
848 }; 903 }
849 904
850 return verify_policy_dir(p->dir); 905 return verify_policy_dir(p->dir);
851} 906}
@@ -912,7 +967,7 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
912#endif 967#endif
913 default: 968 default:
914 return -EINVAL; 969 return -EINVAL;
915 }; 970 }
916 } 971 }
917 972
918 return 0; 973 return 0;
@@ -1157,7 +1212,7 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
1157 struct sk_buff *in_skb = sp->in_skb; 1212 struct sk_buff *in_skb = sp->in_skb;
1158 struct sk_buff *skb = sp->out_skb; 1213 struct sk_buff *skb = sp->out_skb;
1159 struct nlmsghdr *nlh; 1214 struct nlmsghdr *nlh;
1160 unsigned char *b = skb->tail; 1215 unsigned char *b = skb_tail_pointer(skb);
1161 1216
1162 if (sp->this_idx < sp->start_idx) 1217 if (sp->this_idx < sp->start_idx)
1163 goto out; 1218 goto out;
@@ -1176,13 +1231,13 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
1176 if (copy_to_user_policy_type(xp->type, skb) < 0) 1231 if (copy_to_user_policy_type(xp->type, skb) < 0)
1177 goto nlmsg_failure; 1232 goto nlmsg_failure;
1178 1233
1179 nlh->nlmsg_len = skb->tail - b; 1234 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1180out: 1235out:
1181 sp->this_idx++; 1236 sp->this_idx++;
1182 return 0; 1237 return 0;
1183 1238
1184nlmsg_failure: 1239nlmsg_failure:
1185 skb_trim(skb, b - skb->data); 1240 nlmsg_trim(skb, b);
1186 return -1; 1241 return -1;
1187} 1242}
1188 1243
@@ -1330,7 +1385,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
1330 struct xfrm_aevent_id *id; 1385 struct xfrm_aevent_id *id;
1331 struct nlmsghdr *nlh; 1386 struct nlmsghdr *nlh;
1332 struct xfrm_lifetime_cur ltime; 1387 struct xfrm_lifetime_cur ltime;
1333 unsigned char *b = skb->tail; 1388 unsigned char *b = skb_tail_pointer(skb);
1334 1389
1335 nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id)); 1390 nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id));
1336 id = NLMSG_DATA(nlh); 1391 id = NLMSG_DATA(nlh);
@@ -1362,12 +1417,12 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
1362 RTA_PUT(skb,XFRMA_ETIMER_THRESH,sizeof(u32),&etimer); 1417 RTA_PUT(skb,XFRMA_ETIMER_THRESH,sizeof(u32),&etimer);
1363 } 1418 }
1364 1419
1365 nlh->nlmsg_len = skb->tail - b; 1420 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1366 return skb->len; 1421 return skb->len;
1367 1422
1368rtattr_failure: 1423rtattr_failure:
1369nlmsg_failure: 1424nlmsg_failure:
1370 skb_trim(skb, b - skb->data); 1425 nlmsg_trim(skb, b);
1371 return -1; 1426 return -1;
1372} 1427}
1373 1428
@@ -1744,7 +1799,7 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m,
1744 struct xfrm_migrate *mp; 1799 struct xfrm_migrate *mp;
1745 struct xfrm_userpolicy_id *pol_id; 1800 struct xfrm_userpolicy_id *pol_id;
1746 struct nlmsghdr *nlh; 1801 struct nlmsghdr *nlh;
1747 unsigned char *b = skb->tail; 1802 unsigned char *b = skb_tail_pointer(skb);
1748 int i; 1803 int i;
1749 1804
1750 nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_MIGRATE, sizeof(*pol_id)); 1805 nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_MIGRATE, sizeof(*pol_id));
@@ -1764,10 +1819,10 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m,
1764 goto nlmsg_failure; 1819 goto nlmsg_failure;
1765 } 1820 }
1766 1821
1767 nlh->nlmsg_len = skb->tail - b; 1822 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1768 return skb->len; 1823 return skb->len;
1769nlmsg_failure: 1824nlmsg_failure:
1770 skb_trim(skb, b - skb->data); 1825 nlmsg_trim(skb, b);
1771 return -1; 1826 return -1;
1772} 1827}
1773 1828
@@ -1823,6 +1878,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
1823 [XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), 1878 [XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id),
1824 [XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report), 1879 [XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report),
1825 [XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id), 1880 [XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id),
1881 [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = NLMSG_LENGTH(sizeof(u32)),
1826}; 1882};
1827 1883
1828#undef XMSGSIZE 1884#undef XMSGSIZE
@@ -1850,55 +1906,39 @@ static struct xfrm_link {
1850 [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = { .doit = xfrm_new_ae }, 1906 [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = { .doit = xfrm_new_ae },
1851 [XFRM_MSG_GETAE - XFRM_MSG_BASE] = { .doit = xfrm_get_ae }, 1907 [XFRM_MSG_GETAE - XFRM_MSG_BASE] = { .doit = xfrm_get_ae },
1852 [XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = { .doit = xfrm_do_migrate }, 1908 [XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = { .doit = xfrm_do_migrate },
1909 [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_sadinfo },
1853}; 1910};
1854 1911
1855static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) 1912static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
1856{ 1913{
1857 struct rtattr *xfrma[XFRMA_MAX]; 1914 struct rtattr *xfrma[XFRMA_MAX];
1858 struct xfrm_link *link; 1915 struct xfrm_link *link;
1859 int type, min_len; 1916 int type, min_len;
1860 1917
1861 if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
1862 return 0;
1863
1864 type = nlh->nlmsg_type; 1918 type = nlh->nlmsg_type;
1865
1866 /* A control message: ignore them */
1867 if (type < XFRM_MSG_BASE)
1868 return 0;
1869
1870 /* Unknown message: reply with EINVAL */
1871 if (type > XFRM_MSG_MAX) 1919 if (type > XFRM_MSG_MAX)
1872 goto err_einval; 1920 return -EINVAL;
1873 1921
1874 type -= XFRM_MSG_BASE; 1922 type -= XFRM_MSG_BASE;
1875 link = &xfrm_dispatch[type]; 1923 link = &xfrm_dispatch[type];
1876 1924
1877 /* All operations require privileges, even GET */ 1925 /* All operations require privileges, even GET */
1878 if (security_netlink_recv(skb, CAP_NET_ADMIN)) { 1926 if (security_netlink_recv(skb, CAP_NET_ADMIN))
1879 *errp = -EPERM; 1927 return -EPERM;
1880 return -1;
1881 }
1882 1928
1883 if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) || 1929 if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) ||
1884 type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) && 1930 type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) &&
1885 (nlh->nlmsg_flags & NLM_F_DUMP)) { 1931 (nlh->nlmsg_flags & NLM_F_DUMP)) {
1886 if (link->dump == NULL) 1932 if (link->dump == NULL)
1887 goto err_einval; 1933 return -EINVAL;
1888
1889 if ((*errp = netlink_dump_start(xfrm_nl, skb, nlh,
1890 link->dump, NULL)) != 0) {
1891 return -1;
1892 }
1893 1934
1894 netlink_queue_skip(nlh, skb); 1935 return netlink_dump_start(xfrm_nl, skb, nlh, link->dump, NULL);
1895 return -1;
1896 } 1936 }
1897 1937
1898 memset(xfrma, 0, sizeof(xfrma)); 1938 memset(xfrma, 0, sizeof(xfrma));
1899 1939
1900 if (nlh->nlmsg_len < (min_len = xfrm_msg_min[type])) 1940 if (nlh->nlmsg_len < (min_len = xfrm_msg_min[type]))
1901 goto err_einval; 1941 return -EINVAL;
1902 1942
1903 if (nlh->nlmsg_len > min_len) { 1943 if (nlh->nlmsg_len > min_len) {
1904 int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); 1944 int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
@@ -1908,7 +1948,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err
1908 unsigned short flavor = attr->rta_type; 1948 unsigned short flavor = attr->rta_type;
1909 if (flavor) { 1949 if (flavor) {
1910 if (flavor > XFRMA_MAX) 1950 if (flavor > XFRMA_MAX)
1911 goto err_einval; 1951 return -EINVAL;
1912 xfrma[flavor - 1] = attr; 1952 xfrma[flavor - 1] = attr;
1913 } 1953 }
1914 attr = RTA_NEXT(attr, attrlen); 1954 attr = RTA_NEXT(attr, attrlen);
@@ -1916,14 +1956,9 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err
1916 } 1956 }
1917 1957
1918 if (link->doit == NULL) 1958 if (link->doit == NULL)
1919 goto err_einval; 1959 return -EINVAL;
1920 *errp = link->doit(skb, nlh, xfrma);
1921
1922 return *errp;
1923 1960
1924err_einval: 1961 return link->doit(skb, nlh, xfrma);
1925 *errp = -EINVAL;
1926 return -1;
1927} 1962}
1928 1963
1929static void xfrm_netlink_rcv(struct sock *sk, int len) 1964static void xfrm_netlink_rcv(struct sock *sk, int len)
@@ -1942,7 +1977,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
1942{ 1977{
1943 struct xfrm_user_expire *ue; 1978 struct xfrm_user_expire *ue;
1944 struct nlmsghdr *nlh; 1979 struct nlmsghdr *nlh;
1945 unsigned char *b = skb->tail; 1980 unsigned char *b = skb_tail_pointer(skb);
1946 1981
1947 nlh = NLMSG_PUT(skb, c->pid, 0, XFRM_MSG_EXPIRE, 1982 nlh = NLMSG_PUT(skb, c->pid, 0, XFRM_MSG_EXPIRE,
1948 sizeof(*ue)); 1983 sizeof(*ue));
@@ -1952,11 +1987,11 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
1952 copy_to_user_state(x, &ue->state); 1987 copy_to_user_state(x, &ue->state);
1953 ue->hard = (c->data.hard != 0) ? 1 : 0; 1988 ue->hard = (c->data.hard != 0) ? 1 : 0;
1954 1989
1955 nlh->nlmsg_len = skb->tail - b; 1990 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1956 return skb->len; 1991 return skb->len;
1957 1992
1958nlmsg_failure: 1993nlmsg_failure:
1959 skb_trim(skb, b - skb->data); 1994 nlmsg_trim(skb, b);
1960 return -1; 1995 return -1;
1961} 1996}
1962 1997
@@ -1999,7 +2034,7 @@ static int xfrm_notify_sa_flush(struct km_event *c)
1999 struct xfrm_usersa_flush *p; 2034 struct xfrm_usersa_flush *p;
2000 struct nlmsghdr *nlh; 2035 struct nlmsghdr *nlh;
2001 struct sk_buff *skb; 2036 struct sk_buff *skb;
2002 unsigned char *b; 2037 sk_buff_data_t b;
2003 int len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush)); 2038 int len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush));
2004 2039
2005 skb = alloc_skb(len, GFP_ATOMIC); 2040 skb = alloc_skb(len, GFP_ATOMIC);
@@ -2045,7 +2080,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c)
2045 struct xfrm_usersa_id *id; 2080 struct xfrm_usersa_id *id;
2046 struct nlmsghdr *nlh; 2081 struct nlmsghdr *nlh;
2047 struct sk_buff *skb; 2082 struct sk_buff *skb;
2048 unsigned char *b; 2083 sk_buff_data_t b;
2049 int len = xfrm_sa_len(x); 2084 int len = xfrm_sa_len(x);
2050 int headlen; 2085 int headlen;
2051 2086
@@ -2129,7 +2164,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
2129{ 2164{
2130 struct xfrm_user_acquire *ua; 2165 struct xfrm_user_acquire *ua;
2131 struct nlmsghdr *nlh; 2166 struct nlmsghdr *nlh;
2132 unsigned char *b = skb->tail; 2167 unsigned char *b = skb_tail_pointer(skb);
2133 __u32 seq = xfrm_get_acqseq(); 2168 __u32 seq = xfrm_get_acqseq();
2134 2169
2135 nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_ACQUIRE, 2170 nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_ACQUIRE,
@@ -2153,11 +2188,11 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
2153 if (copy_to_user_policy_type(xp->type, skb) < 0) 2188 if (copy_to_user_policy_type(xp->type, skb) < 0)
2154 goto nlmsg_failure; 2189 goto nlmsg_failure;
2155 2190
2156 nlh->nlmsg_len = skb->tail - b; 2191 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
2157 return skb->len; 2192 return skb->len;
2158 2193
2159nlmsg_failure: 2194nlmsg_failure:
2160 skb_trim(skb, b - skb->data); 2195 nlmsg_trim(skb, b);
2161 return -1; 2196 return -1;
2162} 2197}
2163 2198
@@ -2249,7 +2284,7 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
2249 struct xfrm_user_polexpire *upe; 2284 struct xfrm_user_polexpire *upe;
2250 struct nlmsghdr *nlh; 2285 struct nlmsghdr *nlh;
2251 int hard = c->data.hard; 2286 int hard = c->data.hard;
2252 unsigned char *b = skb->tail; 2287 unsigned char *b = skb_tail_pointer(skb);
2253 2288
2254 nlh = NLMSG_PUT(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe)); 2289 nlh = NLMSG_PUT(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe));
2255 upe = NLMSG_DATA(nlh); 2290 upe = NLMSG_DATA(nlh);
@@ -2264,11 +2299,11 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
2264 goto nlmsg_failure; 2299 goto nlmsg_failure;
2265 upe->hard = !!hard; 2300 upe->hard = !!hard;
2266 2301
2267 nlh->nlmsg_len = skb->tail - b; 2302 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
2268 return skb->len; 2303 return skb->len;
2269 2304
2270nlmsg_failure: 2305nlmsg_failure:
2271 skb_trim(skb, b - skb->data); 2306 nlmsg_trim(skb, b);
2272 return -1; 2307 return -1;
2273} 2308}
2274 2309
@@ -2300,7 +2335,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *
2300 struct xfrm_userpolicy_id *id; 2335 struct xfrm_userpolicy_id *id;
2301 struct nlmsghdr *nlh; 2336 struct nlmsghdr *nlh;
2302 struct sk_buff *skb; 2337 struct sk_buff *skb;
2303 unsigned char *b; 2338 sk_buff_data_t b;
2304 int len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); 2339 int len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
2305 int headlen; 2340 int headlen;
2306 2341
@@ -2357,7 +2392,7 @@ static int xfrm_notify_policy_flush(struct km_event *c)
2357{ 2392{
2358 struct nlmsghdr *nlh; 2393 struct nlmsghdr *nlh;
2359 struct sk_buff *skb; 2394 struct sk_buff *skb;
2360 unsigned char *b; 2395 sk_buff_data_t b;
2361 int len = 0; 2396 int len = 0;
2362#ifdef CONFIG_XFRM_SUB_POLICY 2397#ifdef CONFIG_XFRM_SUB_POLICY
2363 len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type)); 2398 len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type));
@@ -2410,7 +2445,7 @@ static int build_report(struct sk_buff *skb, u8 proto,
2410{ 2445{
2411 struct xfrm_user_report *ur; 2446 struct xfrm_user_report *ur;
2412 struct nlmsghdr *nlh; 2447 struct nlmsghdr *nlh;
2413 unsigned char *b = skb->tail; 2448 unsigned char *b = skb_tail_pointer(skb);
2414 2449
2415 nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur)); 2450 nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur));
2416 ur = NLMSG_DATA(nlh); 2451 ur = NLMSG_DATA(nlh);
@@ -2422,12 +2457,12 @@ static int build_report(struct sk_buff *skb, u8 proto,
2422 if (addr) 2457 if (addr)
2423 RTA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr); 2458 RTA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr);
2424 2459
2425 nlh->nlmsg_len = skb->tail - b; 2460 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
2426 return skb->len; 2461 return skb->len;
2427 2462
2428nlmsg_failure: 2463nlmsg_failure:
2429rtattr_failure: 2464rtattr_failure:
2430 skb_trim(skb, b - skb->data); 2465 nlmsg_trim(skb, b);
2431 return -1; 2466 return -1;
2432} 2467}
2433 2468
@@ -2466,7 +2501,7 @@ static int __init xfrm_user_init(void)
2466 printk(KERN_INFO "Initializing XFRM netlink socket\n"); 2501 printk(KERN_INFO "Initializing XFRM netlink socket\n");
2467 2502
2468 nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX, 2503 nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX,
2469 xfrm_netlink_rcv, THIS_MODULE); 2504 xfrm_netlink_rcv, NULL, THIS_MODULE);
2470 if (nlsk == NULL) 2505 if (nlsk == NULL)
2471 return -ENOMEM; 2506 return -ENOMEM;
2472 rcu_assign_pointer(xfrm_nl, nlsk); 2507 rcu_assign_pointer(xfrm_nl, nlsk);
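The xfrm_user.c changes fall into three groups. Message building moves from raw skb->tail arithmetic to skb_tail_pointer() and nlmsg_trim(), with sk_buff_data_t for saved tail marks; this is what allows skb->tail to become an offset on 64-bit builds. The trim helper is roughly (sketched from the include/net/netlink.h of this era):

        static inline void nlmsg_trim(struct sk_buff *skb, const void *mark)
        {
                if (mark)
                        skb_trim(skb, (unsigned char *)mark - skb->data);
        }

Next, a new XFRM_MSG_GETSADINFO operation is wired up: userspace sends a u32 of XFRM_SAD_* flags, and xfrm_get_sadinfo() replies with an XFRM_MSG_NEWSADINFO message carrying XFRMA_SADCNT, XFRMA_SADHMASK and XFRMA_SADHMAX attributes filled from xfrm_sad_getinfo(). Finally, xfrm_user_rcv_msg() is reshaped to return an errno directly instead of filling *errp; the dropped NLM_F_REQUEST and control-message checks now live in the shared netlink receive path (netlink_rcv_skb()), and netlink_kernel_create() grows the new cb_mutex argument, hence the extra NULL in xfrm_user_init().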
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index ad45ce73964b..88292e3dee96 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -66,6 +66,8 @@ struct key_type key_type_keyring = {
66 .read = keyring_read, 66 .read = keyring_read,
67}; 67};
68 68
69EXPORT_SYMBOL(key_type_keyring);
70
69/* 71/*
70 * semaphore to serialise link/link calls to prevent two link calls in parallel 72 * semaphore to serialise link/link calls to prevent two link calls in parallel
71 * introducing a cycle 73 * introducing a cycle
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index d41e24d6ae41..5f02b4be1917 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2944,7 +2944,7 @@ static int selinux_parse_skb_ipv4(struct sk_buff *skb,
2944 int offset, ihlen, ret = -EINVAL; 2944 int offset, ihlen, ret = -EINVAL;
2945 struct iphdr _iph, *ih; 2945 struct iphdr _iph, *ih;
2946 2946
2947 offset = skb->nh.raw - skb->data; 2947 offset = skb_network_offset(skb);
2948 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); 2948 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
2949 if (ih == NULL) 2949 if (ih == NULL)
2950 goto out; 2950 goto out;
@@ -3026,7 +3026,7 @@ static int selinux_parse_skb_ipv6(struct sk_buff *skb,
3026 int ret = -EINVAL, offset; 3026 int ret = -EINVAL, offset;
3027 struct ipv6hdr _ipv6h, *ip6; 3027 struct ipv6hdr _ipv6h, *ip6;
3028 3028
3029 offset = skb->nh.raw - skb->data; 3029 offset = skb_network_offset(skb);
3030 ip6 = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h); 3030 ip6 = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h);
3031 if (ip6 == NULL) 3031 if (ip6 == NULL)
3032 goto out; 3032 goto out;
@@ -3786,7 +3786,7 @@ static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb)
3786 err = -EINVAL; 3786 err = -EINVAL;
3787 goto out; 3787 goto out;
3788 } 3788 }
3789 nlh = (struct nlmsghdr *)skb->data; 3789 nlh = nlmsg_hdr(skb);
3790 3790
3791 err = selinux_nlmsg_lookup(isec->sclass, nlh->nlmsg_type, &perm); 3791 err = selinux_nlmsg_lookup(isec->sclass, nlh->nlmsg_type, &perm);
3792 if (err) { 3792 if (err) {
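The SELinux hooks pick up the same accessor conversions: the open-coded skb->nh.raw - skb->data becomes skb_network_offset(), and the bare nlmsghdr cast becomes nlmsg_hdr(). Both helpers are thin; per the headers of this era (reconstructed, not verbatim):

        static inline int skb_network_offset(const struct sk_buff *skb)
        {
                return skb_network_header(skb) - skb->data;
        }

        static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb)
        {
                return (struct nlmsghdr *)skb->data;
        }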
diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c
index e203883406dd..f49046de63a2 100644
--- a/security/selinux/netlink.c
+++ b/security/selinux/netlink.c
@@ -66,7 +66,7 @@ static void selnl_add_payload(struct nlmsghdr *nlh, int len, int msgtype, void *
66static void selnl_notify(int msgtype, void *data) 66static void selnl_notify(int msgtype, void *data)
67{ 67{
68 int len; 68 int len;
69 unsigned char *tmp; 69 sk_buff_data_t tmp;
70 struct sk_buff *skb; 70 struct sk_buff *skb;
71 struct nlmsghdr *nlh; 71 struct nlmsghdr *nlh;
72 72
@@ -104,7 +104,7 @@ void selnl_notify_policyload(u32 seqno)
104 104
105static int __init selnl_init(void) 105static int __init selnl_init(void)
106{ 106{
107 selnl = netlink_kernel_create(NETLINK_SELINUX, SELNLGRP_MAX, NULL, 107 selnl = netlink_kernel_create(NETLINK_SELINUX, SELNLGRP_MAX, NULL, NULL,
108 THIS_MODULE); 108 THIS_MODULE);
109 if (selnl == NULL) 109 if (selnl == NULL)
110 panic("SELinux: Cannot create netlink socket."); 110 panic("SELinux: Cannot create netlink socket.");
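selnl_init() shows the same two mechanical conversions as the xfrm code: the saved tail mark becomes sk_buff_data_t (an offset on 64-bit builds, a pointer otherwise), and netlink_kernel_create() takes its new fourth argument. For reference, the signature after this series, per the 2.6.22-era include/linux/netlink.h:

        struct sock *netlink_kernel_create(int unit, unsigned int groups,
                                           void (*input)(struct sock *sk, int len),
                                           struct mutex *cb_mutex,
                                           struct module *module);

Callers with no dumps to serialise against anything else simply pass NULL for cb_mutex, as both call sites in this diff do.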