aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/asm-alpha/socket.h2
-rw-r--r--include/asm-arm/arch-sa1100/mcp.h21
-rw-r--r--include/asm-arm/hardware/gic.h41
-rw-r--r--include/asm-arm/socket.h2
-rw-r--r--include/asm-arm26/socket.h2
-rw-r--r--include/asm-cris/socket.h2
-rw-r--r--include/asm-frv/socket.h2
-rw-r--r--include/asm-h8300/socket.h2
-rw-r--r--include/asm-i386/checksum.h2
-rw-r--r--include/asm-i386/socket.h2
-rw-r--r--include/asm-ia64/socket.h2
-rw-r--r--include/asm-m32r/checksum.h2
-rw-r--r--include/asm-m32r/socket.h2
-rw-r--r--include/asm-m68k/socket.h2
-rw-r--r--include/asm-mips/socket.h2
-rw-r--r--include/asm-parisc/socket.h2
-rw-r--r--include/asm-powerpc/8253pit.h (renamed from include/asm-ppc/8253pit.h)2
-rw-r--r--include/asm-powerpc/agp.h (renamed from include/asm-ppc/agp.h)0
-rw-r--r--include/asm-powerpc/cputime.h1
-rw-r--r--include/asm-powerpc/div64.h (renamed from include/asm-ppc/div64.h)0
-rw-r--r--include/asm-powerpc/emergency-restart.h1
-rw-r--r--include/asm-powerpc/errno.h (renamed from include/asm-ppc/errno.h)0
-rw-r--r--include/asm-powerpc/ioctl.h (renamed from include/asm-ppc/ioctl.h)0
-rw-r--r--include/asm-powerpc/ioctls.h (renamed from include/asm-ppc/ioctls.h)0
-rw-r--r--include/asm-powerpc/ipc.h (renamed from include/asm-ppc/ipc.h)0
-rw-r--r--include/asm-powerpc/linkage.h (renamed from include/asm-ppc/linkage.h)0
-rw-r--r--include/asm-powerpc/local.h (renamed from include/asm-ppc64/local.h)0
-rw-r--r--include/asm-powerpc/namei.h (renamed from include/asm-ppc/namei.h)0
-rw-r--r--include/asm-powerpc/percpu.h1
-rw-r--r--include/asm-powerpc/poll.h (renamed from include/asm-ppc/poll.h)0
-rw-r--r--include/asm-powerpc/resource.h1
-rw-r--r--include/asm-powerpc/shmparam.h (renamed from include/asm-ppc/shmparam.h)0
-rw-r--r--include/asm-powerpc/string.h (renamed from include/asm-ppc/string.h)0
-rw-r--r--include/asm-powerpc/unaligned.h (renamed from include/asm-ppc/unaligned.h)0
-rw-r--r--include/asm-powerpc/xor.h (renamed from include/asm-ppc/xor.h)0
-rw-r--r--include/asm-ppc/cputime.h6
-rw-r--r--include/asm-ppc/emergency-restart.h6
-rw-r--r--include/asm-ppc/hdreg.h1
-rw-r--r--include/asm-ppc/local.h6
-rw-r--r--include/asm-ppc/percpu.h6
-rw-r--r--include/asm-ppc/resource.h6
-rw-r--r--include/asm-ppc/socket.h2
-rw-r--r--include/asm-ppc64/8253pit.h10
-rw-r--r--include/asm-ppc64/abs_addr.h86
-rw-r--r--include/asm-ppc64/agp.h23
-rw-r--r--include/asm-ppc64/cputable.h47
-rw-r--r--include/asm-ppc64/cputime.h6
-rw-r--r--include/asm-ppc64/div64.h1
-rw-r--r--include/asm-ppc64/emergency-restart.h6
-rw-r--r--include/asm-ppc64/errno.h18
-rw-r--r--include/asm-ppc64/firmware.h101
-rw-r--r--include/asm-ppc64/hdreg.h1
-rw-r--r--include/asm-ppc64/imalloc.h2
-rw-r--r--include/asm-ppc64/ioctl.h74
-rw-r--r--include/asm-ppc64/ioctls.h114
-rw-r--r--include/asm-ppc64/iommu.h3
-rw-r--r--include/asm-ppc64/ipc.h1
-rw-r--r--include/asm-ppc64/linkage.h6
-rw-r--r--include/asm-ppc64/lmb.h1
-rw-r--r--include/asm-ppc64/machdep.h3
-rw-r--r--include/asm-ppc64/mmu.h16
-rw-r--r--include/asm-ppc64/naca.h7
-rw-r--r--include/asm-ppc64/namei.h23
-rw-r--r--include/asm-ppc64/page.h55
-rw-r--r--include/asm-ppc64/param.h4
-rw-r--r--include/asm-ppc64/percpu.h6
-rw-r--r--include/asm-ppc64/pgalloc.h93
-rw-r--r--include/asm-ppc64/pgtable.h92
-rw-r--r--include/asm-ppc64/pmc.h2
-rw-r--r--include/asm-ppc64/poll.h32
-rw-r--r--include/asm-ppc64/processor.h5
-rw-r--r--include/asm-ppc64/prom.h14
-rw-r--r--include/asm-ppc64/resource.h6
-rw-r--r--include/asm-ppc64/shmparam.h13
-rw-r--r--include/asm-ppc64/socket.h2
-rw-r--r--include/asm-ppc64/string.h35
-rw-r--r--include/asm-ppc64/system.h4
-rw-r--r--include/asm-ppc64/unaligned.h21
-rw-r--r--include/asm-ppc64/vio.h91
-rw-r--r--include/asm-ppc64/xor.h1
-rw-r--r--include/asm-s390/socket.h2
-rw-r--r--include/asm-sh/socket.h2
-rw-r--r--include/asm-sparc/processor.h1
-rw-r--r--include/asm-sparc/segment.h6
-rw-r--r--include/asm-sparc/socket.h2
-rw-r--r--include/asm-sparc/system.h1
-rw-r--r--include/asm-sparc64/atomic.h8
-rw-r--r--include/asm-sparc64/bitops.h4
-rw-r--r--include/asm-sparc64/processor.h1
-rw-r--r--include/asm-sparc64/segment.h6
-rw-r--r--include/asm-sparc64/sfafsr.h82
-rw-r--r--include/asm-sparc64/socket.h2
-rw-r--r--include/asm-sparc64/spinlock.h42
-rw-r--r--include/asm-sparc64/system.h17
-rw-r--r--include/asm-v850/socket.h2
-rw-r--r--include/asm-x86_64/checksum.h2
-rw-r--r--include/asm-x86_64/socket.h2
-rw-r--r--include/asm-xtensa/socket.h2
-rw-r--r--include/linux/8250_pci.h39
-rw-r--r--include/linux/ata.h45
-rw-r--r--include/linux/dccp.h456
-rw-r--r--include/linux/ethtool.h17
-rw-r--r--include/linux/hippidevice.h8
-rw-r--r--include/linux/if_ether.h2
-rw-r--r--include/linux/if_fc.h2
-rw-r--r--include/linux/if_fddi.h2
-rw-r--r--include/linux/if_frad.h6
-rw-r--r--include/linux/if_hippi.h6
-rw-r--r--include/linux/if_tr.h4
-rw-r--r--include/linux/if_vlan.h1
-rw-r--r--include/linux/igmp.h3
-rw-r--r--include/linux/in.h1
-rw-r--r--include/linux/inet_diag.h138
-rw-r--r--include/linux/ip.h2
-rw-r--r--include/linux/ipv6.h52
-rw-r--r--include/linux/libata.h49
-rw-r--r--include/linux/list.h65
-rw-r--r--include/linux/mii.h9
-rw-r--r--include/linux/mmc/host.h4
-rw-r--r--include/linux/mod_devicetable.h17
-rw-r--r--include/linux/net.h11
-rw-r--r--include/linux/netdevice.h35
-rw-r--r--include/linux/netfilter.h88
-rw-r--r--include/linux/netfilter/nfnetlink.h169
-rw-r--r--include/linux/netfilter/nfnetlink_conntrack.h124
-rw-r--r--include/linux/netfilter/nfnetlink_log.h88
-rw-r--r--include/linux/netfilter/nfnetlink_queue.h89
-rw-r--r--include/linux/netfilter_decnet.h17
-rw-r--r--include/linux/netfilter_ipv4.h8
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack.h189
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_core.h18
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_helper.h2
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_protocol.h24
-rw-r--r--include/linux/netfilter_ipv4/ip_logging.h20
-rw-r--r--include/linux/netfilter_ipv4/ip_nat_protocol.h25
-rw-r--r--include/linux/netfilter_ipv4/ip_tables.h3
-rw-r--r--include/linux/netfilter_ipv4/ipt_LOG.h1
-rw-r--r--include/linux/netfilter_ipv4/ipt_NFQUEUE.h16
-rw-r--r--include/linux/netfilter_ipv4/ipt_TTL.h21
-rw-r--r--include/linux/netfilter_ipv4/ipt_connbytes.h25
-rw-r--r--include/linux/netfilter_ipv4/ipt_dccp.h23
-rw-r--r--include/linux/netfilter_ipv4/ipt_string.h18
-rw-r--r--include/linux/netfilter_ipv6.h6
-rw-r--r--include/linux/netfilter_ipv6/ip6_logging.h20
-rw-r--r--include/linux/netfilter_ipv6/ip6_tables.h3
-rw-r--r--include/linux/netfilter_ipv6/ip6t_HL.h22
-rw-r--r--include/linux/netfilter_ipv6/ip6t_LOG.h1
-rw-r--r--include/linux/netfilter_ipv6/ip6t_REJECT.h18
-rw-r--r--include/linux/netlink.h18
-rw-r--r--include/linux/pci_ids.h1
-rw-r--r--include/linux/phy.h377
-rw-r--r--include/linux/random.h2
-rw-r--r--include/linux/rtnetlink.h42
-rw-r--r--include/linux/security.h6
-rw-r--r--include/linux/selinux_netlink.h13
-rw-r--r--include/linux/serialP.h40
-rw-r--r--include/linux/skbuff.h118
-rw-r--r--include/linux/socket.h9
-rw-r--r--include/linux/tcp.h82
-rw-r--r--include/linux/tcp_diag.h127
-rw-r--r--include/linux/types.h3
-rw-r--r--include/linux/xfrm.h18
-rw-r--r--include/net/act_api.h2
-rw-r--r--include/net/addrconf.h6
-rw-r--r--include/net/af_unix.h15
-rw-r--r--include/net/arp.h2
-rw-r--r--include/net/ax25.h2
-rw-r--r--include/net/bluetooth/bluetooth.h5
-rw-r--r--include/net/bluetooth/hci.h15
-rw-r--r--include/net/bluetooth/hci_core.h2
-rw-r--r--include/net/bluetooth/rfcomm.h14
-rw-r--r--include/net/datalink.h2
-rw-r--r--include/net/dn.h1
-rw-r--r--include/net/icmp.h7
-rw-r--r--include/net/inet6_hashtables.h130
-rw-r--r--include/net/inet_common.h6
-rw-r--r--include/net/inet_connection_sock.h276
-rw-r--r--include/net/inet_hashtables.h427
-rw-r--r--include/net/inet_timewait_sock.h219
-rw-r--r--include/net/ip.h32
-rw-r--r--include/net/ip6_route.h1
-rw-r--r--include/net/ip_fib.h5
-rw-r--r--include/net/ip_vs.h1
-rw-r--r--include/net/ipv6.h39
-rw-r--r--include/net/llc.h8
-rw-r--r--include/net/neighbour.h9
-rw-r--r--include/net/p8022.h5
-rw-r--r--include/net/pkt_cls.h6
-rw-r--r--include/net/psnap.h2
-rw-r--r--include/net/raw.h9
-rw-r--r--include/net/rawv6.h5
-rw-r--r--include/net/request_sock.h14
-rw-r--r--include/net/route.h6
-rw-r--r--include/net/sctp/constants.h2
-rw-r--r--include/net/sock.h113
-rw-r--r--include/net/tcp.h723
-rw-r--r--include/net/tcp_ecn.h2
-rw-r--r--include/net/tcp_states.h34
-rw-r--r--include/net/udp.h5
-rw-r--r--include/net/x25.h2
-rw-r--r--include/net/x25device.h1
-rw-r--r--include/net/xfrm.h1
-rw-r--r--include/rdma/ib_cache.h105
-rw-r--r--include/rdma/ib_cm.h568
-rw-r--r--include/rdma/ib_fmr_pool.h93
-rw-r--r--include/rdma/ib_mad.h579
-rw-r--r--include/rdma/ib_pack.h245
-rw-r--r--include/rdma/ib_sa.h373
-rw-r--r--include/rdma/ib_smi.h94
-rw-r--r--include/rdma/ib_user_cm.h328
-rw-r--r--include/rdma/ib_user_mad.h137
-rw-r--r--include/rdma/ib_user_verbs.h422
-rw-r--r--include/rdma/ib_verbs.h1461
213 files changed, 8896 insertions, 1871 deletions
diff --git a/include/asm-alpha/socket.h b/include/asm-alpha/socket.h
index d00259d3dc78..b5193229132a 100644
--- a/include/asm-alpha/socket.h
+++ b/include/asm-alpha/socket.h
@@ -25,6 +25,8 @@
25#define SO_ERROR 0x1007 25#define SO_ERROR 0x1007
26#define SO_SNDBUF 0x1001 26#define SO_SNDBUF 0x1001
27#define SO_RCVBUF 0x1002 27#define SO_RCVBUF 0x1002
28#define SO_SNDBUFFORCE 0x100a
29#define SO_RCVBUFFORCE 0x100b
28#define SO_RCVLOWAT 0x1010 30#define SO_RCVLOWAT 0x1010
29#define SO_SNDLOWAT 0x1011 31#define SO_SNDLOWAT 0x1011
30#define SO_RCVTIMEO 0x1012 32#define SO_RCVTIMEO 0x1012
diff --git a/include/asm-arm/arch-sa1100/mcp.h b/include/asm-arm/arch-sa1100/mcp.h
new file mode 100644
index 000000000000..f58a22755c61
--- /dev/null
+++ b/include/asm-arm/arch-sa1100/mcp.h
@@ -0,0 +1,21 @@
1/*
2 * linux/include/asm-arm/arch-sa1100/mcp.h
3 *
4 * Copyright (C) 2005 Russell King.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#ifndef __ASM_ARM_ARCH_MCP_H
11#define __ASM_ARM_ARCH_MCP_H
12
13#include <linux/types.h>
14
15struct mcp_plat_data {
16 u32 mccr0;
17 u32 mccr1;
18 unsigned int sclk_rate;
19};
20
21#endif
diff --git a/include/asm-arm/hardware/gic.h b/include/asm-arm/hardware/gic.h
new file mode 100644
index 000000000000..3fa5eb70f64e
--- /dev/null
+++ b/include/asm-arm/hardware/gic.h
@@ -0,0 +1,41 @@
1/*
2 * linux/include/asm-arm/hardware/gic.h
3 *
4 * Copyright (C) 2002 ARM Limited, All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#ifndef __ASM_ARM_HARDWARE_GIC_H
11#define __ASM_ARM_HARDWARE_GIC_H
12
13#include <linux/compiler.h>
14
15#define GIC_CPU_CTRL 0x00
16#define GIC_CPU_PRIMASK 0x04
17#define GIC_CPU_BINPOINT 0x08
18#define GIC_CPU_INTACK 0x0c
19#define GIC_CPU_EOI 0x10
20#define GIC_CPU_RUNNINGPRI 0x14
21#define GIC_CPU_HIGHPRI 0x18
22
23#define GIC_DIST_CTRL 0x000
24#define GIC_DIST_CTR 0x004
25#define GIC_DIST_ENABLE_SET 0x100
26#define GIC_DIST_ENABLE_CLEAR 0x180
27#define GIC_DIST_PENDING_SET 0x200
28#define GIC_DIST_PENDING_CLEAR 0x280
29#define GIC_DIST_ACTIVE_BIT 0x300
30#define GIC_DIST_PRI 0x400
31#define GIC_DIST_TARGET 0x800
32#define GIC_DIST_CONFIG 0xc00
33#define GIC_DIST_SOFTINT 0xf00
34
35#ifndef __ASSEMBLY__
36void gic_dist_init(void __iomem *base);
37void gic_cpu_init(void __iomem *base);
38void gic_raise_softirq(cpumask_t cpumask, unsigned int irq);
39#endif
40
41#endif
diff --git a/include/asm-arm/socket.h b/include/asm-arm/socket.h
index 46d20585d951..3c51da6438c9 100644
--- a/include/asm-arm/socket.h
+++ b/include/asm-arm/socket.h
@@ -14,6 +14,8 @@
14#define SO_BROADCAST 6 14#define SO_BROADCAST 6
15#define SO_SNDBUF 7 15#define SO_SNDBUF 7
16#define SO_RCVBUF 8 16#define SO_RCVBUF 8
17#define SO_SNDBUFFORCE 32
18#define SO_RCVBUFFORCE 33
17#define SO_KEEPALIVE 9 19#define SO_KEEPALIVE 9
18#define SO_OOBINLINE 10 20#define SO_OOBINLINE 10
19#define SO_NO_CHECK 11 21#define SO_NO_CHECK 11
diff --git a/include/asm-arm26/socket.h b/include/asm-arm26/socket.h
index 46d20585d951..3c51da6438c9 100644
--- a/include/asm-arm26/socket.h
+++ b/include/asm-arm26/socket.h
@@ -14,6 +14,8 @@
14#define SO_BROADCAST 6 14#define SO_BROADCAST 6
15#define SO_SNDBUF 7 15#define SO_SNDBUF 7
16#define SO_RCVBUF 8 16#define SO_RCVBUF 8
17#define SO_SNDBUFFORCE 32
18#define SO_RCVBUFFORCE 33
17#define SO_KEEPALIVE 9 19#define SO_KEEPALIVE 9
18#define SO_OOBINLINE 10 20#define SO_OOBINLINE 10
19#define SO_NO_CHECK 11 21#define SO_NO_CHECK 11
diff --git a/include/asm-cris/socket.h b/include/asm-cris/socket.h
index f159b4f165f7..8b1da3e58c55 100644
--- a/include/asm-cris/socket.h
+++ b/include/asm-cris/socket.h
@@ -16,6 +16,8 @@
16#define SO_BROADCAST 6 16#define SO_BROADCAST 6
17#define SO_SNDBUF 7 17#define SO_SNDBUF 7
18#define SO_RCVBUF 8 18#define SO_RCVBUF 8
19#define SO_SNDBUFFORCE 32
20#define SO_RCVBUFFORCE 33
19#define SO_KEEPALIVE 9 21#define SO_KEEPALIVE 9
20#define SO_OOBINLINE 10 22#define SO_OOBINLINE 10
21#define SO_NO_CHECK 11 23#define SO_NO_CHECK 11
diff --git a/include/asm-frv/socket.h b/include/asm-frv/socket.h
index c3be17c7de4b..7177f8b9817c 100644
--- a/include/asm-frv/socket.h
+++ b/include/asm-frv/socket.h
@@ -14,6 +14,8 @@
14#define SO_BROADCAST 6 14#define SO_BROADCAST 6
15#define SO_SNDBUF 7 15#define SO_SNDBUF 7
16#define SO_RCVBUF 8 16#define SO_RCVBUF 8
17#define SO_SNDBUFFORCE 32
18#define SO_RCVBUFFORCE 33
17#define SO_KEEPALIVE 9 19#define SO_KEEPALIVE 9
18#define SO_OOBINLINE 10 20#define SO_OOBINLINE 10
19#define SO_NO_CHECK 11 21#define SO_NO_CHECK 11
diff --git a/include/asm-h8300/socket.h b/include/asm-h8300/socket.h
index af33b8525dcf..d98cf85bafc1 100644
--- a/include/asm-h8300/socket.h
+++ b/include/asm-h8300/socket.h
@@ -14,6 +14,8 @@
14#define SO_BROADCAST 6 14#define SO_BROADCAST 6
15#define SO_SNDBUF 7 15#define SO_SNDBUF 7
16#define SO_RCVBUF 8 16#define SO_RCVBUF 8
17#define SO_SNDBUFFORCE 32
18#define SO_RCVBUFFORCE 33
17#define SO_KEEPALIVE 9 19#define SO_KEEPALIVE 9
18#define SO_OOBINLINE 10 20#define SO_OOBINLINE 10
19#define SO_NO_CHECK 11 21#define SO_NO_CHECK 11
diff --git a/include/asm-i386/checksum.h b/include/asm-i386/checksum.h
index f949e44c2a35..67d3630c4e89 100644
--- a/include/asm-i386/checksum.h
+++ b/include/asm-i386/checksum.h
@@ -83,7 +83,7 @@ static inline unsigned short ip_fast_csum(unsigned char * iph,
83 "adcl $0, %0 ;\n" 83 "adcl $0, %0 ;\n"
84 "notl %0 ;\n" 84 "notl %0 ;\n"
85"2: ;\n" 85"2: ;\n"
86 /* Since the input registers which are loaded with iph and ipl 86 /* Since the input registers which are loaded with iph and ihl
87 are modified, we must also specify them as outputs, or gcc 87 are modified, we must also specify them as outputs, or gcc
88 will assume they contain their original values. */ 88 will assume they contain their original values. */
89 : "=r" (sum), "=r" (iph), "=r" (ihl) 89 : "=r" (sum), "=r" (iph), "=r" (ihl)
diff --git a/include/asm-i386/socket.h b/include/asm-i386/socket.h
index 07f6b38ad140..802ae76195b7 100644
--- a/include/asm-i386/socket.h
+++ b/include/asm-i386/socket.h
@@ -14,6 +14,8 @@
14#define SO_BROADCAST 6 14#define SO_BROADCAST 6
15#define SO_SNDBUF 7 15#define SO_SNDBUF 7
16#define SO_RCVBUF 8 16#define SO_RCVBUF 8
17#define SO_SNDBUFFORCE 32
18#define SO_RCVBUFFORCE 33
17#define SO_KEEPALIVE 9 19#define SO_KEEPALIVE 9
18#define SO_OOBINLINE 10 20#define SO_OOBINLINE 10
19#define SO_NO_CHECK 11 21#define SO_NO_CHECK 11
diff --git a/include/asm-ia64/socket.h b/include/asm-ia64/socket.h
index 21a9f10d6baa..a255006fb7b5 100644
--- a/include/asm-ia64/socket.h
+++ b/include/asm-ia64/socket.h
@@ -23,6 +23,8 @@
23#define SO_BROADCAST 6 23#define SO_BROADCAST 6
24#define SO_SNDBUF 7 24#define SO_SNDBUF 7
25#define SO_RCVBUF 8 25#define SO_RCVBUF 8
26#define SO_SNDBUFFORCE 32
27#define SO_RCVBUFFORCE 33
26#define SO_KEEPALIVE 9 28#define SO_KEEPALIVE 9
27#define SO_OOBINLINE 10 29#define SO_OOBINLINE 10
28#define SO_NO_CHECK 11 30#define SO_NO_CHECK 11
diff --git a/include/asm-m32r/checksum.h b/include/asm-m32r/checksum.h
index 99f37dbf2558..877ebf46e9ff 100644
--- a/include/asm-m32r/checksum.h
+++ b/include/asm-m32r/checksum.h
@@ -105,7 +105,7 @@ static inline unsigned short ip_fast_csum(unsigned char * iph,
105 " addx %0, %3 \n" 105 " addx %0, %3 \n"
106 " .fillinsn\n" 106 " .fillinsn\n"
107 "2: \n" 107 "2: \n"
108 /* Since the input registers which are loaded with iph and ipl 108 /* Since the input registers which are loaded with iph and ihl
109 are modified, we must also specify them as outputs, or gcc 109 are modified, we must also specify them as outputs, or gcc
110 will assume they contain their original values. */ 110 will assume they contain their original values. */
111 : "=&r" (sum), "=r" (iph), "=r" (ihl), "=&r" (tmpreg0), "=&r" (tmpreg1) 111 : "=&r" (sum), "=r" (iph), "=r" (ihl), "=&r" (tmpreg0), "=&r" (tmpreg1)
diff --git a/include/asm-m32r/socket.h b/include/asm-m32r/socket.h
index 159519d99042..8b6680f223c0 100644
--- a/include/asm-m32r/socket.h
+++ b/include/asm-m32r/socket.h
@@ -14,6 +14,8 @@
14#define SO_BROADCAST 6 14#define SO_BROADCAST 6
15#define SO_SNDBUF 7 15#define SO_SNDBUF 7
16#define SO_RCVBUF 8 16#define SO_RCVBUF 8
17#define SO_SNDBUFFORCE 32
18#define SO_RCVBUFFORCE 33
17#define SO_KEEPALIVE 9 19#define SO_KEEPALIVE 9
18#define SO_OOBINLINE 10 20#define SO_OOBINLINE 10
19#define SO_NO_CHECK 11 21#define SO_NO_CHECK 11
diff --git a/include/asm-m68k/socket.h b/include/asm-m68k/socket.h
index 8d0b9fc2d07e..f578ca4b776a 100644
--- a/include/asm-m68k/socket.h
+++ b/include/asm-m68k/socket.h
@@ -14,6 +14,8 @@
14#define SO_BROADCAST 6 14#define SO_BROADCAST 6
15#define SO_SNDBUF 7 15#define SO_SNDBUF 7
16#define SO_RCVBUF 8 16#define SO_RCVBUF 8
17#define SO_SNDBUFFORCE 32
18#define SO_RCVBUFFORCE 33
17#define SO_KEEPALIVE 9 19#define SO_KEEPALIVE 9
18#define SO_OOBINLINE 10 20#define SO_OOBINLINE 10
19#define SO_NO_CHECK 11 21#define SO_NO_CHECK 11
diff --git a/include/asm-mips/socket.h b/include/asm-mips/socket.h
index 020b4db70ee5..d478a86294ee 100644
--- a/include/asm-mips/socket.h
+++ b/include/asm-mips/socket.h
@@ -37,6 +37,8 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */
37#define SO_ERROR 0x1007 /* get error status and clear */ 37#define SO_ERROR 0x1007 /* get error status and clear */
38#define SO_SNDBUF 0x1001 /* Send buffer size. */ 38#define SO_SNDBUF 0x1001 /* Send buffer size. */
39#define SO_RCVBUF 0x1002 /* Receive buffer. */ 39#define SO_RCVBUF 0x1002 /* Receive buffer. */
40#define SO_SNDBUFFORCE 0x100a
41#define SO_RCVBUFFORCE 0x100b
40#define SO_SNDLOWAT 0x1003 /* send low-water mark */ 42#define SO_SNDLOWAT 0x1003 /* send low-water mark */
41#define SO_RCVLOWAT 0x1004 /* receive low-water mark */ 43#define SO_RCVLOWAT 0x1004 /* receive low-water mark */
42#define SO_SNDTIMEO 0x1005 /* send timeout */ 44#define SO_SNDTIMEO 0x1005 /* send timeout */
diff --git a/include/asm-parisc/socket.h b/include/asm-parisc/socket.h
index 4a77996c1862..1bf54dc53c10 100644
--- a/include/asm-parisc/socket.h
+++ b/include/asm-parisc/socket.h
@@ -16,6 +16,8 @@
16/* To add :#define SO_REUSEPORT 0x0200 */ 16/* To add :#define SO_REUSEPORT 0x0200 */
17#define SO_SNDBUF 0x1001 17#define SO_SNDBUF 0x1001
18#define SO_RCVBUF 0x1002 18#define SO_RCVBUF 0x1002
19#define SO_SNDBUFFORCE 0x100a
20#define SO_RCVBUFFORCE 0x100b
19#define SO_SNDLOWAT 0x1003 21#define SO_SNDLOWAT 0x1003
20#define SO_RCVLOWAT 0x1004 22#define SO_RCVLOWAT 0x1004
21#define SO_SNDTIMEO 0x1005 23#define SO_SNDTIMEO 0x1005
diff --git a/include/asm-ppc/8253pit.h b/include/asm-powerpc/8253pit.h
index 285f78488ccb..862708a749b0 100644
--- a/include/asm-ppc/8253pit.h
+++ b/include/asm-powerpc/8253pit.h
@@ -5,6 +5,6 @@
5#ifndef _8253PIT_H 5#ifndef _8253PIT_H
6#define _8253PIT_H 6#define _8253PIT_H
7 7
8#define PIT_TICK_RATE 1193182UL 8#define PIT_TICK_RATE 1193182UL
9 9
10#endif 10#endif
diff --git a/include/asm-ppc/agp.h b/include/asm-powerpc/agp.h
index ca9e423307f4..ca9e423307f4 100644
--- a/include/asm-ppc/agp.h
+++ b/include/asm-powerpc/agp.h
diff --git a/include/asm-powerpc/cputime.h b/include/asm-powerpc/cputime.h
new file mode 100644
index 000000000000..6d68ad7e0ea3
--- /dev/null
+++ b/include/asm-powerpc/cputime.h
@@ -0,0 +1 @@
#include <asm-generic/cputime.h>
diff --git a/include/asm-ppc/div64.h b/include/asm-powerpc/div64.h
index 6cd978cefb28..6cd978cefb28 100644
--- a/include/asm-ppc/div64.h
+++ b/include/asm-powerpc/div64.h
diff --git a/include/asm-powerpc/emergency-restart.h b/include/asm-powerpc/emergency-restart.h
new file mode 100644
index 000000000000..3711bd9d50bd
--- /dev/null
+++ b/include/asm-powerpc/emergency-restart.h
@@ -0,0 +1 @@
#include <asm-generic/emergency-restart.h>
diff --git a/include/asm-ppc/errno.h b/include/asm-powerpc/errno.h
index 19f20bd41ae6..19f20bd41ae6 100644
--- a/include/asm-ppc/errno.h
+++ b/include/asm-powerpc/errno.h
diff --git a/include/asm-ppc/ioctl.h b/include/asm-powerpc/ioctl.h
index 93c6acfdd0fd..93c6acfdd0fd 100644
--- a/include/asm-ppc/ioctl.h
+++ b/include/asm-powerpc/ioctl.h
diff --git a/include/asm-ppc/ioctls.h b/include/asm-powerpc/ioctls.h
index f5b7f2b055e7..f5b7f2b055e7 100644
--- a/include/asm-ppc/ioctls.h
+++ b/include/asm-powerpc/ioctls.h
diff --git a/include/asm-ppc/ipc.h b/include/asm-powerpc/ipc.h
index a46e3d9c2a3f..a46e3d9c2a3f 100644
--- a/include/asm-ppc/ipc.h
+++ b/include/asm-powerpc/ipc.h
diff --git a/include/asm-ppc/linkage.h b/include/asm-powerpc/linkage.h
index 291c2d01c44f..291c2d01c44f 100644
--- a/include/asm-ppc/linkage.h
+++ b/include/asm-powerpc/linkage.h
diff --git a/include/asm-ppc64/local.h b/include/asm-powerpc/local.h
index c11c530f74d0..c11c530f74d0 100644
--- a/include/asm-ppc64/local.h
+++ b/include/asm-powerpc/local.h
diff --git a/include/asm-ppc/namei.h b/include/asm-powerpc/namei.h
index 29c9ec832133..29c9ec832133 100644
--- a/include/asm-ppc/namei.h
+++ b/include/asm-powerpc/namei.h
diff --git a/include/asm-powerpc/percpu.h b/include/asm-powerpc/percpu.h
new file mode 100644
index 000000000000..06a959d67234
--- /dev/null
+++ b/include/asm-powerpc/percpu.h
@@ -0,0 +1 @@
#include <asm-generic/percpu.h>
diff --git a/include/asm-ppc/poll.h b/include/asm-powerpc/poll.h
index be5024913c62..be5024913c62 100644
--- a/include/asm-ppc/poll.h
+++ b/include/asm-powerpc/poll.h
diff --git a/include/asm-powerpc/resource.h b/include/asm-powerpc/resource.h
new file mode 100644
index 000000000000..04bc4db8921b
--- /dev/null
+++ b/include/asm-powerpc/resource.h
@@ -0,0 +1 @@
#include <asm-generic/resource.h>
diff --git a/include/asm-ppc/shmparam.h b/include/asm-powerpc/shmparam.h
index d6250602ae64..d6250602ae64 100644
--- a/include/asm-ppc/shmparam.h
+++ b/include/asm-powerpc/shmparam.h
diff --git a/include/asm-ppc/string.h b/include/asm-powerpc/string.h
index 225575997392..225575997392 100644
--- a/include/asm-ppc/string.h
+++ b/include/asm-powerpc/string.h
diff --git a/include/asm-ppc/unaligned.h b/include/asm-powerpc/unaligned.h
index 45520d9b85d1..45520d9b85d1 100644
--- a/include/asm-ppc/unaligned.h
+++ b/include/asm-powerpc/unaligned.h
diff --git a/include/asm-ppc/xor.h b/include/asm-powerpc/xor.h
index c82eb12a5b18..c82eb12a5b18 100644
--- a/include/asm-ppc/xor.h
+++ b/include/asm-powerpc/xor.h
diff --git a/include/asm-ppc/cputime.h b/include/asm-ppc/cputime.h
deleted file mode 100644
index 8e9faf5ce720..000000000000
--- a/include/asm-ppc/cputime.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef __PPC_CPUTIME_H
2#define __PPC_CPUTIME_H
3
4#include <asm-generic/cputime.h>
5
6#endif /* __PPC_CPUTIME_H */
diff --git a/include/asm-ppc/emergency-restart.h b/include/asm-ppc/emergency-restart.h
deleted file mode 100644
index 108d8c48e42e..000000000000
--- a/include/asm-ppc/emergency-restart.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef _ASM_EMERGENCY_RESTART_H
2#define _ASM_EMERGENCY_RESTART_H
3
4#include <asm-generic/emergency-restart.h>
5
6#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/include/asm-ppc/hdreg.h b/include/asm-ppc/hdreg.h
deleted file mode 100644
index 7f7fd1af0af3..000000000000
--- a/include/asm-ppc/hdreg.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/hdreg.h>
diff --git a/include/asm-ppc/local.h b/include/asm-ppc/local.h
deleted file mode 100644
index b08e3eced10e..000000000000
--- a/include/asm-ppc/local.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef __PPC_LOCAL_H
2#define __PPC_LOCAL_H
3
4#include <asm-generic/local.h>
5
6#endif /* __PPC_LOCAL_H */
diff --git a/include/asm-ppc/percpu.h b/include/asm-ppc/percpu.h
deleted file mode 100644
index d66667cd5878..000000000000
--- a/include/asm-ppc/percpu.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef __ARCH_PPC_PERCPU__
2#define __ARCH_PPC_PERCPU__
3
4#include <asm-generic/percpu.h>
5
6#endif /* __ARCH_PPC_PERCPU__ */
diff --git a/include/asm-ppc/resource.h b/include/asm-ppc/resource.h
deleted file mode 100644
index 86a1ea23a6ed..000000000000
--- a/include/asm-ppc/resource.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef _PPC_RESOURCE_H
2#define _PPC_RESOURCE_H
3
4#include <asm-generic/resource.h>
5
6#endif
diff --git a/include/asm-ppc/socket.h b/include/asm-ppc/socket.h
index 4134376b0f66..296e1a3469d0 100644
--- a/include/asm-ppc/socket.h
+++ b/include/asm-ppc/socket.h
@@ -20,6 +20,8 @@
20#define SO_BROADCAST 6 20#define SO_BROADCAST 6
21#define SO_SNDBUF 7 21#define SO_SNDBUF 7
22#define SO_RCVBUF 8 22#define SO_RCVBUF 8
23#define SO_SNDBUFFORCE 32
24#define SO_RCVBUFFORCE 33
23#define SO_KEEPALIVE 9 25#define SO_KEEPALIVE 9
24#define SO_OOBINLINE 10 26#define SO_OOBINLINE 10
25#define SO_NO_CHECK 11 27#define SO_NO_CHECK 11
diff --git a/include/asm-ppc64/8253pit.h b/include/asm-ppc64/8253pit.h
deleted file mode 100644
index 285f78488ccb..000000000000
--- a/include/asm-ppc64/8253pit.h
+++ /dev/null
@@ -1,10 +0,0 @@
1/*
2 * 8253/8254 Programmable Interval Timer
3 */
4
5#ifndef _8253PIT_H
6#define _8253PIT_H
7
8#define PIT_TICK_RATE 1193182UL
9
10#endif
diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h
index 6d4e8e787058..84c24d4cdb71 100644
--- a/include/asm-ppc64/abs_addr.h
+++ b/include/asm-ppc64/abs_addr.h
@@ -16,93 +16,51 @@
16#include <asm/page.h> 16#include <asm/page.h>
17#include <asm/prom.h> 17#include <asm/prom.h>
18#include <asm/lmb.h> 18#include <asm/lmb.h>
19#include <asm/firmware.h>
19 20
20typedef u32 msChunks_entry; 21struct mschunks_map {
21struct msChunks {
22 unsigned long num_chunks; 22 unsigned long num_chunks;
23 unsigned long chunk_size; 23 unsigned long chunk_size;
24 unsigned long chunk_shift; 24 unsigned long chunk_shift;
25 unsigned long chunk_mask; 25 unsigned long chunk_mask;
26 msChunks_entry *abs; 26 u32 *mapping;
27}; 27};
28 28
29extern struct msChunks msChunks; 29extern struct mschunks_map mschunks_map;
30 30
31extern unsigned long msChunks_alloc(unsigned long, unsigned long, unsigned long); 31/* Chunks are 256 KB */
32extern unsigned long reloc_offset(void); 32#define MSCHUNKS_CHUNK_SHIFT (18)
33#define MSCHUNKS_CHUNK_SIZE (1UL << MSCHUNKS_CHUNK_SHIFT)
34#define MSCHUNKS_OFFSET_MASK (MSCHUNKS_CHUNK_SIZE - 1)
33 35
34#ifdef CONFIG_MSCHUNKS 36static inline unsigned long chunk_to_addr(unsigned long chunk)
35
36static inline unsigned long
37chunk_to_addr(unsigned long chunk)
38{ 37{
39 unsigned long offset = reloc_offset(); 38 return chunk << MSCHUNKS_CHUNK_SHIFT;
40 struct msChunks *_msChunks = PTRRELOC(&msChunks);
41
42 return chunk << _msChunks->chunk_shift;
43} 39}
44 40
45static inline unsigned long 41static inline unsigned long addr_to_chunk(unsigned long addr)
46addr_to_chunk(unsigned long addr)
47{ 42{
48 unsigned long offset = reloc_offset(); 43 return addr >> MSCHUNKS_CHUNK_SHIFT;
49 struct msChunks *_msChunks = PTRRELOC(&msChunks);
50
51 return addr >> _msChunks->chunk_shift;
52} 44}
53 45
54static inline unsigned long 46static inline unsigned long phys_to_abs(unsigned long pa)
55chunk_offset(unsigned long addr)
56{ 47{
57 unsigned long offset = reloc_offset(); 48 unsigned long chunk;
58 struct msChunks *_msChunks = PTRRELOC(&msChunks);
59 49
60 return addr & _msChunks->chunk_mask; 50 /* This is a no-op on non-iSeries */
61} 51 if (!firmware_has_feature(FW_FEATURE_ISERIES))
52 return pa;
62 53
63static inline unsigned long 54 chunk = addr_to_chunk(pa);
64abs_chunk(unsigned long pchunk)
65{
66 unsigned long offset = reloc_offset();
67 struct msChunks *_msChunks = PTRRELOC(&msChunks);
68 if ( pchunk >= _msChunks->num_chunks ) {
69 return pchunk;
70 }
71 return PTRRELOC(_msChunks->abs)[pchunk];
72}
73 55
74/* A macro so it can take pointers or unsigned long. */ 56 if (chunk < mschunks_map.num_chunks)
75#define phys_to_abs(pa) \ 57 chunk = mschunks_map.mapping[chunk];
76 ({ unsigned long _pa = (unsigned long)(pa); \
77 chunk_to_addr(abs_chunk(addr_to_chunk(_pa))) + chunk_offset(_pa); \
78 })
79 58
80static inline unsigned long 59 return chunk_to_addr(chunk) + (pa & MSCHUNKS_OFFSET_MASK);
81physRpn_to_absRpn(unsigned long rpn)
82{
83 unsigned long pa = rpn << PAGE_SHIFT;
84 unsigned long aa = phys_to_abs(pa);
85 return (aa >> PAGE_SHIFT);
86} 60}
87 61
88/* A macro so it can take pointers or unsigned long. */
89#define abs_to_phys(aa) lmb_abs_to_phys((unsigned long)(aa))
90
91#else /* !CONFIG_MSCHUNKS */
92
93#define chunk_to_addr(chunk) ((unsigned long)(chunk))
94#define addr_to_chunk(addr) (addr)
95#define chunk_offset(addr) (0)
96#define abs_chunk(pchunk) (pchunk)
97
98#define phys_to_abs(pa) (pa)
99#define physRpn_to_absRpn(rpn) (rpn)
100#define abs_to_phys(aa) (aa)
101
102#endif /* !CONFIG_MSCHUNKS */
103
104/* Convenience macros */ 62/* Convenience macros */
105#define virt_to_abs(va) phys_to_abs(__pa(va)) 63#define virt_to_abs(va) phys_to_abs(__pa(va))
106#define abs_to_virt(aa) __va(abs_to_phys(aa)) 64#define abs_to_virt(aa) __va(aa)
107 65
108#endif /* _ABS_ADDR_H */ 66#endif /* _ABS_ADDR_H */
diff --git a/include/asm-ppc64/agp.h b/include/asm-ppc64/agp.h
deleted file mode 100644
index ca9e423307f4..000000000000
--- a/include/asm-ppc64/agp.h
+++ /dev/null
@@ -1,23 +0,0 @@
1#ifndef AGP_H
2#define AGP_H 1
3
4#include <asm/io.h>
5
6/* nothing much needed here */
7
8#define map_page_into_agp(page)
9#define unmap_page_from_agp(page)
10#define flush_agp_mappings()
11#define flush_agp_cache() mb()
12
13/* Convert a physical address to an address suitable for the GART. */
14#define phys_to_gart(x) (x)
15#define gart_to_phys(x) (x)
16
17/* GATT allocation. Returns/accepts GATT kernel virtual address. */
18#define alloc_gatt_pages(order) \
19 ((char *)__get_free_pages(GFP_KERNEL, (order)))
20#define free_gatt_pages(table, order) \
21 free_pages((unsigned long)(table), (order))
22
23#endif
diff --git a/include/asm-ppc64/cputable.h b/include/asm-ppc64/cputable.h
index d67fa9e26079..ae6cf3830108 100644
--- a/include/asm-ppc64/cputable.h
+++ b/include/asm-ppc64/cputable.h
@@ -56,11 +56,6 @@ struct cpu_spec {
56 * BHT, SPD, etc... from head.S before branching to identify_machine 56 * BHT, SPD, etc... from head.S before branching to identify_machine
57 */ 57 */
58 cpu_setup_t cpu_setup; 58 cpu_setup_t cpu_setup;
59
60 /* This is used to identify firmware features which are available
61 * to the kernel.
62 */
63 unsigned long firmware_features;
64}; 59};
65 60
66extern struct cpu_spec cpu_specs[]; 61extern struct cpu_spec cpu_specs[];
@@ -71,39 +66,6 @@ static inline unsigned long cpu_has_feature(unsigned long feature)
71 return cur_cpu_spec->cpu_features & feature; 66 return cur_cpu_spec->cpu_features & feature;
72} 67}
73 68
74
75/* firmware feature bitmask values */
76#define FIRMWARE_MAX_FEATURES 63
77
78#define FW_FEATURE_PFT (1UL<<0)
79#define FW_FEATURE_TCE (1UL<<1)
80#define FW_FEATURE_SPRG0 (1UL<<2)
81#define FW_FEATURE_DABR (1UL<<3)
82#define FW_FEATURE_COPY (1UL<<4)
83#define FW_FEATURE_ASR (1UL<<5)
84#define FW_FEATURE_DEBUG (1UL<<6)
85#define FW_FEATURE_TERM (1UL<<7)
86#define FW_FEATURE_PERF (1UL<<8)
87#define FW_FEATURE_DUMP (1UL<<9)
88#define FW_FEATURE_INTERRUPT (1UL<<10)
89#define FW_FEATURE_MIGRATE (1UL<<11)
90#define FW_FEATURE_PERFMON (1UL<<12)
91#define FW_FEATURE_CRQ (1UL<<13)
92#define FW_FEATURE_VIO (1UL<<14)
93#define FW_FEATURE_RDMA (1UL<<15)
94#define FW_FEATURE_LLAN (1UL<<16)
95#define FW_FEATURE_BULK (1UL<<17)
96#define FW_FEATURE_XDABR (1UL<<18)
97#define FW_FEATURE_MULTITCE (1UL<<19)
98#define FW_FEATURE_SPLPAR (1UL<<20)
99
100typedef struct {
101 unsigned long val;
102 char * name;
103} firmware_feature_t;
104
105extern firmware_feature_t firmware_features_table[];
106
107#endif /* __ASSEMBLY__ */ 69#endif /* __ASSEMBLY__ */
108 70
109/* CPU kernel features */ 71/* CPU kernel features */
@@ -140,10 +102,8 @@ extern firmware_feature_t firmware_features_table[];
140#define CPU_FTR_MMCRA_SIHV ASM_CONST(0x0000080000000000) 102#define CPU_FTR_MMCRA_SIHV ASM_CONST(0x0000080000000000)
141#define CPU_FTR_CTRL ASM_CONST(0x0000100000000000) 103#define CPU_FTR_CTRL ASM_CONST(0x0000100000000000)
142 104
143/* Platform firmware features */
144#define FW_FTR_ ASM_CONST(0x0000000000000001)
145
146#ifndef __ASSEMBLY__ 105#ifndef __ASSEMBLY__
106
147#define COMMON_USER_PPC64 (PPC_FEATURE_32 | PPC_FEATURE_64 | \ 107#define COMMON_USER_PPC64 (PPC_FEATURE_32 | PPC_FEATURE_64 | \
148 PPC_FEATURE_HAS_FPU | PPC_FEATURE_HAS_MMU) 108 PPC_FEATURE_HAS_FPU | PPC_FEATURE_HAS_MMU)
149 109
@@ -156,10 +116,9 @@ extern firmware_feature_t firmware_features_table[];
156#define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_PPCAS_ARCH_V2_BASE) 116#define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_PPCAS_ARCH_V2_BASE)
157#else 117#else
158#define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_PPCAS_ARCH_V2_BASE | CPU_FTR_16M_PAGE) 118#define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_PPCAS_ARCH_V2_BASE | CPU_FTR_16M_PAGE)
159#endif 119#endif /* CONFIG_PPC_ISERIES */
160 120
161#define COMMON_PPC64_FW (0) 121#endif /* __ASSEMBLY */
162#endif
163 122
164#ifdef __ASSEMBLY__ 123#ifdef __ASSEMBLY__
165 124
diff --git a/include/asm-ppc64/cputime.h b/include/asm-ppc64/cputime.h
deleted file mode 100644
index 8e9faf5ce720..000000000000
--- a/include/asm-ppc64/cputime.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef __PPC_CPUTIME_H
2#define __PPC_CPUTIME_H
3
4#include <asm-generic/cputime.h>
5
6#endif /* __PPC_CPUTIME_H */
diff --git a/include/asm-ppc64/div64.h b/include/asm-ppc64/div64.h
deleted file mode 100644
index 6cd978cefb28..000000000000
--- a/include/asm-ppc64/div64.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/div64.h>
diff --git a/include/asm-ppc64/emergency-restart.h b/include/asm-ppc64/emergency-restart.h
deleted file mode 100644
index 108d8c48e42e..000000000000
--- a/include/asm-ppc64/emergency-restart.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef _ASM_EMERGENCY_RESTART_H
2#define _ASM_EMERGENCY_RESTART_H
3
4#include <asm-generic/emergency-restart.h>
5
6#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/include/asm-ppc64/errno.h b/include/asm-ppc64/errno.h
deleted file mode 100644
index 69bc3b0c6cbe..000000000000
--- a/include/asm-ppc64/errno.h
+++ /dev/null
@@ -1,18 +0,0 @@
1#ifndef _PPC64_ERRNO_H
2#define _PPC64_ERRNO_H
3
4/*
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 */
10
11#include <asm-generic/errno.h>
12
13#undef EDEADLOCK
14#define EDEADLOCK 58 /* File locking deadlock error */
15
16#define _LAST_ERRNO 516
17
18#endif
diff --git a/include/asm-ppc64/firmware.h b/include/asm-ppc64/firmware.h
new file mode 100644
index 000000000000..22bb85cf60af
--- /dev/null
+++ b/include/asm-ppc64/firmware.h
@@ -0,0 +1,101 @@
1/*
2 * include/asm-ppc64/firmware.h
3 *
4 * Extracted from include/asm-ppc64/cputable.h
5 *
6 * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
7 *
8 * Modifications for ppc64:
9 * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation; either version
14 * 2 of the License, or (at your option) any later version.
15 */
16#ifndef __ASM_PPC_FIRMWARE_H
17#define __ASM_PPC_FIRMWARE_H
18
19#ifdef __KERNEL__
20
21#ifndef __ASSEMBLY__
22
23/* firmware feature bitmask values */
24#define FIRMWARE_MAX_FEATURES 63
25
26#define FW_FEATURE_PFT (1UL<<0)
27#define FW_FEATURE_TCE (1UL<<1)
28#define FW_FEATURE_SPRG0 (1UL<<2)
29#define FW_FEATURE_DABR (1UL<<3)
30#define FW_FEATURE_COPY (1UL<<4)
31#define FW_FEATURE_ASR (1UL<<5)
32#define FW_FEATURE_DEBUG (1UL<<6)
33#define FW_FEATURE_TERM (1UL<<7)
34#define FW_FEATURE_PERF (1UL<<8)
35#define FW_FEATURE_DUMP (1UL<<9)
36#define FW_FEATURE_INTERRUPT (1UL<<10)
37#define FW_FEATURE_MIGRATE (1UL<<11)
38#define FW_FEATURE_PERFMON (1UL<<12)
39#define FW_FEATURE_CRQ (1UL<<13)
40#define FW_FEATURE_VIO (1UL<<14)
41#define FW_FEATURE_RDMA (1UL<<15)
42#define FW_FEATURE_LLAN (1UL<<16)
43#define FW_FEATURE_BULK (1UL<<17)
44#define FW_FEATURE_XDABR (1UL<<18)
45#define FW_FEATURE_MULTITCE (1UL<<19)
46#define FW_FEATURE_SPLPAR (1UL<<20)
47#define FW_FEATURE_ISERIES (1UL<<21)
48
49enum {
50 FW_FEATURE_PSERIES_POSSIBLE = FW_FEATURE_PFT | FW_FEATURE_TCE |
51 FW_FEATURE_SPRG0 | FW_FEATURE_DABR | FW_FEATURE_COPY |
52 FW_FEATURE_ASR | FW_FEATURE_DEBUG | FW_FEATURE_TERM |
53 FW_FEATURE_PERF | FW_FEATURE_DUMP | FW_FEATURE_INTERRUPT |
54 FW_FEATURE_MIGRATE | FW_FEATURE_PERFMON | FW_FEATURE_CRQ |
55 FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN |
56 FW_FEATURE_BULK | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE |
57 FW_FEATURE_SPLPAR,
58 FW_FEATURE_PSERIES_ALWAYS = 0,
59 FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES,
60 FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES,
61 FW_FEATURE_POSSIBLE =
62#ifdef CONFIG_PPC_PSERIES
63 FW_FEATURE_PSERIES_POSSIBLE |
64#endif
65#ifdef CONFIG_PPC_ISERIES
66 FW_FEATURE_ISERIES_POSSIBLE |
67#endif
68 0,
69 FW_FEATURE_ALWAYS =
70#ifdef CONFIG_PPC_PSERIES
71 FW_FEATURE_PSERIES_ALWAYS &
72#endif
73#ifdef CONFIG_PPC_ISERIES
74 FW_FEATURE_ISERIES_ALWAYS &
75#endif
76 FW_FEATURE_POSSIBLE,
77};
78
79/* This is used to identify firmware features which are available
80 * to the kernel.
81 */
82extern unsigned long ppc64_firmware_features;
83
84static inline unsigned long firmware_has_feature(unsigned long feature)
85{
86 return (FW_FEATURE_ALWAYS & feature) ||
87 (FW_FEATURE_POSSIBLE & ppc64_firmware_features & feature);
88}
89
90#ifdef CONFIG_PPC_PSERIES
91typedef struct {
92 unsigned long val;
93 char * name;
94} firmware_feature_t;
95
96extern firmware_feature_t firmware_features_table[];
97#endif
98
99#endif /* __ASSEMBLY__ */
100#endif /* __KERNEL__ */
101#endif /* __ASM_PPC_FIRMWARE_H */
diff --git a/include/asm-ppc64/hdreg.h b/include/asm-ppc64/hdreg.h
deleted file mode 100644
index 7f7fd1af0af3..000000000000
--- a/include/asm-ppc64/hdreg.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/hdreg.h>
diff --git a/include/asm-ppc64/imalloc.h b/include/asm-ppc64/imalloc.h
index e46ff68a6e41..42adf7033a81 100644
--- a/include/asm-ppc64/imalloc.h
+++ b/include/asm-ppc64/imalloc.h
@@ -6,7 +6,7 @@
6 */ 6 */
7#define PHBS_IO_BASE VMALLOC_END 7#define PHBS_IO_BASE VMALLOC_END
8#define IMALLOC_BASE (PHBS_IO_BASE + 0x80000000ul) /* Reserve 2 gigs for PHBs */ 8#define IMALLOC_BASE (PHBS_IO_BASE + 0x80000000ul) /* Reserve 2 gigs for PHBs */
9#define IMALLOC_END (VMALLOC_START + EADDR_MASK) 9#define IMALLOC_END (VMALLOC_START + PGTABLE_RANGE)
10 10
11 11
12/* imalloc region types */ 12/* imalloc region types */
diff --git a/include/asm-ppc64/ioctl.h b/include/asm-ppc64/ioctl.h
deleted file mode 100644
index 42b8c5da7fbc..000000000000
--- a/include/asm-ppc64/ioctl.h
+++ /dev/null
@@ -1,74 +0,0 @@
1#ifndef _PPC64_IOCTL_H
2#define _PPC64_IOCTL_H
3
4
5/*
6 * This was copied from the alpha as it's a bit cleaner there.
7 * -- Cort
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14
15#define _IOC_NRBITS 8
16#define _IOC_TYPEBITS 8
17#define _IOC_SIZEBITS 13
18#define _IOC_DIRBITS 3
19
20#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1)
21#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1)
22#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1)
23#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1)
24
25#define _IOC_NRSHIFT 0
26#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS)
27#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS)
28#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS)
29
30/*
31 * Direction bits _IOC_NONE could be 0, but OSF/1 gives it a bit.
32 * And this turns out useful to catch old ioctl numbers in header
33 * files for us.
34 */
35#define _IOC_NONE 1U
36#define _IOC_READ 2U
37#define _IOC_WRITE 4U
38
39#define _IOC(dir,type,nr,size) \
40 (((dir) << _IOC_DIRSHIFT) | \
41 ((type) << _IOC_TYPESHIFT) | \
42 ((nr) << _IOC_NRSHIFT) | \
43 ((size) << _IOC_SIZESHIFT))
44
45/* provoke compile error for invalid uses of size argument */
46extern unsigned int __invalid_size_argument_for_IOC;
47#define _IOC_TYPECHECK(t) \
48 ((sizeof(t) == sizeof(t[1]) && \
49 sizeof(t) < (1 << _IOC_SIZEBITS)) ? \
50 sizeof(t) : __invalid_size_argument_for_IOC)
51
52/* used to create numbers */
53#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0)
54#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),(_IOC_TYPECHECK(size)))
55#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size)))
56#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size)))
57#define _IOR_BAD(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size))
58#define _IOW_BAD(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size))
59#define _IOWR_BAD(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size))
60
61/* used to decode them.. */
62#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK)
63#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK)
64#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK)
65#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK)
66
67/* various drivers, such as the pcmcia stuff, need these... */
68#define IOC_IN (_IOC_WRITE << _IOC_DIRSHIFT)
69#define IOC_OUT (_IOC_READ << _IOC_DIRSHIFT)
70#define IOC_INOUT ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT)
71#define IOCSIZE_MASK (_IOC_SIZEMASK << _IOC_SIZESHIFT)
72#define IOCSIZE_SHIFT (_IOC_SIZESHIFT)
73
74#endif /* _PPC64_IOCTL_H */
diff --git a/include/asm-ppc64/ioctls.h b/include/asm-ppc64/ioctls.h
deleted file mode 100644
index 48796bf3e4fc..000000000000
--- a/include/asm-ppc64/ioctls.h
+++ /dev/null
@@ -1,114 +0,0 @@
1#ifndef _ASM_PPC64_IOCTLS_H
2#define _ASM_PPC64_IOCTLS_H
3
4/*
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 */
10
11#include <asm/ioctl.h>
12
13#define FIOCLEX _IO('f', 1)
14#define FIONCLEX _IO('f', 2)
15#define FIOASYNC _IOW('f', 125, int)
16#define FIONBIO _IOW('f', 126, int)
17#define FIONREAD _IOR('f', 127, int)
18#define TIOCINQ FIONREAD
19#define FIOQSIZE _IOR('f', 128, loff_t)
20
21#define TIOCGETP _IOR('t', 8, struct sgttyb)
22#define TIOCSETP _IOW('t', 9, struct sgttyb)
23#define TIOCSETN _IOW('t', 10, struct sgttyb) /* TIOCSETP wo flush */
24
25#define TIOCSETC _IOW('t', 17, struct tchars)
26#define TIOCGETC _IOR('t', 18, struct tchars)
27#define TCGETS _IOR('t', 19, struct termios)
28#define TCSETS _IOW('t', 20, struct termios)
29#define TCSETSW _IOW('t', 21, struct termios)
30#define TCSETSF _IOW('t', 22, struct termios)
31
32#define TCGETA _IOR('t', 23, struct termio)
33#define TCSETA _IOW('t', 24, struct termio)
34#define TCSETAW _IOW('t', 25, struct termio)
35#define TCSETAF _IOW('t', 28, struct termio)
36
37#define TCSBRK _IO('t', 29)
38#define TCXONC _IO('t', 30)
39#define TCFLSH _IO('t', 31)
40
41#define TIOCSWINSZ _IOW('t', 103, struct winsize)
42#define TIOCGWINSZ _IOR('t', 104, struct winsize)
43#define TIOCSTART _IO('t', 110) /* start output, like ^Q */
44#define TIOCSTOP _IO('t', 111) /* stop output, like ^S */
45#define TIOCOUTQ _IOR('t', 115, int) /* output queue size */
46
47#define TIOCGLTC _IOR('t', 116, struct ltchars)
48#define TIOCSLTC _IOW('t', 117, struct ltchars)
49#define TIOCSPGRP _IOW('t', 118, int)
50#define TIOCGPGRP _IOR('t', 119, int)
51
52#define TIOCEXCL 0x540C
53#define TIOCNXCL 0x540D
54#define TIOCSCTTY 0x540E
55
56#define TIOCSTI 0x5412
57#define TIOCMGET 0x5415
58#define TIOCMBIS 0x5416
59#define TIOCMBIC 0x5417
60#define TIOCMSET 0x5418
61# define TIOCM_LE 0x001
62# define TIOCM_DTR 0x002
63# define TIOCM_RTS 0x004
64# define TIOCM_ST 0x008
65# define TIOCM_SR 0x010
66# define TIOCM_CTS 0x020
67# define TIOCM_CAR 0x040
68# define TIOCM_RNG 0x080
69# define TIOCM_DSR 0x100
70# define TIOCM_CD TIOCM_CAR
71# define TIOCM_RI TIOCM_RNG
72
73#define TIOCGSOFTCAR 0x5419
74#define TIOCSSOFTCAR 0x541A
75#define TIOCLINUX 0x541C
76#define TIOCCONS 0x541D
77#define TIOCGSERIAL 0x541E
78#define TIOCSSERIAL 0x541F
79#define TIOCPKT 0x5420
80# define TIOCPKT_DATA 0
81# define TIOCPKT_FLUSHREAD 1
82# define TIOCPKT_FLUSHWRITE 2
83# define TIOCPKT_STOP 4
84# define TIOCPKT_START 8
85# define TIOCPKT_NOSTOP 16
86# define TIOCPKT_DOSTOP 32
87
88
89#define TIOCNOTTY 0x5422
90#define TIOCSETD 0x5423
91#define TIOCGETD 0x5424
92#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */
93#define TIOCSBRK 0x5427 /* BSD compatibility */
94#define TIOCCBRK 0x5428 /* BSD compatibility */
95#define TIOCGSID 0x5429 /* Return the session ID of FD */
96#define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
97#define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */
98
99#define TIOCSERCONFIG 0x5453
100#define TIOCSERGWILD 0x5454
101#define TIOCSERSWILD 0x5455
102#define TIOCGLCKTRMIOS 0x5456
103#define TIOCSLCKTRMIOS 0x5457
104#define TIOCSERGSTRUCT 0x5458 /* For debugging only */
105#define TIOCSERGETLSR 0x5459 /* Get line status register */
106 /* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
107# define TIOCSER_TEMT 0x01 /* Transmitter physically empty */
108#define TIOCSERGETMULTI 0x545A /* Get multiport config */
109#define TIOCSERSETMULTI 0x545B /* Set multiport config */
110
111#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */
112#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */
113
114#endif /* _ASM_PPC64_IOCTLS_H */
diff --git a/include/asm-ppc64/iommu.h b/include/asm-ppc64/iommu.h
index 729de5cc21d9..72dcf8116b04 100644
--- a/include/asm-ppc64/iommu.h
+++ b/include/asm-ppc64/iommu.h
@@ -104,9 +104,6 @@ extern void iommu_devnode_init_pSeries(struct device_node *dn);
104 104
105#ifdef CONFIG_PPC_ISERIES 105#ifdef CONFIG_PPC_ISERIES
106 106
107/* Initializes tables for bio buses */
108extern void __init iommu_vio_init(void);
109
110struct iSeries_Device_Node; 107struct iSeries_Device_Node;
111/* Creates table for an individual device node */ 108/* Creates table for an individual device node */
112extern void iommu_devnode_init_iSeries(struct iSeries_Device_Node *dn); 109extern void iommu_devnode_init_iSeries(struct iSeries_Device_Node *dn);
diff --git a/include/asm-ppc64/ipc.h b/include/asm-ppc64/ipc.h
deleted file mode 100644
index a46e3d9c2a3f..000000000000
--- a/include/asm-ppc64/ipc.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/ipc.h>
diff --git a/include/asm-ppc64/linkage.h b/include/asm-ppc64/linkage.h
deleted file mode 100644
index 291c2d01c44f..000000000000
--- a/include/asm-ppc64/linkage.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef __ASM_LINKAGE_H
2#define __ASM_LINKAGE_H
3
4/* Nothing to see here... */
5
6#endif
diff --git a/include/asm-ppc64/lmb.h b/include/asm-ppc64/lmb.h
index a6cbca21ac1d..cb368bf0f264 100644
--- a/include/asm-ppc64/lmb.h
+++ b/include/asm-ppc64/lmb.h
@@ -22,7 +22,6 @@
22 22
23struct lmb_property { 23struct lmb_property {
24 unsigned long base; 24 unsigned long base;
25 unsigned long physbase;
26 unsigned long size; 25 unsigned long size;
27}; 26};
28 27
diff --git a/include/asm-ppc64/machdep.h b/include/asm-ppc64/machdep.h
index f0ef06375947..ff2c9287d3b6 100644
--- a/include/asm-ppc64/machdep.h
+++ b/include/asm-ppc64/machdep.h
@@ -140,6 +140,9 @@ struct machdep_calls {
140 140
141 /* Idle loop for this platform, leave empty for default idle loop */ 141 /* Idle loop for this platform, leave empty for default idle loop */
142 int (*idle_loop)(void); 142 int (*idle_loop)(void);
143
144 /* Function to enable pmcs for this platform, called once per cpu. */
145 void (*enable_pmcs)(void);
143}; 146};
144 147
145extern int default_idle(void); 148extern int default_idle(void);
diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h
index 70348a851313..ad36bb28de29 100644
--- a/include/asm-ppc64/mmu.h
+++ b/include/asm-ppc64/mmu.h
@@ -28,9 +28,12 @@
28#define STE_VSID_SHIFT 12 28#define STE_VSID_SHIFT 12
29 29
30/* Location of cpu0's segment table */ 30/* Location of cpu0's segment table */
31#define STAB0_PAGE 0x9 31#define STAB0_PAGE 0x6
32#define STAB0_PHYS_ADDR (STAB0_PAGE<<PAGE_SHIFT) 32#define STAB0_PHYS_ADDR (STAB0_PAGE<<PAGE_SHIFT)
33#define STAB0_VIRT_ADDR (KERNELBASE+STAB0_PHYS_ADDR) 33
34#ifndef __ASSEMBLY__
35extern char initial_stab[];
36#endif /* ! __ASSEMBLY */
34 37
35/* 38/*
36 * SLB 39 * SLB
@@ -259,8 +262,10 @@ extern void stabs_alloc(void);
259#define VSID_BITS 36 262#define VSID_BITS 36
260#define VSID_MODULUS ((1UL<<VSID_BITS)-1) 263#define VSID_MODULUS ((1UL<<VSID_BITS)-1)
261 264
262#define CONTEXT_BITS 20 265#define CONTEXT_BITS 19
263#define USER_ESID_BITS 15 266#define USER_ESID_BITS 16
267
268#define USER_VSID_RANGE (1UL << (USER_ESID_BITS + SID_SHIFT))
264 269
265/* 270/*
266 * This macro generates asm code to compute the VSID scramble 271 * This macro generates asm code to compute the VSID scramble
@@ -302,8 +307,7 @@ typedef unsigned long mm_context_id_t;
302typedef struct { 307typedef struct {
303 mm_context_id_t id; 308 mm_context_id_t id;
304#ifdef CONFIG_HUGETLB_PAGE 309#ifdef CONFIG_HUGETLB_PAGE
305 pgd_t *huge_pgdir; 310 u16 low_htlb_areas, high_htlb_areas;
306 u16 htlb_segs; /* bitmask */
307#endif 311#endif
308} mm_context_t; 312} mm_context_t;
309 313
diff --git a/include/asm-ppc64/naca.h b/include/asm-ppc64/naca.h
index bfb7caa32eaf..d2afe6447597 100644
--- a/include/asm-ppc64/naca.h
+++ b/include/asm-ppc64/naca.h
@@ -12,8 +12,6 @@
12 12
13#include <asm/types.h> 13#include <asm/types.h>
14 14
15#ifndef __ASSEMBLY__
16
17struct naca_struct { 15struct naca_struct {
18 /* Kernel only data - undefined for user space */ 16 /* Kernel only data - undefined for user space */
19 void *xItVpdAreas; /* VPD Data 0x00 */ 17 void *xItVpdAreas; /* VPD Data 0x00 */
@@ -23,9 +21,4 @@ struct naca_struct {
23 21
24extern struct naca_struct naca; 22extern struct naca_struct naca;
25 23
26#endif /* __ASSEMBLY__ */
27
28#define NACA_PAGE 0x4
29#define NACA_PHYS_ADDR (NACA_PAGE<<PAGE_SHIFT)
30
31#endif /* _NACA_H */ 24#endif /* _NACA_H */
diff --git a/include/asm-ppc64/namei.h b/include/asm-ppc64/namei.h
deleted file mode 100644
index a1412a2d102a..000000000000
--- a/include/asm-ppc64/namei.h
+++ /dev/null
@@ -1,23 +0,0 @@
1/*
2 * linux/include/asm-ppc/namei.h
3 * Adapted from linux/include/asm-alpha/namei.h
4 *
5 * Included from linux/fs/namei.c
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#ifndef __PPC64_NAMEI_H
14#define __PPC64_NAMEI_H
15
16/* This dummy routine maybe changed to something useful
17 * for /usr/gnemul/ emulation stuff.
18 * Look at asm-sparc/namei.h for details.
19 */
20
21#define __emul_prefix() NULL
22
23#endif /* __PPC64_NAMEI_H */
diff --git a/include/asm-ppc64/page.h b/include/asm-ppc64/page.h
index a5893a305a09..a79a08df62bd 100644
--- a/include/asm-ppc64/page.h
+++ b/include/asm-ppc64/page.h
@@ -37,39 +37,45 @@
37 37
38#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) 38#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
39 39
40/* For 64-bit processes the hugepage range is 1T-1.5T */ 40#define HTLB_AREA_SHIFT 40
41#define TASK_HPAGE_BASE ASM_CONST(0x0000010000000000) 41#define HTLB_AREA_SIZE (1UL << HTLB_AREA_SHIFT)
42#define TASK_HPAGE_END ASM_CONST(0x0000018000000000) 42#define GET_HTLB_AREA(x) ((x) >> HTLB_AREA_SHIFT)
43 43
44#define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \ 44#define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \
45 - (1U << GET_ESID(addr))) & 0xffff) 45 - (1U << GET_ESID(addr))) & 0xffff)
46#define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \
47 - (1U << GET_HTLB_AREA(addr))) & 0xffff)
46 48
47#define ARCH_HAS_HUGEPAGE_ONLY_RANGE 49#define ARCH_HAS_HUGEPAGE_ONLY_RANGE
48#define ARCH_HAS_PREPARE_HUGEPAGE_RANGE 50#define ARCH_HAS_PREPARE_HUGEPAGE_RANGE
51#define ARCH_HAS_SETCLEAR_HUGE_PTE
49 52
50#define touches_hugepage_low_range(mm, addr, len) \ 53#define touches_hugepage_low_range(mm, addr, len) \
51 (LOW_ESID_MASK((addr), (len)) & mm->context.htlb_segs) 54 (LOW_ESID_MASK((addr), (len)) & (mm)->context.low_htlb_areas)
52#define touches_hugepage_high_range(addr, len) \ 55#define touches_hugepage_high_range(mm, addr, len) \
53 (((addr) > (TASK_HPAGE_BASE-(len))) && ((addr) < TASK_HPAGE_END)) 56 (HTLB_AREA_MASK((addr), (len)) & (mm)->context.high_htlb_areas)
54 57
55#define __within_hugepage_low_range(addr, len, segmask) \ 58#define __within_hugepage_low_range(addr, len, segmask) \
56 ((LOW_ESID_MASK((addr), (len)) | (segmask)) == (segmask)) 59 ((LOW_ESID_MASK((addr), (len)) | (segmask)) == (segmask))
57#define within_hugepage_low_range(addr, len) \ 60#define within_hugepage_low_range(addr, len) \
58 __within_hugepage_low_range((addr), (len), \ 61 __within_hugepage_low_range((addr), (len), \
59 current->mm->context.htlb_segs) 62 current->mm->context.low_htlb_areas)
60#define within_hugepage_high_range(addr, len) (((addr) >= TASK_HPAGE_BASE) \ 63#define __within_hugepage_high_range(addr, len, zonemask) \
61 && ((addr)+(len) <= TASK_HPAGE_END) && ((addr)+(len) >= (addr))) 64 ((HTLB_AREA_MASK((addr), (len)) | (zonemask)) == (zonemask))
65#define within_hugepage_high_range(addr, len) \
66 __within_hugepage_high_range((addr), (len), \
67 current->mm->context.high_htlb_areas)
62 68
63#define is_hugepage_only_range(mm, addr, len) \ 69#define is_hugepage_only_range(mm, addr, len) \
64 (touches_hugepage_high_range((addr), (len)) || \ 70 (touches_hugepage_high_range((mm), (addr), (len)) || \
65 touches_hugepage_low_range((mm), (addr), (len))) 71 touches_hugepage_low_range((mm), (addr), (len)))
66#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA 72#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
67 73
68#define in_hugepage_area(context, addr) \ 74#define in_hugepage_area(context, addr) \
69 (cpu_has_feature(CPU_FTR_16M_PAGE) && \ 75 (cpu_has_feature(CPU_FTR_16M_PAGE) && \
70 ( (((addr) >= TASK_HPAGE_BASE) && ((addr) < TASK_HPAGE_END)) || \ 76 ( ((1 << GET_HTLB_AREA(addr)) & (context).high_htlb_areas) || \
71 ( ((addr) < 0x100000000L) && \ 77 ( ((addr) < 0x100000000L) && \
72 ((1 << GET_ESID(addr)) & (context).htlb_segs) ) ) ) 78 ((1 << GET_ESID(addr)) & (context).low_htlb_areas) ) ) )
73 79
74#else /* !CONFIG_HUGETLB_PAGE */ 80#else /* !CONFIG_HUGETLB_PAGE */
75 81
@@ -125,36 +131,42 @@ extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct pag
125 * Entries in the pte table are 64b, while entries in the pgd & pmd are 32b. 131 * Entries in the pte table are 64b, while entries in the pgd & pmd are 32b.
126 */ 132 */
127typedef struct { unsigned long pte; } pte_t; 133typedef struct { unsigned long pte; } pte_t;
128typedef struct { unsigned int pmd; } pmd_t; 134typedef struct { unsigned long pmd; } pmd_t;
129typedef struct { unsigned int pgd; } pgd_t; 135typedef struct { unsigned long pud; } pud_t;
136typedef struct { unsigned long pgd; } pgd_t;
130typedef struct { unsigned long pgprot; } pgprot_t; 137typedef struct { unsigned long pgprot; } pgprot_t;
131 138
132#define pte_val(x) ((x).pte) 139#define pte_val(x) ((x).pte)
133#define pmd_val(x) ((x).pmd) 140#define pmd_val(x) ((x).pmd)
141#define pud_val(x) ((x).pud)
134#define pgd_val(x) ((x).pgd) 142#define pgd_val(x) ((x).pgd)
135#define pgprot_val(x) ((x).pgprot) 143#define pgprot_val(x) ((x).pgprot)
136 144
137#define __pte(x) ((pte_t) { (x) } ) 145#define __pte(x) ((pte_t) { (x) })
138#define __pmd(x) ((pmd_t) { (x) } ) 146#define __pmd(x) ((pmd_t) { (x) })
139#define __pgd(x) ((pgd_t) { (x) } ) 147#define __pud(x) ((pud_t) { (x) })
140#define __pgprot(x) ((pgprot_t) { (x) } ) 148#define __pgd(x) ((pgd_t) { (x) })
149#define __pgprot(x) ((pgprot_t) { (x) })
141 150
142#else 151#else
143/* 152/*
144 * .. while these make it easier on the compiler 153 * .. while these make it easier on the compiler
145 */ 154 */
146typedef unsigned long pte_t; 155typedef unsigned long pte_t;
147typedef unsigned int pmd_t; 156typedef unsigned long pmd_t;
148typedef unsigned int pgd_t; 157typedef unsigned long pud_t;
158typedef unsigned long pgd_t;
149typedef unsigned long pgprot_t; 159typedef unsigned long pgprot_t;
150 160
151#define pte_val(x) (x) 161#define pte_val(x) (x)
152#define pmd_val(x) (x) 162#define pmd_val(x) (x)
163#define pud_val(x) (x)
153#define pgd_val(x) (x) 164#define pgd_val(x) (x)
154#define pgprot_val(x) (x) 165#define pgprot_val(x) (x)
155 166
156#define __pte(x) (x) 167#define __pte(x) (x)
157#define __pmd(x) (x) 168#define __pmd(x) (x)
169#define __pud(x) (x)
158#define __pgd(x) (x) 170#define __pgd(x) (x)
159#define __pgprot(x) (x) 171#define __pgprot(x) (x)
160 172
@@ -208,9 +220,6 @@ extern u64 ppc64_pft_size; /* Log 2 of page table size */
208#define USER_REGION_ID (0UL) 220#define USER_REGION_ID (0UL)
209#define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) 221#define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT)
210 222
211#define __bpn_to_ba(x) ((((unsigned long)(x)) << PAGE_SHIFT) + KERNELBASE)
212#define __ba_to_bpn(x) ((((unsigned long)(x)) & ~REGION_MASK) >> PAGE_SHIFT)
213
214#define __va(x) ((void *)((unsigned long)(x) + KERNELBASE)) 223#define __va(x) ((void *)((unsigned long)(x) + KERNELBASE))
215 224
216#ifdef CONFIG_DISCONTIGMEM 225#ifdef CONFIG_DISCONTIGMEM
diff --git a/include/asm-ppc64/param.h b/include/asm-ppc64/param.h
index 1fad38dcf707..76c212d475b3 100644
--- a/include/asm-ppc64/param.h
+++ b/include/asm-ppc64/param.h
@@ -1,6 +1,8 @@
1#ifndef _ASM_PPC64_PARAM_H 1#ifndef _ASM_PPC64_PARAM_H
2#define _ASM_PPC64_PARAM_H 2#define _ASM_PPC64_PARAM_H
3 3
4#include <linux/config.h>
5
4/* 6/*
5 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
@@ -9,7 +11,7 @@
9 */ 11 */
10 12
11#ifdef __KERNEL__ 13#ifdef __KERNEL__
12# define HZ 1000 /* Internal kernel timer frequency */ 14# define HZ CONFIG_HZ /* Internal kernel timer frequency */
13# define USER_HZ 100 /* .. some user interfaces are in "ticks" */ 15# define USER_HZ 100 /* .. some user interfaces are in "ticks" */
14# define CLOCKS_PER_SEC (USER_HZ) /* like times() */ 16# define CLOCKS_PER_SEC (USER_HZ) /* like times() */
15#endif 17#endif
diff --git a/include/asm-ppc64/percpu.h b/include/asm-ppc64/percpu.h
deleted file mode 100644
index 60a659a4ce1f..000000000000
--- a/include/asm-ppc64/percpu.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef __ARCH_PPC64_PERCPU__
2#define __ARCH_PPC64_PERCPU__
3
4#include <asm-generic/percpu.h>
5
6#endif /* __ARCH_PPC64_PERCPU__ */
diff --git a/include/asm-ppc64/pgalloc.h b/include/asm-ppc64/pgalloc.h
index 4fc4b739b380..26bc49c1108d 100644
--- a/include/asm-ppc64/pgalloc.h
+++ b/include/asm-ppc64/pgalloc.h
@@ -6,7 +6,12 @@
6#include <linux/cpumask.h> 6#include <linux/cpumask.h>
7#include <linux/percpu.h> 7#include <linux/percpu.h>
8 8
9extern kmem_cache_t *zero_cache; 9extern kmem_cache_t *pgtable_cache[];
10
11#define PTE_CACHE_NUM 0
12#define PMD_CACHE_NUM 1
13#define PUD_CACHE_NUM 1
14#define PGD_CACHE_NUM 0
10 15
11/* 16/*
12 * This program is free software; you can redistribute it and/or 17 * This program is free software; you can redistribute it and/or
@@ -15,30 +20,40 @@ extern kmem_cache_t *zero_cache;
15 * 2 of the License, or (at your option) any later version. 20 * 2 of the License, or (at your option) any later version.
16 */ 21 */
17 22
18static inline pgd_t * 23static inline pgd_t *pgd_alloc(struct mm_struct *mm)
19pgd_alloc(struct mm_struct *mm)
20{ 24{
21 return kmem_cache_alloc(zero_cache, GFP_KERNEL); 25 return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL);
22} 26}
23 27
24static inline void 28static inline void pgd_free(pgd_t *pgd)
25pgd_free(pgd_t *pgd)
26{ 29{
27 kmem_cache_free(zero_cache, pgd); 30 kmem_cache_free(pgtable_cache[PGD_CACHE_NUM], pgd);
31}
32
33#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD)
34
35static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
36{
37 return kmem_cache_alloc(pgtable_cache[PUD_CACHE_NUM],
38 GFP_KERNEL|__GFP_REPEAT);
39}
40
41static inline void pud_free(pud_t *pud)
42{
43 kmem_cache_free(pgtable_cache[PUD_CACHE_NUM], pud);
28} 44}
29 45
30#define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD) 46#define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD)
31 47
32static inline pmd_t * 48static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
33pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
34{ 49{
35 return kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); 50 return kmem_cache_alloc(pgtable_cache[PMD_CACHE_NUM],
51 GFP_KERNEL|__GFP_REPEAT);
36} 52}
37 53
38static inline void 54static inline void pmd_free(pmd_t *pmd)
39pmd_free(pmd_t *pmd)
40{ 55{
41 kmem_cache_free(zero_cache, pmd); 56 kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd);
42} 57}
43 58
44#define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, pte) 59#define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, pte)
@@ -47,44 +62,58 @@ pmd_free(pmd_t *pmd)
47 62
48static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) 63static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
49{ 64{
50 return kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); 65 return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM],
66 GFP_KERNEL|__GFP_REPEAT);
51} 67}
52 68
53static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) 69static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
54{ 70{
55 pte_t *pte = kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); 71 return virt_to_page(pte_alloc_one_kernel(mm, address));
56 if (pte)
57 return virt_to_page(pte);
58 return NULL;
59} 72}
60 73
61static inline void pte_free_kernel(pte_t *pte) 74static inline void pte_free_kernel(pte_t *pte)
62{ 75{
63 kmem_cache_free(zero_cache, pte); 76 kmem_cache_free(pgtable_cache[PTE_CACHE_NUM], pte);
64} 77}
65 78
66static inline void pte_free(struct page *ptepage) 79static inline void pte_free(struct page *ptepage)
67{ 80{
68 kmem_cache_free(zero_cache, page_address(ptepage)); 81 pte_free_kernel(page_address(ptepage));
69} 82}
70 83
71struct pte_freelist_batch 84#define PGF_CACHENUM_MASK 0xf
85
86typedef struct pgtable_free {
87 unsigned long val;
88} pgtable_free_t;
89
90static inline pgtable_free_t pgtable_free_cache(void *p, int cachenum,
91 unsigned long mask)
72{ 92{
73 struct rcu_head rcu; 93 BUG_ON(cachenum > PGF_CACHENUM_MASK);
74 unsigned int index;
75 struct page * pages[0];
76};
77 94
78#define PTE_FREELIST_SIZE ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) / \ 95 return (pgtable_free_t){.val = ((unsigned long) p & ~mask) | cachenum};
79 sizeof(struct page *)) 96}
80 97
81extern void pte_free_now(struct page *ptepage); 98static inline void pgtable_free(pgtable_free_t pgf)
82extern void pte_free_submit(struct pte_freelist_batch *batch); 99{
100 void *p = (void *)(pgf.val & ~PGF_CACHENUM_MASK);
101 int cachenum = pgf.val & PGF_CACHENUM_MASK;
83 102
84DECLARE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); 103 kmem_cache_free(pgtable_cache[cachenum], p);
104}
85 105
86void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage); 106void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
87#define __pmd_free_tlb(tlb, pmd) __pte_free_tlb(tlb, virt_to_page(pmd)) 107
108#define __pte_free_tlb(tlb, ptepage) \
109 pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \
110 PTE_CACHE_NUM, PTE_TABLE_SIZE-1))
111#define __pmd_free_tlb(tlb, pmd) \
112 pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \
113 PMD_CACHE_NUM, PMD_TABLE_SIZE-1))
114#define __pud_free_tlb(tlb, pmd) \
115 pgtable_free_tlb(tlb, pgtable_free_cache(pud, \
116 PUD_CACHE_NUM, PUD_TABLE_SIZE-1))
88 117
89#define check_pgt_cache() do { } while (0) 118#define check_pgt_cache() do { } while (0)
90 119
diff --git a/include/asm-ppc64/pgtable.h b/include/asm-ppc64/pgtable.h
index 46cf61c2ff69..c83679c9d2b0 100644
--- a/include/asm-ppc64/pgtable.h
+++ b/include/asm-ppc64/pgtable.h
@@ -15,19 +15,24 @@
15#include <asm/tlbflush.h> 15#include <asm/tlbflush.h>
16#endif /* __ASSEMBLY__ */ 16#endif /* __ASSEMBLY__ */
17 17
18#include <asm-generic/pgtable-nopud.h>
19
20/* 18/*
21 * Entries per page directory level. The PTE level must use a 64b record 19 * Entries per page directory level. The PTE level must use a 64b record
22 * for each page table entry. The PMD and PGD level use a 32b record for 20 * for each page table entry. The PMD and PGD level use a 32b record for
23 * each entry by assuming that each entry is page aligned. 21 * each entry by assuming that each entry is page aligned.
24 */ 22 */
25#define PTE_INDEX_SIZE 9 23#define PTE_INDEX_SIZE 9
26#define PMD_INDEX_SIZE 10 24#define PMD_INDEX_SIZE 7
27#define PGD_INDEX_SIZE 10 25#define PUD_INDEX_SIZE 7
26#define PGD_INDEX_SIZE 9
27
28#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
29#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
30#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
31#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
28 32
29#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) 33#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
30#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) 34#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
35#define PTRS_PER_PUD (1 << PMD_INDEX_SIZE)
31#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) 36#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
32 37
33/* PMD_SHIFT determines what a second-level page table entry can map */ 38/* PMD_SHIFT determines what a second-level page table entry can map */
@@ -35,8 +40,13 @@
35#define PMD_SIZE (1UL << PMD_SHIFT) 40#define PMD_SIZE (1UL << PMD_SHIFT)
36#define PMD_MASK (~(PMD_SIZE-1)) 41#define PMD_MASK (~(PMD_SIZE-1))
37 42
38/* PGDIR_SHIFT determines what a third-level page table entry can map */ 43/* PUD_SHIFT determines what a third-level page table entry can map */
39#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) 44#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
45#define PUD_SIZE (1UL << PUD_SHIFT)
46#define PUD_MASK (~(PUD_SIZE-1))
47
48/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
49#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE)
40#define PGDIR_SIZE (1UL << PGDIR_SHIFT) 50#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
41#define PGDIR_MASK (~(PGDIR_SIZE-1)) 51#define PGDIR_MASK (~(PGDIR_SIZE-1))
42 52
@@ -45,15 +55,23 @@
45/* 55/*
46 * Size of EA range mapped by our pagetables. 56 * Size of EA range mapped by our pagetables.
47 */ 57 */
48#define EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \ 58#define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
49 PGD_INDEX_SIZE + PAGE_SHIFT) 59 PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
50#define EADDR_MASK ((1UL << EADDR_SIZE) - 1) 60#define PGTABLE_RANGE (1UL << PGTABLE_EADDR_SIZE)
61
62#if TASK_SIZE_USER64 > PGTABLE_RANGE
63#error TASK_SIZE_USER64 exceeds pagetable range
64#endif
65
66#if TASK_SIZE_USER64 > (1UL << (USER_ESID_BITS + SID_SHIFT))
67#error TASK_SIZE_USER64 exceeds user VSID range
68#endif
51 69
52/* 70/*
53 * Define the address range of the vmalloc VM area. 71 * Define the address range of the vmalloc VM area.
54 */ 72 */
55#define VMALLOC_START (0xD000000000000000ul) 73#define VMALLOC_START (0xD000000000000000ul)
56#define VMALLOC_SIZE (0x10000000000UL) 74#define VMALLOC_SIZE (0x80000000000UL)
57#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) 75#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE)
58 76
59/* 77/*
@@ -154,8 +172,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
154#ifndef __ASSEMBLY__ 172#ifndef __ASSEMBLY__
155int hash_huge_page(struct mm_struct *mm, unsigned long access, 173int hash_huge_page(struct mm_struct *mm, unsigned long access,
156 unsigned long ea, unsigned long vsid, int local); 174 unsigned long ea, unsigned long vsid, int local);
157
158void hugetlb_mm_free_pgd(struct mm_struct *mm);
159#endif /* __ASSEMBLY__ */ 175#endif /* __ASSEMBLY__ */
160 176
161#define HAVE_ARCH_UNMAPPED_AREA 177#define HAVE_ARCH_UNMAPPED_AREA
@@ -163,7 +179,6 @@ void hugetlb_mm_free_pgd(struct mm_struct *mm);
163#else 179#else
164 180
165#define hash_huge_page(mm,a,ea,vsid,local) -1 181#define hash_huge_page(mm,a,ea,vsid,local) -1
166#define hugetlb_mm_free_pgd(mm) do {} while (0)
167 182
168#endif 183#endif
169 184
@@ -197,39 +212,45 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
197#define pte_pfn(x) ((unsigned long)((pte_val(x) >> PTE_SHIFT))) 212#define pte_pfn(x) ((unsigned long)((pte_val(x) >> PTE_SHIFT)))
198#define pte_page(x) pfn_to_page(pte_pfn(x)) 213#define pte_page(x) pfn_to_page(pte_pfn(x))
199 214
200#define pmd_set(pmdp, ptep) \ 215#define pmd_set(pmdp, ptep) ({BUG_ON((u64)ptep < KERNELBASE); pmd_val(*(pmdp)) = (unsigned long)(ptep);})
201 (pmd_val(*(pmdp)) = __ba_to_bpn(ptep))
202#define pmd_none(pmd) (!pmd_val(pmd)) 216#define pmd_none(pmd) (!pmd_val(pmd))
203#define pmd_bad(pmd) (pmd_val(pmd) == 0) 217#define pmd_bad(pmd) (pmd_val(pmd) == 0)
204#define pmd_present(pmd) (pmd_val(pmd) != 0) 218#define pmd_present(pmd) (pmd_val(pmd) != 0)
205#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) 219#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0)
206#define pmd_page_kernel(pmd) (__bpn_to_ba(pmd_val(pmd))) 220#define pmd_page_kernel(pmd) (pmd_val(pmd))
207#define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd)) 221#define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd))
208 222
209#define pud_set(pudp, pmdp) (pud_val(*(pudp)) = (__ba_to_bpn(pmdp))) 223#define pud_set(pudp, pmdp) (pud_val(*(pudp)) = (unsigned long)(pmdp))
210#define pud_none(pud) (!pud_val(pud)) 224#define pud_none(pud) (!pud_val(pud))
211#define pud_bad(pud) ((pud_val(pud)) == 0UL) 225#define pud_bad(pud) ((pud_val(pud)) == 0)
212#define pud_present(pud) (pud_val(pud) != 0UL) 226#define pud_present(pud) (pud_val(pud) != 0)
213#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) 227#define pud_clear(pudp) (pud_val(*(pudp)) = 0)
214#define pud_page(pud) (__bpn_to_ba(pud_val(pud))) 228#define pud_page(pud) (pud_val(pud))
229
230#define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);})
231#define pgd_none(pgd) (!pgd_val(pgd))
232#define pgd_bad(pgd) (pgd_val(pgd) == 0)
233#define pgd_present(pgd) (pgd_val(pgd) != 0)
234#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0)
235#define pgd_page(pgd) (pgd_val(pgd))
215 236
216/* 237/*
217 * Find an entry in a page-table-directory. We combine the address region 238 * Find an entry in a page-table-directory. We combine the address region
218 * (the high order N bits) and the pgd portion of the address. 239 * (the high order N bits) and the pgd portion of the address.
219 */ 240 */
220/* to avoid overflow in free_pgtables we don't use PTRS_PER_PGD here */ 241/* to avoid overflow in free_pgtables we don't use PTRS_PER_PGD here */
221#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & 0x7ff) 242#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & 0x1ff)
222 243
223#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) 244#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
224 245
225/* Find an entry in the second-level page table.. */ 246#define pud_offset(pgdp, addr) \
247 (((pud_t *) pgd_page(*(pgdp))) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
248
226#define pmd_offset(pudp,addr) \ 249#define pmd_offset(pudp,addr) \
227 ((pmd_t *) pud_page(*(pudp)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) 250 (((pmd_t *) pud_page(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
228 251
229/* Find an entry in the third-level page table.. */
230#define pte_offset_kernel(dir,addr) \ 252#define pte_offset_kernel(dir,addr) \
231 ((pte_t *) pmd_page_kernel(*(dir)) \ 253 (((pte_t *) pmd_page_kernel(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
232 + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
233 254
234#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) 255#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
235#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) 256#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr))
@@ -458,23 +479,20 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
458#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0) 479#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
459 480
460#define pmd_ERROR(e) \ 481#define pmd_ERROR(e) \
461 printk("%s:%d: bad pmd %08x.\n", __FILE__, __LINE__, pmd_val(e)) 482 printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
483#define pud_ERROR(e) \
484 printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pud_val(e))
462#define pgd_ERROR(e) \ 485#define pgd_ERROR(e) \
463 printk("%s:%d: bad pgd %08x.\n", __FILE__, __LINE__, pgd_val(e)) 486 printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
464 487
465extern pgd_t swapper_pg_dir[]; 488extern pgd_t swapper_pg_dir[];
466 489
467extern void paging_init(void); 490extern void paging_init(void);
468 491
469/* 492#ifdef CONFIG_HUGETLB_PAGE
470 * Because the huge pgtables are only 2 level, they can take
471 * at most around 4M, much less than one hugepage which the
472 * process is presumably entitled to use. So we don't bother
473 * freeing up the pagetables on unmap, and wait until
474 * destroy_context() to clean up the lot.
475 */
476#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ 493#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \
477 do { } while (0) 494 free_pgd_range(tlb, addr, end, floor, ceiling)
495#endif
478 496
479/* 497/*
480 * This gets called at the end of handling a page fault, when 498 * This gets called at the end of handling a page fault, when
diff --git a/include/asm-ppc64/pmc.h b/include/asm-ppc64/pmc.h
index c924748c0bea..d1d297dbccfe 100644
--- a/include/asm-ppc64/pmc.h
+++ b/include/asm-ppc64/pmc.h
@@ -26,4 +26,6 @@ typedef void (*perf_irq_t)(struct pt_regs *);
26int reserve_pmc_hardware(perf_irq_t new_perf_irq); 26int reserve_pmc_hardware(perf_irq_t new_perf_irq);
27void release_pmc_hardware(void); 27void release_pmc_hardware(void);
28 28
29void power4_enable_pmcs(void);
30
29#endif /* _PPC64_PMC_H */ 31#endif /* _PPC64_PMC_H */
diff --git a/include/asm-ppc64/poll.h b/include/asm-ppc64/poll.h
deleted file mode 100644
index 370fa3ba0db4..000000000000
--- a/include/asm-ppc64/poll.h
+++ /dev/null
@@ -1,32 +0,0 @@
1#ifndef __PPC64_POLL_H
2#define __PPC64_POLL_H
3
4/*
5 * Copyright (C) 2001 PPC64 Team, IBM Corp
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#define POLLIN 0x0001
14#define POLLPRI 0x0002
15#define POLLOUT 0x0004
16#define POLLERR 0x0008
17#define POLLHUP 0x0010
18#define POLLNVAL 0x0020
19#define POLLRDNORM 0x0040
20#define POLLRDBAND 0x0080
21#define POLLWRNORM 0x0100
22#define POLLWRBAND 0x0200
23#define POLLMSG 0x0400
24#define POLLREMOVE 0x1000
25
26struct pollfd {
27 int fd;
28 short events;
29 short revents;
30};
31
32#endif /* __PPC64_POLL_H */
diff --git a/include/asm-ppc64/processor.h b/include/asm-ppc64/processor.h
index 352306cfb579..7bd4796f1236 100644
--- a/include/asm-ppc64/processor.h
+++ b/include/asm-ppc64/processor.h
@@ -268,6 +268,7 @@
268#define PV_970FX 0x003C 268#define PV_970FX 0x003C
269#define PV_630 0x0040 269#define PV_630 0x0040
270#define PV_630p 0x0041 270#define PV_630p 0x0041
271#define PV_970MP 0x0044
271#define PV_BE 0x0070 272#define PV_BE 0x0070
272 273
273/* Platforms supported by PPC64 */ 274/* Platforms supported by PPC64 */
@@ -382,8 +383,8 @@ extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
382extern struct task_struct *last_task_used_math; 383extern struct task_struct *last_task_used_math;
383extern struct task_struct *last_task_used_altivec; 384extern struct task_struct *last_task_used_altivec;
384 385
385/* 64-bit user address space is 41-bits (2TBs user VM) */ 386/* 64-bit user address space is 44-bits (16TB user VM) */
386#define TASK_SIZE_USER64 (0x0000020000000000UL) 387#define TASK_SIZE_USER64 (0x0000100000000000UL)
387 388
388/* 389/*
389 * 32-bit user address space is 4GB - 1 page 390 * 32-bit user address space is 4GB - 1 page
diff --git a/include/asm-ppc64/prom.h b/include/asm-ppc64/prom.h
index 04b1a84f7ca3..dc5330b39509 100644
--- a/include/asm-ppc64/prom.h
+++ b/include/asm-ppc64/prom.h
@@ -22,13 +22,15 @@
22#define RELOC(x) (*PTRRELOC(&(x))) 22#define RELOC(x) (*PTRRELOC(&(x)))
23 23
24/* Definitions used by the flattened device tree */ 24/* Definitions used by the flattened device tree */
25#define OF_DT_HEADER 0xd00dfeed /* 4: version, 4: total size */ 25#define OF_DT_HEADER 0xd00dfeed /* marker */
26#define OF_DT_BEGIN_NODE 0x1 /* Start node: full name */ 26#define OF_DT_BEGIN_NODE 0x1 /* Start of node, full name */
27#define OF_DT_END_NODE 0x2 /* End node */ 27#define OF_DT_END_NODE 0x2 /* End node */
28#define OF_DT_PROP 0x3 /* Property: name off, size, content */ 28#define OF_DT_PROP 0x3 /* Property: name off, size,
29 * content */
30#define OF_DT_NOP 0x4 /* nop */
29#define OF_DT_END 0x9 31#define OF_DT_END 0x9
30 32
31#define OF_DT_VERSION 1 33#define OF_DT_VERSION 0x10
32 34
33/* 35/*
34 * This is what gets passed to the kernel by prom_init or kexec 36 * This is what gets passed to the kernel by prom_init or kexec
@@ -54,7 +56,9 @@ struct boot_param_header
54 u32 version; /* format version */ 56 u32 version; /* format version */
55 u32 last_comp_version; /* last compatible version */ 57 u32 last_comp_version; /* last compatible version */
56 /* version 2 fields below */ 58 /* version 2 fields below */
57 u32 boot_cpuid_phys; /* Which physical CPU id we're booting on */ 59 u32 boot_cpuid_phys; /* Physical CPU id we're booting on */
60 /* version 3 fields below */
61 u32 dt_strings_size; /* size of the DT strings block */
58}; 62};
59 63
60 64
diff --git a/include/asm-ppc64/resource.h b/include/asm-ppc64/resource.h
deleted file mode 100644
index add031b9dfd4..000000000000
--- a/include/asm-ppc64/resource.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef _PPC64_RESOURCE_H
2#define _PPC64_RESOURCE_H
3
4#include <asm-generic/resource.h>
5
6#endif /* _PPC64_RESOURCE_H */
diff --git a/include/asm-ppc64/shmparam.h b/include/asm-ppc64/shmparam.h
deleted file mode 100644
index b2825ceff05e..000000000000
--- a/include/asm-ppc64/shmparam.h
+++ /dev/null
@@ -1,13 +0,0 @@
1#ifndef _PPC64_SHMPARAM_H
2#define _PPC64_SHMPARAM_H
3
4/*
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 */
10
11#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */
12
13#endif /* _PPC64_SHMPARAM_H */
diff --git a/include/asm-ppc64/socket.h b/include/asm-ppc64/socket.h
index 59e00dfc8b8e..9e1af8eb2d96 100644
--- a/include/asm-ppc64/socket.h
+++ b/include/asm-ppc64/socket.h
@@ -21,6 +21,8 @@
21#define SO_BROADCAST 6 21#define SO_BROADCAST 6
22#define SO_SNDBUF 7 22#define SO_SNDBUF 7
23#define SO_RCVBUF 8 23#define SO_RCVBUF 8
24#define SO_SNDBUFFORCE 32
25#define SO_RCVBUFFORCE 33
24#define SO_KEEPALIVE 9 26#define SO_KEEPALIVE 9
25#define SO_OOBINLINE 10 27#define SO_OOBINLINE 10
26#define SO_NO_CHECK 11 28#define SO_NO_CHECK 11
diff --git a/include/asm-ppc64/string.h b/include/asm-ppc64/string.h
deleted file mode 100644
index eeca68ef1e91..000000000000
--- a/include/asm-ppc64/string.h
+++ /dev/null
@@ -1,35 +0,0 @@
1#ifndef _PPC64_STRING_H_
2#define _PPC64_STRING_H_
3
4/*
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 */
10
11#define __HAVE_ARCH_STRCPY
12#define __HAVE_ARCH_STRNCPY
13#define __HAVE_ARCH_STRLEN
14#define __HAVE_ARCH_STRCMP
15#define __HAVE_ARCH_STRCAT
16#define __HAVE_ARCH_MEMSET
17#define __HAVE_ARCH_MEMCPY
18#define __HAVE_ARCH_MEMMOVE
19#define __HAVE_ARCH_MEMCMP
20#define __HAVE_ARCH_MEMCHR
21
22extern int strcasecmp(const char *, const char *);
23extern int strncasecmp(const char *, const char *, int);
24extern char * strcpy(char *,const char *);
25extern char * strncpy(char *,const char *, __kernel_size_t);
26extern __kernel_size_t strlen(const char *);
27extern int strcmp(const char *,const char *);
28extern char * strcat(char *, const char *);
29extern void * memset(void *,int,__kernel_size_t);
30extern void * memcpy(void *,const void *,__kernel_size_t);
31extern void * memmove(void *,const void *,__kernel_size_t);
32extern int memcmp(const void *,const void *,__kernel_size_t);
33extern void * memchr(const void *,int,__kernel_size_t);
34
35#endif /* _PPC64_STRING_H_ */
diff --git a/include/asm-ppc64/system.h b/include/asm-ppc64/system.h
index 98d120ca8a91..b9e1835351e9 100644
--- a/include/asm-ppc64/system.h
+++ b/include/asm-ppc64/system.h
@@ -88,7 +88,7 @@ DEBUGGER_BOILERPLATE(debugger_dabr_match)
88DEBUGGER_BOILERPLATE(debugger_fault_handler) 88DEBUGGER_BOILERPLATE(debugger_fault_handler)
89 89
90#ifdef CONFIG_XMON 90#ifdef CONFIG_XMON
91extern void xmon_init(void); 91extern void xmon_init(int enable);
92#endif 92#endif
93 93
94#else 94#else
@@ -302,5 +302,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
302 302
303#define arch_align_stack(x) (x) 303#define arch_align_stack(x) (x)
304 304
305extern unsigned long reloc_offset(void);
306
305#endif /* __KERNEL__ */ 307#endif /* __KERNEL__ */
306#endif 308#endif
diff --git a/include/asm-ppc64/unaligned.h b/include/asm-ppc64/unaligned.h
deleted file mode 100644
index 636e93c4f379..000000000000
--- a/include/asm-ppc64/unaligned.h
+++ /dev/null
@@ -1,21 +0,0 @@
1#ifndef __PPC64_UNALIGNED_H
2#define __PPC64_UNALIGNED_H
3
4/*
5 * The PowerPC can do unaligned accesses itself in big endian mode.
6 *
7 * The strange macros are there to make sure these can't
8 * be misused in a way that makes them not work on other
9 * architectures where unaligned accesses aren't as simple.
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation; either version
14 * 2 of the License, or (at your option) any later version.
15 */
16
17#define get_unaligned(ptr) (*(ptr))
18
19#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) ))
20
21#endif /* __PPC64_UNALIGNED_H */
diff --git a/include/asm-ppc64/vio.h b/include/asm-ppc64/vio.h
index 20cd98ee6337..03f1b95f433b 100644
--- a/include/asm-ppc64/vio.h
+++ b/include/asm-ppc64/vio.h
@@ -19,13 +19,15 @@
19#include <linux/errno.h> 19#include <linux/errno.h>
20#include <linux/device.h> 20#include <linux/device.h>
21#include <linux/dma-mapping.h> 21#include <linux/dma-mapping.h>
22#include <linux/mod_devicetable.h>
23
22#include <asm/hvcall.h> 24#include <asm/hvcall.h>
23#include <asm/prom.h>
24#include <asm/scatterlist.h> 25#include <asm/scatterlist.h>
25/* 26
27/*
26 * Architecture-specific constants for drivers to 28 * Architecture-specific constants for drivers to
27 * extract attributes of the device using vio_get_attribute() 29 * extract attributes of the device using vio_get_attribute()
28*/ 30 */
29#define VETH_MAC_ADDR "local-mac-address" 31#define VETH_MAC_ADDR "local-mac-address"
30#define VETH_MCAST_FILTER_SIZE "ibm,mac-address-filters" 32#define VETH_MCAST_FILTER_SIZE "ibm,mac-address-filters"
31 33
@@ -37,64 +39,65 @@
37#define VIO_IRQ_DISABLE 0UL 39#define VIO_IRQ_DISABLE 0UL
38#define VIO_IRQ_ENABLE 1UL 40#define VIO_IRQ_ENABLE 1UL
39 41
40struct vio_dev;
41struct vio_driver;
42struct vio_device_id;
43struct iommu_table; 42struct iommu_table;
44 43
45int vio_register_driver(struct vio_driver *drv); 44/*
46void vio_unregister_driver(struct vio_driver *drv); 45 * The vio_dev structure is used to describe virtual I/O devices.
47 46 */
48#ifdef CONFIG_PPC_PSERIES 47struct vio_dev {
49struct vio_dev * __devinit vio_register_device_node( 48 struct iommu_table *iommu_table; /* vio_map_* uses this */
50 struct device_node *node_vdev); 49 char *name;
51#endif
52void __devinit vio_unregister_device(struct vio_dev *dev);
53struct vio_dev *vio_find_node(struct device_node *vnode);
54
55const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length);
56int vio_get_irq(struct vio_dev *dev);
57int vio_enable_interrupts(struct vio_dev *dev);
58int vio_disable_interrupts(struct vio_dev *dev);
59
60extern struct dma_mapping_ops vio_dma_ops;
61
62extern struct bus_type vio_bus_type;
63
64struct vio_device_id {
65 char *type; 50 char *type;
66 char *compat; 51 uint32_t unit_address;
52 unsigned int irq;
53 struct device dev;
67}; 54};
68 55
69struct vio_driver { 56struct vio_driver {
70 struct list_head node; 57 struct list_head node;
71 char *name; 58 char *name;
72 const struct vio_device_id *id_table; /* NULL if wants all devices */ 59 const struct vio_device_id *id_table;
73 int (*probe) (struct vio_dev *dev, const struct vio_device_id *id); /* New device inserted */ 60 int (*probe)(struct vio_dev *dev, const struct vio_device_id *id);
74 int (*remove) (struct vio_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */ 61 int (*remove)(struct vio_dev *dev);
75 unsigned long driver_data; 62 unsigned long driver_data;
76
77 struct device_driver driver; 63 struct device_driver driver;
78}; 64};
79 65
66struct vio_bus_ops {
67 int (*match)(const struct vio_device_id *id, const struct vio_dev *dev);
68 void (*unregister_device)(struct vio_dev *);
69 void (*release_device)(struct device *);
70};
71
72extern struct dma_mapping_ops vio_dma_ops;
73extern struct bus_type vio_bus_type;
74extern struct vio_dev vio_bus_device;
75
76extern int vio_register_driver(struct vio_driver *drv);
77extern void vio_unregister_driver(struct vio_driver *drv);
78
79extern struct vio_dev * __devinit vio_register_device(struct vio_dev *viodev);
80extern void __devinit vio_unregister_device(struct vio_dev *dev);
81
82extern int vio_bus_init(struct vio_bus_ops *);
83
84#ifdef CONFIG_PPC_PSERIES
85struct device_node;
86
87extern struct vio_dev * __devinit vio_register_device_node(
88 struct device_node *node_vdev);
89extern struct vio_dev *vio_find_node(struct device_node *vnode);
90extern const void *vio_get_attribute(struct vio_dev *vdev, void *which,
91 int *length);
92extern int vio_enable_interrupts(struct vio_dev *dev);
93extern int vio_disable_interrupts(struct vio_dev *dev);
94#endif
95
80static inline struct vio_driver *to_vio_driver(struct device_driver *drv) 96static inline struct vio_driver *to_vio_driver(struct device_driver *drv)
81{ 97{
82 return container_of(drv, struct vio_driver, driver); 98 return container_of(drv, struct vio_driver, driver);
83} 99}
84 100
85/*
86 * The vio_dev structure is used to describe virtual I/O devices.
87 */
88struct vio_dev {
89 struct iommu_table *iommu_table; /* vio_map_* uses this */
90 char *name;
91 char *type;
92 uint32_t unit_address;
93 unsigned int irq;
94
95 struct device dev;
96};
97
98static inline struct vio_dev *to_vio_dev(struct device *dev) 101static inline struct vio_dev *to_vio_dev(struct device *dev)
99{ 102{
100 return container_of(dev, struct vio_dev, dev); 103 return container_of(dev, struct vio_dev, dev);
diff --git a/include/asm-ppc64/xor.h b/include/asm-ppc64/xor.h
deleted file mode 100644
index c82eb12a5b18..000000000000
--- a/include/asm-ppc64/xor.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/xor.h>
diff --git a/include/asm-s390/socket.h b/include/asm-s390/socket.h
index 0e96eeca4e6b..15a5298c8744 100644
--- a/include/asm-s390/socket.h
+++ b/include/asm-s390/socket.h
@@ -22,6 +22,8 @@
22#define SO_BROADCAST 6 22#define SO_BROADCAST 6
23#define SO_SNDBUF 7 23#define SO_SNDBUF 7
24#define SO_RCVBUF 8 24#define SO_RCVBUF 8
25#define SO_SNDBUFFORCE 32
26#define SO_RCVBUFFORCE 33
25#define SO_KEEPALIVE 9 27#define SO_KEEPALIVE 9
26#define SO_OOBINLINE 10 28#define SO_OOBINLINE 10
27#define SO_NO_CHECK 11 29#define SO_NO_CHECK 11
diff --git a/include/asm-sh/socket.h b/include/asm-sh/socket.h
index dde696c3b4c7..553904ff9336 100644
--- a/include/asm-sh/socket.h
+++ b/include/asm-sh/socket.h
@@ -14,6 +14,8 @@
14#define SO_BROADCAST 6 14#define SO_BROADCAST 6
15#define SO_SNDBUF 7 15#define SO_SNDBUF 7
16#define SO_RCVBUF 8 16#define SO_RCVBUF 8
17#define SO_RCVBUFFORCE 32
18#define SO_SNDBUFFORCE 33
17#define SO_KEEPALIVE 9 19#define SO_KEEPALIVE 9
18#define SO_OOBINLINE 10 20#define SO_OOBINLINE 10
19#define SO_NO_CHECK 11 21#define SO_NO_CHECK 11
diff --git a/include/asm-sparc/processor.h b/include/asm-sparc/processor.h
index 32c9699367cf..5a7a1a8d29ac 100644
--- a/include/asm-sparc/processor.h
+++ b/include/asm-sparc/processor.h
@@ -19,7 +19,6 @@
19#include <asm/ptrace.h> 19#include <asm/ptrace.h>
20#include <asm/head.h> 20#include <asm/head.h>
21#include <asm/signal.h> 21#include <asm/signal.h>
22#include <asm/segment.h>
23#include <asm/btfixup.h> 22#include <asm/btfixup.h>
24#include <asm/page.h> 23#include <asm/page.h>
25 24
diff --git a/include/asm-sparc/segment.h b/include/asm-sparc/segment.h
deleted file mode 100644
index a1b7ffc9eec9..000000000000
--- a/include/asm-sparc/segment.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef __SPARC_SEGMENT_H
2#define __SPARC_SEGMENT_H
3
4/* Only here because we have some old header files that expect it.. */
5
6#endif
diff --git a/include/asm-sparc/socket.h b/include/asm-sparc/socket.h
index c1154e3ecfdf..09575b608adb 100644
--- a/include/asm-sparc/socket.h
+++ b/include/asm-sparc/socket.h
@@ -29,6 +29,8 @@
29 29
30#define SO_SNDBUF 0x1001 30#define SO_SNDBUF 0x1001
31#define SO_RCVBUF 0x1002 31#define SO_RCVBUF 0x1002
32#define SO_SNDBUFFORCE 0x100a
33#define SO_RCVBUFFORCE 0x100b
32#define SO_ERROR 0x1007 34#define SO_ERROR 0x1007
33#define SO_TYPE 0x1008 35#define SO_TYPE 0x1008
34 36
diff --git a/include/asm-sparc/system.h b/include/asm-sparc/system.h
index 898562ebe94c..3557781a4bfd 100644
--- a/include/asm-sparc/system.h
+++ b/include/asm-sparc/system.h
@@ -9,7 +9,6 @@
9#include <linux/threads.h> /* NR_CPUS */ 9#include <linux/threads.h> /* NR_CPUS */
10#include <linux/thread_info.h> 10#include <linux/thread_info.h>
11 11
12#include <asm/segment.h>
13#include <asm/page.h> 12#include <asm/page.h>
14#include <asm/psr.h> 13#include <asm/psr.h>
15#include <asm/ptrace.h> 14#include <asm/ptrace.h>
diff --git a/include/asm-sparc64/atomic.h b/include/asm-sparc64/atomic.h
index d80f3379669b..e175afcf2cde 100644
--- a/include/asm-sparc64/atomic.h
+++ b/include/asm-sparc64/atomic.h
@@ -72,10 +72,10 @@ extern int atomic64_sub_ret(int, atomic64_t *);
72 72
73/* Atomic operations are already serializing */ 73/* Atomic operations are already serializing */
74#ifdef CONFIG_SMP 74#ifdef CONFIG_SMP
75#define smp_mb__before_atomic_dec() membar("#StoreLoad | #LoadLoad") 75#define smp_mb__before_atomic_dec() membar_storeload_loadload();
76#define smp_mb__after_atomic_dec() membar("#StoreLoad | #StoreStore") 76#define smp_mb__after_atomic_dec() membar_storeload_storestore();
77#define smp_mb__before_atomic_inc() membar("#StoreLoad | #LoadLoad") 77#define smp_mb__before_atomic_inc() membar_storeload_loadload();
78#define smp_mb__after_atomic_inc() membar("#StoreLoad | #StoreStore") 78#define smp_mb__after_atomic_inc() membar_storeload_storestore();
79#else 79#else
80#define smp_mb__before_atomic_dec() barrier() 80#define smp_mb__before_atomic_dec() barrier()
81#define smp_mb__after_atomic_dec() barrier() 81#define smp_mb__after_atomic_dec() barrier()
diff --git a/include/asm-sparc64/bitops.h b/include/asm-sparc64/bitops.h
index 9c5e71970287..6388b8376c50 100644
--- a/include/asm-sparc64/bitops.h
+++ b/include/asm-sparc64/bitops.h
@@ -72,8 +72,8 @@ static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
72} 72}
73 73
74#ifdef CONFIG_SMP 74#ifdef CONFIG_SMP
75#define smp_mb__before_clear_bit() membar("#StoreLoad | #LoadLoad") 75#define smp_mb__before_clear_bit() membar_storeload_loadload()
76#define smp_mb__after_clear_bit() membar("#StoreLoad | #StoreStore") 76#define smp_mb__after_clear_bit() membar_storeload_storestore()
77#else 77#else
78#define smp_mb__before_clear_bit() barrier() 78#define smp_mb__before_clear_bit() barrier()
79#define smp_mb__after_clear_bit() barrier() 79#define smp_mb__after_clear_bit() barrier()
diff --git a/include/asm-sparc64/processor.h b/include/asm-sparc64/processor.h
index d0bee2413560..3169f3e2237e 100644
--- a/include/asm-sparc64/processor.h
+++ b/include/asm-sparc64/processor.h
@@ -18,7 +18,6 @@
18#include <asm/a.out.h> 18#include <asm/a.out.h>
19#include <asm/pstate.h> 19#include <asm/pstate.h>
20#include <asm/ptrace.h> 20#include <asm/ptrace.h>
21#include <asm/segment.h>
22#include <asm/page.h> 21#include <asm/page.h>
23 22
24/* The sparc has no problems with write protection */ 23/* The sparc has no problems with write protection */
diff --git a/include/asm-sparc64/segment.h b/include/asm-sparc64/segment.h
deleted file mode 100644
index b03e709fc945..000000000000
--- a/include/asm-sparc64/segment.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef __SPARC64_SEGMENT_H
2#define __SPARC64_SEGMENT_H
3
4/* Only here because we have some old header files that expect it.. */
5
6#endif
diff --git a/include/asm-sparc64/sfafsr.h b/include/asm-sparc64/sfafsr.h
new file mode 100644
index 000000000000..2f792c20b53c
--- /dev/null
+++ b/include/asm-sparc64/sfafsr.h
@@ -0,0 +1,82 @@
1#ifndef _SPARC64_SFAFSR_H
2#define _SPARC64_SFAFSR_H
3
4#include <asm/const.h>
5
6/* Spitfire Asynchronous Fault Status register, ASI=0x4C VA<63:0>=0x0 */
7
8#define SFAFSR_ME (_AC(1,UL) << SFAFSR_ME_SHIFT)
9#define SFAFSR_ME_SHIFT 32
10#define SFAFSR_PRIV (_AC(1,UL) << SFAFSR_PRIV_SHIFT)
11#define SFAFSR_PRIV_SHIFT 31
12#define SFAFSR_ISAP (_AC(1,UL) << SFAFSR_ISAP_SHIFT)
13#define SFAFSR_ISAP_SHIFT 30
14#define SFAFSR_ETP (_AC(1,UL) << SFAFSR_ETP_SHIFT)
15#define SFAFSR_ETP_SHIFT 29
16#define SFAFSR_IVUE (_AC(1,UL) << SFAFSR_IVUE_SHIFT)
17#define SFAFSR_IVUE_SHIFT 28
18#define SFAFSR_TO (_AC(1,UL) << SFAFSR_TO_SHIFT)
19#define SFAFSR_TO_SHIFT 27
20#define SFAFSR_BERR (_AC(1,UL) << SFAFSR_BERR_SHIFT)
21#define SFAFSR_BERR_SHIFT 26
22#define SFAFSR_LDP (_AC(1,UL) << SFAFSR_LDP_SHIFT)
23#define SFAFSR_LDP_SHIFT 25
24#define SFAFSR_CP (_AC(1,UL) << SFAFSR_CP_SHIFT)
25#define SFAFSR_CP_SHIFT 24
26#define SFAFSR_WP (_AC(1,UL) << SFAFSR_WP_SHIFT)
27#define SFAFSR_WP_SHIFT 23
28#define SFAFSR_EDP (_AC(1,UL) << SFAFSR_EDP_SHIFT)
29#define SFAFSR_EDP_SHIFT 22
30#define SFAFSR_UE (_AC(1,UL) << SFAFSR_UE_SHIFT)
31#define SFAFSR_UE_SHIFT 21
32#define SFAFSR_CE (_AC(1,UL) << SFAFSR_CE_SHIFT)
33#define SFAFSR_CE_SHIFT 20
34#define SFAFSR_ETS (_AC(0xf,UL) << SFAFSR_ETS_SHIFT)
35#define SFAFSR_ETS_SHIFT 16
36#define SFAFSR_PSYND (_AC(0xffff,UL) << SFAFSR_PSYND_SHIFT)
37#define SFAFSR_PSYND_SHIFT 0
38
39/* UDB Error Register, ASI=0x7f VA<63:0>=0x0(High),0x18(Low) for read
40 * ASI=0x77 VA<63:0>=0x0(High),0x18(Low) for write
41 */
42
43#define UDBE_UE (_AC(1,UL) << 9)
44#define UDBE_CE (_AC(1,UL) << 8)
45#define UDBE_E_SYNDR (_AC(0xff,UL) << 0)
46
47/* The trap handlers for asynchronous errors encode the AFSR and
48 * other pieces of information into a 64-bit argument for C code
49 * encoded as follows:
50 *
51 * -----------------------------------------------
52 * | UDB_H | UDB_L | TL>1 | TT | AFSR |
53 * -----------------------------------------------
54 * 63 54 53 44 42 41 33 32 0
55 *
56 * The AFAR is passed in unchanged.
57 */
58#define SFSTAT_UDBH_MASK (_AC(0x3ff,UL) << SFSTAT_UDBH_SHIFT)
59#define SFSTAT_UDBH_SHIFT 54
60#define SFSTAT_UDBL_MASK (_AC(0x3ff,UL) << SFSTAT_UDBH_SHIFT)
61#define SFSTAT_UDBL_SHIFT 44
62#define SFSTAT_TL_GT_ONE (_AC(1,UL) << SFSTAT_TL_GT_ONE_SHIFT)
63#define SFSTAT_TL_GT_ONE_SHIFT 42
64#define SFSTAT_TRAP_TYPE (_AC(0x1FF,UL) << SFSTAT_TRAP_TYPE_SHIFT)
65#define SFSTAT_TRAP_TYPE_SHIFT 33
66#define SFSTAT_AFSR_MASK (_AC(0x1ffffffff,UL) << SFSTAT_AFSR_SHIFT)
67#define SFSTAT_AFSR_SHIFT 0
68
69/* ESTATE Error Enable Register, ASI=0x4b VA<63:0>=0x0 */
70#define ESTATE_ERR_CE 0x1 /* Correctable errors */
71#define ESTATE_ERR_NCE 0x2 /* TO, BERR, LDP, ETP, EDP, WP, UE, IVUE */
72#define ESTATE_ERR_ISAP 0x4 /* System address parity error */
73#define ESTATE_ERR_ALL (ESTATE_ERR_CE | \
74 ESTATE_ERR_NCE | \
75 ESTATE_ERR_ISAP)
76
77/* The various trap types that report using the above state. */
78#define TRAP_TYPE_IAE 0x09 /* Instruction Access Error */
79#define TRAP_TYPE_DAE 0x32 /* Data Access Error */
80#define TRAP_TYPE_CEE 0x63 /* Correctable ECC Error */
81
82#endif /* _SPARC64_SFAFSR_H */
diff --git a/include/asm-sparc64/socket.h b/include/asm-sparc64/socket.h
index 865547a23908..59987dad3359 100644
--- a/include/asm-sparc64/socket.h
+++ b/include/asm-sparc64/socket.h
@@ -29,6 +29,8 @@
29 29
30#define SO_SNDBUF 0x1001 30#define SO_SNDBUF 0x1001
31#define SO_RCVBUF 0x1002 31#define SO_RCVBUF 0x1002
32#define SO_SNDBUFFORCE 0x100a
33#define SO_RCVBUFFORCE 0x100b
32#define SO_ERROR 0x1007 34#define SO_ERROR 0x1007
33#define SO_TYPE 0x1008 35#define SO_TYPE 0x1008
34 36
diff --git a/include/asm-sparc64/spinlock.h b/include/asm-sparc64/spinlock.h
index 9cb93a5c2b4f..a02c4370eb42 100644
--- a/include/asm-sparc64/spinlock.h
+++ b/include/asm-sparc64/spinlock.h
@@ -43,7 +43,7 @@ typedef struct {
43#define spin_is_locked(lp) ((lp)->lock != 0) 43#define spin_is_locked(lp) ((lp)->lock != 0)
44 44
45#define spin_unlock_wait(lp) \ 45#define spin_unlock_wait(lp) \
46do { membar("#LoadLoad"); \ 46do { rmb(); \
47} while((lp)->lock) 47} while((lp)->lock)
48 48
49static inline void _raw_spin_lock(spinlock_t *lock) 49static inline void _raw_spin_lock(spinlock_t *lock)
@@ -129,15 +129,18 @@ typedef struct {
129#define spin_is_locked(__lock) ((__lock)->lock != 0) 129#define spin_is_locked(__lock) ((__lock)->lock != 0)
130#define spin_unlock_wait(__lock) \ 130#define spin_unlock_wait(__lock) \
131do { \ 131do { \
132 membar("#LoadLoad"); \ 132 rmb(); \
133} while((__lock)->lock) 133} while((__lock)->lock)
134 134
135extern void _do_spin_lock (spinlock_t *lock, char *str); 135extern void _do_spin_lock(spinlock_t *lock, char *str, unsigned long caller);
136extern void _do_spin_unlock (spinlock_t *lock); 136extern void _do_spin_unlock(spinlock_t *lock);
137extern int _do_spin_trylock (spinlock_t *lock); 137extern int _do_spin_trylock(spinlock_t *lock, unsigned long caller);
138 138
139#define _raw_spin_trylock(lp) _do_spin_trylock(lp) 139#define _raw_spin_trylock(lp) \
140#define _raw_spin_lock(lock) _do_spin_lock(lock, "spin_lock") 140 _do_spin_trylock(lp, (unsigned long) __builtin_return_address(0))
141#define _raw_spin_lock(lock) \
142 _do_spin_lock(lock, "spin_lock", \
143 (unsigned long) __builtin_return_address(0))
141#define _raw_spin_unlock(lock) _do_spin_unlock(lock) 144#define _raw_spin_unlock(lock) _do_spin_unlock(lock)
142#define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock) 145#define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock)
143 146
@@ -279,37 +282,41 @@ typedef struct {
279#define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0, 0xff, { } } 282#define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0, 0xff, { } }
280#define rwlock_init(lp) do { *(lp) = RW_LOCK_UNLOCKED; } while(0) 283#define rwlock_init(lp) do { *(lp) = RW_LOCK_UNLOCKED; } while(0)
281 284
282extern void _do_read_lock(rwlock_t *rw, char *str); 285extern void _do_read_lock(rwlock_t *rw, char *str, unsigned long caller);
283extern void _do_read_unlock(rwlock_t *rw, char *str); 286extern void _do_read_unlock(rwlock_t *rw, char *str, unsigned long caller);
284extern void _do_write_lock(rwlock_t *rw, char *str); 287extern void _do_write_lock(rwlock_t *rw, char *str, unsigned long caller);
285extern void _do_write_unlock(rwlock_t *rw); 288extern void _do_write_unlock(rwlock_t *rw, unsigned long caller);
286extern int _do_write_trylock(rwlock_t *rw, char *str); 289extern int _do_write_trylock(rwlock_t *rw, char *str, unsigned long caller);
287 290
288#define _raw_read_lock(lock) \ 291#define _raw_read_lock(lock) \
289do { unsigned long flags; \ 292do { unsigned long flags; \
290 local_irq_save(flags); \ 293 local_irq_save(flags); \
291 _do_read_lock(lock, "read_lock"); \ 294 _do_read_lock(lock, "read_lock", \
295 (unsigned long) __builtin_return_address(0)); \
292 local_irq_restore(flags); \ 296 local_irq_restore(flags); \
293} while(0) 297} while(0)
294 298
295#define _raw_read_unlock(lock) \ 299#define _raw_read_unlock(lock) \
296do { unsigned long flags; \ 300do { unsigned long flags; \
297 local_irq_save(flags); \ 301 local_irq_save(flags); \
298 _do_read_unlock(lock, "read_unlock"); \ 302 _do_read_unlock(lock, "read_unlock", \
303 (unsigned long) __builtin_return_address(0)); \
299 local_irq_restore(flags); \ 304 local_irq_restore(flags); \
300} while(0) 305} while(0)
301 306
302#define _raw_write_lock(lock) \ 307#define _raw_write_lock(lock) \
303do { unsigned long flags; \ 308do { unsigned long flags; \
304 local_irq_save(flags); \ 309 local_irq_save(flags); \
305 _do_write_lock(lock, "write_lock"); \ 310 _do_write_lock(lock, "write_lock", \
311 (unsigned long) __builtin_return_address(0)); \
306 local_irq_restore(flags); \ 312 local_irq_restore(flags); \
307} while(0) 313} while(0)
308 314
309#define _raw_write_unlock(lock) \ 315#define _raw_write_unlock(lock) \
310do { unsigned long flags; \ 316do { unsigned long flags; \
311 local_irq_save(flags); \ 317 local_irq_save(flags); \
312 _do_write_unlock(lock); \ 318 _do_write_unlock(lock, \
319 (unsigned long) __builtin_return_address(0)); \
313 local_irq_restore(flags); \ 320 local_irq_restore(flags); \
314} while(0) 321} while(0)
315 322
@@ -317,7 +324,8 @@ do { unsigned long flags; \
317({ unsigned long flags; \ 324({ unsigned long flags; \
318 int val; \ 325 int val; \
319 local_irq_save(flags); \ 326 local_irq_save(flags); \
320 val = _do_write_trylock(lock, "write_trylock"); \ 327 val = _do_write_trylock(lock, "write_trylock", \
328 (unsigned long) __builtin_return_address(0)); \
321 local_irq_restore(flags); \ 329 local_irq_restore(flags); \
322 val; \ 330 val; \
323}) 331})
diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h
index ee4bdfc6b88f..5e94c05dc2fc 100644
--- a/include/asm-sparc64/system.h
+++ b/include/asm-sparc64/system.h
@@ -28,6 +28,14 @@ enum sparc_cpu {
28#define ARCH_SUN4C_SUN4 0 28#define ARCH_SUN4C_SUN4 0
29#define ARCH_SUN4 0 29#define ARCH_SUN4 0
30 30
31extern void mb(void);
32extern void rmb(void);
33extern void wmb(void);
34extern void membar_storeload(void);
35extern void membar_storeload_storestore(void);
36extern void membar_storeload_loadload(void);
37extern void membar_storestore_loadstore(void);
38
31#endif 39#endif
32 40
33#define setipl(__new_ipl) \ 41#define setipl(__new_ipl) \
@@ -78,16 +86,11 @@ enum sparc_cpu {
78 86
79#define nop() __asm__ __volatile__ ("nop") 87#define nop() __asm__ __volatile__ ("nop")
80 88
81#define membar(type) __asm__ __volatile__ ("membar " type : : : "memory")
82#define mb() \
83 membar("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad")
84#define rmb() membar("#LoadLoad")
85#define wmb() membar("#StoreStore")
86#define read_barrier_depends() do { } while(0) 89#define read_barrier_depends() do { } while(0)
87#define set_mb(__var, __value) \ 90#define set_mb(__var, __value) \
88 do { __var = __value; membar("#StoreLoad | #StoreStore"); } while(0) 91 do { __var = __value; membar_storeload_storestore(); } while(0)
89#define set_wmb(__var, __value) \ 92#define set_wmb(__var, __value) \
90 do { __var = __value; membar("#StoreStore"); } while(0) 93 do { __var = __value; wmb(); } while(0)
91 94
92#ifdef CONFIG_SMP 95#ifdef CONFIG_SMP
93#define smp_mb() mb() 96#define smp_mb() mb()
diff --git a/include/asm-v850/socket.h b/include/asm-v850/socket.h
index 213b852af53e..0240d366a0a4 100644
--- a/include/asm-v850/socket.h
+++ b/include/asm-v850/socket.h
@@ -14,6 +14,8 @@
14#define SO_BROADCAST 6 14#define SO_BROADCAST 6
15#define SO_SNDBUF 7 15#define SO_SNDBUF 7
16#define SO_RCVBUF 8 16#define SO_RCVBUF 8
17#define SO_SNDBUFFORCE 32
18#define SO_RCVBUFFORCE 33
17#define SO_KEEPALIVE 9 19#define SO_KEEPALIVE 9
18#define SO_OOBINLINE 10 20#define SO_OOBINLINE 10
19#define SO_NO_CHECK 11 21#define SO_NO_CHECK 11
diff --git a/include/asm-x86_64/checksum.h b/include/asm-x86_64/checksum.h
index d01356f01448..989469e8e0b7 100644
--- a/include/asm-x86_64/checksum.h
+++ b/include/asm-x86_64/checksum.h
@@ -64,7 +64,7 @@ static inline unsigned short ip_fast_csum(unsigned char *iph, unsigned int ihl)
64 " adcl $0, %0\n" 64 " adcl $0, %0\n"
65 " notl %0\n" 65 " notl %0\n"
66 "2:" 66 "2:"
67 /* Since the input registers which are loaded with iph and ipl 67 /* Since the input registers which are loaded with iph and ihl
68 are modified, we must also specify them as outputs, or gcc 68 are modified, we must also specify them as outputs, or gcc
69 will assume they contain their original values. */ 69 will assume they contain their original values. */
70 : "=r" (sum), "=r" (iph), "=r" (ihl) 70 : "=r" (sum), "=r" (iph), "=r" (ihl)
diff --git a/include/asm-x86_64/socket.h b/include/asm-x86_64/socket.h
index d9a252ea8210..f2cdbeae5d5b 100644
--- a/include/asm-x86_64/socket.h
+++ b/include/asm-x86_64/socket.h
@@ -14,6 +14,8 @@
14#define SO_BROADCAST 6 14#define SO_BROADCAST 6
15#define SO_SNDBUF 7 15#define SO_SNDBUF 7
16#define SO_RCVBUF 8 16#define SO_RCVBUF 8
17#define SO_SNDBUFFORCE 32
18#define SO_RCVBUFFORCE 33
17#define SO_KEEPALIVE 9 19#define SO_KEEPALIVE 9
18#define SO_OOBINLINE 10 20#define SO_OOBINLINE 10
19#define SO_NO_CHECK 11 21#define SO_NO_CHECK 11
diff --git a/include/asm-xtensa/socket.h b/include/asm-xtensa/socket.h
index daccd05a14cd..00f83f3a6d72 100644
--- a/include/asm-xtensa/socket.h
+++ b/include/asm-xtensa/socket.h
@@ -24,6 +24,8 @@
24#define SO_BROADCAST 6 24#define SO_BROADCAST 6
25#define SO_SNDBUF 7 25#define SO_SNDBUF 7
26#define SO_RCVBUF 8 26#define SO_RCVBUF 8
27#define SO_SNDBUFFORCE 32
28#define SO_RCVBUFFORCE 33
27#define SO_KEEPALIVE 9 29#define SO_KEEPALIVE 9
28#define SO_OOBINLINE 10 30#define SO_OOBINLINE 10
29#define SO_NO_CHECK 11 31#define SO_NO_CHECK 11
diff --git a/include/linux/8250_pci.h b/include/linux/8250_pci.h
index 5f3ab21b339b..3209dd46ea7d 100644
--- a/include/linux/8250_pci.h
+++ b/include/linux/8250_pci.h
@@ -1,2 +1,37 @@
1int pci_siig10x_fn(struct pci_dev *dev, int enable); 1/*
2int pci_siig20x_fn(struct pci_dev *dev, int enable); 2 * Definitions for PCI support.
3 */
4#define FL_BASE_MASK 0x0007
5#define FL_BASE0 0x0000
6#define FL_BASE1 0x0001
7#define FL_BASE2 0x0002
8#define FL_BASE3 0x0003
9#define FL_BASE4 0x0004
10#define FL_GET_BASE(x) (x & FL_BASE_MASK)
11
12/* Use successive BARs (PCI base address registers),
13 else use offset into some specified BAR */
14#define FL_BASE_BARS 0x0008
15
16/* do not assign an irq */
17#define FL_NOIRQ 0x0080
18
19/* Use the Base address register size to cap number of ports */
20#define FL_REGION_SZ_CAP 0x0100
21
22struct pciserial_board {
23 unsigned int flags;
24 unsigned int num_ports;
25 unsigned int base_baud;
26 unsigned int uart_offset;
27 unsigned int reg_shift;
28 unsigned int first_offset;
29};
30
31struct serial_private;
32
33struct serial_private *
34pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board);
35void pciserial_remove_ports(struct serial_private *priv);
36void pciserial_suspend_ports(struct serial_private *priv);
37void pciserial_resume_ports(struct serial_private *priv);
diff --git a/include/linux/ata.h b/include/linux/ata.h
index ca5fcadf9981..a5b74efab067 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -1,24 +1,29 @@
1 1
2/* 2/*
3 Copyright 2003-2004 Red Hat, Inc. All rights reserved. 3 * Copyright 2003-2004 Red Hat, Inc. All rights reserved.
4 Copyright 2003-2004 Jeff Garzik 4 * Copyright 2003-2004 Jeff Garzik
5 5 *
6 The contents of this file are subject to the Open 6 *
7 Software License version 1.1 that can be found at 7 * This program is free software; you can redistribute it and/or modify
8 http://www.opensource.org/licenses/osl-1.1.txt and is included herein 8 * it under the terms of the GNU General Public License as published by
9 by reference. 9 * the Free Software Foundation; either version 2, or (at your option)
10 10 * any later version.
11 Alternatively, the contents of this file may be used under the terms 11 *
12 of the GNU General Public License version 2 (the "GPL") as distributed 12 * This program is distributed in the hope that it will be useful,
13 in the kernel source COPYING file, in which case the provisions of 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 the GPL are applicable instead of the above. If you wish to allow 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 the use of your version of this file only under the terms of the 15 * GNU General Public License for more details.
16 GPL and not to allow others to use your version of this file under 16 *
17 the OSL, indicate your decision by deleting the provisions above and 17 * You should have received a copy of the GNU General Public License
18 replace them with the notice and other provisions required by the GPL. 18 * along with this program; see the file COPYING. If not, write to
19 If you do not delete the provisions above, a recipient may use your 19 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
20 version of this file under either the OSL or the GPL. 20 *
21 21 *
22 * libata documentation is available via 'make {ps|pdf}docs',
23 * as Documentation/DocBook/libata.*
24 *
25 * Hardware documentation available from http://www.t13.org/
26 *
22 */ 27 */
23 28
24#ifndef __LINUX_ATA_H__ 29#ifndef __LINUX_ATA_H__
@@ -108,6 +113,8 @@ enum {
108 113
109 /* ATA device commands */ 114 /* ATA device commands */
110 ATA_CMD_CHK_POWER = 0xE5, /* check power mode */ 115 ATA_CMD_CHK_POWER = 0xE5, /* check power mode */
116 ATA_CMD_STANDBY = 0xE2, /* place in standby power mode */
117 ATA_CMD_IDLE = 0xE3, /* place in idle power mode */
111 ATA_CMD_EDD = 0x90, /* execute device diagnostic */ 118 ATA_CMD_EDD = 0x90, /* execute device diagnostic */
112 ATA_CMD_FLUSH = 0xE7, 119 ATA_CMD_FLUSH = 0xE7,
113 ATA_CMD_FLUSH_EXT = 0xEA, 120 ATA_CMD_FLUSH_EXT = 0xEA,
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
new file mode 100644
index 000000000000..007c290f74d4
--- /dev/null
+++ b/include/linux/dccp.h
@@ -0,0 +1,456 @@
1#ifndef _LINUX_DCCP_H
2#define _LINUX_DCCP_H
3
4#include <linux/types.h>
5#include <asm/byteorder.h>
6
7/* Structure describing an Internet (DCCP) socket address. */
8struct sockaddr_dccp {
9 __u16 sdccp_family; /* Address family */
10 __u16 sdccp_port; /* Port number */
11 __u32 sdccp_addr; /* Internet address */
12 __u32 sdccp_service; /* Service */
13 /* Pad to size of `struct sockaddr': 16 bytes . */
14 __u32 sdccp_pad;
15};
16
17/**
18 * struct dccp_hdr - generic part of DCCP packet header
19 *
20 * @dccph_sport - Relevant port on the endpoint that sent this packet
21 * @dccph_dport - Relevant port on the other endpoint
22 * @dccph_doff - Data Offset from the start of the DCCP header, in 32-bit words
23 * @dccph_ccval - Used by the HC-Sender CCID
24 * @dccph_cscov - Parts of the packet that are covered by the Checksum field
25 * @dccph_checksum - Internet checksum, depends on dccph_cscov
26 * @dccph_x - 0 = 24 bit sequence number, 1 = 48
27 * @dccph_type - packet type, see DCCP_PKT_ prefixed macros
28 * @dccph_seq - sequence number high or low order 24 bits, depends on dccph_x
29 */
30struct dccp_hdr {
31 __u16 dccph_sport,
32 dccph_dport;
33 __u8 dccph_doff;
34#if defined(__LITTLE_ENDIAN_BITFIELD)
35 __u8 dccph_cscov:4,
36 dccph_ccval:4;
37#elif defined(__BIG_ENDIAN_BITFIELD)
38 __u8 dccph_ccval:4,
39 dccph_cscov:4;
40#else
41#error "Adjust your <asm/byteorder.h> defines"
42#endif
43 __u16 dccph_checksum;
44#if defined(__LITTLE_ENDIAN_BITFIELD)
45 __u32 dccph_x:1,
46 dccph_type:4,
47 dccph_reserved:3,
48 dccph_seq:24;
49#elif defined(__BIG_ENDIAN_BITFIELD)
50 __u32 dccph_reserved:3,
51 dccph_type:4,
52 dccph_x:1,
53 dccph_seq:24;
54#else
55#error "Adjust your <asm/byteorder.h> defines"
56#endif
57};
58
59/**
60 * struct dccp_hdr_ext - the low bits of a 48 bit seq packet
61 *
62 * @dccph_seq_low - low 24 bits of a 48 bit seq packet
63 */
64struct dccp_hdr_ext {
65 __u32 dccph_seq_low;
66};
67
68/**
69 * struct dccp_hdr_request - Conection initiation request header
70 *
71 * @dccph_req_service - Service to which the client app wants to connect
72 * @dccph_req_options - list of options (must be a multiple of 32 bits
73 */
74struct dccp_hdr_request {
75 __u32 dccph_req_service;
76};
77/**
78 * struct dccp_hdr_ack_bits - acknowledgment bits common to most packets
79 *
80 * @dccph_resp_ack_nr_high - 48 bit ack number high order bits, contains GSR
81 * @dccph_resp_ack_nr_low - 48 bit ack number low order bits, contains GSR
82 */
83struct dccp_hdr_ack_bits {
84 __u32 dccph_reserved1:8,
85 dccph_ack_nr_high:24;
86 __u32 dccph_ack_nr_low;
87};
88/**
89 * struct dccp_hdr_response - Conection initiation response header
90 *
91 * @dccph_resp_ack_nr_high - 48 bit ack number high order bits, contains GSR
92 * @dccph_resp_ack_nr_low - 48 bit ack number low order bits, contains GSR
93 * @dccph_resp_service - Echoes the Service Code on a received DCCP-Request
94 * @dccph_resp_options - list of options (must be a multiple of 32 bits
95 */
96struct dccp_hdr_response {
97 struct dccp_hdr_ack_bits dccph_resp_ack;
98 __u32 dccph_resp_service;
99};
100
101/**
102 * struct dccp_hdr_reset - Unconditionally shut down a connection
103 *
104 * @dccph_reset_service - Echoes the Service Code on a received DCCP-Request
105 * @dccph_reset_options - list of options (must be a multiple of 32 bits
106 */
107struct dccp_hdr_reset {
108 struct dccp_hdr_ack_bits dccph_reset_ack;
109 __u8 dccph_reset_code,
110 dccph_reset_data[3];
111};
112
113enum dccp_pkt_type {
114 DCCP_PKT_REQUEST = 0,
115 DCCP_PKT_RESPONSE,
116 DCCP_PKT_DATA,
117 DCCP_PKT_ACK,
118 DCCP_PKT_DATAACK,
119 DCCP_PKT_CLOSEREQ,
120 DCCP_PKT_CLOSE,
121 DCCP_PKT_RESET,
122 DCCP_PKT_SYNC,
123 DCCP_PKT_SYNCACK,
124 DCCP_PKT_INVALID,
125};
126
127#define DCCP_NR_PKT_TYPES DCCP_PKT_INVALID
128
129static inline unsigned int dccp_packet_hdr_len(const __u8 type)
130{
131 if (type == DCCP_PKT_DATA)
132 return 0;
133 if (type == DCCP_PKT_DATAACK ||
134 type == DCCP_PKT_ACK ||
135 type == DCCP_PKT_SYNC ||
136 type == DCCP_PKT_SYNCACK ||
137 type == DCCP_PKT_CLOSE ||
138 type == DCCP_PKT_CLOSEREQ)
139 return sizeof(struct dccp_hdr_ack_bits);
140 if (type == DCCP_PKT_REQUEST)
141 return sizeof(struct dccp_hdr_request);
142 if (type == DCCP_PKT_RESPONSE)
143 return sizeof(struct dccp_hdr_response);
144 return sizeof(struct dccp_hdr_reset);
145}
146enum dccp_reset_codes {
147 DCCP_RESET_CODE_UNSPECIFIED = 0,
148 DCCP_RESET_CODE_CLOSED,
149 DCCP_RESET_CODE_ABORTED,
150 DCCP_RESET_CODE_NO_CONNECTION,
151 DCCP_RESET_CODE_PACKET_ERROR,
152 DCCP_RESET_CODE_OPTION_ERROR,
153 DCCP_RESET_CODE_MANDATORY_ERROR,
154 DCCP_RESET_CODE_CONNECTION_REFUSED,
155 DCCP_RESET_CODE_BAD_SERVICE_CODE,
156 DCCP_RESET_CODE_TOO_BUSY,
157 DCCP_RESET_CODE_BAD_INIT_COOKIE,
158 DCCP_RESET_CODE_AGGRESSION_PENALTY,
159};
160
161/* DCCP options */
162enum {
163 DCCPO_PADDING = 0,
164 DCCPO_MANDATORY = 1,
165 DCCPO_MIN_RESERVED = 3,
166 DCCPO_MAX_RESERVED = 31,
167 DCCPO_NDP_COUNT = 37,
168 DCCPO_ACK_VECTOR_0 = 38,
169 DCCPO_ACK_VECTOR_1 = 39,
170 DCCPO_TIMESTAMP = 41,
171 DCCPO_TIMESTAMP_ECHO = 42,
172 DCCPO_ELAPSED_TIME = 43,
173 DCCPO_MAX = 45,
174 DCCPO_MIN_CCID_SPECIFIC = 128,
175 DCCPO_MAX_CCID_SPECIFIC = 255,
176};
177
178/* DCCP features */
179enum {
180 DCCPF_RESERVED = 0,
181 DCCPF_SEQUENCE_WINDOW = 3,
182 DCCPF_SEND_ACK_VECTOR = 6,
183 DCCPF_SEND_NDP_COUNT = 7,
184 /* 10-127 reserved */
185 DCCPF_MIN_CCID_SPECIFIC = 128,
186 DCCPF_MAX_CCID_SPECIFIC = 255,
187};
188
189/* DCCP socket options */
190#define DCCP_SOCKOPT_PACKET_SIZE 1
191
192#ifdef __KERNEL__
193
194#include <linux/in.h>
195#include <linux/list.h>
196#include <linux/uio.h>
197#include <linux/workqueue.h>
198
199#include <net/inet_connection_sock.h>
200#include <net/inet_timewait_sock.h>
201#include <net/sock.h>
202#include <net/tcp_states.h>
203#include <net/tcp.h>
204
205enum dccp_state {
206 DCCP_OPEN = TCP_ESTABLISHED,
207 DCCP_REQUESTING = TCP_SYN_SENT,
208 DCCP_PARTOPEN = TCP_FIN_WAIT1, /* FIXME:
209 This mapping is horrible, but TCP has
210 no matching state for DCCP_PARTOPEN,
211 as TCP_SYN_RECV is already used by
212 DCCP_RESPOND, why don't stop using TCP
213 mapping of states? OK, now we don't use
214 sk_stream_sendmsg anymore, so doesn't
215 seem to exist any reason for us to
216 do the TCP mapping here */
217 DCCP_LISTEN = TCP_LISTEN,
218 DCCP_RESPOND = TCP_SYN_RECV,
219 DCCP_CLOSING = TCP_CLOSING,
220 DCCP_TIME_WAIT = TCP_TIME_WAIT,
221 DCCP_CLOSED = TCP_CLOSE,
222 DCCP_MAX_STATES = TCP_MAX_STATES,
223};
224
225#define DCCP_STATE_MASK 0xf
226#define DCCP_ACTION_FIN (1<<7)
227
228enum {
229 DCCPF_OPEN = TCPF_ESTABLISHED,
230 DCCPF_REQUESTING = TCPF_SYN_SENT,
231 DCCPF_PARTOPEN = TCPF_FIN_WAIT1,
232 DCCPF_LISTEN = TCPF_LISTEN,
233 DCCPF_RESPOND = TCPF_SYN_RECV,
234 DCCPF_CLOSING = TCPF_CLOSING,
235 DCCPF_TIME_WAIT = TCPF_TIME_WAIT,
236 DCCPF_CLOSED = TCPF_CLOSE,
237};
238
239static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb)
240{
241 return (struct dccp_hdr *)skb->h.raw;
242}
243
244static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb)
245{
246 return (struct dccp_hdr_ext *)(skb->h.raw + sizeof(struct dccp_hdr));
247}
248
249static inline unsigned int __dccp_basic_hdr_len(const struct dccp_hdr *dh)
250{
251 return sizeof(*dh) + (dh->dccph_x ? sizeof(struct dccp_hdr_ext) : 0);
252}
253
254static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb)
255{
256 const struct dccp_hdr *dh = dccp_hdr(skb);
257 return __dccp_basic_hdr_len(dh);
258}
259
260static inline __u64 dccp_hdr_seq(const struct sk_buff *skb)
261{
262 const struct dccp_hdr *dh = dccp_hdr(skb);
263#if defined(__LITTLE_ENDIAN_BITFIELD)
264 __u64 seq_nr = ntohl(dh->dccph_seq << 8);
265#elif defined(__BIG_ENDIAN_BITFIELD)
266 __u64 seq_nr = ntohl(dh->dccph_seq);
267#else
268#error "Adjust your <asm/byteorder.h> defines"
269#endif
270
271 if (dh->dccph_x != 0)
272 seq_nr = (seq_nr << 32) + ntohl(dccp_hdrx(skb)->dccph_seq_low);
273
274 return seq_nr;
275}
276
277static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb)
278{
279 return (struct dccp_hdr_request *)(skb->h.raw + dccp_basic_hdr_len(skb));
280}
281
282static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb)
283{
284 return (struct dccp_hdr_ack_bits *)(skb->h.raw + dccp_basic_hdr_len(skb));
285}
286
287static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb)
288{
289 const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb);
290#if defined(__LITTLE_ENDIAN_BITFIELD)
291 return (((u64)ntohl(dhack->dccph_ack_nr_high << 8)) << 32) + ntohl(dhack->dccph_ack_nr_low);
292#elif defined(__BIG_ENDIAN_BITFIELD)
293 return (((u64)ntohl(dhack->dccph_ack_nr_high)) << 32) + ntohl(dhack->dccph_ack_nr_low);
294#else
295#error "Adjust your <asm/byteorder.h> defines"
296#endif
297}
298
299static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb)
300{
301 return (struct dccp_hdr_response *)(skb->h.raw + dccp_basic_hdr_len(skb));
302}
303
304static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb)
305{
306 return (struct dccp_hdr_reset *)(skb->h.raw + dccp_basic_hdr_len(skb));
307}
308
309static inline unsigned int __dccp_hdr_len(const struct dccp_hdr *dh)
310{
311 return __dccp_basic_hdr_len(dh) +
312 dccp_packet_hdr_len(dh->dccph_type);
313}
314
315static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
316{
317 return __dccp_hdr_len(dccp_hdr(skb));
318}
319
320
321/* initial values for each feature */
322#define DCCPF_INITIAL_SEQUENCE_WINDOW 100
323/* FIXME: for now we're using CCID 3 (TFRC) */
324#define DCCPF_INITIAL_CCID 3
325#define DCCPF_INITIAL_SEND_ACK_VECTOR 0
326/* FIXME: for now we're default to 1 but it should really be 0 */
327#define DCCPF_INITIAL_SEND_NDP_COUNT 1
328
329#define DCCP_NDP_LIMIT 0xFFFFFF
330
331/**
332 * struct dccp_options - option values for a DCCP connection
333 * @dccpo_sequence_window - Sequence Window Feature (section 7.5.2)
334 * @dccpo_ccid - Congestion Control Id (CCID) (section 10)
335 * @dccpo_send_ack_vector - Send Ack Vector Feature (section 11.5)
336 * @dccpo_send_ndp_count - Send NDP Count Feature (7.7.2)
337 */
338struct dccp_options {
339 __u64 dccpo_sequence_window;
340 __u8 dccpo_ccid;
341 __u8 dccpo_send_ack_vector;
342 __u8 dccpo_send_ndp_count;
343};
344
345extern void __dccp_options_init(struct dccp_options *dccpo);
346extern void dccp_options_init(struct dccp_options *dccpo);
347extern int dccp_parse_options(struct sock *sk, struct sk_buff *skb);
348
349struct dccp_request_sock {
350 struct inet_request_sock dreq_inet_rsk;
351 __u64 dreq_iss;
352 __u64 dreq_isr;
353 __u32 dreq_service;
354};
355
356static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req)
357{
358 return (struct dccp_request_sock *)req;
359}
360
361extern struct inet_timewait_death_row dccp_death_row;
362
363/* Read about the ECN nonce to see why it is 253 */
364#define DCCP_MAX_ACK_VECTOR_LEN 253
365
366struct dccp_options_received {
367 u32 dccpor_ndp:24,
368 dccpor_ack_vector_len:8;
369 u32 dccpor_ack_vector_idx:10;
370 /* 22 bits hole, try to pack */
371 u32 dccpor_timestamp;
372 u32 dccpor_timestamp_echo;
373 u32 dccpor_elapsed_time;
374};
375
376struct ccid;
377
378enum dccp_role {
379 DCCP_ROLE_UNDEFINED,
380 DCCP_ROLE_LISTEN,
381 DCCP_ROLE_CLIENT,
382 DCCP_ROLE_SERVER,
383};
384
385/**
386 * struct dccp_sock - DCCP socket state
387 *
388 * @dccps_swl - sequence number window low
389 * @dccps_swh - sequence number window high
390 * @dccps_awl - acknowledgement number window low
391 * @dccps_awh - acknowledgement number window high
392 * @dccps_iss - initial sequence number sent
393 * @dccps_isr - initial sequence number received
394 * @dccps_osr - first OPEN sequence number received
395 * @dccps_gss - greatest sequence number sent
396 * @dccps_gsr - greatest valid sequence number received
397 * @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss
398 * @dccps_timestamp_time - time of latest TIMESTAMP option
399 * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option
400 * @dccps_ext_header_len - network protocol overhead (IP/IPv6 options)
401 * @dccps_pmtu_cookie - Last pmtu seen by socket
402 * @dccps_packet_size - Set thru setsockopt
403 * @dccps_role - Role of this sock, one of %dccp_role
404 * @dccps_ndp_count - number of Non Data Packets since last data packet
405 * @dccps_hc_rx_ackpkts - receiver half connection acked packets
406 */
407struct dccp_sock {
408 /* inet_connection_sock has to be the first member of dccp_sock */
409 struct inet_connection_sock dccps_inet_connection;
410 __u64 dccps_swl;
411 __u64 dccps_swh;
412 __u64 dccps_awl;
413 __u64 dccps_awh;
414 __u64 dccps_iss;
415 __u64 dccps_isr;
416 __u64 dccps_osr;
417 __u64 dccps_gss;
418 __u64 dccps_gsr;
419 __u64 dccps_gar;
420 unsigned long dccps_service;
421 struct timeval dccps_timestamp_time;
422 __u32 dccps_timestamp_echo;
423 __u32 dccps_packet_size;
424 unsigned long dccps_ndp_count;
425 __u16 dccps_ext_header_len;
426 __u32 dccps_pmtu_cookie;
427 __u32 dccps_mss_cache;
428 struct dccp_options dccps_options;
429 struct dccp_ackpkts *dccps_hc_rx_ackpkts;
430 void *dccps_hc_rx_ccid_private;
431 void *dccps_hc_tx_ccid_private;
432 struct ccid *dccps_hc_rx_ccid;
433 struct ccid *dccps_hc_tx_ccid;
434 struct dccp_options_received dccps_options_received;
435 enum dccp_role dccps_role:2;
436};
437
438static inline struct dccp_sock *dccp_sk(const struct sock *sk)
439{
440 return (struct dccp_sock *)sk;
441}
442
443static inline const char *dccp_role(const struct sock *sk)
444{
445 switch (dccp_sk(sk)->dccps_role) {
446 case DCCP_ROLE_UNDEFINED: return "undefined";
447 case DCCP_ROLE_LISTEN: return "listen";
448 case DCCP_ROLE_SERVER: return "server";
449 case DCCP_ROLE_CLIENT: return "client";
450 }
451 return NULL;
452}
453
454#endif /* __KERNEL__ */
455
456#endif /* _LINUX_DCCP_H */
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index a0ab26aab450..ed1440ea4c91 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -250,6 +250,12 @@ struct ethtool_stats {
250 u64 data[0]; 250 u64 data[0];
251}; 251};
252 252
253struct ethtool_perm_addr {
254 u32 cmd; /* ETHTOOL_GPERMADDR */
255 u32 size;
256 u8 data[0];
257};
258
253struct net_device; 259struct net_device;
254 260
255/* Some generic methods drivers may use in their ethtool_ops */ 261/* Some generic methods drivers may use in their ethtool_ops */
@@ -261,6 +267,8 @@ u32 ethtool_op_get_sg(struct net_device *dev);
261int ethtool_op_set_sg(struct net_device *dev, u32 data); 267int ethtool_op_set_sg(struct net_device *dev, u32 data);
262u32 ethtool_op_get_tso(struct net_device *dev); 268u32 ethtool_op_get_tso(struct net_device *dev);
263int ethtool_op_set_tso(struct net_device *dev, u32 data); 269int ethtool_op_set_tso(struct net_device *dev, u32 data);
270int ethtool_op_get_perm_addr(struct net_device *dev,
271 struct ethtool_perm_addr *addr, u8 *data);
264 272
265/** 273/**
266 * &ethtool_ops - Alter and report network device settings 274 * &ethtool_ops - Alter and report network device settings
@@ -294,7 +302,8 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data);
294 * get_strings: Return a set of strings that describe the requested objects 302 * get_strings: Return a set of strings that describe the requested objects
295 * phys_id: Identify the device 303 * phys_id: Identify the device
296 * get_stats: Return statistics about the device 304 * get_stats: Return statistics about the device
297 * 305 * get_perm_addr: Gets the permanent hardware address
306 *
298 * Description: 307 * Description:
299 * 308 *
300 * get_settings: 309 * get_settings:
@@ -352,6 +361,7 @@ struct ethtool_ops {
352 int (*phys_id)(struct net_device *, u32); 361 int (*phys_id)(struct net_device *, u32);
353 int (*get_stats_count)(struct net_device *); 362 int (*get_stats_count)(struct net_device *);
354 void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, u64 *); 363 void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, u64 *);
364 int (*get_perm_addr)(struct net_device *, struct ethtool_perm_addr *, u8 *);
355 int (*begin)(struct net_device *); 365 int (*begin)(struct net_device *);
356 void (*complete)(struct net_device *); 366 void (*complete)(struct net_device *);
357}; 367};
@@ -389,6 +399,7 @@ struct ethtool_ops {
389#define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */ 399#define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */
390#define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */ 400#define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */
391#define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */ 401#define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */
402#define ETHTOOL_GPERMADDR 0x00000020 /* Get permanent hardware address */
392 403
393/* compatibility with older code */ 404/* compatibility with older code */
394#define SPARC_ETH_GSET ETHTOOL_GSET 405#define SPARC_ETH_GSET ETHTOOL_GSET
@@ -408,6 +419,8 @@ struct ethtool_ops {
408#define SUPPORTED_FIBRE (1 << 10) 419#define SUPPORTED_FIBRE (1 << 10)
409#define SUPPORTED_BNC (1 << 11) 420#define SUPPORTED_BNC (1 << 11)
410#define SUPPORTED_10000baseT_Full (1 << 12) 421#define SUPPORTED_10000baseT_Full (1 << 12)
422#define SUPPORTED_Pause (1 << 13)
423#define SUPPORTED_Asym_Pause (1 << 14)
411 424
412/* Indicates what features are advertised by the interface. */ 425/* Indicates what features are advertised by the interface. */
413#define ADVERTISED_10baseT_Half (1 << 0) 426#define ADVERTISED_10baseT_Half (1 << 0)
@@ -423,6 +436,8 @@ struct ethtool_ops {
423#define ADVERTISED_FIBRE (1 << 10) 436#define ADVERTISED_FIBRE (1 << 10)
424#define ADVERTISED_BNC (1 << 11) 437#define ADVERTISED_BNC (1 << 11)
425#define ADVERTISED_10000baseT_Full (1 << 12) 438#define ADVERTISED_10000baseT_Full (1 << 12)
439#define ADVERTISED_Pause (1 << 13)
440#define ADVERTISED_Asym_Pause (1 << 14)
426 441
427/* The following are all involved in forcing a particular link 442/* The following are all involved in forcing a particular link
428 * mode for the device for setting things. When getting the 443 * mode for the device for setting things. When getting the
diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h
index 9debe6bbe5f0..bab303dafd6e 100644
--- a/include/linux/hippidevice.h
+++ b/include/linux/hippidevice.h
@@ -26,8 +26,12 @@
26#include <linux/if_hippi.h> 26#include <linux/if_hippi.h>
27 27
28#ifdef __KERNEL__ 28#ifdef __KERNEL__
29extern unsigned short hippi_type_trans(struct sk_buff *skb, 29
30 struct net_device *dev); 30struct hippi_cb {
31 __u32 ifield;
32};
33
34extern __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev);
31 35
32extern struct net_device *alloc_hippi_dev(int sizeof_priv); 36extern struct net_device *alloc_hippi_dev(int sizeof_priv);
33#endif 37#endif
diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index b5b58e9c054c..fc2d4c8225aa 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -110,6 +110,8 @@ static inline struct ethhdr *eth_hdr(const struct sk_buff *skb)
110{ 110{
111 return (struct ethhdr *)skb->mac.raw; 111 return (struct ethhdr *)skb->mac.raw;
112} 112}
113
114extern struct ctl_table ether_table[];
113#endif 115#endif
114 116
115#endif /* _LINUX_IF_ETHER_H */ 117#endif /* _LINUX_IF_ETHER_H */
diff --git a/include/linux/if_fc.h b/include/linux/if_fc.h
index 33330b458b95..376a34ea4723 100644
--- a/include/linux/if_fc.h
+++ b/include/linux/if_fc.h
@@ -44,7 +44,7 @@ struct fcllc {
44 __u8 ssap; /* source SAP */ 44 __u8 ssap; /* source SAP */
45 __u8 llc; /* LLC control field */ 45 __u8 llc; /* LLC control field */
46 __u8 protid[3]; /* protocol id */ 46 __u8 protid[3]; /* protocol id */
47 __u16 ethertype; /* ether type field */ 47 __be16 ethertype; /* ether type field */
48}; 48};
49 49
50#endif /* _LINUX_IF_FC_H */ 50#endif /* _LINUX_IF_FC_H */
diff --git a/include/linux/if_fddi.h b/include/linux/if_fddi.h
index a912818e6361..1288a161bc0b 100644
--- a/include/linux/if_fddi.h
+++ b/include/linux/if_fddi.h
@@ -85,7 +85,7 @@ struct fddi_snap_hdr
85 __u8 ssap; /* always 0xAA */ 85 __u8 ssap; /* always 0xAA */
86 __u8 ctrl; /* always 0x03 */ 86 __u8 ctrl; /* always 0x03 */
87 __u8 oui[FDDI_K_OUI_LEN]; /* organizational universal id */ 87 __u8 oui[FDDI_K_OUI_LEN]; /* organizational universal id */
88 __u16 ethertype; /* packet type ID field */ 88 __be16 ethertype; /* packet type ID field */
89 } __attribute__ ((packed)); 89 } __attribute__ ((packed));
90 90
91/* Define FDDI LLC frame header */ 91/* Define FDDI LLC frame header */
diff --git a/include/linux/if_frad.h b/include/linux/if_frad.h
index 3c94b1736570..511999c7eeda 100644
--- a/include/linux/if_frad.h
+++ b/include/linux/if_frad.h
@@ -191,10 +191,12 @@ struct frad_local
191 int buffer; /* current buffer for S508 firmware */ 191 int buffer; /* current buffer for S508 firmware */
192}; 192};
193 193
194extern void dlci_ioctl_set(int (*hook)(unsigned int, void __user *));
195
196#endif /* __KERNEL__ */ 194#endif /* __KERNEL__ */
197 195
198#endif /* CONFIG_DLCI || CONFIG_DLCI_MODULE */ 196#endif /* CONFIG_DLCI || CONFIG_DLCI_MODULE */
199 197
198#ifdef __KERNEL__
199extern void dlci_ioctl_set(int (*hook)(unsigned int, void __user *));
200#endif
201
200#endif 202#endif
diff --git a/include/linux/if_hippi.h b/include/linux/if_hippi.h
index c8ca72c46f76..94d31ca7d71a 100644
--- a/include/linux/if_hippi.h
+++ b/include/linux/if_hippi.h
@@ -102,9 +102,9 @@ struct hippi_fp_hdr
102#error "Please fix <asm/byteorder.h>" 102#error "Please fix <asm/byteorder.h>"
103#endif 103#endif
104#else 104#else
105 __u32 fixed; 105 __be32 fixed;
106#endif 106#endif
107 __u32 d2_size; 107 __be32 d2_size;
108} __attribute__ ((packed)); 108} __attribute__ ((packed));
109 109
110struct hippi_le_hdr 110struct hippi_le_hdr
@@ -144,7 +144,7 @@ struct hippi_snap_hdr
144 __u8 ssap; /* always 0xAA */ 144 __u8 ssap; /* always 0xAA */
145 __u8 ctrl; /* always 0x03 */ 145 __u8 ctrl; /* always 0x03 */
146 __u8 oui[HIPPI_OUI_LEN]; /* organizational universal id (zero)*/ 146 __u8 oui[HIPPI_OUI_LEN]; /* organizational universal id (zero)*/
147 __u16 ethertype; /* packet type ID field */ 147 __be16 ethertype; /* packet type ID field */
148} __attribute__ ((packed)); 148} __attribute__ ((packed));
149 149
150struct hippi_hdr 150struct hippi_hdr
diff --git a/include/linux/if_tr.h b/include/linux/if_tr.h
index 3fba9e2f5427..5502f597cf0e 100644
--- a/include/linux/if_tr.h
+++ b/include/linux/if_tr.h
@@ -43,12 +43,16 @@ struct trh_hdr {
43}; 43};
44 44
45#ifdef __KERNEL__ 45#ifdef __KERNEL__
46#include <linux/config.h>
46#include <linux/skbuff.h> 47#include <linux/skbuff.h>
47 48
48static inline struct trh_hdr *tr_hdr(const struct sk_buff *skb) 49static inline struct trh_hdr *tr_hdr(const struct sk_buff *skb)
49{ 50{
50 return (struct trh_hdr *)skb->mac.raw; 51 return (struct trh_hdr *)skb->mac.raw;
51} 52}
53#ifdef CONFIG_SYSCTL
54extern struct ctl_table tr_table[];
55#endif
52#endif 56#endif
53 57
54/* This is an Token-Ring LLC structure */ 58/* This is an Token-Ring LLC structure */
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 62a9d89dfbe2..17d0c0d40b0e 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -155,7 +155,6 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb,
155{ 155{
156 struct net_device_stats *stats; 156 struct net_device_stats *stats;
157 157
158 skb->real_dev = skb->dev;
159 skb->dev = grp->vlan_devices[vlan_tag & VLAN_VID_MASK]; 158 skb->dev = grp->vlan_devices[vlan_tag & VLAN_VID_MASK];
160 if (skb->dev == NULL) { 159 if (skb->dev == NULL) {
161 dev_kfree_skb_any(skb); 160 dev_kfree_skb_any(skb);
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 0c31ef0b5bad..28f4f3b36950 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -129,6 +129,9 @@ struct igmpv3_query {
129#include <linux/skbuff.h> 129#include <linux/skbuff.h>
130#include <linux/in.h> 130#include <linux/in.h>
131 131
132extern int sysctl_igmp_max_memberships;
133extern int sysctl_igmp_max_msf;
134
132struct ip_sf_socklist 135struct ip_sf_socklist
133{ 136{
134 unsigned int sl_max; 137 unsigned int sl_max;
diff --git a/include/linux/in.h b/include/linux/in.h
index fb88c66d748d..ba355384016a 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -32,6 +32,7 @@ enum {
32 IPPROTO_PUP = 12, /* PUP protocol */ 32 IPPROTO_PUP = 12, /* PUP protocol */
33 IPPROTO_UDP = 17, /* User Datagram Protocol */ 33 IPPROTO_UDP = 17, /* User Datagram Protocol */
34 IPPROTO_IDP = 22, /* XNS IDP protocol */ 34 IPPROTO_IDP = 22, /* XNS IDP protocol */
35 IPPROTO_DCCP = 33, /* Datagram Congestion Control Protocol */
35 IPPROTO_RSVP = 46, /* RSVP protocol */ 36 IPPROTO_RSVP = 46, /* RSVP protocol */
36 IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */ 37 IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */
37 38
diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
new file mode 100644
index 000000000000..a4606e5810e5
--- /dev/null
+++ b/include/linux/inet_diag.h
@@ -0,0 +1,138 @@
1#ifndef _INET_DIAG_H_
2#define _INET_DIAG_H_ 1
3
4/* Just some random number */
5#define TCPDIAG_GETSOCK 18
6#define DCCPDIAG_GETSOCK 19
7
8#define INET_DIAG_GETSOCK_MAX 24
9
10/* Socket identity */
11struct inet_diag_sockid {
12 __u16 idiag_sport;
13 __u16 idiag_dport;
14 __u32 idiag_src[4];
15 __u32 idiag_dst[4];
16 __u32 idiag_if;
17 __u32 idiag_cookie[2];
18#define INET_DIAG_NOCOOKIE (~0U)
19};
20
21/* Request structure */
22
23struct inet_diag_req {
24 __u8 idiag_family; /* Family of addresses. */
25 __u8 idiag_src_len;
26 __u8 idiag_dst_len;
27 __u8 idiag_ext; /* Query extended information */
28
29 struct inet_diag_sockid id;
30
31 __u32 idiag_states; /* States to dump */
32 __u32 idiag_dbs; /* Tables to dump (NI) */
33};
34
35enum {
36 INET_DIAG_REQ_NONE,
37 INET_DIAG_REQ_BYTECODE,
38};
39
40#define INET_DIAG_REQ_MAX INET_DIAG_REQ_BYTECODE
41
42/* Bytecode is sequence of 4 byte commands followed by variable arguments.
43 * All the commands identified by "code" are conditional jumps forward:
44 * to offset cc+"yes" or to offset cc+"no". "yes" is supposed to be
45 * length of the command and its arguments.
46 */
47
48struct inet_diag_bc_op {
49 unsigned char code;
50 unsigned char yes;
51 unsigned short no;
52};
53
54enum {
55 INET_DIAG_BC_NOP,
56 INET_DIAG_BC_JMP,
57 INET_DIAG_BC_S_GE,
58 INET_DIAG_BC_S_LE,
59 INET_DIAG_BC_D_GE,
60 INET_DIAG_BC_D_LE,
61 INET_DIAG_BC_AUTO,
62 INET_DIAG_BC_S_COND,
63 INET_DIAG_BC_D_COND,
64};
65
66struct inet_diag_hostcond {
67 __u8 family;
68 __u8 prefix_len;
69 int port;
70 __u32 addr[0];
71};
72
73/* Base info structure. It contains socket identity (addrs/ports/cookie)
74 * and, alas, the information shown by netstat. */
75struct inet_diag_msg {
76 __u8 idiag_family;
77 __u8 idiag_state;
78 __u8 idiag_timer;
79 __u8 idiag_retrans;
80
81 struct inet_diag_sockid id;
82
83 __u32 idiag_expires;
84 __u32 idiag_rqueue;
85 __u32 idiag_wqueue;
86 __u32 idiag_uid;
87 __u32 idiag_inode;
88};
89
90/* Extensions */
91
92enum {
93 INET_DIAG_NONE,
94 INET_DIAG_MEMINFO,
95 INET_DIAG_INFO,
96 INET_DIAG_VEGASINFO,
97 INET_DIAG_CONG,
98};
99
100#define INET_DIAG_MAX INET_DIAG_CONG
101
102
103/* INET_DIAG_MEM */
104
105struct inet_diag_meminfo {
106 __u32 idiag_rmem;
107 __u32 idiag_wmem;
108 __u32 idiag_fmem;
109 __u32 idiag_tmem;
110};
111
112/* INET_DIAG_VEGASINFO */
113
114struct tcpvegas_info {
115 __u32 tcpv_enabled;
116 __u32 tcpv_rttcnt;
117 __u32 tcpv_rtt;
118 __u32 tcpv_minrtt;
119};
120
121#ifdef __KERNEL__
122struct sock;
123struct inet_hashinfo;
124
125struct inet_diag_handler {
126 struct inet_hashinfo *idiag_hashinfo;
127 void (*idiag_get_info)(struct sock *sk,
128 struct inet_diag_msg *r,
129 void *info);
130 __u16 idiag_info_size;
131 __u16 idiag_type;
132};
133
134extern int inet_diag_register(const struct inet_diag_handler *handler);
135extern void inet_diag_unregister(const struct inet_diag_handler *handler);
136#endif /* __KERNEL__ */
137
138#endif /* _INET_DIAG_H_ */
diff --git a/include/linux/ip.h b/include/linux/ip.h
index 31e7cedd9f84..33e8a19a1a0f 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -196,6 +196,8 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to,
196#endif 196#endif
197#endif 197#endif
198 198
199extern int inet_sk_rebuild_header(struct sock *sk);
200
199struct iphdr { 201struct iphdr {
200#if defined(__LITTLE_ENDIAN_BITFIELD) 202#if defined(__LITTLE_ENDIAN_BITFIELD)
201 __u8 ihl:4, 203 __u8 ihl:4,
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 6fcd6a0ade24..3c7dbc6a0a70 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -193,6 +193,11 @@ struct inet6_skb_parm {
193 193
194#define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) 194#define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb))
195 195
196static inline int inet6_iif(const struct sk_buff *skb)
197{
198 return IP6CB(skb)->iif;
199}
200
196struct tcp6_request_sock { 201struct tcp6_request_sock {
197 struct tcp_request_sock req; 202 struct tcp_request_sock req;
198 struct in6_addr loc_addr; 203 struct in6_addr loc_addr;
@@ -308,6 +313,36 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to,
308 313
309#define __ipv6_only_sock(sk) (inet6_sk(sk)->ipv6only) 314#define __ipv6_only_sock(sk) (inet6_sk(sk)->ipv6only)
310#define ipv6_only_sock(sk) ((sk)->sk_family == PF_INET6 && __ipv6_only_sock(sk)) 315#define ipv6_only_sock(sk) ((sk)->sk_family == PF_INET6 && __ipv6_only_sock(sk))
316
317#include <linux/tcp.h>
318
319struct tcp6_timewait_sock {
320 struct tcp_timewait_sock tw_v6_sk;
321 struct in6_addr tw_v6_daddr;
322 struct in6_addr tw_v6_rcv_saddr;
323};
324
325static inline struct tcp6_timewait_sock *tcp6_twsk(const struct sock *sk)
326{
327 return (struct tcp6_timewait_sock *)sk;
328}
329
330static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk)
331{
332 return likely(sk->sk_state != TCP_TIME_WAIT) ?
333 &inet6_sk(sk)->rcv_saddr : &tcp6_twsk(sk)->tw_v6_rcv_saddr;
334}
335
336static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk)
337{
338 return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL;
339}
340
341static inline int inet_v6_ipv6only(const struct sock *sk)
342{
343 return likely(sk->sk_state != TCP_TIME_WAIT) ?
344 ipv6_only_sock(sk) : inet_twsk(sk)->tw_ipv6only;
345}
311#else 346#else
312#define __ipv6_only_sock(sk) 0 347#define __ipv6_only_sock(sk) 0
313#define ipv6_only_sock(sk) 0 348#define ipv6_only_sock(sk) 0
@@ -322,8 +357,19 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
322 return NULL; 357 return NULL;
323} 358}
324 359
325#endif 360#define __tcp_v6_rcv_saddr(__sk) NULL
361#define tcp_v6_rcv_saddr(__sk) NULL
362#define tcp_twsk_ipv6only(__sk) 0
363#define inet_v6_ipv6only(__sk) 0
364#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
326 365
327#endif 366#define INET6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \
367 (((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \
368 ((__sk)->sk_family == AF_INET6) && \
369 ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \
370 ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \
371 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
328 372
329#endif 373#endif /* __KERNEL__ */
374
375#endif /* _IPV6_H */
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 6cd9ba63563b..fc05a9899288 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1,23 +1,26 @@
1/* 1/*
2 Copyright 2003-2004 Red Hat, Inc. All rights reserved. 2 * Copyright 2003-2005 Red Hat, Inc. All rights reserved.
3 Copyright 2003-2004 Jeff Garzik 3 * Copyright 2003-2005 Jeff Garzik
4 4 *
5 The contents of this file are subject to the Open 5 *
6 Software License version 1.1 that can be found at 6 * This program is free software; you can redistribute it and/or modify
7 http://www.opensource.org/licenses/osl-1.1.txt and is included herein 7 * it under the terms of the GNU General Public License as published by
8 by reference. 8 * the Free Software Foundation; either version 2, or (at your option)
9 9 * any later version.
10 Alternatively, the contents of this file may be used under the terms 10 *
11 of the GNU General Public License version 2 (the "GPL") as distributed 11 * This program is distributed in the hope that it will be useful,
12 in the kernel source COPYING file, in which case the provisions of 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 the GPL are applicable instead of the above. If you wish to allow 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 the use of your version of this file only under the terms of the 14 * GNU General Public License for more details.
15 GPL and not to allow others to use your version of this file under 15 *
16 the OSL, indicate your decision by deleting the provisions above and 16 * You should have received a copy of the GNU General Public License
17 replace them with the notice and other provisions required by the GPL. 17 * along with this program; see the file COPYING. If not, write to
18 If you do not delete the provisions above, a recipient may use your 18 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
19 version of this file under either the OSL or the GPL. 19 *
20 20 *
21 * libata documentation is available via 'make {ps|pdf}docs',
22 * as Documentation/DocBook/libata.*
23 *
21 */ 24 */
22 25
23#ifndef __LINUX_LIBATA_H__ 26#ifndef __LINUX_LIBATA_H__
@@ -113,6 +116,8 @@ enum {
113 ATA_FLAG_MMIO = (1 << 6), /* use MMIO, not PIO */ 116 ATA_FLAG_MMIO = (1 << 6), /* use MMIO, not PIO */
114 ATA_FLAG_SATA_RESET = (1 << 7), /* use COMRESET */ 117 ATA_FLAG_SATA_RESET = (1 << 7), /* use COMRESET */
115 ATA_FLAG_PIO_DMA = (1 << 8), /* PIO cmds via DMA */ 118 ATA_FLAG_PIO_DMA = (1 << 8), /* PIO cmds via DMA */
119 ATA_FLAG_NOINTR = (1 << 9), /* FIXME: Remove this once
120 * proper HSM is in place. */
116 121
117 ATA_QCFLAG_ACTIVE = (1 << 1), /* cmd not yet ack'd to scsi lyer */ 122 ATA_QCFLAG_ACTIVE = (1 << 1), /* cmd not yet ack'd to scsi lyer */
118 ATA_QCFLAG_SG = (1 << 3), /* have s/g table? */ 123 ATA_QCFLAG_SG = (1 << 3), /* have s/g table? */
@@ -363,7 +368,7 @@ struct ata_port_operations {
363 368
364 void (*host_stop) (struct ata_host_set *host_set); 369 void (*host_stop) (struct ata_host_set *host_set);
365 370
366 void (*bmdma_stop) (struct ata_port *ap); 371 void (*bmdma_stop) (struct ata_queued_cmd *qc);
367 u8 (*bmdma_status) (struct ata_port *ap); 372 u8 (*bmdma_status) (struct ata_port *ap);
368}; 373};
369 374
@@ -424,7 +429,7 @@ extern void ata_dev_id_string(u16 *id, unsigned char *s,
424extern void ata_dev_config(struct ata_port *ap, unsigned int i); 429extern void ata_dev_config(struct ata_port *ap, unsigned int i);
425extern void ata_bmdma_setup (struct ata_queued_cmd *qc); 430extern void ata_bmdma_setup (struct ata_queued_cmd *qc);
426extern void ata_bmdma_start (struct ata_queued_cmd *qc); 431extern void ata_bmdma_start (struct ata_queued_cmd *qc);
427extern void ata_bmdma_stop(struct ata_port *ap); 432extern void ata_bmdma_stop(struct ata_queued_cmd *qc);
428extern u8 ata_bmdma_status(struct ata_port *ap); 433extern u8 ata_bmdma_status(struct ata_port *ap);
429extern void ata_bmdma_irq_clear(struct ata_port *ap); 434extern void ata_bmdma_irq_clear(struct ata_port *ap);
430extern void ata_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat); 435extern void ata_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat);
@@ -644,7 +649,7 @@ static inline void scr_write(struct ata_port *ap, unsigned int reg, u32 val)
644 ap->ops->scr_write(ap, reg, val); 649 ap->ops->scr_write(ap, reg, val);
645} 650}
646 651
647static inline void scr_write_flush(struct ata_port *ap, unsigned int reg, 652static inline void scr_write_flush(struct ata_port *ap, unsigned int reg,
648 u32 val) 653 u32 val)
649{ 654{
650 ap->ops->scr_write(ap, reg, val); 655 ap->ops->scr_write(ap, reg, val);
diff --git a/include/linux/list.h b/include/linux/list.h
index aab2db21b013..e6ec59682274 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -419,6 +419,20 @@ static inline void list_splice_init(struct list_head *list,
419 pos = n, n = list_entry(n->member.next, typeof(*n), member)) 419 pos = n, n = list_entry(n->member.next, typeof(*n), member))
420 420
421/** 421/**
422 * list_for_each_entry_safe_continue - iterate over list of given type
423 * continuing after existing point safe against removal of list entry
424 * @pos: the type * to use as a loop counter.
425 * @n: another type * to use as temporary storage
426 * @head: the head for your list.
427 * @member: the name of the list_struct within the struct.
428 */
429#define list_for_each_entry_safe_continue(pos, n, head, member) \
430 for (pos = list_entry(pos->member.next, typeof(*pos), member), \
431 n = list_entry(pos->member.next, typeof(*pos), member); \
432 &pos->member != (head); \
433 pos = n, n = list_entry(n->member.next, typeof(*n), member))
434
435/**
422 * list_for_each_rcu - iterate over an rcu-protected list 436 * list_for_each_rcu - iterate over an rcu-protected list
423 * @pos: the &struct list_head to use as a loop counter. 437 * @pos: the &struct list_head to use as a loop counter.
424 * @head: the head for your list. 438 * @head: the head for your list.
@@ -620,6 +634,57 @@ static inline void hlist_add_after(struct hlist_node *n,
620 next->next->pprev = &next->next; 634 next->next->pprev = &next->next;
621} 635}
622 636
637/**
638 * hlist_add_before_rcu - adds the specified element to the specified hlist
639 * before the specified node while permitting racing traversals.
640 * @n: the new element to add to the hash list.
641 * @next: the existing element to add the new element before.
642 *
643 * The caller must take whatever precautions are necessary
644 * (such as holding appropriate locks) to avoid racing
645 * with another list-mutation primitive, such as hlist_add_head_rcu()
646 * or hlist_del_rcu(), running on this same list.
647 * However, it is perfectly legal to run concurrently with
648 * the _rcu list-traversal primitives, such as
649 * hlist_for_each_rcu(), used to prevent memory-consistency
650 * problems on Alpha CPUs.
651 */
652static inline void hlist_add_before_rcu(struct hlist_node *n,
653 struct hlist_node *next)
654{
655 n->pprev = next->pprev;
656 n->next = next;
657 smp_wmb();
658 next->pprev = &n->next;
659 *(n->pprev) = n;
660}
661
662/**
663 * hlist_add_after_rcu - adds the specified element to the specified hlist
664 * after the specified node while permitting racing traversals.
665 * @prev: the existing element to add the new element after.
666 * @n: the new element to add to the hash list.
667 *
668 * The caller must take whatever precautions are necessary
669 * (such as holding appropriate locks) to avoid racing
670 * with another list-mutation primitive, such as hlist_add_head_rcu()
671 * or hlist_del_rcu(), running on this same list.
672 * However, it is perfectly legal to run concurrently with
673 * the _rcu list-traversal primitives, such as
674 * hlist_for_each_rcu(), used to prevent memory-consistency
675 * problems on Alpha CPUs.
676 */
677static inline void hlist_add_after_rcu(struct hlist_node *prev,
678 struct hlist_node *n)
679{
680 n->next = prev->next;
681 n->pprev = &prev->next;
682 smp_wmb();
683 prev->next = n;
684 if (n->next)
685 n->next->pprev = &n->next;
686}
687
623#define hlist_entry(ptr, type, member) container_of(ptr,type,member) 688#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
624 689
625#define hlist_for_each(pos, head) \ 690#define hlist_for_each(pos, head) \
diff --git a/include/linux/mii.h b/include/linux/mii.h
index 374b615ea9ea..9b8d0476988a 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -22,6 +22,7 @@
22#define MII_EXPANSION 0x06 /* Expansion register */ 22#define MII_EXPANSION 0x06 /* Expansion register */
23#define MII_CTRL1000 0x09 /* 1000BASE-T control */ 23#define MII_CTRL1000 0x09 /* 1000BASE-T control */
24#define MII_STAT1000 0x0a /* 1000BASE-T status */ 24#define MII_STAT1000 0x0a /* 1000BASE-T status */
25#define MII_ESTATUS 0x0f /* Extended Status */
25#define MII_DCOUNTER 0x12 /* Disconnect counter */ 26#define MII_DCOUNTER 0x12 /* Disconnect counter */
26#define MII_FCSCOUNTER 0x13 /* False carrier counter */ 27#define MII_FCSCOUNTER 0x13 /* False carrier counter */
27#define MII_NWAYTEST 0x14 /* N-way auto-neg test reg */ 28#define MII_NWAYTEST 0x14 /* N-way auto-neg test reg */
@@ -54,7 +55,10 @@
54#define BMSR_ANEGCAPABLE 0x0008 /* Able to do auto-negotiation */ 55#define BMSR_ANEGCAPABLE 0x0008 /* Able to do auto-negotiation */
55#define BMSR_RFAULT 0x0010 /* Remote fault detected */ 56#define BMSR_RFAULT 0x0010 /* Remote fault detected */
56#define BMSR_ANEGCOMPLETE 0x0020 /* Auto-negotiation complete */ 57#define BMSR_ANEGCOMPLETE 0x0020 /* Auto-negotiation complete */
57#define BMSR_RESV 0x07c0 /* Unused... */ 58#define BMSR_RESV 0x00c0 /* Unused... */
59#define BMSR_ESTATEN 0x0100 /* Extended Status in R15 */
60#define BMSR_100FULL2 0x0200 /* Can do 100BASE-T2 HDX */
61#define BMSR_100HALF2 0x0400 /* Can do 100BASE-T2 FDX */
58#define BMSR_10HALF 0x0800 /* Can do 10mbps, half-duplex */ 62#define BMSR_10HALF 0x0800 /* Can do 10mbps, half-duplex */
59#define BMSR_10FULL 0x1000 /* Can do 10mbps, full-duplex */ 63#define BMSR_10FULL 0x1000 /* Can do 10mbps, full-duplex */
60#define BMSR_100HALF 0x2000 /* Can do 100mbps, half-duplex */ 64#define BMSR_100HALF 0x2000 /* Can do 100mbps, half-duplex */
@@ -114,6 +118,9 @@
114#define EXPANSION_MFAULTS 0x0010 /* Multiple faults detected */ 118#define EXPANSION_MFAULTS 0x0010 /* Multiple faults detected */
115#define EXPANSION_RESV 0xffe0 /* Unused... */ 119#define EXPANSION_RESV 0xffe0 /* Unused... */
116 120
121#define ESTATUS_1000_TFULL 0x2000 /* Can do 1000BT Full */
122#define ESTATUS_1000_THALF 0x1000 /* Can do 1000BT Half */
123
117/* N-way test register. */ 124/* N-way test register. */
118#define NWAYTEST_RESV1 0x00ff /* Unused... */ 125#define NWAYTEST_RESV1 0x00ff /* Unused... */
119#define NWAYTEST_LOOPBACK 0x0100 /* Enable loopback for N-way */ 126#define NWAYTEST_LOOPBACK 0x0100 /* Enable loopback for N-way */
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index f90f674eb3b0..9a0893f3249e 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -63,11 +63,12 @@ struct device;
63 63
64struct mmc_host { 64struct mmc_host {
65 struct device *dev; 65 struct device *dev;
66 struct class_device class_dev;
67 int index;
66 struct mmc_host_ops *ops; 68 struct mmc_host_ops *ops;
67 unsigned int f_min; 69 unsigned int f_min;
68 unsigned int f_max; 70 unsigned int f_max;
69 u32 ocr_avail; 71 u32 ocr_avail;
70 char host_name[8];
71 72
72 /* host specific block data */ 73 /* host specific block data */
73 unsigned int max_seg_size; /* see blk_queue_max_segment_size */ 74 unsigned int max_seg_size; /* see blk_queue_max_segment_size */
@@ -97,6 +98,7 @@ extern void mmc_free_host(struct mmc_host *);
97 98
98#define mmc_priv(x) ((void *)((x) + 1)) 99#define mmc_priv(x) ((void *)((x) + 1))
99#define mmc_dev(x) ((x)->dev) 100#define mmc_dev(x) ((x)->dev)
101#define mmc_hostname(x) ((x)->class_dev.class_id)
100 102
101extern int mmc_suspend_host(struct mmc_host *, pm_message_t); 103extern int mmc_suspend_host(struct mmc_host *, pm_message_t);
102extern int mmc_resume_host(struct mmc_host *); 104extern int mmc_resume_host(struct mmc_host *);
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index dce53ac1625d..47da39ba3f03 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -1,6 +1,6 @@
1/* 1/*
2 * Device tables which are exported to userspace via 2 * Device tables which are exported to userspace via
3 * scripts/table2alias.c. You must keep that file in sync with this 3 * scripts/mod/file2alias.c. You must keep that file in sync with this
4 * header. 4 * header.
5 */ 5 */
6 6
@@ -33,7 +33,8 @@ struct ieee1394_device_id {
33 __u32 model_id; 33 __u32 model_id;
34 __u32 specifier_id; 34 __u32 specifier_id;
35 __u32 version; 35 __u32 version;
36 kernel_ulong_t driver_data; 36 kernel_ulong_t driver_data
37 __attribute__((aligned(sizeof(kernel_ulong_t))));
37}; 38};
38 39
39 40
@@ -182,9 +183,18 @@ struct of_device_id
182 char name[32]; 183 char name[32];
183 char type[32]; 184 char type[32];
184 char compatible[128]; 185 char compatible[128];
186#if __KERNEL__
185 void *data; 187 void *data;
188#else
189 kernel_ulong_t data;
190#endif
186}; 191};
187 192
193/* VIO */
194struct vio_device_id {
195 char type[32];
196 char compat[32];
197};
188 198
189/* PCMCIA */ 199/* PCMCIA */
190 200
@@ -208,7 +218,8 @@ struct pcmcia_device_id {
208#ifdef __KERNEL__ 218#ifdef __KERNEL__
209 const char * prod_id[4]; 219 const char * prod_id[4];
210#else 220#else
211 kernel_ulong_t prod_id[4]; 221 kernel_ulong_t prod_id[4]
222 __attribute__((aligned(sizeof(kernel_ulong_t))));
212#endif 223#endif
213 224
214 /* not matched against */ 225 /* not matched against */
diff --git a/include/linux/net.h b/include/linux/net.h
index 20cb226b2268..4e981585a89a 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -84,6 +84,7 @@ enum sock_type {
84 SOCK_RAW = 3, 84 SOCK_RAW = 3,
85 SOCK_RDM = 4, 85 SOCK_RDM = 4,
86 SOCK_SEQPACKET = 5, 86 SOCK_SEQPACKET = 5,
87 SOCK_DCCP = 6,
87 SOCK_PACKET = 10, 88 SOCK_PACKET = 10,
88}; 89};
89 90
@@ -282,5 +283,15 @@ static struct proto_ops name##_ops = { \
282#define MODULE_ALIAS_NETPROTO(proto) \ 283#define MODULE_ALIAS_NETPROTO(proto) \
283 MODULE_ALIAS("net-pf-" __stringify(proto)) 284 MODULE_ALIAS("net-pf-" __stringify(proto))
284 285
286#define MODULE_ALIAS_NET_PF_PROTO(pf, proto) \
287 MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto))
288
289#ifdef CONFIG_SYSCTL
290#include <linux/sysctl.h>
291extern ctl_table net_table[];
292extern int net_msg_cost;
293extern int net_msg_burst;
294#endif
295
285#endif /* __KERNEL__ */ 296#endif /* __KERNEL__ */
286#endif /* _LINUX_NET_H */ 297#endif /* _LINUX_NET_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3a0ed7f9e801..7c717907896d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -244,6 +244,7 @@ struct netdev_boot_setup {
244}; 244};
245#define NETDEV_BOOT_SETUP_MAX 8 245#define NETDEV_BOOT_SETUP_MAX 8
246 246
247extern int __init netdev_boot_setup(char *str);
247 248
248/* 249/*
249 * The DEVICE structure. 250 * The DEVICE structure.
@@ -336,6 +337,7 @@ struct net_device
336 /* Interface address info. */ 337 /* Interface address info. */
337 unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ 338 unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */
338 unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */ 339 unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */
340 unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */
339 unsigned char addr_len; /* hardware address length */ 341 unsigned char addr_len; /* hardware address length */
340 unsigned short dev_id; /* for shared network cards */ 342 unsigned short dev_id; /* for shared network cards */
341 343
@@ -497,10 +499,12 @@ static inline void *netdev_priv(struct net_device *dev)
497#define SET_NETDEV_DEV(net, pdev) ((net)->class_dev.dev = (pdev)) 499#define SET_NETDEV_DEV(net, pdev) ((net)->class_dev.dev = (pdev))
498 500
499struct packet_type { 501struct packet_type {
500 __be16 type; /* This is really htons(ether_type). */ 502 __be16 type; /* This is really htons(ether_type). */
501 struct net_device *dev; /* NULL is wildcarded here */ 503 struct net_device *dev; /* NULL is wildcarded here */
502 int (*func) (struct sk_buff *, struct net_device *, 504 int (*func) (struct sk_buff *,
503 struct packet_type *); 505 struct net_device *,
506 struct packet_type *,
507 struct net_device *);
504 void *af_packet_priv; 508 void *af_packet_priv;
505 struct list_head list; 509 struct list_head list;
506}; 510};
@@ -671,6 +675,7 @@ extern void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
671extern void dev_init(void); 675extern void dev_init(void);
672 676
673extern int netdev_nit; 677extern int netdev_nit;
678extern int netdev_budget;
674 679
675/* Called by rtnetlink.c:rtnl_unlock() */ 680/* Called by rtnetlink.c:rtnl_unlock() */
676extern void netdev_run_todo(void); 681extern void netdev_run_todo(void);
@@ -697,19 +702,9 @@ static inline int netif_carrier_ok(const struct net_device *dev)
697 702
698extern void __netdev_watchdog_up(struct net_device *dev); 703extern void __netdev_watchdog_up(struct net_device *dev);
699 704
700static inline void netif_carrier_on(struct net_device *dev) 705extern void netif_carrier_on(struct net_device *dev);
701{
702 if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state))
703 linkwatch_fire_event(dev);
704 if (netif_running(dev))
705 __netdev_watchdog_up(dev);
706}
707 706
708static inline void netif_carrier_off(struct net_device *dev) 707extern void netif_carrier_off(struct net_device *dev);
709{
710 if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
711 linkwatch_fire_event(dev);
712}
713 708
714/* Hot-plugging. */ 709/* Hot-plugging. */
715static inline int netif_device_present(struct net_device *dev) 710static inline int netif_device_present(struct net_device *dev)
@@ -916,6 +911,14 @@ extern int skb_checksum_help(struct sk_buff *skb, int inward);
916extern void net_enable_timestamp(void); 911extern void net_enable_timestamp(void);
917extern void net_disable_timestamp(void); 912extern void net_disable_timestamp(void);
918 913
914#ifdef CONFIG_PROC_FS
915extern void *dev_seq_start(struct seq_file *seq, loff_t *pos);
916extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos);
917extern void dev_seq_stop(struct seq_file *seq, void *v);
918#endif
919
920extern void linkwatch_run_queue(void);
921
919#endif /* __KERNEL__ */ 922#endif /* __KERNEL__ */
920 923
921#endif /* _LINUX_DEV_H */ 924#endif /* _LINUX_DEV_H */
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 2e2045482cb1..be365e70ee99 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -21,10 +21,23 @@
21#define NF_STOP 5 21#define NF_STOP 5
22#define NF_MAX_VERDICT NF_STOP 22#define NF_MAX_VERDICT NF_STOP
23 23
24/* we overload the higher bits for encoding auxiliary data such as the queue
25 * number. Not nice, but better than additional function arguments. */
26#define NF_VERDICT_MASK 0x0000ffff
27#define NF_VERDICT_BITS 16
28
29#define NF_VERDICT_QMASK 0xffff0000
30#define NF_VERDICT_QBITS 16
31
32#define NF_QUEUE_NR(x) (((x << NF_VERDICT_QBITS) & NF_VERDICT_QMASK) | NF_QUEUE)
33
34/* only for userspace compatibility */
35#ifndef __KERNEL__
24/* Generic cache responses from hook functions. 36/* Generic cache responses from hook functions.
25 <= 0x2000 is used for protocol-flags. */ 37 <= 0x2000 is used for protocol-flags. */
26#define NFC_UNKNOWN 0x4000 38#define NFC_UNKNOWN 0x4000
27#define NFC_ALTERED 0x8000 39#define NFC_ALTERED 0x8000
40#endif
28 41
29#ifdef __KERNEL__ 42#ifdef __KERNEL__
30#include <linux/config.h> 43#include <linux/config.h>
@@ -101,15 +114,51 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg);
101 114
102extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; 115extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
103 116
104typedef void nf_logfn(unsigned int hooknum, 117/* those NF_LOG_* defines and struct nf_loginfo are legacy definitios that will
118 * disappear once iptables is replaced with pkttables. Please DO NOT use them
119 * for any new code! */
120#define NF_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */
121#define NF_LOG_TCPOPT 0x02 /* Log TCP options */
122#define NF_LOG_IPOPT 0x04 /* Log IP options */
123#define NF_LOG_UID 0x08 /* Log UID owning local socket */
124#define NF_LOG_MASK 0x0f
125
126#define NF_LOG_TYPE_LOG 0x01
127#define NF_LOG_TYPE_ULOG 0x02
128
129struct nf_loginfo {
130 u_int8_t type;
131 union {
132 struct {
133 u_int32_t copy_len;
134 u_int16_t group;
135 u_int16_t qthreshold;
136 } ulog;
137 struct {
138 u_int8_t level;
139 u_int8_t logflags;
140 } log;
141 } u;
142};
143
144typedef void nf_logfn(unsigned int pf,
145 unsigned int hooknum,
105 const struct sk_buff *skb, 146 const struct sk_buff *skb,
106 const struct net_device *in, 147 const struct net_device *in,
107 const struct net_device *out, 148 const struct net_device *out,
149 const struct nf_loginfo *li,
108 const char *prefix); 150 const char *prefix);
109 151
152struct nf_logger {
153 struct module *me;
154 nf_logfn *logfn;
155 char *name;
156};
157
110/* Function to register/unregister log function. */ 158/* Function to register/unregister log function. */
111int nf_log_register(int pf, nf_logfn *logfn); 159int nf_log_register(int pf, struct nf_logger *logger);
112void nf_log_unregister(int pf, nf_logfn *logfn); 160int nf_log_unregister_pf(int pf);
161void nf_log_unregister_logger(struct nf_logger *logger);
113 162
114/* Calls the registered backend logging function */ 163/* Calls the registered backend logging function */
115void nf_log_packet(int pf, 164void nf_log_packet(int pf,
@@ -117,6 +166,7 @@ void nf_log_packet(int pf,
117 const struct sk_buff *skb, 166 const struct sk_buff *skb,
118 const struct net_device *in, 167 const struct net_device *in,
119 const struct net_device *out, 168 const struct net_device *out,
169 struct nf_loginfo *li,
120 const char *fmt, ...); 170 const char *fmt, ...);
121 171
122/* Activate hook; either okfn or kfree_skb called, unless a hook 172/* Activate hook; either okfn or kfree_skb called, unless a hook
@@ -175,11 +225,16 @@ int nf_getsockopt(struct sock *sk, int pf, int optval, char __user *opt,
175 int *len); 225 int *len);
176 226
177/* Packet queuing */ 227/* Packet queuing */
178typedef int (*nf_queue_outfn_t)(struct sk_buff *skb, 228struct nf_queue_handler {
179 struct nf_info *info, void *data); 229 int (*outfn)(struct sk_buff *skb, struct nf_info *info,
230 unsigned int queuenum, void *data);
231 void *data;
232 char *name;
233};
180extern int nf_register_queue_handler(int pf, 234extern int nf_register_queue_handler(int pf,
181 nf_queue_outfn_t outfn, void *data); 235 struct nf_queue_handler *qh);
182extern int nf_unregister_queue_handler(int pf); 236extern int nf_unregister_queue_handler(int pf);
237extern void nf_unregister_queue_handlers(struct nf_queue_handler *qh);
183extern void nf_reinject(struct sk_buff *skb, 238extern void nf_reinject(struct sk_buff *skb,
184 struct nf_info *info, 239 struct nf_info *info,
185 unsigned int verdict); 240 unsigned int verdict);
@@ -190,6 +245,27 @@ extern void nf_ct_attach(struct sk_buff *, struct sk_buff *);
190/* FIXME: Before cache is ever used, this must be implemented for real. */ 245/* FIXME: Before cache is ever used, this must be implemented for real. */
191extern void nf_invalidate_cache(int pf); 246extern void nf_invalidate_cache(int pf);
192 247
248/* Call this before modifying an existing packet: ensures it is
249 modifiable and linear to the point you care about (writable_len).
250 Returns true or false. */
251extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len);
252
253struct nf_queue_rerouter {
254 void (*save)(const struct sk_buff *skb, struct nf_info *info);
255 int (*reroute)(struct sk_buff **skb, const struct nf_info *info);
256 int rer_size;
257};
258
259#define nf_info_reroute(x) ((void *)x + sizeof(struct nf_info))
260
261extern int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer);
262extern int nf_unregister_queue_rerouter(int pf);
263
264#ifdef CONFIG_PROC_FS
265#include <linux/proc_fs.h>
266extern struct proc_dir_entry *proc_net_netfilter;
267#endif
268
193#else /* !CONFIG_NETFILTER */ 269#else /* !CONFIG_NETFILTER */
194#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) 270#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)
195static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} 271static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
new file mode 100644
index 000000000000..1d5b10ae2399
--- /dev/null
+++ b/include/linux/netfilter/nfnetlink.h
@@ -0,0 +1,169 @@
1#ifndef _NFNETLINK_H
2#define _NFNETLINK_H
3#include <linux/types.h>
4
5#ifndef __KERNEL__
6/* nfnetlink groups: Up to 32 maximum - backwards compatibility for userspace */
7#define NF_NETLINK_CONNTRACK_NEW 0x00000001
8#define NF_NETLINK_CONNTRACK_UPDATE 0x00000002
9#define NF_NETLINK_CONNTRACK_DESTROY 0x00000004
10#define NF_NETLINK_CONNTRACK_EXP_NEW 0x00000008
11#define NF_NETLINK_CONNTRACK_EXP_UPDATE 0x00000010
12#define NF_NETLINK_CONNTRACK_EXP_DESTROY 0x00000020
13#endif
14
15enum nfnetlink_groups {
16 NFNLGRP_NONE,
17#define NFNLGRP_NONE NFNLGRP_NONE
18 NFNLGRP_CONNTRACK_NEW,
19#define NFNLGRP_CONNTRACK_NEW NFNLGRP_CONNTRACK_NEW
20 NFNLGRP_CONNTRACK_UPDATE,
21#define NFNLGRP_CONNTRACK_UPDATE NFNLGRP_CONNTRACK_UPDATE
22 NFNLGRP_CONNTRACK_DESTROY,
23#define NFNLGRP_CONNTRACK_DESTROY NFNLGRP_CONNTRACK_DESTROY
24 NFNLGRP_CONNTRACK_EXP_NEW,
25#define NFNLGRP_CONNTRACK_EXP_NEW NFNLGRP_CONNTRACK_EXP_NEW
26 NFNLGRP_CONNTRACK_EXP_UPDATE,
27#define NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_UPDATE
28 NFNLGRP_CONNTRACK_EXP_DESTROY,
29#define NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_CONNTRACK_EXP_DESTROY
30 __NFNLGRP_MAX,
31};
32#define NFNLGRP_MAX (__NFNLGRP_MAX - 1)
33
34/* Generic structure for encapsulation optional netfilter information.
35 * It is reminiscent of sockaddr, but with sa_family replaced
36 * with attribute type.
37 * ! This should someday be put somewhere generic as now rtnetlink and
38 * ! nfnetlink use the same attributes methods. - J. Schulist.
39 */
40
41struct nfattr
42{
43 u_int16_t nfa_len;
44 u_int16_t nfa_type;
45} __attribute__ ((packed));
46
47/* FIXME: Shamelessly copy and pasted from rtnetlink.h, it's time
48 * to put this in a generic file */
49
50#define NFA_ALIGNTO 4
51#define NFA_ALIGN(len) (((len) + NFA_ALIGNTO - 1) & ~(NFA_ALIGNTO - 1))
52#define NFA_OK(nfa,len) ((len) > 0 && (nfa)->nfa_len >= sizeof(struct nfattr) \
53 && (nfa)->nfa_len <= (len))
54#define NFA_NEXT(nfa,attrlen) ((attrlen) -= NFA_ALIGN((nfa)->nfa_len), \
55 (struct nfattr *)(((char *)(nfa)) + NFA_ALIGN((nfa)->nfa_len)))
56#define NFA_LENGTH(len) (NFA_ALIGN(sizeof(struct nfattr)) + (len))
57#define NFA_SPACE(len) NFA_ALIGN(NFA_LENGTH(len))
58#define NFA_DATA(nfa) ((void *)(((char *)(nfa)) + NFA_LENGTH(0)))
59#define NFA_PAYLOAD(nfa) ((int)((nfa)->nfa_len) - NFA_LENGTH(0))
60#define NFA_NEST(skb, type) \
61({ struct nfattr *__start = (struct nfattr *) (skb)->tail; \
62 NFA_PUT(skb, type, 0, NULL); \
63 __start; })
64#define NFA_NEST_END(skb, start) \
65({ (start)->nfa_len = ((skb)->tail - (unsigned char *) (start)); \
66 (skb)->len; })
67#define NFA_NEST_CANCEL(skb, start) \
68({ if (start) \
69 skb_trim(skb, (unsigned char *) (start) - (skb)->data); \
70 -1; })
71
72/* General form of address family dependent message.
73 */
74struct nfgenmsg {
75 u_int8_t nfgen_family; /* AF_xxx */
76 u_int8_t version; /* nfnetlink version */
77 u_int16_t res_id; /* resource id */
78} __attribute__ ((packed));
79
80#define NFNETLINK_V0 0
81
82#define NFM_NFA(n) ((struct nfattr *)(((char *)(n)) \
83 + NLMSG_ALIGN(sizeof(struct nfgenmsg))))
84#define NFM_PAYLOAD(n) NLMSG_PAYLOAD(n, sizeof(struct nfgenmsg))
85
86/* netfilter netlink message types are split in two pieces:
87 * 8 bit subsystem, 8bit operation.
88 */
89
90#define NFNL_SUBSYS_ID(x) ((x & 0xff00) >> 8)
91#define NFNL_MSG_TYPE(x) (x & 0x00ff)
92
93/* No enum here, otherwise __stringify() trick of MODULE_ALIAS_NFNL_SUBSYS()
94 * won't work anymore */
95#define NFNL_SUBSYS_NONE 0
96#define NFNL_SUBSYS_CTNETLINK 1
97#define NFNL_SUBSYS_CTNETLINK_EXP 2
98#define NFNL_SUBSYS_QUEUE 3
99#define NFNL_SUBSYS_ULOG 4
100#define NFNL_SUBSYS_COUNT 5
101
102#ifdef __KERNEL__
103
104#include <linux/netlink.h>
105#include <linux/capability.h>
106
107struct nfnl_callback
108{
109 int (*call)(struct sock *nl, struct sk_buff *skb,
110 struct nlmsghdr *nlh, struct nfattr *cda[], int *errp);
111 kernel_cap_t cap_required; /* capabilities required for this msg */
112 u_int16_t attr_count; /* number of nfattr's */
113};
114
115struct nfnetlink_subsystem
116{
117 const char *name;
118 __u8 subsys_id; /* nfnetlink subsystem ID */
119 __u8 cb_count; /* number of callbacks */
120 struct nfnl_callback *cb; /* callback for individual types */
121};
122
123extern void __nfa_fill(struct sk_buff *skb, int attrtype,
124 int attrlen, const void *data);
125#define NFA_PUT(skb, attrtype, attrlen, data) \
126({ if (skb_tailroom(skb) < (int)NFA_SPACE(attrlen)) goto nfattr_failure; \
127 __nfa_fill(skb, attrtype, attrlen, data); })
128
129extern struct semaphore nfnl_sem;
130
131#define nfnl_shlock() down(&nfnl_sem)
132#define nfnl_shlock_nowait() down_trylock(&nfnl_sem)
133
134#define nfnl_shunlock() do { up(&nfnl_sem); \
135 if(nfnl && nfnl->sk_receive_queue.qlen) \
136 nfnl->sk_data_ready(nfnl, 0); \
137 } while(0)
138
139extern void nfnl_lock(void);
140extern void nfnl_unlock(void);
141
142extern int nfnetlink_subsys_register(struct nfnetlink_subsystem *n);
143extern int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n);
144
145extern int nfattr_parse(struct nfattr *tb[], int maxattr,
146 struct nfattr *nfa, int len);
147
148#define nfattr_parse_nested(tb, max, nfa) \
149 nfattr_parse((tb), (max), NFA_DATA((nfa)), NFA_PAYLOAD((nfa)))
150
151#define nfattr_bad_size(tb, max, cta_min) \
152({ int __i, __res = 0; \
153 for (__i=0; __i<max; __i++) \
154 if (tb[__i] && NFA_PAYLOAD(tb[__i]) < cta_min[__i]){ \
155 __res = 1; \
156 break; \
157 } \
158 __res; \
159})
160
161extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group,
162 int echo);
163extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags);
164
165#define MODULE_ALIAS_NFNL_SUBSYS(subsys) \
166 MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys))
167
168#endif /* __KERNEL__ */
169#endif /* _NFNETLINK_H */
diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h
new file mode 100644
index 000000000000..5c55751c78e4
--- /dev/null
+++ b/include/linux/netfilter/nfnetlink_conntrack.h
@@ -0,0 +1,124 @@
1#ifndef _IPCONNTRACK_NETLINK_H
2#define _IPCONNTRACK_NETLINK_H
3#include <linux/netfilter/nfnetlink.h>
4
5enum cntl_msg_types {
6 IPCTNL_MSG_CT_NEW,
7 IPCTNL_MSG_CT_GET,
8 IPCTNL_MSG_CT_DELETE,
9 IPCTNL_MSG_CT_GET_CTRZERO,
10
11 IPCTNL_MSG_MAX
12};
13
14enum ctnl_exp_msg_types {
15 IPCTNL_MSG_EXP_NEW,
16 IPCTNL_MSG_EXP_GET,
17 IPCTNL_MSG_EXP_DELETE,
18
19 IPCTNL_MSG_EXP_MAX
20};
21
22
23enum ctattr_type {
24 CTA_UNSPEC,
25 CTA_TUPLE_ORIG,
26 CTA_TUPLE_REPLY,
27 CTA_STATUS,
28 CTA_PROTOINFO,
29 CTA_HELP,
30 CTA_NAT,
31 CTA_TIMEOUT,
32 CTA_MARK,
33 CTA_COUNTERS_ORIG,
34 CTA_COUNTERS_REPLY,
35 CTA_USE,
36 CTA_ID,
37 __CTA_MAX
38};
39#define CTA_MAX (__CTA_MAX - 1)
40
41enum ctattr_tuple {
42 CTA_TUPLE_UNSPEC,
43 CTA_TUPLE_IP,
44 CTA_TUPLE_PROTO,
45 __CTA_TUPLE_MAX
46};
47#define CTA_TUPLE_MAX (__CTA_TUPLE_MAX - 1)
48
49enum ctattr_ip {
50 CTA_IP_UNSPEC,
51 CTA_IP_V4_SRC,
52 CTA_IP_V4_DST,
53 CTA_IP_V6_SRC,
54 CTA_IP_V6_DST,
55 __CTA_IP_MAX
56};
57#define CTA_IP_MAX (__CTA_IP_MAX - 1)
58
59enum ctattr_l4proto {
60 CTA_PROTO_UNSPEC,
61 CTA_PROTO_NUM,
62 CTA_PROTO_SRC_PORT,
63 CTA_PROTO_DST_PORT,
64 CTA_PROTO_ICMP_ID,
65 CTA_PROTO_ICMP_TYPE,
66 CTA_PROTO_ICMP_CODE,
67 __CTA_PROTO_MAX
68};
69#define CTA_PROTO_MAX (__CTA_PROTO_MAX - 1)
70
71enum ctattr_protoinfo {
72 CTA_PROTOINFO_UNSPEC,
73 CTA_PROTOINFO_TCP_STATE,
74 __CTA_PROTOINFO_MAX
75};
76#define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1)
77
78enum ctattr_counters {
79 CTA_COUNTERS_UNSPEC,
80 CTA_COUNTERS_PACKETS,
81 CTA_COUNTERS_BYTES,
82 __CTA_COUNTERS_MAX
83};
84#define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1)
85
86enum ctattr_nat {
87 CTA_NAT_UNSPEC,
88 CTA_NAT_MINIP,
89 CTA_NAT_MAXIP,
90 CTA_NAT_PROTO,
91 __CTA_NAT_MAX
92};
93#define CTA_NAT_MAX (__CTA_NAT_MAX - 1)
94
95enum ctattr_protonat {
96 CTA_PROTONAT_UNSPEC,
97 CTA_PROTONAT_PORT_MIN,
98 CTA_PROTONAT_PORT_MAX,
99 __CTA_PROTONAT_MAX
100};
101#define CTA_PROTONAT_MAX (__CTA_PROTONAT_MAX - 1)
102
103enum ctattr_expect {
104 CTA_EXPECT_UNSPEC,
105 CTA_EXPECT_MASTER,
106 CTA_EXPECT_TUPLE,
107 CTA_EXPECT_MASK,
108 CTA_EXPECT_TIMEOUT,
109 CTA_EXPECT_ID,
110 CTA_EXPECT_HELP_NAME,
111 __CTA_EXPECT_MAX
112};
113#define CTA_EXPECT_MAX (__CTA_EXPECT_MAX - 1)
114
115enum ctattr_help {
116 CTA_HELP_UNSPEC,
117 CTA_HELP_NAME,
118 __CTA_HELP_MAX
119};
120#define CTA_HELP_MAX (__CTA_HELP_MAX - 1)
121
122#define CTA_HELP_MAXNAMESIZE 32
123
124#endif /* _IPCONNTRACK_NETLINK_H */
diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h
new file mode 100644
index 000000000000..b04b03880595
--- /dev/null
+++ b/include/linux/netfilter/nfnetlink_log.h
@@ -0,0 +1,88 @@
1#ifndef _NFNETLINK_LOG_H
2#define _NFNETLINK_LOG_H
3
4/* This file describes the netlink messages (i.e. 'protocol packets'),
5 * and not any kind of function definitions. It is shared between kernel and
6 * userspace. Don't put kernel specific stuff in here */
7
8#include <linux/types.h>
9#include <linux/netfilter/nfnetlink.h>
10
11enum nfulnl_msg_types {
12 NFULNL_MSG_PACKET, /* packet from kernel to userspace */
13 NFULNL_MSG_CONFIG, /* connect to a particular queue */
14
15 NFULNL_MSG_MAX
16};
17
18struct nfulnl_msg_packet_hdr {
19 u_int16_t hw_protocol; /* hw protocol (network order) */
20 u_int8_t hook; /* netfilter hook */
21 u_int8_t _pad;
22} __attribute__ ((packed));
23
24struct nfulnl_msg_packet_hw {
25 u_int16_t hw_addrlen;
26 u_int16_t _pad;
27 u_int8_t hw_addr[8];
28} __attribute__ ((packed));
29
30struct nfulnl_msg_packet_timestamp {
31 aligned_u64 sec;
32 aligned_u64 usec;
33} __attribute__ ((packed));
34
35#define NFULNL_PREFIXLEN 30 /* just like old log target */
36
37enum nfulnl_attr_type {
38 NFULA_UNSPEC,
39 NFULA_PACKET_HDR,
40 NFULA_MARK, /* u_int32_t nfmark */
41 NFULA_TIMESTAMP, /* nfulnl_msg_packet_timestamp */
42 NFULA_IFINDEX_INDEV, /* u_int32_t ifindex */
43 NFULA_IFINDEX_OUTDEV, /* u_int32_t ifindex */
44 NFULA_IFINDEX_PHYSINDEV, /* u_int32_t ifindex */
45 NFULA_IFINDEX_PHYSOUTDEV, /* u_int32_t ifindex */
46 NFULA_HWADDR, /* nfulnl_msg_packet_hw */
47 NFULA_PAYLOAD, /* opaque data payload */
48 NFULA_PREFIX, /* string prefix */
49 NFULA_UID, /* user id of socket */
50
51 __NFULA_MAX
52};
53#define NFULA_MAX (__NFULA_MAX - 1)
54
55enum nfulnl_msg_config_cmds {
56 NFULNL_CFG_CMD_NONE,
57 NFULNL_CFG_CMD_BIND,
58 NFULNL_CFG_CMD_UNBIND,
59 NFULNL_CFG_CMD_PF_BIND,
60 NFULNL_CFG_CMD_PF_UNBIND,
61};
62
63struct nfulnl_msg_config_cmd {
64 u_int8_t command; /* nfulnl_msg_config_cmds */
65} __attribute__ ((packed));
66
67struct nfulnl_msg_config_mode {
68 u_int32_t copy_range;
69 u_int8_t copy_mode;
70 u_int8_t _pad;
71} __attribute__ ((packed));
72
73enum nfulnl_attr_config {
74 NFULA_CFG_UNSPEC,
75 NFULA_CFG_CMD, /* nfulnl_msg_config_cmd */
76 NFULA_CFG_MODE, /* nfulnl_msg_config_mode */
77 NFULA_CFG_NLBUFSIZ, /* u_int32_t buffer size */
78 NFULA_CFG_TIMEOUT, /* u_int32_t in 1/100 s */
79 NFULA_CFG_QTHRESH, /* u_int32_t */
80 __NFULA_CFG_MAX
81};
82#define NFULA_CFG_MAX (__NFULA_CFG_MAX -1)
83
84#define NFULNL_COPY_NONE 0x00
85#define NFULNL_COPY_META 0x01
86#define NFULNL_COPY_PACKET 0x02
87
88#endif /* _NFNETLINK_LOG_H */
diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h
new file mode 100644
index 000000000000..9e774373244c
--- /dev/null
+++ b/include/linux/netfilter/nfnetlink_queue.h
@@ -0,0 +1,89 @@
1#ifndef _NFNETLINK_QUEUE_H
2#define _NFNETLINK_QUEUE_H
3
4#include <linux/types.h>
5#include <linux/netfilter/nfnetlink.h>
6
7enum nfqnl_msg_types {
8 NFQNL_MSG_PACKET, /* packet from kernel to userspace */
9 NFQNL_MSG_VERDICT, /* verdict from userspace to kernel */
10 NFQNL_MSG_CONFIG, /* connect to a particular queue */
11
12 NFQNL_MSG_MAX
13};
14
15struct nfqnl_msg_packet_hdr {
16 u_int32_t packet_id; /* unique ID of packet in queue */
17 u_int16_t hw_protocol; /* hw protocol (network order) */
18 u_int8_t hook; /* netfilter hook */
19} __attribute__ ((packed));
20
21struct nfqnl_msg_packet_hw {
22 u_int16_t hw_addrlen;
23 u_int16_t _pad;
24 u_int8_t hw_addr[8];
25} __attribute__ ((packed));
26
27struct nfqnl_msg_packet_timestamp {
28 aligned_u64 sec;
29 aligned_u64 usec;
30} __attribute__ ((packed));
31
32enum nfqnl_attr_type {
33 NFQA_UNSPEC,
34 NFQA_PACKET_HDR,
35 NFQA_VERDICT_HDR, /* nfqnl_msg_verdict_hrd */
36 NFQA_MARK, /* u_int32_t nfmark */
37 NFQA_TIMESTAMP, /* nfqnl_msg_packet_timestamp */
38 NFQA_IFINDEX_INDEV, /* u_int32_t ifindex */
39 NFQA_IFINDEX_OUTDEV, /* u_int32_t ifindex */
40 NFQA_IFINDEX_PHYSINDEV, /* u_int32_t ifindex */
41 NFQA_IFINDEX_PHYSOUTDEV, /* u_int32_t ifindex */
42 NFQA_HWADDR, /* nfqnl_msg_packet_hw */
43 NFQA_PAYLOAD, /* opaque data payload */
44
45 __NFQA_MAX
46};
47#define NFQA_MAX (__NFQA_MAX - 1)
48
49struct nfqnl_msg_verdict_hdr {
50 u_int32_t verdict;
51 u_int32_t id;
52} __attribute__ ((packed));
53
54
55enum nfqnl_msg_config_cmds {
56 NFQNL_CFG_CMD_NONE,
57 NFQNL_CFG_CMD_BIND,
58 NFQNL_CFG_CMD_UNBIND,
59 NFQNL_CFG_CMD_PF_BIND,
60 NFQNL_CFG_CMD_PF_UNBIND,
61};
62
63struct nfqnl_msg_config_cmd {
64 u_int8_t command; /* nfqnl_msg_config_cmds */
65 u_int8_t _pad;
66 u_int16_t pf; /* AF_xxx for PF_[UN]BIND */
67} __attribute__ ((packed));
68
69enum nfqnl_config_mode {
70 NFQNL_COPY_NONE,
71 NFQNL_COPY_META,
72 NFQNL_COPY_PACKET,
73};
74
75struct nfqnl_msg_config_params {
76 u_int32_t copy_range;
77 u_int8_t copy_mode; /* enum nfqnl_config_mode */
78} __attribute__ ((packed));
79
80
81enum nfqnl_attr_config {
82 NFQA_CFG_UNSPEC,
83 NFQA_CFG_CMD, /* nfqnl_msg_config_cmd */
84 NFQA_CFG_PARAMS, /* nfqnl_msg_config_params */
85 __NFQA_CFG_MAX
86};
87#define NFQA_CFG_MAX (__NFQA_CFG_MAX-1)
88
89#endif /* _NFNETLINK_QUEUE_H */
diff --git a/include/linux/netfilter_decnet.h b/include/linux/netfilter_decnet.h
index 3064eec9cb8e..6f425369ee29 100644
--- a/include/linux/netfilter_decnet.h
+++ b/include/linux/netfilter_decnet.h
@@ -9,6 +9,8 @@
9 9
10#include <linux/netfilter.h> 10#include <linux/netfilter.h>
11 11
12/* only for userspace compatibility */
13#ifndef __KERNEL__
12/* IP Cache bits. */ 14/* IP Cache bits. */
13/* Src IP address. */ 15/* Src IP address. */
14#define NFC_DN_SRC 0x0001 16#define NFC_DN_SRC 0x0001
@@ -18,6 +20,7 @@
18#define NFC_DN_IF_IN 0x0004 20#define NFC_DN_IF_IN 0x0004
19/* Output device. */ 21/* Output device. */
20#define NFC_DN_IF_OUT 0x0008 22#define NFC_DN_IF_OUT 0x0008
23#endif /* ! __KERNEL__ */
21 24
22/* DECnet Hooks */ 25/* DECnet Hooks */
23/* After promisc drops, checksum checks. */ 26/* After promisc drops, checksum checks. */
@@ -53,7 +56,21 @@ struct nf_dn_rtmsg {
53 56
54#define NFDN_RTMSG(r) ((unsigned char *)(r) + NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg))) 57#define NFDN_RTMSG(r) ((unsigned char *)(r) + NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg)))
55 58
59#ifndef __KERNEL__
60/* backwards compatibility for userspace */
56#define DNRMG_L1_GROUP 0x01 61#define DNRMG_L1_GROUP 0x01
57#define DNRMG_L2_GROUP 0x02 62#define DNRMG_L2_GROUP 0x02
63#endif
64
65enum {
66 DNRNG_NLGRP_NONE,
67#define DNRNG_NLGRP_NONE DNRNG_NLGRP_NONE
68 DNRNG_NLGRP_L1,
69#define DNRNG_NLGRP_L1 DNRNG_NLGRP_L1
70 DNRNG_NLGRP_L2,
71#define DNRNG_NLGRP_L2 DNRNG_NLGRP_L2
72 __DNRNG_NLGRP_MAX
73};
74#define DNRNG_NLGRP_MAX (__DNRNG_NLGRP_MAX - 1)
58 75
59#endif /*__LINUX_DECNET_NETFILTER_H*/ 76#endif /*__LINUX_DECNET_NETFILTER_H*/
diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
index 3ebc36afae1a..fdc4a9527343 100644
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -8,6 +8,8 @@
8#include <linux/config.h> 8#include <linux/config.h>
9#include <linux/netfilter.h> 9#include <linux/netfilter.h>
10 10
11/* only for userspace compatibility */
12#ifndef __KERNEL__
11/* IP Cache bits. */ 13/* IP Cache bits. */
12/* Src IP address. */ 14/* Src IP address. */
13#define NFC_IP_SRC 0x0001 15#define NFC_IP_SRC 0x0001
@@ -35,6 +37,7 @@
35#define NFC_IP_DST_PT 0x0400 37#define NFC_IP_DST_PT 0x0400
36/* Something else about the proto */ 38/* Something else about the proto */
37#define NFC_IP_PROTO_UNKNOWN 0x2000 39#define NFC_IP_PROTO_UNKNOWN 0x2000
40#endif /* ! __KERNEL__ */
38 41
39/* IP Hooks */ 42/* IP Hooks */
40/* After promisc drops, checksum checks. */ 43/* After promisc drops, checksum checks. */
@@ -77,11 +80,6 @@ enum nf_ip_hook_priorities {
77#ifdef __KERNEL__ 80#ifdef __KERNEL__
78extern int ip_route_me_harder(struct sk_buff **pskb); 81extern int ip_route_me_harder(struct sk_buff **pskb);
79 82
80/* Call this before modifying an existing IP packet: ensures it is
81 modifiable and linear to the point you care about (writable_len).
82 Returns true or false. */
83extern int skb_ip_make_writable(struct sk_buff **pskb,
84 unsigned int writable_len);
85#endif /*__KERNEL__*/ 83#endif /*__KERNEL__*/
86 84
87#endif /*__LINUX_IP_NETFILTER_H*/ 85#endif /*__LINUX_IP_NETFILTER_H*/
diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h
index 08fe5f7d14a0..088742befe49 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack.h
@@ -65,6 +65,63 @@ enum ip_conntrack_status {
65 65
66 /* Both together */ 66 /* Both together */
67 IPS_NAT_DONE_MASK = (IPS_DST_NAT_DONE | IPS_SRC_NAT_DONE), 67 IPS_NAT_DONE_MASK = (IPS_DST_NAT_DONE | IPS_SRC_NAT_DONE),
68
69 /* Connection is dying (removed from lists), can not be unset. */
70 IPS_DYING_BIT = 9,
71 IPS_DYING = (1 << IPS_DYING_BIT),
72};
73
74/* Connection tracking event bits */
75enum ip_conntrack_events
76{
77 /* New conntrack */
78 IPCT_NEW_BIT = 0,
79 IPCT_NEW = (1 << IPCT_NEW_BIT),
80
81 /* Expected connection */
82 IPCT_RELATED_BIT = 1,
83 IPCT_RELATED = (1 << IPCT_RELATED_BIT),
84
85 /* Destroyed conntrack */
86 IPCT_DESTROY_BIT = 2,
87 IPCT_DESTROY = (1 << IPCT_DESTROY_BIT),
88
89 /* Timer has been refreshed */
90 IPCT_REFRESH_BIT = 3,
91 IPCT_REFRESH = (1 << IPCT_REFRESH_BIT),
92
93 /* Status has changed */
94 IPCT_STATUS_BIT = 4,
95 IPCT_STATUS = (1 << IPCT_STATUS_BIT),
96
97 /* Update of protocol info */
98 IPCT_PROTOINFO_BIT = 5,
99 IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT),
100
101 /* Volatile protocol info */
102 IPCT_PROTOINFO_VOLATILE_BIT = 6,
103 IPCT_PROTOINFO_VOLATILE = (1 << IPCT_PROTOINFO_VOLATILE_BIT),
104
105 /* New helper for conntrack */
106 IPCT_HELPER_BIT = 7,
107 IPCT_HELPER = (1 << IPCT_HELPER_BIT),
108
109 /* Update of helper info */
110 IPCT_HELPINFO_BIT = 8,
111 IPCT_HELPINFO = (1 << IPCT_HELPINFO_BIT),
112
113 /* Volatile helper info */
114 IPCT_HELPINFO_VOLATILE_BIT = 9,
115 IPCT_HELPINFO_VOLATILE = (1 << IPCT_HELPINFO_VOLATILE_BIT),
116
117 /* NAT info */
118 IPCT_NATINFO_BIT = 10,
119 IPCT_NATINFO = (1 << IPCT_NATINFO_BIT),
120};
121
122enum ip_conntrack_expect_events {
123 IPEXP_NEW_BIT = 0,
124 IPEXP_NEW = (1 << IPEXP_NEW_BIT),
68}; 125};
69 126
70#ifdef __KERNEL__ 127#ifdef __KERNEL__
@@ -152,6 +209,9 @@ struct ip_conntrack
152 /* Current number of expected connections */ 209 /* Current number of expected connections */
153 unsigned int expecting; 210 unsigned int expecting;
154 211
212 /* Unique ID that identifies this conntrack*/
213 unsigned int id;
214
155 /* Helper, if any. */ 215 /* Helper, if any. */
156 struct ip_conntrack_helper *helper; 216 struct ip_conntrack_helper *helper;
157 217
@@ -171,7 +231,7 @@ struct ip_conntrack
171#endif /* CONFIG_IP_NF_NAT_NEEDED */ 231#endif /* CONFIG_IP_NF_NAT_NEEDED */
172 232
173#if defined(CONFIG_IP_NF_CONNTRACK_MARK) 233#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
174 unsigned long mark; 234 u_int32_t mark;
175#endif 235#endif
176 236
177 /* Traversed often, so hopefully in different cacheline to top */ 237 /* Traversed often, so hopefully in different cacheline to top */
@@ -200,6 +260,9 @@ struct ip_conntrack_expect
200 /* Usage count. */ 260 /* Usage count. */
201 atomic_t use; 261 atomic_t use;
202 262
263 /* Unique ID */
264 unsigned int id;
265
203#ifdef CONFIG_IP_NF_NAT_NEEDED 266#ifdef CONFIG_IP_NF_NAT_NEEDED
204 /* This is the original per-proto part, used to map the 267 /* This is the original per-proto part, used to map the
205 * expected connection the way the recipient expects. */ 268 * expected connection the way the recipient expects. */
@@ -239,7 +302,12 @@ ip_conntrack_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
239} 302}
240 303
241/* decrement reference count on a conntrack */ 304/* decrement reference count on a conntrack */
242extern void ip_conntrack_put(struct ip_conntrack *ct); 305static inline void
306ip_conntrack_put(struct ip_conntrack *ct)
307{
308 IP_NF_ASSERT(ct);
309 nf_conntrack_put(&ct->ct_general);
310}
243 311
244/* call to create an explicit dependency on ip_conntrack. */ 312/* call to create an explicit dependency on ip_conntrack. */
245extern void need_ip_conntrack(void); 313extern void need_ip_conntrack(void);
@@ -274,12 +342,50 @@ extern void
274ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *data), 342ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *data),
275 void *data); 343 void *data);
276 344
345extern struct ip_conntrack_helper *
346__ip_conntrack_helper_find_byname(const char *);
347extern struct ip_conntrack_helper *
348ip_conntrack_helper_find_get(const struct ip_conntrack_tuple *tuple);
349extern void ip_conntrack_helper_put(struct ip_conntrack_helper *helper);
350
351extern struct ip_conntrack_protocol *
352__ip_conntrack_proto_find(u_int8_t protocol);
353extern struct ip_conntrack_protocol *
354ip_conntrack_proto_find_get(u_int8_t protocol);
355extern void ip_conntrack_proto_put(struct ip_conntrack_protocol *proto);
356
357extern void ip_ct_remove_expectations(struct ip_conntrack *ct);
358
359extern struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *,
360 struct ip_conntrack_tuple *);
361
362extern void ip_conntrack_free(struct ip_conntrack *ct);
363
364extern void ip_conntrack_hash_insert(struct ip_conntrack *ct);
365
366extern struct ip_conntrack_expect *
367__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple);
368
369extern struct ip_conntrack_expect *
370ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple);
371
372extern struct ip_conntrack_tuple_hash *
373__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
374 const struct ip_conntrack *ignored_conntrack);
375
376extern void ip_conntrack_flush(void);
377
277/* It's confirmed if it is, or has been in the hash table. */ 378/* It's confirmed if it is, or has been in the hash table. */
278static inline int is_confirmed(struct ip_conntrack *ct) 379static inline int is_confirmed(struct ip_conntrack *ct)
279{ 380{
280 return test_bit(IPS_CONFIRMED_BIT, &ct->status); 381 return test_bit(IPS_CONFIRMED_BIT, &ct->status);
281} 382}
282 383
384static inline int is_dying(struct ip_conntrack *ct)
385{
386 return test_bit(IPS_DYING_BIT, &ct->status);
387}
388
283extern unsigned int ip_conntrack_htable_size; 389extern unsigned int ip_conntrack_htable_size;
284 390
285struct ip_conntrack_stat 391struct ip_conntrack_stat
@@ -303,6 +409,85 @@ struct ip_conntrack_stat
303 409
304#define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++) 410#define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++)
305 411
412#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
413#include <linux/notifier.h>
414#include <linux/interrupt.h>
415
416struct ip_conntrack_ecache {
417 struct ip_conntrack *ct;
418 unsigned int events;
419};
420DECLARE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
421
422#define CONNTRACK_ECACHE(x) (__get_cpu_var(ip_conntrack_ecache).x)
423
424extern struct notifier_block *ip_conntrack_chain;
425extern struct notifier_block *ip_conntrack_expect_chain;
426
427static inline int ip_conntrack_register_notifier(struct notifier_block *nb)
428{
429 return notifier_chain_register(&ip_conntrack_chain, nb);
430}
431
432static inline int ip_conntrack_unregister_notifier(struct notifier_block *nb)
433{
434 return notifier_chain_unregister(&ip_conntrack_chain, nb);
435}
436
437static inline int
438ip_conntrack_expect_register_notifier(struct notifier_block *nb)
439{
440 return notifier_chain_register(&ip_conntrack_expect_chain, nb);
441}
442
443static inline int
444ip_conntrack_expect_unregister_notifier(struct notifier_block *nb)
445{
446 return notifier_chain_unregister(&ip_conntrack_expect_chain, nb);
447}
448
449extern void ip_ct_deliver_cached_events(const struct ip_conntrack *ct);
450extern void __ip_ct_event_cache_init(struct ip_conntrack *ct);
451
452static inline void
453ip_conntrack_event_cache(enum ip_conntrack_events event,
454 const struct sk_buff *skb)
455{
456 struct ip_conntrack *ct = (struct ip_conntrack *)skb->nfct;
457 struct ip_conntrack_ecache *ecache;
458
459 local_bh_disable();
460 ecache = &__get_cpu_var(ip_conntrack_ecache);
461 if (ct != ecache->ct)
462 __ip_ct_event_cache_init(ct);
463 ecache->events |= event;
464 local_bh_enable();
465}
466
467static inline void ip_conntrack_event(enum ip_conntrack_events event,
468 struct ip_conntrack *ct)
469{
470 if (is_confirmed(ct) && !is_dying(ct))
471 notifier_call_chain(&ip_conntrack_chain, event, ct);
472}
473
474static inline void
475ip_conntrack_expect_event(enum ip_conntrack_expect_events event,
476 struct ip_conntrack_expect *exp)
477{
478 notifier_call_chain(&ip_conntrack_expect_chain, event, exp);
479}
480#else /* CONFIG_IP_NF_CONNTRACK_EVENTS */
481static inline void ip_conntrack_event_cache(enum ip_conntrack_events event,
482 const struct sk_buff *skb) {}
483static inline void ip_conntrack_event(enum ip_conntrack_events event,
484 struct ip_conntrack *ct) {}
485static inline void ip_ct_deliver_cached_events(const struct ip_conntrack *ct) {}
486static inline void
487ip_conntrack_expect_event(enum ip_conntrack_expect_events event,
488 struct ip_conntrack_expect *exp) {}
489#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
490
306#ifdef CONFIG_IP_NF_NAT_NEEDED 491#ifdef CONFIG_IP_NF_NAT_NEEDED
307static inline int ip_nat_initialized(struct ip_conntrack *conntrack, 492static inline int ip_nat_initialized(struct ip_conntrack *conntrack,
308 enum ip_nat_manip_type manip) 493 enum ip_nat_manip_type manip)
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h
index 694aec9b4784..dc4d2a0575de 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_core.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h
@@ -2,6 +2,9 @@
2#define _IP_CONNTRACK_CORE_H 2#define _IP_CONNTRACK_CORE_H
3#include <linux/netfilter.h> 3#include <linux/netfilter.h>
4 4
5#define MAX_IP_CT_PROTO 256
6extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
7
5/* This header is used to share core functionality between the 8/* This header is used to share core functionality between the
6 standalone connection tracking module, and the compatibility layer's use 9 standalone connection tracking module, and the compatibility layer's use
7 of connection tracking. */ 10 of connection tracking. */
@@ -38,12 +41,19 @@ extern int __ip_conntrack_confirm(struct sk_buff **pskb);
38/* Confirm a connection: returns NF_DROP if packet must be dropped. */ 41/* Confirm a connection: returns NF_DROP if packet must be dropped. */
39static inline int ip_conntrack_confirm(struct sk_buff **pskb) 42static inline int ip_conntrack_confirm(struct sk_buff **pskb)
40{ 43{
41 if ((*pskb)->nfct 44 struct ip_conntrack *ct = (struct ip_conntrack *)(*pskb)->nfct;
42 && !is_confirmed((struct ip_conntrack *)(*pskb)->nfct)) 45 int ret = NF_ACCEPT;
43 return __ip_conntrack_confirm(pskb); 46
44 return NF_ACCEPT; 47 if (ct) {
48 if (!is_confirmed(ct))
49 ret = __ip_conntrack_confirm(pskb);
50 ip_ct_deliver_cached_events(ct);
51 }
52 return ret;
45} 53}
46 54
55extern void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp);
56
47extern struct list_head *ip_conntrack_hash; 57extern struct list_head *ip_conntrack_hash;
48extern struct list_head ip_conntrack_expect_list; 58extern struct list_head ip_conntrack_expect_list;
49extern rwlock_t ip_conntrack_lock; 59extern rwlock_t ip_conntrack_lock;
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_helper.h b/include/linux/netfilter_ipv4/ip_conntrack_helper.h
index 3692daa93dec..8d69279ccfe4 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_helper.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_helper.h
@@ -24,6 +24,8 @@ struct ip_conntrack_helper
24 int (*help)(struct sk_buff **pskb, 24 int (*help)(struct sk_buff **pskb,
25 struct ip_conntrack *ct, 25 struct ip_conntrack *ct,
26 enum ip_conntrack_info conntrackinfo); 26 enum ip_conntrack_info conntrackinfo);
27
28 int (*to_nfattr)(struct sk_buff *skb, const struct ip_conntrack *ct);
27}; 29};
28 30
29extern int ip_conntrack_helper_register(struct ip_conntrack_helper *); 31extern int ip_conntrack_helper_register(struct ip_conntrack_helper *);
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
index e20b57c5e1b7..b6b99be8632a 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
@@ -2,6 +2,7 @@
2#ifndef _IP_CONNTRACK_PROTOCOL_H 2#ifndef _IP_CONNTRACK_PROTOCOL_H
3#define _IP_CONNTRACK_PROTOCOL_H 3#define _IP_CONNTRACK_PROTOCOL_H
4#include <linux/netfilter_ipv4/ip_conntrack.h> 4#include <linux/netfilter_ipv4/ip_conntrack.h>
5#include <linux/netfilter/nfnetlink_conntrack.h>
5 6
6struct seq_file; 7struct seq_file;
7 8
@@ -47,22 +48,22 @@ struct ip_conntrack_protocol
47 int (*error)(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, 48 int (*error)(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
48 unsigned int hooknum); 49 unsigned int hooknum);
49 50
51 /* convert protoinfo to nfnetink attributes */
52 int (*to_nfattr)(struct sk_buff *skb, struct nfattr *nfa,
53 const struct ip_conntrack *ct);
54
55 int (*tuple_to_nfattr)(struct sk_buff *skb,
56 const struct ip_conntrack_tuple *t);
57 int (*nfattr_to_tuple)(struct nfattr *tb[],
58 struct ip_conntrack_tuple *t);
59
50 /* Module (if any) which this is connected to. */ 60 /* Module (if any) which this is connected to. */
51 struct module *me; 61 struct module *me;
52}; 62};
53 63
54#define MAX_IP_CT_PROTO 256
55extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
56
57/* Protocol registration. */ 64/* Protocol registration. */
58extern int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto); 65extern int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto);
59extern void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto); 66extern void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto);
60
61static inline struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol)
62{
63 return ip_ct_protos[protocol];
64}
65
66/* Existing built-in protocols */ 67/* Existing built-in protocols */
67extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp; 68extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp;
68extern struct ip_conntrack_protocol ip_conntrack_protocol_udp; 69extern struct ip_conntrack_protocol ip_conntrack_protocol_udp;
@@ -73,6 +74,11 @@ extern int ip_conntrack_protocol_tcp_init(void);
73/* Log invalid packets */ 74/* Log invalid packets */
74extern unsigned int ip_ct_log_invalid; 75extern unsigned int ip_ct_log_invalid;
75 76
77extern int ip_ct_port_tuple_to_nfattr(struct sk_buff *,
78 const struct ip_conntrack_tuple *);
79extern int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[],
80 struct ip_conntrack_tuple *);
81
76#ifdef CONFIG_SYSCTL 82#ifdef CONFIG_SYSCTL
77#ifdef DEBUG_INVALID_PACKETS 83#ifdef DEBUG_INVALID_PACKETS
78#define LOG_INVALID(proto) \ 84#define LOG_INVALID(proto) \
diff --git a/include/linux/netfilter_ipv4/ip_logging.h b/include/linux/netfilter_ipv4/ip_logging.h
deleted file mode 100644
index 0c5c52cb6589..000000000000
--- a/include/linux/netfilter_ipv4/ip_logging.h
+++ /dev/null
@@ -1,20 +0,0 @@
1/* IPv4 macros for the internal logging interface. */
2#ifndef __IP_LOGGING_H
3#define __IP_LOGGING_H
4
5#ifdef __KERNEL__
6#include <linux/socket.h>
7#include <linux/netfilter_logging.h>
8
9#define nf_log_ip_packet(pskb,hooknum,in,out,fmt,args...) \
10 nf_log_packet(AF_INET,pskb,hooknum,in,out,fmt,##args)
11
12#define nf_log_ip(pfh,len,fmt,args...) \
13 nf_log(AF_INET,pfh,len,fmt,##args)
14
15#define nf_ip_log_register(logging) nf_log_register(AF_INET,logging)
16#define nf_ip_log_unregister(logging) nf_log_unregister(AF_INET,logging)
17
18#endif /*__KERNEL__*/
19
20#endif /*__IP_LOGGING_H*/
diff --git a/include/linux/netfilter_ipv4/ip_nat_protocol.h b/include/linux/netfilter_ipv4/ip_nat_protocol.h
index 129708c22386..ef63aa991a06 100644
--- a/include/linux/netfilter_ipv4/ip_nat_protocol.h
+++ b/include/linux/netfilter_ipv4/ip_nat_protocol.h
@@ -4,6 +4,9 @@
4#include <linux/init.h> 4#include <linux/init.h>
5#include <linux/list.h> 5#include <linux/list.h>
6 6
7#include <linux/netfilter_ipv4/ip_nat.h>
8#include <linux/netfilter/nfnetlink_conntrack.h>
9
7struct iphdr; 10struct iphdr;
8struct ip_nat_range; 11struct ip_nat_range;
9 12
@@ -15,6 +18,8 @@ struct ip_nat_protocol
15 /* Protocol number. */ 18 /* Protocol number. */
16 unsigned int protonum; 19 unsigned int protonum;
17 20
21 struct module *me;
22
18 /* Translate a packet to the target according to manip type. 23 /* Translate a packet to the target according to manip type.
19 Return true if succeeded. */ 24 Return true if succeeded. */
20 int (*manip_pkt)(struct sk_buff **pskb, 25 int (*manip_pkt)(struct sk_buff **pskb,
@@ -43,19 +48,20 @@ struct ip_nat_protocol
43 48
44 unsigned int (*print_range)(char *buffer, 49 unsigned int (*print_range)(char *buffer,
45 const struct ip_nat_range *range); 50 const struct ip_nat_range *range);
46};
47 51
48#define MAX_IP_NAT_PROTO 256 52 int (*range_to_nfattr)(struct sk_buff *skb,
49extern struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO]; 53 const struct ip_nat_range *range);
54
55 int (*nfattr_to_range)(struct nfattr *tb[],
56 struct ip_nat_range *range);
57};
50 58
51/* Protocol registration. */ 59/* Protocol registration. */
52extern int ip_nat_protocol_register(struct ip_nat_protocol *proto); 60extern int ip_nat_protocol_register(struct ip_nat_protocol *proto);
53extern void ip_nat_protocol_unregister(struct ip_nat_protocol *proto); 61extern void ip_nat_protocol_unregister(struct ip_nat_protocol *proto);
54 62
55static inline struct ip_nat_protocol *ip_nat_find_proto(u_int8_t protocol) 63extern struct ip_nat_protocol *ip_nat_proto_find_get(u_int8_t protocol);
56{ 64extern void ip_nat_proto_put(struct ip_nat_protocol *proto);
57 return ip_nat_protos[protocol];
58}
59 65
60/* Built-in protocols. */ 66/* Built-in protocols. */
61extern struct ip_nat_protocol ip_nat_protocol_tcp; 67extern struct ip_nat_protocol ip_nat_protocol_tcp;
@@ -67,4 +73,9 @@ extern int init_protocols(void) __init;
67extern void cleanup_protocols(void); 73extern void cleanup_protocols(void);
68extern struct ip_nat_protocol *find_nat_proto(u_int16_t protonum); 74extern struct ip_nat_protocol *find_nat_proto(u_int16_t protonum);
69 75
76extern int ip_nat_port_range_to_nfattr(struct sk_buff *skb,
77 const struct ip_nat_range *range);
78extern int ip_nat_port_nfattr_to_range(struct nfattr *tb[],
79 struct ip_nat_range *range);
80
70#endif /*_IP_NAT_PROTO_H*/ 81#endif /*_IP_NAT_PROTO_H*/
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index 12ce47808e7d..d19d65cf4530 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -109,7 +109,8 @@ struct ipt_counters
109 109
110/* Values for "flag" field in struct ipt_ip (general ip structure). */ 110/* Values for "flag" field in struct ipt_ip (general ip structure). */
111#define IPT_F_FRAG 0x01 /* Set if rule is a fragment rule */ 111#define IPT_F_FRAG 0x01 /* Set if rule is a fragment rule */
112#define IPT_F_MASK 0x01 /* All possible flag bits mask. */ 112#define IPT_F_GOTO 0x02 /* Set if jump is a goto */
113#define IPT_F_MASK 0x03 /* All possible flag bits mask. */
113 114
114/* Values for "inv" field in struct ipt_ip. */ 115/* Values for "inv" field in struct ipt_ip. */
115#define IPT_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */ 116#define IPT_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */
diff --git a/include/linux/netfilter_ipv4/ipt_LOG.h b/include/linux/netfilter_ipv4/ipt_LOG.h
index d25f782e57d1..22d16177319b 100644
--- a/include/linux/netfilter_ipv4/ipt_LOG.h
+++ b/include/linux/netfilter_ipv4/ipt_LOG.h
@@ -1,6 +1,7 @@
1#ifndef _IPT_LOG_H 1#ifndef _IPT_LOG_H
2#define _IPT_LOG_H 2#define _IPT_LOG_H
3 3
4/* make sure not to change this without changing netfilter.h:NF_LOG_* (!) */
4#define IPT_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ 5#define IPT_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */
5#define IPT_LOG_TCPOPT 0x02 /* Log TCP options */ 6#define IPT_LOG_TCPOPT 0x02 /* Log TCP options */
6#define IPT_LOG_IPOPT 0x04 /* Log IP options */ 7#define IPT_LOG_IPOPT 0x04 /* Log IP options */
diff --git a/include/linux/netfilter_ipv4/ipt_NFQUEUE.h b/include/linux/netfilter_ipv4/ipt_NFQUEUE.h
new file mode 100644
index 000000000000..b5b2943b0c66
--- /dev/null
+++ b/include/linux/netfilter_ipv4/ipt_NFQUEUE.h
@@ -0,0 +1,16 @@
1/* iptables module for using NFQUEUE mechanism
2 *
3 * (C) 2005 Harald Welte <laforge@netfilter.org>
4 *
5 * This software is distributed under GNU GPL v2, 1991
6 *
7*/
8#ifndef _IPT_NFQ_TARGET_H
9#define _IPT_NFQ_TARGET_H
10
11/* target info */
12struct ipt_NFQ_info {
13 u_int16_t queuenum;
14};
15
16#endif /* _IPT_DSCP_TARGET_H */
diff --git a/include/linux/netfilter_ipv4/ipt_TTL.h b/include/linux/netfilter_ipv4/ipt_TTL.h
new file mode 100644
index 000000000000..ee6611edc112
--- /dev/null
+++ b/include/linux/netfilter_ipv4/ipt_TTL.h
@@ -0,0 +1,21 @@
1/* TTL modification module for IP tables
2 * (C) 2000 by Harald Welte <laforge@netfilter.org> */
3
4#ifndef _IPT_TTL_H
5#define _IPT_TTL_H
6
7enum {
8 IPT_TTL_SET = 0,
9 IPT_TTL_INC,
10 IPT_TTL_DEC
11};
12
13#define IPT_TTL_MAXMODE IPT_TTL_DEC
14
15struct ipt_TTL_info {
16 u_int8_t mode;
17 u_int8_t ttl;
18};
19
20
21#endif
diff --git a/include/linux/netfilter_ipv4/ipt_connbytes.h b/include/linux/netfilter_ipv4/ipt_connbytes.h
new file mode 100644
index 000000000000..9e5532f8d8ac
--- /dev/null
+++ b/include/linux/netfilter_ipv4/ipt_connbytes.h
@@ -0,0 +1,25 @@
1#ifndef _IPT_CONNBYTES_H
2#define _IPT_CONNBYTES_H
3
4enum ipt_connbytes_what {
5 IPT_CONNBYTES_PKTS,
6 IPT_CONNBYTES_BYTES,
7 IPT_CONNBYTES_AVGPKT,
8};
9
10enum ipt_connbytes_direction {
11 IPT_CONNBYTES_DIR_ORIGINAL,
12 IPT_CONNBYTES_DIR_REPLY,
13 IPT_CONNBYTES_DIR_BOTH,
14};
15
16struct ipt_connbytes_info
17{
18 struct {
19 aligned_u64 from; /* count to be matched */
20 aligned_u64 to; /* count to be matched */
21 } count;
22 u_int8_t what; /* ipt_connbytes_what */
23 u_int8_t direction; /* ipt_connbytes_direction */
24};
25#endif
diff --git a/include/linux/netfilter_ipv4/ipt_dccp.h b/include/linux/netfilter_ipv4/ipt_dccp.h
new file mode 100644
index 000000000000..3cb3a522e62b
--- /dev/null
+++ b/include/linux/netfilter_ipv4/ipt_dccp.h
@@ -0,0 +1,23 @@
1#ifndef _IPT_DCCP_H_
2#define _IPT_DCCP_H_
3
4#define IPT_DCCP_SRC_PORTS 0x01
5#define IPT_DCCP_DEST_PORTS 0x02
6#define IPT_DCCP_TYPE 0x04
7#define IPT_DCCP_OPTION 0x08
8
9#define IPT_DCCP_VALID_FLAGS 0x0f
10
11struct ipt_dccp_info {
12 u_int16_t dpts[2]; /* Min, Max */
13 u_int16_t spts[2]; /* Min, Max */
14
15 u_int16_t flags;
16 u_int16_t invflags;
17
18 u_int16_t typemask;
19 u_int8_t option;
20};
21
22#endif /* _IPT_DCCP_H_ */
23
diff --git a/include/linux/netfilter_ipv4/ipt_string.h b/include/linux/netfilter_ipv4/ipt_string.h
new file mode 100644
index 000000000000..a265f6e44eab
--- /dev/null
+++ b/include/linux/netfilter_ipv4/ipt_string.h
@@ -0,0 +1,18 @@
1#ifndef _IPT_STRING_H
2#define _IPT_STRING_H
3
4#define IPT_STRING_MAX_PATTERN_SIZE 128
5#define IPT_STRING_MAX_ALGO_NAME_SIZE 16
6
7struct ipt_string_info
8{
9 u_int16_t from_offset;
10 u_int16_t to_offset;
11 char algo[IPT_STRING_MAX_ALGO_NAME_SIZE];
12 char pattern[IPT_STRING_MAX_PATTERN_SIZE];
13 u_int8_t patlen;
14 u_int8_t invert;
15 struct ts_config __attribute__((aligned(8))) *config;
16};
17
18#endif /*_IPT_STRING_H*/
diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index bee7a5ec7c66..edcc2c6eb5c7 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -10,6 +10,8 @@
10 10
11#include <linux/netfilter.h> 11#include <linux/netfilter.h>
12 12
13/* only for userspace compatibility */
14#ifndef __KERNEL__
13/* IP Cache bits. */ 15/* IP Cache bits. */
14/* Src IP address. */ 16/* Src IP address. */
15#define NFC_IP6_SRC 0x0001 17#define NFC_IP6_SRC 0x0001
@@ -38,6 +40,7 @@
38#define NFC_IP6_DST_PT 0x0400 40#define NFC_IP6_DST_PT 0x0400
39/* Something else about the proto */ 41/* Something else about the proto */
40#define NFC_IP6_PROTO_UNKNOWN 0x2000 42#define NFC_IP6_PROTO_UNKNOWN 0x2000
43#endif /* ! __KERNEL__ */
41 44
42 45
43/* IP6 Hooks */ 46/* IP6 Hooks */
@@ -68,4 +71,7 @@ enum nf_ip6_hook_priorities {
68 NF_IP6_PRI_LAST = INT_MAX, 71 NF_IP6_PRI_LAST = INT_MAX,
69}; 72};
70 73
74extern int ipv6_netfilter_init(void);
75extern void ipv6_netfilter_fini(void);
76
71#endif /*__LINUX_IP6_NETFILTER_H*/ 77#endif /*__LINUX_IP6_NETFILTER_H*/
diff --git a/include/linux/netfilter_ipv6/ip6_logging.h b/include/linux/netfilter_ipv6/ip6_logging.h
deleted file mode 100644
index a0b2ee3043aa..000000000000
--- a/include/linux/netfilter_ipv6/ip6_logging.h
+++ /dev/null
@@ -1,20 +0,0 @@
1/* IPv6 macros for the nternal logging interface. */
2#ifndef __IP6_LOGGING_H
3#define __IP6_LOGGING_H
4
5#ifdef __KERNEL__
6#include <linux/socket.h>
7#include <linux/netfilter_logging.h>
8
9#define nf_log_ip6_packet(pskb,hooknum,in,out,fmt,args...) \
10 nf_log_packet(AF_INET6,pskb,hooknum,in,out,fmt,##args)
11
12#define nf_log_ip6(pfh,len,fmt,args...) \
13 nf_log(AF_INET6,pfh,len,fmt,##args)
14
15#define nf_ip6_log_register(logging) nf_log_register(AF_INET6,logging)
16#define nf_ip6_log_unregister(logging) nf_log_unregister(AF_INET6,logging)
17
18#endif /*__KERNEL__*/
19
20#endif /*__IP6_LOGGING_H*/
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index f1ce3b009853..58c72a52dc65 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -111,7 +111,8 @@ struct ip6t_counters
111#define IP6T_F_PROTO 0x01 /* Set if rule cares about upper 111#define IP6T_F_PROTO 0x01 /* Set if rule cares about upper
112 protocols */ 112 protocols */
113#define IP6T_F_TOS 0x02 /* Match the TOS. */ 113#define IP6T_F_TOS 0x02 /* Match the TOS. */
114#define IP6T_F_MASK 0x03 /* All possible flag bits mask. */ 114#define IP6T_F_GOTO 0x04 /* Set if jump is a goto */
115#define IP6T_F_MASK 0x07 /* All possible flag bits mask. */
115 116
116/* Values for "inv" field in struct ip6t_ip6. */ 117/* Values for "inv" field in struct ip6t_ip6. */
117#define IP6T_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */ 118#define IP6T_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */
diff --git a/include/linux/netfilter_ipv6/ip6t_HL.h b/include/linux/netfilter_ipv6/ip6t_HL.h
new file mode 100644
index 000000000000..afb7813d45ab
--- /dev/null
+++ b/include/linux/netfilter_ipv6/ip6t_HL.h
@@ -0,0 +1,22 @@
1/* Hop Limit modification module for ip6tables
2 * Maciej Soltysiak <solt@dns.toxicfilms.tv>
3 * Based on HW's TTL module */
4
5#ifndef _IP6T_HL_H
6#define _IP6T_HL_H
7
8enum {
9 IP6T_HL_SET = 0,
10 IP6T_HL_INC,
11 IP6T_HL_DEC
12};
13
14#define IP6T_HL_MAXMODE IP6T_HL_DEC
15
16struct ip6t_HL_info {
17 u_int8_t mode;
18 u_int8_t hop_limit;
19};
20
21
22#endif
diff --git a/include/linux/netfilter_ipv6/ip6t_LOG.h b/include/linux/netfilter_ipv6/ip6t_LOG.h
index 42996a43bb39..9008ff5c40ae 100644
--- a/include/linux/netfilter_ipv6/ip6t_LOG.h
+++ b/include/linux/netfilter_ipv6/ip6t_LOG.h
@@ -1,6 +1,7 @@
1#ifndef _IP6T_LOG_H 1#ifndef _IP6T_LOG_H
2#define _IP6T_LOG_H 2#define _IP6T_LOG_H
3 3
4/* make sure not to change this without changing netfilter.h:NF_LOG_* (!) */
4#define IP6T_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ 5#define IP6T_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */
5#define IP6T_LOG_TCPOPT 0x02 /* Log TCP options */ 6#define IP6T_LOG_TCPOPT 0x02 /* Log TCP options */
6#define IP6T_LOG_IPOPT 0x04 /* Log IP options */ 7#define IP6T_LOG_IPOPT 0x04 /* Log IP options */
diff --git a/include/linux/netfilter_ipv6/ip6t_REJECT.h b/include/linux/netfilter_ipv6/ip6t_REJECT.h
new file mode 100644
index 000000000000..6be6504162bb
--- /dev/null
+++ b/include/linux/netfilter_ipv6/ip6t_REJECT.h
@@ -0,0 +1,18 @@
1#ifndef _IP6T_REJECT_H
2#define _IP6T_REJECT_H
3
4enum ip6t_reject_with {
5 IP6T_ICMP6_NO_ROUTE,
6 IP6T_ICMP6_ADM_PROHIBITED,
7 IP6T_ICMP6_NOT_NEIGHBOUR,
8 IP6T_ICMP6_ADDR_UNREACH,
9 IP6T_ICMP6_PORT_UNREACH,
10 IP6T_ICMP6_ECHOREPLY,
11 IP6T_TCP_RESET
12};
13
14struct ip6t_reject_info {
15 u_int32_t with; /* reject type */
16};
17
18#endif /*_IP6T_REJECT_H*/
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 6552b71bfa73..167518668936 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -8,7 +8,7 @@
8#define NETLINK_W1 1 /* 1-wire subsystem */ 8#define NETLINK_W1 1 /* 1-wire subsystem */
9#define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */ 9#define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */
10#define NETLINK_FIREWALL 3 /* Firewalling hook */ 10#define NETLINK_FIREWALL 3 /* Firewalling hook */
11#define NETLINK_TCPDIAG 4 /* TCP socket monitoring */ 11#define NETLINK_INET_DIAG 4 /* INET socket monitoring */
12#define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */ 12#define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */
13#define NETLINK_XFRM 6 /* ipsec */ 13#define NETLINK_XFRM 6 /* ipsec */
14#define NETLINK_SELINUX 7 /* SELinux event notifications */ 14#define NETLINK_SELINUX 7 /* SELinux event notifications */
@@ -90,6 +90,15 @@ struct nlmsgerr
90 struct nlmsghdr msg; 90 struct nlmsghdr msg;
91}; 91};
92 92
93#define NETLINK_ADD_MEMBERSHIP 1
94#define NETLINK_DROP_MEMBERSHIP 2
95#define NETLINK_PKTINFO 3
96
97struct nl_pktinfo
98{
99 __u32 group;
100};
101
93#define NET_MAJOR 36 /* Major 36 is reserved for networking */ 102#define NET_MAJOR 36 /* Major 36 is reserved for networking */
94 103
95enum { 104enum {
@@ -106,9 +115,8 @@ struct netlink_skb_parms
106{ 115{
107 struct ucred creds; /* Skb credentials */ 116 struct ucred creds; /* Skb credentials */
108 __u32 pid; 117 __u32 pid;
109 __u32 groups;
110 __u32 dst_pid; 118 __u32 dst_pid;
111 __u32 dst_groups; 119 __u32 dst_group;
112 kernel_cap_t eff_cap; 120 kernel_cap_t eff_cap;
113 __u32 loginuid; /* Login (audit) uid */ 121 __u32 loginuid; /* Login (audit) uid */
114}; 122};
@@ -117,11 +125,11 @@ struct netlink_skb_parms
117#define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) 125#define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds)
118 126
119 127
120extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)); 128extern struct sock *netlink_kernel_create(int unit, unsigned int groups, void (*input)(struct sock *sk, int len), struct module *module);
121extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); 129extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
122extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); 130extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock);
123extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, 131extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid,
124 __u32 group, int allocation); 132 __u32 group, unsigned int __nocast allocation);
125extern void netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code); 133extern void netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code);
126extern int netlink_register_notifier(struct notifier_block *nb); 134extern int netlink_register_notifier(struct notifier_block *nb);
127extern int netlink_unregister_notifier(struct notifier_block *nb); 135extern int netlink_unregister_notifier(struct notifier_block *nb);
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 927ed487630d..499a5325f67f 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1249,6 +1249,7 @@
1249#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA 0x0266 1249#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA 0x0266
1250#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2 0x0267 1250#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2 0x0267
1251#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE 0x036E 1251#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE 0x036E
1252#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA 0x036F
1252#define PCI_DEVICE_ID_NVIDIA_NVENET_12 0x0268 1253#define PCI_DEVICE_ID_NVIDIA_NVENET_12 0x0268
1253#define PCI_DEVICE_ID_NVIDIA_NVENET_13 0x0269 1254#define PCI_DEVICE_ID_NVIDIA_NVENET_13 0x0269
1254#define PCI_DEVICE_ID_NVIDIA_MCP51_AUDIO 0x026B 1255#define PCI_DEVICE_ID_NVIDIA_MCP51_AUDIO 0x026B
diff --git a/include/linux/phy.h b/include/linux/phy.h
new file mode 100644
index 000000000000..72cb67b66e0c
--- /dev/null
+++ b/include/linux/phy.h
@@ -0,0 +1,377 @@
1/*
2 * include/linux/phy.h
3 *
4 * Framework and drivers for configuring and reading different PHYs
5 * Based on code in sungem_phy.c and gianfar_phy.c
6 *
7 * Author: Andy Fleming
8 *
9 * Copyright (c) 2004 Freescale Semiconductor, Inc.
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the
13 * Free Software Foundation; either version 2 of the License, or (at your
14 * option) any later version.
15 *
16 */
17
18#ifndef __PHY_H
19#define __PHY_H
20
21#include <linux/spinlock.h>
22#include <linux/device.h>
23
24#define PHY_BASIC_FEATURES (SUPPORTED_10baseT_Half | \
25 SUPPORTED_10baseT_Full | \
26 SUPPORTED_100baseT_Half | \
27 SUPPORTED_100baseT_Full | \
28 SUPPORTED_Autoneg | \
29 SUPPORTED_TP | \
30 SUPPORTED_MII)
31
32#define PHY_GBIT_FEATURES (PHY_BASIC_FEATURES | \
33 SUPPORTED_1000baseT_Half | \
34 SUPPORTED_1000baseT_Full)
35
36/* Set phydev->irq to PHY_POLL if interrupts are not supported,
37 * or not desired for this PHY. Set to PHY_IGNORE_INTERRUPT if
38 * the attached driver handles the interrupt
39 */
40#define PHY_POLL -1
41#define PHY_IGNORE_INTERRUPT -2
42
43#define PHY_HAS_INTERRUPT 0x00000001
44#define PHY_HAS_MAGICANEG 0x00000002
45
46#define MII_BUS_MAX 4
47
48
49#define PHY_INIT_TIMEOUT 100000
50#define PHY_STATE_TIME 1
51#define PHY_FORCE_TIMEOUT 10
52#define PHY_AN_TIMEOUT 10
53
54#define PHY_MAX_ADDR 32
55
56/* The Bus class for PHYs. Devices which provide access to
57 * PHYs should register using this structure */
58struct mii_bus {
59 const char *name;
60 int id;
61 void *priv;
62 int (*read)(struct mii_bus *bus, int phy_id, int regnum);
63 int (*write)(struct mii_bus *bus, int phy_id, int regnum, u16 val);
64 int (*reset)(struct mii_bus *bus);
65
66 /* A lock to ensure that only one thing can read/write
67 * the MDIO bus at a time */
68 spinlock_t mdio_lock;
69
70 struct device *dev;
71
72 /* list of all PHYs on bus */
73 struct phy_device *phy_map[PHY_MAX_ADDR];
74
75 /* Pointer to an array of interrupts, each PHY's
76 * interrupt at the index matching its address */
77 int *irq;
78};
79
80#define PHY_INTERRUPT_DISABLED 0x0
81#define PHY_INTERRUPT_ENABLED 0x80000000
82
83/* PHY state machine states:
84 *
85 * DOWN: PHY device and driver are not ready for anything. probe
86 * should be called if and only if the PHY is in this state,
87 * given that the PHY device exists.
88 * - PHY driver probe function will, depending on the PHY, set
89 * the state to STARTING or READY
90 *
91 * STARTING: PHY device is coming up, and the ethernet driver is
92 * not ready. PHY drivers may set this in the probe function.
93 * If they do, they are responsible for making sure the state is
94 * eventually set to indicate whether the PHY is UP or READY,
95 * depending on the state when the PHY is done starting up.
96 * - PHY driver will set the state to READY
97 * - start will set the state to PENDING
98 *
99 * READY: PHY is ready to send and receive packets, but the
100 * controller is not. By default, PHYs which do not implement
101 * probe will be set to this state by phy_probe(). If the PHY
102 * driver knows the PHY is ready, and the PHY state is STARTING,
103 * then it sets this STATE.
104 * - start will set the state to UP
105 *
106 * PENDING: PHY device is coming up, but the ethernet driver is
107 * ready. phy_start will set this state if the PHY state is
108 * STARTING.
109 * - PHY driver will set the state to UP when the PHY is ready
110 *
111 * UP: The PHY and attached device are ready to do work.
112 * Interrupts should be started here.
113 * - timer moves to AN
114 *
115 * AN: The PHY is currently negotiating the link state. Link is
116 * therefore down for now. phy_timer will set this state when it
117 * detects the state is UP. config_aneg will set this state
118 * whenever called with phydev->autoneg set to AUTONEG_ENABLE.
119 * - If autonegotiation finishes, but there's no link, it sets
120 * the state to NOLINK.
121 * - If aneg finishes with link, it sets the state to RUNNING,
122 * and calls adjust_link
123 * - If autonegotiation did not finish after an arbitrary amount
124 * of time, autonegotiation should be tried again if the PHY
125 * supports "magic" autonegotiation (back to AN)
126 * - If it didn't finish, and no magic_aneg, move to FORCING.
127 *
128 * NOLINK: PHY is up, but not currently plugged in.
129 * - If the timer notes that the link comes back, we move to RUNNING
130 * - config_aneg moves to AN
131 * - phy_stop moves to HALTED
132 *
133 * FORCING: PHY is being configured with forced settings
134 * - if link is up, move to RUNNING
135 * - If link is down, we drop to the next highest setting, and
136 * retry (FORCING) after a timeout
137 * - phy_stop moves to HALTED
138 *
139 * RUNNING: PHY is currently up, running, and possibly sending
140 * and/or receiving packets
141 * - timer will set CHANGELINK if we're polling (this ensures the
142 * link state is polled every other cycle of this state machine,
143 * which makes it every other second)
144 * - irq will set CHANGELINK
145 * - config_aneg will set AN
146 * - phy_stop moves to HALTED
147 *
148 * CHANGELINK: PHY experienced a change in link state
149 * - timer moves to RUNNING if link
150 * - timer moves to NOLINK if the link is down
151 * - phy_stop moves to HALTED
152 *
153 * HALTED: PHY is up, but no polling or interrupts are done. Or
154 * PHY is in an error state.
155 *
156 * - phy_start moves to RESUMING
157 *
158 * RESUMING: PHY was halted, but now wants to run again.
159 * - If we are forcing, or aneg is done, timer moves to RUNNING
160 * - If aneg is not done, timer moves to AN
161 * - phy_stop moves to HALTED
162 */
163enum phy_state {
164 PHY_DOWN=0,
165 PHY_STARTING,
166 PHY_READY,
167 PHY_PENDING,
168 PHY_UP,
169 PHY_AN,
170 PHY_RUNNING,
171 PHY_NOLINK,
172 PHY_FORCING,
173 PHY_CHANGELINK,
174 PHY_HALTED,
175 PHY_RESUMING
176};
177
178/* phy_device: An instance of a PHY
179 *
180 * drv: Pointer to the driver for this PHY instance
181 * bus: Pointer to the bus this PHY is on
182 * dev: driver model device structure for this PHY
183 * phy_id: UID for this device found during discovery
184 * state: state of the PHY for management purposes
185 * dev_flags: Device-specific flags used by the PHY driver.
186 * addr: Bus address of PHY
187 * link_timeout: The number of timer firings to wait before the
188 * giving up on the current attempt at acquiring a link
189 * irq: IRQ number of the PHY's interrupt (-1 if none)
190 * phy_timer: The timer for handling the state machine
191 * phy_queue: A work_queue for the interrupt
192 * attached_dev: The attached enet driver's device instance ptr
193 * adjust_link: Callback for the enet controller to respond to
194 * changes in the link state.
195 * adjust_state: Callback for the enet driver to respond to
196 * changes in the state machine.
197 *
198 * speed, duplex, pause, supported, advertising, and
199 * autoneg are used like in mii_if_info
200 *
201 * interrupts currently only supports enabled or disabled,
202 * but could be changed in the future to support enabling
203 * and disabling specific interrupts
204 *
205 * Contains some infrastructure for polling and interrupt
206 * handling, as well as handling shifts in PHY hardware state
207 */
208struct phy_device {
209 /* Information about the PHY type */
210 /* And management functions */
211 struct phy_driver *drv;
212
213 struct mii_bus *bus;
214
215 struct device dev;
216
217 u32 phy_id;
218
219 enum phy_state state;
220
221 u32 dev_flags;
222
223 /* Bus address of the PHY (0-32) */
224 int addr;
225
226 /* forced speed & duplex (no autoneg)
227 * partner speed & duplex & pause (autoneg)
228 */
229 int speed;
230 int duplex;
231 int pause;
232 int asym_pause;
233
234 /* The most recently read link state */
235 int link;
236
237 /* Enabled Interrupts */
238 u32 interrupts;
239
240 /* Union of PHY and Attached devices' supported modes */
241 /* See mii.h for more info */
242 u32 supported;
243 u32 advertising;
244
245 int autoneg;
246
247 int link_timeout;
248
249 /* Interrupt number for this PHY
250 * -1 means no interrupt */
251 int irq;
252
253 /* private data pointer */
254 /* For use by PHYs to maintain extra state */
255 void *priv;
256
257 /* Interrupt and Polling infrastructure */
258 struct work_struct phy_queue;
259 struct timer_list phy_timer;
260
261 spinlock_t lock;
262
263 struct net_device *attached_dev;
264
265 void (*adjust_link)(struct net_device *dev);
266
267 void (*adjust_state)(struct net_device *dev);
268};
269#define to_phy_device(d) container_of(d, struct phy_device, dev)
270
271/* struct phy_driver: Driver structure for a particular PHY type
272 *
273 * phy_id: The result of reading the UID registers of this PHY
274 * type, and ANDing them with the phy_id_mask. This driver
275 * only works for PHYs with IDs which match this field
276 * name: The friendly name of this PHY type
277 * phy_id_mask: Defines the important bits of the phy_id
278 * features: A list of features (speed, duplex, etc) supported
279 * by this PHY
280 * flags: A bitfield defining certain other features this PHY
281 * supports (like interrupts)
282 *
283 * The drivers must implement config_aneg and read_status. All
284 * other functions are optional. Note that none of these
285 * functions should be called from interrupt time. The goal is
286 * for the bus read/write functions to be able to block when the
287 * bus transaction is happening, and be freed up by an interrupt
288 * (The MPC85xx has this ability, though it is not currently
289 * supported in the driver).
290 */
291struct phy_driver {
292 u32 phy_id;
293 char *name;
294 unsigned int phy_id_mask;
295 u32 features;
296 u32 flags;
297
298 /* Called to initialize the PHY,
299 * including after a reset */
300 int (*config_init)(struct phy_device *phydev);
301
302 /* Called during discovery. Used to set
303 * up device-specific structures, if any */
304 int (*probe)(struct phy_device *phydev);
305
306 /* PHY Power Management */
307 int (*suspend)(struct phy_device *phydev);
308 int (*resume)(struct phy_device *phydev);
309
310 /* Configures the advertisement and resets
311 * autonegotiation if phydev->autoneg is on,
312 * forces the speed to the current settings in phydev
313 * if phydev->autoneg is off */
314 int (*config_aneg)(struct phy_device *phydev);
315
316 /* Determines the negotiated speed and duplex */
317 int (*read_status)(struct phy_device *phydev);
318
319 /* Clears any pending interrupts */
320 int (*ack_interrupt)(struct phy_device *phydev);
321
322 /* Enables or disables interrupts */
323 int (*config_intr)(struct phy_device *phydev);
324
325 /* Clears up any memory if needed */
326 void (*remove)(struct phy_device *phydev);
327
328 struct device_driver driver;
329};
330#define to_phy_driver(d) container_of(d, struct phy_driver, driver)
331
332int phy_read(struct phy_device *phydev, u16 regnum);
333int phy_write(struct phy_device *phydev, u16 regnum, u16 val);
334struct phy_device* get_phy_device(struct mii_bus *bus, int addr);
335int phy_clear_interrupt(struct phy_device *phydev);
336int phy_config_interrupt(struct phy_device *phydev, u32 interrupts);
337struct phy_device * phy_attach(struct net_device *dev,
338 const char *phy_id, u32 flags);
339struct phy_device * phy_connect(struct net_device *dev, const char *phy_id,
340 void (*handler)(struct net_device *), u32 flags);
341void phy_disconnect(struct phy_device *phydev);
342void phy_detach(struct phy_device *phydev);
343void phy_start(struct phy_device *phydev);
344void phy_stop(struct phy_device *phydev);
345int phy_start_aneg(struct phy_device *phydev);
346
347int mdiobus_register(struct mii_bus *bus);
348void mdiobus_unregister(struct mii_bus *bus);
349void phy_sanitize_settings(struct phy_device *phydev);
350int phy_stop_interrupts(struct phy_device *phydev);
351
352static inline int phy_read_status(struct phy_device *phydev) {
353 return phydev->drv->read_status(phydev);
354}
355
356int genphy_config_advert(struct phy_device *phydev);
357int genphy_setup_forced(struct phy_device *phydev);
358int genphy_restart_aneg(struct phy_device *phydev);
359int genphy_config_aneg(struct phy_device *phydev);
360int genphy_update_link(struct phy_device *phydev);
361int genphy_read_status(struct phy_device *phydev);
362void phy_driver_unregister(struct phy_driver *drv);
363int phy_driver_register(struct phy_driver *new_driver);
364void phy_prepare_link(struct phy_device *phydev,
365 void (*adjust_link)(struct net_device *));
366void phy_start_machine(struct phy_device *phydev,
367 void (*handler)(struct net_device *));
368void phy_stop_machine(struct phy_device *phydev);
369int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd);
370int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd);
371int phy_mii_ioctl(struct phy_device *phydev,
372 struct mii_ioctl_data *mii_data, int cmd);
373int phy_start_interrupts(struct phy_device *phydev);
374void phy_print_status(struct phy_device *phydev);
375
376extern struct bus_type mdio_bus_type;
377#endif /* __PHY_H */
diff --git a/include/linux/random.h b/include/linux/random.h
index cc6703449916..7b2adb3322d5 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -59,6 +59,8 @@ extern __u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr,
59 __u16 sport, __u16 dport); 59 __u16 sport, __u16 dport);
60extern __u32 secure_tcpv6_sequence_number(__u32 *saddr, __u32 *daddr, 60extern __u32 secure_tcpv6_sequence_number(__u32 *saddr, __u32 *daddr,
61 __u16 sport, __u16 dport); 61 __u16 sport, __u16 dport);
62extern u64 secure_dccp_sequence_number(__u32 saddr, __u32 daddr,
63 __u16 sport, __u16 dport);
62 64
63#ifndef MODULE 65#ifndef MODULE
64extern struct file_operations random_fops, urandom_fops; 66extern struct file_operations random_fops, urandom_fops;
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 657c05ab8f9e..c231e9a08f0b 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -826,9 +826,8 @@ enum
826#define TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg)))) 826#define TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg))))
827#define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg)) 827#define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg))
828 828
829 829#ifndef __KERNEL__
830/* RTnetlink multicast groups */ 830/* RTnetlink multicast groups - backwards compatibility for userspace */
831
832#define RTMGRP_LINK 1 831#define RTMGRP_LINK 1
833#define RTMGRP_NOTIFY 2 832#define RTMGRP_NOTIFY 2
834#define RTMGRP_NEIGH 4 833#define RTMGRP_NEIGH 4
@@ -847,6 +846,43 @@ enum
847#define RTMGRP_DECnet_ROUTE 0x4000 846#define RTMGRP_DECnet_ROUTE 0x4000
848 847
849#define RTMGRP_IPV6_PREFIX 0x20000 848#define RTMGRP_IPV6_PREFIX 0x20000
849#endif
850
851/* RTnetlink multicast groups */
852enum rtnetlink_groups {
853 RTNLGRP_NONE,
854#define RTNLGRP_NONE RTNLGRP_NONE
855 RTNLGRP_LINK,
856#define RTNLGRP_LINK RTNLGRP_LINK
857 RTNLGRP_NOTIFY,
858#define RTNLGRP_NOTIFY RTNLGRP_NOTIFY
859 RTNLGRP_NEIGH,
860#define RTNLGRP_NEIGH RTNLGRP_NEIGH
861 RTNLGRP_TC,
862#define RTNLGRP_TC RTNLGRP_TC
863 RTNLGRP_IPV4_IFADDR,
864#define RTNLGRP_IPV4_IFADDR RTNLGRP_IPV4_IFADDR
865 RTNLGRP_IPV4_MROUTE,
866#define RTNLGRP_IPV4_MROUTE RTNLGRP_IPV4_MROUTE
867 RTNLGRP_IPV4_ROUTE,
868#define RTNLGRP_IPV4_ROUTE RTNLGRP_IPV4_ROUTE
869 RTNLGRP_IPV6_IFADDR,
870#define RTNLGRP_IPV6_IFADDR RTNLGRP_IPV6_IFADDR
871 RTNLGRP_IPV6_MROUTE,
872#define RTNLGRP_IPV6_MROUTE RTNLGRP_IPV6_MROUTE
873 RTNLGRP_IPV6_ROUTE,
874#define RTNLGRP_IPV6_ROUTE RTNLGRP_IPV6_ROUTE
875 RTNLGRP_IPV6_IFINFO,
876#define RTNLGRP_IPV6_IFINFO RTNLGRP_IPV6_IFINFO
877 RTNLGRP_DECnet_IFADDR,
878#define RTNLGRP_DECnet_IFADDR RTNLGRP_DECnet_IFADDR
879 RTNLGRP_DECnet_ROUTE,
880#define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE
881 RTNLGRP_IPV6_PREFIX,
882#define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX
883 __RTNLGRP_MAX
884};
885#define RTNLGRP_MAX (__RTNLGRP_MAX - 1)
850 886
851/* TC action piece */ 887/* TC action piece */
852struct tcamsg 888struct tcamsg
diff --git a/include/linux/security.h b/include/linux/security.h
index b42095a68b1c..7aab6ab7c57f 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -2727,7 +2727,8 @@ static inline int security_socket_getpeersec(struct socket *sock, char __user *o
2727 return security_ops->socket_getpeersec(sock, optval, optlen, len); 2727 return security_ops->socket_getpeersec(sock, optval, optlen, len);
2728} 2728}
2729 2729
2730static inline int security_sk_alloc(struct sock *sk, int family, int priority) 2730static inline int security_sk_alloc(struct sock *sk, int family,
2731 unsigned int __nocast priority)
2731{ 2732{
2732 return security_ops->sk_alloc_security(sk, family, priority); 2733 return security_ops->sk_alloc_security(sk, family, priority);
2733} 2734}
@@ -2844,7 +2845,8 @@ static inline int security_socket_getpeersec(struct socket *sock, char __user *o
2844 return -ENOPROTOOPT; 2845 return -ENOPROTOOPT;
2845} 2846}
2846 2847
2847static inline int security_sk_alloc(struct sock *sk, int family, int priority) 2848static inline int security_sk_alloc(struct sock *sk, int family,
2849 unsigned int __nocast priority)
2848{ 2850{
2849 return 0; 2851 return 0;
2850} 2852}
diff --git a/include/linux/selinux_netlink.h b/include/linux/selinux_netlink.h
index 957e6ebca4e6..bbf489decd84 100644
--- a/include/linux/selinux_netlink.h
+++ b/include/linux/selinux_netlink.h
@@ -20,10 +20,21 @@ enum {
20 SELNL_MSG_MAX 20 SELNL_MSG_MAX
21}; 21};
22 22
23/* Multicast groups */ 23#ifndef __KERNEL__
24/* Multicast groups - backwards compatiblility for userspace */
24#define SELNL_GRP_NONE 0x00000000 25#define SELNL_GRP_NONE 0x00000000
25#define SELNL_GRP_AVC 0x00000001 /* AVC notifications */ 26#define SELNL_GRP_AVC 0x00000001 /* AVC notifications */
26#define SELNL_GRP_ALL 0xffffffff 27#define SELNL_GRP_ALL 0xffffffff
28#endif
29
30enum selinux_nlgroups {
31 SELNLGRP_NONE,
32#define SELNLGRP_NONE SELNLGRP_NONE
33 SELNLGRP_AVC,
34#define SELNLGRP_AVC SELNLGRP_AVC
35 __SELNLGRP_MAX
36};
37#define SELNLGRP_MAX (__SELNLGRP_MAX - 1)
27 38
28/* Message structures */ 39/* Message structures */
29struct selnl_msg_setenforce { 40struct selnl_msg_setenforce {
diff --git a/include/linux/serialP.h b/include/linux/serialP.h
index 2b2f35a64d75..2b9e6b9554d5 100644
--- a/include/linux/serialP.h
+++ b/include/linux/serialP.h
@@ -140,44 +140,4 @@ struct rs_multiport_struct {
140#define ALPHA_KLUDGE_MCR 0 140#define ALPHA_KLUDGE_MCR 0
141#endif 141#endif
142 142
143/*
144 * Definitions for PCI support.
145 */
146#define SPCI_FL_BASE_MASK 0x0007
147#define SPCI_FL_BASE0 0x0000
148#define SPCI_FL_BASE1 0x0001
149#define SPCI_FL_BASE2 0x0002
150#define SPCI_FL_BASE3 0x0003
151#define SPCI_FL_BASE4 0x0004
152#define SPCI_FL_GET_BASE(x) (x & SPCI_FL_BASE_MASK)
153
154#define SPCI_FL_IRQ_MASK (0x0007 << 4)
155#define SPCI_FL_IRQBASE0 (0x0000 << 4)
156#define SPCI_FL_IRQBASE1 (0x0001 << 4)
157#define SPCI_FL_IRQBASE2 (0x0002 << 4)
158#define SPCI_FL_IRQBASE3 (0x0003 << 4)
159#define SPCI_FL_IRQBASE4 (0x0004 << 4)
160#define SPCI_FL_GET_IRQBASE(x) ((x & SPCI_FL_IRQ_MASK) >> 4)
161
162/* Use successive BARs (PCI base address registers),
163 else use offset into some specified BAR */
164#define SPCI_FL_BASE_TABLE 0x0100
165
166/* Use successive entries in the irq resource table */
167#define SPCI_FL_IRQ_TABLE 0x0200
168
169/* Use the irq resource table instead of dev->irq */
170#define SPCI_FL_IRQRESOURCE 0x0400
171
172/* Use the Base address register size to cap number of ports */
173#define SPCI_FL_REGION_SZ_CAP 0x0800
174
175/* Do not use irq sharing for this device */
176#define SPCI_FL_NO_SHIRQ 0x1000
177
178/* This is a PNP device */
179#define SPCI_FL_ISPNP 0x2000
180
181#define SPCI_FL_PNPDEFAULT (SPCI_FL_IRQRESOURCE|SPCI_FL_ISPNP)
182
183#endif /* _LINUX_SERIAL_H */ 143#endif /* _LINUX_SERIAL_H */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 948527e42a60..42edce6abe23 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -155,16 +155,29 @@ struct skb_shared_info {
155#define SKB_DATAREF_SHIFT 16 155#define SKB_DATAREF_SHIFT 16
156#define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1) 156#define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1)
157 157
158extern struct timeval skb_tv_base;
159
160struct skb_timeval {
161 u32 off_sec;
162 u32 off_usec;
163};
164
165
166enum {
167 SKB_FCLONE_UNAVAILABLE,
168 SKB_FCLONE_ORIG,
169 SKB_FCLONE_CLONE,
170};
171
158/** 172/**
159 * struct sk_buff - socket buffer 173 * struct sk_buff - socket buffer
160 * @next: Next buffer in list 174 * @next: Next buffer in list
161 * @prev: Previous buffer in list 175 * @prev: Previous buffer in list
162 * @list: List we are on 176 * @list: List we are on
163 * @sk: Socket we are owned by 177 * @sk: Socket we are owned by
164 * @stamp: Time we arrived 178 * @tstamp: Time we arrived stored as offset to skb_tv_base
165 * @dev: Device we arrived on/are leaving by 179 * @dev: Device we arrived on/are leaving by
166 * @input_dev: Device we arrived on 180 * @input_dev: Device we arrived on
167 * @real_dev: The real device we are using
168 * @h: Transport layer header 181 * @h: Transport layer header
169 * @nh: Network layer header 182 * @nh: Network layer header
170 * @mac: Link layer header 183 * @mac: Link layer header
@@ -190,14 +203,11 @@ struct skb_shared_info {
190 * @end: End pointer 203 * @end: End pointer
191 * @destructor: Destruct function 204 * @destructor: Destruct function
192 * @nfmark: Can be used for communication between hooks 205 * @nfmark: Can be used for communication between hooks
193 * @nfcache: Cache info
194 * @nfct: Associated connection, if any 206 * @nfct: Associated connection, if any
195 * @nfctinfo: Relationship of this skb to the connection 207 * @nfctinfo: Relationship of this skb to the connection
196 * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c 208 * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
197 * @private: Data which is private to the HIPPI implementation
198 * @tc_index: Traffic control index 209 * @tc_index: Traffic control index
199 * @tc_verd: traffic control verdict 210 * @tc_verd: traffic control verdict
200 * @tc_classid: traffic control classid
201 */ 211 */
202 212
203struct sk_buff { 213struct sk_buff {
@@ -205,12 +215,10 @@ struct sk_buff {
205 struct sk_buff *next; 215 struct sk_buff *next;
206 struct sk_buff *prev; 216 struct sk_buff *prev;
207 217
208 struct sk_buff_head *list;
209 struct sock *sk; 218 struct sock *sk;
210 struct timeval stamp; 219 struct skb_timeval tstamp;
211 struct net_device *dev; 220 struct net_device *dev;
212 struct net_device *input_dev; 221 struct net_device *input_dev;
213 struct net_device *real_dev;
214 222
215 union { 223 union {
216 struct tcphdr *th; 224 struct tcphdr *th;
@@ -252,33 +260,28 @@ struct sk_buff {
252 __u8 local_df:1, 260 __u8 local_df:1,
253 cloned:1, 261 cloned:1,
254 ip_summed:2, 262 ip_summed:2,
255 nohdr:1; 263 nohdr:1,
256 /* 3 bits spare */ 264 nfctinfo:3;
257 __u8 pkt_type; 265 __u8 pkt_type:3,
266 fclone:2;
258 __be16 protocol; 267 __be16 protocol;
259 268
260 void (*destructor)(struct sk_buff *skb); 269 void (*destructor)(struct sk_buff *skb);
261#ifdef CONFIG_NETFILTER 270#ifdef CONFIG_NETFILTER
262 unsigned long nfmark; 271 __u32 nfmark;
263 __u32 nfcache;
264 __u32 nfctinfo;
265 struct nf_conntrack *nfct; 272 struct nf_conntrack *nfct;
273#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
274 __u8 ipvs_property:1;
275#endif
266#ifdef CONFIG_BRIDGE_NETFILTER 276#ifdef CONFIG_BRIDGE_NETFILTER
267 struct nf_bridge_info *nf_bridge; 277 struct nf_bridge_info *nf_bridge;
268#endif 278#endif
269#endif /* CONFIG_NETFILTER */ 279#endif /* CONFIG_NETFILTER */
270#if defined(CONFIG_HIPPI)
271 union {
272 __u32 ifield;
273 } private;
274#endif
275#ifdef CONFIG_NET_SCHED 280#ifdef CONFIG_NET_SCHED
276 __u32 tc_index; /* traffic control index */ 281 __u16 tc_index; /* traffic control index */
277#ifdef CONFIG_NET_CLS_ACT 282#ifdef CONFIG_NET_CLS_ACT
278 __u32 tc_verd; /* traffic control verdict */ 283 __u16 tc_verd; /* traffic control verdict */
279 __u32 tc_classid; /* traffic control classid */
280#endif 284#endif
281
282#endif 285#endif
283 286
284 287
@@ -300,8 +303,20 @@ struct sk_buff {
300#include <asm/system.h> 303#include <asm/system.h>
301 304
302extern void __kfree_skb(struct sk_buff *skb); 305extern void __kfree_skb(struct sk_buff *skb);
303extern struct sk_buff *alloc_skb(unsigned int size, 306extern struct sk_buff *__alloc_skb(unsigned int size,
304 unsigned int __nocast priority); 307 unsigned int __nocast priority, int fclone);
308static inline struct sk_buff *alloc_skb(unsigned int size,
309 unsigned int __nocast priority)
310{
311 return __alloc_skb(size, priority, 0);
312}
313
314static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
315 unsigned int __nocast priority)
316{
317 return __alloc_skb(size, priority, 1);
318}
319
305extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, 320extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
306 unsigned int size, 321 unsigned int size,
307 unsigned int __nocast priority); 322 unsigned int __nocast priority);
@@ -597,7 +612,6 @@ static inline void __skb_queue_head(struct sk_buff_head *list,
597{ 612{
598 struct sk_buff *prev, *next; 613 struct sk_buff *prev, *next;
599 614
600 newsk->list = list;
601 list->qlen++; 615 list->qlen++;
602 prev = (struct sk_buff *)list; 616 prev = (struct sk_buff *)list;
603 next = prev->next; 617 next = prev->next;
@@ -622,7 +636,6 @@ static inline void __skb_queue_tail(struct sk_buff_head *list,
622{ 636{
623 struct sk_buff *prev, *next; 637 struct sk_buff *prev, *next;
624 638
625 newsk->list = list;
626 list->qlen++; 639 list->qlen++;
627 next = (struct sk_buff *)list; 640 next = (struct sk_buff *)list;
628 prev = next->prev; 641 prev = next->prev;
@@ -655,7 +668,6 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
655 next->prev = prev; 668 next->prev = prev;
656 prev->next = next; 669 prev->next = next;
657 result->next = result->prev = NULL; 670 result->next = result->prev = NULL;
658 result->list = NULL;
659 } 671 }
660 return result; 672 return result;
661} 673}
@@ -664,7 +676,7 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
664/* 676/*
665 * Insert a packet on a list. 677 * Insert a packet on a list.
666 */ 678 */
667extern void skb_insert(struct sk_buff *old, struct sk_buff *newsk); 679extern void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list);
668static inline void __skb_insert(struct sk_buff *newsk, 680static inline void __skb_insert(struct sk_buff *newsk,
669 struct sk_buff *prev, struct sk_buff *next, 681 struct sk_buff *prev, struct sk_buff *next,
670 struct sk_buff_head *list) 682 struct sk_buff_head *list)
@@ -672,24 +684,23 @@ static inline void __skb_insert(struct sk_buff *newsk,
672 newsk->next = next; 684 newsk->next = next;
673 newsk->prev = prev; 685 newsk->prev = prev;
674 next->prev = prev->next = newsk; 686 next->prev = prev->next = newsk;
675 newsk->list = list;
676 list->qlen++; 687 list->qlen++;
677} 688}
678 689
679/* 690/*
680 * Place a packet after a given packet in a list. 691 * Place a packet after a given packet in a list.
681 */ 692 */
682extern void skb_append(struct sk_buff *old, struct sk_buff *newsk); 693extern void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list);
683static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk) 694static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
684{ 695{
685 __skb_insert(newsk, old, old->next, old->list); 696 __skb_insert(newsk, old, old->next, list);
686} 697}
687 698
688/* 699/*
689 * remove sk_buff from list. _Must_ be called atomically, and with 700 * remove sk_buff from list. _Must_ be called atomically, and with
690 * the list known.. 701 * the list known..
691 */ 702 */
692extern void skb_unlink(struct sk_buff *skb); 703extern void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list);
693static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) 704static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
694{ 705{
695 struct sk_buff *next, *prev; 706 struct sk_buff *next, *prev;
@@ -698,7 +709,6 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
698 next = skb->next; 709 next = skb->next;
699 prev = skb->prev; 710 prev = skb->prev;
700 skb->next = skb->prev = NULL; 711 skb->next = skb->prev = NULL;
701 skb->list = NULL;
702 next->prev = prev; 712 next->prev = prev;
703 prev->next = next; 713 prev->next = next;
704} 714}
@@ -1213,6 +1223,8 @@ extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
1213extern void skb_split(struct sk_buff *skb, 1223extern void skb_split(struct sk_buff *skb,
1214 struct sk_buff *skb1, const u32 len); 1224 struct sk_buff *skb1, const u32 len);
1215 1225
1226extern void skb_release_data(struct sk_buff *skb);
1227
1216static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, 1228static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
1217 int len, void *buffer) 1229 int len, void *buffer)
1218{ 1230{
@@ -1230,6 +1242,42 @@ static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
1230extern void skb_init(void); 1242extern void skb_init(void);
1231extern void skb_add_mtu(int mtu); 1243extern void skb_add_mtu(int mtu);
1232 1244
1245/**
1246 * skb_get_timestamp - get timestamp from a skb
1247 * @skb: skb to get stamp from
1248 * @stamp: pointer to struct timeval to store stamp in
1249 *
1250 * Timestamps are stored in the skb as offsets to a base timestamp.
1251 * This function converts the offset back to a struct timeval and stores
1252 * it in stamp.
1253 */
1254static inline void skb_get_timestamp(struct sk_buff *skb, struct timeval *stamp)
1255{
1256 stamp->tv_sec = skb->tstamp.off_sec;
1257 stamp->tv_usec = skb->tstamp.off_usec;
1258 if (skb->tstamp.off_sec) {
1259 stamp->tv_sec += skb_tv_base.tv_sec;
1260 stamp->tv_usec += skb_tv_base.tv_usec;
1261 }
1262}
1263
1264/**
1265 * skb_set_timestamp - set timestamp of a skb
1266 * @skb: skb to set stamp of
1267 * @stamp: pointer to struct timeval to get stamp from
1268 *
1269 * Timestamps are stored in the skb as offsets to a base timestamp.
1270 * This function converts a struct timeval to an offset and stores
1271 * it in the skb.
1272 */
1273static inline void skb_set_timestamp(struct sk_buff *skb, struct timeval *stamp)
1274{
1275 skb->tstamp.off_sec = stamp->tv_sec - skb_tv_base.tv_sec;
1276 skb->tstamp.off_usec = stamp->tv_usec - skb_tv_base.tv_usec;
1277}
1278
1279extern void __net_timestamp(struct sk_buff *skb);
1280
1233#ifdef CONFIG_NETFILTER 1281#ifdef CONFIG_NETFILTER
1234static inline void nf_conntrack_put(struct nf_conntrack *nfct) 1282static inline void nf_conntrack_put(struct nf_conntrack *nfct)
1235{ 1283{
diff --git a/include/linux/socket.h b/include/linux/socket.h
index a5c7d96e4d2e..1739c2d5b95b 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -26,6 +26,13 @@ struct __kernel_sockaddr_storage {
26#include <linux/types.h> /* pid_t */ 26#include <linux/types.h> /* pid_t */
27#include <linux/compiler.h> /* __user */ 27#include <linux/compiler.h> /* __user */
28 28
29extern int sysctl_somaxconn;
30extern void sock_init(void);
31#ifdef CONFIG_PROC_FS
32struct seq_file;
33extern void socket_seq_show(struct seq_file *seq);
34#endif
35
29typedef unsigned short sa_family_t; 36typedef unsigned short sa_family_t;
30 37
31/* 38/*
@@ -271,6 +278,8 @@ struct ucred {
271#define SOL_IRDA 266 278#define SOL_IRDA 266
272#define SOL_NETBEUI 267 279#define SOL_NETBEUI 267
273#define SOL_LLC 268 280#define SOL_LLC 268
281#define SOL_DCCP 269
282#define SOL_NETLINK 270
274 283
275/* IPX options */ 284/* IPX options */
276#define IPX_TYPE 1 285#define IPX_TYPE 1
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index e4fd82e42104..ac4ca44c75ca 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -55,24 +55,6 @@ struct tcphdr {
55 __u16 urg_ptr; 55 __u16 urg_ptr;
56}; 56};
57 57
58
59enum {
60 TCP_ESTABLISHED = 1,
61 TCP_SYN_SENT,
62 TCP_SYN_RECV,
63 TCP_FIN_WAIT1,
64 TCP_FIN_WAIT2,
65 TCP_TIME_WAIT,
66 TCP_CLOSE,
67 TCP_CLOSE_WAIT,
68 TCP_LAST_ACK,
69 TCP_LISTEN,
70 TCP_CLOSING, /* now a valid state */
71
72 TCP_MAX_STATES /* Leave at the end! */
73};
74
75#define TCP_STATE_MASK 0xF
76#define TCP_ACTION_FIN (1 << 7) 58#define TCP_ACTION_FIN (1 << 7)
77 59
78enum { 60enum {
@@ -195,8 +177,9 @@ struct tcp_info
195 177
196#include <linux/config.h> 178#include <linux/config.h>
197#include <linux/skbuff.h> 179#include <linux/skbuff.h>
198#include <linux/ip.h>
199#include <net/sock.h> 180#include <net/sock.h>
181#include <net/inet_connection_sock.h>
182#include <net/inet_timewait_sock.h>
200 183
201/* This defines a selective acknowledgement block. */ 184/* This defines a selective acknowledgement block. */
202struct tcp_sack_block { 185struct tcp_sack_block {
@@ -236,8 +219,8 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
236} 219}
237 220
238struct tcp_sock { 221struct tcp_sock {
239 /* inet_sock has to be the first member of tcp_sock */ 222 /* inet_connection_sock has to be the first member of tcp_sock */
240 struct inet_sock inet; 223 struct inet_connection_sock inet_conn;
241 int tcp_header_len; /* Bytes of tcp header to send */ 224 int tcp_header_len; /* Bytes of tcp header to send */
242 225
243/* 226/*
@@ -258,19 +241,6 @@ struct tcp_sock {
258 __u32 snd_sml; /* Last byte of the most recently transmitted small packet */ 241 __u32 snd_sml; /* Last byte of the most recently transmitted small packet */
259 __u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ 242 __u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */
260 __u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ 243 __u32 lsndtime; /* timestamp of last sent data packet (for restart window) */
261 struct tcp_bind_bucket *bind_hash;
262 /* Delayed ACK control data */
263 struct {
264 __u8 pending; /* ACK is pending */
265 __u8 quick; /* Scheduled number of quick acks */
266 __u8 pingpong; /* The session is interactive */
267 __u8 blocked; /* Delayed ACK was blocked by socket lock*/
268 __u32 ato; /* Predicted tick of soft clock */
269 unsigned long timeout; /* Currently scheduled timeout */
270 __u32 lrcvtime; /* timestamp of last received data packet*/
271 __u16 last_seg_size; /* Size of last incoming segment */
272 __u16 rcv_mss; /* MSS used for delayed ACK decisions */
273 } ack;
274 244
275 /* Data for direct copy to user */ 245 /* Data for direct copy to user */
276 struct { 246 struct {
@@ -288,19 +258,15 @@ struct tcp_sock {
288 __u32 mss_cache; /* Cached effective mss, not including SACKS */ 258 __u32 mss_cache; /* Cached effective mss, not including SACKS */
289 __u16 xmit_size_goal; /* Goal for segmenting output packets */ 259 __u16 xmit_size_goal; /* Goal for segmenting output packets */
290 __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ 260 __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */
291 __u8 ca_state; /* State of fast-retransmit machine */
292 __u8 retransmits; /* Number of unrecovered RTO timeouts. */
293 261
294 __u16 advmss; /* Advertised MSS */
295 __u32 window_clamp; /* Maximal window to advertise */ 262 __u32 window_clamp; /* Maximal window to advertise */
296 __u32 rcv_ssthresh; /* Current window clamp */ 263 __u32 rcv_ssthresh; /* Current window clamp */
297 264
298 __u32 frto_highmark; /* snd_nxt when RTO occurred */ 265 __u32 frto_highmark; /* snd_nxt when RTO occurred */
299 __u8 reordering; /* Packet reordering metric. */ 266 __u8 reordering; /* Packet reordering metric. */
300 __u8 frto_counter; /* Number of new acks after RTO */ 267 __u8 frto_counter; /* Number of new acks after RTO */
301 268 __u8 nonagle; /* Disable Nagle algorithm? */
302 __u8 unused; 269 __u8 keepalive_probes; /* num of allowed keep alive probes */
303 __u8 defer_accept; /* User waits for some data after accept() */
304 270
305/* RTT measurement */ 271/* RTT measurement */
306 __u32 srtt; /* smoothed round trip time << 3 */ 272 __u32 srtt; /* smoothed round trip time << 3 */
@@ -308,19 +274,13 @@ struct tcp_sock {
308 __u32 mdev_max; /* maximal mdev for the last rtt period */ 274 __u32 mdev_max; /* maximal mdev for the last rtt period */
309 __u32 rttvar; /* smoothed mdev_max */ 275 __u32 rttvar; /* smoothed mdev_max */
310 __u32 rtt_seq; /* sequence number to update rttvar */ 276 __u32 rtt_seq; /* sequence number to update rttvar */
311 __u32 rto; /* retransmit timeout */
312 277
313 __u32 packets_out; /* Packets which are "in flight" */ 278 __u32 packets_out; /* Packets which are "in flight" */
314 __u32 left_out; /* Packets which leaved network */ 279 __u32 left_out; /* Packets which leaved network */
315 __u32 retrans_out; /* Retransmitted packets out */ 280 __u32 retrans_out; /* Retransmitted packets out */
316 __u8 backoff; /* backoff */
317/* 281/*
318 * Options received (usually on last packet, some only on SYN packets). 282 * Options received (usually on last packet, some only on SYN packets).
319 */ 283 */
320 __u8 nonagle; /* Disable Nagle algorithm? */
321 __u8 keepalive_probes; /* num of allowed keep alive probes */
322
323 __u8 probes_out; /* unanswered 0 window probes */
324 struct tcp_options_received rx_opt; 284 struct tcp_options_received rx_opt;
325 285
326/* 286/*
@@ -333,11 +293,6 @@ struct tcp_sock {
333 __u32 snd_cwnd_used; 293 __u32 snd_cwnd_used;
334 __u32 snd_cwnd_stamp; 294 __u32 snd_cwnd_stamp;
335 295
336 /* Two commonly used timers in both sender and receiver paths. */
337 unsigned long timeout;
338 struct timer_list retransmit_timer; /* Resend (no ack) */
339 struct timer_list delack_timer; /* Ack delay */
340
341 struct sk_buff_head out_of_order_queue; /* Out of order segments go here */ 296 struct sk_buff_head out_of_order_queue; /* Out of order segments go here */
342 297
343 struct tcp_func *af_specific; /* Operations which are AF_INET{4,6} specific */ 298 struct tcp_func *af_specific; /* Operations which are AF_INET{4,6} specific */
@@ -352,8 +307,7 @@ struct tcp_sock {
352 struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ 307 struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
353 struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/ 308 struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/
354 309
355 __u8 syn_retries; /* num of allowed syn retries */ 310 __u16 advmss; /* Advertised MSS */
356 __u8 ecn_flags; /* ECN status bits. */
357 __u16 prior_ssthresh; /* ssthresh saved at recovery start */ 311 __u16 prior_ssthresh; /* ssthresh saved at recovery start */
358 __u32 lost_out; /* Lost packets */ 312 __u32 lost_out; /* Lost packets */
359 __u32 sacked_out; /* SACK'd packets */ 313 __u32 sacked_out; /* SACK'd packets */
@@ -367,14 +321,12 @@ struct tcp_sock {
367 int undo_retrans; /* number of undoable retransmissions. */ 321 int undo_retrans; /* number of undoable retransmissions. */
368 __u32 urg_seq; /* Seq of received urgent pointer */ 322 __u32 urg_seq; /* Seq of received urgent pointer */
369 __u16 urg_data; /* Saved octet of OOB data and control flags */ 323 __u16 urg_data; /* Saved octet of OOB data and control flags */
370 __u8 pending; /* Scheduled timer event */
371 __u8 urg_mode; /* In urgent mode */ 324 __u8 urg_mode; /* In urgent mode */
325 __u8 ecn_flags; /* ECN status bits. */
372 __u32 snd_up; /* Urgent pointer */ 326 __u32 snd_up; /* Urgent pointer */
373 327
374 __u32 total_retrans; /* Total retransmits for entire connection */ 328 __u32 total_retrans; /* Total retransmits for entire connection */
375 329
376 struct request_sock_queue accept_queue; /* FIFO of established children */
377
378 unsigned int keepalive_time; /* time before keep alive takes place */ 330 unsigned int keepalive_time; /* time before keep alive takes place */
379 unsigned int keepalive_intvl; /* time interval between keep alive probes */ 331 unsigned int keepalive_intvl; /* time interval between keep alive probes */
380 int linger2; 332 int linger2;
@@ -394,11 +346,6 @@ struct tcp_sock {
394 __u32 seq; 346 __u32 seq;
395 __u32 time; 347 __u32 time;
396 } rcvq_space; 348 } rcvq_space;
397
398 /* Pluggable TCP congestion control hook */
399 struct tcp_congestion_ops *ca_ops;
400 u32 ca_priv[16];
401#define TCP_CA_PRIV_SIZE (16*sizeof(u32))
402}; 349};
403 350
404static inline struct tcp_sock *tcp_sk(const struct sock *sk) 351static inline struct tcp_sock *tcp_sk(const struct sock *sk)
@@ -406,9 +353,18 @@ static inline struct tcp_sock *tcp_sk(const struct sock *sk)
406 return (struct tcp_sock *)sk; 353 return (struct tcp_sock *)sk;
407} 354}
408 355
409static inline void *tcp_ca(const struct tcp_sock *tp) 356struct tcp_timewait_sock {
357 struct inet_timewait_sock tw_sk;
358 __u32 tw_rcv_nxt;
359 __u32 tw_snd_nxt;
360 __u32 tw_rcv_wnd;
361 __u32 tw_ts_recent;
362 long tw_ts_recent_stamp;
363};
364
365static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
410{ 366{
411 return (void *) tp->ca_priv; 367 return (struct tcp_timewait_sock *)sk;
412} 368}
413 369
414#endif 370#endif
diff --git a/include/linux/tcp_diag.h b/include/linux/tcp_diag.h
deleted file mode 100644
index 7a5996743946..000000000000
--- a/include/linux/tcp_diag.h
+++ /dev/null
@@ -1,127 +0,0 @@
1#ifndef _TCP_DIAG_H_
2#define _TCP_DIAG_H_ 1
3
4/* Just some random number */
5#define TCPDIAG_GETSOCK 18
6
7/* Socket identity */
8struct tcpdiag_sockid
9{
10 __u16 tcpdiag_sport;
11 __u16 tcpdiag_dport;
12 __u32 tcpdiag_src[4];
13 __u32 tcpdiag_dst[4];
14 __u32 tcpdiag_if;
15 __u32 tcpdiag_cookie[2];
16#define TCPDIAG_NOCOOKIE (~0U)
17};
18
19/* Request structure */
20
21struct tcpdiagreq
22{
23 __u8 tcpdiag_family; /* Family of addresses. */
24 __u8 tcpdiag_src_len;
25 __u8 tcpdiag_dst_len;
26 __u8 tcpdiag_ext; /* Query extended information */
27
28 struct tcpdiag_sockid id;
29
30 __u32 tcpdiag_states; /* States to dump */
31 __u32 tcpdiag_dbs; /* Tables to dump (NI) */
32};
33
34enum
35{
36 TCPDIAG_REQ_NONE,
37 TCPDIAG_REQ_BYTECODE,
38};
39
40#define TCPDIAG_REQ_MAX TCPDIAG_REQ_BYTECODE
41
42/* Bytecode is sequence of 4 byte commands followed by variable arguments.
43 * All the commands identified by "code" are conditional jumps forward:
44 * to offset cc+"yes" or to offset cc+"no". "yes" is supposed to be
45 * length of the command and its arguments.
46 */
47
48struct tcpdiag_bc_op
49{
50 unsigned char code;
51 unsigned char yes;
52 unsigned short no;
53};
54
55enum
56{
57 TCPDIAG_BC_NOP,
58 TCPDIAG_BC_JMP,
59 TCPDIAG_BC_S_GE,
60 TCPDIAG_BC_S_LE,
61 TCPDIAG_BC_D_GE,
62 TCPDIAG_BC_D_LE,
63 TCPDIAG_BC_AUTO,
64 TCPDIAG_BC_S_COND,
65 TCPDIAG_BC_D_COND,
66};
67
68struct tcpdiag_hostcond
69{
70 __u8 family;
71 __u8 prefix_len;
72 int port;
73 __u32 addr[0];
74};
75
76/* Base info structure. It contains socket identity (addrs/ports/cookie)
77 * and, alas, the information shown by netstat. */
78struct tcpdiagmsg
79{
80 __u8 tcpdiag_family;
81 __u8 tcpdiag_state;
82 __u8 tcpdiag_timer;
83 __u8 tcpdiag_retrans;
84
85 struct tcpdiag_sockid id;
86
87 __u32 tcpdiag_expires;
88 __u32 tcpdiag_rqueue;
89 __u32 tcpdiag_wqueue;
90 __u32 tcpdiag_uid;
91 __u32 tcpdiag_inode;
92};
93
94/* Extensions */
95
96enum
97{
98 TCPDIAG_NONE,
99 TCPDIAG_MEMINFO,
100 TCPDIAG_INFO,
101 TCPDIAG_VEGASINFO,
102 TCPDIAG_CONG,
103};
104
105#define TCPDIAG_MAX TCPDIAG_CONG
106
107
108/* TCPDIAG_MEM */
109
110struct tcpdiag_meminfo
111{
112 __u32 tcpdiag_rmem;
113 __u32 tcpdiag_wmem;
114 __u32 tcpdiag_fmem;
115 __u32 tcpdiag_tmem;
116};
117
118/* TCPDIAG_VEGASINFO */
119
120struct tcpvegas_info {
121 __u32 tcpv_enabled;
122 __u32 tcpv_rttcnt;
123 __u32 tcpv_rtt;
124 __u32 tcpv_minrtt;
125};
126
127#endif /* _TCP_DIAG_H_ */
diff --git a/include/linux/types.h b/include/linux/types.h
index dcb13f865df9..2b678c22ca4a 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -123,6 +123,9 @@ typedef __u64 u_int64_t;
123typedef __s64 int64_t; 123typedef __s64 int64_t;
124#endif 124#endif
125 125
126/* this is a special 64bit data type that is 8-byte aligned */
127#define aligned_u64 unsigned long long __attribute__((aligned(8)))
128
126/* 129/*
127 * The type used for indexing onto a disc or disc partition. 130 * The type used for indexing onto a disc or disc partition.
128 * If required, asm/types.h can override it and define 131 * If required, asm/types.h can override it and define
diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index f0d423300d84..0fb077d68441 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -258,9 +258,27 @@ struct xfrm_usersa_flush {
258 __u8 proto; 258 __u8 proto;
259}; 259};
260 260
261#ifndef __KERNEL__
262/* backwards compatibility for userspace */
261#define XFRMGRP_ACQUIRE 1 263#define XFRMGRP_ACQUIRE 1
262#define XFRMGRP_EXPIRE 2 264#define XFRMGRP_EXPIRE 2
263#define XFRMGRP_SA 4 265#define XFRMGRP_SA 4
264#define XFRMGRP_POLICY 8 266#define XFRMGRP_POLICY 8
267#endif
268
269enum xfrm_nlgroups {
270 XFRMNLGRP_NONE,
271#define XFRMNLGRP_NONE XFRMNLGRP_NONE
272 XFRMNLGRP_ACQUIRE,
273#define XFRMNLGRP_ACQUIRE XFRMNLGRP_ACQUIRE
274 XFRMNLGRP_EXPIRE,
275#define XFRMNLGRP_EXPIRE XFRMNLGRP_EXPIRE
276 XFRMNLGRP_SA,
277#define XFRMNLGRP_SA XFRMNLGRP_SA
278 XFRMNLGRP_POLICY,
279#define XFRMNLGRP_POLICY XFRMNLGRP_POLICY
280 __XFRMNLGRP_MAX
281};
282#define XFRMNLGRP_MAX (__XFRMNLGRP_MAX - 1)
265 283
266#endif /* _LINUX_XFRM_H */ 284#endif /* _LINUX_XFRM_H */
diff --git a/include/net/act_api.h b/include/net/act_api.h
index ed00a995f576..b55eb7c7f033 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -63,7 +63,7 @@ struct tc_action_ops
63 __u32 type; /* TBD to match kind */ 63 __u32 type; /* TBD to match kind */
64 __u32 capab; /* capabilities includes 4 bit version */ 64 __u32 capab; /* capabilities includes 4 bit version */
65 struct module *owner; 65 struct module *owner;
66 int (*act)(struct sk_buff **, struct tc_action *); 66 int (*act)(struct sk_buff **, struct tc_action *, struct tcf_result *);
67 int (*get_stats)(struct sk_buff *, struct tc_action *); 67 int (*get_stats)(struct sk_buff *, struct tc_action *);
68 int (*dump)(struct sk_buff *, struct tc_action *,int , int); 68 int (*dump)(struct sk_buff *, struct tc_action *,int , int);
69 int (*cleanup)(struct tc_action *, int bind); 69 int (*cleanup)(struct tc_action *, int bind);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index a0ed93672176..750e2508dd90 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -45,6 +45,7 @@ struct prefix_info {
45 45
46#ifdef __KERNEL__ 46#ifdef __KERNEL__
47 47
48#include <linux/config.h>
48#include <linux/netdevice.h> 49#include <linux/netdevice.h>
49#include <net/if_inet6.h> 50#include <net/if_inet6.h>
50#include <net/ipv6.h> 51#include <net/ipv6.h>
@@ -238,5 +239,10 @@ static inline int ipv6_addr_is_ll_all_routers(const struct in6_addr *addr)
238 addr->s6_addr32[3] == htonl(0x00000002)); 239 addr->s6_addr32[3] == htonl(0x00000002));
239} 240}
240 241
242#ifdef CONFIG_PROC_FS
243extern int if6_proc_init(void);
244extern void if6_proc_exit(void);
245#endif
246
241#endif 247#endif
242#endif 248#endif
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index b60b3846b9d1..b5d785ab4a0e 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -1,5 +1,11 @@
1#ifndef __LINUX_NET_AFUNIX_H 1#ifndef __LINUX_NET_AFUNIX_H
2#define __LINUX_NET_AFUNIX_H 2#define __LINUX_NET_AFUNIX_H
3
4#include <linux/config.h>
5#include <linux/socket.h>
6#include <linux/un.h>
7#include <net/sock.h>
8
3extern void unix_inflight(struct file *fp); 9extern void unix_inflight(struct file *fp);
4extern void unix_notinflight(struct file *fp); 10extern void unix_notinflight(struct file *fp);
5extern void unix_gc(void); 11extern void unix_gc(void);
@@ -74,5 +80,14 @@ struct unix_sock {
74 wait_queue_head_t peer_wait; 80 wait_queue_head_t peer_wait;
75}; 81};
76#define unix_sk(__sk) ((struct unix_sock *)__sk) 82#define unix_sk(__sk) ((struct unix_sock *)__sk)
83
84#ifdef CONFIG_SYSCTL
85extern int sysctl_unix_max_dgram_qlen;
86extern void unix_sysctl_register(void);
87extern void unix_sysctl_unregister(void);
88#else
89static inline void unix_sysctl_register(void) {}
90static inline void unix_sysctl_unregister(void) {}
91#endif
77#endif 92#endif
78#endif 93#endif
diff --git a/include/net/arp.h b/include/net/arp.h
index a1f09fad6a52..a13e30c35f42 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -11,7 +11,7 @@ extern struct neigh_table arp_tbl;
11 11
12extern void arp_init(void); 12extern void arp_init(void);
13extern int arp_rcv(struct sk_buff *skb, struct net_device *dev, 13extern int arp_rcv(struct sk_buff *skb, struct net_device *dev,
14 struct packet_type *pt); 14 struct packet_type *pt, struct net_device *orig_dev);
15extern int arp_find(unsigned char *haddr, struct sk_buff *skb); 15extern int arp_find(unsigned char *haddr, struct sk_buff *skb);
16extern int arp_ioctl(unsigned int cmd, void __user *arg); 16extern int arp_ioctl(unsigned int cmd, void __user *arg);
17extern void arp_send(int type, int ptype, u32 dest_ip, 17extern void arp_send(int type, int ptype, u32 dest_ip,
diff --git a/include/net/ax25.h b/include/net/ax25.h
index 3696f988a9f1..926eed543023 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -316,7 +316,7 @@ extern int ax25_protocol_is_registered(unsigned int);
316 316
317/* ax25_in.c */ 317/* ax25_in.c */
318extern int ax25_rx_iframe(ax25_cb *, struct sk_buff *); 318extern int ax25_rx_iframe(ax25_cb *, struct sk_buff *);
319extern int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *); 319extern int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *);
320 320
321/* ax25_ip.c */ 321/* ax25_ip.c */
322extern int ax25_encapsulate(struct sk_buff *, struct net_device *, unsigned short, void *, void *, unsigned int); 322extern int ax25_encapsulate(struct sk_buff *, struct net_device *, unsigned short, void *, void *, unsigned int);
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 06b24f637026..6dfa4a61ffd0 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -131,11 +131,12 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock);
131 131
132/* Skb helpers */ 132/* Skb helpers */
133struct bt_skb_cb { 133struct bt_skb_cb {
134 int incoming; 134 __u8 pkt_type;
135 __u8 incoming;
135}; 136};
136#define bt_cb(skb) ((struct bt_skb_cb *)(skb->cb)) 137#define bt_cb(skb) ((struct bt_skb_cb *)(skb->cb))
137 138
138static inline struct sk_buff *bt_skb_alloc(unsigned int len, int how) 139static inline struct sk_buff *bt_skb_alloc(unsigned int len, unsigned int __nocast how)
139{ 140{
140 struct sk_buff *skb; 141 struct sk_buff *skb;
141 142
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 6f0706f4af68..371e7d3f2e6f 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -453,6 +453,15 @@ struct inquiry_info_with_rssi {
453 __u16 clock_offset; 453 __u16 clock_offset;
454 __s8 rssi; 454 __s8 rssi;
455} __attribute__ ((packed)); 455} __attribute__ ((packed));
456struct inquiry_info_with_rssi_and_pscan_mode {
457 bdaddr_t bdaddr;
458 __u8 pscan_rep_mode;
459 __u8 pscan_period_mode;
460 __u8 pscan_mode;
461 __u8 dev_class[3];
462 __u16 clock_offset;
463 __s8 rssi;
464} __attribute__ ((packed));
456 465
457#define HCI_EV_CONN_COMPLETE 0x03 466#define HCI_EV_CONN_COMPLETE 0x03
458struct hci_ev_conn_complete { 467struct hci_ev_conn_complete {
@@ -584,6 +593,12 @@ struct hci_ev_clock_offset {
584 __u16 clock_offset; 593 __u16 clock_offset;
585} __attribute__ ((packed)); 594} __attribute__ ((packed));
586 595
596#define HCI_EV_PSCAN_REP_MODE 0x20
597struct hci_ev_pscan_rep_mode {
598 bdaddr_t bdaddr;
599 __u8 pscan_rep_mode;
600} __attribute__ ((packed));
601
587/* Internal events generated by Bluetooth stack */ 602/* Internal events generated by Bluetooth stack */
588#define HCI_EV_STACK_INTERNAL 0xFD 603#define HCI_EV_STACK_INTERNAL 0xFD
589struct hci_ev_stack_internal { 604struct hci_ev_stack_internal {
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 6d63a47c731b..7f933f302078 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -404,7 +404,7 @@ static inline int hci_recv_frame(struct sk_buff *skb)
404 bt_cb(skb)->incoming = 1; 404 bt_cb(skb)->incoming = 1;
405 405
406 /* Time stamp */ 406 /* Time stamp */
407 do_gettimeofday(&skb->stamp); 407 __net_timestamp(skb);
408 408
409 /* Queue frame for rx task */ 409 /* Queue frame for rx task */
410 skb_queue_tail(&hdev->rx_q, skb); 410 skb_queue_tail(&hdev->rx_q, skb);
diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h
index 13669bad00b3..ffea9d54071f 100644
--- a/include/net/bluetooth/rfcomm.h
+++ b/include/net/bluetooth/rfcomm.h
@@ -80,9 +80,9 @@
80#define RFCOMM_RPN_STOP_15 1 80#define RFCOMM_RPN_STOP_15 1
81 81
82#define RFCOMM_RPN_PARITY_NONE 0x0 82#define RFCOMM_RPN_PARITY_NONE 0x0
83#define RFCOMM_RPN_PARITY_ODD 0x4 83#define RFCOMM_RPN_PARITY_ODD 0x1
84#define RFCOMM_RPN_PARITY_EVEN 0x5 84#define RFCOMM_RPN_PARITY_EVEN 0x3
85#define RFCOMM_RPN_PARITY_MARK 0x6 85#define RFCOMM_RPN_PARITY_MARK 0x5
86#define RFCOMM_RPN_PARITY_SPACE 0x7 86#define RFCOMM_RPN_PARITY_SPACE 0x7
87 87
88#define RFCOMM_RPN_FLOW_NONE 0x00 88#define RFCOMM_RPN_FLOW_NONE 0x00
@@ -223,8 +223,14 @@ struct rfcomm_dlc {
223#define RFCOMM_CFC_DISABLED 0 223#define RFCOMM_CFC_DISABLED 0
224#define RFCOMM_CFC_ENABLED RFCOMM_MAX_CREDITS 224#define RFCOMM_CFC_ENABLED RFCOMM_MAX_CREDITS
225 225
226/* ---- RFCOMM SEND RPN ---- */
227int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci,
228 u8 bit_rate, u8 data_bits, u8 stop_bits,
229 u8 parity, u8 flow_ctrl_settings,
230 u8 xon_char, u8 xoff_char, u16 param_mask);
231
226/* ---- RFCOMM DLCs (channels) ---- */ 232/* ---- RFCOMM DLCs (channels) ---- */
227struct rfcomm_dlc *rfcomm_dlc_alloc(int prio); 233struct rfcomm_dlc *rfcomm_dlc_alloc(unsigned int __nocast prio);
228void rfcomm_dlc_free(struct rfcomm_dlc *d); 234void rfcomm_dlc_free(struct rfcomm_dlc *d);
229int rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, u8 channel); 235int rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, u8 channel);
230int rfcomm_dlc_close(struct rfcomm_dlc *d, int reason); 236int rfcomm_dlc_close(struct rfcomm_dlc *d, int reason);
diff --git a/include/net/datalink.h b/include/net/datalink.h
index 5797ba3d2eb5..deb7ca75db48 100644
--- a/include/net/datalink.h
+++ b/include/net/datalink.h
@@ -9,7 +9,7 @@ struct datalink_proto {
9 unsigned short header_length; 9 unsigned short header_length;
10 10
11 int (*rcvfunc)(struct sk_buff *, struct net_device *, 11 int (*rcvfunc)(struct sk_buff *, struct net_device *,
12 struct packet_type *); 12 struct packet_type *, struct net_device *);
13 int (*request)(struct datalink_proto *, struct sk_buff *, 13 int (*request)(struct datalink_proto *, struct sk_buff *,
14 unsigned char *); 14 unsigned char *);
15 struct list_head node; 15 struct list_head node;
diff --git a/include/net/dn.h b/include/net/dn.h
index 5551c46db397..c1dbbd222793 100644
--- a/include/net/dn.h
+++ b/include/net/dn.h
@@ -3,6 +3,7 @@
3 3
4#include <linux/dn.h> 4#include <linux/dn.h>
5#include <net/sock.h> 5#include <net/sock.h>
6#include <net/tcp.h>
6#include <asm/byteorder.h> 7#include <asm/byteorder.h>
7 8
8typedef unsigned short dn_address; 9typedef unsigned short dn_address;
diff --git a/include/net/icmp.h b/include/net/icmp.h
index e5ef0d15fb45..6cdebeee5f96 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -57,4 +57,11 @@ static inline struct raw_sock *raw_sk(const struct sock *sk)
57 return (struct raw_sock *)sk; 57 return (struct raw_sock *)sk;
58} 58}
59 59
60extern int sysctl_icmp_echo_ignore_all;
61extern int sysctl_icmp_echo_ignore_broadcasts;
62extern int sysctl_icmp_ignore_bogus_error_responses;
63extern int sysctl_icmp_errors_use_inbound_ifaddr;
64extern int sysctl_icmp_ratelimit;
65extern int sysctl_icmp_ratemask;
66
60#endif /* _ICMP_H */ 67#endif /* _ICMP_H */
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
new file mode 100644
index 000000000000..03df3b157960
--- /dev/null
+++ b/include/net/inet6_hashtables.h
@@ -0,0 +1,130 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Authors: Lotsa people, from code originally in tcp
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#ifndef _INET6_HASHTABLES_H
15#define _INET6_HASHTABLES_H
16
17#include <linux/config.h>
18
19#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
20#include <linux/in6.h>
21#include <linux/ipv6.h>
22#include <linux/types.h>
23
24#include <net/ipv6.h>
25
26struct inet_hashinfo;
27
28/* I have no idea if this is a good hash for v6 or not. -DaveM */
29static inline int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport,
30 const struct in6_addr *faddr, const u16 fport,
31 const int ehash_size)
32{
33 int hashent = (lport ^ fport);
34
35 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
36 hashent ^= hashent >> 16;
37 hashent ^= hashent >> 8;
38 return (hashent & (ehash_size - 1));
39}
40
41static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size)
42{
43 const struct inet_sock *inet = inet_sk(sk);
44 const struct ipv6_pinfo *np = inet6_sk(sk);
45 const struct in6_addr *laddr = &np->rcv_saddr;
46 const struct in6_addr *faddr = &np->daddr;
47 const __u16 lport = inet->num;
48 const __u16 fport = inet->dport;
49 return inet6_ehashfn(laddr, lport, faddr, fport, ehash_size);
50}
51
52/*
53 * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
54 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
55 *
56 * The sockhash lock must be held as a reader here.
57 */
58static inline struct sock *
59 __inet6_lookup_established(struct inet_hashinfo *hashinfo,
60 const struct in6_addr *saddr,
61 const u16 sport,
62 const struct in6_addr *daddr,
63 const u16 hnum,
64 const int dif)
65{
66 struct sock *sk;
67 const struct hlist_node *node;
68 const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
69 /* Optimize here for direct hit, only listening connections can
70 * have wildcards anyways.
71 */
72 const int hash = inet6_ehashfn(daddr, hnum, saddr, sport,
73 hashinfo->ehash_size);
74 struct inet_ehash_bucket *head = &hashinfo->ehash[hash];
75
76 read_lock(&head->lock);
77 sk_for_each(sk, node, &head->chain) {
78 /* For IPV6 do the cheaper port and family tests first. */
79 if (INET6_MATCH(sk, saddr, daddr, ports, dif))
80 goto hit; /* You sunk my battleship! */
81 }
82 /* Must check for a TIME_WAIT'er before going to listener hash. */
83 sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) {
84 const struct inet_timewait_sock *tw = inet_twsk(sk);
85
86 if(*((__u32 *)&(tw->tw_dport)) == ports &&
87 sk->sk_family == PF_INET6) {
88 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);
89
90 if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
91 ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
92 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
93 goto hit;
94 }
95 }
96 read_unlock(&head->lock);
97 return NULL;
98
99hit:
100 sock_hold(sk);
101 read_unlock(&head->lock);
102 return sk;
103}
104
105extern struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
106 const struct in6_addr *daddr,
107 const unsigned short hnum,
108 const int dif);
109
110static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo,
111 const struct in6_addr *saddr,
112 const u16 sport,
113 const struct in6_addr *daddr,
114 const u16 hnum,
115 const int dif)
116{
117 struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport,
118 daddr, hnum, dif);
119 if (sk)
120 return sk;
121
122 return inet6_lookup_listener(hashinfo, daddr, hnum, dif);
123}
124
125extern struct sock *inet6_lookup(struct inet_hashinfo *hashinfo,
126 const struct in6_addr *saddr, const u16 sport,
127 const struct in6_addr *daddr, const u16 dport,
128 const int dif);
129#endif /* defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) */
130#endif /* _INET6_HASHTABLES_H */
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index fbc1f4d140d8..f943306ce5ff 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -8,6 +8,11 @@ extern struct proto_ops inet_dgram_ops;
8 * INET4 prototypes used by INET6 8 * INET4 prototypes used by INET6
9 */ 9 */
10 10
11struct msghdr;
12struct sock;
13struct sockaddr;
14struct socket;
15
11extern void inet_remove_sock(struct sock *sk1); 16extern void inet_remove_sock(struct sock *sk1);
12extern void inet_put_sock(unsigned short num, 17extern void inet_put_sock(unsigned short num,
13 struct sock *sk); 18 struct sock *sk);
@@ -29,7 +34,6 @@ extern unsigned int inet_poll(struct file * file, struct socket *sock, struct p
29extern int inet_listen(struct socket *sock, int backlog); 34extern int inet_listen(struct socket *sock, int backlog);
30 35
31extern void inet_sock_destruct(struct sock *sk); 36extern void inet_sock_destruct(struct sock *sk);
32extern atomic_t inet_sock_nr;
33 37
34extern int inet_bind(struct socket *sock, 38extern int inet_bind(struct socket *sock,
35 struct sockaddr *uaddr, int addr_len); 39 struct sockaddr *uaddr, int addr_len);
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
new file mode 100644
index 000000000000..651f824c1008
--- /dev/null
+++ b/include/net/inet_connection_sock.h
@@ -0,0 +1,276 @@
1/*
2 * NET Generic infrastructure for INET connection oriented protocols.
3 *
4 * Definitions for inet_connection_sock
5 *
6 * Authors: Many people, see the TCP sources
7 *
8 * From code originally in TCP
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15#ifndef _INET_CONNECTION_SOCK_H
16#define _INET_CONNECTION_SOCK_H
17
18#include <linux/ip.h>
19#include <linux/string.h>
20#include <linux/timer.h>
21#include <net/request_sock.h>
22
23#define INET_CSK_DEBUG 1
24
25/* Cancel timers, when they are not required. */
26#undef INET_CSK_CLEAR_TIMERS
27
28struct inet_bind_bucket;
29struct inet_hashinfo;
30struct tcp_congestion_ops;
31
32/** inet_connection_sock - INET connection oriented sock
33 *
34 * @icsk_accept_queue: FIFO of established children
35 * @icsk_bind_hash: Bind node
36 * @icsk_timeout: Timeout
37 * @icsk_retransmit_timer: Resend (no ack)
38 * @icsk_rto: Retransmit timeout
39 * @icsk_ca_ops Pluggable congestion control hook
40 * @icsk_ca_state: Congestion control state
41 * @icsk_retransmits: Number of unrecovered [RTO] timeouts
42 * @icsk_pending: Scheduled timer event
43 * @icsk_backoff: Backoff
44 * @icsk_syn_retries: Number of allowed SYN (or equivalent) retries
45 * @icsk_probes_out: unanswered 0 window probes
46 * @icsk_ack: Delayed ACK control data
47 */
48struct inet_connection_sock {
49 /* inet_sock has to be the first member! */
50 struct inet_sock icsk_inet;
51 struct request_sock_queue icsk_accept_queue;
52 struct inet_bind_bucket *icsk_bind_hash;
53 unsigned long icsk_timeout;
54 struct timer_list icsk_retransmit_timer;
55 struct timer_list icsk_delack_timer;
56 __u32 icsk_rto;
57 struct tcp_congestion_ops *icsk_ca_ops;
58 __u8 icsk_ca_state;
59 __u8 icsk_retransmits;
60 __u8 icsk_pending;
61 __u8 icsk_backoff;
62 __u8 icsk_syn_retries;
63 __u8 icsk_probes_out;
64 /* 2 BYTES HOLE, TRY TO PACK! */
65 struct {
66 __u8 pending; /* ACK is pending */
67 __u8 quick; /* Scheduled number of quick acks */
68 __u8 pingpong; /* The session is interactive */
69 __u8 blocked; /* Delayed ACK was blocked by socket lock */
70 __u32 ato; /* Predicted tick of soft clock */
71 unsigned long timeout; /* Currently scheduled timeout */
72 __u32 lrcvtime; /* timestamp of last received data packet */
73 __u16 last_seg_size; /* Size of last incoming segment */
74 __u16 rcv_mss; /* MSS used for delayed ACK decisions */
75 } icsk_ack;
76 u32 icsk_ca_priv[16];
77#define ICSK_CA_PRIV_SIZE (16 * sizeof(u32))
78};
79
80#define ICSK_TIME_RETRANS 1 /* Retransmit timer */
81#define ICSK_TIME_DACK 2 /* Delayed ack timer */
82#define ICSK_TIME_PROBE0 3 /* Zero window probe timer */
83#define ICSK_TIME_KEEPOPEN 4 /* Keepalive timer */
84
85static inline struct inet_connection_sock *inet_csk(const struct sock *sk)
86{
87 return (struct inet_connection_sock *)sk;
88}
89
90static inline void *inet_csk_ca(const struct sock *sk)
91{
92 return (void *)inet_csk(sk)->icsk_ca_priv;
93}
94
95extern struct sock *inet_csk_clone(struct sock *sk,
96 const struct request_sock *req,
97 const unsigned int __nocast priority);
98
99enum inet_csk_ack_state_t {
100 ICSK_ACK_SCHED = 1,
101 ICSK_ACK_TIMER = 2,
102 ICSK_ACK_PUSHED = 4
103};
104
105extern void inet_csk_init_xmit_timers(struct sock *sk,
106 void (*retransmit_handler)(unsigned long),
107 void (*delack_handler)(unsigned long),
108 void (*keepalive_handler)(unsigned long));
109extern void inet_csk_clear_xmit_timers(struct sock *sk);
110
111static inline void inet_csk_schedule_ack(struct sock *sk)
112{
113 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED;
114}
115
116static inline int inet_csk_ack_scheduled(const struct sock *sk)
117{
118 return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED;
119}
120
121static inline void inet_csk_delack_init(struct sock *sk)
122{
123 memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack));
124}
125
126extern void inet_csk_delete_keepalive_timer(struct sock *sk);
127extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout);
128
129#ifdef INET_CSK_DEBUG
130extern const char inet_csk_timer_bug_msg[];
131#endif
132
133static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
134{
135 struct inet_connection_sock *icsk = inet_csk(sk);
136
137 if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
138 icsk->icsk_pending = 0;
139#ifdef INET_CSK_CLEAR_TIMERS
140 sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
141#endif
142 } else if (what == ICSK_TIME_DACK) {
143 icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0;
144#ifdef INET_CSK_CLEAR_TIMERS
145 sk_stop_timer(sk, &icsk->icsk_delack_timer);
146#endif
147 }
148#ifdef INET_CSK_DEBUG
149 else {
150 pr_debug("%s", inet_csk_timer_bug_msg);
151 }
152#endif
153}
154
155/*
156 * Reset the retransmission timer
157 */
158static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
159 unsigned long when,
160 const unsigned long max_when)
161{
162 struct inet_connection_sock *icsk = inet_csk(sk);
163
164 if (when > max_when) {
165#ifdef INET_CSK_DEBUG
166 pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n",
167 sk, what, when, current_text_addr());
168#endif
169 when = max_when;
170 }
171
172 if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
173 icsk->icsk_pending = what;
174 icsk->icsk_timeout = jiffies + when;
175 sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
176 } else if (what == ICSK_TIME_DACK) {
177 icsk->icsk_ack.pending |= ICSK_ACK_TIMER;
178 icsk->icsk_ack.timeout = jiffies + when;
179 sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
180 }
181#ifdef INET_CSK_DEBUG
182 else {
183 pr_debug("%s", inet_csk_timer_bug_msg);
184 }
185#endif
186}
187
188extern struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);
189
190extern struct request_sock *inet_csk_search_req(const struct sock *sk,
191 struct request_sock ***prevp,
192 const __u16 rport,
193 const __u32 raddr,
194 const __u32 laddr);
195extern int inet_csk_get_port(struct inet_hashinfo *hashinfo,
196 struct sock *sk, unsigned short snum);
197
198extern struct dst_entry* inet_csk_route_req(struct sock *sk,
199 const struct request_sock *req);
200
201static inline void inet_csk_reqsk_queue_add(struct sock *sk,
202 struct request_sock *req,
203 struct sock *child)
204{
205 reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child);
206}
207
208extern void inet_csk_reqsk_queue_hash_add(struct sock *sk,
209 struct request_sock *req,
210 const unsigned timeout);
211
212static inline void inet_csk_reqsk_queue_removed(struct sock *sk,
213 struct request_sock *req)
214{
215 if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0)
216 inet_csk_delete_keepalive_timer(sk);
217}
218
219static inline void inet_csk_reqsk_queue_added(struct sock *sk,
220 const unsigned long timeout)
221{
222 if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0)
223 inet_csk_reset_keepalive_timer(sk, timeout);
224}
225
226static inline int inet_csk_reqsk_queue_len(const struct sock *sk)
227{
228 return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue);
229}
230
231static inline int inet_csk_reqsk_queue_young(const struct sock *sk)
232{
233 return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue);
234}
235
236static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
237{
238 return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue);
239}
240
241static inline void inet_csk_reqsk_queue_unlink(struct sock *sk,
242 struct request_sock *req,
243 struct request_sock **prev)
244{
245 reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev);
246}
247
248static inline void inet_csk_reqsk_queue_drop(struct sock *sk,
249 struct request_sock *req,
250 struct request_sock **prev)
251{
252 inet_csk_reqsk_queue_unlink(sk, req, prev);
253 inet_csk_reqsk_queue_removed(sk, req);
254 reqsk_free(req);
255}
256
257extern void inet_csk_reqsk_queue_prune(struct sock *parent,
258 const unsigned long interval,
259 const unsigned long timeout,
260 const unsigned long max_rto);
261
262extern void inet_csk_destroy_sock(struct sock *sk);
263
264/*
265 * LISTEN is a special case for poll..
266 */
267static inline unsigned int inet_csk_listen_poll(const struct sock *sk)
268{
269 return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ?
270 (POLLIN | POLLRDNORM) : 0;
271}
272
273extern int inet_csk_listen_start(struct sock *sk, const int nr_table_entries);
274extern void inet_csk_listen_stop(struct sock *sk);
275
276#endif /* _INET_CONNECTION_SOCK_H */
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
new file mode 100644
index 000000000000..646b6ea7fe26
--- /dev/null
+++ b/include/net/inet_hashtables.h
@@ -0,0 +1,427 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Authors: Lotsa people, from code originally in tcp
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#ifndef _INET_HASHTABLES_H
15#define _INET_HASHTABLES_H
16
17#include <linux/config.h>
18
19#include <linux/interrupt.h>
20#include <linux/ipv6.h>
21#include <linux/list.h>
22#include <linux/slab.h>
23#include <linux/socket.h>
24#include <linux/spinlock.h>
25#include <linux/types.h>
26#include <linux/wait.h>
27
28#include <net/inet_connection_sock.h>
29#include <net/route.h>
30#include <net/sock.h>
31#include <net/tcp_states.h>
32
33#include <asm/atomic.h>
34#include <asm/byteorder.h>
35
36/* This is for all connections with a full identity, no wildcards.
37 * New scheme, half the table is for TIME_WAIT, the other half is
38 * for the rest. I'll experiment with dynamic table growth later.
39 */
40struct inet_ehash_bucket {
41 rwlock_t lock;
42 struct hlist_head chain;
43} __attribute__((__aligned__(8)));
44
45/* There are a few simple rules, which allow for local port reuse by
46 * an application. In essence:
47 *
48 * 1) Sockets bound to different interfaces may share a local port.
49 * Failing that, goto test 2.
50 * 2) If all sockets have sk->sk_reuse set, and none of them are in
51 * TCP_LISTEN state, the port may be shared.
52 * Failing that, goto test 3.
53 * 3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local
54 * address, and none of them are the same, the port may be
55 * shared.
56 * Failing this, the port cannot be shared.
57 *
58 * The interesting point, is test #2. This is what an FTP server does
59 * all day. To optimize this case we use a specific flag bit defined
60 * below. As we add sockets to a bind bucket list, we perform a
61 * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN))
62 * As long as all sockets added to a bind bucket pass this test,
63 * the flag bit will be set.
64 * The resulting situation is that tcp_v[46]_verify_bind() can just check
65 * for this flag bit, if it is set and the socket trying to bind has
66 * sk->sk_reuse set, we don't even have to walk the owners list at all,
67 * we return that it is ok to bind this socket to the requested local port.
68 *
69 * Sounds like a lot of work, but it is worth it. In a more naive
70 * implementation (ie. current FreeBSD etc.) the entire list of ports
71 * must be walked for each data port opened by an ftp server. Needless
72 * to say, this does not scale at all. With a couple thousand FTP
73 * users logged onto your box, isn't it nice to know that new data
74 * ports are created in O(1) time? I thought so. ;-) -DaveM
75 */
76struct inet_bind_bucket {
77 unsigned short port;
78 signed short fastreuse;
79 struct hlist_node node;
80 struct hlist_head owners;
81};
82
83#define inet_bind_bucket_for_each(tb, node, head) \
84 hlist_for_each_entry(tb, node, head, node)
85
86struct inet_bind_hashbucket {
87 spinlock_t lock;
88 struct hlist_head chain;
89};
90
91/* This is for listening sockets, thus all sockets which possess wildcards. */
92#define INET_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */
93
94struct inet_hashinfo {
95 /* This is for sockets with full identity only. Sockets here will
96 * always be without wildcards and will have the following invariant:
97 *
98 * TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE
99 *
100 * First half of the table is for sockets not in TIME_WAIT, second half
101 * is for TIME_WAIT sockets only.
102 */
103 struct inet_ehash_bucket *ehash;
104
105 /* Ok, let's try this, I give up, we do need a local binding
106 * TCP hash as well as the others for fast bind/connect.
107 */
108 struct inet_bind_hashbucket *bhash;
109
110 int bhash_size;
111 int ehash_size;
112
113 /* All sockets in TCP_LISTEN state will be in here. This is the only
114 * table where wildcard'd TCP sockets can exist. Hash function here
115 * is just local port number.
116 */
117 struct hlist_head listening_hash[INET_LHTABLE_SIZE];
118
119 /* All the above members are written once at bootup and
120 * never written again _or_ are predominantly read-access.
121 *
122 * Now align to a new cache line as all the following members
123 * are often dirty.
124 */
125 rwlock_t lhash_lock ____cacheline_aligned;
126 atomic_t lhash_users;
127 wait_queue_head_t lhash_wait;
128 spinlock_t portalloc_lock;
129 kmem_cache_t *bind_bucket_cachep;
130 int port_rover;
131};
132
133static inline int inet_ehashfn(const __u32 laddr, const __u16 lport,
134 const __u32 faddr, const __u16 fport,
135 const int ehash_size)
136{
137 int h = (laddr ^ lport) ^ (faddr ^ fport);
138 h ^= h >> 16;
139 h ^= h >> 8;
140 return h & (ehash_size - 1);
141}
142
143static inline int inet_sk_ehashfn(const struct sock *sk, const int ehash_size)
144{
145 const struct inet_sock *inet = inet_sk(sk);
146 const __u32 laddr = inet->rcv_saddr;
147 const __u16 lport = inet->num;
148 const __u32 faddr = inet->daddr;
149 const __u16 fport = inet->dport;
150
151 return inet_ehashfn(laddr, lport, faddr, fport, ehash_size);
152}
153
154extern struct inet_bind_bucket *
155 inet_bind_bucket_create(kmem_cache_t *cachep,
156 struct inet_bind_hashbucket *head,
157 const unsigned short snum);
158extern void inet_bind_bucket_destroy(kmem_cache_t *cachep,
159 struct inet_bind_bucket *tb);
160
161static inline int inet_bhashfn(const __u16 lport, const int bhash_size)
162{
163 return lport & (bhash_size - 1);
164}
165
166extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
167 const unsigned short snum);
168
169/* These can have wildcards, don't try too hard. */
170static inline int inet_lhashfn(const unsigned short num)
171{
172 return num & (INET_LHTABLE_SIZE - 1);
173}
174
175static inline int inet_sk_listen_hashfn(const struct sock *sk)
176{
177 return inet_lhashfn(inet_sk(sk)->num);
178}
179
180/* Caller must disable local BH processing. */
181static inline void __inet_inherit_port(struct inet_hashinfo *table,
182 struct sock *sk, struct sock *child)
183{
184 const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size);
185 struct inet_bind_hashbucket *head = &table->bhash[bhash];
186 struct inet_bind_bucket *tb;
187
188 spin_lock(&head->lock);
189 tb = inet_csk(sk)->icsk_bind_hash;
190 sk_add_bind_node(child, &tb->owners);
191 inet_csk(child)->icsk_bind_hash = tb;
192 spin_unlock(&head->lock);
193}
194
195static inline void inet_inherit_port(struct inet_hashinfo *table,
196 struct sock *sk, struct sock *child)
197{
198 local_bh_disable();
199 __inet_inherit_port(table, sk, child);
200 local_bh_enable();
201}
202
203extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk);
204
205extern void inet_listen_wlock(struct inet_hashinfo *hashinfo);
206
207/*
208 * - We may sleep inside this lock.
209 * - If sleeping is not required (or called from BH),
210 * use plain read_(un)lock(&inet_hashinfo.lhash_lock).
211 */
212static inline void inet_listen_lock(struct inet_hashinfo *hashinfo)
213{
214 /* read_lock synchronizes to candidates to writers */
215 read_lock(&hashinfo->lhash_lock);
216 atomic_inc(&hashinfo->lhash_users);
217 read_unlock(&hashinfo->lhash_lock);
218}
219
220static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo)
221{
222 if (atomic_dec_and_test(&hashinfo->lhash_users))
223 wake_up(&hashinfo->lhash_wait);
224}
225
226static inline void __inet_hash(struct inet_hashinfo *hashinfo,
227 struct sock *sk, const int listen_possible)
228{
229 struct hlist_head *list;
230 rwlock_t *lock;
231
232 BUG_TRAP(sk_unhashed(sk));
233 if (listen_possible && sk->sk_state == TCP_LISTEN) {
234 list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
235 lock = &hashinfo->lhash_lock;
236 inet_listen_wlock(hashinfo);
237 } else {
238 sk->sk_hashent = inet_sk_ehashfn(sk, hashinfo->ehash_size);
239 list = &hashinfo->ehash[sk->sk_hashent].chain;
240 lock = &hashinfo->ehash[sk->sk_hashent].lock;
241 write_lock(lock);
242 }
243 __sk_add_node(sk, list);
244 sock_prot_inc_use(sk->sk_prot);
245 write_unlock(lock);
246 if (listen_possible && sk->sk_state == TCP_LISTEN)
247 wake_up(&hashinfo->lhash_wait);
248}
249
250static inline void inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk)
251{
252 if (sk->sk_state != TCP_CLOSE) {
253 local_bh_disable();
254 __inet_hash(hashinfo, sk, 1);
255 local_bh_enable();
256 }
257}
258
259static inline void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk)
260{
261 rwlock_t *lock;
262
263 if (sk_unhashed(sk))
264 goto out;
265
266 if (sk->sk_state == TCP_LISTEN) {
267 local_bh_disable();
268 inet_listen_wlock(hashinfo);
269 lock = &hashinfo->lhash_lock;
270 } else {
271 struct inet_ehash_bucket *head = &hashinfo->ehash[sk->sk_hashent];
272 lock = &head->lock;
273 write_lock_bh(&head->lock);
274 }
275
276 if (__sk_del_node_init(sk))
277 sock_prot_dec_use(sk->sk_prot);
278 write_unlock_bh(lock);
279out:
280 if (sk->sk_state == TCP_LISTEN)
281 wake_up(&hashinfo->lhash_wait);
282}
283
284static inline int inet_iif(const struct sk_buff *skb)
285{
286 return ((struct rtable *)skb->dst)->rt_iif;
287}
288
289extern struct sock *__inet_lookup_listener(const struct hlist_head *head,
290 const u32 daddr,
291 const unsigned short hnum,
292 const int dif);
293
294/* Optimize the common listener case. */
295static inline struct sock *
296 inet_lookup_listener(struct inet_hashinfo *hashinfo,
297 const u32 daddr,
298 const unsigned short hnum, const int dif)
299{
300 struct sock *sk = NULL;
301 const struct hlist_head *head;
302
303 read_lock(&hashinfo->lhash_lock);
304 head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
305 if (!hlist_empty(head)) {
306 const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
307
308 if (inet->num == hnum && !sk->sk_node.next &&
309 (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
310 (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
311 !sk->sk_bound_dev_if)
312 goto sherry_cache;
313 sk = __inet_lookup_listener(head, daddr, hnum, dif);
314 }
315 if (sk) {
316sherry_cache:
317 sock_hold(sk);
318 }
319 read_unlock(&hashinfo->lhash_lock);
320 return sk;
321}
322
323/* Socket demux engine toys. */
324#ifdef __BIG_ENDIAN
325#define INET_COMBINED_PORTS(__sport, __dport) \
326 (((__u32)(__sport) << 16) | (__u32)(__dport))
327#else /* __LITTLE_ENDIAN */
328#define INET_COMBINED_PORTS(__sport, __dport) \
329 (((__u32)(__dport) << 16) | (__u32)(__sport))
330#endif
331
332#if (BITS_PER_LONG == 64)
333#ifdef __BIG_ENDIAN
334#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
335 const __u64 __name = (((__u64)(__saddr)) << 32) | ((__u64)(__daddr));
336#else /* __LITTLE_ENDIAN */
337#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
338 const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr));
339#endif /* __BIG_ENDIAN */
340#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
341 (((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \
342 ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \
343 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
344#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
345 (((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \
346 ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \
347 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
348#else /* 32-bit arch */
349#define INET_ADDR_COOKIE(__name, __saddr, __daddr)
350#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \
351 ((inet_sk(__sk)->daddr == (__saddr)) && \
352 (inet_sk(__sk)->rcv_saddr == (__daddr)) && \
353 ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \
354 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
355#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \
356 ((inet_twsk(__sk)->tw_daddr == (__saddr)) && \
357 (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \
358 ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \
359 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
360#endif /* 64-bit arch */
361
362/*
363 * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
364 * not check it for lookups anymore, thanks Alexey. -DaveM
365 *
366 * Local BH must be disabled here.
367 */
368static inline struct sock *
369 __inet_lookup_established(struct inet_hashinfo *hashinfo,
370 const u32 saddr, const u16 sport,
371 const u32 daddr, const u16 hnum,
372 const int dif)
373{
374 INET_ADDR_COOKIE(acookie, saddr, daddr)
375 const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
376 struct sock *sk;
377 const struct hlist_node *node;
378 /* Optimize here for direct hit, only listening connections can
379 * have wildcards anyways.
380 */
381 const int hash = inet_ehashfn(daddr, hnum, saddr, sport, hashinfo->ehash_size);
382 struct inet_ehash_bucket *head = &hashinfo->ehash[hash];
383
384 read_lock(&head->lock);
385 sk_for_each(sk, node, &head->chain) {
386 if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif))
387 goto hit; /* You sunk my battleship! */
388 }
389
390 /* Must check for a TIME_WAIT'er before going to listener hash. */
391 sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) {
392 if (INET_TW_MATCH(sk, acookie, saddr, daddr, ports, dif))
393 goto hit;
394 }
395 sk = NULL;
396out:
397 read_unlock(&head->lock);
398 return sk;
399hit:
400 sock_hold(sk);
401 goto out;
402}
403
404static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo,
405 const u32 saddr, const u16 sport,
406 const u32 daddr, const u16 hnum,
407 const int dif)
408{
409 struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport, daddr,
410 hnum, dif);
411 return sk ? : inet_lookup_listener(hashinfo, daddr, hnum, dif);
412}
413
414static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo,
415 const u32 saddr, const u16 sport,
416 const u32 daddr, const u16 dport,
417 const int dif)
418{
419 struct sock *sk;
420
421 local_bh_disable();
422 sk = __inet_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif);
423 local_bh_enable();
424
425 return sk;
426}
427#endif /* _INET_HASHTABLES_H */
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
new file mode 100644
index 000000000000..3b070352e869
--- /dev/null
+++ b/include/net/inet_timewait_sock.h
@@ -0,0 +1,219 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Definitions for a generic INET TIMEWAIT sock
7 *
8 * From code originally in net/tcp.h
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15#ifndef _INET_TIMEWAIT_SOCK_
16#define _INET_TIMEWAIT_SOCK_
17
18#include <linux/config.h>
19
20#include <linux/ip.h>
21#include <linux/list.h>
22#include <linux/timer.h>
23#include <linux/types.h>
24#include <linux/workqueue.h>
25
26#include <net/sock.h>
27#include <net/tcp_states.h>
28
29#include <asm/atomic.h>
30
31struct inet_hashinfo;
32
33#define INET_TWDR_RECYCLE_SLOTS_LOG 5
34#define INET_TWDR_RECYCLE_SLOTS (1 << INET_TWDR_RECYCLE_SLOTS_LOG)
35
36/*
37 * If time > 4sec, it is "slow" path, no recycling is required,
38 * so that we select tick to get range about 4 seconds.
39 */
40#if HZ <= 16 || HZ > 4096
41# error Unsupported: HZ <= 16 or HZ > 4096
42#elif HZ <= 32
43# define INET_TWDR_RECYCLE_TICK (5 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
44#elif HZ <= 64
45# define INET_TWDR_RECYCLE_TICK (6 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
46#elif HZ <= 128
47# define INET_TWDR_RECYCLE_TICK (7 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
48#elif HZ <= 256
49# define INET_TWDR_RECYCLE_TICK (8 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
50#elif HZ <= 512
51# define INET_TWDR_RECYCLE_TICK (9 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
52#elif HZ <= 1024
53# define INET_TWDR_RECYCLE_TICK (10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
54#elif HZ <= 2048
55# define INET_TWDR_RECYCLE_TICK (11 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
56#else
57# define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
58#endif
59
60/* TIME_WAIT reaping mechanism. */
61#define INET_TWDR_TWKILL_SLOTS 8 /* Please keep this a power of 2. */
62
63#define INET_TWDR_TWKILL_QUOTA 100
64
65struct inet_timewait_death_row {
66 /* Short-time timewait calendar */
67 int twcal_hand;
68 int twcal_jiffie;
69 struct timer_list twcal_timer;
70 struct hlist_head twcal_row[INET_TWDR_RECYCLE_SLOTS];
71
72 spinlock_t death_lock;
73 int tw_count;
74 int period;
75 u32 thread_slots;
76 struct work_struct twkill_work;
77 struct timer_list tw_timer;
78 int slot;
79 struct hlist_head cells[INET_TWDR_TWKILL_SLOTS];
80 struct inet_hashinfo *hashinfo;
81 int sysctl_tw_recycle;
82 int sysctl_max_tw_buckets;
83};
84
85extern void inet_twdr_hangman(unsigned long data);
86extern void inet_twdr_twkill_work(void *data);
87extern void inet_twdr_twcal_tick(unsigned long data);
88
89#if (BITS_PER_LONG == 64)
90#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8
91#else
92#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 4
93#endif
94
95struct inet_bind_bucket;
96
97/*
98 * This is a TIME_WAIT sock. It works around the memory consumption
99 * problems of sockets in such a state on heavily loaded servers, but
100 * without violating the protocol specification.
101 */
102struct inet_timewait_sock {
103 /*
104 * Now struct sock also uses sock_common, so please just
105 * don't add nothing before this first member (__tw_common) --acme
106 */
107 struct sock_common __tw_common;
108#define tw_family __tw_common.skc_family
109#define tw_state __tw_common.skc_state
110#define tw_reuse __tw_common.skc_reuse
111#define tw_bound_dev_if __tw_common.skc_bound_dev_if
112#define tw_node __tw_common.skc_node
113#define tw_bind_node __tw_common.skc_bind_node
114#define tw_refcnt __tw_common.skc_refcnt
115#define tw_prot __tw_common.skc_prot
116 volatile unsigned char tw_substate;
117 /* 3 bits hole, try to pack */
118 unsigned char tw_rcv_wscale;
119 /* Socket demultiplex comparisons on incoming packets. */
120 /* these five are in inet_sock */
121 __u16 tw_sport;
122 __u32 tw_daddr __attribute__((aligned(INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES)));
123 __u32 tw_rcv_saddr;
124 __u16 tw_dport;
125 __u16 tw_num;
126 /* And these are ours. */
127 __u8 tw_ipv6only:1;
128 /* 31 bits hole, try to pack */
129 int tw_hashent;
130 int tw_timeout;
131 unsigned long tw_ttd;
132 struct inet_bind_bucket *tw_tb;
133 struct hlist_node tw_death_node;
134};
135
136static inline void inet_twsk_add_node(struct inet_timewait_sock *tw,
137 struct hlist_head *list)
138{
139 hlist_add_head(&tw->tw_node, list);
140}
141
142static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
143 struct hlist_head *list)
144{
145 hlist_add_head(&tw->tw_bind_node, list);
146}
147
148static inline int inet_twsk_dead_hashed(const struct inet_timewait_sock *tw)
149{
150 return tw->tw_death_node.pprev != NULL;
151}
152
153static inline void inet_twsk_dead_node_init(struct inet_timewait_sock *tw)
154{
155 tw->tw_death_node.pprev = NULL;
156}
157
158static inline void __inet_twsk_del_dead_node(struct inet_timewait_sock *tw)
159{
160 __hlist_del(&tw->tw_death_node);
161 inet_twsk_dead_node_init(tw);
162}
163
164static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw)
165{
166 if (inet_twsk_dead_hashed(tw)) {
167 __inet_twsk_del_dead_node(tw);
168 return 1;
169 }
170 return 0;
171}
172
173#define inet_twsk_for_each(tw, node, head) \
174 hlist_for_each_entry(tw, node, head, tw_node)
175
176#define inet_twsk_for_each_inmate(tw, node, jail) \
177 hlist_for_each_entry(tw, node, jail, tw_death_node)
178
179#define inet_twsk_for_each_inmate_safe(tw, node, safe, jail) \
180 hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node)
181
182static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk)
183{
184 return (struct inet_timewait_sock *)sk;
185}
186
187static inline u32 inet_rcv_saddr(const struct sock *sk)
188{
189 return likely(sk->sk_state != TCP_TIME_WAIT) ?
190 inet_sk(sk)->rcv_saddr : inet_twsk(sk)->tw_rcv_saddr;
191}
192
193static inline void inet_twsk_put(struct inet_timewait_sock *tw)
194{
195 if (atomic_dec_and_test(&tw->tw_refcnt)) {
196#ifdef SOCK_REFCNT_DEBUG
197 printk(KERN_DEBUG "%s timewait_sock %p released\n",
198 tw->tw_prot->name, tw);
199#endif
200 kmem_cache_free(tw->tw_prot->twsk_slab, tw);
201 }
202}
203
204extern struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
205 const int state);
206
207extern void __inet_twsk_kill(struct inet_timewait_sock *tw,
208 struct inet_hashinfo *hashinfo);
209
210extern void __inet_twsk_hashdance(struct inet_timewait_sock *tw,
211 struct sock *sk,
212 struct inet_hashinfo *hashinfo);
213
214extern void inet_twsk_schedule(struct inet_timewait_sock *tw,
215 struct inet_timewait_death_row *twdr,
216 const int timeo, const int timewait_len);
217extern void inet_twsk_deschedule(struct inet_timewait_sock *tw,
218 struct inet_timewait_death_row *twdr);
219#endif /* _INET_TIMEWAIT_SOCK_ */
diff --git a/include/net/ip.h b/include/net/ip.h
index 32360bbe143f..e4563bbee6ea 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -86,7 +86,7 @@ extern int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
86 u32 saddr, u32 daddr, 86 u32 saddr, u32 daddr,
87 struct ip_options *opt); 87 struct ip_options *opt);
88extern int ip_rcv(struct sk_buff *skb, struct net_device *dev, 88extern int ip_rcv(struct sk_buff *skb, struct net_device *dev,
89 struct packet_type *pt); 89 struct packet_type *pt, struct net_device *orig_dev);
90extern int ip_local_deliver(struct sk_buff *skb); 90extern int ip_local_deliver(struct sk_buff *skb);
91extern int ip_mr_input(struct sk_buff *skb); 91extern int ip_mr_input(struct sk_buff *skb);
92extern int ip_output(struct sk_buff *skb); 92extern int ip_output(struct sk_buff *skb);
@@ -140,8 +140,6 @@ struct ip_reply_arg {
140void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, 140void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
141 unsigned int len); 141 unsigned int len);
142 142
143extern int ip_finish_output(struct sk_buff *skb);
144
145struct ipv4_config 143struct ipv4_config
146{ 144{
147 int log_martians; 145 int log_martians;
@@ -165,6 +163,24 @@ extern int sysctl_local_port_range[2];
165extern int sysctl_ip_default_ttl; 163extern int sysctl_ip_default_ttl;
166extern int sysctl_ip_nonlocal_bind; 164extern int sysctl_ip_nonlocal_bind;
167 165
166/* From ip_fragment.c */
167extern int sysctl_ipfrag_high_thresh;
168extern int sysctl_ipfrag_low_thresh;
169extern int sysctl_ipfrag_time;
170extern int sysctl_ipfrag_secret_interval;
171
172/* From inetpeer.c */
173extern int inet_peer_threshold;
174extern int inet_peer_minttl;
175extern int inet_peer_maxttl;
176extern int inet_peer_gc_mintime;
177extern int inet_peer_gc_maxtime;
178
179/* From ip_output.c */
180extern int sysctl_ip_dynaddr;
181
182extern void ipfrag_init(void);
183
168#ifdef CONFIG_INET 184#ifdef CONFIG_INET
169/* The function in 2.2 was invalid, producing wrong result for 185/* The function in 2.2 was invalid, producing wrong result for
170 * check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. --ANK(000625) */ 186 * check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. --ANK(000625) */
@@ -319,7 +335,10 @@ extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt, u32 da
319extern int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb); 335extern int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb);
320extern void ip_options_fragment(struct sk_buff *skb); 336extern void ip_options_fragment(struct sk_buff *skb);
321extern int ip_options_compile(struct ip_options *opt, struct sk_buff *skb); 337extern int ip_options_compile(struct ip_options *opt, struct sk_buff *skb);
322extern int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen, int user); 338extern int ip_options_get(struct ip_options **optp,
339 unsigned char *data, int optlen);
340extern int ip_options_get_from_user(struct ip_options **optp,
341 unsigned char __user *data, int optlen);
323extern void ip_options_undo(struct ip_options * opt); 342extern void ip_options_undo(struct ip_options * opt);
324extern void ip_forward_options(struct sk_buff *skb); 343extern void ip_forward_options(struct sk_buff *skb);
325extern int ip_options_rcv_srr(struct sk_buff *skb); 344extern int ip_options_rcv_srr(struct sk_buff *skb);
@@ -350,5 +369,10 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
350 void __user *oldval, size_t __user *oldlenp, 369 void __user *oldval, size_t __user *oldlenp,
351 void __user *newval, size_t newlen, 370 void __user *newval, size_t newlen,
352 void **context); 371 void **context);
372#ifdef CONFIG_PROC_FS
373extern int ip_misc_proc_init(void);
374#endif
375
376extern struct ctl_table ipv4_table[];
353 377
354#endif /* _IP_H */ 378#endif /* _IP_H */
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index f920706d526b..1f2e428ca364 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -12,7 +12,6 @@
12#include <net/flow.h> 12#include <net/flow.h>
13#include <net/ip6_fib.h> 13#include <net/ip6_fib.h>
14#include <net/sock.h> 14#include <net/sock.h>
15#include <linux/tcp.h>
16#include <linux/ip.h> 15#include <linux/ip.h>
17#include <linux/ipv6.h> 16#include <linux/ipv6.h>
18 17
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index a4208a336ac0..14de4ebd1211 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -295,4 +295,9 @@ static inline void fib_res_put(struct fib_result *res)
295#endif 295#endif
296} 296}
297 297
298#ifdef CONFIG_PROC_FS
299extern int fib_proc_init(void);
300extern void fib_proc_exit(void);
301#endif
302
298#endif /* _NET_FIB_H */ 303#endif /* _NET_FIB_H */
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 52da5d26617a..7a3c43711a17 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -255,7 +255,6 @@ struct ip_vs_daemon_user {
255#include <asm/atomic.h> /* for struct atomic_t */ 255#include <asm/atomic.h> /* for struct atomic_t */
256#include <linux/netdevice.h> /* for struct neighbour */ 256#include <linux/netdevice.h> /* for struct neighbour */
257#include <net/dst.h> /* for struct dst_entry */ 257#include <net/dst.h> /* for struct dst_entry */
258#include <net/tcp.h>
259#include <net/udp.h> 258#include <net/udp.h>
260#include <linux/compiler.h> 259#include <linux/compiler.h>
261 260
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 69324465e8b3..3203eaff4bd4 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -104,6 +104,7 @@ struct frag_hdr {
104 104
105#ifdef __KERNEL__ 105#ifdef __KERNEL__
106 106
107#include <linux/config.h>
107#include <net/sock.h> 108#include <net/sock.h>
108 109
109/* sysctls */ 110/* sysctls */
@@ -145,7 +146,6 @@ DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6);
145#define UDP6_INC_STATS(field) SNMP_INC_STATS(udp_stats_in6, field) 146#define UDP6_INC_STATS(field) SNMP_INC_STATS(udp_stats_in6, field)
146#define UDP6_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_stats_in6, field) 147#define UDP6_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_stats_in6, field)
147#define UDP6_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_stats_in6, field) 148#define UDP6_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_stats_in6, field)
148extern atomic_t inet6_sock_nr;
149 149
150int snmp6_register_dev(struct inet6_dev *idev); 150int snmp6_register_dev(struct inet6_dev *idev);
151int snmp6_unregister_dev(struct inet6_dev *idev); 151int snmp6_unregister_dev(struct inet6_dev *idev);
@@ -346,7 +346,8 @@ static inline int ipv6_addr_any(const struct in6_addr *a)
346 346
347extern int ipv6_rcv(struct sk_buff *skb, 347extern int ipv6_rcv(struct sk_buff *skb,
348 struct net_device *dev, 348 struct net_device *dev,
349 struct packet_type *pt); 349 struct packet_type *pt,
350 struct net_device *orig_dev);
350 351
351/* 352/*
352 * upper-layer output functions 353 * upper-layer output functions
@@ -464,8 +465,38 @@ extern int sysctl_ip6frag_low_thresh;
464extern int sysctl_ip6frag_time; 465extern int sysctl_ip6frag_time;
465extern int sysctl_ip6frag_secret_interval; 466extern int sysctl_ip6frag_secret_interval;
466 467
467#endif /* __KERNEL__ */ 468extern struct proto_ops inet6_stream_ops;
468#endif /* _NET_IPV6_H */ 469extern struct proto_ops inet6_dgram_ops;
470
471extern int ip6_mc_source(int add, int omode, struct sock *sk,
472 struct group_source_req *pgsr);
473extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf);
474extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
475 struct group_filter __user *optval,
476 int __user *optlen);
477
478#ifdef CONFIG_PROC_FS
479extern int ac6_proc_init(void);
480extern void ac6_proc_exit(void);
481extern int raw6_proc_init(void);
482extern void raw6_proc_exit(void);
483extern int tcp6_proc_init(void);
484extern void tcp6_proc_exit(void);
485extern int udp6_proc_init(void);
486extern void udp6_proc_exit(void);
487extern int ipv6_misc_proc_init(void);
488extern void ipv6_misc_proc_exit(void);
489
490extern struct rt6_statistics rt6_stats;
491#endif
469 492
493#ifdef CONFIG_SYSCTL
494extern ctl_table ipv6_route_table[];
495extern ctl_table ipv6_icmp_table[];
470 496
497extern void ipv6_sysctl_register(void);
498extern void ipv6_sysctl_unregister(void);
499#endif
471 500
501#endif /* __KERNEL__ */
502#endif /* _NET_IPV6_H */
diff --git a/include/net/llc.h b/include/net/llc.h
index c9aed2a8b4e2..71769a5aeef3 100644
--- a/include/net/llc.h
+++ b/include/net/llc.h
@@ -46,7 +46,8 @@ struct llc_sap {
46 unsigned char f_bit; 46 unsigned char f_bit;
47 int (*rcv_func)(struct sk_buff *skb, 47 int (*rcv_func)(struct sk_buff *skb,
48 struct net_device *dev, 48 struct net_device *dev,
49 struct packet_type *pt); 49 struct packet_type *pt,
50 struct net_device *orig_dev);
50 struct llc_addr laddr; 51 struct llc_addr laddr;
51 struct list_head node; 52 struct list_head node;
52 struct { 53 struct {
@@ -64,7 +65,7 @@ extern rwlock_t llc_sap_list_lock;
64extern unsigned char llc_station_mac_sa[ETH_ALEN]; 65extern unsigned char llc_station_mac_sa[ETH_ALEN];
65 66
66extern int llc_rcv(struct sk_buff *skb, struct net_device *dev, 67extern int llc_rcv(struct sk_buff *skb, struct net_device *dev,
67 struct packet_type *pt); 68 struct packet_type *pt, struct net_device *orig_dev);
68 69
69extern int llc_mac_hdr_init(struct sk_buff *skb, 70extern int llc_mac_hdr_init(struct sk_buff *skb,
70 unsigned char *sa, unsigned char *da); 71 unsigned char *sa, unsigned char *da);
@@ -78,7 +79,8 @@ extern void llc_set_station_handler(void (*handler)(struct sk_buff *skb));
78extern struct llc_sap *llc_sap_open(unsigned char lsap, 79extern struct llc_sap *llc_sap_open(unsigned char lsap,
79 int (*rcv)(struct sk_buff *skb, 80 int (*rcv)(struct sk_buff *skb,
80 struct net_device *dev, 81 struct net_device *dev,
81 struct packet_type *pt)); 82 struct packet_type *pt,
83 struct net_device *orig_dev));
82extern void llc_sap_close(struct llc_sap *sap); 84extern void llc_sap_close(struct llc_sap *sap);
83 85
84extern struct llc_sap *llc_sap_find(unsigned char sap_value); 86extern struct llc_sap *llc_sap_find(unsigned char sap_value);
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 89809891e5ab..34c07731933d 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -363,7 +363,14 @@ __neigh_lookup_errno(struct neigh_table *tbl, const void *pkey,
363 return neigh_create(tbl, pkey, dev); 363 return neigh_create(tbl, pkey, dev);
364} 364}
365 365
366#define LOCALLY_ENQUEUED -2 366struct neighbour_cb {
367 unsigned long sched_next;
368 unsigned int flags;
369};
370
371#define LOCALLY_ENQUEUED 0x1
372
373#define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb)
367 374
368#endif 375#endif
369#endif 376#endif
diff --git a/include/net/p8022.h b/include/net/p8022.h
index 3c99a86c3581..42e9fac51b31 100644
--- a/include/net/p8022.h
+++ b/include/net/p8022.h
@@ -4,7 +4,10 @@ extern struct datalink_proto *
4 register_8022_client(unsigned char type, 4 register_8022_client(unsigned char type,
5 int (*func)(struct sk_buff *skb, 5 int (*func)(struct sk_buff *skb,
6 struct net_device *dev, 6 struct net_device *dev,
7 struct packet_type *pt)); 7 struct packet_type *pt,
8 struct net_device *orig_dev));
8extern void unregister_8022_client(struct datalink_proto *proto); 9extern void unregister_8022_client(struct datalink_proto *proto);
9 10
11extern struct datalink_proto *make_8023_client(void);
12extern void destroy_8023_client(struct datalink_proto *dl);
10#endif 13#endif
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 4abda6aec05a..b902d24a3256 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -352,10 +352,10 @@ tcf_change_indev(struct tcf_proto *tp, char *indev, struct rtattr *indev_tlv)
352static inline int 352static inline int
353tcf_match_indev(struct sk_buff *skb, char *indev) 353tcf_match_indev(struct sk_buff *skb, char *indev)
354{ 354{
355 if (0 != indev[0]) { 355 if (indev[0]) {
356 if (NULL == skb->input_dev) 356 if (!skb->input_dev)
357 return 0; 357 return 0;
358 else if (0 != strcmp(indev, skb->input_dev->name)) 358 if (strcmp(indev, skb->input_dev->name))
359 return 0; 359 return 0;
360 } 360 }
361 361
diff --git a/include/net/psnap.h b/include/net/psnap.h
index 9c94e8f98b36..b2e01cc3fc8a 100644
--- a/include/net/psnap.h
+++ b/include/net/psnap.h
@@ -1,7 +1,7 @@
1#ifndef _NET_PSNAP_H 1#ifndef _NET_PSNAP_H
2#define _NET_PSNAP_H 2#define _NET_PSNAP_H
3 3
4extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *)); 4extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *orig_dev));
5extern void unregister_snap_client(struct datalink_proto *proto); 5extern void unregister_snap_client(struct datalink_proto *proto);
6 6
7#endif 7#endif
diff --git a/include/net/raw.h b/include/net/raw.h
index 1c411c45587a..f47917469b12 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -17,10 +17,10 @@
17#ifndef _RAW_H 17#ifndef _RAW_H
18#define _RAW_H 18#define _RAW_H
19 19
20#include <linux/config.h>
20 21
21extern struct proto raw_prot; 22extern struct proto raw_prot;
22 23
23
24extern void raw_err(struct sock *, struct sk_buff *, u32 info); 24extern void raw_err(struct sock *, struct sk_buff *, u32 info);
25extern int raw_rcv(struct sock *, struct sk_buff *); 25extern int raw_rcv(struct sock *, struct sk_buff *);
26 26
@@ -37,6 +37,11 @@ extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
37 unsigned long raddr, unsigned long laddr, 37 unsigned long raddr, unsigned long laddr,
38 int dif); 38 int dif);
39 39
40extern void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash); 40extern int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash);
41
42#ifdef CONFIG_PROC_FS
43extern int raw_proc_init(void);
44extern void raw_proc_exit(void);
45#endif
41 46
42#endif /* _RAW_H */ 47#endif /* _RAW_H */
diff --git a/include/net/rawv6.h b/include/net/rawv6.h
index 23fd9a6a221a..14476a71725e 100644
--- a/include/net/rawv6.h
+++ b/include/net/rawv6.h
@@ -7,10 +7,11 @@
7extern struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE]; 7extern struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE];
8extern rwlock_t raw_v6_lock; 8extern rwlock_t raw_v6_lock;
9 9
10extern void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr); 10extern int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr);
11 11
12extern struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, 12extern struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num,
13 struct in6_addr *loc_addr, struct in6_addr *rmt_addr); 13 struct in6_addr *loc_addr, struct in6_addr *rmt_addr,
14 int dif);
14 15
15extern int rawv6_rcv(struct sock *sk, 16extern int rawv6_rcv(struct sock *sk,
16 struct sk_buff *skb); 17 struct sk_buff *skb);
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 72fd6f5e86b1..b52cc52ffe39 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -89,6 +89,7 @@ struct listen_sock {
89 int qlen_young; 89 int qlen_young;
90 int clock_hand; 90 int clock_hand;
91 u32 hash_rnd; 91 u32 hash_rnd;
92 u32 nr_table_entries;
92 struct request_sock *syn_table[0]; 93 struct request_sock *syn_table[0];
93}; 94};
94 95
@@ -96,6 +97,7 @@ struct listen_sock {
96 * 97 *
97 * @rskq_accept_head - FIFO head of established children 98 * @rskq_accept_head - FIFO head of established children
98 * @rskq_accept_tail - FIFO tail of established children 99 * @rskq_accept_tail - FIFO tail of established children
100 * @rskq_defer_accept - User waits for some data after accept()
99 * @syn_wait_lock - serializer 101 * @syn_wait_lock - serializer
100 * 102 *
101 * %syn_wait_lock is necessary only to avoid proc interface having to grab the main 103 * %syn_wait_lock is necessary only to avoid proc interface having to grab the main
@@ -111,6 +113,8 @@ struct request_sock_queue {
111 struct request_sock *rskq_accept_head; 113 struct request_sock *rskq_accept_head;
112 struct request_sock *rskq_accept_tail; 114 struct request_sock *rskq_accept_tail;
113 rwlock_t syn_wait_lock; 115 rwlock_t syn_wait_lock;
116 u8 rskq_defer_accept;
117 /* 3 bytes hole, try to pack */
114 struct listen_sock *listen_opt; 118 struct listen_sock *listen_opt;
115}; 119};
116 120
@@ -129,11 +133,13 @@ static inline struct listen_sock *reqsk_queue_yank_listen_sk(struct request_sock
129 return lopt; 133 return lopt;
130} 134}
131 135
132static inline void reqsk_queue_destroy(struct request_sock_queue *queue) 136static inline void __reqsk_queue_destroy(struct request_sock_queue *queue)
133{ 137{
134 kfree(reqsk_queue_yank_listen_sk(queue)); 138 kfree(reqsk_queue_yank_listen_sk(queue));
135} 139}
136 140
141extern void reqsk_queue_destroy(struct request_sock_queue *queue);
142
137static inline struct request_sock * 143static inline struct request_sock *
138 reqsk_queue_yank_acceptq(struct request_sock_queue *queue) 144 reqsk_queue_yank_acceptq(struct request_sock_queue *queue)
139{ 145{
@@ -221,17 +227,17 @@ static inline int reqsk_queue_added(struct request_sock_queue *queue)
221 return prev_qlen; 227 return prev_qlen;
222} 228}
223 229
224static inline int reqsk_queue_len(struct request_sock_queue *queue) 230static inline int reqsk_queue_len(const struct request_sock_queue *queue)
225{ 231{
226 return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0; 232 return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0;
227} 233}
228 234
229static inline int reqsk_queue_len_young(struct request_sock_queue *queue) 235static inline int reqsk_queue_len_young(const struct request_sock_queue *queue)
230{ 236{
231 return queue->listen_opt->qlen_young; 237 return queue->listen_opt->qlen_young;
232} 238}
233 239
234static inline int reqsk_queue_is_full(struct request_sock_queue *queue) 240static inline int reqsk_queue_is_full(const struct request_sock_queue *queue)
235{ 241{
236 return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log; 242 return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log;
237} 243}
diff --git a/include/net/route.h b/include/net/route.h
index c3cd069a9aca..dbe79ca67d31 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -105,10 +105,6 @@ struct rt_cache_stat
105 unsigned int out_hlist_search; 105 unsigned int out_hlist_search;
106}; 106};
107 107
108extern struct rt_cache_stat *rt_cache_stat;
109#define RT_CACHE_STAT_INC(field) \
110 (per_cpu_ptr(rt_cache_stat, raw_smp_processor_id())->field++)
111
112extern struct ip_rt_acct *ip_rt_acct; 108extern struct ip_rt_acct *ip_rt_acct;
113 109
114struct in_device; 110struct in_device;
@@ -199,4 +195,6 @@ static inline struct inet_peer *rt_get_peer(struct rtable *rt)
199 return rt->peer; 195 return rt->peer;
200} 196}
201 197
198extern ctl_table ipv4_route_table[];
199
202#endif /* _ROUTE_H */ 200#endif /* _ROUTE_H */
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 5999e5684bbf..c51541ee0247 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -47,10 +47,10 @@
47#ifndef __sctp_constants_h__ 47#ifndef __sctp_constants_h__
48#define __sctp_constants_h__ 48#define __sctp_constants_h__
49 49
50#include <linux/tcp.h> /* For TCP states used in sctp_sock_state_t */
51#include <linux/sctp.h> 50#include <linux/sctp.h>
52#include <linux/ipv6.h> /* For ipv6hdr. */ 51#include <linux/ipv6.h> /* For ipv6hdr. */
53#include <net/sctp/user.h> 52#include <net/sctp/user.h>
53#include <net/tcp_states.h> /* For TCP states used in sctp_sock_state_t */
54 54
55/* Value used for stream negotiation. */ 55/* Value used for stream negotiation. */
56enum { SCTP_MAX_STREAM = 0xffff }; 56enum { SCTP_MAX_STREAM = 0xffff };
diff --git a/include/net/sock.h b/include/net/sock.h
index e9b1dbab90d0..312cb25cbd18 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -88,6 +88,7 @@ do { spin_lock_init(&((__sk)->sk_lock.slock)); \
88} while(0) 88} while(0)
89 89
90struct sock; 90struct sock;
91struct proto;
91 92
92/** 93/**
93 * struct sock_common - minimal network layer representation of sockets 94 * struct sock_common - minimal network layer representation of sockets
@@ -98,10 +99,11 @@ struct sock;
98 * @skc_node: main hash linkage for various protocol lookup tables 99 * @skc_node: main hash linkage for various protocol lookup tables
99 * @skc_bind_node: bind hash linkage for various protocol lookup tables 100 * @skc_bind_node: bind hash linkage for various protocol lookup tables
100 * @skc_refcnt: reference count 101 * @skc_refcnt: reference count
102 * @skc_prot: protocol handlers inside a network family
101 * 103 *
102 * This is the minimal network layer representation of sockets, the header 104 * This is the minimal network layer representation of sockets, the header
103 * for struct sock and struct tcp_tw_bucket. 105 * for struct sock and struct inet_timewait_sock.
104 */ 106 */
105struct sock_common { 107struct sock_common {
106 unsigned short skc_family; 108 unsigned short skc_family;
107 volatile unsigned char skc_state; 109 volatile unsigned char skc_state;
@@ -110,11 +112,12 @@ struct sock_common {
110 struct hlist_node skc_node; 112 struct hlist_node skc_node;
111 struct hlist_node skc_bind_node; 113 struct hlist_node skc_bind_node;
112 atomic_t skc_refcnt; 114 atomic_t skc_refcnt;
115 struct proto *skc_prot;
113}; 116};
114 117
115/** 118/**
116 * struct sock - network layer representation of sockets 119 * struct sock - network layer representation of sockets
117 * @__sk_common: shared layout with tcp_tw_bucket 120 * @__sk_common: shared layout with inet_timewait_sock
118 * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN 121 * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN
119 * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings 122 * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
120 * @sk_lock: synchronizer 123 * @sk_lock: synchronizer
@@ -136,11 +139,10 @@ struct sock_common {
136 * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets 139 * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets
137 * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) 140 * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
138 * @sk_lingertime: %SO_LINGER l_linger setting 141 * @sk_lingertime: %SO_LINGER l_linger setting
139 * @sk_hashent: hash entry in several tables (e.g. tcp_ehash) 142 * @sk_hashent: hash entry in several tables (e.g. inet_hashinfo.ehash)
140 * @sk_backlog: always used with the per-socket spinlock held 143 * @sk_backlog: always used with the per-socket spinlock held
141 * @sk_callback_lock: used with the callbacks in the end of this struct 144 * @sk_callback_lock: used with the callbacks in the end of this struct
142 * @sk_error_queue: rarely used 145 * @sk_error_queue: rarely used
143 * @sk_prot: protocol handlers inside a network family
144 * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, IPV6_ADDRFORM for instance) 146 * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, IPV6_ADDRFORM for instance)
145 * @sk_err: last error 147 * @sk_err: last error
146 * @sk_err_soft: errors that don't cause failure but are the cause of a persistent failure not just 'timed out' 148 * @sk_err_soft: errors that don't cause failure but are the cause of a persistent failure not just 'timed out'
@@ -173,7 +175,7 @@ struct sock_common {
173 */ 175 */
174struct sock { 176struct sock {
175 /* 177 /*
176 * Now struct tcp_tw_bucket also uses sock_common, so please just 178 * Now struct inet_timewait_sock also uses sock_common, so please just
177 * don't add nothing before this first member (__sk_common) --acme 179 * don't add nothing before this first member (__sk_common) --acme
178 */ 180 */
179 struct sock_common __sk_common; 181 struct sock_common __sk_common;
@@ -184,6 +186,7 @@ struct sock {
184#define sk_node __sk_common.skc_node 186#define sk_node __sk_common.skc_node
185#define sk_bind_node __sk_common.skc_bind_node 187#define sk_bind_node __sk_common.skc_bind_node
186#define sk_refcnt __sk_common.skc_refcnt 188#define sk_refcnt __sk_common.skc_refcnt
189#define sk_prot __sk_common.skc_prot
187 unsigned char sk_shutdown : 2, 190 unsigned char sk_shutdown : 2,
188 sk_no_check : 2, 191 sk_no_check : 2,
189 sk_userlocks : 4; 192 sk_userlocks : 4;
@@ -218,7 +221,6 @@ struct sock {
218 struct sk_buff *tail; 221 struct sk_buff *tail;
219 } sk_backlog; 222 } sk_backlog;
220 struct sk_buff_head sk_error_queue; 223 struct sk_buff_head sk_error_queue;
221 struct proto *sk_prot;
222 struct proto *sk_prot_creator; 224 struct proto *sk_prot_creator;
223 rwlock_t sk_callback_lock; 225 rwlock_t sk_callback_lock;
224 int sk_err, 226 int sk_err,
@@ -253,28 +255,28 @@ struct sock {
253/* 255/*
254 * Hashed lists helper routines 256 * Hashed lists helper routines
255 */ 257 */
256static inline struct sock *__sk_head(struct hlist_head *head) 258static inline struct sock *__sk_head(const struct hlist_head *head)
257{ 259{
258 return hlist_entry(head->first, struct sock, sk_node); 260 return hlist_entry(head->first, struct sock, sk_node);
259} 261}
260 262
261static inline struct sock *sk_head(struct hlist_head *head) 263static inline struct sock *sk_head(const struct hlist_head *head)
262{ 264{
263 return hlist_empty(head) ? NULL : __sk_head(head); 265 return hlist_empty(head) ? NULL : __sk_head(head);
264} 266}
265 267
266static inline struct sock *sk_next(struct sock *sk) 268static inline struct sock *sk_next(const struct sock *sk)
267{ 269{
268 return sk->sk_node.next ? 270 return sk->sk_node.next ?
269 hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL; 271 hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL;
270} 272}
271 273
272static inline int sk_unhashed(struct sock *sk) 274static inline int sk_unhashed(const struct sock *sk)
273{ 275{
274 return hlist_unhashed(&sk->sk_node); 276 return hlist_unhashed(&sk->sk_node);
275} 277}
276 278
277static inline int sk_hashed(struct sock *sk) 279static inline int sk_hashed(const struct sock *sk)
278{ 280{
279 return sk->sk_node.pprev != NULL; 281 return sk->sk_node.pprev != NULL;
280} 282}
@@ -554,6 +556,10 @@ struct proto {
554 kmem_cache_t *slab; 556 kmem_cache_t *slab;
555 unsigned int obj_size; 557 unsigned int obj_size;
556 558
559 kmem_cache_t *twsk_slab;
560 unsigned int twsk_obj_size;
561 atomic_t *orphan_count;
562
557 struct request_sock_ops *rsk_prot; 563 struct request_sock_ops *rsk_prot;
558 564
559 struct module *owner; 565 struct module *owner;
@@ -561,7 +567,9 @@ struct proto {
561 char name[32]; 567 char name[32];
562 568
563 struct list_head node; 569 struct list_head node;
564 570#ifdef SOCK_REFCNT_DEBUG
571 atomic_t socks;
572#endif
565 struct { 573 struct {
566 int inuse; 574 int inuse;
567 u8 __pad[SMP_CACHE_BYTES - sizeof(int)]; 575 u8 __pad[SMP_CACHE_BYTES - sizeof(int)];
@@ -571,6 +579,31 @@ struct proto {
571extern int proto_register(struct proto *prot, int alloc_slab); 579extern int proto_register(struct proto *prot, int alloc_slab);
572extern void proto_unregister(struct proto *prot); 580extern void proto_unregister(struct proto *prot);
573 581
582#ifdef SOCK_REFCNT_DEBUG
583static inline void sk_refcnt_debug_inc(struct sock *sk)
584{
585 atomic_inc(&sk->sk_prot->socks);
586}
587
588static inline void sk_refcnt_debug_dec(struct sock *sk)
589{
590 atomic_dec(&sk->sk_prot->socks);
591 printk(KERN_DEBUG "%s socket %p released, %d are still alive\n",
592 sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks));
593}
594
595static inline void sk_refcnt_debug_release(const struct sock *sk)
596{
597 if (atomic_read(&sk->sk_refcnt) != 1)
598 printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n",
599 sk->sk_prot->name, sk, atomic_read(&sk->sk_refcnt));
600}
601#else /* SOCK_REFCNT_DEBUG */
602#define sk_refcnt_debug_inc(sk) do { } while (0)
603#define sk_refcnt_debug_dec(sk) do { } while (0)
604#define sk_refcnt_debug_release(sk) do { } while (0)
605#endif /* SOCK_REFCNT_DEBUG */
606
574/* Called with local bh disabled */ 607/* Called with local bh disabled */
575static __inline__ void sock_prot_inc_use(struct proto *prot) 608static __inline__ void sock_prot_inc_use(struct proto *prot)
576{ 609{
@@ -582,6 +615,15 @@ static __inline__ void sock_prot_dec_use(struct proto *prot)
582 prot->stats[smp_processor_id()].inuse--; 615 prot->stats[smp_processor_id()].inuse--;
583} 616}
584 617
618/* With per-bucket locks this operation is not-atomic, so that
619 * this version is not worse.
620 */
621static inline void __sk_prot_rehash(struct sock *sk)
622{
623 sk->sk_prot->unhash(sk);
624 sk->sk_prot->hash(sk);
625}
626
585/* About 10 seconds */ 627/* About 10 seconds */
586#define SOCK_DESTROY_TIME (10*HZ) 628#define SOCK_DESTROY_TIME (10*HZ)
587 629
@@ -693,6 +735,8 @@ extern struct sock *sk_alloc(int family,
693 unsigned int __nocast priority, 735 unsigned int __nocast priority,
694 struct proto *prot, int zero_it); 736 struct proto *prot, int zero_it);
695extern void sk_free(struct sock *sk); 737extern void sk_free(struct sock *sk);
738extern struct sock *sk_clone(const struct sock *sk,
739 const unsigned int __nocast priority);
696 740
697extern struct sk_buff *sock_wmalloc(struct sock *sk, 741extern struct sk_buff *sock_wmalloc(struct sock *sk,
698 unsigned long size, int force, 742 unsigned long size, int force,
@@ -986,6 +1030,16 @@ sk_dst_check(struct sock *sk, u32 cookie)
986 return dst; 1030 return dst;
987} 1031}
988 1032
1033static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1034{
1035 __sk_dst_set(sk, dst);
1036 sk->sk_route_caps = dst->dev->features;
1037 if (sk->sk_route_caps & NETIF_F_TSO) {
1038 if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len)
1039 sk->sk_route_caps &= ~NETIF_F_TSO;
1040 }
1041}
1042
989static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb) 1043static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb)
990{ 1044{
991 sk->sk_wmem_queued += skb->truesize; 1045 sk->sk_wmem_queued += skb->truesize;
@@ -1146,7 +1200,7 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk,
1146 int hdr_len; 1200 int hdr_len;
1147 1201
1148 hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header); 1202 hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header);
1149 skb = alloc_skb(size + hdr_len, gfp); 1203 skb = alloc_skb_fclone(size + hdr_len, gfp);
1150 if (skb) { 1204 if (skb) {
1151 skb->truesize += mem; 1205 skb->truesize += mem;
1152 if (sk->sk_forward_alloc >= (int)skb->truesize || 1206 if (sk->sk_forward_alloc >= (int)skb->truesize ||
@@ -1228,16 +1282,19 @@ static inline int sock_intr_errno(long timeo)
1228static __inline__ void 1282static __inline__ void
1229sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) 1283sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
1230{ 1284{
1231 struct timeval *stamp = &skb->stamp; 1285 struct timeval stamp;
1286
1287 skb_get_timestamp(skb, &stamp);
1232 if (sock_flag(sk, SOCK_RCVTSTAMP)) { 1288 if (sock_flag(sk, SOCK_RCVTSTAMP)) {
1233 /* Race occurred between timestamp enabling and packet 1289 /* Race occurred between timestamp enabling and packet
1234 receiving. Fill in the current time for now. */ 1290 receiving. Fill in the current time for now. */
1235 if (stamp->tv_sec == 0) 1291 if (stamp.tv_sec == 0)
1236 do_gettimeofday(stamp); 1292 do_gettimeofday(&stamp);
1293 skb_set_timestamp(skb, &stamp);
1237 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval), 1294 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval),
1238 stamp); 1295 &stamp);
1239 } else 1296 } else
1240 sk->sk_stamp = *stamp; 1297 sk->sk_stamp = stamp;
1241} 1298}
1242 1299
1243/** 1300/**
@@ -1262,11 +1319,11 @@ extern int sock_get_timestamp(struct sock *, struct timeval __user *);
1262 */ 1319 */
1263 1320
1264#if 0 1321#if 0
1265#define NETDEBUG(x) do { } while (0) 1322#define NETDEBUG(fmt, args...) do { } while (0)
1266#define LIMIT_NETDEBUG(x) do {} while(0) 1323#define LIMIT_NETDEBUG(fmt, args...) do { } while(0)
1267#else 1324#else
1268#define NETDEBUG(x) do { x; } while (0) 1325#define NETDEBUG(fmt, args...) printk(fmt,##args)
1269#define LIMIT_NETDEBUG(x) do { if (net_ratelimit()) { x; } } while(0) 1326#define LIMIT_NETDEBUG(fmt, args...) do { if (net_ratelimit()) printk(fmt,##args); } while(0)
1270#endif 1327#endif
1271 1328
1272/* 1329/*
@@ -1313,4 +1370,14 @@ static inline int siocdevprivate_ioctl(unsigned int fd, unsigned int cmd, unsign
1313} 1370}
1314#endif 1371#endif
1315 1372
1373extern void sk_init(void);
1374
1375#ifdef CONFIG_SYSCTL
1376extern struct ctl_table core_table[];
1377extern int sysctl_optmem_max;
1378#endif
1379
1380extern __u32 sysctl_wmem_default;
1381extern __u32 sysctl_rmem_default;
1382
1316#endif /* _SOCK_H */ 1383#endif /* _SOCK_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5010f0c5a56e..d6bcf1317a6a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -21,360 +21,29 @@
21#define TCP_DEBUG 1 21#define TCP_DEBUG 1
22#define FASTRETRANS_DEBUG 1 22#define FASTRETRANS_DEBUG 1
23 23
24/* Cancel timers, when they are not required. */
25#undef TCP_CLEAR_TIMERS
26
27#include <linux/config.h> 24#include <linux/config.h>
28#include <linux/list.h> 25#include <linux/list.h>
29#include <linux/tcp.h> 26#include <linux/tcp.h>
30#include <linux/slab.h> 27#include <linux/slab.h>
31#include <linux/cache.h> 28#include <linux/cache.h>
32#include <linux/percpu.h> 29#include <linux/percpu.h>
30
31#include <net/inet_connection_sock.h>
32#include <net/inet_timewait_sock.h>
33#include <net/inet_hashtables.h>
33#include <net/checksum.h> 34#include <net/checksum.h>
34#include <net/request_sock.h> 35#include <net/request_sock.h>
35#include <net/sock.h> 36#include <net/sock.h>
36#include <net/snmp.h> 37#include <net/snmp.h>
37#include <net/ip.h> 38#include <net/ip.h>
38#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 39#include <net/tcp_states.h>
39#include <linux/ipv6.h>
40#endif
41#include <linux/seq_file.h>
42
43/* This is for all connections with a full identity, no wildcards.
44 * New scheme, half the table is for TIME_WAIT, the other half is
45 * for the rest. I'll experiment with dynamic table growth later.
46 */
47struct tcp_ehash_bucket {
48 rwlock_t lock;
49 struct hlist_head chain;
50} __attribute__((__aligned__(8)));
51
52/* This is for listening sockets, thus all sockets which possess wildcards. */
53#define TCP_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */
54
55/* There are a few simple rules, which allow for local port reuse by
56 * an application. In essence:
57 *
58 * 1) Sockets bound to different interfaces may share a local port.
59 * Failing that, goto test 2.
60 * 2) If all sockets have sk->sk_reuse set, and none of them are in
61 * TCP_LISTEN state, the port may be shared.
62 * Failing that, goto test 3.
63 * 3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local
64 * address, and none of them are the same, the port may be
65 * shared.
66 * Failing this, the port cannot be shared.
67 *
68 * The interesting point, is test #2. This is what an FTP server does
69 * all day. To optimize this case we use a specific flag bit defined
70 * below. As we add sockets to a bind bucket list, we perform a
71 * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN))
72 * As long as all sockets added to a bind bucket pass this test,
73 * the flag bit will be set.
74 * The resulting situation is that tcp_v[46]_verify_bind() can just check
75 * for this flag bit, if it is set and the socket trying to bind has
76 * sk->sk_reuse set, we don't even have to walk the owners list at all,
77 * we return that it is ok to bind this socket to the requested local port.
78 *
79 * Sounds like a lot of work, but it is worth it. In a more naive
80 * implementation (ie. current FreeBSD etc.) the entire list of ports
81 * must be walked for each data port opened by an ftp server. Needless
82 * to say, this does not scale at all. With a couple thousand FTP
83 * users logged onto your box, isn't it nice to know that new data
84 * ports are created in O(1) time? I thought so. ;-) -DaveM
85 */
86struct tcp_bind_bucket {
87 unsigned short port;
88 signed short fastreuse;
89 struct hlist_node node;
90 struct hlist_head owners;
91};
92
93#define tb_for_each(tb, node, head) hlist_for_each_entry(tb, node, head, node)
94
95struct tcp_bind_hashbucket {
96 spinlock_t lock;
97 struct hlist_head chain;
98};
99
100static inline struct tcp_bind_bucket *__tb_head(struct tcp_bind_hashbucket *head)
101{
102 return hlist_entry(head->chain.first, struct tcp_bind_bucket, node);
103}
104
105static inline struct tcp_bind_bucket *tb_head(struct tcp_bind_hashbucket *head)
106{
107 return hlist_empty(&head->chain) ? NULL : __tb_head(head);
108}
109
110extern struct tcp_hashinfo {
111 /* This is for sockets with full identity only. Sockets here will
112 * always be without wildcards and will have the following invariant:
113 *
114 * TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE
115 *
116 * First half of the table is for sockets not in TIME_WAIT, second half
117 * is for TIME_WAIT sockets only.
118 */
119 struct tcp_ehash_bucket *__tcp_ehash;
120
121 /* Ok, let's try this, I give up, we do need a local binding
122 * TCP hash as well as the others for fast bind/connect.
123 */
124 struct tcp_bind_hashbucket *__tcp_bhash;
125 40
126 int __tcp_bhash_size; 41#include <linux/seq_file.h>
127 int __tcp_ehash_size;
128
129 /* All sockets in TCP_LISTEN state will be in here. This is the only
130 * table where wildcard'd TCP sockets can exist. Hash function here
131 * is just local port number.
132 */
133 struct hlist_head __tcp_listening_hash[TCP_LHTABLE_SIZE];
134
135 /* All the above members are written once at bootup and
136 * never written again _or_ are predominantly read-access.
137 *
138 * Now align to a new cache line as all the following members
139 * are often dirty.
140 */
141 rwlock_t __tcp_lhash_lock ____cacheline_aligned;
142 atomic_t __tcp_lhash_users;
143 wait_queue_head_t __tcp_lhash_wait;
144 spinlock_t __tcp_portalloc_lock;
145} tcp_hashinfo;
146
147#define tcp_ehash (tcp_hashinfo.__tcp_ehash)
148#define tcp_bhash (tcp_hashinfo.__tcp_bhash)
149#define tcp_ehash_size (tcp_hashinfo.__tcp_ehash_size)
150#define tcp_bhash_size (tcp_hashinfo.__tcp_bhash_size)
151#define tcp_listening_hash (tcp_hashinfo.__tcp_listening_hash)
152#define tcp_lhash_lock (tcp_hashinfo.__tcp_lhash_lock)
153#define tcp_lhash_users (tcp_hashinfo.__tcp_lhash_users)
154#define tcp_lhash_wait (tcp_hashinfo.__tcp_lhash_wait)
155#define tcp_portalloc_lock (tcp_hashinfo.__tcp_portalloc_lock)
156
157extern kmem_cache_t *tcp_bucket_cachep;
158extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
159 unsigned short snum);
160extern void tcp_bucket_destroy(struct tcp_bind_bucket *tb);
161extern void tcp_bucket_unlock(struct sock *sk);
162extern int tcp_port_rover;
163
164/* These are AF independent. */
165static __inline__ int tcp_bhashfn(__u16 lport)
166{
167 return (lport & (tcp_bhash_size - 1));
168}
169
170extern void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb,
171 unsigned short snum);
172
173#if (BITS_PER_LONG == 64)
174#define TCP_ADDRCMP_ALIGN_BYTES 8
175#else
176#define TCP_ADDRCMP_ALIGN_BYTES 4
177#endif
178
179/* This is a TIME_WAIT bucket. It works around the memory consumption
180 * problems of sockets in such a state on heavily loaded servers, but
181 * without violating the protocol specification.
182 */
183struct tcp_tw_bucket {
184 /*
185 * Now struct sock also uses sock_common, so please just
186 * don't add nothing before this first member (__tw_common) --acme
187 */
188 struct sock_common __tw_common;
189#define tw_family __tw_common.skc_family
190#define tw_state __tw_common.skc_state
191#define tw_reuse __tw_common.skc_reuse
192#define tw_bound_dev_if __tw_common.skc_bound_dev_if
193#define tw_node __tw_common.skc_node
194#define tw_bind_node __tw_common.skc_bind_node
195#define tw_refcnt __tw_common.skc_refcnt
196 volatile unsigned char tw_substate;
197 unsigned char tw_rcv_wscale;
198 __u16 tw_sport;
199 /* Socket demultiplex comparisons on incoming packets. */
200 /* these five are in inet_sock */
201 __u32 tw_daddr
202 __attribute__((aligned(TCP_ADDRCMP_ALIGN_BYTES)));
203 __u32 tw_rcv_saddr;
204 __u16 tw_dport;
205 __u16 tw_num;
206 /* And these are ours. */
207 int tw_hashent;
208 int tw_timeout;
209 __u32 tw_rcv_nxt;
210 __u32 tw_snd_nxt;
211 __u32 tw_rcv_wnd;
212 __u32 tw_ts_recent;
213 long tw_ts_recent_stamp;
214 unsigned long tw_ttd;
215 struct tcp_bind_bucket *tw_tb;
216 struct hlist_node tw_death_node;
217#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
218 struct in6_addr tw_v6_daddr;
219 struct in6_addr tw_v6_rcv_saddr;
220 int tw_v6_ipv6only;
221#endif
222};
223
224static __inline__ void tw_add_node(struct tcp_tw_bucket *tw,
225 struct hlist_head *list)
226{
227 hlist_add_head(&tw->tw_node, list);
228}
229
230static __inline__ void tw_add_bind_node(struct tcp_tw_bucket *tw,
231 struct hlist_head *list)
232{
233 hlist_add_head(&tw->tw_bind_node, list);
234}
235
236static inline int tw_dead_hashed(struct tcp_tw_bucket *tw)
237{
238 return tw->tw_death_node.pprev != NULL;
239}
240
241static __inline__ void tw_dead_node_init(struct tcp_tw_bucket *tw)
242{
243 tw->tw_death_node.pprev = NULL;
244}
245
246static __inline__ void __tw_del_dead_node(struct tcp_tw_bucket *tw)
247{
248 __hlist_del(&tw->tw_death_node);
249 tw_dead_node_init(tw);
250}
251
252static __inline__ int tw_del_dead_node(struct tcp_tw_bucket *tw)
253{
254 if (tw_dead_hashed(tw)) {
255 __tw_del_dead_node(tw);
256 return 1;
257 }
258 return 0;
259}
260
261#define tw_for_each(tw, node, head) \
262 hlist_for_each_entry(tw, node, head, tw_node)
263
264#define tw_for_each_inmate(tw, node, jail) \
265 hlist_for_each_entry(tw, node, jail, tw_death_node)
266
267#define tw_for_each_inmate_safe(tw, node, safe, jail) \
268 hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node)
269
270#define tcptw_sk(__sk) ((struct tcp_tw_bucket *)(__sk))
271
272static inline u32 tcp_v4_rcv_saddr(const struct sock *sk)
273{
274 return likely(sk->sk_state != TCP_TIME_WAIT) ?
275 inet_sk(sk)->rcv_saddr : tcptw_sk(sk)->tw_rcv_saddr;
276}
277
278#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
279static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk)
280{
281 return likely(sk->sk_state != TCP_TIME_WAIT) ?
282 &inet6_sk(sk)->rcv_saddr : &tcptw_sk(sk)->tw_v6_rcv_saddr;
283}
284
285static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk)
286{
287 return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL;
288}
289
290#define tcptw_sk_ipv6only(__sk) (tcptw_sk(__sk)->tw_v6_ipv6only)
291
292static inline int tcp_v6_ipv6only(const struct sock *sk)
293{
294 return likely(sk->sk_state != TCP_TIME_WAIT) ?
295 ipv6_only_sock(sk) : tcptw_sk_ipv6only(sk);
296}
297#else
298# define __tcp_v6_rcv_saddr(__sk) NULL
299# define tcp_v6_rcv_saddr(__sk) NULL
300# define tcptw_sk_ipv6only(__sk) 0
301# define tcp_v6_ipv6only(__sk) 0
302#endif
303 42
304extern kmem_cache_t *tcp_timewait_cachep; 43extern struct inet_hashinfo tcp_hashinfo;
305
306static inline void tcp_tw_put(struct tcp_tw_bucket *tw)
307{
308 if (atomic_dec_and_test(&tw->tw_refcnt)) {
309#ifdef INET_REFCNT_DEBUG
310 printk(KERN_DEBUG "tw_bucket %p released\n", tw);
311#endif
312 kmem_cache_free(tcp_timewait_cachep, tw);
313 }
314}
315 44
316extern atomic_t tcp_orphan_count; 45extern atomic_t tcp_orphan_count;
317extern int tcp_tw_count;
318extern void tcp_time_wait(struct sock *sk, int state, int timeo); 46extern void tcp_time_wait(struct sock *sk, int state, int timeo);
319extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);
320
321
322/* Socket demux engine toys. */
323#ifdef __BIG_ENDIAN
324#define TCP_COMBINED_PORTS(__sport, __dport) \
325 (((__u32)(__sport)<<16) | (__u32)(__dport))
326#else /* __LITTLE_ENDIAN */
327#define TCP_COMBINED_PORTS(__sport, __dport) \
328 (((__u32)(__dport)<<16) | (__u32)(__sport))
329#endif
330
331#if (BITS_PER_LONG == 64)
332#ifdef __BIG_ENDIAN
333#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
334 __u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr));
335#else /* __LITTLE_ENDIAN */
336#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
337 __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr));
338#endif /* __BIG_ENDIAN */
339#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
340 (((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \
341 ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \
342 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
343#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
344 (((*((__u64 *)&(tcptw_sk(__sk)->tw_daddr))) == (__cookie)) && \
345 ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \
346 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
347#else /* 32-bit arch */
348#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr)
349#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
350 ((inet_sk(__sk)->daddr == (__saddr)) && \
351 (inet_sk(__sk)->rcv_saddr == (__daddr)) && \
352 ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \
353 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
354#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
355 ((tcptw_sk(__sk)->tw_daddr == (__saddr)) && \
356 (tcptw_sk(__sk)->tw_rcv_saddr == (__daddr)) && \
357 ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \
358 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
359#endif /* 64-bit arch */
360
361#define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \
362 (((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \
363 ((__sk)->sk_family == AF_INET6) && \
364 ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \
365 ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \
366 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
367
368/* These can have wildcards, don't try too hard. */
369static __inline__ int tcp_lhashfn(unsigned short num)
370{
371 return num & (TCP_LHTABLE_SIZE - 1);
372}
373
374static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
375{
376 return tcp_lhashfn(inet_sk(sk)->num);
377}
378 47
379#define MAX_TCP_HEADER (128 + MAX_HEADER) 48#define MAX_TCP_HEADER (128 + MAX_HEADER)
380 49
@@ -478,33 +147,6 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
478 * timestamps. It must be less than 147 * timestamps. It must be less than
479 * minimal timewait lifetime. 148 * minimal timewait lifetime.
480 */ 149 */
481
482#define TCP_TW_RECYCLE_SLOTS_LOG 5
483#define TCP_TW_RECYCLE_SLOTS (1<<TCP_TW_RECYCLE_SLOTS_LOG)
484
485/* If time > 4sec, it is "slow" path, no recycling is required,
486 so that we select tick to get range about 4 seconds.
487 */
488
489#if HZ <= 16 || HZ > 4096
490# error Unsupported: HZ <= 16 or HZ > 4096
491#elif HZ <= 32
492# define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG)
493#elif HZ <= 64
494# define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG)
495#elif HZ <= 128
496# define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG)
497#elif HZ <= 256
498# define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG)
499#elif HZ <= 512
500# define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG)
501#elif HZ <= 1024
502# define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG)
503#elif HZ <= 2048
504# define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG)
505#else
506# define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG)
507#endif
508/* 150/*
509 * TCP option 151 * TCP option
510 */ 152 */
@@ -534,22 +176,18 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
534#define TCPOLEN_SACK_BASE_ALIGNED 4 176#define TCPOLEN_SACK_BASE_ALIGNED 4
535#define TCPOLEN_SACK_PERBLOCK 8 177#define TCPOLEN_SACK_PERBLOCK 8
536 178
537#define TCP_TIME_RETRANS 1 /* Retransmit timer */
538#define TCP_TIME_DACK 2 /* Delayed ack timer */
539#define TCP_TIME_PROBE0 3 /* Zero window probe timer */
540#define TCP_TIME_KEEPOPEN 4 /* Keepalive timer */
541
542/* Flags in tp->nonagle */ 179/* Flags in tp->nonagle */
543#define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ 180#define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */
544#define TCP_NAGLE_CORK 2 /* Socket is corked */ 181#define TCP_NAGLE_CORK 2 /* Socket is corked */
545#define TCP_NAGLE_PUSH 4 /* Cork is overriden for already queued data */ 182#define TCP_NAGLE_PUSH 4 /* Cork is overriden for already queued data */
546 183
184extern struct inet_timewait_death_row tcp_death_row;
185
547/* sysctl variables for tcp */ 186/* sysctl variables for tcp */
548extern int sysctl_tcp_timestamps; 187extern int sysctl_tcp_timestamps;
549extern int sysctl_tcp_window_scaling; 188extern int sysctl_tcp_window_scaling;
550extern int sysctl_tcp_sack; 189extern int sysctl_tcp_sack;
551extern int sysctl_tcp_fin_timeout; 190extern int sysctl_tcp_fin_timeout;
552extern int sysctl_tcp_tw_recycle;
553extern int sysctl_tcp_keepalive_time; 191extern int sysctl_tcp_keepalive_time;
554extern int sysctl_tcp_keepalive_probes; 192extern int sysctl_tcp_keepalive_probes;
555extern int sysctl_tcp_keepalive_intvl; 193extern int sysctl_tcp_keepalive_intvl;
@@ -564,7 +202,6 @@ extern int sysctl_tcp_stdurg;
564extern int sysctl_tcp_rfc1337; 202extern int sysctl_tcp_rfc1337;
565extern int sysctl_tcp_abort_on_overflow; 203extern int sysctl_tcp_abort_on_overflow;
566extern int sysctl_tcp_max_orphans; 204extern int sysctl_tcp_max_orphans;
567extern int sysctl_tcp_max_tw_buckets;
568extern int sysctl_tcp_fack; 205extern int sysctl_tcp_fack;
569extern int sysctl_tcp_reordering; 206extern int sysctl_tcp_reordering;
570extern int sysctl_tcp_ecn; 207extern int sysctl_tcp_ecn;
@@ -585,12 +222,6 @@ extern atomic_t tcp_memory_allocated;
585extern atomic_t tcp_sockets_allocated; 222extern atomic_t tcp_sockets_allocated;
586extern int tcp_memory_pressure; 223extern int tcp_memory_pressure;
587 224
588#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
589#define TCP_INET_FAMILY(fam) ((fam) == AF_INET)
590#else
591#define TCP_INET_FAMILY(fam) 1
592#endif
593
594/* 225/*
595 * Pointers to address related TCP functions 226 * Pointers to address related TCP functions
596 * (i.e. things that depend on the address family) 227 * (i.e. things that depend on the address family)
@@ -671,9 +302,6 @@ DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics);
671#define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val) 302#define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val)
672#define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val) 303#define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val)
673 304
674extern void tcp_put_port(struct sock *sk);
675extern void tcp_inherit_port(struct sock *sk, struct sock *child);
676
677extern void tcp_v4_err(struct sk_buff *skb, u32); 305extern void tcp_v4_err(struct sk_buff *skb, u32);
678 306
679extern void tcp_shutdown (struct sock *sk, int how); 307extern void tcp_shutdown (struct sock *sk, int how);
@@ -682,7 +310,7 @@ extern int tcp_v4_rcv(struct sk_buff *skb);
682 310
683extern int tcp_v4_remember_stamp(struct sock *sk); 311extern int tcp_v4_remember_stamp(struct sock *sk);
684 312
685extern int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw); 313extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
686 314
687extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, 315extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk,
688 struct msghdr *msg, size_t size); 316 struct msghdr *msg, size_t size);
@@ -704,42 +332,22 @@ extern int tcp_rcv_established(struct sock *sk,
704 332
705extern void tcp_rcv_space_adjust(struct sock *sk); 333extern void tcp_rcv_space_adjust(struct sock *sk);
706 334
707enum tcp_ack_state_t 335static inline void tcp_dec_quickack_mode(struct sock *sk,
708{ 336 const unsigned int pkts)
709 TCP_ACK_SCHED = 1,
710 TCP_ACK_TIMER = 2,
711 TCP_ACK_PUSHED= 4
712};
713
714static inline void tcp_schedule_ack(struct tcp_sock *tp)
715{ 337{
716 tp->ack.pending |= TCP_ACK_SCHED; 338 struct inet_connection_sock *icsk = inet_csk(sk);
717}
718
719static inline int tcp_ack_scheduled(struct tcp_sock *tp)
720{
721 return tp->ack.pending&TCP_ACK_SCHED;
722}
723
724static __inline__ void tcp_dec_quickack_mode(struct tcp_sock *tp, unsigned int pkts)
725{
726 if (tp->ack.quick) {
727 if (pkts >= tp->ack.quick) {
728 tp->ack.quick = 0;
729 339
340 if (icsk->icsk_ack.quick) {
341 if (pkts >= icsk->icsk_ack.quick) {
342 icsk->icsk_ack.quick = 0;
730 /* Leaving quickack mode we deflate ATO. */ 343 /* Leaving quickack mode we deflate ATO. */
731 tp->ack.ato = TCP_ATO_MIN; 344 icsk->icsk_ack.ato = TCP_ATO_MIN;
732 } else 345 } else
733 tp->ack.quick -= pkts; 346 icsk->icsk_ack.quick -= pkts;
734 } 347 }
735} 348}
736 349
737extern void tcp_enter_quickack_mode(struct tcp_sock *tp); 350extern void tcp_enter_quickack_mode(struct sock *sk);
738
739static __inline__ void tcp_delack_init(struct tcp_sock *tp)
740{
741 memset(&tp->ack, 0, sizeof(tp->ack));
742}
743 351
744static inline void tcp_clear_options(struct tcp_options_received *rx_opt) 352static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
745{ 353{
@@ -755,10 +363,9 @@ enum tcp_tw_status
755}; 363};
756 364
757 365
758extern enum tcp_tw_status tcp_timewait_state_process(struct tcp_tw_bucket *tw, 366extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
759 struct sk_buff *skb, 367 struct sk_buff *skb,
760 struct tcphdr *th, 368 const struct tcphdr *th);
761 unsigned len);
762 369
763extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, 370extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb,
764 struct request_sock *req, 371 struct request_sock *req,
@@ -773,7 +380,6 @@ extern void tcp_update_metrics(struct sock *sk);
773 380
774extern void tcp_close(struct sock *sk, 381extern void tcp_close(struct sock *sk,
775 long timeout); 382 long timeout);
776extern struct sock * tcp_accept(struct sock *sk, int flags, int *err);
777extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait); 383extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait);
778 384
779extern int tcp_getsockopt(struct sock *sk, int level, 385extern int tcp_getsockopt(struct sock *sk, int level,
@@ -789,8 +395,6 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
789 size_t len, int nonblock, 395 size_t len, int nonblock,
790 int flags, int *addr_len); 396 int flags, int *addr_len);
791 397
792extern int tcp_listen_start(struct sock *sk);
793
794extern void tcp_parse_options(struct sk_buff *skb, 398extern void tcp_parse_options(struct sk_buff *skb,
795 struct tcp_options_received *opt_rx, 399 struct tcp_options_received *opt_rx,
796 int estab); 400 int estab);
@@ -799,11 +403,6 @@ extern void tcp_parse_options(struct sk_buff *skb,
799 * TCP v4 functions exported for the inet6 API 403 * TCP v4 functions exported for the inet6 API
800 */ 404 */
801 405
802extern int tcp_v4_rebuild_header(struct sock *sk);
803
804extern int tcp_v4_build_header(struct sock *sk,
805 struct sk_buff *skb);
806
807extern void tcp_v4_send_check(struct sock *sk, 406extern void tcp_v4_send_check(struct sock *sk,
808 struct tcphdr *th, int len, 407 struct tcphdr *th, int len,
809 struct sk_buff *skb); 408 struct sk_buff *skb);
@@ -872,18 +471,15 @@ extern void tcp_cwnd_application_limited(struct sock *sk);
872 471
873/* tcp_timer.c */ 472/* tcp_timer.c */
874extern void tcp_init_xmit_timers(struct sock *); 473extern void tcp_init_xmit_timers(struct sock *);
875extern void tcp_clear_xmit_timers(struct sock *); 474static inline void tcp_clear_xmit_timers(struct sock *sk)
475{
476 inet_csk_clear_xmit_timers(sk);
477}
876 478
877extern void tcp_delete_keepalive_timer(struct sock *);
878extern void tcp_reset_keepalive_timer(struct sock *, unsigned long);
879extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); 479extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
880extern unsigned int tcp_current_mss(struct sock *sk, int large); 480extern unsigned int tcp_current_mss(struct sock *sk, int large);
881 481
882#ifdef TCP_DEBUG 482/* tcp.c */
883extern const char tcp_timer_bug_msg[];
884#endif
885
886/* tcp_diag.c */
887extern void tcp_get_info(struct sock *, struct tcp_info *); 483extern void tcp_get_info(struct sock *, struct tcp_info *);
888 484
889/* Read 'sendfile()'-style from a TCP socket */ 485/* Read 'sendfile()'-style from a TCP socket */
@@ -892,72 +488,6 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
892extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, 488extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
893 sk_read_actor_t recv_actor); 489 sk_read_actor_t recv_actor);
894 490
895static inline void tcp_clear_xmit_timer(struct sock *sk, int what)
896{
897 struct tcp_sock *tp = tcp_sk(sk);
898
899 switch (what) {
900 case TCP_TIME_RETRANS:
901 case TCP_TIME_PROBE0:
902 tp->pending = 0;
903
904#ifdef TCP_CLEAR_TIMERS
905 sk_stop_timer(sk, &tp->retransmit_timer);
906#endif
907 break;
908 case TCP_TIME_DACK:
909 tp->ack.blocked = 0;
910 tp->ack.pending = 0;
911
912#ifdef TCP_CLEAR_TIMERS
913 sk_stop_timer(sk, &tp->delack_timer);
914#endif
915 break;
916 default:
917#ifdef TCP_DEBUG
918 printk(tcp_timer_bug_msg);
919#endif
920 return;
921 };
922
923}
924
925/*
926 * Reset the retransmission timer
927 */
928static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when)
929{
930 struct tcp_sock *tp = tcp_sk(sk);
931
932 if (when > TCP_RTO_MAX) {
933#ifdef TCP_DEBUG
934 printk(KERN_DEBUG "reset_xmit_timer sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, current_text_addr());
935#endif
936 when = TCP_RTO_MAX;
937 }
938
939 switch (what) {
940 case TCP_TIME_RETRANS:
941 case TCP_TIME_PROBE0:
942 tp->pending = what;
943 tp->timeout = jiffies+when;
944 sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout);
945 break;
946
947 case TCP_TIME_DACK:
948 tp->ack.pending |= TCP_ACK_TIMER;
949 tp->ack.timeout = jiffies+when;
950 sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout);
951 break;
952
953 default:
954#ifdef TCP_DEBUG
955 printk(tcp_timer_bug_msg);
956#endif
957 return;
958 };
959}
960
961/* Initialize RCV_MSS value. 491/* Initialize RCV_MSS value.
962 * RCV_MSS is an our guess about MSS used by the peer. 492 * RCV_MSS is an our guess about MSS used by the peer.
963 * We haven't any direct information about the MSS. 493 * We haven't any direct information about the MSS.
@@ -975,7 +505,7 @@ static inline void tcp_initialize_rcv_mss(struct sock *sk)
975 hint = min(hint, TCP_MIN_RCVMSS); 505 hint = min(hint, TCP_MIN_RCVMSS);
976 hint = max(hint, TCP_MIN_MSS); 506 hint = max(hint, TCP_MIN_MSS);
977 507
978 tp->ack.rcv_mss = hint; 508 inet_csk(sk)->icsk_ack.rcv_mss = hint;
979} 509}
980 510
981static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) 511static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
@@ -1110,7 +640,8 @@ static inline void tcp_packets_out_inc(struct sock *sk,
1110 640
1111 tp->packets_out += tcp_skb_pcount(skb); 641 tp->packets_out += tcp_skb_pcount(skb);
1112 if (!orig) 642 if (!orig)
1113 tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); 643 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
644 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
1114} 645}
1115 646
1116static inline void tcp_packets_out_dec(struct tcp_sock *tp, 647static inline void tcp_packets_out_dec(struct tcp_sock *tp,
@@ -1138,29 +669,29 @@ struct tcp_congestion_ops {
1138 struct list_head list; 669 struct list_head list;
1139 670
1140 /* initialize private data (optional) */ 671 /* initialize private data (optional) */
1141 void (*init)(struct tcp_sock *tp); 672 void (*init)(struct sock *sk);
1142 /* cleanup private data (optional) */ 673 /* cleanup private data (optional) */
1143 void (*release)(struct tcp_sock *tp); 674 void (*release)(struct sock *sk);
1144 675
1145 /* return slow start threshold (required) */ 676 /* return slow start threshold (required) */
1146 u32 (*ssthresh)(struct tcp_sock *tp); 677 u32 (*ssthresh)(struct sock *sk);
1147 /* lower bound for congestion window (optional) */ 678 /* lower bound for congestion window (optional) */
1148 u32 (*min_cwnd)(struct tcp_sock *tp); 679 u32 (*min_cwnd)(struct sock *sk);
1149 /* do new cwnd calculation (required) */ 680 /* do new cwnd calculation (required) */
1150 void (*cong_avoid)(struct tcp_sock *tp, u32 ack, 681 void (*cong_avoid)(struct sock *sk, u32 ack,
1151 u32 rtt, u32 in_flight, int good_ack); 682 u32 rtt, u32 in_flight, int good_ack);
1152 /* round trip time sample per acked packet (optional) */ 683 /* round trip time sample per acked packet (optional) */
1153 void (*rtt_sample)(struct tcp_sock *tp, u32 usrtt); 684 void (*rtt_sample)(struct sock *sk, u32 usrtt);
1154 /* call before changing ca_state (optional) */ 685 /* call before changing ca_state (optional) */
1155 void (*set_state)(struct tcp_sock *tp, u8 new_state); 686 void (*set_state)(struct sock *sk, u8 new_state);
1156 /* call when cwnd event occurs (optional) */ 687 /* call when cwnd event occurs (optional) */
1157 void (*cwnd_event)(struct tcp_sock *tp, enum tcp_ca_event ev); 688 void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
1158 /* new value of cwnd after loss (optional) */ 689 /* new value of cwnd after loss (optional) */
1159 u32 (*undo_cwnd)(struct tcp_sock *tp); 690 u32 (*undo_cwnd)(struct sock *sk);
1160 /* hook for packet ack accounting (optional) */ 691 /* hook for packet ack accounting (optional) */
1161 void (*pkts_acked)(struct tcp_sock *tp, u32 num_acked); 692 void (*pkts_acked)(struct sock *sk, u32 num_acked);
1162 /* get info for tcp_diag (optional) */ 693 /* get info for inet_diag (optional) */
1163 void (*get_info)(struct tcp_sock *tp, u32 ext, struct sk_buff *skb); 694 void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);
1164 695
1165 char name[TCP_CA_NAME_MAX]; 696 char name[TCP_CA_NAME_MAX];
1166 struct module *owner; 697 struct module *owner;
@@ -1169,30 +700,34 @@ struct tcp_congestion_ops {
1169extern int tcp_register_congestion_control(struct tcp_congestion_ops *type); 700extern int tcp_register_congestion_control(struct tcp_congestion_ops *type);
1170extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); 701extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
1171 702
1172extern void tcp_init_congestion_control(struct tcp_sock *tp); 703extern void tcp_init_congestion_control(struct sock *sk);
1173extern void tcp_cleanup_congestion_control(struct tcp_sock *tp); 704extern void tcp_cleanup_congestion_control(struct sock *sk);
1174extern int tcp_set_default_congestion_control(const char *name); 705extern int tcp_set_default_congestion_control(const char *name);
1175extern void tcp_get_default_congestion_control(char *name); 706extern void tcp_get_default_congestion_control(char *name);
1176extern int tcp_set_congestion_control(struct tcp_sock *tp, const char *name); 707extern int tcp_set_congestion_control(struct sock *sk, const char *name);
1177 708
1178extern struct tcp_congestion_ops tcp_init_congestion_ops; 709extern struct tcp_congestion_ops tcp_init_congestion_ops;
1179extern u32 tcp_reno_ssthresh(struct tcp_sock *tp); 710extern u32 tcp_reno_ssthresh(struct sock *sk);
1180extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, 711extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack,
1181 u32 rtt, u32 in_flight, int flag); 712 u32 rtt, u32 in_flight, int flag);
1182extern u32 tcp_reno_min_cwnd(struct tcp_sock *tp); 713extern u32 tcp_reno_min_cwnd(struct sock *sk);
1183extern struct tcp_congestion_ops tcp_reno; 714extern struct tcp_congestion_ops tcp_reno;
1184 715
1185static inline void tcp_set_ca_state(struct tcp_sock *tp, u8 ca_state) 716static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
1186{ 717{
1187 if (tp->ca_ops->set_state) 718 struct inet_connection_sock *icsk = inet_csk(sk);
1188 tp->ca_ops->set_state(tp, ca_state); 719
1189 tp->ca_state = ca_state; 720 if (icsk->icsk_ca_ops->set_state)
721 icsk->icsk_ca_ops->set_state(sk, ca_state);
722 icsk->icsk_ca_state = ca_state;
1190} 723}
1191 724
1192static inline void tcp_ca_event(struct tcp_sock *tp, enum tcp_ca_event event) 725static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
1193{ 726{
1194 if (tp->ca_ops->cwnd_event) 727 const struct inet_connection_sock *icsk = inet_csk(sk);
1195 tp->ca_ops->cwnd_event(tp, event); 728
729 if (icsk->icsk_ca_ops->cwnd_event)
730 icsk->icsk_ca_ops->cwnd_event(sk, event);
1196} 731}
1197 732
1198/* This determines how many packets are "in the network" to the best 733/* This determines how many packets are "in the network" to the best
@@ -1218,9 +753,10 @@ static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
1218 * The exception is rate halving phase, when cwnd is decreasing towards 753 * The exception is rate halving phase, when cwnd is decreasing towards
1219 * ssthresh. 754 * ssthresh.
1220 */ 755 */
1221static inline __u32 tcp_current_ssthresh(struct tcp_sock *tp) 756static inline __u32 tcp_current_ssthresh(const struct sock *sk)
1222{ 757{
1223 if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery)) 758 const struct tcp_sock *tp = tcp_sk(sk);
759 if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery))
1224 return tp->snd_ssthresh; 760 return tp->snd_ssthresh;
1225 else 761 else
1226 return max(tp->snd_ssthresh, 762 return max(tp->snd_ssthresh,
@@ -1237,10 +773,13 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp)
1237} 773}
1238 774
1239/* Set slow start threshold and cwnd not falling to slow start */ 775/* Set slow start threshold and cwnd not falling to slow start */
1240static inline void __tcp_enter_cwr(struct tcp_sock *tp) 776static inline void __tcp_enter_cwr(struct sock *sk)
1241{ 777{
778 const struct inet_connection_sock *icsk = inet_csk(sk);
779 struct tcp_sock *tp = tcp_sk(sk);
780
1242 tp->undo_marker = 0; 781 tp->undo_marker = 0;
1243 tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); 782 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
1244 tp->snd_cwnd = min(tp->snd_cwnd, 783 tp->snd_cwnd = min(tp->snd_cwnd,
1245 tcp_packets_in_flight(tp) + 1U); 784 tcp_packets_in_flight(tp) + 1U);
1246 tp->snd_cwnd_cnt = 0; 785 tp->snd_cwnd_cnt = 0;
@@ -1249,12 +788,14 @@ static inline void __tcp_enter_cwr(struct tcp_sock *tp)
1249 TCP_ECN_queue_cwr(tp); 788 TCP_ECN_queue_cwr(tp);
1250} 789}
1251 790
1252static inline void tcp_enter_cwr(struct tcp_sock *tp) 791static inline void tcp_enter_cwr(struct sock *sk)
1253{ 792{
793 struct tcp_sock *tp = tcp_sk(sk);
794
1254 tp->prior_ssthresh = 0; 795 tp->prior_ssthresh = 0;
1255 if (tp->ca_state < TCP_CA_CWR) { 796 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
1256 __tcp_enter_cwr(tp); 797 __tcp_enter_cwr(sk);
1257 tcp_set_ca_state(tp, TCP_CA_CWR); 798 tcp_set_ca_state(sk, TCP_CA_CWR);
1258 } 799 }
1259} 800}
1260 801
@@ -1277,8 +818,10 @@ static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss,
1277 818
1278static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) 819static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp)
1279{ 820{
1280 if (!tp->packets_out && !tp->pending) 821 const struct inet_connection_sock *icsk = inet_csk(sk);
1281 tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto); 822 if (!tp->packets_out && !icsk->icsk_pending)
823 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
824 icsk->icsk_rto, TCP_RTO_MAX);
1282} 825}
1283 826
1284static __inline__ void tcp_push_pending_frames(struct sock *sk, 827static __inline__ void tcp_push_pending_frames(struct sock *sk,
@@ -1297,9 +840,6 @@ static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq)
1297 tp->snd_wl1 = seq; 840 tp->snd_wl1 = seq;
1298} 841}
1299 842
1300extern void tcp_destroy_sock(struct sock *sk);
1301
1302
1303/* 843/*
1304 * Calculate(/check) TCP checksum 844 * Calculate(/check) TCP checksum
1305 */ 845 */
@@ -1359,8 +899,10 @@ static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1359 tp->ucopy.memory = 0; 899 tp->ucopy.memory = 0;
1360 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { 900 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1361 wake_up_interruptible(sk->sk_sleep); 901 wake_up_interruptible(sk->sk_sleep);
1362 if (!tcp_ack_scheduled(tp)) 902 if (!inet_csk_ack_scheduled(sk))
1363 tcp_reset_xmit_timer(sk, TCP_TIME_DACK, (3*TCP_RTO_MIN)/4); 903 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
904 (3 * TCP_RTO_MIN) / 4,
905 TCP_RTO_MAX);
1364 } 906 }
1365 return 1; 907 return 1;
1366 } 908 }
@@ -1393,9 +935,9 @@ static __inline__ void tcp_set_state(struct sock *sk, int state)
1393 TCP_INC_STATS(TCP_MIB_ESTABRESETS); 935 TCP_INC_STATS(TCP_MIB_ESTABRESETS);
1394 936
1395 sk->sk_prot->unhash(sk); 937 sk->sk_prot->unhash(sk);
1396 if (tcp_sk(sk)->bind_hash && 938 if (inet_csk(sk)->icsk_bind_hash &&
1397 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) 939 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
1398 tcp_put_port(sk); 940 inet_put_port(&tcp_hashinfo, sk);
1399 /* fall through */ 941 /* fall through */
1400 default: 942 default:
1401 if (oldstate==TCP_ESTABLISHED) 943 if (oldstate==TCP_ESTABLISHED)
@@ -1422,7 +964,7 @@ static __inline__ void tcp_done(struct sock *sk)
1422 if (!sock_flag(sk, SOCK_DEAD)) 964 if (!sock_flag(sk, SOCK_DEAD))
1423 sk->sk_state_change(sk); 965 sk->sk_state_change(sk);
1424 else 966 else
1425 tcp_destroy_sock(sk); 967 inet_csk_destroy_sock(sk);
1426} 968}
1427 969
1428static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt) 970static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt)
@@ -1524,54 +1066,6 @@ static inline int tcp_full_space(const struct sock *sk)
1524 return tcp_win_from_space(sk->sk_rcvbuf); 1066 return tcp_win_from_space(sk->sk_rcvbuf);
1525} 1067}
1526 1068
1527static inline void tcp_acceptq_queue(struct sock *sk, struct request_sock *req,
1528 struct sock *child)
1529{
1530 reqsk_queue_add(&tcp_sk(sk)->accept_queue, req, sk, child);
1531}
1532
1533static inline void
1534tcp_synq_removed(struct sock *sk, struct request_sock *req)
1535{
1536 if (reqsk_queue_removed(&tcp_sk(sk)->accept_queue, req) == 0)
1537 tcp_delete_keepalive_timer(sk);
1538}
1539
1540static inline void tcp_synq_added(struct sock *sk)
1541{
1542 if (reqsk_queue_added(&tcp_sk(sk)->accept_queue) == 0)
1543 tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT);
1544}
1545
1546static inline int tcp_synq_len(struct sock *sk)
1547{
1548 return reqsk_queue_len(&tcp_sk(sk)->accept_queue);
1549}
1550
1551static inline int tcp_synq_young(struct sock *sk)
1552{
1553 return reqsk_queue_len_young(&tcp_sk(sk)->accept_queue);
1554}
1555
1556static inline int tcp_synq_is_full(struct sock *sk)
1557{
1558 return reqsk_queue_is_full(&tcp_sk(sk)->accept_queue);
1559}
1560
1561static inline void tcp_synq_unlink(struct tcp_sock *tp, struct request_sock *req,
1562 struct request_sock **prev)
1563{
1564 reqsk_queue_unlink(&tp->accept_queue, req, prev);
1565}
1566
1567static inline void tcp_synq_drop(struct sock *sk, struct request_sock *req,
1568 struct request_sock **prev)
1569{
1570 tcp_synq_unlink(tcp_sk(sk), req, prev);
1571 tcp_synq_removed(sk, req);
1572 reqsk_free(req);
1573}
1574
1575static __inline__ void tcp_openreq_init(struct request_sock *req, 1069static __inline__ void tcp_openreq_init(struct request_sock *req,
1576 struct tcp_options_received *rx_opt, 1070 struct tcp_options_received *rx_opt,
1577 struct sk_buff *skb) 1071 struct sk_buff *skb)
@@ -1593,27 +1087,6 @@ static __inline__ void tcp_openreq_init(struct request_sock *req,
1593 1087
1594extern void tcp_enter_memory_pressure(void); 1088extern void tcp_enter_memory_pressure(void);
1595 1089
1596extern void tcp_listen_wlock(void);
1597
1598/* - We may sleep inside this lock.
1599 * - If sleeping is not required (or called from BH),
1600 * use plain read_(un)lock(&tcp_lhash_lock).
1601 */
1602
1603static inline void tcp_listen_lock(void)
1604{
1605 /* read_lock synchronizes to candidates to writers */
1606 read_lock(&tcp_lhash_lock);
1607 atomic_inc(&tcp_lhash_users);
1608 read_unlock(&tcp_lhash_lock);
1609}
1610
1611static inline void tcp_listen_unlock(void)
1612{
1613 if (atomic_dec_and_test(&tcp_lhash_users))
1614 wake_up(&tcp_lhash_wait);
1615}
1616
1617static inline int keepalive_intvl_when(const struct tcp_sock *tp) 1090static inline int keepalive_intvl_when(const struct tcp_sock *tp)
1618{ 1091{
1619 return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl; 1092 return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
@@ -1624,12 +1097,13 @@ static inline int keepalive_time_when(const struct tcp_sock *tp)
1624 return tp->keepalive_time ? : sysctl_tcp_keepalive_time; 1097 return tp->keepalive_time ? : sysctl_tcp_keepalive_time;
1625} 1098}
1626 1099
1627static inline int tcp_fin_time(const struct tcp_sock *tp) 1100static inline int tcp_fin_time(const struct sock *sk)
1628{ 1101{
1629 int fin_timeout = tp->linger2 ? : sysctl_tcp_fin_timeout; 1102 int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout;
1103 const int rto = inet_csk(sk)->icsk_rto;
1630 1104
1631 if (fin_timeout < (tp->rto<<2) - (tp->rto>>1)) 1105 if (fin_timeout < (rto << 2) - (rto >> 1))
1632 fin_timeout = (tp->rto<<2) - (tp->rto>>1); 1106 fin_timeout = (rto << 2) - (rto >> 1);
1633 1107
1634 return fin_timeout; 1108 return fin_timeout;
1635} 1109}
@@ -1658,15 +1132,6 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int
1658 return 1; 1132 return 1;
1659} 1133}
1660 1134
1661static inline void tcp_v4_setup_caps(struct sock *sk, struct dst_entry *dst)
1662{
1663 sk->sk_route_caps = dst->dev->features;
1664 if (sk->sk_route_caps & NETIF_F_TSO) {
1665 if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len)
1666 sk->sk_route_caps &= ~NETIF_F_TSO;
1667 }
1668}
1669
1670#define TCP_CHECK_TIMER(sk) do { } while (0) 1135#define TCP_CHECK_TIMER(sk) do { } while (0)
1671 1136
1672static inline int tcp_use_frto(const struct sock *sk) 1137static inline int tcp_use_frto(const struct sock *sk)
@@ -1718,4 +1183,16 @@ struct tcp_iter_state {
1718extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo); 1183extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo);
1719extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo); 1184extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo);
1720 1185
1186extern struct request_sock_ops tcp_request_sock_ops;
1187
1188extern int tcp_v4_destroy_sock(struct sock *sk);
1189
1190#ifdef CONFIG_PROC_FS
1191extern int tcp4_proc_init(void);
1192extern void tcp4_proc_exit(void);
1193#endif
1194
1195extern void tcp_v4_init(struct net_proto_family *ops);
1196extern void tcp_init(void);
1197
1721#endif /* _TCP_H */ 1198#endif /* _TCP_H */
diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h
index 64980ee8c92a..c6b84397448d 100644
--- a/include/net/tcp_ecn.h
+++ b/include/net/tcp_ecn.h
@@ -88,7 +88,7 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
88 * it is surely retransmit. It is not in ECN RFC, 88 * it is surely retransmit. It is not in ECN RFC,
89 * but Linux follows this rule. */ 89 * but Linux follows this rule. */
90 else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) 90 else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags)))
91 tcp_enter_quickack_mode(tp); 91 tcp_enter_quickack_mode((struct sock *)tp);
92 } 92 }
93} 93}
94 94
diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h
new file mode 100644
index 000000000000..b9d4176b2d15
--- /dev/null
+++ b/include/net/tcp_states.h
@@ -0,0 +1,34 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Definitions for the TCP protocol sk_state field.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13#ifndef _LINUX_TCP_STATES_H
14#define _LINUX_TCP_STATES_H
15
16enum {
17 TCP_ESTABLISHED = 1,
18 TCP_SYN_SENT,
19 TCP_SYN_RECV,
20 TCP_FIN_WAIT1,
21 TCP_FIN_WAIT2,
22 TCP_TIME_WAIT,
23 TCP_CLOSE,
24 TCP_CLOSE_WAIT,
25 TCP_LAST_ACK,
26 TCP_LISTEN,
27 TCP_CLOSING, /* Now a valid state */
28
29 TCP_MAX_STATES /* Leave at the end! */
30};
31
32#define TCP_STATE_MASK 0xF
33
34#endif /* _LINUX_TCP_STATES_H */
diff --git a/include/net/udp.h b/include/net/udp.h
index ac229b761dbc..107b9d791a1f 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -94,6 +94,11 @@ struct udp_iter_state {
94 struct seq_operations seq_ops; 94 struct seq_operations seq_ops;
95}; 95};
96 96
97#ifdef CONFIG_PROC_FS
97extern int udp_proc_register(struct udp_seq_afinfo *afinfo); 98extern int udp_proc_register(struct udp_seq_afinfo *afinfo);
98extern void udp_proc_unregister(struct udp_seq_afinfo *afinfo); 99extern void udp_proc_unregister(struct udp_seq_afinfo *afinfo);
100
101extern int udp4_proc_init(void);
102extern void udp4_proc_exit(void);
103#endif
99#endif /* _UDP_H */ 104#endif /* _UDP_H */
diff --git a/include/net/x25.h b/include/net/x25.h
index 8b39b98876e8..fee62ff8c194 100644
--- a/include/net/x25.h
+++ b/include/net/x25.h
@@ -175,7 +175,7 @@ extern void x25_kill_by_neigh(struct x25_neigh *);
175 175
176/* x25_dev.c */ 176/* x25_dev.c */
177extern void x25_send_frame(struct sk_buff *, struct x25_neigh *); 177extern void x25_send_frame(struct sk_buff *, struct x25_neigh *);
178extern int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *); 178extern int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *);
179extern void x25_establish_link(struct x25_neigh *); 179extern void x25_establish_link(struct x25_neigh *);
180extern void x25_terminate_link(struct x25_neigh *); 180extern void x25_terminate_link(struct x25_neigh *);
181 181
diff --git a/include/net/x25device.h b/include/net/x25device.h
index d45ae883bd1d..1a318374faef 100644
--- a/include/net/x25device.h
+++ b/include/net/x25device.h
@@ -8,7 +8,6 @@
8static inline __be16 x25_type_trans(struct sk_buff *skb, struct net_device *dev) 8static inline __be16 x25_type_trans(struct sk_buff *skb, struct net_device *dev)
9{ 9{
10 skb->mac.raw = skb->data; 10 skb->mac.raw = skb->data;
11 skb->input_dev = skb->dev = dev;
12 skb->pkt_type = PACKET_HOST; 11 skb->pkt_type = PACKET_HOST;
13 12
14 return htons(ETH_P_X25); 13 return htons(ETH_P_X25);
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 868ef88ef971..a9d0d8c5dfbf 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -818,7 +818,6 @@ extern void xfrm6_init(void);
818extern void xfrm6_fini(void); 818extern void xfrm6_fini(void);
819extern void xfrm_state_init(void); 819extern void xfrm_state_init(void);
820extern void xfrm4_state_init(void); 820extern void xfrm4_state_init(void);
821extern void xfrm4_state_fini(void);
822extern void xfrm6_state_init(void); 821extern void xfrm6_state_init(void);
823extern void xfrm6_state_fini(void); 822extern void xfrm6_state_fini(void);
824 823
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
new file mode 100644
index 000000000000..5bf9834f7dca
--- /dev/null
+++ b/include/rdma/ib_cache.h
@@ -0,0 +1,105 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Intel Corporation. All rights reserved.
4 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 *
34 * $Id: ib_cache.h 1349 2004-12-16 21:09:43Z roland $
35 */
36
37#ifndef _IB_CACHE_H
38#define _IB_CACHE_H
39
40#include <rdma/ib_verbs.h>
41
42/**
43 * ib_get_cached_gid - Returns a cached GID table entry
44 * @device: The device to query.
45 * @port_num: The port number of the device to query.
46 * @index: The index into the cached GID table to query.
47 * @gid: The GID value found at the specified index.
48 *
49 * ib_get_cached_gid() fetches the specified GID table entry stored in
50 * the local software cache.
51 */
52int ib_get_cached_gid(struct ib_device *device,
53 u8 port_num,
54 int index,
55 union ib_gid *gid);
56
57/**
58 * ib_find_cached_gid - Returns the port number and GID table index where
59 * a specified GID value occurs.
60 * @device: The device to query.
61 * @gid: The GID value to search for.
62 * @port_num: The port number of the device where the GID value was found.
63 * @index: The index into the cached GID table where the GID was found. This
64 * parameter may be NULL.
65 *
66 * ib_find_cached_gid() searches for the specified GID value in
67 * the local software cache.
68 */
69int ib_find_cached_gid(struct ib_device *device,
70 union ib_gid *gid,
71 u8 *port_num,
72 u16 *index);
73
74/**
75 * ib_get_cached_pkey - Returns a cached PKey table entry
76 * @device: The device to query.
77 * @port_num: The port number of the device to query.
78 * @index: The index into the cached PKey table to query.
79 * @pkey: The PKey value found at the specified index.
80 *
81 * ib_get_cached_pkey() fetches the specified PKey table entry stored in
82 * the local software cache.
83 */
84int ib_get_cached_pkey(struct ib_device *device_handle,
85 u8 port_num,
86 int index,
87 u16 *pkey);
88
89/**
90 * ib_find_cached_pkey - Returns the PKey table index where a specified
91 * PKey value occurs.
92 * @device: The device to query.
93 * @port_num: The port number of the device to search for the PKey.
94 * @pkey: The PKey value to search for.
95 * @index: The index into the cached PKey table where the PKey was found.
96 *
97 * ib_find_cached_pkey() searches the specified PKey table in
98 * the local software cache.
99 */
100int ib_find_cached_pkey(struct ib_device *device,
101 u8 port_num,
102 u16 pkey,
103 u16 *index);
104
105#endif /* _IB_CACHE_H */
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
new file mode 100644
index 000000000000..77fe9039209b
--- /dev/null
+++ b/include/rdma/ib_cm.h
@@ -0,0 +1,568 @@
1/*
2 * Copyright (c) 2004 Intel Corporation. All rights reserved.
3 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
4 * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
5 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer.
20 *
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 *
35 * $Id: ib_cm.h 2730 2005-06-28 16:43:03Z sean.hefty $
36 */
37#if !defined(IB_CM_H)
38#define IB_CM_H
39
40#include <rdma/ib_mad.h>
41#include <rdma/ib_sa.h>
42
43enum ib_cm_state {
44 IB_CM_IDLE,
45 IB_CM_LISTEN,
46 IB_CM_REQ_SENT,
47 IB_CM_REQ_RCVD,
48 IB_CM_MRA_REQ_SENT,
49 IB_CM_MRA_REQ_RCVD,
50 IB_CM_REP_SENT,
51 IB_CM_REP_RCVD,
52 IB_CM_MRA_REP_SENT,
53 IB_CM_MRA_REP_RCVD,
54 IB_CM_ESTABLISHED,
55 IB_CM_DREQ_SENT,
56 IB_CM_DREQ_RCVD,
57 IB_CM_TIMEWAIT,
58 IB_CM_SIDR_REQ_SENT,
59 IB_CM_SIDR_REQ_RCVD
60};
61
62enum ib_cm_lap_state {
63 IB_CM_LAP_IDLE,
64 IB_CM_LAP_SENT,
65 IB_CM_LAP_RCVD,
66 IB_CM_MRA_LAP_SENT,
67 IB_CM_MRA_LAP_RCVD,
68};
69
70enum ib_cm_event_type {
71 IB_CM_REQ_ERROR,
72 IB_CM_REQ_RECEIVED,
73 IB_CM_REP_ERROR,
74 IB_CM_REP_RECEIVED,
75 IB_CM_RTU_RECEIVED,
76 IB_CM_USER_ESTABLISHED,
77 IB_CM_DREQ_ERROR,
78 IB_CM_DREQ_RECEIVED,
79 IB_CM_DREP_RECEIVED,
80 IB_CM_TIMEWAIT_EXIT,
81 IB_CM_MRA_RECEIVED,
82 IB_CM_REJ_RECEIVED,
83 IB_CM_LAP_ERROR,
84 IB_CM_LAP_RECEIVED,
85 IB_CM_APR_RECEIVED,
86 IB_CM_SIDR_REQ_ERROR,
87 IB_CM_SIDR_REQ_RECEIVED,
88 IB_CM_SIDR_REP_RECEIVED
89};
90
91enum ib_cm_data_size {
92 IB_CM_REQ_PRIVATE_DATA_SIZE = 92,
93 IB_CM_MRA_PRIVATE_DATA_SIZE = 222,
94 IB_CM_REJ_PRIVATE_DATA_SIZE = 148,
95 IB_CM_REP_PRIVATE_DATA_SIZE = 196,
96 IB_CM_RTU_PRIVATE_DATA_SIZE = 224,
97 IB_CM_DREQ_PRIVATE_DATA_SIZE = 220,
98 IB_CM_DREP_PRIVATE_DATA_SIZE = 224,
99 IB_CM_REJ_ARI_LENGTH = 72,
100 IB_CM_LAP_PRIVATE_DATA_SIZE = 168,
101 IB_CM_APR_PRIVATE_DATA_SIZE = 148,
102 IB_CM_APR_INFO_LENGTH = 72,
103 IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE = 216,
104 IB_CM_SIDR_REP_PRIVATE_DATA_SIZE = 136,
105 IB_CM_SIDR_REP_INFO_LENGTH = 72
106};
107
108struct ib_cm_id;
109
110struct ib_cm_req_event_param {
111 struct ib_cm_id *listen_id;
112 struct ib_device *device;
113 u8 port;
114
115 struct ib_sa_path_rec *primary_path;
116 struct ib_sa_path_rec *alternate_path;
117
118 __be64 remote_ca_guid;
119 u32 remote_qkey;
120 u32 remote_qpn;
121 enum ib_qp_type qp_type;
122
123 u32 starting_psn;
124 u8 responder_resources;
125 u8 initiator_depth;
126 unsigned int local_cm_response_timeout:5;
127 unsigned int flow_control:1;
128 unsigned int remote_cm_response_timeout:5;
129 unsigned int retry_count:3;
130 unsigned int rnr_retry_count:3;
131 unsigned int srq:1;
132};
133
134struct ib_cm_rep_event_param {
135 __be64 remote_ca_guid;
136 u32 remote_qkey;
137 u32 remote_qpn;
138 u32 starting_psn;
139 u8 responder_resources;
140 u8 initiator_depth;
141 unsigned int target_ack_delay:5;
142 unsigned int failover_accepted:2;
143 unsigned int flow_control:1;
144 unsigned int rnr_retry_count:3;
145 unsigned int srq:1;
146};
147
148enum ib_cm_rej_reason {
149 IB_CM_REJ_NO_QP = 1,
150 IB_CM_REJ_NO_EEC = 2,
151 IB_CM_REJ_NO_RESOURCES = 3,
152 IB_CM_REJ_TIMEOUT = 4,
153 IB_CM_REJ_UNSUPPORTED = 5,
154 IB_CM_REJ_INVALID_COMM_ID = 6,
155 IB_CM_REJ_INVALID_COMM_INSTANCE = 7,
156 IB_CM_REJ_INVALID_SERVICE_ID = 8,
157 IB_CM_REJ_INVALID_TRANSPORT_TYPE = 9,
158 IB_CM_REJ_STALE_CONN = 10,
159 IB_CM_REJ_RDC_NOT_EXIST = 11,
160 IB_CM_REJ_INVALID_GID = 12,
161 IB_CM_REJ_INVALID_LID = 13,
162 IB_CM_REJ_INVALID_SL = 14,
163 IB_CM_REJ_INVALID_TRAFFIC_CLASS = 15,
164 IB_CM_REJ_INVALID_HOP_LIMIT = 16,
165 IB_CM_REJ_INVALID_PACKET_RATE = 17,
166 IB_CM_REJ_INVALID_ALT_GID = 18,
167 IB_CM_REJ_INVALID_ALT_LID = 19,
168 IB_CM_REJ_INVALID_ALT_SL = 20,
169 IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS = 21,
170 IB_CM_REJ_INVALID_ALT_HOP_LIMIT = 22,
171 IB_CM_REJ_INVALID_ALT_PACKET_RATE = 23,
172 IB_CM_REJ_PORT_CM_REDIRECT = 24,
173 IB_CM_REJ_PORT_REDIRECT = 25,
174 IB_CM_REJ_INVALID_MTU = 26,
175 IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES = 27,
176 IB_CM_REJ_CONSUMER_DEFINED = 28,
177 IB_CM_REJ_INVALID_RNR_RETRY = 29,
178 IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID = 30,
179 IB_CM_REJ_INVALID_CLASS_VERSION = 31,
180 IB_CM_REJ_INVALID_FLOW_LABEL = 32,
181 IB_CM_REJ_INVALID_ALT_FLOW_LABEL = 33
182};
183
184struct ib_cm_rej_event_param {
185 enum ib_cm_rej_reason reason;
186 void *ari;
187 u8 ari_length;
188};
189
190struct ib_cm_mra_event_param {
191 u8 service_timeout;
192};
193
194struct ib_cm_lap_event_param {
195 struct ib_sa_path_rec *alternate_path;
196};
197
198enum ib_cm_apr_status {
199 IB_CM_APR_SUCCESS,
200 IB_CM_APR_INVALID_COMM_ID,
201 IB_CM_APR_UNSUPPORTED,
202 IB_CM_APR_REJECT,
203 IB_CM_APR_REDIRECT,
204 IB_CM_APR_IS_CURRENT,
205 IB_CM_APR_INVALID_QPN_EECN,
206 IB_CM_APR_INVALID_LID,
207 IB_CM_APR_INVALID_GID,
208 IB_CM_APR_INVALID_FLOW_LABEL,
209 IB_CM_APR_INVALID_TCLASS,
210 IB_CM_APR_INVALID_HOP_LIMIT,
211 IB_CM_APR_INVALID_PACKET_RATE,
212 IB_CM_APR_INVALID_SL
213};
214
215struct ib_cm_apr_event_param {
216 enum ib_cm_apr_status ap_status;
217 void *apr_info;
218 u8 info_len;
219};
220
221struct ib_cm_sidr_req_event_param {
222 struct ib_cm_id *listen_id;
223 struct ib_device *device;
224 u8 port;
225 u16 pkey;
226};
227
228enum ib_cm_sidr_status {
229 IB_SIDR_SUCCESS,
230 IB_SIDR_UNSUPPORTED,
231 IB_SIDR_REJECT,
232 IB_SIDR_NO_QP,
233 IB_SIDR_REDIRECT,
234 IB_SIDR_UNSUPPORTED_VERSION
235};
236
237struct ib_cm_sidr_rep_event_param {
238 enum ib_cm_sidr_status status;
239 u32 qkey;
240 u32 qpn;
241 void *info;
242 u8 info_len;
243
244};
245
246struct ib_cm_event {
247 enum ib_cm_event_type event;
248 union {
249 struct ib_cm_req_event_param req_rcvd;
250 struct ib_cm_rep_event_param rep_rcvd;
251 /* No data for RTU received events. */
252 struct ib_cm_rej_event_param rej_rcvd;
253 struct ib_cm_mra_event_param mra_rcvd;
254 struct ib_cm_lap_event_param lap_rcvd;
255 struct ib_cm_apr_event_param apr_rcvd;
256 /* No data for DREQ/DREP received events. */
257 struct ib_cm_sidr_req_event_param sidr_req_rcvd;
258 struct ib_cm_sidr_rep_event_param sidr_rep_rcvd;
259 enum ib_wc_status send_status;
260 } param;
261
262 void *private_data;
263};
264
265/**
266 * ib_cm_handler - User-defined callback to process communication events.
267 * @cm_id: Communication identifier associated with the reported event.
268 * @event: Information about the communication event.
269 *
270 * IB_CM_REQ_RECEIVED and IB_CM_SIDR_REQ_RECEIVED communication events
271 * generated as a result of listen requests result in the allocation of a
272 * new @cm_id. The new @cm_id is returned to the user through this callback.
273 * Clients are responsible for destroying the new @cm_id. For peer-to-peer
274 * IB_CM_REQ_RECEIVED and all other events, the returned @cm_id corresponds
275 * to a user's existing communication identifier.
276 *
277 * Users may not call ib_destroy_cm_id while in the context of this callback;
278 * however, returning a non-zero value instructs the communication manager to
279 * destroy the @cm_id after the callback completes.
280 */
281typedef int (*ib_cm_handler)(struct ib_cm_id *cm_id,
282 struct ib_cm_event *event);
283
284struct ib_cm_id {
285 ib_cm_handler cm_handler;
286 void *context;
287 __be64 service_id;
288 __be64 service_mask;
289 enum ib_cm_state state; /* internal CM/debug use */
290 enum ib_cm_lap_state lap_state; /* internal CM/debug use */
291 __be32 local_id;
292 __be32 remote_id;
293};
294
295/**
296 * ib_create_cm_id - Allocate a communication identifier.
297 * @cm_handler: Callback invoked to notify the user of CM events.
298 * @context: User specified context associated with the communication
299 * identifier.
300 *
301 * Communication identifiers are used to track connection states, service
302 * ID resolution requests, and listen requests.
303 */
304struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler,
305 void *context);
306
307/**
308 * ib_destroy_cm_id - Destroy a connection identifier.
309 * @cm_id: Connection identifier to destroy.
310 *
311 * This call blocks until the connection identifier is destroyed.
312 */
313void ib_destroy_cm_id(struct ib_cm_id *cm_id);
314
315#define IB_SERVICE_ID_AGN_MASK __constant_cpu_to_be64(0xFF00000000000000ULL)
316#define IB_CM_ASSIGN_SERVICE_ID __constant_cpu_to_be64(0x0200000000000000ULL)
317
318/**
319 * ib_cm_listen - Initiates listening on the specified service ID for
320 * connection and service ID resolution requests.
321 * @cm_id: Connection identifier associated with the listen request.
322 * @service_id: Service identifier matched against incoming connection
323 * and service ID resolution requests. The service ID should be specified
324 * network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
325 * assign a service ID to the caller.
326 * @service_mask: Mask applied to service ID used to listen across a
327 * range of service IDs. If set to 0, the service ID is matched
328 * exactly. This parameter is ignored if %service_id is set to
329 * IB_CM_ASSIGN_SERVICE_ID.
330 */
331int ib_cm_listen(struct ib_cm_id *cm_id,
332 __be64 service_id,
333 __be64 service_mask);
334
335struct ib_cm_req_param {
336 struct ib_sa_path_rec *primary_path;
337 struct ib_sa_path_rec *alternate_path;
338 __be64 service_id;
339 u32 qp_num;
340 enum ib_qp_type qp_type;
341 u32 starting_psn;
342 const void *private_data;
343 u8 private_data_len;
344 u8 peer_to_peer;
345 u8 responder_resources;
346 u8 initiator_depth;
347 u8 remote_cm_response_timeout;
348 u8 flow_control;
349 u8 local_cm_response_timeout;
350 u8 retry_count;
351 u8 rnr_retry_count;
352 u8 max_cm_retries;
353 u8 srq;
354};
355
356/**
357 * ib_send_cm_req - Sends a connection request to the remote node.
358 * @cm_id: Connection identifier that will be associated with the
359 * connection request.
360 * @param: Connection request information needed to establish the
361 * connection.
362 */
363int ib_send_cm_req(struct ib_cm_id *cm_id,
364 struct ib_cm_req_param *param);
365
366struct ib_cm_rep_param {
367 u32 qp_num;
368 u32 starting_psn;
369 const void *private_data;
370 u8 private_data_len;
371 u8 responder_resources;
372 u8 initiator_depth;
373 u8 target_ack_delay;
374 u8 failover_accepted;
375 u8 flow_control;
376 u8 rnr_retry_count;
377 u8 srq;
378};
379
380/**
381 * ib_send_cm_rep - Sends a connection reply in response to a connection
382 * request.
383 * @cm_id: Connection identifier that will be associated with the
384 * connection request.
385 * @param: Connection reply information needed to establish the
386 * connection.
387 */
388int ib_send_cm_rep(struct ib_cm_id *cm_id,
389 struct ib_cm_rep_param *param);
390
391/**
392 * ib_send_cm_rtu - Sends a connection ready to use message in response
393 * to a connection reply message.
394 * @cm_id: Connection identifier associated with the connection request.
395 * @private_data: Optional user-defined private data sent with the
396 * ready to use message.
397 * @private_data_len: Size of the private data buffer, in bytes.
398 */
399int ib_send_cm_rtu(struct ib_cm_id *cm_id,
400 const void *private_data,
401 u8 private_data_len);
402
403/**
404 * ib_send_cm_dreq - Sends a disconnection request for an existing
405 * connection.
406 * @cm_id: Connection identifier associated with the connection being
407 * released.
408 * @private_data: Optional user-defined private data sent with the
409 * disconnection request message.
410 * @private_data_len: Size of the private data buffer, in bytes.
411 */
412int ib_send_cm_dreq(struct ib_cm_id *cm_id,
413 const void *private_data,
414 u8 private_data_len);
415
416/**
417 * ib_send_cm_drep - Sends a disconnection reply to a disconnection request.
418 * @cm_id: Connection identifier associated with the connection being
419 * released.
420 * @private_data: Optional user-defined private data sent with the
421 * disconnection reply message.
422 * @private_data_len: Size of the private data buffer, in bytes.
423 *
424 * If the cm_id is in the correct state, the CM will transition the connection
425 * to the timewait state, even if an error occurs sending the DREP message.
426 */
427int ib_send_cm_drep(struct ib_cm_id *cm_id,
428 const void *private_data,
429 u8 private_data_len);
430
431/**
432 * ib_cm_establish - Forces a connection state to established.
433 * @cm_id: Connection identifier to transition to established.
434 *
435 * This routine should be invoked by users who receive messages on a
436 * connected QP before an RTU has been received.
437 */
438int ib_cm_establish(struct ib_cm_id *cm_id);
439
440/**
441 * ib_send_cm_rej - Sends a connection rejection message to the
442 * remote node.
443 * @cm_id: Connection identifier associated with the connection being
444 * rejected.
445 * @reason: Reason for the connection request rejection.
446 * @ari: Optional additional rejection information.
447 * @ari_length: Size of the additional rejection information, in bytes.
448 * @private_data: Optional user-defined private data sent with the
449 * rejection message.
450 * @private_data_len: Size of the private data buffer, in bytes.
451 */
452int ib_send_cm_rej(struct ib_cm_id *cm_id,
453 enum ib_cm_rej_reason reason,
454 void *ari,
455 u8 ari_length,
456 const void *private_data,
457 u8 private_data_len);
458
459/**
460 * ib_send_cm_mra - Sends a message receipt acknowledgement to a connection
461 * message.
462 * @cm_id: Connection identifier associated with the connection message.
463 * @service_timeout: The maximum time required for the sender to reply to
464 * to the connection message.
465 * @private_data: Optional user-defined private data sent with the
466 * message receipt acknowledgement.
467 * @private_data_len: Size of the private data buffer, in bytes.
468 */
469int ib_send_cm_mra(struct ib_cm_id *cm_id,
470 u8 service_timeout,
471 const void *private_data,
472 u8 private_data_len);
473
474/**
475 * ib_send_cm_lap - Sends a load alternate path request.
476 * @cm_id: Connection identifier associated with the load alternate path
477 * message.
478 * @alternate_path: A path record that identifies the alternate path to
479 * load.
480 * @private_data: Optional user-defined private data sent with the
481 * load alternate path message.
482 * @private_data_len: Size of the private data buffer, in bytes.
483 */
484int ib_send_cm_lap(struct ib_cm_id *cm_id,
485 struct ib_sa_path_rec *alternate_path,
486 const void *private_data,
487 u8 private_data_len);
488
489/**
490 * ib_cm_init_qp_attr - Initializes the QP attributes for use in transitioning
491 * to a specified QP state.
492 * @cm_id: Communication identifier associated with the QP attributes to
493 * initialize.
494 * @qp_attr: On input, specifies the desired QP state. On output, the
495 * mandatory and desired optional attributes will be set in order to
496 * modify the QP to the specified state.
497 * @qp_attr_mask: The QP attribute mask that may be used to transition the
498 * QP to the specified state.
499 *
500 * Users must set the @qp_attr->qp_state to the desired QP state. This call
501 * will set all required attributes for the given transition, along with
502 * known optional attributes. Users may override the attributes returned from
503 * this call before calling ib_modify_qp.
504 */
505int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
506 struct ib_qp_attr *qp_attr,
507 int *qp_attr_mask);
508
509/**
510 * ib_send_cm_apr - Sends an alternate path response message in response to
511 * a load alternate path request.
512 * @cm_id: Connection identifier associated with the alternate path response.
513 * @status: Reply status sent with the alternate path response.
514 * @info: Optional additional information sent with the alternate path
515 * response.
516 * @info_length: Size of the additional information, in bytes.
517 * @private_data: Optional user-defined private data sent with the
518 * alternate path response message.
519 * @private_data_len: Size of the private data buffer, in bytes.
520 */
521int ib_send_cm_apr(struct ib_cm_id *cm_id,
522 enum ib_cm_apr_status status,
523 void *info,
524 u8 info_length,
525 const void *private_data,
526 u8 private_data_len);
527
528struct ib_cm_sidr_req_param {
529 struct ib_sa_path_rec *path;
530 __be64 service_id;
531 int timeout_ms;
532 const void *private_data;
533 u8 private_data_len;
534 u8 max_cm_retries;
535 u16 pkey;
536};
537
538/**
539 * ib_send_cm_sidr_req - Sends a service ID resolution request to the
540 * remote node.
541 * @cm_id: Communication identifier that will be associated with the
542 * service ID resolution request.
543 * @param: Service ID resolution request information.
544 */
545int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
546 struct ib_cm_sidr_req_param *param);
547
548struct ib_cm_sidr_rep_param {
549 u32 qp_num;
550 u32 qkey;
551 enum ib_cm_sidr_status status;
552 const void *info;
553 u8 info_length;
554 const void *private_data;
555 u8 private_data_len;
556};
557
558/**
559 * ib_send_cm_sidr_rep - Sends a service ID resolution request to the
560 * remote node.
561 * @cm_id: Communication identifier associated with the received service ID
562 * resolution request.
563 * @param: Service ID resolution reply information.
564 */
565int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
566 struct ib_cm_sidr_rep_param *param);
567
568#endif /* IB_CM_H */
diff --git a/include/rdma/ib_fmr_pool.h b/include/rdma/ib_fmr_pool.h
new file mode 100644
index 000000000000..86b7e93f198b
--- /dev/null
+++ b/include/rdma/ib_fmr_pool.h
@@ -0,0 +1,93 @@
1/*
2 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 * $Id: ib_fmr_pool.h 2730 2005-06-28 16:43:03Z sean.hefty $
34 */
35
36#if !defined(IB_FMR_POOL_H)
37#define IB_FMR_POOL_H
38
39#include <rdma/ib_verbs.h>
40
41struct ib_fmr_pool;
42
43/**
44 * struct ib_fmr_pool_param - Parameters for creating FMR pool
45 * @max_pages_per_fmr:Maximum number of pages per map request.
46 * @access:Access flags for FMRs in pool.
47 * @pool_size:Number of FMRs to allocate for pool.
48 * @dirty_watermark:Flush is triggered when @dirty_watermark dirty
49 * FMRs are present.
50 * @flush_function:Callback called when unmapped FMRs are flushed and
51 * more FMRs are possibly available for mapping
52 * @flush_arg:Context passed to user's flush function.
53 * @cache:If set, FMRs may be reused after unmapping for identical map
54 * requests.
55 */
56struct ib_fmr_pool_param {
57 int max_pages_per_fmr;
58 enum ib_access_flags access;
59 int pool_size;
60 int dirty_watermark;
61 void (*flush_function)(struct ib_fmr_pool *pool,
62 void * arg);
63 void *flush_arg;
64 unsigned cache:1;
65};
66
67struct ib_pool_fmr {
68 struct ib_fmr *fmr;
69 struct ib_fmr_pool *pool;
70 struct list_head list;
71 struct hlist_node cache_node;
72 int ref_count;
73 int remap_count;
74 u64 io_virtual_address;
75 int page_list_len;
76 u64 page_list[0];
77};
78
79struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
80 struct ib_fmr_pool_param *params);
81
82void ib_destroy_fmr_pool(struct ib_fmr_pool *pool);
83
84int ib_flush_fmr_pool(struct ib_fmr_pool *pool);
85
86struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
87 u64 *page_list,
88 int list_len,
89 u64 *io_virtual_address);
90
91int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr);
92
93#endif /* IB_FMR_POOL_H */
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
new file mode 100644
index 000000000000..fc6b1c18ffc6
--- /dev/null
+++ b/include/rdma/ib_mad.h
@@ -0,0 +1,579 @@
1/*
2 * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
4 * Copyright (c) 2004 Intel Corporation. All rights reserved.
5 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
17 *
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer.
21 *
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 *
36 * $Id: ib_mad.h 2775 2005-07-02 13:42:12Z halr $
37 */
38
39#if !defined( IB_MAD_H )
40#define IB_MAD_H
41
42#include <linux/pci.h>
43
44#include <rdma/ib_verbs.h>
45
46/* Management base version */
47#define IB_MGMT_BASE_VERSION 1
48
49/* Management classes */
50#define IB_MGMT_CLASS_SUBN_LID_ROUTED 0x01
51#define IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE 0x81
52#define IB_MGMT_CLASS_SUBN_ADM 0x03
53#define IB_MGMT_CLASS_PERF_MGMT 0x04
54#define IB_MGMT_CLASS_BM 0x05
55#define IB_MGMT_CLASS_DEVICE_MGMT 0x06
56#define IB_MGMT_CLASS_CM 0x07
57#define IB_MGMT_CLASS_SNMP 0x08
58#define IB_MGMT_CLASS_VENDOR_RANGE2_START 0x30
59#define IB_MGMT_CLASS_VENDOR_RANGE2_END 0x4F
60
61#define IB_OPENIB_OUI (0x001405)
62
63/* Management methods */
64#define IB_MGMT_METHOD_GET 0x01
65#define IB_MGMT_METHOD_SET 0x02
66#define IB_MGMT_METHOD_GET_RESP 0x81
67#define IB_MGMT_METHOD_SEND 0x03
68#define IB_MGMT_METHOD_TRAP 0x05
69#define IB_MGMT_METHOD_REPORT 0x06
70#define IB_MGMT_METHOD_REPORT_RESP 0x86
71#define IB_MGMT_METHOD_TRAP_REPRESS 0x07
72
73#define IB_MGMT_METHOD_RESP 0x80
74
75#define IB_MGMT_MAX_METHODS 128
76
77/* RMPP information */
78#define IB_MGMT_RMPP_VERSION 1
79
80#define IB_MGMT_RMPP_TYPE_DATA 1
81#define IB_MGMT_RMPP_TYPE_ACK 2
82#define IB_MGMT_RMPP_TYPE_STOP 3
83#define IB_MGMT_RMPP_TYPE_ABORT 4
84
85#define IB_MGMT_RMPP_FLAG_ACTIVE 1
86#define IB_MGMT_RMPP_FLAG_FIRST (1<<1)
87#define IB_MGMT_RMPP_FLAG_LAST (1<<2)
88
89#define IB_MGMT_RMPP_NO_RESPTIME 0x1F
90
91#define IB_MGMT_RMPP_STATUS_SUCCESS 0
92#define IB_MGMT_RMPP_STATUS_RESX 1
93#define IB_MGMT_RMPP_STATUS_ABORT_MIN 118
94#define IB_MGMT_RMPP_STATUS_T2L 118
95#define IB_MGMT_RMPP_STATUS_BAD_LEN 119
96#define IB_MGMT_RMPP_STATUS_BAD_SEG 120
97#define IB_MGMT_RMPP_STATUS_BADT 121
98#define IB_MGMT_RMPP_STATUS_W2S 122
99#define IB_MGMT_RMPP_STATUS_S2B 123
100#define IB_MGMT_RMPP_STATUS_BAD_STATUS 124
101#define IB_MGMT_RMPP_STATUS_UNV 125
102#define IB_MGMT_RMPP_STATUS_TMR 126
103#define IB_MGMT_RMPP_STATUS_UNSPEC 127
104#define IB_MGMT_RMPP_STATUS_ABORT_MAX 127
105
106#define IB_QP0 0
107#define IB_QP1 __constant_htonl(1)
108#define IB_QP1_QKEY 0x80010000
109#define IB_QP_SET_QKEY 0x80000000
110
111struct ib_mad_hdr {
112 u8 base_version;
113 u8 mgmt_class;
114 u8 class_version;
115 u8 method;
116 __be16 status;
117 __be16 class_specific;
118 __be64 tid;
119 __be16 attr_id;
120 __be16 resv;
121 __be32 attr_mod;
122};
123
124struct ib_rmpp_hdr {
125 u8 rmpp_version;
126 u8 rmpp_type;
127 u8 rmpp_rtime_flags;
128 u8 rmpp_status;
129 __be32 seg_num;
130 __be32 paylen_newwin;
131};
132
133typedef u64 __bitwise ib_sa_comp_mask;
134
135#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << n))
136
137/*
138 * ib_sa_hdr and ib_sa_mad structures must be packed because they have
139 * 64-bit fields that are only 32-bit aligned. 64-bit architectures will
140 * lay them out wrong otherwise. (And unfortunately they are sent on
141 * the wire so we can't change the layout)
142 */
143struct ib_sa_hdr {
144 __be64 sm_key;
145 __be16 attr_offset;
146 __be16 reserved;
147 ib_sa_comp_mask comp_mask;
148} __attribute__ ((packed));
149
150struct ib_mad {
151 struct ib_mad_hdr mad_hdr;
152 u8 data[232];
153};
154
155struct ib_rmpp_mad {
156 struct ib_mad_hdr mad_hdr;
157 struct ib_rmpp_hdr rmpp_hdr;
158 u8 data[220];
159};
160
161struct ib_sa_mad {
162 struct ib_mad_hdr mad_hdr;
163 struct ib_rmpp_hdr rmpp_hdr;
164 struct ib_sa_hdr sa_hdr;
165 u8 data[200];
166} __attribute__ ((packed));
167
168struct ib_vendor_mad {
169 struct ib_mad_hdr mad_hdr;
170 struct ib_rmpp_hdr rmpp_hdr;
171 u8 reserved;
172 u8 oui[3];
173 u8 data[216];
174};
175
176/**
177 * ib_mad_send_buf - MAD data buffer and work request for sends.
178 * @mad: References an allocated MAD data buffer. The size of the data
179 * buffer is specified in the @send_wr.length field.
180 * @mapping: DMA mapping information.
181 * @mad_agent: MAD agent that allocated the buffer.
182 * @context: User-controlled context fields.
183 * @send_wr: An initialized work request structure used when sending the MAD.
184 * The wr_id field of the work request is initialized to reference this
185 * data structure.
186 * @sge: A scatter-gather list referenced by the work request.
187 *
188 * Users are responsible for initializing the MAD buffer itself, with the
189 * exception of specifying the payload length field in any RMPP MAD.
190 */
191struct ib_mad_send_buf {
192 struct ib_mad *mad;
193 DECLARE_PCI_UNMAP_ADDR(mapping)
194 struct ib_mad_agent *mad_agent;
195 void *context[2];
196 struct ib_send_wr send_wr;
197 struct ib_sge sge;
198};
199
200/**
201 * ib_get_rmpp_resptime - Returns the RMPP response time.
202 * @rmpp_hdr: An RMPP header.
203 */
204static inline u8 ib_get_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr)
205{
206 return rmpp_hdr->rmpp_rtime_flags >> 3;
207}
208
209/**
210 * ib_get_rmpp_flags - Returns the RMPP flags.
211 * @rmpp_hdr: An RMPP header.
212 */
213static inline u8 ib_get_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr)
214{
215 return rmpp_hdr->rmpp_rtime_flags & 0x7;
216}
217
218/**
219 * ib_set_rmpp_resptime - Sets the response time in an RMPP header.
220 * @rmpp_hdr: An RMPP header.
221 * @rtime: The response time to set.
222 */
223static inline void ib_set_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr, u8 rtime)
224{
225 rmpp_hdr->rmpp_rtime_flags = ib_get_rmpp_flags(rmpp_hdr) | (rtime << 3);
226}
227
228/**
229 * ib_set_rmpp_flags - Sets the flags in an RMPP header.
230 * @rmpp_hdr: An RMPP header.
231 * @flags: The flags to set.
232 */
233static inline void ib_set_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr, u8 flags)
234{
235 rmpp_hdr->rmpp_rtime_flags = (rmpp_hdr->rmpp_rtime_flags & 0xF1) |
236 (flags & 0x7);
237}
238
239struct ib_mad_agent;
240struct ib_mad_send_wc;
241struct ib_mad_recv_wc;
242
243/**
244 * ib_mad_send_handler - callback handler for a sent MAD.
245 * @mad_agent: MAD agent that sent the MAD.
246 * @mad_send_wc: Send work completion information on the sent MAD.
247 */
248typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent,
249 struct ib_mad_send_wc *mad_send_wc);
250
251/**
252 * ib_mad_snoop_handler - Callback handler for snooping sent MADs.
253 * @mad_agent: MAD agent that snooped the MAD.
254 * @send_wr: Work request information on the sent MAD.
255 * @mad_send_wc: Work completion information on the sent MAD. Valid
256 * only for snooping that occurs on a send completion.
257 *
258 * Clients snooping MADs should not modify data referenced by the @send_wr
259 * or @mad_send_wc.
260 */
261typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
262 struct ib_send_wr *send_wr,
263 struct ib_mad_send_wc *mad_send_wc);
264
265/**
266 * ib_mad_recv_handler - callback handler for a received MAD.
267 * @mad_agent: MAD agent requesting the received MAD.
268 * @mad_recv_wc: Received work completion information on the received MAD.
269 *
270 * MADs received in response to a send request operation will be handed to
271 * the user after the send operation completes. All data buffers given
272 * to registered agents through this routine are owned by the receiving
273 * client, except for snooping agents. Clients snooping MADs should not
274 * modify the data referenced by @mad_recv_wc.
275 */
276typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent,
277 struct ib_mad_recv_wc *mad_recv_wc);
278
279/**
280 * ib_mad_agent - Used to track MAD registration with the access layer.
281 * @device: Reference to device registration is on.
282 * @qp: Reference to QP used for sending and receiving MADs.
283 * @mr: Memory region for system memory usable for DMA.
284 * @recv_handler: Callback handler for a received MAD.
285 * @send_handler: Callback handler for a sent MAD.
286 * @snoop_handler: Callback handler for snooped sent MADs.
287 * @context: User-specified context associated with this registration.
288 * @hi_tid: Access layer assigned transaction ID for this client.
289 * Unsolicited MADs sent by this client will have the upper 32-bits
290 * of their TID set to this value.
291 * @port_num: Port number on which QP is registered
292 * @rmpp_version: If set, indicates the RMPP version used by this agent.
293 */
294struct ib_mad_agent {
295 struct ib_device *device;
296 struct ib_qp *qp;
297 struct ib_mr *mr;
298 ib_mad_recv_handler recv_handler;
299 ib_mad_send_handler send_handler;
300 ib_mad_snoop_handler snoop_handler;
301 void *context;
302 u32 hi_tid;
303 u8 port_num;
304 u8 rmpp_version;
305};
306
307/**
308 * ib_mad_send_wc - MAD send completion information.
309 * @wr_id: Work request identifier associated with the send MAD request.
310 * @status: Completion status.
311 * @vendor_err: Optional vendor error information returned with a failed
312 * request.
313 */
314struct ib_mad_send_wc {
315 u64 wr_id;
316 enum ib_wc_status status;
317 u32 vendor_err;
318};
319
320/**
321 * ib_mad_recv_buf - received MAD buffer information.
322 * @list: Reference to next data buffer for a received RMPP MAD.
323 * @grh: References a data buffer containing the global route header.
324 * The data refereced by this buffer is only valid if the GRH is
325 * valid.
326 * @mad: References the start of the received MAD.
327 */
328struct ib_mad_recv_buf {
329 struct list_head list;
330 struct ib_grh *grh;
331 struct ib_mad *mad;
332};
333
334/**
335 * ib_mad_recv_wc - received MAD information.
336 * @wc: Completion information for the received data.
337 * @recv_buf: Specifies the location of the received data buffer(s).
338 * @rmpp_list: Specifies a list of RMPP reassembled received MAD buffers.
339 * @mad_len: The length of the received MAD, without duplicated headers.
340 *
341 * For received response, the wr_id field of the wc is set to the wr_id
342 * for the corresponding send request.
343 */
344struct ib_mad_recv_wc {
345 struct ib_wc *wc;
346 struct ib_mad_recv_buf recv_buf;
347 struct list_head rmpp_list;
348 int mad_len;
349};
350
351/**
352 * ib_mad_reg_req - MAD registration request
353 * @mgmt_class: Indicates which management class of MADs should be receive
354 * by the caller. This field is only required if the user wishes to
355 * receive unsolicited MADs, otherwise it should be 0.
356 * @mgmt_class_version: Indicates which version of MADs for the given
357 * management class to receive.
358 * @oui: Indicates IEEE OUI when mgmt_class is a vendor class
359 * in the range from 0x30 to 0x4f. Otherwise not used.
360 * @method_mask: The caller will receive unsolicited MADs for any method
361 * where @method_mask = 1.
362 */
363struct ib_mad_reg_req {
364 u8 mgmt_class;
365 u8 mgmt_class_version;
366 u8 oui[3];
367 DECLARE_BITMAP(method_mask, IB_MGMT_MAX_METHODS);
368};
369
370/**
371 * ib_register_mad_agent - Register to send/receive MADs.
372 * @device: The device to register with.
373 * @port_num: The port on the specified device to use.
374 * @qp_type: Specifies which QP to access. Must be either
375 * IB_QPT_SMI or IB_QPT_GSI.
376 * @mad_reg_req: Specifies which unsolicited MADs should be received
377 * by the caller. This parameter may be NULL if the caller only
378 * wishes to receive solicited responses.
379 * @rmpp_version: If set, indicates that the client will send
380 * and receive MADs that contain the RMPP header for the given version.
381 * If set to 0, indicates that RMPP is not used by this client.
382 * @send_handler: The completion callback routine invoked after a send
383 * request has completed.
384 * @recv_handler: The completion callback routine invoked for a received
385 * MAD.
386 * @context: User specified context associated with the registration.
387 */
388struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
389 u8 port_num,
390 enum ib_qp_type qp_type,
391 struct ib_mad_reg_req *mad_reg_req,
392 u8 rmpp_version,
393 ib_mad_send_handler send_handler,
394 ib_mad_recv_handler recv_handler,
395 void *context);
396
397enum ib_mad_snoop_flags {
398 /*IB_MAD_SNOOP_POSTED_SENDS = 1,*/
399 /*IB_MAD_SNOOP_RMPP_SENDS = (1<<1),*/
400 IB_MAD_SNOOP_SEND_COMPLETIONS = (1<<2),
401 /*IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS = (1<<3),*/
402 IB_MAD_SNOOP_RECVS = (1<<4)
403 /*IB_MAD_SNOOP_RMPP_RECVS = (1<<5),*/
404 /*IB_MAD_SNOOP_REDIRECTED_QPS = (1<<6)*/
405};
406
407/**
408 * ib_register_mad_snoop - Register to snoop sent and received MADs.
409 * @device: The device to register with.
410 * @port_num: The port on the specified device to use.
411 * @qp_type: Specifies which QP traffic to snoop. Must be either
412 * IB_QPT_SMI or IB_QPT_GSI.
413 * @mad_snoop_flags: Specifies information where snooping occurs.
414 * @send_handler: The callback routine invoked for a snooped send.
415 * @recv_handler: The callback routine invoked for a snooped receive.
416 * @context: User specified context associated with the registration.
417 */
418struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
419 u8 port_num,
420 enum ib_qp_type qp_type,
421 int mad_snoop_flags,
422 ib_mad_snoop_handler snoop_handler,
423 ib_mad_recv_handler recv_handler,
424 void *context);
425
426/**
427 * ib_unregister_mad_agent - Unregisters a client from using MAD services.
428 * @mad_agent: Corresponding MAD registration request to deregister.
429 *
430 * After invoking this routine, MAD services are no longer usable by the
431 * client on the associated QP.
432 */
433int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent);
434
435/**
436 * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
437 * with the registered client.
438 * @mad_agent: Specifies the associated registration to post the send to.
439 * @send_wr: Specifies the information needed to send the MAD(s).
440 * @bad_send_wr: Specifies the MAD on which an error was encountered.
441 *
442 * Sent MADs are not guaranteed to complete in the order that they were posted.
443 *
444 * If the MAD requires RMPP, the data buffer should contain a single copy
445 * of the common MAD, RMPP, and class specific headers, followed by the class
446 * defined data. If the class defined data would not divide evenly into
447 * RMPP segments, then space must be allocated at the end of the referenced
448 * buffer for any required padding. To indicate the amount of class defined
449 * data being transferred, the paylen_newwin field in the RMPP header should
450 * be set to the size of the class specific header plus the amount of class
451 * defined data being transferred. The paylen_newwin field should be
452 * specified in network-byte order.
453 */
454int ib_post_send_mad(struct ib_mad_agent *mad_agent,
455 struct ib_send_wr *send_wr,
456 struct ib_send_wr **bad_send_wr);
457
458/**
459 * ib_coalesce_recv_mad - Coalesces received MAD data into a single buffer.
460 * @mad_recv_wc: Work completion information for a received MAD.
461 * @buf: User-provided data buffer to receive the coalesced buffers. The
462 * referenced buffer should be at least the size of the mad_len specified
463 * by @mad_recv_wc.
464 *
465 * This call copies a chain of received MAD segments into a single data buffer,
466 * removing duplicated headers.
467 */
468void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, void *buf);
469
470/**
471 * ib_free_recv_mad - Returns data buffers used to receive a MAD.
472 * @mad_recv_wc: Work completion information for a received MAD.
473 *
474 * Clients receiving MADs through their ib_mad_recv_handler must call this
475 * routine to return the work completion buffers to the access layer.
476 */
477void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc);
478
479/**
480 * ib_cancel_mad - Cancels an outstanding send MAD operation.
481 * @mad_agent: Specifies the registration associated with sent MAD.
482 * @wr_id: Indicates the work request identifier of the MAD to cancel.
483 *
484 * MADs will be returned to the user through the corresponding
485 * ib_mad_send_handler.
486 */
487void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id);
488
489/**
490 * ib_modify_mad - Modifies an outstanding send MAD operation.
491 * @mad_agent: Specifies the registration associated with sent MAD.
492 * @wr_id: Indicates the work request identifier of the MAD to modify.
493 * @timeout_ms: New timeout value for sent MAD.
494 *
495 * This call will reset the timeout value for a sent MAD to the specified
496 * value.
497 */
498int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms);
499
500/**
501 * ib_redirect_mad_qp - Registers a QP for MAD services.
502 * @qp: Reference to a QP that requires MAD services.
503 * @rmpp_version: If set, indicates that the client will send
504 * and receive MADs that contain the RMPP header for the given version.
505 * If set to 0, indicates that RMPP is not used by this client.
506 * @send_handler: The completion callback routine invoked after a send
507 * request has completed.
508 * @recv_handler: The completion callback routine invoked for a received
509 * MAD.
510 * @context: User specified context associated with the registration.
511 *
512 * Use of this call allows clients to use MAD services, such as RMPP,
513 * on user-owned QPs. After calling this routine, users may send
514 * MADs on the specified QP by calling ib_mad_post_send.
515 */
516struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp,
517 u8 rmpp_version,
518 ib_mad_send_handler send_handler,
519 ib_mad_recv_handler recv_handler,
520 void *context);
521
522/**
523 * ib_process_mad_wc - Processes a work completion associated with a
524 * MAD sent or received on a redirected QP.
525 * @mad_agent: Specifies the registered MAD service using the redirected QP.
526 * @wc: References a work completion associated with a sent or received
527 * MAD segment.
528 *
529 * This routine is used to complete or continue processing on a MAD request.
530 * If the work completion is associated with a send operation, calling
531 * this routine is required to continue an RMPP transfer or to wait for a
532 * corresponding response, if it is a request. If the work completion is
533 * associated with a receive operation, calling this routine is required to
534 * process an inbound or outbound RMPP transfer, or to match a response MAD
535 * with its corresponding request.
536 */
537int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
538 struct ib_wc *wc);
539
540/**
541 * ib_create_send_mad - Allocate and initialize a data buffer and work request
542 * for sending a MAD.
543 * @mad_agent: Specifies the registered MAD service to associate with the MAD.
544 * @remote_qpn: Specifies the QPN of the receiving node.
545 * @pkey_index: Specifies which PKey the MAD will be sent using. This field
546 * is valid only if the remote_qpn is QP 1.
547 * @ah: References the address handle used to transfer to the remote node.
548 * @rmpp_active: Indicates if the send will enable RMPP.
549 * @hdr_len: Indicates the size of the data header of the MAD. This length
550 * should include the common MAD header, RMPP header, plus any class
551 * specific header.
552 * @data_len: Indicates the size of any user-transferred data. The call will
553 * automatically adjust the allocated buffer size to account for any
554 * additional padding that may be necessary.
555 * @gfp_mask: GFP mask used for the memory allocation.
556 *
557 * This is a helper routine that may be used to allocate a MAD. Users are
558 * not required to allocate outbound MADs using this call. The returned
559 * MAD send buffer will reference a data buffer usable for sending a MAD, along
560 * with an initialized work request structure. Users may modify the returned
561 * MAD data buffer or work request before posting the send.
562 *
563 * The returned data buffer will be cleared. Users are responsible for
564 * initializing the common MAD and any class specific headers. If @rmpp_active
565 * is set, the RMPP header will be initialized for sending.
566 */
567struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
568 u32 remote_qpn, u16 pkey_index,
569 struct ib_ah *ah, int rmpp_active,
570 int hdr_len, int data_len,
571 unsigned int __nocast gfp_mask);
572
573/**
574 * ib_free_send_mad - Returns data buffers used to send a MAD.
575 * @send_buf: Previously allocated send data buffer.
576 */
577void ib_free_send_mad(struct ib_mad_send_buf *send_buf);
578
579#endif /* IB_MAD_H */
diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
new file mode 100644
index 000000000000..f926020d6331
--- /dev/null
+++ b/include/rdma/ib_pack.h
@@ -0,0 +1,245 @@
1/*
2 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: ib_pack.h 1349 2004-12-16 21:09:43Z roland $
33 */
34
35#ifndef IB_PACK_H
36#define IB_PACK_H
37
38#include <rdma/ib_verbs.h>
39
40enum {
41 IB_LRH_BYTES = 8,
42 IB_GRH_BYTES = 40,
43 IB_BTH_BYTES = 12,
44 IB_DETH_BYTES = 8
45};
46
47struct ib_field {
48 size_t struct_offset_bytes;
49 size_t struct_size_bytes;
50 int offset_words;
51 int offset_bits;
52 int size_bits;
53 char *field_name;
54};
55
56#define RESERVED \
57 .field_name = "reserved"
58
59/*
60 * This macro cleans up the definitions of constants for BTH opcodes.
61 * It is used to define constants such as IB_OPCODE_UD_SEND_ONLY,
62 * which becomes IB_OPCODE_UD + IB_OPCODE_SEND_ONLY, and this gives
63 * the correct value.
64 *
65 * In short, user code should use the constants defined using the
66 * macro rather than worrying about adding together other constants.
67*/
68#define IB_OPCODE(transport, op) \
69 IB_OPCODE_ ## transport ## _ ## op = \
70 IB_OPCODE_ ## transport + IB_OPCODE_ ## op
71
72enum {
73 /* transport types -- just used to define real constants */
74 IB_OPCODE_RC = 0x00,
75 IB_OPCODE_UC = 0x20,
76 IB_OPCODE_RD = 0x40,
77 IB_OPCODE_UD = 0x60,
78
79 /* operations -- just used to define real constants */
80 IB_OPCODE_SEND_FIRST = 0x00,
81 IB_OPCODE_SEND_MIDDLE = 0x01,
82 IB_OPCODE_SEND_LAST = 0x02,
83 IB_OPCODE_SEND_LAST_WITH_IMMEDIATE = 0x03,
84 IB_OPCODE_SEND_ONLY = 0x04,
85 IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE = 0x05,
86 IB_OPCODE_RDMA_WRITE_FIRST = 0x06,
87 IB_OPCODE_RDMA_WRITE_MIDDLE = 0x07,
88 IB_OPCODE_RDMA_WRITE_LAST = 0x08,
89 IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE = 0x09,
90 IB_OPCODE_RDMA_WRITE_ONLY = 0x0a,
91 IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE = 0x0b,
92 IB_OPCODE_RDMA_READ_REQUEST = 0x0c,
93 IB_OPCODE_RDMA_READ_RESPONSE_FIRST = 0x0d,
94 IB_OPCODE_RDMA_READ_RESPONSE_MIDDLE = 0x0e,
95 IB_OPCODE_RDMA_READ_RESPONSE_LAST = 0x0f,
96 IB_OPCODE_RDMA_READ_RESPONSE_ONLY = 0x10,
97 IB_OPCODE_ACKNOWLEDGE = 0x11,
98 IB_OPCODE_ATOMIC_ACKNOWLEDGE = 0x12,
99 IB_OPCODE_COMPARE_SWAP = 0x13,
100 IB_OPCODE_FETCH_ADD = 0x14,
101
102 /* real constants follow -- see comment about above IB_OPCODE()
103 macro for more details */
104
105 /* RC */
106 IB_OPCODE(RC, SEND_FIRST),
107 IB_OPCODE(RC, SEND_MIDDLE),
108 IB_OPCODE(RC, SEND_LAST),
109 IB_OPCODE(RC, SEND_LAST_WITH_IMMEDIATE),
110 IB_OPCODE(RC, SEND_ONLY),
111 IB_OPCODE(RC, SEND_ONLY_WITH_IMMEDIATE),
112 IB_OPCODE(RC, RDMA_WRITE_FIRST),
113 IB_OPCODE(RC, RDMA_WRITE_MIDDLE),
114 IB_OPCODE(RC, RDMA_WRITE_LAST),
115 IB_OPCODE(RC, RDMA_WRITE_LAST_WITH_IMMEDIATE),
116 IB_OPCODE(RC, RDMA_WRITE_ONLY),
117 IB_OPCODE(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE),
118 IB_OPCODE(RC, RDMA_READ_REQUEST),
119 IB_OPCODE(RC, RDMA_READ_RESPONSE_FIRST),
120 IB_OPCODE(RC, RDMA_READ_RESPONSE_MIDDLE),
121 IB_OPCODE(RC, RDMA_READ_RESPONSE_LAST),
122 IB_OPCODE(RC, RDMA_READ_RESPONSE_ONLY),
123 IB_OPCODE(RC, ACKNOWLEDGE),
124 IB_OPCODE(RC, ATOMIC_ACKNOWLEDGE),
125 IB_OPCODE(RC, COMPARE_SWAP),
126 IB_OPCODE(RC, FETCH_ADD),
127
128 /* UC */
129 IB_OPCODE(UC, SEND_FIRST),
130 IB_OPCODE(UC, SEND_MIDDLE),
131 IB_OPCODE(UC, SEND_LAST),
132 IB_OPCODE(UC, SEND_LAST_WITH_IMMEDIATE),
133 IB_OPCODE(UC, SEND_ONLY),
134 IB_OPCODE(UC, SEND_ONLY_WITH_IMMEDIATE),
135 IB_OPCODE(UC, RDMA_WRITE_FIRST),
136 IB_OPCODE(UC, RDMA_WRITE_MIDDLE),
137 IB_OPCODE(UC, RDMA_WRITE_LAST),
138 IB_OPCODE(UC, RDMA_WRITE_LAST_WITH_IMMEDIATE),
139 IB_OPCODE(UC, RDMA_WRITE_ONLY),
140 IB_OPCODE(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE),
141
142 /* RD */
143 IB_OPCODE(RD, SEND_FIRST),
144 IB_OPCODE(RD, SEND_MIDDLE),
145 IB_OPCODE(RD, SEND_LAST),
146 IB_OPCODE(RD, SEND_LAST_WITH_IMMEDIATE),
147 IB_OPCODE(RD, SEND_ONLY),
148 IB_OPCODE(RD, SEND_ONLY_WITH_IMMEDIATE),
149 IB_OPCODE(RD, RDMA_WRITE_FIRST),
150 IB_OPCODE(RD, RDMA_WRITE_MIDDLE),
151 IB_OPCODE(RD, RDMA_WRITE_LAST),
152 IB_OPCODE(RD, RDMA_WRITE_LAST_WITH_IMMEDIATE),
153 IB_OPCODE(RD, RDMA_WRITE_ONLY),
154 IB_OPCODE(RD, RDMA_WRITE_ONLY_WITH_IMMEDIATE),
155 IB_OPCODE(RD, RDMA_READ_REQUEST),
156 IB_OPCODE(RD, RDMA_READ_RESPONSE_FIRST),
157 IB_OPCODE(RD, RDMA_READ_RESPONSE_MIDDLE),
158 IB_OPCODE(RD, RDMA_READ_RESPONSE_LAST),
159 IB_OPCODE(RD, RDMA_READ_RESPONSE_ONLY),
160 IB_OPCODE(RD, ACKNOWLEDGE),
161 IB_OPCODE(RD, ATOMIC_ACKNOWLEDGE),
162 IB_OPCODE(RD, COMPARE_SWAP),
163 IB_OPCODE(RD, FETCH_ADD),
164
165 /* UD */
166 IB_OPCODE(UD, SEND_ONLY),
167 IB_OPCODE(UD, SEND_ONLY_WITH_IMMEDIATE)
168};
169
170enum {
171 IB_LNH_RAW = 0,
172 IB_LNH_IP = 1,
173 IB_LNH_IBA_LOCAL = 2,
174 IB_LNH_IBA_GLOBAL = 3
175};
176
177struct ib_unpacked_lrh {
178 u8 virtual_lane;
179 u8 link_version;
180 u8 service_level;
181 u8 link_next_header;
182 __be16 destination_lid;
183 __be16 packet_length;
184 __be16 source_lid;
185};
186
187struct ib_unpacked_grh {
188 u8 ip_version;
189 u8 traffic_class;
190 __be32 flow_label;
191 __be16 payload_length;
192 u8 next_header;
193 u8 hop_limit;
194 union ib_gid source_gid;
195 union ib_gid destination_gid;
196};
197
198struct ib_unpacked_bth {
199 u8 opcode;
200 u8 solicited_event;
201 u8 mig_req;
202 u8 pad_count;
203 u8 transport_header_version;
204 __be16 pkey;
205 __be32 destination_qpn;
206 u8 ack_req;
207 __be32 psn;
208};
209
210struct ib_unpacked_deth {
211 __be32 qkey;
212 __be32 source_qpn;
213};
214
215struct ib_ud_header {
216 struct ib_unpacked_lrh lrh;
217 int grh_present;
218 struct ib_unpacked_grh grh;
219 struct ib_unpacked_bth bth;
220 struct ib_unpacked_deth deth;
221 int immediate_present;
222 __be32 immediate_data;
223};
224
225void ib_pack(const struct ib_field *desc,
226 int desc_len,
227 void *structure,
228 void *buf);
229
230void ib_unpack(const struct ib_field *desc,
231 int desc_len,
232 void *buf,
233 void *structure);
234
235void ib_ud_header_init(int payload_bytes,
236 int grh_present,
237 struct ib_ud_header *header);
238
239int ib_ud_header_pack(struct ib_ud_header *header,
240 void *buf);
241
242int ib_ud_header_unpack(void *buf,
243 struct ib_ud_header *header);
244
245#endif /* IB_PACK_H */
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
new file mode 100644
index 000000000000..c022edfc49da
--- /dev/null
+++ b/include/rdma/ib_sa.h
@@ -0,0 +1,373 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 * $Id: ib_sa.h 2811 2005-07-06 18:11:43Z halr $
34 */
35
36#ifndef IB_SA_H
37#define IB_SA_H
38
39#include <linux/compiler.h>
40
41#include <rdma/ib_verbs.h>
42#include <rdma/ib_mad.h>
43
44enum {
45 IB_SA_CLASS_VERSION = 2, /* IB spec version 1.1/1.2 */
46
47 IB_SA_METHOD_GET_TABLE = 0x12,
48 IB_SA_METHOD_GET_TABLE_RESP = 0x92,
49 IB_SA_METHOD_DELETE = 0x15
50};
51
52enum ib_sa_selector {
53 IB_SA_GTE = 0,
54 IB_SA_LTE = 1,
55 IB_SA_EQ = 2,
56 /*
57 * The meaning of "best" depends on the attribute: for
58 * example, for MTU best will return the largest available
59 * MTU, while for packet life time, best will return the
60 * smallest available life time.
61 */
62 IB_SA_BEST = 3
63};
64
65enum ib_sa_rate {
66 IB_SA_RATE_2_5_GBPS = 2,
67 IB_SA_RATE_5_GBPS = 5,
68 IB_SA_RATE_10_GBPS = 3,
69 IB_SA_RATE_20_GBPS = 6,
70 IB_SA_RATE_30_GBPS = 4,
71 IB_SA_RATE_40_GBPS = 7,
72 IB_SA_RATE_60_GBPS = 8,
73 IB_SA_RATE_80_GBPS = 9,
74 IB_SA_RATE_120_GBPS = 10
75};
76
77static inline int ib_sa_rate_enum_to_int(enum ib_sa_rate rate)
78{
79 switch (rate) {
80 case IB_SA_RATE_2_5_GBPS: return 1;
81 case IB_SA_RATE_5_GBPS: return 2;
82 case IB_SA_RATE_10_GBPS: return 4;
83 case IB_SA_RATE_20_GBPS: return 8;
84 case IB_SA_RATE_30_GBPS: return 12;
85 case IB_SA_RATE_40_GBPS: return 16;
86 case IB_SA_RATE_60_GBPS: return 24;
87 case IB_SA_RATE_80_GBPS: return 32;
88 case IB_SA_RATE_120_GBPS: return 48;
89 default: return -1;
90 }
91}
92
93/*
94 * Structures for SA records are named "struct ib_sa_xxx_rec." No
95 * attempt is made to pack structures to match the physical layout of
96 * SA records in SA MADs; all packing and unpacking is handled by the
97 * SA query code.
98 *
99 * For a record with structure ib_sa_xxx_rec, the naming convention
100 * for the component mask value for field yyy is IB_SA_XXX_REC_YYY (we
101 * never use different abbreviations or otherwise change the spelling
102 * of xxx/yyy between ib_sa_xxx_rec.yyy and IB_SA_XXX_REC_YYY).
103 *
104 * Reserved rows are indicated with comments to help maintainability.
105 */
106
107/* reserved: 0 */
108/* reserved: 1 */
109#define IB_SA_PATH_REC_DGID IB_SA_COMP_MASK( 2)
110#define IB_SA_PATH_REC_SGID IB_SA_COMP_MASK( 3)
111#define IB_SA_PATH_REC_DLID IB_SA_COMP_MASK( 4)
112#define IB_SA_PATH_REC_SLID IB_SA_COMP_MASK( 5)
113#define IB_SA_PATH_REC_RAW_TRAFFIC IB_SA_COMP_MASK( 6)
114/* reserved: 7 */
115#define IB_SA_PATH_REC_FLOW_LABEL IB_SA_COMP_MASK( 8)
116#define IB_SA_PATH_REC_HOP_LIMIT IB_SA_COMP_MASK( 9)
117#define IB_SA_PATH_REC_TRAFFIC_CLASS IB_SA_COMP_MASK(10)
118#define IB_SA_PATH_REC_REVERSIBLE IB_SA_COMP_MASK(11)
119#define IB_SA_PATH_REC_NUMB_PATH IB_SA_COMP_MASK(12)
120#define IB_SA_PATH_REC_PKEY IB_SA_COMP_MASK(13)
121/* reserved: 14 */
122#define IB_SA_PATH_REC_SL IB_SA_COMP_MASK(15)
123#define IB_SA_PATH_REC_MTU_SELECTOR IB_SA_COMP_MASK(16)
124#define IB_SA_PATH_REC_MTU IB_SA_COMP_MASK(17)
125#define IB_SA_PATH_REC_RATE_SELECTOR IB_SA_COMP_MASK(18)
126#define IB_SA_PATH_REC_RATE IB_SA_COMP_MASK(19)
127#define IB_SA_PATH_REC_PACKET_LIFE_TIME_SELECTOR IB_SA_COMP_MASK(20)
128#define IB_SA_PATH_REC_PACKET_LIFE_TIME IB_SA_COMP_MASK(21)
129#define IB_SA_PATH_REC_PREFERENCE IB_SA_COMP_MASK(22)
130
131struct ib_sa_path_rec {
132 /* reserved */
133 /* reserved */
134 union ib_gid dgid;
135 union ib_gid sgid;
136 __be16 dlid;
137 __be16 slid;
138 int raw_traffic;
139 /* reserved */
140 __be32 flow_label;
141 u8 hop_limit;
142 u8 traffic_class;
143 int reversible;
144 u8 numb_path;
145 __be16 pkey;
146 /* reserved */
147 u8 sl;
148 u8 mtu_selector;
149 u8 mtu;
150 u8 rate_selector;
151 u8 rate;
152 u8 packet_life_time_selector;
153 u8 packet_life_time;
154 u8 preference;
155};
156
157#define IB_SA_MCMEMBER_REC_MGID IB_SA_COMP_MASK( 0)
158#define IB_SA_MCMEMBER_REC_PORT_GID IB_SA_COMP_MASK( 1)
159#define IB_SA_MCMEMBER_REC_QKEY IB_SA_COMP_MASK( 2)
160#define IB_SA_MCMEMBER_REC_MLID IB_SA_COMP_MASK( 3)
161#define IB_SA_MCMEMBER_REC_MTU_SELECTOR IB_SA_COMP_MASK( 4)
162#define IB_SA_MCMEMBER_REC_MTU IB_SA_COMP_MASK( 5)
163#define IB_SA_MCMEMBER_REC_TRAFFIC_CLASS IB_SA_COMP_MASK( 6)
164#define IB_SA_MCMEMBER_REC_PKEY IB_SA_COMP_MASK( 7)
165#define IB_SA_MCMEMBER_REC_RATE_SELECTOR IB_SA_COMP_MASK( 8)
166#define IB_SA_MCMEMBER_REC_RATE IB_SA_COMP_MASK( 9)
167#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR IB_SA_COMP_MASK(10)
168#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME IB_SA_COMP_MASK(11)
169#define IB_SA_MCMEMBER_REC_SL IB_SA_COMP_MASK(12)
170#define IB_SA_MCMEMBER_REC_FLOW_LABEL IB_SA_COMP_MASK(13)
171#define IB_SA_MCMEMBER_REC_HOP_LIMIT IB_SA_COMP_MASK(14)
172#define IB_SA_MCMEMBER_REC_SCOPE IB_SA_COMP_MASK(15)
173#define IB_SA_MCMEMBER_REC_JOIN_STATE IB_SA_COMP_MASK(16)
174#define IB_SA_MCMEMBER_REC_PROXY_JOIN IB_SA_COMP_MASK(17)
175
176struct ib_sa_mcmember_rec {
177 union ib_gid mgid;
178 union ib_gid port_gid;
179 __be32 qkey;
180 __be16 mlid;
181 u8 mtu_selector;
182 u8 mtu;
183 u8 traffic_class;
184 __be16 pkey;
185 u8 rate_selector;
186 u8 rate;
187 u8 packet_life_time_selector;
188 u8 packet_life_time;
189 u8 sl;
190 __be32 flow_label;
191 u8 hop_limit;
192 u8 scope;
193 u8 join_state;
194 int proxy_join;
195};
196
197/* Service Record Component Mask Sec 15.2.5.14 Ver 1.1 */
198#define IB_SA_SERVICE_REC_SERVICE_ID IB_SA_COMP_MASK( 0)
199#define IB_SA_SERVICE_REC_SERVICE_GID IB_SA_COMP_MASK( 1)
200#define IB_SA_SERVICE_REC_SERVICE_PKEY IB_SA_COMP_MASK( 2)
201/* reserved: 3 */
202#define IB_SA_SERVICE_REC_SERVICE_LEASE IB_SA_COMP_MASK( 4)
203#define IB_SA_SERVICE_REC_SERVICE_KEY IB_SA_COMP_MASK( 5)
204#define IB_SA_SERVICE_REC_SERVICE_NAME IB_SA_COMP_MASK( 6)
205#define IB_SA_SERVICE_REC_SERVICE_DATA8_0 IB_SA_COMP_MASK( 7)
206#define IB_SA_SERVICE_REC_SERVICE_DATA8_1 IB_SA_COMP_MASK( 8)
207#define IB_SA_SERVICE_REC_SERVICE_DATA8_2 IB_SA_COMP_MASK( 9)
208#define IB_SA_SERVICE_REC_SERVICE_DATA8_3 IB_SA_COMP_MASK(10)
209#define IB_SA_SERVICE_REC_SERVICE_DATA8_4 IB_SA_COMP_MASK(11)
210#define IB_SA_SERVICE_REC_SERVICE_DATA8_5 IB_SA_COMP_MASK(12)
211#define IB_SA_SERVICE_REC_SERVICE_DATA8_6 IB_SA_COMP_MASK(13)
212#define IB_SA_SERVICE_REC_SERVICE_DATA8_7 IB_SA_COMP_MASK(14)
213#define IB_SA_SERVICE_REC_SERVICE_DATA8_8 IB_SA_COMP_MASK(15)
214#define IB_SA_SERVICE_REC_SERVICE_DATA8_9 IB_SA_COMP_MASK(16)
215#define IB_SA_SERVICE_REC_SERVICE_DATA8_10 IB_SA_COMP_MASK(17)
216#define IB_SA_SERVICE_REC_SERVICE_DATA8_11 IB_SA_COMP_MASK(18)
217#define IB_SA_SERVICE_REC_SERVICE_DATA8_12 IB_SA_COMP_MASK(19)
218#define IB_SA_SERVICE_REC_SERVICE_DATA8_13 IB_SA_COMP_MASK(20)
219#define IB_SA_SERVICE_REC_SERVICE_DATA8_14 IB_SA_COMP_MASK(21)
220#define IB_SA_SERVICE_REC_SERVICE_DATA8_15 IB_SA_COMP_MASK(22)
221#define IB_SA_SERVICE_REC_SERVICE_DATA16_0 IB_SA_COMP_MASK(23)
222#define IB_SA_SERVICE_REC_SERVICE_DATA16_1 IB_SA_COMP_MASK(24)
223#define IB_SA_SERVICE_REC_SERVICE_DATA16_2 IB_SA_COMP_MASK(25)
224#define IB_SA_SERVICE_REC_SERVICE_DATA16_3 IB_SA_COMP_MASK(26)
225#define IB_SA_SERVICE_REC_SERVICE_DATA16_4 IB_SA_COMP_MASK(27)
226#define IB_SA_SERVICE_REC_SERVICE_DATA16_5 IB_SA_COMP_MASK(28)
227#define IB_SA_SERVICE_REC_SERVICE_DATA16_6 IB_SA_COMP_MASK(29)
228#define IB_SA_SERVICE_REC_SERVICE_DATA16_7 IB_SA_COMP_MASK(30)
229#define IB_SA_SERVICE_REC_SERVICE_DATA32_0 IB_SA_COMP_MASK(31)
230#define IB_SA_SERVICE_REC_SERVICE_DATA32_1 IB_SA_COMP_MASK(32)
231#define IB_SA_SERVICE_REC_SERVICE_DATA32_2 IB_SA_COMP_MASK(33)
232#define IB_SA_SERVICE_REC_SERVICE_DATA32_3 IB_SA_COMP_MASK(34)
233#define IB_SA_SERVICE_REC_SERVICE_DATA64_0 IB_SA_COMP_MASK(35)
234#define IB_SA_SERVICE_REC_SERVICE_DATA64_1 IB_SA_COMP_MASK(36)
235
236#define IB_DEFAULT_SERVICE_LEASE 0xFFFFFFFF
237
238struct ib_sa_service_rec {
239 u64 id;
240 union ib_gid gid;
241 __be16 pkey;
242 /* reserved */
243 u32 lease;
244 u8 key[16];
245 u8 name[64];
246 u8 data8[16];
247 u16 data16[8];
248 u32 data32[4];
249 u64 data64[2];
250};
251
252struct ib_sa_query;
253
254void ib_sa_cancel_query(int id, struct ib_sa_query *query);
255
256int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
257 struct ib_sa_path_rec *rec,
258 ib_sa_comp_mask comp_mask,
259 int timeout_ms, unsigned int __nocast gfp_mask,
260 void (*callback)(int status,
261 struct ib_sa_path_rec *resp,
262 void *context),
263 void *context,
264 struct ib_sa_query **query);
265
266int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
267 u8 method,
268 struct ib_sa_mcmember_rec *rec,
269 ib_sa_comp_mask comp_mask,
270 int timeout_ms, unsigned int __nocast gfp_mask,
271 void (*callback)(int status,
272 struct ib_sa_mcmember_rec *resp,
273 void *context),
274 void *context,
275 struct ib_sa_query **query);
276
277int ib_sa_service_rec_query(struct ib_device *device, u8 port_num,
278 u8 method,
279 struct ib_sa_service_rec *rec,
280 ib_sa_comp_mask comp_mask,
281 int timeout_ms, unsigned int __nocast gfp_mask,
282 void (*callback)(int status,
283 struct ib_sa_service_rec *resp,
284 void *context),
285 void *context,
286 struct ib_sa_query **sa_query);
287
288/**
289 * ib_sa_mcmember_rec_set - Start an MCMember set query
290 * @device:device to send query on
291 * @port_num: port number to send query on
292 * @rec:MCMember Record to send in query
293 * @comp_mask:component mask to send in query
294 * @timeout_ms:time to wait for response
295 * @gfp_mask:GFP mask to use for internal allocations
296 * @callback:function called when query completes, times out or is
297 * canceled
298 * @context:opaque user context passed to callback
299 * @sa_query:query context, used to cancel query
300 *
301 * Send an MCMember Set query to the SA (eg to join a multicast
302 * group). The callback function will be called when the query
303 * completes (or fails); status is 0 for a successful response, -EINTR
304 * if the query is canceled, -ETIMEDOUT is the query timed out, or
305 * -EIO if an error occurred sending the query. The resp parameter of
306 * the callback is only valid if status is 0.
307 *
308 * If the return value of ib_sa_mcmember_rec_set() is negative, it is
309 * an error code. Otherwise it is a query ID that can be used to
310 * cancel the query.
311 */
312static inline int
313ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num,
314 struct ib_sa_mcmember_rec *rec,
315 ib_sa_comp_mask comp_mask,
316 int timeout_ms, unsigned int __nocast gfp_mask,
317 void (*callback)(int status,
318 struct ib_sa_mcmember_rec *resp,
319 void *context),
320 void *context,
321 struct ib_sa_query **query)
322{
323 return ib_sa_mcmember_rec_query(device, port_num,
324 IB_MGMT_METHOD_SET,
325 rec, comp_mask,
326 timeout_ms, gfp_mask, callback,
327 context, query);
328}
329
330/**
331 * ib_sa_mcmember_rec_delete - Start an MCMember delete query
332 * @device:device to send query on
333 * @port_num: port number to send query on
334 * @rec:MCMember Record to send in query
335 * @comp_mask:component mask to send in query
336 * @timeout_ms:time to wait for response
337 * @gfp_mask:GFP mask to use for internal allocations
338 * @callback:function called when query completes, times out or is
339 * canceled
340 * @context:opaque user context passed to callback
341 * @sa_query:query context, used to cancel query
342 *
343 * Send an MCMember Delete query to the SA (eg to leave a multicast
344 * group). The callback function will be called when the query
345 * completes (or fails); status is 0 for a successful response, -EINTR
346 * if the query is canceled, -ETIMEDOUT is the query timed out, or
347 * -EIO if an error occurred sending the query. The resp parameter of
348 * the callback is only valid if status is 0.
349 *
350 * If the return value of ib_sa_mcmember_rec_delete() is negative, it
351 * is an error code. Otherwise it is a query ID that can be used to
352 * cancel the query.
353 */
354static inline int
355ib_sa_mcmember_rec_delete(struct ib_device *device, u8 port_num,
356 struct ib_sa_mcmember_rec *rec,
357 ib_sa_comp_mask comp_mask,
358 int timeout_ms, unsigned int __nocast gfp_mask,
359 void (*callback)(int status,
360 struct ib_sa_mcmember_rec *resp,
361 void *context),
362 void *context,
363 struct ib_sa_query **query)
364{
365 return ib_sa_mcmember_rec_query(device, port_num,
366 IB_SA_METHOD_DELETE,
367 rec, comp_mask,
368 timeout_ms, gfp_mask, callback,
369 context, query);
370}
371
372
373#endif /* IB_SA_H */
diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h
new file mode 100644
index 000000000000..87f60737f695
--- /dev/null
+++ b/include/rdma/ib_smi.h
@@ -0,0 +1,94 @@
1/*
2 * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
4 * Copyright (c) 2004 Intel Corporation. All rights reserved.
5 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
17 *
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer.
21 *
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 *
36 * $Id: ib_smi.h 1389 2004-12-27 22:56:47Z roland $
37 */
38
39#if !defined( IB_SMI_H )
40#define IB_SMI_H
41
42#include <rdma/ib_mad.h>
43
44#define IB_SMP_DATA_SIZE 64
45#define IB_SMP_MAX_PATH_HOPS 64
46
47struct ib_smp {
48 u8 base_version;
49 u8 mgmt_class;
50 u8 class_version;
51 u8 method;
52 __be16 status;
53 u8 hop_ptr;
54 u8 hop_cnt;
55 __be64 tid;
56 __be16 attr_id;
57 __be16 resv;
58 __be32 attr_mod;
59 __be64 mkey;
60 __be16 dr_slid;
61 __be16 dr_dlid;
62 u8 reserved[28];
63 u8 data[IB_SMP_DATA_SIZE];
64 u8 initial_path[IB_SMP_MAX_PATH_HOPS];
65 u8 return_path[IB_SMP_MAX_PATH_HOPS];
66} __attribute__ ((packed));
67
68#define IB_SMP_DIRECTION __constant_htons(0x8000)
69
70/* Subnet management attributes */
71#define IB_SMP_ATTR_NOTICE __constant_htons(0x0002)
72#define IB_SMP_ATTR_NODE_DESC __constant_htons(0x0010)
73#define IB_SMP_ATTR_NODE_INFO __constant_htons(0x0011)
74#define IB_SMP_ATTR_SWITCH_INFO __constant_htons(0x0012)
75#define IB_SMP_ATTR_GUID_INFO __constant_htons(0x0014)
76#define IB_SMP_ATTR_PORT_INFO __constant_htons(0x0015)
77#define IB_SMP_ATTR_PKEY_TABLE __constant_htons(0x0016)
78#define IB_SMP_ATTR_SL_TO_VL_TABLE __constant_htons(0x0017)
79#define IB_SMP_ATTR_VL_ARB_TABLE __constant_htons(0x0018)
80#define IB_SMP_ATTR_LINEAR_FORWARD_TABLE __constant_htons(0x0019)
81#define IB_SMP_ATTR_RANDOM_FORWARD_TABLE __constant_htons(0x001A)
82#define IB_SMP_ATTR_MCAST_FORWARD_TABLE __constant_htons(0x001B)
83#define IB_SMP_ATTR_SM_INFO __constant_htons(0x0020)
84#define IB_SMP_ATTR_VENDOR_DIAG __constant_htons(0x0030)
85#define IB_SMP_ATTR_LED_INFO __constant_htons(0x0031)
86#define IB_SMP_ATTR_VENDOR_MASK __constant_htons(0xFF00)
87
88static inline u8
89ib_get_smp_direction(struct ib_smp *smp)
90{
91 return ((smp->status & IB_SMP_DIRECTION) == IB_SMP_DIRECTION);
92}
93
94#endif /* IB_SMI_H */
diff --git a/include/rdma/ib_user_cm.h b/include/rdma/ib_user_cm.h
new file mode 100644
index 000000000000..72182d16778b
--- /dev/null
+++ b/include/rdma/ib_user_cm.h
@@ -0,0 +1,328 @@
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: ib_user_cm.h 2576 2005-06-09 17:00:30Z libor $
33 */
34
35#ifndef IB_USER_CM_H
36#define IB_USER_CM_H
37
38#include <linux/types.h>
39
40#define IB_USER_CM_ABI_VERSION 1
41
42enum {
43 IB_USER_CM_CMD_CREATE_ID,
44 IB_USER_CM_CMD_DESTROY_ID,
45 IB_USER_CM_CMD_ATTR_ID,
46
47 IB_USER_CM_CMD_LISTEN,
48 IB_USER_CM_CMD_ESTABLISH,
49
50 IB_USER_CM_CMD_SEND_REQ,
51 IB_USER_CM_CMD_SEND_REP,
52 IB_USER_CM_CMD_SEND_RTU,
53 IB_USER_CM_CMD_SEND_DREQ,
54 IB_USER_CM_CMD_SEND_DREP,
55 IB_USER_CM_CMD_SEND_REJ,
56 IB_USER_CM_CMD_SEND_MRA,
57 IB_USER_CM_CMD_SEND_LAP,
58 IB_USER_CM_CMD_SEND_APR,
59 IB_USER_CM_CMD_SEND_SIDR_REQ,
60 IB_USER_CM_CMD_SEND_SIDR_REP,
61
62 IB_USER_CM_CMD_EVENT,
63};
64/*
65 * command ABI structures.
66 */
67struct ib_ucm_cmd_hdr {
68 __u32 cmd;
69 __u16 in;
70 __u16 out;
71};
72
73struct ib_ucm_create_id {
74 __u64 response;
75};
76
77struct ib_ucm_create_id_resp {
78 __u32 id;
79};
80
81struct ib_ucm_destroy_id {
82 __u32 id;
83};
84
85struct ib_ucm_attr_id {
86 __u64 response;
87 __u32 id;
88};
89
90struct ib_ucm_attr_id_resp {
91 __be64 service_id;
92 __be64 service_mask;
93 __be32 local_id;
94 __be32 remote_id;
95};
96
97struct ib_ucm_listen {
98 __be64 service_id;
99 __be64 service_mask;
100 __u32 id;
101};
102
103struct ib_ucm_establish {
104 __u32 id;
105};
106
107struct ib_ucm_private_data {
108 __u64 data;
109 __u32 id;
110 __u8 len;
111 __u8 reserved[3];
112};
113
114struct ib_ucm_path_rec {
115 __u8 dgid[16];
116 __u8 sgid[16];
117 __be16 dlid;
118 __be16 slid;
119 __u32 raw_traffic;
120 __be32 flow_label;
121 __u32 reversible;
122 __u32 mtu;
123 __be16 pkey;
124 __u8 hop_limit;
125 __u8 traffic_class;
126 __u8 numb_path;
127 __u8 sl;
128 __u8 mtu_selector;
129 __u8 rate_selector;
130 __u8 rate;
131 __u8 packet_life_time_selector;
132 __u8 packet_life_time;
133 __u8 preference;
134};
135
136struct ib_ucm_req {
137 __u32 id;
138 __u32 qpn;
139 __u32 qp_type;
140 __u32 psn;
141 __be64 sid;
142 __u64 data;
143 __u64 primary_path;
144 __u64 alternate_path;
145 __u8 len;
146 __u8 peer_to_peer;
147 __u8 responder_resources;
148 __u8 initiator_depth;
149 __u8 remote_cm_response_timeout;
150 __u8 flow_control;
151 __u8 local_cm_response_timeout;
152 __u8 retry_count;
153 __u8 rnr_retry_count;
154 __u8 max_cm_retries;
155 __u8 srq;
156 __u8 reserved[1];
157};
158
159struct ib_ucm_rep {
160 __u64 data;
161 __u32 id;
162 __u32 qpn;
163 __u32 psn;
164 __u8 len;
165 __u8 responder_resources;
166 __u8 initiator_depth;
167 __u8 target_ack_delay;
168 __u8 failover_accepted;
169 __u8 flow_control;
170 __u8 rnr_retry_count;
171 __u8 srq;
172};
173
174struct ib_ucm_info {
175 __u32 id;
176 __u32 status;
177 __u64 info;
178 __u64 data;
179 __u8 info_len;
180 __u8 data_len;
181 __u8 reserved[2];
182};
183
184struct ib_ucm_mra {
185 __u64 data;
186 __u32 id;
187 __u8 len;
188 __u8 timeout;
189 __u8 reserved[2];
190};
191
192struct ib_ucm_lap {
193 __u64 path;
194 __u64 data;
195 __u32 id;
196 __u8 len;
197 __u8 reserved[3];
198};
199
200struct ib_ucm_sidr_req {
201 __u32 id;
202 __u32 timeout;
203 __be64 sid;
204 __u64 data;
205 __u64 path;
206 __u16 pkey;
207 __u8 len;
208 __u8 max_cm_retries;
209};
210
211struct ib_ucm_sidr_rep {
212 __u32 id;
213 __u32 qpn;
214 __u32 qkey;
215 __u32 status;
216 __u64 info;
217 __u64 data;
218 __u8 info_len;
219 __u8 data_len;
220 __u8 reserved[2];
221};
222/*
223 * event notification ABI structures.
224 */
225struct ib_ucm_event_get {
226 __u64 response;
227 __u64 data;
228 __u64 info;
229 __u8 data_len;
230 __u8 info_len;
231 __u8 reserved[2];
232};
233
234struct ib_ucm_req_event_resp {
235 __u32 listen_id;
236 /* device */
237 /* port */
238 struct ib_ucm_path_rec primary_path;
239 struct ib_ucm_path_rec alternate_path;
240 __be64 remote_ca_guid;
241 __u32 remote_qkey;
242 __u32 remote_qpn;
243 __u32 qp_type;
244 __u32 starting_psn;
245 __u8 responder_resources;
246 __u8 initiator_depth;
247 __u8 local_cm_response_timeout;
248 __u8 flow_control;
249 __u8 remote_cm_response_timeout;
250 __u8 retry_count;
251 __u8 rnr_retry_count;
252 __u8 srq;
253};
254
255struct ib_ucm_rep_event_resp {
256 __be64 remote_ca_guid;
257 __u32 remote_qkey;
258 __u32 remote_qpn;
259 __u32 starting_psn;
260 __u8 responder_resources;
261 __u8 initiator_depth;
262 __u8 target_ack_delay;
263 __u8 failover_accepted;
264 __u8 flow_control;
265 __u8 rnr_retry_count;
266 __u8 srq;
267 __u8 reserved[1];
268};
269
270struct ib_ucm_rej_event_resp {
271 __u32 reason;
272 /* ari in ib_ucm_event_get info field. */
273};
274
275struct ib_ucm_mra_event_resp {
276 __u8 timeout;
277 __u8 reserved[3];
278};
279
280struct ib_ucm_lap_event_resp {
281 struct ib_ucm_path_rec path;
282};
283
284struct ib_ucm_apr_event_resp {
285 __u32 status;
286 /* apr info in ib_ucm_event_get info field. */
287};
288
289struct ib_ucm_sidr_req_event_resp {
290 __u32 listen_id;
291 /* device */
292 /* port */
293 __u16 pkey;
294 __u8 reserved[2];
295};
296
297struct ib_ucm_sidr_rep_event_resp {
298 __u32 status;
299 __u32 qkey;
300 __u32 qpn;
301 /* info in ib_ucm_event_get info field. */
302};
303
304#define IB_UCM_PRES_DATA 0x01
305#define IB_UCM_PRES_INFO 0x02
306#define IB_UCM_PRES_PRIMARY 0x04
307#define IB_UCM_PRES_ALTERNATE 0x08
308
309struct ib_ucm_event_resp {
310 __u32 id;
311 __u32 event;
312 __u32 present;
313 union {
314 struct ib_ucm_req_event_resp req_resp;
315 struct ib_ucm_rep_event_resp rep_resp;
316 struct ib_ucm_rej_event_resp rej_resp;
317 struct ib_ucm_mra_event_resp mra_resp;
318 struct ib_ucm_lap_event_resp lap_resp;
319 struct ib_ucm_apr_event_resp apr_resp;
320
321 struct ib_ucm_sidr_req_event_resp sidr_req_resp;
322 struct ib_ucm_sidr_rep_event_resp sidr_rep_resp;
323
324 __u32 send_status;
325 } u;
326};
327
328#endif /* IB_USER_CM_H */
diff --git a/include/rdma/ib_user_mad.h b/include/rdma/ib_user_mad.h
new file mode 100644
index 000000000000..44537aa32e62
--- /dev/null
+++ b/include/rdma/ib_user_mad.h
@@ -0,0 +1,137 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 * $Id: ib_user_mad.h 2814 2005-07-06 19:14:09Z halr $
34 */
35
36#ifndef IB_USER_MAD_H
37#define IB_USER_MAD_H
38
39#include <linux/types.h>
40#include <linux/ioctl.h>
41
42/*
43 * Increment this value if any changes that break userspace ABI
44 * compatibility are made.
45 */
46#define IB_USER_MAD_ABI_VERSION 5
47
48/*
49 * Make sure that all structs defined in this file remain laid out so
50 * that they pack the same way on 32-bit and 64-bit architectures (to
51 * avoid incompatibility between 32-bit userspace and 64-bit kernels).
52 */
53
54/**
55 * ib_user_mad_hdr - MAD packet header
56 * @id - ID of agent MAD received with/to be sent with
57 * @status - 0 on successful receive, ETIMEDOUT if no response
58 * received (transaction ID in data[] will be set to TID of original
59 * request) (ignored on send)
60 * @timeout_ms - Milliseconds to wait for response (unset on receive)
61 * @retries - Number of automatic retries to attempt
62 * @qpn - Remote QP number received from/to be sent to
63 * @qkey - Remote Q_Key to be sent with (unset on receive)
64 * @lid - Remote lid received from/to be sent to
65 * @sl - Service level received with/to be sent with
66 * @path_bits - Local path bits received with/to be sent with
67 * @grh_present - If set, GRH was received/should be sent
68 * @gid_index - Local GID index to send with (unset on receive)
69 * @hop_limit - Hop limit in GRH
70 * @traffic_class - Traffic class in GRH
71 * @gid - Remote GID in GRH
72 * @flow_label - Flow label in GRH
73 */
74struct ib_user_mad_hdr {
75 __u32 id;
76 __u32 status;
77 __u32 timeout_ms;
78 __u32 retries;
79 __u32 length;
80 __be32 qpn;
81 __be32 qkey;
82 __be16 lid;
83 __u8 sl;
84 __u8 path_bits;
85 __u8 grh_present;
86 __u8 gid_index;
87 __u8 hop_limit;
88 __u8 traffic_class;
89 __u8 gid[16];
90 __be32 flow_label;
91};
92
93/**
94 * ib_user_mad - MAD packet
95 * @hdr - MAD packet header
96 * @data - Contents of MAD
97 *
98 */
99struct ib_user_mad {
100 struct ib_user_mad_hdr hdr;
101 __u8 data[0];
102};
103
104/**
105 * ib_user_mad_reg_req - MAD registration request
106 * @id - Set by the kernel; used to identify agent in future requests.
107 * @qpn - Queue pair number; must be 0 or 1.
108 * @method_mask - The caller will receive unsolicited MADs for any method
109 * where @method_mask = 1.
110 * @mgmt_class - Indicates which management class of MADs should be receive
111 * by the caller. This field is only required if the user wishes to
112 * receive unsolicited MADs, otherwise it should be 0.
113 * @mgmt_class_version - Indicates which version of MADs for the given
114 * management class to receive.
115 * @oui: Indicates IEEE OUI when mgmt_class is a vendor class
116 * in the range from 0x30 to 0x4f. Otherwise not used.
117 * @rmpp_version: If set, indicates the RMPP version used.
118 *
119 */
120struct ib_user_mad_reg_req {
121 __u32 id;
122 __u32 method_mask[4];
123 __u8 qpn;
124 __u8 mgmt_class;
125 __u8 mgmt_class_version;
126 __u8 oui[3];
127 __u8 rmpp_version;
128};
129
130#define IB_IOCTL_MAGIC 0x1b
131
132#define IB_USER_MAD_REGISTER_AGENT _IOWR(IB_IOCTL_MAGIC, 1, \
133 struct ib_user_mad_reg_req)
134
135#define IB_USER_MAD_UNREGISTER_AGENT _IOW(IB_IOCTL_MAGIC, 2, __u32)
136
137#endif /* IB_USER_MAD_H */
diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h
new file mode 100644
index 000000000000..7ebb01c8f996
--- /dev/null
+++ b/include/rdma/ib_user_verbs.h
@@ -0,0 +1,422 @@
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 * $Id: ib_user_verbs.h 2708 2005-06-24 17:27:21Z roland $
34 */
35
36#ifndef IB_USER_VERBS_H
37#define IB_USER_VERBS_H
38
39#include <linux/types.h>
40
41/*
42 * Increment this value if any changes that break userspace ABI
43 * compatibility are made.
44 */
45#define IB_USER_VERBS_ABI_VERSION 1
46
47enum {
48 IB_USER_VERBS_CMD_QUERY_PARAMS,
49 IB_USER_VERBS_CMD_GET_CONTEXT,
50 IB_USER_VERBS_CMD_QUERY_DEVICE,
51 IB_USER_VERBS_CMD_QUERY_PORT,
52 IB_USER_VERBS_CMD_QUERY_GID,
53 IB_USER_VERBS_CMD_QUERY_PKEY,
54 IB_USER_VERBS_CMD_ALLOC_PD,
55 IB_USER_VERBS_CMD_DEALLOC_PD,
56 IB_USER_VERBS_CMD_CREATE_AH,
57 IB_USER_VERBS_CMD_MODIFY_AH,
58 IB_USER_VERBS_CMD_QUERY_AH,
59 IB_USER_VERBS_CMD_DESTROY_AH,
60 IB_USER_VERBS_CMD_REG_MR,
61 IB_USER_VERBS_CMD_REG_SMR,
62 IB_USER_VERBS_CMD_REREG_MR,
63 IB_USER_VERBS_CMD_QUERY_MR,
64 IB_USER_VERBS_CMD_DEREG_MR,
65 IB_USER_VERBS_CMD_ALLOC_MW,
66 IB_USER_VERBS_CMD_BIND_MW,
67 IB_USER_VERBS_CMD_DEALLOC_MW,
68 IB_USER_VERBS_CMD_CREATE_CQ,
69 IB_USER_VERBS_CMD_RESIZE_CQ,
70 IB_USER_VERBS_CMD_DESTROY_CQ,
71 IB_USER_VERBS_CMD_POLL_CQ,
72 IB_USER_VERBS_CMD_PEEK_CQ,
73 IB_USER_VERBS_CMD_REQ_NOTIFY_CQ,
74 IB_USER_VERBS_CMD_CREATE_QP,
75 IB_USER_VERBS_CMD_QUERY_QP,
76 IB_USER_VERBS_CMD_MODIFY_QP,
77 IB_USER_VERBS_CMD_DESTROY_QP,
78 IB_USER_VERBS_CMD_POST_SEND,
79 IB_USER_VERBS_CMD_POST_RECV,
80 IB_USER_VERBS_CMD_ATTACH_MCAST,
81 IB_USER_VERBS_CMD_DETACH_MCAST,
82 IB_USER_VERBS_CMD_CREATE_SRQ,
83 IB_USER_VERBS_CMD_MODIFY_SRQ,
84 IB_USER_VERBS_CMD_QUERY_SRQ,
85 IB_USER_VERBS_CMD_DESTROY_SRQ,
86 IB_USER_VERBS_CMD_POST_SRQ_RECV
87};
88
89/*
90 * Make sure that all structs defined in this file remain laid out so
91 * that they pack the same way on 32-bit and 64-bit architectures (to
92 * avoid incompatibility between 32-bit userspace and 64-bit kernels).
93 * In particular do not use pointer types -- pass pointers in __u64
94 * instead.
95 */
96
97struct ib_uverbs_async_event_desc {
98 __u64 element;
99 __u32 event_type; /* enum ib_event_type */
100 __u32 reserved;
101};
102
103struct ib_uverbs_comp_event_desc {
104 __u64 cq_handle;
105};
106
107/*
108 * All commands from userspace should start with a __u32 command field
109 * followed by __u16 in_words and out_words fields (which give the
110 * length of the command block and response buffer if any in 32-bit
111 * words). The kernel driver will read these fields first and read
112 * the rest of the command struct based on these value.
113 */
114
115struct ib_uverbs_cmd_hdr {
116 __u32 command;
117 __u16 in_words;
118 __u16 out_words;
119};
120
121/*
122 * No driver_data for "query params" command, since this is intended
123 * to be a core function with no possible device dependence.
124 */
125struct ib_uverbs_query_params {
126 __u64 response;
127};
128
129struct ib_uverbs_query_params_resp {
130 __u32 num_cq_events;
131};
132
133struct ib_uverbs_get_context {
134 __u64 response;
135 __u64 cq_fd_tab;
136 __u64 driver_data[0];
137};
138
139struct ib_uverbs_get_context_resp {
140 __u32 async_fd;
141 __u32 reserved;
142};
143
144struct ib_uverbs_query_device {
145 __u64 response;
146 __u64 driver_data[0];
147};
148
149struct ib_uverbs_query_device_resp {
150 __u64 fw_ver;
151 __be64 node_guid;
152 __be64 sys_image_guid;
153 __u64 max_mr_size;
154 __u64 page_size_cap;
155 __u32 vendor_id;
156 __u32 vendor_part_id;
157 __u32 hw_ver;
158 __u32 max_qp;
159 __u32 max_qp_wr;
160 __u32 device_cap_flags;
161 __u32 max_sge;
162 __u32 max_sge_rd;
163 __u32 max_cq;
164 __u32 max_cqe;
165 __u32 max_mr;
166 __u32 max_pd;
167 __u32 max_qp_rd_atom;
168 __u32 max_ee_rd_atom;
169 __u32 max_res_rd_atom;
170 __u32 max_qp_init_rd_atom;
171 __u32 max_ee_init_rd_atom;
172 __u32 atomic_cap;
173 __u32 max_ee;
174 __u32 max_rdd;
175 __u32 max_mw;
176 __u32 max_raw_ipv6_qp;
177 __u32 max_raw_ethy_qp;
178 __u32 max_mcast_grp;
179 __u32 max_mcast_qp_attach;
180 __u32 max_total_mcast_qp_attach;
181 __u32 max_ah;
182 __u32 max_fmr;
183 __u32 max_map_per_fmr;
184 __u32 max_srq;
185 __u32 max_srq_wr;
186 __u32 max_srq_sge;
187 __u16 max_pkeys;
188 __u8 local_ca_ack_delay;
189 __u8 phys_port_cnt;
190 __u8 reserved[4];
191};
192
193struct ib_uverbs_query_port {
194 __u64 response;
195 __u8 port_num;
196 __u8 reserved[7];
197 __u64 driver_data[0];
198};
199
200struct ib_uverbs_query_port_resp {
201 __u32 port_cap_flags;
202 __u32 max_msg_sz;
203 __u32 bad_pkey_cntr;
204 __u32 qkey_viol_cntr;
205 __u32 gid_tbl_len;
206 __u16 pkey_tbl_len;
207 __u16 lid;
208 __u16 sm_lid;
209 __u8 state;
210 __u8 max_mtu;
211 __u8 active_mtu;
212 __u8 lmc;
213 __u8 max_vl_num;
214 __u8 sm_sl;
215 __u8 subnet_timeout;
216 __u8 init_type_reply;
217 __u8 active_width;
218 __u8 active_speed;
219 __u8 phys_state;
220 __u8 reserved[3];
221};
222
223struct ib_uverbs_query_gid {
224 __u64 response;
225 __u8 port_num;
226 __u8 index;
227 __u8 reserved[6];
228 __u64 driver_data[0];
229};
230
231struct ib_uverbs_query_gid_resp {
232 __u8 gid[16];
233};
234
235struct ib_uverbs_query_pkey {
236 __u64 response;
237 __u8 port_num;
238 __u8 index;
239 __u8 reserved[6];
240 __u64 driver_data[0];
241};
242
243struct ib_uverbs_query_pkey_resp {
244 __u16 pkey;
245 __u16 reserved;
246};
247
248struct ib_uverbs_alloc_pd {
249 __u64 response;
250 __u64 driver_data[0];
251};
252
253struct ib_uverbs_alloc_pd_resp {
254 __u32 pd_handle;
255};
256
257struct ib_uverbs_dealloc_pd {
258 __u32 pd_handle;
259};
260
261struct ib_uverbs_reg_mr {
262 __u64 response;
263 __u64 start;
264 __u64 length;
265 __u64 hca_va;
266 __u32 pd_handle;
267 __u32 access_flags;
268 __u64 driver_data[0];
269};
270
271struct ib_uverbs_reg_mr_resp {
272 __u32 mr_handle;
273 __u32 lkey;
274 __u32 rkey;
275};
276
277struct ib_uverbs_dereg_mr {
278 __u32 mr_handle;
279};
280
281struct ib_uverbs_create_cq {
282 __u64 response;
283 __u64 user_handle;
284 __u32 cqe;
285 __u32 event_handler;
286 __u64 driver_data[0];
287};
288
289struct ib_uverbs_create_cq_resp {
290 __u32 cq_handle;
291 __u32 cqe;
292};
293
294struct ib_uverbs_destroy_cq {
295 __u32 cq_handle;
296};
297
298struct ib_uverbs_create_qp {
299 __u64 response;
300 __u64 user_handle;
301 __u32 pd_handle;
302 __u32 send_cq_handle;
303 __u32 recv_cq_handle;
304 __u32 srq_handle;
305 __u32 max_send_wr;
306 __u32 max_recv_wr;
307 __u32 max_send_sge;
308 __u32 max_recv_sge;
309 __u32 max_inline_data;
310 __u8 sq_sig_all;
311 __u8 qp_type;
312 __u8 is_srq;
313 __u8 reserved;
314 __u64 driver_data[0];
315};
316
317struct ib_uverbs_create_qp_resp {
318 __u32 qp_handle;
319 __u32 qpn;
320};
321
322/*
323 * This struct needs to remain a multiple of 8 bytes to keep the
324 * alignment of the modify QP parameters.
325 */
326struct ib_uverbs_qp_dest {
327 __u8 dgid[16];
328 __u32 flow_label;
329 __u16 dlid;
330 __u16 reserved;
331 __u8 sgid_index;
332 __u8 hop_limit;
333 __u8 traffic_class;
334 __u8 sl;
335 __u8 src_path_bits;
336 __u8 static_rate;
337 __u8 is_global;
338 __u8 port_num;
339};
340
341struct ib_uverbs_modify_qp {
342 struct ib_uverbs_qp_dest dest;
343 struct ib_uverbs_qp_dest alt_dest;
344 __u32 qp_handle;
345 __u32 attr_mask;
346 __u32 qkey;
347 __u32 rq_psn;
348 __u32 sq_psn;
349 __u32 dest_qp_num;
350 __u32 qp_access_flags;
351 __u16 pkey_index;
352 __u16 alt_pkey_index;
353 __u8 qp_state;
354 __u8 cur_qp_state;
355 __u8 path_mtu;
356 __u8 path_mig_state;
357 __u8 en_sqd_async_notify;
358 __u8 max_rd_atomic;
359 __u8 max_dest_rd_atomic;
360 __u8 min_rnr_timer;
361 __u8 port_num;
362 __u8 timeout;
363 __u8 retry_cnt;
364 __u8 rnr_retry;
365 __u8 alt_port_num;
366 __u8 alt_timeout;
367 __u8 reserved[2];
368 __u64 driver_data[0];
369};
370
371struct ib_uverbs_modify_qp_resp {
372};
373
374struct ib_uverbs_destroy_qp {
375 __u32 qp_handle;
376};
377
378struct ib_uverbs_attach_mcast {
379 __u8 gid[16];
380 __u32 qp_handle;
381 __u16 mlid;
382 __u16 reserved;
383 __u64 driver_data[0];
384};
385
386struct ib_uverbs_detach_mcast {
387 __u8 gid[16];
388 __u32 qp_handle;
389 __u16 mlid;
390 __u16 reserved;
391 __u64 driver_data[0];
392};
393
394struct ib_uverbs_create_srq {
395 __u64 response;
396 __u64 user_handle;
397 __u32 pd_handle;
398 __u32 max_wr;
399 __u32 max_sge;
400 __u32 srq_limit;
401 __u64 driver_data[0];
402};
403
404struct ib_uverbs_create_srq_resp {
405 __u32 srq_handle;
406};
407
408struct ib_uverbs_modify_srq {
409 __u32 srq_handle;
410 __u32 attr_mask;
411 __u32 max_wr;
412 __u32 max_sge;
413 __u32 srq_limit;
414 __u32 reserved;
415 __u64 driver_data[0];
416};
417
418struct ib_uverbs_destroy_srq {
419 __u32 srq_handle;
420};
421
422#endif /* IB_USER_VERBS_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
new file mode 100644
index 000000000000..e16cf94870f2
--- /dev/null
+++ b/include/rdma/ib_verbs.h
@@ -0,0 +1,1461 @@
1/*
2 * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
4 * Copyright (c) 2004 Intel Corporation. All rights reserved.
5 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
7 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
8 * Copyright (c) 2005 Cisco Systems. All rights reserved.
9 *
10 * This software is available to you under a choice of one of two
11 * licenses. You may choose to be licensed under the terms of the GNU
12 * General Public License (GPL) Version 2, available from the file
13 * COPYING in the main directory of this source tree, or the
14 * OpenIB.org BSD license below:
15 *
16 * Redistribution and use in source and binary forms, with or
17 * without modification, are permitted provided that the following
18 * conditions are met:
19 *
20 * - Redistributions of source code must retain the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer.
23 *
24 * - Redistributions in binary form must reproduce the above
25 * copyright notice, this list of conditions and the following
26 * disclaimer in the documentation and/or other materials
27 * provided with the distribution.
28 *
29 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
30 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
32 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
33 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
34 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
35 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
36 * SOFTWARE.
37 *
38 * $Id: ib_verbs.h 1349 2004-12-16 21:09:43Z roland $
39 */
40
41#if !defined(IB_VERBS_H)
42#define IB_VERBS_H
43
44#include <linux/types.h>
45#include <linux/device.h>
46
47#include <asm/atomic.h>
48#include <asm/scatterlist.h>
49#include <asm/uaccess.h>
50
51union ib_gid {
52 u8 raw[16];
53 struct {
54 __be64 subnet_prefix;
55 __be64 interface_id;
56 } global;
57};
58
59enum ib_node_type {
60 IB_NODE_CA = 1,
61 IB_NODE_SWITCH,
62 IB_NODE_ROUTER
63};
64
65enum ib_device_cap_flags {
66 IB_DEVICE_RESIZE_MAX_WR = 1,
67 IB_DEVICE_BAD_PKEY_CNTR = (1<<1),
68 IB_DEVICE_BAD_QKEY_CNTR = (1<<2),
69 IB_DEVICE_RAW_MULTI = (1<<3),
70 IB_DEVICE_AUTO_PATH_MIG = (1<<4),
71 IB_DEVICE_CHANGE_PHY_PORT = (1<<5),
72 IB_DEVICE_UD_AV_PORT_ENFORCE = (1<<6),
73 IB_DEVICE_CURR_QP_STATE_MOD = (1<<7),
74 IB_DEVICE_SHUTDOWN_PORT = (1<<8),
75 IB_DEVICE_INIT_TYPE = (1<<9),
76 IB_DEVICE_PORT_ACTIVE_EVENT = (1<<10),
77 IB_DEVICE_SYS_IMAGE_GUID = (1<<11),
78 IB_DEVICE_RC_RNR_NAK_GEN = (1<<12),
79 IB_DEVICE_SRQ_RESIZE = (1<<13),
80 IB_DEVICE_N_NOTIFY_CQ = (1<<14),
81};
82
83enum ib_atomic_cap {
84 IB_ATOMIC_NONE,
85 IB_ATOMIC_HCA,
86 IB_ATOMIC_GLOB
87};
88
89struct ib_device_attr {
90 u64 fw_ver;
91 __be64 node_guid;
92 __be64 sys_image_guid;
93 u64 max_mr_size;
94 u64 page_size_cap;
95 u32 vendor_id;
96 u32 vendor_part_id;
97 u32 hw_ver;
98 int max_qp;
99 int max_qp_wr;
100 int device_cap_flags;
101 int max_sge;
102 int max_sge_rd;
103 int max_cq;
104 int max_cqe;
105 int max_mr;
106 int max_pd;
107 int max_qp_rd_atom;
108 int max_ee_rd_atom;
109 int max_res_rd_atom;
110 int max_qp_init_rd_atom;
111 int max_ee_init_rd_atom;
112 enum ib_atomic_cap atomic_cap;
113 int max_ee;
114 int max_rdd;
115 int max_mw;
116 int max_raw_ipv6_qp;
117 int max_raw_ethy_qp;
118 int max_mcast_grp;
119 int max_mcast_qp_attach;
120 int max_total_mcast_qp_attach;
121 int max_ah;
122 int max_fmr;
123 int max_map_per_fmr;
124 int max_srq;
125 int max_srq_wr;
126 int max_srq_sge;
127 u16 max_pkeys;
128 u8 local_ca_ack_delay;
129};
130
131enum ib_mtu {
132 IB_MTU_256 = 1,
133 IB_MTU_512 = 2,
134 IB_MTU_1024 = 3,
135 IB_MTU_2048 = 4,
136 IB_MTU_4096 = 5
137};
138
139static inline int ib_mtu_enum_to_int(enum ib_mtu mtu)
140{
141 switch (mtu) {
142 case IB_MTU_256: return 256;
143 case IB_MTU_512: return 512;
144 case IB_MTU_1024: return 1024;
145 case IB_MTU_2048: return 2048;
146 case IB_MTU_4096: return 4096;
147 default: return -1;
148 }
149}
150
151enum ib_port_state {
152 IB_PORT_NOP = 0,
153 IB_PORT_DOWN = 1,
154 IB_PORT_INIT = 2,
155 IB_PORT_ARMED = 3,
156 IB_PORT_ACTIVE = 4,
157 IB_PORT_ACTIVE_DEFER = 5
158};
159
160enum ib_port_cap_flags {
161 IB_PORT_SM = 1 << 1,
162 IB_PORT_NOTICE_SUP = 1 << 2,
163 IB_PORT_TRAP_SUP = 1 << 3,
164 IB_PORT_OPT_IPD_SUP = 1 << 4,
165 IB_PORT_AUTO_MIGR_SUP = 1 << 5,
166 IB_PORT_SL_MAP_SUP = 1 << 6,
167 IB_PORT_MKEY_NVRAM = 1 << 7,
168 IB_PORT_PKEY_NVRAM = 1 << 8,
169 IB_PORT_LED_INFO_SUP = 1 << 9,
170 IB_PORT_SM_DISABLED = 1 << 10,
171 IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
172 IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
173 IB_PORT_CM_SUP = 1 << 16,
174 IB_PORT_SNMP_TUNNEL_SUP = 1 << 17,
175 IB_PORT_REINIT_SUP = 1 << 18,
176 IB_PORT_DEVICE_MGMT_SUP = 1 << 19,
177 IB_PORT_VENDOR_CLASS_SUP = 1 << 20,
178 IB_PORT_DR_NOTICE_SUP = 1 << 21,
179 IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22,
180 IB_PORT_BOOT_MGMT_SUP = 1 << 23,
181 IB_PORT_LINK_LATENCY_SUP = 1 << 24,
182 IB_PORT_CLIENT_REG_SUP = 1 << 25
183};
184
185enum ib_port_width {
186 IB_WIDTH_1X = 1,
187 IB_WIDTH_4X = 2,
188 IB_WIDTH_8X = 4,
189 IB_WIDTH_12X = 8
190};
191
192static inline int ib_width_enum_to_int(enum ib_port_width width)
193{
194 switch (width) {
195 case IB_WIDTH_1X: return 1;
196 case IB_WIDTH_4X: return 4;
197 case IB_WIDTH_8X: return 8;
198 case IB_WIDTH_12X: return 12;
199 default: return -1;
200 }
201}
202
203struct ib_port_attr {
204 enum ib_port_state state;
205 enum ib_mtu max_mtu;
206 enum ib_mtu active_mtu;
207 int gid_tbl_len;
208 u32 port_cap_flags;
209 u32 max_msg_sz;
210 u32 bad_pkey_cntr;
211 u32 qkey_viol_cntr;
212 u16 pkey_tbl_len;
213 u16 lid;
214 u16 sm_lid;
215 u8 lmc;
216 u8 max_vl_num;
217 u8 sm_sl;
218 u8 subnet_timeout;
219 u8 init_type_reply;
220 u8 active_width;
221 u8 active_speed;
222 u8 phys_state;
223};
224
225enum ib_device_modify_flags {
226 IB_DEVICE_MODIFY_SYS_IMAGE_GUID = 1
227};
228
229struct ib_device_modify {
230 u64 sys_image_guid;
231};
232
233enum ib_port_modify_flags {
234 IB_PORT_SHUTDOWN = 1,
235 IB_PORT_INIT_TYPE = (1<<2),
236 IB_PORT_RESET_QKEY_CNTR = (1<<3)
237};
238
239struct ib_port_modify {
240 u32 set_port_cap_mask;
241 u32 clr_port_cap_mask;
242 u8 init_type;
243};
244
245enum ib_event_type {
246 IB_EVENT_CQ_ERR,
247 IB_EVENT_QP_FATAL,
248 IB_EVENT_QP_REQ_ERR,
249 IB_EVENT_QP_ACCESS_ERR,
250 IB_EVENT_COMM_EST,
251 IB_EVENT_SQ_DRAINED,
252 IB_EVENT_PATH_MIG,
253 IB_EVENT_PATH_MIG_ERR,
254 IB_EVENT_DEVICE_FATAL,
255 IB_EVENT_PORT_ACTIVE,
256 IB_EVENT_PORT_ERR,
257 IB_EVENT_LID_CHANGE,
258 IB_EVENT_PKEY_CHANGE,
259 IB_EVENT_SM_CHANGE,
260 IB_EVENT_SRQ_ERR,
261 IB_EVENT_SRQ_LIMIT_REACHED,
262 IB_EVENT_QP_LAST_WQE_REACHED
263};
264
265struct ib_event {
266 struct ib_device *device;
267 union {
268 struct ib_cq *cq;
269 struct ib_qp *qp;
270 struct ib_srq *srq;
271 u8 port_num;
272 } element;
273 enum ib_event_type event;
274};
275
276struct ib_event_handler {
277 struct ib_device *device;
278 void (*handler)(struct ib_event_handler *, struct ib_event *);
279 struct list_head list;
280};
281
282#define INIT_IB_EVENT_HANDLER(_ptr, _device, _handler) \
283 do { \
284 (_ptr)->device = _device; \
285 (_ptr)->handler = _handler; \
286 INIT_LIST_HEAD(&(_ptr)->list); \
287 } while (0)
288
289struct ib_global_route {
290 union ib_gid dgid;
291 u32 flow_label;
292 u8 sgid_index;
293 u8 hop_limit;
294 u8 traffic_class;
295};
296
297struct ib_grh {
298 __be32 version_tclass_flow;
299 __be16 paylen;
300 u8 next_hdr;
301 u8 hop_limit;
302 union ib_gid sgid;
303 union ib_gid dgid;
304};
305
306enum {
307 IB_MULTICAST_QPN = 0xffffff
308};
309
310#define IB_LID_PERMISSIVE __constant_htons(0xFFFF)
311
312enum ib_ah_flags {
313 IB_AH_GRH = 1
314};
315
316struct ib_ah_attr {
317 struct ib_global_route grh;
318 u16 dlid;
319 u8 sl;
320 u8 src_path_bits;
321 u8 static_rate;
322 u8 ah_flags;
323 u8 port_num;
324};
325
326enum ib_wc_status {
327 IB_WC_SUCCESS,
328 IB_WC_LOC_LEN_ERR,
329 IB_WC_LOC_QP_OP_ERR,
330 IB_WC_LOC_EEC_OP_ERR,
331 IB_WC_LOC_PROT_ERR,
332 IB_WC_WR_FLUSH_ERR,
333 IB_WC_MW_BIND_ERR,
334 IB_WC_BAD_RESP_ERR,
335 IB_WC_LOC_ACCESS_ERR,
336 IB_WC_REM_INV_REQ_ERR,
337 IB_WC_REM_ACCESS_ERR,
338 IB_WC_REM_OP_ERR,
339 IB_WC_RETRY_EXC_ERR,
340 IB_WC_RNR_RETRY_EXC_ERR,
341 IB_WC_LOC_RDD_VIOL_ERR,
342 IB_WC_REM_INV_RD_REQ_ERR,
343 IB_WC_REM_ABORT_ERR,
344 IB_WC_INV_EECN_ERR,
345 IB_WC_INV_EEC_STATE_ERR,
346 IB_WC_FATAL_ERR,
347 IB_WC_RESP_TIMEOUT_ERR,
348 IB_WC_GENERAL_ERR
349};
350
351enum ib_wc_opcode {
352 IB_WC_SEND,
353 IB_WC_RDMA_WRITE,
354 IB_WC_RDMA_READ,
355 IB_WC_COMP_SWAP,
356 IB_WC_FETCH_ADD,
357 IB_WC_BIND_MW,
358/*
359 * Set value of IB_WC_RECV so consumers can test if a completion is a
360 * receive by testing (opcode & IB_WC_RECV).
361 */
362 IB_WC_RECV = 1 << 7,
363 IB_WC_RECV_RDMA_WITH_IMM
364};
365
366enum ib_wc_flags {
367 IB_WC_GRH = 1,
368 IB_WC_WITH_IMM = (1<<1)
369};
370
371struct ib_wc {
372 u64 wr_id;
373 enum ib_wc_status status;
374 enum ib_wc_opcode opcode;
375 u32 vendor_err;
376 u32 byte_len;
377 __be32 imm_data;
378 u32 qp_num;
379 u32 src_qp;
380 int wc_flags;
381 u16 pkey_index;
382 u16 slid;
383 u8 sl;
384 u8 dlid_path_bits;
385 u8 port_num; /* valid only for DR SMPs on switches */
386};
387
388enum ib_cq_notify {
389 IB_CQ_SOLICITED,
390 IB_CQ_NEXT_COMP
391};
392
393enum ib_srq_attr_mask {
394 IB_SRQ_MAX_WR = 1 << 0,
395 IB_SRQ_LIMIT = 1 << 1,
396};
397
398struct ib_srq_attr {
399 u32 max_wr;
400 u32 max_sge;
401 u32 srq_limit;
402};
403
404struct ib_srq_init_attr {
405 void (*event_handler)(struct ib_event *, void *);
406 void *srq_context;
407 struct ib_srq_attr attr;
408};
409
410struct ib_qp_cap {
411 u32 max_send_wr;
412 u32 max_recv_wr;
413 u32 max_send_sge;
414 u32 max_recv_sge;
415 u32 max_inline_data;
416};
417
418enum ib_sig_type {
419 IB_SIGNAL_ALL_WR,
420 IB_SIGNAL_REQ_WR
421};
422
423enum ib_qp_type {
424 /*
425 * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries
426 * here (and in that order) since the MAD layer uses them as
427 * indices into a 2-entry table.
428 */
429 IB_QPT_SMI,
430 IB_QPT_GSI,
431
432 IB_QPT_RC,
433 IB_QPT_UC,
434 IB_QPT_UD,
435 IB_QPT_RAW_IPV6,
436 IB_QPT_RAW_ETY
437};
438
439struct ib_qp_init_attr {
440 void (*event_handler)(struct ib_event *, void *);
441 void *qp_context;
442 struct ib_cq *send_cq;
443 struct ib_cq *recv_cq;
444 struct ib_srq *srq;
445 struct ib_qp_cap cap;
446 enum ib_sig_type sq_sig_type;
447 enum ib_qp_type qp_type;
448 u8 port_num; /* special QP types only */
449};
450
451enum ib_rnr_timeout {
452 IB_RNR_TIMER_655_36 = 0,
453 IB_RNR_TIMER_000_01 = 1,
454 IB_RNR_TIMER_000_02 = 2,
455 IB_RNR_TIMER_000_03 = 3,
456 IB_RNR_TIMER_000_04 = 4,
457 IB_RNR_TIMER_000_06 = 5,
458 IB_RNR_TIMER_000_08 = 6,
459 IB_RNR_TIMER_000_12 = 7,
460 IB_RNR_TIMER_000_16 = 8,
461 IB_RNR_TIMER_000_24 = 9,
462 IB_RNR_TIMER_000_32 = 10,
463 IB_RNR_TIMER_000_48 = 11,
464 IB_RNR_TIMER_000_64 = 12,
465 IB_RNR_TIMER_000_96 = 13,
466 IB_RNR_TIMER_001_28 = 14,
467 IB_RNR_TIMER_001_92 = 15,
468 IB_RNR_TIMER_002_56 = 16,
469 IB_RNR_TIMER_003_84 = 17,
470 IB_RNR_TIMER_005_12 = 18,
471 IB_RNR_TIMER_007_68 = 19,
472 IB_RNR_TIMER_010_24 = 20,
473 IB_RNR_TIMER_015_36 = 21,
474 IB_RNR_TIMER_020_48 = 22,
475 IB_RNR_TIMER_030_72 = 23,
476 IB_RNR_TIMER_040_96 = 24,
477 IB_RNR_TIMER_061_44 = 25,
478 IB_RNR_TIMER_081_92 = 26,
479 IB_RNR_TIMER_122_88 = 27,
480 IB_RNR_TIMER_163_84 = 28,
481 IB_RNR_TIMER_245_76 = 29,
482 IB_RNR_TIMER_327_68 = 30,
483 IB_RNR_TIMER_491_52 = 31
484};
485
486enum ib_qp_attr_mask {
487 IB_QP_STATE = 1,
488 IB_QP_CUR_STATE = (1<<1),
489 IB_QP_EN_SQD_ASYNC_NOTIFY = (1<<2),
490 IB_QP_ACCESS_FLAGS = (1<<3),
491 IB_QP_PKEY_INDEX = (1<<4),
492 IB_QP_PORT = (1<<5),
493 IB_QP_QKEY = (1<<6),
494 IB_QP_AV = (1<<7),
495 IB_QP_PATH_MTU = (1<<8),
496 IB_QP_TIMEOUT = (1<<9),
497 IB_QP_RETRY_CNT = (1<<10),
498 IB_QP_RNR_RETRY = (1<<11),
499 IB_QP_RQ_PSN = (1<<12),
500 IB_QP_MAX_QP_RD_ATOMIC = (1<<13),
501 IB_QP_ALT_PATH = (1<<14),
502 IB_QP_MIN_RNR_TIMER = (1<<15),
503 IB_QP_SQ_PSN = (1<<16),
504 IB_QP_MAX_DEST_RD_ATOMIC = (1<<17),
505 IB_QP_PATH_MIG_STATE = (1<<18),
506 IB_QP_CAP = (1<<19),
507 IB_QP_DEST_QPN = (1<<20)
508};
509
510enum ib_qp_state {
511 IB_QPS_RESET,
512 IB_QPS_INIT,
513 IB_QPS_RTR,
514 IB_QPS_RTS,
515 IB_QPS_SQD,
516 IB_QPS_SQE,
517 IB_QPS_ERR
518};
519
520enum ib_mig_state {
521 IB_MIG_MIGRATED,
522 IB_MIG_REARM,
523 IB_MIG_ARMED
524};
525
526struct ib_qp_attr {
527 enum ib_qp_state qp_state;
528 enum ib_qp_state cur_qp_state;
529 enum ib_mtu path_mtu;
530 enum ib_mig_state path_mig_state;
531 u32 qkey;
532 u32 rq_psn;
533 u32 sq_psn;
534 u32 dest_qp_num;
535 int qp_access_flags;
536 struct ib_qp_cap cap;
537 struct ib_ah_attr ah_attr;
538 struct ib_ah_attr alt_ah_attr;
539 u16 pkey_index;
540 u16 alt_pkey_index;
541 u8 en_sqd_async_notify;
542 u8 sq_draining;
543 u8 max_rd_atomic;
544 u8 max_dest_rd_atomic;
545 u8 min_rnr_timer;
546 u8 port_num;
547 u8 timeout;
548 u8 retry_cnt;
549 u8 rnr_retry;
550 u8 alt_port_num;
551 u8 alt_timeout;
552};
553
554enum ib_wr_opcode {
555 IB_WR_RDMA_WRITE,
556 IB_WR_RDMA_WRITE_WITH_IMM,
557 IB_WR_SEND,
558 IB_WR_SEND_WITH_IMM,
559 IB_WR_RDMA_READ,
560 IB_WR_ATOMIC_CMP_AND_SWP,
561 IB_WR_ATOMIC_FETCH_AND_ADD
562};
563
564enum ib_send_flags {
565 IB_SEND_FENCE = 1,
566 IB_SEND_SIGNALED = (1<<1),
567 IB_SEND_SOLICITED = (1<<2),
568 IB_SEND_INLINE = (1<<3)
569};
570
571struct ib_sge {
572 u64 addr;
573 u32 length;
574 u32 lkey;
575};
576
577struct ib_send_wr {
578 struct ib_send_wr *next;
579 u64 wr_id;
580 struct ib_sge *sg_list;
581 int num_sge;
582 enum ib_wr_opcode opcode;
583 int send_flags;
584 __be32 imm_data;
585 union {
586 struct {
587 u64 remote_addr;
588 u32 rkey;
589 } rdma;
590 struct {
591 u64 remote_addr;
592 u64 compare_add;
593 u64 swap;
594 u32 rkey;
595 } atomic;
596 struct {
597 struct ib_ah *ah;
598 struct ib_mad_hdr *mad_hdr;
599 u32 remote_qpn;
600 u32 remote_qkey;
601 int timeout_ms; /* valid for MADs only */
602 int retries; /* valid for MADs only */
603 u16 pkey_index; /* valid for GSI only */
604 u8 port_num; /* valid for DR SMPs on switch only */
605 } ud;
606 } wr;
607};
608
609struct ib_recv_wr {
610 struct ib_recv_wr *next;
611 u64 wr_id;
612 struct ib_sge *sg_list;
613 int num_sge;
614};
615
616enum ib_access_flags {
617 IB_ACCESS_LOCAL_WRITE = 1,
618 IB_ACCESS_REMOTE_WRITE = (1<<1),
619 IB_ACCESS_REMOTE_READ = (1<<2),
620 IB_ACCESS_REMOTE_ATOMIC = (1<<3),
621 IB_ACCESS_MW_BIND = (1<<4)
622};
623
624struct ib_phys_buf {
625 u64 addr;
626 u64 size;
627};
628
629struct ib_mr_attr {
630 struct ib_pd *pd;
631 u64 device_virt_addr;
632 u64 size;
633 int mr_access_flags;
634 u32 lkey;
635 u32 rkey;
636};
637
638enum ib_mr_rereg_flags {
639 IB_MR_REREG_TRANS = 1,
640 IB_MR_REREG_PD = (1<<1),
641 IB_MR_REREG_ACCESS = (1<<2)
642};
643
644struct ib_mw_bind {
645 struct ib_mr *mr;
646 u64 wr_id;
647 u64 addr;
648 u32 length;
649 int send_flags;
650 int mw_access_flags;
651};
652
653struct ib_fmr_attr {
654 int max_pages;
655 int max_maps;
656 u8 page_size;
657};
658
659struct ib_ucontext {
660 struct ib_device *device;
661 struct list_head pd_list;
662 struct list_head mr_list;
663 struct list_head mw_list;
664 struct list_head cq_list;
665 struct list_head qp_list;
666 struct list_head srq_list;
667 struct list_head ah_list;
668 spinlock_t lock;
669};
670
671struct ib_uobject {
672 u64 user_handle; /* handle given to us by userspace */
673 struct ib_ucontext *context; /* associated user context */
674 struct list_head list; /* link to context's list */
675 u32 id; /* index into kernel idr */
676};
677
678struct ib_umem {
679 unsigned long user_base;
680 unsigned long virt_base;
681 size_t length;
682 int offset;
683 int page_size;
684 int writable;
685 struct list_head chunk_list;
686};
687
688struct ib_umem_chunk {
689 struct list_head list;
690 int nents;
691 int nmap;
692 struct scatterlist page_list[0];
693};
694
695struct ib_udata {
696 void __user *inbuf;
697 void __user *outbuf;
698 size_t inlen;
699 size_t outlen;
700};
701
702#define IB_UMEM_MAX_PAGE_CHUNK \
703 ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \
704 ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \
705 (void *) &((struct ib_umem_chunk *) 0)->page_list[0]))
706
707struct ib_umem_object {
708 struct ib_uobject uobject;
709 struct ib_umem umem;
710};
711
712struct ib_pd {
713 struct ib_device *device;
714 struct ib_uobject *uobject;
715 atomic_t usecnt; /* count all resources */
716};
717
718struct ib_ah {
719 struct ib_device *device;
720 struct ib_pd *pd;
721 struct ib_uobject *uobject;
722};
723
724typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
725
726struct ib_cq {
727 struct ib_device *device;
728 struct ib_uobject *uobject;
729 ib_comp_handler comp_handler;
730 void (*event_handler)(struct ib_event *, void *);
731 void * cq_context;
732 int cqe;
733 atomic_t usecnt; /* count number of work queues */
734};
735
736struct ib_srq {
737 struct ib_device *device;
738 struct ib_pd *pd;
739 struct ib_uobject *uobject;
740 void (*event_handler)(struct ib_event *, void *);
741 void *srq_context;
742 atomic_t usecnt;
743};
744
745struct ib_qp {
746 struct ib_device *device;
747 struct ib_pd *pd;
748 struct ib_cq *send_cq;
749 struct ib_cq *recv_cq;
750 struct ib_srq *srq;
751 struct ib_uobject *uobject;
752 void (*event_handler)(struct ib_event *, void *);
753 void *qp_context;
754 u32 qp_num;
755 enum ib_qp_type qp_type;
756};
757
758struct ib_mr {
759 struct ib_device *device;
760 struct ib_pd *pd;
761 struct ib_uobject *uobject;
762 u32 lkey;
763 u32 rkey;
764 atomic_t usecnt; /* count number of MWs */
765};
766
767struct ib_mw {
768 struct ib_device *device;
769 struct ib_pd *pd;
770 struct ib_uobject *uobject;
771 u32 rkey;
772};
773
774struct ib_fmr {
775 struct ib_device *device;
776 struct ib_pd *pd;
777 struct list_head list;
778 u32 lkey;
779 u32 rkey;
780};
781
782struct ib_mad;
783struct ib_grh;
784
785enum ib_process_mad_flags {
786 IB_MAD_IGNORE_MKEY = 1,
787 IB_MAD_IGNORE_BKEY = 2,
788 IB_MAD_IGNORE_ALL = IB_MAD_IGNORE_MKEY | IB_MAD_IGNORE_BKEY
789};
790
791enum ib_mad_result {
792 IB_MAD_RESULT_FAILURE = 0, /* (!SUCCESS is the important flag) */
793 IB_MAD_RESULT_SUCCESS = 1 << 0, /* MAD was successfully processed */
794 IB_MAD_RESULT_REPLY = 1 << 1, /* Reply packet needs to be sent */
795 IB_MAD_RESULT_CONSUMED = 1 << 2 /* Packet consumed: stop processing */
796};
797
798#define IB_DEVICE_NAME_MAX 64
799
800struct ib_cache {
801 rwlock_t lock;
802 struct ib_event_handler event_handler;
803 struct ib_pkey_cache **pkey_cache;
804 struct ib_gid_cache **gid_cache;
805};
806
807struct ib_device {
808 struct device *dma_device;
809
810 char name[IB_DEVICE_NAME_MAX];
811
812 struct list_head event_handler_list;
813 spinlock_t event_handler_lock;
814
815 struct list_head core_list;
816 struct list_head client_data_list;
817 spinlock_t client_data_lock;
818
819 struct ib_cache cache;
820
821 u32 flags;
822
823 int (*query_device)(struct ib_device *device,
824 struct ib_device_attr *device_attr);
825 int (*query_port)(struct ib_device *device,
826 u8 port_num,
827 struct ib_port_attr *port_attr);
828 int (*query_gid)(struct ib_device *device,
829 u8 port_num, int index,
830 union ib_gid *gid);
831 int (*query_pkey)(struct ib_device *device,
832 u8 port_num, u16 index, u16 *pkey);
833 int (*modify_device)(struct ib_device *device,
834 int device_modify_mask,
835 struct ib_device_modify *device_modify);
836 int (*modify_port)(struct ib_device *device,
837 u8 port_num, int port_modify_mask,
838 struct ib_port_modify *port_modify);
839 struct ib_ucontext * (*alloc_ucontext)(struct ib_device *device,
840 struct ib_udata *udata);
841 int (*dealloc_ucontext)(struct ib_ucontext *context);
842 int (*mmap)(struct ib_ucontext *context,
843 struct vm_area_struct *vma);
844 struct ib_pd * (*alloc_pd)(struct ib_device *device,
845 struct ib_ucontext *context,
846 struct ib_udata *udata);
847 int (*dealloc_pd)(struct ib_pd *pd);
848 struct ib_ah * (*create_ah)(struct ib_pd *pd,
849 struct ib_ah_attr *ah_attr);
850 int (*modify_ah)(struct ib_ah *ah,
851 struct ib_ah_attr *ah_attr);
852 int (*query_ah)(struct ib_ah *ah,
853 struct ib_ah_attr *ah_attr);
854 int (*destroy_ah)(struct ib_ah *ah);
855 struct ib_srq * (*create_srq)(struct ib_pd *pd,
856 struct ib_srq_init_attr *srq_init_attr,
857 struct ib_udata *udata);
858 int (*modify_srq)(struct ib_srq *srq,
859 struct ib_srq_attr *srq_attr,
860 enum ib_srq_attr_mask srq_attr_mask);
861 int (*query_srq)(struct ib_srq *srq,
862 struct ib_srq_attr *srq_attr);
863 int (*destroy_srq)(struct ib_srq *srq);
864 int (*post_srq_recv)(struct ib_srq *srq,
865 struct ib_recv_wr *recv_wr,
866 struct ib_recv_wr **bad_recv_wr);
867 struct ib_qp * (*create_qp)(struct ib_pd *pd,
868 struct ib_qp_init_attr *qp_init_attr,
869 struct ib_udata *udata);
870 int (*modify_qp)(struct ib_qp *qp,
871 struct ib_qp_attr *qp_attr,
872 int qp_attr_mask);
873 int (*query_qp)(struct ib_qp *qp,
874 struct ib_qp_attr *qp_attr,
875 int qp_attr_mask,
876 struct ib_qp_init_attr *qp_init_attr);
877 int (*destroy_qp)(struct ib_qp *qp);
878 int (*post_send)(struct ib_qp *qp,
879 struct ib_send_wr *send_wr,
880 struct ib_send_wr **bad_send_wr);
881 int (*post_recv)(struct ib_qp *qp,
882 struct ib_recv_wr *recv_wr,
883 struct ib_recv_wr **bad_recv_wr);
884 struct ib_cq * (*create_cq)(struct ib_device *device, int cqe,
885 struct ib_ucontext *context,
886 struct ib_udata *udata);
887 int (*destroy_cq)(struct ib_cq *cq);
888 int (*resize_cq)(struct ib_cq *cq, int *cqe);
889 int (*poll_cq)(struct ib_cq *cq, int num_entries,
890 struct ib_wc *wc);
891 int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
892 int (*req_notify_cq)(struct ib_cq *cq,
893 enum ib_cq_notify cq_notify);
894 int (*req_ncomp_notif)(struct ib_cq *cq,
895 int wc_cnt);
896 struct ib_mr * (*get_dma_mr)(struct ib_pd *pd,
897 int mr_access_flags);
898 struct ib_mr * (*reg_phys_mr)(struct ib_pd *pd,
899 struct ib_phys_buf *phys_buf_array,
900 int num_phys_buf,
901 int mr_access_flags,
902 u64 *iova_start);
903 struct ib_mr * (*reg_user_mr)(struct ib_pd *pd,
904 struct ib_umem *region,
905 int mr_access_flags,
906 struct ib_udata *udata);
907 int (*query_mr)(struct ib_mr *mr,
908 struct ib_mr_attr *mr_attr);
909 int (*dereg_mr)(struct ib_mr *mr);
910 int (*rereg_phys_mr)(struct ib_mr *mr,
911 int mr_rereg_mask,
912 struct ib_pd *pd,
913 struct ib_phys_buf *phys_buf_array,
914 int num_phys_buf,
915 int mr_access_flags,
916 u64 *iova_start);
917 struct ib_mw * (*alloc_mw)(struct ib_pd *pd);
918 int (*bind_mw)(struct ib_qp *qp,
919 struct ib_mw *mw,
920 struct ib_mw_bind *mw_bind);
921 int (*dealloc_mw)(struct ib_mw *mw);
922 struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd,
923 int mr_access_flags,
924 struct ib_fmr_attr *fmr_attr);
925 int (*map_phys_fmr)(struct ib_fmr *fmr,
926 u64 *page_list, int list_len,
927 u64 iova);
928 int (*unmap_fmr)(struct list_head *fmr_list);
929 int (*dealloc_fmr)(struct ib_fmr *fmr);
930 int (*attach_mcast)(struct ib_qp *qp,
931 union ib_gid *gid,
932 u16 lid);
933 int (*detach_mcast)(struct ib_qp *qp,
934 union ib_gid *gid,
935 u16 lid);
936 int (*process_mad)(struct ib_device *device,
937 int process_mad_flags,
938 u8 port_num,
939 struct ib_wc *in_wc,
940 struct ib_grh *in_grh,
941 struct ib_mad *in_mad,
942 struct ib_mad *out_mad);
943
944 struct module *owner;
945 struct class_device class_dev;
946 struct kobject ports_parent;
947 struct list_head port_list;
948
949 enum {
950 IB_DEV_UNINITIALIZED,
951 IB_DEV_REGISTERED,
952 IB_DEV_UNREGISTERED
953 } reg_state;
954
955 u8 node_type;
956 u8 phys_port_cnt;
957};
958
959struct ib_client {
960 char *name;
961 void (*add) (struct ib_device *);
962 void (*remove)(struct ib_device *);
963
964 struct list_head list;
965};
966
967struct ib_device *ib_alloc_device(size_t size);
968void ib_dealloc_device(struct ib_device *device);
969
970int ib_register_device (struct ib_device *device);
971void ib_unregister_device(struct ib_device *device);
972
973int ib_register_client (struct ib_client *client);
974void ib_unregister_client(struct ib_client *client);
975
976void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
977void ib_set_client_data(struct ib_device *device, struct ib_client *client,
978 void *data);
979
980static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
981{
982 return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0;
983}
984
985static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len)
986{
987 return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
988}
989
990int ib_register_event_handler (struct ib_event_handler *event_handler);
991int ib_unregister_event_handler(struct ib_event_handler *event_handler);
992void ib_dispatch_event(struct ib_event *event);
993
994int ib_query_device(struct ib_device *device,
995 struct ib_device_attr *device_attr);
996
997int ib_query_port(struct ib_device *device,
998 u8 port_num, struct ib_port_attr *port_attr);
999
1000int ib_query_gid(struct ib_device *device,
1001 u8 port_num, int index, union ib_gid *gid);
1002
1003int ib_query_pkey(struct ib_device *device,
1004 u8 port_num, u16 index, u16 *pkey);
1005
1006int ib_modify_device(struct ib_device *device,
1007 int device_modify_mask,
1008 struct ib_device_modify *device_modify);
1009
1010int ib_modify_port(struct ib_device *device,
1011 u8 port_num, int port_modify_mask,
1012 struct ib_port_modify *port_modify);
1013
1014/**
1015 * ib_alloc_pd - Allocates an unused protection domain.
1016 * @device: The device on which to allocate the protection domain.
1017 *
1018 * A protection domain object provides an association between QPs, shared
1019 * receive queues, address handles, memory regions, and memory windows.
1020 */
1021struct ib_pd *ib_alloc_pd(struct ib_device *device);
1022
1023/**
1024 * ib_dealloc_pd - Deallocates a protection domain.
1025 * @pd: The protection domain to deallocate.
1026 */
1027int ib_dealloc_pd(struct ib_pd *pd);
1028
1029/**
1030 * ib_create_ah - Creates an address handle for the given address vector.
1031 * @pd: The protection domain associated with the address handle.
1032 * @ah_attr: The attributes of the address vector.
1033 *
1034 * The address handle is used to reference a local or global destination
1035 * in all UD QP post sends.
1036 */
1037struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
1038
1039/**
1040 * ib_create_ah_from_wc - Creates an address handle associated with the
1041 * sender of the specified work completion.
1042 * @pd: The protection domain associated with the address handle.
1043 * @wc: Work completion information associated with a received message.
1044 * @grh: References the received global route header. This parameter is
1045 * ignored unless the work completion indicates that the GRH is valid.
1046 * @port_num: The outbound port number to associate with the address.
1047 *
1048 * The address handle is used to reference a local or global destination
1049 * in all UD QP post sends.
1050 */
1051struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc,
1052 struct ib_grh *grh, u8 port_num);
1053
1054/**
1055 * ib_modify_ah - Modifies the address vector associated with an address
1056 * handle.
1057 * @ah: The address handle to modify.
1058 * @ah_attr: The new address vector attributes to associate with the
1059 * address handle.
1060 */
1061int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
1062
1063/**
1064 * ib_query_ah - Queries the address vector associated with an address
1065 * handle.
1066 * @ah: The address handle to query.
1067 * @ah_attr: The address vector attributes associated with the address
1068 * handle.
1069 */
1070int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
1071
1072/**
1073 * ib_destroy_ah - Destroys an address handle.
1074 * @ah: The address handle to destroy.
1075 */
1076int ib_destroy_ah(struct ib_ah *ah);
1077
1078/**
1079 * ib_create_srq - Creates a SRQ associated with the specified protection
1080 * domain.
1081 * @pd: The protection domain associated with the SRQ.
1082 * @srq_init_attr: A list of initial attributes required to create the SRQ.
1083 *
1084 * srq_attr->max_wr and srq_attr->max_sge are read the determine the
1085 * requested size of the SRQ, and set to the actual values allocated
1086 * on return. If ib_create_srq() succeeds, then max_wr and max_sge
1087 * will always be at least as large as the requested values.
1088 */
1089struct ib_srq *ib_create_srq(struct ib_pd *pd,
1090 struct ib_srq_init_attr *srq_init_attr);
1091
1092/**
1093 * ib_modify_srq - Modifies the attributes for the specified SRQ.
1094 * @srq: The SRQ to modify.
1095 * @srq_attr: On input, specifies the SRQ attributes to modify. On output,
1096 * the current values of selected SRQ attributes are returned.
1097 * @srq_attr_mask: A bit-mask used to specify which attributes of the SRQ
1098 * are being modified.
1099 *
1100 * The mask may contain IB_SRQ_MAX_WR to resize the SRQ and/or
1101 * IB_SRQ_LIMIT to set the SRQ's limit and request notification when
1102 * the number of receives queued drops below the limit.
1103 */
1104int ib_modify_srq(struct ib_srq *srq,
1105 struct ib_srq_attr *srq_attr,
1106 enum ib_srq_attr_mask srq_attr_mask);
1107
1108/**
1109 * ib_query_srq - Returns the attribute list and current values for the
1110 * specified SRQ.
1111 * @srq: The SRQ to query.
1112 * @srq_attr: The attributes of the specified SRQ.
1113 */
1114int ib_query_srq(struct ib_srq *srq,
1115 struct ib_srq_attr *srq_attr);
1116
1117/**
1118 * ib_destroy_srq - Destroys the specified SRQ.
1119 * @srq: The SRQ to destroy.
1120 */
1121int ib_destroy_srq(struct ib_srq *srq);
1122
1123/**
1124 * ib_post_srq_recv - Posts a list of work requests to the specified SRQ.
1125 * @srq: The SRQ to post the work request on.
1126 * @recv_wr: A list of work requests to post on the receive queue.
1127 * @bad_recv_wr: On an immediate failure, this parameter will reference
1128 * the work request that failed to be posted on the QP.
1129 */
1130static inline int ib_post_srq_recv(struct ib_srq *srq,
1131 struct ib_recv_wr *recv_wr,
1132 struct ib_recv_wr **bad_recv_wr)
1133{
1134 return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr);
1135}
1136
1137/**
1138 * ib_create_qp - Creates a QP associated with the specified protection
1139 * domain.
1140 * @pd: The protection domain associated with the QP.
1141 * @qp_init_attr: A list of initial attributes required to create the QP.
1142 */
1143struct ib_qp *ib_create_qp(struct ib_pd *pd,
1144 struct ib_qp_init_attr *qp_init_attr);
1145
1146/**
1147 * ib_modify_qp - Modifies the attributes for the specified QP and then
1148 * transitions the QP to the given state.
1149 * @qp: The QP to modify.
1150 * @qp_attr: On input, specifies the QP attributes to modify. On output,
1151 * the current values of selected QP attributes are returned.
1152 * @qp_attr_mask: A bit-mask used to specify which attributes of the QP
1153 * are being modified.
1154 */
1155int ib_modify_qp(struct ib_qp *qp,
1156 struct ib_qp_attr *qp_attr,
1157 int qp_attr_mask);
1158
1159/**
1160 * ib_query_qp - Returns the attribute list and current values for the
1161 * specified QP.
1162 * @qp: The QP to query.
1163 * @qp_attr: The attributes of the specified QP.
1164 * @qp_attr_mask: A bit-mask used to select specific attributes to query.
1165 * @qp_init_attr: Additional attributes of the selected QP.
1166 *
1167 * The qp_attr_mask may be used to limit the query to gathering only the
1168 * selected attributes.
1169 */
1170int ib_query_qp(struct ib_qp *qp,
1171 struct ib_qp_attr *qp_attr,
1172 int qp_attr_mask,
1173 struct ib_qp_init_attr *qp_init_attr);
1174
1175/**
1176 * ib_destroy_qp - Destroys the specified QP.
1177 * @qp: The QP to destroy.
1178 */
1179int ib_destroy_qp(struct ib_qp *qp);
1180
1181/**
1182 * ib_post_send - Posts a list of work requests to the send queue of
1183 * the specified QP.
1184 * @qp: The QP to post the work request on.
1185 * @send_wr: A list of work requests to post on the send queue.
1186 * @bad_send_wr: On an immediate failure, this parameter will reference
1187 * the work request that failed to be posted on the QP.
1188 */
1189static inline int ib_post_send(struct ib_qp *qp,
1190 struct ib_send_wr *send_wr,
1191 struct ib_send_wr **bad_send_wr)
1192{
1193 return qp->device->post_send(qp, send_wr, bad_send_wr);
1194}
1195
1196/**
1197 * ib_post_recv - Posts a list of work requests to the receive queue of
1198 * the specified QP.
1199 * @qp: The QP to post the work request on.
1200 * @recv_wr: A list of work requests to post on the receive queue.
1201 * @bad_recv_wr: On an immediate failure, this parameter will reference
1202 * the work request that failed to be posted on the QP.
1203 */
1204static inline int ib_post_recv(struct ib_qp *qp,
1205 struct ib_recv_wr *recv_wr,
1206 struct ib_recv_wr **bad_recv_wr)
1207{
1208 return qp->device->post_recv(qp, recv_wr, bad_recv_wr);
1209}
1210
1211/**
1212 * ib_create_cq - Creates a CQ on the specified device.
1213 * @device: The device on which to create the CQ.
1214 * @comp_handler: A user-specified callback that is invoked when a
1215 * completion event occurs on the CQ.
1216 * @event_handler: A user-specified callback that is invoked when an
1217 * asynchronous event not associated with a completion occurs on the CQ.
1218 * @cq_context: Context associated with the CQ returned to the user via
1219 * the associated completion and event handlers.
1220 * @cqe: The minimum size of the CQ.
1221 *
1222 * Users can examine the cq structure to determine the actual CQ size.
1223 */
1224struct ib_cq *ib_create_cq(struct ib_device *device,
1225 ib_comp_handler comp_handler,
1226 void (*event_handler)(struct ib_event *, void *),
1227 void *cq_context, int cqe);
1228
1229/**
1230 * ib_resize_cq - Modifies the capacity of the CQ.
1231 * @cq: The CQ to resize.
1232 * @cqe: The minimum size of the CQ.
1233 *
1234 * Users can examine the cq structure to determine the actual CQ size.
1235 */
1236int ib_resize_cq(struct ib_cq *cq, int cqe);
1237
1238/**
1239 * ib_destroy_cq - Destroys the specified CQ.
1240 * @cq: The CQ to destroy.
1241 */
1242int ib_destroy_cq(struct ib_cq *cq);
1243
1244/**
1245 * ib_poll_cq - poll a CQ for completion(s)
1246 * @cq:the CQ being polled
1247 * @num_entries:maximum number of completions to return
1248 * @wc:array of at least @num_entries &struct ib_wc where completions
1249 * will be returned
1250 *
1251 * Poll a CQ for (possibly multiple) completions. If the return value
1252 * is < 0, an error occurred. If the return value is >= 0, it is the
1253 * number of completions returned. If the return value is
1254 * non-negative and < num_entries, then the CQ was emptied.
1255 */
1256static inline int ib_poll_cq(struct ib_cq *cq, int num_entries,
1257 struct ib_wc *wc)
1258{
1259 return cq->device->poll_cq(cq, num_entries, wc);
1260}
1261
1262/**
1263 * ib_peek_cq - Returns the number of unreaped completions currently
1264 * on the specified CQ.
1265 * @cq: The CQ to peek.
1266 * @wc_cnt: A minimum number of unreaped completions to check for.
1267 *
1268 * If the number of unreaped completions is greater than or equal to wc_cnt,
1269 * this function returns wc_cnt, otherwise, it returns the actual number of
1270 * unreaped completions.
1271 */
1272int ib_peek_cq(struct ib_cq *cq, int wc_cnt);
1273
1274/**
1275 * ib_req_notify_cq - Request completion notification on a CQ.
1276 * @cq: The CQ to generate an event for.
1277 * @cq_notify: If set to %IB_CQ_SOLICITED, completion notification will
1278 * occur on the next solicited event. If set to %IB_CQ_NEXT_COMP,
1279 * notification will occur on the next completion.
1280 */
1281static inline int ib_req_notify_cq(struct ib_cq *cq,
1282 enum ib_cq_notify cq_notify)
1283{
1284 return cq->device->req_notify_cq(cq, cq_notify);
1285}
1286
1287/**
1288 * ib_req_ncomp_notif - Request completion notification when there are
1289 * at least the specified number of unreaped completions on the CQ.
1290 * @cq: The CQ to generate an event for.
1291 * @wc_cnt: The number of unreaped completions that should be on the
1292 * CQ before an event is generated.
1293 */
1294static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
1295{
1296 return cq->device->req_ncomp_notif ?
1297 cq->device->req_ncomp_notif(cq, wc_cnt) :
1298 -ENOSYS;
1299}
1300
1301/**
1302 * ib_get_dma_mr - Returns a memory region for system memory that is
1303 * usable for DMA.
1304 * @pd: The protection domain associated with the memory region.
1305 * @mr_access_flags: Specifies the memory access rights.
1306 */
1307struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
1308
1309/**
1310 * ib_reg_phys_mr - Prepares a virtually addressed memory region for use
1311 * by an HCA.
1312 * @pd: The protection domain associated assigned to the registered region.
1313 * @phys_buf_array: Specifies a list of physical buffers to use in the
1314 * memory region.
1315 * @num_phys_buf: Specifies the size of the phys_buf_array.
1316 * @mr_access_flags: Specifies the memory access rights.
1317 * @iova_start: The offset of the region's starting I/O virtual address.
1318 */
1319struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
1320 struct ib_phys_buf *phys_buf_array,
1321 int num_phys_buf,
1322 int mr_access_flags,
1323 u64 *iova_start);
1324
1325/**
1326 * ib_rereg_phys_mr - Modifies the attributes of an existing memory region.
1327 * Conceptually, this call performs the functions deregister memory region
1328 * followed by register physical memory region. Where possible,
1329 * resources are reused instead of deallocated and reallocated.
1330 * @mr: The memory region to modify.
1331 * @mr_rereg_mask: A bit-mask used to indicate which of the following
1332 * properties of the memory region are being modified.
1333 * @pd: If %IB_MR_REREG_PD is set in mr_rereg_mask, this field specifies
1334 * the new protection domain to associated with the memory region,
1335 * otherwise, this parameter is ignored.
1336 * @phys_buf_array: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this
1337 * field specifies a list of physical buffers to use in the new
1338 * translation, otherwise, this parameter is ignored.
1339 * @num_phys_buf: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this
1340 * field specifies the size of the phys_buf_array, otherwise, this
1341 * parameter is ignored.
1342 * @mr_access_flags: If %IB_MR_REREG_ACCESS is set in mr_rereg_mask, this
1343 * field specifies the new memory access rights, otherwise, this
1344 * parameter is ignored.
1345 * @iova_start: The offset of the region's starting I/O virtual address.
1346 */
1347int ib_rereg_phys_mr(struct ib_mr *mr,
1348 int mr_rereg_mask,
1349 struct ib_pd *pd,
1350 struct ib_phys_buf *phys_buf_array,
1351 int num_phys_buf,
1352 int mr_access_flags,
1353 u64 *iova_start);
1354
1355/**
1356 * ib_query_mr - Retrieves information about a specific memory region.
1357 * @mr: The memory region to retrieve information about.
1358 * @mr_attr: The attributes of the specified memory region.
1359 */
1360int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
1361
1362/**
1363 * ib_dereg_mr - Deregisters a memory region and removes it from the
1364 * HCA translation table.
1365 * @mr: The memory region to deregister.
1366 */
1367int ib_dereg_mr(struct ib_mr *mr);
1368
1369/**
1370 * ib_alloc_mw - Allocates a memory window.
1371 * @pd: The protection domain associated with the memory window.
1372 */
1373struct ib_mw *ib_alloc_mw(struct ib_pd *pd);
1374
1375/**
1376 * ib_bind_mw - Posts a work request to the send queue of the specified
1377 * QP, which binds the memory window to the given address range and
1378 * remote access attributes.
1379 * @qp: QP to post the bind work request on.
1380 * @mw: The memory window to bind.
1381 * @mw_bind: Specifies information about the memory window, including
1382 * its address range, remote access rights, and associated memory region.
1383 */
1384static inline int ib_bind_mw(struct ib_qp *qp,
1385 struct ib_mw *mw,
1386 struct ib_mw_bind *mw_bind)
1387{
1388 /* XXX reference counting in corresponding MR? */
1389 return mw->device->bind_mw ?
1390 mw->device->bind_mw(qp, mw, mw_bind) :
1391 -ENOSYS;
1392}
1393
1394/**
1395 * ib_dealloc_mw - Deallocates a memory window.
1396 * @mw: The memory window to deallocate.
1397 */
1398int ib_dealloc_mw(struct ib_mw *mw);
1399
1400/**
1401 * ib_alloc_fmr - Allocates a unmapped fast memory region.
1402 * @pd: The protection domain associated with the unmapped region.
1403 * @mr_access_flags: Specifies the memory access rights.
1404 * @fmr_attr: Attributes of the unmapped region.
1405 *
1406 * A fast memory region must be mapped before it can be used as part of
1407 * a work request.
1408 */
1409struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
1410 int mr_access_flags,
1411 struct ib_fmr_attr *fmr_attr);
1412
1413/**
1414 * ib_map_phys_fmr - Maps a list of physical pages to a fast memory region.
1415 * @fmr: The fast memory region to associate with the pages.
1416 * @page_list: An array of physical pages to map to the fast memory region.
1417 * @list_len: The number of pages in page_list.
1418 * @iova: The I/O virtual address to use with the mapped region.
1419 */
1420static inline int ib_map_phys_fmr(struct ib_fmr *fmr,
1421 u64 *page_list, int list_len,
1422 u64 iova)
1423{
1424 return fmr->device->map_phys_fmr(fmr, page_list, list_len, iova);
1425}
1426
1427/**
1428 * ib_unmap_fmr - Removes the mapping from a list of fast memory regions.
1429 * @fmr_list: A linked list of fast memory regions to unmap.
1430 */
1431int ib_unmap_fmr(struct list_head *fmr_list);
1432
1433/**
1434 * ib_dealloc_fmr - Deallocates a fast memory region.
1435 * @fmr: The fast memory region to deallocate.
1436 */
1437int ib_dealloc_fmr(struct ib_fmr *fmr);
1438
1439/**
1440 * ib_attach_mcast - Attaches the specified QP to a multicast group.
1441 * @qp: QP to attach to the multicast group. The QP must be type
1442 * IB_QPT_UD.
1443 * @gid: Multicast group GID.
1444 * @lid: Multicast group LID in host byte order.
1445 *
1446 * In order to send and receive multicast packets, subnet
1447 * administration must have created the multicast group and configured
1448 * the fabric appropriately. The port associated with the specified
1449 * QP must also be a member of the multicast group.
1450 */
1451int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
1452
1453/**
1454 * ib_detach_mcast - Detaches the specified QP from a multicast group.
1455 * @qp: QP to detach from the multicast group.
1456 * @gid: Multicast group GID.
1457 * @lid: Multicast group LID in host byte order.
1458 */
1459int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
1460
1461#endif /* IB_VERBS_H */