diff options
author | Jeff Garzik <jgarzik@pobox.com> | 2005-08-30 13:32:29 -0400 |
---|---|---|
committer | Jeff Garzik <jgarzik@pobox.com> | 2005-08-30 13:32:29 -0400 |
commit | ed735ccbefaf7e5e3ef61418f7e209b8c59308a7 (patch) | |
tree | b8cc69814d2368b08d0a84c8da0c12028bd04867 | |
parent | 39fbe47377062200acc26ea0ccef223b4399a82c (diff) | |
parent | d8971fcb702e24d1e22c77fd1772f182ffee87e3 (diff) |
Merge HEAD from /spare/repo/linux-2.6/.git
661 files changed, 29643 insertions, 11418 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 8b1430b46655..0665cb12bd66 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
@@ -135,3 +135,15 @@ Why: With the 16-bit PCMCIA subsystem now behaving (almost) like a | |||
135 | pcmciautils package available at | 135 | pcmciautils package available at |
136 | http://kernel.org/pub/linux/utils/kernel/pcmcia/ | 136 | http://kernel.org/pub/linux/utils/kernel/pcmcia/ |
137 | Who: Dominik Brodowski <linux@brodo.de> | 137 | Who: Dominik Brodowski <linux@brodo.de> |
138 | |||
139 | --------------------------- | ||
140 | |||
141 | What: ip_queue and ip6_queue (old ipv4-only and ipv6-only netfilter queue) | ||
142 | When: December 2005 | ||
143 | Why: This interface has been obsoleted by the new layer3-independent | ||
144 | "nfnetlink_queue". The Kernel interface is compatible, so the old | ||
145 | ip[6]tables "QUEUE" targets still work and will transparently handle | ||
146 | all packets into nfnetlink queue number 0. Userspace users will have | ||
147 | to link against API-compatible library on top of libnfnetlink_queue | ||
148 | instead of the current 'libipq'. | ||
149 | Who: Harald Welte <laforge@netfilter.org> | ||
diff --git a/arch/ppc/Makefile b/arch/ppc/Makefile index f9b0d778dd82..d1b6e6dcb504 100644 --- a/arch/ppc/Makefile +++ b/arch/ppc/Makefile | |||
@@ -21,11 +21,13 @@ CC := $(CC) -m32 | |||
21 | endif | 21 | endif |
22 | 22 | ||
23 | LDFLAGS_vmlinux := -Ttext $(KERNELLOAD) -Bstatic | 23 | LDFLAGS_vmlinux := -Ttext $(KERNELLOAD) -Bstatic |
24 | CPPFLAGS += -Iarch/$(ARCH) | 24 | CPPFLAGS += -Iarch/$(ARCH) -Iinclude3 |
25 | AFLAGS += -Iarch/$(ARCH) | 25 | AFLAGS += -Iarch/$(ARCH) |
26 | CFLAGS += -Iarch/$(ARCH) -msoft-float -pipe \ | 26 | CFLAGS += -Iarch/$(ARCH) -msoft-float -pipe \ |
27 | -ffixed-r2 -mmultiple | 27 | -ffixed-r2 -mmultiple |
28 | CPP = $(CC) -E $(CFLAGS) | 28 | CPP = $(CC) -E $(CFLAGS) |
29 | # Temporary hack until we have migrated to asm-powerpc | ||
30 | LINUXINCLUDE += -Iinclude3 | ||
29 | 31 | ||
30 | CHECKFLAGS += -D__powerpc__ | 32 | CHECKFLAGS += -D__powerpc__ |
31 | 33 | ||
@@ -101,6 +103,7 @@ endef | |||
101 | 103 | ||
102 | archclean: | 104 | archclean: |
103 | $(Q)$(MAKE) $(clean)=arch/ppc/boot | 105 | $(Q)$(MAKE) $(clean)=arch/ppc/boot |
106 | $(Q)rm -rf include3 | ||
104 | 107 | ||
105 | prepare: include/asm-$(ARCH)/offsets.h checkbin | 108 | prepare: include/asm-$(ARCH)/offsets.h checkbin |
106 | 109 | ||
@@ -110,6 +113,12 @@ arch/$(ARCH)/kernel/asm-offsets.s: include/asm include/linux/version.h \ | |||
110 | include/asm-$(ARCH)/offsets.h: arch/$(ARCH)/kernel/asm-offsets.s | 113 | include/asm-$(ARCH)/offsets.h: arch/$(ARCH)/kernel/asm-offsets.s |
111 | $(call filechk,gen-asm-offsets) | 114 | $(call filechk,gen-asm-offsets) |
112 | 115 | ||
116 | # Temporary hack until we have migrated to asm-powerpc | ||
117 | include/asm: include3/asm | ||
118 | include3/asm: | ||
119 | $(Q)if [ ! -d include3 ]; then mkdir -p include3; fi | ||
120 | $(Q)ln -fsn $(srctree)/include/asm-powerpc include3/asm | ||
121 | |||
113 | # Use the file '.tmp_gas_check' for binutils tests, as gas won't output | 122 | # Use the file '.tmp_gas_check' for binutils tests, as gas won't output |
114 | # to stdout and these checks are run even on install targets. | 123 | # to stdout and these checks are run even on install targets. |
115 | TOUT := .tmp_gas_check | 124 | TOUT := .tmp_gas_check |
diff --git a/arch/ppc/boot/utils/addRamDisk.c b/arch/ppc/boot/utils/addRamDisk.c deleted file mode 100644 index 93400dfcce7f..000000000000 --- a/arch/ppc/boot/utils/addRamDisk.c +++ /dev/null | |||
@@ -1,203 +0,0 @@ | |||
1 | #include <stdio.h> | ||
2 | #include <stdlib.h> | ||
3 | #include <netinet/in.h> | ||
4 | #include <unistd.h> | ||
5 | #include <sys/types.h> | ||
6 | #include <sys/stat.h> | ||
7 | #include <string.h> | ||
8 | |||
9 | #define ElfHeaderSize (64 * 1024) | ||
10 | #define ElfPages (ElfHeaderSize / 4096) | ||
11 | #define KERNELBASE (0xc0000000) | ||
12 | |||
13 | void get4k(FILE *file, char *buf ) | ||
14 | { | ||
15 | unsigned j; | ||
16 | unsigned num = fread(buf, 1, 4096, file); | ||
17 | for ( j=num; j<4096; ++j ) | ||
18 | buf[j] = 0; | ||
19 | } | ||
20 | |||
21 | void put4k(FILE *file, char *buf ) | ||
22 | { | ||
23 | fwrite(buf, 1, 4096, file); | ||
24 | } | ||
25 | |||
26 | void death(const char *msg, FILE *fdesc, const char *fname) | ||
27 | { | ||
28 | printf(msg); | ||
29 | fclose(fdesc); | ||
30 | unlink(fname); | ||
31 | exit(1); | ||
32 | } | ||
33 | |||
34 | int main(int argc, char **argv) | ||
35 | { | ||
36 | char inbuf[4096]; | ||
37 | FILE *ramDisk = NULL; | ||
38 | FILE *inputVmlinux = NULL; | ||
39 | FILE *outputVmlinux = NULL; | ||
40 | unsigned i = 0; | ||
41 | u_int32_t ramFileLen = 0; | ||
42 | u_int32_t ramLen = 0; | ||
43 | u_int32_t roundR = 0; | ||
44 | u_int32_t kernelLen = 0; | ||
45 | u_int32_t actualKernelLen = 0; | ||
46 | u_int32_t round = 0; | ||
47 | u_int32_t roundedKernelLen = 0; | ||
48 | u_int32_t ramStartOffs = 0; | ||
49 | u_int32_t ramPages = 0; | ||
50 | u_int32_t roundedKernelPages = 0; | ||
51 | u_int32_t hvReleaseData = 0; | ||
52 | u_int32_t eyeCatcher = 0xc8a5d9c4; | ||
53 | u_int32_t naca = 0; | ||
54 | u_int32_t xRamDisk = 0; | ||
55 | u_int32_t xRamDiskSize = 0; | ||
56 | if ( argc < 2 ) { | ||
57 | printf("Name of RAM disk file missing.\n"); | ||
58 | exit(1); | ||
59 | } | ||
60 | |||
61 | if ( argc < 3 ) { | ||
62 | printf("Name of vmlinux file missing.\n"); | ||
63 | exit(1); | ||
64 | } | ||
65 | |||
66 | if ( argc < 4 ) { | ||
67 | printf("Name of vmlinux output file missing.\n"); | ||
68 | exit(1); | ||
69 | } | ||
70 | |||
71 | ramDisk = fopen(argv[1], "r"); | ||
72 | if ( ! ramDisk ) { | ||
73 | printf("RAM disk file \"%s\" failed to open.\n", argv[1]); | ||
74 | exit(1); | ||
75 | } | ||
76 | inputVmlinux = fopen(argv[2], "r"); | ||
77 | if ( ! inputVmlinux ) { | ||
78 | printf("vmlinux file \"%s\" failed to open.\n", argv[2]); | ||
79 | exit(1); | ||
80 | } | ||
81 | outputVmlinux = fopen(argv[3], "w+"); | ||
82 | if ( ! outputVmlinux ) { | ||
83 | printf("output vmlinux file \"%s\" failed to open.\n", argv[3]); | ||
84 | exit(1); | ||
85 | } | ||
86 | fseek(ramDisk, 0, SEEK_END); | ||
87 | ramFileLen = ftell(ramDisk); | ||
88 | fseek(ramDisk, 0, SEEK_SET); | ||
89 | printf("%s file size = %d\n", argv[1], ramFileLen); | ||
90 | |||
91 | ramLen = ramFileLen; | ||
92 | |||
93 | roundR = 4096 - (ramLen % 4096); | ||
94 | if ( roundR ) { | ||
95 | printf("Rounding RAM disk file up to a multiple of 4096, adding %d\n", roundR); | ||
96 | ramLen += roundR; | ||
97 | } | ||
98 | |||
99 | printf("Rounded RAM disk size is %d\n", ramLen); | ||
100 | fseek(inputVmlinux, 0, SEEK_END); | ||
101 | kernelLen = ftell(inputVmlinux); | ||
102 | fseek(inputVmlinux, 0, SEEK_SET); | ||
103 | printf("kernel file size = %d\n", kernelLen); | ||
104 | if ( kernelLen == 0 ) { | ||
105 | printf("You must have a linux kernel specified as argv[2]\n"); | ||
106 | exit(1); | ||
107 | } | ||
108 | |||
109 | actualKernelLen = kernelLen - ElfHeaderSize; | ||
110 | |||
111 | printf("actual kernel length (minus ELF header) = %d\n", actualKernelLen); | ||
112 | |||
113 | round = actualKernelLen % 4096; | ||
114 | roundedKernelLen = actualKernelLen; | ||
115 | if ( round ) | ||
116 | roundedKernelLen += (4096 - round); | ||
117 | |||
118 | printf("actual kernel length rounded up to a 4k multiple = %d\n", roundedKernelLen); | ||
119 | |||
120 | ramStartOffs = roundedKernelLen; | ||
121 | ramPages = ramLen / 4096; | ||
122 | |||
123 | printf("RAM disk pages to copy = %d\n", ramPages); | ||
124 | |||
125 | // Copy 64K ELF header | ||
126 | for (i=0; i<(ElfPages); ++i) { | ||
127 | get4k( inputVmlinux, inbuf ); | ||
128 | put4k( outputVmlinux, inbuf ); | ||
129 | } | ||
130 | |||
131 | roundedKernelPages = roundedKernelLen / 4096; | ||
132 | |||
133 | fseek(inputVmlinux, ElfHeaderSize, SEEK_SET); | ||
134 | |||
135 | for ( i=0; i<roundedKernelPages; ++i ) { | ||
136 | get4k( inputVmlinux, inbuf ); | ||
137 | put4k( outputVmlinux, inbuf ); | ||
138 | } | ||
139 | |||
140 | for ( i=0; i<ramPages; ++i ) { | ||
141 | get4k( ramDisk, inbuf ); | ||
142 | put4k( outputVmlinux, inbuf ); | ||
143 | } | ||
144 | |||
145 | /* Close the input files */ | ||
146 | fclose(ramDisk); | ||
147 | fclose(inputVmlinux); | ||
148 | /* And flush the written output file */ | ||
149 | fflush(outputVmlinux); | ||
150 | |||
151 | /* fseek to the hvReleaseData pointer */ | ||
152 | fseek(outputVmlinux, ElfHeaderSize + 0x24, SEEK_SET); | ||
153 | if (fread(&hvReleaseData, 4, 1, outputVmlinux) != 1) { | ||
154 | death("Could not read hvReleaseData pointer\n", outputVmlinux, argv[3]); | ||
155 | } | ||
156 | hvReleaseData = ntohl(hvReleaseData); /* Convert to native int */ | ||
157 | printf("hvReleaseData is at %08x\n", hvReleaseData); | ||
158 | |||
159 | /* fseek to the hvReleaseData */ | ||
160 | fseek(outputVmlinux, ElfHeaderSize + hvReleaseData, SEEK_SET); | ||
161 | if (fread(inbuf, 0x40, 1, outputVmlinux) != 1) { | ||
162 | death("Could not read hvReleaseData\n", outputVmlinux, argv[3]); | ||
163 | } | ||
164 | /* Check hvReleaseData sanity */ | ||
165 | if (memcmp(inbuf, &eyeCatcher, 4) != 0) { | ||
166 | death("hvReleaseData is invalid\n", outputVmlinux, argv[3]); | ||
167 | } | ||
168 | /* Get the naca pointer */ | ||
169 | naca = ntohl(*((u_int32_t *) &inbuf[0x0c])) - KERNELBASE; | ||
170 | printf("naca is at %08x\n", naca); | ||
171 | |||
172 | /* fseek to the naca */ | ||
173 | fseek(outputVmlinux, ElfHeaderSize + naca, SEEK_SET); | ||
174 | if (fread(inbuf, 0x18, 1, outputVmlinux) != 1) { | ||
175 | death("Could not read naca\n", outputVmlinux, argv[3]); | ||
176 | } | ||
177 | xRamDisk = ntohl(*((u_int32_t *) &inbuf[0x0c])); | ||
178 | xRamDiskSize = ntohl(*((u_int32_t *) &inbuf[0x14])); | ||
179 | /* Make sure a RAM disk isn't already present */ | ||
180 | if ((xRamDisk != 0) || (xRamDiskSize != 0)) { | ||
181 | death("RAM disk is already attached to this kernel\n", outputVmlinux, argv[3]); | ||
182 | } | ||
183 | /* Fill in the values */ | ||
184 | *((u_int32_t *) &inbuf[0x0c]) = htonl(ramStartOffs); | ||
185 | *((u_int32_t *) &inbuf[0x14]) = htonl(ramPages); | ||
186 | |||
187 | /* Write out the new naca */ | ||
188 | fflush(outputVmlinux); | ||
189 | fseek(outputVmlinux, ElfHeaderSize + naca, SEEK_SET); | ||
190 | if (fwrite(inbuf, 0x18, 1, outputVmlinux) != 1) { | ||
191 | death("Could not write naca\n", outputVmlinux, argv[3]); | ||
192 | } | ||
193 | printf("RAM Disk of 0x%x pages size is attached to the kernel at offset 0x%08x\n", | ||
194 | ramPages, ramStartOffs); | ||
195 | |||
196 | /* Done */ | ||
197 | fclose(outputVmlinux); | ||
198 | /* Set permission to executable */ | ||
199 | chmod(argv[3], S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH); | ||
200 | |||
201 | return 0; | ||
202 | } | ||
203 | |||
diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig index 2ce87836c671..13b262f10216 100644 --- a/arch/ppc64/Kconfig +++ b/arch/ppc64/Kconfig | |||
@@ -302,12 +302,6 @@ config GENERIC_HARDIRQS | |||
302 | bool | 302 | bool |
303 | default y | 303 | default y |
304 | 304 | ||
305 | config MSCHUNKS | ||
306 | bool | ||
307 | depends on PPC_ISERIES | ||
308 | default y | ||
309 | |||
310 | |||
311 | config PPC_RTAS | 305 | config PPC_RTAS |
312 | bool | 306 | bool |
313 | depends on PPC_PSERIES || PPC_BPA | 307 | depends on PPC_PSERIES || PPC_BPA |
@@ -350,13 +344,46 @@ config SECCOMP | |||
350 | 344 | ||
351 | If unsure, say Y. Only embedded should say N here. | 345 | If unsure, say Y. Only embedded should say N here. |
352 | 346 | ||
347 | source "fs/Kconfig.binfmt" | ||
348 | |||
349 | config HOTPLUG_CPU | ||
350 | bool "Support for hot-pluggable CPUs" | ||
351 | depends on SMP && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC) | ||
352 | select HOTPLUG | ||
353 | ---help--- | ||
354 | Say Y here to be able to turn CPUs off and on. | ||
355 | |||
356 | Say N if you are unsure. | ||
357 | |||
358 | config PROC_DEVICETREE | ||
359 | bool "Support for Open Firmware device tree in /proc" | ||
360 | depends on !PPC_ISERIES | ||
361 | help | ||
362 | This option adds a device-tree directory under /proc which contains | ||
363 | an image of the device tree that the kernel copies from Open | ||
364 | Firmware. If unsure, say Y here. | ||
365 | |||
366 | config CMDLINE_BOOL | ||
367 | bool "Default bootloader kernel arguments" | ||
368 | depends on !PPC_ISERIES | ||
369 | |||
370 | config CMDLINE | ||
371 | string "Initial kernel command string" | ||
372 | depends on CMDLINE_BOOL | ||
373 | default "console=ttyS0,9600 console=tty0 root=/dev/sda2" | ||
374 | help | ||
375 | On some platforms, there is currently no way for the boot loader to | ||
376 | pass arguments to the kernel. For these platforms, you can supply | ||
377 | some command-line options at build time by entering them here. In | ||
378 | most cases you will need to specify the root device here. | ||
379 | |||
353 | endmenu | 380 | endmenu |
354 | 381 | ||
355 | config ISA_DMA_API | 382 | config ISA_DMA_API |
356 | bool | 383 | bool |
357 | default y | 384 | default y |
358 | 385 | ||
359 | menu "General setup" | 386 | menu "Bus Options" |
360 | 387 | ||
361 | config ISA | 388 | config ISA |
362 | bool | 389 | bool |
@@ -389,45 +416,12 @@ config PCI_DOMAINS | |||
389 | bool | 416 | bool |
390 | default PCI | 417 | default PCI |
391 | 418 | ||
392 | source "fs/Kconfig.binfmt" | ||
393 | |||
394 | source "drivers/pci/Kconfig" | 419 | source "drivers/pci/Kconfig" |
395 | 420 | ||
396 | config HOTPLUG_CPU | ||
397 | bool "Support for hot-pluggable CPUs" | ||
398 | depends on SMP && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC) | ||
399 | select HOTPLUG | ||
400 | ---help--- | ||
401 | Say Y here to be able to turn CPUs off and on. | ||
402 | |||
403 | Say N if you are unsure. | ||
404 | |||
405 | source "drivers/pcmcia/Kconfig" | 421 | source "drivers/pcmcia/Kconfig" |
406 | 422 | ||
407 | source "drivers/pci/hotplug/Kconfig" | 423 | source "drivers/pci/hotplug/Kconfig" |
408 | 424 | ||
409 | config PROC_DEVICETREE | ||
410 | bool "Support for Open Firmware device tree in /proc" | ||
411 | depends on !PPC_ISERIES | ||
412 | help | ||
413 | This option adds a device-tree directory under /proc which contains | ||
414 | an image of the device tree that the kernel copies from Open | ||
415 | Firmware. If unsure, say Y here. | ||
416 | |||
417 | config CMDLINE_BOOL | ||
418 | bool "Default bootloader kernel arguments" | ||
419 | depends on !PPC_ISERIES | ||
420 | |||
421 | config CMDLINE | ||
422 | string "Initial kernel command string" | ||
423 | depends on CMDLINE_BOOL | ||
424 | default "console=ttyS0,9600 console=tty0 root=/dev/sda2" | ||
425 | help | ||
426 | On some platforms, there is currently no way for the boot loader to | ||
427 | pass arguments to the kernel. For these platforms, you can supply | ||
428 | some command-line options at build time by entering them here. In | ||
429 | most cases you will need to specify the root device here. | ||
430 | |||
431 | endmenu | 425 | endmenu |
432 | 426 | ||
433 | source "net/Kconfig" | 427 | source "net/Kconfig" |
diff --git a/arch/ppc64/Makefile b/arch/ppc64/Makefile index 731b84758331..6350cce82efb 100644 --- a/arch/ppc64/Makefile +++ b/arch/ppc64/Makefile | |||
@@ -55,6 +55,8 @@ LDFLAGS := -m elf64ppc | |||
55 | LDFLAGS_vmlinux := -Bstatic -e $(KERNELLOAD) -Ttext $(KERNELLOAD) | 55 | LDFLAGS_vmlinux := -Bstatic -e $(KERNELLOAD) -Ttext $(KERNELLOAD) |
56 | CFLAGS += -msoft-float -pipe -mminimal-toc -mtraceback=none \ | 56 | CFLAGS += -msoft-float -pipe -mminimal-toc -mtraceback=none \ |
57 | -mcall-aixdesc | 57 | -mcall-aixdesc |
58 | # Temporary hack until we have migrated to asm-powerpc | ||
59 | CPPFLAGS += -Iinclude3 | ||
58 | 60 | ||
59 | GCC_VERSION := $(call cc-version) | 61 | GCC_VERSION := $(call cc-version) |
60 | GCC_BROKEN_VEC := $(shell if [ $(GCC_VERSION) -lt 0400 ] ; then echo "y"; fi ;) | 62 | GCC_BROKEN_VEC := $(shell if [ $(GCC_VERSION) -lt 0400 ] ; then echo "y"; fi ;) |
@@ -112,6 +114,7 @@ all: $(KBUILD_IMAGE) | |||
112 | 114 | ||
113 | archclean: | 115 | archclean: |
114 | $(Q)$(MAKE) $(clean)=$(boot) | 116 | $(Q)$(MAKE) $(clean)=$(boot) |
117 | $(Q)rm -rf include3 | ||
115 | 118 | ||
116 | prepare: include/asm-ppc64/offsets.h | 119 | prepare: include/asm-ppc64/offsets.h |
117 | 120 | ||
@@ -121,6 +124,12 @@ arch/ppc64/kernel/asm-offsets.s: include/asm include/linux/version.h \ | |||
121 | include/asm-ppc64/offsets.h: arch/ppc64/kernel/asm-offsets.s | 124 | include/asm-ppc64/offsets.h: arch/ppc64/kernel/asm-offsets.s |
122 | $(call filechk,gen-asm-offsets) | 125 | $(call filechk,gen-asm-offsets) |
123 | 126 | ||
127 | # Temporary hack until we have migrated to asm-powerpc | ||
128 | include/asm: include3/asm | ||
129 | include3/asm: | ||
130 | $(Q)if [ ! -d include3 ]; then mkdir -p include3; fi; | ||
131 | $(Q)ln -fsn $(srctree)/include/asm-powerpc include3/asm | ||
132 | |||
124 | define archhelp | 133 | define archhelp |
125 | echo '* zImage - Compressed kernel image (arch/$(ARCH)/boot/zImage)' | 134 | echo '* zImage - Compressed kernel image (arch/$(ARCH)/boot/zImage)' |
126 | echo ' zImage.initrd- Compressed kernel image with initrd attached,' | 135 | echo ' zImage.initrd- Compressed kernel image with initrd attached,' |
diff --git a/arch/ppc64/boot/Makefile b/arch/ppc64/boot/Makefile index 683b2d43c15f..2c5f5e73d00c 100644 --- a/arch/ppc64/boot/Makefile +++ b/arch/ppc64/boot/Makefile | |||
@@ -22,8 +22,8 @@ | |||
22 | 22 | ||
23 | 23 | ||
24 | HOSTCC := gcc | 24 | HOSTCC := gcc |
25 | BOOTCFLAGS := $(HOSTCFLAGS) $(LINUXINCLUDE) -fno-builtin | 25 | BOOTCFLAGS := $(HOSTCFLAGS) -fno-builtin -nostdinc -isystem $(shell $(CROSS32CC) -print-file-name=include) |
26 | BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional | 26 | BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc |
27 | BOOTLFLAGS := -Ttext 0x00400000 -e _start -T $(srctree)/$(src)/zImage.lds | 27 | BOOTLFLAGS := -Ttext 0x00400000 -e _start -T $(srctree)/$(src)/zImage.lds |
28 | OBJCOPYFLAGS := contents,alloc,load,readonly,data | 28 | OBJCOPYFLAGS := contents,alloc,load,readonly,data |
29 | 29 | ||
diff --git a/arch/ppc64/boot/addnote.c b/arch/ppc64/boot/addnote.c index 719663a694bb..8041a9845ab7 100644 --- a/arch/ppc64/boot/addnote.c +++ b/arch/ppc64/boot/addnote.c | |||
@@ -157,7 +157,7 @@ main(int ac, char **av) | |||
157 | PUT_32BE(ns, strlen(arch) + 1); | 157 | PUT_32BE(ns, strlen(arch) + 1); |
158 | PUT_32BE(ns + 4, N_DESCR * 4); | 158 | PUT_32BE(ns + 4, N_DESCR * 4); |
159 | PUT_32BE(ns + 8, 0x1275); | 159 | PUT_32BE(ns + 8, 0x1275); |
160 | strcpy(&buf[ns + 12], arch); | 160 | strcpy((char *) &buf[ns + 12], arch); |
161 | ns += 12 + strlen(arch) + 1; | 161 | ns += 12 + strlen(arch) + 1; |
162 | for (i = 0; i < N_DESCR; ++i, ns += 4) | 162 | for (i = 0; i < N_DESCR; ++i, ns += 4) |
163 | PUT_32BE(ns, descr[i]); | 163 | PUT_32BE(ns, descr[i]); |
@@ -172,7 +172,7 @@ main(int ac, char **av) | |||
172 | PUT_32BE(ns, strlen(rpaname) + 1); | 172 | PUT_32BE(ns, strlen(rpaname) + 1); |
173 | PUT_32BE(ns + 4, sizeof(rpanote)); | 173 | PUT_32BE(ns + 4, sizeof(rpanote)); |
174 | PUT_32BE(ns + 8, 0x12759999); | 174 | PUT_32BE(ns + 8, 0x12759999); |
175 | strcpy(&buf[ns + 12], rpaname); | 175 | strcpy((char *) &buf[ns + 12], rpaname); |
176 | ns += 12 + ROUNDUP(strlen(rpaname) + 1); | 176 | ns += 12 + ROUNDUP(strlen(rpaname) + 1); |
177 | for (i = 0; i < N_RPA_DESCR; ++i, ns += 4) | 177 | for (i = 0; i < N_RPA_DESCR; ++i, ns += 4) |
178 | PUT_32BE(ns, rpanote[i]); | 178 | PUT_32BE(ns, rpanote[i]); |
diff --git a/arch/ppc64/boot/crt0.S b/arch/ppc64/boot/crt0.S index 04d3e74cd72f..3861e7f9cf19 100644 --- a/arch/ppc64/boot/crt0.S +++ b/arch/ppc64/boot/crt0.S | |||
@@ -9,7 +9,7 @@ | |||
9 | * NOTE: this code runs in 32 bit mode and is packaged as ELF32. | 9 | * NOTE: this code runs in 32 bit mode and is packaged as ELF32. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <asm/ppc_asm.h> | 12 | #include "ppc_asm.h" |
13 | 13 | ||
14 | .text | 14 | .text |
15 | .globl _start | 15 | .globl _start |
diff --git a/arch/ppc64/boot/div64.S b/arch/ppc64/boot/div64.S index 38f7e466d7d6..722f360a32a9 100644 --- a/arch/ppc64/boot/div64.S +++ b/arch/ppc64/boot/div64.S | |||
@@ -13,7 +13,7 @@ | |||
13 | * as published by the Free Software Foundation; either version | 13 | * as published by the Free Software Foundation; either version |
14 | * 2 of the License, or (at your option) any later version. | 14 | * 2 of the License, or (at your option) any later version. |
15 | */ | 15 | */ |
16 | #include <asm/ppc_asm.h> | 16 | #include "ppc_asm.h" |
17 | 17 | ||
18 | .globl __div64_32 | 18 | .globl __div64_32 |
19 | __div64_32: | 19 | __div64_32: |
diff --git a/arch/ppc64/boot/elf.h b/arch/ppc64/boot/elf.h new file mode 100644 index 000000000000..d4828fcf1cb9 --- /dev/null +++ b/arch/ppc64/boot/elf.h | |||
@@ -0,0 +1,149 @@ | |||
1 | #ifndef _PPC_BOOT_ELF_H_ | ||
2 | #define _PPC_BOOT_ELF_H_ | ||
3 | |||
4 | /* 32-bit ELF base types. */ | ||
5 | typedef unsigned int Elf32_Addr; | ||
6 | typedef unsigned short Elf32_Half; | ||
7 | typedef unsigned int Elf32_Off; | ||
8 | typedef signed int Elf32_Sword; | ||
9 | typedef unsigned int Elf32_Word; | ||
10 | |||
11 | /* 64-bit ELF base types. */ | ||
12 | typedef unsigned long long Elf64_Addr; | ||
13 | typedef unsigned short Elf64_Half; | ||
14 | typedef signed short Elf64_SHalf; | ||
15 | typedef unsigned long long Elf64_Off; | ||
16 | typedef signed int Elf64_Sword; | ||
17 | typedef unsigned int Elf64_Word; | ||
18 | typedef unsigned long long Elf64_Xword; | ||
19 | typedef signed long long Elf64_Sxword; | ||
20 | |||
21 | /* These constants are for the segment types stored in the image headers */ | ||
22 | #define PT_NULL 0 | ||
23 | #define PT_LOAD 1 | ||
24 | #define PT_DYNAMIC 2 | ||
25 | #define PT_INTERP 3 | ||
26 | #define PT_NOTE 4 | ||
27 | #define PT_SHLIB 5 | ||
28 | #define PT_PHDR 6 | ||
29 | #define PT_TLS 7 /* Thread local storage segment */ | ||
30 | #define PT_LOOS 0x60000000 /* OS-specific */ | ||
31 | #define PT_HIOS 0x6fffffff /* OS-specific */ | ||
32 | #define PT_LOPROC 0x70000000 | ||
33 | #define PT_HIPROC 0x7fffffff | ||
34 | #define PT_GNU_EH_FRAME 0x6474e550 | ||
35 | |||
36 | #define PT_GNU_STACK (PT_LOOS + 0x474e551) | ||
37 | |||
38 | /* These constants define the different elf file types */ | ||
39 | #define ET_NONE 0 | ||
40 | #define ET_REL 1 | ||
41 | #define ET_EXEC 2 | ||
42 | #define ET_DYN 3 | ||
43 | #define ET_CORE 4 | ||
44 | #define ET_LOPROC 0xff00 | ||
45 | #define ET_HIPROC 0xffff | ||
46 | |||
47 | /* These constants define the various ELF target machines */ | ||
48 | #define EM_NONE 0 | ||
49 | #define EM_PPC 20 /* PowerPC */ | ||
50 | #define EM_PPC64 21 /* PowerPC64 */ | ||
51 | |||
52 | #define EI_NIDENT 16 | ||
53 | |||
54 | typedef struct elf32_hdr { | ||
55 | unsigned char e_ident[EI_NIDENT]; | ||
56 | Elf32_Half e_type; | ||
57 | Elf32_Half e_machine; | ||
58 | Elf32_Word e_version; | ||
59 | Elf32_Addr e_entry; /* Entry point */ | ||
60 | Elf32_Off e_phoff; | ||
61 | Elf32_Off e_shoff; | ||
62 | Elf32_Word e_flags; | ||
63 | Elf32_Half e_ehsize; | ||
64 | Elf32_Half e_phentsize; | ||
65 | Elf32_Half e_phnum; | ||
66 | Elf32_Half e_shentsize; | ||
67 | Elf32_Half e_shnum; | ||
68 | Elf32_Half e_shstrndx; | ||
69 | } Elf32_Ehdr; | ||
70 | |||
71 | typedef struct elf64_hdr { | ||
72 | unsigned char e_ident[16]; /* ELF "magic number" */ | ||
73 | Elf64_Half e_type; | ||
74 | Elf64_Half e_machine; | ||
75 | Elf64_Word e_version; | ||
76 | Elf64_Addr e_entry; /* Entry point virtual address */ | ||
77 | Elf64_Off e_phoff; /* Program header table file offset */ | ||
78 | Elf64_Off e_shoff; /* Section header table file offset */ | ||
79 | Elf64_Word e_flags; | ||
80 | Elf64_Half e_ehsize; | ||
81 | Elf64_Half e_phentsize; | ||
82 | Elf64_Half e_phnum; | ||
83 | Elf64_Half e_shentsize; | ||
84 | Elf64_Half e_shnum; | ||
85 | Elf64_Half e_shstrndx; | ||
86 | } Elf64_Ehdr; | ||
87 | |||
88 | /* These constants define the permissions on sections in the program | ||
89 | header, p_flags. */ | ||
90 | #define PF_R 0x4 | ||
91 | #define PF_W 0x2 | ||
92 | #define PF_X 0x1 | ||
93 | |||
94 | typedef struct elf32_phdr { | ||
95 | Elf32_Word p_type; | ||
96 | Elf32_Off p_offset; | ||
97 | Elf32_Addr p_vaddr; | ||
98 | Elf32_Addr p_paddr; | ||
99 | Elf32_Word p_filesz; | ||
100 | Elf32_Word p_memsz; | ||
101 | Elf32_Word p_flags; | ||
102 | Elf32_Word p_align; | ||
103 | } Elf32_Phdr; | ||
104 | |||
105 | typedef struct elf64_phdr { | ||
106 | Elf64_Word p_type; | ||
107 | Elf64_Word p_flags; | ||
108 | Elf64_Off p_offset; /* Segment file offset */ | ||
109 | Elf64_Addr p_vaddr; /* Segment virtual address */ | ||
110 | Elf64_Addr p_paddr; /* Segment physical address */ | ||
111 | Elf64_Xword p_filesz; /* Segment size in file */ | ||
112 | Elf64_Xword p_memsz; /* Segment size in memory */ | ||
113 | Elf64_Xword p_align; /* Segment alignment, file & memory */ | ||
114 | } Elf64_Phdr; | ||
115 | |||
116 | #define EI_MAG0 0 /* e_ident[] indexes */ | ||
117 | #define EI_MAG1 1 | ||
118 | #define EI_MAG2 2 | ||
119 | #define EI_MAG3 3 | ||
120 | #define EI_CLASS 4 | ||
121 | #define EI_DATA 5 | ||
122 | #define EI_VERSION 6 | ||
123 | #define EI_OSABI 7 | ||
124 | #define EI_PAD 8 | ||
125 | |||
126 | #define ELFMAG0 0x7f /* EI_MAG */ | ||
127 | #define ELFMAG1 'E' | ||
128 | #define ELFMAG2 'L' | ||
129 | #define ELFMAG3 'F' | ||
130 | #define ELFMAG "\177ELF" | ||
131 | #define SELFMAG 4 | ||
132 | |||
133 | #define ELFCLASSNONE 0 /* EI_CLASS */ | ||
134 | #define ELFCLASS32 1 | ||
135 | #define ELFCLASS64 2 | ||
136 | #define ELFCLASSNUM 3 | ||
137 | |||
138 | #define ELFDATANONE 0 /* e_ident[EI_DATA] */ | ||
139 | #define ELFDATA2LSB 1 | ||
140 | #define ELFDATA2MSB 2 | ||
141 | |||
142 | #define EV_NONE 0 /* e_version, EI_VERSION */ | ||
143 | #define EV_CURRENT 1 | ||
144 | #define EV_NUM 2 | ||
145 | |||
146 | #define ELFOSABI_NONE 0 | ||
147 | #define ELFOSABI_LINUX 3 | ||
148 | |||
149 | #endif /* _PPC_BOOT_ELF_H_ */ | ||
diff --git a/arch/ppc64/boot/main.c b/arch/ppc64/boot/main.c index 199d9804f61c..99e68cfbe688 100644 --- a/arch/ppc64/boot/main.c +++ b/arch/ppc64/boot/main.c | |||
@@ -8,36 +8,28 @@ | |||
8 | * as published by the Free Software Foundation; either version | 8 | * as published by the Free Software Foundation; either version |
9 | * 2 of the License, or (at your option) any later version. | 9 | * 2 of the License, or (at your option) any later version. |
10 | */ | 10 | */ |
11 | #include "ppc32-types.h" | 11 | #include <stdarg.h> |
12 | #include <stddef.h> | ||
13 | #include "elf.h" | ||
14 | #include "page.h" | ||
15 | #include "string.h" | ||
16 | #include "stdio.h" | ||
17 | #include "prom.h" | ||
12 | #include "zlib.h" | 18 | #include "zlib.h" |
13 | #include <linux/elf.h> | 19 | |
14 | #include <linux/string.h> | 20 | static void gunzip(void *, int, unsigned char *, int *); |
15 | #include <asm/processor.h> | 21 | extern void flush_cache(void *, unsigned long); |
16 | #include <asm/page.h> | 22 | |
17 | |||
18 | extern void *finddevice(const char *); | ||
19 | extern int getprop(void *, const char *, void *, int); | ||
20 | extern void printf(const char *fmt, ...); | ||
21 | extern int sprintf(char *buf, const char *fmt, ...); | ||
22 | void gunzip(void *, int, unsigned char *, int *); | ||
23 | void *claim(unsigned int, unsigned int, unsigned int); | ||
24 | void flush_cache(void *, unsigned long); | ||
25 | void pause(void); | ||
26 | extern void exit(void); | ||
27 | |||
28 | unsigned long strlen(const char *s); | ||
29 | void *memmove(void *dest, const void *src, unsigned long n); | ||
30 | void *memcpy(void *dest, const void *src, unsigned long n); | ||
31 | 23 | ||
32 | /* Value picked to match that used by yaboot */ | 24 | /* Value picked to match that used by yaboot */ |
33 | #define PROG_START 0x01400000 | 25 | #define PROG_START 0x01400000 |
34 | #define RAM_END (256<<20) // Fixme: use OF */ | 26 | #define RAM_END (256<<20) // Fixme: use OF */ |
35 | 27 | ||
36 | char *avail_ram; | 28 | static char *avail_ram; |
37 | char *begin_avail, *end_avail; | 29 | static char *begin_avail, *end_avail; |
38 | char *avail_high; | 30 | static char *avail_high; |
39 | unsigned int heap_use; | 31 | static unsigned int heap_use; |
40 | unsigned int heap_max; | 32 | static unsigned int heap_max; |
41 | 33 | ||
42 | extern char _start[]; | 34 | extern char _start[]; |
43 | extern char _vmlinux_start[]; | 35 | extern char _vmlinux_start[]; |
@@ -52,9 +44,9 @@ struct addr_range { | |||
52 | unsigned long size; | 44 | unsigned long size; |
53 | unsigned long memsize; | 45 | unsigned long memsize; |
54 | }; | 46 | }; |
55 | struct addr_range vmlinux = {0, 0, 0}; | 47 | static struct addr_range vmlinux = {0, 0, 0}; |
56 | struct addr_range vmlinuz = {0, 0, 0}; | 48 | static struct addr_range vmlinuz = {0, 0, 0}; |
57 | struct addr_range initrd = {0, 0, 0}; | 49 | static struct addr_range initrd = {0, 0, 0}; |
58 | 50 | ||
59 | static char scratch[128<<10]; /* 128kB of scratch space for gunzip */ | 51 | static char scratch[128<<10]; /* 128kB of scratch space for gunzip */ |
60 | 52 | ||
@@ -64,13 +56,6 @@ typedef void (*kernel_entry_t)( unsigned long, | |||
64 | void *); | 56 | void *); |
65 | 57 | ||
66 | 58 | ||
67 | int (*prom)(void *); | ||
68 | |||
69 | void *chosen_handle; | ||
70 | void *stdin; | ||
71 | void *stdout; | ||
72 | void *stderr; | ||
73 | |||
74 | #undef DEBUG | 59 | #undef DEBUG |
75 | 60 | ||
76 | static unsigned long claim_base = PROG_START; | 61 | static unsigned long claim_base = PROG_START; |
@@ -277,7 +262,7 @@ void zfree(void *x, void *addr, unsigned nb) | |||
277 | 262 | ||
278 | #define DEFLATED 8 | 263 | #define DEFLATED 8 |
279 | 264 | ||
280 | void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp) | 265 | static void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp) |
281 | { | 266 | { |
282 | z_stream s; | 267 | z_stream s; |
283 | int r, i, flags; | 268 | int r, i, flags; |
diff --git a/arch/ppc64/boot/page.h b/arch/ppc64/boot/page.h new file mode 100644 index 000000000000..14eca30fef64 --- /dev/null +++ b/arch/ppc64/boot/page.h | |||
@@ -0,0 +1,34 @@ | |||
1 | #ifndef _PPC_BOOT_PAGE_H | ||
2 | #define _PPC_BOOT_PAGE_H | ||
3 | /* | ||
4 | * Copyright (C) 2001 PPC64 Team, IBM Corp | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #ifdef __ASSEMBLY__ | ||
13 | #define ASM_CONST(x) x | ||
14 | #else | ||
15 | #define __ASM_CONST(x) x##UL | ||
16 | #define ASM_CONST(x) __ASM_CONST(x) | ||
17 | #endif | ||
18 | |||
19 | /* PAGE_SHIFT determines the page size */ | ||
20 | #define PAGE_SHIFT 12 | ||
21 | #define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) | ||
22 | #define PAGE_MASK (~(PAGE_SIZE-1)) | ||
23 | |||
24 | /* align addr on a size boundary - adjust address up/down if needed */ | ||
25 | #define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1))) | ||
26 | #define _ALIGN_DOWN(addr,size) ((addr)&(~((size)-1))) | ||
27 | |||
28 | /* align addr on a size boundary - adjust address up if needed */ | ||
29 | #define _ALIGN(addr,size) _ALIGN_UP(addr,size) | ||
30 | |||
31 | /* to align the pointer to the (next) page boundary */ | ||
32 | #define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) | ||
33 | |||
34 | #endif /* _PPC_BOOT_PAGE_H */ | ||
diff --git a/arch/ppc64/boot/ppc32-types.h b/arch/ppc64/boot/ppc32-types.h deleted file mode 100644 index f7b8884f8f70..000000000000 --- a/arch/ppc64/boot/ppc32-types.h +++ /dev/null | |||
@@ -1,36 +0,0 @@ | |||
1 | #ifndef _PPC64_TYPES_H | ||
2 | #define _PPC64_TYPES_H | ||
3 | |||
4 | typedef __signed__ char __s8; | ||
5 | typedef unsigned char __u8; | ||
6 | |||
7 | typedef __signed__ short __s16; | ||
8 | typedef unsigned short __u16; | ||
9 | |||
10 | typedef __signed__ int __s32; | ||
11 | typedef unsigned int __u32; | ||
12 | |||
13 | typedef __signed__ long long __s64; | ||
14 | typedef unsigned long long __u64; | ||
15 | |||
16 | typedef signed char s8; | ||
17 | typedef unsigned char u8; | ||
18 | |||
19 | typedef signed short s16; | ||
20 | typedef unsigned short u16; | ||
21 | |||
22 | typedef signed int s32; | ||
23 | typedef unsigned int u32; | ||
24 | |||
25 | typedef signed long long s64; | ||
26 | typedef unsigned long long u64; | ||
27 | |||
28 | typedef struct { | ||
29 | __u32 u[4]; | ||
30 | } __attribute((aligned(16))) __vector128; | ||
31 | |||
32 | #define BITS_PER_LONG 32 | ||
33 | |||
34 | typedef __vector128 vector128; | ||
35 | |||
36 | #endif /* _PPC64_TYPES_H */ | ||
diff --git a/arch/ppc64/boot/ppc_asm.h b/arch/ppc64/boot/ppc_asm.h new file mode 100644 index 000000000000..1c2c2817f9b7 --- /dev/null +++ b/arch/ppc64/boot/ppc_asm.h | |||
@@ -0,0 +1,62 @@ | |||
1 | #ifndef _PPC64_PPC_ASM_H | ||
2 | #define _PPC64_PPC_ASM_H | ||
3 | /* | ||
4 | * | ||
5 | * Definitions used by various bits of low-level assembly code on PowerPC. | ||
6 | * | ||
7 | * Copyright (C) 1995-1999 Gary Thomas, Paul Mackerras, Cort Dougan. | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License | ||
11 | * as published by the Free Software Foundation; either version | ||
12 | * 2 of the License, or (at your option) any later version. | ||
13 | */ | ||
14 | |||
15 | /* Condition Register Bit Fields */ | ||
16 | |||
17 | #define cr0 0 | ||
18 | #define cr1 1 | ||
19 | #define cr2 2 | ||
20 | #define cr3 3 | ||
21 | #define cr4 4 | ||
22 | #define cr5 5 | ||
23 | #define cr6 6 | ||
24 | #define cr7 7 | ||
25 | |||
26 | |||
27 | /* General Purpose Registers (GPRs) */ | ||
28 | |||
29 | #define r0 0 | ||
30 | #define r1 1 | ||
31 | #define r2 2 | ||
32 | #define r3 3 | ||
33 | #define r4 4 | ||
34 | #define r5 5 | ||
35 | #define r6 6 | ||
36 | #define r7 7 | ||
37 | #define r8 8 | ||
38 | #define r9 9 | ||
39 | #define r10 10 | ||
40 | #define r11 11 | ||
41 | #define r12 12 | ||
42 | #define r13 13 | ||
43 | #define r14 14 | ||
44 | #define r15 15 | ||
45 | #define r16 16 | ||
46 | #define r17 17 | ||
47 | #define r18 18 | ||
48 | #define r19 19 | ||
49 | #define r20 20 | ||
50 | #define r21 21 | ||
51 | #define r22 22 | ||
52 | #define r23 23 | ||
53 | #define r24 24 | ||
54 | #define r25 25 | ||
55 | #define r26 26 | ||
56 | #define r27 27 | ||
57 | #define r28 28 | ||
58 | #define r29 29 | ||
59 | #define r30 30 | ||
60 | #define r31 31 | ||
61 | |||
62 | #endif /* _PPC64_PPC_ASM_H */ | ||
diff --git a/arch/ppc64/boot/prom.c b/arch/ppc64/boot/prom.c index 5e48b80ff5a0..4bea2f4dcb06 100644 --- a/arch/ppc64/boot/prom.c +++ b/arch/ppc64/boot/prom.c | |||
@@ -7,43 +7,19 @@ | |||
7 | * 2 of the License, or (at your option) any later version. | 7 | * 2 of the License, or (at your option) any later version. |
8 | */ | 8 | */ |
9 | #include <stdarg.h> | 9 | #include <stdarg.h> |
10 | #include <linux/types.h> | 10 | #include <stddef.h> |
11 | #include <linux/string.h> | 11 | #include "string.h" |
12 | #include <linux/ctype.h> | 12 | #include "stdio.h" |
13 | 13 | #include "prom.h" | |
14 | extern __u32 __div64_32(unsigned long long *dividend, __u32 divisor); | ||
15 | |||
16 | /* The unnecessary pointer compare is there | ||
17 | * to check for type safety (n must be 64bit) | ||
18 | */ | ||
19 | # define do_div(n,base) ({ \ | ||
20 | __u32 __base = (base); \ | ||
21 | __u32 __rem; \ | ||
22 | (void)(((typeof((n)) *)0) == ((unsigned long long *)0)); \ | ||
23 | if (((n) >> 32) == 0) { \ | ||
24 | __rem = (__u32)(n) % __base; \ | ||
25 | (n) = (__u32)(n) / __base; \ | ||
26 | } else \ | ||
27 | __rem = __div64_32(&(n), __base); \ | ||
28 | __rem; \ | ||
29 | }) | ||
30 | 14 | ||
31 | int (*prom)(void *); | 15 | int (*prom)(void *); |
32 | 16 | ||
33 | void *chosen_handle; | 17 | void *chosen_handle; |
18 | |||
34 | void *stdin; | 19 | void *stdin; |
35 | void *stdout; | 20 | void *stdout; |
36 | void *stderr; | 21 | void *stderr; |
37 | 22 | ||
38 | void exit(void); | ||
39 | void *finddevice(const char *name); | ||
40 | int getprop(void *phandle, const char *name, void *buf, int buflen); | ||
41 | void chrpboot(int a1, int a2, void *prom); /* in main.c */ | ||
42 | |||
43 | int printf(char *fmt, ...); | ||
44 | |||
45 | /* there is no convenient header to get this from... -- paulus */ | ||
46 | extern unsigned long strlen(const char *); | ||
47 | 23 | ||
48 | int | 24 | int |
49 | write(void *handle, void *ptr, int nb) | 25 | write(void *handle, void *ptr, int nb) |
@@ -210,107 +186,6 @@ fputs(char *str, void *f) | |||
210 | return write(f, str, n) == n? 0: -1; | 186 | return write(f, str, n) == n? 0: -1; |
211 | } | 187 | } |
212 | 188 | ||
213 | int | ||
214 | readchar(void) | ||
215 | { | ||
216 | char ch; | ||
217 | |||
218 | for (;;) { | ||
219 | switch (read(stdin, &ch, 1)) { | ||
220 | case 1: | ||
221 | return ch; | ||
222 | case -1: | ||
223 | printf("read(stdin) returned -1\r\n"); | ||
224 | return -1; | ||
225 | } | ||
226 | } | ||
227 | } | ||
228 | |||
229 | static char line[256]; | ||
230 | static char *lineptr; | ||
231 | static int lineleft; | ||
232 | |||
233 | int | ||
234 | getchar(void) | ||
235 | { | ||
236 | int c; | ||
237 | |||
238 | if (lineleft == 0) { | ||
239 | lineptr = line; | ||
240 | for (;;) { | ||
241 | c = readchar(); | ||
242 | if (c == -1 || c == 4) | ||
243 | break; | ||
244 | if (c == '\r' || c == '\n') { | ||
245 | *lineptr++ = '\n'; | ||
246 | putchar('\n'); | ||
247 | break; | ||
248 | } | ||
249 | switch (c) { | ||
250 | case 0177: | ||
251 | case '\b': | ||
252 | if (lineptr > line) { | ||
253 | putchar('\b'); | ||
254 | putchar(' '); | ||
255 | putchar('\b'); | ||
256 | --lineptr; | ||
257 | } | ||
258 | break; | ||
259 | case 'U' & 0x1F: | ||
260 | while (lineptr > line) { | ||
261 | putchar('\b'); | ||
262 | putchar(' '); | ||
263 | putchar('\b'); | ||
264 | --lineptr; | ||
265 | } | ||
266 | break; | ||
267 | default: | ||
268 | if (lineptr >= &line[sizeof(line) - 1]) | ||
269 | putchar('\a'); | ||
270 | else { | ||
271 | putchar(c); | ||
272 | *lineptr++ = c; | ||
273 | } | ||
274 | } | ||
275 | } | ||
276 | lineleft = lineptr - line; | ||
277 | lineptr = line; | ||
278 | } | ||
279 | if (lineleft == 0) | ||
280 | return -1; | ||
281 | --lineleft; | ||
282 | return *lineptr++; | ||
283 | } | ||
284 | |||
285 | |||
286 | |||
287 | /* String functions lifted from lib/vsprintf.c and lib/ctype.c */ | ||
288 | unsigned char _ctype[] = { | ||
289 | _C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */ | ||
290 | _C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */ | ||
291 | _C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */ | ||
292 | _C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */ | ||
293 | _S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */ | ||
294 | _P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */ | ||
295 | _D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */ | ||
296 | _D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */ | ||
297 | _P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */ | ||
298 | _U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */ | ||
299 | _U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */ | ||
300 | _U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */ | ||
301 | _P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */ | ||
302 | _L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */ | ||
303 | _L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */ | ||
304 | _L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */ | ||
305 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */ | ||
306 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */ | ||
307 | _S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */ | ||
308 | _P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */ | ||
309 | _U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */ | ||
310 | _U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */ | ||
311 | _L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */ | ||
312 | _L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */ | ||
313 | |||
314 | size_t strnlen(const char * s, size_t count) | 189 | size_t strnlen(const char * s, size_t count) |
315 | { | 190 | { |
316 | const char *sc; | 191 | const char *sc; |
@@ -320,44 +195,30 @@ size_t strnlen(const char * s, size_t count) | |||
320 | return sc - s; | 195 | return sc - s; |
321 | } | 196 | } |
322 | 197 | ||
323 | unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base) | 198 | extern unsigned int __div64_32(unsigned long long *dividend, |
324 | { | 199 | unsigned int divisor); |
325 | unsigned long result = 0,value; | ||
326 | 200 | ||
327 | if (!base) { | 201 | /* The unnecessary pointer compare is there |
328 | base = 10; | 202 | * to check for type safety (n must be 64bit) |
329 | if (*cp == '0') { | 203 | */ |
330 | base = 8; | 204 | # define do_div(n,base) ({ \ |
331 | cp++; | 205 | unsigned int __base = (base); \ |
332 | if ((*cp == 'x') && isxdigit(cp[1])) { | 206 | unsigned int __rem; \ |
333 | cp++; | 207 | (void)(((typeof((n)) *)0) == ((unsigned long long *)0)); \ |
334 | base = 16; | 208 | if (((n) >> 32) == 0) { \ |
335 | } | 209 | __rem = (unsigned int)(n) % __base; \ |
336 | } | 210 | (n) = (unsigned int)(n) / __base; \ |
337 | } | 211 | } else \ |
338 | while (isxdigit(*cp) && | 212 | __rem = __div64_32(&(n), __base); \ |
339 | (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) { | 213 | __rem; \ |
340 | result = result*base + value; | 214 | }) |
341 | cp++; | ||
342 | } | ||
343 | if (endp) | ||
344 | *endp = (char *)cp; | ||
345 | return result; | ||
346 | } | ||
347 | |||
348 | long simple_strtol(const char *cp,char **endp,unsigned int base) | ||
349 | { | ||
350 | if(*cp=='-') | ||
351 | return -simple_strtoul(cp+1,endp,base); | ||
352 | return simple_strtoul(cp,endp,base); | ||
353 | } | ||
354 | 215 | ||
355 | static int skip_atoi(const char **s) | 216 | static int skip_atoi(const char **s) |
356 | { | 217 | { |
357 | int i=0; | 218 | int i, c; |
358 | 219 | ||
359 | while (isdigit(**s)) | 220 | for (i = 0; '0' <= (c = **s) && c <= '9'; ++*s) |
360 | i = i*10 + *((*s)++) - '0'; | 221 | i = i*10 + c - '0'; |
361 | return i; | 222 | return i; |
362 | } | 223 | } |
363 | 224 | ||
@@ -436,9 +297,6 @@ static char * number(char * str, unsigned long long num, int base, int size, int | |||
436 | return str; | 297 | return str; |
437 | } | 298 | } |
438 | 299 | ||
439 | /* Forward decl. needed for IP address printing stuff... */ | ||
440 | int sprintf(char * buf, const char *fmt, ...); | ||
441 | |||
442 | int vsprintf(char *buf, const char *fmt, va_list args) | 300 | int vsprintf(char *buf, const char *fmt, va_list args) |
443 | { | 301 | { |
444 | int len; | 302 | int len; |
@@ -477,7 +335,7 @@ int vsprintf(char *buf, const char *fmt, va_list args) | |||
477 | 335 | ||
478 | /* get field width */ | 336 | /* get field width */ |
479 | field_width = -1; | 337 | field_width = -1; |
480 | if (isdigit(*fmt)) | 338 | if ('0' <= *fmt && *fmt <= '9') |
481 | field_width = skip_atoi(&fmt); | 339 | field_width = skip_atoi(&fmt); |
482 | else if (*fmt == '*') { | 340 | else if (*fmt == '*') { |
483 | ++fmt; | 341 | ++fmt; |
@@ -493,7 +351,7 @@ int vsprintf(char *buf, const char *fmt, va_list args) | |||
493 | precision = -1; | 351 | precision = -1; |
494 | if (*fmt == '.') { | 352 | if (*fmt == '.') { |
495 | ++fmt; | 353 | ++fmt; |
496 | if (isdigit(*fmt)) | 354 | if ('0' <= *fmt && *fmt <= '9') |
497 | precision = skip_atoi(&fmt); | 355 | precision = skip_atoi(&fmt); |
498 | else if (*fmt == '*') { | 356 | else if (*fmt == '*') { |
499 | ++fmt; | 357 | ++fmt; |
@@ -628,7 +486,7 @@ int sprintf(char * buf, const char *fmt, ...) | |||
628 | static char sprint_buf[1024]; | 486 | static char sprint_buf[1024]; |
629 | 487 | ||
630 | int | 488 | int |
631 | printf(char *fmt, ...) | 489 | printf(const char *fmt, ...) |
632 | { | 490 | { |
633 | va_list args; | 491 | va_list args; |
634 | int n; | 492 | int n; |
diff --git a/arch/ppc64/boot/prom.h b/arch/ppc64/boot/prom.h new file mode 100644 index 000000000000..96ab5aec740c --- /dev/null +++ b/arch/ppc64/boot/prom.h | |||
@@ -0,0 +1,18 @@ | |||
1 | #ifndef _PPC_BOOT_PROM_H_ | ||
2 | #define _PPC_BOOT_PROM_H_ | ||
3 | |||
4 | extern int (*prom) (void *); | ||
5 | extern void *chosen_handle; | ||
6 | |||
7 | extern void *stdin; | ||
8 | extern void *stdout; | ||
9 | extern void *stderr; | ||
10 | |||
11 | extern int write(void *handle, void *ptr, int nb); | ||
12 | extern int read(void *handle, void *ptr, int nb); | ||
13 | extern void exit(void); | ||
14 | extern void pause(void); | ||
15 | extern void *finddevice(const char *); | ||
16 | extern void *claim(unsigned long virt, unsigned long size, unsigned long align); | ||
17 | extern int getprop(void *phandle, const char *name, void *buf, int buflen); | ||
18 | #endif /* _PPC_BOOT_PROM_H_ */ | ||
diff --git a/arch/ppc64/boot/stdio.h b/arch/ppc64/boot/stdio.h new file mode 100644 index 000000000000..24bd3a8dee94 --- /dev/null +++ b/arch/ppc64/boot/stdio.h | |||
@@ -0,0 +1,16 @@ | |||
1 | #ifndef _PPC_BOOT_STDIO_H_ | ||
2 | #define _PPC_BOOT_STDIO_H_ | ||
3 | |||
4 | extern int printf(const char *fmt, ...); | ||
5 | |||
6 | extern int sprintf(char *buf, const char *fmt, ...); | ||
7 | |||
8 | extern int vsprintf(char *buf, const char *fmt, va_list args); | ||
9 | |||
10 | extern int putc(int c, void *f); | ||
11 | extern int putchar(int c); | ||
12 | extern int getchar(void); | ||
13 | |||
14 | extern int fputs(char *str, void *f); | ||
15 | |||
16 | #endif /* _PPC_BOOT_STDIO_H_ */ | ||
diff --git a/arch/ppc64/boot/string.S b/arch/ppc64/boot/string.S index ba5f2d21c9ea..7ade87ae7718 100644 --- a/arch/ppc64/boot/string.S +++ b/arch/ppc64/boot/string.S | |||
@@ -9,7 +9,7 @@ | |||
9 | * NOTE: this code runs in 32 bit mode and is packaged as ELF32. | 9 | * NOTE: this code runs in 32 bit mode and is packaged as ELF32. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <asm/ppc_asm.h> | 12 | #include "ppc_asm.h" |
13 | 13 | ||
14 | .text | 14 | .text |
15 | .globl strcpy | 15 | .globl strcpy |
diff --git a/arch/ppc64/boot/string.h b/arch/ppc64/boot/string.h new file mode 100644 index 000000000000..9289258bcbd6 --- /dev/null +++ b/arch/ppc64/boot/string.h | |||
@@ -0,0 +1,16 @@ | |||
1 | #ifndef _PPC_BOOT_STRING_H_ | ||
2 | #define _PPC_BOOT_STRING_H_ | ||
3 | |||
4 | extern char *strcpy(char *dest, const char *src); | ||
5 | extern char *strncpy(char *dest, const char *src, size_t n); | ||
6 | extern char *strcat(char *dest, const char *src); | ||
7 | extern int strcmp(const char *s1, const char *s2); | ||
8 | extern size_t strlen(const char *s); | ||
9 | extern size_t strnlen(const char *s, size_t count); | ||
10 | |||
11 | extern void *memset(void *s, int c, size_t n); | ||
12 | extern void *memmove(void *dest, const void *src, unsigned long n); | ||
13 | extern void *memcpy(void *dest, const void *src, unsigned long n); | ||
14 | extern int memcmp(const void *s1, const void *s2, size_t n); | ||
15 | |||
16 | #endif /* _PPC_BOOT_STRING_H_ */ | ||
diff --git a/arch/ppc64/boot/zlib.c b/arch/ppc64/boot/zlib.c index 78837e884b8b..0d910cd2079d 100644 --- a/arch/ppc64/boot/zlib.c +++ b/arch/ppc64/boot/zlib.c | |||
@@ -107,7 +107,7 @@ extern void *memcpy(void *, const void *, unsigned long); | |||
107 | 107 | ||
108 | /* Diagnostic functions */ | 108 | /* Diagnostic functions */ |
109 | #ifdef DEBUG_ZLIB | 109 | #ifdef DEBUG_ZLIB |
110 | # include <stdio.h> | 110 | # include "stdio.h" |
111 | # ifndef verbose | 111 | # ifndef verbose |
112 | # define verbose 0 | 112 | # define verbose 0 |
113 | # endif | 113 | # endif |
diff --git a/arch/ppc64/configs/g5_defconfig b/arch/ppc64/configs/g5_defconfig index ab567741e80e..fc83d9330282 100644 --- a/arch/ppc64/configs/g5_defconfig +++ b/arch/ppc64/configs/g5_defconfig | |||
@@ -103,10 +103,10 @@ CONFIG_PREEMPT_NONE=y | |||
103 | # CONFIG_PREEMPT_VOLUNTARY is not set | 103 | # CONFIG_PREEMPT_VOLUNTARY is not set |
104 | # CONFIG_PREEMPT is not set | 104 | # CONFIG_PREEMPT is not set |
105 | # CONFIG_PREEMPT_BKL is not set | 105 | # CONFIG_PREEMPT_BKL is not set |
106 | CONFIG_HZ_100=y | 106 | # CONFIG_HZ_100 is not set |
107 | # CONFIG_HZ_250 is not set | 107 | CONFIG_HZ_250=y |
108 | # CONFIG_HZ_1000 is not set | 108 | # CONFIG_HZ_1000 is not set |
109 | CONFIG_HZ=100 | 109 | CONFIG_HZ=250 |
110 | CONFIG_GENERIC_HARDIRQS=y | 110 | CONFIG_GENERIC_HARDIRQS=y |
111 | CONFIG_SECCOMP=y | 111 | CONFIG_SECCOMP=y |
112 | CONFIG_ISA_DMA_API=y | 112 | CONFIG_ISA_DMA_API=y |
diff --git a/arch/ppc64/configs/iSeries_defconfig b/arch/ppc64/configs/iSeries_defconfig index 394ba18b58c7..013d4e0e4003 100644 --- a/arch/ppc64/configs/iSeries_defconfig +++ b/arch/ppc64/configs/iSeries_defconfig | |||
@@ -94,12 +94,11 @@ CONFIG_PREEMPT_NONE=y | |||
94 | # CONFIG_PREEMPT_VOLUNTARY is not set | 94 | # CONFIG_PREEMPT_VOLUNTARY is not set |
95 | # CONFIG_PREEMPT is not set | 95 | # CONFIG_PREEMPT is not set |
96 | # CONFIG_PREEMPT_BKL is not set | 96 | # CONFIG_PREEMPT_BKL is not set |
97 | CONFIG_HZ_100=y | 97 | # CONFIG_HZ_100 is not set |
98 | # CONFIG_HZ_250 is not set | 98 | CONFIG_HZ_250=y |
99 | # CONFIG_HZ_1000 is not set | 99 | # CONFIG_HZ_1000 is not set |
100 | CONFIG_HZ=100 | 100 | CONFIG_HZ=250 |
101 | CONFIG_GENERIC_HARDIRQS=y | 101 | CONFIG_GENERIC_HARDIRQS=y |
102 | CONFIG_MSCHUNKS=y | ||
103 | CONFIG_LPARCFG=y | 102 | CONFIG_LPARCFG=y |
104 | CONFIG_SECCOMP=y | 103 | CONFIG_SECCOMP=y |
105 | CONFIG_ISA_DMA_API=y | 104 | CONFIG_ISA_DMA_API=y |
diff --git a/arch/ppc64/configs/maple_defconfig b/arch/ppc64/configs/maple_defconfig index 2033fe663dbe..dd42892cd873 100644 --- a/arch/ppc64/configs/maple_defconfig +++ b/arch/ppc64/configs/maple_defconfig | |||
@@ -103,10 +103,10 @@ CONFIG_PREEMPT_NONE=y | |||
103 | # CONFIG_PREEMPT_VOLUNTARY is not set | 103 | # CONFIG_PREEMPT_VOLUNTARY is not set |
104 | # CONFIG_PREEMPT is not set | 104 | # CONFIG_PREEMPT is not set |
105 | # CONFIG_PREEMPT_BKL is not set | 105 | # CONFIG_PREEMPT_BKL is not set |
106 | CONFIG_HZ_100=y | 106 | # CONFIG_HZ_100 is not set |
107 | # CONFIG_HZ_250 is not set | 107 | CONFIG_HZ_250=y |
108 | # CONFIG_HZ_1000 is not set | 108 | # CONFIG_HZ_1000 is not set |
109 | CONFIG_HZ=100 | 109 | CONFIG_HZ=250 |
110 | CONFIG_GENERIC_HARDIRQS=y | 110 | CONFIG_GENERIC_HARDIRQS=y |
111 | CONFIG_SECCOMP=y | 111 | CONFIG_SECCOMP=y |
112 | CONFIG_ISA_DMA_API=y | 112 | CONFIG_ISA_DMA_API=y |
diff --git a/arch/ppc64/configs/pSeries_defconfig b/arch/ppc64/configs/pSeries_defconfig index 297fd5229487..29f7b80b0efc 100644 --- a/arch/ppc64/configs/pSeries_defconfig +++ b/arch/ppc64/configs/pSeries_defconfig | |||
@@ -112,10 +112,10 @@ CONFIG_PREEMPT_NONE=y | |||
112 | # CONFIG_PREEMPT_VOLUNTARY is not set | 112 | # CONFIG_PREEMPT_VOLUNTARY is not set |
113 | # CONFIG_PREEMPT is not set | 113 | # CONFIG_PREEMPT is not set |
114 | # CONFIG_PREEMPT_BKL is not set | 114 | # CONFIG_PREEMPT_BKL is not set |
115 | CONFIG_HZ_100=y | 115 | # CONFIG_HZ_100 is not set |
116 | # CONFIG_HZ_250 is not set | 116 | CONFIG_HZ_250=y |
117 | # CONFIG_HZ_1000 is not set | 117 | # CONFIG_HZ_1000 is not set |
118 | CONFIG_HZ=100 | 118 | CONFIG_HZ=250 |
119 | CONFIG_EEH=y | 119 | CONFIG_EEH=y |
120 | CONFIG_GENERIC_HARDIRQS=y | 120 | CONFIG_GENERIC_HARDIRQS=y |
121 | CONFIG_PPC_RTAS=y | 121 | CONFIG_PPC_RTAS=y |
diff --git a/arch/ppc64/defconfig b/arch/ppc64/defconfig index c361e7727b7a..7cb4750bb7a9 100644 --- a/arch/ppc64/defconfig +++ b/arch/ppc64/defconfig | |||
@@ -114,10 +114,10 @@ CONFIG_PREEMPT_NONE=y | |||
114 | # CONFIG_PREEMPT_VOLUNTARY is not set | 114 | # CONFIG_PREEMPT_VOLUNTARY is not set |
115 | # CONFIG_PREEMPT is not set | 115 | # CONFIG_PREEMPT is not set |
116 | # CONFIG_PREEMPT_BKL is not set | 116 | # CONFIG_PREEMPT_BKL is not set |
117 | CONFIG_HZ_100=y | 117 | # CONFIG_HZ_100 is not set |
118 | # CONFIG_HZ_250 is not set | 118 | CONFIG_HZ_250=y |
119 | # CONFIG_HZ_1000 is not set | 119 | # CONFIG_HZ_1000 is not set |
120 | CONFIG_HZ=100 | 120 | CONFIG_HZ=250 |
121 | CONFIG_EEH=y | 121 | CONFIG_EEH=y |
122 | CONFIG_GENERIC_HARDIRQS=y | 122 | CONFIG_GENERIC_HARDIRQS=y |
123 | CONFIG_PPC_RTAS=y | 123 | CONFIG_PPC_RTAS=y |
diff --git a/arch/ppc64/kernel/LparData.c b/arch/ppc64/kernel/LparData.c index 1c11031c838e..0a9c23ca2f0c 100644 --- a/arch/ppc64/kernel/LparData.c +++ b/arch/ppc64/kernel/LparData.c | |||
@@ -51,6 +51,17 @@ struct HvReleaseData hvReleaseData = { | |||
51 | 0xf4, 0x4b, 0xf6, 0xf4 }, | 51 | 0xf4, 0x4b, 0xf6, 0xf4 }, |
52 | }; | 52 | }; |
53 | 53 | ||
54 | /* | ||
55 | * The NACA. The first dword of the naca is required by the iSeries | ||
56 | * hypervisor to point to itVpdAreas. The hypervisor finds the NACA | ||
57 | * through the pointer in hvReleaseData. | ||
58 | */ | ||
59 | struct naca_struct naca = { | ||
60 | .xItVpdAreas = &itVpdAreas, | ||
61 | .xRamDisk = 0, | ||
62 | .xRamDiskSize = 0, | ||
63 | }; | ||
64 | |||
54 | extern void system_reset_iSeries(void); | 65 | extern void system_reset_iSeries(void); |
55 | extern void machine_check_iSeries(void); | 66 | extern void machine_check_iSeries(void); |
56 | extern void data_access_iSeries(void); | 67 | extern void data_access_iSeries(void); |
@@ -214,29 +225,3 @@ struct ItVpdAreas itVpdAreas = { | |||
214 | 0,0 | 225 | 0,0 |
215 | } | 226 | } |
216 | }; | 227 | }; |
217 | |||
218 | struct msChunks msChunks; | ||
219 | EXPORT_SYMBOL(msChunks); | ||
220 | |||
221 | /* Depending on whether this is called from iSeries or pSeries setup | ||
222 | * code, the location of the msChunks struct may or may not have | ||
223 | * to be reloc'd, so we force the caller to do that for us by passing | ||
224 | * in a pointer to the structure. | ||
225 | */ | ||
226 | unsigned long | ||
227 | msChunks_alloc(unsigned long mem, unsigned long num_chunks, unsigned long chunk_size) | ||
228 | { | ||
229 | unsigned long offset = reloc_offset(); | ||
230 | struct msChunks *_msChunks = PTRRELOC(&msChunks); | ||
231 | |||
232 | _msChunks->num_chunks = num_chunks; | ||
233 | _msChunks->chunk_size = chunk_size; | ||
234 | _msChunks->chunk_shift = __ilog2(chunk_size); | ||
235 | _msChunks->chunk_mask = (1UL<<_msChunks->chunk_shift)-1; | ||
236 | |||
237 | mem = _ALIGN(mem, sizeof(msChunks_entry)); | ||
238 | _msChunks->abs = (msChunks_entry *)(mem + offset); | ||
239 | mem += num_chunks * sizeof(msChunks_entry); | ||
240 | |||
241 | return mem; | ||
242 | } | ||
diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile index 2ecccb6b4f8c..f4b3bfcc109d 100644 --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile | |||
@@ -11,7 +11,7 @@ obj-y := setup.o entry.o traps.o irq.o idle.o dma.o \ | |||
11 | udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \ | 11 | udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \ |
12 | ptrace32.o signal32.o rtc.o init_task.o \ | 12 | ptrace32.o signal32.o rtc.o init_task.o \ |
13 | lmb.o cputable.o cpu_setup_power4.o idle_power4.o \ | 13 | lmb.o cputable.o cpu_setup_power4.o idle_power4.o \ |
14 | iommu.o sysfs.o vdso.o pmc.o | 14 | iommu.o sysfs.o vdso.o pmc.o firmware.o |
15 | obj-y += vdso32/ vdso64/ | 15 | obj-y += vdso32/ vdso64/ |
16 | 16 | ||
17 | obj-$(CONFIG_PPC_OF) += of_device.o | 17 | obj-$(CONFIG_PPC_OF) += of_device.o |
@@ -50,7 +50,10 @@ obj-$(CONFIG_LPARCFG) += lparcfg.o | |||
50 | obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o | 50 | obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o |
51 | obj-$(CONFIG_BOOTX_TEXT) += btext.o | 51 | obj-$(CONFIG_BOOTX_TEXT) += btext.o |
52 | obj-$(CONFIG_HVCS) += hvcserver.o | 52 | obj-$(CONFIG_HVCS) += hvcserver.o |
53 | obj-$(CONFIG_IBMVIO) += vio.o | 53 | |
54 | vio-obj-$(CONFIG_PPC_PSERIES) += pSeries_vio.o | ||
55 | vio-obj-$(CONFIG_PPC_ISERIES) += iSeries_vio.o | ||
56 | obj-$(CONFIG_IBMVIO) += vio.o $(vio-obj-y) | ||
54 | obj-$(CONFIG_XICS) += xics.o | 57 | obj-$(CONFIG_XICS) += xics.o |
55 | obj-$(CONFIG_MPIC) += mpic.o | 58 | obj-$(CONFIG_MPIC) += mpic.o |
56 | 59 | ||
diff --git a/arch/ppc64/kernel/asm-offsets.c b/arch/ppc64/kernel/asm-offsets.c index abb9e5b5da03..17e35d0fed09 100644 --- a/arch/ppc64/kernel/asm-offsets.c +++ b/arch/ppc64/kernel/asm-offsets.c | |||
@@ -94,7 +94,8 @@ int main(void) | |||
94 | DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); | 94 | DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); |
95 | DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); | 95 | DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); |
96 | #ifdef CONFIG_HUGETLB_PAGE | 96 | #ifdef CONFIG_HUGETLB_PAGE |
97 | DEFINE(PACAHTLBSEGS, offsetof(struct paca_struct, context.htlb_segs)); | 97 | DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas)); |
98 | DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas)); | ||
98 | #endif /* CONFIG_HUGETLB_PAGE */ | 99 | #endif /* CONFIG_HUGETLB_PAGE */ |
99 | DEFINE(PACADEFAULTDECR, offsetof(struct paca_struct, default_decr)); | 100 | DEFINE(PACADEFAULTDECR, offsetof(struct paca_struct, default_decr)); |
100 | DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); | 101 | DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); |
diff --git a/arch/ppc64/kernel/cputable.c b/arch/ppc64/kernel/cputable.c index 77cec42f9525..4847f2ac8c9f 100644 --- a/arch/ppc64/kernel/cputable.c +++ b/arch/ppc64/kernel/cputable.c | |||
@@ -5,7 +5,7 @@ | |||
5 | * | 5 | * |
6 | * Modifications for ppc64: | 6 | * Modifications for ppc64: |
7 | * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com> | 7 | * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com> |
8 | * | 8 | * |
9 | * This program is free software; you can redistribute it and/or | 9 | * This program is free software; you can redistribute it and/or |
10 | * modify it under the terms of the GNU General Public License | 10 | * modify it under the terms of the GNU General Public License |
11 | * as published by the Free Software Foundation; either version | 11 | * as published by the Free Software Foundation; either version |
@@ -60,7 +60,6 @@ struct cpu_spec cpu_specs[] = { | |||
60 | .icache_bsize = 128, | 60 | .icache_bsize = 128, |
61 | .dcache_bsize = 128, | 61 | .dcache_bsize = 128, |
62 | .cpu_setup = __setup_cpu_power3, | 62 | .cpu_setup = __setup_cpu_power3, |
63 | .firmware_features = COMMON_PPC64_FW, | ||
64 | }, | 63 | }, |
65 | { /* Power3+ */ | 64 | { /* Power3+ */ |
66 | .pvr_mask = 0xffff0000, | 65 | .pvr_mask = 0xffff0000, |
@@ -73,7 +72,6 @@ struct cpu_spec cpu_specs[] = { | |||
73 | .icache_bsize = 128, | 72 | .icache_bsize = 128, |
74 | .dcache_bsize = 128, | 73 | .dcache_bsize = 128, |
75 | .cpu_setup = __setup_cpu_power3, | 74 | .cpu_setup = __setup_cpu_power3, |
76 | .firmware_features = COMMON_PPC64_FW, | ||
77 | }, | 75 | }, |
78 | { /* Northstar */ | 76 | { /* Northstar */ |
79 | .pvr_mask = 0xffff0000, | 77 | .pvr_mask = 0xffff0000, |
@@ -86,7 +84,6 @@ struct cpu_spec cpu_specs[] = { | |||
86 | .icache_bsize = 128, | 84 | .icache_bsize = 128, |
87 | .dcache_bsize = 128, | 85 | .dcache_bsize = 128, |
88 | .cpu_setup = __setup_cpu_power3, | 86 | .cpu_setup = __setup_cpu_power3, |
89 | .firmware_features = COMMON_PPC64_FW, | ||
90 | }, | 87 | }, |
91 | { /* Pulsar */ | 88 | { /* Pulsar */ |
92 | .pvr_mask = 0xffff0000, | 89 | .pvr_mask = 0xffff0000, |
@@ -99,7 +96,6 @@ struct cpu_spec cpu_specs[] = { | |||
99 | .icache_bsize = 128, | 96 | .icache_bsize = 128, |
100 | .dcache_bsize = 128, | 97 | .dcache_bsize = 128, |
101 | .cpu_setup = __setup_cpu_power3, | 98 | .cpu_setup = __setup_cpu_power3, |
102 | .firmware_features = COMMON_PPC64_FW, | ||
103 | }, | 99 | }, |
104 | { /* I-star */ | 100 | { /* I-star */ |
105 | .pvr_mask = 0xffff0000, | 101 | .pvr_mask = 0xffff0000, |
@@ -112,7 +108,6 @@ struct cpu_spec cpu_specs[] = { | |||
112 | .icache_bsize = 128, | 108 | .icache_bsize = 128, |
113 | .dcache_bsize = 128, | 109 | .dcache_bsize = 128, |
114 | .cpu_setup = __setup_cpu_power3, | 110 | .cpu_setup = __setup_cpu_power3, |
115 | .firmware_features = COMMON_PPC64_FW, | ||
116 | }, | 111 | }, |
117 | { /* S-star */ | 112 | { /* S-star */ |
118 | .pvr_mask = 0xffff0000, | 113 | .pvr_mask = 0xffff0000, |
@@ -125,7 +120,6 @@ struct cpu_spec cpu_specs[] = { | |||
125 | .icache_bsize = 128, | 120 | .icache_bsize = 128, |
126 | .dcache_bsize = 128, | 121 | .dcache_bsize = 128, |
127 | .cpu_setup = __setup_cpu_power3, | 122 | .cpu_setup = __setup_cpu_power3, |
128 | .firmware_features = COMMON_PPC64_FW, | ||
129 | }, | 123 | }, |
130 | { /* Power4 */ | 124 | { /* Power4 */ |
131 | .pvr_mask = 0xffff0000, | 125 | .pvr_mask = 0xffff0000, |
@@ -138,7 +132,6 @@ struct cpu_spec cpu_specs[] = { | |||
138 | .icache_bsize = 128, | 132 | .icache_bsize = 128, |
139 | .dcache_bsize = 128, | 133 | .dcache_bsize = 128, |
140 | .cpu_setup = __setup_cpu_power4, | 134 | .cpu_setup = __setup_cpu_power4, |
141 | .firmware_features = COMMON_PPC64_FW, | ||
142 | }, | 135 | }, |
143 | { /* Power4+ */ | 136 | { /* Power4+ */ |
144 | .pvr_mask = 0xffff0000, | 137 | .pvr_mask = 0xffff0000, |
@@ -151,7 +144,6 @@ struct cpu_spec cpu_specs[] = { | |||
151 | .icache_bsize = 128, | 144 | .icache_bsize = 128, |
152 | .dcache_bsize = 128, | 145 | .dcache_bsize = 128, |
153 | .cpu_setup = __setup_cpu_power4, | 146 | .cpu_setup = __setup_cpu_power4, |
154 | .firmware_features = COMMON_PPC64_FW, | ||
155 | }, | 147 | }, |
156 | { /* PPC970 */ | 148 | { /* PPC970 */ |
157 | .pvr_mask = 0xffff0000, | 149 | .pvr_mask = 0xffff0000, |
@@ -166,7 +158,6 @@ struct cpu_spec cpu_specs[] = { | |||
166 | .icache_bsize = 128, | 158 | .icache_bsize = 128, |
167 | .dcache_bsize = 128, | 159 | .dcache_bsize = 128, |
168 | .cpu_setup = __setup_cpu_ppc970, | 160 | .cpu_setup = __setup_cpu_ppc970, |
169 | .firmware_features = COMMON_PPC64_FW, | ||
170 | }, | 161 | }, |
171 | { /* PPC970FX */ | 162 | { /* PPC970FX */ |
172 | .pvr_mask = 0xffff0000, | 163 | .pvr_mask = 0xffff0000, |
@@ -181,7 +172,6 @@ struct cpu_spec cpu_specs[] = { | |||
181 | .icache_bsize = 128, | 172 | .icache_bsize = 128, |
182 | .dcache_bsize = 128, | 173 | .dcache_bsize = 128, |
183 | .cpu_setup = __setup_cpu_ppc970, | 174 | .cpu_setup = __setup_cpu_ppc970, |
184 | .firmware_features = COMMON_PPC64_FW, | ||
185 | }, | 175 | }, |
186 | { /* PPC970MP */ | 176 | { /* PPC970MP */ |
187 | .pvr_mask = 0xffff0000, | 177 | .pvr_mask = 0xffff0000, |
@@ -196,7 +186,6 @@ struct cpu_spec cpu_specs[] = { | |||
196 | .icache_bsize = 128, | 186 | .icache_bsize = 128, |
197 | .dcache_bsize = 128, | 187 | .dcache_bsize = 128, |
198 | .cpu_setup = __setup_cpu_ppc970, | 188 | .cpu_setup = __setup_cpu_ppc970, |
199 | .firmware_features = COMMON_PPC64_FW, | ||
200 | }, | 189 | }, |
201 | { /* Power5 */ | 190 | { /* Power5 */ |
202 | .pvr_mask = 0xffff0000, | 191 | .pvr_mask = 0xffff0000, |
@@ -211,7 +200,6 @@ struct cpu_spec cpu_specs[] = { | |||
211 | .icache_bsize = 128, | 200 | .icache_bsize = 128, |
212 | .dcache_bsize = 128, | 201 | .dcache_bsize = 128, |
213 | .cpu_setup = __setup_cpu_power4, | 202 | .cpu_setup = __setup_cpu_power4, |
214 | .firmware_features = COMMON_PPC64_FW, | ||
215 | }, | 203 | }, |
216 | { /* Power5 */ | 204 | { /* Power5 */ |
217 | .pvr_mask = 0xffff0000, | 205 | .pvr_mask = 0xffff0000, |
@@ -226,7 +214,6 @@ struct cpu_spec cpu_specs[] = { | |||
226 | .icache_bsize = 128, | 214 | .icache_bsize = 128, |
227 | .dcache_bsize = 128, | 215 | .dcache_bsize = 128, |
228 | .cpu_setup = __setup_cpu_power4, | 216 | .cpu_setup = __setup_cpu_power4, |
229 | .firmware_features = COMMON_PPC64_FW, | ||
230 | }, | 217 | }, |
231 | { /* BE DD1.x */ | 218 | { /* BE DD1.x */ |
232 | .pvr_mask = 0xffff0000, | 219 | .pvr_mask = 0xffff0000, |
@@ -241,7 +228,6 @@ struct cpu_spec cpu_specs[] = { | |||
241 | .icache_bsize = 128, | 228 | .icache_bsize = 128, |
242 | .dcache_bsize = 128, | 229 | .dcache_bsize = 128, |
243 | .cpu_setup = __setup_cpu_be, | 230 | .cpu_setup = __setup_cpu_be, |
244 | .firmware_features = COMMON_PPC64_FW, | ||
245 | }, | 231 | }, |
246 | { /* default match */ | 232 | { /* default match */ |
247 | .pvr_mask = 0x00000000, | 233 | .pvr_mask = 0x00000000, |
@@ -254,29 +240,5 @@ struct cpu_spec cpu_specs[] = { | |||
254 | .icache_bsize = 128, | 240 | .icache_bsize = 128, |
255 | .dcache_bsize = 128, | 241 | .dcache_bsize = 128, |
256 | .cpu_setup = __setup_cpu_power4, | 242 | .cpu_setup = __setup_cpu_power4, |
257 | .firmware_features = COMMON_PPC64_FW, | ||
258 | } | 243 | } |
259 | }; | 244 | }; |
260 | |||
261 | firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = { | ||
262 | {FW_FEATURE_PFT, "hcall-pft"}, | ||
263 | {FW_FEATURE_TCE, "hcall-tce"}, | ||
264 | {FW_FEATURE_SPRG0, "hcall-sprg0"}, | ||
265 | {FW_FEATURE_DABR, "hcall-dabr"}, | ||
266 | {FW_FEATURE_COPY, "hcall-copy"}, | ||
267 | {FW_FEATURE_ASR, "hcall-asr"}, | ||
268 | {FW_FEATURE_DEBUG, "hcall-debug"}, | ||
269 | {FW_FEATURE_PERF, "hcall-perf"}, | ||
270 | {FW_FEATURE_DUMP, "hcall-dump"}, | ||
271 | {FW_FEATURE_INTERRUPT, "hcall-interrupt"}, | ||
272 | {FW_FEATURE_MIGRATE, "hcall-migrate"}, | ||
273 | {FW_FEATURE_PERFMON, "hcall-perfmon"}, | ||
274 | {FW_FEATURE_CRQ, "hcall-crq"}, | ||
275 | {FW_FEATURE_VIO, "hcall-vio"}, | ||
276 | {FW_FEATURE_RDMA, "hcall-rdma"}, | ||
277 | {FW_FEATURE_LLAN, "hcall-lLAN"}, | ||
278 | {FW_FEATURE_BULK, "hcall-bulk"}, | ||
279 | {FW_FEATURE_XDABR, "hcall-xdabr"}, | ||
280 | {FW_FEATURE_MULTITCE, "hcall-multi-tce"}, | ||
281 | {FW_FEATURE_SPLPAR, "hcall-splpar"}, | ||
282 | }; | ||
diff --git a/arch/ppc64/kernel/firmware.c b/arch/ppc64/kernel/firmware.c new file mode 100644 index 000000000000..d8432c0fb27d --- /dev/null +++ b/arch/ppc64/kernel/firmware.c | |||
@@ -0,0 +1,47 @@ | |||
1 | /* | ||
2 | * arch/ppc64/kernel/firmware.c | ||
3 | * | ||
4 | * Extracted from cputable.c | ||
5 | * | ||
6 | * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) | ||
7 | * | ||
8 | * Modifications for ppc64: | ||
9 | * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com> | ||
10 | * Copyright (C) 2005 Stephen Rothwell, IBM Corporation | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or | ||
13 | * modify it under the terms of the GNU General Public License | ||
14 | * as published by the Free Software Foundation; either version | ||
15 | * 2 of the License, or (at your option) any later version. | ||
16 | */ | ||
17 | |||
18 | #include <linux/config.h> | ||
19 | |||
20 | #include <asm/firmware.h> | ||
21 | |||
22 | unsigned long ppc64_firmware_features; | ||
23 | |||
24 | #ifdef CONFIG_PPC_PSERIES | ||
25 | firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = { | ||
26 | {FW_FEATURE_PFT, "hcall-pft"}, | ||
27 | {FW_FEATURE_TCE, "hcall-tce"}, | ||
28 | {FW_FEATURE_SPRG0, "hcall-sprg0"}, | ||
29 | {FW_FEATURE_DABR, "hcall-dabr"}, | ||
30 | {FW_FEATURE_COPY, "hcall-copy"}, | ||
31 | {FW_FEATURE_ASR, "hcall-asr"}, | ||
32 | {FW_FEATURE_DEBUG, "hcall-debug"}, | ||
33 | {FW_FEATURE_PERF, "hcall-perf"}, | ||
34 | {FW_FEATURE_DUMP, "hcall-dump"}, | ||
35 | {FW_FEATURE_INTERRUPT, "hcall-interrupt"}, | ||
36 | {FW_FEATURE_MIGRATE, "hcall-migrate"}, | ||
37 | {FW_FEATURE_PERFMON, "hcall-perfmon"}, | ||
38 | {FW_FEATURE_CRQ, "hcall-crq"}, | ||
39 | {FW_FEATURE_VIO, "hcall-vio"}, | ||
40 | {FW_FEATURE_RDMA, "hcall-rdma"}, | ||
41 | {FW_FEATURE_LLAN, "hcall-lLAN"}, | ||
42 | {FW_FEATURE_BULK, "hcall-bulk"}, | ||
43 | {FW_FEATURE_XDABR, "hcall-xdabr"}, | ||
44 | {FW_FEATURE_MULTITCE, "hcall-multi-tce"}, | ||
45 | {FW_FEATURE_SPLPAR, "hcall-splpar"}, | ||
46 | }; | ||
47 | #endif | ||
diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index accaa052d31f..036959775623 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S | |||
@@ -23,14 +23,11 @@ | |||
23 | * 2 of the License, or (at your option) any later version. | 23 | * 2 of the License, or (at your option) any later version. |
24 | */ | 24 | */ |
25 | 25 | ||
26 | #define SECONDARY_PROCESSORS | ||
27 | |||
28 | #include <linux/config.h> | 26 | #include <linux/config.h> |
29 | #include <linux/threads.h> | 27 | #include <linux/threads.h> |
30 | #include <asm/processor.h> | 28 | #include <asm/processor.h> |
31 | #include <asm/page.h> | 29 | #include <asm/page.h> |
32 | #include <asm/mmu.h> | 30 | #include <asm/mmu.h> |
33 | #include <asm/naca.h> | ||
34 | #include <asm/systemcfg.h> | 31 | #include <asm/systemcfg.h> |
35 | #include <asm/ppc_asm.h> | 32 | #include <asm/ppc_asm.h> |
36 | #include <asm/offsets.h> | 33 | #include <asm/offsets.h> |
@@ -45,18 +42,13 @@ | |||
45 | #endif | 42 | #endif |
46 | 43 | ||
47 | /* | 44 | /* |
48 | * hcall interface to pSeries LPAR | ||
49 | */ | ||
50 | #define H_SET_ASR 0x30 | ||
51 | |||
52 | /* | ||
53 | * We layout physical memory as follows: | 45 | * We layout physical memory as follows: |
54 | * 0x0000 - 0x00ff : Secondary processor spin code | 46 | * 0x0000 - 0x00ff : Secondary processor spin code |
55 | * 0x0100 - 0x2fff : pSeries Interrupt prologs | 47 | * 0x0100 - 0x2fff : pSeries Interrupt prologs |
56 | * 0x3000 - 0x3fff : Interrupt support | 48 | * 0x3000 - 0x5fff : interrupt support, iSeries and common interrupt prologs |
57 | * 0x4000 - 0x4fff : NACA | 49 | * 0x6000 - 0x6fff : Initial (CPU0) segment table |
58 | * 0x6000 : iSeries and common interrupt prologs | 50 | * 0x7000 - 0x7fff : FWNMI data area |
59 | * 0x9000 - 0x9fff : Initial segment table | 51 | * 0x8000 - : Early init and support code |
60 | */ | 52 | */ |
61 | 53 | ||
62 | /* | 54 | /* |
@@ -94,6 +86,7 @@ END_FTR_SECTION(0, 1) | |||
94 | 86 | ||
95 | /* Catch branch to 0 in real mode */ | 87 | /* Catch branch to 0 in real mode */ |
96 | trap | 88 | trap |
89 | |||
97 | #ifdef CONFIG_PPC_ISERIES | 90 | #ifdef CONFIG_PPC_ISERIES |
98 | /* | 91 | /* |
99 | * At offset 0x20, there is a pointer to iSeries LPAR data. | 92 | * At offset 0x20, there is a pointer to iSeries LPAR data. |
@@ -103,12 +96,12 @@ END_FTR_SECTION(0, 1) | |||
103 | .llong hvReleaseData-KERNELBASE | 96 | .llong hvReleaseData-KERNELBASE |
104 | 97 | ||
105 | /* | 98 | /* |
106 | * At offset 0x28 and 0x30 are offsets to the msChunks | 99 | * At offset 0x28 and 0x30 are offsets to the mschunks_map |
107 | * array (used by the iSeries LPAR debugger to do translation | 100 | * array (used by the iSeries LPAR debugger to do translation |
108 | * between physical addresses and absolute addresses) and | 101 | * between physical addresses and absolute addresses) and |
109 | * to the pidhash table (also used by the debugger) | 102 | * to the pidhash table (also used by the debugger) |
110 | */ | 103 | */ |
111 | .llong msChunks-KERNELBASE | 104 | .llong mschunks_map-KERNELBASE |
112 | .llong 0 /* pidhash-KERNELBASE SFRXXX */ | 105 | .llong 0 /* pidhash-KERNELBASE SFRXXX */ |
113 | 106 | ||
114 | /* Offset 0x38 - Pointer to start of embedded System.map */ | 107 | /* Offset 0x38 - Pointer to start of embedded System.map */ |
@@ -120,7 +113,7 @@ embedded_sysmap_start: | |||
120 | embedded_sysmap_end: | 113 | embedded_sysmap_end: |
121 | .llong 0 | 114 | .llong 0 |
122 | 115 | ||
123 | #else /* CONFIG_PPC_ISERIES */ | 116 | #endif /* CONFIG_PPC_ISERIES */ |
124 | 117 | ||
125 | /* Secondary processors spin on this value until it goes to 1. */ | 118 | /* Secondary processors spin on this value until it goes to 1. */ |
126 | .globl __secondary_hold_spinloop | 119 | .globl __secondary_hold_spinloop |
@@ -155,7 +148,7 @@ _GLOBAL(__secondary_hold) | |||
155 | std r24,__secondary_hold_acknowledge@l(0) | 148 | std r24,__secondary_hold_acknowledge@l(0) |
156 | sync | 149 | sync |
157 | 150 | ||
158 | /* All secondary cpu's wait here until told to start. */ | 151 | /* All secondary cpus wait here until told to start. */ |
159 | 100: ld r4,__secondary_hold_spinloop@l(0) | 152 | 100: ld r4,__secondary_hold_spinloop@l(0) |
160 | cmpdi 0,r4,1 | 153 | cmpdi 0,r4,1 |
161 | bne 100b | 154 | bne 100b |
@@ -170,7 +163,6 @@ _GLOBAL(__secondary_hold) | |||
170 | BUG_OPCODE | 163 | BUG_OPCODE |
171 | #endif | 164 | #endif |
172 | #endif | 165 | #endif |
173 | #endif | ||
174 | 166 | ||
175 | /* This value is used to mark exception frames on the stack. */ | 167 | /* This value is used to mark exception frames on the stack. */ |
176 | .section ".toc","aw" | 168 | .section ".toc","aw" |
@@ -502,33 +494,37 @@ system_call_pSeries: | |||
502 | STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint) | 494 | STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint) |
503 | STD_EXCEPTION_PSERIES(0x1700, altivec_assist) | 495 | STD_EXCEPTION_PSERIES(0x1700, altivec_assist) |
504 | 496 | ||
497 | . = 0x3000 | ||
498 | |||
499 | /*** pSeries interrupt support ***/ | ||
500 | |||
505 | /* moved from 0xf00 */ | 501 | /* moved from 0xf00 */ |
506 | STD_EXCEPTION_PSERIES(0x3000, performance_monitor) | 502 | STD_EXCEPTION_PSERIES(., performance_monitor) |
507 | 503 | ||
508 | . = 0x3100 | 504 | .align 7 |
509 | _GLOBAL(do_stab_bolted_pSeries) | 505 | _GLOBAL(do_stab_bolted_pSeries) |
510 | mtcrf 0x80,r12 | 506 | mtcrf 0x80,r12 |
511 | mfspr r12,SPRG2 | 507 | mfspr r12,SPRG2 |
512 | EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) | 508 | EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) |
513 | 509 | ||
514 | 510 | /* | |
515 | /* Space for the naca. Architected to be located at real address | 511 | * Vectors for the FWNMI option. Share common code. |
516 | * NACA_PHYS_ADDR. Various tools rely on this location being fixed. | 512 | */ |
517 | * The first dword of the naca is required by iSeries LPAR to | 513 | .globl system_reset_fwnmi |
518 | * point to itVpdAreas. On pSeries native, this value is not used. | 514 | system_reset_fwnmi: |
519 | */ | 515 | HMT_MEDIUM |
520 | . = NACA_PHYS_ADDR | 516 | mtspr SPRG1,r13 /* save r13 */ |
521 | .globl __end_interrupts | 517 | RUNLATCH_ON(r13) |
522 | __end_interrupts: | 518 | EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) |
523 | #ifdef CONFIG_PPC_ISERIES | ||
524 | .globl naca | ||
525 | naca: | ||
526 | .llong itVpdAreas | ||
527 | .llong 0 /* xRamDisk */ | ||
528 | .llong 0 /* xRamDiskSize */ | ||
529 | 519 | ||
530 | . = 0x6100 | 520 | .globl machine_check_fwnmi |
521 | machine_check_fwnmi: | ||
522 | HMT_MEDIUM | ||
523 | mtspr SPRG1,r13 /* save r13 */ | ||
524 | RUNLATCH_ON(r13) | ||
525 | EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) | ||
531 | 526 | ||
527 | #ifdef CONFIG_PPC_ISERIES | ||
532 | /*** ISeries-LPAR interrupt handlers ***/ | 528 | /*** ISeries-LPAR interrupt handlers ***/ |
533 | 529 | ||
534 | STD_EXCEPTION_ISERIES(0x200, machine_check, PACA_EXMC) | 530 | STD_EXCEPTION_ISERIES(0x200, machine_check, PACA_EXMC) |
@@ -626,9 +622,7 @@ system_reset_iSeries: | |||
626 | 622 | ||
627 | cmpwi 0,r23,0 | 623 | cmpwi 0,r23,0 |
628 | beq iSeries_secondary_smp_loop /* Loop until told to go */ | 624 | beq iSeries_secondary_smp_loop /* Loop until told to go */ |
629 | #ifdef SECONDARY_PROCESSORS | ||
630 | bne .__secondary_start /* Loop until told to go */ | 625 | bne .__secondary_start /* Loop until told to go */ |
631 | #endif | ||
632 | iSeries_secondary_smp_loop: | 626 | iSeries_secondary_smp_loop: |
633 | /* Let the Hypervisor know we are alive */ | 627 | /* Let the Hypervisor know we are alive */ |
634 | /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */ | 628 | /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */ |
@@ -671,51 +665,8 @@ hardware_interrupt_iSeries_masked: | |||
671 | ld r13,PACA_EXGEN+EX_R13(r13) | 665 | ld r13,PACA_EXGEN+EX_R13(r13) |
672 | rfid | 666 | rfid |
673 | b . /* prevent speculative execution */ | 667 | b . /* prevent speculative execution */ |
674 | #endif | ||
675 | |||
676 | /* | ||
677 | * Data area reserved for FWNMI option. | ||
678 | */ | ||
679 | .= 0x7000 | ||
680 | .globl fwnmi_data_area | ||
681 | fwnmi_data_area: | ||
682 | |||
683 | #ifdef CONFIG_PPC_ISERIES | ||
684 | . = LPARMAP_PHYS | ||
685 | #include "lparmap.s" | ||
686 | #endif /* CONFIG_PPC_ISERIES */ | 668 | #endif /* CONFIG_PPC_ISERIES */ |
687 | 669 | ||
688 | /* | ||
689 | * Vectors for the FWNMI option. Share common code. | ||
690 | */ | ||
691 | . = 0x8000 | ||
692 | .globl system_reset_fwnmi | ||
693 | system_reset_fwnmi: | ||
694 | HMT_MEDIUM | ||
695 | mtspr SPRG1,r13 /* save r13 */ | ||
696 | RUNLATCH_ON(r13) | ||
697 | EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) | ||
698 | .globl machine_check_fwnmi | ||
699 | machine_check_fwnmi: | ||
700 | HMT_MEDIUM | ||
701 | mtspr SPRG1,r13 /* save r13 */ | ||
702 | RUNLATCH_ON(r13) | ||
703 | EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) | ||
704 | |||
705 | /* | ||
706 | * Space for the initial segment table | ||
707 | * For LPAR, the hypervisor must fill in at least one entry | ||
708 | * before we get control (with relocate on) | ||
709 | */ | ||
710 | . = STAB0_PHYS_ADDR | ||
711 | .globl __start_stab | ||
712 | __start_stab: | ||
713 | |||
714 | . = (STAB0_PHYS_ADDR + PAGE_SIZE) | ||
715 | .globl __end_stab | ||
716 | __end_stab: | ||
717 | |||
718 | |||
719 | /*** Common interrupt handlers ***/ | 670 | /*** Common interrupt handlers ***/ |
720 | 671 | ||
721 | STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception) | 672 | STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception) |
@@ -752,8 +703,8 @@ machine_check_common: | |||
752 | * R9 contains the saved CR, r13 points to the paca, | 703 | * R9 contains the saved CR, r13 points to the paca, |
753 | * r10 contains the (bad) kernel stack pointer, | 704 | * r10 contains the (bad) kernel stack pointer, |
754 | * r11 and r12 contain the saved SRR0 and SRR1. | 705 | * r11 and r12 contain the saved SRR0 and SRR1. |
755 | * We switch to using the paca guard page as an emergency stack, | 706 | * We switch to using an emergency stack, save the registers there, |
756 | * save the registers there, and call kernel_bad_stack(), which panics. | 707 | * and call kernel_bad_stack(), which panics. |
757 | */ | 708 | */ |
758 | bad_stack: | 709 | bad_stack: |
759 | ld r1,PACAEMERGSP(r13) | 710 | ld r1,PACAEMERGSP(r13) |
@@ -906,6 +857,62 @@ fp_unavailable_common: | |||
906 | bl .kernel_fp_unavailable_exception | 857 | bl .kernel_fp_unavailable_exception |
907 | BUG_OPCODE | 858 | BUG_OPCODE |
908 | 859 | ||
860 | /* | ||
861 | * load_up_fpu(unused, unused, tsk) | ||
862 | * Disable FP for the task which had the FPU previously, | ||
863 | * and save its floating-point registers in its thread_struct. | ||
864 | * Enables the FPU for use in the kernel on return. | ||
865 | * On SMP we know the fpu is free, since we give it up every | ||
866 | * switch (ie, no lazy save of the FP registers). | ||
867 | * On entry: r13 == 'current' && last_task_used_math != 'current' | ||
868 | */ | ||
869 | _STATIC(load_up_fpu) | ||
870 | mfmsr r5 /* grab the current MSR */ | ||
871 | ori r5,r5,MSR_FP | ||
872 | mtmsrd r5 /* enable use of fpu now */ | ||
873 | isync | ||
874 | /* | ||
875 | * For SMP, we don't do lazy FPU switching because it just gets too | ||
876 | * horrendously complex, especially when a task switches from one CPU | ||
877 | * to another. Instead we call giveup_fpu in switch_to. | ||
878 | * | ||
879 | */ | ||
880 | #ifndef CONFIG_SMP | ||
881 | ld r3,last_task_used_math@got(r2) | ||
882 | ld r4,0(r3) | ||
883 | cmpdi 0,r4,0 | ||
884 | beq 1f | ||
885 | /* Save FP state to last_task_used_math's THREAD struct */ | ||
886 | addi r4,r4,THREAD | ||
887 | SAVE_32FPRS(0, r4) | ||
888 | mffs fr0 | ||
889 | stfd fr0,THREAD_FPSCR(r4) | ||
890 | /* Disable FP for last_task_used_math */ | ||
891 | ld r5,PT_REGS(r4) | ||
892 | ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) | ||
893 | li r6,MSR_FP|MSR_FE0|MSR_FE1 | ||
894 | andc r4,r4,r6 | ||
895 | std r4,_MSR-STACK_FRAME_OVERHEAD(r5) | ||
896 | 1: | ||
897 | #endif /* CONFIG_SMP */ | ||
898 | /* enable use of FP after return */ | ||
899 | ld r4,PACACURRENT(r13) | ||
900 | addi r5,r4,THREAD /* Get THREAD */ | ||
901 | ld r4,THREAD_FPEXC_MODE(r5) | ||
902 | ori r12,r12,MSR_FP | ||
903 | or r12,r12,r4 | ||
904 | std r12,_MSR(r1) | ||
905 | lfd fr0,THREAD_FPSCR(r5) | ||
906 | mtfsf 0xff,fr0 | ||
907 | REST_32FPRS(0, r5) | ||
908 | #ifndef CONFIG_SMP | ||
909 | /* Update last_task_used_math to 'current' */ | ||
910 | subi r4,r5,THREAD /* Back to 'current' */ | ||
911 | std r4,0(r3) | ||
912 | #endif /* CONFIG_SMP */ | ||
913 | /* restore registers and return */ | ||
914 | b fast_exception_return | ||
915 | |||
909 | .align 7 | 916 | .align 7 |
910 | .globl altivec_unavailable_common | 917 | .globl altivec_unavailable_common |
911 | altivec_unavailable_common: | 918 | altivec_unavailable_common: |
@@ -921,6 +928,80 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) | |||
921 | bl .altivec_unavailable_exception | 928 | bl .altivec_unavailable_exception |
922 | b .ret_from_except | 929 | b .ret_from_except |
923 | 930 | ||
931 | #ifdef CONFIG_ALTIVEC | ||
932 | /* | ||
933 | * load_up_altivec(unused, unused, tsk) | ||
934 | * Disable VMX for the task which had it previously, | ||
935 | * and save its vector registers in its thread_struct. | ||
936 | * Enables the VMX for use in the kernel on return. | ||
937 | * On SMP we know the VMX is free, since we give it up every | ||
938 | * switch (ie, no lazy save of the vector registers). | ||
939 | * On entry: r13 == 'current' && last_task_used_altivec != 'current' | ||
940 | */ | ||
941 | _STATIC(load_up_altivec) | ||
942 | mfmsr r5 /* grab the current MSR */ | ||
943 | oris r5,r5,MSR_VEC@h | ||
944 | mtmsrd r5 /* enable use of VMX now */ | ||
945 | isync | ||
946 | |||
947 | /* | ||
948 | * For SMP, we don't do lazy VMX switching because it just gets too | ||
949 | * horrendously complex, especially when a task switches from one CPU | ||
950 | * to another. Instead we call giveup_altvec in switch_to. | ||
951 | * VRSAVE isn't dealt with here, that is done in the normal context | ||
952 | * switch code. Note that we could rely on vrsave value to eventually | ||
953 | * avoid saving all of the VREGs here... | ||
954 | */ | ||
955 | #ifndef CONFIG_SMP | ||
956 | ld r3,last_task_used_altivec@got(r2) | ||
957 | ld r4,0(r3) | ||
958 | cmpdi 0,r4,0 | ||
959 | beq 1f | ||
960 | /* Save VMX state to last_task_used_altivec's THREAD struct */ | ||
961 | addi r4,r4,THREAD | ||
962 | SAVE_32VRS(0,r5,r4) | ||
963 | mfvscr vr0 | ||
964 | li r10,THREAD_VSCR | ||
965 | stvx vr0,r10,r4 | ||
966 | /* Disable VMX for last_task_used_altivec */ | ||
967 | ld r5,PT_REGS(r4) | ||
968 | ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) | ||
969 | lis r6,MSR_VEC@h | ||
970 | andc r4,r4,r6 | ||
971 | std r4,_MSR-STACK_FRAME_OVERHEAD(r5) | ||
972 | 1: | ||
973 | #endif /* CONFIG_SMP */ | ||
974 | /* Hack: if we get an altivec unavailable trap with VRSAVE | ||
975 | * set to all zeros, we assume this is a broken application | ||
976 | * that fails to set it properly, and thus we switch it to | ||
977 | * all 1's | ||
978 | */ | ||
979 | mfspr r4,SPRN_VRSAVE | ||
980 | cmpdi 0,r4,0 | ||
981 | bne+ 1f | ||
982 | li r4,-1 | ||
983 | mtspr SPRN_VRSAVE,r4 | ||
984 | 1: | ||
985 | /* enable use of VMX after return */ | ||
986 | ld r4,PACACURRENT(r13) | ||
987 | addi r5,r4,THREAD /* Get THREAD */ | ||
988 | oris r12,r12,MSR_VEC@h | ||
989 | std r12,_MSR(r1) | ||
990 | li r4,1 | ||
991 | li r10,THREAD_VSCR | ||
992 | stw r4,THREAD_USED_VR(r5) | ||
993 | lvx vr0,r10,r5 | ||
994 | mtvscr vr0 | ||
995 | REST_32VRS(0,r4,r5) | ||
996 | #ifndef CONFIG_SMP | ||
997 | /* Update last_task_used_math to 'current' */ | ||
998 | subi r4,r5,THREAD /* Back to 'current' */ | ||
999 | std r4,0(r3) | ||
1000 | #endif /* CONFIG_SMP */ | ||
1001 | /* restore registers and return */ | ||
1002 | b fast_exception_return | ||
1003 | #endif /* CONFIG_ALTIVEC */ | ||
1004 | |||
924 | /* | 1005 | /* |
925 | * Hash table stuff | 1006 | * Hash table stuff |
926 | */ | 1007 | */ |
@@ -1167,6 +1248,42 @@ unrecov_slb: | |||
1167 | bl .unrecoverable_exception | 1248 | bl .unrecoverable_exception |
1168 | b 1b | 1249 | b 1b |
1169 | 1250 | ||
1251 | /* | ||
1252 | * Space for CPU0's segment table. | ||
1253 | * | ||
1254 | * On iSeries, the hypervisor must fill in at least one entry before | ||
1255 | * we get control (with relocate on). The address is give to the hv | ||
1256 | * as a page number (see xLparMap in LparData.c), so this must be at a | ||
1257 | * fixed address (the linker can't compute (u64)&initial_stab >> | ||
1258 | * PAGE_SHIFT). | ||
1259 | */ | ||
1260 | . = STAB0_PHYS_ADDR /* 0x6000 */ | ||
1261 | .globl initial_stab | ||
1262 | initial_stab: | ||
1263 | .space 4096 | ||
1264 | |||
1265 | /* | ||
1266 | * Data area reserved for FWNMI option. | ||
1267 | * This address (0x7000) is fixed by the RPA. | ||
1268 | */ | ||
1269 | .= 0x7000 | ||
1270 | .globl fwnmi_data_area | ||
1271 | fwnmi_data_area: | ||
1272 | |||
1273 | /* iSeries does not use the FWNMI stuff, so it is safe to put | ||
1274 | * this here, even if we later allow kernels that will boot on | ||
1275 | * both pSeries and iSeries */ | ||
1276 | #ifdef CONFIG_PPC_ISERIES | ||
1277 | . = LPARMAP_PHYS | ||
1278 | #include "lparmap.s" | ||
1279 | /* | ||
1280 | * This ".text" is here for old compilers that generate a trailing | ||
1281 | * .note section when compiling .c files to .s | ||
1282 | */ | ||
1283 | .text | ||
1284 | #endif /* CONFIG_PPC_ISERIES */ | ||
1285 | |||
1286 | . = 0x8000 | ||
1170 | 1287 | ||
1171 | /* | 1288 | /* |
1172 | * On pSeries, secondary processors spin in the following code. | 1289 | * On pSeries, secondary processors spin in the following code. |
@@ -1200,7 +1317,7 @@ _GLOBAL(pSeries_secondary_smp_init) | |||
1200 | b .kexec_wait /* next kernel might do better */ | 1317 | b .kexec_wait /* next kernel might do better */ |
1201 | 1318 | ||
1202 | 2: mtspr SPRG3,r13 /* Save vaddr of paca in SPRG3 */ | 1319 | 2: mtspr SPRG3,r13 /* Save vaddr of paca in SPRG3 */ |
1203 | /* From now on, r24 is expected to be logica cpuid */ | 1320 | /* From now on, r24 is expected to be logical cpuid */ |
1204 | mr r24,r5 | 1321 | mr r24,r5 |
1205 | 3: HMT_LOW | 1322 | 3: HMT_LOW |
1206 | lbz r23,PACAPROCSTART(r13) /* Test if this processor should */ | 1323 | lbz r23,PACAPROCSTART(r13) /* Test if this processor should */ |
@@ -1213,10 +1330,8 @@ _GLOBAL(pSeries_secondary_smp_init) | |||
1213 | 1330 | ||
1214 | cmpwi 0,r23,0 | 1331 | cmpwi 0,r23,0 |
1215 | #ifdef CONFIG_SMP | 1332 | #ifdef CONFIG_SMP |
1216 | #ifdef SECONDARY_PROCESSORS | ||
1217 | bne .__secondary_start | 1333 | bne .__secondary_start |
1218 | #endif | 1334 | #endif |
1219 | #endif | ||
1220 | b 3b /* Loop until told to go */ | 1335 | b 3b /* Loop until told to go */ |
1221 | 1336 | ||
1222 | #ifdef CONFIG_PPC_ISERIES | 1337 | #ifdef CONFIG_PPC_ISERIES |
@@ -1430,228 +1545,6 @@ _GLOBAL(copy_and_flush) | |||
1430 | .align 8 | 1545 | .align 8 |
1431 | copy_to_here: | 1546 | copy_to_here: |
1432 | 1547 | ||
1433 | /* | ||
1434 | * load_up_fpu(unused, unused, tsk) | ||
1435 | * Disable FP for the task which had the FPU previously, | ||
1436 | * and save its floating-point registers in its thread_struct. | ||
1437 | * Enables the FPU for use in the kernel on return. | ||
1438 | * On SMP we know the fpu is free, since we give it up every | ||
1439 | * switch (ie, no lazy save of the FP registers). | ||
1440 | * On entry: r13 == 'current' && last_task_used_math != 'current' | ||
1441 | */ | ||
1442 | _STATIC(load_up_fpu) | ||
1443 | mfmsr r5 /* grab the current MSR */ | ||
1444 | ori r5,r5,MSR_FP | ||
1445 | mtmsrd r5 /* enable use of fpu now */ | ||
1446 | isync | ||
1447 | /* | ||
1448 | * For SMP, we don't do lazy FPU switching because it just gets too | ||
1449 | * horrendously complex, especially when a task switches from one CPU | ||
1450 | * to another. Instead we call giveup_fpu in switch_to. | ||
1451 | * | ||
1452 | */ | ||
1453 | #ifndef CONFIG_SMP | ||
1454 | ld r3,last_task_used_math@got(r2) | ||
1455 | ld r4,0(r3) | ||
1456 | cmpdi 0,r4,0 | ||
1457 | beq 1f | ||
1458 | /* Save FP state to last_task_used_math's THREAD struct */ | ||
1459 | addi r4,r4,THREAD | ||
1460 | SAVE_32FPRS(0, r4) | ||
1461 | mffs fr0 | ||
1462 | stfd fr0,THREAD_FPSCR(r4) | ||
1463 | /* Disable FP for last_task_used_math */ | ||
1464 | ld r5,PT_REGS(r4) | ||
1465 | ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) | ||
1466 | li r6,MSR_FP|MSR_FE0|MSR_FE1 | ||
1467 | andc r4,r4,r6 | ||
1468 | std r4,_MSR-STACK_FRAME_OVERHEAD(r5) | ||
1469 | 1: | ||
1470 | #endif /* CONFIG_SMP */ | ||
1471 | /* enable use of FP after return */ | ||
1472 | ld r4,PACACURRENT(r13) | ||
1473 | addi r5,r4,THREAD /* Get THREAD */ | ||
1474 | ld r4,THREAD_FPEXC_MODE(r5) | ||
1475 | ori r12,r12,MSR_FP | ||
1476 | or r12,r12,r4 | ||
1477 | std r12,_MSR(r1) | ||
1478 | lfd fr0,THREAD_FPSCR(r5) | ||
1479 | mtfsf 0xff,fr0 | ||
1480 | REST_32FPRS(0, r5) | ||
1481 | #ifndef CONFIG_SMP | ||
1482 | /* Update last_task_used_math to 'current' */ | ||
1483 | subi r4,r5,THREAD /* Back to 'current' */ | ||
1484 | std r4,0(r3) | ||
1485 | #endif /* CONFIG_SMP */ | ||
1486 | /* restore registers and return */ | ||
1487 | b fast_exception_return | ||
1488 | |||
1489 | /* | ||
1490 | * disable_kernel_fp() | ||
1491 | * Disable the FPU. | ||
1492 | */ | ||
1493 | _GLOBAL(disable_kernel_fp) | ||
1494 | mfmsr r3 | ||
1495 | rldicl r0,r3,(63-MSR_FP_LG),1 | ||
1496 | rldicl r3,r0,(MSR_FP_LG+1),0 | ||
1497 | mtmsrd r3 /* disable use of fpu now */ | ||
1498 | isync | ||
1499 | blr | ||
1500 | |||
1501 | /* | ||
1502 | * giveup_fpu(tsk) | ||
1503 | * Disable FP for the task given as the argument, | ||
1504 | * and save the floating-point registers in its thread_struct. | ||
1505 | * Enables the FPU for use in the kernel on return. | ||
1506 | */ | ||
1507 | _GLOBAL(giveup_fpu) | ||
1508 | mfmsr r5 | ||
1509 | ori r5,r5,MSR_FP | ||
1510 | mtmsrd r5 /* enable use of fpu now */ | ||
1511 | isync | ||
1512 | cmpdi 0,r3,0 | ||
1513 | beqlr- /* if no previous owner, done */ | ||
1514 | addi r3,r3,THREAD /* want THREAD of task */ | ||
1515 | ld r5,PT_REGS(r3) | ||
1516 | cmpdi 0,r5,0 | ||
1517 | SAVE_32FPRS(0, r3) | ||
1518 | mffs fr0 | ||
1519 | stfd fr0,THREAD_FPSCR(r3) | ||
1520 | beq 1f | ||
1521 | ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) | ||
1522 | li r3,MSR_FP|MSR_FE0|MSR_FE1 | ||
1523 | andc r4,r4,r3 /* disable FP for previous task */ | ||
1524 | std r4,_MSR-STACK_FRAME_OVERHEAD(r5) | ||
1525 | 1: | ||
1526 | #ifndef CONFIG_SMP | ||
1527 | li r5,0 | ||
1528 | ld r4,last_task_used_math@got(r2) | ||
1529 | std r5,0(r4) | ||
1530 | #endif /* CONFIG_SMP */ | ||
1531 | blr | ||
1532 | |||
1533 | |||
1534 | #ifdef CONFIG_ALTIVEC | ||
1535 | |||
1536 | /* | ||
1537 | * load_up_altivec(unused, unused, tsk) | ||
1538 | * Disable VMX for the task which had it previously, | ||
1539 | * and save its vector registers in its thread_struct. | ||
1540 | * Enables the VMX for use in the kernel on return. | ||
1541 | * On SMP we know the VMX is free, since we give it up every | ||
1542 | * switch (ie, no lazy save of the vector registers). | ||
1543 | * On entry: r13 == 'current' && last_task_used_altivec != 'current' | ||
1544 | */ | ||
1545 | _STATIC(load_up_altivec) | ||
1546 | mfmsr r5 /* grab the current MSR */ | ||
1547 | oris r5,r5,MSR_VEC@h | ||
1548 | mtmsrd r5 /* enable use of VMX now */ | ||
1549 | isync | ||
1550 | |||
1551 | /* | ||
1552 | * For SMP, we don't do lazy VMX switching because it just gets too | ||
1553 | * horrendously complex, especially when a task switches from one CPU | ||
1554 | * to another. Instead we call giveup_altvec in switch_to. | ||
1555 | * VRSAVE isn't dealt with here, that is done in the normal context | ||
1556 | * switch code. Note that we could rely on vrsave value to eventually | ||
1557 | * avoid saving all of the VREGs here... | ||
1558 | */ | ||
1559 | #ifndef CONFIG_SMP | ||
1560 | ld r3,last_task_used_altivec@got(r2) | ||
1561 | ld r4,0(r3) | ||
1562 | cmpdi 0,r4,0 | ||
1563 | beq 1f | ||
1564 | /* Save VMX state to last_task_used_altivec's THREAD struct */ | ||
1565 | addi r4,r4,THREAD | ||
1566 | SAVE_32VRS(0,r5,r4) | ||
1567 | mfvscr vr0 | ||
1568 | li r10,THREAD_VSCR | ||
1569 | stvx vr0,r10,r4 | ||
1570 | /* Disable VMX for last_task_used_altivec */ | ||
1571 | ld r5,PT_REGS(r4) | ||
1572 | ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) | ||
1573 | lis r6,MSR_VEC@h | ||
1574 | andc r4,r4,r6 | ||
1575 | std r4,_MSR-STACK_FRAME_OVERHEAD(r5) | ||
1576 | 1: | ||
1577 | #endif /* CONFIG_SMP */ | ||
1578 | /* Hack: if we get an altivec unavailable trap with VRSAVE | ||
1579 | * set to all zeros, we assume this is a broken application | ||
1580 | * that fails to set it properly, and thus we switch it to | ||
1581 | * all 1's | ||
1582 | */ | ||
1583 | mfspr r4,SPRN_VRSAVE | ||
1584 | cmpdi 0,r4,0 | ||
1585 | bne+ 1f | ||
1586 | li r4,-1 | ||
1587 | mtspr SPRN_VRSAVE,r4 | ||
1588 | 1: | ||
1589 | /* enable use of VMX after return */ | ||
1590 | ld r4,PACACURRENT(r13) | ||
1591 | addi r5,r4,THREAD /* Get THREAD */ | ||
1592 | oris r12,r12,MSR_VEC@h | ||
1593 | std r12,_MSR(r1) | ||
1594 | li r4,1 | ||
1595 | li r10,THREAD_VSCR | ||
1596 | stw r4,THREAD_USED_VR(r5) | ||
1597 | lvx vr0,r10,r5 | ||
1598 | mtvscr vr0 | ||
1599 | REST_32VRS(0,r4,r5) | ||
1600 | #ifndef CONFIG_SMP | ||
1601 | /* Update last_task_used_math to 'current' */ | ||
1602 | subi r4,r5,THREAD /* Back to 'current' */ | ||
1603 | std r4,0(r3) | ||
1604 | #endif /* CONFIG_SMP */ | ||
1605 | /* restore registers and return */ | ||
1606 | b fast_exception_return | ||
1607 | |||
1608 | /* | ||
1609 | * disable_kernel_altivec() | ||
1610 | * Disable the VMX. | ||
1611 | */ | ||
1612 | _GLOBAL(disable_kernel_altivec) | ||
1613 | mfmsr r3 | ||
1614 | rldicl r0,r3,(63-MSR_VEC_LG),1 | ||
1615 | rldicl r3,r0,(MSR_VEC_LG+1),0 | ||
1616 | mtmsrd r3 /* disable use of VMX now */ | ||
1617 | isync | ||
1618 | blr | ||
1619 | |||
1620 | /* | ||
1621 | * giveup_altivec(tsk) | ||
1622 | * Disable VMX for the task given as the argument, | ||
1623 | * and save the vector registers in its thread_struct. | ||
1624 | * Enables the VMX for use in the kernel on return. | ||
1625 | */ | ||
1626 | _GLOBAL(giveup_altivec) | ||
1627 | mfmsr r5 | ||
1628 | oris r5,r5,MSR_VEC@h | ||
1629 | mtmsrd r5 /* enable use of VMX now */ | ||
1630 | isync | ||
1631 | cmpdi 0,r3,0 | ||
1632 | beqlr- /* if no previous owner, done */ | ||
1633 | addi r3,r3,THREAD /* want THREAD of task */ | ||
1634 | ld r5,PT_REGS(r3) | ||
1635 | cmpdi 0,r5,0 | ||
1636 | SAVE_32VRS(0,r4,r3) | ||
1637 | mfvscr vr0 | ||
1638 | li r4,THREAD_VSCR | ||
1639 | stvx vr0,r4,r3 | ||
1640 | beq 1f | ||
1641 | ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) | ||
1642 | lis r3,MSR_VEC@h | ||
1643 | andc r4,r4,r3 /* disable FP for previous task */ | ||
1644 | std r4,_MSR-STACK_FRAME_OVERHEAD(r5) | ||
1645 | 1: | ||
1646 | #ifndef CONFIG_SMP | ||
1647 | li r5,0 | ||
1648 | ld r4,last_task_used_altivec@got(r2) | ||
1649 | std r5,0(r4) | ||
1650 | #endif /* CONFIG_SMP */ | ||
1651 | blr | ||
1652 | |||
1653 | #endif /* CONFIG_ALTIVEC */ | ||
1654 | |||
1655 | #ifdef CONFIG_SMP | 1548 | #ifdef CONFIG_SMP |
1656 | #ifdef CONFIG_PPC_PMAC | 1549 | #ifdef CONFIG_PPC_PMAC |
1657 | /* | 1550 | /* |
@@ -2002,9 +1895,6 @@ _STATIC(start_here_common) | |||
2002 | 1895 | ||
2003 | bl .start_kernel | 1896 | bl .start_kernel |
2004 | 1897 | ||
2005 | _GLOBAL(__setup_cpu_power3) | ||
2006 | blr | ||
2007 | |||
2008 | _GLOBAL(hmt_init) | 1898 | _GLOBAL(hmt_init) |
2009 | #ifdef CONFIG_HMT | 1899 | #ifdef CONFIG_HMT |
2010 | LOADADDR(r5, hmt_thread_data) | 1900 | LOADADDR(r5, hmt_thread_data) |
@@ -2095,20 +1985,19 @@ _GLOBAL(smp_release_cpus) | |||
2095 | 1985 | ||
2096 | /* | 1986 | /* |
2097 | * We put a few things here that have to be page-aligned. | 1987 | * We put a few things here that have to be page-aligned. |
2098 | * This stuff goes at the beginning of the data segment, | 1988 | * This stuff goes at the beginning of the bss, which is page-aligned. |
2099 | * which is page-aligned. | ||
2100 | */ | 1989 | */ |
2101 | .data | 1990 | .section ".bss" |
1991 | |||
2102 | .align 12 | 1992 | .align 12 |
2103 | .globl sdata | 1993 | |
2104 | sdata: | ||
2105 | .globl empty_zero_page | 1994 | .globl empty_zero_page |
2106 | empty_zero_page: | 1995 | empty_zero_page: |
2107 | .space 4096 | 1996 | .space PAGE_SIZE |
2108 | 1997 | ||
2109 | .globl swapper_pg_dir | 1998 | .globl swapper_pg_dir |
2110 | swapper_pg_dir: | 1999 | swapper_pg_dir: |
2111 | .space 4096 | 2000 | .space PAGE_SIZE |
2112 | 2001 | ||
2113 | /* | 2002 | /* |
2114 | * This space gets a copy of optional info passed to us by the bootstrap | 2003 | * This space gets a copy of optional info passed to us by the bootstrap |
diff --git a/arch/ppc64/kernel/iSeries_htab.c b/arch/ppc64/kernel/iSeries_htab.c index b0250ae4a72a..2192055a90a0 100644 --- a/arch/ppc64/kernel/iSeries_htab.c +++ b/arch/ppc64/kernel/iSeries_htab.c | |||
@@ -41,6 +41,7 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va, | |||
41 | unsigned long prpn, unsigned long vflags, | 41 | unsigned long prpn, unsigned long vflags, |
42 | unsigned long rflags) | 42 | unsigned long rflags) |
43 | { | 43 | { |
44 | unsigned long arpn; | ||
44 | long slot; | 45 | long slot; |
45 | hpte_t lhpte; | 46 | hpte_t lhpte; |
46 | int secondary = 0; | 47 | int secondary = 0; |
@@ -70,8 +71,10 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va, | |||
70 | slot &= 0x7fffffffffffffff; | 71 | slot &= 0x7fffffffffffffff; |
71 | } | 72 | } |
72 | 73 | ||
74 | arpn = phys_to_abs(prpn << PAGE_SHIFT) >> PAGE_SHIFT; | ||
75 | |||
73 | lhpte.v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; | 76 | lhpte.v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; |
74 | lhpte.r = (physRpn_to_absRpn(prpn) << HPTE_R_RPN_SHIFT) | rflags; | 77 | lhpte.r = (arpn << HPTE_R_RPN_SHIFT) | rflags; |
75 | 78 | ||
76 | /* Now fill in the actual HPTE */ | 79 | /* Now fill in the actual HPTE */ |
77 | HvCallHpt_addValidate(slot, secondary, &lhpte); | 80 | HvCallHpt_addValidate(slot, secondary, &lhpte); |
diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c index a649edbb23b6..3ffefbbc6623 100644 --- a/arch/ppc64/kernel/iSeries_setup.c +++ b/arch/ppc64/kernel/iSeries_setup.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <asm/cputable.h> | 39 | #include <asm/cputable.h> |
40 | #include <asm/sections.h> | 40 | #include <asm/sections.h> |
41 | #include <asm/iommu.h> | 41 | #include <asm/iommu.h> |
42 | #include <asm/firmware.h> | ||
42 | 43 | ||
43 | #include <asm/time.h> | 44 | #include <asm/time.h> |
44 | #include "iSeries_setup.h" | 45 | #include "iSeries_setup.h" |
@@ -314,6 +315,8 @@ static void __init iSeries_init_early(void) | |||
314 | 315 | ||
315 | DBG(" -> iSeries_init_early()\n"); | 316 | DBG(" -> iSeries_init_early()\n"); |
316 | 317 | ||
318 | ppc64_firmware_features = FW_FEATURE_ISERIES; | ||
319 | |||
317 | ppcdbg_initialize(); | 320 | ppcdbg_initialize(); |
318 | 321 | ||
319 | #if defined(CONFIG_BLK_DEV_INITRD) | 322 | #if defined(CONFIG_BLK_DEV_INITRD) |
@@ -412,6 +415,22 @@ static void __init iSeries_init_early(void) | |||
412 | DBG(" <- iSeries_init_early()\n"); | 415 | DBG(" <- iSeries_init_early()\n"); |
413 | } | 416 | } |
414 | 417 | ||
418 | struct mschunks_map mschunks_map = { | ||
419 | /* XXX We don't use these, but Piranha might need them. */ | ||
420 | .chunk_size = MSCHUNKS_CHUNK_SIZE, | ||
421 | .chunk_shift = MSCHUNKS_CHUNK_SHIFT, | ||
422 | .chunk_mask = MSCHUNKS_OFFSET_MASK, | ||
423 | }; | ||
424 | EXPORT_SYMBOL(mschunks_map); | ||
425 | |||
426 | void mschunks_alloc(unsigned long num_chunks) | ||
427 | { | ||
428 | klimit = _ALIGN(klimit, sizeof(u32)); | ||
429 | mschunks_map.mapping = (u32 *)klimit; | ||
430 | klimit += num_chunks * sizeof(u32); | ||
431 | mschunks_map.num_chunks = num_chunks; | ||
432 | } | ||
433 | |||
415 | /* | 434 | /* |
416 | * The iSeries may have very large memories ( > 128 GB ) and a partition | 435 | * The iSeries may have very large memories ( > 128 GB ) and a partition |
417 | * may get memory in "chunks" that may be anywhere in the 2**52 real | 436 | * may get memory in "chunks" that may be anywhere in the 2**52 real |
@@ -449,7 +468,7 @@ static void __init build_iSeries_Memory_Map(void) | |||
449 | 468 | ||
450 | /* Chunk size on iSeries is 256K bytes */ | 469 | /* Chunk size on iSeries is 256K bytes */ |
451 | totalChunks = (u32)HvLpConfig_getMsChunks(); | 470 | totalChunks = (u32)HvLpConfig_getMsChunks(); |
452 | klimit = msChunks_alloc(klimit, totalChunks, 1UL << 18); | 471 | mschunks_alloc(totalChunks); |
453 | 472 | ||
454 | /* | 473 | /* |
455 | * Get absolute address of our load area | 474 | * Get absolute address of our load area |
@@ -486,7 +505,7 @@ static void __init build_iSeries_Memory_Map(void) | |||
486 | printk("Load area size %dK\n", loadAreaSize * 256); | 505 | printk("Load area size %dK\n", loadAreaSize * 256); |
487 | 506 | ||
488 | for (nextPhysChunk = 0; nextPhysChunk < loadAreaSize; ++nextPhysChunk) | 507 | for (nextPhysChunk = 0; nextPhysChunk < loadAreaSize; ++nextPhysChunk) |
489 | msChunks.abs[nextPhysChunk] = | 508 | mschunks_map.mapping[nextPhysChunk] = |
490 | loadAreaFirstChunk + nextPhysChunk; | 509 | loadAreaFirstChunk + nextPhysChunk; |
491 | 510 | ||
492 | /* | 511 | /* |
@@ -495,7 +514,7 @@ static void __init build_iSeries_Memory_Map(void) | |||
495 | */ | 514 | */ |
496 | hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress()); | 515 | hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress()); |
497 | hptSizePages = (u32)HvCallHpt_getHptPages(); | 516 | hptSizePages = (u32)HvCallHpt_getHptPages(); |
498 | hptSizeChunks = hptSizePages >> (msChunks.chunk_shift - PAGE_SHIFT); | 517 | hptSizeChunks = hptSizePages >> (MSCHUNKS_CHUNK_SHIFT - PAGE_SHIFT); |
499 | hptLastChunk = hptFirstChunk + hptSizeChunks - 1; | 518 | hptLastChunk = hptFirstChunk + hptSizeChunks - 1; |
500 | 519 | ||
501 | printk("HPT absolute addr = %016lx, size = %dK\n", | 520 | printk("HPT absolute addr = %016lx, size = %dK\n", |
@@ -552,7 +571,8 @@ static void __init build_iSeries_Memory_Map(void) | |||
552 | (absChunk > hptLastChunk)) && | 571 | (absChunk > hptLastChunk)) && |
553 | ((absChunk < loadAreaFirstChunk) || | 572 | ((absChunk < loadAreaFirstChunk) || |
554 | (absChunk > loadAreaLastChunk))) { | 573 | (absChunk > loadAreaLastChunk))) { |
555 | msChunks.abs[nextPhysChunk] = absChunk; | 574 | mschunks_map.mapping[nextPhysChunk] = |
575 | absChunk; | ||
556 | ++nextPhysChunk; | 576 | ++nextPhysChunk; |
557 | } | 577 | } |
558 | } | 578 | } |
@@ -944,6 +964,8 @@ void __init iSeries_early_setup(void) | |||
944 | ppc_md.calibrate_decr = iSeries_calibrate_decr; | 964 | ppc_md.calibrate_decr = iSeries_calibrate_decr; |
945 | ppc_md.progress = iSeries_progress; | 965 | ppc_md.progress = iSeries_progress; |
946 | 966 | ||
967 | /* XXX Implement enable_pmcs for iSeries */ | ||
968 | |||
947 | if (get_paca()->lppaca.shared_proc) { | 969 | if (get_paca()->lppaca.shared_proc) { |
948 | ppc_md.idle_loop = iseries_shared_idle; | 970 | ppc_md.idle_loop = iseries_shared_idle; |
949 | printk(KERN_INFO "Using shared processor idle loop\n"); | 971 | printk(KERN_INFO "Using shared processor idle loop\n"); |
diff --git a/arch/ppc64/kernel/iSeries_vio.c b/arch/ppc64/kernel/iSeries_vio.c new file mode 100644 index 000000000000..6b754b0c8344 --- /dev/null +++ b/arch/ppc64/kernel/iSeries_vio.c | |||
@@ -0,0 +1,155 @@ | |||
1 | /* | ||
2 | * IBM PowerPC iSeries Virtual I/O Infrastructure Support. | ||
3 | * | ||
4 | * Copyright (c) 2005 Stephen Rothwell, IBM Corp. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/types.h> | ||
12 | #include <linux/device.h> | ||
13 | #include <linux/init.h> | ||
14 | |||
15 | #include <asm/vio.h> | ||
16 | #include <asm/iommu.h> | ||
17 | #include <asm/abs_addr.h> | ||
18 | #include <asm/page.h> | ||
19 | #include <asm/iSeries/vio.h> | ||
20 | #include <asm/iSeries/HvTypes.h> | ||
21 | #include <asm/iSeries/HvLpConfig.h> | ||
22 | #include <asm/iSeries/HvCallXm.h> | ||
23 | |||
24 | struct device *iSeries_vio_dev = &vio_bus_device.dev; | ||
25 | EXPORT_SYMBOL(iSeries_vio_dev); | ||
26 | |||
27 | static struct iommu_table veth_iommu_table; | ||
28 | static struct iommu_table vio_iommu_table; | ||
29 | |||
30 | static void __init iommu_vio_init(void) | ||
31 | { | ||
32 | struct iommu_table *t; | ||
33 | struct iommu_table_cb cb; | ||
34 | unsigned long cbp; | ||
35 | unsigned long itc_entries; | ||
36 | |||
37 | cb.itc_busno = 255; /* Bus 255 is the virtual bus */ | ||
38 | cb.itc_virtbus = 0xff; /* Ask for virtual bus */ | ||
39 | |||
40 | cbp = virt_to_abs(&cb); | ||
41 | HvCallXm_getTceTableParms(cbp); | ||
42 | |||
43 | itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry); | ||
44 | veth_iommu_table.it_size = itc_entries / 2; | ||
45 | veth_iommu_table.it_busno = cb.itc_busno; | ||
46 | veth_iommu_table.it_offset = cb.itc_offset; | ||
47 | veth_iommu_table.it_index = cb.itc_index; | ||
48 | veth_iommu_table.it_type = TCE_VB; | ||
49 | veth_iommu_table.it_blocksize = 1; | ||
50 | |||
51 | t = iommu_init_table(&veth_iommu_table); | ||
52 | |||
53 | if (!t) | ||
54 | printk("Virtual Bus VETH TCE table failed.\n"); | ||
55 | |||
56 | vio_iommu_table.it_size = itc_entries - veth_iommu_table.it_size; | ||
57 | vio_iommu_table.it_busno = cb.itc_busno; | ||
58 | vio_iommu_table.it_offset = cb.itc_offset + | ||
59 | veth_iommu_table.it_size; | ||
60 | vio_iommu_table.it_index = cb.itc_index; | ||
61 | vio_iommu_table.it_type = TCE_VB; | ||
62 | vio_iommu_table.it_blocksize = 1; | ||
63 | |||
64 | t = iommu_init_table(&vio_iommu_table); | ||
65 | |||
66 | if (!t) | ||
67 | printk("Virtual Bus VIO TCE table failed.\n"); | ||
68 | } | ||
69 | |||
70 | /** | ||
71 | * vio_register_device_iseries: - Register a new iSeries vio device. | ||
72 | * @voidev: The device to register. | ||
73 | */ | ||
74 | static struct vio_dev *__init vio_register_device_iseries(char *type, | ||
75 | uint32_t unit_num) | ||
76 | { | ||
77 | struct vio_dev *viodev; | ||
78 | |||
79 | /* allocate a vio_dev for this device */ | ||
80 | viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); | ||
81 | if (!viodev) | ||
82 | return NULL; | ||
83 | memset(viodev, 0, sizeof(struct vio_dev)); | ||
84 | |||
85 | snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%s%d", type, unit_num); | ||
86 | |||
87 | viodev->name = viodev->dev.bus_id; | ||
88 | viodev->type = type; | ||
89 | viodev->unit_address = unit_num; | ||
90 | viodev->iommu_table = &vio_iommu_table; | ||
91 | if (vio_register_device(viodev) == NULL) { | ||
92 | kfree(viodev); | ||
93 | return NULL; | ||
94 | } | ||
95 | return viodev; | ||
96 | } | ||
97 | |||
98 | void __init probe_bus_iseries(void) | ||
99 | { | ||
100 | HvLpIndexMap vlan_map; | ||
101 | struct vio_dev *viodev; | ||
102 | int i; | ||
103 | |||
104 | /* there is only one of each of these */ | ||
105 | vio_register_device_iseries("viocons", 0); | ||
106 | vio_register_device_iseries("vscsi", 0); | ||
107 | |||
108 | vlan_map = HvLpConfig_getVirtualLanIndexMap(); | ||
109 | for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) { | ||
110 | if ((vlan_map & (0x8000 >> i)) == 0) | ||
111 | continue; | ||
112 | viodev = vio_register_device_iseries("vlan", i); | ||
113 | /* veth is special and has it own iommu_table */ | ||
114 | viodev->iommu_table = &veth_iommu_table; | ||
115 | } | ||
116 | for (i = 0; i < HVMAXARCHITECTEDVIRTUALDISKS; i++) | ||
117 | vio_register_device_iseries("viodasd", i); | ||
118 | for (i = 0; i < HVMAXARCHITECTEDVIRTUALCDROMS; i++) | ||
119 | vio_register_device_iseries("viocd", i); | ||
120 | for (i = 0; i < HVMAXARCHITECTEDVIRTUALTAPES; i++) | ||
121 | vio_register_device_iseries("viotape", i); | ||
122 | } | ||
123 | |||
124 | /** | ||
125 | * vio_match_device_iseries: - Tell if a iSeries VIO device matches a | ||
126 | * vio_device_id | ||
127 | */ | ||
128 | static int vio_match_device_iseries(const struct vio_device_id *id, | ||
129 | const struct vio_dev *dev) | ||
130 | { | ||
131 | return strncmp(dev->type, id->type, strlen(id->type)) == 0; | ||
132 | } | ||
133 | |||
134 | static struct vio_bus_ops vio_bus_ops_iseries = { | ||
135 | .match = vio_match_device_iseries, | ||
136 | }; | ||
137 | |||
138 | /** | ||
139 | * vio_bus_init_iseries: - Initialize the iSeries virtual IO bus | ||
140 | */ | ||
141 | static int __init vio_bus_init_iseries(void) | ||
142 | { | ||
143 | int err; | ||
144 | |||
145 | err = vio_bus_init(&vio_bus_ops_iseries); | ||
146 | if (err == 0) { | ||
147 | iommu_vio_init(); | ||
148 | vio_bus_device.iommu_table = &vio_iommu_table; | ||
149 | iSeries_vio_dev = &vio_bus_device.dev; | ||
150 | probe_bus_iseries(); | ||
151 | } | ||
152 | return err; | ||
153 | } | ||
154 | |||
155 | __initcall(vio_bus_init_iseries); | ||
diff --git a/arch/ppc64/kernel/lmb.c b/arch/ppc64/kernel/lmb.c index d6c6bd03d2a4..5adaca2ddc9d 100644 --- a/arch/ppc64/kernel/lmb.c +++ b/arch/ppc64/kernel/lmb.c | |||
@@ -28,33 +28,28 @@ void lmb_dump_all(void) | |||
28 | { | 28 | { |
29 | #ifdef DEBUG | 29 | #ifdef DEBUG |
30 | unsigned long i; | 30 | unsigned long i; |
31 | struct lmb *_lmb = &lmb; | ||
32 | 31 | ||
33 | udbg_printf("lmb_dump_all:\n"); | 32 | udbg_printf("lmb_dump_all:\n"); |
34 | udbg_printf(" memory.cnt = 0x%lx\n", | 33 | udbg_printf(" memory.cnt = 0x%lx\n", |
35 | _lmb->memory.cnt); | 34 | lmb.memory.cnt); |
36 | udbg_printf(" memory.size = 0x%lx\n", | 35 | udbg_printf(" memory.size = 0x%lx\n", |
37 | _lmb->memory.size); | 36 | lmb.memory.size); |
38 | for (i=0; i < _lmb->memory.cnt ;i++) { | 37 | for (i=0; i < lmb.memory.cnt ;i++) { |
39 | udbg_printf(" memory.region[0x%x].base = 0x%lx\n", | 38 | udbg_printf(" memory.region[0x%x].base = 0x%lx\n", |
40 | i, _lmb->memory.region[i].base); | 39 | i, lmb.memory.region[i].base); |
41 | udbg_printf(" .physbase = 0x%lx\n", | ||
42 | _lmb->memory.region[i].physbase); | ||
43 | udbg_printf(" .size = 0x%lx\n", | 40 | udbg_printf(" .size = 0x%lx\n", |
44 | _lmb->memory.region[i].size); | 41 | lmb.memory.region[i].size); |
45 | } | 42 | } |
46 | 43 | ||
47 | udbg_printf("\n reserved.cnt = 0x%lx\n", | 44 | udbg_printf("\n reserved.cnt = 0x%lx\n", |
48 | _lmb->reserved.cnt); | 45 | lmb.reserved.cnt); |
49 | udbg_printf(" reserved.size = 0x%lx\n", | 46 | udbg_printf(" reserved.size = 0x%lx\n", |
50 | _lmb->reserved.size); | 47 | lmb.reserved.size); |
51 | for (i=0; i < _lmb->reserved.cnt ;i++) { | 48 | for (i=0; i < lmb.reserved.cnt ;i++) { |
52 | udbg_printf(" reserved.region[0x%x].base = 0x%lx\n", | 49 | udbg_printf(" reserved.region[0x%x].base = 0x%lx\n", |
53 | i, _lmb->reserved.region[i].base); | 50 | i, lmb.reserved.region[i].base); |
54 | udbg_printf(" .physbase = 0x%lx\n", | ||
55 | _lmb->reserved.region[i].physbase); | ||
56 | udbg_printf(" .size = 0x%lx\n", | 51 | udbg_printf(" .size = 0x%lx\n", |
57 | _lmb->reserved.region[i].size); | 52 | lmb.reserved.region[i].size); |
58 | } | 53 | } |
59 | #endif /* DEBUG */ | 54 | #endif /* DEBUG */ |
60 | } | 55 | } |
@@ -98,7 +93,6 @@ lmb_coalesce_regions(struct lmb_region *rgn, unsigned long r1, unsigned long r2) | |||
98 | rgn->region[r1].size += rgn->region[r2].size; | 93 | rgn->region[r1].size += rgn->region[r2].size; |
99 | for (i=r2; i < rgn->cnt-1; i++) { | 94 | for (i=r2; i < rgn->cnt-1; i++) { |
100 | rgn->region[i].base = rgn->region[i+1].base; | 95 | rgn->region[i].base = rgn->region[i+1].base; |
101 | rgn->region[i].physbase = rgn->region[i+1].physbase; | ||
102 | rgn->region[i].size = rgn->region[i+1].size; | 96 | rgn->region[i].size = rgn->region[i+1].size; |
103 | } | 97 | } |
104 | rgn->cnt--; | 98 | rgn->cnt--; |
@@ -108,49 +102,29 @@ lmb_coalesce_regions(struct lmb_region *rgn, unsigned long r1, unsigned long r2) | |||
108 | void __init | 102 | void __init |
109 | lmb_init(void) | 103 | lmb_init(void) |
110 | { | 104 | { |
111 | struct lmb *_lmb = &lmb; | ||
112 | |||
113 | /* Create a dummy zero size LMB which will get coalesced away later. | 105 | /* Create a dummy zero size LMB which will get coalesced away later. |
114 | * This simplifies the lmb_add() code below... | 106 | * This simplifies the lmb_add() code below... |
115 | */ | 107 | */ |
116 | _lmb->memory.region[0].base = 0; | 108 | lmb.memory.region[0].base = 0; |
117 | _lmb->memory.region[0].size = 0; | 109 | lmb.memory.region[0].size = 0; |
118 | _lmb->memory.cnt = 1; | 110 | lmb.memory.cnt = 1; |
119 | 111 | ||
120 | /* Ditto. */ | 112 | /* Ditto. */ |
121 | _lmb->reserved.region[0].base = 0; | 113 | lmb.reserved.region[0].base = 0; |
122 | _lmb->reserved.region[0].size = 0; | 114 | lmb.reserved.region[0].size = 0; |
123 | _lmb->reserved.cnt = 1; | 115 | lmb.reserved.cnt = 1; |
124 | } | 116 | } |
125 | 117 | ||
126 | /* This routine called with relocation disabled. */ | 118 | /* This routine called with relocation disabled. */ |
127 | void __init | 119 | void __init |
128 | lmb_analyze(void) | 120 | lmb_analyze(void) |
129 | { | 121 | { |
130 | unsigned long i; | 122 | int i; |
131 | unsigned long mem_size = 0; | 123 | |
132 | unsigned long size_mask = 0; | 124 | lmb.memory.size = 0; |
133 | struct lmb *_lmb = &lmb; | ||
134 | #ifdef CONFIG_MSCHUNKS | ||
135 | unsigned long physbase = 0; | ||
136 | #endif | ||
137 | |||
138 | for (i=0; i < _lmb->memory.cnt; i++) { | ||
139 | unsigned long lmb_size; | ||
140 | |||
141 | lmb_size = _lmb->memory.region[i].size; | ||
142 | |||
143 | #ifdef CONFIG_MSCHUNKS | ||
144 | _lmb->memory.region[i].physbase = physbase; | ||
145 | physbase += lmb_size; | ||
146 | #else | ||
147 | _lmb->memory.region[i].physbase = _lmb->memory.region[i].base; | ||
148 | #endif | ||
149 | mem_size += lmb_size; | ||
150 | size_mask |= lmb_size; | ||
151 | } | ||
152 | 125 | ||
153 | _lmb->memory.size = mem_size; | 126 | for (i = 0; i < lmb.memory.cnt; i++) |
127 | lmb.memory.size += lmb.memory.region[i].size; | ||
154 | } | 128 | } |
155 | 129 | ||
156 | /* This routine called with relocation disabled. */ | 130 | /* This routine called with relocation disabled. */ |
@@ -168,7 +142,6 @@ lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size) | |||
168 | adjacent = lmb_addrs_adjacent(base,size,rgnbase,rgnsize); | 142 | adjacent = lmb_addrs_adjacent(base,size,rgnbase,rgnsize); |
169 | if ( adjacent > 0 ) { | 143 | if ( adjacent > 0 ) { |
170 | rgn->region[i].base -= size; | 144 | rgn->region[i].base -= size; |
171 | rgn->region[i].physbase -= size; | ||
172 | rgn->region[i].size += size; | 145 | rgn->region[i].size += size; |
173 | coalesced++; | 146 | coalesced++; |
174 | break; | 147 | break; |
@@ -195,11 +168,9 @@ lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size) | |||
195 | for (i=rgn->cnt-1; i >= 0; i--) { | 168 | for (i=rgn->cnt-1; i >= 0; i--) { |
196 | if (base < rgn->region[i].base) { | 169 | if (base < rgn->region[i].base) { |
197 | rgn->region[i+1].base = rgn->region[i].base; | 170 | rgn->region[i+1].base = rgn->region[i].base; |
198 | rgn->region[i+1].physbase = rgn->region[i].physbase; | ||
199 | rgn->region[i+1].size = rgn->region[i].size; | 171 | rgn->region[i+1].size = rgn->region[i].size; |
200 | } else { | 172 | } else { |
201 | rgn->region[i+1].base = base; | 173 | rgn->region[i+1].base = base; |
202 | rgn->region[i+1].physbase = lmb_abs_to_phys(base); | ||
203 | rgn->region[i+1].size = size; | 174 | rgn->region[i+1].size = size; |
204 | break; | 175 | break; |
205 | } | 176 | } |
@@ -213,12 +184,11 @@ lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size) | |||
213 | long __init | 184 | long __init |
214 | lmb_add(unsigned long base, unsigned long size) | 185 | lmb_add(unsigned long base, unsigned long size) |
215 | { | 186 | { |
216 | struct lmb *_lmb = &lmb; | 187 | struct lmb_region *_rgn = &(lmb.memory); |
217 | struct lmb_region *_rgn = &(_lmb->memory); | ||
218 | 188 | ||
219 | /* On pSeries LPAR systems, the first LMB is our RMO region. */ | 189 | /* On pSeries LPAR systems, the first LMB is our RMO region. */ |
220 | if ( base == 0 ) | 190 | if ( base == 0 ) |
221 | _lmb->rmo_size = size; | 191 | lmb.rmo_size = size; |
222 | 192 | ||
223 | return lmb_add_region(_rgn, base, size); | 193 | return lmb_add_region(_rgn, base, size); |
224 | 194 | ||
@@ -227,8 +197,7 @@ lmb_add(unsigned long base, unsigned long size) | |||
227 | long __init | 197 | long __init |
228 | lmb_reserve(unsigned long base, unsigned long size) | 198 | lmb_reserve(unsigned long base, unsigned long size) |
229 | { | 199 | { |
230 | struct lmb *_lmb = &lmb; | 200 | struct lmb_region *_rgn = &(lmb.reserved); |
231 | struct lmb_region *_rgn = &(_lmb->reserved); | ||
232 | 201 | ||
233 | return lmb_add_region(_rgn, base, size); | 202 | return lmb_add_region(_rgn, base, size); |
234 | } | 203 | } |
@@ -260,13 +229,10 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr) | |||
260 | { | 229 | { |
261 | long i, j; | 230 | long i, j; |
262 | unsigned long base = 0; | 231 | unsigned long base = 0; |
263 | struct lmb *_lmb = &lmb; | ||
264 | struct lmb_region *_mem = &(_lmb->memory); | ||
265 | struct lmb_region *_rsv = &(_lmb->reserved); | ||
266 | 232 | ||
267 | for (i=_mem->cnt-1; i >= 0; i--) { | 233 | for (i=lmb.memory.cnt-1; i >= 0; i--) { |
268 | unsigned long lmbbase = _mem->region[i].base; | 234 | unsigned long lmbbase = lmb.memory.region[i].base; |
269 | unsigned long lmbsize = _mem->region[i].size; | 235 | unsigned long lmbsize = lmb.memory.region[i].size; |
270 | 236 | ||
271 | if ( max_addr == LMB_ALLOC_ANYWHERE ) | 237 | if ( max_addr == LMB_ALLOC_ANYWHERE ) |
272 | base = _ALIGN_DOWN(lmbbase+lmbsize-size, align); | 238 | base = _ALIGN_DOWN(lmbbase+lmbsize-size, align); |
@@ -276,8 +242,8 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr) | |||
276 | continue; | 242 | continue; |
277 | 243 | ||
278 | while ( (lmbbase <= base) && | 244 | while ( (lmbbase <= base) && |
279 | ((j = lmb_overlaps_region(_rsv,base,size)) >= 0) ) { | 245 | ((j = lmb_overlaps_region(&lmb.reserved,base,size)) >= 0) ) { |
280 | base = _ALIGN_DOWN(_rsv->region[j].base-size, align); | 246 | base = _ALIGN_DOWN(lmb.reserved.region[j].base-size, align); |
281 | } | 247 | } |
282 | 248 | ||
283 | if ( (base != 0) && (lmbbase <= base) ) | 249 | if ( (base != 0) && (lmbbase <= base) ) |
@@ -287,62 +253,24 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr) | |||
287 | if ( i < 0 ) | 253 | if ( i < 0 ) |
288 | return 0; | 254 | return 0; |
289 | 255 | ||
290 | lmb_add_region(_rsv, base, size); | 256 | lmb_add_region(&lmb.reserved, base, size); |
291 | 257 | ||
292 | return base; | 258 | return base; |
293 | } | 259 | } |
294 | 260 | ||
261 | /* You must call lmb_analyze() before this. */ | ||
295 | unsigned long __init | 262 | unsigned long __init |
296 | lmb_phys_mem_size(void) | 263 | lmb_phys_mem_size(void) |
297 | { | 264 | { |
298 | struct lmb *_lmb = &lmb; | 265 | return lmb.memory.size; |
299 | #ifdef CONFIG_MSCHUNKS | ||
300 | return _lmb->memory.size; | ||
301 | #else | ||
302 | struct lmb_region *_mem = &(_lmb->memory); | ||
303 | unsigned long total = 0; | ||
304 | int i; | ||
305 | |||
306 | /* add all physical memory to the bootmem map */ | ||
307 | for (i=0; i < _mem->cnt; i++) | ||
308 | total += _mem->region[i].size; | ||
309 | return total; | ||
310 | #endif /* CONFIG_MSCHUNKS */ | ||
311 | } | 266 | } |
312 | 267 | ||
313 | unsigned long __init | 268 | unsigned long __init |
314 | lmb_end_of_DRAM(void) | 269 | lmb_end_of_DRAM(void) |
315 | { | 270 | { |
316 | struct lmb *_lmb = &lmb; | 271 | int idx = lmb.memory.cnt - 1; |
317 | struct lmb_region *_mem = &(_lmb->memory); | ||
318 | int idx = _mem->cnt - 1; | ||
319 | |||
320 | #ifdef CONFIG_MSCHUNKS | ||
321 | return (_mem->region[idx].physbase + _mem->region[idx].size); | ||
322 | #else | ||
323 | return (_mem->region[idx].base + _mem->region[idx].size); | ||
324 | #endif /* CONFIG_MSCHUNKS */ | ||
325 | |||
326 | return 0; | ||
327 | } | ||
328 | |||
329 | unsigned long __init | ||
330 | lmb_abs_to_phys(unsigned long aa) | ||
331 | { | ||
332 | unsigned long i, pa = aa; | ||
333 | struct lmb *_lmb = &lmb; | ||
334 | struct lmb_region *_mem = &(_lmb->memory); | ||
335 | |||
336 | for (i=0; i < _mem->cnt; i++) { | ||
337 | unsigned long lmbbase = _mem->region[i].base; | ||
338 | unsigned long lmbsize = _mem->region[i].size; | ||
339 | if ( lmb_addrs_overlap(aa,1,lmbbase,lmbsize) ) { | ||
340 | pa = _mem->region[i].physbase + (aa - lmbbase); | ||
341 | break; | ||
342 | } | ||
343 | } | ||
344 | 272 | ||
345 | return pa; | 273 | return (lmb.memory.region[idx].base + lmb.memory.region[idx].size); |
346 | } | 274 | } |
347 | 275 | ||
348 | /* | 276 | /* |
@@ -353,20 +281,19 @@ void __init lmb_enforce_memory_limit(void) | |||
353 | { | 281 | { |
354 | extern unsigned long memory_limit; | 282 | extern unsigned long memory_limit; |
355 | unsigned long i, limit; | 283 | unsigned long i, limit; |
356 | struct lmb_region *mem = &(lmb.memory); | ||
357 | 284 | ||
358 | if (! memory_limit) | 285 | if (! memory_limit) |
359 | return; | 286 | return; |
360 | 287 | ||
361 | limit = memory_limit; | 288 | limit = memory_limit; |
362 | for (i = 0; i < mem->cnt; i++) { | 289 | for (i = 0; i < lmb.memory.cnt; i++) { |
363 | if (limit > mem->region[i].size) { | 290 | if (limit > lmb.memory.region[i].size) { |
364 | limit -= mem->region[i].size; | 291 | limit -= lmb.memory.region[i].size; |
365 | continue; | 292 | continue; |
366 | } | 293 | } |
367 | 294 | ||
368 | mem->region[i].size = limit; | 295 | lmb.memory.region[i].size = limit; |
369 | mem->cnt = i + 1; | 296 | lmb.memory.cnt = i + 1; |
370 | break; | 297 | break; |
371 | } | 298 | } |
372 | } | 299 | } |
diff --git a/arch/ppc64/kernel/lparcfg.c b/arch/ppc64/kernel/lparcfg.c index 02e96627fa66..edad361a8db0 100644 --- a/arch/ppc64/kernel/lparcfg.c +++ b/arch/ppc64/kernel/lparcfg.c | |||
@@ -29,7 +29,7 @@ | |||
29 | #include <asm/iSeries/HvLpConfig.h> | 29 | #include <asm/iSeries/HvLpConfig.h> |
30 | #include <asm/lppaca.h> | 30 | #include <asm/lppaca.h> |
31 | #include <asm/hvcall.h> | 31 | #include <asm/hvcall.h> |
32 | #include <asm/cputable.h> | 32 | #include <asm/firmware.h> |
33 | #include <asm/rtas.h> | 33 | #include <asm/rtas.h> |
34 | #include <asm/system.h> | 34 | #include <asm/system.h> |
35 | #include <asm/time.h> | 35 | #include <asm/time.h> |
@@ -273,6 +273,7 @@ static void parse_system_parameter_string(struct seq_file *m) | |||
273 | if (!workbuffer) { | 273 | if (!workbuffer) { |
274 | printk(KERN_ERR "%s %s kmalloc failure at line %d \n", | 274 | printk(KERN_ERR "%s %s kmalloc failure at line %d \n", |
275 | __FILE__, __FUNCTION__, __LINE__); | 275 | __FILE__, __FUNCTION__, __LINE__); |
276 | kfree(local_buffer); | ||
276 | return; | 277 | return; |
277 | } | 278 | } |
278 | #ifdef LPARCFG_DEBUG | 279 | #ifdef LPARCFG_DEBUG |
@@ -377,7 +378,7 @@ static int lparcfg_data(struct seq_file *m, void *v) | |||
377 | 378 | ||
378 | partition_active_processors = lparcfg_count_active_processors(); | 379 | partition_active_processors = lparcfg_count_active_processors(); |
379 | 380 | ||
380 | if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { | 381 | if (firmware_has_feature(FW_FEATURE_SPLPAR)) { |
381 | unsigned long h_entitled, h_unallocated; | 382 | unsigned long h_entitled, h_unallocated; |
382 | unsigned long h_aggregation, h_resource; | 383 | unsigned long h_aggregation, h_resource; |
383 | unsigned long pool_idle_time, pool_procs; | 384 | unsigned long pool_idle_time, pool_procs; |
@@ -571,7 +572,7 @@ int __init lparcfg_init(void) | |||
571 | mode_t mode = S_IRUSR; | 572 | mode_t mode = S_IRUSR; |
572 | 573 | ||
573 | /* Allow writing if we have FW_FEATURE_SPLPAR */ | 574 | /* Allow writing if we have FW_FEATURE_SPLPAR */ |
574 | if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { | 575 | if (firmware_has_feature(FW_FEATURE_SPLPAR)) { |
575 | lparcfg_fops.write = lparcfg_write; | 576 | lparcfg_fops.write = lparcfg_write; |
576 | mode |= S_IWUSR; | 577 | mode |= S_IWUSR; |
577 | } | 578 | } |
diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S index a05b50b738e9..474df0a862bf 100644 --- a/arch/ppc64/kernel/misc.S +++ b/arch/ppc64/kernel/misc.S | |||
@@ -680,6 +680,104 @@ _GLOBAL(kernel_thread) | |||
680 | ld r30,-16(r1) | 680 | ld r30,-16(r1) |
681 | blr | 681 | blr |
682 | 682 | ||
683 | /* | ||
684 | * disable_kernel_fp() | ||
685 | * Disable the FPU. | ||
686 | */ | ||
687 | _GLOBAL(disable_kernel_fp) | ||
688 | mfmsr r3 | ||
689 | rldicl r0,r3,(63-MSR_FP_LG),1 | ||
690 | rldicl r3,r0,(MSR_FP_LG+1),0 | ||
691 | mtmsrd r3 /* disable use of fpu now */ | ||
692 | isync | ||
693 | blr | ||
694 | |||
695 | /* | ||
696 | * giveup_fpu(tsk) | ||
697 | * Disable FP for the task given as the argument, | ||
698 | * and save the floating-point registers in its thread_struct. | ||
699 | * Enables the FPU for use in the kernel on return. | ||
700 | */ | ||
701 | _GLOBAL(giveup_fpu) | ||
702 | mfmsr r5 | ||
703 | ori r5,r5,MSR_FP | ||
704 | mtmsrd r5 /* enable use of fpu now */ | ||
705 | isync | ||
706 | cmpdi 0,r3,0 | ||
707 | beqlr- /* if no previous owner, done */ | ||
708 | addi r3,r3,THREAD /* want THREAD of task */ | ||
709 | ld r5,PT_REGS(r3) | ||
710 | cmpdi 0,r5,0 | ||
711 | SAVE_32FPRS(0, r3) | ||
712 | mffs fr0 | ||
713 | stfd fr0,THREAD_FPSCR(r3) | ||
714 | beq 1f | ||
715 | ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) | ||
716 | li r3,MSR_FP|MSR_FE0|MSR_FE1 | ||
717 | andc r4,r4,r3 /* disable FP for previous task */ | ||
718 | std r4,_MSR-STACK_FRAME_OVERHEAD(r5) | ||
719 | 1: | ||
720 | #ifndef CONFIG_SMP | ||
721 | li r5,0 | ||
722 | ld r4,last_task_used_math@got(r2) | ||
723 | std r5,0(r4) | ||
724 | #endif /* CONFIG_SMP */ | ||
725 | blr | ||
726 | |||
727 | #ifdef CONFIG_ALTIVEC | ||
728 | |||
729 | #if 0 /* this has no callers for now */ | ||
730 | /* | ||
731 | * disable_kernel_altivec() | ||
732 | * Disable the VMX. | ||
733 | */ | ||
734 | _GLOBAL(disable_kernel_altivec) | ||
735 | mfmsr r3 | ||
736 | rldicl r0,r3,(63-MSR_VEC_LG),1 | ||
737 | rldicl r3,r0,(MSR_VEC_LG+1),0 | ||
738 | mtmsrd r3 /* disable use of VMX now */ | ||
739 | isync | ||
740 | blr | ||
741 | #endif /* 0 */ | ||
742 | |||
743 | /* | ||
744 | * giveup_altivec(tsk) | ||
745 | * Disable VMX for the task given as the argument, | ||
746 | * and save the vector registers in its thread_struct. | ||
747 | * Enables the VMX for use in the kernel on return. | ||
748 | */ | ||
749 | _GLOBAL(giveup_altivec) | ||
750 | mfmsr r5 | ||
751 | oris r5,r5,MSR_VEC@h | ||
752 | mtmsrd r5 /* enable use of VMX now */ | ||
753 | isync | ||
754 | cmpdi 0,r3,0 | ||
755 | beqlr- /* if no previous owner, done */ | ||
756 | addi r3,r3,THREAD /* want THREAD of task */ | ||
757 | ld r5,PT_REGS(r3) | ||
758 | cmpdi 0,r5,0 | ||
759 | SAVE_32VRS(0,r4,r3) | ||
760 | mfvscr vr0 | ||
761 | li r4,THREAD_VSCR | ||
762 | stvx vr0,r4,r3 | ||
763 | beq 1f | ||
764 | ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) | ||
765 | lis r3,MSR_VEC@h | ||
766 | andc r4,r4,r3 /* disable FP for previous task */ | ||
767 | std r4,_MSR-STACK_FRAME_OVERHEAD(r5) | ||
768 | 1: | ||
769 | #ifndef CONFIG_SMP | ||
770 | li r5,0 | ||
771 | ld r4,last_task_used_altivec@got(r2) | ||
772 | std r5,0(r4) | ||
773 | #endif /* CONFIG_SMP */ | ||
774 | blr | ||
775 | |||
776 | #endif /* CONFIG_ALTIVEC */ | ||
777 | |||
778 | _GLOBAL(__setup_cpu_power3) | ||
779 | blr | ||
780 | |||
683 | /* kexec_wait(phys_cpu) | 781 | /* kexec_wait(phys_cpu) |
684 | * | 782 | * |
685 | * wait for the flag to change, indicating this kernel is going away but | 783 | * wait for the flag to change, indicating this kernel is going away but |
diff --git a/arch/ppc64/kernel/of_device.c b/arch/ppc64/kernel/of_device.c index b80e81984ba8..da580812ddfe 100644 --- a/arch/ppc64/kernel/of_device.c +++ b/arch/ppc64/kernel/of_device.c | |||
@@ -236,7 +236,6 @@ void of_device_unregister(struct of_device *ofdev) | |||
236 | struct of_device* of_platform_device_create(struct device_node *np, const char *bus_id) | 236 | struct of_device* of_platform_device_create(struct device_node *np, const char *bus_id) |
237 | { | 237 | { |
238 | struct of_device *dev; | 238 | struct of_device *dev; |
239 | u32 *reg; | ||
240 | 239 | ||
241 | dev = kmalloc(sizeof(*dev), GFP_KERNEL); | 240 | dev = kmalloc(sizeof(*dev), GFP_KERNEL); |
242 | if (!dev) | 241 | if (!dev) |
@@ -250,7 +249,6 @@ struct of_device* of_platform_device_create(struct device_node *np, const char * | |||
250 | dev->dev.bus = &of_platform_bus_type; | 249 | dev->dev.bus = &of_platform_bus_type; |
251 | dev->dev.release = of_release_dev; | 250 | dev->dev.release = of_release_dev; |
252 | 251 | ||
253 | reg = (u32 *)get_property(np, "reg", NULL); | ||
254 | strlcpy(dev->dev.bus_id, bus_id, BUS_ID_SIZE); | 252 | strlcpy(dev->dev.bus_id, bus_id, BUS_ID_SIZE); |
255 | 253 | ||
256 | if (of_device_register(dev) != 0) { | 254 | if (of_device_register(dev) != 0) { |
diff --git a/arch/ppc64/kernel/pSeries_iommu.c b/arch/ppc64/kernel/pSeries_iommu.c index 69130522a87e..9d5e1e7fc389 100644 --- a/arch/ppc64/kernel/pSeries_iommu.c +++ b/arch/ppc64/kernel/pSeries_iommu.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <asm/plpar_wrappers.h> | 45 | #include <asm/plpar_wrappers.h> |
46 | #include <asm/pSeries_reconfig.h> | 46 | #include <asm/pSeries_reconfig.h> |
47 | #include <asm/systemcfg.h> | 47 | #include <asm/systemcfg.h> |
48 | #include <asm/firmware.h> | ||
48 | #include "pci.h" | 49 | #include "pci.h" |
49 | 50 | ||
50 | #define DBG(fmt...) | 51 | #define DBG(fmt...) |
@@ -546,7 +547,7 @@ void iommu_init_early_pSeries(void) | |||
546 | } | 547 | } |
547 | 548 | ||
548 | if (systemcfg->platform & PLATFORM_LPAR) { | 549 | if (systemcfg->platform & PLATFORM_LPAR) { |
549 | if (cur_cpu_spec->firmware_features & FW_FEATURE_MULTITCE) { | 550 | if (firmware_has_feature(FW_FEATURE_MULTITCE)) { |
550 | ppc_md.tce_build = tce_buildmulti_pSeriesLP; | 551 | ppc_md.tce_build = tce_buildmulti_pSeriesLP; |
551 | ppc_md.tce_free = tce_freemulti_pSeriesLP; | 552 | ppc_md.tce_free = tce_freemulti_pSeriesLP; |
552 | } else { | 553 | } else { |
diff --git a/arch/ppc64/kernel/pSeries_lpar.c b/arch/ppc64/kernel/pSeries_lpar.c index 74dd144dcce8..0a3ddc9227c5 100644 --- a/arch/ppc64/kernel/pSeries_lpar.c +++ b/arch/ppc64/kernel/pSeries_lpar.c | |||
@@ -52,7 +52,6 @@ EXPORT_SYMBOL(plpar_hcall_4out); | |||
52 | EXPORT_SYMBOL(plpar_hcall_norets); | 52 | EXPORT_SYMBOL(plpar_hcall_norets); |
53 | EXPORT_SYMBOL(plpar_hcall_8arg_2ret); | 53 | EXPORT_SYMBOL(plpar_hcall_8arg_2ret); |
54 | 54 | ||
55 | extern void fw_feature_init(void); | ||
56 | extern void pSeries_find_serial_port(void); | 55 | extern void pSeries_find_serial_port(void); |
57 | 56 | ||
58 | 57 | ||
@@ -279,7 +278,6 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group, | |||
279 | unsigned long va, unsigned long prpn, | 278 | unsigned long va, unsigned long prpn, |
280 | unsigned long vflags, unsigned long rflags) | 279 | unsigned long vflags, unsigned long rflags) |
281 | { | 280 | { |
282 | unsigned long arpn = physRpn_to_absRpn(prpn); | ||
283 | unsigned long lpar_rc; | 281 | unsigned long lpar_rc; |
284 | unsigned long flags; | 282 | unsigned long flags; |
285 | unsigned long slot; | 283 | unsigned long slot; |
@@ -290,7 +288,7 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group, | |||
290 | if (vflags & HPTE_V_LARGE) | 288 | if (vflags & HPTE_V_LARGE) |
291 | hpte_v &= ~(1UL << HPTE_V_AVPN_SHIFT); | 289 | hpte_v &= ~(1UL << HPTE_V_AVPN_SHIFT); |
292 | 290 | ||
293 | hpte_r = (arpn << HPTE_R_RPN_SHIFT) | rflags; | 291 | hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags; |
294 | 292 | ||
295 | /* Now fill in the actual HPTE */ | 293 | /* Now fill in the actual HPTE */ |
296 | /* Set CEC cookie to 0 */ | 294 | /* Set CEC cookie to 0 */ |
diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c index 5bec956e44a0..f0f0630cf07c 100644 --- a/arch/ppc64/kernel/pSeries_setup.c +++ b/arch/ppc64/kernel/pSeries_setup.c | |||
@@ -60,7 +60,8 @@ | |||
60 | #include <asm/nvram.h> | 60 | #include <asm/nvram.h> |
61 | #include <asm/plpar_wrappers.h> | 61 | #include <asm/plpar_wrappers.h> |
62 | #include <asm/xics.h> | 62 | #include <asm/xics.h> |
63 | #include <asm/cputable.h> | 63 | #include <asm/firmware.h> |
64 | #include <asm/pmc.h> | ||
64 | 65 | ||
65 | #include "i8259.h" | 66 | #include "i8259.h" |
66 | #include "mpic.h" | 67 | #include "mpic.h" |
@@ -187,6 +188,21 @@ static void __init pSeries_setup_mpic(void) | |||
187 | " MPIC "); | 188 | " MPIC "); |
188 | } | 189 | } |
189 | 190 | ||
191 | static void pseries_lpar_enable_pmcs(void) | ||
192 | { | ||
193 | unsigned long set, reset; | ||
194 | |||
195 | power4_enable_pmcs(); | ||
196 | |||
197 | set = 1UL << 63; | ||
198 | reset = 0; | ||
199 | plpar_hcall_norets(H_PERFMON, set, reset); | ||
200 | |||
201 | /* instruct hypervisor to maintain PMCs */ | ||
202 | if (firmware_has_feature(FW_FEATURE_SPLPAR)) | ||
203 | get_paca()->lppaca.pmcregs_in_use = 1; | ||
204 | } | ||
205 | |||
190 | static void __init pSeries_setup_arch(void) | 206 | static void __init pSeries_setup_arch(void) |
191 | { | 207 | { |
192 | /* Fixup ppc_md depending on the type of interrupt controller */ | 208 | /* Fixup ppc_md depending on the type of interrupt controller */ |
@@ -231,11 +247,9 @@ static void __init pSeries_setup_arch(void) | |||
231 | 247 | ||
232 | pSeries_nvram_init(); | 248 | pSeries_nvram_init(); |
233 | 249 | ||
234 | if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) | ||
235 | vpa_init(boot_cpuid); | ||
236 | |||
237 | /* Choose an idle loop */ | 250 | /* Choose an idle loop */ |
238 | if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { | 251 | if (firmware_has_feature(FW_FEATURE_SPLPAR)) { |
252 | vpa_init(boot_cpuid); | ||
239 | if (get_paca()->lppaca.shared_proc) { | 253 | if (get_paca()->lppaca.shared_proc) { |
240 | printk(KERN_INFO "Using shared processor idle loop\n"); | 254 | printk(KERN_INFO "Using shared processor idle loop\n"); |
241 | ppc_md.idle_loop = pseries_shared_idle; | 255 | ppc_md.idle_loop = pseries_shared_idle; |
@@ -247,6 +261,11 @@ static void __init pSeries_setup_arch(void) | |||
247 | printk(KERN_INFO "Using default idle loop\n"); | 261 | printk(KERN_INFO "Using default idle loop\n"); |
248 | ppc_md.idle_loop = default_idle; | 262 | ppc_md.idle_loop = default_idle; |
249 | } | 263 | } |
264 | |||
265 | if (systemcfg->platform & PLATFORM_LPAR) | ||
266 | ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; | ||
267 | else | ||
268 | ppc_md.enable_pmcs = power4_enable_pmcs; | ||
250 | } | 269 | } |
251 | 270 | ||
252 | static int __init pSeries_init_panel(void) | 271 | static int __init pSeries_init_panel(void) |
@@ -260,11 +279,11 @@ static int __init pSeries_init_panel(void) | |||
260 | arch_initcall(pSeries_init_panel); | 279 | arch_initcall(pSeries_init_panel); |
261 | 280 | ||
262 | 281 | ||
263 | /* Build up the firmware_features bitmask field | 282 | /* Build up the ppc64_firmware_features bitmask field |
264 | * using contents of device-tree/ibm,hypertas-functions. | 283 | * using contents of device-tree/ibm,hypertas-functions. |
265 | * Ultimately this functionality may be moved into prom.c prom_init(). | 284 | * Ultimately this functionality may be moved into prom.c prom_init(). |
266 | */ | 285 | */ |
267 | void __init fw_feature_init(void) | 286 | static void __init fw_feature_init(void) |
268 | { | 287 | { |
269 | struct device_node * dn; | 288 | struct device_node * dn; |
270 | char * hypertas; | 289 | char * hypertas; |
@@ -272,7 +291,7 @@ void __init fw_feature_init(void) | |||
272 | 291 | ||
273 | DBG(" -> fw_feature_init()\n"); | 292 | DBG(" -> fw_feature_init()\n"); |
274 | 293 | ||
275 | cur_cpu_spec->firmware_features = 0; | 294 | ppc64_firmware_features = 0; |
276 | dn = of_find_node_by_path("/rtas"); | 295 | dn = of_find_node_by_path("/rtas"); |
277 | if (dn == NULL) { | 296 | if (dn == NULL) { |
278 | printk(KERN_ERR "WARNING ! Cannot find RTAS in device-tree !\n"); | 297 | printk(KERN_ERR "WARNING ! Cannot find RTAS in device-tree !\n"); |
@@ -288,7 +307,7 @@ void __init fw_feature_init(void) | |||
288 | if ((firmware_features_table[i].name) && | 307 | if ((firmware_features_table[i].name) && |
289 | (strcmp(firmware_features_table[i].name,hypertas))==0) { | 308 | (strcmp(firmware_features_table[i].name,hypertas))==0) { |
290 | /* we have a match */ | 309 | /* we have a match */ |
291 | cur_cpu_spec->firmware_features |= | 310 | ppc64_firmware_features |= |
292 | (firmware_features_table[i].val); | 311 | (firmware_features_table[i].val); |
293 | break; | 312 | break; |
294 | } | 313 | } |
@@ -302,7 +321,7 @@ void __init fw_feature_init(void) | |||
302 | of_node_put(dn); | 321 | of_node_put(dn); |
303 | no_rtas: | 322 | no_rtas: |
304 | printk(KERN_INFO "firmware_features = 0x%lx\n", | 323 | printk(KERN_INFO "firmware_features = 0x%lx\n", |
305 | cur_cpu_spec->firmware_features); | 324 | ppc64_firmware_features); |
306 | 325 | ||
307 | DBG(" <- fw_feature_init()\n"); | 326 | DBG(" <- fw_feature_init()\n"); |
308 | } | 327 | } |
diff --git a/arch/ppc64/kernel/pSeries_smp.c b/arch/ppc64/kernel/pSeries_smp.c index 62c55a123560..79c7f3223665 100644 --- a/arch/ppc64/kernel/pSeries_smp.c +++ b/arch/ppc64/kernel/pSeries_smp.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <asm/machdep.h> | 41 | #include <asm/machdep.h> |
42 | #include <asm/xics.h> | 42 | #include <asm/xics.h> |
43 | #include <asm/cputable.h> | 43 | #include <asm/cputable.h> |
44 | #include <asm/firmware.h> | ||
44 | #include <asm/system.h> | 45 | #include <asm/system.h> |
45 | #include <asm/rtas.h> | 46 | #include <asm/rtas.h> |
46 | #include <asm/plpar_wrappers.h> | 47 | #include <asm/plpar_wrappers.h> |
@@ -326,7 +327,7 @@ static void __devinit smp_xics_setup_cpu(int cpu) | |||
326 | if (cpu != boot_cpuid) | 327 | if (cpu != boot_cpuid) |
327 | xics_setup_cpu(); | 328 | xics_setup_cpu(); |
328 | 329 | ||
329 | if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) | 330 | if (firmware_has_feature(FW_FEATURE_SPLPAR)) |
330 | vpa_init(cpu); | 331 | vpa_init(cpu); |
331 | 332 | ||
332 | cpu_clear(cpu, of_spin_map); | 333 | cpu_clear(cpu, of_spin_map); |
diff --git a/arch/ppc64/kernel/pSeries_vio.c b/arch/ppc64/kernel/pSeries_vio.c new file mode 100644 index 000000000000..e0ae06f58f86 --- /dev/null +++ b/arch/ppc64/kernel/pSeries_vio.c | |||
@@ -0,0 +1,273 @@ | |||
1 | /* | ||
2 | * IBM PowerPC pSeries Virtual I/O Infrastructure Support. | ||
3 | * | ||
4 | * Copyright (c) 2003-2005 IBM Corp. | ||
5 | * Dave Engebretsen engebret@us.ibm.com | ||
6 | * Santiago Leon santil@us.ibm.com | ||
7 | * Hollis Blanchard <hollisb@us.ibm.com> | ||
8 | * Stephen Rothwell | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License | ||
12 | * as published by the Free Software Foundation; either version | ||
13 | * 2 of the License, or (at your option) any later version. | ||
14 | */ | ||
15 | |||
16 | #include <linux/init.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/mm.h> | ||
19 | #include <linux/kobject.h> | ||
20 | #include <asm/iommu.h> | ||
21 | #include <asm/dma.h> | ||
22 | #include <asm/prom.h> | ||
23 | #include <asm/vio.h> | ||
24 | #include <asm/hvcall.h> | ||
25 | |||
26 | extern struct subsystem devices_subsys; /* needed for vio_find_name() */ | ||
27 | |||
28 | static void probe_bus_pseries(void) | ||
29 | { | ||
30 | struct device_node *node_vroot, *of_node; | ||
31 | |||
32 | node_vroot = find_devices("vdevice"); | ||
33 | if ((node_vroot == NULL) || (node_vroot->child == NULL)) | ||
34 | /* this machine doesn't do virtual IO, and that's ok */ | ||
35 | return; | ||
36 | |||
37 | /* | ||
38 | * Create struct vio_devices for each virtual device in the device tree. | ||
39 | * Drivers will associate with them later. | ||
40 | */ | ||
41 | for (of_node = node_vroot->child; of_node != NULL; | ||
42 | of_node = of_node->sibling) { | ||
43 | printk(KERN_DEBUG "%s: processing %p\n", __FUNCTION__, of_node); | ||
44 | vio_register_device_node(of_node); | ||
45 | } | ||
46 | } | ||
47 | |||
48 | /** | ||
49 | * vio_match_device_pseries: - Tell if a pSeries VIO device matches a | ||
50 | * vio_device_id | ||
51 | */ | ||
52 | static int vio_match_device_pseries(const struct vio_device_id *id, | ||
53 | const struct vio_dev *dev) | ||
54 | { | ||
55 | return (strncmp(dev->type, id->type, strlen(id->type)) == 0) && | ||
56 | device_is_compatible(dev->dev.platform_data, id->compat); | ||
57 | } | ||
58 | |||
59 | static void vio_release_device_pseries(struct device *dev) | ||
60 | { | ||
61 | /* XXX free TCE table */ | ||
62 | of_node_put(dev->platform_data); | ||
63 | } | ||
64 | |||
65 | static ssize_t viodev_show_devspec(struct device *dev, | ||
66 | struct device_attribute *attr, char *buf) | ||
67 | { | ||
68 | struct device_node *of_node = dev->platform_data; | ||
69 | |||
70 | return sprintf(buf, "%s\n", of_node->full_name); | ||
71 | } | ||
72 | DEVICE_ATTR(devspec, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_devspec, NULL); | ||
73 | |||
74 | static void vio_unregister_device_pseries(struct vio_dev *viodev) | ||
75 | { | ||
76 | device_remove_file(&viodev->dev, &dev_attr_devspec); | ||
77 | } | ||
78 | |||
79 | static struct vio_bus_ops vio_bus_ops_pseries = { | ||
80 | .match = vio_match_device_pseries, | ||
81 | .unregister_device = vio_unregister_device_pseries, | ||
82 | .release_device = vio_release_device_pseries, | ||
83 | }; | ||
84 | |||
85 | /** | ||
86 | * vio_bus_init_pseries: - Initialize the pSeries virtual IO bus | ||
87 | */ | ||
88 | static int __init vio_bus_init_pseries(void) | ||
89 | { | ||
90 | int err; | ||
91 | |||
92 | err = vio_bus_init(&vio_bus_ops_pseries); | ||
93 | if (err == 0) | ||
94 | probe_bus_pseries(); | ||
95 | return err; | ||
96 | } | ||
97 | |||
98 | __initcall(vio_bus_init_pseries); | ||
99 | |||
100 | /** | ||
101 | * vio_build_iommu_table: - gets the dma information from OF and | ||
102 | * builds the TCE tree. | ||
103 | * @dev: the virtual device. | ||
104 | * | ||
105 | * Returns a pointer to the built tce tree, or NULL if it can't | ||
106 | * find property. | ||
107 | */ | ||
108 | static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) | ||
109 | { | ||
110 | unsigned int *dma_window; | ||
111 | struct iommu_table *newTceTable; | ||
112 | unsigned long offset; | ||
113 | int dma_window_property_size; | ||
114 | |||
115 | dma_window = (unsigned int *) get_property(dev->dev.platform_data, "ibm,my-dma-window", &dma_window_property_size); | ||
116 | if(!dma_window) { | ||
117 | return NULL; | ||
118 | } | ||
119 | |||
120 | newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL); | ||
121 | |||
122 | /* There should be some code to extract the phys-encoded offset | ||
123 | using prom_n_addr_cells(). However, according to a comment | ||
124 | on earlier versions, it's always zero, so we don't bother */ | ||
125 | offset = dma_window[1] >> PAGE_SHIFT; | ||
126 | |||
127 | /* TCE table size - measured in tce entries */ | ||
128 | newTceTable->it_size = dma_window[4] >> PAGE_SHIFT; | ||
129 | /* offset for VIO should always be 0 */ | ||
130 | newTceTable->it_offset = offset; | ||
131 | newTceTable->it_busno = 0; | ||
132 | newTceTable->it_index = (unsigned long)dma_window[0]; | ||
133 | newTceTable->it_type = TCE_VB; | ||
134 | |||
135 | return iommu_init_table(newTceTable); | ||
136 | } | ||
137 | |||
138 | /** | ||
139 | * vio_register_device_node: - Register a new vio device. | ||
140 | * @of_node: The OF node for this device. | ||
141 | * | ||
142 | * Creates and initializes a vio_dev structure from the data in | ||
143 | * of_node (dev.platform_data) and adds it to the list of virtual devices. | ||
144 | * Returns a pointer to the created vio_dev or NULL if node has | ||
145 | * NULL device_type or compatible fields. | ||
146 | */ | ||
147 | struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node) | ||
148 | { | ||
149 | struct vio_dev *viodev; | ||
150 | unsigned int *unit_address; | ||
151 | unsigned int *irq_p; | ||
152 | |||
153 | /* we need the 'device_type' property, in order to match with drivers */ | ||
154 | if ((NULL == of_node->type)) { | ||
155 | printk(KERN_WARNING | ||
156 | "%s: node %s missing 'device_type'\n", __FUNCTION__, | ||
157 | of_node->name ? of_node->name : "<unknown>"); | ||
158 | return NULL; | ||
159 | } | ||
160 | |||
161 | unit_address = (unsigned int *)get_property(of_node, "reg", NULL); | ||
162 | if (!unit_address) { | ||
163 | printk(KERN_WARNING "%s: node %s missing 'reg'\n", __FUNCTION__, | ||
164 | of_node->name ? of_node->name : "<unknown>"); | ||
165 | return NULL; | ||
166 | } | ||
167 | |||
168 | /* allocate a vio_dev for this node */ | ||
169 | viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); | ||
170 | if (!viodev) { | ||
171 | return NULL; | ||
172 | } | ||
173 | memset(viodev, 0, sizeof(struct vio_dev)); | ||
174 | |||
175 | viodev->dev.platform_data = of_node_get(of_node); | ||
176 | |||
177 | viodev->irq = NO_IRQ; | ||
178 | irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL); | ||
179 | if (irq_p) { | ||
180 | int virq = virt_irq_create_mapping(*irq_p); | ||
181 | if (virq == NO_IRQ) { | ||
182 | printk(KERN_ERR "Unable to allocate interrupt " | ||
183 | "number for %s\n", of_node->full_name); | ||
184 | } else | ||
185 | viodev->irq = irq_offset_up(virq); | ||
186 | } | ||
187 | |||
188 | snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address); | ||
189 | viodev->name = of_node->name; | ||
190 | viodev->type = of_node->type; | ||
191 | viodev->unit_address = *unit_address; | ||
192 | viodev->iommu_table = vio_build_iommu_table(viodev); | ||
193 | |||
194 | /* register with generic device framework */ | ||
195 | if (vio_register_device(viodev) == NULL) { | ||
196 | /* XXX free TCE table */ | ||
197 | kfree(viodev); | ||
198 | return NULL; | ||
199 | } | ||
200 | device_create_file(&viodev->dev, &dev_attr_devspec); | ||
201 | |||
202 | return viodev; | ||
203 | } | ||
204 | EXPORT_SYMBOL(vio_register_device_node); | ||
205 | |||
206 | /** | ||
207 | * vio_get_attribute: - get attribute for virtual device | ||
208 | * @vdev: The vio device to get property. | ||
209 | * @which: The property/attribute to be extracted. | ||
210 | * @length: Pointer to length of returned data size (unused if NULL). | ||
211 | * | ||
212 | * Calls prom.c's get_property() to return the value of the | ||
213 | * attribute specified by the preprocessor constant @which | ||
214 | */ | ||
215 | const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length) | ||
216 | { | ||
217 | return get_property(vdev->dev.platform_data, (char*)which, length); | ||
218 | } | ||
219 | EXPORT_SYMBOL(vio_get_attribute); | ||
220 | |||
221 | /* vio_find_name() - internal because only vio.c knows how we formatted the | ||
222 | * kobject name | ||
223 | * XXX once vio_bus_type.devices is actually used as a kset in | ||
224 | * drivers/base/bus.c, this function should be removed in favor of | ||
225 | * "device_find(kobj_name, &vio_bus_type)" | ||
226 | */ | ||
227 | static struct vio_dev *vio_find_name(const char *kobj_name) | ||
228 | { | ||
229 | struct kobject *found; | ||
230 | |||
231 | found = kset_find_obj(&devices_subsys.kset, kobj_name); | ||
232 | if (!found) | ||
233 | return NULL; | ||
234 | |||
235 | return to_vio_dev(container_of(found, struct device, kobj)); | ||
236 | } | ||
237 | |||
238 | /** | ||
239 | * vio_find_node - find an already-registered vio_dev | ||
240 | * @vnode: device_node of the virtual device we're looking for | ||
241 | */ | ||
242 | struct vio_dev *vio_find_node(struct device_node *vnode) | ||
243 | { | ||
244 | uint32_t *unit_address; | ||
245 | char kobj_name[BUS_ID_SIZE]; | ||
246 | |||
247 | /* construct the kobject name from the device node */ | ||
248 | unit_address = (uint32_t *)get_property(vnode, "reg", NULL); | ||
249 | if (!unit_address) | ||
250 | return NULL; | ||
251 | snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address); | ||
252 | |||
253 | return vio_find_name(kobj_name); | ||
254 | } | ||
255 | EXPORT_SYMBOL(vio_find_node); | ||
256 | |||
257 | int vio_enable_interrupts(struct vio_dev *dev) | ||
258 | { | ||
259 | int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE); | ||
260 | if (rc != H_Success) | ||
261 | printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc); | ||
262 | return rc; | ||
263 | } | ||
264 | EXPORT_SYMBOL(vio_enable_interrupts); | ||
265 | |||
266 | int vio_disable_interrupts(struct vio_dev *dev) | ||
267 | { | ||
268 | int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE); | ||
269 | if (rc != H_Success) | ||
270 | printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc); | ||
271 | return rc; | ||
272 | } | ||
273 | EXPORT_SYMBOL(vio_disable_interrupts); | ||
diff --git a/arch/ppc64/kernel/pacaData.c b/arch/ppc64/kernel/pacaData.c index 6316188737b6..6182a2cd90a5 100644 --- a/arch/ppc64/kernel/pacaData.c +++ b/arch/ppc64/kernel/pacaData.c | |||
@@ -78,7 +78,7 @@ extern unsigned long __toc_start; | |||
78 | 78 | ||
79 | #define BOOTCPU_PACA_INIT(number) \ | 79 | #define BOOTCPU_PACA_INIT(number) \ |
80 | { \ | 80 | { \ |
81 | PACA_INIT_COMMON(number, 1, 0, STAB0_VIRT_ADDR) \ | 81 | PACA_INIT_COMMON(number, 1, 0, (u64)&initial_stab) \ |
82 | PACA_INIT_ISERIES(number) \ | 82 | PACA_INIT_ISERIES(number) \ |
83 | } | 83 | } |
84 | 84 | ||
@@ -90,7 +90,7 @@ extern unsigned long __toc_start; | |||
90 | 90 | ||
91 | #define BOOTCPU_PACA_INIT(number) \ | 91 | #define BOOTCPU_PACA_INIT(number) \ |
92 | { \ | 92 | { \ |
93 | PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, STAB0_VIRT_ADDR) \ | 93 | PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, (u64)&initial_stab) \ |
94 | } | 94 | } |
95 | #endif | 95 | #endif |
96 | 96 | ||
diff --git a/arch/ppc64/kernel/pmac_setup.c b/arch/ppc64/kernel/pmac_setup.c index e40877fa67cd..8ff86a766cdf 100644 --- a/arch/ppc64/kernel/pmac_setup.c +++ b/arch/ppc64/kernel/pmac_setup.c | |||
@@ -71,6 +71,7 @@ | |||
71 | #include <asm/of_device.h> | 71 | #include <asm/of_device.h> |
72 | #include <asm/lmb.h> | 72 | #include <asm/lmb.h> |
73 | #include <asm/smu.h> | 73 | #include <asm/smu.h> |
74 | #include <asm/pmc.h> | ||
74 | 75 | ||
75 | #include "pmac.h" | 76 | #include "pmac.h" |
76 | #include "mpic.h" | 77 | #include "mpic.h" |
@@ -511,4 +512,5 @@ struct machdep_calls __initdata pmac_md = { | |||
511 | .progress = pmac_progress, | 512 | .progress = pmac_progress, |
512 | .check_legacy_ioport = pmac_check_legacy_ioport, | 513 | .check_legacy_ioport = pmac_check_legacy_ioport, |
513 | .idle_loop = native_idle, | 514 | .idle_loop = native_idle, |
515 | .enable_pmcs = power4_enable_pmcs, | ||
514 | }; | 516 | }; |
diff --git a/arch/ppc64/kernel/pmc.c b/arch/ppc64/kernel/pmc.c index 67be773f9c00..cdfec7438d01 100644 --- a/arch/ppc64/kernel/pmc.c +++ b/arch/ppc64/kernel/pmc.c | |||
@@ -65,3 +65,24 @@ void release_pmc_hardware(void) | |||
65 | spin_unlock(&pmc_owner_lock); | 65 | spin_unlock(&pmc_owner_lock); |
66 | } | 66 | } |
67 | EXPORT_SYMBOL_GPL(release_pmc_hardware); | 67 | EXPORT_SYMBOL_GPL(release_pmc_hardware); |
68 | |||
69 | void power4_enable_pmcs(void) | ||
70 | { | ||
71 | unsigned long hid0; | ||
72 | |||
73 | hid0 = mfspr(HID0); | ||
74 | hid0 |= 1UL << (63 - 20); | ||
75 | |||
76 | /* POWER4 requires the following sequence */ | ||
77 | asm volatile( | ||
78 | "sync\n" | ||
79 | "mtspr %1, %0\n" | ||
80 | "mfspr %0, %1\n" | ||
81 | "mfspr %0, %1\n" | ||
82 | "mfspr %0, %1\n" | ||
83 | "mfspr %0, %1\n" | ||
84 | "mfspr %0, %1\n" | ||
85 | "mfspr %0, %1\n" | ||
86 | "isync" : "=&r" (hid0) : "i" (HID0), "0" (hid0): | ||
87 | "memory"); | ||
88 | } | ||
diff --git a/arch/ppc64/kernel/process.c b/arch/ppc64/kernel/process.c index f7cae05e40fb..7a7e027653ad 100644 --- a/arch/ppc64/kernel/process.c +++ b/arch/ppc64/kernel/process.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include <asm/machdep.h> | 50 | #include <asm/machdep.h> |
51 | #include <asm/iSeries/HvCallHpt.h> | 51 | #include <asm/iSeries/HvCallHpt.h> |
52 | #include <asm/cputable.h> | 52 | #include <asm/cputable.h> |
53 | #include <asm/firmware.h> | ||
53 | #include <asm/sections.h> | 54 | #include <asm/sections.h> |
54 | #include <asm/tlbflush.h> | 55 | #include <asm/tlbflush.h> |
55 | #include <asm/time.h> | 56 | #include <asm/time.h> |
@@ -202,11 +203,10 @@ struct task_struct *__switch_to(struct task_struct *prev, | |||
202 | new_thread = &new->thread; | 203 | new_thread = &new->thread; |
203 | old_thread = ¤t->thread; | 204 | old_thread = ¤t->thread; |
204 | 205 | ||
205 | /* Collect purr utilization data per process and per processor wise */ | 206 | /* Collect purr utilization data per process and per processor |
206 | /* purr is nothing but processor time base */ | 207 | * wise purr is nothing but processor time base |
207 | 208 | */ | |
208 | #if defined(CONFIG_PPC_PSERIES) | 209 | if (firmware_has_feature(FW_FEATURE_SPLPAR)) { |
209 | if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { | ||
210 | struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); | 210 | struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); |
211 | long unsigned start_tb, current_tb; | 211 | long unsigned start_tb, current_tb; |
212 | start_tb = old_thread->start_tb; | 212 | start_tb = old_thread->start_tb; |
@@ -214,8 +214,6 @@ struct task_struct *__switch_to(struct task_struct *prev, | |||
214 | old_thread->accum_tb += (current_tb - start_tb); | 214 | old_thread->accum_tb += (current_tb - start_tb); |
215 | new_thread->start_tb = current_tb; | 215 | new_thread->start_tb = current_tb; |
216 | } | 216 | } |
217 | #endif | ||
218 | |||
219 | 217 | ||
220 | local_irq_save(flags); | 218 | local_irq_save(flags); |
221 | last = _switch(old_thread, new_thread); | 219 | last = _switch(old_thread, new_thread); |
diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c index 5aca01ddd81f..b21848826791 100644 --- a/arch/ppc64/kernel/prom.c +++ b/arch/ppc64/kernel/prom.c | |||
@@ -625,8 +625,8 @@ void __init finish_device_tree(void) | |||
625 | 625 | ||
626 | static inline char *find_flat_dt_string(u32 offset) | 626 | static inline char *find_flat_dt_string(u32 offset) |
627 | { | 627 | { |
628 | return ((char *)initial_boot_params) + initial_boot_params->off_dt_strings | 628 | return ((char *)initial_boot_params) + |
629 | + offset; | 629 | initial_boot_params->off_dt_strings + offset; |
630 | } | 630 | } |
631 | 631 | ||
632 | /** | 632 | /** |
@@ -635,26 +635,33 @@ static inline char *find_flat_dt_string(u32 offset) | |||
635 | * unflatten the tree | 635 | * unflatten the tree |
636 | */ | 636 | */ |
637 | static int __init scan_flat_dt(int (*it)(unsigned long node, | 637 | static int __init scan_flat_dt(int (*it)(unsigned long node, |
638 | const char *full_path, void *data), | 638 | const char *uname, int depth, |
639 | void *data), | ||
639 | void *data) | 640 | void *data) |
640 | { | 641 | { |
641 | unsigned long p = ((unsigned long)initial_boot_params) + | 642 | unsigned long p = ((unsigned long)initial_boot_params) + |
642 | initial_boot_params->off_dt_struct; | 643 | initial_boot_params->off_dt_struct; |
643 | int rc = 0; | 644 | int rc = 0; |
645 | int depth = -1; | ||
644 | 646 | ||
645 | do { | 647 | do { |
646 | u32 tag = *((u32 *)p); | 648 | u32 tag = *((u32 *)p); |
647 | char *pathp; | 649 | char *pathp; |
648 | 650 | ||
649 | p += 4; | 651 | p += 4; |
650 | if (tag == OF_DT_END_NODE) | 652 | if (tag == OF_DT_END_NODE) { |
653 | depth --; | ||
654 | continue; | ||
655 | } | ||
656 | if (tag == OF_DT_NOP) | ||
651 | continue; | 657 | continue; |
652 | if (tag == OF_DT_END) | 658 | if (tag == OF_DT_END) |
653 | break; | 659 | break; |
654 | if (tag == OF_DT_PROP) { | 660 | if (tag == OF_DT_PROP) { |
655 | u32 sz = *((u32 *)p); | 661 | u32 sz = *((u32 *)p); |
656 | p += 8; | 662 | p += 8; |
657 | p = _ALIGN(p, sz >= 8 ? 8 : 4); | 663 | if (initial_boot_params->version < 0x10) |
664 | p = _ALIGN(p, sz >= 8 ? 8 : 4); | ||
658 | p += sz; | 665 | p += sz; |
659 | p = _ALIGN(p, 4); | 666 | p = _ALIGN(p, 4); |
660 | continue; | 667 | continue; |
@@ -664,9 +671,18 @@ static int __init scan_flat_dt(int (*it)(unsigned long node, | |||
664 | " device tree !\n", tag); | 671 | " device tree !\n", tag); |
665 | return -EINVAL; | 672 | return -EINVAL; |
666 | } | 673 | } |
674 | depth++; | ||
667 | pathp = (char *)p; | 675 | pathp = (char *)p; |
668 | p = _ALIGN(p + strlen(pathp) + 1, 4); | 676 | p = _ALIGN(p + strlen(pathp) + 1, 4); |
669 | rc = it(p, pathp, data); | 677 | if ((*pathp) == '/') { |
678 | char *lp, *np; | ||
679 | for (lp = NULL, np = pathp; *np; np++) | ||
680 | if ((*np) == '/') | ||
681 | lp = np+1; | ||
682 | if (lp != NULL) | ||
683 | pathp = lp; | ||
684 | } | ||
685 | rc = it(p, pathp, depth, data); | ||
670 | if (rc != 0) | 686 | if (rc != 0) |
671 | break; | 687 | break; |
672 | } while(1); | 688 | } while(1); |
@@ -689,17 +705,21 @@ static void* __init get_flat_dt_prop(unsigned long node, const char *name, | |||
689 | const char *nstr; | 705 | const char *nstr; |
690 | 706 | ||
691 | p += 4; | 707 | p += 4; |
708 | if (tag == OF_DT_NOP) | ||
709 | continue; | ||
692 | if (tag != OF_DT_PROP) | 710 | if (tag != OF_DT_PROP) |
693 | return NULL; | 711 | return NULL; |
694 | 712 | ||
695 | sz = *((u32 *)p); | 713 | sz = *((u32 *)p); |
696 | noff = *((u32 *)(p + 4)); | 714 | noff = *((u32 *)(p + 4)); |
697 | p += 8; | 715 | p += 8; |
698 | p = _ALIGN(p, sz >= 8 ? 8 : 4); | 716 | if (initial_boot_params->version < 0x10) |
717 | p = _ALIGN(p, sz >= 8 ? 8 : 4); | ||
699 | 718 | ||
700 | nstr = find_flat_dt_string(noff); | 719 | nstr = find_flat_dt_string(noff); |
701 | if (nstr == NULL) { | 720 | if (nstr == NULL) { |
702 | printk(KERN_WARNING "Can't find property index name !\n"); | 721 | printk(KERN_WARNING "Can't find property index" |
722 | " name !\n"); | ||
703 | return NULL; | 723 | return NULL; |
704 | } | 724 | } |
705 | if (strcmp(name, nstr) == 0) { | 725 | if (strcmp(name, nstr) == 0) { |
@@ -713,7 +733,7 @@ static void* __init get_flat_dt_prop(unsigned long node, const char *name, | |||
713 | } | 733 | } |
714 | 734 | ||
715 | static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size, | 735 | static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size, |
716 | unsigned long align) | 736 | unsigned long align) |
717 | { | 737 | { |
718 | void *res; | 738 | void *res; |
719 | 739 | ||
@@ -727,13 +747,16 @@ static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size, | |||
727 | static unsigned long __init unflatten_dt_node(unsigned long mem, | 747 | static unsigned long __init unflatten_dt_node(unsigned long mem, |
728 | unsigned long *p, | 748 | unsigned long *p, |
729 | struct device_node *dad, | 749 | struct device_node *dad, |
730 | struct device_node ***allnextpp) | 750 | struct device_node ***allnextpp, |
751 | unsigned long fpsize) | ||
731 | { | 752 | { |
732 | struct device_node *np; | 753 | struct device_node *np; |
733 | struct property *pp, **prev_pp = NULL; | 754 | struct property *pp, **prev_pp = NULL; |
734 | char *pathp; | 755 | char *pathp; |
735 | u32 tag; | 756 | u32 tag; |
736 | unsigned int l; | 757 | unsigned int l, allocl; |
758 | int has_name = 0; | ||
759 | int new_format = 0; | ||
737 | 760 | ||
738 | tag = *((u32 *)(*p)); | 761 | tag = *((u32 *)(*p)); |
739 | if (tag != OF_DT_BEGIN_NODE) { | 762 | if (tag != OF_DT_BEGIN_NODE) { |
@@ -742,21 +765,62 @@ static unsigned long __init unflatten_dt_node(unsigned long mem, | |||
742 | } | 765 | } |
743 | *p += 4; | 766 | *p += 4; |
744 | pathp = (char *)*p; | 767 | pathp = (char *)*p; |
745 | l = strlen(pathp) + 1; | 768 | l = allocl = strlen(pathp) + 1; |
746 | *p = _ALIGN(*p + l, 4); | 769 | *p = _ALIGN(*p + l, 4); |
747 | 770 | ||
748 | np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + l, | 771 | /* version 0x10 has a more compact unit name here instead of the full |
772 | * path. we accumulate the full path size using "fpsize", we'll rebuild | ||
773 | * it later. We detect this because the first character of the name is | ||
774 | * not '/'. | ||
775 | */ | ||
776 | if ((*pathp) != '/') { | ||
777 | new_format = 1; | ||
778 | if (fpsize == 0) { | ||
779 | /* root node: special case. fpsize accounts for path | ||
780 | * plus terminating zero. root node only has '/', so | ||
781 | * fpsize should be 2, but we want to avoid the first | ||
782 | * level nodes to have two '/' so we use fpsize 1 here | ||
783 | */ | ||
784 | fpsize = 1; | ||
785 | allocl = 2; | ||
786 | } else { | ||
787 | /* account for '/' and path size minus terminal 0 | ||
788 | * already in 'l' | ||
789 | */ | ||
790 | fpsize += l; | ||
791 | allocl = fpsize; | ||
792 | } | ||
793 | } | ||
794 | |||
795 | |||
796 | np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl, | ||
749 | __alignof__(struct device_node)); | 797 | __alignof__(struct device_node)); |
750 | if (allnextpp) { | 798 | if (allnextpp) { |
751 | memset(np, 0, sizeof(*np)); | 799 | memset(np, 0, sizeof(*np)); |
752 | np->full_name = ((char*)np) + sizeof(struct device_node); | 800 | np->full_name = ((char*)np) + sizeof(struct device_node); |
753 | memcpy(np->full_name, pathp, l); | 801 | if (new_format) { |
802 | char *p = np->full_name; | ||
803 | /* rebuild full path for new format */ | ||
804 | if (dad && dad->parent) { | ||
805 | strcpy(p, dad->full_name); | ||
806 | #ifdef DEBUG | ||
807 | if ((strlen(p) + l + 1) != allocl) { | ||
808 | DBG("%s: p: %d, l: %d, a: %d\n", | ||
809 | pathp, strlen(p), l, allocl); | ||
810 | } | ||
811 | #endif | ||
812 | p += strlen(p); | ||
813 | } | ||
814 | *(p++) = '/'; | ||
815 | memcpy(p, pathp, l); | ||
816 | } else | ||
817 | memcpy(np->full_name, pathp, l); | ||
754 | prev_pp = &np->properties; | 818 | prev_pp = &np->properties; |
755 | **allnextpp = np; | 819 | **allnextpp = np; |
756 | *allnextpp = &np->allnext; | 820 | *allnextpp = &np->allnext; |
757 | if (dad != NULL) { | 821 | if (dad != NULL) { |
758 | np->parent = dad; | 822 | np->parent = dad; |
759 | /* we temporarily use the `next' field as `last_child'. */ | 823 | /* we temporarily use the next field as `last_child'*/ |
760 | if (dad->next == 0) | 824 | if (dad->next == 0) |
761 | dad->child = np; | 825 | dad->child = np; |
762 | else | 826 | else |
@@ -770,18 +834,26 @@ static unsigned long __init unflatten_dt_node(unsigned long mem, | |||
770 | char *pname; | 834 | char *pname; |
771 | 835 | ||
772 | tag = *((u32 *)(*p)); | 836 | tag = *((u32 *)(*p)); |
837 | if (tag == OF_DT_NOP) { | ||
838 | *p += 4; | ||
839 | continue; | ||
840 | } | ||
773 | if (tag != OF_DT_PROP) | 841 | if (tag != OF_DT_PROP) |
774 | break; | 842 | break; |
775 | *p += 4; | 843 | *p += 4; |
776 | sz = *((u32 *)(*p)); | 844 | sz = *((u32 *)(*p)); |
777 | noff = *((u32 *)((*p) + 4)); | 845 | noff = *((u32 *)((*p) + 4)); |
778 | *p = _ALIGN((*p) + 8, sz >= 8 ? 8 : 4); | 846 | *p += 8; |
847 | if (initial_boot_params->version < 0x10) | ||
848 | *p = _ALIGN(*p, sz >= 8 ? 8 : 4); | ||
779 | 849 | ||
780 | pname = find_flat_dt_string(noff); | 850 | pname = find_flat_dt_string(noff); |
781 | if (pname == NULL) { | 851 | if (pname == NULL) { |
782 | printk("Can't find property name in list !\n"); | 852 | printk("Can't find property name in list !\n"); |
783 | break; | 853 | break; |
784 | } | 854 | } |
855 | if (strcmp(pname, "name") == 0) | ||
856 | has_name = 1; | ||
785 | l = strlen(pname) + 1; | 857 | l = strlen(pname) + 1; |
786 | pp = unflatten_dt_alloc(&mem, sizeof(struct property), | 858 | pp = unflatten_dt_alloc(&mem, sizeof(struct property), |
787 | __alignof__(struct property)); | 859 | __alignof__(struct property)); |
@@ -801,6 +873,36 @@ static unsigned long __init unflatten_dt_node(unsigned long mem, | |||
801 | } | 873 | } |
802 | *p = _ALIGN((*p) + sz, 4); | 874 | *p = _ALIGN((*p) + sz, 4); |
803 | } | 875 | } |
876 | /* with version 0x10 we may not have the name property, recreate | ||
877 | * it here from the unit name if absent | ||
878 | */ | ||
879 | if (!has_name) { | ||
880 | char *p = pathp, *ps = pathp, *pa = NULL; | ||
881 | int sz; | ||
882 | |||
883 | while (*p) { | ||
884 | if ((*p) == '@') | ||
885 | pa = p; | ||
886 | if ((*p) == '/') | ||
887 | ps = p + 1; | ||
888 | p++; | ||
889 | } | ||
890 | if (pa < ps) | ||
891 | pa = p; | ||
892 | sz = (pa - ps) + 1; | ||
893 | pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz, | ||
894 | __alignof__(struct property)); | ||
895 | if (allnextpp) { | ||
896 | pp->name = "name"; | ||
897 | pp->length = sz; | ||
898 | pp->value = (unsigned char *)(pp + 1); | ||
899 | *prev_pp = pp; | ||
900 | prev_pp = &pp->next; | ||
901 | memcpy(pp->value, ps, sz - 1); | ||
902 | ((char *)pp->value)[sz - 1] = 0; | ||
903 | DBG("fixed up name for %s -> %s\n", pathp, pp->value); | ||
904 | } | ||
905 | } | ||
804 | if (allnextpp) { | 906 | if (allnextpp) { |
805 | *prev_pp = NULL; | 907 | *prev_pp = NULL; |
806 | np->name = get_property(np, "name", NULL); | 908 | np->name = get_property(np, "name", NULL); |
@@ -812,11 +914,11 @@ static unsigned long __init unflatten_dt_node(unsigned long mem, | |||
812 | np->type = "<NULL>"; | 914 | np->type = "<NULL>"; |
813 | } | 915 | } |
814 | while (tag == OF_DT_BEGIN_NODE) { | 916 | while (tag == OF_DT_BEGIN_NODE) { |
815 | mem = unflatten_dt_node(mem, p, np, allnextpp); | 917 | mem = unflatten_dt_node(mem, p, np, allnextpp, fpsize); |
816 | tag = *((u32 *)(*p)); | 918 | tag = *((u32 *)(*p)); |
817 | } | 919 | } |
818 | if (tag != OF_DT_END_NODE) { | 920 | if (tag != OF_DT_END_NODE) { |
819 | printk("Weird tag at start of node: %x\n", tag); | 921 | printk("Weird tag at end of node: %x\n", tag); |
820 | return mem; | 922 | return mem; |
821 | } | 923 | } |
822 | *p += 4; | 924 | *p += 4; |
@@ -842,21 +944,32 @@ void __init unflatten_device_tree(void) | |||
842 | /* First pass, scan for size */ | 944 | /* First pass, scan for size */ |
843 | start = ((unsigned long)initial_boot_params) + | 945 | start = ((unsigned long)initial_boot_params) + |
844 | initial_boot_params->off_dt_struct; | 946 | initial_boot_params->off_dt_struct; |
845 | size = unflatten_dt_node(0, &start, NULL, NULL); | 947 | size = unflatten_dt_node(0, &start, NULL, NULL, 0); |
948 | size = (size | 3) + 1; | ||
846 | 949 | ||
847 | DBG(" size is %lx, allocating...\n", size); | 950 | DBG(" size is %lx, allocating...\n", size); |
848 | 951 | ||
849 | /* Allocate memory for the expanded device tree */ | 952 | /* Allocate memory for the expanded device tree */ |
850 | mem = (unsigned long)abs_to_virt(lmb_alloc(size, | 953 | mem = lmb_alloc(size + 4, __alignof__(struct device_node)); |
851 | __alignof__(struct device_node))); | 954 | if (!mem) { |
955 | DBG("Couldn't allocate memory with lmb_alloc()!\n"); | ||
956 | panic("Couldn't allocate memory with lmb_alloc()!\n"); | ||
957 | } | ||
958 | mem = (unsigned long)abs_to_virt(mem); | ||
959 | |||
960 | ((u32 *)mem)[size / 4] = 0xdeadbeef; | ||
961 | |||
852 | DBG(" unflattening...\n", mem); | 962 | DBG(" unflattening...\n", mem); |
853 | 963 | ||
854 | /* Second pass, do actual unflattening */ | 964 | /* Second pass, do actual unflattening */ |
855 | start = ((unsigned long)initial_boot_params) + | 965 | start = ((unsigned long)initial_boot_params) + |
856 | initial_boot_params->off_dt_struct; | 966 | initial_boot_params->off_dt_struct; |
857 | unflatten_dt_node(mem, &start, NULL, &allnextp); | 967 | unflatten_dt_node(mem, &start, NULL, &allnextp, 0); |
858 | if (*((u32 *)start) != OF_DT_END) | 968 | if (*((u32 *)start) != OF_DT_END) |
859 | printk(KERN_WARNING "Weird tag at end of tree: %x\n", *((u32 *)start)); | 969 | printk(KERN_WARNING "Weird tag at end of tree: %08x\n", *((u32 *)start)); |
970 | if (((u32 *)mem)[size / 4] != 0xdeadbeef) | ||
971 | printk(KERN_WARNING "End of tree marker overwritten: %08x\n", | ||
972 | ((u32 *)mem)[size / 4] ); | ||
860 | *allnextp = NULL; | 973 | *allnextp = NULL; |
861 | 974 | ||
862 | /* Get pointer to OF "/chosen" node for use everywhere */ | 975 | /* Get pointer to OF "/chosen" node for use everywhere */ |
@@ -880,7 +993,7 @@ void __init unflatten_device_tree(void) | |||
880 | 993 | ||
881 | 994 | ||
882 | static int __init early_init_dt_scan_cpus(unsigned long node, | 995 | static int __init early_init_dt_scan_cpus(unsigned long node, |
883 | const char *full_path, void *data) | 996 | const char *uname, int depth, void *data) |
884 | { | 997 | { |
885 | char *type = get_flat_dt_prop(node, "device_type", NULL); | 998 | char *type = get_flat_dt_prop(node, "device_type", NULL); |
886 | u32 *prop; | 999 | u32 *prop; |
@@ -947,13 +1060,15 @@ static int __init early_init_dt_scan_cpus(unsigned long node, | |||
947 | } | 1060 | } |
948 | 1061 | ||
949 | static int __init early_init_dt_scan_chosen(unsigned long node, | 1062 | static int __init early_init_dt_scan_chosen(unsigned long node, |
950 | const char *full_path, void *data) | 1063 | const char *uname, int depth, void *data) |
951 | { | 1064 | { |
952 | u32 *prop; | 1065 | u32 *prop; |
953 | u64 *prop64; | 1066 | u64 *prop64; |
954 | extern unsigned long memory_limit, tce_alloc_start, tce_alloc_end; | 1067 | extern unsigned long memory_limit, tce_alloc_start, tce_alloc_end; |
955 | 1068 | ||
956 | if (strcmp(full_path, "/chosen") != 0) | 1069 | DBG("search \"chosen\", depth: %d, uname: %s\n", depth, uname); |
1070 | |||
1071 | if (depth != 1 || strcmp(uname, "chosen") != 0) | ||
957 | return 0; | 1072 | return 0; |
958 | 1073 | ||
959 | /* get platform type */ | 1074 | /* get platform type */ |
@@ -1003,18 +1118,20 @@ static int __init early_init_dt_scan_chosen(unsigned long node, | |||
1003 | } | 1118 | } |
1004 | 1119 | ||
1005 | static int __init early_init_dt_scan_root(unsigned long node, | 1120 | static int __init early_init_dt_scan_root(unsigned long node, |
1006 | const char *full_path, void *data) | 1121 | const char *uname, int depth, void *data) |
1007 | { | 1122 | { |
1008 | u32 *prop; | 1123 | u32 *prop; |
1009 | 1124 | ||
1010 | if (strcmp(full_path, "/") != 0) | 1125 | if (depth != 0) |
1011 | return 0; | 1126 | return 0; |
1012 | 1127 | ||
1013 | prop = (u32 *)get_flat_dt_prop(node, "#size-cells", NULL); | 1128 | prop = (u32 *)get_flat_dt_prop(node, "#size-cells", NULL); |
1014 | dt_root_size_cells = (prop == NULL) ? 1 : *prop; | 1129 | dt_root_size_cells = (prop == NULL) ? 1 : *prop; |
1015 | 1130 | DBG("dt_root_size_cells = %x\n", dt_root_size_cells); | |
1131 | |||
1016 | prop = (u32 *)get_flat_dt_prop(node, "#address-cells", NULL); | 1132 | prop = (u32 *)get_flat_dt_prop(node, "#address-cells", NULL); |
1017 | dt_root_addr_cells = (prop == NULL) ? 2 : *prop; | 1133 | dt_root_addr_cells = (prop == NULL) ? 2 : *prop; |
1134 | DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells); | ||
1018 | 1135 | ||
1019 | /* break now */ | 1136 | /* break now */ |
1020 | return 1; | 1137 | return 1; |
@@ -1042,7 +1159,7 @@ static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp) | |||
1042 | 1159 | ||
1043 | 1160 | ||
1044 | static int __init early_init_dt_scan_memory(unsigned long node, | 1161 | static int __init early_init_dt_scan_memory(unsigned long node, |
1045 | const char *full_path, void *data) | 1162 | const char *uname, int depth, void *data) |
1046 | { | 1163 | { |
1047 | char *type = get_flat_dt_prop(node, "device_type", NULL); | 1164 | char *type = get_flat_dt_prop(node, "device_type", NULL); |
1048 | cell_t *reg, *endp; | 1165 | cell_t *reg, *endp; |
@@ -1058,7 +1175,9 @@ static int __init early_init_dt_scan_memory(unsigned long node, | |||
1058 | 1175 | ||
1059 | endp = reg + (l / sizeof(cell_t)); | 1176 | endp = reg + (l / sizeof(cell_t)); |
1060 | 1177 | ||
1061 | DBG("memory scan node %s ...\n", full_path); | 1178 | DBG("memory scan node %s ..., reg size %ld, data: %x %x %x %x, ...\n", |
1179 | uname, l, reg[0], reg[1], reg[2], reg[3]); | ||
1180 | |||
1062 | while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) { | 1181 | while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) { |
1063 | unsigned long base, size; | 1182 | unsigned long base, size; |
1064 | 1183 | ||
@@ -1469,10 +1588,11 @@ struct device_node *of_find_node_by_path(const char *path) | |||
1469 | struct device_node *np = allnodes; | 1588 | struct device_node *np = allnodes; |
1470 | 1589 | ||
1471 | read_lock(&devtree_lock); | 1590 | read_lock(&devtree_lock); |
1472 | for (; np != 0; np = np->allnext) | 1591 | for (; np != 0; np = np->allnext) { |
1473 | if (np->full_name != 0 && strcasecmp(np->full_name, path) == 0 | 1592 | if (np->full_name != 0 && strcasecmp(np->full_name, path) == 0 |
1474 | && of_node_get(np)) | 1593 | && of_node_get(np)) |
1475 | break; | 1594 | break; |
1595 | } | ||
1476 | read_unlock(&devtree_lock); | 1596 | read_unlock(&devtree_lock); |
1477 | return np; | 1597 | return np; |
1478 | } | 1598 | } |
diff --git a/arch/ppc64/kernel/prom_init.c b/arch/ppc64/kernel/prom_init.c index dbbe6c79d8da..122283a1d39a 100644 --- a/arch/ppc64/kernel/prom_init.c +++ b/arch/ppc64/kernel/prom_init.c | |||
@@ -892,7 +892,10 @@ static void __init prom_init_mem(void) | |||
892 | if ( RELOC(of_platform) == PLATFORM_PSERIES_LPAR ) | 892 | if ( RELOC(of_platform) == PLATFORM_PSERIES_LPAR ) |
893 | RELOC(alloc_top) = RELOC(rmo_top); | 893 | RELOC(alloc_top) = RELOC(rmo_top); |
894 | else | 894 | else |
895 | RELOC(alloc_top) = RELOC(rmo_top) = min(0x40000000ul, RELOC(ram_top)); | 895 | /* Some RS64 machines have buggy firmware where claims up at 1GB |
896 | * fails. Cap at 768MB as a workaround. Still plenty of room. | ||
897 | */ | ||
898 | RELOC(alloc_top) = RELOC(rmo_top) = min(0x30000000ul, RELOC(ram_top)); | ||
896 | 899 | ||
897 | prom_printf("memory layout at init:\n"); | 900 | prom_printf("memory layout at init:\n"); |
898 | prom_printf(" memory_limit : %x (16 MB aligned)\n", RELOC(prom_memory_limit)); | 901 | prom_printf(" memory_limit : %x (16 MB aligned)\n", RELOC(prom_memory_limit)); |
@@ -1534,7 +1537,8 @@ static unsigned long __init dt_find_string(char *str) | |||
1534 | */ | 1537 | */ |
1535 | #define MAX_PROPERTY_NAME 64 | 1538 | #define MAX_PROPERTY_NAME 64 |
1536 | 1539 | ||
1537 | static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start, | 1540 | static void __init scan_dt_build_strings(phandle node, |
1541 | unsigned long *mem_start, | ||
1538 | unsigned long *mem_end) | 1542 | unsigned long *mem_end) |
1539 | { | 1543 | { |
1540 | unsigned long offset = reloc_offset(); | 1544 | unsigned long offset = reloc_offset(); |
@@ -1547,16 +1551,21 @@ static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start, | |||
1547 | /* get and store all property names */ | 1551 | /* get and store all property names */ |
1548 | prev_name = RELOC(""); | 1552 | prev_name = RELOC(""); |
1549 | for (;;) { | 1553 | for (;;) { |
1550 | int rc; | ||
1551 | |||
1552 | /* 64 is max len of name including nul. */ | 1554 | /* 64 is max len of name including nul. */ |
1553 | namep = make_room(mem_start, mem_end, MAX_PROPERTY_NAME, 1); | 1555 | namep = make_room(mem_start, mem_end, MAX_PROPERTY_NAME, 1); |
1554 | rc = call_prom("nextprop", 3, 1, node, prev_name, namep); | 1556 | if (call_prom("nextprop", 3, 1, node, prev_name, namep) != 1) { |
1555 | if (rc != 1) { | ||
1556 | /* No more nodes: unwind alloc */ | 1557 | /* No more nodes: unwind alloc */ |
1557 | *mem_start = (unsigned long)namep; | 1558 | *mem_start = (unsigned long)namep; |
1558 | break; | 1559 | break; |
1559 | } | 1560 | } |
1561 | |||
1562 | /* skip "name" */ | ||
1563 | if (strcmp(namep, RELOC("name")) == 0) { | ||
1564 | *mem_start = (unsigned long)namep; | ||
1565 | prev_name = RELOC("name"); | ||
1566 | continue; | ||
1567 | } | ||
1568 | /* get/create string entry */ | ||
1560 | soff = dt_find_string(namep); | 1569 | soff = dt_find_string(namep); |
1561 | if (soff != 0) { | 1570 | if (soff != 0) { |
1562 | *mem_start = (unsigned long)namep; | 1571 | *mem_start = (unsigned long)namep; |
@@ -1571,7 +1580,7 @@ static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start, | |||
1571 | 1580 | ||
1572 | /* do all our children */ | 1581 | /* do all our children */ |
1573 | child = call_prom("child", 1, 1, node); | 1582 | child = call_prom("child", 1, 1, node); |
1574 | while (child != (phandle)0) { | 1583 | while (child != 0) { |
1575 | scan_dt_build_strings(child, mem_start, mem_end); | 1584 | scan_dt_build_strings(child, mem_start, mem_end); |
1576 | child = call_prom("peer", 1, 1, child); | 1585 | child = call_prom("peer", 1, 1, child); |
1577 | } | 1586 | } |
@@ -1580,16 +1589,13 @@ static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start, | |||
1580 | static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, | 1589 | static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, |
1581 | unsigned long *mem_end) | 1590 | unsigned long *mem_end) |
1582 | { | 1591 | { |
1583 | int l, align; | ||
1584 | phandle child; | 1592 | phandle child; |
1585 | char *namep, *prev_name, *sstart, *p, *ep; | 1593 | char *namep, *prev_name, *sstart, *p, *ep, *lp, *path; |
1586 | unsigned long soff; | 1594 | unsigned long soff; |
1587 | unsigned char *valp; | 1595 | unsigned char *valp; |
1588 | unsigned long offset = reloc_offset(); | 1596 | unsigned long offset = reloc_offset(); |
1589 | char pname[MAX_PROPERTY_NAME]; | 1597 | static char pname[MAX_PROPERTY_NAME]; |
1590 | char *path; | 1598 | int l; |
1591 | |||
1592 | path = RELOC(prom_scratch); | ||
1593 | 1599 | ||
1594 | dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end); | 1600 | dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end); |
1595 | 1601 | ||
@@ -1599,23 +1605,33 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, | |||
1599 | namep, *mem_end - *mem_start); | 1605 | namep, *mem_end - *mem_start); |
1600 | if (l >= 0) { | 1606 | if (l >= 0) { |
1601 | /* Didn't fit? Get more room. */ | 1607 | /* Didn't fit? Get more room. */ |
1602 | if (l+1 > *mem_end - *mem_start) { | 1608 | if ((l+1) > (*mem_end - *mem_start)) { |
1603 | namep = make_room(mem_start, mem_end, l+1, 1); | 1609 | namep = make_room(mem_start, mem_end, l+1, 1); |
1604 | call_prom("package-to-path", 3, 1, node, namep, l); | 1610 | call_prom("package-to-path", 3, 1, node, namep, l); |
1605 | } | 1611 | } |
1606 | namep[l] = '\0'; | 1612 | namep[l] = '\0'; |
1613 | |||
1607 | /* Fixup an Apple bug where they have bogus \0 chars in the | 1614 | /* Fixup an Apple bug where they have bogus \0 chars in the |
1608 | * middle of the path in some properties | 1615 | * middle of the path in some properties |
1609 | */ | 1616 | */ |
1610 | for (p = namep, ep = namep + l; p < ep; p++) | 1617 | for (p = namep, ep = namep + l; p < ep; p++) |
1611 | if (*p == '\0') { | 1618 | if (*p == '\0') { |
1612 | memmove(p, p+1, ep - p); | 1619 | memmove(p, p+1, ep - p); |
1613 | ep--; l--; | 1620 | ep--; l--; p--; |
1614 | } | 1621 | } |
1615 | *mem_start = _ALIGN(((unsigned long) namep) + strlen(namep) + 1, 4); | 1622 | |
1623 | /* now try to extract the unit name in that mess */ | ||
1624 | for (p = namep, lp = NULL; *p; p++) | ||
1625 | if (*p == '/') | ||
1626 | lp = p + 1; | ||
1627 | if (lp != NULL) | ||
1628 | memmove(namep, lp, strlen(lp) + 1); | ||
1629 | *mem_start = _ALIGN(((unsigned long) namep) + | ||
1630 | strlen(namep) + 1, 4); | ||
1616 | } | 1631 | } |
1617 | 1632 | ||
1618 | /* get it again for debugging */ | 1633 | /* get it again for debugging */ |
1634 | path = RELOC(prom_scratch); | ||
1619 | memset(path, 0, PROM_SCRATCH_SIZE); | 1635 | memset(path, 0, PROM_SCRATCH_SIZE); |
1620 | call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1); | 1636 | call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1); |
1621 | 1637 | ||
@@ -1623,23 +1639,27 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, | |||
1623 | prev_name = RELOC(""); | 1639 | prev_name = RELOC(""); |
1624 | sstart = (char *)RELOC(dt_string_start); | 1640 | sstart = (char *)RELOC(dt_string_start); |
1625 | for (;;) { | 1641 | for (;;) { |
1626 | int rc; | 1642 | if (call_prom("nextprop", 3, 1, node, prev_name, |
1627 | 1643 | RELOC(pname)) != 1) | |
1628 | rc = call_prom("nextprop", 3, 1, node, prev_name, pname); | ||
1629 | if (rc != 1) | ||
1630 | break; | 1644 | break; |
1631 | 1645 | ||
1646 | /* skip "name" */ | ||
1647 | if (strcmp(RELOC(pname), RELOC("name")) == 0) { | ||
1648 | prev_name = RELOC("name"); | ||
1649 | continue; | ||
1650 | } | ||
1651 | |||
1632 | /* find string offset */ | 1652 | /* find string offset */ |
1633 | soff = dt_find_string(pname); | 1653 | soff = dt_find_string(RELOC(pname)); |
1634 | if (soff == 0) { | 1654 | if (soff == 0) { |
1635 | prom_printf("WARNING: Can't find string index for <%s>, node %s\n", | 1655 | prom_printf("WARNING: Can't find string index for" |
1636 | pname, path); | 1656 | " <%s>, node %s\n", RELOC(pname), path); |
1637 | break; | 1657 | break; |
1638 | } | 1658 | } |
1639 | prev_name = sstart + soff; | 1659 | prev_name = sstart + soff; |
1640 | 1660 | ||
1641 | /* get length */ | 1661 | /* get length */ |
1642 | l = call_prom("getproplen", 2, 1, node, pname); | 1662 | l = call_prom("getproplen", 2, 1, node, RELOC(pname)); |
1643 | 1663 | ||
1644 | /* sanity checks */ | 1664 | /* sanity checks */ |
1645 | if (l == PROM_ERROR) | 1665 | if (l == PROM_ERROR) |
@@ -1648,7 +1668,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, | |||
1648 | prom_printf("WARNING: ignoring large property "); | 1668 | prom_printf("WARNING: ignoring large property "); |
1649 | /* It seems OF doesn't null-terminate the path :-( */ | 1669 | /* It seems OF doesn't null-terminate the path :-( */ |
1650 | prom_printf("[%s] ", path); | 1670 | prom_printf("[%s] ", path); |
1651 | prom_printf("%s length 0x%x\n", pname, l); | 1671 | prom_printf("%s length 0x%x\n", RELOC(pname), l); |
1652 | continue; | 1672 | continue; |
1653 | } | 1673 | } |
1654 | 1674 | ||
@@ -1658,17 +1678,16 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, | |||
1658 | dt_push_token(soff, mem_start, mem_end); | 1678 | dt_push_token(soff, mem_start, mem_end); |
1659 | 1679 | ||
1660 | /* push property content */ | 1680 | /* push property content */ |
1661 | align = (l >= 8) ? 8 : 4; | 1681 | valp = make_room(mem_start, mem_end, l, 4); |
1662 | valp = make_room(mem_start, mem_end, l, align); | 1682 | call_prom("getprop", 4, 1, node, RELOC(pname), valp, l); |
1663 | call_prom("getprop", 4, 1, node, pname, valp, l); | ||
1664 | *mem_start = _ALIGN(*mem_start, 4); | 1683 | *mem_start = _ALIGN(*mem_start, 4); |
1665 | } | 1684 | } |
1666 | 1685 | ||
1667 | /* Add a "linux,phandle" property. */ | 1686 | /* Add a "linux,phandle" property. */ |
1668 | soff = dt_find_string(RELOC("linux,phandle")); | 1687 | soff = dt_find_string(RELOC("linux,phandle")); |
1669 | if (soff == 0) | 1688 | if (soff == 0) |
1670 | prom_printf("WARNING: Can't find string index for <linux-phandle>" | 1689 | prom_printf("WARNING: Can't find string index for" |
1671 | " node %s\n", path); | 1690 | " <linux-phandle> node %s\n", path); |
1672 | else { | 1691 | else { |
1673 | dt_push_token(OF_DT_PROP, mem_start, mem_end); | 1692 | dt_push_token(OF_DT_PROP, mem_start, mem_end); |
1674 | dt_push_token(4, mem_start, mem_end); | 1693 | dt_push_token(4, mem_start, mem_end); |
@@ -1679,7 +1698,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, | |||
1679 | 1698 | ||
1680 | /* do all our children */ | 1699 | /* do all our children */ |
1681 | child = call_prom("child", 1, 1, node); | 1700 | child = call_prom("child", 1, 1, node); |
1682 | while (child != (phandle)0) { | 1701 | while (child != 0) { |
1683 | scan_dt_build_struct(child, mem_start, mem_end); | 1702 | scan_dt_build_struct(child, mem_start, mem_end); |
1684 | child = call_prom("peer", 1, 1, child); | 1703 | child = call_prom("peer", 1, 1, child); |
1685 | } | 1704 | } |
@@ -1718,7 +1737,8 @@ static void __init flatten_device_tree(void) | |||
1718 | 1737 | ||
1719 | /* Build header and make room for mem rsv map */ | 1738 | /* Build header and make room for mem rsv map */ |
1720 | mem_start = _ALIGN(mem_start, 4); | 1739 | mem_start = _ALIGN(mem_start, 4); |
1721 | hdr = make_room(&mem_start, &mem_end, sizeof(struct boot_param_header), 4); | 1740 | hdr = make_room(&mem_start, &mem_end, |
1741 | sizeof(struct boot_param_header), 4); | ||
1722 | RELOC(dt_header_start) = (unsigned long)hdr; | 1742 | RELOC(dt_header_start) = (unsigned long)hdr; |
1723 | rsvmap = make_room(&mem_start, &mem_end, sizeof(mem_reserve_map), 8); | 1743 | rsvmap = make_room(&mem_start, &mem_end, sizeof(mem_reserve_map), 8); |
1724 | 1744 | ||
@@ -1731,11 +1751,11 @@ static void __init flatten_device_tree(void) | |||
1731 | namep = make_room(&mem_start, &mem_end, 16, 1); | 1751 | namep = make_room(&mem_start, &mem_end, 16, 1); |
1732 | strcpy(namep, RELOC("linux,phandle")); | 1752 | strcpy(namep, RELOC("linux,phandle")); |
1733 | mem_start = (unsigned long)namep + strlen(namep) + 1; | 1753 | mem_start = (unsigned long)namep + strlen(namep) + 1; |
1734 | RELOC(dt_string_end) = mem_start; | ||
1735 | 1754 | ||
1736 | /* Build string array */ | 1755 | /* Build string array */ |
1737 | prom_printf("Building dt strings...\n"); | 1756 | prom_printf("Building dt strings...\n"); |
1738 | scan_dt_build_strings(root, &mem_start, &mem_end); | 1757 | scan_dt_build_strings(root, &mem_start, &mem_end); |
1758 | RELOC(dt_string_end) = mem_start; | ||
1739 | 1759 | ||
1740 | /* Build structure */ | 1760 | /* Build structure */ |
1741 | mem_start = PAGE_ALIGN(mem_start); | 1761 | mem_start = PAGE_ALIGN(mem_start); |
@@ -1750,9 +1770,11 @@ static void __init flatten_device_tree(void) | |||
1750 | hdr->totalsize = RELOC(dt_struct_end) - RELOC(dt_header_start); | 1770 | hdr->totalsize = RELOC(dt_struct_end) - RELOC(dt_header_start); |
1751 | hdr->off_dt_struct = RELOC(dt_struct_start) - RELOC(dt_header_start); | 1771 | hdr->off_dt_struct = RELOC(dt_struct_start) - RELOC(dt_header_start); |
1752 | hdr->off_dt_strings = RELOC(dt_string_start) - RELOC(dt_header_start); | 1772 | hdr->off_dt_strings = RELOC(dt_string_start) - RELOC(dt_header_start); |
1773 | hdr->dt_strings_size = RELOC(dt_string_end) - RELOC(dt_string_start); | ||
1753 | hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - RELOC(dt_header_start); | 1774 | hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - RELOC(dt_header_start); |
1754 | hdr->version = OF_DT_VERSION; | 1775 | hdr->version = OF_DT_VERSION; |
1755 | hdr->last_comp_version = 1; | 1776 | /* Version 16 is not backward compatible */ |
1777 | hdr->last_comp_version = 0x10; | ||
1756 | 1778 | ||
1757 | /* Reserve the whole thing and copy the reserve map in, we | 1779 | /* Reserve the whole thing and copy the reserve map in, we |
1758 | * also bump mem_reserve_cnt to cause further reservations to | 1780 | * also bump mem_reserve_cnt to cause further reservations to |
@@ -1808,6 +1830,9 @@ static void __init fixup_device_tree(void) | |||
1808 | /* does it need fixup ? */ | 1830 | /* does it need fixup ? */ |
1809 | if (prom_getproplen(i2c, "interrupts") > 0) | 1831 | if (prom_getproplen(i2c, "interrupts") > 0) |
1810 | return; | 1832 | return; |
1833 | |||
1834 | prom_printf("fixing up bogus interrupts for u3 i2c...\n"); | ||
1835 | |||
1811 | /* interrupt on this revision of u3 is number 0 and level */ | 1836 | /* interrupt on this revision of u3 is number 0 and level */ |
1812 | interrupts[0] = 0; | 1837 | interrupts[0] = 0; |
1813 | interrupts[1] = 1; | 1838 | interrupts[1] = 1; |
diff --git a/arch/ppc64/kernel/rtas_pci.c b/arch/ppc64/kernel/rtas_pci.c index 1048817befb8..1dccadaddd1d 100644 --- a/arch/ppc64/kernel/rtas_pci.c +++ b/arch/ppc64/kernel/rtas_pci.c | |||
@@ -58,6 +58,21 @@ static int config_access_valid(struct device_node *dn, int where) | |||
58 | return 0; | 58 | return 0; |
59 | } | 59 | } |
60 | 60 | ||
61 | static int of_device_available(struct device_node * dn) | ||
62 | { | ||
63 | char * status; | ||
64 | |||
65 | status = get_property(dn, "status", NULL); | ||
66 | |||
67 | if (!status) | ||
68 | return 1; | ||
69 | |||
70 | if (!strcmp(status, "okay")) | ||
71 | return 1; | ||
72 | |||
73 | return 0; | ||
74 | } | ||
75 | |||
61 | static int rtas_read_config(struct device_node *dn, int where, int size, u32 *val) | 76 | static int rtas_read_config(struct device_node *dn, int where, int size, u32 *val) |
62 | { | 77 | { |
63 | int returnval = -1; | 78 | int returnval = -1; |
@@ -103,7 +118,7 @@ static int rtas_pci_read_config(struct pci_bus *bus, | |||
103 | 118 | ||
104 | /* Search only direct children of the bus */ | 119 | /* Search only direct children of the bus */ |
105 | for (dn = busdn->child; dn; dn = dn->sibling) | 120 | for (dn = busdn->child; dn; dn = dn->sibling) |
106 | if (dn->devfn == devfn) | 121 | if (dn->devfn == devfn && of_device_available(dn)) |
107 | return rtas_read_config(dn, where, size, val); | 122 | return rtas_read_config(dn, where, size, val); |
108 | return PCIBIOS_DEVICE_NOT_FOUND; | 123 | return PCIBIOS_DEVICE_NOT_FOUND; |
109 | } | 124 | } |
@@ -146,7 +161,7 @@ static int rtas_pci_write_config(struct pci_bus *bus, | |||
146 | 161 | ||
147 | /* Search only direct children of the bus */ | 162 | /* Search only direct children of the bus */ |
148 | for (dn = busdn->child; dn; dn = dn->sibling) | 163 | for (dn = busdn->child; dn; dn = dn->sibling) |
149 | if (dn->devfn == devfn) | 164 | if (dn->devfn == devfn && of_device_available(dn)) |
150 | return rtas_write_config(dn, where, size, val); | 165 | return rtas_write_config(dn, where, size, val); |
151 | return PCIBIOS_DEVICE_NOT_FOUND; | 166 | return PCIBIOS_DEVICE_NOT_FOUND; |
152 | } | 167 | } |
diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c index e9c24d2dbd91..ee3b20de2e7a 100644 --- a/arch/ppc64/kernel/setup.c +++ b/arch/ppc64/kernel/setup.c | |||
@@ -536,15 +536,19 @@ static void __init check_for_initrd(void) | |||
536 | 536 | ||
537 | DBG(" -> check_for_initrd()\n"); | 537 | DBG(" -> check_for_initrd()\n"); |
538 | 538 | ||
539 | prop = (u64 *)get_property(of_chosen, "linux,initrd-start", NULL); | 539 | if (of_chosen) { |
540 | if (prop != NULL) { | 540 | prop = (u64 *)get_property(of_chosen, |
541 | initrd_start = (unsigned long)__va(*prop); | 541 | "linux,initrd-start", NULL); |
542 | prop = (u64 *)get_property(of_chosen, "linux,initrd-end", NULL); | ||
543 | if (prop != NULL) { | 542 | if (prop != NULL) { |
544 | initrd_end = (unsigned long)__va(*prop); | 543 | initrd_start = (unsigned long)__va(*prop); |
545 | initrd_below_start_ok = 1; | 544 | prop = (u64 *)get_property(of_chosen, |
546 | } else | 545 | "linux,initrd-end", NULL); |
547 | initrd_start = 0; | 546 | if (prop != NULL) { |
547 | initrd_end = (unsigned long)__va(*prop); | ||
548 | initrd_below_start_ok = 1; | ||
549 | } else | ||
550 | initrd_start = 0; | ||
551 | } | ||
548 | } | 552 | } |
549 | 553 | ||
550 | /* If we were passed an initrd, set the ROOT_DEV properly if the values | 554 | /* If we were passed an initrd, set the ROOT_DEV properly if the values |
@@ -627,7 +631,7 @@ void __init setup_system(void) | |||
627 | * Initialize xmon | 631 | * Initialize xmon |
628 | */ | 632 | */ |
629 | #ifdef CONFIG_XMON_DEFAULT | 633 | #ifdef CONFIG_XMON_DEFAULT |
630 | xmon_init(); | 634 | xmon_init(1); |
631 | #endif | 635 | #endif |
632 | /* | 636 | /* |
633 | * Register early console | 637 | * Register early console |
@@ -1343,11 +1347,13 @@ static int __init early_xmon(char *p) | |||
1343 | /* ensure xmon is enabled */ | 1347 | /* ensure xmon is enabled */ |
1344 | if (p) { | 1348 | if (p) { |
1345 | if (strncmp(p, "on", 2) == 0) | 1349 | if (strncmp(p, "on", 2) == 0) |
1346 | xmon_init(); | 1350 | xmon_init(1); |
1351 | if (strncmp(p, "off", 3) == 0) | ||
1352 | xmon_init(0); | ||
1347 | if (strncmp(p, "early", 5) != 0) | 1353 | if (strncmp(p, "early", 5) != 0) |
1348 | return 0; | 1354 | return 0; |
1349 | } | 1355 | } |
1350 | xmon_init(); | 1356 | xmon_init(1); |
1351 | debugger(NULL); | 1357 | debugger(NULL); |
1352 | 1358 | ||
1353 | return 0; | 1359 | return 0; |
diff --git a/arch/ppc64/kernel/sysfs.c b/arch/ppc64/kernel/sysfs.c index 02b8ac4e0168..f311ee7c0070 100644 --- a/arch/ppc64/kernel/sysfs.c +++ b/arch/ppc64/kernel/sysfs.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <asm/current.h> | 13 | #include <asm/current.h> |
14 | #include <asm/processor.h> | 14 | #include <asm/processor.h> |
15 | #include <asm/cputable.h> | 15 | #include <asm/cputable.h> |
16 | #include <asm/firmware.h> | ||
16 | #include <asm/hvcall.h> | 17 | #include <asm/hvcall.h> |
17 | #include <asm/prom.h> | 18 | #include <asm/prom.h> |
18 | #include <asm/systemcfg.h> | 19 | #include <asm/systemcfg.h> |
@@ -100,6 +101,8 @@ static int __init setup_smt_snooze_delay(char *str) | |||
100 | } | 101 | } |
101 | __setup("smt-snooze-delay=", setup_smt_snooze_delay); | 102 | __setup("smt-snooze-delay=", setup_smt_snooze_delay); |
102 | 103 | ||
104 | #endif /* CONFIG_PPC_MULTIPLATFORM */ | ||
105 | |||
103 | /* | 106 | /* |
104 | * Enabling PMCs will slow partition context switch times so we only do | 107 | * Enabling PMCs will slow partition context switch times so we only do |
105 | * it the first time we write to the PMCs. | 108 | * it the first time we write to the PMCs. |
@@ -109,65 +112,15 @@ static DEFINE_PER_CPU(char, pmcs_enabled); | |||
109 | 112 | ||
110 | void ppc64_enable_pmcs(void) | 113 | void ppc64_enable_pmcs(void) |
111 | { | 114 | { |
112 | unsigned long hid0; | ||
113 | #ifdef CONFIG_PPC_PSERIES | ||
114 | unsigned long set, reset; | ||
115 | #endif /* CONFIG_PPC_PSERIES */ | ||
116 | |||
117 | /* Only need to enable them once */ | 115 | /* Only need to enable them once */ |
118 | if (__get_cpu_var(pmcs_enabled)) | 116 | if (__get_cpu_var(pmcs_enabled)) |
119 | return; | 117 | return; |
120 | 118 | ||
121 | __get_cpu_var(pmcs_enabled) = 1; | 119 | __get_cpu_var(pmcs_enabled) = 1; |
122 | 120 | ||
123 | switch (systemcfg->platform) { | 121 | if (ppc_md.enable_pmcs) |
124 | case PLATFORM_PSERIES: | 122 | ppc_md.enable_pmcs(); |
125 | case PLATFORM_POWERMAC: | ||
126 | hid0 = mfspr(HID0); | ||
127 | hid0 |= 1UL << (63 - 20); | ||
128 | |||
129 | /* POWER4 requires the following sequence */ | ||
130 | asm volatile( | ||
131 | "sync\n" | ||
132 | "mtspr %1, %0\n" | ||
133 | "mfspr %0, %1\n" | ||
134 | "mfspr %0, %1\n" | ||
135 | "mfspr %0, %1\n" | ||
136 | "mfspr %0, %1\n" | ||
137 | "mfspr %0, %1\n" | ||
138 | "mfspr %0, %1\n" | ||
139 | "isync" : "=&r" (hid0) : "i" (HID0), "0" (hid0): | ||
140 | "memory"); | ||
141 | break; | ||
142 | |||
143 | #ifdef CONFIG_PPC_PSERIES | ||
144 | case PLATFORM_PSERIES_LPAR: | ||
145 | set = 1UL << 63; | ||
146 | reset = 0; | ||
147 | plpar_hcall_norets(H_PERFMON, set, reset); | ||
148 | break; | ||
149 | #endif /* CONFIG_PPC_PSERIES */ | ||
150 | |||
151 | default: | ||
152 | break; | ||
153 | } | ||
154 | |||
155 | #ifdef CONFIG_PPC_PSERIES | ||
156 | /* instruct hypervisor to maintain PMCs */ | ||
157 | if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) | ||
158 | get_paca()->lppaca.pmcregs_in_use = 1; | ||
159 | #endif /* CONFIG_PPC_PSERIES */ | ||
160 | } | 123 | } |
161 | |||
162 | #else | ||
163 | |||
164 | /* PMC stuff */ | ||
165 | void ppc64_enable_pmcs(void) | ||
166 | { | ||
167 | /* XXX Implement for iseries */ | ||
168 | } | ||
169 | #endif /* CONFIG_PPC_MULTIPLATFORM */ | ||
170 | |||
171 | EXPORT_SYMBOL(ppc64_enable_pmcs); | 124 | EXPORT_SYMBOL(ppc64_enable_pmcs); |
172 | 125 | ||
173 | /* XXX convert to rusty's on_one_cpu */ | 126 | /* XXX convert to rusty's on_one_cpu */ |
diff --git a/arch/ppc64/kernel/time.c b/arch/ppc64/kernel/time.c index 909462e1adea..1696e1b05bb9 100644 --- a/arch/ppc64/kernel/time.c +++ b/arch/ppc64/kernel/time.c | |||
@@ -67,6 +67,7 @@ | |||
67 | #include <asm/prom.h> | 67 | #include <asm/prom.h> |
68 | #include <asm/sections.h> | 68 | #include <asm/sections.h> |
69 | #include <asm/systemcfg.h> | 69 | #include <asm/systemcfg.h> |
70 | #include <asm/firmware.h> | ||
70 | 71 | ||
71 | u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; | 72 | u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; |
72 | 73 | ||
@@ -370,13 +371,11 @@ int timer_interrupt(struct pt_regs * regs) | |||
370 | process_hvlpevents(regs); | 371 | process_hvlpevents(regs); |
371 | #endif | 372 | #endif |
372 | 373 | ||
373 | /* collect purr register values often, for accurate calculations */ | 374 | /* collect purr register values often, for accurate calculations */ |
374 | #if defined(CONFIG_PPC_PSERIES) | 375 | if (firmware_has_feature(FW_FEATURE_SPLPAR)) { |
375 | if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { | ||
376 | struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); | 376 | struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); |
377 | cu->current_tb = mfspr(SPRN_PURR); | 377 | cu->current_tb = mfspr(SPRN_PURR); |
378 | } | 378 | } |
379 | #endif | ||
380 | 379 | ||
381 | irq_exit(); | 380 | irq_exit(); |
382 | 381 | ||
diff --git a/arch/ppc64/kernel/vio.c b/arch/ppc64/kernel/vio.c index 0c0ba71ac0e8..c90e1dd875ce 100644 --- a/arch/ppc64/kernel/vio.c +++ b/arch/ppc64/kernel/vio.c | |||
@@ -1,10 +1,11 @@ | |||
1 | /* | 1 | /* |
2 | * IBM PowerPC Virtual I/O Infrastructure Support. | 2 | * IBM PowerPC Virtual I/O Infrastructure Support. |
3 | * | 3 | * |
4 | * Copyright (c) 2003 IBM Corp. | 4 | * Copyright (c) 2003-2005 IBM Corp. |
5 | * Dave Engebretsen engebret@us.ibm.com | 5 | * Dave Engebretsen engebret@us.ibm.com |
6 | * Santiago Leon santil@us.ibm.com | 6 | * Santiago Leon santil@us.ibm.com |
7 | * Hollis Blanchard <hollisb@us.ibm.com> | 7 | * Hollis Blanchard <hollisb@us.ibm.com> |
8 | * Stephen Rothwell | ||
8 | * | 9 | * |
9 | * This program is free software; you can redistribute it and/or | 10 | * This program is free software; you can redistribute it and/or |
10 | * modify it under the terms of the GNU General Public License | 11 | * modify it under the terms of the GNU General Public License |
@@ -14,61 +15,30 @@ | |||
14 | 15 | ||
15 | #include <linux/init.h> | 16 | #include <linux/init.h> |
16 | #include <linux/console.h> | 17 | #include <linux/console.h> |
17 | #include <linux/version.h> | ||
18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
19 | #include <linux/kobject.h> | ||
20 | #include <linux/mm.h> | 19 | #include <linux/mm.h> |
21 | #include <linux/dma-mapping.h> | 20 | #include <linux/dma-mapping.h> |
22 | #include <asm/rtas.h> | ||
23 | #include <asm/iommu.h> | 21 | #include <asm/iommu.h> |
24 | #include <asm/dma.h> | 22 | #include <asm/dma.h> |
25 | #include <asm/ppcdebug.h> | ||
26 | #include <asm/vio.h> | 23 | #include <asm/vio.h> |
27 | #include <asm/hvcall.h> | ||
28 | #include <asm/iSeries/vio.h> | ||
29 | #include <asm/iSeries/HvTypes.h> | ||
30 | #include <asm/iSeries/HvCallXm.h> | ||
31 | #include <asm/iSeries/HvLpConfig.h> | ||
32 | |||
33 | #define DBGENTER() pr_debug("%s entered\n", __FUNCTION__) | ||
34 | |||
35 | extern struct subsystem devices_subsys; /* needed for vio_find_name() */ | ||
36 | 24 | ||
37 | static const struct vio_device_id *vio_match_device( | 25 | static const struct vio_device_id *vio_match_device( |
38 | const struct vio_device_id *, const struct vio_dev *); | 26 | const struct vio_device_id *, const struct vio_dev *); |
39 | 27 | ||
40 | #ifdef CONFIG_PPC_PSERIES | 28 | struct vio_dev vio_bus_device = { /* fake "parent" device */ |
41 | static struct iommu_table *vio_build_iommu_table(struct vio_dev *); | ||
42 | static int vio_num_address_cells; | ||
43 | #endif | ||
44 | #ifdef CONFIG_PPC_ISERIES | ||
45 | static struct iommu_table veth_iommu_table; | ||
46 | static struct iommu_table vio_iommu_table; | ||
47 | #endif | ||
48 | static struct vio_dev vio_bus_device = { /* fake "parent" device */ | ||
49 | .name = vio_bus_device.dev.bus_id, | 29 | .name = vio_bus_device.dev.bus_id, |
50 | .type = "", | 30 | .type = "", |
51 | #ifdef CONFIG_PPC_ISERIES | ||
52 | .iommu_table = &vio_iommu_table, | ||
53 | #endif | ||
54 | .dev.bus_id = "vio", | 31 | .dev.bus_id = "vio", |
55 | .dev.bus = &vio_bus_type, | 32 | .dev.bus = &vio_bus_type, |
56 | }; | 33 | }; |
57 | 34 | ||
58 | #ifdef CONFIG_PPC_ISERIES | 35 | static struct vio_bus_ops vio_bus_ops; |
59 | static struct vio_dev *__init vio_register_device_iseries(char *type, | ||
60 | uint32_t unit_num); | ||
61 | |||
62 | struct device *iSeries_vio_dev = &vio_bus_device.dev; | ||
63 | EXPORT_SYMBOL(iSeries_vio_dev); | ||
64 | |||
65 | #define device_is_compatible(a, b) 1 | ||
66 | 36 | ||
67 | #endif | 37 | /* |
68 | 38 | * Convert from struct device to struct vio_dev and pass to driver. | |
69 | /* convert from struct device to struct vio_dev and pass to driver. | ||
70 | * dev->driver has already been set by generic code because vio_bus_match | 39 | * dev->driver has already been set by generic code because vio_bus_match |
71 | * succeeded. */ | 40 | * succeeded. |
41 | */ | ||
72 | static int vio_bus_probe(struct device *dev) | 42 | static int vio_bus_probe(struct device *dev) |
73 | { | 43 | { |
74 | struct vio_dev *viodev = to_vio_dev(dev); | 44 | struct vio_dev *viodev = to_vio_dev(dev); |
@@ -76,15 +46,12 @@ static int vio_bus_probe(struct device *dev) | |||
76 | const struct vio_device_id *id; | 46 | const struct vio_device_id *id; |
77 | int error = -ENODEV; | 47 | int error = -ENODEV; |
78 | 48 | ||
79 | DBGENTER(); | ||
80 | |||
81 | if (!viodrv->probe) | 49 | if (!viodrv->probe) |
82 | return error; | 50 | return error; |
83 | 51 | ||
84 | id = vio_match_device(viodrv->id_table, viodev); | 52 | id = vio_match_device(viodrv->id_table, viodev); |
85 | if (id) { | 53 | if (id) |
86 | error = viodrv->probe(viodev, id); | 54 | error = viodrv->probe(viodev, id); |
87 | } | ||
88 | 55 | ||
89 | return error; | 56 | return error; |
90 | } | 57 | } |
@@ -95,11 +62,8 @@ static int vio_bus_remove(struct device *dev) | |||
95 | struct vio_dev *viodev = to_vio_dev(dev); | 62 | struct vio_dev *viodev = to_vio_dev(dev); |
96 | struct vio_driver *viodrv = to_vio_driver(dev->driver); | 63 | struct vio_driver *viodrv = to_vio_driver(dev->driver); |
97 | 64 | ||
98 | DBGENTER(); | 65 | if (viodrv->remove) |
99 | |||
100 | if (viodrv->remove) { | ||
101 | return viodrv->remove(viodev); | 66 | return viodrv->remove(viodev); |
102 | } | ||
103 | 67 | ||
104 | /* driver can't remove */ | 68 | /* driver can't remove */ |
105 | return 1; | 69 | return 1; |
@@ -135,193 +99,72 @@ void vio_unregister_driver(struct vio_driver *viodrv) | |||
135 | EXPORT_SYMBOL(vio_unregister_driver); | 99 | EXPORT_SYMBOL(vio_unregister_driver); |
136 | 100 | ||
137 | /** | 101 | /** |
138 | * vio_match_device: - Tell if a VIO device has a matching VIO device id structure. | 102 | * vio_match_device: - Tell if a VIO device has a matching |
139 | * @ids: array of VIO device id structures to search in | 103 | * VIO device id structure. |
140 | * @dev: the VIO device structure to match against | 104 | * @ids: array of VIO device id structures to search in |
105 | * @dev: the VIO device structure to match against | ||
141 | * | 106 | * |
142 | * Used by a driver to check whether a VIO device present in the | 107 | * Used by a driver to check whether a VIO device present in the |
143 | * system is in its list of supported devices. Returns the matching | 108 | * system is in its list of supported devices. Returns the matching |
144 | * vio_device_id structure or NULL if there is no match. | 109 | * vio_device_id structure or NULL if there is no match. |
145 | */ | 110 | */ |
146 | static const struct vio_device_id * vio_match_device(const struct vio_device_id *ids, | 111 | static const struct vio_device_id *vio_match_device( |
147 | const struct vio_dev *dev) | 112 | const struct vio_device_id *ids, const struct vio_dev *dev) |
148 | { | 113 | { |
149 | DBGENTER(); | 114 | while (ids->type[0] != '\0') { |
150 | 115 | if (vio_bus_ops.match(ids, dev)) | |
151 | while (ids->type) { | ||
152 | if ((strncmp(dev->type, ids->type, strlen(ids->type)) == 0) && | ||
153 | device_is_compatible(dev->dev.platform_data, ids->compat)) | ||
154 | return ids; | 116 | return ids; |
155 | ids++; | 117 | ids++; |
156 | } | 118 | } |
157 | return NULL; | 119 | return NULL; |
158 | } | 120 | } |
159 | 121 | ||
160 | #ifdef CONFIG_PPC_ISERIES | ||
161 | void __init iommu_vio_init(void) | ||
162 | { | ||
163 | struct iommu_table *t; | ||
164 | struct iommu_table_cb cb; | ||
165 | unsigned long cbp; | ||
166 | unsigned long itc_entries; | ||
167 | |||
168 | cb.itc_busno = 255; /* Bus 255 is the virtual bus */ | ||
169 | cb.itc_virtbus = 0xff; /* Ask for virtual bus */ | ||
170 | |||
171 | cbp = virt_to_abs(&cb); | ||
172 | HvCallXm_getTceTableParms(cbp); | ||
173 | |||
174 | itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry); | ||
175 | veth_iommu_table.it_size = itc_entries / 2; | ||
176 | veth_iommu_table.it_busno = cb.itc_busno; | ||
177 | veth_iommu_table.it_offset = cb.itc_offset; | ||
178 | veth_iommu_table.it_index = cb.itc_index; | ||
179 | veth_iommu_table.it_type = TCE_VB; | ||
180 | veth_iommu_table.it_blocksize = 1; | ||
181 | |||
182 | t = iommu_init_table(&veth_iommu_table); | ||
183 | |||
184 | if (!t) | ||
185 | printk("Virtual Bus VETH TCE table failed.\n"); | ||
186 | |||
187 | vio_iommu_table.it_size = itc_entries - veth_iommu_table.it_size; | ||
188 | vio_iommu_table.it_busno = cb.itc_busno; | ||
189 | vio_iommu_table.it_offset = cb.itc_offset + | ||
190 | veth_iommu_table.it_size; | ||
191 | vio_iommu_table.it_index = cb.itc_index; | ||
192 | vio_iommu_table.it_type = TCE_VB; | ||
193 | vio_iommu_table.it_blocksize = 1; | ||
194 | |||
195 | t = iommu_init_table(&vio_iommu_table); | ||
196 | |||
197 | if (!t) | ||
198 | printk("Virtual Bus VIO TCE table failed.\n"); | ||
199 | } | ||
200 | #endif | ||
201 | |||
202 | #ifdef CONFIG_PPC_PSERIES | ||
203 | static void probe_bus_pseries(void) | ||
204 | { | ||
205 | struct device_node *node_vroot, *of_node; | ||
206 | |||
207 | node_vroot = find_devices("vdevice"); | ||
208 | if ((node_vroot == NULL) || (node_vroot->child == NULL)) | ||
209 | /* this machine doesn't do virtual IO, and that's ok */ | ||
210 | return; | ||
211 | |||
212 | vio_num_address_cells = prom_n_addr_cells(node_vroot->child); | ||
213 | |||
214 | /* | ||
215 | * Create struct vio_devices for each virtual device in the device tree. | ||
216 | * Drivers will associate with them later. | ||
217 | */ | ||
218 | for (of_node = node_vroot->child; of_node != NULL; | ||
219 | of_node = of_node->sibling) { | ||
220 | printk(KERN_DEBUG "%s: processing %p\n", __FUNCTION__, of_node); | ||
221 | vio_register_device_node(of_node); | ||
222 | } | ||
223 | } | ||
224 | #endif | ||
225 | |||
226 | #ifdef CONFIG_PPC_ISERIES | ||
227 | static void probe_bus_iseries(void) | ||
228 | { | ||
229 | HvLpIndexMap vlan_map = HvLpConfig_getVirtualLanIndexMap(); | ||
230 | struct vio_dev *viodev; | ||
231 | int i; | ||
232 | |||
233 | /* there is only one of each of these */ | ||
234 | vio_register_device_iseries("viocons", 0); | ||
235 | vio_register_device_iseries("vscsi", 0); | ||
236 | |||
237 | vlan_map = HvLpConfig_getVirtualLanIndexMap(); | ||
238 | for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) { | ||
239 | if ((vlan_map & (0x8000 >> i)) == 0) | ||
240 | continue; | ||
241 | viodev = vio_register_device_iseries("vlan", i); | ||
242 | /* veth is special and has it own iommu_table */ | ||
243 | viodev->iommu_table = &veth_iommu_table; | ||
244 | } | ||
245 | for (i = 0; i < HVMAXARCHITECTEDVIRTUALDISKS; i++) | ||
246 | vio_register_device_iseries("viodasd", i); | ||
247 | for (i = 0; i < HVMAXARCHITECTEDVIRTUALCDROMS; i++) | ||
248 | vio_register_device_iseries("viocd", i); | ||
249 | for (i = 0; i < HVMAXARCHITECTEDVIRTUALTAPES; i++) | ||
250 | vio_register_device_iseries("viotape", i); | ||
251 | } | ||
252 | #endif | ||
253 | |||
254 | /** | 122 | /** |
255 | * vio_bus_init: - Initialize the virtual IO bus | 123 | * vio_bus_init: - Initialize the virtual IO bus |
256 | */ | 124 | */ |
257 | static int __init vio_bus_init(void) | 125 | int __init vio_bus_init(struct vio_bus_ops *ops) |
258 | { | 126 | { |
259 | int err; | 127 | int err; |
260 | 128 | ||
129 | vio_bus_ops = *ops; | ||
130 | |||
261 | err = bus_register(&vio_bus_type); | 131 | err = bus_register(&vio_bus_type); |
262 | if (err) { | 132 | if (err) { |
263 | printk(KERN_ERR "failed to register VIO bus\n"); | 133 | printk(KERN_ERR "failed to register VIO bus\n"); |
264 | return err; | 134 | return err; |
265 | } | 135 | } |
266 | 136 | ||
267 | /* the fake parent of all vio devices, just to give us a nice directory */ | 137 | /* |
138 | * The fake parent of all vio devices, just to give us | ||
139 | * a nice directory | ||
140 | */ | ||
268 | err = device_register(&vio_bus_device.dev); | 141 | err = device_register(&vio_bus_device.dev); |
269 | if (err) { | 142 | if (err) { |
270 | printk(KERN_WARNING "%s: device_register returned %i\n", __FUNCTION__, | 143 | printk(KERN_WARNING "%s: device_register returned %i\n", |
271 | err); | 144 | __FUNCTION__, err); |
272 | return err; | 145 | return err; |
273 | } | 146 | } |
274 | 147 | ||
275 | #ifdef CONFIG_PPC_PSERIES | ||
276 | probe_bus_pseries(); | ||
277 | #endif | ||
278 | #ifdef CONFIG_PPC_ISERIES | ||
279 | probe_bus_iseries(); | ||
280 | #endif | ||
281 | |||
282 | return 0; | 148 | return 0; |
283 | } | 149 | } |
284 | 150 | ||
285 | __initcall(vio_bus_init); | ||
286 | |||
287 | /* vio_dev refcount hit 0 */ | 151 | /* vio_dev refcount hit 0 */ |
288 | static void __devinit vio_dev_release(struct device *dev) | 152 | static void __devinit vio_dev_release(struct device *dev) |
289 | { | 153 | { |
290 | DBGENTER(); | 154 | if (vio_bus_ops.release_device) |
291 | 155 | vio_bus_ops.release_device(dev); | |
292 | #ifdef CONFIG_PPC_PSERIES | ||
293 | /* XXX free TCE table */ | ||
294 | of_node_put(dev->platform_data); | ||
295 | #endif | ||
296 | kfree(to_vio_dev(dev)); | 156 | kfree(to_vio_dev(dev)); |
297 | } | 157 | } |
298 | 158 | ||
299 | #ifdef CONFIG_PPC_PSERIES | 159 | static ssize_t viodev_show_name(struct device *dev, |
300 | static ssize_t viodev_show_devspec(struct device *dev, struct device_attribute *attr, char *buf) | 160 | struct device_attribute *attr, char *buf) |
301 | { | ||
302 | struct device_node *of_node = dev->platform_data; | ||
303 | |||
304 | return sprintf(buf, "%s\n", of_node->full_name); | ||
305 | } | ||
306 | DEVICE_ATTR(devspec, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_devspec, NULL); | ||
307 | #endif | ||
308 | |||
309 | static ssize_t viodev_show_name(struct device *dev, struct device_attribute *attr, char *buf) | ||
310 | { | 161 | { |
311 | return sprintf(buf, "%s\n", to_vio_dev(dev)->name); | 162 | return sprintf(buf, "%s\n", to_vio_dev(dev)->name); |
312 | } | 163 | } |
313 | DEVICE_ATTR(name, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_name, NULL); | 164 | DEVICE_ATTR(name, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_name, NULL); |
314 | 165 | ||
315 | static struct vio_dev * __devinit vio_register_device_common( | 166 | struct vio_dev * __devinit vio_register_device(struct vio_dev *viodev) |
316 | struct vio_dev *viodev, char *name, char *type, | ||
317 | uint32_t unit_address, struct iommu_table *iommu_table) | ||
318 | { | 167 | { |
319 | DBGENTER(); | ||
320 | |||
321 | viodev->name = name; | ||
322 | viodev->type = type; | ||
323 | viodev->unit_address = unit_address; | ||
324 | viodev->iommu_table = iommu_table; | ||
325 | /* init generic 'struct device' fields: */ | 168 | /* init generic 'struct device' fields: */ |
326 | viodev->dev.parent = &vio_bus_device.dev; | 169 | viodev->dev.parent = &vio_bus_device.dev; |
327 | viodev->dev.bus = &vio_bus_type; | 170 | viodev->dev.bus = &vio_bus_type; |
@@ -338,222 +181,15 @@ static struct vio_dev * __devinit vio_register_device_common( | |||
338 | return viodev; | 181 | return viodev; |
339 | } | 182 | } |
340 | 183 | ||
341 | #ifdef CONFIG_PPC_PSERIES | ||
342 | /** | ||
343 | * vio_register_device_node: - Register a new vio device. | ||
344 | * @of_node: The OF node for this device. | ||
345 | * | ||
346 | * Creates and initializes a vio_dev structure from the data in | ||
347 | * of_node (dev.platform_data) and adds it to the list of virtual devices. | ||
348 | * Returns a pointer to the created vio_dev or NULL if node has | ||
349 | * NULL device_type or compatible fields. | ||
350 | */ | ||
351 | struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node) | ||
352 | { | ||
353 | struct vio_dev *viodev; | ||
354 | unsigned int *unit_address; | ||
355 | unsigned int *irq_p; | ||
356 | |||
357 | DBGENTER(); | ||
358 | |||
359 | /* we need the 'device_type' property, in order to match with drivers */ | ||
360 | if ((NULL == of_node->type)) { | ||
361 | printk(KERN_WARNING | ||
362 | "%s: node %s missing 'device_type'\n", __FUNCTION__, | ||
363 | of_node->name ? of_node->name : "<unknown>"); | ||
364 | return NULL; | ||
365 | } | ||
366 | |||
367 | unit_address = (unsigned int *)get_property(of_node, "reg", NULL); | ||
368 | if (!unit_address) { | ||
369 | printk(KERN_WARNING "%s: node %s missing 'reg'\n", __FUNCTION__, | ||
370 | of_node->name ? of_node->name : "<unknown>"); | ||
371 | return NULL; | ||
372 | } | ||
373 | |||
374 | /* allocate a vio_dev for this node */ | ||
375 | viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); | ||
376 | if (!viodev) { | ||
377 | return NULL; | ||
378 | } | ||
379 | memset(viodev, 0, sizeof(struct vio_dev)); | ||
380 | |||
381 | viodev->dev.platform_data = of_node_get(of_node); | ||
382 | |||
383 | viodev->irq = NO_IRQ; | ||
384 | irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL); | ||
385 | if (irq_p) { | ||
386 | int virq = virt_irq_create_mapping(*irq_p); | ||
387 | if (virq == NO_IRQ) { | ||
388 | printk(KERN_ERR "Unable to allocate interrupt " | ||
389 | "number for %s\n", of_node->full_name); | ||
390 | } else | ||
391 | viodev->irq = irq_offset_up(virq); | ||
392 | } | ||
393 | |||
394 | snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address); | ||
395 | |||
396 | /* register with generic device framework */ | ||
397 | if (vio_register_device_common(viodev, of_node->name, of_node->type, | ||
398 | *unit_address, vio_build_iommu_table(viodev)) | ||
399 | == NULL) { | ||
400 | /* XXX free TCE table */ | ||
401 | kfree(viodev); | ||
402 | return NULL; | ||
403 | } | ||
404 | device_create_file(&viodev->dev, &dev_attr_devspec); | ||
405 | |||
406 | return viodev; | ||
407 | } | ||
408 | EXPORT_SYMBOL(vio_register_device_node); | ||
409 | #endif | ||
410 | |||
411 | #ifdef CONFIG_PPC_ISERIES | ||
412 | /** | ||
413 | * vio_register_device: - Register a new vio device. | ||
414 | * @voidev: The device to register. | ||
415 | */ | ||
416 | static struct vio_dev *__init vio_register_device_iseries(char *type, | ||
417 | uint32_t unit_num) | ||
418 | { | ||
419 | struct vio_dev *viodev; | ||
420 | |||
421 | DBGENTER(); | ||
422 | |||
423 | /* allocate a vio_dev for this node */ | ||
424 | viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); | ||
425 | if (!viodev) | ||
426 | return NULL; | ||
427 | memset(viodev, 0, sizeof(struct vio_dev)); | ||
428 | |||
429 | snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%s%d", type, unit_num); | ||
430 | |||
431 | return vio_register_device_common(viodev, viodev->dev.bus_id, type, | ||
432 | unit_num, &vio_iommu_table); | ||
433 | } | ||
434 | #endif | ||
435 | |||
436 | void __devinit vio_unregister_device(struct vio_dev *viodev) | 184 | void __devinit vio_unregister_device(struct vio_dev *viodev) |
437 | { | 185 | { |
438 | DBGENTER(); | 186 | if (vio_bus_ops.unregister_device) |
439 | #ifdef CONFIG_PPC_PSERIES | 187 | vio_bus_ops.unregister_device(viodev); |
440 | device_remove_file(&viodev->dev, &dev_attr_devspec); | ||
441 | #endif | ||
442 | device_remove_file(&viodev->dev, &dev_attr_name); | 188 | device_remove_file(&viodev->dev, &dev_attr_name); |
443 | device_unregister(&viodev->dev); | 189 | device_unregister(&viodev->dev); |
444 | } | 190 | } |
445 | EXPORT_SYMBOL(vio_unregister_device); | 191 | EXPORT_SYMBOL(vio_unregister_device); |
446 | 192 | ||
447 | #ifdef CONFIG_PPC_PSERIES | ||
448 | /** | ||
449 | * vio_get_attribute: - get attribute for virtual device | ||
450 | * @vdev: The vio device to get property. | ||
451 | * @which: The property/attribute to be extracted. | ||
452 | * @length: Pointer to length of returned data size (unused if NULL). | ||
453 | * | ||
454 | * Calls prom.c's get_property() to return the value of the | ||
455 | * attribute specified by the preprocessor constant @which | ||
456 | */ | ||
457 | const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length) | ||
458 | { | ||
459 | return get_property(vdev->dev.platform_data, (char*)which, length); | ||
460 | } | ||
461 | EXPORT_SYMBOL(vio_get_attribute); | ||
462 | |||
463 | /* vio_find_name() - internal because only vio.c knows how we formatted the | ||
464 | * kobject name | ||
465 | * XXX once vio_bus_type.devices is actually used as a kset in | ||
466 | * drivers/base/bus.c, this function should be removed in favor of | ||
467 | * "device_find(kobj_name, &vio_bus_type)" | ||
468 | */ | ||
469 | static struct vio_dev *vio_find_name(const char *kobj_name) | ||
470 | { | ||
471 | struct kobject *found; | ||
472 | |||
473 | found = kset_find_obj(&devices_subsys.kset, kobj_name); | ||
474 | if (!found) | ||
475 | return NULL; | ||
476 | |||
477 | return to_vio_dev(container_of(found, struct device, kobj)); | ||
478 | } | ||
479 | |||
480 | /** | ||
481 | * vio_find_node - find an already-registered vio_dev | ||
482 | * @vnode: device_node of the virtual device we're looking for | ||
483 | */ | ||
484 | struct vio_dev *vio_find_node(struct device_node *vnode) | ||
485 | { | ||
486 | uint32_t *unit_address; | ||
487 | char kobj_name[BUS_ID_SIZE]; | ||
488 | |||
489 | /* construct the kobject name from the device node */ | ||
490 | unit_address = (uint32_t *)get_property(vnode, "reg", NULL); | ||
491 | if (!unit_address) | ||
492 | return NULL; | ||
493 | snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address); | ||
494 | |||
495 | return vio_find_name(kobj_name); | ||
496 | } | ||
497 | EXPORT_SYMBOL(vio_find_node); | ||
498 | |||
499 | /** | ||
500 | * vio_build_iommu_table: - gets the dma information from OF and builds the TCE tree. | ||
501 | * @dev: the virtual device. | ||
502 | * | ||
503 | * Returns a pointer to the built tce tree, or NULL if it can't | ||
504 | * find property. | ||
505 | */ | ||
506 | static struct iommu_table * vio_build_iommu_table(struct vio_dev *dev) | ||
507 | { | ||
508 | unsigned int *dma_window; | ||
509 | struct iommu_table *newTceTable; | ||
510 | unsigned long offset; | ||
511 | int dma_window_property_size; | ||
512 | |||
513 | dma_window = (unsigned int *) get_property(dev->dev.platform_data, "ibm,my-dma-window", &dma_window_property_size); | ||
514 | if(!dma_window) { | ||
515 | return NULL; | ||
516 | } | ||
517 | |||
518 | newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL); | ||
519 | |||
520 | /* There should be some code to extract the phys-encoded offset | ||
521 | using prom_n_addr_cells(). However, according to a comment | ||
522 | on earlier versions, it's always zero, so we don't bother */ | ||
523 | offset = dma_window[1] >> PAGE_SHIFT; | ||
524 | |||
525 | /* TCE table size - measured in tce entries */ | ||
526 | newTceTable->it_size = dma_window[4] >> PAGE_SHIFT; | ||
527 | /* offset for VIO should always be 0 */ | ||
528 | newTceTable->it_offset = offset; | ||
529 | newTceTable->it_busno = 0; | ||
530 | newTceTable->it_index = (unsigned long)dma_window[0]; | ||
531 | newTceTable->it_type = TCE_VB; | ||
532 | |||
533 | return iommu_init_table(newTceTable); | ||
534 | } | ||
535 | |||
536 | int vio_enable_interrupts(struct vio_dev *dev) | ||
537 | { | ||
538 | int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE); | ||
539 | if (rc != H_Success) { | ||
540 | printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc); | ||
541 | } | ||
542 | return rc; | ||
543 | } | ||
544 | EXPORT_SYMBOL(vio_enable_interrupts); | ||
545 | |||
546 | int vio_disable_interrupts(struct vio_dev *dev) | ||
547 | { | ||
548 | int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE); | ||
549 | if (rc != H_Success) { | ||
550 | printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc); | ||
551 | } | ||
552 | return rc; | ||
553 | } | ||
554 | EXPORT_SYMBOL(vio_disable_interrupts); | ||
555 | #endif | ||
556 | |||
557 | static dma_addr_t vio_map_single(struct device *dev, void *vaddr, | 193 | static dma_addr_t vio_map_single(struct device *dev, void *vaddr, |
558 | size_t size, enum dma_data_direction direction) | 194 | size_t size, enum dma_data_direction direction) |
559 | { | 195 | { |
@@ -615,18 +251,8 @@ static int vio_bus_match(struct device *dev, struct device_driver *drv) | |||
615 | const struct vio_dev *vio_dev = to_vio_dev(dev); | 251 | const struct vio_dev *vio_dev = to_vio_dev(dev); |
616 | struct vio_driver *vio_drv = to_vio_driver(drv); | 252 | struct vio_driver *vio_drv = to_vio_driver(drv); |
617 | const struct vio_device_id *ids = vio_drv->id_table; | 253 | const struct vio_device_id *ids = vio_drv->id_table; |
618 | const struct vio_device_id *found_id; | ||
619 | |||
620 | DBGENTER(); | ||
621 | 254 | ||
622 | if (!ids) | 255 | return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL); |
623 | return 0; | ||
624 | |||
625 | found_id = vio_match_device(ids, vio_dev); | ||
626 | if (found_id) | ||
627 | return 1; | ||
628 | |||
629 | return 0; | ||
630 | } | 256 | } |
631 | 257 | ||
632 | struct bus_type vio_bus_type = { | 258 | struct bus_type vio_bus_type = { |
diff --git a/arch/ppc64/mm/hash_low.S b/arch/ppc64/mm/hash_low.S index fbff24827ae7..35eb49e1b890 100644 --- a/arch/ppc64/mm/hash_low.S +++ b/arch/ppc64/mm/hash_low.S | |||
@@ -129,12 +129,10 @@ _GLOBAL(__hash_page) | |||
129 | * code rather than call a C function...) | 129 | * code rather than call a C function...) |
130 | */ | 130 | */ |
131 | BEGIN_FTR_SECTION | 131 | BEGIN_FTR_SECTION |
132 | BEGIN_FTR_SECTION | ||
133 | mr r4,r30 | 132 | mr r4,r30 |
134 | mr r5,r7 | 133 | mr r5,r7 |
135 | bl .hash_page_do_lazy_icache | 134 | bl .hash_page_do_lazy_icache |
136 | END_FTR_SECTION_IFSET(CPU_FTR_NOEXECUTE) | 135 | END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE) |
137 | END_FTR_SECTION_IFCLR(CPU_FTR_COHERENT_ICACHE) | ||
138 | 136 | ||
139 | /* At this point, r3 contains new PP bits, save them in | 137 | /* At this point, r3 contains new PP bits, save them in |
140 | * place of "access" in the param area (sic) | 138 | * place of "access" in the param area (sic) |
diff --git a/arch/ppc64/mm/hash_native.c b/arch/ppc64/mm/hash_native.c index a6abd3a979bf..7626bb59954d 100644 --- a/arch/ppc64/mm/hash_native.c +++ b/arch/ppc64/mm/hash_native.c | |||
@@ -51,7 +51,6 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va, | |||
51 | unsigned long prpn, unsigned long vflags, | 51 | unsigned long prpn, unsigned long vflags, |
52 | unsigned long rflags) | 52 | unsigned long rflags) |
53 | { | 53 | { |
54 | unsigned long arpn = physRpn_to_absRpn(prpn); | ||
55 | hpte_t *hptep = htab_address + hpte_group; | 54 | hpte_t *hptep = htab_address + hpte_group; |
56 | unsigned long hpte_v, hpte_r; | 55 | unsigned long hpte_v, hpte_r; |
57 | int i; | 56 | int i; |
@@ -74,7 +73,7 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va, | |||
74 | hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; | 73 | hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; |
75 | if (vflags & HPTE_V_LARGE) | 74 | if (vflags & HPTE_V_LARGE) |
76 | va &= ~(1UL << HPTE_V_AVPN_SHIFT); | 75 | va &= ~(1UL << HPTE_V_AVPN_SHIFT); |
77 | hpte_r = (arpn << HPTE_R_RPN_SHIFT) | rflags; | 76 | hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags; |
78 | 77 | ||
79 | hptep->r = hpte_r; | 78 | hptep->r = hpte_r; |
80 | /* Guarantee the second dword is visible before the valid bit */ | 79 | /* Guarantee the second dword is visible before the valid bit */ |
diff --git a/arch/ppc64/mm/hash_utils.c b/arch/ppc64/mm/hash_utils.c index 623b5d130c31..09475c8edf7c 100644 --- a/arch/ppc64/mm/hash_utils.c +++ b/arch/ppc64/mm/hash_utils.c | |||
@@ -210,7 +210,7 @@ void __init htab_initialize(void) | |||
210 | 210 | ||
211 | /* create bolted the linear mapping in the hash table */ | 211 | /* create bolted the linear mapping in the hash table */ |
212 | for (i=0; i < lmb.memory.cnt; i++) { | 212 | for (i=0; i < lmb.memory.cnt; i++) { |
213 | base = lmb.memory.region[i].physbase + KERNELBASE; | 213 | base = lmb.memory.region[i].base + KERNELBASE; |
214 | size = lmb.memory.region[i].size; | 214 | size = lmb.memory.region[i].size; |
215 | 215 | ||
216 | DBG("creating mapping for region: %lx : %lx\n", base, size); | 216 | DBG("creating mapping for region: %lx : %lx\n", base, size); |
@@ -302,7 +302,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) | |||
302 | int local = 0; | 302 | int local = 0; |
303 | cpumask_t tmp; | 303 | cpumask_t tmp; |
304 | 304 | ||
305 | if ((ea & ~REGION_MASK) > EADDR_MASK) | 305 | if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) |
306 | return 1; | 306 | return 1; |
307 | 307 | ||
308 | switch (REGION_ID(ea)) { | 308 | switch (REGION_ID(ea)) { |
diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c index f9524602818d..e7833c80eb68 100644 --- a/arch/ppc64/mm/hugetlbpage.c +++ b/arch/ppc64/mm/hugetlbpage.c | |||
@@ -27,124 +27,94 @@ | |||
27 | 27 | ||
28 | #include <linux/sysctl.h> | 28 | #include <linux/sysctl.h> |
29 | 29 | ||
30 | #define HUGEPGDIR_SHIFT (HPAGE_SHIFT + PAGE_SHIFT - 3) | 30 | #define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT) |
31 | #define HUGEPGDIR_SIZE (1UL << HUGEPGDIR_SHIFT) | 31 | #define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT) |
32 | #define HUGEPGDIR_MASK (~(HUGEPGDIR_SIZE-1)) | ||
33 | 32 | ||
34 | #define HUGEPTE_INDEX_SIZE 9 | 33 | /* Modelled after find_linux_pte() */ |
35 | #define HUGEPGD_INDEX_SIZE 10 | 34 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) |
36 | |||
37 | #define PTRS_PER_HUGEPTE (1 << HUGEPTE_INDEX_SIZE) | ||
38 | #define PTRS_PER_HUGEPGD (1 << HUGEPGD_INDEX_SIZE) | ||
39 | |||
40 | static inline int hugepgd_index(unsigned long addr) | ||
41 | { | ||
42 | return (addr & ~REGION_MASK) >> HUGEPGDIR_SHIFT; | ||
43 | } | ||
44 | |||
45 | static pud_t *hugepgd_offset(struct mm_struct *mm, unsigned long addr) | ||
46 | { | 35 | { |
47 | int index; | 36 | pgd_t *pg; |
37 | pud_t *pu; | ||
38 | pmd_t *pm; | ||
39 | pte_t *pt; | ||
48 | 40 | ||
49 | if (! mm->context.huge_pgdir) | 41 | BUG_ON(! in_hugepage_area(mm->context, addr)); |
50 | return NULL; | ||
51 | 42 | ||
43 | addr &= HPAGE_MASK; | ||
44 | |||
45 | pg = pgd_offset(mm, addr); | ||
46 | if (!pgd_none(*pg)) { | ||
47 | pu = pud_offset(pg, addr); | ||
48 | if (!pud_none(*pu)) { | ||
49 | pm = pmd_offset(pu, addr); | ||
50 | pt = (pte_t *)pm; | ||
51 | BUG_ON(!pmd_none(*pm) | ||
52 | && !(pte_present(*pt) && pte_huge(*pt))); | ||
53 | return pt; | ||
54 | } | ||
55 | } | ||
52 | 56 | ||
53 | index = hugepgd_index(addr); | 57 | return NULL; |
54 | BUG_ON(index >= PTRS_PER_HUGEPGD); | ||
55 | return (pud_t *)(mm->context.huge_pgdir + index); | ||
56 | } | 58 | } |
57 | 59 | ||
58 | static inline pte_t *hugepte_offset(pud_t *dir, unsigned long addr) | 60 | pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) |
59 | { | 61 | { |
60 | int index; | 62 | pgd_t *pg; |
61 | 63 | pud_t *pu; | |
62 | if (pud_none(*dir)) | 64 | pmd_t *pm; |
63 | return NULL; | 65 | pte_t *pt; |
64 | 66 | ||
65 | index = (addr >> HPAGE_SHIFT) % PTRS_PER_HUGEPTE; | ||
66 | return (pte_t *)pud_page(*dir) + index; | ||
67 | } | ||
68 | |||
69 | static pud_t *hugepgd_alloc(struct mm_struct *mm, unsigned long addr) | ||
70 | { | ||
71 | BUG_ON(! in_hugepage_area(mm->context, addr)); | 67 | BUG_ON(! in_hugepage_area(mm->context, addr)); |
72 | 68 | ||
73 | if (! mm->context.huge_pgdir) { | 69 | addr &= HPAGE_MASK; |
74 | pgd_t *new; | ||
75 | spin_unlock(&mm->page_table_lock); | ||
76 | /* Don't use pgd_alloc(), because we want __GFP_REPEAT */ | ||
77 | new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT); | ||
78 | BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE)); | ||
79 | spin_lock(&mm->page_table_lock); | ||
80 | 70 | ||
81 | /* | 71 | pg = pgd_offset(mm, addr); |
82 | * Because we dropped the lock, we should re-check the | 72 | pu = pud_alloc(mm, pg, addr); |
83 | * entry, as somebody else could have populated it.. | ||
84 | */ | ||
85 | if (mm->context.huge_pgdir) | ||
86 | pgd_free(new); | ||
87 | else | ||
88 | mm->context.huge_pgdir = new; | ||
89 | } | ||
90 | return hugepgd_offset(mm, addr); | ||
91 | } | ||
92 | 73 | ||
93 | static pte_t *hugepte_alloc(struct mm_struct *mm, pud_t *dir, unsigned long addr) | 74 | if (pu) { |
94 | { | 75 | pm = pmd_alloc(mm, pu, addr); |
95 | if (! pud_present(*dir)) { | 76 | if (pm) { |
96 | pte_t *new; | 77 | pt = (pte_t *)pm; |
97 | 78 | BUG_ON(!pmd_none(*pm) | |
98 | spin_unlock(&mm->page_table_lock); | 79 | && !(pte_present(*pt) && pte_huge(*pt))); |
99 | new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT); | 80 | return pt; |
100 | BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE)); | ||
101 | spin_lock(&mm->page_table_lock); | ||
102 | /* | ||
103 | * Because we dropped the lock, we should re-check the | ||
104 | * entry, as somebody else could have populated it.. | ||
105 | */ | ||
106 | if (pud_present(*dir)) { | ||
107 | if (new) | ||
108 | kmem_cache_free(zero_cache, new); | ||
109 | } else { | ||
110 | struct page *ptepage; | ||
111 | |||
112 | if (! new) | ||
113 | return NULL; | ||
114 | ptepage = virt_to_page(new); | ||
115 | ptepage->mapping = (void *) mm; | ||
116 | ptepage->index = addr & HUGEPGDIR_MASK; | ||
117 | pud_populate(mm, dir, new); | ||
118 | } | 81 | } |
119 | } | 82 | } |
120 | 83 | ||
121 | return hugepte_offset(dir, addr); | 84 | return NULL; |
122 | } | 85 | } |
123 | 86 | ||
124 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | 87 | #define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE) |
125 | { | ||
126 | pud_t *pud; | ||
127 | 88 | ||
128 | BUG_ON(! in_hugepage_area(mm->context, addr)); | 89 | void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, |
90 | pte_t *ptep, pte_t pte) | ||
91 | { | ||
92 | int i; | ||
129 | 93 | ||
130 | pud = hugepgd_offset(mm, addr); | 94 | if (pte_present(*ptep)) { |
131 | if (! pud) | 95 | pte_clear(mm, addr, ptep); |
132 | return NULL; | 96 | flush_tlb_pending(); |
97 | } | ||
133 | 98 | ||
134 | return hugepte_offset(pud, addr); | 99 | for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) { |
100 | *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); | ||
101 | ptep++; | ||
102 | } | ||
135 | } | 103 | } |
136 | 104 | ||
137 | pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) | 105 | pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, |
106 | pte_t *ptep) | ||
138 | { | 107 | { |
139 | pud_t *pud; | 108 | unsigned long old = pte_update(ptep, ~0UL); |
109 | int i; | ||
140 | 110 | ||
141 | BUG_ON(! in_hugepage_area(mm->context, addr)); | 111 | if (old & _PAGE_HASHPTE) |
112 | hpte_update(mm, addr, old, 0); | ||
142 | 113 | ||
143 | pud = hugepgd_alloc(mm, addr); | 114 | for (i = 1; i < HUGEPTE_BATCH_SIZE; i++) |
144 | if (! pud) | 115 | ptep[i] = __pte(0); |
145 | return NULL; | ||
146 | 116 | ||
147 | return hugepte_alloc(mm, pud, addr); | 117 | return __pte(old); |
148 | } | 118 | } |
149 | 119 | ||
150 | /* | 120 | /* |
@@ -162,15 +132,17 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len) | |||
162 | return 0; | 132 | return 0; |
163 | } | 133 | } |
164 | 134 | ||
165 | static void flush_segments(void *parm) | 135 | static void flush_low_segments(void *parm) |
166 | { | 136 | { |
167 | u16 segs = (unsigned long) parm; | 137 | u16 areas = (unsigned long) parm; |
168 | unsigned long i; | 138 | unsigned long i; |
169 | 139 | ||
170 | asm volatile("isync" : : : "memory"); | 140 | asm volatile("isync" : : : "memory"); |
171 | 141 | ||
172 | for (i = 0; i < 16; i++) { | 142 | BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS); |
173 | if (! (segs & (1U << i))) | 143 | |
144 | for (i = 0; i < NUM_LOW_AREAS; i++) { | ||
145 | if (! (areas & (1U << i))) | ||
174 | continue; | 146 | continue; |
175 | asm volatile("slbie %0" : : "r" (i << SID_SHIFT)); | 147 | asm volatile("slbie %0" : : "r" (i << SID_SHIFT)); |
176 | } | 148 | } |
@@ -178,13 +150,33 @@ static void flush_segments(void *parm) | |||
178 | asm volatile("isync" : : : "memory"); | 150 | asm volatile("isync" : : : "memory"); |
179 | } | 151 | } |
180 | 152 | ||
181 | static int prepare_low_seg_for_htlb(struct mm_struct *mm, unsigned long seg) | 153 | static void flush_high_segments(void *parm) |
182 | { | 154 | { |
183 | unsigned long start = seg << SID_SHIFT; | 155 | u16 areas = (unsigned long) parm; |
184 | unsigned long end = (seg+1) << SID_SHIFT; | 156 | unsigned long i, j; |
157 | |||
158 | asm volatile("isync" : : : "memory"); | ||
159 | |||
160 | BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS); | ||
161 | |||
162 | for (i = 0; i < NUM_HIGH_AREAS; i++) { | ||
163 | if (! (areas & (1U << i))) | ||
164 | continue; | ||
165 | for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++) | ||
166 | asm volatile("slbie %0" | ||
167 | :: "r" ((i << HTLB_AREA_SHIFT) + (j << SID_SHIFT))); | ||
168 | } | ||
169 | |||
170 | asm volatile("isync" : : : "memory"); | ||
171 | } | ||
172 | |||
173 | static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area) | ||
174 | { | ||
175 | unsigned long start = area << SID_SHIFT; | ||
176 | unsigned long end = (area+1) << SID_SHIFT; | ||
185 | struct vm_area_struct *vma; | 177 | struct vm_area_struct *vma; |
186 | 178 | ||
187 | BUG_ON(seg >= 16); | 179 | BUG_ON(area >= NUM_LOW_AREAS); |
188 | 180 | ||
189 | /* Check no VMAs are in the region */ | 181 | /* Check no VMAs are in the region */ |
190 | vma = find_vma(mm, start); | 182 | vma = find_vma(mm, start); |
@@ -194,20 +186,39 @@ static int prepare_low_seg_for_htlb(struct mm_struct *mm, unsigned long seg) | |||
194 | return 0; | 186 | return 0; |
195 | } | 187 | } |
196 | 188 | ||
197 | static int open_low_hpage_segs(struct mm_struct *mm, u16 newsegs) | 189 | static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area) |
190 | { | ||
191 | unsigned long start = area << HTLB_AREA_SHIFT; | ||
192 | unsigned long end = (area+1) << HTLB_AREA_SHIFT; | ||
193 | struct vm_area_struct *vma; | ||
194 | |||
195 | BUG_ON(area >= NUM_HIGH_AREAS); | ||
196 | |||
197 | /* Check no VMAs are in the region */ | ||
198 | vma = find_vma(mm, start); | ||
199 | if (vma && (vma->vm_start < end)) | ||
200 | return -EBUSY; | ||
201 | |||
202 | return 0; | ||
203 | } | ||
204 | |||
205 | static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) | ||
198 | { | 206 | { |
199 | unsigned long i; | 207 | unsigned long i; |
200 | 208 | ||
201 | newsegs &= ~(mm->context.htlb_segs); | 209 | BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS); |
202 | if (! newsegs) | 210 | BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS); |
211 | |||
212 | newareas &= ~(mm->context.low_htlb_areas); | ||
213 | if (! newareas) | ||
203 | return 0; /* The segments we want are already open */ | 214 | return 0; /* The segments we want are already open */ |
204 | 215 | ||
205 | for (i = 0; i < 16; i++) | 216 | for (i = 0; i < NUM_LOW_AREAS; i++) |
206 | if ((1 << i) & newsegs) | 217 | if ((1 << i) & newareas) |
207 | if (prepare_low_seg_for_htlb(mm, i) != 0) | 218 | if (prepare_low_area_for_htlb(mm, i) != 0) |
208 | return -EBUSY; | 219 | return -EBUSY; |
209 | 220 | ||
210 | mm->context.htlb_segs |= newsegs; | 221 | mm->context.low_htlb_areas |= newareas; |
211 | 222 | ||
212 | /* update the paca copy of the context struct */ | 223 | /* update the paca copy of the context struct */ |
213 | get_paca()->context = mm->context; | 224 | get_paca()->context = mm->context; |
@@ -215,29 +226,63 @@ static int open_low_hpage_segs(struct mm_struct *mm, u16 newsegs) | |||
215 | /* the context change must make it to memory before the flush, | 226 | /* the context change must make it to memory before the flush, |
216 | * so that further SLB misses do the right thing. */ | 227 | * so that further SLB misses do the right thing. */ |
217 | mb(); | 228 | mb(); |
218 | on_each_cpu(flush_segments, (void *)(unsigned long)newsegs, 0, 1); | 229 | on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1); |
230 | |||
231 | return 0; | ||
232 | } | ||
233 | |||
234 | static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) | ||
235 | { | ||
236 | unsigned long i; | ||
237 | |||
238 | BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS); | ||
239 | BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8) | ||
240 | != NUM_HIGH_AREAS); | ||
241 | |||
242 | newareas &= ~(mm->context.high_htlb_areas); | ||
243 | if (! newareas) | ||
244 | return 0; /* The areas we want are already open */ | ||
245 | |||
246 | for (i = 0; i < NUM_HIGH_AREAS; i++) | ||
247 | if ((1 << i) & newareas) | ||
248 | if (prepare_high_area_for_htlb(mm, i) != 0) | ||
249 | return -EBUSY; | ||
250 | |||
251 | mm->context.high_htlb_areas |= newareas; | ||
252 | |||
253 | /* update the paca copy of the context struct */ | ||
254 | get_paca()->context = mm->context; | ||
255 | |||
256 | /* the context change must make it to memory before the flush, | ||
257 | * so that further SLB misses do the right thing. */ | ||
258 | mb(); | ||
259 | on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1); | ||
219 | 260 | ||
220 | return 0; | 261 | return 0; |
221 | } | 262 | } |
222 | 263 | ||
223 | int prepare_hugepage_range(unsigned long addr, unsigned long len) | 264 | int prepare_hugepage_range(unsigned long addr, unsigned long len) |
224 | { | 265 | { |
225 | if (within_hugepage_high_range(addr, len)) | 266 | int err; |
226 | return 0; | 267 | |
227 | else if ((addr < 0x100000000UL) && ((addr+len) < 0x100000000UL)) { | 268 | if ( (addr+len) < addr ) |
228 | int err; | 269 | return -EINVAL; |
229 | /* Yes, we need both tests, in case addr+len overflows | 270 | |
230 | * 64-bit arithmetic */ | 271 | if ((addr + len) < 0x100000000UL) |
231 | err = open_low_hpage_segs(current->mm, | 272 | err = open_low_hpage_areas(current->mm, |
232 | LOW_ESID_MASK(addr, len)); | 273 | LOW_ESID_MASK(addr, len)); |
233 | if (err) | 274 | else |
234 | printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" | 275 | err = open_high_hpage_areas(current->mm, |
235 | " failed (segs: 0x%04hx)\n", addr, len, | 276 | HTLB_AREA_MASK(addr, len)); |
236 | LOW_ESID_MASK(addr, len)); | 277 | if (err) { |
278 | printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" | ||
279 | " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n", | ||
280 | addr, len, | ||
281 | LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len)); | ||
237 | return err; | 282 | return err; |
238 | } | 283 | } |
239 | 284 | ||
240 | return -EINVAL; | 285 | return 0; |
241 | } | 286 | } |
242 | 287 | ||
243 | struct page * | 288 | struct page * |
@@ -309,8 +354,8 @@ full_search: | |||
309 | vma = find_vma(mm, addr); | 354 | vma = find_vma(mm, addr); |
310 | continue; | 355 | continue; |
311 | } | 356 | } |
312 | if (touches_hugepage_high_range(addr, len)) { | 357 | if (touches_hugepage_high_range(mm, addr, len)) { |
313 | addr = TASK_HPAGE_END; | 358 | addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT); |
314 | vma = find_vma(mm, addr); | 359 | vma = find_vma(mm, addr); |
315 | continue; | 360 | continue; |
316 | } | 361 | } |
@@ -389,8 +434,9 @@ hugepage_recheck: | |||
389 | if (touches_hugepage_low_range(mm, addr, len)) { | 434 | if (touches_hugepage_low_range(mm, addr, len)) { |
390 | addr = (addr & ((~0) << SID_SHIFT)) - len; | 435 | addr = (addr & ((~0) << SID_SHIFT)) - len; |
391 | goto hugepage_recheck; | 436 | goto hugepage_recheck; |
392 | } else if (touches_hugepage_high_range(addr, len)) { | 437 | } else if (touches_hugepage_high_range(mm, addr, len)) { |
393 | addr = TASK_HPAGE_BASE - len; | 438 | addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len; |
439 | goto hugepage_recheck; | ||
394 | } | 440 | } |
395 | 441 | ||
396 | /* | 442 | /* |
@@ -481,23 +527,28 @@ static unsigned long htlb_get_low_area(unsigned long len, u16 segmask) | |||
481 | return -ENOMEM; | 527 | return -ENOMEM; |
482 | } | 528 | } |
483 | 529 | ||
484 | static unsigned long htlb_get_high_area(unsigned long len) | 530 | static unsigned long htlb_get_high_area(unsigned long len, u16 areamask) |
485 | { | 531 | { |
486 | unsigned long addr = TASK_HPAGE_BASE; | 532 | unsigned long addr = 0x100000000UL; |
487 | struct vm_area_struct *vma; | 533 | struct vm_area_struct *vma; |
488 | 534 | ||
489 | vma = find_vma(current->mm, addr); | 535 | vma = find_vma(current->mm, addr); |
490 | for (vma = find_vma(current->mm, addr); | 536 | while (addr + len <= TASK_SIZE_USER64) { |
491 | addr + len <= TASK_HPAGE_END; | ||
492 | vma = vma->vm_next) { | ||
493 | BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ | 537 | BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ |
494 | BUG_ON(! within_hugepage_high_range(addr, len)); | 538 | |
539 | if (! __within_hugepage_high_range(addr, len, areamask)) { | ||
540 | addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT); | ||
541 | vma = find_vma(current->mm, addr); | ||
542 | continue; | ||
543 | } | ||
495 | 544 | ||
496 | if (!vma || (addr + len) <= vma->vm_start) | 545 | if (!vma || (addr + len) <= vma->vm_start) |
497 | return addr; | 546 | return addr; |
498 | addr = ALIGN(vma->vm_end, HPAGE_SIZE); | 547 | addr = ALIGN(vma->vm_end, HPAGE_SIZE); |
499 | /* Because we're in a hugepage region, this alignment | 548 | /* Depending on segmask this might not be a confirmed |
500 | * should not skip us over any VMAs */ | 549 | * hugepage region, so the ALIGN could have skipped |
550 | * some VMAs */ | ||
551 | vma = find_vma(current->mm, addr); | ||
501 | } | 552 | } |
502 | 553 | ||
503 | return -ENOMEM; | 554 | return -ENOMEM; |
@@ -507,6 +558,9 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | |||
507 | unsigned long len, unsigned long pgoff, | 558 | unsigned long len, unsigned long pgoff, |
508 | unsigned long flags) | 559 | unsigned long flags) |
509 | { | 560 | { |
561 | int lastshift; | ||
562 | u16 areamask, curareas; | ||
563 | |||
510 | if (len & ~HPAGE_MASK) | 564 | if (len & ~HPAGE_MASK) |
511 | return -EINVAL; | 565 | return -EINVAL; |
512 | 566 | ||
@@ -514,67 +568,49 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | |||
514 | return -EINVAL; | 568 | return -EINVAL; |
515 | 569 | ||
516 | if (test_thread_flag(TIF_32BIT)) { | 570 | if (test_thread_flag(TIF_32BIT)) { |
517 | int lastshift = 0; | 571 | curareas = current->mm->context.low_htlb_areas; |
518 | u16 segmask, cursegs = current->mm->context.htlb_segs; | ||
519 | 572 | ||
520 | /* First see if we can do the mapping in the existing | 573 | /* First see if we can do the mapping in the existing |
521 | * low hpage segments */ | 574 | * low areas */ |
522 | addr = htlb_get_low_area(len, cursegs); | 575 | addr = htlb_get_low_area(len, curareas); |
523 | if (addr != -ENOMEM) | 576 | if (addr != -ENOMEM) |
524 | return addr; | 577 | return addr; |
525 | 578 | ||
526 | for (segmask = LOW_ESID_MASK(0x100000000UL-len, len); | 579 | lastshift = 0; |
527 | ! lastshift; segmask >>=1) { | 580 | for (areamask = LOW_ESID_MASK(0x100000000UL-len, len); |
528 | if (segmask & 1) | 581 | ! lastshift; areamask >>=1) { |
582 | if (areamask & 1) | ||
529 | lastshift = 1; | 583 | lastshift = 1; |
530 | 584 | ||
531 | addr = htlb_get_low_area(len, cursegs | segmask); | 585 | addr = htlb_get_low_area(len, curareas | areamask); |
532 | if ((addr != -ENOMEM) | 586 | if ((addr != -ENOMEM) |
533 | && open_low_hpage_segs(current->mm, segmask) == 0) | 587 | && open_low_hpage_areas(current->mm, areamask) == 0) |
534 | return addr; | 588 | return addr; |
535 | } | 589 | } |
536 | printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open" | ||
537 | " enough segments\n"); | ||
538 | return -ENOMEM; | ||
539 | } else { | 590 | } else { |
540 | return htlb_get_high_area(len); | 591 | curareas = current->mm->context.high_htlb_areas; |
541 | } | ||
542 | } | ||
543 | |||
544 | void hugetlb_mm_free_pgd(struct mm_struct *mm) | ||
545 | { | ||
546 | int i; | ||
547 | pgd_t *pgdir; | ||
548 | |||
549 | spin_lock(&mm->page_table_lock); | ||
550 | |||
551 | pgdir = mm->context.huge_pgdir; | ||
552 | if (! pgdir) | ||
553 | goto out; | ||
554 | |||
555 | mm->context.huge_pgdir = NULL; | ||
556 | 592 | ||
557 | /* cleanup any hugepte pages leftover */ | 593 | /* First see if we can do the mapping in the existing |
558 | for (i = 0; i < PTRS_PER_HUGEPGD; i++) { | 594 | * high areas */ |
559 | pud_t *pud = (pud_t *)(pgdir + i); | 595 | addr = htlb_get_high_area(len, curareas); |
560 | 596 | if (addr != -ENOMEM) | |
561 | if (! pud_none(*pud)) { | 597 | return addr; |
562 | pte_t *pte = (pte_t *)pud_page(*pud); | ||
563 | struct page *ptepage = virt_to_page(pte); | ||
564 | 598 | ||
565 | ptepage->mapping = NULL; | 599 | lastshift = 0; |
600 | for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len); | ||
601 | ! lastshift; areamask >>=1) { | ||
602 | if (areamask & 1) | ||
603 | lastshift = 1; | ||
566 | 604 | ||
567 | BUG_ON(memcmp(pte, empty_zero_page, PAGE_SIZE)); | 605 | addr = htlb_get_high_area(len, curareas | areamask); |
568 | kmem_cache_free(zero_cache, pte); | 606 | if ((addr != -ENOMEM) |
607 | && open_high_hpage_areas(current->mm, areamask) == 0) | ||
608 | return addr; | ||
569 | } | 609 | } |
570 | pud_clear(pud); | ||
571 | } | 610 | } |
572 | 611 | printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open" | |
573 | BUG_ON(memcmp(pgdir, empty_zero_page, PAGE_SIZE)); | 612 | " enough areas\n"); |
574 | kmem_cache_free(zero_cache, pgdir); | 613 | return -ENOMEM; |
575 | |||
576 | out: | ||
577 | spin_unlock(&mm->page_table_lock); | ||
578 | } | 614 | } |
579 | 615 | ||
580 | int hash_huge_page(struct mm_struct *mm, unsigned long access, | 616 | int hash_huge_page(struct mm_struct *mm, unsigned long access, |
diff --git a/arch/ppc64/mm/imalloc.c b/arch/ppc64/mm/imalloc.c index b6e75b891ac0..c65b87b92756 100644 --- a/arch/ppc64/mm/imalloc.c +++ b/arch/ppc64/mm/imalloc.c | |||
@@ -31,7 +31,7 @@ static int get_free_im_addr(unsigned long size, unsigned long *im_addr) | |||
31 | break; | 31 | break; |
32 | if ((unsigned long)tmp->addr >= ioremap_bot) | 32 | if ((unsigned long)tmp->addr >= ioremap_bot) |
33 | addr = tmp->size + (unsigned long) tmp->addr; | 33 | addr = tmp->size + (unsigned long) tmp->addr; |
34 | if (addr > IMALLOC_END-size) | 34 | if (addr >= IMALLOC_END-size) |
35 | return 1; | 35 | return 1; |
36 | } | 36 | } |
37 | *im_addr = addr; | 37 | *im_addr = addr; |
diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c index e58a24d42879..c02dc9809ca5 100644 --- a/arch/ppc64/mm/init.c +++ b/arch/ppc64/mm/init.c | |||
@@ -42,7 +42,6 @@ | |||
42 | 42 | ||
43 | #include <asm/pgalloc.h> | 43 | #include <asm/pgalloc.h> |
44 | #include <asm/page.h> | 44 | #include <asm/page.h> |
45 | #include <asm/abs_addr.h> | ||
46 | #include <asm/prom.h> | 45 | #include <asm/prom.h> |
47 | #include <asm/lmb.h> | 46 | #include <asm/lmb.h> |
48 | #include <asm/rtas.h> | 47 | #include <asm/rtas.h> |
@@ -66,6 +65,14 @@ | |||
66 | #include <asm/vdso.h> | 65 | #include <asm/vdso.h> |
67 | #include <asm/imalloc.h> | 66 | #include <asm/imalloc.h> |
68 | 67 | ||
68 | #if PGTABLE_RANGE > USER_VSID_RANGE | ||
69 | #warning Limited user VSID range means pagetable space is wasted | ||
70 | #endif | ||
71 | |||
72 | #if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) | ||
73 | #warning TASK_SIZE is smaller than it needs to be. | ||
74 | #endif | ||
75 | |||
69 | int mem_init_done; | 76 | int mem_init_done; |
70 | unsigned long ioremap_bot = IMALLOC_BASE; | 77 | unsigned long ioremap_bot = IMALLOC_BASE; |
71 | static unsigned long phbs_io_bot = PHBS_IO_BASE; | 78 | static unsigned long phbs_io_bot = PHBS_IO_BASE; |
@@ -159,7 +166,6 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags) | |||
159 | ptep = pte_alloc_kernel(&init_mm, pmdp, ea); | 166 | ptep = pte_alloc_kernel(&init_mm, pmdp, ea); |
160 | if (!ptep) | 167 | if (!ptep) |
161 | return -ENOMEM; | 168 | return -ENOMEM; |
162 | pa = abs_to_phys(pa); | ||
163 | set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, | 169 | set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, |
164 | __pgprot(flags))); | 170 | __pgprot(flags))); |
165 | spin_unlock(&init_mm.page_table_lock); | 171 | spin_unlock(&init_mm.page_table_lock); |
@@ -226,7 +232,7 @@ void __iomem * __ioremap(unsigned long addr, unsigned long size, | |||
226 | * Before that, we map using addresses going | 232 | * Before that, we map using addresses going |
227 | * up from ioremap_bot. imalloc will use | 233 | * up from ioremap_bot. imalloc will use |
228 | * the addresses from ioremap_bot through | 234 | * the addresses from ioremap_bot through |
229 | * IMALLOC_END (0xE000001fffffffff) | 235 | * IMALLOC_END |
230 | * | 236 | * |
231 | */ | 237 | */ |
232 | pa = addr & PAGE_MASK; | 238 | pa = addr & PAGE_MASK; |
@@ -417,12 +423,6 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) | |||
417 | int index; | 423 | int index; |
418 | int err; | 424 | int err; |
419 | 425 | ||
420 | #ifdef CONFIG_HUGETLB_PAGE | ||
421 | /* We leave htlb_segs as it was, but for a fork, we need to | ||
422 | * clear the huge_pgdir. */ | ||
423 | mm->context.huge_pgdir = NULL; | ||
424 | #endif | ||
425 | |||
426 | again: | 426 | again: |
427 | if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) | 427 | if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) |
428 | return -ENOMEM; | 428 | return -ENOMEM; |
@@ -453,8 +453,6 @@ void destroy_context(struct mm_struct *mm) | |||
453 | spin_unlock(&mmu_context_lock); | 453 | spin_unlock(&mmu_context_lock); |
454 | 454 | ||
455 | mm->context.id = NO_CONTEXT; | 455 | mm->context.id = NO_CONTEXT; |
456 | |||
457 | hugetlb_mm_free_pgd(mm); | ||
458 | } | 456 | } |
459 | 457 | ||
460 | /* | 458 | /* |
@@ -484,9 +482,9 @@ void __init mm_init_ppc64(void) | |||
484 | for (i = 1; i < lmb.memory.cnt; i++) { | 482 | for (i = 1; i < lmb.memory.cnt; i++) { |
485 | unsigned long base, prevbase, prevsize; | 483 | unsigned long base, prevbase, prevsize; |
486 | 484 | ||
487 | prevbase = lmb.memory.region[i-1].physbase; | 485 | prevbase = lmb.memory.region[i-1].base; |
488 | prevsize = lmb.memory.region[i-1].size; | 486 | prevsize = lmb.memory.region[i-1].size; |
489 | base = lmb.memory.region[i].physbase; | 487 | base = lmb.memory.region[i].base; |
490 | if (base > (prevbase + prevsize)) { | 488 | if (base > (prevbase + prevsize)) { |
491 | io_hole_start = prevbase + prevsize; | 489 | io_hole_start = prevbase + prevsize; |
492 | io_hole_size = base - (prevbase + prevsize); | 490 | io_hole_size = base - (prevbase + prevsize); |
@@ -513,11 +511,8 @@ int page_is_ram(unsigned long pfn) | |||
513 | for (i=0; i < lmb.memory.cnt; i++) { | 511 | for (i=0; i < lmb.memory.cnt; i++) { |
514 | unsigned long base; | 512 | unsigned long base; |
515 | 513 | ||
516 | #ifdef CONFIG_MSCHUNKS | ||
517 | base = lmb.memory.region[i].physbase; | ||
518 | #else | ||
519 | base = lmb.memory.region[i].base; | 514 | base = lmb.memory.region[i].base; |
520 | #endif | 515 | |
521 | if ((paddr >= base) && | 516 | if ((paddr >= base) && |
522 | (paddr < (base + lmb.memory.region[i].size))) { | 517 | (paddr < (base + lmb.memory.region[i].size))) { |
523 | return 1; | 518 | return 1; |
@@ -547,7 +542,7 @@ void __init do_init_bootmem(void) | |||
547 | */ | 542 | */ |
548 | bootmap_pages = bootmem_bootmap_pages(total_pages); | 543 | bootmap_pages = bootmem_bootmap_pages(total_pages); |
549 | 544 | ||
550 | start = abs_to_phys(lmb_alloc(bootmap_pages<<PAGE_SHIFT, PAGE_SIZE)); | 545 | start = lmb_alloc(bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); |
551 | BUG_ON(!start); | 546 | BUG_ON(!start); |
552 | 547 | ||
553 | boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages); | 548 | boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages); |
@@ -558,25 +553,25 @@ void __init do_init_bootmem(void) | |||
558 | * present. | 553 | * present. |
559 | */ | 554 | */ |
560 | for (i=0; i < lmb.memory.cnt; i++) { | 555 | for (i=0; i < lmb.memory.cnt; i++) { |
561 | unsigned long physbase, size; | 556 | unsigned long base, size; |
562 | unsigned long start_pfn, end_pfn; | 557 | unsigned long start_pfn, end_pfn; |
563 | 558 | ||
564 | physbase = lmb.memory.region[i].physbase; | 559 | base = lmb.memory.region[i].base; |
565 | size = lmb.memory.region[i].size; | 560 | size = lmb.memory.region[i].size; |
566 | 561 | ||
567 | start_pfn = physbase >> PAGE_SHIFT; | 562 | start_pfn = base >> PAGE_SHIFT; |
568 | end_pfn = start_pfn + (size >> PAGE_SHIFT); | 563 | end_pfn = start_pfn + (size >> PAGE_SHIFT); |
569 | memory_present(0, start_pfn, end_pfn); | 564 | memory_present(0, start_pfn, end_pfn); |
570 | 565 | ||
571 | free_bootmem(physbase, size); | 566 | free_bootmem(base, size); |
572 | } | 567 | } |
573 | 568 | ||
574 | /* reserve the sections we're already using */ | 569 | /* reserve the sections we're already using */ |
575 | for (i=0; i < lmb.reserved.cnt; i++) { | 570 | for (i=0; i < lmb.reserved.cnt; i++) { |
576 | unsigned long physbase = lmb.reserved.region[i].physbase; | 571 | unsigned long base = lmb.reserved.region[i].base; |
577 | unsigned long size = lmb.reserved.region[i].size; | 572 | unsigned long size = lmb.reserved.region[i].size; |
578 | 573 | ||
579 | reserve_bootmem(physbase, size); | 574 | reserve_bootmem(base, size); |
580 | } | 575 | } |
581 | } | 576 | } |
582 | 577 | ||
@@ -615,10 +610,10 @@ static int __init setup_kcore(void) | |||
615 | int i; | 610 | int i; |
616 | 611 | ||
617 | for (i=0; i < lmb.memory.cnt; i++) { | 612 | for (i=0; i < lmb.memory.cnt; i++) { |
618 | unsigned long physbase, size; | 613 | unsigned long base, size; |
619 | struct kcore_list *kcore_mem; | 614 | struct kcore_list *kcore_mem; |
620 | 615 | ||
621 | physbase = lmb.memory.region[i].physbase; | 616 | base = lmb.memory.region[i].base; |
622 | size = lmb.memory.region[i].size; | 617 | size = lmb.memory.region[i].size; |
623 | 618 | ||
624 | /* GFP_ATOMIC to avoid might_sleep warnings during boot */ | 619 | /* GFP_ATOMIC to avoid might_sleep warnings during boot */ |
@@ -626,7 +621,7 @@ static int __init setup_kcore(void) | |||
626 | if (!kcore_mem) | 621 | if (!kcore_mem) |
627 | panic("mem_init: kmalloc failed\n"); | 622 | panic("mem_init: kmalloc failed\n"); |
628 | 623 | ||
629 | kclist_add(kcore_mem, __va(physbase), size); | 624 | kclist_add(kcore_mem, __va(base), size); |
630 | } | 625 | } |
631 | 626 | ||
632 | kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START); | 627 | kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START); |
@@ -686,9 +681,6 @@ void __init mem_init(void) | |||
686 | 681 | ||
687 | mem_init_done = 1; | 682 | mem_init_done = 1; |
688 | 683 | ||
689 | #ifdef CONFIG_PPC_ISERIES | ||
690 | iommu_vio_init(); | ||
691 | #endif | ||
692 | /* Initialize the vDSO */ | 684 | /* Initialize the vDSO */ |
693 | vdso_init(); | 685 | vdso_init(); |
694 | } | 686 | } |
@@ -833,23 +825,43 @@ void __iomem * reserve_phb_iospace(unsigned long size) | |||
833 | return virt_addr; | 825 | return virt_addr; |
834 | } | 826 | } |
835 | 827 | ||
836 | kmem_cache_t *zero_cache; | 828 | static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) |
837 | |||
838 | static void zero_ctor(void *pte, kmem_cache_t *cache, unsigned long flags) | ||
839 | { | 829 | { |
840 | memset(pte, 0, PAGE_SIZE); | 830 | memset(addr, 0, kmem_cache_size(cache)); |
841 | } | 831 | } |
842 | 832 | ||
833 | static const int pgtable_cache_size[2] = { | ||
834 | PTE_TABLE_SIZE, PMD_TABLE_SIZE | ||
835 | }; | ||
836 | static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { | ||
837 | "pgd_pte_cache", "pud_pmd_cache", | ||
838 | }; | ||
839 | |||
840 | kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; | ||
841 | |||
843 | void pgtable_cache_init(void) | 842 | void pgtable_cache_init(void) |
844 | { | 843 | { |
845 | zero_cache = kmem_cache_create("zero", | 844 | int i; |
846 | PAGE_SIZE, | 845 | |
847 | 0, | 846 | BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]); |
848 | SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, | 847 | BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]); |
849 | zero_ctor, | 848 | BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]); |
850 | NULL); | 849 | BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]); |
851 | if (!zero_cache) | 850 | |
852 | panic("pgtable_cache_init(): could not create zero_cache!\n"); | 851 | for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) { |
852 | int size = pgtable_cache_size[i]; | ||
853 | const char *name = pgtable_cache_name[i]; | ||
854 | |||
855 | pgtable_cache[i] = kmem_cache_create(name, | ||
856 | size, size, | ||
857 | SLAB_HWCACHE_ALIGN | ||
858 | | SLAB_MUST_HWCACHE_ALIGN, | ||
859 | zero_ctor, | ||
860 | NULL); | ||
861 | if (! pgtable_cache[i]) | ||
862 | panic("pgtable_cache_init(): could not create %s!\n", | ||
863 | name); | ||
864 | } | ||
853 | } | 865 | } |
854 | 866 | ||
855 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, | 867 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, |
diff --git a/arch/ppc64/mm/numa.c b/arch/ppc64/mm/numa.c index 0b191f2de016..c3116f0d788c 100644 --- a/arch/ppc64/mm/numa.c +++ b/arch/ppc64/mm/numa.c | |||
@@ -671,7 +671,7 @@ new_range: | |||
671 | * Mark reserved regions on this node | 671 | * Mark reserved regions on this node |
672 | */ | 672 | */ |
673 | for (i = 0; i < lmb.reserved.cnt; i++) { | 673 | for (i = 0; i < lmb.reserved.cnt; i++) { |
674 | unsigned long physbase = lmb.reserved.region[i].physbase; | 674 | unsigned long physbase = lmb.reserved.region[i].base; |
675 | unsigned long size = lmb.reserved.region[i].size; | 675 | unsigned long size = lmb.reserved.region[i].size; |
676 | 676 | ||
677 | if (pa_to_nid(physbase) != nid && | 677 | if (pa_to_nid(physbase) != nid && |
diff --git a/arch/ppc64/mm/slb_low.S b/arch/ppc64/mm/slb_low.S index 8379d678f70f..bab255889c58 100644 --- a/arch/ppc64/mm/slb_low.S +++ b/arch/ppc64/mm/slb_low.S | |||
@@ -89,28 +89,29 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) | |||
89 | b 9f | 89 | b 9f |
90 | 90 | ||
91 | 0: /* user address: proto-VSID = context<<15 | ESID */ | 91 | 0: /* user address: proto-VSID = context<<15 | ESID */ |
92 | li r11,SLB_VSID_USER | 92 | srdi. r9,r3,USER_ESID_BITS |
93 | |||
94 | srdi. r9,r3,13 | ||
95 | bne- 8f /* invalid ea bits set */ | 93 | bne- 8f /* invalid ea bits set */ |
96 | 94 | ||
97 | #ifdef CONFIG_HUGETLB_PAGE | 95 | #ifdef CONFIG_HUGETLB_PAGE |
98 | BEGIN_FTR_SECTION | 96 | BEGIN_FTR_SECTION |
99 | /* check against the hugepage ranges */ | 97 | lhz r9,PACAHIGHHTLBAREAS(r13) |
100 | cmpldi r3,(TASK_HPAGE_END>>SID_SHIFT) | 98 | srdi r11,r3,(HTLB_AREA_SHIFT-SID_SHIFT) |
101 | bge 6f /* >= TASK_HPAGE_END */ | 99 | srd r9,r9,r11 |
102 | cmpldi r3,(TASK_HPAGE_BASE>>SID_SHIFT) | 100 | andi. r9,r9,1 |
103 | bge 5f /* TASK_HPAGE_BASE..TASK_HPAGE_END */ | 101 | bne 5f |
102 | |||
103 | li r11,SLB_VSID_USER | ||
104 | |||
104 | cmpldi r3,16 | 105 | cmpldi r3,16 |
105 | bge 6f /* 4GB..TASK_HPAGE_BASE */ | 106 | bge 6f |
106 | 107 | ||
107 | lhz r9,PACAHTLBSEGS(r13) | 108 | lhz r9,PACALOWHTLBAREAS(r13) |
108 | srd r9,r9,r3 | 109 | srd r9,r9,r3 |
109 | andi. r9,r9,1 | 110 | andi. r9,r9,1 |
111 | |||
110 | beq 6f | 112 | beq 6f |
111 | 113 | ||
112 | 5: /* this is a hugepage user address */ | 114 | 5: li r11,SLB_VSID_USER|SLB_VSID_L |
113 | li r11,(SLB_VSID_USER|SLB_VSID_L) | ||
114 | END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) | 115 | END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) |
115 | #endif /* CONFIG_HUGETLB_PAGE */ | 116 | #endif /* CONFIG_HUGETLB_PAGE */ |
116 | 117 | ||
diff --git a/arch/ppc64/mm/tlb.c b/arch/ppc64/mm/tlb.c index 26f0172c4527..d8a6593a13f0 100644 --- a/arch/ppc64/mm/tlb.c +++ b/arch/ppc64/mm/tlb.c | |||
@@ -41,7 +41,58 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | |||
41 | DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); | 41 | DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); |
42 | unsigned long pte_freelist_forced_free; | 42 | unsigned long pte_freelist_forced_free; |
43 | 43 | ||
44 | void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage) | 44 | struct pte_freelist_batch |
45 | { | ||
46 | struct rcu_head rcu; | ||
47 | unsigned int index; | ||
48 | pgtable_free_t tables[0]; | ||
49 | }; | ||
50 | |||
51 | DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); | ||
52 | unsigned long pte_freelist_forced_free; | ||
53 | |||
54 | #define PTE_FREELIST_SIZE \ | ||
55 | ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \ | ||
56 | / sizeof(pgtable_free_t)) | ||
57 | |||
58 | #ifdef CONFIG_SMP | ||
59 | static void pte_free_smp_sync(void *arg) | ||
60 | { | ||
61 | /* Do nothing, just ensure we sync with all CPUs */ | ||
62 | } | ||
63 | #endif | ||
64 | |||
65 | /* This is only called when we are critically out of memory | ||
66 | * (and fail to get a page in pte_free_tlb). | ||
67 | */ | ||
68 | static void pgtable_free_now(pgtable_free_t pgf) | ||
69 | { | ||
70 | pte_freelist_forced_free++; | ||
71 | |||
72 | smp_call_function(pte_free_smp_sync, NULL, 0, 1); | ||
73 | |||
74 | pgtable_free(pgf); | ||
75 | } | ||
76 | |||
77 | static void pte_free_rcu_callback(struct rcu_head *head) | ||
78 | { | ||
79 | struct pte_freelist_batch *batch = | ||
80 | container_of(head, struct pte_freelist_batch, rcu); | ||
81 | unsigned int i; | ||
82 | |||
83 | for (i = 0; i < batch->index; i++) | ||
84 | pgtable_free(batch->tables[i]); | ||
85 | |||
86 | free_page((unsigned long)batch); | ||
87 | } | ||
88 | |||
89 | static void pte_free_submit(struct pte_freelist_batch *batch) | ||
90 | { | ||
91 | INIT_RCU_HEAD(&batch->rcu); | ||
92 | call_rcu(&batch->rcu, pte_free_rcu_callback); | ||
93 | } | ||
94 | |||
95 | void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf) | ||
45 | { | 96 | { |
46 | /* This is safe as we are holding page_table_lock */ | 97 | /* This is safe as we are holding page_table_lock */ |
47 | cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id()); | 98 | cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id()); |
@@ -49,19 +100,19 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage) | |||
49 | 100 | ||
50 | if (atomic_read(&tlb->mm->mm_users) < 2 || | 101 | if (atomic_read(&tlb->mm->mm_users) < 2 || |
51 | cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) { | 102 | cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) { |
52 | pte_free(ptepage); | 103 | pgtable_free(pgf); |
53 | return; | 104 | return; |
54 | } | 105 | } |
55 | 106 | ||
56 | if (*batchp == NULL) { | 107 | if (*batchp == NULL) { |
57 | *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC); | 108 | *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC); |
58 | if (*batchp == NULL) { | 109 | if (*batchp == NULL) { |
59 | pte_free_now(ptepage); | 110 | pgtable_free_now(pgf); |
60 | return; | 111 | return; |
61 | } | 112 | } |
62 | (*batchp)->index = 0; | 113 | (*batchp)->index = 0; |
63 | } | 114 | } |
64 | (*batchp)->pages[(*batchp)->index++] = ptepage; | 115 | (*batchp)->tables[(*batchp)->index++] = pgf; |
65 | if ((*batchp)->index == PTE_FREELIST_SIZE) { | 116 | if ((*batchp)->index == PTE_FREELIST_SIZE) { |
66 | pte_free_submit(*batchp); | 117 | pte_free_submit(*batchp); |
67 | *batchp = NULL; | 118 | *batchp = NULL; |
@@ -132,42 +183,6 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch) | |||
132 | put_cpu(); | 183 | put_cpu(); |
133 | } | 184 | } |
134 | 185 | ||
135 | #ifdef CONFIG_SMP | ||
136 | static void pte_free_smp_sync(void *arg) | ||
137 | { | ||
138 | /* Do nothing, just ensure we sync with all CPUs */ | ||
139 | } | ||
140 | #endif | ||
141 | |||
142 | /* This is only called when we are critically out of memory | ||
143 | * (and fail to get a page in pte_free_tlb). | ||
144 | */ | ||
145 | void pte_free_now(struct page *ptepage) | ||
146 | { | ||
147 | pte_freelist_forced_free++; | ||
148 | |||
149 | smp_call_function(pte_free_smp_sync, NULL, 0, 1); | ||
150 | |||
151 | pte_free(ptepage); | ||
152 | } | ||
153 | |||
154 | static void pte_free_rcu_callback(struct rcu_head *head) | ||
155 | { | ||
156 | struct pte_freelist_batch *batch = | ||
157 | container_of(head, struct pte_freelist_batch, rcu); | ||
158 | unsigned int i; | ||
159 | |||
160 | for (i = 0; i < batch->index; i++) | ||
161 | pte_free(batch->pages[i]); | ||
162 | free_page((unsigned long)batch); | ||
163 | } | ||
164 | |||
165 | void pte_free_submit(struct pte_freelist_batch *batch) | ||
166 | { | ||
167 | INIT_RCU_HEAD(&batch->rcu); | ||
168 | call_rcu(&batch->rcu, pte_free_rcu_callback); | ||
169 | } | ||
170 | |||
171 | void pte_free_finish(void) | 186 | void pte_free_finish(void) |
172 | { | 187 | { |
173 | /* This is safe as we are holding page_table_lock */ | 188 | /* This is safe as we are holding page_table_lock */ |
diff --git a/arch/ppc64/oprofile/common.c b/arch/ppc64/oprofile/common.c index b28bfda23d94..4acd1a424933 100644 --- a/arch/ppc64/oprofile/common.c +++ b/arch/ppc64/oprofile/common.c | |||
@@ -153,6 +153,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) | |||
153 | 153 | ||
154 | case PV_970: | 154 | case PV_970: |
155 | case PV_970FX: | 155 | case PV_970FX: |
156 | case PV_970MP: | ||
156 | model = &op_model_power4; | 157 | model = &op_model_power4; |
157 | model->num_counters = 8; | 158 | model->num_counters = 8; |
158 | ops->cpu_type = "ppc64/970"; | 159 | ops->cpu_type = "ppc64/970"; |
diff --git a/arch/ppc64/xmon/start.c b/arch/ppc64/xmon/start.c index a9265bcc79b2..f86b584acd76 100644 --- a/arch/ppc64/xmon/start.c +++ b/arch/ppc64/xmon/start.c | |||
@@ -27,7 +27,7 @@ static void sysrq_handle_xmon(int key, struct pt_regs *pt_regs, | |||
27 | struct tty_struct *tty) | 27 | struct tty_struct *tty) |
28 | { | 28 | { |
29 | /* ensure xmon is enabled */ | 29 | /* ensure xmon is enabled */ |
30 | xmon_init(); | 30 | xmon_init(1); |
31 | debugger(pt_regs); | 31 | debugger(pt_regs); |
32 | } | 32 | } |
33 | 33 | ||
diff --git a/arch/ppc64/xmon/xmon.c b/arch/ppc64/xmon/xmon.c index 05539439e6bc..45908b10acd3 100644 --- a/arch/ppc64/xmon/xmon.c +++ b/arch/ppc64/xmon/xmon.c | |||
@@ -2496,15 +2496,25 @@ static void dump_stab(void) | |||
2496 | } | 2496 | } |
2497 | } | 2497 | } |
2498 | 2498 | ||
2499 | void xmon_init(void) | 2499 | void xmon_init(int enable) |
2500 | { | 2500 | { |
2501 | __debugger = xmon; | 2501 | if (enable) { |
2502 | __debugger_ipi = xmon_ipi; | 2502 | __debugger = xmon; |
2503 | __debugger_bpt = xmon_bpt; | 2503 | __debugger_ipi = xmon_ipi; |
2504 | __debugger_sstep = xmon_sstep; | 2504 | __debugger_bpt = xmon_bpt; |
2505 | __debugger_iabr_match = xmon_iabr_match; | 2505 | __debugger_sstep = xmon_sstep; |
2506 | __debugger_dabr_match = xmon_dabr_match; | 2506 | __debugger_iabr_match = xmon_iabr_match; |
2507 | __debugger_fault_handler = xmon_fault_handler; | 2507 | __debugger_dabr_match = xmon_dabr_match; |
2508 | __debugger_fault_handler = xmon_fault_handler; | ||
2509 | } else { | ||
2510 | __debugger = NULL; | ||
2511 | __debugger_ipi = NULL; | ||
2512 | __debugger_bpt = NULL; | ||
2513 | __debugger_sstep = NULL; | ||
2514 | __debugger_iabr_match = NULL; | ||
2515 | __debugger_dabr_match = NULL; | ||
2516 | __debugger_fault_handler = NULL; | ||
2517 | } | ||
2508 | } | 2518 | } |
2509 | 2519 | ||
2510 | void dump_segments(void) | 2520 | void dump_segments(void) |
diff --git a/arch/sparc/kernel/setup.c b/arch/sparc/kernel/setup.c index 55352ed85e8a..53c192a4982f 100644 --- a/arch/sparc/kernel/setup.c +++ b/arch/sparc/kernel/setup.c | |||
@@ -32,7 +32,6 @@ | |||
32 | #include <linux/spinlock.h> | 32 | #include <linux/spinlock.h> |
33 | #include <linux/root_dev.h> | 33 | #include <linux/root_dev.h> |
34 | 34 | ||
35 | #include <asm/segment.h> | ||
36 | #include <asm/system.h> | 35 | #include <asm/system.h> |
37 | #include <asm/io.h> | 36 | #include <asm/io.h> |
38 | #include <asm/processor.h> | 37 | #include <asm/processor.h> |
diff --git a/arch/sparc/kernel/tick14.c b/arch/sparc/kernel/tick14.c index fd8005a3e6bd..591547af4c65 100644 --- a/arch/sparc/kernel/tick14.c +++ b/arch/sparc/kernel/tick14.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include <linux/interrupt.h> | 19 | #include <linux/interrupt.h> |
20 | 20 | ||
21 | #include <asm/oplib.h> | 21 | #include <asm/oplib.h> |
22 | #include <asm/segment.h> | ||
23 | #include <asm/timer.h> | 22 | #include <asm/timer.h> |
24 | #include <asm/mostek.h> | 23 | #include <asm/mostek.h> |
25 | #include <asm/system.h> | 24 | #include <asm/system.h> |
diff --git a/arch/sparc/kernel/time.c b/arch/sparc/kernel/time.c index 6486cbf2efe9..3b759aefc170 100644 --- a/arch/sparc/kernel/time.c +++ b/arch/sparc/kernel/time.c | |||
@@ -32,7 +32,6 @@ | |||
32 | #include <linux/profile.h> | 32 | #include <linux/profile.h> |
33 | 33 | ||
34 | #include <asm/oplib.h> | 34 | #include <asm/oplib.h> |
35 | #include <asm/segment.h> | ||
36 | #include <asm/timer.h> | 35 | #include <asm/timer.h> |
37 | #include <asm/mostek.h> | 36 | #include <asm/mostek.h> |
38 | #include <asm/system.h> | 37 | #include <asm/system.h> |
diff --git a/arch/sparc/mm/fault.c b/arch/sparc/mm/fault.c index 37f4107bae66..2bbd53f3cafb 100644 --- a/arch/sparc/mm/fault.c +++ b/arch/sparc/mm/fault.c | |||
@@ -23,7 +23,6 @@ | |||
23 | #include <linux/module.h> | 23 | #include <linux/module.h> |
24 | 24 | ||
25 | #include <asm/system.h> | 25 | #include <asm/system.h> |
26 | #include <asm/segment.h> | ||
27 | #include <asm/page.h> | 26 | #include <asm/page.h> |
28 | #include <asm/pgtable.h> | 27 | #include <asm/pgtable.h> |
29 | #include <asm/memreg.h> | 28 | #include <asm/memreg.h> |
diff --git a/arch/sparc/mm/init.c b/arch/sparc/mm/init.c index ec2e05028a10..c03babaa0498 100644 --- a/arch/sparc/mm/init.c +++ b/arch/sparc/mm/init.c | |||
@@ -25,7 +25,6 @@ | |||
25 | #include <linux/bootmem.h> | 25 | #include <linux/bootmem.h> |
26 | 26 | ||
27 | #include <asm/system.h> | 27 | #include <asm/system.h> |
28 | #include <asm/segment.h> | ||
29 | #include <asm/vac-ops.h> | 28 | #include <asm/vac-ops.h> |
30 | #include <asm/page.h> | 29 | #include <asm/page.h> |
31 | #include <asm/pgtable.h> | 30 | #include <asm/pgtable.h> |
diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index 88332f00094a..cecdc0a7521f 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <asm/visasm.h> | 21 | #include <asm/visasm.h> |
22 | #include <asm/estate.h> | 22 | #include <asm/estate.h> |
23 | #include <asm/auxio.h> | 23 | #include <asm/auxio.h> |
24 | #include <asm/sfafsr.h> | ||
24 | 25 | ||
25 | #define curptr g6 | 26 | #define curptr g6 |
26 | 27 | ||
@@ -690,14 +691,159 @@ netbsd_syscall: | |||
690 | retl | 691 | retl |
691 | nop | 692 | nop |
692 | 693 | ||
693 | /* These next few routines must be sure to clear the | 694 | /* We need to carefully read the error status, ACK |
694 | * SFSR FaultValid bit so that the fast tlb data protection | 695 | * the errors, prevent recursive traps, and pass the |
695 | * handler does not flush the wrong context and lock up the | 696 | * information on to C code for logging. |
696 | * box. | 697 | * |
698 | * We pass the AFAR in as-is, and we encode the status | ||
699 | * information as described in asm-sparc64/sfafsr.h | ||
700 | */ | ||
701 | .globl __spitfire_access_error | ||
702 | __spitfire_access_error: | ||
703 | /* Disable ESTATE error reporting so that we do not | ||
704 | * take recursive traps and RED state the processor. | ||
705 | */ | ||
706 | stxa %g0, [%g0] ASI_ESTATE_ERROR_EN | ||
707 | membar #Sync | ||
708 | |||
709 | mov UDBE_UE, %g1 | ||
710 | ldxa [%g0] ASI_AFSR, %g4 ! Get AFSR | ||
711 | |||
712 | /* __spitfire_cee_trap branches here with AFSR in %g4 and | ||
713 | * UDBE_CE in %g1. It only clears ESTATE_ERR_CE in the | ||
714 | * ESTATE Error Enable register. | ||
715 | */ | ||
716 | __spitfire_cee_trap_continue: | ||
717 | ldxa [%g0] ASI_AFAR, %g5 ! Get AFAR | ||
718 | |||
719 | rdpr %tt, %g3 | ||
720 | and %g3, 0x1ff, %g3 ! Paranoia | ||
721 | sllx %g3, SFSTAT_TRAP_TYPE_SHIFT, %g3 | ||
722 | or %g4, %g3, %g4 | ||
723 | rdpr %tl, %g3 | ||
724 | cmp %g3, 1 | ||
725 | mov 1, %g3 | ||
726 | bleu %xcc, 1f | ||
727 | sllx %g3, SFSTAT_TL_GT_ONE_SHIFT, %g3 | ||
728 | |||
729 | or %g4, %g3, %g4 | ||
730 | |||
731 | /* Read in the UDB error register state, clearing the | ||
732 | * sticky error bits as-needed. We only clear them if | ||
733 | * the UE bit is set. Likewise, __spitfire_cee_trap | ||
734 | * below will only do so if the CE bit is set. | ||
735 | * | ||
736 | * NOTE: UltraSparc-I/II have high and low UDB error | ||
737 | * registers, corresponding to the two UDB units | ||
738 | * present on those chips. UltraSparc-IIi only | ||
739 | * has a single UDB, called "SDB" in the manual. | ||
740 | * For IIi the upper UDB register always reads | ||
741 | * as zero so for our purposes things will just | ||
742 | * work with the checks below. | ||
697 | */ | 743 | */ |
698 | .globl __do_data_access_exception | 744 | 1: ldxa [%g0] ASI_UDBH_ERROR_R, %g3 |
699 | .globl __do_data_access_exception_tl1 | 745 | and %g3, 0x3ff, %g7 ! Paranoia |
700 | __do_data_access_exception_tl1: | 746 | sllx %g7, SFSTAT_UDBH_SHIFT, %g7 |
747 | or %g4, %g7, %g4 | ||
748 | andcc %g3, %g1, %g3 ! UDBE_UE or UDBE_CE | ||
749 | be,pn %xcc, 1f | ||
750 | nop | ||
751 | stxa %g3, [%g0] ASI_UDB_ERROR_W | ||
752 | membar #Sync | ||
753 | |||
754 | 1: mov 0x18, %g3 | ||
755 | ldxa [%g3] ASI_UDBL_ERROR_R, %g3 | ||
756 | and %g3, 0x3ff, %g7 ! Paranoia | ||
757 | sllx %g7, SFSTAT_UDBL_SHIFT, %g7 | ||
758 | or %g4, %g7, %g4 | ||
759 | andcc %g3, %g1, %g3 ! UDBE_UE or UDBE_CE | ||
760 | be,pn %xcc, 1f | ||
761 | nop | ||
762 | mov 0x18, %g7 | ||
763 | stxa %g3, [%g7] ASI_UDB_ERROR_W | ||
764 | membar #Sync | ||
765 | |||
766 | 1: /* Ok, now that we've latched the error state, | ||
767 | * clear the sticky bits in the AFSR. | ||
768 | */ | ||
769 | stxa %g4, [%g0] ASI_AFSR | ||
770 | membar #Sync | ||
771 | |||
772 | rdpr %tl, %g2 | ||
773 | cmp %g2, 1 | ||
774 | rdpr %pil, %g2 | ||
775 | bleu,pt %xcc, 1f | ||
776 | wrpr %g0, 15, %pil | ||
777 | |||
778 | ba,pt %xcc, etraptl1 | ||
779 | rd %pc, %g7 | ||
780 | |||
781 | ba,pt %xcc, 2f | ||
782 | nop | ||
783 | |||
784 | 1: ba,pt %xcc, etrap_irq | ||
785 | rd %pc, %g7 | ||
786 | |||
787 | 2: mov %l4, %o1 | ||
788 | mov %l5, %o2 | ||
789 | call spitfire_access_error | ||
790 | add %sp, PTREGS_OFF, %o0 | ||
791 | ba,pt %xcc, rtrap | ||
792 | clr %l6 | ||
793 | |||
794 | /* This is the trap handler entry point for ECC correctable | ||
795 | * errors. They are corrected, but we listen for the trap | ||
796 | * so that the event can be logged. | ||
797 | * | ||
798 | * Disrupting errors are either: | ||
799 | * 1) single-bit ECC errors during UDB reads to system | ||
800 | * memory | ||
801 | * 2) data parity errors during write-back events | ||
802 | * | ||
803 | * As far as I can make out from the manual, the CEE trap | ||
804 | * is only for correctable errors during memory read | ||
805 | * accesses by the front-end of the processor. | ||
806 | * | ||
807 | * The code below is only for trap level 1 CEE events, | ||
808 | * as it is the only situation where we can safely record | ||
809 | * and log. For trap level >1 we just clear the CE bit | ||
810 | * in the AFSR and return. | ||
811 | * | ||
812 | * This is just like __spiftire_access_error above, but it | ||
813 | * specifically handles correctable errors. If an | ||
814 | * uncorrectable error is indicated in the AFSR we | ||
815 | * will branch directly above to __spitfire_access_error | ||
816 | * to handle it instead. Uncorrectable therefore takes | ||
817 | * priority over correctable, and the error logging | ||
818 | * C code will notice this case by inspecting the | ||
819 | * trap type. | ||
820 | */ | ||
821 | .globl __spitfire_cee_trap | ||
822 | __spitfire_cee_trap: | ||
823 | ldxa [%g0] ASI_AFSR, %g4 ! Get AFSR | ||
824 | mov 1, %g3 | ||
825 | sllx %g3, SFAFSR_UE_SHIFT, %g3 | ||
826 | andcc %g4, %g3, %g0 ! Check for UE | ||
827 | bne,pn %xcc, __spitfire_access_error | ||
828 | nop | ||
829 | |||
830 | /* Ok, in this case we only have a correctable error. | ||
831 | * Indicate we only wish to capture that state in register | ||
832 | * %g1, and we only disable CE error reporting unlike UE | ||
833 | * handling which disables all errors. | ||
834 | */ | ||
835 | ldxa [%g0] ASI_ESTATE_ERROR_EN, %g3 | ||
836 | andn %g3, ESTATE_ERR_CE, %g3 | ||
837 | stxa %g3, [%g0] ASI_ESTATE_ERROR_EN | ||
838 | membar #Sync | ||
839 | |||
840 | /* Preserve AFSR in %g4, indicate UDB state to capture in %g1 */ | ||
841 | ba,pt %xcc, __spitfire_cee_trap_continue | ||
842 | mov UDBE_CE, %g1 | ||
843 | |||
844 | .globl __spitfire_data_access_exception | ||
845 | .globl __spitfire_data_access_exception_tl1 | ||
846 | __spitfire_data_access_exception_tl1: | ||
701 | rdpr %pstate, %g4 | 847 | rdpr %pstate, %g4 |
702 | wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate | 848 | wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate |
703 | mov TLB_SFSR, %g3 | 849 | mov TLB_SFSR, %g3 |
@@ -706,9 +852,25 @@ __do_data_access_exception_tl1: | |||
706 | ldxa [%g5] ASI_DMMU, %g5 ! Get SFAR | 852 | ldxa [%g5] ASI_DMMU, %g5 ! Get SFAR |
707 | stxa %g0, [%g3] ASI_DMMU ! Clear SFSR.FaultValid bit | 853 | stxa %g0, [%g3] ASI_DMMU ! Clear SFSR.FaultValid bit |
708 | membar #Sync | 854 | membar #Sync |
855 | rdpr %tt, %g3 | ||
856 | cmp %g3, 0x80 ! first win spill/fill trap | ||
857 | blu,pn %xcc, 1f | ||
858 | cmp %g3, 0xff ! last win spill/fill trap | ||
859 | bgu,pn %xcc, 1f | ||
860 | nop | ||
709 | ba,pt %xcc, winfix_dax | 861 | ba,pt %xcc, winfix_dax |
710 | rdpr %tpc, %g3 | 862 | rdpr %tpc, %g3 |
711 | __do_data_access_exception: | 863 | 1: sethi %hi(109f), %g7 |
864 | ba,pt %xcc, etraptl1 | ||
865 | 109: or %g7, %lo(109b), %g7 | ||
866 | mov %l4, %o1 | ||
867 | mov %l5, %o2 | ||
868 | call spitfire_data_access_exception_tl1 | ||
869 | add %sp, PTREGS_OFF, %o0 | ||
870 | ba,pt %xcc, rtrap | ||
871 | clr %l6 | ||
872 | |||
873 | __spitfire_data_access_exception: | ||
712 | rdpr %pstate, %g4 | 874 | rdpr %pstate, %g4 |
713 | wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate | 875 | wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate |
714 | mov TLB_SFSR, %g3 | 876 | mov TLB_SFSR, %g3 |
@@ -722,20 +884,19 @@ __do_data_access_exception: | |||
722 | 109: or %g7, %lo(109b), %g7 | 884 | 109: or %g7, %lo(109b), %g7 |
723 | mov %l4, %o1 | 885 | mov %l4, %o1 |
724 | mov %l5, %o2 | 886 | mov %l5, %o2 |
725 | call data_access_exception | 887 | call spitfire_data_access_exception |
726 | add %sp, PTREGS_OFF, %o0 | 888 | add %sp, PTREGS_OFF, %o0 |
727 | ba,pt %xcc, rtrap | 889 | ba,pt %xcc, rtrap |
728 | clr %l6 | 890 | clr %l6 |
729 | 891 | ||
730 | .globl __do_instruction_access_exception | 892 | .globl __spitfire_insn_access_exception |
731 | .globl __do_instruction_access_exception_tl1 | 893 | .globl __spitfire_insn_access_exception_tl1 |
732 | __do_instruction_access_exception_tl1: | 894 | __spitfire_insn_access_exception_tl1: |
733 | rdpr %pstate, %g4 | 895 | rdpr %pstate, %g4 |
734 | wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate | 896 | wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate |
735 | mov TLB_SFSR, %g3 | 897 | mov TLB_SFSR, %g3 |
736 | mov DMMU_SFAR, %g5 | 898 | ldxa [%g3] ASI_IMMU, %g4 ! Get SFSR |
737 | ldxa [%g3] ASI_DMMU, %g4 ! Get SFSR | 899 | rdpr %tpc, %g5 ! IMMU has no SFAR, use TPC |
738 | ldxa [%g5] ASI_DMMU, %g5 ! Get SFAR | ||
739 | stxa %g0, [%g3] ASI_IMMU ! Clear FaultValid bit | 900 | stxa %g0, [%g3] ASI_IMMU ! Clear FaultValid bit |
740 | membar #Sync | 901 | membar #Sync |
741 | sethi %hi(109f), %g7 | 902 | sethi %hi(109f), %g7 |
@@ -743,18 +904,17 @@ __do_instruction_access_exception_tl1: | |||
743 | 109: or %g7, %lo(109b), %g7 | 904 | 109: or %g7, %lo(109b), %g7 |
744 | mov %l4, %o1 | 905 | mov %l4, %o1 |
745 | mov %l5, %o2 | 906 | mov %l5, %o2 |
746 | call instruction_access_exception_tl1 | 907 | call spitfire_insn_access_exception_tl1 |
747 | add %sp, PTREGS_OFF, %o0 | 908 | add %sp, PTREGS_OFF, %o0 |
748 | ba,pt %xcc, rtrap | 909 | ba,pt %xcc, rtrap |
749 | clr %l6 | 910 | clr %l6 |
750 | 911 | ||
751 | __do_instruction_access_exception: | 912 | __spitfire_insn_access_exception: |
752 | rdpr %pstate, %g4 | 913 | rdpr %pstate, %g4 |
753 | wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate | 914 | wrpr %g4, PSTATE_MG|PSTATE_AG, %pstate |
754 | mov TLB_SFSR, %g3 | 915 | mov TLB_SFSR, %g3 |
755 | mov DMMU_SFAR, %g5 | 916 | ldxa [%g3] ASI_IMMU, %g4 ! Get SFSR |
756 | ldxa [%g3] ASI_DMMU, %g4 ! Get SFSR | 917 | rdpr %tpc, %g5 ! IMMU has no SFAR, use TPC |
757 | ldxa [%g5] ASI_DMMU, %g5 ! Get SFAR | ||
758 | stxa %g0, [%g3] ASI_IMMU ! Clear FaultValid bit | 918 | stxa %g0, [%g3] ASI_IMMU ! Clear FaultValid bit |
759 | membar #Sync | 919 | membar #Sync |
760 | sethi %hi(109f), %g7 | 920 | sethi %hi(109f), %g7 |
@@ -762,102 +922,11 @@ __do_instruction_access_exception: | |||
762 | 109: or %g7, %lo(109b), %g7 | 922 | 109: or %g7, %lo(109b), %g7 |
763 | mov %l4, %o1 | 923 | mov %l4, %o1 |
764 | mov %l5, %o2 | 924 | mov %l5, %o2 |
765 | call instruction_access_exception | 925 | call spitfire_insn_access_exception |
766 | add %sp, PTREGS_OFF, %o0 | 926 | add %sp, PTREGS_OFF, %o0 |
767 | ba,pt %xcc, rtrap | 927 | ba,pt %xcc, rtrap |
768 | clr %l6 | 928 | clr %l6 |
769 | 929 | ||
770 | /* This is the trap handler entry point for ECC correctable | ||
771 | * errors. They are corrected, but we listen for the trap | ||
772 | * so that the event can be logged. | ||
773 | * | ||
774 | * Disrupting errors are either: | ||
775 | * 1) single-bit ECC errors during UDB reads to system | ||
776 | * memory | ||
777 | * 2) data parity errors during write-back events | ||
778 | * | ||
779 | * As far as I can make out from the manual, the CEE trap | ||
780 | * is only for correctable errors during memory read | ||
781 | * accesses by the front-end of the processor. | ||
782 | * | ||
783 | * The code below is only for trap level 1 CEE events, | ||
784 | * as it is the only situation where we can safely record | ||
785 | * and log. For trap level >1 we just clear the CE bit | ||
786 | * in the AFSR and return. | ||
787 | */ | ||
788 | |||
789 | /* Our trap handling infrastructure allows us to preserve | ||
790 | * two 64-bit values during etrap for arguments to | ||
791 | * subsequent C code. Therefore we encode the information | ||
792 | * as follows: | ||
793 | * | ||
794 | * value 1) Full 64-bits of AFAR | ||
795 | * value 2) Low 33-bits of AFSR, then bits 33-->42 | ||
796 | * are UDBL error status and bits 43-->52 | ||
797 | * are UDBH error status | ||
798 | */ | ||
799 | .align 64 | ||
800 | .globl cee_trap | ||
801 | cee_trap: | ||
802 | ldxa [%g0] ASI_AFSR, %g1 ! Read AFSR | ||
803 | ldxa [%g0] ASI_AFAR, %g2 ! Read AFAR | ||
804 | sllx %g1, 31, %g1 ! Clear reserved bits | ||
805 | srlx %g1, 31, %g1 ! in AFSR | ||
806 | |||
807 | /* NOTE: UltraSparc-I/II have high and low UDB error | ||
808 | * registers, corresponding to the two UDB units | ||
809 | * present on those chips. UltraSparc-IIi only | ||
810 | * has a single UDB, called "SDB" in the manual. | ||
811 | * For IIi the upper UDB register always reads | ||
812 | * as zero so for our purposes things will just | ||
813 | * work with the checks below. | ||
814 | */ | ||
815 | ldxa [%g0] ASI_UDBL_ERROR_R, %g3 ! Read UDB-Low error status | ||
816 | andcc %g3, (1 << 8), %g4 ! Check CE bit | ||
817 | sllx %g3, (64 - 10), %g3 ! Clear reserved bits | ||
818 | srlx %g3, (64 - 10), %g3 ! in UDB-Low error status | ||
819 | |||
820 | sllx %g3, (33 + 0), %g3 ! Shift up to encoding area | ||
821 | or %g1, %g3, %g1 ! Or it in | ||
822 | be,pn %xcc, 1f ! Branch if CE bit was clear | ||
823 | nop | ||
824 | stxa %g4, [%g0] ASI_UDB_ERROR_W ! Clear CE sticky bit in UDBL | ||
825 | membar #Sync ! Synchronize ASI stores | ||
826 | 1: mov 0x18, %g5 ! Addr of UDB-High error status | ||
827 | ldxa [%g5] ASI_UDBH_ERROR_R, %g3 ! Read it | ||
828 | |||
829 | andcc %g3, (1 << 8), %g4 ! Check CE bit | ||
830 | sllx %g3, (64 - 10), %g3 ! Clear reserved bits | ||
831 | srlx %g3, (64 - 10), %g3 ! in UDB-High error status | ||
832 | sllx %g3, (33 + 10), %g3 ! Shift up to encoding area | ||
833 | or %g1, %g3, %g1 ! Or it in | ||
834 | be,pn %xcc, 1f ! Branch if CE bit was clear | ||
835 | nop | ||
836 | nop | ||
837 | |||
838 | stxa %g4, [%g5] ASI_UDB_ERROR_W ! Clear CE sticky bit in UDBH | ||
839 | membar #Sync ! Synchronize ASI stores | ||
840 | 1: mov 1, %g5 ! AFSR CE bit is | ||
841 | sllx %g5, 20, %g5 ! bit 20 | ||
842 | stxa %g5, [%g0] ASI_AFSR ! Clear CE sticky bit in AFSR | ||
843 | membar #Sync ! Synchronize ASI stores | ||
844 | sllx %g2, (64 - 41), %g2 ! Clear reserved bits | ||
845 | srlx %g2, (64 - 41), %g2 ! in latched AFAR | ||
846 | |||
847 | andn %g2, 0x0f, %g2 ! Finish resv bit clearing | ||
848 | mov %g1, %g4 ! Move AFSR+UDB* into save reg | ||
849 | mov %g2, %g5 ! Move AFAR into save reg | ||
850 | rdpr %pil, %g2 | ||
851 | wrpr %g0, 15, %pil | ||
852 | ba,pt %xcc, etrap_irq | ||
853 | rd %pc, %g7 | ||
854 | mov %l4, %o0 | ||
855 | |||
856 | mov %l5, %o1 | ||
857 | call cee_log | ||
858 | add %sp, PTREGS_OFF, %o2 | ||
859 | ba,a,pt %xcc, rtrap_irq | ||
860 | |||
861 | /* Capture I/D/E-cache state into per-cpu error scoreboard. | 930 | /* Capture I/D/E-cache state into per-cpu error scoreboard. |
862 | * | 931 | * |
863 | * %g1: (TL>=0) ? 1 : 0 | 932 | * %g1: (TL>=0) ? 1 : 0 |
diff --git a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c index 2803bc7c2c79..425c60cfea19 100644 --- a/arch/sparc64/kernel/pci_iommu.c +++ b/arch/sparc64/kernel/pci_iommu.c | |||
@@ -466,7 +466,7 @@ do_flush_sync: | |||
466 | if (!limit) | 466 | if (!limit) |
467 | break; | 467 | break; |
468 | udelay(1); | 468 | udelay(1); |
469 | membar("#LoadLoad"); | 469 | rmb(); |
470 | } | 470 | } |
471 | if (!limit) | 471 | if (!limit) |
472 | printk(KERN_WARNING "pci_strbuf_flush: flushflag timeout " | 472 | printk(KERN_WARNING "pci_strbuf_flush: flushflag timeout " |
diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c index 07424b075938..66255434128a 100644 --- a/arch/sparc64/kernel/process.c +++ b/arch/sparc64/kernel/process.c | |||
@@ -103,7 +103,7 @@ void cpu_idle(void) | |||
103 | * other cpus see our increasing idleness for the buddy | 103 | * other cpus see our increasing idleness for the buddy |
104 | * redistribution algorithm. -DaveM | 104 | * redistribution algorithm. -DaveM |
105 | */ | 105 | */ |
106 | membar("#StoreStore | #StoreLoad"); | 106 | membar_storeload_storestore(); |
107 | } | 107 | } |
108 | } | 108 | } |
109 | 109 | ||
diff --git a/arch/sparc64/kernel/sbus.c b/arch/sparc64/kernel/sbus.c index 89f5e019f24c..e09ddf927655 100644 --- a/arch/sparc64/kernel/sbus.c +++ b/arch/sparc64/kernel/sbus.c | |||
@@ -147,7 +147,7 @@ static void sbus_strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long | |||
147 | if (!limit) | 147 | if (!limit) |
148 | break; | 148 | break; |
149 | udelay(1); | 149 | udelay(1); |
150 | membar("#LoadLoad"); | 150 | rmb(); |
151 | } | 151 | } |
152 | if (!limit) | 152 | if (!limit) |
153 | printk(KERN_WARNING "sbus_strbuf_flush: flushflag timeout " | 153 | printk(KERN_WARNING "sbus_strbuf_flush: flushflag timeout " |
diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index b7e6a91952b2..fbdfed3798d8 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c | |||
@@ -33,7 +33,6 @@ | |||
33 | #include <linux/cpu.h> | 33 | #include <linux/cpu.h> |
34 | #include <linux/initrd.h> | 34 | #include <linux/initrd.h> |
35 | 35 | ||
36 | #include <asm/segment.h> | ||
37 | #include <asm/system.h> | 36 | #include <asm/system.h> |
38 | #include <asm/io.h> | 37 | #include <asm/io.h> |
39 | #include <asm/processor.h> | 38 | #include <asm/processor.h> |
diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c index b1ed23091fbb..aecccd0df1d1 100644 --- a/arch/sparc64/kernel/signal32.c +++ b/arch/sparc64/kernel/signal32.c | |||
@@ -877,11 +877,12 @@ static void new_setup_frame32(struct k_sigaction *ka, struct pt_regs *regs, | |||
877 | unsigned long page = (unsigned long) | 877 | unsigned long page = (unsigned long) |
878 | page_address(pte_page(*ptep)); | 878 | page_address(pte_page(*ptep)); |
879 | 879 | ||
880 | __asm__ __volatile__( | 880 | wmb(); |
881 | " membar #StoreStore\n" | 881 | __asm__ __volatile__("flush %0 + %1" |
882 | " flush %0 + %1" | 882 | : /* no outputs */ |
883 | : : "r" (page), "r" (address & (PAGE_SIZE - 1)) | 883 | : "r" (page), |
884 | : "memory"); | 884 | "r" (address & (PAGE_SIZE - 1)) |
885 | : "memory"); | ||
885 | } | 886 | } |
886 | pte_unmap(ptep); | 887 | pte_unmap(ptep); |
887 | preempt_enable(); | 888 | preempt_enable(); |
@@ -1292,11 +1293,12 @@ static void setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs, | |||
1292 | unsigned long page = (unsigned long) | 1293 | unsigned long page = (unsigned long) |
1293 | page_address(pte_page(*ptep)); | 1294 | page_address(pte_page(*ptep)); |
1294 | 1295 | ||
1295 | __asm__ __volatile__( | 1296 | wmb(); |
1296 | " membar #StoreStore\n" | 1297 | __asm__ __volatile__("flush %0 + %1" |
1297 | " flush %0 + %1" | 1298 | : /* no outputs */ |
1298 | : : "r" (page), "r" (address & (PAGE_SIZE - 1)) | 1299 | : "r" (page), |
1299 | : "memory"); | 1300 | "r" (address & (PAGE_SIZE - 1)) |
1301 | : "memory"); | ||
1300 | } | 1302 | } |
1301 | pte_unmap(ptep); | 1303 | pte_unmap(ptep); |
1302 | preempt_enable(); | 1304 | preempt_enable(); |
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index b9b42491e118..b4fc6a5462b2 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c | |||
@@ -144,7 +144,7 @@ void __init smp_callin(void) | |||
144 | current->active_mm = &init_mm; | 144 | current->active_mm = &init_mm; |
145 | 145 | ||
146 | while (!cpu_isset(cpuid, smp_commenced_mask)) | 146 | while (!cpu_isset(cpuid, smp_commenced_mask)) |
147 | membar("#LoadLoad"); | 147 | rmb(); |
148 | 148 | ||
149 | cpu_set(cpuid, cpu_online_map); | 149 | cpu_set(cpuid, cpu_online_map); |
150 | } | 150 | } |
@@ -184,11 +184,11 @@ static inline long get_delta (long *rt, long *master) | |||
184 | for (i = 0; i < NUM_ITERS; i++) { | 184 | for (i = 0; i < NUM_ITERS; i++) { |
185 | t0 = tick_ops->get_tick(); | 185 | t0 = tick_ops->get_tick(); |
186 | go[MASTER] = 1; | 186 | go[MASTER] = 1; |
187 | membar("#StoreLoad"); | 187 | membar_storeload(); |
188 | while (!(tm = go[SLAVE])) | 188 | while (!(tm = go[SLAVE])) |
189 | membar("#LoadLoad"); | 189 | rmb(); |
190 | go[SLAVE] = 0; | 190 | go[SLAVE] = 0; |
191 | membar("#StoreStore"); | 191 | wmb(); |
192 | t1 = tick_ops->get_tick(); | 192 | t1 = tick_ops->get_tick(); |
193 | 193 | ||
194 | if (t1 - t0 < best_t1 - best_t0) | 194 | if (t1 - t0 < best_t1 - best_t0) |
@@ -221,7 +221,7 @@ void smp_synchronize_tick_client(void) | |||
221 | go[MASTER] = 1; | 221 | go[MASTER] = 1; |
222 | 222 | ||
223 | while (go[MASTER]) | 223 | while (go[MASTER]) |
224 | membar("#LoadLoad"); | 224 | rmb(); |
225 | 225 | ||
226 | local_irq_save(flags); | 226 | local_irq_save(flags); |
227 | { | 227 | { |
@@ -273,21 +273,21 @@ static void smp_synchronize_one_tick(int cpu) | |||
273 | 273 | ||
274 | /* wait for client to be ready */ | 274 | /* wait for client to be ready */ |
275 | while (!go[MASTER]) | 275 | while (!go[MASTER]) |
276 | membar("#LoadLoad"); | 276 | rmb(); |
277 | 277 | ||
278 | /* now let the client proceed into his loop */ | 278 | /* now let the client proceed into his loop */ |
279 | go[MASTER] = 0; | 279 | go[MASTER] = 0; |
280 | membar("#StoreLoad"); | 280 | membar_storeload(); |
281 | 281 | ||
282 | spin_lock_irqsave(&itc_sync_lock, flags); | 282 | spin_lock_irqsave(&itc_sync_lock, flags); |
283 | { | 283 | { |
284 | for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) { | 284 | for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) { |
285 | while (!go[MASTER]) | 285 | while (!go[MASTER]) |
286 | membar("#LoadLoad"); | 286 | rmb(); |
287 | go[MASTER] = 0; | 287 | go[MASTER] = 0; |
288 | membar("#StoreStore"); | 288 | wmb(); |
289 | go[SLAVE] = tick_ops->get_tick(); | 289 | go[SLAVE] = tick_ops->get_tick(); |
290 | membar("#StoreLoad"); | 290 | membar_storeload(); |
291 | } | 291 | } |
292 | } | 292 | } |
293 | spin_unlock_irqrestore(&itc_sync_lock, flags); | 293 | spin_unlock_irqrestore(&itc_sync_lock, flags); |
@@ -927,11 +927,11 @@ void smp_capture(void) | |||
927 | smp_processor_id()); | 927 | smp_processor_id()); |
928 | #endif | 928 | #endif |
929 | penguins_are_doing_time = 1; | 929 | penguins_are_doing_time = 1; |
930 | membar("#StoreStore | #LoadStore"); | 930 | membar_storestore_loadstore(); |
931 | atomic_inc(&smp_capture_registry); | 931 | atomic_inc(&smp_capture_registry); |
932 | smp_cross_call(&xcall_capture, 0, 0, 0); | 932 | smp_cross_call(&xcall_capture, 0, 0, 0); |
933 | while (atomic_read(&smp_capture_registry) != ncpus) | 933 | while (atomic_read(&smp_capture_registry) != ncpus) |
934 | membar("#LoadLoad"); | 934 | rmb(); |
935 | #ifdef CAPTURE_DEBUG | 935 | #ifdef CAPTURE_DEBUG |
936 | printk("done\n"); | 936 | printk("done\n"); |
937 | #endif | 937 | #endif |
@@ -947,7 +947,7 @@ void smp_release(void) | |||
947 | smp_processor_id()); | 947 | smp_processor_id()); |
948 | #endif | 948 | #endif |
949 | penguins_are_doing_time = 0; | 949 | penguins_are_doing_time = 0; |
950 | membar("#StoreStore | #StoreLoad"); | 950 | membar_storeload_storestore(); |
951 | atomic_dec(&smp_capture_registry); | 951 | atomic_dec(&smp_capture_registry); |
952 | } | 952 | } |
953 | } | 953 | } |
@@ -970,9 +970,9 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs) | |||
970 | save_alternate_globals(global_save); | 970 | save_alternate_globals(global_save); |
971 | prom_world(1); | 971 | prom_world(1); |
972 | atomic_inc(&smp_capture_registry); | 972 | atomic_inc(&smp_capture_registry); |
973 | membar("#StoreLoad | #StoreStore"); | 973 | membar_storeload_storestore(); |
974 | while (penguins_are_doing_time) | 974 | while (penguins_are_doing_time) |
975 | membar("#LoadLoad"); | 975 | rmb(); |
976 | restore_alternate_globals(global_save); | 976 | restore_alternate_globals(global_save); |
977 | atomic_dec(&smp_capture_registry); | 977 | atomic_dec(&smp_capture_registry); |
978 | prom_world(0); | 978 | prom_world(0); |
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index 9202d925a9ce..a3ea697f1adb 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c | |||
@@ -99,17 +99,6 @@ extern int __ashrdi3(int, int); | |||
99 | extern void dump_thread(struct pt_regs *, struct user *); | 99 | extern void dump_thread(struct pt_regs *, struct user *); |
100 | extern int dump_fpu (struct pt_regs * regs, elf_fpregset_t * fpregs); | 100 | extern int dump_fpu (struct pt_regs * regs, elf_fpregset_t * fpregs); |
101 | 101 | ||
102 | #if defined(CONFIG_SMP) && defined(CONFIG_DEBUG_SPINLOCK) | ||
103 | extern void _do_spin_lock (spinlock_t *lock, char *str); | ||
104 | extern void _do_spin_unlock (spinlock_t *lock); | ||
105 | extern int _spin_trylock (spinlock_t *lock); | ||
106 | extern void _do_read_lock(rwlock_t *rw, char *str); | ||
107 | extern void _do_read_unlock(rwlock_t *rw, char *str); | ||
108 | extern void _do_write_lock(rwlock_t *rw, char *str); | ||
109 | extern void _do_write_unlock(rwlock_t *rw); | ||
110 | extern int _do_write_trylock(rwlock_t *rw, char *str); | ||
111 | #endif | ||
112 | |||
113 | extern unsigned long phys_base; | 102 | extern unsigned long phys_base; |
114 | extern unsigned long pfn_base; | 103 | extern unsigned long pfn_base; |
115 | 104 | ||
@@ -152,18 +141,6 @@ EXPORT_SYMBOL(_mcount); | |||
152 | EXPORT_SYMBOL(cpu_online_map); | 141 | EXPORT_SYMBOL(cpu_online_map); |
153 | EXPORT_SYMBOL(phys_cpu_present_map); | 142 | EXPORT_SYMBOL(phys_cpu_present_map); |
154 | 143 | ||
155 | /* Spinlock debugging library, optional. */ | ||
156 | #ifdef CONFIG_DEBUG_SPINLOCK | ||
157 | EXPORT_SYMBOL(_do_spin_lock); | ||
158 | EXPORT_SYMBOL(_do_spin_unlock); | ||
159 | EXPORT_SYMBOL(_spin_trylock); | ||
160 | EXPORT_SYMBOL(_do_read_lock); | ||
161 | EXPORT_SYMBOL(_do_read_unlock); | ||
162 | EXPORT_SYMBOL(_do_write_lock); | ||
163 | EXPORT_SYMBOL(_do_write_unlock); | ||
164 | EXPORT_SYMBOL(_do_write_trylock); | ||
165 | #endif | ||
166 | |||
167 | EXPORT_SYMBOL(smp_call_function); | 144 | EXPORT_SYMBOL(smp_call_function); |
168 | #endif /* CONFIG_SMP */ | 145 | #endif /* CONFIG_SMP */ |
169 | 146 | ||
@@ -429,3 +406,12 @@ EXPORT_SYMBOL(xor_vis_4); | |||
429 | EXPORT_SYMBOL(xor_vis_5); | 406 | EXPORT_SYMBOL(xor_vis_5); |
430 | 407 | ||
431 | EXPORT_SYMBOL(prom_palette); | 408 | EXPORT_SYMBOL(prom_palette); |
409 | |||
410 | /* memory barriers */ | ||
411 | EXPORT_SYMBOL(mb); | ||
412 | EXPORT_SYMBOL(rmb); | ||
413 | EXPORT_SYMBOL(wmb); | ||
414 | EXPORT_SYMBOL(membar_storeload); | ||
415 | EXPORT_SYMBOL(membar_storeload_storestore); | ||
416 | EXPORT_SYMBOL(membar_storeload_loadload); | ||
417 | EXPORT_SYMBOL(membar_storestore_loadstore); | ||
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index 0c9e54b2f0c8..b280b2ef674f 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <asm/dcu.h> | 33 | #include <asm/dcu.h> |
34 | #include <asm/estate.h> | 34 | #include <asm/estate.h> |
35 | #include <asm/chafsr.h> | 35 | #include <asm/chafsr.h> |
36 | #include <asm/sfafsr.h> | ||
36 | #include <asm/psrcompat.h> | 37 | #include <asm/psrcompat.h> |
37 | #include <asm/processor.h> | 38 | #include <asm/processor.h> |
38 | #include <asm/timer.h> | 39 | #include <asm/timer.h> |
@@ -143,8 +144,7 @@ void do_BUG(const char *file, int line) | |||
143 | } | 144 | } |
144 | #endif | 145 | #endif |
145 | 146 | ||
146 | void instruction_access_exception(struct pt_regs *regs, | 147 | void spitfire_insn_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar) |
147 | unsigned long sfsr, unsigned long sfar) | ||
148 | { | 148 | { |
149 | siginfo_t info; | 149 | siginfo_t info; |
150 | 150 | ||
@@ -153,8 +153,8 @@ void instruction_access_exception(struct pt_regs *regs, | |||
153 | return; | 153 | return; |
154 | 154 | ||
155 | if (regs->tstate & TSTATE_PRIV) { | 155 | if (regs->tstate & TSTATE_PRIV) { |
156 | printk("instruction_access_exception: SFSR[%016lx] SFAR[%016lx], going.\n", | 156 | printk("spitfire_insn_access_exception: SFSR[%016lx] " |
157 | sfsr, sfar); | 157 | "SFAR[%016lx], going.\n", sfsr, sfar); |
158 | die_if_kernel("Iax", regs); | 158 | die_if_kernel("Iax", regs); |
159 | } | 159 | } |
160 | if (test_thread_flag(TIF_32BIT)) { | 160 | if (test_thread_flag(TIF_32BIT)) { |
@@ -169,19 +169,17 @@ void instruction_access_exception(struct pt_regs *regs, | |||
169 | force_sig_info(SIGSEGV, &info, current); | 169 | force_sig_info(SIGSEGV, &info, current); |
170 | } | 170 | } |
171 | 171 | ||
172 | void instruction_access_exception_tl1(struct pt_regs *regs, | 172 | void spitfire_insn_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar) |
173 | unsigned long sfsr, unsigned long sfar) | ||
174 | { | 173 | { |
175 | if (notify_die(DIE_TRAP_TL1, "instruction access exception tl1", regs, | 174 | if (notify_die(DIE_TRAP_TL1, "instruction access exception tl1", regs, |
176 | 0, 0x8, SIGTRAP) == NOTIFY_STOP) | 175 | 0, 0x8, SIGTRAP) == NOTIFY_STOP) |
177 | return; | 176 | return; |
178 | 177 | ||
179 | dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); | 178 | dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); |
180 | instruction_access_exception(regs, sfsr, sfar); | 179 | spitfire_insn_access_exception(regs, sfsr, sfar); |
181 | } | 180 | } |
182 | 181 | ||
183 | void data_access_exception(struct pt_regs *regs, | 182 | void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar) |
184 | unsigned long sfsr, unsigned long sfar) | ||
185 | { | 183 | { |
186 | siginfo_t info; | 184 | siginfo_t info; |
187 | 185 | ||
@@ -207,8 +205,8 @@ void data_access_exception(struct pt_regs *regs, | |||
207 | return; | 205 | return; |
208 | } | 206 | } |
209 | /* Shit... */ | 207 | /* Shit... */ |
210 | printk("data_access_exception: SFSR[%016lx] SFAR[%016lx], going.\n", | 208 | printk("spitfire_data_access_exception: SFSR[%016lx] " |
211 | sfsr, sfar); | 209 | "SFAR[%016lx], going.\n", sfsr, sfar); |
212 | die_if_kernel("Dax", regs); | 210 | die_if_kernel("Dax", regs); |
213 | } | 211 | } |
214 | 212 | ||
@@ -220,6 +218,16 @@ void data_access_exception(struct pt_regs *regs, | |||
220 | force_sig_info(SIGSEGV, &info, current); | 218 | force_sig_info(SIGSEGV, &info, current); |
221 | } | 219 | } |
222 | 220 | ||
221 | void spitfire_data_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar) | ||
222 | { | ||
223 | if (notify_die(DIE_TRAP_TL1, "data access exception tl1", regs, | ||
224 | 0, 0x30, SIGTRAP) == NOTIFY_STOP) | ||
225 | return; | ||
226 | |||
227 | dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); | ||
228 | spitfire_data_access_exception(regs, sfsr, sfar); | ||
229 | } | ||
230 | |||
223 | #ifdef CONFIG_PCI | 231 | #ifdef CONFIG_PCI |
224 | /* This is really pathetic... */ | 232 | /* This is really pathetic... */ |
225 | extern volatile int pci_poke_in_progress; | 233 | extern volatile int pci_poke_in_progress; |
@@ -253,54 +261,13 @@ static void spitfire_clean_and_reenable_l1_caches(void) | |||
253 | : "memory"); | 261 | : "memory"); |
254 | } | 262 | } |
255 | 263 | ||
256 | void do_iae(struct pt_regs *regs) | 264 | static void spitfire_enable_estate_errors(void) |
257 | { | 265 | { |
258 | siginfo_t info; | 266 | __asm__ __volatile__("stxa %0, [%%g0] %1\n\t" |
259 | 267 | "membar #Sync" | |
260 | spitfire_clean_and_reenable_l1_caches(); | 268 | : /* no outputs */ |
261 | 269 | : "r" (ESTATE_ERR_ALL), | |
262 | if (notify_die(DIE_TRAP, "instruction access exception", regs, | 270 | "i" (ASI_ESTATE_ERROR_EN)); |
263 | 0, 0x8, SIGTRAP) == NOTIFY_STOP) | ||
264 | return; | ||
265 | |||
266 | info.si_signo = SIGBUS; | ||
267 | info.si_errno = 0; | ||
268 | info.si_code = BUS_OBJERR; | ||
269 | info.si_addr = (void *)0; | ||
270 | info.si_trapno = 0; | ||
271 | force_sig_info(SIGBUS, &info, current); | ||
272 | } | ||
273 | |||
274 | void do_dae(struct pt_regs *regs) | ||
275 | { | ||
276 | siginfo_t info; | ||
277 | |||
278 | #ifdef CONFIG_PCI | ||
279 | if (pci_poke_in_progress && pci_poke_cpu == smp_processor_id()) { | ||
280 | spitfire_clean_and_reenable_l1_caches(); | ||
281 | |||
282 | pci_poke_faulted = 1; | ||
283 | |||
284 | /* Why the fuck did they have to change this? */ | ||
285 | if (tlb_type == cheetah || tlb_type == cheetah_plus) | ||
286 | regs->tpc += 4; | ||
287 | |||
288 | regs->tnpc = regs->tpc + 4; | ||
289 | return; | ||
290 | } | ||
291 | #endif | ||
292 | spitfire_clean_and_reenable_l1_caches(); | ||
293 | |||
294 | if (notify_die(DIE_TRAP, "data access exception", regs, | ||
295 | 0, 0x30, SIGTRAP) == NOTIFY_STOP) | ||
296 | return; | ||
297 | |||
298 | info.si_signo = SIGBUS; | ||
299 | info.si_errno = 0; | ||
300 | info.si_code = BUS_OBJERR; | ||
301 | info.si_addr = (void *)0; | ||
302 | info.si_trapno = 0; | ||
303 | force_sig_info(SIGBUS, &info, current); | ||
304 | } | 271 | } |
305 | 272 | ||
306 | static char ecc_syndrome_table[] = { | 273 | static char ecc_syndrome_table[] = { |
@@ -338,65 +305,15 @@ static char ecc_syndrome_table[] = { | |||
338 | 0x0b, 0x48, 0x48, 0x4b, 0x48, 0x4b, 0x4b, 0x4a | 305 | 0x0b, 0x48, 0x48, 0x4b, 0x48, 0x4b, 0x4b, 0x4a |
339 | }; | 306 | }; |
340 | 307 | ||
341 | /* cee_trap in entry.S encodes AFSR/UDBH/UDBL error status | ||
342 | * in the following format. The AFAR is left as is, with | ||
343 | * reserved bits cleared, and is a raw 40-bit physical | ||
344 | * address. | ||
345 | */ | ||
346 | #define CE_STATUS_UDBH_UE (1UL << (43 + 9)) | ||
347 | #define CE_STATUS_UDBH_CE (1UL << (43 + 8)) | ||
348 | #define CE_STATUS_UDBH_ESYNDR (0xffUL << 43) | ||
349 | #define CE_STATUS_UDBH_SHIFT 43 | ||
350 | #define CE_STATUS_UDBL_UE (1UL << (33 + 9)) | ||
351 | #define CE_STATUS_UDBL_CE (1UL << (33 + 8)) | ||
352 | #define CE_STATUS_UDBL_ESYNDR (0xffUL << 33) | ||
353 | #define CE_STATUS_UDBL_SHIFT 33 | ||
354 | #define CE_STATUS_AFSR_MASK (0x1ffffffffUL) | ||
355 | #define CE_STATUS_AFSR_ME (1UL << 32) | ||
356 | #define CE_STATUS_AFSR_PRIV (1UL << 31) | ||
357 | #define CE_STATUS_AFSR_ISAP (1UL << 30) | ||
358 | #define CE_STATUS_AFSR_ETP (1UL << 29) | ||
359 | #define CE_STATUS_AFSR_IVUE (1UL << 28) | ||
360 | #define CE_STATUS_AFSR_TO (1UL << 27) | ||
361 | #define CE_STATUS_AFSR_BERR (1UL << 26) | ||
362 | #define CE_STATUS_AFSR_LDP (1UL << 25) | ||
363 | #define CE_STATUS_AFSR_CP (1UL << 24) | ||
364 | #define CE_STATUS_AFSR_WP (1UL << 23) | ||
365 | #define CE_STATUS_AFSR_EDP (1UL << 22) | ||
366 | #define CE_STATUS_AFSR_UE (1UL << 21) | ||
367 | #define CE_STATUS_AFSR_CE (1UL << 20) | ||
368 | #define CE_STATUS_AFSR_ETS (0xfUL << 16) | ||
369 | #define CE_STATUS_AFSR_ETS_SHIFT 16 | ||
370 | #define CE_STATUS_AFSR_PSYND (0xffffUL << 0) | ||
371 | #define CE_STATUS_AFSR_PSYND_SHIFT 0 | ||
372 | |||
373 | /* Layout of Ecache TAG Parity Syndrome of AFSR */ | ||
374 | #define AFSR_ETSYNDROME_7_0 0x1UL /* E$-tag bus bits <7:0> */ | ||
375 | #define AFSR_ETSYNDROME_15_8 0x2UL /* E$-tag bus bits <15:8> */ | ||
376 | #define AFSR_ETSYNDROME_21_16 0x4UL /* E$-tag bus bits <21:16> */ | ||
377 | #define AFSR_ETSYNDROME_24_22 0x8UL /* E$-tag bus bits <24:22> */ | ||
378 | |||
379 | static char *syndrome_unknown = "<Unknown>"; | 308 | static char *syndrome_unknown = "<Unknown>"; |
380 | 309 | ||
381 | asmlinkage void cee_log(unsigned long ce_status, | 310 | static void spitfire_log_udb_syndrome(unsigned long afar, unsigned long udbh, unsigned long udbl, unsigned long bit) |
382 | unsigned long afar, | ||
383 | struct pt_regs *regs) | ||
384 | { | 311 | { |
385 | char memmod_str[64]; | 312 | unsigned short scode; |
386 | char *p; | 313 | char memmod_str[64], *p; |
387 | unsigned short scode, udb_reg; | ||
388 | 314 | ||
389 | printk(KERN_WARNING "CPU[%d]: Correctable ECC Error " | 315 | if (udbl & bit) { |
390 | "AFSR[%lx] AFAR[%016lx] UDBL[%lx] UDBH[%lx]\n", | 316 | scode = ecc_syndrome_table[udbl & 0xff]; |
391 | smp_processor_id(), | ||
392 | (ce_status & CE_STATUS_AFSR_MASK), | ||
393 | afar, | ||
394 | ((ce_status >> CE_STATUS_UDBL_SHIFT) & 0x3ffUL), | ||
395 | ((ce_status >> CE_STATUS_UDBH_SHIFT) & 0x3ffUL)); | ||
396 | |||
397 | udb_reg = ((ce_status >> CE_STATUS_UDBL_SHIFT) & 0x3ffUL); | ||
398 | if (udb_reg & (1 << 8)) { | ||
399 | scode = ecc_syndrome_table[udb_reg & 0xff]; | ||
400 | if (prom_getunumber(scode, afar, | 317 | if (prom_getunumber(scode, afar, |
401 | memmod_str, sizeof(memmod_str)) == -1) | 318 | memmod_str, sizeof(memmod_str)) == -1) |
402 | p = syndrome_unknown; | 319 | p = syndrome_unknown; |
@@ -407,9 +324,8 @@ asmlinkage void cee_log(unsigned long ce_status, | |||
407 | smp_processor_id(), scode, p); | 324 | smp_processor_id(), scode, p); |
408 | } | 325 | } |
409 | 326 | ||
410 | udb_reg = ((ce_status >> CE_STATUS_UDBH_SHIFT) & 0x3ffUL); | 327 | if (udbh & bit) { |
411 | if (udb_reg & (1 << 8)) { | 328 | scode = ecc_syndrome_table[udbh & 0xff]; |
412 | scode = ecc_syndrome_table[udb_reg & 0xff]; | ||
413 | if (prom_getunumber(scode, afar, | 329 | if (prom_getunumber(scode, afar, |
414 | memmod_str, sizeof(memmod_str)) == -1) | 330 | memmod_str, sizeof(memmod_str)) == -1) |
415 | p = syndrome_unknown; | 331 | p = syndrome_unknown; |
@@ -419,6 +335,127 @@ asmlinkage void cee_log(unsigned long ce_status, | |||
419 | "Memory Module \"%s\"\n", | 335 | "Memory Module \"%s\"\n", |
420 | smp_processor_id(), scode, p); | 336 | smp_processor_id(), scode, p); |
421 | } | 337 | } |
338 | |||
339 | } | ||
340 | |||
341 | static void spitfire_cee_log(unsigned long afsr, unsigned long afar, unsigned long udbh, unsigned long udbl, int tl1, struct pt_regs *regs) | ||
342 | { | ||
343 | |||
344 | printk(KERN_WARNING "CPU[%d]: Correctable ECC Error " | ||
345 | "AFSR[%lx] AFAR[%016lx] UDBL[%lx] UDBH[%lx] TL>1[%d]\n", | ||
346 | smp_processor_id(), afsr, afar, udbl, udbh, tl1); | ||
347 | |||
348 | spitfire_log_udb_syndrome(afar, udbh, udbl, UDBE_CE); | ||
349 | |||
350 | /* We always log it, even if someone is listening for this | ||
351 | * trap. | ||
352 | */ | ||
353 | notify_die(DIE_TRAP, "Correctable ECC Error", regs, | ||
354 | 0, TRAP_TYPE_CEE, SIGTRAP); | ||
355 | |||
356 | /* The Correctable ECC Error trap does not disable I/D caches. So | ||
357 | * we only have to restore the ESTATE Error Enable register. | ||
358 | */ | ||
359 | spitfire_enable_estate_errors(); | ||
360 | } | ||
361 | |||
362 | static void spitfire_ue_log(unsigned long afsr, unsigned long afar, unsigned long udbh, unsigned long udbl, unsigned long tt, int tl1, struct pt_regs *regs) | ||
363 | { | ||
364 | siginfo_t info; | ||
365 | |||
366 | printk(KERN_WARNING "CPU[%d]: Uncorrectable Error AFSR[%lx] " | ||
367 | "AFAR[%lx] UDBL[%lx] UDBH[%ld] TT[%lx] TL>1[%d]\n", | ||
368 | smp_processor_id(), afsr, afar, udbl, udbh, tt, tl1); | ||
369 | |||
370 | /* XXX add more human friendly logging of the error status | ||
371 | * XXX as is implemented for cheetah | ||
372 | */ | ||
373 | |||
374 | spitfire_log_udb_syndrome(afar, udbh, udbl, UDBE_UE); | ||
375 | |||
376 | /* We always log it, even if someone is listening for this | ||
377 | * trap. | ||
378 | */ | ||
379 | notify_die(DIE_TRAP, "Uncorrectable Error", regs, | ||
380 | 0, tt, SIGTRAP); | ||
381 | |||
382 | if (regs->tstate & TSTATE_PRIV) { | ||
383 | if (tl1) | ||
384 | dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); | ||
385 | die_if_kernel("UE", regs); | ||
386 | } | ||
387 | |||
388 | /* XXX need more intelligent processing here, such as is implemented | ||
389 | * XXX for cheetah errors, in fact if the E-cache still holds the | ||
390 | * XXX line with bad parity this will loop | ||
391 | */ | ||
392 | |||
393 | spitfire_clean_and_reenable_l1_caches(); | ||
394 | spitfire_enable_estate_errors(); | ||
395 | |||
396 | if (test_thread_flag(TIF_32BIT)) { | ||
397 | regs->tpc &= 0xffffffff; | ||
398 | regs->tnpc &= 0xffffffff; | ||
399 | } | ||
400 | info.si_signo = SIGBUS; | ||
401 | info.si_errno = 0; | ||
402 | info.si_code = BUS_OBJERR; | ||
403 | info.si_addr = (void *)0; | ||
404 | info.si_trapno = 0; | ||
405 | force_sig_info(SIGBUS, &info, current); | ||
406 | } | ||
407 | |||
408 | void spitfire_access_error(struct pt_regs *regs, unsigned long status_encoded, unsigned long afar) | ||
409 | { | ||
410 | unsigned long afsr, tt, udbh, udbl; | ||
411 | int tl1; | ||
412 | |||
413 | afsr = (status_encoded & SFSTAT_AFSR_MASK) >> SFSTAT_AFSR_SHIFT; | ||
414 | tt = (status_encoded & SFSTAT_TRAP_TYPE) >> SFSTAT_TRAP_TYPE_SHIFT; | ||
415 | tl1 = (status_encoded & SFSTAT_TL_GT_ONE) ? 1 : 0; | ||
416 | udbl = (status_encoded & SFSTAT_UDBL_MASK) >> SFSTAT_UDBL_SHIFT; | ||
417 | udbh = (status_encoded & SFSTAT_UDBH_MASK) >> SFSTAT_UDBH_SHIFT; | ||
418 | |||
419 | #ifdef CONFIG_PCI | ||
420 | if (tt == TRAP_TYPE_DAE && | ||
421 | pci_poke_in_progress && pci_poke_cpu == smp_processor_id()) { | ||
422 | spitfire_clean_and_reenable_l1_caches(); | ||
423 | spitfire_enable_estate_errors(); | ||
424 | |||
425 | pci_poke_faulted = 1; | ||
426 | regs->tnpc = regs->tpc + 4; | ||
427 | return; | ||
428 | } | ||
429 | #endif | ||
430 | |||
431 | if (afsr & SFAFSR_UE) | ||
432 | spitfire_ue_log(afsr, afar, udbh, udbl, tt, tl1, regs); | ||
433 | |||
434 | if (tt == TRAP_TYPE_CEE) { | ||
435 | /* Handle the case where we took a CEE trap, but ACK'd | ||
436 | * only the UE state in the UDB error registers. | ||
437 | */ | ||
438 | if (afsr & SFAFSR_UE) { | ||
439 | if (udbh & UDBE_CE) { | ||
440 | __asm__ __volatile__( | ||
441 | "stxa %0, [%1] %2\n\t" | ||
442 | "membar #Sync" | ||
443 | : /* no outputs */ | ||
444 | : "r" (udbh & UDBE_CE), | ||
445 | "r" (0x0), "i" (ASI_UDB_ERROR_W)); | ||
446 | } | ||
447 | if (udbl & UDBE_CE) { | ||
448 | __asm__ __volatile__( | ||
449 | "stxa %0, [%1] %2\n\t" | ||
450 | "membar #Sync" | ||
451 | : /* no outputs */ | ||
452 | : "r" (udbl & UDBE_CE), | ||
453 | "r" (0x18), "i" (ASI_UDB_ERROR_W)); | ||
454 | } | ||
455 | } | ||
456 | |||
457 | spitfire_cee_log(afsr, afar, udbh, udbl, tl1, regs); | ||
458 | } | ||
422 | } | 459 | } |
423 | 460 | ||
424 | int cheetah_pcache_forced_on; | 461 | int cheetah_pcache_forced_on; |
diff --git a/arch/sparc64/kernel/ttable.S b/arch/sparc64/kernel/ttable.S index 491bb3681f9d..8365bc1f81f3 100644 --- a/arch/sparc64/kernel/ttable.S +++ b/arch/sparc64/kernel/ttable.S | |||
@@ -18,9 +18,10 @@ sparc64_ttable_tl0: | |||
18 | tl0_resv000: BOOT_KERNEL BTRAP(0x1) BTRAP(0x2) BTRAP(0x3) | 18 | tl0_resv000: BOOT_KERNEL BTRAP(0x1) BTRAP(0x2) BTRAP(0x3) |
19 | tl0_resv004: BTRAP(0x4) BTRAP(0x5) BTRAP(0x6) BTRAP(0x7) | 19 | tl0_resv004: BTRAP(0x4) BTRAP(0x5) BTRAP(0x6) BTRAP(0x7) |
20 | tl0_iax: membar #Sync | 20 | tl0_iax: membar #Sync |
21 | TRAP_NOSAVE_7INSNS(__do_instruction_access_exception) | 21 | TRAP_NOSAVE_7INSNS(__spitfire_insn_access_exception) |
22 | tl0_resv009: BTRAP(0x9) | 22 | tl0_resv009: BTRAP(0x9) |
23 | tl0_iae: TRAP(do_iae) | 23 | tl0_iae: membar #Sync |
24 | TRAP_NOSAVE_7INSNS(__spitfire_access_error) | ||
24 | tl0_resv00b: BTRAP(0xb) BTRAP(0xc) BTRAP(0xd) BTRAP(0xe) BTRAP(0xf) | 25 | tl0_resv00b: BTRAP(0xb) BTRAP(0xc) BTRAP(0xd) BTRAP(0xe) BTRAP(0xf) |
25 | tl0_ill: membar #Sync | 26 | tl0_ill: membar #Sync |
26 | TRAP_7INSNS(do_illegal_instruction) | 27 | TRAP_7INSNS(do_illegal_instruction) |
@@ -36,9 +37,10 @@ tl0_cwin: CLEAN_WINDOW | |||
36 | tl0_div0: TRAP(do_div0) | 37 | tl0_div0: TRAP(do_div0) |
37 | tl0_resv029: BTRAP(0x29) BTRAP(0x2a) BTRAP(0x2b) BTRAP(0x2c) BTRAP(0x2d) BTRAP(0x2e) | 38 | tl0_resv029: BTRAP(0x29) BTRAP(0x2a) BTRAP(0x2b) BTRAP(0x2c) BTRAP(0x2d) BTRAP(0x2e) |
38 | tl0_resv02f: BTRAP(0x2f) | 39 | tl0_resv02f: BTRAP(0x2f) |
39 | tl0_dax: TRAP_NOSAVE(__do_data_access_exception) | 40 | tl0_dax: TRAP_NOSAVE(__spitfire_data_access_exception) |
40 | tl0_resv031: BTRAP(0x31) | 41 | tl0_resv031: BTRAP(0x31) |
41 | tl0_dae: TRAP(do_dae) | 42 | tl0_dae: membar #Sync |
43 | TRAP_NOSAVE_7INSNS(__spitfire_access_error) | ||
42 | tl0_resv033: BTRAP(0x33) | 44 | tl0_resv033: BTRAP(0x33) |
43 | tl0_mna: TRAP_NOSAVE(do_mna) | 45 | tl0_mna: TRAP_NOSAVE(do_mna) |
44 | tl0_lddfmna: TRAP_NOSAVE(do_lddfmna) | 46 | tl0_lddfmna: TRAP_NOSAVE(do_lddfmna) |
@@ -73,7 +75,8 @@ tl0_resv05c: BTRAP(0x5c) BTRAP(0x5d) BTRAP(0x5e) BTRAP(0x5f) | |||
73 | tl0_ivec: TRAP_IVEC | 75 | tl0_ivec: TRAP_IVEC |
74 | tl0_paw: TRAP(do_paw) | 76 | tl0_paw: TRAP(do_paw) |
75 | tl0_vaw: TRAP(do_vaw) | 77 | tl0_vaw: TRAP(do_vaw) |
76 | tl0_cee: TRAP_NOSAVE(cee_trap) | 78 | tl0_cee: membar #Sync |
79 | TRAP_NOSAVE_7INSNS(__spitfire_cee_trap) | ||
77 | tl0_iamiss: | 80 | tl0_iamiss: |
78 | #include "itlb_base.S" | 81 | #include "itlb_base.S" |
79 | tl0_damiss: | 82 | tl0_damiss: |
@@ -175,9 +178,10 @@ tl0_resv1f0: BTRAPS(0x1f0) BTRAPS(0x1f8) | |||
175 | sparc64_ttable_tl1: | 178 | sparc64_ttable_tl1: |
176 | tl1_resv000: BOOT_KERNEL BTRAPTL1(0x1) BTRAPTL1(0x2) BTRAPTL1(0x3) | 179 | tl1_resv000: BOOT_KERNEL BTRAPTL1(0x1) BTRAPTL1(0x2) BTRAPTL1(0x3) |
177 | tl1_resv004: BTRAPTL1(0x4) BTRAPTL1(0x5) BTRAPTL1(0x6) BTRAPTL1(0x7) | 180 | tl1_resv004: BTRAPTL1(0x4) BTRAPTL1(0x5) BTRAPTL1(0x6) BTRAPTL1(0x7) |
178 | tl1_iax: TRAP_NOSAVE(__do_instruction_access_exception_tl1) | 181 | tl1_iax: TRAP_NOSAVE(__spitfire_insn_access_exception_tl1) |
179 | tl1_resv009: BTRAPTL1(0x9) | 182 | tl1_resv009: BTRAPTL1(0x9) |
180 | tl1_iae: TRAPTL1(do_iae_tl1) | 183 | tl1_iae: membar #Sync |
184 | TRAP_NOSAVE_7INSNS(__spitfire_access_error) | ||
181 | tl1_resv00b: BTRAPTL1(0xb) BTRAPTL1(0xc) BTRAPTL1(0xd) BTRAPTL1(0xe) BTRAPTL1(0xf) | 185 | tl1_resv00b: BTRAPTL1(0xb) BTRAPTL1(0xc) BTRAPTL1(0xd) BTRAPTL1(0xe) BTRAPTL1(0xf) |
182 | tl1_ill: TRAPTL1(do_ill_tl1) | 186 | tl1_ill: TRAPTL1(do_ill_tl1) |
183 | tl1_privop: BTRAPTL1(0x11) | 187 | tl1_privop: BTRAPTL1(0x11) |
@@ -193,9 +197,10 @@ tl1_cwin: CLEAN_WINDOW | |||
193 | tl1_div0: TRAPTL1(do_div0_tl1) | 197 | tl1_div0: TRAPTL1(do_div0_tl1) |
194 | tl1_resv029: BTRAPTL1(0x29) BTRAPTL1(0x2a) BTRAPTL1(0x2b) BTRAPTL1(0x2c) | 198 | tl1_resv029: BTRAPTL1(0x29) BTRAPTL1(0x2a) BTRAPTL1(0x2b) BTRAPTL1(0x2c) |
195 | tl1_resv02d: BTRAPTL1(0x2d) BTRAPTL1(0x2e) BTRAPTL1(0x2f) | 199 | tl1_resv02d: BTRAPTL1(0x2d) BTRAPTL1(0x2e) BTRAPTL1(0x2f) |
196 | tl1_dax: TRAP_NOSAVE(__do_data_access_exception_tl1) | 200 | tl1_dax: TRAP_NOSAVE(__spitfire_data_access_exception_tl1) |
197 | tl1_resv031: BTRAPTL1(0x31) | 201 | tl1_resv031: BTRAPTL1(0x31) |
198 | tl1_dae: TRAPTL1(do_dae_tl1) | 202 | tl1_dae: membar #Sync |
203 | TRAP_NOSAVE_7INSNS(__spitfire_access_error) | ||
199 | tl1_resv033: BTRAPTL1(0x33) | 204 | tl1_resv033: BTRAPTL1(0x33) |
200 | tl1_mna: TRAP_NOSAVE(do_mna) | 205 | tl1_mna: TRAP_NOSAVE(do_mna) |
201 | tl1_lddfmna: TRAPTL1(do_lddfmna_tl1) | 206 | tl1_lddfmna: TRAPTL1(do_lddfmna_tl1) |
@@ -219,8 +224,8 @@ tl1_paw: TRAPTL1(do_paw_tl1) | |||
219 | tl1_vaw: TRAPTL1(do_vaw_tl1) | 224 | tl1_vaw: TRAPTL1(do_vaw_tl1) |
220 | 225 | ||
221 | /* The grotty trick to save %g1 into current->thread.cee_stuff | 226 | /* The grotty trick to save %g1 into current->thread.cee_stuff |
222 | * is because when we take this trap we could be interrupting trap | 227 | * is because when we take this trap we could be interrupting |
223 | * code already using the trap alternate global registers. | 228 | * trap code already using the trap alternate global registers. |
224 | * | 229 | * |
225 | * We cross our fingers and pray that this store/load does | 230 | * We cross our fingers and pray that this store/load does |
226 | * not cause yet another CEE trap. | 231 | * not cause yet another CEE trap. |
diff --git a/arch/sparc64/kernel/unaligned.c b/arch/sparc64/kernel/unaligned.c index 11c3e88732e4..da9739f0d437 100644 --- a/arch/sparc64/kernel/unaligned.c +++ b/arch/sparc64/kernel/unaligned.c | |||
@@ -349,9 +349,9 @@ int handle_popc(u32 insn, struct pt_regs *regs) | |||
349 | 349 | ||
350 | extern void do_fpother(struct pt_regs *regs); | 350 | extern void do_fpother(struct pt_regs *regs); |
351 | extern void do_privact(struct pt_regs *regs); | 351 | extern void do_privact(struct pt_regs *regs); |
352 | extern void data_access_exception(struct pt_regs *regs, | 352 | extern void spitfire_data_access_exception(struct pt_regs *regs, |
353 | unsigned long sfsr, | 353 | unsigned long sfsr, |
354 | unsigned long sfar); | 354 | unsigned long sfar); |
355 | 355 | ||
356 | int handle_ldf_stq(u32 insn, struct pt_regs *regs) | 356 | int handle_ldf_stq(u32 insn, struct pt_regs *regs) |
357 | { | 357 | { |
@@ -394,14 +394,14 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs) | |||
394 | break; | 394 | break; |
395 | } | 395 | } |
396 | default: | 396 | default: |
397 | data_access_exception(regs, 0, addr); | 397 | spitfire_data_access_exception(regs, 0, addr); |
398 | return 1; | 398 | return 1; |
399 | } | 399 | } |
400 | if (put_user (first >> 32, (u32 __user *)addr) || | 400 | if (put_user (first >> 32, (u32 __user *)addr) || |
401 | __put_user ((u32)first, (u32 __user *)(addr + 4)) || | 401 | __put_user ((u32)first, (u32 __user *)(addr + 4)) || |
402 | __put_user (second >> 32, (u32 __user *)(addr + 8)) || | 402 | __put_user (second >> 32, (u32 __user *)(addr + 8)) || |
403 | __put_user ((u32)second, (u32 __user *)(addr + 12))) { | 403 | __put_user ((u32)second, (u32 __user *)(addr + 12))) { |
404 | data_access_exception(regs, 0, addr); | 404 | spitfire_data_access_exception(regs, 0, addr); |
405 | return 1; | 405 | return 1; |
406 | } | 406 | } |
407 | } else { | 407 | } else { |
@@ -414,7 +414,7 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs) | |||
414 | do_privact(regs); | 414 | do_privact(regs); |
415 | return 1; | 415 | return 1; |
416 | } else if (asi > ASI_SNFL) { | 416 | } else if (asi > ASI_SNFL) { |
417 | data_access_exception(regs, 0, addr); | 417 | spitfire_data_access_exception(regs, 0, addr); |
418 | return 1; | 418 | return 1; |
419 | } | 419 | } |
420 | switch (insn & 0x180000) { | 420 | switch (insn & 0x180000) { |
@@ -431,7 +431,7 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs) | |||
431 | err |= __get_user (data[i], (u32 __user *)(addr + 4*i)); | 431 | err |= __get_user (data[i], (u32 __user *)(addr + 4*i)); |
432 | } | 432 | } |
433 | if (err && !(asi & 0x2 /* NF */)) { | 433 | if (err && !(asi & 0x2 /* NF */)) { |
434 | data_access_exception(regs, 0, addr); | 434 | spitfire_data_access_exception(regs, 0, addr); |
435 | return 1; | 435 | return 1; |
436 | } | 436 | } |
437 | if (asi & 0x8) /* Little */ { | 437 | if (asi & 0x8) /* Little */ { |
@@ -534,7 +534,7 @@ void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr | |||
534 | *(u64 *)(f->regs + freg) = value; | 534 | *(u64 *)(f->regs + freg) = value; |
535 | current_thread_info()->fpsaved[0] |= flag; | 535 | current_thread_info()->fpsaved[0] |= flag; |
536 | } else { | 536 | } else { |
537 | daex: data_access_exception(regs, sfsr, sfar); | 537 | daex: spitfire_data_access_exception(regs, sfsr, sfar); |
538 | return; | 538 | return; |
539 | } | 539 | } |
540 | advance(regs); | 540 | advance(regs); |
@@ -578,7 +578,7 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr | |||
578 | __put_user ((u32)value, (u32 __user *)(sfar + 4))) | 578 | __put_user ((u32)value, (u32 __user *)(sfar + 4))) |
579 | goto daex; | 579 | goto daex; |
580 | } else { | 580 | } else { |
581 | daex: data_access_exception(regs, sfsr, sfar); | 581 | daex: spitfire_data_access_exception(regs, sfsr, sfar); |
582 | return; | 582 | return; |
583 | } | 583 | } |
584 | advance(regs); | 584 | advance(regs); |
diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S index dfbc7e0dcf70..99c809a1e5ac 100644 --- a/arch/sparc64/kernel/winfixup.S +++ b/arch/sparc64/kernel/winfixup.S | |||
@@ -318,7 +318,7 @@ fill_fixup_dax: | |||
318 | nop | 318 | nop |
319 | rdpr %pstate, %l1 ! Prepare to change globals. | 319 | rdpr %pstate, %l1 ! Prepare to change globals. |
320 | mov %g4, %o1 ! Setup args for | 320 | mov %g4, %o1 ! Setup args for |
321 | mov %g5, %o2 ! final call to data_access_exception. | 321 | mov %g5, %o2 ! final call to spitfire_data_access_exception. |
322 | andn %l1, PSTATE_MM, %l1 ! We want to be in RMO | 322 | andn %l1, PSTATE_MM, %l1 ! We want to be in RMO |
323 | 323 | ||
324 | mov %g6, %o7 ! Stash away current. | 324 | mov %g6, %o7 ! Stash away current. |
@@ -330,7 +330,7 @@ fill_fixup_dax: | |||
330 | mov TSB_REG, %g1 | 330 | mov TSB_REG, %g1 |
331 | ldxa [%g1] ASI_IMMU, %g5 | 331 | ldxa [%g1] ASI_IMMU, %g5 |
332 | #endif | 332 | #endif |
333 | call data_access_exception | 333 | call spitfire_data_access_exception |
334 | add %sp, PTREGS_OFF, %o0 | 334 | add %sp, PTREGS_OFF, %o0 |
335 | 335 | ||
336 | b,pt %xcc, rtrap | 336 | b,pt %xcc, rtrap |
@@ -391,7 +391,7 @@ window_dax_from_user_common: | |||
391 | 109: or %g7, %lo(109b), %g7 | 391 | 109: or %g7, %lo(109b), %g7 |
392 | mov %l4, %o1 | 392 | mov %l4, %o1 |
393 | mov %l5, %o2 | 393 | mov %l5, %o2 |
394 | call data_access_exception | 394 | call spitfire_data_access_exception |
395 | add %sp, PTREGS_OFF, %o0 | 395 | add %sp, PTREGS_OFF, %o0 |
396 | ba,pt %xcc, rtrap | 396 | ba,pt %xcc, rtrap |
397 | clr %l6 | 397 | clr %l6 |
diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile index 40dbeec7e5d6..6201f1040982 100644 --- a/arch/sparc64/lib/Makefile +++ b/arch/sparc64/lib/Makefile | |||
@@ -12,7 +12,7 @@ lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \ | |||
12 | U1memcpy.o U1copy_from_user.o U1copy_to_user.o \ | 12 | U1memcpy.o U1copy_from_user.o U1copy_to_user.o \ |
13 | U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \ | 13 | U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \ |
14 | copy_in_user.o user_fixup.o memmove.o \ | 14 | copy_in_user.o user_fixup.o memmove.o \ |
15 | mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o | 15 | mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o mb.o |
16 | 16 | ||
17 | lib-$(CONFIG_DEBUG_SPINLOCK) += debuglocks.o | 17 | lib-$(CONFIG_DEBUG_SPINLOCK) += debuglocks.o |
18 | lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o | 18 | lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o |
diff --git a/arch/sparc64/lib/debuglocks.c b/arch/sparc64/lib/debuglocks.c index f03344cf784e..f5f0b5586f01 100644 --- a/arch/sparc64/lib/debuglocks.c +++ b/arch/sparc64/lib/debuglocks.c | |||
@@ -12,8 +12,6 @@ | |||
12 | 12 | ||
13 | #ifdef CONFIG_SMP | 13 | #ifdef CONFIG_SMP |
14 | 14 | ||
15 | #define GET_CALLER(PC) __asm__ __volatile__("mov %%i7, %0" : "=r" (PC)) | ||
16 | |||
17 | static inline void show (char *str, spinlock_t *lock, unsigned long caller) | 15 | static inline void show (char *str, spinlock_t *lock, unsigned long caller) |
18 | { | 16 | { |
19 | int cpu = smp_processor_id(); | 17 | int cpu = smp_processor_id(); |
@@ -51,20 +49,19 @@ static inline void show_write (char *str, rwlock_t *lock, unsigned long caller) | |||
51 | #undef INIT_STUCK | 49 | #undef INIT_STUCK |
52 | #define INIT_STUCK 100000000 | 50 | #define INIT_STUCK 100000000 |
53 | 51 | ||
54 | void _do_spin_lock(spinlock_t *lock, char *str) | 52 | void _do_spin_lock(spinlock_t *lock, char *str, unsigned long caller) |
55 | { | 53 | { |
56 | unsigned long caller, val; | 54 | unsigned long val; |
57 | int stuck = INIT_STUCK; | 55 | int stuck = INIT_STUCK; |
58 | int cpu = get_cpu(); | 56 | int cpu = get_cpu(); |
59 | int shown = 0; | 57 | int shown = 0; |
60 | 58 | ||
61 | GET_CALLER(caller); | ||
62 | again: | 59 | again: |
63 | __asm__ __volatile__("ldstub [%1], %0" | 60 | __asm__ __volatile__("ldstub [%1], %0" |
64 | : "=r" (val) | 61 | : "=r" (val) |
65 | : "r" (&(lock->lock)) | 62 | : "r" (&(lock->lock)) |
66 | : "memory"); | 63 | : "memory"); |
67 | membar("#StoreLoad | #StoreStore"); | 64 | membar_storeload_storestore(); |
68 | if (val) { | 65 | if (val) { |
69 | while (lock->lock) { | 66 | while (lock->lock) { |
70 | if (!--stuck) { | 67 | if (!--stuck) { |
@@ -72,7 +69,7 @@ again: | |||
72 | show(str, lock, caller); | 69 | show(str, lock, caller); |
73 | stuck = INIT_STUCK; | 70 | stuck = INIT_STUCK; |
74 | } | 71 | } |
75 | membar("#LoadLoad"); | 72 | rmb(); |
76 | } | 73 | } |
77 | goto again; | 74 | goto again; |
78 | } | 75 | } |
@@ -84,17 +81,16 @@ again: | |||
84 | put_cpu(); | 81 | put_cpu(); |
85 | } | 82 | } |
86 | 83 | ||
87 | int _do_spin_trylock(spinlock_t *lock) | 84 | int _do_spin_trylock(spinlock_t *lock, unsigned long caller) |
88 | { | 85 | { |
89 | unsigned long val, caller; | 86 | unsigned long val; |
90 | int cpu = get_cpu(); | 87 | int cpu = get_cpu(); |
91 | 88 | ||
92 | GET_CALLER(caller); | ||
93 | __asm__ __volatile__("ldstub [%1], %0" | 89 | __asm__ __volatile__("ldstub [%1], %0" |
94 | : "=r" (val) | 90 | : "=r" (val) |
95 | : "r" (&(lock->lock)) | 91 | : "r" (&(lock->lock)) |
96 | : "memory"); | 92 | : "memory"); |
97 | membar("#StoreLoad | #StoreStore"); | 93 | membar_storeload_storestore(); |
98 | if (!val) { | 94 | if (!val) { |
99 | lock->owner_pc = ((unsigned int)caller); | 95 | lock->owner_pc = ((unsigned int)caller); |
100 | lock->owner_cpu = cpu; | 96 | lock->owner_cpu = cpu; |
@@ -111,21 +107,20 @@ void _do_spin_unlock(spinlock_t *lock) | |||
111 | { | 107 | { |
112 | lock->owner_pc = 0; | 108 | lock->owner_pc = 0; |
113 | lock->owner_cpu = NO_PROC_ID; | 109 | lock->owner_cpu = NO_PROC_ID; |
114 | membar("#StoreStore | #LoadStore"); | 110 | membar_storestore_loadstore(); |
115 | lock->lock = 0; | 111 | lock->lock = 0; |
116 | current->thread.smp_lock_count--; | 112 | current->thread.smp_lock_count--; |
117 | } | 113 | } |
118 | 114 | ||
119 | /* Keep INIT_STUCK the same... */ | 115 | /* Keep INIT_STUCK the same... */ |
120 | 116 | ||
121 | void _do_read_lock (rwlock_t *rw, char *str) | 117 | void _do_read_lock(rwlock_t *rw, char *str, unsigned long caller) |
122 | { | 118 | { |
123 | unsigned long caller, val; | 119 | unsigned long val; |
124 | int stuck = INIT_STUCK; | 120 | int stuck = INIT_STUCK; |
125 | int cpu = get_cpu(); | 121 | int cpu = get_cpu(); |
126 | int shown = 0; | 122 | int shown = 0; |
127 | 123 | ||
128 | GET_CALLER(caller); | ||
129 | wlock_again: | 124 | wlock_again: |
130 | /* Wait for any writer to go away. */ | 125 | /* Wait for any writer to go away. */ |
131 | while (((long)(rw->lock)) < 0) { | 126 | while (((long)(rw->lock)) < 0) { |
@@ -134,7 +129,7 @@ wlock_again: | |||
134 | show_read(str, rw, caller); | 129 | show_read(str, rw, caller); |
135 | stuck = INIT_STUCK; | 130 | stuck = INIT_STUCK; |
136 | } | 131 | } |
137 | membar("#LoadLoad"); | 132 | rmb(); |
138 | } | 133 | } |
139 | /* Try once to increment the counter. */ | 134 | /* Try once to increment the counter. */ |
140 | __asm__ __volatile__( | 135 | __asm__ __volatile__( |
@@ -147,7 +142,7 @@ wlock_again: | |||
147 | "2:" : "=r" (val) | 142 | "2:" : "=r" (val) |
148 | : "0" (&(rw->lock)) | 143 | : "0" (&(rw->lock)) |
149 | : "g1", "g7", "memory"); | 144 | : "g1", "g7", "memory"); |
150 | membar("#StoreLoad | #StoreStore"); | 145 | membar_storeload_storestore(); |
151 | if (val) | 146 | if (val) |
152 | goto wlock_again; | 147 | goto wlock_again; |
153 | rw->reader_pc[cpu] = ((unsigned int)caller); | 148 | rw->reader_pc[cpu] = ((unsigned int)caller); |
@@ -157,15 +152,13 @@ wlock_again: | |||
157 | put_cpu(); | 152 | put_cpu(); |
158 | } | 153 | } |
159 | 154 | ||
160 | void _do_read_unlock (rwlock_t *rw, char *str) | 155 | void _do_read_unlock(rwlock_t *rw, char *str, unsigned long caller) |
161 | { | 156 | { |
162 | unsigned long caller, val; | 157 | unsigned long val; |
163 | int stuck = INIT_STUCK; | 158 | int stuck = INIT_STUCK; |
164 | int cpu = get_cpu(); | 159 | int cpu = get_cpu(); |
165 | int shown = 0; | 160 | int shown = 0; |
166 | 161 | ||
167 | GET_CALLER(caller); | ||
168 | |||
169 | /* Drop our identity _first_. */ | 162 | /* Drop our identity _first_. */ |
170 | rw->reader_pc[cpu] = 0; | 163 | rw->reader_pc[cpu] = 0; |
171 | current->thread.smp_lock_count--; | 164 | current->thread.smp_lock_count--; |
@@ -193,14 +186,13 @@ runlock_again: | |||
193 | put_cpu(); | 186 | put_cpu(); |
194 | } | 187 | } |
195 | 188 | ||
196 | void _do_write_lock (rwlock_t *rw, char *str) | 189 | void _do_write_lock(rwlock_t *rw, char *str, unsigned long caller) |
197 | { | 190 | { |
198 | unsigned long caller, val; | 191 | unsigned long val; |
199 | int stuck = INIT_STUCK; | 192 | int stuck = INIT_STUCK; |
200 | int cpu = get_cpu(); | 193 | int cpu = get_cpu(); |
201 | int shown = 0; | 194 | int shown = 0; |
202 | 195 | ||
203 | GET_CALLER(caller); | ||
204 | wlock_again: | 196 | wlock_again: |
205 | /* Spin while there is another writer. */ | 197 | /* Spin while there is another writer. */ |
206 | while (((long)rw->lock) < 0) { | 198 | while (((long)rw->lock) < 0) { |
@@ -209,7 +201,7 @@ wlock_again: | |||
209 | show_write(str, rw, caller); | 201 | show_write(str, rw, caller); |
210 | stuck = INIT_STUCK; | 202 | stuck = INIT_STUCK; |
211 | } | 203 | } |
212 | membar("#LoadLoad"); | 204 | rmb(); |
213 | } | 205 | } |
214 | 206 | ||
215 | /* Try to acuire the write bit. */ | 207 | /* Try to acuire the write bit. */ |
@@ -264,7 +256,7 @@ wlock_again: | |||
264 | show_write(str, rw, caller); | 256 | show_write(str, rw, caller); |
265 | stuck = INIT_STUCK; | 257 | stuck = INIT_STUCK; |
266 | } | 258 | } |
267 | membar("#LoadLoad"); | 259 | rmb(); |
268 | } | 260 | } |
269 | goto wlock_again; | 261 | goto wlock_again; |
270 | } | 262 | } |
@@ -278,14 +270,12 @@ wlock_again: | |||
278 | put_cpu(); | 270 | put_cpu(); |
279 | } | 271 | } |
280 | 272 | ||
281 | void _do_write_unlock(rwlock_t *rw) | 273 | void _do_write_unlock(rwlock_t *rw, unsigned long caller) |
282 | { | 274 | { |
283 | unsigned long caller, val; | 275 | unsigned long val; |
284 | int stuck = INIT_STUCK; | 276 | int stuck = INIT_STUCK; |
285 | int shown = 0; | 277 | int shown = 0; |
286 | 278 | ||
287 | GET_CALLER(caller); | ||
288 | |||
289 | /* Drop our identity _first_ */ | 279 | /* Drop our identity _first_ */ |
290 | rw->writer_pc = 0; | 280 | rw->writer_pc = 0; |
291 | rw->writer_cpu = NO_PROC_ID; | 281 | rw->writer_cpu = NO_PROC_ID; |
@@ -313,13 +303,11 @@ wlock_again: | |||
313 | } | 303 | } |
314 | } | 304 | } |
315 | 305 | ||
316 | int _do_write_trylock (rwlock_t *rw, char *str) | 306 | int _do_write_trylock(rwlock_t *rw, char *str, unsigned long caller) |
317 | { | 307 | { |
318 | unsigned long caller, val; | 308 | unsigned long val; |
319 | int cpu = get_cpu(); | 309 | int cpu = get_cpu(); |
320 | 310 | ||
321 | GET_CALLER(caller); | ||
322 | |||
323 | /* Try to acuire the write bit. */ | 311 | /* Try to acuire the write bit. */ |
324 | __asm__ __volatile__( | 312 | __asm__ __volatile__( |
325 | " mov 1, %%g3\n" | 313 | " mov 1, %%g3\n" |
diff --git a/arch/sparc64/lib/mb.S b/arch/sparc64/lib/mb.S new file mode 100644 index 000000000000..4004f748619f --- /dev/null +++ b/arch/sparc64/lib/mb.S | |||
@@ -0,0 +1,73 @@ | |||
1 | /* mb.S: Out of line memory barriers. | ||
2 | * | ||
3 | * Copyright (C) 2005 David S. Miller (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | /* These are here in an effort to more fully work around | ||
7 | * Spitfire Errata #51. Essentially, if a memory barrier | ||
8 | * occurs soon after a mispredicted branch, the chip can stop | ||
9 | * executing instructions until a trap occurs. Therefore, if | ||
10 | * interrupts are disabled, the chip can hang forever. | ||
11 | * | ||
12 | * It used to be believed that the memory barrier had to be | ||
13 | * right in the delay slot, but a case has been traced | ||
14 | * recently wherein the memory barrier was one instruction | ||
15 | * after the branch delay slot and the chip still hung. The | ||
16 | * offending sequence was the following in sym_wakeup_done() | ||
17 | * of the sym53c8xx_2 driver: | ||
18 | * | ||
19 | * call sym_ccb_from_dsa, 0 | ||
20 | * movge %icc, 0, %l0 | ||
21 | * brz,pn %o0, .LL1303 | ||
22 | * mov %o0, %l2 | ||
23 | * membar #LoadLoad | ||
24 | * | ||
25 | * The branch has to be mispredicted for the bug to occur. | ||
26 | * Therefore, we put the memory barrier explicitly into a | ||
27 | * "branch always, predicted taken" delay slot to avoid the | ||
28 | * problem case. | ||
29 | */ | ||
30 | |||
31 | .text | ||
32 | |||
33 | 99: retl | ||
34 | nop | ||
35 | |||
36 | .globl mb | ||
37 | mb: ba,pt %xcc, 99b | ||
38 | membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad | ||
39 | .size mb, .-mb | ||
40 | |||
41 | .globl rmb | ||
42 | rmb: ba,pt %xcc, 99b | ||
43 | membar #LoadLoad | ||
44 | .size rmb, .-rmb | ||
45 | |||
46 | .globl wmb | ||
47 | wmb: ba,pt %xcc, 99b | ||
48 | membar #StoreStore | ||
49 | .size wmb, .-wmb | ||
50 | |||
51 | .globl membar_storeload | ||
52 | membar_storeload: | ||
53 | ba,pt %xcc, 99b | ||
54 | membar #StoreLoad | ||
55 | .size membar_storeload, .-membar_storeload | ||
56 | |||
57 | .globl membar_storeload_storestore | ||
58 | membar_storeload_storestore: | ||
59 | ba,pt %xcc, 99b | ||
60 | membar #StoreLoad | #StoreStore | ||
61 | .size membar_storeload_storestore, .-membar_storeload_storestore | ||
62 | |||
63 | .globl membar_storeload_loadload | ||
64 | membar_storeload_loadload: | ||
65 | ba,pt %xcc, 99b | ||
66 | membar #StoreLoad | #LoadLoad | ||
67 | .size membar_storeload_loadload, .-membar_storeload_loadload | ||
68 | |||
69 | .globl membar_storestore_loadstore | ||
70 | membar_storestore_loadstore: | ||
71 | ba,pt %xcc, 99b | ||
72 | membar #StoreStore | #LoadStore | ||
73 | .size membar_storestore_loadstore, .-membar_storestore_loadstore | ||
diff --git a/arch/sparc64/solaris/misc.c b/arch/sparc64/solaris/misc.c index 15b4cfe07557..302efbcba70e 100644 --- a/arch/sparc64/solaris/misc.c +++ b/arch/sparc64/solaris/misc.c | |||
@@ -737,7 +737,8 @@ MODULE_LICENSE("GPL"); | |||
737 | extern u32 tl0_solaris[8]; | 737 | extern u32 tl0_solaris[8]; |
738 | #define update_ttable(x) \ | 738 | #define update_ttable(x) \ |
739 | tl0_solaris[3] = (((long)(x) - (long)tl0_solaris - 3) >> 2) | 0x40000000; \ | 739 | tl0_solaris[3] = (((long)(x) - (long)tl0_solaris - 3) >> 2) | 0x40000000; \ |
740 | __asm__ __volatile__ ("membar #StoreStore; flush %0" : : "r" (&tl0_solaris[3])) | 740 | wmb(); \ |
741 | __asm__ __volatile__ ("flush %0" : : "r" (&tl0_solaris[3])) | ||
741 | #else | 742 | #else |
742 | #endif | 743 | #endif |
743 | 744 | ||
@@ -761,7 +762,8 @@ int init_module(void) | |||
761 | entry64_personality_patch |= | 762 | entry64_personality_patch |= |
762 | (offsetof(struct task_struct, personality) + | 763 | (offsetof(struct task_struct, personality) + |
763 | (sizeof(unsigned long) - 1)); | 764 | (sizeof(unsigned long) - 1)); |
764 | __asm__ __volatile__("membar #StoreStore; flush %0" | 765 | wmb(); |
766 | __asm__ __volatile__("flush %0" | ||
765 | : : "r" (&entry64_personality_patch)); | 767 | : : "r" (&entry64_personality_patch)); |
766 | return 0; | 768 | return 0; |
767 | } | 769 | } |
diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c index 73c6b85299c1..d74a7c5e75dd 100644 --- a/drivers/atm/ambassador.c +++ b/drivers/atm/ambassador.c | |||
@@ -513,7 +513,7 @@ static void rx_complete (amb_dev * dev, rx_out * rx) { | |||
513 | 513 | ||
514 | // VC layer stats | 514 | // VC layer stats |
515 | atomic_inc(&atm_vcc->stats->rx); | 515 | atomic_inc(&atm_vcc->stats->rx); |
516 | do_gettimeofday(&skb->stamp); | 516 | __net_timestamp(skb); |
517 | // end of our responsability | 517 | // end of our responsability |
518 | atm_vcc->push (atm_vcc, skb); | 518 | atm_vcc->push (atm_vcc, skb); |
519 | return; | 519 | return; |
diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c index f2f01cb82cb4..57f1810fdccd 100644 --- a/drivers/atm/atmtcp.c +++ b/drivers/atm/atmtcp.c | |||
@@ -325,7 +325,7 @@ static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb) | |||
325 | result = -ENOBUFS; | 325 | result = -ENOBUFS; |
326 | goto done; | 326 | goto done; |
327 | } | 327 | } |
328 | do_gettimeofday(&new_skb->stamp); | 328 | __net_timestamp(new_skb); |
329 | memcpy(skb_put(new_skb,skb->len),skb->data,skb->len); | 329 | memcpy(skb_put(new_skb,skb->len),skb->data,skb->len); |
330 | out_vcc->push(out_vcc,new_skb); | 330 | out_vcc->push(out_vcc,new_skb); |
331 | atomic_inc(&vcc->stats->tx); | 331 | atomic_inc(&vcc->stats->tx); |
diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 10da36934769..c13c4d736ef5 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c | |||
@@ -537,7 +537,7 @@ static int rx_aal0(struct atm_vcc *vcc) | |||
537 | return 0; | 537 | return 0; |
538 | } | 538 | } |
539 | skb_put(skb,length); | 539 | skb_put(skb,length); |
540 | skb->stamp = eni_vcc->timestamp; | 540 | skb_set_timestamp(skb, &eni_vcc->timestamp); |
541 | DPRINTK("got len %ld\n",length); | 541 | DPRINTK("got len %ld\n",length); |
542 | if (do_rx_dma(vcc,skb,1,length >> 2,length >> 2)) return 1; | 542 | if (do_rx_dma(vcc,skb,1,length >> 2,length >> 2)) return 1; |
543 | eni_vcc->rxing++; | 543 | eni_vcc->rxing++; |
diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index b078fa548ebf..58219744f5db 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c | |||
@@ -815,7 +815,7 @@ static void process_incoming (struct fs_dev *dev, struct queue *q) | |||
815 | skb_put (skb, qe->p1 & 0xffff); | 815 | skb_put (skb, qe->p1 & 0xffff); |
816 | ATM_SKB(skb)->vcc = atm_vcc; | 816 | ATM_SKB(skb)->vcc = atm_vcc; |
817 | atomic_inc(&atm_vcc->stats->rx); | 817 | atomic_inc(&atm_vcc->stats->rx); |
818 | do_gettimeofday(&skb->stamp); | 818 | __net_timestamp(skb); |
819 | fs_dprintk (FS_DEBUG_ALLOC, "Free rec-skb: %p (pushed)\n", skb); | 819 | fs_dprintk (FS_DEBUG_ALLOC, "Free rec-skb: %p (pushed)\n", skb); |
820 | atm_vcc->push (atm_vcc, skb); | 820 | atm_vcc->push (atm_vcc, skb); |
821 | fs_dprintk (FS_DEBUG_ALLOC, "Free rec-d: %p\n", pe); | 821 | fs_dprintk (FS_DEBUG_ALLOC, "Free rec-d: %p\n", pe); |
diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index 5f702199543a..2bf723a7b6e6 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c | |||
@@ -1176,7 +1176,7 @@ fore200e_push_rpd(struct fore200e* fore200e, struct atm_vcc* vcc, struct rpd* rp | |||
1176 | return -ENOMEM; | 1176 | return -ENOMEM; |
1177 | } | 1177 | } |
1178 | 1178 | ||
1179 | do_gettimeofday(&skb->stamp); | 1179 | __net_timestamp(skb); |
1180 | 1180 | ||
1181 | #ifdef FORE200E_52BYTE_AAL0_SDU | 1181 | #ifdef FORE200E_52BYTE_AAL0_SDU |
1182 | if (cell_header) { | 1182 | if (cell_header) { |
diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 28250c9b32d6..fde9334059af 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c | |||
@@ -1886,7 +1886,7 @@ he_service_rbrq(struct he_dev *he_dev, int group) | |||
1886 | if (rx_skb_reserve > 0) | 1886 | if (rx_skb_reserve > 0) |
1887 | skb_reserve(skb, rx_skb_reserve); | 1887 | skb_reserve(skb, rx_skb_reserve); |
1888 | 1888 | ||
1889 | do_gettimeofday(&skb->stamp); | 1889 | __net_timestamp(skb); |
1890 | 1890 | ||
1891 | for (iov = he_vcc->iov_head; | 1891 | for (iov = he_vcc->iov_head; |
1892 | iov < he_vcc->iov_tail; ++iov) { | 1892 | iov < he_vcc->iov_tail; ++iov) { |
diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index 924a2c8988bd..0cded0468003 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c | |||
@@ -1034,7 +1034,7 @@ static void rx_schedule (hrz_dev * dev, int irq) { | |||
1034 | struct atm_vcc * vcc = ATM_SKB(skb)->vcc; | 1034 | struct atm_vcc * vcc = ATM_SKB(skb)->vcc; |
1035 | // VC layer stats | 1035 | // VC layer stats |
1036 | atomic_inc(&vcc->stats->rx); | 1036 | atomic_inc(&vcc->stats->rx); |
1037 | do_gettimeofday(&skb->stamp); | 1037 | __net_timestamp(skb); |
1038 | // end of our responsability | 1038 | // end of our responsability |
1039 | vcc->push (vcc, skb); | 1039 | vcc->push (vcc, skb); |
1040 | } | 1040 | } |
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 30b7e990ed0b..b4a76cade646 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c | |||
@@ -1101,7 +1101,7 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) | |||
1101 | cell, ATM_CELL_PAYLOAD); | 1101 | cell, ATM_CELL_PAYLOAD); |
1102 | 1102 | ||
1103 | ATM_SKB(sb)->vcc = vcc; | 1103 | ATM_SKB(sb)->vcc = vcc; |
1104 | do_gettimeofday(&sb->stamp); | 1104 | __net_timestamp(sb); |
1105 | vcc->push(vcc, sb); | 1105 | vcc->push(vcc, sb); |
1106 | atomic_inc(&vcc->stats->rx); | 1106 | atomic_inc(&vcc->stats->rx); |
1107 | 1107 | ||
@@ -1179,7 +1179,7 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) | |||
1179 | 1179 | ||
1180 | skb_trim(skb, len); | 1180 | skb_trim(skb, len); |
1181 | ATM_SKB(skb)->vcc = vcc; | 1181 | ATM_SKB(skb)->vcc = vcc; |
1182 | do_gettimeofday(&skb->stamp); | 1182 | __net_timestamp(skb); |
1183 | 1183 | ||
1184 | vcc->push(vcc, skb); | 1184 | vcc->push(vcc, skb); |
1185 | atomic_inc(&vcc->stats->rx); | 1185 | atomic_inc(&vcc->stats->rx); |
@@ -1201,7 +1201,7 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) | |||
1201 | 1201 | ||
1202 | skb_trim(skb, len); | 1202 | skb_trim(skb, len); |
1203 | ATM_SKB(skb)->vcc = vcc; | 1203 | ATM_SKB(skb)->vcc = vcc; |
1204 | do_gettimeofday(&skb->stamp); | 1204 | __net_timestamp(skb); |
1205 | 1205 | ||
1206 | vcc->push(vcc, skb); | 1206 | vcc->push(vcc, skb); |
1207 | atomic_inc(&vcc->stats->rx); | 1207 | atomic_inc(&vcc->stats->rx); |
@@ -1340,7 +1340,7 @@ idt77252_rx_raw(struct idt77252_dev *card) | |||
1340 | ATM_CELL_PAYLOAD); | 1340 | ATM_CELL_PAYLOAD); |
1341 | 1341 | ||
1342 | ATM_SKB(sb)->vcc = vcc; | 1342 | ATM_SKB(sb)->vcc = vcc; |
1343 | do_gettimeofday(&sb->stamp); | 1343 | __net_timestamp(sb); |
1344 | vcc->push(vcc, sb); | 1344 | vcc->push(vcc, sb); |
1345 | atomic_inc(&vcc->stats->rx); | 1345 | atomic_inc(&vcc->stats->rx); |
1346 | 1346 | ||
diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c index ffe3afa723b8..51ec14787293 100644 --- a/drivers/atm/lanai.c +++ b/drivers/atm/lanai.c | |||
@@ -1427,7 +1427,7 @@ static void vcc_rx_aal5(struct lanai_vcc *lvcc, int endptr) | |||
1427 | skb_put(skb, size); | 1427 | skb_put(skb, size); |
1428 | vcc_rx_memcpy(skb->data, lvcc, size); | 1428 | vcc_rx_memcpy(skb->data, lvcc, size); |
1429 | ATM_SKB(skb)->vcc = lvcc->rx.atmvcc; | 1429 | ATM_SKB(skb)->vcc = lvcc->rx.atmvcc; |
1430 | do_gettimeofday(&skb->stamp); | 1430 | __net_timestamp(skb); |
1431 | lvcc->rx.atmvcc->push(lvcc->rx.atmvcc, skb); | 1431 | lvcc->rx.atmvcc->push(lvcc->rx.atmvcc, skb); |
1432 | atomic_inc(&lvcc->rx.atmvcc->stats->rx); | 1432 | atomic_inc(&lvcc->rx.atmvcc->stats->rx); |
1433 | out: | 1433 | out: |
diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index b2a7b754fd14..c57e20dcb0f8 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c | |||
@@ -214,8 +214,7 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev); | |||
214 | static void __devinit ns_init_card_error(ns_dev *card, int error); | 214 | static void __devinit ns_init_card_error(ns_dev *card, int error); |
215 | static scq_info *get_scq(int size, u32 scd); | 215 | static scq_info *get_scq(int size, u32 scd); |
216 | static void free_scq(scq_info *scq, struct atm_vcc *vcc); | 216 | static void free_scq(scq_info *scq, struct atm_vcc *vcc); |
217 | static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1, | 217 | static void push_rxbufs(ns_dev *, struct sk_buff *); |
218 | u32 handle2, u32 addr2); | ||
219 | static irqreturn_t ns_irq_handler(int irq, void *dev_id, struct pt_regs *regs); | 218 | static irqreturn_t ns_irq_handler(int irq, void *dev_id, struct pt_regs *regs); |
220 | static int ns_open(struct atm_vcc *vcc); | 219 | static int ns_open(struct atm_vcc *vcc); |
221 | static void ns_close(struct atm_vcc *vcc); | 220 | static void ns_close(struct atm_vcc *vcc); |
@@ -766,6 +765,7 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev) | |||
766 | ns_init_card_error(card, error); | 765 | ns_init_card_error(card, error); |
767 | return error; | 766 | return error; |
768 | } | 767 | } |
768 | NS_SKB_CB(hb)->buf_type = BUF_NONE; | ||
769 | skb_queue_tail(&card->hbpool.queue, hb); | 769 | skb_queue_tail(&card->hbpool.queue, hb); |
770 | card->hbpool.count++; | 770 | card->hbpool.count++; |
771 | } | 771 | } |
@@ -786,9 +786,10 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev) | |||
786 | ns_init_card_error(card, error); | 786 | ns_init_card_error(card, error); |
787 | return error; | 787 | return error; |
788 | } | 788 | } |
789 | NS_SKB_CB(lb)->buf_type = BUF_LG; | ||
789 | skb_queue_tail(&card->lbpool.queue, lb); | 790 | skb_queue_tail(&card->lbpool.queue, lb); |
790 | skb_reserve(lb, NS_SMBUFSIZE); | 791 | skb_reserve(lb, NS_SMBUFSIZE); |
791 | push_rxbufs(card, BUF_LG, (u32) lb, (u32) virt_to_bus(lb->data), 0, 0); | 792 | push_rxbufs(card, lb); |
792 | /* Due to the implementation of push_rxbufs() this is 1, not 0 */ | 793 | /* Due to the implementation of push_rxbufs() this is 1, not 0 */ |
793 | if (j == 1) | 794 | if (j == 1) |
794 | { | 795 | { |
@@ -822,9 +823,10 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev) | |||
822 | ns_init_card_error(card, error); | 823 | ns_init_card_error(card, error); |
823 | return error; | 824 | return error; |
824 | } | 825 | } |
826 | NS_SKB_CB(sb)->buf_type = BUF_SM; | ||
825 | skb_queue_tail(&card->sbpool.queue, sb); | 827 | skb_queue_tail(&card->sbpool.queue, sb); |
826 | skb_reserve(sb, NS_AAL0_HEADER); | 828 | skb_reserve(sb, NS_AAL0_HEADER); |
827 | push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), 0, 0); | 829 | push_rxbufs(card, sb); |
828 | } | 830 | } |
829 | /* Test for strange behaviour which leads to crashes */ | 831 | /* Test for strange behaviour which leads to crashes */ |
830 | if ((bcount = ns_stat_sfbqc_get(readl(card->membase + STAT))) < card->sbnr.min) | 832 | if ((bcount = ns_stat_sfbqc_get(readl(card->membase + STAT))) < card->sbnr.min) |
@@ -852,6 +854,7 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev) | |||
852 | ns_init_card_error(card, error); | 854 | ns_init_card_error(card, error); |
853 | return error; | 855 | return error; |
854 | } | 856 | } |
857 | NS_SKB_CB(iovb)->buf_type = BUF_NONE; | ||
855 | skb_queue_tail(&card->iovpool.queue, iovb); | 858 | skb_queue_tail(&card->iovpool.queue, iovb); |
856 | card->iovpool.count++; | 859 | card->iovpool.count++; |
857 | } | 860 | } |
@@ -1078,12 +1081,18 @@ static void free_scq(scq_info *scq, struct atm_vcc *vcc) | |||
1078 | 1081 | ||
1079 | /* The handles passed must be pointers to the sk_buff containing the small | 1082 | /* The handles passed must be pointers to the sk_buff containing the small |
1080 | or large buffer(s) cast to u32. */ | 1083 | or large buffer(s) cast to u32. */ |
1081 | static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1, | 1084 | static void push_rxbufs(ns_dev *card, struct sk_buff *skb) |
1082 | u32 handle2, u32 addr2) | ||
1083 | { | 1085 | { |
1086 | struct ns_skb_cb *cb = NS_SKB_CB(skb); | ||
1087 | u32 handle1, addr1; | ||
1088 | u32 handle2, addr2; | ||
1084 | u32 stat; | 1089 | u32 stat; |
1085 | unsigned long flags; | 1090 | unsigned long flags; |
1086 | 1091 | ||
1092 | /* *BARF* */ | ||
1093 | handle2 = addr2 = 0; | ||
1094 | handle1 = (u32)skb; | ||
1095 | addr1 = (u32)virt_to_bus(skb->data); | ||
1087 | 1096 | ||
1088 | #ifdef GENERAL_DEBUG | 1097 | #ifdef GENERAL_DEBUG |
1089 | if (!addr1) | 1098 | if (!addr1) |
@@ -1093,7 +1102,7 @@ static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1, | |||
1093 | stat = readl(card->membase + STAT); | 1102 | stat = readl(card->membase + STAT); |
1094 | card->sbfqc = ns_stat_sfbqc_get(stat); | 1103 | card->sbfqc = ns_stat_sfbqc_get(stat); |
1095 | card->lbfqc = ns_stat_lfbqc_get(stat); | 1104 | card->lbfqc = ns_stat_lfbqc_get(stat); |
1096 | if (type == BUF_SM) | 1105 | if (cb->buf_type == BUF_SM) |
1097 | { | 1106 | { |
1098 | if (!addr2) | 1107 | if (!addr2) |
1099 | { | 1108 | { |
@@ -1111,7 +1120,7 @@ static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1, | |||
1111 | } | 1120 | } |
1112 | } | 1121 | } |
1113 | } | 1122 | } |
1114 | else /* type == BUF_LG */ | 1123 | else /* buf_type == BUF_LG */ |
1115 | { | 1124 | { |
1116 | if (!addr2) | 1125 | if (!addr2) |
1117 | { | 1126 | { |
@@ -1132,26 +1141,26 @@ static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1, | |||
1132 | 1141 | ||
1133 | if (addr2) | 1142 | if (addr2) |
1134 | { | 1143 | { |
1135 | if (type == BUF_SM) | 1144 | if (cb->buf_type == BUF_SM) |
1136 | { | 1145 | { |
1137 | if (card->sbfqc >= card->sbnr.max) | 1146 | if (card->sbfqc >= card->sbnr.max) |
1138 | { | 1147 | { |
1139 | skb_unlink((struct sk_buff *) handle1); | 1148 | skb_unlink((struct sk_buff *) handle1, &card->sbpool.queue); |
1140 | dev_kfree_skb_any((struct sk_buff *) handle1); | 1149 | dev_kfree_skb_any((struct sk_buff *) handle1); |
1141 | skb_unlink((struct sk_buff *) handle2); | 1150 | skb_unlink((struct sk_buff *) handle2, &card->sbpool.queue); |
1142 | dev_kfree_skb_any((struct sk_buff *) handle2); | 1151 | dev_kfree_skb_any((struct sk_buff *) handle2); |
1143 | return; | 1152 | return; |
1144 | } | 1153 | } |
1145 | else | 1154 | else |
1146 | card->sbfqc += 2; | 1155 | card->sbfqc += 2; |
1147 | } | 1156 | } |
1148 | else /* (type == BUF_LG) */ | 1157 | else /* (buf_type == BUF_LG) */ |
1149 | { | 1158 | { |
1150 | if (card->lbfqc >= card->lbnr.max) | 1159 | if (card->lbfqc >= card->lbnr.max) |
1151 | { | 1160 | { |
1152 | skb_unlink((struct sk_buff *) handle1); | 1161 | skb_unlink((struct sk_buff *) handle1, &card->lbpool.queue); |
1153 | dev_kfree_skb_any((struct sk_buff *) handle1); | 1162 | dev_kfree_skb_any((struct sk_buff *) handle1); |
1154 | skb_unlink((struct sk_buff *) handle2); | 1163 | skb_unlink((struct sk_buff *) handle2, &card->lbpool.queue); |
1155 | dev_kfree_skb_any((struct sk_buff *) handle2); | 1164 | dev_kfree_skb_any((struct sk_buff *) handle2); |
1156 | return; | 1165 | return; |
1157 | } | 1166 | } |
@@ -1166,12 +1175,12 @@ static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1, | |||
1166 | writel(handle2, card->membase + DR2); | 1175 | writel(handle2, card->membase + DR2); |
1167 | writel(addr1, card->membase + DR1); | 1176 | writel(addr1, card->membase + DR1); |
1168 | writel(handle1, card->membase + DR0); | 1177 | writel(handle1, card->membase + DR0); |
1169 | writel(NS_CMD_WRITE_FREEBUFQ | (u32) type, card->membase + CMD); | 1178 | writel(NS_CMD_WRITE_FREEBUFQ | cb->buf_type, card->membase + CMD); |
1170 | 1179 | ||
1171 | spin_unlock_irqrestore(&card->res_lock, flags); | 1180 | spin_unlock_irqrestore(&card->res_lock, flags); |
1172 | 1181 | ||
1173 | XPRINTK("nicstar%d: Pushing %s buffers at 0x%x and 0x%x.\n", card->index, | 1182 | XPRINTK("nicstar%d: Pushing %s buffers at 0x%x and 0x%x.\n", card->index, |
1174 | (type == BUF_SM ? "small" : "large"), addr1, addr2); | 1183 | (cb->buf_type == BUF_SM ? "small" : "large"), addr1, addr2); |
1175 | } | 1184 | } |
1176 | 1185 | ||
1177 | if (!card->efbie && card->sbfqc >= card->sbnr.min && | 1186 | if (!card->efbie && card->sbfqc >= card->sbnr.min && |
@@ -1322,9 +1331,10 @@ static irqreturn_t ns_irq_handler(int irq, void *dev_id, struct pt_regs *regs) | |||
1322 | card->efbie = 0; | 1331 | card->efbie = 0; |
1323 | break; | 1332 | break; |
1324 | } | 1333 | } |
1334 | NS_SKB_CB(sb)->buf_type = BUF_SM; | ||
1325 | skb_queue_tail(&card->sbpool.queue, sb); | 1335 | skb_queue_tail(&card->sbpool.queue, sb); |
1326 | skb_reserve(sb, NS_AAL0_HEADER); | 1336 | skb_reserve(sb, NS_AAL0_HEADER); |
1327 | push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), 0, 0); | 1337 | push_rxbufs(card, sb); |
1328 | } | 1338 | } |
1329 | card->sbfqc = i; | 1339 | card->sbfqc = i; |
1330 | process_rsq(card); | 1340 | process_rsq(card); |
@@ -1348,9 +1358,10 @@ static irqreturn_t ns_irq_handler(int irq, void *dev_id, struct pt_regs *regs) | |||
1348 | card->efbie = 0; | 1358 | card->efbie = 0; |
1349 | break; | 1359 | break; |
1350 | } | 1360 | } |
1361 | NS_SKB_CB(lb)->buf_type = BUF_LG; | ||
1351 | skb_queue_tail(&card->lbpool.queue, lb); | 1362 | skb_queue_tail(&card->lbpool.queue, lb); |
1352 | skb_reserve(lb, NS_SMBUFSIZE); | 1363 | skb_reserve(lb, NS_SMBUFSIZE); |
1353 | push_rxbufs(card, BUF_LG, (u32) lb, (u32) virt_to_bus(lb->data), 0, 0); | 1364 | push_rxbufs(card, lb); |
1354 | } | 1365 | } |
1355 | card->lbfqc = i; | 1366 | card->lbfqc = i; |
1356 | process_rsq(card); | 1367 | process_rsq(card); |
@@ -2202,7 +2213,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) | |||
2202 | memcpy(sb->tail, cell, ATM_CELL_PAYLOAD); | 2213 | memcpy(sb->tail, cell, ATM_CELL_PAYLOAD); |
2203 | skb_put(sb, ATM_CELL_PAYLOAD); | 2214 | skb_put(sb, ATM_CELL_PAYLOAD); |
2204 | ATM_SKB(sb)->vcc = vcc; | 2215 | ATM_SKB(sb)->vcc = vcc; |
2205 | do_gettimeofday(&sb->stamp); | 2216 | __net_timestamp(sb); |
2206 | vcc->push(vcc, sb); | 2217 | vcc->push(vcc, sb); |
2207 | atomic_inc(&vcc->stats->rx); | 2218 | atomic_inc(&vcc->stats->rx); |
2208 | cell += ATM_CELL_PAYLOAD; | 2219 | cell += ATM_CELL_PAYLOAD; |
@@ -2227,6 +2238,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) | |||
2227 | recycle_rx_buf(card, skb); | 2238 | recycle_rx_buf(card, skb); |
2228 | return; | 2239 | return; |
2229 | } | 2240 | } |
2241 | NS_SKB_CB(iovb)->buf_type = BUF_NONE; | ||
2230 | } | 2242 | } |
2231 | else | 2243 | else |
2232 | if (--card->iovpool.count < card->iovnr.min) | 2244 | if (--card->iovpool.count < card->iovnr.min) |
@@ -2234,6 +2246,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) | |||
2234 | struct sk_buff *new_iovb; | 2246 | struct sk_buff *new_iovb; |
2235 | if ((new_iovb = alloc_skb(NS_IOVBUFSIZE, GFP_ATOMIC)) != NULL) | 2247 | if ((new_iovb = alloc_skb(NS_IOVBUFSIZE, GFP_ATOMIC)) != NULL) |
2236 | { | 2248 | { |
2249 | NS_SKB_CB(iovb)->buf_type = BUF_NONE; | ||
2237 | skb_queue_tail(&card->iovpool.queue, new_iovb); | 2250 | skb_queue_tail(&card->iovpool.queue, new_iovb); |
2238 | card->iovpool.count++; | 2251 | card->iovpool.count++; |
2239 | } | 2252 | } |
@@ -2264,7 +2277,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) | |||
2264 | 2277 | ||
2265 | if (NS_SKB(iovb)->iovcnt == 1) | 2278 | if (NS_SKB(iovb)->iovcnt == 1) |
2266 | { | 2279 | { |
2267 | if (skb->list != &card->sbpool.queue) | 2280 | if (NS_SKB_CB(skb)->buf_type != BUF_SM) |
2268 | { | 2281 | { |
2269 | printk("nicstar%d: Expected a small buffer, and this is not one.\n", | 2282 | printk("nicstar%d: Expected a small buffer, and this is not one.\n", |
2270 | card->index); | 2283 | card->index); |
@@ -2278,7 +2291,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) | |||
2278 | } | 2291 | } |
2279 | else /* NS_SKB(iovb)->iovcnt >= 2 */ | 2292 | else /* NS_SKB(iovb)->iovcnt >= 2 */ |
2280 | { | 2293 | { |
2281 | if (skb->list != &card->lbpool.queue) | 2294 | if (NS_SKB_CB(skb)->buf_type != BUF_LG) |
2282 | { | 2295 | { |
2283 | printk("nicstar%d: Expected a large buffer, and this is not one.\n", | 2296 | printk("nicstar%d: Expected a large buffer, and this is not one.\n", |
2284 | card->index); | 2297 | card->index); |
@@ -2322,8 +2335,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) | |||
2322 | /* skb points to a small buffer */ | 2335 | /* skb points to a small buffer */ |
2323 | if (!atm_charge(vcc, skb->truesize)) | 2336 | if (!atm_charge(vcc, skb->truesize)) |
2324 | { | 2337 | { |
2325 | push_rxbufs(card, BUF_SM, (u32) skb, (u32) virt_to_bus(skb->data), | 2338 | push_rxbufs(card, skb); |
2326 | 0, 0); | ||
2327 | atomic_inc(&vcc->stats->rx_drop); | 2339 | atomic_inc(&vcc->stats->rx_drop); |
2328 | } | 2340 | } |
2329 | else | 2341 | else |
@@ -2334,7 +2346,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) | |||
2334 | skb->destructor = ns_sb_destructor; | 2346 | skb->destructor = ns_sb_destructor; |
2335 | #endif /* NS_USE_DESTRUCTORS */ | 2347 | #endif /* NS_USE_DESTRUCTORS */ |
2336 | ATM_SKB(skb)->vcc = vcc; | 2348 | ATM_SKB(skb)->vcc = vcc; |
2337 | do_gettimeofday(&skb->stamp); | 2349 | __net_timestamp(skb); |
2338 | vcc->push(vcc, skb); | 2350 | vcc->push(vcc, skb); |
2339 | atomic_inc(&vcc->stats->rx); | 2351 | atomic_inc(&vcc->stats->rx); |
2340 | } | 2352 | } |
@@ -2350,8 +2362,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) | |||
2350 | { | 2362 | { |
2351 | if (!atm_charge(vcc, sb->truesize)) | 2363 | if (!atm_charge(vcc, sb->truesize)) |
2352 | { | 2364 | { |
2353 | push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), | 2365 | push_rxbufs(card, sb); |
2354 | 0, 0); | ||
2355 | atomic_inc(&vcc->stats->rx_drop); | 2366 | atomic_inc(&vcc->stats->rx_drop); |
2356 | } | 2367 | } |
2357 | else | 2368 | else |
@@ -2362,21 +2373,19 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) | |||
2362 | sb->destructor = ns_sb_destructor; | 2373 | sb->destructor = ns_sb_destructor; |
2363 | #endif /* NS_USE_DESTRUCTORS */ | 2374 | #endif /* NS_USE_DESTRUCTORS */ |
2364 | ATM_SKB(sb)->vcc = vcc; | 2375 | ATM_SKB(sb)->vcc = vcc; |
2365 | do_gettimeofday(&sb->stamp); | 2376 | __net_timestamp(sb); |
2366 | vcc->push(vcc, sb); | 2377 | vcc->push(vcc, sb); |
2367 | atomic_inc(&vcc->stats->rx); | 2378 | atomic_inc(&vcc->stats->rx); |
2368 | } | 2379 | } |
2369 | 2380 | ||
2370 | push_rxbufs(card, BUF_LG, (u32) skb, | 2381 | push_rxbufs(card, skb); |
2371 | (u32) virt_to_bus(skb->data), 0, 0); | ||
2372 | 2382 | ||
2373 | } | 2383 | } |
2374 | else /* len > NS_SMBUFSIZE, the usual case */ | 2384 | else /* len > NS_SMBUFSIZE, the usual case */ |
2375 | { | 2385 | { |
2376 | if (!atm_charge(vcc, skb->truesize)) | 2386 | if (!atm_charge(vcc, skb->truesize)) |
2377 | { | 2387 | { |
2378 | push_rxbufs(card, BUF_LG, (u32) skb, | 2388 | push_rxbufs(card, skb); |
2379 | (u32) virt_to_bus(skb->data), 0, 0); | ||
2380 | atomic_inc(&vcc->stats->rx_drop); | 2389 | atomic_inc(&vcc->stats->rx_drop); |
2381 | } | 2390 | } |
2382 | else | 2391 | else |
@@ -2389,13 +2398,12 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) | |||
2389 | memcpy(skb->data, sb->data, NS_SMBUFSIZE); | 2398 | memcpy(skb->data, sb->data, NS_SMBUFSIZE); |
2390 | skb_put(skb, len - NS_SMBUFSIZE); | 2399 | skb_put(skb, len - NS_SMBUFSIZE); |
2391 | ATM_SKB(skb)->vcc = vcc; | 2400 | ATM_SKB(skb)->vcc = vcc; |
2392 | do_gettimeofday(&skb->stamp); | 2401 | __net_timestamp(skb); |
2393 | vcc->push(vcc, skb); | 2402 | vcc->push(vcc, skb); |
2394 | atomic_inc(&vcc->stats->rx); | 2403 | atomic_inc(&vcc->stats->rx); |
2395 | } | 2404 | } |
2396 | 2405 | ||
2397 | push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), | 2406 | push_rxbufs(card, sb); |
2398 | 0, 0); | ||
2399 | 2407 | ||
2400 | } | 2408 | } |
2401 | 2409 | ||
@@ -2430,6 +2438,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) | |||
2430 | card->hbpool.count++; | 2438 | card->hbpool.count++; |
2431 | } | 2439 | } |
2432 | } | 2440 | } |
2441 | NS_SKB_CB(hb)->buf_type = BUF_NONE; | ||
2433 | } | 2442 | } |
2434 | else | 2443 | else |
2435 | if (--card->hbpool.count < card->hbnr.min) | 2444 | if (--card->hbpool.count < card->hbnr.min) |
@@ -2437,6 +2446,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) | |||
2437 | struct sk_buff *new_hb; | 2446 | struct sk_buff *new_hb; |
2438 | if ((new_hb = dev_alloc_skb(NS_HBUFSIZE)) != NULL) | 2447 | if ((new_hb = dev_alloc_skb(NS_HBUFSIZE)) != NULL) |
2439 | { | 2448 | { |
2449 | NS_SKB_CB(new_hb)->buf_type = BUF_NONE; | ||
2440 | skb_queue_tail(&card->hbpool.queue, new_hb); | 2450 | skb_queue_tail(&card->hbpool.queue, new_hb); |
2441 | card->hbpool.count++; | 2451 | card->hbpool.count++; |
2442 | } | 2452 | } |
@@ -2444,6 +2454,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) | |||
2444 | { | 2454 | { |
2445 | if ((new_hb = dev_alloc_skb(NS_HBUFSIZE)) != NULL) | 2455 | if ((new_hb = dev_alloc_skb(NS_HBUFSIZE)) != NULL) |
2446 | { | 2456 | { |
2457 | NS_SKB_CB(new_hb)->buf_type = BUF_NONE; | ||
2447 | skb_queue_tail(&card->hbpool.queue, new_hb); | 2458 | skb_queue_tail(&card->hbpool.queue, new_hb); |
2448 | card->hbpool.count++; | 2459 | card->hbpool.count++; |
2449 | } | 2460 | } |
@@ -2473,8 +2484,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) | |||
2473 | remaining = len - iov->iov_len; | 2484 | remaining = len - iov->iov_len; |
2474 | iov++; | 2485 | iov++; |
2475 | /* Free the small buffer */ | 2486 | /* Free the small buffer */ |
2476 | push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), | 2487 | push_rxbufs(card, sb); |
2477 | 0, 0); | ||
2478 | 2488 | ||
2479 | /* Copy all large buffers to the huge buffer and free them */ | 2489 | /* Copy all large buffers to the huge buffer and free them */ |
2480 | for (j = 1; j < NS_SKB(iovb)->iovcnt; j++) | 2490 | for (j = 1; j < NS_SKB(iovb)->iovcnt; j++) |
@@ -2485,8 +2495,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) | |||
2485 | skb_put(hb, tocopy); | 2495 | skb_put(hb, tocopy); |
2486 | iov++; | 2496 | iov++; |
2487 | remaining -= tocopy; | 2497 | remaining -= tocopy; |
2488 | push_rxbufs(card, BUF_LG, (u32) lb, | 2498 | push_rxbufs(card, lb); |
2489 | (u32) virt_to_bus(lb->data), 0, 0); | ||
2490 | } | 2499 | } |
2491 | #ifdef EXTRA_DEBUG | 2500 | #ifdef EXTRA_DEBUG |
2492 | if (remaining != 0 || hb->len != len) | 2501 | if (remaining != 0 || hb->len != len) |
@@ -2496,7 +2505,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) | |||
2496 | #ifdef NS_USE_DESTRUCTORS | 2505 | #ifdef NS_USE_DESTRUCTORS |
2497 | hb->destructor = ns_hb_destructor; | 2506 | hb->destructor = ns_hb_destructor; |
2498 | #endif /* NS_USE_DESTRUCTORS */ | 2507 | #endif /* NS_USE_DESTRUCTORS */ |
2499 | do_gettimeofday(&hb->stamp); | 2508 | __net_timestamp(hb); |
2500 | vcc->push(vcc, hb); | 2509 | vcc->push(vcc, hb); |
2501 | atomic_inc(&vcc->stats->rx); | 2510 | atomic_inc(&vcc->stats->rx); |
2502 | } | 2511 | } |
@@ -2527,9 +2536,10 @@ static void ns_sb_destructor(struct sk_buff *sb) | |||
2527 | sb = __dev_alloc_skb(NS_SMSKBSIZE, GFP_KERNEL); | 2536 | sb = __dev_alloc_skb(NS_SMSKBSIZE, GFP_KERNEL); |
2528 | if (sb == NULL) | 2537 | if (sb == NULL) |
2529 | break; | 2538 | break; |
2539 | NS_SKB_CB(sb)->buf_type = BUF_SM; | ||
2530 | skb_queue_tail(&card->sbpool.queue, sb); | 2540 | skb_queue_tail(&card->sbpool.queue, sb); |
2531 | skb_reserve(sb, NS_AAL0_HEADER); | 2541 | skb_reserve(sb, NS_AAL0_HEADER); |
2532 | push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), 0, 0); | 2542 | push_rxbufs(card, sb); |
2533 | } while (card->sbfqc < card->sbnr.min); | 2543 | } while (card->sbfqc < card->sbnr.min); |
2534 | } | 2544 | } |
2535 | 2545 | ||
@@ -2550,9 +2560,10 @@ static void ns_lb_destructor(struct sk_buff *lb) | |||
2550 | lb = __dev_alloc_skb(NS_LGSKBSIZE, GFP_KERNEL); | 2560 | lb = __dev_alloc_skb(NS_LGSKBSIZE, GFP_KERNEL); |
2551 | if (lb == NULL) | 2561 | if (lb == NULL) |
2552 | break; | 2562 | break; |
2563 | NS_SKB_CB(lb)->buf_type = BUF_LG; | ||
2553 | skb_queue_tail(&card->lbpool.queue, lb); | 2564 | skb_queue_tail(&card->lbpool.queue, lb); |
2554 | skb_reserve(lb, NS_SMBUFSIZE); | 2565 | skb_reserve(lb, NS_SMBUFSIZE); |
2555 | push_rxbufs(card, BUF_LG, (u32) lb, (u32) virt_to_bus(lb->data), 0, 0); | 2566 | push_rxbufs(card, lb); |
2556 | } while (card->lbfqc < card->lbnr.min); | 2567 | } while (card->lbfqc < card->lbnr.min); |
2557 | } | 2568 | } |
2558 | 2569 | ||
@@ -2569,6 +2580,7 @@ static void ns_hb_destructor(struct sk_buff *hb) | |||
2569 | hb = __dev_alloc_skb(NS_HBUFSIZE, GFP_KERNEL); | 2580 | hb = __dev_alloc_skb(NS_HBUFSIZE, GFP_KERNEL); |
2570 | if (hb == NULL) | 2581 | if (hb == NULL) |
2571 | break; | 2582 | break; |
2583 | NS_SKB_CB(hb)->buf_type = BUF_NONE; | ||
2572 | skb_queue_tail(&card->hbpool.queue, hb); | 2584 | skb_queue_tail(&card->hbpool.queue, hb); |
2573 | card->hbpool.count++; | 2585 | card->hbpool.count++; |
2574 | } | 2586 | } |
@@ -2577,45 +2589,25 @@ static void ns_hb_destructor(struct sk_buff *hb) | |||
2577 | #endif /* NS_USE_DESTRUCTORS */ | 2589 | #endif /* NS_USE_DESTRUCTORS */ |
2578 | 2590 | ||
2579 | 2591 | ||
2580 | |||
2581 | static void recycle_rx_buf(ns_dev *card, struct sk_buff *skb) | 2592 | static void recycle_rx_buf(ns_dev *card, struct sk_buff *skb) |
2582 | { | 2593 | { |
2583 | if (skb->list == &card->sbpool.queue) | 2594 | struct ns_skb_cb *cb = NS_SKB_CB(skb); |
2584 | push_rxbufs(card, BUF_SM, (u32) skb, (u32) virt_to_bus(skb->data), 0, 0); | ||
2585 | else if (skb->list == &card->lbpool.queue) | ||
2586 | push_rxbufs(card, BUF_LG, (u32) skb, (u32) virt_to_bus(skb->data), 0, 0); | ||
2587 | else | ||
2588 | { | ||
2589 | printk("nicstar%d: What kind of rx buffer is this?\n", card->index); | ||
2590 | dev_kfree_skb_any(skb); | ||
2591 | } | ||
2592 | } | ||
2593 | 2595 | ||
2596 | if (unlikely(cb->buf_type == BUF_NONE)) { | ||
2597 | printk("nicstar%d: What kind of rx buffer is this?\n", card->index); | ||
2598 | dev_kfree_skb_any(skb); | ||
2599 | } else | ||
2600 | push_rxbufs(card, skb); | ||
2601 | } | ||
2594 | 2602 | ||
2595 | 2603 | ||
2596 | static void recycle_iovec_rx_bufs(ns_dev *card, struct iovec *iov, int count) | 2604 | static void recycle_iovec_rx_bufs(ns_dev *card, struct iovec *iov, int count) |
2597 | { | 2605 | { |
2598 | struct sk_buff *skb; | 2606 | while (count-- > 0) |
2599 | 2607 | recycle_rx_buf(card, (struct sk_buff *) (iov++)->iov_base); | |
2600 | for (; count > 0; count--) | ||
2601 | { | ||
2602 | skb = (struct sk_buff *) (iov++)->iov_base; | ||
2603 | if (skb->list == &card->sbpool.queue) | ||
2604 | push_rxbufs(card, BUF_SM, (u32) skb, (u32) virt_to_bus(skb->data), | ||
2605 | 0, 0); | ||
2606 | else if (skb->list == &card->lbpool.queue) | ||
2607 | push_rxbufs(card, BUF_LG, (u32) skb, (u32) virt_to_bus(skb->data), | ||
2608 | 0, 0); | ||
2609 | else | ||
2610 | { | ||
2611 | printk("nicstar%d: What kind of rx buffer is this?\n", card->index); | ||
2612 | dev_kfree_skb_any(skb); | ||
2613 | } | ||
2614 | } | ||
2615 | } | 2608 | } |
2616 | 2609 | ||
2617 | 2610 | ||
2618 | |||
2619 | static void recycle_iov_buf(ns_dev *card, struct sk_buff *iovb) | 2611 | static void recycle_iov_buf(ns_dev *card, struct sk_buff *iovb) |
2620 | { | 2612 | { |
2621 | if (card->iovpool.count < card->iovnr.max) | 2613 | if (card->iovpool.count < card->iovnr.max) |
@@ -2631,7 +2623,7 @@ static void recycle_iov_buf(ns_dev *card, struct sk_buff *iovb) | |||
2631 | 2623 | ||
2632 | static void dequeue_sm_buf(ns_dev *card, struct sk_buff *sb) | 2624 | static void dequeue_sm_buf(ns_dev *card, struct sk_buff *sb) |
2633 | { | 2625 | { |
2634 | skb_unlink(sb); | 2626 | skb_unlink(sb, &card->sbpool.queue); |
2635 | #ifdef NS_USE_DESTRUCTORS | 2627 | #ifdef NS_USE_DESTRUCTORS |
2636 | if (card->sbfqc < card->sbnr.min) | 2628 | if (card->sbfqc < card->sbnr.min) |
2637 | #else | 2629 | #else |
@@ -2640,10 +2632,10 @@ static void dequeue_sm_buf(ns_dev *card, struct sk_buff *sb) | |||
2640 | struct sk_buff *new_sb; | 2632 | struct sk_buff *new_sb; |
2641 | if ((new_sb = dev_alloc_skb(NS_SMSKBSIZE)) != NULL) | 2633 | if ((new_sb = dev_alloc_skb(NS_SMSKBSIZE)) != NULL) |
2642 | { | 2634 | { |
2635 | NS_SKB_CB(new_sb)->buf_type = BUF_SM; | ||
2643 | skb_queue_tail(&card->sbpool.queue, new_sb); | 2636 | skb_queue_tail(&card->sbpool.queue, new_sb); |
2644 | skb_reserve(new_sb, NS_AAL0_HEADER); | 2637 | skb_reserve(new_sb, NS_AAL0_HEADER); |
2645 | push_rxbufs(card, BUF_SM, (u32) new_sb, | 2638 | push_rxbufs(card, new_sb); |
2646 | (u32) virt_to_bus(new_sb->data), 0, 0); | ||
2647 | } | 2639 | } |
2648 | } | 2640 | } |
2649 | if (card->sbfqc < card->sbnr.init) | 2641 | if (card->sbfqc < card->sbnr.init) |
@@ -2652,10 +2644,10 @@ static void dequeue_sm_buf(ns_dev *card, struct sk_buff *sb) | |||
2652 | struct sk_buff *new_sb; | 2644 | struct sk_buff *new_sb; |
2653 | if ((new_sb = dev_alloc_skb(NS_SMSKBSIZE)) != NULL) | 2645 | if ((new_sb = dev_alloc_skb(NS_SMSKBSIZE)) != NULL) |
2654 | { | 2646 | { |
2647 | NS_SKB_CB(new_sb)->buf_type = BUF_SM; | ||
2655 | skb_queue_tail(&card->sbpool.queue, new_sb); | 2648 | skb_queue_tail(&card->sbpool.queue, new_sb); |
2656 | skb_reserve(new_sb, NS_AAL0_HEADER); | 2649 | skb_reserve(new_sb, NS_AAL0_HEADER); |
2657 | push_rxbufs(card, BUF_SM, (u32) new_sb, | 2650 | push_rxbufs(card, new_sb); |
2658 | (u32) virt_to_bus(new_sb->data), 0, 0); | ||
2659 | } | 2651 | } |
2660 | } | 2652 | } |
2661 | } | 2653 | } |
@@ -2664,7 +2656,7 @@ static void dequeue_sm_buf(ns_dev *card, struct sk_buff *sb) | |||
2664 | 2656 | ||
2665 | static void dequeue_lg_buf(ns_dev *card, struct sk_buff *lb) | 2657 | static void dequeue_lg_buf(ns_dev *card, struct sk_buff *lb) |
2666 | { | 2658 | { |
2667 | skb_unlink(lb); | 2659 | skb_unlink(lb, &card->lbpool.queue); |
2668 | #ifdef NS_USE_DESTRUCTORS | 2660 | #ifdef NS_USE_DESTRUCTORS |
2669 | if (card->lbfqc < card->lbnr.min) | 2661 | if (card->lbfqc < card->lbnr.min) |
2670 | #else | 2662 | #else |
@@ -2673,10 +2665,10 @@ static void dequeue_lg_buf(ns_dev *card, struct sk_buff *lb) | |||
2673 | struct sk_buff *new_lb; | 2665 | struct sk_buff *new_lb; |
2674 | if ((new_lb = dev_alloc_skb(NS_LGSKBSIZE)) != NULL) | 2666 | if ((new_lb = dev_alloc_skb(NS_LGSKBSIZE)) != NULL) |
2675 | { | 2667 | { |
2668 | NS_SKB_CB(new_lb)->buf_type = BUF_LG; | ||
2676 | skb_queue_tail(&card->lbpool.queue, new_lb); | 2669 | skb_queue_tail(&card->lbpool.queue, new_lb); |
2677 | skb_reserve(new_lb, NS_SMBUFSIZE); | 2670 | skb_reserve(new_lb, NS_SMBUFSIZE); |
2678 | push_rxbufs(card, BUF_LG, (u32) new_lb, | 2671 | push_rxbufs(card, new_lb); |
2679 | (u32) virt_to_bus(new_lb->data), 0, 0); | ||
2680 | } | 2672 | } |
2681 | } | 2673 | } |
2682 | if (card->lbfqc < card->lbnr.init) | 2674 | if (card->lbfqc < card->lbnr.init) |
@@ -2685,10 +2677,10 @@ static void dequeue_lg_buf(ns_dev *card, struct sk_buff *lb) | |||
2685 | struct sk_buff *new_lb; | 2677 | struct sk_buff *new_lb; |
2686 | if ((new_lb = dev_alloc_skb(NS_LGSKBSIZE)) != NULL) | 2678 | if ((new_lb = dev_alloc_skb(NS_LGSKBSIZE)) != NULL) |
2687 | { | 2679 | { |
2680 | NS_SKB_CB(new_lb)->buf_type = BUF_LG; | ||
2688 | skb_queue_tail(&card->lbpool.queue, new_lb); | 2681 | skb_queue_tail(&card->lbpool.queue, new_lb); |
2689 | skb_reserve(new_lb, NS_SMBUFSIZE); | 2682 | skb_reserve(new_lb, NS_SMBUFSIZE); |
2690 | push_rxbufs(card, BUF_LG, (u32) new_lb, | 2683 | push_rxbufs(card, new_lb); |
2691 | (u32) virt_to_bus(new_lb->data), 0, 0); | ||
2692 | } | 2684 | } |
2693 | } | 2685 | } |
2694 | } | 2686 | } |
@@ -2880,9 +2872,10 @@ static int ns_ioctl(struct atm_dev *dev, unsigned int cmd, void __user *arg) | |||
2880 | sb = __dev_alloc_skb(NS_SMSKBSIZE, GFP_KERNEL); | 2872 | sb = __dev_alloc_skb(NS_SMSKBSIZE, GFP_KERNEL); |
2881 | if (sb == NULL) | 2873 | if (sb == NULL) |
2882 | return -ENOMEM; | 2874 | return -ENOMEM; |
2875 | NS_SKB_CB(sb)->buf_type = BUF_SM; | ||
2883 | skb_queue_tail(&card->sbpool.queue, sb); | 2876 | skb_queue_tail(&card->sbpool.queue, sb); |
2884 | skb_reserve(sb, NS_AAL0_HEADER); | 2877 | skb_reserve(sb, NS_AAL0_HEADER); |
2885 | push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), 0, 0); | 2878 | push_rxbufs(card, sb); |
2886 | } | 2879 | } |
2887 | break; | 2880 | break; |
2888 | 2881 | ||
@@ -2894,9 +2887,10 @@ static int ns_ioctl(struct atm_dev *dev, unsigned int cmd, void __user *arg) | |||
2894 | lb = __dev_alloc_skb(NS_LGSKBSIZE, GFP_KERNEL); | 2887 | lb = __dev_alloc_skb(NS_LGSKBSIZE, GFP_KERNEL); |
2895 | if (lb == NULL) | 2888 | if (lb == NULL) |
2896 | return -ENOMEM; | 2889 | return -ENOMEM; |
2890 | NS_SKB_CB(lb)->buf_type = BUF_LG; | ||
2897 | skb_queue_tail(&card->lbpool.queue, lb); | 2891 | skb_queue_tail(&card->lbpool.queue, lb); |
2898 | skb_reserve(lb, NS_SMBUFSIZE); | 2892 | skb_reserve(lb, NS_SMBUFSIZE); |
2899 | push_rxbufs(card, BUF_LG, (u32) lb, (u32) virt_to_bus(lb->data), 0, 0); | 2893 | push_rxbufs(card, lb); |
2900 | } | 2894 | } |
2901 | break; | 2895 | break; |
2902 | 2896 | ||
@@ -2923,6 +2917,7 @@ static int ns_ioctl(struct atm_dev *dev, unsigned int cmd, void __user *arg) | |||
2923 | hb = __dev_alloc_skb(NS_HBUFSIZE, GFP_KERNEL); | 2917 | hb = __dev_alloc_skb(NS_HBUFSIZE, GFP_KERNEL); |
2924 | if (hb == NULL) | 2918 | if (hb == NULL) |
2925 | return -ENOMEM; | 2919 | return -ENOMEM; |
2920 | NS_SKB_CB(hb)->buf_type = BUF_NONE; | ||
2926 | ns_grab_int_lock(card, flags); | 2921 | ns_grab_int_lock(card, flags); |
2927 | skb_queue_tail(&card->hbpool.queue, hb); | 2922 | skb_queue_tail(&card->hbpool.queue, hb); |
2928 | card->hbpool.count++; | 2923 | card->hbpool.count++; |
@@ -2953,6 +2948,7 @@ static int ns_ioctl(struct atm_dev *dev, unsigned int cmd, void __user *arg) | |||
2953 | iovb = alloc_skb(NS_IOVBUFSIZE, GFP_KERNEL); | 2948 | iovb = alloc_skb(NS_IOVBUFSIZE, GFP_KERNEL); |
2954 | if (iovb == NULL) | 2949 | if (iovb == NULL) |
2955 | return -ENOMEM; | 2950 | return -ENOMEM; |
2951 | NS_SKB_CB(iovb)->buf_type = BUF_NONE; | ||
2956 | ns_grab_int_lock(card, flags); | 2952 | ns_grab_int_lock(card, flags); |
2957 | skb_queue_tail(&card->iovpool.queue, iovb); | 2953 | skb_queue_tail(&card->iovpool.queue, iovb); |
2958 | card->iovpool.count++; | 2954 | card->iovpool.count++; |
@@ -2979,17 +2975,12 @@ static int ns_ioctl(struct atm_dev *dev, unsigned int cmd, void __user *arg) | |||
2979 | } | 2975 | } |
2980 | 2976 | ||
2981 | 2977 | ||
2982 | |||
2983 | static void which_list(ns_dev *card, struct sk_buff *skb) | 2978 | static void which_list(ns_dev *card, struct sk_buff *skb) |
2984 | { | 2979 | { |
2985 | printk("It's a %s buffer.\n", skb->list == &card->sbpool.queue ? | 2980 | printk("skb buf_type: 0x%08x\n", NS_SKB_CB(skb)->buf_type); |
2986 | "small" : skb->list == &card->lbpool.queue ? "large" : | ||
2987 | skb->list == &card->hbpool.queue ? "huge" : | ||
2988 | skb->list == &card->iovpool.queue ? "iovec" : "unknown"); | ||
2989 | } | 2981 | } |
2990 | 2982 | ||
2991 | 2983 | ||
2992 | |||
2993 | static void ns_poll(unsigned long arg) | 2984 | static void ns_poll(unsigned long arg) |
2994 | { | 2985 | { |
2995 | int i; | 2986 | int i; |
diff --git a/drivers/atm/nicstar.h b/drivers/atm/nicstar.h index ea83c46c8ba5..5997bcb45b59 100644 --- a/drivers/atm/nicstar.h +++ b/drivers/atm/nicstar.h | |||
@@ -103,8 +103,14 @@ | |||
103 | 103 | ||
104 | #define NS_IOREMAP_SIZE 4096 | 104 | #define NS_IOREMAP_SIZE 4096 |
105 | 105 | ||
106 | #define BUF_SM 0x00000000 /* These two are used for push_rxbufs() */ | 106 | /* |
107 | #define BUF_LG 0x00000001 /* CMD, Write_FreeBufQ, LBUF bit */ | 107 | * BUF_XX distinguish the Rx buffers depending on their (small/large) size. |
108 | * BUF_SM and BUF_LG are both used by the driver and the device. | ||
109 | * BUF_NONE is only used by the driver. | ||
110 | */ | ||
111 | #define BUF_SM 0x00000000 /* These two are used for push_rxbufs() */ | ||
112 | #define BUF_LG 0x00000001 /* CMD, Write_FreeBufQ, LBUF bit */ | ||
113 | #define BUF_NONE 0xffffffff /* Software only: */ | ||
108 | 114 | ||
109 | #define NS_HBUFSIZE 65568 /* Size of max. AAL5 PDU */ | 115 | #define NS_HBUFSIZE 65568 /* Size of max. AAL5 PDU */ |
110 | #define NS_MAX_IOVECS (2 + (65568 - NS_SMBUFSIZE) / \ | 116 | #define NS_MAX_IOVECS (2 + (65568 - NS_SMBUFSIZE) / \ |
@@ -684,6 +690,12 @@ enum ns_regs | |||
684 | /* Device driver structures ***************************************************/ | 690 | /* Device driver structures ***************************************************/ |
685 | 691 | ||
686 | 692 | ||
693 | struct ns_skb_cb { | ||
694 | u32 buf_type; /* BUF_SM/BUF_LG/BUF_NONE */ | ||
695 | }; | ||
696 | |||
697 | #define NS_SKB_CB(skb) ((struct ns_skb_cb *)((skb)->cb)) | ||
698 | |||
687 | typedef struct tsq_info | 699 | typedef struct tsq_info |
688 | { | 700 | { |
689 | void *org; | 701 | void *org; |
diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index a2b236a966e0..c4b75ecf9460 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c | |||
@@ -400,7 +400,7 @@ unsigned long *x; | |||
400 | EVENT("error code 0x%x/0x%x\n",(here[3] & uPD98401_AAL5_ES) >> | 400 | EVENT("error code 0x%x/0x%x\n",(here[3] & uPD98401_AAL5_ES) >> |
401 | uPD98401_AAL5_ES_SHIFT,error); | 401 | uPD98401_AAL5_ES_SHIFT,error); |
402 | skb = ((struct rx_buffer_head *) bus_to_virt(here[2]))->skb; | 402 | skb = ((struct rx_buffer_head *) bus_to_virt(here[2]))->skb; |
403 | do_gettimeofday(&skb->stamp); | 403 | __net_timestamp(skb); |
404 | #if 0 | 404 | #if 0 |
405 | printk("[-3..0] 0x%08lx 0x%08lx 0x%08lx 0x%08lx\n",((unsigned *) skb->data)[-3], | 405 | printk("[-3..0] 0x%08lx 0x%08lx 0x%08lx 0x%08lx\n",((unsigned *) skb->data)[-3], |
406 | ((unsigned *) skb->data)[-2],((unsigned *) skb->data)[-1], | 406 | ((unsigned *) skb->data)[-2],((unsigned *) skb->data)[-1], |
@@ -417,10 +417,12 @@ printk("dummy: 0x%08lx, 0x%08lx\n",dummy[0],dummy[1]); | |||
417 | chan = (here[3] & uPD98401_AAL5_CHAN) >> | 417 | chan = (here[3] & uPD98401_AAL5_CHAN) >> |
418 | uPD98401_AAL5_CHAN_SHIFT; | 418 | uPD98401_AAL5_CHAN_SHIFT; |
419 | if (chan < zatm_dev->chans && zatm_dev->rx_map[chan]) { | 419 | if (chan < zatm_dev->chans && zatm_dev->rx_map[chan]) { |
420 | int pos = ZATM_VCC(vcc)->pool; | ||
421 | |||
420 | vcc = zatm_dev->rx_map[chan]; | 422 | vcc = zatm_dev->rx_map[chan]; |
421 | if (skb == zatm_dev->last_free[ZATM_VCC(vcc)->pool]) | 423 | if (skb == zatm_dev->last_free[pos]) |
422 | zatm_dev->last_free[ZATM_VCC(vcc)->pool] = NULL; | 424 | zatm_dev->last_free[pos] = NULL; |
423 | skb_unlink(skb); | 425 | skb_unlink(skb, zatm_dev->pool + pos); |
424 | } | 426 | } |
425 | else { | 427 | else { |
426 | printk(KERN_ERR DEV_LABEL "(itf %d): RX indication " | 428 | printk(KERN_ERR DEV_LABEL "(itf %d): RX indication " |
diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index 9e6f51c528b0..4be976940f69 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c | |||
@@ -120,7 +120,7 @@ aoenet_xmit(struct sk_buff *sl) | |||
120 | * (1) len doesn't include the header by default. I want this. | 120 | * (1) len doesn't include the header by default. I want this. |
121 | */ | 121 | */ |
122 | static int | 122 | static int |
123 | aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt) | 123 | aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, struct net_device *orig_dev) |
124 | { | 124 | { |
125 | struct aoe_hdr *h; | 125 | struct aoe_hdr *h; |
126 | u32 n; | 126 | u32 n; |
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c index 46e56a25d2c8..e46ecd23b3ac 100644 --- a/drivers/block/viodasd.c +++ b/drivers/block/viodasd.c | |||
@@ -776,7 +776,7 @@ static int viodasd_remove(struct vio_dev *vdev) | |||
776 | */ | 776 | */ |
777 | static struct vio_device_id viodasd_device_table[] __devinitdata = { | 777 | static struct vio_device_id viodasd_device_table[] __devinitdata = { |
778 | { "viodasd", "" }, | 778 | { "viodasd", "" }, |
779 | { 0, } | 779 | { "", "" } |
780 | }; | 780 | }; |
781 | 781 | ||
782 | MODULE_DEVICE_TABLE(vio, viodasd_device_table); | 782 | MODULE_DEVICE_TABLE(vio, viodasd_device_table); |
diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c index c42d7e6ac1c5..1e9db0156ea7 100644 --- a/drivers/bluetooth/bfusb.c +++ b/drivers/bluetooth/bfusb.c | |||
@@ -158,7 +158,7 @@ static int bfusb_send_bulk(struct bfusb *bfusb, struct sk_buff *skb) | |||
158 | if (err) { | 158 | if (err) { |
159 | BT_ERR("%s bulk tx submit failed urb %p err %d", | 159 | BT_ERR("%s bulk tx submit failed urb %p err %d", |
160 | bfusb->hdev->name, urb, err); | 160 | bfusb->hdev->name, urb, err); |
161 | skb_unlink(skb); | 161 | skb_unlink(skb, &bfusb->pending_q); |
162 | usb_free_urb(urb); | 162 | usb_free_urb(urb); |
163 | } else | 163 | } else |
164 | atomic_inc(&bfusb->pending_tx); | 164 | atomic_inc(&bfusb->pending_tx); |
@@ -212,7 +212,7 @@ static void bfusb_tx_complete(struct urb *urb, struct pt_regs *regs) | |||
212 | 212 | ||
213 | read_lock(&bfusb->lock); | 213 | read_lock(&bfusb->lock); |
214 | 214 | ||
215 | skb_unlink(skb); | 215 | skb_unlink(skb, &bfusb->pending_q); |
216 | skb_queue_tail(&bfusb->completed_q, skb); | 216 | skb_queue_tail(&bfusb->completed_q, skb); |
217 | 217 | ||
218 | bfusb_tx_wakeup(bfusb); | 218 | bfusb_tx_wakeup(bfusb); |
@@ -253,7 +253,7 @@ static int bfusb_rx_submit(struct bfusb *bfusb, struct urb *urb) | |||
253 | if (err) { | 253 | if (err) { |
254 | BT_ERR("%s bulk rx submit failed urb %p err %d", | 254 | BT_ERR("%s bulk rx submit failed urb %p err %d", |
255 | bfusb->hdev->name, urb, err); | 255 | bfusb->hdev->name, urb, err); |
256 | skb_unlink(skb); | 256 | skb_unlink(skb, &bfusb->pending_q); |
257 | kfree_skb(skb); | 257 | kfree_skb(skb); |
258 | usb_free_urb(urb); | 258 | usb_free_urb(urb); |
259 | } | 259 | } |
@@ -330,7 +330,7 @@ static inline int bfusb_recv_block(struct bfusb *bfusb, int hdr, unsigned char * | |||
330 | } | 330 | } |
331 | 331 | ||
332 | skb->dev = (void *) bfusb->hdev; | 332 | skb->dev = (void *) bfusb->hdev; |
333 | skb->pkt_type = pkt_type; | 333 | bt_cb(skb)->pkt_type = pkt_type; |
334 | 334 | ||
335 | bfusb->reassembly = skb; | 335 | bfusb->reassembly = skb; |
336 | } else { | 336 | } else { |
@@ -398,7 +398,7 @@ static void bfusb_rx_complete(struct urb *urb, struct pt_regs *regs) | |||
398 | buf += len; | 398 | buf += len; |
399 | } | 399 | } |
400 | 400 | ||
401 | skb_unlink(skb); | 401 | skb_unlink(skb, &bfusb->pending_q); |
402 | kfree_skb(skb); | 402 | kfree_skb(skb); |
403 | 403 | ||
404 | bfusb_rx_submit(bfusb, urb); | 404 | bfusb_rx_submit(bfusb, urb); |
@@ -485,7 +485,7 @@ static int bfusb_send_frame(struct sk_buff *skb) | |||
485 | unsigned char buf[3]; | 485 | unsigned char buf[3]; |
486 | int sent = 0, size, count; | 486 | int sent = 0, size, count; |
487 | 487 | ||
488 | BT_DBG("hdev %p skb %p type %d len %d", hdev, skb, skb->pkt_type, skb->len); | 488 | BT_DBG("hdev %p skb %p type %d len %d", hdev, skb, bt_cb(skb)->pkt_type, skb->len); |
489 | 489 | ||
490 | if (!hdev) { | 490 | if (!hdev) { |
491 | BT_ERR("Frame for unknown HCI device (hdev=NULL)"); | 491 | BT_ERR("Frame for unknown HCI device (hdev=NULL)"); |
@@ -497,7 +497,7 @@ static int bfusb_send_frame(struct sk_buff *skb) | |||
497 | 497 | ||
498 | bfusb = (struct bfusb *) hdev->driver_data; | 498 | bfusb = (struct bfusb *) hdev->driver_data; |
499 | 499 | ||
500 | switch (skb->pkt_type) { | 500 | switch (bt_cb(skb)->pkt_type) { |
501 | case HCI_COMMAND_PKT: | 501 | case HCI_COMMAND_PKT: |
502 | hdev->stat.cmd_tx++; | 502 | hdev->stat.cmd_tx++; |
503 | break; | 503 | break; |
@@ -510,7 +510,7 @@ static int bfusb_send_frame(struct sk_buff *skb) | |||
510 | }; | 510 | }; |
511 | 511 | ||
512 | /* Prepend skb with frame type */ | 512 | /* Prepend skb with frame type */ |
513 | memcpy(skb_push(skb, 1), &(skb->pkt_type), 1); | 513 | memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1); |
514 | 514 | ||
515 | count = skb->len; | 515 | count = skb->len; |
516 | 516 | ||
diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c index bd2ec7e284cc..26fe9c0e1d20 100644 --- a/drivers/bluetooth/bluecard_cs.c +++ b/drivers/bluetooth/bluecard_cs.c | |||
@@ -270,7 +270,7 @@ static void bluecard_write_wakeup(bluecard_info_t *info) | |||
270 | if (!(skb = skb_dequeue(&(info->txq)))) | 270 | if (!(skb = skb_dequeue(&(info->txq)))) |
271 | break; | 271 | break; |
272 | 272 | ||
273 | if (skb->pkt_type & 0x80) { | 273 | if (bt_cb(skb)->pkt_type & 0x80) { |
274 | /* Disable RTS */ | 274 | /* Disable RTS */ |
275 | info->ctrl_reg |= REG_CONTROL_RTS; | 275 | info->ctrl_reg |= REG_CONTROL_RTS; |
276 | outb(info->ctrl_reg, iobase + REG_CONTROL); | 276 | outb(info->ctrl_reg, iobase + REG_CONTROL); |
@@ -288,13 +288,13 @@ static void bluecard_write_wakeup(bluecard_info_t *info) | |||
288 | /* Mark the buffer as dirty */ | 288 | /* Mark the buffer as dirty */ |
289 | clear_bit(ready_bit, &(info->tx_state)); | 289 | clear_bit(ready_bit, &(info->tx_state)); |
290 | 290 | ||
291 | if (skb->pkt_type & 0x80) { | 291 | if (bt_cb(skb)->pkt_type & 0x80) { |
292 | DECLARE_WAIT_QUEUE_HEAD(wq); | 292 | DECLARE_WAIT_QUEUE_HEAD(wq); |
293 | DEFINE_WAIT(wait); | 293 | DEFINE_WAIT(wait); |
294 | 294 | ||
295 | unsigned char baud_reg; | 295 | unsigned char baud_reg; |
296 | 296 | ||
297 | switch (skb->pkt_type) { | 297 | switch (bt_cb(skb)->pkt_type) { |
298 | case PKT_BAUD_RATE_460800: | 298 | case PKT_BAUD_RATE_460800: |
299 | baud_reg = REG_CONTROL_BAUD_RATE_460800; | 299 | baud_reg = REG_CONTROL_BAUD_RATE_460800; |
300 | break; | 300 | break; |
@@ -410,9 +410,9 @@ static void bluecard_receive(bluecard_info_t *info, unsigned int offset) | |||
410 | if (info->rx_state == RECV_WAIT_PACKET_TYPE) { | 410 | if (info->rx_state == RECV_WAIT_PACKET_TYPE) { |
411 | 411 | ||
412 | info->rx_skb->dev = (void *) info->hdev; | 412 | info->rx_skb->dev = (void *) info->hdev; |
413 | info->rx_skb->pkt_type = buf[i]; | 413 | bt_cb(info->rx_skb)->pkt_type = buf[i]; |
414 | 414 | ||
415 | switch (info->rx_skb->pkt_type) { | 415 | switch (bt_cb(info->rx_skb)->pkt_type) { |
416 | 416 | ||
417 | case 0x00: | 417 | case 0x00: |
418 | /* init packet */ | 418 | /* init packet */ |
@@ -444,7 +444,7 @@ static void bluecard_receive(bluecard_info_t *info, unsigned int offset) | |||
444 | 444 | ||
445 | default: | 445 | default: |
446 | /* unknown packet */ | 446 | /* unknown packet */ |
447 | BT_ERR("Unknown HCI packet with type 0x%02x received", info->rx_skb->pkt_type); | 447 | BT_ERR("Unknown HCI packet with type 0x%02x received", bt_cb(info->rx_skb)->pkt_type); |
448 | info->hdev->stat.err_rx++; | 448 | info->hdev->stat.err_rx++; |
449 | 449 | ||
450 | kfree_skb(info->rx_skb); | 450 | kfree_skb(info->rx_skb); |
@@ -586,21 +586,21 @@ static int bluecard_hci_set_baud_rate(struct hci_dev *hdev, int baud) | |||
586 | switch (baud) { | 586 | switch (baud) { |
587 | case 460800: | 587 | case 460800: |
588 | cmd[4] = 0x00; | 588 | cmd[4] = 0x00; |
589 | skb->pkt_type = PKT_BAUD_RATE_460800; | 589 | bt_cb(skb)->pkt_type = PKT_BAUD_RATE_460800; |
590 | break; | 590 | break; |
591 | case 230400: | 591 | case 230400: |
592 | cmd[4] = 0x01; | 592 | cmd[4] = 0x01; |
593 | skb->pkt_type = PKT_BAUD_RATE_230400; | 593 | bt_cb(skb)->pkt_type = PKT_BAUD_RATE_230400; |
594 | break; | 594 | break; |
595 | case 115200: | 595 | case 115200: |
596 | cmd[4] = 0x02; | 596 | cmd[4] = 0x02; |
597 | skb->pkt_type = PKT_BAUD_RATE_115200; | 597 | bt_cb(skb)->pkt_type = PKT_BAUD_RATE_115200; |
598 | break; | 598 | break; |
599 | case 57600: | 599 | case 57600: |
600 | /* Fall through... */ | 600 | /* Fall through... */ |
601 | default: | 601 | default: |
602 | cmd[4] = 0x03; | 602 | cmd[4] = 0x03; |
603 | skb->pkt_type = PKT_BAUD_RATE_57600; | 603 | bt_cb(skb)->pkt_type = PKT_BAUD_RATE_57600; |
604 | break; | 604 | break; |
605 | } | 605 | } |
606 | 606 | ||
@@ -680,7 +680,7 @@ static int bluecard_hci_send_frame(struct sk_buff *skb) | |||
680 | 680 | ||
681 | info = (bluecard_info_t *)(hdev->driver_data); | 681 | info = (bluecard_info_t *)(hdev->driver_data); |
682 | 682 | ||
683 | switch (skb->pkt_type) { | 683 | switch (bt_cb(skb)->pkt_type) { |
684 | case HCI_COMMAND_PKT: | 684 | case HCI_COMMAND_PKT: |
685 | hdev->stat.cmd_tx++; | 685 | hdev->stat.cmd_tx++; |
686 | break; | 686 | break; |
@@ -693,7 +693,7 @@ static int bluecard_hci_send_frame(struct sk_buff *skb) | |||
693 | }; | 693 | }; |
694 | 694 | ||
695 | /* Prepend skb with frame type */ | 695 | /* Prepend skb with frame type */ |
696 | memcpy(skb_push(skb, 1), &(skb->pkt_type), 1); | 696 | memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1); |
697 | skb_queue_tail(&(info->txq), skb); | 697 | skb_queue_tail(&(info->txq), skb); |
698 | 698 | ||
699 | bluecard_write_wakeup(info); | 699 | bluecard_write_wakeup(info); |
diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c index f696da6f417b..a1bf8f066c88 100644 --- a/drivers/bluetooth/bpa10x.c +++ b/drivers/bluetooth/bpa10x.c | |||
@@ -105,7 +105,7 @@ static void bpa10x_recv_bulk(struct bpa10x_data *data, unsigned char *buf, int c | |||
105 | if (skb) { | 105 | if (skb) { |
106 | memcpy(skb_put(skb, len), buf, len); | 106 | memcpy(skb_put(skb, len), buf, len); |
107 | skb->dev = (void *) data->hdev; | 107 | skb->dev = (void *) data->hdev; |
108 | skb->pkt_type = HCI_ACLDATA_PKT; | 108 | bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT; |
109 | hci_recv_frame(skb); | 109 | hci_recv_frame(skb); |
110 | } | 110 | } |
111 | break; | 111 | break; |
@@ -117,7 +117,7 @@ static void bpa10x_recv_bulk(struct bpa10x_data *data, unsigned char *buf, int c | |||
117 | if (skb) { | 117 | if (skb) { |
118 | memcpy(skb_put(skb, len), buf, len); | 118 | memcpy(skb_put(skb, len), buf, len); |
119 | skb->dev = (void *) data->hdev; | 119 | skb->dev = (void *) data->hdev; |
120 | skb->pkt_type = HCI_SCODATA_PKT; | 120 | bt_cb(skb)->pkt_type = HCI_SCODATA_PKT; |
121 | hci_recv_frame(skb); | 121 | hci_recv_frame(skb); |
122 | } | 122 | } |
123 | break; | 123 | break; |
@@ -129,7 +129,7 @@ static void bpa10x_recv_bulk(struct bpa10x_data *data, unsigned char *buf, int c | |||
129 | if (skb) { | 129 | if (skb) { |
130 | memcpy(skb_put(skb, len), buf, len); | 130 | memcpy(skb_put(skb, len), buf, len); |
131 | skb->dev = (void *) data->hdev; | 131 | skb->dev = (void *) data->hdev; |
132 | skb->pkt_type = HCI_VENDOR_PKT; | 132 | bt_cb(skb)->pkt_type = HCI_VENDOR_PKT; |
133 | hci_recv_frame(skb); | 133 | hci_recv_frame(skb); |
134 | } | 134 | } |
135 | break; | 135 | break; |
@@ -190,7 +190,7 @@ static int bpa10x_recv_event(struct bpa10x_data *data, unsigned char *buf, int s | |||
190 | } | 190 | } |
191 | 191 | ||
192 | skb->dev = (void *) data->hdev; | 192 | skb->dev = (void *) data->hdev; |
193 | skb->pkt_type = pkt_type; | 193 | bt_cb(skb)->pkt_type = pkt_type; |
194 | 194 | ||
195 | memcpy(skb_put(skb, size), buf, size); | 195 | memcpy(skb_put(skb, size), buf, size); |
196 | 196 | ||
@@ -307,7 +307,8 @@ unlock: | |||
307 | read_unlock(&data->lock); | 307 | read_unlock(&data->lock); |
308 | } | 308 | } |
309 | 309 | ||
310 | static inline struct urb *bpa10x_alloc_urb(struct usb_device *udev, unsigned int pipe, size_t size, int flags, void *data) | 310 | static inline struct urb *bpa10x_alloc_urb(struct usb_device *udev, unsigned int pipe, |
311 | size_t size, unsigned int __nocast flags, void *data) | ||
311 | { | 312 | { |
312 | struct urb *urb; | 313 | struct urb *urb; |
313 | struct usb_ctrlrequest *cr; | 314 | struct usb_ctrlrequest *cr; |
@@ -487,7 +488,7 @@ static int bpa10x_send_frame(struct sk_buff *skb) | |||
487 | struct hci_dev *hdev = (struct hci_dev *) skb->dev; | 488 | struct hci_dev *hdev = (struct hci_dev *) skb->dev; |
488 | struct bpa10x_data *data; | 489 | struct bpa10x_data *data; |
489 | 490 | ||
490 | BT_DBG("hdev %p skb %p type %d len %d", hdev, skb, skb->pkt_type, skb->len); | 491 | BT_DBG("hdev %p skb %p type %d len %d", hdev, skb, bt_cb(skb)->pkt_type, skb->len); |
491 | 492 | ||
492 | if (!hdev) { | 493 | if (!hdev) { |
493 | BT_ERR("Frame for unknown HCI device"); | 494 | BT_ERR("Frame for unknown HCI device"); |
@@ -500,9 +501,9 @@ static int bpa10x_send_frame(struct sk_buff *skb) | |||
500 | data = hdev->driver_data; | 501 | data = hdev->driver_data; |
501 | 502 | ||
502 | /* Prepend skb with frame type */ | 503 | /* Prepend skb with frame type */ |
503 | memcpy(skb_push(skb, 1), &(skb->pkt_type), 1); | 504 | memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1); |
504 | 505 | ||
505 | switch (skb->pkt_type) { | 506 | switch (bt_cb(skb)->pkt_type) { |
506 | case HCI_COMMAND_PKT: | 507 | case HCI_COMMAND_PKT: |
507 | hdev->stat.cmd_tx++; | 508 | hdev->stat.cmd_tx++; |
508 | skb_queue_tail(&data->cmd_queue, skb); | 509 | skb_queue_tail(&data->cmd_queue, skb); |
diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c index adf1750ea58d..2e0338d80f32 100644 --- a/drivers/bluetooth/bt3c_cs.c +++ b/drivers/bluetooth/bt3c_cs.c | |||
@@ -259,11 +259,11 @@ static void bt3c_receive(bt3c_info_t *info) | |||
259 | if (info->rx_state == RECV_WAIT_PACKET_TYPE) { | 259 | if (info->rx_state == RECV_WAIT_PACKET_TYPE) { |
260 | 260 | ||
261 | info->rx_skb->dev = (void *) info->hdev; | 261 | info->rx_skb->dev = (void *) info->hdev; |
262 | info->rx_skb->pkt_type = inb(iobase + DATA_L); | 262 | bt_cb(info->rx_skb)->pkt_type = inb(iobase + DATA_L); |
263 | inb(iobase + DATA_H); | 263 | inb(iobase + DATA_H); |
264 | //printk("bt3c: PACKET_TYPE=%02x\n", info->rx_skb->pkt_type); | 264 | //printk("bt3c: PACKET_TYPE=%02x\n", bt_cb(info->rx_skb)->pkt_type); |
265 | 265 | ||
266 | switch (info->rx_skb->pkt_type) { | 266 | switch (bt_cb(info->rx_skb)->pkt_type) { |
267 | 267 | ||
268 | case HCI_EVENT_PKT: | 268 | case HCI_EVENT_PKT: |
269 | info->rx_state = RECV_WAIT_EVENT_HEADER; | 269 | info->rx_state = RECV_WAIT_EVENT_HEADER; |
@@ -282,7 +282,7 @@ static void bt3c_receive(bt3c_info_t *info) | |||
282 | 282 | ||
283 | default: | 283 | default: |
284 | /* Unknown packet */ | 284 | /* Unknown packet */ |
285 | BT_ERR("Unknown HCI packet with type 0x%02x received", info->rx_skb->pkt_type); | 285 | BT_ERR("Unknown HCI packet with type 0x%02x received", bt_cb(info->rx_skb)->pkt_type); |
286 | info->hdev->stat.err_rx++; | 286 | info->hdev->stat.err_rx++; |
287 | clear_bit(HCI_RUNNING, &(info->hdev->flags)); | 287 | clear_bit(HCI_RUNNING, &(info->hdev->flags)); |
288 | 288 | ||
@@ -439,7 +439,7 @@ static int bt3c_hci_send_frame(struct sk_buff *skb) | |||
439 | 439 | ||
440 | info = (bt3c_info_t *) (hdev->driver_data); | 440 | info = (bt3c_info_t *) (hdev->driver_data); |
441 | 441 | ||
442 | switch (skb->pkt_type) { | 442 | switch (bt_cb(skb)->pkt_type) { |
443 | case HCI_COMMAND_PKT: | 443 | case HCI_COMMAND_PKT: |
444 | hdev->stat.cmd_tx++; | 444 | hdev->stat.cmd_tx++; |
445 | break; | 445 | break; |
@@ -452,7 +452,7 @@ static int bt3c_hci_send_frame(struct sk_buff *skb) | |||
452 | }; | 452 | }; |
453 | 453 | ||
454 | /* Prepend skb with frame type */ | 454 | /* Prepend skb with frame type */ |
455 | memcpy(skb_push(skb, 1), &(skb->pkt_type), 1); | 455 | memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1); |
456 | skb_queue_tail(&(info->txq), skb); | 456 | skb_queue_tail(&(info->txq), skb); |
457 | 457 | ||
458 | spin_lock_irqsave(&(info->lock), flags); | 458 | spin_lock_irqsave(&(info->lock), flags); |
diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c index e4c59fdc0e12..89486ea7a021 100644 --- a/drivers/bluetooth/btuart_cs.c +++ b/drivers/bluetooth/btuart_cs.c | |||
@@ -211,9 +211,9 @@ static void btuart_receive(btuart_info_t *info) | |||
211 | if (info->rx_state == RECV_WAIT_PACKET_TYPE) { | 211 | if (info->rx_state == RECV_WAIT_PACKET_TYPE) { |
212 | 212 | ||
213 | info->rx_skb->dev = (void *) info->hdev; | 213 | info->rx_skb->dev = (void *) info->hdev; |
214 | info->rx_skb->pkt_type = inb(iobase + UART_RX); | 214 | bt_cb(info->rx_skb)->pkt_type = inb(iobase + UART_RX); |
215 | 215 | ||
216 | switch (info->rx_skb->pkt_type) { | 216 | switch (bt_cb(info->rx_skb)->pkt_type) { |
217 | 217 | ||
218 | case HCI_EVENT_PKT: | 218 | case HCI_EVENT_PKT: |
219 | info->rx_state = RECV_WAIT_EVENT_HEADER; | 219 | info->rx_state = RECV_WAIT_EVENT_HEADER; |
@@ -232,7 +232,7 @@ static void btuart_receive(btuart_info_t *info) | |||
232 | 232 | ||
233 | default: | 233 | default: |
234 | /* Unknown packet */ | 234 | /* Unknown packet */ |
235 | BT_ERR("Unknown HCI packet with type 0x%02x received", info->rx_skb->pkt_type); | 235 | BT_ERR("Unknown HCI packet with type 0x%02x received", bt_cb(info->rx_skb)->pkt_type); |
236 | info->hdev->stat.err_rx++; | 236 | info->hdev->stat.err_rx++; |
237 | clear_bit(HCI_RUNNING, &(info->hdev->flags)); | 237 | clear_bit(HCI_RUNNING, &(info->hdev->flags)); |
238 | 238 | ||
@@ -447,7 +447,7 @@ static int btuart_hci_send_frame(struct sk_buff *skb) | |||
447 | 447 | ||
448 | info = (btuart_info_t *)(hdev->driver_data); | 448 | info = (btuart_info_t *)(hdev->driver_data); |
449 | 449 | ||
450 | switch (skb->pkt_type) { | 450 | switch (bt_cb(skb)->pkt_type) { |
451 | case HCI_COMMAND_PKT: | 451 | case HCI_COMMAND_PKT: |
452 | hdev->stat.cmd_tx++; | 452 | hdev->stat.cmd_tx++; |
453 | break; | 453 | break; |
@@ -460,7 +460,7 @@ static int btuart_hci_send_frame(struct sk_buff *skb) | |||
460 | }; | 460 | }; |
461 | 461 | ||
462 | /* Prepend skb with frame type */ | 462 | /* Prepend skb with frame type */ |
463 | memcpy(skb_push(skb, 1), &(skb->pkt_type), 1); | 463 | memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1); |
464 | skb_queue_tail(&(info->txq), skb); | 464 | skb_queue_tail(&(info->txq), skb); |
465 | 465 | ||
466 | btuart_write_wakeup(info); | 466 | btuart_write_wakeup(info); |
diff --git a/drivers/bluetooth/dtl1_cs.c b/drivers/bluetooth/dtl1_cs.c index e39868c3da48..84c1f8839422 100644 --- a/drivers/bluetooth/dtl1_cs.c +++ b/drivers/bluetooth/dtl1_cs.c | |||
@@ -251,7 +251,7 @@ static void dtl1_receive(dtl1_info_t *info) | |||
251 | info->rx_count = nsh->len + (nsh->len & 0x0001); | 251 | info->rx_count = nsh->len + (nsh->len & 0x0001); |
252 | break; | 252 | break; |
253 | case RECV_WAIT_DATA: | 253 | case RECV_WAIT_DATA: |
254 | info->rx_skb->pkt_type = nsh->type; | 254 | bt_cb(info->rx_skb)->pkt_type = nsh->type; |
255 | 255 | ||
256 | /* remove PAD byte if it exists */ | 256 | /* remove PAD byte if it exists */ |
257 | if (nsh->len & 0x0001) { | 257 | if (nsh->len & 0x0001) { |
@@ -262,7 +262,7 @@ static void dtl1_receive(dtl1_info_t *info) | |||
262 | /* remove NSH */ | 262 | /* remove NSH */ |
263 | skb_pull(info->rx_skb, NSHL); | 263 | skb_pull(info->rx_skb, NSHL); |
264 | 264 | ||
265 | switch (info->rx_skb->pkt_type) { | 265 | switch (bt_cb(info->rx_skb)->pkt_type) { |
266 | case 0x80: | 266 | case 0x80: |
267 | /* control data for the Nokia Card */ | 267 | /* control data for the Nokia Card */ |
268 | dtl1_control(info, info->rx_skb); | 268 | dtl1_control(info, info->rx_skb); |
@@ -272,12 +272,12 @@ static void dtl1_receive(dtl1_info_t *info) | |||
272 | case 0x84: | 272 | case 0x84: |
273 | /* send frame to the HCI layer */ | 273 | /* send frame to the HCI layer */ |
274 | info->rx_skb->dev = (void *) info->hdev; | 274 | info->rx_skb->dev = (void *) info->hdev; |
275 | info->rx_skb->pkt_type &= 0x0f; | 275 | bt_cb(info->rx_skb)->pkt_type &= 0x0f; |
276 | hci_recv_frame(info->rx_skb); | 276 | hci_recv_frame(info->rx_skb); |
277 | break; | 277 | break; |
278 | default: | 278 | default: |
279 | /* unknown packet */ | 279 | /* unknown packet */ |
280 | BT_ERR("Unknown HCI packet with type 0x%02x received", info->rx_skb->pkt_type); | 280 | BT_ERR("Unknown HCI packet with type 0x%02x received", bt_cb(info->rx_skb)->pkt_type); |
281 | kfree_skb(info->rx_skb); | 281 | kfree_skb(info->rx_skb); |
282 | break; | 282 | break; |
283 | } | 283 | } |
@@ -410,7 +410,7 @@ static int dtl1_hci_send_frame(struct sk_buff *skb) | |||
410 | 410 | ||
411 | info = (dtl1_info_t *)(hdev->driver_data); | 411 | info = (dtl1_info_t *)(hdev->driver_data); |
412 | 412 | ||
413 | switch (skb->pkt_type) { | 413 | switch (bt_cb(skb)->pkt_type) { |
414 | case HCI_COMMAND_PKT: | 414 | case HCI_COMMAND_PKT: |
415 | hdev->stat.cmd_tx++; | 415 | hdev->stat.cmd_tx++; |
416 | nsh.type = 0x81; | 416 | nsh.type = 0x81; |
diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c index 858fddb046de..0ee324e1265d 100644 --- a/drivers/bluetooth/hci_bcsp.c +++ b/drivers/bluetooth/hci_bcsp.c | |||
@@ -149,7 +149,7 @@ static int bcsp_enqueue(struct hci_uart *hu, struct sk_buff *skb) | |||
149 | return 0; | 149 | return 0; |
150 | } | 150 | } |
151 | 151 | ||
152 | switch (skb->pkt_type) { | 152 | switch (bt_cb(skb)->pkt_type) { |
153 | case HCI_ACLDATA_PKT: | 153 | case HCI_ACLDATA_PKT: |
154 | case HCI_COMMAND_PKT: | 154 | case HCI_COMMAND_PKT: |
155 | skb_queue_tail(&bcsp->rel, skb); | 155 | skb_queue_tail(&bcsp->rel, skb); |
@@ -227,7 +227,7 @@ static struct sk_buff *bcsp_prepare_pkt(struct bcsp_struct *bcsp, u8 *data, | |||
227 | if (!nskb) | 227 | if (!nskb) |
228 | return NULL; | 228 | return NULL; |
229 | 229 | ||
230 | nskb->pkt_type = pkt_type; | 230 | bt_cb(nskb)->pkt_type = pkt_type; |
231 | 231 | ||
232 | bcsp_slip_msgdelim(nskb); | 232 | bcsp_slip_msgdelim(nskb); |
233 | 233 | ||
@@ -286,7 +286,7 @@ static struct sk_buff *bcsp_dequeue(struct hci_uart *hu) | |||
286 | since they have priority */ | 286 | since they have priority */ |
287 | 287 | ||
288 | if ((skb = skb_dequeue(&bcsp->unrel)) != NULL) { | 288 | if ((skb = skb_dequeue(&bcsp->unrel)) != NULL) { |
289 | struct sk_buff *nskb = bcsp_prepare_pkt(bcsp, skb->data, skb->len, skb->pkt_type); | 289 | struct sk_buff *nskb = bcsp_prepare_pkt(bcsp, skb->data, skb->len, bt_cb(skb)->pkt_type); |
290 | if (nskb) { | 290 | if (nskb) { |
291 | kfree_skb(skb); | 291 | kfree_skb(skb); |
292 | return nskb; | 292 | return nskb; |
@@ -303,7 +303,7 @@ static struct sk_buff *bcsp_dequeue(struct hci_uart *hu) | |||
303 | spin_lock_irqsave(&bcsp->unack.lock, flags); | 303 | spin_lock_irqsave(&bcsp->unack.lock, flags); |
304 | 304 | ||
305 | if (bcsp->unack.qlen < BCSP_TXWINSIZE && (skb = skb_dequeue(&bcsp->rel)) != NULL) { | 305 | if (bcsp->unack.qlen < BCSP_TXWINSIZE && (skb = skb_dequeue(&bcsp->rel)) != NULL) { |
306 | struct sk_buff *nskb = bcsp_prepare_pkt(bcsp, skb->data, skb->len, skb->pkt_type); | 306 | struct sk_buff *nskb = bcsp_prepare_pkt(bcsp, skb->data, skb->len, bt_cb(skb)->pkt_type); |
307 | if (nskb) { | 307 | if (nskb) { |
308 | __skb_queue_tail(&bcsp->unack, skb); | 308 | __skb_queue_tail(&bcsp->unack, skb); |
309 | mod_timer(&bcsp->tbcsp, jiffies + HZ / 4); | 309 | mod_timer(&bcsp->tbcsp, jiffies + HZ / 4); |
@@ -401,7 +401,7 @@ static void bcsp_handle_le_pkt(struct hci_uart *hu) | |||
401 | if (!nskb) | 401 | if (!nskb) |
402 | return; | 402 | return; |
403 | memcpy(skb_put(nskb, 4), conf_rsp_pkt, 4); | 403 | memcpy(skb_put(nskb, 4), conf_rsp_pkt, 4); |
404 | nskb->pkt_type = BCSP_LE_PKT; | 404 | bt_cb(nskb)->pkt_type = BCSP_LE_PKT; |
405 | 405 | ||
406 | skb_queue_head(&bcsp->unrel, nskb); | 406 | skb_queue_head(&bcsp->unrel, nskb); |
407 | hci_uart_tx_wakeup(hu); | 407 | hci_uart_tx_wakeup(hu); |
@@ -483,14 +483,14 @@ static inline void bcsp_complete_rx_pkt(struct hci_uart *hu) | |||
483 | bcsp_pkt_cull(bcsp); | 483 | bcsp_pkt_cull(bcsp); |
484 | if ((bcsp->rx_skb->data[1] & 0x0f) == 6 && | 484 | if ((bcsp->rx_skb->data[1] & 0x0f) == 6 && |
485 | bcsp->rx_skb->data[0] & 0x80) { | 485 | bcsp->rx_skb->data[0] & 0x80) { |
486 | bcsp->rx_skb->pkt_type = HCI_ACLDATA_PKT; | 486 | bt_cb(bcsp->rx_skb)->pkt_type = HCI_ACLDATA_PKT; |
487 | pass_up = 1; | 487 | pass_up = 1; |
488 | } else if ((bcsp->rx_skb->data[1] & 0x0f) == 5 && | 488 | } else if ((bcsp->rx_skb->data[1] & 0x0f) == 5 && |
489 | bcsp->rx_skb->data[0] & 0x80) { | 489 | bcsp->rx_skb->data[0] & 0x80) { |
490 | bcsp->rx_skb->pkt_type = HCI_EVENT_PKT; | 490 | bt_cb(bcsp->rx_skb)->pkt_type = HCI_EVENT_PKT; |
491 | pass_up = 1; | 491 | pass_up = 1; |
492 | } else if ((bcsp->rx_skb->data[1] & 0x0f) == 7) { | 492 | } else if ((bcsp->rx_skb->data[1] & 0x0f) == 7) { |
493 | bcsp->rx_skb->pkt_type = HCI_SCODATA_PKT; | 493 | bt_cb(bcsp->rx_skb)->pkt_type = HCI_SCODATA_PKT; |
494 | pass_up = 1; | 494 | pass_up = 1; |
495 | } else if ((bcsp->rx_skb->data[1] & 0x0f) == 1 && | 495 | } else if ((bcsp->rx_skb->data[1] & 0x0f) == 1 && |
496 | !(bcsp->rx_skb->data[0] & 0x80)) { | 496 | !(bcsp->rx_skb->data[0] & 0x80)) { |
@@ -512,7 +512,7 @@ static inline void bcsp_complete_rx_pkt(struct hci_uart *hu) | |||
512 | hdr.evt = 0xff; | 512 | hdr.evt = 0xff; |
513 | hdr.plen = bcsp->rx_skb->len; | 513 | hdr.plen = bcsp->rx_skb->len; |
514 | memcpy(skb_push(bcsp->rx_skb, HCI_EVENT_HDR_SIZE), &hdr, HCI_EVENT_HDR_SIZE); | 514 | memcpy(skb_push(bcsp->rx_skb, HCI_EVENT_HDR_SIZE), &hdr, HCI_EVENT_HDR_SIZE); |
515 | bcsp->rx_skb->pkt_type = HCI_EVENT_PKT; | 515 | bt_cb(bcsp->rx_skb)->pkt_type = HCI_EVENT_PKT; |
516 | 516 | ||
517 | hci_recv_frame(bcsp->rx_skb); | 517 | hci_recv_frame(bcsp->rx_skb); |
518 | } else { | 518 | } else { |
diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c index 533323b60e63..cf8a22d58d96 100644 --- a/drivers/bluetooth/hci_h4.c +++ b/drivers/bluetooth/hci_h4.c | |||
@@ -112,7 +112,7 @@ static int h4_enqueue(struct hci_uart *hu, struct sk_buff *skb) | |||
112 | BT_DBG("hu %p skb %p", hu, skb); | 112 | BT_DBG("hu %p skb %p", hu, skb); |
113 | 113 | ||
114 | /* Prepend skb with frame type */ | 114 | /* Prepend skb with frame type */ |
115 | memcpy(skb_push(skb, 1), &skb->pkt_type, 1); | 115 | memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1); |
116 | skb_queue_tail(&h4->txq, skb); | 116 | skb_queue_tail(&h4->txq, skb); |
117 | return 0; | 117 | return 0; |
118 | } | 118 | } |
@@ -239,7 +239,7 @@ static int h4_recv(struct hci_uart *hu, void *data, int count) | |||
239 | return 0; | 239 | return 0; |
240 | } | 240 | } |
241 | h4->rx_skb->dev = (void *) hu->hdev; | 241 | h4->rx_skb->dev = (void *) hu->hdev; |
242 | h4->rx_skb->pkt_type = type; | 242 | bt_cb(h4->rx_skb)->pkt_type = type; |
243 | } | 243 | } |
244 | return count; | 244 | return count; |
245 | } | 245 | } |
diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index 90be2eae52e0..aed80cc22890 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c | |||
@@ -153,7 +153,7 @@ restart: | |||
153 | break; | 153 | break; |
154 | } | 154 | } |
155 | 155 | ||
156 | hci_uart_tx_complete(hu, skb->pkt_type); | 156 | hci_uart_tx_complete(hu, bt_cb(skb)->pkt_type); |
157 | kfree_skb(skb); | 157 | kfree_skb(skb); |
158 | } | 158 | } |
159 | 159 | ||
@@ -229,7 +229,7 @@ static int hci_uart_send_frame(struct sk_buff *skb) | |||
229 | hu = (struct hci_uart *) hdev->driver_data; | 229 | hu = (struct hci_uart *) hdev->driver_data; |
230 | tty = hu->tty; | 230 | tty = hu->tty; |
231 | 231 | ||
232 | BT_DBG("%s: type %d len %d", hdev->name, skb->pkt_type, skb->len); | 232 | BT_DBG("%s: type %d len %d", hdev->name, bt_cb(skb)->pkt_type, skb->len); |
233 | 233 | ||
234 | hu->proto->enqueue(hu, skb); | 234 | hu->proto->enqueue(hu, skb); |
235 | 235 | ||
diff --git a/drivers/bluetooth/hci_usb.c b/drivers/bluetooth/hci_usb.c index 657719b8254f..67d96b5cbb96 100644 --- a/drivers/bluetooth/hci_usb.c +++ b/drivers/bluetooth/hci_usb.c | |||
@@ -127,7 +127,7 @@ static struct usb_device_id blacklist_ids[] = { | |||
127 | { } /* Terminating entry */ | 127 | { } /* Terminating entry */ |
128 | }; | 128 | }; |
129 | 129 | ||
130 | static struct _urb *_urb_alloc(int isoc, int gfp) | 130 | static struct _urb *_urb_alloc(int isoc, unsigned int __nocast gfp) |
131 | { | 131 | { |
132 | struct _urb *_urb = kmalloc(sizeof(struct _urb) + | 132 | struct _urb *_urb = kmalloc(sizeof(struct _urb) + |
133 | sizeof(struct usb_iso_packet_descriptor) * isoc, gfp); | 133 | sizeof(struct usb_iso_packet_descriptor) * isoc, gfp); |
@@ -443,7 +443,7 @@ static int __tx_submit(struct hci_usb *husb, struct _urb *_urb) | |||
443 | 443 | ||
444 | static inline int hci_usb_send_ctrl(struct hci_usb *husb, struct sk_buff *skb) | 444 | static inline int hci_usb_send_ctrl(struct hci_usb *husb, struct sk_buff *skb) |
445 | { | 445 | { |
446 | struct _urb *_urb = __get_completed(husb, skb->pkt_type); | 446 | struct _urb *_urb = __get_completed(husb, bt_cb(skb)->pkt_type); |
447 | struct usb_ctrlrequest *dr; | 447 | struct usb_ctrlrequest *dr; |
448 | struct urb *urb; | 448 | struct urb *urb; |
449 | 449 | ||
@@ -451,7 +451,7 @@ static inline int hci_usb_send_ctrl(struct hci_usb *husb, struct sk_buff *skb) | |||
451 | _urb = _urb_alloc(0, GFP_ATOMIC); | 451 | _urb = _urb_alloc(0, GFP_ATOMIC); |
452 | if (!_urb) | 452 | if (!_urb) |
453 | return -ENOMEM; | 453 | return -ENOMEM; |
454 | _urb->type = skb->pkt_type; | 454 | _urb->type = bt_cb(skb)->pkt_type; |
455 | 455 | ||
456 | dr = kmalloc(sizeof(*dr), GFP_ATOMIC); | 456 | dr = kmalloc(sizeof(*dr), GFP_ATOMIC); |
457 | if (!dr) { | 457 | if (!dr) { |
@@ -479,7 +479,7 @@ static inline int hci_usb_send_ctrl(struct hci_usb *husb, struct sk_buff *skb) | |||
479 | 479 | ||
480 | static inline int hci_usb_send_bulk(struct hci_usb *husb, struct sk_buff *skb) | 480 | static inline int hci_usb_send_bulk(struct hci_usb *husb, struct sk_buff *skb) |
481 | { | 481 | { |
482 | struct _urb *_urb = __get_completed(husb, skb->pkt_type); | 482 | struct _urb *_urb = __get_completed(husb, bt_cb(skb)->pkt_type); |
483 | struct urb *urb; | 483 | struct urb *urb; |
484 | int pipe; | 484 | int pipe; |
485 | 485 | ||
@@ -487,7 +487,7 @@ static inline int hci_usb_send_bulk(struct hci_usb *husb, struct sk_buff *skb) | |||
487 | _urb = _urb_alloc(0, GFP_ATOMIC); | 487 | _urb = _urb_alloc(0, GFP_ATOMIC); |
488 | if (!_urb) | 488 | if (!_urb) |
489 | return -ENOMEM; | 489 | return -ENOMEM; |
490 | _urb->type = skb->pkt_type; | 490 | _urb->type = bt_cb(skb)->pkt_type; |
491 | } | 491 | } |
492 | 492 | ||
493 | urb = &_urb->urb; | 493 | urb = &_urb->urb; |
@@ -505,14 +505,14 @@ static inline int hci_usb_send_bulk(struct hci_usb *husb, struct sk_buff *skb) | |||
505 | #ifdef CONFIG_BT_HCIUSB_SCO | 505 | #ifdef CONFIG_BT_HCIUSB_SCO |
506 | static inline int hci_usb_send_isoc(struct hci_usb *husb, struct sk_buff *skb) | 506 | static inline int hci_usb_send_isoc(struct hci_usb *husb, struct sk_buff *skb) |
507 | { | 507 | { |
508 | struct _urb *_urb = __get_completed(husb, skb->pkt_type); | 508 | struct _urb *_urb = __get_completed(husb, bt_cb(skb)->pkt_type); |
509 | struct urb *urb; | 509 | struct urb *urb; |
510 | 510 | ||
511 | if (!_urb) { | 511 | if (!_urb) { |
512 | _urb = _urb_alloc(HCI_MAX_ISOC_FRAMES, GFP_ATOMIC); | 512 | _urb = _urb_alloc(HCI_MAX_ISOC_FRAMES, GFP_ATOMIC); |
513 | if (!_urb) | 513 | if (!_urb) |
514 | return -ENOMEM; | 514 | return -ENOMEM; |
515 | _urb->type = skb->pkt_type; | 515 | _urb->type = bt_cb(skb)->pkt_type; |
516 | } | 516 | } |
517 | 517 | ||
518 | BT_DBG("%s skb %p len %d", husb->hdev->name, skb, skb->len); | 518 | BT_DBG("%s skb %p len %d", husb->hdev->name, skb, skb->len); |
@@ -601,11 +601,11 @@ static int hci_usb_send_frame(struct sk_buff *skb) | |||
601 | if (!test_bit(HCI_RUNNING, &hdev->flags)) | 601 | if (!test_bit(HCI_RUNNING, &hdev->flags)) |
602 | return -EBUSY; | 602 | return -EBUSY; |
603 | 603 | ||
604 | BT_DBG("%s type %d len %d", hdev->name, skb->pkt_type, skb->len); | 604 | BT_DBG("%s type %d len %d", hdev->name, bt_cb(skb)->pkt_type, skb->len); |
605 | 605 | ||
606 | husb = (struct hci_usb *) hdev->driver_data; | 606 | husb = (struct hci_usb *) hdev->driver_data; |
607 | 607 | ||
608 | switch (skb->pkt_type) { | 608 | switch (bt_cb(skb)->pkt_type) { |
609 | case HCI_COMMAND_PKT: | 609 | case HCI_COMMAND_PKT: |
610 | hdev->stat.cmd_tx++; | 610 | hdev->stat.cmd_tx++; |
611 | break; | 611 | break; |
@@ -627,7 +627,7 @@ static int hci_usb_send_frame(struct sk_buff *skb) | |||
627 | 627 | ||
628 | read_lock(&husb->completion_lock); | 628 | read_lock(&husb->completion_lock); |
629 | 629 | ||
630 | skb_queue_tail(__transmit_q(husb, skb->pkt_type), skb); | 630 | skb_queue_tail(__transmit_q(husb, bt_cb(skb)->pkt_type), skb); |
631 | hci_usb_tx_wakeup(husb); | 631 | hci_usb_tx_wakeup(husb); |
632 | 632 | ||
633 | read_unlock(&husb->completion_lock); | 633 | read_unlock(&husb->completion_lock); |
@@ -682,7 +682,7 @@ static inline int __recv_frame(struct hci_usb *husb, int type, void *data, int c | |||
682 | return -ENOMEM; | 682 | return -ENOMEM; |
683 | } | 683 | } |
684 | skb->dev = (void *) husb->hdev; | 684 | skb->dev = (void *) husb->hdev; |
685 | skb->pkt_type = type; | 685 | bt_cb(skb)->pkt_type = type; |
686 | 686 | ||
687 | __reassembly(husb, type) = skb; | 687 | __reassembly(husb, type) = skb; |
688 | 688 | ||
@@ -702,6 +702,7 @@ static inline int __recv_frame(struct hci_usb *husb, int type, void *data, int c | |||
702 | if (!scb->expect) { | 702 | if (!scb->expect) { |
703 | /* Complete frame */ | 703 | /* Complete frame */ |
704 | __reassembly(husb, type) = NULL; | 704 | __reassembly(husb, type) = NULL; |
705 | bt_cb(skb)->pkt_type = type; | ||
705 | hci_recv_frame(skb); | 706 | hci_recv_frame(skb); |
706 | } | 707 | } |
707 | 708 | ||
diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index f9b956fb2b8b..52cbd45c308f 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c | |||
@@ -1,229 +1,220 @@ | |||
1 | /* | ||
2 | BlueZ - Bluetooth protocol stack for Linux | ||
3 | Copyright (C) 2000-2001 Qualcomm Incorporated | ||
4 | |||
5 | Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> | ||
6 | |||
7 | This program is free software; you can redistribute it and/or modify | ||
8 | it under the terms of the GNU General Public License version 2 as | ||
9 | published by the Free Software Foundation; | ||
10 | |||
11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
12 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. | ||
14 | IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY | ||
15 | CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES | ||
16 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
17 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
18 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
19 | |||
20 | ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, | ||
21 | COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS | ||
22 | SOFTWARE IS DISCLAIMED. | ||
23 | */ | ||
24 | |||
25 | /* | 1 | /* |
26 | * Bluetooth HCI virtual device driver. | ||
27 | * | 2 | * |
28 | * $Id: hci_vhci.c,v 1.3 2002/04/17 17:37:20 maxk Exp $ | 3 | * Bluetooth virtual HCI driver |
4 | * | ||
5 | * Copyright (C) 2000-2001 Qualcomm Incorporated | ||
6 | * Copyright (C) 2002-2003 Maxim Krasnyansky <maxk@qualcomm.com> | ||
7 | * Copyright (C) 2004-2005 Marcel Holtmann <marcel@holtmann.org> | ||
8 | * | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
23 | * | ||
29 | */ | 24 | */ |
30 | #define VERSION "1.1" | ||
31 | 25 | ||
32 | #include <linux/config.h> | 26 | #include <linux/config.h> |
33 | #include <linux/module.h> | 27 | #include <linux/module.h> |
34 | 28 | ||
35 | #include <linux/errno.h> | ||
36 | #include <linux/kernel.h> | 29 | #include <linux/kernel.h> |
37 | #include <linux/major.h> | 30 | #include <linux/init.h> |
38 | #include <linux/sched.h> | ||
39 | #include <linux/slab.h> | 31 | #include <linux/slab.h> |
32 | #include <linux/types.h> | ||
33 | #include <linux/errno.h> | ||
34 | #include <linux/sched.h> | ||
40 | #include <linux/poll.h> | 35 | #include <linux/poll.h> |
41 | #include <linux/fcntl.h> | ||
42 | #include <linux/init.h> | ||
43 | #include <linux/random.h> | ||
44 | 36 | ||
45 | #include <linux/skbuff.h> | 37 | #include <linux/skbuff.h> |
46 | #include <linux/miscdevice.h> | 38 | #include <linux/miscdevice.h> |
47 | 39 | ||
48 | #include <asm/system.h> | ||
49 | #include <asm/uaccess.h> | ||
50 | |||
51 | #include <net/bluetooth/bluetooth.h> | 40 | #include <net/bluetooth/bluetooth.h> |
52 | #include <net/bluetooth/hci_core.h> | 41 | #include <net/bluetooth/hci_core.h> |
53 | #include "hci_vhci.h" | ||
54 | 42 | ||
55 | /* HCI device part */ | 43 | #ifndef CONFIG_BT_HCIVHCI_DEBUG |
44 | #undef BT_DBG | ||
45 | #define BT_DBG(D...) | ||
46 | #endif | ||
47 | |||
48 | #define VERSION "1.2" | ||
49 | |||
50 | static int minor = MISC_DYNAMIC_MINOR; | ||
51 | |||
52 | struct vhci_data { | ||
53 | struct hci_dev *hdev; | ||
54 | |||
55 | unsigned long flags; | ||
56 | |||
57 | wait_queue_head_t read_wait; | ||
58 | struct sk_buff_head readq; | ||
59 | |||
60 | struct fasync_struct *fasync; | ||
61 | }; | ||
56 | 62 | ||
57 | static int hci_vhci_open(struct hci_dev *hdev) | 63 | #define VHCI_FASYNC 0x0010 |
64 | |||
65 | static struct miscdevice vhci_miscdev; | ||
66 | |||
67 | static int vhci_open_dev(struct hci_dev *hdev) | ||
58 | { | 68 | { |
59 | set_bit(HCI_RUNNING, &hdev->flags); | 69 | set_bit(HCI_RUNNING, &hdev->flags); |
60 | return 0; | ||
61 | } | ||
62 | 70 | ||
63 | static int hci_vhci_flush(struct hci_dev *hdev) | ||
64 | { | ||
65 | struct hci_vhci_struct *hci_vhci = (struct hci_vhci_struct *) hdev->driver_data; | ||
66 | skb_queue_purge(&hci_vhci->readq); | ||
67 | return 0; | 71 | return 0; |
68 | } | 72 | } |
69 | 73 | ||
70 | static int hci_vhci_close(struct hci_dev *hdev) | 74 | static int vhci_close_dev(struct hci_dev *hdev) |
71 | { | 75 | { |
76 | struct vhci_data *vhci = hdev->driver_data; | ||
77 | |||
72 | if (!test_and_clear_bit(HCI_RUNNING, &hdev->flags)) | 78 | if (!test_and_clear_bit(HCI_RUNNING, &hdev->flags)) |
73 | return 0; | 79 | return 0; |
74 | 80 | ||
75 | hci_vhci_flush(hdev); | 81 | skb_queue_purge(&vhci->readq); |
82 | |||
76 | return 0; | 83 | return 0; |
77 | } | 84 | } |
78 | 85 | ||
79 | static void hci_vhci_destruct(struct hci_dev *hdev) | 86 | static int vhci_flush(struct hci_dev *hdev) |
80 | { | 87 | { |
81 | struct hci_vhci_struct *vhci; | 88 | struct vhci_data *vhci = hdev->driver_data; |
82 | 89 | ||
83 | if (!hdev) return; | 90 | skb_queue_purge(&vhci->readq); |
84 | 91 | ||
85 | vhci = (struct hci_vhci_struct *) hdev->driver_data; | 92 | return 0; |
86 | kfree(vhci); | ||
87 | } | 93 | } |
88 | 94 | ||
89 | static int hci_vhci_send_frame(struct sk_buff *skb) | 95 | static int vhci_send_frame(struct sk_buff *skb) |
90 | { | 96 | { |
91 | struct hci_dev* hdev = (struct hci_dev *) skb->dev; | 97 | struct hci_dev* hdev = (struct hci_dev *) skb->dev; |
92 | struct hci_vhci_struct *hci_vhci; | 98 | struct vhci_data *vhci; |
93 | 99 | ||
94 | if (!hdev) { | 100 | if (!hdev) { |
95 | BT_ERR("Frame for uknown device (hdev=NULL)"); | 101 | BT_ERR("Frame for unknown HCI device (hdev=NULL)"); |
96 | return -ENODEV; | 102 | return -ENODEV; |
97 | } | 103 | } |
98 | 104 | ||
99 | if (!test_bit(HCI_RUNNING, &hdev->flags)) | 105 | if (!test_bit(HCI_RUNNING, &hdev->flags)) |
100 | return -EBUSY; | 106 | return -EBUSY; |
101 | 107 | ||
102 | hci_vhci = (struct hci_vhci_struct *) hdev->driver_data; | 108 | vhci = hdev->driver_data; |
109 | |||
110 | memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1); | ||
111 | skb_queue_tail(&vhci->readq, skb); | ||
103 | 112 | ||
104 | memcpy(skb_push(skb, 1), &skb->pkt_type, 1); | 113 | if (vhci->flags & VHCI_FASYNC) |
105 | skb_queue_tail(&hci_vhci->readq, skb); | 114 | kill_fasync(&vhci->fasync, SIGIO, POLL_IN); |
106 | 115 | ||
107 | if (hci_vhci->flags & VHCI_FASYNC) | 116 | wake_up_interruptible(&vhci->read_wait); |
108 | kill_fasync(&hci_vhci->fasync, SIGIO, POLL_IN); | ||
109 | wake_up_interruptible(&hci_vhci->read_wait); | ||
110 | 117 | ||
111 | return 0; | 118 | return 0; |
112 | } | 119 | } |
113 | 120 | ||
114 | /* Character device part */ | 121 | static void vhci_destruct(struct hci_dev *hdev) |
115 | 122 | { | |
116 | /* Poll */ | 123 | kfree(hdev->driver_data); |
117 | static unsigned int hci_vhci_chr_poll(struct file *file, poll_table * wait) | ||
118 | { | ||
119 | struct hci_vhci_struct *hci_vhci = (struct hci_vhci_struct *) file->private_data; | ||
120 | |||
121 | poll_wait(file, &hci_vhci->read_wait, wait); | ||
122 | |||
123 | if (!skb_queue_empty(&hci_vhci->readq)) | ||
124 | return POLLIN | POLLRDNORM; | ||
125 | |||
126 | return POLLOUT | POLLWRNORM; | ||
127 | } | 124 | } |
128 | 125 | ||
129 | /* Get packet from user space buffer(already verified) */ | 126 | static inline ssize_t vhci_get_user(struct vhci_data *vhci, |
130 | static inline ssize_t hci_vhci_get_user(struct hci_vhci_struct *hci_vhci, const char __user *buf, size_t count) | 127 | const char __user *buf, size_t count) |
131 | { | 128 | { |
132 | struct sk_buff *skb; | 129 | struct sk_buff *skb; |
133 | 130 | ||
134 | if (count > HCI_MAX_FRAME_SIZE) | 131 | if (count > HCI_MAX_FRAME_SIZE) |
135 | return -EINVAL; | 132 | return -EINVAL; |
136 | 133 | ||
137 | if (!(skb = bt_skb_alloc(count, GFP_KERNEL))) | 134 | skb = bt_skb_alloc(count, GFP_KERNEL); |
135 | if (!skb) | ||
138 | return -ENOMEM; | 136 | return -ENOMEM; |
139 | 137 | ||
140 | if (copy_from_user(skb_put(skb, count), buf, count)) { | 138 | if (copy_from_user(skb_put(skb, count), buf, count)) { |
141 | kfree_skb(skb); | 139 | kfree_skb(skb); |
142 | return -EFAULT; | 140 | return -EFAULT; |
143 | } | 141 | } |
144 | 142 | ||
145 | skb->dev = (void *) hci_vhci->hdev; | 143 | skb->dev = (void *) vhci->hdev; |
146 | skb->pkt_type = *((__u8 *) skb->data); | 144 | bt_cb(skb)->pkt_type = *((__u8 *) skb->data); |
147 | skb_pull(skb, 1); | 145 | skb_pull(skb, 1); |
148 | 146 | ||
149 | hci_recv_frame(skb); | 147 | hci_recv_frame(skb); |
150 | 148 | ||
151 | return count; | 149 | return count; |
152 | } | ||
153 | |||
154 | /* Write */ | ||
155 | static ssize_t hci_vhci_chr_write(struct file * file, const char __user * buf, | ||
156 | size_t count, loff_t *pos) | ||
157 | { | ||
158 | struct hci_vhci_struct *hci_vhci = (struct hci_vhci_struct *) file->private_data; | ||
159 | |||
160 | if (!access_ok(VERIFY_READ, buf, count)) | ||
161 | return -EFAULT; | ||
162 | |||
163 | return hci_vhci_get_user(hci_vhci, buf, count); | ||
164 | } | 150 | } |
165 | 151 | ||
166 | /* Put packet to user space buffer(already verified) */ | 152 | static inline ssize_t vhci_put_user(struct vhci_data *vhci, |
167 | static inline ssize_t hci_vhci_put_user(struct hci_vhci_struct *hci_vhci, | 153 | struct sk_buff *skb, char __user *buf, int count) |
168 | struct sk_buff *skb, char __user *buf, | ||
169 | int count) | ||
170 | { | 154 | { |
171 | int len = count, total = 0; | ||
172 | char __user *ptr = buf; | 155 | char __user *ptr = buf; |
156 | int len, total = 0; | ||
157 | |||
158 | len = min_t(unsigned int, skb->len, count); | ||
173 | 159 | ||
174 | len = min_t(unsigned int, skb->len, len); | ||
175 | if (copy_to_user(ptr, skb->data, len)) | 160 | if (copy_to_user(ptr, skb->data, len)) |
176 | return -EFAULT; | 161 | return -EFAULT; |
162 | |||
177 | total += len; | 163 | total += len; |
178 | 164 | ||
179 | hci_vhci->hdev->stat.byte_tx += len; | 165 | vhci->hdev->stat.byte_tx += len; |
180 | switch (skb->pkt_type) { | ||
181 | case HCI_COMMAND_PKT: | ||
182 | hci_vhci->hdev->stat.cmd_tx++; | ||
183 | break; | ||
184 | 166 | ||
185 | case HCI_ACLDATA_PKT: | 167 | switch (bt_cb(skb)->pkt_type) { |
186 | hci_vhci->hdev->stat.acl_tx++; | 168 | case HCI_COMMAND_PKT: |
187 | break; | 169 | vhci->hdev->stat.cmd_tx++; |
170 | break; | ||
171 | |||
172 | case HCI_ACLDATA_PKT: | ||
173 | vhci->hdev->stat.acl_tx++; | ||
174 | break; | ||
188 | 175 | ||
189 | case HCI_SCODATA_PKT: | 176 | case HCI_SCODATA_PKT: |
190 | hci_vhci->hdev->stat.cmd_tx++; | 177 | vhci->hdev->stat.cmd_tx++; |
191 | break; | 178 | break; |
192 | }; | 179 | }; |
193 | 180 | ||
194 | return total; | 181 | return total; |
195 | } | 182 | } |
196 | 183 | ||
197 | /* Read */ | 184 | static loff_t vhci_llseek(struct file * file, loff_t offset, int origin) |
198 | static ssize_t hci_vhci_chr_read(struct file * file, char __user * buf, size_t count, loff_t *pos) | 185 | { |
186 | return -ESPIPE; | ||
187 | } | ||
188 | |||
189 | static ssize_t vhci_read(struct file * file, char __user * buf, size_t count, loff_t *pos) | ||
199 | { | 190 | { |
200 | struct hci_vhci_struct *hci_vhci = (struct hci_vhci_struct *) file->private_data; | ||
201 | DECLARE_WAITQUEUE(wait, current); | 191 | DECLARE_WAITQUEUE(wait, current); |
192 | struct vhci_data *vhci = file->private_data; | ||
202 | struct sk_buff *skb; | 193 | struct sk_buff *skb; |
203 | ssize_t ret = 0; | 194 | ssize_t ret = 0; |
204 | 195 | ||
205 | add_wait_queue(&hci_vhci->read_wait, &wait); | 196 | add_wait_queue(&vhci->read_wait, &wait); |
206 | while (count) { | 197 | while (count) { |
207 | set_current_state(TASK_INTERRUPTIBLE); | 198 | set_current_state(TASK_INTERRUPTIBLE); |
208 | 199 | ||
209 | /* Read frames from device queue */ | 200 | skb = skb_dequeue(&vhci->readq); |
210 | if (!(skb = skb_dequeue(&hci_vhci->readq))) { | 201 | if (!skb) { |
211 | if (file->f_flags & O_NONBLOCK) { | 202 | if (file->f_flags & O_NONBLOCK) { |
212 | ret = -EAGAIN; | 203 | ret = -EAGAIN; |
213 | break; | 204 | break; |
214 | } | 205 | } |
206 | |||
215 | if (signal_pending(current)) { | 207 | if (signal_pending(current)) { |
216 | ret = -ERESTARTSYS; | 208 | ret = -ERESTARTSYS; |
217 | break; | 209 | break; |
218 | } | 210 | } |
219 | 211 | ||
220 | /* Nothing to read, let's sleep */ | ||
221 | schedule(); | 212 | schedule(); |
222 | continue; | 213 | continue; |
223 | } | 214 | } |
224 | 215 | ||
225 | if (access_ok(VERIFY_WRITE, buf, count)) | 216 | if (access_ok(VERIFY_WRITE, buf, count)) |
226 | ret = hci_vhci_put_user(hci_vhci, skb, buf, count); | 217 | ret = vhci_put_user(vhci, skb, buf, count); |
227 | else | 218 | else |
228 | ret = -EFAULT; | 219 | ret = -EFAULT; |
229 | 220 | ||
@@ -231,84 +222,90 @@ static ssize_t hci_vhci_chr_read(struct file * file, char __user * buf, size_t c | |||
231 | break; | 222 | break; |
232 | } | 223 | } |
233 | set_current_state(TASK_RUNNING); | 224 | set_current_state(TASK_RUNNING); |
234 | remove_wait_queue(&hci_vhci->read_wait, &wait); | 225 | remove_wait_queue(&vhci->read_wait, &wait); |
235 | 226 | ||
236 | return ret; | 227 | return ret; |
237 | } | 228 | } |
238 | 229 | ||
239 | static loff_t hci_vhci_chr_lseek(struct file * file, loff_t offset, int origin) | 230 | static ssize_t vhci_write(struct file *file, |
231 | const char __user *buf, size_t count, loff_t *pos) | ||
240 | { | 232 | { |
241 | return -ESPIPE; | 233 | struct vhci_data *vhci = file->private_data; |
242 | } | ||
243 | 234 | ||
244 | static int hci_vhci_chr_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) | 235 | if (!access_ok(VERIFY_READ, buf, count)) |
245 | { | 236 | return -EFAULT; |
246 | return -EINVAL; | 237 | |
238 | return vhci_get_user(vhci, buf, count); | ||
247 | } | 239 | } |
248 | 240 | ||
249 | static int hci_vhci_chr_fasync(int fd, struct file *file, int on) | 241 | static unsigned int vhci_poll(struct file *file, poll_table *wait) |
250 | { | 242 | { |
251 | struct hci_vhci_struct *hci_vhci = (struct hci_vhci_struct *) file->private_data; | 243 | struct vhci_data *vhci = file->private_data; |
252 | int ret; | ||
253 | 244 | ||
254 | if ((ret = fasync_helper(fd, file, on, &hci_vhci->fasync)) < 0) | 245 | poll_wait(file, &vhci->read_wait, wait); |
255 | return ret; | ||
256 | |||
257 | if (on) | ||
258 | hci_vhci->flags |= VHCI_FASYNC; | ||
259 | else | ||
260 | hci_vhci->flags &= ~VHCI_FASYNC; | ||
261 | 246 | ||
262 | return 0; | 247 | if (!skb_queue_empty(&vhci->readq)) |
248 | return POLLIN | POLLRDNORM; | ||
249 | |||
250 | return POLLOUT | POLLWRNORM; | ||
263 | } | 251 | } |
264 | 252 | ||
265 | static int hci_vhci_chr_open(struct inode *inode, struct file * file) | 253 | static int vhci_ioctl(struct inode *inode, struct file *file, |
254 | unsigned int cmd, unsigned long arg) | ||
266 | { | 255 | { |
267 | struct hci_vhci_struct *hci_vhci = NULL; | 256 | return -EINVAL; |
257 | } | ||
258 | |||
259 | static int vhci_open(struct inode *inode, struct file *file) | ||
260 | { | ||
261 | struct vhci_data *vhci; | ||
268 | struct hci_dev *hdev; | 262 | struct hci_dev *hdev; |
269 | 263 | ||
270 | if (!(hci_vhci = kmalloc(sizeof(struct hci_vhci_struct), GFP_KERNEL))) | 264 | vhci = kmalloc(sizeof(struct vhci_data), GFP_KERNEL); |
265 | if (!vhci) | ||
271 | return -ENOMEM; | 266 | return -ENOMEM; |
272 | 267 | ||
273 | memset(hci_vhci, 0, sizeof(struct hci_vhci_struct)); | 268 | memset(vhci, 0, sizeof(struct vhci_data)); |
274 | 269 | ||
275 | skb_queue_head_init(&hci_vhci->readq); | 270 | skb_queue_head_init(&vhci->readq); |
276 | init_waitqueue_head(&hci_vhci->read_wait); | 271 | init_waitqueue_head(&vhci->read_wait); |
277 | 272 | ||
278 | /* Initialize and register HCI device */ | ||
279 | hdev = hci_alloc_dev(); | 273 | hdev = hci_alloc_dev(); |
280 | if (!hdev) { | 274 | if (!hdev) { |
281 | kfree(hci_vhci); | 275 | kfree(vhci); |
282 | return -ENOMEM; | 276 | return -ENOMEM; |
283 | } | 277 | } |
284 | 278 | ||
285 | hci_vhci->hdev = hdev; | 279 | vhci->hdev = hdev; |
286 | 280 | ||
287 | hdev->type = HCI_VHCI; | 281 | hdev->type = HCI_VHCI; |
288 | hdev->driver_data = hci_vhci; | 282 | hdev->driver_data = vhci; |
283 | SET_HCIDEV_DEV(hdev, vhci_miscdev.dev); | ||
289 | 284 | ||
290 | hdev->open = hci_vhci_open; | 285 | hdev->open = vhci_open_dev; |
291 | hdev->close = hci_vhci_close; | 286 | hdev->close = vhci_close_dev; |
292 | hdev->flush = hci_vhci_flush; | 287 | hdev->flush = vhci_flush; |
293 | hdev->send = hci_vhci_send_frame; | 288 | hdev->send = vhci_send_frame; |
294 | hdev->destruct = hci_vhci_destruct; | 289 | hdev->destruct = vhci_destruct; |
295 | 290 | ||
296 | hdev->owner = THIS_MODULE; | 291 | hdev->owner = THIS_MODULE; |
297 | 292 | ||
298 | if (hci_register_dev(hdev) < 0) { | 293 | if (hci_register_dev(hdev) < 0) { |
299 | kfree(hci_vhci); | 294 | BT_ERR("Can't register HCI device"); |
295 | kfree(vhci); | ||
300 | hci_free_dev(hdev); | 296 | hci_free_dev(hdev); |
301 | return -EBUSY; | 297 | return -EBUSY; |
302 | } | 298 | } |
303 | 299 | ||
304 | file->private_data = hci_vhci; | 300 | file->private_data = vhci; |
305 | return nonseekable_open(inode, file); | 301 | |
302 | return nonseekable_open(inode, file); | ||
306 | } | 303 | } |
307 | 304 | ||
308 | static int hci_vhci_chr_close(struct inode *inode, struct file *file) | 305 | static int vhci_release(struct inode *inode, struct file *file) |
309 | { | 306 | { |
310 | struct hci_vhci_struct *hci_vhci = (struct hci_vhci_struct *) file->private_data; | 307 | struct vhci_data *vhci = file->private_data; |
311 | struct hci_dev *hdev = hci_vhci->hdev; | 308 | struct hci_dev *hdev = vhci->hdev; |
312 | 309 | ||
313 | if (hci_unregister_dev(hdev) < 0) { | 310 | if (hci_unregister_dev(hdev) < 0) { |
314 | BT_ERR("Can't unregister HCI device %s", hdev->name); | 311 | BT_ERR("Can't unregister HCI device %s", hdev->name); |
@@ -317,48 +314,71 @@ static int hci_vhci_chr_close(struct inode *inode, struct file *file) | |||
317 | hci_free_dev(hdev); | 314 | hci_free_dev(hdev); |
318 | 315 | ||
319 | file->private_data = NULL; | 316 | file->private_data = NULL; |
317 | |||
320 | return 0; | 318 | return 0; |
321 | } | 319 | } |
322 | 320 | ||
323 | static struct file_operations hci_vhci_fops = { | 321 | static int vhci_fasync(int fd, struct file *file, int on) |
324 | .owner = THIS_MODULE, | 322 | { |
325 | .llseek = hci_vhci_chr_lseek, | 323 | struct vhci_data *vhci = file->private_data; |
326 | .read = hci_vhci_chr_read, | 324 | int err; |
327 | .write = hci_vhci_chr_write, | 325 | |
328 | .poll = hci_vhci_chr_poll, | 326 | err = fasync_helper(fd, file, on, &vhci->fasync); |
329 | .ioctl = hci_vhci_chr_ioctl, | 327 | if (err < 0) |
330 | .open = hci_vhci_chr_open, | 328 | return err; |
331 | .release = hci_vhci_chr_close, | 329 | |
332 | .fasync = hci_vhci_chr_fasync | 330 | if (on) |
331 | vhci->flags |= VHCI_FASYNC; | ||
332 | else | ||
333 | vhci->flags &= ~VHCI_FASYNC; | ||
334 | |||
335 | return 0; | ||
336 | } | ||
337 | |||
338 | static struct file_operations vhci_fops = { | ||
339 | .owner = THIS_MODULE, | ||
340 | .llseek = vhci_llseek, | ||
341 | .read = vhci_read, | ||
342 | .write = vhci_write, | ||
343 | .poll = vhci_poll, | ||
344 | .ioctl = vhci_ioctl, | ||
345 | .open = vhci_open, | ||
346 | .release = vhci_release, | ||
347 | .fasync = vhci_fasync, | ||
333 | }; | 348 | }; |
334 | 349 | ||
335 | static struct miscdevice hci_vhci_miscdev= | 350 | static struct miscdevice vhci_miscdev= { |
336 | { | 351 | .name = "vhci", |
337 | VHCI_MINOR, | 352 | .fops = &vhci_fops, |
338 | "hci_vhci", | ||
339 | &hci_vhci_fops | ||
340 | }; | 353 | }; |
341 | 354 | ||
342 | static int __init hci_vhci_init(void) | 355 | static int __init vhci_init(void) |
343 | { | 356 | { |
344 | BT_INFO("VHCI driver ver %s", VERSION); | 357 | BT_INFO("Virtual HCI driver ver %s", VERSION); |
345 | 358 | ||
346 | if (misc_register(&hci_vhci_miscdev)) { | 359 | vhci_miscdev.minor = minor; |
347 | BT_ERR("Can't register misc device %d\n", VHCI_MINOR); | 360 | |
361 | if (misc_register(&vhci_miscdev) < 0) { | ||
362 | BT_ERR("Can't register misc device with minor %d", minor); | ||
348 | return -EIO; | 363 | return -EIO; |
349 | } | 364 | } |
350 | 365 | ||
351 | return 0; | 366 | return 0; |
352 | } | 367 | } |
353 | 368 | ||
354 | static void hci_vhci_cleanup(void) | 369 | static void __exit vhci_exit(void) |
355 | { | 370 | { |
356 | misc_deregister(&hci_vhci_miscdev); | 371 | if (misc_deregister(&vhci_miscdev) < 0) |
372 | BT_ERR("Can't unregister misc device with minor %d", minor); | ||
357 | } | 373 | } |
358 | 374 | ||
359 | module_init(hci_vhci_init); | 375 | module_init(vhci_init); |
360 | module_exit(hci_vhci_cleanup); | 376 | module_exit(vhci_exit); |
377 | |||
378 | module_param(minor, int, 0444); | ||
379 | MODULE_PARM_DESC(minor, "Miscellaneous minor device number"); | ||
361 | 380 | ||
362 | MODULE_AUTHOR("Maxim Krasnyansky <maxk@qualcomm.com>"); | 381 | MODULE_AUTHOR("Maxim Krasnyansky <maxk@qualcomm.com>, Marcel Holtmann <marcel@holtmann.org>"); |
363 | MODULE_DESCRIPTION("Bluetooth VHCI driver ver " VERSION); | 382 | MODULE_DESCRIPTION("Bluetooth virtual HCI driver ver " VERSION); |
364 | MODULE_LICENSE("GPL"); | 383 | MODULE_VERSION(VERSION); |
384 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/bluetooth/hci_vhci.h b/drivers/bluetooth/hci_vhci.h deleted file mode 100644 index 53b11f9ef76d..000000000000 --- a/drivers/bluetooth/hci_vhci.h +++ /dev/null | |||
@@ -1,50 +0,0 @@ | |||
1 | /* | ||
2 | BlueZ - Bluetooth protocol stack for Linux | ||
3 | Copyright (C) 2000-2001 Qualcomm Incorporated | ||
4 | |||
5 | Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> | ||
6 | |||
7 | This program is free software; you can redistribute it and/or modify | ||
8 | it under the terms of the GNU General Public License version 2 as | ||
9 | published by the Free Software Foundation; | ||
10 | |||
11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
12 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. | ||
14 | IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY | ||
15 | CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES | ||
16 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
17 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
18 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
19 | |||
20 | ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, | ||
21 | COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS | ||
22 | SOFTWARE IS DISCLAIMED. | ||
23 | */ | ||
24 | |||
25 | /* | ||
26 | * $Id: hci_vhci.h,v 1.1.1.1 2002/03/08 21:03:15 maxk Exp $ | ||
27 | */ | ||
28 | |||
29 | #ifndef __HCI_VHCI_H | ||
30 | #define __HCI_VHCI_H | ||
31 | |||
32 | #ifdef __KERNEL__ | ||
33 | |||
34 | struct hci_vhci_struct { | ||
35 | struct hci_dev *hdev; | ||
36 | __u32 flags; | ||
37 | wait_queue_head_t read_wait; | ||
38 | struct sk_buff_head readq; | ||
39 | struct fasync_struct *fasync; | ||
40 | }; | ||
41 | |||
42 | /* VHCI device flags */ | ||
43 | #define VHCI_FASYNC 0x0010 | ||
44 | |||
45 | #endif /* __KERNEL__ */ | ||
46 | |||
47 | #define VHCI_DEV "/dev/vhci" | ||
48 | #define VHCI_MINOR 250 | ||
49 | |||
50 | #endif /* __HCI_VHCI_H */ | ||
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c index 38dd9ffbe8bc..0829db58462f 100644 --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c | |||
@@ -734,7 +734,7 @@ static int viocd_remove(struct vio_dev *vdev) | |||
734 | */ | 734 | */ |
735 | static struct vio_device_id viocd_device_table[] __devinitdata = { | 735 | static struct vio_device_id viocd_device_table[] __devinitdata = { |
736 | { "viocd", "" }, | 736 | { "viocd", "" }, |
737 | { 0, } | 737 | { "", "" } |
738 | }; | 738 | }; |
739 | 739 | ||
740 | MODULE_DEVICE_TABLE(vio, viocd_device_table); | 740 | MODULE_DEVICE_TABLE(vio, viocd_device_table); |
diff --git a/drivers/char/hvc_vio.c b/drivers/char/hvc_vio.c index 60bb9152b832..78d681dc35a8 100644 --- a/drivers/char/hvc_vio.c +++ b/drivers/char/hvc_vio.c | |||
@@ -39,7 +39,7 @@ char hvc_driver_name[] = "hvc_console"; | |||
39 | 39 | ||
40 | static struct vio_device_id hvc_driver_table[] __devinitdata = { | 40 | static struct vio_device_id hvc_driver_table[] __devinitdata = { |
41 | {"serial", "hvterm1"}, | 41 | {"serial", "hvterm1"}, |
42 | { NULL, } | 42 | { "", "" } |
43 | }; | 43 | }; |
44 | MODULE_DEVICE_TABLE(vio, hvc_driver_table); | 44 | MODULE_DEVICE_TABLE(vio, hvc_driver_table); |
45 | 45 | ||
diff --git a/drivers/char/hvcs.c b/drivers/char/hvcs.c index 3236d2404905..f47f009f9259 100644 --- a/drivers/char/hvcs.c +++ b/drivers/char/hvcs.c | |||
@@ -527,7 +527,7 @@ static int khvcsd(void *unused) | |||
527 | 527 | ||
528 | static struct vio_device_id hvcs_driver_table[] __devinitdata= { | 528 | static struct vio_device_id hvcs_driver_table[] __devinitdata= { |
529 | {"serial-server", "hvterm2"}, | 529 | {"serial-server", "hvterm2"}, |
530 | { NULL, } | 530 | { "", "" } |
531 | }; | 531 | }; |
532 | MODULE_DEVICE_TABLE(vio, hvcs_driver_table); | 532 | MODULE_DEVICE_TABLE(vio, hvcs_driver_table); |
533 | 533 | ||
diff --git a/drivers/char/random.c b/drivers/char/random.c index 6b11d6b2129f..7999da25fe40 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c | |||
@@ -1589,6 +1589,40 @@ u32 secure_tcpv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, __u16 dp | |||
1589 | EXPORT_SYMBOL(secure_tcpv6_port_ephemeral); | 1589 | EXPORT_SYMBOL(secure_tcpv6_port_ephemeral); |
1590 | #endif | 1590 | #endif |
1591 | 1591 | ||
1592 | #if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) | ||
1593 | /* Similar to secure_tcp_sequence_number but generate a 48 bit value | ||
1594 | * bit's 32-47 increase every key exchange | ||
1595 | * 0-31 hash(source, dest) | ||
1596 | */ | ||
1597 | u64 secure_dccp_sequence_number(__u32 saddr, __u32 daddr, | ||
1598 | __u16 sport, __u16 dport) | ||
1599 | { | ||
1600 | struct timeval tv; | ||
1601 | u64 seq; | ||
1602 | __u32 hash[4]; | ||
1603 | struct keydata *keyptr = get_keyptr(); | ||
1604 | |||
1605 | hash[0] = saddr; | ||
1606 | hash[1] = daddr; | ||
1607 | hash[2] = (sport << 16) + dport; | ||
1608 | hash[3] = keyptr->secret[11]; | ||
1609 | |||
1610 | seq = half_md4_transform(hash, keyptr->secret); | ||
1611 | seq |= ((u64)keyptr->count) << (32 - HASH_BITS); | ||
1612 | |||
1613 | do_gettimeofday(&tv); | ||
1614 | seq += tv.tv_usec + tv.tv_sec * 1000000; | ||
1615 | seq &= (1ull << 48) - 1; | ||
1616 | #if 0 | ||
1617 | printk("dccp init_seq(%lx, %lx, %d, %d) = %d\n", | ||
1618 | saddr, daddr, sport, dport, seq); | ||
1619 | #endif | ||
1620 | return seq; | ||
1621 | } | ||
1622 | |||
1623 | EXPORT_SYMBOL(secure_dccp_sequence_number); | ||
1624 | #endif | ||
1625 | |||
1592 | #endif /* CONFIG_INET */ | 1626 | #endif /* CONFIG_INET */ |
1593 | 1627 | ||
1594 | 1628 | ||
diff --git a/drivers/char/viotape.c b/drivers/char/viotape.c index 4764b4f9555d..0aff45fac2e6 100644 --- a/drivers/char/viotape.c +++ b/drivers/char/viotape.c | |||
@@ -991,7 +991,7 @@ static int viotape_remove(struct vio_dev *vdev) | |||
991 | */ | 991 | */ |
992 | static struct vio_device_id viotape_device_table[] __devinitdata = { | 992 | static struct vio_device_id viotape_device_table[] __devinitdata = { |
993 | { "viotape", "" }, | 993 | { "viotape", "" }, |
994 | { 0, } | 994 | { "", "" } |
995 | }; | 995 | }; |
996 | 996 | ||
997 | MODULE_DEVICE_TABLE(vio, viotape_device_table); | 997 | MODULE_DEVICE_TABLE(vio, viotape_device_table); |
diff --git a/drivers/ieee1394/ieee1394_core.c b/drivers/ieee1394/ieee1394_core.c index b248d89de8b4..d633770fac8e 100644 --- a/drivers/ieee1394/ieee1394_core.c +++ b/drivers/ieee1394/ieee1394_core.c | |||
@@ -681,7 +681,7 @@ static void handle_packet_response(struct hpsb_host *host, int tcode, | |||
681 | return; | 681 | return; |
682 | } | 682 | } |
683 | 683 | ||
684 | __skb_unlink(skb, skb->list); | 684 | __skb_unlink(skb, &host->pending_packet_queue); |
685 | 685 | ||
686 | if (packet->state == hpsb_queued) { | 686 | if (packet->state == hpsb_queued) { |
687 | packet->sendtime = jiffies; | 687 | packet->sendtime = jiffies; |
@@ -989,7 +989,7 @@ void abort_timedouts(unsigned long __opaque) | |||
989 | packet = (struct hpsb_packet *)skb->data; | 989 | packet = (struct hpsb_packet *)skb->data; |
990 | 990 | ||
991 | if (time_before(packet->sendtime + expire, jiffies)) { | 991 | if (time_before(packet->sendtime + expire, jiffies)) { |
992 | __skb_unlink(skb, skb->list); | 992 | __skb_unlink(skb, &host->pending_packet_queue); |
993 | packet->state = hpsb_complete; | 993 | packet->state = hpsb_complete; |
994 | packet->ack_code = ACKX_TIMEOUT; | 994 | packet->ack_code = ACKX_TIMEOUT; |
995 | queue_packet_complete(packet); | 995 | queue_packet_complete(packet); |
diff --git a/drivers/isdn/act2000/capi.c b/drivers/isdn/act2000/capi.c index afa46681f983..6ae6eb322111 100644 --- a/drivers/isdn/act2000/capi.c +++ b/drivers/isdn/act2000/capi.c | |||
@@ -606,7 +606,7 @@ handle_ack(act2000_card *card, act2000_chan *chan, __u8 blocknr) { | |||
606 | if ((((m->msg.data_b3_req.fakencci >> 8) & 0xff) == chan->ncci) && | 606 | if ((((m->msg.data_b3_req.fakencci >> 8) & 0xff) == chan->ncci) && |
607 | (m->msg.data_b3_req.blocknr == blocknr)) { | 607 | (m->msg.data_b3_req.blocknr == blocknr)) { |
608 | /* found corresponding DATA_B3_REQ */ | 608 | /* found corresponding DATA_B3_REQ */ |
609 | skb_unlink(tmp); | 609 | skb_unlink(tmp, &card->ackq); |
610 | chan->queued -= m->msg.data_b3_req.datalen; | 610 | chan->queued -= m->msg.data_b3_req.datalen; |
611 | if (m->msg.data_b3_req.flags) | 611 | if (m->msg.data_b3_req.flags) |
612 | ret = m->msg.data_b3_req.datalen; | 612 | ret = m->msg.data_b3_req.datalen; |
diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c index f30e8e63ae0d..96c115e13389 100644 --- a/drivers/isdn/i4l/isdn_net.c +++ b/drivers/isdn/i4l/isdn_net.c | |||
@@ -1786,7 +1786,6 @@ isdn_net_receive(struct net_device *ndev, struct sk_buff *skb) | |||
1786 | lp->stats.rx_bytes += skb->len; | 1786 | lp->stats.rx_bytes += skb->len; |
1787 | } | 1787 | } |
1788 | skb->dev = ndev; | 1788 | skb->dev = ndev; |
1789 | skb->input_dev = ndev; | ||
1790 | skb->pkt_type = PACKET_HOST; | 1789 | skb->pkt_type = PACKET_HOST; |
1791 | skb->mac.raw = skb->data; | 1790 | skb->mac.raw = skb->data; |
1792 | #ifdef ISDN_DEBUG_NET_DUMP | 1791 | #ifdef ISDN_DEBUG_NET_DUMP |
diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index 260a323a96d3..d97a9be5469c 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c | |||
@@ -1177,7 +1177,6 @@ isdn_ppp_push_higher(isdn_net_dev * net_dev, isdn_net_local * lp, struct sk_buff | |||
1177 | mlp->huptimer = 0; | 1177 | mlp->huptimer = 0; |
1178 | #endif /* CONFIG_IPPP_FILTER */ | 1178 | #endif /* CONFIG_IPPP_FILTER */ |
1179 | skb->dev = dev; | 1179 | skb->dev = dev; |
1180 | skb->input_dev = dev; | ||
1181 | skb->mac.raw = skb->data; | 1180 | skb->mac.raw = skb->data; |
1182 | netif_rx(skb); | 1181 | netif_rx(skb); |
1183 | /* net_dev->local->stats.rx_packets++; done in isdn_net.c */ | 1182 | /* net_dev->local->stats.rx_packets++; done in isdn_net.c */ |
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 8acc655ec1e8..7babf6af4e28 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c | |||
@@ -14,8 +14,8 @@ | |||
14 | 14 | ||
15 | #define DRV_MODULE_NAME "bnx2" | 15 | #define DRV_MODULE_NAME "bnx2" |
16 | #define PFX DRV_MODULE_NAME ": " | 16 | #define PFX DRV_MODULE_NAME ": " |
17 | #define DRV_MODULE_VERSION "1.2.19" | 17 | #define DRV_MODULE_VERSION "1.2.20" |
18 | #define DRV_MODULE_RELDATE "May 23, 2005" | 18 | #define DRV_MODULE_RELDATE "August 22, 2005" |
19 | 19 | ||
20 | #define RUN_AT(x) (jiffies + (x)) | 20 | #define RUN_AT(x) (jiffies + (x)) |
21 | 21 | ||
@@ -52,7 +52,6 @@ static struct { | |||
52 | { "HP NC370i Multifunction Gigabit Server Adapter" }, | 52 | { "HP NC370i Multifunction Gigabit Server Adapter" }, |
53 | { "Broadcom NetXtreme II BCM5706 1000Base-SX" }, | 53 | { "Broadcom NetXtreme II BCM5706 1000Base-SX" }, |
54 | { "HP NC370F Multifunction Gigabit Server Adapter" }, | 54 | { "HP NC370F Multifunction Gigabit Server Adapter" }, |
55 | { 0 }, | ||
56 | }; | 55 | }; |
57 | 56 | ||
58 | static struct pci_device_id bnx2_pci_tbl[] = { | 57 | static struct pci_device_id bnx2_pci_tbl[] = { |
@@ -108,6 +107,15 @@ static struct flash_spec flash_table[] = | |||
108 | 107 | ||
109 | MODULE_DEVICE_TABLE(pci, bnx2_pci_tbl); | 108 | MODULE_DEVICE_TABLE(pci, bnx2_pci_tbl); |
110 | 109 | ||
110 | static inline u32 bnx2_tx_avail(struct bnx2 *bp) | ||
111 | { | ||
112 | u32 diff = TX_RING_IDX(bp->tx_prod) - TX_RING_IDX(bp->tx_cons); | ||
113 | |||
114 | if (diff > MAX_TX_DESC_CNT) | ||
115 | diff = (diff & MAX_TX_DESC_CNT) - 1; | ||
116 | return (bp->tx_ring_size - diff); | ||
117 | } | ||
118 | |||
111 | static u32 | 119 | static u32 |
112 | bnx2_reg_rd_ind(struct bnx2 *bp, u32 offset) | 120 | bnx2_reg_rd_ind(struct bnx2 *bp, u32 offset) |
113 | { | 121 | { |
@@ -807,7 +815,19 @@ bnx2_setup_serdes_phy(struct bnx2 *bp) | |||
807 | bnx2_write_phy(bp, MII_ADVERTISE, new_adv); | 815 | bnx2_write_phy(bp, MII_ADVERTISE, new_adv); |
808 | bnx2_write_phy(bp, MII_BMCR, bmcr | BMCR_ANRESTART | | 816 | bnx2_write_phy(bp, MII_BMCR, bmcr | BMCR_ANRESTART | |
809 | BMCR_ANENABLE); | 817 | BMCR_ANENABLE); |
810 | bp->serdes_an_pending = SERDES_AN_TIMEOUT / bp->timer_interval; | 818 | if (CHIP_NUM(bp) == CHIP_NUM_5706) { |
819 | /* Speed up link-up time when the link partner | ||
820 | * does not autonegotiate which is very common | ||
821 | * in blade servers. Some blade servers use | ||
822 | IPMI for keyboard input and it's important | ||
823 | * to minimize link disruptions. Autoneg. involves | ||
824 | * exchanging base pages plus 3 next pages and | ||
825 | * normally completes in about 120 msec. | ||
826 | */ | ||
827 | bp->current_interval = SERDES_AN_TIMEOUT; | ||
828 | bp->serdes_an_pending = 1; | ||
829 | mod_timer(&bp->timer, jiffies + bp->current_interval); | ||
830 | } | ||
811 | } | 831 | } |
812 | 832 | ||
813 | return 0; | 833 | return 0; |
@@ -1327,22 +1347,17 @@ bnx2_tx_int(struct bnx2 *bp) | |||
1327 | } | 1347 | } |
1328 | } | 1348 | } |
1329 | 1349 | ||
1330 | atomic_add(tx_free_bd, &bp->tx_avail_bd); | 1350 | bp->tx_cons = sw_cons; |
1331 | 1351 | ||
1332 | if (unlikely(netif_queue_stopped(bp->dev))) { | 1352 | if (unlikely(netif_queue_stopped(bp->dev))) { |
1333 | unsigned long flags; | 1353 | spin_lock(&bp->tx_lock); |
1334 | |||
1335 | spin_lock_irqsave(&bp->tx_lock, flags); | ||
1336 | if ((netif_queue_stopped(bp->dev)) && | 1354 | if ((netif_queue_stopped(bp->dev)) && |
1337 | (atomic_read(&bp->tx_avail_bd) > MAX_SKB_FRAGS)) { | 1355 | (bnx2_tx_avail(bp) > MAX_SKB_FRAGS)) { |
1338 | 1356 | ||
1339 | netif_wake_queue(bp->dev); | 1357 | netif_wake_queue(bp->dev); |
1340 | } | 1358 | } |
1341 | spin_unlock_irqrestore(&bp->tx_lock, flags); | 1359 | spin_unlock(&bp->tx_lock); |
1342 | } | 1360 | } |
1343 | |||
1344 | bp->tx_cons = sw_cons; | ||
1345 | |||
1346 | } | 1361 | } |
1347 | 1362 | ||
1348 | static inline void | 1363 | static inline void |
@@ -1523,15 +1538,12 @@ bnx2_msi(int irq, void *dev_instance, struct pt_regs *regs) | |||
1523 | BNX2_PCICFG_INT_ACK_CMD_MASK_INT); | 1538 | BNX2_PCICFG_INT_ACK_CMD_MASK_INT); |
1524 | 1539 | ||
1525 | /* Return here if interrupt is disabled. */ | 1540 | /* Return here if interrupt is disabled. */ |
1526 | if (unlikely(atomic_read(&bp->intr_sem) != 0)) { | 1541 | if (unlikely(atomic_read(&bp->intr_sem) != 0)) |
1527 | return IRQ_RETVAL(1); | 1542 | return IRQ_HANDLED; |
1528 | } | ||
1529 | 1543 | ||
1530 | if (netif_rx_schedule_prep(dev)) { | 1544 | netif_rx_schedule(dev); |
1531 | __netif_rx_schedule(dev); | ||
1532 | } | ||
1533 | 1545 | ||
1534 | return IRQ_RETVAL(1); | 1546 | return IRQ_HANDLED; |
1535 | } | 1547 | } |
1536 | 1548 | ||
1537 | static irqreturn_t | 1549 | static irqreturn_t |
@@ -1549,22 +1561,19 @@ bnx2_interrupt(int irq, void *dev_instance, struct pt_regs *regs) | |||
1549 | if ((bp->status_blk->status_idx == bp->last_status_idx) || | 1561 | if ((bp->status_blk->status_idx == bp->last_status_idx) || |
1550 | (REG_RD(bp, BNX2_PCICFG_MISC_STATUS) & | 1562 | (REG_RD(bp, BNX2_PCICFG_MISC_STATUS) & |
1551 | BNX2_PCICFG_MISC_STATUS_INTA_VALUE)) | 1563 | BNX2_PCICFG_MISC_STATUS_INTA_VALUE)) |
1552 | return IRQ_RETVAL(0); | 1564 | return IRQ_NONE; |
1553 | 1565 | ||
1554 | REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD, | 1566 | REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD, |
1555 | BNX2_PCICFG_INT_ACK_CMD_USE_INT_HC_PARAM | | 1567 | BNX2_PCICFG_INT_ACK_CMD_USE_INT_HC_PARAM | |
1556 | BNX2_PCICFG_INT_ACK_CMD_MASK_INT); | 1568 | BNX2_PCICFG_INT_ACK_CMD_MASK_INT); |
1557 | 1569 | ||
1558 | /* Return here if interrupt is shared and is disabled. */ | 1570 | /* Return here if interrupt is shared and is disabled. */ |
1559 | if (unlikely(atomic_read(&bp->intr_sem) != 0)) { | 1571 | if (unlikely(atomic_read(&bp->intr_sem) != 0)) |
1560 | return IRQ_RETVAL(1); | 1572 | return IRQ_HANDLED; |
1561 | } | ||
1562 | 1573 | ||
1563 | if (netif_rx_schedule_prep(dev)) { | 1574 | netif_rx_schedule(dev); |
1564 | __netif_rx_schedule(dev); | ||
1565 | } | ||
1566 | 1575 | ||
1567 | return IRQ_RETVAL(1); | 1576 | return IRQ_HANDLED; |
1568 | } | 1577 | } |
1569 | 1578 | ||
1570 | static int | 1579 | static int |
@@ -1581,11 +1590,9 @@ bnx2_poll(struct net_device *dev, int *budget) | |||
1581 | (bp->status_blk->status_attn_bits_ack & | 1590 | (bp->status_blk->status_attn_bits_ack & |
1582 | STATUS_ATTN_BITS_LINK_STATE)) { | 1591 | STATUS_ATTN_BITS_LINK_STATE)) { |
1583 | 1592 | ||
1584 | unsigned long flags; | 1593 | spin_lock(&bp->phy_lock); |
1585 | |||
1586 | spin_lock_irqsave(&bp->phy_lock, flags); | ||
1587 | bnx2_phy_int(bp); | 1594 | bnx2_phy_int(bp); |
1588 | spin_unlock_irqrestore(&bp->phy_lock, flags); | 1595 | spin_unlock(&bp->phy_lock); |
1589 | } | 1596 | } |
1590 | 1597 | ||
1591 | if (bp->status_blk->status_tx_quick_consumer_index0 != bp->tx_cons) { | 1598 | if (bp->status_blk->status_tx_quick_consumer_index0 != bp->tx_cons) { |
@@ -1628,9 +1635,8 @@ bnx2_set_rx_mode(struct net_device *dev) | |||
1628 | struct bnx2 *bp = dev->priv; | 1635 | struct bnx2 *bp = dev->priv; |
1629 | u32 rx_mode, sort_mode; | 1636 | u32 rx_mode, sort_mode; |
1630 | int i; | 1637 | int i; |
1631 | unsigned long flags; | ||
1632 | 1638 | ||
1633 | spin_lock_irqsave(&bp->phy_lock, flags); | 1639 | spin_lock_bh(&bp->phy_lock); |
1634 | 1640 | ||
1635 | rx_mode = bp->rx_mode & ~(BNX2_EMAC_RX_MODE_PROMISCUOUS | | 1641 | rx_mode = bp->rx_mode & ~(BNX2_EMAC_RX_MODE_PROMISCUOUS | |
1636 | BNX2_EMAC_RX_MODE_KEEP_VLAN_TAG); | 1642 | BNX2_EMAC_RX_MODE_KEEP_VLAN_TAG); |
@@ -1691,7 +1697,7 @@ bnx2_set_rx_mode(struct net_device *dev) | |||
1691 | REG_WR(bp, BNX2_RPM_SORT_USER0, sort_mode); | 1697 | REG_WR(bp, BNX2_RPM_SORT_USER0, sort_mode); |
1692 | REG_WR(bp, BNX2_RPM_SORT_USER0, sort_mode | BNX2_RPM_SORT_USER0_ENA); | 1698 | REG_WR(bp, BNX2_RPM_SORT_USER0, sort_mode | BNX2_RPM_SORT_USER0_ENA); |
1693 | 1699 | ||
1694 | spin_unlock_irqrestore(&bp->phy_lock, flags); | 1700 | spin_unlock_bh(&bp->phy_lock); |
1695 | } | 1701 | } |
1696 | 1702 | ||
1697 | static void | 1703 | static void |
@@ -2960,7 +2966,6 @@ bnx2_init_tx_ring(struct bnx2 *bp) | |||
2960 | bp->tx_prod = 0; | 2966 | bp->tx_prod = 0; |
2961 | bp->tx_cons = 0; | 2967 | bp->tx_cons = 0; |
2962 | bp->tx_prod_bseq = 0; | 2968 | bp->tx_prod_bseq = 0; |
2963 | atomic_set(&bp->tx_avail_bd, bp->tx_ring_size); | ||
2964 | 2969 | ||
2965 | val = BNX2_L2CTX_TYPE_TYPE_L2; | 2970 | val = BNX2_L2CTX_TYPE_TYPE_L2; |
2966 | val |= BNX2_L2CTX_TYPE_SIZE_L2; | 2971 | val |= BNX2_L2CTX_TYPE_SIZE_L2; |
@@ -3507,11 +3512,11 @@ bnx2_test_registers(struct bnx2 *bp) | |||
3507 | rw_mask = reg_tbl[i].rw_mask; | 3512 | rw_mask = reg_tbl[i].rw_mask; |
3508 | ro_mask = reg_tbl[i].ro_mask; | 3513 | ro_mask = reg_tbl[i].ro_mask; |
3509 | 3514 | ||
3510 | save_val = readl((u8 *) bp->regview + offset); | 3515 | save_val = readl(bp->regview + offset); |
3511 | 3516 | ||
3512 | writel(0, (u8 *) bp->regview + offset); | 3517 | writel(0, bp->regview + offset); |
3513 | 3518 | ||
3514 | val = readl((u8 *) bp->regview + offset); | 3519 | val = readl(bp->regview + offset); |
3515 | if ((val & rw_mask) != 0) { | 3520 | if ((val & rw_mask) != 0) { |
3516 | goto reg_test_err; | 3521 | goto reg_test_err; |
3517 | } | 3522 | } |
@@ -3520,9 +3525,9 @@ bnx2_test_registers(struct bnx2 *bp) | |||
3520 | goto reg_test_err; | 3525 | goto reg_test_err; |
3521 | } | 3526 | } |
3522 | 3527 | ||
3523 | writel(0xffffffff, (u8 *) bp->regview + offset); | 3528 | writel(0xffffffff, bp->regview + offset); |
3524 | 3529 | ||
3525 | val = readl((u8 *) bp->regview + offset); | 3530 | val = readl(bp->regview + offset); |
3526 | if ((val & rw_mask) != rw_mask) { | 3531 | if ((val & rw_mask) != rw_mask) { |
3527 | goto reg_test_err; | 3532 | goto reg_test_err; |
3528 | } | 3533 | } |
@@ -3531,11 +3536,11 @@ bnx2_test_registers(struct bnx2 *bp) | |||
3531 | goto reg_test_err; | 3536 | goto reg_test_err; |
3532 | } | 3537 | } |
3533 | 3538 | ||
3534 | writel(save_val, (u8 *) bp->regview + offset); | 3539 | writel(save_val, bp->regview + offset); |
3535 | continue; | 3540 | continue; |
3536 | 3541 | ||
3537 | reg_test_err: | 3542 | reg_test_err: |
3538 | writel(save_val, (u8 *) bp->regview + offset); | 3543 | writel(save_val, bp->regview + offset); |
3539 | ret = -ENODEV; | 3544 | ret = -ENODEV; |
3540 | break; | 3545 | break; |
3541 | } | 3546 | } |
@@ -3752,10 +3757,10 @@ bnx2_test_link(struct bnx2 *bp) | |||
3752 | { | 3757 | { |
3753 | u32 bmsr; | 3758 | u32 bmsr; |
3754 | 3759 | ||
3755 | spin_lock_irq(&bp->phy_lock); | 3760 | spin_lock_bh(&bp->phy_lock); |
3756 | bnx2_read_phy(bp, MII_BMSR, &bmsr); | 3761 | bnx2_read_phy(bp, MII_BMSR, &bmsr); |
3757 | bnx2_read_phy(bp, MII_BMSR, &bmsr); | 3762 | bnx2_read_phy(bp, MII_BMSR, &bmsr); |
3758 | spin_unlock_irq(&bp->phy_lock); | 3763 | spin_unlock_bh(&bp->phy_lock); |
3759 | 3764 | ||
3760 | if (bmsr & BMSR_LSTATUS) { | 3765 | if (bmsr & BMSR_LSTATUS) { |
3761 | return 0; | 3766 | return 0; |
@@ -3801,6 +3806,9 @@ bnx2_timer(unsigned long data) | |||
3801 | struct bnx2 *bp = (struct bnx2 *) data; | 3806 | struct bnx2 *bp = (struct bnx2 *) data; |
3802 | u32 msg; | 3807 | u32 msg; |
3803 | 3808 | ||
3809 | if (!netif_running(bp->dev)) | ||
3810 | return; | ||
3811 | |||
3804 | if (atomic_read(&bp->intr_sem) != 0) | 3812 | if (atomic_read(&bp->intr_sem) != 0) |
3805 | goto bnx2_restart_timer; | 3813 | goto bnx2_restart_timer; |
3806 | 3814 | ||
@@ -3809,15 +3817,16 @@ bnx2_timer(unsigned long data) | |||
3809 | 3817 | ||
3810 | if ((bp->phy_flags & PHY_SERDES_FLAG) && | 3818 | if ((bp->phy_flags & PHY_SERDES_FLAG) && |
3811 | (CHIP_NUM(bp) == CHIP_NUM_5706)) { | 3819 | (CHIP_NUM(bp) == CHIP_NUM_5706)) { |
3812 | unsigned long flags; | ||
3813 | 3820 | ||
3814 | spin_lock_irqsave(&bp->phy_lock, flags); | 3821 | spin_lock(&bp->phy_lock); |
3815 | if (bp->serdes_an_pending) { | 3822 | if (bp->serdes_an_pending) { |
3816 | bp->serdes_an_pending--; | 3823 | bp->serdes_an_pending--; |
3817 | } | 3824 | } |
3818 | else if ((bp->link_up == 0) && (bp->autoneg & AUTONEG_SPEED)) { | 3825 | else if ((bp->link_up == 0) && (bp->autoneg & AUTONEG_SPEED)) { |
3819 | u32 bmcr; | 3826 | u32 bmcr; |
3820 | 3827 | ||
3828 | bp->current_interval = bp->timer_interval; | ||
3829 | |||
3821 | bnx2_read_phy(bp, MII_BMCR, &bmcr); | 3830 | bnx2_read_phy(bp, MII_BMCR, &bmcr); |
3822 | 3831 | ||
3823 | if (bmcr & BMCR_ANENABLE) { | 3832 | if (bmcr & BMCR_ANENABLE) { |
@@ -3860,14 +3869,14 @@ bnx2_timer(unsigned long data) | |||
3860 | 3869 | ||
3861 | } | 3870 | } |
3862 | } | 3871 | } |
3872 | else | ||
3873 | bp->current_interval = bp->timer_interval; | ||
3863 | 3874 | ||
3864 | spin_unlock_irqrestore(&bp->phy_lock, flags); | 3875 | spin_unlock(&bp->phy_lock); |
3865 | } | 3876 | } |
3866 | 3877 | ||
3867 | bnx2_restart_timer: | 3878 | bnx2_restart_timer: |
3868 | bp->timer.expires = RUN_AT(bp->timer_interval); | 3879 | mod_timer(&bp->timer, jiffies + bp->current_interval); |
3869 | |||
3870 | add_timer(&bp->timer); | ||
3871 | } | 3880 | } |
3872 | 3881 | ||
3873 | /* Called with rtnl_lock */ | 3882 | /* Called with rtnl_lock */ |
@@ -3920,12 +3929,7 @@ bnx2_open(struct net_device *dev) | |||
3920 | return rc; | 3929 | return rc; |
3921 | } | 3930 | } |
3922 | 3931 | ||
3923 | init_timer(&bp->timer); | 3932 | mod_timer(&bp->timer, jiffies + bp->current_interval); |
3924 | |||
3925 | bp->timer.expires = RUN_AT(bp->timer_interval); | ||
3926 | bp->timer.data = (unsigned long) bp; | ||
3927 | bp->timer.function = bnx2_timer; | ||
3928 | add_timer(&bp->timer); | ||
3929 | 3933 | ||
3930 | atomic_set(&bp->intr_sem, 0); | 3934 | atomic_set(&bp->intr_sem, 0); |
3931 | 3935 | ||
@@ -3976,12 +3980,17 @@ bnx2_reset_task(void *data) | |||
3976 | { | 3980 | { |
3977 | struct bnx2 *bp = data; | 3981 | struct bnx2 *bp = data; |
3978 | 3982 | ||
3983 | if (!netif_running(bp->dev)) | ||
3984 | return; | ||
3985 | |||
3986 | bp->in_reset_task = 1; | ||
3979 | bnx2_netif_stop(bp); | 3987 | bnx2_netif_stop(bp); |
3980 | 3988 | ||
3981 | bnx2_init_nic(bp); | 3989 | bnx2_init_nic(bp); |
3982 | 3990 | ||
3983 | atomic_set(&bp->intr_sem, 1); | 3991 | atomic_set(&bp->intr_sem, 1); |
3984 | bnx2_netif_start(bp); | 3992 | bnx2_netif_start(bp); |
3993 | bp->in_reset_task = 0; | ||
3985 | } | 3994 | } |
3986 | 3995 | ||
3987 | static void | 3996 | static void |
@@ -4041,9 +4050,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) | |||
4041 | u16 prod, ring_prod; | 4050 | u16 prod, ring_prod; |
4042 | int i; | 4051 | int i; |
4043 | 4052 | ||
4044 | if (unlikely(atomic_read(&bp->tx_avail_bd) < | 4053 | if (unlikely(bnx2_tx_avail(bp) < (skb_shinfo(skb)->nr_frags + 1))) { |
4045 | (skb_shinfo(skb)->nr_frags + 1))) { | ||
4046 | |||
4047 | netif_stop_queue(dev); | 4054 | netif_stop_queue(dev); |
4048 | printk(KERN_ERR PFX "%s: BUG! Tx ring full when queue awake!\n", | 4055 | printk(KERN_ERR PFX "%s: BUG! Tx ring full when queue awake!\n", |
4049 | dev->name); | 4056 | dev->name); |
@@ -4140,8 +4147,6 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) | |||
4140 | prod = NEXT_TX_BD(prod); | 4147 | prod = NEXT_TX_BD(prod); |
4141 | bp->tx_prod_bseq += skb->len; | 4148 | bp->tx_prod_bseq += skb->len; |
4142 | 4149 | ||
4143 | atomic_sub(last_frag + 1, &bp->tx_avail_bd); | ||
4144 | |||
4145 | REG_WR16(bp, MB_TX_CID_ADDR + BNX2_L2CTX_TX_HOST_BIDX, prod); | 4150 | REG_WR16(bp, MB_TX_CID_ADDR + BNX2_L2CTX_TX_HOST_BIDX, prod); |
4146 | REG_WR(bp, MB_TX_CID_ADDR + BNX2_L2CTX_TX_HOST_BSEQ, bp->tx_prod_bseq); | 4151 | REG_WR(bp, MB_TX_CID_ADDR + BNX2_L2CTX_TX_HOST_BSEQ, bp->tx_prod_bseq); |
4147 | 4152 | ||
@@ -4150,17 +4155,13 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) | |||
4150 | bp->tx_prod = prod; | 4155 | bp->tx_prod = prod; |
4151 | dev->trans_start = jiffies; | 4156 | dev->trans_start = jiffies; |
4152 | 4157 | ||
4153 | if (unlikely(atomic_read(&bp->tx_avail_bd) <= MAX_SKB_FRAGS)) { | 4158 | if (unlikely(bnx2_tx_avail(bp) <= MAX_SKB_FRAGS)) { |
4154 | unsigned long flags; | 4159 | spin_lock(&bp->tx_lock); |
4155 | 4160 | netif_stop_queue(dev); | |
4156 | spin_lock_irqsave(&bp->tx_lock, flags); | 4161 | |
4157 | if (atomic_read(&bp->tx_avail_bd) <= MAX_SKB_FRAGS) { | 4162 | if (bnx2_tx_avail(bp) > MAX_SKB_FRAGS) |
4158 | netif_stop_queue(dev); | 4163 | netif_wake_queue(dev); |
4159 | 4164 | spin_unlock(&bp->tx_lock); | |
4160 | if (atomic_read(&bp->tx_avail_bd) > MAX_SKB_FRAGS) | ||
4161 | netif_wake_queue(dev); | ||
4162 | } | ||
4163 | spin_unlock_irqrestore(&bp->tx_lock, flags); | ||
4164 | } | 4165 | } |
4165 | 4166 | ||
4166 | return NETDEV_TX_OK; | 4167 | return NETDEV_TX_OK; |
@@ -4173,7 +4174,13 @@ bnx2_close(struct net_device *dev) | |||
4173 | struct bnx2 *bp = dev->priv; | 4174 | struct bnx2 *bp = dev->priv; |
4174 | u32 reset_code; | 4175 | u32 reset_code; |
4175 | 4176 | ||
4176 | flush_scheduled_work(); | 4177 | /* Calling flush_scheduled_work() may deadlock because |
4178 | * linkwatch_event() may be on the workqueue and it will try to get | ||
4179 | * the rtnl_lock which we are holding. | ||
4180 | */ | ||
4181 | while (bp->in_reset_task) | ||
4182 | msleep(1); | ||
4183 | |||
4177 | bnx2_netif_stop(bp); | 4184 | bnx2_netif_stop(bp); |
4178 | del_timer_sync(&bp->timer); | 4185 | del_timer_sync(&bp->timer); |
4179 | if (bp->wol) | 4186 | if (bp->wol) |
@@ -4390,11 +4397,11 @@ bnx2_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) | |||
4390 | bp->req_line_speed = req_line_speed; | 4397 | bp->req_line_speed = req_line_speed; |
4391 | bp->req_duplex = req_duplex; | 4398 | bp->req_duplex = req_duplex; |
4392 | 4399 | ||
4393 | spin_lock_irq(&bp->phy_lock); | 4400 | spin_lock_bh(&bp->phy_lock); |
4394 | 4401 | ||
4395 | bnx2_setup_phy(bp); | 4402 | bnx2_setup_phy(bp); |
4396 | 4403 | ||
4397 | spin_unlock_irq(&bp->phy_lock); | 4404 | spin_unlock_bh(&bp->phy_lock); |
4398 | 4405 | ||
4399 | return 0; | 4406 | return 0; |
4400 | } | 4407 | } |
@@ -4464,19 +4471,20 @@ bnx2_nway_reset(struct net_device *dev) | |||
4464 | return -EINVAL; | 4471 | return -EINVAL; |
4465 | } | 4472 | } |
4466 | 4473 | ||
4467 | spin_lock_irq(&bp->phy_lock); | 4474 | spin_lock_bh(&bp->phy_lock); |
4468 | 4475 | ||
4469 | /* Force a link down visible on the other side */ | 4476 | /* Force a link down visible on the other side */ |
4470 | if (bp->phy_flags & PHY_SERDES_FLAG) { | 4477 | if (bp->phy_flags & PHY_SERDES_FLAG) { |
4471 | bnx2_write_phy(bp, MII_BMCR, BMCR_LOOPBACK); | 4478 | bnx2_write_phy(bp, MII_BMCR, BMCR_LOOPBACK); |
4472 | spin_unlock_irq(&bp->phy_lock); | 4479 | spin_unlock_bh(&bp->phy_lock); |
4473 | 4480 | ||
4474 | msleep(20); | 4481 | msleep(20); |
4475 | 4482 | ||
4476 | spin_lock_irq(&bp->phy_lock); | 4483 | spin_lock_bh(&bp->phy_lock); |
4477 | if (CHIP_NUM(bp) == CHIP_NUM_5706) { | 4484 | if (CHIP_NUM(bp) == CHIP_NUM_5706) { |
4478 | bp->serdes_an_pending = SERDES_AN_TIMEOUT / | 4485 | bp->current_interval = SERDES_AN_TIMEOUT; |
4479 | bp->timer_interval; | 4486 | bp->serdes_an_pending = 1; |
4487 | mod_timer(&bp->timer, jiffies + bp->current_interval); | ||
4480 | } | 4488 | } |
4481 | } | 4489 | } |
4482 | 4490 | ||
@@ -4484,7 +4492,7 @@ bnx2_nway_reset(struct net_device *dev) | |||
4484 | bmcr &= ~BMCR_LOOPBACK; | 4492 | bmcr &= ~BMCR_LOOPBACK; |
4485 | bnx2_write_phy(bp, MII_BMCR, bmcr | BMCR_ANRESTART | BMCR_ANENABLE); | 4493 | bnx2_write_phy(bp, MII_BMCR, bmcr | BMCR_ANRESTART | BMCR_ANENABLE); |
4486 | 4494 | ||
4487 | spin_unlock_irq(&bp->phy_lock); | 4495 | spin_unlock_bh(&bp->phy_lock); |
4488 | 4496 | ||
4489 | return 0; | 4497 | return 0; |
4490 | } | 4498 | } |
@@ -4670,11 +4678,11 @@ bnx2_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *epause) | |||
4670 | bp->autoneg &= ~AUTONEG_FLOW_CTRL; | 4678 | bp->autoneg &= ~AUTONEG_FLOW_CTRL; |
4671 | } | 4679 | } |
4672 | 4680 | ||
4673 | spin_lock_irq(&bp->phy_lock); | 4681 | spin_lock_bh(&bp->phy_lock); |
4674 | 4682 | ||
4675 | bnx2_setup_phy(bp); | 4683 | bnx2_setup_phy(bp); |
4676 | 4684 | ||
4677 | spin_unlock_irq(&bp->phy_lock); | 4685 | spin_unlock_bh(&bp->phy_lock); |
4678 | 4686 | ||
4679 | return 0; | 4687 | return 0; |
4680 | } | 4688 | } |
@@ -4698,7 +4706,7 @@ bnx2_set_rx_csum(struct net_device *dev, u32 data) | |||
4698 | 4706 | ||
4699 | #define BNX2_NUM_STATS 45 | 4707 | #define BNX2_NUM_STATS 45 |
4700 | 4708 | ||
4701 | struct { | 4709 | static struct { |
4702 | char string[ETH_GSTRING_LEN]; | 4710 | char string[ETH_GSTRING_LEN]; |
4703 | } bnx2_stats_str_arr[BNX2_NUM_STATS] = { | 4711 | } bnx2_stats_str_arr[BNX2_NUM_STATS] = { |
4704 | { "rx_bytes" }, | 4712 | { "rx_bytes" }, |
@@ -4750,7 +4758,7 @@ struct { | |||
4750 | 4758 | ||
4751 | #define STATS_OFFSET32(offset_name) (offsetof(struct statistics_block, offset_name) / 4) | 4759 | #define STATS_OFFSET32(offset_name) (offsetof(struct statistics_block, offset_name) / 4) |
4752 | 4760 | ||
4753 | unsigned long bnx2_stats_offset_arr[BNX2_NUM_STATS] = { | 4761 | static unsigned long bnx2_stats_offset_arr[BNX2_NUM_STATS] = { |
4754 | STATS_OFFSET32(stat_IfHCInOctets_hi), | 4762 | STATS_OFFSET32(stat_IfHCInOctets_hi), |
4755 | STATS_OFFSET32(stat_IfHCInBadOctets_hi), | 4763 | STATS_OFFSET32(stat_IfHCInBadOctets_hi), |
4756 | STATS_OFFSET32(stat_IfHCOutOctets_hi), | 4764 | STATS_OFFSET32(stat_IfHCOutOctets_hi), |
@@ -4801,7 +4809,7 @@ unsigned long bnx2_stats_offset_arr[BNX2_NUM_STATS] = { | |||
4801 | /* stat_IfHCInBadOctets and stat_Dot3StatsCarrierSenseErrors are | 4809 | /* stat_IfHCInBadOctets and stat_Dot3StatsCarrierSenseErrors are |
4802 | * skipped because of errata. | 4810 | * skipped because of errata. |
4803 | */ | 4811 | */ |
4804 | u8 bnx2_5706_stats_len_arr[BNX2_NUM_STATS] = { | 4812 | static u8 bnx2_5706_stats_len_arr[BNX2_NUM_STATS] = { |
4805 | 8,0,8,8,8,8,8,8,8,8, | 4813 | 8,0,8,8,8,8,8,8,8,8, |
4806 | 4,0,4,4,4,4,4,4,4,4, | 4814 | 4,0,4,4,4,4,4,4,4,4, |
4807 | 4,4,4,4,4,4,4,4,4,4, | 4815 | 4,4,4,4,4,4,4,4,4,4, |
@@ -4811,7 +4819,7 @@ u8 bnx2_5706_stats_len_arr[BNX2_NUM_STATS] = { | |||
4811 | 4819 | ||
4812 | #define BNX2_NUM_TESTS 6 | 4820 | #define BNX2_NUM_TESTS 6 |
4813 | 4821 | ||
4814 | struct { | 4822 | static struct { |
4815 | char string[ETH_GSTRING_LEN]; | 4823 | char string[ETH_GSTRING_LEN]; |
4816 | } bnx2_tests_str_arr[BNX2_NUM_TESTS] = { | 4824 | } bnx2_tests_str_arr[BNX2_NUM_TESTS] = { |
4817 | { "register_test (offline)" }, | 4825 | { "register_test (offline)" }, |
@@ -4910,7 +4918,7 @@ bnx2_get_ethtool_stats(struct net_device *dev, | |||
4910 | struct bnx2 *bp = dev->priv; | 4918 | struct bnx2 *bp = dev->priv; |
4911 | int i; | 4919 | int i; |
4912 | u32 *hw_stats = (u32 *) bp->stats_blk; | 4920 | u32 *hw_stats = (u32 *) bp->stats_blk; |
4913 | u8 *stats_len_arr = 0; | 4921 | u8 *stats_len_arr = NULL; |
4914 | 4922 | ||
4915 | if (hw_stats == NULL) { | 4923 | if (hw_stats == NULL) { |
4916 | memset(buf, 0, sizeof(u64) * BNX2_NUM_STATS); | 4924 | memset(buf, 0, sizeof(u64) * BNX2_NUM_STATS); |
@@ -5012,7 +5020,7 @@ static struct ethtool_ops bnx2_ethtool_ops = { | |||
5012 | static int | 5020 | static int |
5013 | bnx2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) | 5021 | bnx2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) |
5014 | { | 5022 | { |
5015 | struct mii_ioctl_data *data = (struct mii_ioctl_data *)&ifr->ifr_data; | 5023 | struct mii_ioctl_data *data = if_mii(ifr); |
5016 | struct bnx2 *bp = dev->priv; | 5024 | struct bnx2 *bp = dev->priv; |
5017 | int err; | 5025 | int err; |
5018 | 5026 | ||
@@ -5024,9 +5032,9 @@ bnx2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) | |||
5024 | case SIOCGMIIREG: { | 5032 | case SIOCGMIIREG: { |
5025 | u32 mii_regval; | 5033 | u32 mii_regval; |
5026 | 5034 | ||
5027 | spin_lock_irq(&bp->phy_lock); | 5035 | spin_lock_bh(&bp->phy_lock); |
5028 | err = bnx2_read_phy(bp, data->reg_num & 0x1f, &mii_regval); | 5036 | err = bnx2_read_phy(bp, data->reg_num & 0x1f, &mii_regval); |
5029 | spin_unlock_irq(&bp->phy_lock); | 5037 | spin_unlock_bh(&bp->phy_lock); |
5030 | 5038 | ||
5031 | data->val_out = mii_regval; | 5039 | data->val_out = mii_regval; |
5032 | 5040 | ||
@@ -5037,9 +5045,9 @@ bnx2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) | |||
5037 | if (!capable(CAP_NET_ADMIN)) | 5045 | if (!capable(CAP_NET_ADMIN)) |
5038 | return -EPERM; | 5046 | return -EPERM; |
5039 | 5047 | ||
5040 | spin_lock_irq(&bp->phy_lock); | 5048 | spin_lock_bh(&bp->phy_lock); |
5041 | err = bnx2_write_phy(bp, data->reg_num & 0x1f, data->val_in); | 5049 | err = bnx2_write_phy(bp, data->reg_num & 0x1f, data->val_in); |
5042 | spin_unlock_irq(&bp->phy_lock); | 5050 | spin_unlock_bh(&bp->phy_lock); |
5043 | 5051 | ||
5044 | return err; | 5052 | return err; |
5045 | 5053 | ||
@@ -5057,6 +5065,9 @@ bnx2_change_mac_addr(struct net_device *dev, void *p) | |||
5057 | struct sockaddr *addr = p; | 5065 | struct sockaddr *addr = p; |
5058 | struct bnx2 *bp = dev->priv; | 5066 | struct bnx2 *bp = dev->priv; |
5059 | 5067 | ||
5068 | if (!is_valid_ether_addr(addr->sa_data)) | ||
5069 | return -EINVAL; | ||
5070 | |||
5060 | memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); | 5071 | memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); |
5061 | if (netif_running(dev)) | 5072 | if (netif_running(dev)) |
5062 | bnx2_set_mac_addr(bp); | 5073 | bnx2_set_mac_addr(bp); |
@@ -5305,6 +5316,7 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev) | |||
5305 | bp->stats_ticks = 1000000 & 0xffff00; | 5316 | bp->stats_ticks = 1000000 & 0xffff00; |
5306 | 5317 | ||
5307 | bp->timer_interval = HZ; | 5318 | bp->timer_interval = HZ; |
5319 | bp->current_interval = HZ; | ||
5308 | 5320 | ||
5309 | /* Disable WOL support if we are running on a SERDES chip. */ | 5321 | /* Disable WOL support if we are running on a SERDES chip. */ |
5310 | if (CHIP_BOND_ID(bp) & CHIP_BOND_ID_SERDES_BIT) { | 5322 | if (CHIP_BOND_ID(bp) & CHIP_BOND_ID_SERDES_BIT) { |
@@ -5328,6 +5340,15 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev) | |||
5328 | bp->req_line_speed = 0; | 5340 | bp->req_line_speed = 0; |
5329 | if (bp->phy_flags & PHY_SERDES_FLAG) { | 5341 | if (bp->phy_flags & PHY_SERDES_FLAG) { |
5330 | bp->advertising = ETHTOOL_ALL_FIBRE_SPEED | ADVERTISED_Autoneg; | 5342 | bp->advertising = ETHTOOL_ALL_FIBRE_SPEED | ADVERTISED_Autoneg; |
5343 | |||
5344 | reg = REG_RD_IND(bp, HOST_VIEW_SHMEM_BASE + | ||
5345 | BNX2_PORT_HW_CFG_CONFIG); | ||
5346 | reg &= BNX2_PORT_HW_CFG_CFG_DFLT_LINK_MASK; | ||
5347 | if (reg == BNX2_PORT_HW_CFG_CFG_DFLT_LINK_1G) { | ||
5348 | bp->autoneg = 0; | ||
5349 | bp->req_line_speed = bp->line_speed = SPEED_1000; | ||
5350 | bp->req_duplex = DUPLEX_FULL; | ||
5351 | } | ||
5331 | } | 5352 | } |
5332 | else { | 5353 | else { |
5333 | bp->advertising = ETHTOOL_ALL_COPPER_SPEED | ADVERTISED_Autoneg; | 5354 | bp->advertising = ETHTOOL_ALL_COPPER_SPEED | ADVERTISED_Autoneg; |
@@ -5335,11 +5356,17 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev) | |||
5335 | 5356 | ||
5336 | bp->req_flow_ctrl = FLOW_CTRL_RX | FLOW_CTRL_TX; | 5357 | bp->req_flow_ctrl = FLOW_CTRL_RX | FLOW_CTRL_TX; |
5337 | 5358 | ||
5359 | init_timer(&bp->timer); | ||
5360 | bp->timer.expires = RUN_AT(bp->timer_interval); | ||
5361 | bp->timer.data = (unsigned long) bp; | ||
5362 | bp->timer.function = bnx2_timer; | ||
5363 | |||
5338 | return 0; | 5364 | return 0; |
5339 | 5365 | ||
5340 | err_out_unmap: | 5366 | err_out_unmap: |
5341 | if (bp->regview) { | 5367 | if (bp->regview) { |
5342 | iounmap(bp->regview); | 5368 | iounmap(bp->regview); |
5369 | bp->regview = NULL; | ||
5343 | } | 5370 | } |
5344 | 5371 | ||
5345 | err_out_release: | 5372 | err_out_release: |
@@ -5454,6 +5481,8 @@ bnx2_remove_one(struct pci_dev *pdev) | |||
5454 | struct net_device *dev = pci_get_drvdata(pdev); | 5481 | struct net_device *dev = pci_get_drvdata(pdev); |
5455 | struct bnx2 *bp = dev->priv; | 5482 | struct bnx2 *bp = dev->priv; |
5456 | 5483 | ||
5484 | flush_scheduled_work(); | ||
5485 | |||
5457 | unregister_netdev(dev); | 5486 | unregister_netdev(dev); |
5458 | 5487 | ||
5459 | if (bp->regview) | 5488 | if (bp->regview) |
@@ -5505,12 +5534,12 @@ bnx2_resume(struct pci_dev *pdev) | |||
5505 | } | 5534 | } |
5506 | 5535 | ||
5507 | static struct pci_driver bnx2_pci_driver = { | 5536 | static struct pci_driver bnx2_pci_driver = { |
5508 | name: DRV_MODULE_NAME, | 5537 | .name = DRV_MODULE_NAME, |
5509 | id_table: bnx2_pci_tbl, | 5538 | .id_table = bnx2_pci_tbl, |
5510 | probe: bnx2_init_one, | 5539 | .probe = bnx2_init_one, |
5511 | remove: __devexit_p(bnx2_remove_one), | 5540 | .remove = __devexit_p(bnx2_remove_one), |
5512 | suspend: bnx2_suspend, | 5541 | .suspend = bnx2_suspend, |
5513 | resume: bnx2_resume, | 5542 | .resume = bnx2_resume, |
5514 | }; | 5543 | }; |
5515 | 5544 | ||
5516 | static int __init bnx2_init(void) | 5545 | static int __init bnx2_init(void) |
diff --git a/drivers/net/bnx2.h b/drivers/net/bnx2.h index 8214a2853d0d..9ad3f5740cd8 100644 --- a/drivers/net/bnx2.h +++ b/drivers/net/bnx2.h | |||
@@ -3841,12 +3841,12 @@ struct bnx2 { | |||
3841 | struct status_block *status_blk; | 3841 | struct status_block *status_blk; |
3842 | u32 last_status_idx; | 3842 | u32 last_status_idx; |
3843 | 3843 | ||
3844 | atomic_t tx_avail_bd; | ||
3845 | struct tx_bd *tx_desc_ring; | 3844 | struct tx_bd *tx_desc_ring; |
3846 | struct sw_bd *tx_buf_ring; | 3845 | struct sw_bd *tx_buf_ring; |
3847 | u32 tx_prod_bseq; | 3846 | u32 tx_prod_bseq; |
3848 | u16 tx_prod; | 3847 | u16 tx_prod; |
3849 | u16 tx_cons; | 3848 | u16 tx_cons; |
3849 | int tx_ring_size; | ||
3850 | 3850 | ||
3851 | #ifdef BCM_VLAN | 3851 | #ifdef BCM_VLAN |
3852 | struct vlan_group *vlgrp; | 3852 | struct vlan_group *vlgrp; |
@@ -3872,8 +3872,10 @@ struct bnx2 { | |||
3872 | char *name; | 3872 | char *name; |
3873 | 3873 | ||
3874 | int timer_interval; | 3874 | int timer_interval; |
3875 | int current_interval; | ||
3875 | struct timer_list timer; | 3876 | struct timer_list timer; |
3876 | struct work_struct reset_task; | 3877 | struct work_struct reset_task; |
3878 | int in_reset_task; | ||
3877 | 3879 | ||
3878 | /* Used to synchronize phy accesses. */ | 3880 | /* Used to synchronize phy accesses. */ |
3879 | spinlock_t phy_lock; | 3881 | spinlock_t phy_lock; |
@@ -3927,7 +3929,6 @@ struct bnx2 { | |||
3927 | u16 fw_wr_seq; | 3929 | u16 fw_wr_seq; |
3928 | u16 fw_drv_pulse_wr_seq; | 3930 | u16 fw_drv_pulse_wr_seq; |
3929 | 3931 | ||
3930 | int tx_ring_size; | ||
3931 | dma_addr_t tx_desc_mapping; | 3932 | dma_addr_t tx_desc_mapping; |
3932 | 3933 | ||
3933 | 3934 | ||
@@ -3985,7 +3986,7 @@ struct bnx2 { | |||
3985 | #define PHY_LOOPBACK 2 | 3986 | #define PHY_LOOPBACK 2 |
3986 | 3987 | ||
3987 | u8 serdes_an_pending; | 3988 | u8 serdes_an_pending; |
3988 | #define SERDES_AN_TIMEOUT (2 * HZ) | 3989 | #define SERDES_AN_TIMEOUT (HZ / 3) |
3989 | 3990 | ||
3990 | u8 mac_addr[8]; | 3991 | u8 mac_addr[8]; |
3991 | 3992 | ||
@@ -4171,6 +4172,9 @@ struct fw_info { | |||
4171 | 4172 | ||
4172 | #define BNX2_PORT_HW_CFG_MAC_LOWER 0x00000054 | 4173 | #define BNX2_PORT_HW_CFG_MAC_LOWER 0x00000054 |
4173 | #define BNX2_PORT_HW_CFG_CONFIG 0x00000058 | 4174 | #define BNX2_PORT_HW_CFG_CONFIG 0x00000058 |
4175 | #define BNX2_PORT_HW_CFG_CFG_DFLT_LINK_MASK 0x001f0000 | ||
4176 | #define BNX2_PORT_HW_CFG_CFG_DFLT_LINK_AN 0x00000000 | ||
4177 | #define BNX2_PORT_HW_CFG_CFG_DFLT_LINK_1G 0x00030000 | ||
4174 | 4178 | ||
4175 | #define BNX2_PORT_HW_CFG_IMD_MAC_A_UPPER 0x00000068 | 4179 | #define BNX2_PORT_HW_CFG_IMD_MAC_A_UPPER 0x00000068 |
4176 | #define BNX2_PORT_HW_CFG_IMD_MAC_A_LOWER 0x0000006c | 4180 | #define BNX2_PORT_HW_CFG_IMD_MAC_A_LOWER 0x0000006c |
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index a2e8dda5afac..d2f34d5a8083 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c | |||
@@ -2419,22 +2419,19 @@ out: | |||
2419 | return 0; | 2419 | return 0; |
2420 | } | 2420 | } |
2421 | 2421 | ||
2422 | int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype) | 2422 | int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype, struct net_device *orig_dev) |
2423 | { | 2423 | { |
2424 | struct bonding *bond = dev->priv; | 2424 | struct bonding *bond = dev->priv; |
2425 | struct slave *slave = NULL; | 2425 | struct slave *slave = NULL; |
2426 | int ret = NET_RX_DROP; | 2426 | int ret = NET_RX_DROP; |
2427 | 2427 | ||
2428 | if (!(dev->flags & IFF_MASTER)) { | 2428 | if (!(dev->flags & IFF_MASTER)) |
2429 | goto out; | 2429 | goto out; |
2430 | } | ||
2431 | 2430 | ||
2432 | read_lock(&bond->lock); | 2431 | read_lock(&bond->lock); |
2433 | slave = bond_get_slave_by_dev((struct bonding *)dev->priv, | 2432 | slave = bond_get_slave_by_dev((struct bonding *)dev->priv, orig_dev); |
2434 | skb->real_dev); | 2433 | if (!slave) |
2435 | if (slave == NULL) { | ||
2436 | goto out_unlock; | 2434 | goto out_unlock; |
2437 | } | ||
2438 | 2435 | ||
2439 | bond_3ad_rx_indication((struct lacpdu *) skb->data, slave, skb->len); | 2436 | bond_3ad_rx_indication((struct lacpdu *) skb->data, slave, skb->len); |
2440 | 2437 | ||
diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h index f46823894187..673a30af5660 100644 --- a/drivers/net/bonding/bond_3ad.h +++ b/drivers/net/bonding/bond_3ad.h | |||
@@ -295,6 +295,6 @@ void bond_3ad_adapter_duplex_changed(struct slave *slave); | |||
295 | void bond_3ad_handle_link_change(struct slave *slave, char link); | 295 | void bond_3ad_handle_link_change(struct slave *slave, char link); |
296 | int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info); | 296 | int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info); |
297 | int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev); | 297 | int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev); |
298 | int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype); | 298 | int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype, struct net_device *orig_dev); |
299 | #endif //__BOND_3AD_H__ | 299 | #endif //__BOND_3AD_H__ |
300 | 300 | ||
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 19e829b567d0..f8fce3961197 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c | |||
@@ -354,15 +354,14 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp) | |||
354 | _unlock_rx_hashtbl(bond); | 354 | _unlock_rx_hashtbl(bond); |
355 | } | 355 | } |
356 | 356 | ||
357 | static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype) | 357 | static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype, struct net_device *orig_dev) |
358 | { | 358 | { |
359 | struct bonding *bond = bond_dev->priv; | 359 | struct bonding *bond = bond_dev->priv; |
360 | struct arp_pkt *arp = (struct arp_pkt *)skb->data; | 360 | struct arp_pkt *arp = (struct arp_pkt *)skb->data; |
361 | int res = NET_RX_DROP; | 361 | int res = NET_RX_DROP; |
362 | 362 | ||
363 | if (!(bond_dev->flags & IFF_MASTER)) { | 363 | if (!(bond_dev->flags & IFF_MASTER)) |
364 | goto out; | 364 | goto out; |
365 | } | ||
366 | 365 | ||
367 | if (!arp) { | 366 | if (!arp) { |
368 | dprintk("Packet has no ARP data\n"); | 367 | dprintk("Packet has no ARP data\n"); |
diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index ba9f0580e1f9..2946e037a9b1 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c | |||
@@ -98,7 +98,7 @@ static char bcast_addr[6]={0xFF,0xFF,0xFF,0xFF,0xFF,0xFF}; | |||
98 | 98 | ||
99 | static char bpq_eth_addr[6]; | 99 | static char bpq_eth_addr[6]; |
100 | 100 | ||
101 | static int bpq_rcv(struct sk_buff *, struct net_device *, struct packet_type *); | 101 | static int bpq_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); |
102 | static int bpq_device_event(struct notifier_block *, unsigned long, void *); | 102 | static int bpq_device_event(struct notifier_block *, unsigned long, void *); |
103 | static const char *bpq_print_ethaddr(const unsigned char *); | 103 | static const char *bpq_print_ethaddr(const unsigned char *); |
104 | 104 | ||
@@ -165,7 +165,7 @@ static inline int dev_is_ethdev(struct net_device *dev) | |||
165 | /* | 165 | /* |
166 | * Receive an AX.25 frame via an ethernet interface. | 166 | * Receive an AX.25 frame via an ethernet interface. |
167 | */ | 167 | */ |
168 | static int bpq_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype) | 168 | static int bpq_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) |
169 | { | 169 | { |
170 | int len; | 170 | int len; |
171 | char * ptr; | 171 | char * ptr; |
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index c39b0609742a..32d5fabd4b10 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c | |||
@@ -1144,7 +1144,7 @@ static void ibmveth_proc_unregister_driver(void) | |||
1144 | 1144 | ||
1145 | static struct vio_device_id ibmveth_device_table[] __devinitdata= { | 1145 | static struct vio_device_id ibmveth_device_table[] __devinitdata= { |
1146 | { "network", "IBM,l-lan"}, | 1146 | { "network", "IBM,l-lan"}, |
1147 | { 0,} | 1147 | { "", "" } |
1148 | }; | 1148 | }; |
1149 | 1149 | ||
1150 | MODULE_DEVICE_TABLE(vio, ibmveth_device_table); | 1150 | MODULE_DEVICE_TABLE(vio, ibmveth_device_table); |
diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c index 55af32e9bf08..183ba97785b0 100644 --- a/drivers/net/iseries_veth.c +++ b/drivers/net/iseries_veth.c | |||
@@ -1370,7 +1370,7 @@ static int veth_probe(struct vio_dev *vdev, const struct vio_device_id *id) | |||
1370 | */ | 1370 | */ |
1371 | static struct vio_device_id veth_device_table[] __devinitdata = { | 1371 | static struct vio_device_id veth_device_table[] __devinitdata = { |
1372 | { "vlan", "" }, | 1372 | { "vlan", "" }, |
1373 | { NULL, NULL } | 1373 | { "", "" } |
1374 | }; | 1374 | }; |
1375 | MODULE_DEVICE_TABLE(vio, veth_device_table); | 1375 | MODULE_DEVICE_TABLE(vio, veth_device_table); |
1376 | 1376 | ||
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index a32668e88e09..bb71638a7c44 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c | |||
@@ -1657,7 +1657,6 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) | |||
1657 | skb->dev = ppp->dev; | 1657 | skb->dev = ppp->dev; |
1658 | skb->protocol = htons(npindex_to_ethertype[npi]); | 1658 | skb->protocol = htons(npindex_to_ethertype[npi]); |
1659 | skb->mac.raw = skb->data; | 1659 | skb->mac.raw = skb->data; |
1660 | skb->input_dev = ppp->dev; | ||
1661 | netif_rx(skb); | 1660 | netif_rx(skb); |
1662 | ppp->dev->last_rx = jiffies; | 1661 | ppp->dev->last_rx = jiffies; |
1663 | } | 1662 | } |
diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index ce1a9bf7b9a7..82f236cc3b9b 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c | |||
@@ -377,7 +377,8 @@ abort_kfree: | |||
377 | ***********************************************************************/ | 377 | ***********************************************************************/ |
378 | static int pppoe_rcv(struct sk_buff *skb, | 378 | static int pppoe_rcv(struct sk_buff *skb, |
379 | struct net_device *dev, | 379 | struct net_device *dev, |
380 | struct packet_type *pt) | 380 | struct packet_type *pt, |
381 | struct net_device *orig_dev) | ||
381 | 382 | ||
382 | { | 383 | { |
383 | struct pppoe_hdr *ph; | 384 | struct pppoe_hdr *ph; |
@@ -426,7 +427,8 @@ out: | |||
426 | ***********************************************************************/ | 427 | ***********************************************************************/ |
427 | static int pppoe_disc_rcv(struct sk_buff *skb, | 428 | static int pppoe_disc_rcv(struct sk_buff *skb, |
428 | struct net_device *dev, | 429 | struct net_device *dev, |
429 | struct packet_type *pt) | 430 | struct packet_type *pt, |
431 | struct net_device *orig_dev) | ||
430 | 432 | ||
431 | { | 433 | { |
432 | struct pppoe_hdr *ph; | 434 | struct pppoe_hdr *ph; |
diff --git a/drivers/net/rrunner.c b/drivers/net/rrunner.c index 12a86f96d973..ec1a18d189a1 100644 --- a/drivers/net/rrunner.c +++ b/drivers/net/rrunner.c | |||
@@ -1429,6 +1429,7 @@ static int rr_start_xmit(struct sk_buff *skb, struct net_device *dev) | |||
1429 | { | 1429 | { |
1430 | struct rr_private *rrpriv = netdev_priv(dev); | 1430 | struct rr_private *rrpriv = netdev_priv(dev); |
1431 | struct rr_regs __iomem *regs = rrpriv->regs; | 1431 | struct rr_regs __iomem *regs = rrpriv->regs; |
1432 | struct hippi_cb *hcb = (struct hippi_cb *) skb->cb; | ||
1432 | struct ring_ctrl *txctrl; | 1433 | struct ring_ctrl *txctrl; |
1433 | unsigned long flags; | 1434 | unsigned long flags; |
1434 | u32 index, len = skb->len; | 1435 | u32 index, len = skb->len; |
@@ -1460,7 +1461,7 @@ static int rr_start_xmit(struct sk_buff *skb, struct net_device *dev) | |||
1460 | ifield = (u32 *)skb_push(skb, 8); | 1461 | ifield = (u32 *)skb_push(skb, 8); |
1461 | 1462 | ||
1462 | ifield[0] = 0; | 1463 | ifield[0] = 0; |
1463 | ifield[1] = skb->private.ifield; | 1464 | ifield[1] = hcb->ifield; |
1464 | 1465 | ||
1465 | /* | 1466 | /* |
1466 | * We don't need the lock before we are actually going to start | 1467 | * We don't need the lock before we are actually going to start |
diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h index 5d9270730ca2..bc64d967f080 100644 --- a/drivers/net/s2io.h +++ b/drivers/net/s2io.h | |||
@@ -762,8 +762,8 @@ static inline u64 readq(void __iomem *addr) | |||
762 | { | 762 | { |
763 | u64 ret = 0; | 763 | u64 ret = 0; |
764 | ret = readl(addr + 4); | 764 | ret = readl(addr + 4); |
765 | (u64) ret <<= 32; | 765 | ret <<= 32; |
766 | (u64) ret |= readl(addr); | 766 | ret |= readl(addr); |
767 | 767 | ||
768 | return ret; | 768 | return ret; |
769 | } | 769 | } |
diff --git a/drivers/net/shaper.c b/drivers/net/shaper.c index 3ad0b6751f6f..221354eea21f 100644 --- a/drivers/net/shaper.c +++ b/drivers/net/shaper.c | |||
@@ -156,52 +156,6 @@ static int shaper_start_xmit(struct sk_buff *skb, struct net_device *dev) | |||
156 | 156 | ||
157 | SHAPERCB(skb)->shapelen= shaper_clocks(shaper,skb); | 157 | SHAPERCB(skb)->shapelen= shaper_clocks(shaper,skb); |
158 | 158 | ||
159 | #ifdef SHAPER_COMPLEX /* and broken.. */ | ||
160 | |||
161 | while(ptr && ptr!=(struct sk_buff *)&shaper->sendq) | ||
162 | { | ||
163 | if(ptr->pri<skb->pri | ||
164 | && jiffies - SHAPERCB(ptr)->shapeclock < SHAPER_MAXSLIP) | ||
165 | { | ||
166 | struct sk_buff *tmp=ptr->prev; | ||
167 | |||
168 | /* | ||
169 | * It goes before us therefore we slip the length | ||
170 | * of the new frame. | ||
171 | */ | ||
172 | |||
173 | SHAPERCB(ptr)->shapeclock+=SHAPERCB(skb)->shapelen; | ||
174 | SHAPERCB(ptr)->shapelatency+=SHAPERCB(skb)->shapelen; | ||
175 | |||
176 | /* | ||
177 | * The packet may have slipped so far back it | ||
178 | * fell off. | ||
179 | */ | ||
180 | if(SHAPERCB(ptr)->shapelatency > SHAPER_LATENCY) | ||
181 | { | ||
182 | skb_unlink(ptr); | ||
183 | dev_kfree_skb(ptr); | ||
184 | } | ||
185 | ptr=tmp; | ||
186 | } | ||
187 | else | ||
188 | break; | ||
189 | } | ||
190 | if(ptr==NULL || ptr==(struct sk_buff *)&shaper->sendq) | ||
191 | skb_queue_head(&shaper->sendq,skb); | ||
192 | else | ||
193 | { | ||
194 | struct sk_buff *tmp; | ||
195 | /* | ||
196 | * Set the packet clock out time according to the | ||
197 | * frames ahead. Im sure a bit of thought could drop | ||
198 | * this loop. | ||
199 | */ | ||
200 | for(tmp=skb_peek(&shaper->sendq); tmp!=NULL && tmp!=ptr; tmp=tmp->next) | ||
201 | SHAPERCB(skb)->shapeclock+=tmp->shapelen; | ||
202 | skb_append(ptr,skb); | ||
203 | } | ||
204 | #else | ||
205 | { | 159 | { |
206 | struct sk_buff *tmp; | 160 | struct sk_buff *tmp; |
207 | /* | 161 | /* |
@@ -220,7 +174,7 @@ static int shaper_start_xmit(struct sk_buff *skb, struct net_device *dev) | |||
220 | } else | 174 | } else |
221 | skb_queue_tail(&shaper->sendq, skb); | 175 | skb_queue_tail(&shaper->sendq, skb); |
222 | } | 176 | } |
223 | #endif | 177 | |
224 | if(sh_debug) | 178 | if(sh_debug) |
225 | printk("Frame queued.\n"); | 179 | printk("Frame queued.\n"); |
226 | if(skb_queue_len(&shaper->sendq)>SHAPER_QLEN) | 180 | if(skb_queue_len(&shaper->sendq)>SHAPER_QLEN) |
@@ -302,7 +256,7 @@ static void shaper_kick(struct shaper *shaper) | |||
302 | * Pull the frame and get interrupts back on. | 256 | * Pull the frame and get interrupts back on. |
303 | */ | 257 | */ |
304 | 258 | ||
305 | skb_unlink(skb); | 259 | skb_unlink(skb, &shaper->sendq); |
306 | if (shaper->recovery < | 260 | if (shaper->recovery < |
307 | SHAPERCB(skb)->shapeclock + SHAPERCB(skb)->shapelen) | 261 | SHAPERCB(skb)->shapeclock + SHAPERCB(skb)->shapelen) |
308 | shaper->recovery = SHAPERCB(skb)->shapeclock + SHAPERCB(skb)->shapelen; | 262 | shaper->recovery = SHAPERCB(skb)->shapeclock + SHAPERCB(skb)->shapelen; |
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 6d4ab1e333b5..af8263a1580e 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c | |||
@@ -340,41 +340,92 @@ static struct { | |||
340 | 340 | ||
341 | static void tg3_write_indirect_reg32(struct tg3 *tp, u32 off, u32 val) | 341 | static void tg3_write_indirect_reg32(struct tg3 *tp, u32 off, u32 val) |
342 | { | 342 | { |
343 | if ((tp->tg3_flags & TG3_FLAG_PCIX_TARGET_HWBUG) != 0) { | 343 | unsigned long flags; |
344 | spin_lock_bh(&tp->indirect_lock); | 344 | |
345 | pci_write_config_dword(tp->pdev, TG3PCI_REG_BASE_ADDR, off); | 345 | spin_lock_irqsave(&tp->indirect_lock, flags); |
346 | pci_write_config_dword(tp->pdev, TG3PCI_REG_DATA, val); | 346 | pci_write_config_dword(tp->pdev, TG3PCI_REG_BASE_ADDR, off); |
347 | spin_unlock_bh(&tp->indirect_lock); | 347 | pci_write_config_dword(tp->pdev, TG3PCI_REG_DATA, val); |
348 | } else { | 348 | spin_unlock_irqrestore(&tp->indirect_lock, flags); |
349 | writel(val, tp->regs + off); | 349 | } |
350 | if ((tp->tg3_flags & TG3_FLAG_5701_REG_WRITE_BUG) != 0) | 350 | |
351 | readl(tp->regs + off); | 351 | static void tg3_write_flush_reg32(struct tg3 *tp, u32 off, u32 val) |
352 | { | ||
353 | writel(val, tp->regs + off); | ||
354 | readl(tp->regs + off); | ||
355 | } | ||
356 | |||
357 | static u32 tg3_read_indirect_reg32(struct tg3 *tp, u32 off) | ||
358 | { | ||
359 | unsigned long flags; | ||
360 | u32 val; | ||
361 | |||
362 | spin_lock_irqsave(&tp->indirect_lock, flags); | ||
363 | pci_write_config_dword(tp->pdev, TG3PCI_REG_BASE_ADDR, off); | ||
364 | pci_read_config_dword(tp->pdev, TG3PCI_REG_DATA, &val); | ||
365 | spin_unlock_irqrestore(&tp->indirect_lock, flags); | ||
366 | return val; | ||
367 | } | ||
368 | |||
369 | static void tg3_write_indirect_mbox(struct tg3 *tp, u32 off, u32 val) | ||
370 | { | ||
371 | unsigned long flags; | ||
372 | |||
373 | if (off == (MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW)) { | ||
374 | pci_write_config_dword(tp->pdev, TG3PCI_RCV_RET_RING_CON_IDX + | ||
375 | TG3_64BIT_REG_LOW, val); | ||
376 | return; | ||
377 | } | ||
378 | if (off == (MAILBOX_RCV_STD_PROD_IDX + TG3_64BIT_REG_LOW)) { | ||
379 | pci_write_config_dword(tp->pdev, TG3PCI_STD_RING_PROD_IDX + | ||
380 | TG3_64BIT_REG_LOW, val); | ||
381 | return; | ||
382 | } | ||
383 | |||
384 | spin_lock_irqsave(&tp->indirect_lock, flags); | ||
385 | pci_write_config_dword(tp->pdev, TG3PCI_REG_BASE_ADDR, off + 0x5600); | ||
386 | pci_write_config_dword(tp->pdev, TG3PCI_REG_DATA, val); | ||
387 | spin_unlock_irqrestore(&tp->indirect_lock, flags); | ||
388 | |||
389 | /* In indirect mode when disabling interrupts, we also need | ||
390 | * to clear the interrupt bit in the GRC local ctrl register. | ||
391 | */ | ||
392 | if ((off == (MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW)) && | ||
393 | (val == 0x1)) { | ||
394 | pci_write_config_dword(tp->pdev, TG3PCI_MISC_LOCAL_CTRL, | ||
395 | tp->grc_local_ctrl|GRC_LCLCTRL_CLEARINT); | ||
352 | } | 396 | } |
353 | } | 397 | } |
354 | 398 | ||
399 | static u32 tg3_read_indirect_mbox(struct tg3 *tp, u32 off) | ||
400 | { | ||
401 | unsigned long flags; | ||
402 | u32 val; | ||
403 | |||
404 | spin_lock_irqsave(&tp->indirect_lock, flags); | ||
405 | pci_write_config_dword(tp->pdev, TG3PCI_REG_BASE_ADDR, off + 0x5600); | ||
406 | pci_read_config_dword(tp->pdev, TG3PCI_REG_DATA, &val); | ||
407 | spin_unlock_irqrestore(&tp->indirect_lock, flags); | ||
408 | return val; | ||
409 | } | ||
410 | |||
355 | static void _tw32_flush(struct tg3 *tp, u32 off, u32 val) | 411 | static void _tw32_flush(struct tg3 *tp, u32 off, u32 val) |
356 | { | 412 | { |
357 | if ((tp->tg3_flags & TG3_FLAG_PCIX_TARGET_HWBUG) != 0) { | 413 | tp->write32(tp, off, val); |
358 | spin_lock_bh(&tp->indirect_lock); | 414 | if (!(tp->tg3_flags & TG3_FLAG_PCIX_TARGET_HWBUG) && |
359 | pci_write_config_dword(tp->pdev, TG3PCI_REG_BASE_ADDR, off); | 415 | !(tp->tg3_flags & TG3_FLAG_5701_REG_WRITE_BUG) && |
360 | pci_write_config_dword(tp->pdev, TG3PCI_REG_DATA, val); | 416 | !(tp->tg3_flags2 & TG3_FLG2_ICH_WORKAROUND)) |
361 | spin_unlock_bh(&tp->indirect_lock); | 417 | tp->read32(tp, off); /* flush */ |
362 | } else { | ||
363 | void __iomem *dest = tp->regs + off; | ||
364 | writel(val, dest); | ||
365 | readl(dest); /* always flush PCI write */ | ||
366 | } | ||
367 | } | 418 | } |
368 | 419 | ||
369 | static inline void _tw32_rx_mbox(struct tg3 *tp, u32 off, u32 val) | 420 | static inline void tw32_mailbox_flush(struct tg3 *tp, u32 off, u32 val) |
370 | { | 421 | { |
371 | void __iomem *mbox = tp->regs + off; | 422 | tp->write32_mbox(tp, off, val); |
372 | writel(val, mbox); | 423 | if (!(tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER) && |
373 | if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER) | 424 | !(tp->tg3_flags2 & TG3_FLG2_ICH_WORKAROUND)) |
374 | readl(mbox); | 425 | tp->read32_mbox(tp, off); |
375 | } | 426 | } |
376 | 427 | ||
377 | static inline void _tw32_tx_mbox(struct tg3 *tp, u32 off, u32 val) | 428 | static void tg3_write32_tx_mbox(struct tg3 *tp, u32 off, u32 val) |
378 | { | 429 | { |
379 | void __iomem *mbox = tp->regs + off; | 430 | void __iomem *mbox = tp->regs + off; |
380 | writel(val, mbox); | 431 | writel(val, mbox); |
@@ -384,46 +435,57 @@ static inline void _tw32_tx_mbox(struct tg3 *tp, u32 off, u32 val) | |||
384 | readl(mbox); | 435 | readl(mbox); |
385 | } | 436 | } |
386 | 437 | ||
387 | #define tw32_mailbox(reg, val) writel(((val) & 0xffffffff), tp->regs + (reg)) | 438 | static void tg3_write32(struct tg3 *tp, u32 off, u32 val) |
388 | #define tw32_rx_mbox(reg, val) _tw32_rx_mbox(tp, reg, val) | 439 | { |
389 | #define tw32_tx_mbox(reg, val) _tw32_tx_mbox(tp, reg, val) | 440 | writel(val, tp->regs + off); |
441 | } | ||
442 | |||
443 | static u32 tg3_read32(struct tg3 *tp, u32 off) | ||
444 | { | ||
445 | return (readl(tp->regs + off)); | ||
446 | } | ||
447 | |||
448 | #define tw32_mailbox(reg, val) tp->write32_mbox(tp, reg, val) | ||
449 | #define tw32_mailbox_f(reg, val) tw32_mailbox_flush(tp, (reg), (val)) | ||
450 | #define tw32_rx_mbox(reg, val) tp->write32_rx_mbox(tp, reg, val) | ||
451 | #define tw32_tx_mbox(reg, val) tp->write32_tx_mbox(tp, reg, val) | ||
452 | #define tr32_mailbox(reg) tp->read32_mbox(tp, reg) | ||
390 | 453 | ||
391 | #define tw32(reg,val) tg3_write_indirect_reg32(tp,(reg),(val)) | 454 | #define tw32(reg,val) tp->write32(tp, reg, val) |
392 | #define tw32_f(reg,val) _tw32_flush(tp,(reg),(val)) | 455 | #define tw32_f(reg,val) _tw32_flush(tp,(reg),(val)) |
393 | #define tw16(reg,val) writew(((val) & 0xffff), tp->regs + (reg)) | 456 | #define tr32(reg) tp->read32(tp, reg) |
394 | #define tw8(reg,val) writeb(((val) & 0xff), tp->regs + (reg)) | ||
395 | #define tr32(reg) readl(tp->regs + (reg)) | ||
396 | #define tr16(reg) readw(tp->regs + (reg)) | ||
397 | #define tr8(reg) readb(tp->regs + (reg)) | ||
398 | 457 | ||
399 | static void tg3_write_mem(struct tg3 *tp, u32 off, u32 val) | 458 | static void tg3_write_mem(struct tg3 *tp, u32 off, u32 val) |
400 | { | 459 | { |
401 | spin_lock_bh(&tp->indirect_lock); | 460 | unsigned long flags; |
461 | |||
462 | spin_lock_irqsave(&tp->indirect_lock, flags); | ||
402 | pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, off); | 463 | pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, off); |
403 | pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_DATA, val); | 464 | pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_DATA, val); |
404 | 465 | ||
405 | /* Always leave this as zero. */ | 466 | /* Always leave this as zero. */ |
406 | pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, 0); | 467 | pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, 0); |
407 | spin_unlock_bh(&tp->indirect_lock); | 468 | spin_unlock_irqrestore(&tp->indirect_lock, flags); |
408 | } | 469 | } |
409 | 470 | ||
410 | static void tg3_read_mem(struct tg3 *tp, u32 off, u32 *val) | 471 | static void tg3_read_mem(struct tg3 *tp, u32 off, u32 *val) |
411 | { | 472 | { |
412 | spin_lock_bh(&tp->indirect_lock); | 473 | unsigned long flags; |
474 | |||
475 | spin_lock_irqsave(&tp->indirect_lock, flags); | ||
413 | pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, off); | 476 | pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, off); |
414 | pci_read_config_dword(tp->pdev, TG3PCI_MEM_WIN_DATA, val); | 477 | pci_read_config_dword(tp->pdev, TG3PCI_MEM_WIN_DATA, val); |
415 | 478 | ||
416 | /* Always leave this as zero. */ | 479 | /* Always leave this as zero. */ |
417 | pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, 0); | 480 | pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, 0); |
418 | spin_unlock_bh(&tp->indirect_lock); | 481 | spin_unlock_irqrestore(&tp->indirect_lock, flags); |
419 | } | 482 | } |
420 | 483 | ||
421 | static void tg3_disable_ints(struct tg3 *tp) | 484 | static void tg3_disable_ints(struct tg3 *tp) |
422 | { | 485 | { |
423 | tw32(TG3PCI_MISC_HOST_CTRL, | 486 | tw32(TG3PCI_MISC_HOST_CTRL, |
424 | (tp->misc_host_ctrl | MISC_HOST_CTRL_MASK_PCI_INT)); | 487 | (tp->misc_host_ctrl | MISC_HOST_CTRL_MASK_PCI_INT)); |
425 | tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001); | 488 | tw32_mailbox_f(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001); |
426 | tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW); | ||
427 | } | 489 | } |
428 | 490 | ||
429 | static inline void tg3_cond_int(struct tg3 *tp) | 491 | static inline void tg3_cond_int(struct tg3 *tp) |
@@ -439,9 +501,8 @@ static void tg3_enable_ints(struct tg3 *tp) | |||
439 | 501 | ||
440 | tw32(TG3PCI_MISC_HOST_CTRL, | 502 | tw32(TG3PCI_MISC_HOST_CTRL, |
441 | (tp->misc_host_ctrl & ~MISC_HOST_CTRL_MASK_PCI_INT)); | 503 | (tp->misc_host_ctrl & ~MISC_HOST_CTRL_MASK_PCI_INT)); |
442 | tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, | 504 | tw32_mailbox_f(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, |
443 | (tp->last_tag << 24)); | 505 | (tp->last_tag << 24)); |
444 | tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW); | ||
445 | tg3_cond_int(tp); | 506 | tg3_cond_int(tp); |
446 | } | 507 | } |
447 | 508 | ||
@@ -472,8 +533,6 @@ static inline unsigned int tg3_has_work(struct tg3 *tp) | |||
472 | */ | 533 | */ |
473 | static void tg3_restart_ints(struct tg3 *tp) | 534 | static void tg3_restart_ints(struct tg3 *tp) |
474 | { | 535 | { |
475 | tw32(TG3PCI_MISC_HOST_CTRL, | ||
476 | (tp->misc_host_ctrl & ~MISC_HOST_CTRL_MASK_PCI_INT)); | ||
477 | tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, | 536 | tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, |
478 | tp->last_tag << 24); | 537 | tp->last_tag << 24); |
479 | mmiowb(); | 538 | mmiowb(); |
@@ -3278,9 +3337,8 @@ static irqreturn_t tg3_interrupt(int irq, void *dev_id, struct pt_regs *regs) | |||
3278 | /* No work, shared interrupt perhaps? re-enable | 3337 | /* No work, shared interrupt perhaps? re-enable |
3279 | * interrupts, and flush that PCI write | 3338 | * interrupts, and flush that PCI write |
3280 | */ | 3339 | */ |
3281 | tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, | 3340 | tw32_mailbox_f(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, |
3282 | 0x00000000); | 3341 | 0x00000000); |
3283 | tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW); | ||
3284 | } | 3342 | } |
3285 | } else { /* shared interrupt */ | 3343 | } else { /* shared interrupt */ |
3286 | handled = 0; | 3344 | handled = 0; |
@@ -3323,9 +3381,8 @@ static irqreturn_t tg3_interrupt_tagged(int irq, void *dev_id, struct pt_regs *r | |||
3323 | /* no work, shared interrupt perhaps? re-enable | 3381 | /* no work, shared interrupt perhaps? re-enable |
3324 | * interrupts, and flush that PCI write | 3382 | * interrupts, and flush that PCI write |
3325 | */ | 3383 | */ |
3326 | tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, | 3384 | tw32_mailbox_f(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, |
3327 | tp->last_tag << 24); | 3385 | tp->last_tag << 24); |
3328 | tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW); | ||
3329 | } | 3386 | } |
3330 | } else { /* shared interrupt */ | 3387 | } else { /* shared interrupt */ |
3331 | handled = 0; | 3388 | handled = 0; |
@@ -4216,7 +4273,7 @@ static void tg3_stop_fw(struct tg3 *); | |||
4216 | static int tg3_chip_reset(struct tg3 *tp) | 4273 | static int tg3_chip_reset(struct tg3 *tp) |
4217 | { | 4274 | { |
4218 | u32 val; | 4275 | u32 val; |
4219 | u32 flags_save; | 4276 | void (*write_op)(struct tg3 *, u32, u32); |
4220 | int i; | 4277 | int i; |
4221 | 4278 | ||
4222 | if (!(tp->tg3_flags2 & TG3_FLG2_SUN_570X)) | 4279 | if (!(tp->tg3_flags2 & TG3_FLG2_SUN_570X)) |
@@ -4228,8 +4285,9 @@ static int tg3_chip_reset(struct tg3 *tp) | |||
4228 | * fun things. So, temporarily disable the 5701 | 4285 | * fun things. So, temporarily disable the 5701 |
4229 | * hardware workaround, while we do the reset. | 4286 | * hardware workaround, while we do the reset. |
4230 | */ | 4287 | */ |
4231 | flags_save = tp->tg3_flags; | 4288 | write_op = tp->write32; |
4232 | tp->tg3_flags &= ~TG3_FLAG_5701_REG_WRITE_BUG; | 4289 | if (write_op == tg3_write_flush_reg32) |
4290 | tp->write32 = tg3_write32; | ||
4233 | 4291 | ||
4234 | /* do the reset */ | 4292 | /* do the reset */ |
4235 | val = GRC_MISC_CFG_CORECLK_RESET; | 4293 | val = GRC_MISC_CFG_CORECLK_RESET; |
@@ -4248,8 +4306,8 @@ static int tg3_chip_reset(struct tg3 *tp) | |||
4248 | val |= GRC_MISC_CFG_KEEP_GPHY_POWER; | 4306 | val |= GRC_MISC_CFG_KEEP_GPHY_POWER; |
4249 | tw32(GRC_MISC_CFG, val); | 4307 | tw32(GRC_MISC_CFG, val); |
4250 | 4308 | ||
4251 | /* restore 5701 hardware bug workaround flag */ | 4309 | /* restore 5701 hardware bug workaround write method */ |
4252 | tp->tg3_flags = flags_save; | 4310 | tp->write32 = write_op; |
4253 | 4311 | ||
4254 | /* Unfortunately, we have to delay before the PCI read back. | 4312 | /* Unfortunately, we have to delay before the PCI read back. |
4255 | * Some 575X chips even will not respond to a PCI cfg access | 4313 | * Some 575X chips even will not respond to a PCI cfg access |
@@ -4635,7 +4693,6 @@ static int tg3_load_firmware_cpu(struct tg3 *tp, u32 cpu_base, u32 cpu_scratch_b | |||
4635 | int cpu_scratch_size, struct fw_info *info) | 4693 | int cpu_scratch_size, struct fw_info *info) |
4636 | { | 4694 | { |
4637 | int err, i; | 4695 | int err, i; |
4638 | u32 orig_tg3_flags = tp->tg3_flags; | ||
4639 | void (*write_op)(struct tg3 *, u32, u32); | 4696 | void (*write_op)(struct tg3 *, u32, u32); |
4640 | 4697 | ||
4641 | if (cpu_base == TX_CPU_BASE && | 4698 | if (cpu_base == TX_CPU_BASE && |
@@ -4651,11 +4708,6 @@ static int tg3_load_firmware_cpu(struct tg3 *tp, u32 cpu_base, u32 cpu_scratch_b | |||
4651 | else | 4708 | else |
4652 | write_op = tg3_write_indirect_reg32; | 4709 | write_op = tg3_write_indirect_reg32; |
4653 | 4710 | ||
4654 | /* Force use of PCI config space for indirect register | ||
4655 | * write calls. | ||
4656 | */ | ||
4657 | tp->tg3_flags |= TG3_FLAG_PCIX_TARGET_HWBUG; | ||
4658 | |||
4659 | /* It is possible that bootcode is still loading at this point. | 4711 | /* It is possible that bootcode is still loading at this point. |
4660 | * Get the nvram lock first before halting the cpu. | 4712 | * Get the nvram lock first before halting the cpu. |
4661 | */ | 4713 | */ |
@@ -4691,7 +4743,6 @@ static int tg3_load_firmware_cpu(struct tg3 *tp, u32 cpu_base, u32 cpu_scratch_b | |||
4691 | err = 0; | 4743 | err = 0; |
4692 | 4744 | ||
4693 | out: | 4745 | out: |
4694 | tp->tg3_flags = orig_tg3_flags; | ||
4695 | return err; | 4746 | return err; |
4696 | } | 4747 | } |
4697 | 4748 | ||
@@ -5808,8 +5859,7 @@ static int tg3_reset_hw(struct tg3 *tp) | |||
5808 | tw32_f(GRC_LOCAL_CTRL, tp->grc_local_ctrl); | 5859 | tw32_f(GRC_LOCAL_CTRL, tp->grc_local_ctrl); |
5809 | udelay(100); | 5860 | udelay(100); |
5810 | 5861 | ||
5811 | tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0); | 5862 | tw32_mailbox_f(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0); |
5812 | tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW); | ||
5813 | tp->last_tag = 0; | 5863 | tp->last_tag = 0; |
5814 | 5864 | ||
5815 | if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) { | 5865 | if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) { |
@@ -6198,7 +6248,8 @@ static int tg3_test_interrupt(struct tg3 *tp) | |||
6198 | HOSTCC_MODE_NOW); | 6248 | HOSTCC_MODE_NOW); |
6199 | 6249 | ||
6200 | for (i = 0; i < 5; i++) { | 6250 | for (i = 0; i < 5; i++) { |
6201 | int_mbox = tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW); | 6251 | int_mbox = tr32_mailbox(MAILBOX_INTERRUPT_0 + |
6252 | TG3_64BIT_REG_LOW); | ||
6202 | if (int_mbox != 0) | 6253 | if (int_mbox != 0) |
6203 | break; | 6254 | break; |
6204 | msleep(10); | 6255 | msleep(10); |
@@ -6598,10 +6649,10 @@ static int tg3_open(struct net_device *dev) | |||
6598 | 6649 | ||
6599 | /* Mailboxes */ | 6650 | /* Mailboxes */ |
6600 | printk("DEBUG: SNDHOST_PROD[%08x%08x] SNDNIC_PROD[%08x%08x]\n", | 6651 | printk("DEBUG: SNDHOST_PROD[%08x%08x] SNDNIC_PROD[%08x%08x]\n", |
6601 | tr32(MAILBOX_SNDHOST_PROD_IDX_0 + 0x0), | 6652 | tr32_mailbox(MAILBOX_SNDHOST_PROD_IDX_0 + 0x0), |
6602 | tr32(MAILBOX_SNDHOST_PROD_IDX_0 + 0x4), | 6653 | tr32_mailbox(MAILBOX_SNDHOST_PROD_IDX_0 + 0x4), |
6603 | tr32(MAILBOX_SNDNIC_PROD_IDX_0 + 0x0), | 6654 | tr32_mailbox(MAILBOX_SNDNIC_PROD_IDX_0 + 0x0), |
6604 | tr32(MAILBOX_SNDNIC_PROD_IDX_0 + 0x4)); | 6655 | tr32_mailbox(MAILBOX_SNDNIC_PROD_IDX_0 + 0x4)); |
6605 | 6656 | ||
6606 | /* NIC side send descriptors. */ | 6657 | /* NIC side send descriptors. */ |
6607 | for (i = 0; i < 6; i++) { | 6658 | for (i = 0; i < 6; i++) { |
@@ -7901,7 +7952,7 @@ static int tg3_test_loopback(struct tg3 *tp) | |||
7901 | num_pkts++; | 7952 | num_pkts++; |
7902 | 7953 | ||
7903 | tw32_tx_mbox(MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW, send_idx); | 7954 | tw32_tx_mbox(MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW, send_idx); |
7904 | tr32(MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW); | 7955 | tr32_mailbox(MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW); |
7905 | 7956 | ||
7906 | udelay(10); | 7957 | udelay(10); |
7907 | 7958 | ||
@@ -9153,14 +9204,6 @@ static int __devinit tg3_is_sun_570X(struct tg3 *tp) | |||
9153 | static int __devinit tg3_get_invariants(struct tg3 *tp) | 9204 | static int __devinit tg3_get_invariants(struct tg3 *tp) |
9154 | { | 9205 | { |
9155 | static struct pci_device_id write_reorder_chipsets[] = { | 9206 | static struct pci_device_id write_reorder_chipsets[] = { |
9156 | { PCI_DEVICE(PCI_VENDOR_ID_INTEL, | ||
9157 | PCI_DEVICE_ID_INTEL_82801AA_8) }, | ||
9158 | { PCI_DEVICE(PCI_VENDOR_ID_INTEL, | ||
9159 | PCI_DEVICE_ID_INTEL_82801AB_8) }, | ||
9160 | { PCI_DEVICE(PCI_VENDOR_ID_INTEL, | ||
9161 | PCI_DEVICE_ID_INTEL_82801BA_11) }, | ||
9162 | { PCI_DEVICE(PCI_VENDOR_ID_INTEL, | ||
9163 | PCI_DEVICE_ID_INTEL_82801BA_6) }, | ||
9164 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, | 9207 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, |
9165 | PCI_DEVICE_ID_AMD_FE_GATE_700C) }, | 9208 | PCI_DEVICE_ID_AMD_FE_GATE_700C) }, |
9166 | { }, | 9209 | { }, |
@@ -9177,7 +9220,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) | |||
9177 | tp->tg3_flags2 |= TG3_FLG2_SUN_570X; | 9220 | tp->tg3_flags2 |= TG3_FLG2_SUN_570X; |
9178 | #endif | 9221 | #endif |
9179 | 9222 | ||
9180 | /* If we have an AMD 762 or Intel ICH/ICH0/ICH2 chipset, write | 9223 | /* If we have an AMD 762 chipset, write |
9181 | * reordering to the mailbox registers done by the host | 9224 | * reordering to the mailbox registers done by the host |
9182 | * controller can cause major troubles. We read back from | 9225 | * controller can cause major troubles. We read back from |
9183 | * every mailbox register write to force the writes to be | 9226 | * every mailbox register write to force the writes to be |
@@ -9215,6 +9258,69 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) | |||
9215 | if (tp->pci_chip_rev_id == CHIPREV_ID_5752_A0_HW) | 9258 | if (tp->pci_chip_rev_id == CHIPREV_ID_5752_A0_HW) |
9216 | tp->pci_chip_rev_id = CHIPREV_ID_5752_A0; | 9259 | tp->pci_chip_rev_id = CHIPREV_ID_5752_A0; |
9217 | 9260 | ||
9261 | /* If we have 5702/03 A1 or A2 on certain ICH chipsets, | ||
9262 | * we need to disable memory and use config. cycles | ||
9263 | * only to access all registers. The 5702/03 chips | ||
9264 | * can mistakenly decode the special cycles from the | ||
9265 | * ICH chipsets as memory write cycles, causing corruption | ||
9266 | * of register and memory space. Only certain ICH bridges | ||
9267 | * will drive special cycles with non-zero data during the | ||
9268 | * address phase which can fall within the 5703's address | ||
9269 | * range. This is not an ICH bug as the PCI spec allows | ||
9270 | * non-zero address during special cycles. However, only | ||
9271 | * these ICH bridges are known to drive non-zero addresses | ||
9272 | * during special cycles. | ||
9273 | * | ||
9274 | * Since special cycles do not cross PCI bridges, we only | ||
9275 | * enable this workaround if the 5703 is on the secondary | ||
9276 | * bus of these ICH bridges. | ||
9277 | */ | ||
9278 | if ((tp->pci_chip_rev_id == CHIPREV_ID_5703_A1) || | ||
9279 | (tp->pci_chip_rev_id == CHIPREV_ID_5703_A2)) { | ||
9280 | static struct tg3_dev_id { | ||
9281 | u32 vendor; | ||
9282 | u32 device; | ||
9283 | u32 rev; | ||
9284 | } ich_chipsets[] = { | ||
9285 | { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AA_8, | ||
9286 | PCI_ANY_ID }, | ||
9287 | { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AB_8, | ||
9288 | PCI_ANY_ID }, | ||
9289 | { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_11, | ||
9290 | 0xa }, | ||
9291 | { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_6, | ||
9292 | PCI_ANY_ID }, | ||
9293 | { }, | ||
9294 | }; | ||
9295 | struct tg3_dev_id *pci_id = &ich_chipsets[0]; | ||
9296 | struct pci_dev *bridge = NULL; | ||
9297 | |||
9298 | while (pci_id->vendor != 0) { | ||
9299 | bridge = pci_get_device(pci_id->vendor, pci_id->device, | ||
9300 | bridge); | ||
9301 | if (!bridge) { | ||
9302 | pci_id++; | ||
9303 | continue; | ||
9304 | } | ||
9305 | if (pci_id->rev != PCI_ANY_ID) { | ||
9306 | u8 rev; | ||
9307 | |||
9308 | pci_read_config_byte(bridge, PCI_REVISION_ID, | ||
9309 | &rev); | ||
9310 | if (rev > pci_id->rev) | ||
9311 | continue; | ||
9312 | } | ||
9313 | if (bridge->subordinate && | ||
9314 | (bridge->subordinate->number == | ||
9315 | tp->pdev->bus->number)) { | ||
9316 | |||
9317 | tp->tg3_flags2 |= TG3_FLG2_ICH_WORKAROUND; | ||
9318 | pci_dev_put(bridge); | ||
9319 | break; | ||
9320 | } | ||
9321 | } | ||
9322 | } | ||
9323 | |||
9218 | /* Find msi capability. */ | 9324 | /* Find msi capability. */ |
9219 | if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5780) | 9325 | if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5780) |
9220 | tp->msi_cap = pci_find_capability(tp->pdev, PCI_CAP_ID_MSI); | 9326 | tp->msi_cap = pci_find_capability(tp->pdev, PCI_CAP_ID_MSI); |
@@ -9302,6 +9408,12 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) | |||
9302 | } | 9408 | } |
9303 | } | 9409 | } |
9304 | 9410 | ||
9411 | /* 5700 BX chips need to have their TX producer index mailboxes | ||
9412 | * written twice to workaround a bug. | ||
9413 | */ | ||
9414 | if (GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5700_BX) | ||
9415 | tp->tg3_flags |= TG3_FLAG_TXD_MBOX_HWBUG; | ||
9416 | |||
9305 | /* Back to back register writes can cause problems on this chip, | 9417 | /* Back to back register writes can cause problems on this chip, |
9306 | * the workaround is to read back all reg writes except those to | 9418 | * the workaround is to read back all reg writes except those to |
9307 | * mailbox regs. See tg3_write_indirect_reg32(). | 9419 | * mailbox regs. See tg3_write_indirect_reg32(). |
@@ -9325,6 +9437,43 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) | |||
9325 | pci_write_config_dword(tp->pdev, TG3PCI_PCISTATE, pci_state_reg); | 9437 | pci_write_config_dword(tp->pdev, TG3PCI_PCISTATE, pci_state_reg); |
9326 | } | 9438 | } |
9327 | 9439 | ||
9440 | /* Default fast path register access methods */ | ||
9441 | tp->read32 = tg3_read32; | ||
9442 | tp->write32 = tg3_write32; | ||
9443 | tp->read32_mbox = tg3_read32; | ||
9444 | tp->write32_mbox = tg3_write32; | ||
9445 | tp->write32_tx_mbox = tg3_write32; | ||
9446 | tp->write32_rx_mbox = tg3_write32; | ||
9447 | |||
9448 | /* Various workaround register access methods */ | ||
9449 | if (tp->tg3_flags & TG3_FLAG_PCIX_TARGET_HWBUG) | ||
9450 | tp->write32 = tg3_write_indirect_reg32; | ||
9451 | else if (tp->tg3_flags & TG3_FLAG_5701_REG_WRITE_BUG) | ||
9452 | tp->write32 = tg3_write_flush_reg32; | ||
9453 | |||
9454 | if ((tp->tg3_flags & TG3_FLAG_TXD_MBOX_HWBUG) || | ||
9455 | (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)) { | ||
9456 | tp->write32_tx_mbox = tg3_write32_tx_mbox; | ||
9457 | if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER) | ||
9458 | tp->write32_rx_mbox = tg3_write_flush_reg32; | ||
9459 | } | ||
9460 | |||
9461 | if (tp->tg3_flags2 & TG3_FLG2_ICH_WORKAROUND) { | ||
9462 | tp->read32 = tg3_read_indirect_reg32; | ||
9463 | tp->write32 = tg3_write_indirect_reg32; | ||
9464 | tp->read32_mbox = tg3_read_indirect_mbox; | ||
9465 | tp->write32_mbox = tg3_write_indirect_mbox; | ||
9466 | tp->write32_tx_mbox = tg3_write_indirect_mbox; | ||
9467 | tp->write32_rx_mbox = tg3_write_indirect_mbox; | ||
9468 | |||
9469 | iounmap(tp->regs); | ||
9470 | tp->regs = 0; | ||
9471 | |||
9472 | pci_read_config_word(tp->pdev, PCI_COMMAND, &pci_cmd); | ||
9473 | pci_cmd &= ~PCI_COMMAND_MEMORY; | ||
9474 | pci_write_config_word(tp->pdev, PCI_COMMAND, pci_cmd); | ||
9475 | } | ||
9476 | |||
9328 | /* Get eeprom hw config before calling tg3_set_power_state(). | 9477 | /* Get eeprom hw config before calling tg3_set_power_state(). |
9329 | * In particular, the TG3_FLAG_EEPROM_WRITE_PROT flag must be | 9478 | * In particular, the TG3_FLAG_EEPROM_WRITE_PROT flag must be |
9330 | * determined before calling tg3_set_power_state() so that | 9479 | * determined before calling tg3_set_power_state() so that |
@@ -9539,14 +9688,6 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) | |||
9539 | else | 9688 | else |
9540 | tp->tg3_flags &= ~TG3_FLAG_POLL_SERDES; | 9689 | tp->tg3_flags &= ~TG3_FLAG_POLL_SERDES; |
9541 | 9690 | ||
9542 | /* 5700 BX chips need to have their TX producer index mailboxes | ||
9543 | * written twice to workaround a bug. | ||
9544 | */ | ||
9545 | if (GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5700_BX) | ||
9546 | tp->tg3_flags |= TG3_FLAG_TXD_MBOX_HWBUG; | ||
9547 | else | ||
9548 | tp->tg3_flags &= ~TG3_FLAG_TXD_MBOX_HWBUG; | ||
9549 | |||
9550 | /* It seems all chips can get confused if TX buffers | 9691 | /* It seems all chips can get confused if TX buffers |
9551 | * straddle the 4GB address boundary in some cases. | 9692 | * straddle the 4GB address boundary in some cases. |
9552 | */ | 9693 | */ |
@@ -10469,7 +10610,10 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, | |||
10469 | return 0; | 10610 | return 0; |
10470 | 10611 | ||
10471 | err_out_iounmap: | 10612 | err_out_iounmap: |
10472 | iounmap(tp->regs); | 10613 | if (tp->regs) { |
10614 | iounmap(tp->regs); | ||
10615 | tp->regs = 0; | ||
10616 | } | ||
10473 | 10617 | ||
10474 | err_out_free_dev: | 10618 | err_out_free_dev: |
10475 | free_netdev(dev); | 10619 | free_netdev(dev); |
@@ -10491,7 +10635,10 @@ static void __devexit tg3_remove_one(struct pci_dev *pdev) | |||
10491 | struct tg3 *tp = netdev_priv(dev); | 10635 | struct tg3 *tp = netdev_priv(dev); |
10492 | 10636 | ||
10493 | unregister_netdev(dev); | 10637 | unregister_netdev(dev); |
10494 | iounmap(tp->regs); | 10638 | if (tp->regs) { |
10639 | iounmap(tp->regs); | ||
10640 | tp->regs = 0; | ||
10641 | } | ||
10495 | free_netdev(dev); | 10642 | free_netdev(dev); |
10496 | pci_release_regions(pdev); | 10643 | pci_release_regions(pdev); |
10497 | pci_disable_device(pdev); | 10644 | pci_disable_device(pdev); |
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index 5c4433c147fa..c184b773e585 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h | |||
@@ -2049,6 +2049,11 @@ struct tg3 { | |||
2049 | spinlock_t lock; | 2049 | spinlock_t lock; |
2050 | spinlock_t indirect_lock; | 2050 | spinlock_t indirect_lock; |
2051 | 2051 | ||
2052 | u32 (*read32) (struct tg3 *, u32); | ||
2053 | void (*write32) (struct tg3 *, u32, u32); | ||
2054 | u32 (*read32_mbox) (struct tg3 *, u32); | ||
2055 | void (*write32_mbox) (struct tg3 *, u32, | ||
2056 | u32); | ||
2052 | void __iomem *regs; | 2057 | void __iomem *regs; |
2053 | struct net_device *dev; | 2058 | struct net_device *dev; |
2054 | struct pci_dev *pdev; | 2059 | struct pci_dev *pdev; |
@@ -2060,6 +2065,8 @@ struct tg3 { | |||
2060 | u32 msg_enable; | 2065 | u32 msg_enable; |
2061 | 2066 | ||
2062 | /* begin "tx thread" cacheline section */ | 2067 | /* begin "tx thread" cacheline section */ |
2068 | void (*write32_tx_mbox) (struct tg3 *, u32, | ||
2069 | u32); | ||
2063 | u32 tx_prod; | 2070 | u32 tx_prod; |
2064 | u32 tx_cons; | 2071 | u32 tx_cons; |
2065 | u32 tx_pending; | 2072 | u32 tx_pending; |
@@ -2071,6 +2078,8 @@ struct tg3 { | |||
2071 | dma_addr_t tx_desc_mapping; | 2078 | dma_addr_t tx_desc_mapping; |
2072 | 2079 | ||
2073 | /* begin "rx thread" cacheline section */ | 2080 | /* begin "rx thread" cacheline section */ |
2081 | void (*write32_rx_mbox) (struct tg3 *, u32, | ||
2082 | u32); | ||
2074 | u32 rx_rcb_ptr; | 2083 | u32 rx_rcb_ptr; |
2075 | u32 rx_std_ptr; | 2084 | u32 rx_std_ptr; |
2076 | u32 rx_jumbo_ptr; | 2085 | u32 rx_jumbo_ptr; |
@@ -2165,6 +2174,7 @@ struct tg3 { | |||
2165 | #define TG3_FLG2_ANY_SERDES (TG3_FLG2_PHY_SERDES | \ | 2174 | #define TG3_FLG2_ANY_SERDES (TG3_FLG2_PHY_SERDES | \ |
2166 | TG3_FLG2_MII_SERDES) | 2175 | TG3_FLG2_MII_SERDES) |
2167 | #define TG3_FLG2_PARALLEL_DETECT 0x01000000 | 2176 | #define TG3_FLG2_PARALLEL_DETECT 0x01000000 |
2177 | #define TG3_FLG2_ICH_WORKAROUND 0x02000000 | ||
2168 | 2178 | ||
2169 | u32 split_mode_max_reqs; | 2179 | u32 split_mode_max_reqs; |
2170 | #define SPLIT_MODE_5704_MAX_REQ 3 | 2180 | #define SPLIT_MODE_5704_MAX_REQ 3 |
diff --git a/drivers/net/wan/hdlc_generic.c b/drivers/net/wan/hdlc_generic.c index a63f6a2cc4f7..cdd4c09c2d90 100644 --- a/drivers/net/wan/hdlc_generic.c +++ b/drivers/net/wan/hdlc_generic.c | |||
@@ -61,7 +61,7 @@ static struct net_device_stats *hdlc_get_stats(struct net_device *dev) | |||
61 | 61 | ||
62 | 62 | ||
63 | static int hdlc_rcv(struct sk_buff *skb, struct net_device *dev, | 63 | static int hdlc_rcv(struct sk_buff *skb, struct net_device *dev, |
64 | struct packet_type *p) | 64 | struct packet_type *p, struct net_device *orig_dev) |
65 | { | 65 | { |
66 | hdlc_device *hdlc = dev_to_hdlc(dev); | 66 | hdlc_device *hdlc = dev_to_hdlc(dev); |
67 | if (hdlc->proto.netif_rx) | 67 | if (hdlc->proto.netif_rx) |
diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index 7f2e3653c5e5..6c302e9dbca2 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c | |||
@@ -86,7 +86,7 @@ static __inline__ int dev_is_ethdev(struct net_device *dev) | |||
86 | /* | 86 | /* |
87 | * Receive a LAPB frame via an ethernet interface. | 87 | * Receive a LAPB frame via an ethernet interface. |
88 | */ | 88 | */ |
89 | static int lapbeth_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype) | 89 | static int lapbeth_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) |
90 | { | 90 | { |
91 | int len, err; | 91 | int len, err; |
92 | struct lapbethdev *lapbeth; | 92 | struct lapbethdev *lapbeth; |
diff --git a/drivers/net/wan/sdla_fr.c b/drivers/net/wan/sdla_fr.c index c5f5e62aab8b..0497dbdb8631 100644 --- a/drivers/net/wan/sdla_fr.c +++ b/drivers/net/wan/sdla_fr.c | |||
@@ -445,7 +445,7 @@ void s508_s514_unlock(sdla_t *card, unsigned long *smp_flags); | |||
445 | void s508_s514_lock(sdla_t *card, unsigned long *smp_flags); | 445 | void s508_s514_lock(sdla_t *card, unsigned long *smp_flags); |
446 | 446 | ||
447 | unsigned short calc_checksum (char *, int); | 447 | unsigned short calc_checksum (char *, int); |
448 | static int setup_fr_header(struct sk_buff** skb, | 448 | static int setup_fr_header(struct sk_buff *skb, |
449 | struct net_device* dev, char op_mode); | 449 | struct net_device* dev, char op_mode); |
450 | 450 | ||
451 | 451 | ||
@@ -1372,7 +1372,7 @@ static int if_send(struct sk_buff* skb, struct net_device* dev) | |||
1372 | /* Move the if_header() code to here. By inserting frame | 1372 | /* Move the if_header() code to here. By inserting frame |
1373 | * relay header in if_header() we would break the | 1373 | * relay header in if_header() we would break the |
1374 | * tcpdump and other packet sniffers */ | 1374 | * tcpdump and other packet sniffers */ |
1375 | chan->fr_header_len = setup_fr_header(&skb,dev,chan->common.usedby); | 1375 | chan->fr_header_len = setup_fr_header(skb,dev,chan->common.usedby); |
1376 | if (chan->fr_header_len < 0 ){ | 1376 | if (chan->fr_header_len < 0 ){ |
1377 | ++chan->ifstats.tx_dropped; | 1377 | ++chan->ifstats.tx_dropped; |
1378 | ++card->wandev.stats.tx_dropped; | 1378 | ++card->wandev.stats.tx_dropped; |
@@ -1597,8 +1597,6 @@ static int setup_for_delayed_transmit(struct net_device* dev, | |||
1597 | return 1; | 1597 | return 1; |
1598 | } | 1598 | } |
1599 | 1599 | ||
1600 | skb_unlink(skb); | ||
1601 | |||
1602 | chan->transmit_length = len; | 1600 | chan->transmit_length = len; |
1603 | chan->delay_skb = skb; | 1601 | chan->delay_skb = skb; |
1604 | 1602 | ||
@@ -4871,18 +4869,15 @@ static void unconfig_fr (sdla_t *card) | |||
4871 | } | 4869 | } |
4872 | } | 4870 | } |
4873 | 4871 | ||
4874 | static int setup_fr_header(struct sk_buff **skb_orig, struct net_device* dev, | 4872 | static int setup_fr_header(struct sk_buff *skb, struct net_device* dev, |
4875 | char op_mode) | 4873 | char op_mode) |
4876 | { | 4874 | { |
4877 | struct sk_buff *skb = *skb_orig; | ||
4878 | fr_channel_t *chan=dev->priv; | 4875 | fr_channel_t *chan=dev->priv; |
4879 | 4876 | ||
4880 | if (op_mode == WANPIPE){ | 4877 | if (op_mode == WANPIPE) { |
4881 | |||
4882 | chan->fr_header[0]=Q922_UI; | 4878 | chan->fr_header[0]=Q922_UI; |
4883 | 4879 | ||
4884 | switch (htons(skb->protocol)){ | 4880 | switch (htons(skb->protocol)){ |
4885 | |||
4886 | case ETH_P_IP: | 4881 | case ETH_P_IP: |
4887 | chan->fr_header[1]=NLPID_IP; | 4882 | chan->fr_header[1]=NLPID_IP; |
4888 | break; | 4883 | break; |
@@ -4894,16 +4889,14 @@ static int setup_fr_header(struct sk_buff **skb_orig, struct net_device* dev, | |||
4894 | } | 4889 | } |
4895 | 4890 | ||
4896 | /* If we are in bridging mode, we must apply | 4891 | /* If we are in bridging mode, we must apply |
4897 | * an Ethernet header */ | 4892 | * an Ethernet header |
4898 | if (op_mode == BRIDGE || op_mode == BRIDGE_NODE){ | 4893 | */ |
4899 | 4894 | if (op_mode == BRIDGE || op_mode == BRIDGE_NODE) { | |
4900 | |||
4901 | /* Encapsulate the packet as a bridged Ethernet frame. */ | 4895 | /* Encapsulate the packet as a bridged Ethernet frame. */ |
4902 | #ifdef DEBUG | 4896 | #ifdef DEBUG |
4903 | printk(KERN_INFO "%s: encapsulating skb for frame relay\n", | 4897 | printk(KERN_INFO "%s: encapsulating skb for frame relay\n", |
4904 | dev->name); | 4898 | dev->name); |
4905 | #endif | 4899 | #endif |
4906 | |||
4907 | chan->fr_header[0] = 0x03; | 4900 | chan->fr_header[0] = 0x03; |
4908 | chan->fr_header[1] = 0x00; | 4901 | chan->fr_header[1] = 0x00; |
4909 | chan->fr_header[2] = 0x80; | 4902 | chan->fr_header[2] = 0x80; |
@@ -4916,7 +4909,6 @@ static int setup_fr_header(struct sk_buff **skb_orig, struct net_device* dev, | |||
4916 | /* Yuck. */ | 4909 | /* Yuck. */ |
4917 | skb->protocol = ETH_P_802_3; | 4910 | skb->protocol = ETH_P_802_3; |
4918 | return 8; | 4911 | return 8; |
4919 | |||
4920 | } | 4912 | } |
4921 | 4913 | ||
4922 | return 0; | 4914 | return 0; |
diff --git a/drivers/net/wan/syncppp.c b/drivers/net/wan/syncppp.c index 84b65c60c799..f58c794a963a 100644 --- a/drivers/net/wan/syncppp.c +++ b/drivers/net/wan/syncppp.c | |||
@@ -1447,7 +1447,7 @@ static void sppp_print_bytes (u_char *p, u16 len) | |||
1447 | * after interrupt servicing to process frames queued via netif_rx. | 1447 | * after interrupt servicing to process frames queued via netif_rx. |
1448 | */ | 1448 | */ |
1449 | 1449 | ||
1450 | static int sppp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p) | 1450 | static int sppp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p, struct net_device *orig_dev) |
1451 | { | 1451 | { |
1452 | if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) | 1452 | if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) |
1453 | return NET_RX_DROP; | 1453 | return NET_RX_DROP; |
diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c index 841f4e2cfe08..179c95c878ac 100644 --- a/drivers/scsi/ahci.c +++ b/drivers/scsi/ahci.c | |||
@@ -1,26 +1,34 @@ | |||
1 | /* | 1 | /* |
2 | * ahci.c - AHCI SATA support | 2 | * ahci.c - AHCI SATA support |
3 | * | 3 | * |
4 | * Copyright 2004 Red Hat, Inc. | 4 | * Maintained by: Jeff Garzik <jgarzik@pobox.com> |
5 | * Please ALWAYS copy linux-ide@vger.kernel.org | ||
6 | * on emails. | ||
5 | * | 7 | * |
6 | * The contents of this file are subject to the Open | 8 | * Copyright 2004-2005 Red Hat, Inc. |
7 | * Software License version 1.1 that can be found at | ||
8 | * http://www.opensource.org/licenses/osl-1.1.txt and is included herein | ||
9 | * by reference. | ||
10 | * | 9 | * |
11 | * Alternatively, the contents of this file may be used under the terms | ||
12 | * of the GNU General Public License version 2 (the "GPL") as distributed | ||
13 | * in the kernel source COPYING file, in which case the provisions of | ||
14 | * the GPL are applicable instead of the above. If you wish to allow | ||
15 | * the use of your version of this file only under the terms of the | ||
16 | * GPL and not to allow others to use your version of this file under | ||
17 | * the OSL, indicate your decision by deleting the provisions above and | ||
18 | * replace them with the notice and other provisions required by the GPL. | ||
19 | * If you do not delete the provisions above, a recipient may use your | ||
20 | * version of this file under either the OSL or the GPL. | ||
21 | * | 10 | * |
22 | * Version 1.0 of the AHCI specification: | 11 | * This program is free software; you can redistribute it and/or modify |
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation; either version 2, or (at your option) | ||
14 | * any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | * GNU General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License | ||
22 | * along with this program; see the file COPYING. If not, write to | ||
23 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
24 | * | ||
25 | * | ||
26 | * libata documentation is available via 'make {ps|pdf}docs', | ||
27 | * as Documentation/DocBook/libata.* | ||
28 | * | ||
29 | * AHCI hardware documentation: | ||
23 | * http://www.intel.com/technology/serialata/pdf/rev1_0.pdf | 30 | * http://www.intel.com/technology/serialata/pdf/rev1_0.pdf |
31 | * http://www.intel.com/technology/serialata/pdf/rev1_1.pdf | ||
24 | * | 32 | * |
25 | */ | 33 | */ |
26 | 34 | ||
diff --git a/drivers/scsi/ata_piix.c b/drivers/scsi/ata_piix.c index 03695616e59e..fb28c1261848 100644 --- a/drivers/scsi/ata_piix.c +++ b/drivers/scsi/ata_piix.c | |||
@@ -1,24 +1,42 @@ | |||
1 | /* | 1 | /* |
2 | 2 | * ata_piix.c - Intel PATA/SATA controllers | |
3 | ata_piix.c - Intel PATA/SATA controllers | 3 | * |
4 | 4 | * Maintained by: Jeff Garzik <jgarzik@pobox.com> | |
5 | Maintained by: Jeff Garzik <jgarzik@pobox.com> | 5 | * Please ALWAYS copy linux-ide@vger.kernel.org |
6 | Please ALWAYS copy linux-ide@vger.kernel.org | 6 | * on emails. |
7 | on emails. | 7 | * |
8 | 8 | * | |
9 | 9 | * Copyright 2003-2005 Red Hat Inc | |
10 | Copyright 2003-2004 Red Hat Inc | 10 | * Copyright 2003-2005 Jeff Garzik |
11 | Copyright 2003-2004 Jeff Garzik | 11 | * |
12 | 12 | * | |
13 | 13 | * Copyright header from piix.c: | |
14 | Copyright header from piix.c: | 14 | * |
15 | 15 | * Copyright (C) 1998-1999 Andrzej Krzysztofowicz, Author and Maintainer | |
16 | Copyright (C) 1998-1999 Andrzej Krzysztofowicz, Author and Maintainer | 16 | * Copyright (C) 1998-2000 Andre Hedrick <andre@linux-ide.org> |
17 | Copyright (C) 1998-2000 Andre Hedrick <andre@linux-ide.org> | 17 | * Copyright (C) 2003 Red Hat Inc <alan@redhat.com> |
18 | Copyright (C) 2003 Red Hat Inc <alan@redhat.com> | 18 | * |
19 | 19 | * | |
20 | May be copied or modified under the terms of the GNU General Public License | 20 | * This program is free software; you can redistribute it and/or modify |
21 | 21 | * it under the terms of the GNU General Public License as published by | |
22 | * the Free Software Foundation; either version 2, or (at your option) | ||
23 | * any later version. | ||
24 | * | ||
25 | * This program is distributed in the hope that it will be useful, | ||
26 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
27 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
28 | * GNU General Public License for more details. | ||
29 | * | ||
30 | * You should have received a copy of the GNU General Public License | ||
31 | * along with this program; see the file COPYING. If not, write to | ||
32 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
33 | * | ||
34 | * | ||
35 | * libata documentation is available via 'make {ps|pdf}docs', | ||
36 | * as Documentation/DocBook/libata.* | ||
37 | * | ||
38 | * Hardware documentation available at http://developer.intel.com/ | ||
39 | * | ||
22 | */ | 40 | */ |
23 | 41 | ||
24 | #include <linux/kernel.h> | 42 | #include <linux/kernel.h> |
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index fe09d145542a..2cb3c8340ca8 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c | |||
@@ -1442,7 +1442,7 @@ static int ibmvscsi_remove(struct vio_dev *vdev) | |||
1442 | */ | 1442 | */ |
1443 | static struct vio_device_id ibmvscsi_device_table[] __devinitdata = { | 1443 | static struct vio_device_id ibmvscsi_device_table[] __devinitdata = { |
1444 | {"vscsi", "IBM,v-scsi"}, | 1444 | {"vscsi", "IBM,v-scsi"}, |
1445 | {0,} | 1445 | { "", "" } |
1446 | }; | 1446 | }; |
1447 | 1447 | ||
1448 | MODULE_DEVICE_TABLE(vio, ibmvscsi_device_table); | 1448 | MODULE_DEVICE_TABLE(vio, ibmvscsi_device_table); |
diff --git a/drivers/scsi/ibmvscsi/rpa_vscsi.c b/drivers/scsi/ibmvscsi/rpa_vscsi.c index 035f615817d7..8bf5652f1060 100644 --- a/drivers/scsi/ibmvscsi/rpa_vscsi.c +++ b/drivers/scsi/ibmvscsi/rpa_vscsi.c | |||
@@ -28,6 +28,7 @@ | |||
28 | */ | 28 | */ |
29 | 29 | ||
30 | #include <asm/vio.h> | 30 | #include <asm/vio.h> |
31 | #include <asm/prom.h> | ||
31 | #include <asm/iommu.h> | 32 | #include <asm/iommu.h> |
32 | #include <asm/hvcall.h> | 33 | #include <asm/hvcall.h> |
33 | #include <linux/dma-mapping.h> | 34 | #include <linux/dma-mapping.h> |
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index f15a07f9f471..dee4b12b0342 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c | |||
@@ -1,25 +1,35 @@ | |||
1 | /* | 1 | /* |
2 | libata-core.c - helper library for ATA | 2 | * libata-core.c - helper library for ATA |
3 | 3 | * | |
4 | Copyright 2003-2004 Red Hat, Inc. All rights reserved. | 4 | * Maintained by: Jeff Garzik <jgarzik@pobox.com> |
5 | Copyright 2003-2004 Jeff Garzik | 5 | * Please ALWAYS copy linux-ide@vger.kernel.org |
6 | 6 | * on emails. | |
7 | The contents of this file are subject to the Open | 7 | * |
8 | Software License version 1.1 that can be found at | 8 | * Copyright 2003-2004 Red Hat, Inc. All rights reserved. |
9 | http://www.opensource.org/licenses/osl-1.1.txt and is included herein | 9 | * Copyright 2003-2004 Jeff Garzik |
10 | by reference. | 10 | * |
11 | 11 | * | |
12 | Alternatively, the contents of this file may be used under the terms | 12 | * This program is free software; you can redistribute it and/or modify |
13 | of the GNU General Public License version 2 (the "GPL") as distributed | 13 | * it under the terms of the GNU General Public License as published by |
14 | in the kernel source COPYING file, in which case the provisions of | 14 | * the Free Software Foundation; either version 2, or (at your option) |
15 | the GPL are applicable instead of the above. If you wish to allow | 15 | * any later version. |
16 | the use of your version of this file only under the terms of the | 16 | * |
17 | GPL and not to allow others to use your version of this file under | 17 | * This program is distributed in the hope that it will be useful, |
18 | the OSL, indicate your decision by deleting the provisions above and | 18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
19 | replace them with the notice and other provisions required by the GPL. | 19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
20 | If you do not delete the provisions above, a recipient may use your | 20 | * GNU General Public License for more details. |
21 | version of this file under either the OSL or the GPL. | 21 | * |
22 | 22 | * You should have received a copy of the GNU General Public License | |
23 | * along with this program; see the file COPYING. If not, write to | ||
24 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
25 | * | ||
26 | * | ||
27 | * libata documentation is available via 'make {ps|pdf}docs', | ||
28 | * as Documentation/DocBook/libata.* | ||
29 | * | ||
30 | * Hardware documentation available from http://www.t13.org/ and | ||
31 | * http://www.sata-io.org/ | ||
32 | * | ||
23 | */ | 33 | */ |
24 | 34 | ||
25 | #include <linux/config.h> | 35 | #include <linux/config.h> |
diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c index 4074e7877ba3..346eb36b1e31 100644 --- a/drivers/scsi/libata-scsi.c +++ b/drivers/scsi/libata-scsi.c | |||
@@ -1,25 +1,36 @@ | |||
1 | /* | 1 | /* |
2 | libata-scsi.c - helper library for ATA | 2 | * libata-scsi.c - helper library for ATA |
3 | 3 | * | |
4 | Copyright 2003-2004 Red Hat, Inc. All rights reserved. | 4 | * Maintained by: Jeff Garzik <jgarzik@pobox.com> |
5 | Copyright 2003-2004 Jeff Garzik | 5 | * Please ALWAYS copy linux-ide@vger.kernel.org |
6 | 6 | * on emails. | |
7 | The contents of this file are subject to the Open | 7 | * |
8 | Software License version 1.1 that can be found at | 8 | * Copyright 2003-2004 Red Hat, Inc. All rights reserved. |
9 | http://www.opensource.org/licenses/osl-1.1.txt and is included herein | 9 | * Copyright 2003-2004 Jeff Garzik |
10 | by reference. | 10 | * |
11 | 11 | * | |
12 | Alternatively, the contents of this file may be used under the terms | 12 | * This program is free software; you can redistribute it and/or modify |
13 | of the GNU General Public License version 2 (the "GPL") as distributed | 13 | * it under the terms of the GNU General Public License as published by |
14 | in the kernel source COPYING file, in which case the provisions of | 14 | * the Free Software Foundation; either version 2, or (at your option) |
15 | the GPL are applicable instead of the above. If you wish to allow | 15 | * any later version. |
16 | the use of your version of this file only under the terms of the | 16 | * |
17 | GPL and not to allow others to use your version of this file under | 17 | * This program is distributed in the hope that it will be useful, |
18 | the OSL, indicate your decision by deleting the provisions above and | 18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
19 | replace them with the notice and other provisions required by the GPL. | 19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
20 | If you do not delete the provisions above, a recipient may use your | 20 | * GNU General Public License for more details. |
21 | version of this file under either the OSL or the GPL. | 21 | * |
22 | 22 | * You should have received a copy of the GNU General Public License | |
23 | * along with this program; see the file COPYING. If not, write to | ||
24 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
25 | * | ||
26 | * | ||
27 | * libata documentation is available via 'make {ps|pdf}docs', | ||
28 | * as Documentation/DocBook/libata.* | ||
29 | * | ||
30 | * Hardware documentation available from | ||
31 | * - http://www.t10.org/ | ||
32 | * - http://www.t13.org/ | ||
33 | * | ||
23 | */ | 34 | */ |
24 | 35 | ||
25 | #include <linux/kernel.h> | 36 | #include <linux/kernel.h> |
diff --git a/drivers/scsi/libata.h b/drivers/scsi/libata.h index 620d21772bd6..809c634afbcd 100644 --- a/drivers/scsi/libata.h +++ b/drivers/scsi/libata.h | |||
@@ -1,25 +1,28 @@ | |||
1 | /* | 1 | /* |
2 | libata.h - helper library for ATA | 2 | * libata.h - helper library for ATA |
3 | 3 | * | |
4 | Copyright 2003-2004 Red Hat, Inc. All rights reserved. | 4 | * Copyright 2003-2004 Red Hat, Inc. All rights reserved. |
5 | Copyright 2003-2004 Jeff Garzik | 5 | * Copyright 2003-2004 Jeff Garzik |
6 | 6 | * | |
7 | The contents of this file are subject to the Open | 7 | * |
8 | Software License version 1.1 that can be found at | 8 | * This program is free software; you can redistribute it and/or modify |
9 | http://www.opensource.org/licenses/osl-1.1.txt and is included herein | 9 | * it under the terms of the GNU General Public License as published by |
10 | by reference. | 10 | * the Free Software Foundation; either version 2, or (at your option) |
11 | 11 | * any later version. | |
12 | Alternatively, the contents of this file may be used under the terms | 12 | * |
13 | of the GNU General Public License version 2 (the "GPL") as distributed | 13 | * This program is distributed in the hope that it will be useful, |
14 | in the kernel source COPYING file, in which case the provisions of | 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | the GPL are applicable instead of the above. If you wish to allow | 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | the use of your version of this file only under the terms of the | 16 | * GNU General Public License for more details. |
17 | GPL and not to allow others to use your version of this file under | 17 | * |
18 | the OSL, indicate your decision by deleting the provisions above and | 18 | * You should have received a copy of the GNU General Public License |
19 | replace them with the notice and other provisions required by the GPL. | 19 | * along with this program; see the file COPYING. If not, write to |
20 | If you do not delete the provisions above, a recipient may use your | 20 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. |
21 | version of this file under either the OSL or the GPL. | 21 | * |
22 | 22 | * | |
23 | * libata documentation is available via 'make {ps|pdf}docs', | ||
24 | * as Documentation/DocBook/libata.* | ||
25 | * | ||
23 | */ | 26 | */ |
24 | 27 | ||
25 | #ifndef __LIBATA_H__ | 28 | #ifndef __LIBATA_H__ |
diff --git a/drivers/scsi/sata_nv.c b/drivers/scsi/sata_nv.c index 41a3421b02b4..03d9bc6e69df 100644 --- a/drivers/scsi/sata_nv.c +++ b/drivers/scsi/sata_nv.c | |||
@@ -4,21 +4,31 @@ | |||
4 | * Copyright 2004 NVIDIA Corp. All rights reserved. | 4 | * Copyright 2004 NVIDIA Corp. All rights reserved. |
5 | * Copyright 2004 Andrew Chew | 5 | * Copyright 2004 Andrew Chew |
6 | * | 6 | * |
7 | * The contents of this file are subject to the Open | ||
8 | * Software License version 1.1 that can be found at | ||
9 | * http://www.opensource.org/licenses/osl-1.1.txt and is included herein | ||
10 | * by reference. | ||
11 | * | 7 | * |
12 | * Alternatively, the contents of this file may be used under the terms | 8 | * This program is free software; you can redistribute it and/or modify |
13 | * of the GNU General Public License version 2 (the "GPL") as distributed | 9 | * it under the terms of the GNU General Public License as published by |
14 | * in the kernel source COPYING file, in which case the provisions of | 10 | * the Free Software Foundation; either version 2, or (at your option) |
15 | * the GPL are applicable instead of the above. If you wish to allow | 11 | * any later version. |
16 | * the use of your version of this file only under the terms of the | 12 | * |
17 | * GPL and not to allow others to use your version of this file under | 13 | * This program is distributed in the hope that it will be useful, |
18 | * the OSL, indicate your decision by deleting the provisions above and | 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
19 | * replace them with the notice and other provisions required by the GPL. | 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
20 | * If you do not delete the provisions above, a recipient may use your | 16 | * GNU General Public License for more details. |
21 | * version of this file under either the OSL or the GPL. | 17 | * |
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; see the file COPYING. If not, write to | ||
20 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
21 | * | ||
22 | * | ||
23 | * libata documentation is available via 'make {ps|pdf}docs', | ||
24 | * as Documentation/DocBook/libata.* | ||
25 | * | ||
26 | * No hardware documentation available outside of NVIDIA. | ||
27 | * This driver programs the NVIDIA SATA controller in a similar | ||
28 | * fashion as with other PCI IDE BMDMA controllers, with a few | ||
29 | * NV-specific details such as register offsets, SATA phy location, | ||
30 | * hotplug info, etc. | ||
31 | * | ||
22 | * | 32 | * |
23 | * 0.08 | 33 | * 0.08 |
24 | * - Added support for MCP51 and MCP55. | 34 | * - Added support for MCP51 and MCP55. |
diff --git a/drivers/scsi/sata_promise.c b/drivers/scsi/sata_promise.c index b8dc49fed769..7c4f6ecc1cc9 100644 --- a/drivers/scsi/sata_promise.c +++ b/drivers/scsi/sata_promise.c | |||
@@ -7,21 +7,26 @@ | |||
7 | * | 7 | * |
8 | * Copyright 2003-2004 Red Hat, Inc. | 8 | * Copyright 2003-2004 Red Hat, Inc. |
9 | * | 9 | * |
10 | * The contents of this file are subject to the Open | ||
11 | * Software License version 1.1 that can be found at | ||
12 | * http://www.opensource.org/licenses/osl-1.1.txt and is included herein | ||
13 | * by reference. | ||
14 | * | 10 | * |
15 | * Alternatively, the contents of this file may be used under the terms | 11 | * This program is free software; you can redistribute it and/or modify |
16 | * of the GNU General Public License version 2 (the "GPL") as distributed | 12 | * it under the terms of the GNU General Public License as published by |
17 | * in the kernel source COPYING file, in which case the provisions of | 13 | * the Free Software Foundation; either version 2, or (at your option) |
18 | * the GPL are applicable instead of the above. If you wish to allow | 14 | * any later version. |
19 | * the use of your version of this file only under the terms of the | 15 | * |
20 | * GPL and not to allow others to use your version of this file under | 16 | * This program is distributed in the hope that it will be useful, |
21 | * the OSL, indicate your decision by deleting the provisions above and | 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
22 | * replace them with the notice and other provisions required by the GPL. | 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
23 | * If you do not delete the provisions above, a recipient may use your | 19 | * GNU General Public License for more details. |
24 | * version of this file under either the OSL or the GPL. | 20 | * |
21 | * You should have received a copy of the GNU General Public License | ||
22 | * along with this program; see the file COPYING. If not, write to | ||
23 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
24 | * | ||
25 | * | ||
26 | * libata documentation is available via 'make {ps|pdf}docs', | ||
27 | * as Documentation/DocBook/libata.* | ||
28 | * | ||
29 | * Hardware information only available under NDA. | ||
25 | * | 30 | * |
26 | */ | 31 | */ |
27 | 32 | ||
@@ -79,7 +84,8 @@ static irqreturn_t pdc_interrupt (int irq, void *dev_instance, struct pt_regs *r | |||
79 | static void pdc_eng_timeout(struct ata_port *ap); | 84 | static void pdc_eng_timeout(struct ata_port *ap); |
80 | static int pdc_port_start(struct ata_port *ap); | 85 | static int pdc_port_start(struct ata_port *ap); |
81 | static void pdc_port_stop(struct ata_port *ap); | 86 | static void pdc_port_stop(struct ata_port *ap); |
82 | static void pdc_phy_reset(struct ata_port *ap); | 87 | static void pdc_pata_phy_reset(struct ata_port *ap); |
88 | static void pdc_sata_phy_reset(struct ata_port *ap); | ||
83 | static void pdc_qc_prep(struct ata_queued_cmd *qc); | 89 | static void pdc_qc_prep(struct ata_queued_cmd *qc); |
84 | static void pdc_tf_load_mmio(struct ata_port *ap, struct ata_taskfile *tf); | 90 | static void pdc_tf_load_mmio(struct ata_port *ap, struct ata_taskfile *tf); |
85 | static void pdc_exec_command_mmio(struct ata_port *ap, struct ata_taskfile *tf); | 91 | static void pdc_exec_command_mmio(struct ata_port *ap, struct ata_taskfile *tf); |
@@ -106,19 +112,22 @@ static Scsi_Host_Template pdc_ata_sht = { | |||
106 | .ordered_flush = 1, | 112 | .ordered_flush = 1, |
107 | }; | 113 | }; |
108 | 114 | ||
109 | static struct ata_port_operations pdc_ata_ops = { | 115 | static struct ata_port_operations pdc_sata_ops = { |
110 | .port_disable = ata_port_disable, | 116 | .port_disable = ata_port_disable, |
111 | .tf_load = pdc_tf_load_mmio, | 117 | .tf_load = pdc_tf_load_mmio, |
112 | .tf_read = ata_tf_read, | 118 | .tf_read = ata_tf_read, |
113 | .check_status = ata_check_status, | 119 | .check_status = ata_check_status, |
114 | .exec_command = pdc_exec_command_mmio, | 120 | .exec_command = pdc_exec_command_mmio, |
115 | .dev_select = ata_std_dev_select, | 121 | .dev_select = ata_std_dev_select, |
116 | .phy_reset = pdc_phy_reset, | 122 | |
123 | .phy_reset = pdc_sata_phy_reset, | ||
124 | |||
117 | .qc_prep = pdc_qc_prep, | 125 | .qc_prep = pdc_qc_prep, |
118 | .qc_issue = pdc_qc_issue_prot, | 126 | .qc_issue = pdc_qc_issue_prot, |
119 | .eng_timeout = pdc_eng_timeout, | 127 | .eng_timeout = pdc_eng_timeout, |
120 | .irq_handler = pdc_interrupt, | 128 | .irq_handler = pdc_interrupt, |
121 | .irq_clear = pdc_irq_clear, | 129 | .irq_clear = pdc_irq_clear, |
130 | |||
122 | .scr_read = pdc_sata_scr_read, | 131 | .scr_read = pdc_sata_scr_read, |
123 | .scr_write = pdc_sata_scr_write, | 132 | .scr_write = pdc_sata_scr_write, |
124 | .port_start = pdc_port_start, | 133 | .port_start = pdc_port_start, |
@@ -126,6 +135,27 @@ static struct ata_port_operations pdc_ata_ops = { | |||
126 | .host_stop = ata_host_stop, | 135 | .host_stop = ata_host_stop, |
127 | }; | 136 | }; |
128 | 137 | ||
138 | static struct ata_port_operations pdc_pata_ops = { | ||
139 | .port_disable = ata_port_disable, | ||
140 | .tf_load = pdc_tf_load_mmio, | ||
141 | .tf_read = ata_tf_read, | ||
142 | .check_status = ata_check_status, | ||
143 | .exec_command = pdc_exec_command_mmio, | ||
144 | .dev_select = ata_std_dev_select, | ||
145 | |||
146 | .phy_reset = pdc_pata_phy_reset, | ||
147 | |||
148 | .qc_prep = pdc_qc_prep, | ||
149 | .qc_issue = pdc_qc_issue_prot, | ||
150 | .eng_timeout = pdc_eng_timeout, | ||
151 | .irq_handler = pdc_interrupt, | ||
152 | .irq_clear = pdc_irq_clear, | ||
153 | |||
154 | .port_start = pdc_port_start, | ||
155 | .port_stop = pdc_port_stop, | ||
156 | .host_stop = ata_host_stop, | ||
157 | }; | ||
158 | |||
129 | static struct ata_port_info pdc_port_info[] = { | 159 | static struct ata_port_info pdc_port_info[] = { |
130 | /* board_2037x */ | 160 | /* board_2037x */ |
131 | { | 161 | { |
@@ -135,7 +165,7 @@ static struct ata_port_info pdc_port_info[] = { | |||
135 | .pio_mask = 0x1f, /* pio0-4 */ | 165 | .pio_mask = 0x1f, /* pio0-4 */ |
136 | .mwdma_mask = 0x07, /* mwdma0-2 */ | 166 | .mwdma_mask = 0x07, /* mwdma0-2 */ |
137 | .udma_mask = 0x7f, /* udma0-6 ; FIXME */ | 167 | .udma_mask = 0x7f, /* udma0-6 ; FIXME */ |
138 | .port_ops = &pdc_ata_ops, | 168 | .port_ops = &pdc_sata_ops, |
139 | }, | 169 | }, |
140 | 170 | ||
141 | /* board_20319 */ | 171 | /* board_20319 */ |
@@ -146,7 +176,7 @@ static struct ata_port_info pdc_port_info[] = { | |||
146 | .pio_mask = 0x1f, /* pio0-4 */ | 176 | .pio_mask = 0x1f, /* pio0-4 */ |
147 | .mwdma_mask = 0x07, /* mwdma0-2 */ | 177 | .mwdma_mask = 0x07, /* mwdma0-2 */ |
148 | .udma_mask = 0x7f, /* udma0-6 ; FIXME */ | 178 | .udma_mask = 0x7f, /* udma0-6 ; FIXME */ |
149 | .port_ops = &pdc_ata_ops, | 179 | .port_ops = &pdc_sata_ops, |
150 | }, | 180 | }, |
151 | 181 | ||
152 | /* board_20619 */ | 182 | /* board_20619 */ |
@@ -157,7 +187,7 @@ static struct ata_port_info pdc_port_info[] = { | |||
157 | .pio_mask = 0x1f, /* pio0-4 */ | 187 | .pio_mask = 0x1f, /* pio0-4 */ |
158 | .mwdma_mask = 0x07, /* mwdma0-2 */ | 188 | .mwdma_mask = 0x07, /* mwdma0-2 */ |
159 | .udma_mask = 0x7f, /* udma0-6 ; FIXME */ | 189 | .udma_mask = 0x7f, /* udma0-6 ; FIXME */ |
160 | .port_ops = &pdc_ata_ops, | 190 | .port_ops = &pdc_pata_ops, |
161 | }, | 191 | }, |
162 | }; | 192 | }; |
163 | 193 | ||
@@ -272,12 +302,23 @@ static void pdc_reset_port(struct ata_port *ap) | |||
272 | readl(mmio); /* flush */ | 302 | readl(mmio); /* flush */ |
273 | } | 303 | } |
274 | 304 | ||
275 | static void pdc_phy_reset(struct ata_port *ap) | 305 | static void pdc_sata_phy_reset(struct ata_port *ap) |
276 | { | 306 | { |
277 | pdc_reset_port(ap); | 307 | pdc_reset_port(ap); |
278 | sata_phy_reset(ap); | 308 | sata_phy_reset(ap); |
279 | } | 309 | } |
280 | 310 | ||
311 | static void pdc_pata_phy_reset(struct ata_port *ap) | ||
312 | { | ||
313 | /* FIXME: add cable detect. Don't assume 40-pin cable */ | ||
314 | ap->cbl = ATA_CBL_PATA40; | ||
315 | ap->udma_mask &= ATA_UDMA_MASK_40C; | ||
316 | |||
317 | pdc_reset_port(ap); | ||
318 | ata_port_probe(ap); | ||
319 | ata_bus_reset(ap); | ||
320 | } | ||
321 | |||
281 | static u32 pdc_sata_scr_read (struct ata_port *ap, unsigned int sc_reg) | 322 | static u32 pdc_sata_scr_read (struct ata_port *ap, unsigned int sc_reg) |
282 | { | 323 | { |
283 | if (sc_reg > SCR_CONTROL) | 324 | if (sc_reg > SCR_CONTROL) |
diff --git a/drivers/scsi/sata_promise.h b/drivers/scsi/sata_promise.h index 6e7e96b9ee13..6ee5e190262d 100644 --- a/drivers/scsi/sata_promise.h +++ b/drivers/scsi/sata_promise.h | |||
@@ -3,21 +3,24 @@ | |||
3 | * | 3 | * |
4 | * Copyright 2003-2004 Red Hat, Inc. | 4 | * Copyright 2003-2004 Red Hat, Inc. |
5 | * | 5 | * |
6 | * The contents of this file are subject to the Open | ||
7 | * Software License version 1.1 that can be found at | ||
8 | * http://www.opensource.org/licenses/osl-1.1.txt and is included herein | ||
9 | * by reference. | ||
10 | * | 6 | * |
11 | * Alternatively, the contents of this file may be used under the terms | 7 | * This program is free software; you can redistribute it and/or modify |
12 | * of the GNU General Public License version 2 (the "GPL") as distributed | 8 | * it under the terms of the GNU General Public License as published by |
13 | * in the kernel source COPYING file, in which case the provisions of | 9 | * the Free Software Foundation; either version 2, or (at your option) |
14 | * the GPL are applicable instead of the above. If you wish to allow | 10 | * any later version. |
15 | * the use of your version of this file only under the terms of the | 11 | * |
16 | * GPL and not to allow others to use your version of this file under | 12 | * This program is distributed in the hope that it will be useful, |
17 | * the OSL, indicate your decision by deleting the provisions above and | 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | * replace them with the notice and other provisions required by the GPL. | 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
19 | * If you do not delete the provisions above, a recipient may use your | 15 | * GNU General Public License for more details. |
20 | * version of this file under either the OSL or the GPL. | 16 | * |
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; see the file COPYING. If not, write to | ||
19 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
20 | * | ||
21 | * | ||
22 | * libata documentation is available via 'make {ps|pdf}docs', | ||
23 | * as Documentation/DocBook/libata.* | ||
21 | * | 24 | * |
22 | */ | 25 | */ |
23 | 26 | ||
diff --git a/drivers/scsi/sata_qstor.c b/drivers/scsi/sata_qstor.c index 93fd06fb4f15..9c99ab433bd3 100644 --- a/drivers/scsi/sata_qstor.c +++ b/drivers/scsi/sata_qstor.c | |||
@@ -6,21 +6,24 @@ | |||
6 | * Copyright 2005 Pacific Digital Corporation. | 6 | * Copyright 2005 Pacific Digital Corporation. |
7 | * (OSL/GPL code release authorized by Jalil Fadavi). | 7 | * (OSL/GPL code release authorized by Jalil Fadavi). |
8 | * | 8 | * |
9 | * The contents of this file are subject to the Open | ||
10 | * Software License version 1.1 that can be found at | ||
11 | * http://www.opensource.org/licenses/osl-1.1.txt and is included herein | ||
12 | * by reference. | ||
13 | * | 9 | * |
14 | * Alternatively, the contents of this file may be used under the terms | 10 | * This program is free software; you can redistribute it and/or modify |
15 | * of the GNU General Public License version 2 (the "GPL") as distributed | 11 | * it under the terms of the GNU General Public License as published by |
16 | * in the kernel source COPYING file, in which case the provisions of | 12 | * the Free Software Foundation; either version 2, or (at your option) |
17 | * the GPL are applicable instead of the above. If you wish to allow | 13 | * any later version. |
18 | * the use of your version of this file only under the terms of the | 14 | * |
19 | * GPL and not to allow others to use your version of this file under | 15 | * This program is distributed in the hope that it will be useful, |
20 | * the OSL, indicate your decision by deleting the provisions above and | 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
21 | * replace them with the notice and other provisions required by the GPL. | 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
22 | * If you do not delete the provisions above, a recipient may use your | 18 | * GNU General Public License for more details. |
23 | * version of this file under either the OSL or the GPL. | 19 | * |
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; see the file COPYING. If not, write to | ||
22 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
23 | * | ||
24 | * | ||
25 | * libata documentation is available via 'make {ps|pdf}docs', | ||
26 | * as Documentation/DocBook/libata.* | ||
24 | * | 27 | * |
25 | */ | 28 | */ |
26 | 29 | ||
diff --git a/drivers/scsi/sata_sil.c b/drivers/scsi/sata_sil.c index 9d24d6c328b4..71d49548f0a3 100644 --- a/drivers/scsi/sata_sil.c +++ b/drivers/scsi/sata_sil.c | |||
@@ -5,24 +5,27 @@ | |||
5 | * Please ALWAYS copy linux-ide@vger.kernel.org | 5 | * Please ALWAYS copy linux-ide@vger.kernel.org |
6 | * on emails. | 6 | * on emails. |
7 | * | 7 | * |
8 | * Copyright 2003 Red Hat, Inc. | 8 | * Copyright 2003-2005 Red Hat, Inc. |
9 | * Copyright 2003 Benjamin Herrenschmidt | 9 | * Copyright 2003 Benjamin Herrenschmidt |
10 | * | 10 | * |
11 | * The contents of this file are subject to the Open | ||
12 | * Software License version 1.1 that can be found at | ||
13 | * http://www.opensource.org/licenses/osl-1.1.txt and is included herein | ||
14 | * by reference. | ||
15 | * | 11 | * |
16 | * Alternatively, the contents of this file may be used under the terms | 12 | * This program is free software; you can redistribute it and/or modify |
17 | * of the GNU General Public License version 2 (the "GPL") as distributed | 13 | * it under the terms of the GNU General Public License as published by |
18 | * in the kernel source COPYING file, in which case the provisions of | 14 | * the Free Software Foundation; either version 2, or (at your option) |
19 | * the GPL are applicable instead of the above. If you wish to allow | 15 | * any later version. |
20 | * the use of your version of this file only under the terms of the | 16 | * |
21 | * GPL and not to allow others to use your version of this file under | 17 | * This program is distributed in the hope that it will be useful, |
22 | * the OSL, indicate your decision by deleting the provisions above and | 18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
23 | * replace them with the notice and other provisions required by the GPL. | 19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
24 | * If you do not delete the provisions above, a recipient may use your | 20 | * GNU General Public License for more details. |
25 | * version of this file under either the OSL or the GPL. | 21 | * |
22 | * You should have received a copy of the GNU General Public License | ||
23 | * along with this program; see the file COPYING. If not, write to | ||
24 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
25 | * | ||
26 | * | ||
27 | * libata documentation is available via 'make {ps|pdf}docs', | ||
28 | * as Documentation/DocBook/libata.* | ||
26 | * | 29 | * |
27 | * Documentation for SiI 3112: | 30 | * Documentation for SiI 3112: |
28 | * http://gkernel.sourceforge.net/specs/sii/3112A_SiI-DS-0095-B2.pdf.bz2 | 31 | * http://gkernel.sourceforge.net/specs/sii/3112A_SiI-DS-0095-B2.pdf.bz2 |
diff --git a/drivers/scsi/sata_sis.c b/drivers/scsi/sata_sis.c index b250ae0c7773..43af445b3ad2 100644 --- a/drivers/scsi/sata_sis.c +++ b/drivers/scsi/sata_sis.c | |||
@@ -7,21 +7,26 @@ | |||
7 | * | 7 | * |
8 | * Copyright 2004 Uwe Koziolek | 8 | * Copyright 2004 Uwe Koziolek |
9 | * | 9 | * |
10 | * The contents of this file are subject to the Open | ||
11 | * Software License version 1.1 that can be found at | ||
12 | * http://www.opensource.org/licenses/osl-1.1.txt and is included herein | ||
13 | * by reference. | ||
14 | * | 10 | * |
15 | * Alternatively, the contents of this file may be used under the terms | 11 | * This program is free software; you can redistribute it and/or modify |
16 | * of the GNU General Public License version 2 (the "GPL") as distributed | 12 | * it under the terms of the GNU General Public License as published by |
17 | * in the kernel source COPYING file, in which case the provisions of | 13 | * the Free Software Foundation; either version 2, or (at your option) |
18 | * the GPL are applicable instead of the above. If you wish to allow | 14 | * any later version. |
19 | * the use of your version of this file only under the terms of the | 15 | * |
20 | * GPL and not to allow others to use your version of this file under | 16 | * This program is distributed in the hope that it will be useful, |
21 | * the OSL, indicate your decision by deleting the provisions above and | 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
22 | * replace them with the notice and other provisions required by the GPL. | 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
23 | * If you do not delete the provisions above, a recipient may use your | 19 | * GNU General Public License for more details. |
24 | * version of this file under either the OSL or the GPL. | 20 | * |
21 | * You should have received a copy of the GNU General Public License | ||
22 | * along with this program; see the file COPYING. If not, write to | ||
23 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
24 | * | ||
25 | * | ||
26 | * libata documentation is available via 'make {ps|pdf}docs', | ||
27 | * as Documentation/DocBook/libata.* | ||
28 | * | ||
29 | * Hardware documentation available under NDA. | ||
25 | * | 30 | * |
26 | */ | 31 | */ |
27 | 32 | ||
diff --git a/drivers/scsi/sata_svw.c b/drivers/scsi/sata_svw.c index 6fd2ce1ffcd8..19d3bb3b0fb6 100644 --- a/drivers/scsi/sata_svw.c +++ b/drivers/scsi/sata_svw.c | |||
@@ -13,21 +13,26 @@ | |||
13 | * This driver probably works with non-Apple versions of the | 13 | * This driver probably works with non-Apple versions of the |
14 | * Broadcom chipset... | 14 | * Broadcom chipset... |
15 | * | 15 | * |
16 | * The contents of this file are subject to the Open | ||
17 | * Software License version 1.1 that can be found at | ||
18 | * http://www.opensource.org/licenses/osl-1.1.txt and is included herein | ||
19 | * by reference. | ||
20 | * | 16 | * |
21 | * Alternatively, the contents of this file may be used under the terms | 17 | * This program is free software; you can redistribute it and/or modify |
22 | * of the GNU General Public License version 2 (the "GPL") as distributed | 18 | * it under the terms of the GNU General Public License as published by |
23 | * in the kernel source COPYING file, in which case the provisions of | 19 | * the Free Software Foundation; either version 2, or (at your option) |
24 | * the GPL are applicable instead of the above. If you wish to allow | 20 | * any later version. |
25 | * the use of your version of this file only under the terms of the | 21 | * |
26 | * GPL and not to allow others to use your version of this file under | 22 | * This program is distributed in the hope that it will be useful, |
27 | * the OSL, indicate your decision by deleting the provisions above and | 23 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
28 | * replace them with the notice and other provisions required by the GPL. | 24 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
29 | * If you do not delete the provisions above, a recipient may use your | 25 | * GNU General Public License for more details. |
30 | * version of this file under either the OSL or the GPL. | 26 | * |
27 | * You should have received a copy of the GNU General Public License | ||
28 | * along with this program; see the file COPYING. If not, write to | ||
29 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
30 | * | ||
31 | * | ||
32 | * libata documentation is available via 'make {ps|pdf}docs', | ||
33 | * as Documentation/DocBook/libata.* | ||
34 | * | ||
35 | * Hardware documentation available under NDA. | ||
31 | * | 36 | * |
32 | */ | 37 | */ |
33 | 38 | ||
diff --git a/drivers/scsi/sata_sx4.c b/drivers/scsi/sata_sx4.c index a20d4285090a..c72fcc46f0fa 100644 --- a/drivers/scsi/sata_sx4.c +++ b/drivers/scsi/sata_sx4.c | |||
@@ -7,21 +7,26 @@ | |||
7 | * | 7 | * |
8 | * Copyright 2003-2004 Red Hat, Inc. | 8 | * Copyright 2003-2004 Red Hat, Inc. |
9 | * | 9 | * |
10 | * The contents of this file are subject to the Open | ||
11 | * Software License version 1.1 that can be found at | ||
12 | * http://www.opensource.org/licenses/osl-1.1.txt and is included herein | ||
13 | * by reference. | ||
14 | * | 10 | * |
15 | * Alternatively, the contents of this file may be used under the terms | 11 | * This program is free software; you can redistribute it and/or modify |
16 | * of the GNU General Public License version 2 (the "GPL") as distributed | 12 | * it under the terms of the GNU General Public License as published by |
17 | * in the kernel source COPYING file, in which case the provisions of | 13 | * the Free Software Foundation; either version 2, or (at your option) |
18 | * the GPL are applicable instead of the above. If you wish to allow | 14 | * any later version. |
19 | * the use of your version of this file only under the terms of the | 15 | * |
20 | * GPL and not to allow others to use your version of this file under | 16 | * This program is distributed in the hope that it will be useful, |
21 | * the OSL, indicate your decision by deleting the provisions above and | 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
22 | * replace them with the notice and other provisions required by the GPL. | 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
23 | * If you do not delete the provisions above, a recipient may use your | 19 | * GNU General Public License for more details. |
24 | * version of this file under either the OSL or the GPL. | 20 | * |
21 | * You should have received a copy of the GNU General Public License | ||
22 | * along with this program; see the file COPYING. If not, write to | ||
23 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
24 | * | ||
25 | * | ||
26 | * libata documentation is available via 'make {ps|pdf}docs', | ||
27 | * as Documentation/DocBook/libata.* | ||
28 | * | ||
29 | * Hardware documentation available under NDA. | ||
25 | * | 30 | * |
26 | */ | 31 | */ |
27 | 32 | ||
diff --git a/drivers/scsi/sata_uli.c b/drivers/scsi/sata_uli.c index eb202a73bc0e..1566886815fb 100644 --- a/drivers/scsi/sata_uli.c +++ b/drivers/scsi/sata_uli.c | |||
@@ -1,21 +1,26 @@ | |||
1 | /* | 1 | /* |
2 | * sata_uli.c - ULi Electronics SATA | 2 | * sata_uli.c - ULi Electronics SATA |
3 | * | 3 | * |
4 | * The contents of this file are subject to the Open | ||
5 | * Software License version 1.1 that can be found at | ||
6 | * http://www.opensource.org/licenses/osl-1.1.txt and is included herein | ||
7 | * by reference. | ||
8 | * | 4 | * |
9 | * Alternatively, the contents of this file may be used under the terms | 5 | * This program is free software; you can redistribute it and/or modify |
10 | * of the GNU General Public License version 2 (the "GPL") as distributed | 6 | * it under the terms of the GNU General Public License as published by |
11 | * in the kernel source COPYING file, in which case the provisions of | 7 | * the Free Software Foundation; either version 2, or (at your option) |
12 | * the GPL are applicable instead of the above. If you wish to allow | 8 | * any later version. |
13 | * the use of your version of this file only under the terms of the | 9 | * |
14 | * GPL and not to allow others to use your version of this file under | 10 | * This program is distributed in the hope that it will be useful, |
15 | * the OSL, indicate your decision by deleting the provisions above and | 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * replace them with the notice and other provisions required by the GPL. | 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | * If you do not delete the provisions above, a recipient may use your | 13 | * GNU General Public License for more details. |
18 | * version of this file under either the OSL or the GPL. | 14 | * |
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; see the file COPYING. If not, write to | ||
17 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
18 | * | ||
19 | * | ||
20 | * libata documentation is available via 'make {ps|pdf}docs', | ||
21 | * as Documentation/DocBook/libata.* | ||
22 | * | ||
23 | * Hardware documentation available under NDA. | ||
19 | * | 24 | * |
20 | */ | 25 | */ |
21 | 26 | ||
diff --git a/drivers/scsi/sata_via.c b/drivers/scsi/sata_via.c index feff10980487..128b996b07b7 100644 --- a/drivers/scsi/sata_via.c +++ b/drivers/scsi/sata_via.c | |||
@@ -1,34 +1,38 @@ | |||
1 | /* | 1 | /* |
2 | sata_via.c - VIA Serial ATA controllers | 2 | * sata_via.c - VIA Serial ATA controllers |
3 | 3 | * | |
4 | Maintained by: Jeff Garzik <jgarzik@pobox.com> | 4 | * Maintained by: Jeff Garzik <jgarzik@pobox.com> |
5 | Please ALWAYS copy linux-ide@vger.kernel.org | 5 | * Please ALWAYS copy linux-ide@vger.kernel.org |
6 | on emails. | 6 | on emails. |
7 | 7 | * | |
8 | Copyright 2003-2004 Red Hat, Inc. All rights reserved. | 8 | * Copyright 2003-2004 Red Hat, Inc. All rights reserved. |
9 | Copyright 2003-2004 Jeff Garzik | 9 | * Copyright 2003-2004 Jeff Garzik |
10 | 10 | * | |
11 | The contents of this file are subject to the Open | 11 | * |
12 | Software License version 1.1 that can be found at | 12 | * This program is free software; you can redistribute it and/or modify |
13 | http://www.opensource.org/licenses/osl-1.1.txt and is included herein | 13 | * it under the terms of the GNU General Public License as published by |
14 | by reference. | 14 | * the Free Software Foundation; either version 2, or (at your option) |
15 | 15 | * any later version. | |
16 | Alternatively, the contents of this file may be used under the terms | 16 | * |
17 | of the GNU General Public License version 2 (the "GPL") as distributed | 17 | * This program is distributed in the hope that it will be useful, |
18 | in the kernel source COPYING file, in which case the provisions of | 18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
19 | the GPL are applicable instead of the above. If you wish to allow | 19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
20 | the use of your version of this file only under the terms of the | 20 | * GNU General Public License for more details. |
21 | GPL and not to allow others to use your version of this file under | 21 | * |
22 | the OSL, indicate your decision by deleting the provisions above and | 22 | * You should have received a copy of the GNU General Public License |
23 | replace them with the notice and other provisions required by the GPL. | 23 | * along with this program; see the file COPYING. If not, write to |
24 | If you do not delete the provisions above, a recipient may use your | 24 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. |
25 | version of this file under either the OSL or the GPL. | 25 | * |
26 | 26 | * | |
27 | ---------------------------------------------------------------------- | 27 | * libata documentation is available via 'make {ps|pdf}docs', |
28 | 28 | * as Documentation/DocBook/libata.* | |
29 | To-do list: | 29 | * |
30 | * VT6421 PATA support | 30 | * Hardware documentation available under NDA. |
31 | 31 | * | |
32 | * | ||
33 | * To-do list: | ||
34 | * - VT6421 PATA support | ||
35 | * | ||
32 | */ | 36 | */ |
33 | 37 | ||
34 | #include <linux/kernel.h> | 38 | #include <linux/kernel.h> |
diff --git a/drivers/scsi/sata_vsc.c b/drivers/scsi/sata_vsc.c index 6f2562171be0..3985f344da4d 100644 --- a/drivers/scsi/sata_vsc.c +++ b/drivers/scsi/sata_vsc.c | |||
@@ -9,9 +9,29 @@ | |||
9 | * | 9 | * |
10 | * Bits from Jeff Garzik, Copyright RedHat, Inc. | 10 | * Bits from Jeff Garzik, Copyright RedHat, Inc. |
11 | * | 11 | * |
12 | * This file is subject to the terms and conditions of the GNU General Public | 12 | * |
13 | * License. See the file "COPYING" in the main directory of this archive | 13 | * This program is free software; you can redistribute it and/or modify |
14 | * for more details. | 14 | * it under the terms of the GNU General Public License as published by |
15 | * the Free Software Foundation; either version 2, or (at your option) | ||
16 | * any later version. | ||
17 | * | ||
18 | * This program is distributed in the hope that it will be useful, | ||
19 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
21 | * GNU General Public License for more details. | ||
22 | * | ||
23 | * You should have received a copy of the GNU General Public License | ||
24 | * along with this program; see the file COPYING. If not, write to | ||
25 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
26 | * | ||
27 | * | ||
28 | * libata documentation is available via 'make {ps|pdf}docs', | ||
29 | * as Documentation/DocBook/libata.* | ||
30 | * | ||
31 | * Vitesse hardware documentation presumably available under NDA. | ||
32 | * Intel 31244 (same hardware interface) documentation presumably | ||
33 | * available from http://developer.intel.com/ | ||
34 | * | ||
15 | */ | 35 | */ |
16 | 36 | ||
17 | #include <linux/kernel.h> | 37 | #include <linux/kernel.h> |
diff --git a/drivers/usb/net/usbnet.c b/drivers/usb/net/usbnet.c index 4528a00c45b0..a2f67245f6da 100644 --- a/drivers/usb/net/usbnet.c +++ b/drivers/usb/net/usbnet.c | |||
@@ -2903,19 +2903,18 @@ static struct net_device_stats *usbnet_get_stats (struct net_device *net) | |||
2903 | * completion callbacks. 2.5 should have fixed those bugs... | 2903 | * completion callbacks. 2.5 should have fixed those bugs... |
2904 | */ | 2904 | */ |
2905 | 2905 | ||
2906 | static void defer_bh (struct usbnet *dev, struct sk_buff *skb) | 2906 | static void defer_bh(struct usbnet *dev, struct sk_buff *skb, struct sk_buff_head *list) |
2907 | { | 2907 | { |
2908 | struct sk_buff_head *list = skb->list; | ||
2909 | unsigned long flags; | 2908 | unsigned long flags; |
2910 | 2909 | ||
2911 | spin_lock_irqsave (&list->lock, flags); | 2910 | spin_lock_irqsave(&list->lock, flags); |
2912 | __skb_unlink (skb, list); | 2911 | __skb_unlink(skb, list); |
2913 | spin_unlock (&list->lock); | 2912 | spin_unlock(&list->lock); |
2914 | spin_lock (&dev->done.lock); | 2913 | spin_lock(&dev->done.lock); |
2915 | __skb_queue_tail (&dev->done, skb); | 2914 | __skb_queue_tail(&dev->done, skb); |
2916 | if (dev->done.qlen == 1) | 2915 | if (dev->done.qlen == 1) |
2917 | tasklet_schedule (&dev->bh); | 2916 | tasklet_schedule(&dev->bh); |
2918 | spin_unlock_irqrestore (&dev->done.lock, flags); | 2917 | spin_unlock_irqrestore(&dev->done.lock, flags); |
2919 | } | 2918 | } |
2920 | 2919 | ||
2921 | /* some work can't be done in tasklets, so we use keventd | 2920 | /* some work can't be done in tasklets, so we use keventd |
@@ -3120,7 +3119,7 @@ block: | |||
3120 | break; | 3119 | break; |
3121 | } | 3120 | } |
3122 | 3121 | ||
3123 | defer_bh (dev, skb); | 3122 | defer_bh(dev, skb, &dev->rxq); |
3124 | 3123 | ||
3125 | if (urb) { | 3124 | if (urb) { |
3126 | if (netif_running (dev->net) | 3125 | if (netif_running (dev->net) |
@@ -3490,7 +3489,7 @@ static void tx_complete (struct urb *urb, struct pt_regs *regs) | |||
3490 | 3489 | ||
3491 | urb->dev = NULL; | 3490 | urb->dev = NULL; |
3492 | entry->state = tx_done; | 3491 | entry->state = tx_done; |
3493 | defer_bh (dev, skb); | 3492 | defer_bh(dev, skb, &dev->txq); |
3494 | } | 3493 | } |
3495 | 3494 | ||
3496 | /*-------------------------------------------------------------------------*/ | 3495 | /*-------------------------------------------------------------------------*/ |
diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index b5a5e04b6d37..498ad505fa5f 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c | |||
@@ -86,9 +86,9 @@ static struct w1_master * w1_alloc_dev(u32 id, int slave_count, int slave_ttl, | |||
86 | 86 | ||
87 | dev->driver = driver; | 87 | dev->driver = driver; |
88 | 88 | ||
89 | dev->groups = 23; | 89 | dev->groups = 1; |
90 | dev->seq = 1; | 90 | dev->seq = 1; |
91 | dev->nls = netlink_kernel_create(NETLINK_W1, NULL); | 91 | dev->nls = netlink_kernel_create(NETLINK_W1, 1, NULL, THIS_MODULE); |
92 | if (!dev->nls) { | 92 | if (!dev->nls) { |
93 | printk(KERN_ERR "Failed to create new netlink socket(%u) for w1 master %s.\n", | 93 | printk(KERN_ERR "Failed to create new netlink socket(%u) for w1 master %s.\n", |
94 | NETLINK_NFLOG, dev->dev.bus_id); | 94 | NETLINK_NFLOG, dev->dev.bus_id); |
@@ -225,3 +225,5 @@ void w1_remove_master_device(struct w1_bus_master *bm) | |||
225 | 225 | ||
226 | EXPORT_SYMBOL(w1_add_master_device); | 226 | EXPORT_SYMBOL(w1_add_master_device); |
227 | EXPORT_SYMBOL(w1_remove_master_device); | 227 | EXPORT_SYMBOL(w1_remove_master_device); |
228 | |||
229 | MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_W1); | ||
diff --git a/drivers/w1/w1_netlink.c b/drivers/w1/w1_netlink.c index 2a82fb055c70..e7b774423dd6 100644 --- a/drivers/w1/w1_netlink.c +++ b/drivers/w1/w1_netlink.c | |||
@@ -51,7 +51,7 @@ void w1_netlink_send(struct w1_master *dev, struct w1_netlink_msg *msg) | |||
51 | 51 | ||
52 | memcpy(data, msg, sizeof(struct w1_netlink_msg)); | 52 | memcpy(data, msg, sizeof(struct w1_netlink_msg)); |
53 | 53 | ||
54 | NETLINK_CB(skb).dst_groups = dev->groups; | 54 | NETLINK_CB(skb).dst_group = dev->groups; |
55 | netlink_broadcast(dev->nls, skb, 0, dev->groups, GFP_ATOMIC); | 55 | netlink_broadcast(dev->nls, skb, 0, dev->groups, GFP_ATOMIC); |
56 | 56 | ||
57 | nlmsg_failure: | 57 | nlmsg_failure: |
diff --git a/fs/smbfs/sock.c b/fs/smbfs/sock.c index 93f3cd22a2e9..6815b1b12b68 100644 --- a/fs/smbfs/sock.c +++ b/fs/smbfs/sock.c | |||
@@ -15,12 +15,12 @@ | |||
15 | #include <linux/file.h> | 15 | #include <linux/file.h> |
16 | #include <linux/in.h> | 16 | #include <linux/in.h> |
17 | #include <linux/net.h> | 17 | #include <linux/net.h> |
18 | #include <linux/tcp.h> | ||
19 | #include <linux/mm.h> | 18 | #include <linux/mm.h> |
20 | #include <linux/netdevice.h> | 19 | #include <linux/netdevice.h> |
21 | #include <linux/smp_lock.h> | 20 | #include <linux/smp_lock.h> |
22 | #include <linux/workqueue.h> | 21 | #include <linux/workqueue.h> |
23 | #include <net/scm.h> | 22 | #include <net/scm.h> |
23 | #include <net/tcp_states.h> | ||
24 | #include <net/ip.h> | 24 | #include <net/ip.h> |
25 | 25 | ||
26 | #include <linux/smb_fs.h> | 26 | #include <linux/smb_fs.h> |
diff --git a/include/asm-alpha/socket.h b/include/asm-alpha/socket.h index d00259d3dc78..b5193229132a 100644 --- a/include/asm-alpha/socket.h +++ b/include/asm-alpha/socket.h | |||
@@ -25,6 +25,8 @@ | |||
25 | #define SO_ERROR 0x1007 | 25 | #define SO_ERROR 0x1007 |
26 | #define SO_SNDBUF 0x1001 | 26 | #define SO_SNDBUF 0x1001 |
27 | #define SO_RCVBUF 0x1002 | 27 | #define SO_RCVBUF 0x1002 |
28 | #define SO_SNDBUFFORCE 0x100a | ||
29 | #define SO_RCVBUFFORCE 0x100b | ||
28 | #define SO_RCVLOWAT 0x1010 | 30 | #define SO_RCVLOWAT 0x1010 |
29 | #define SO_SNDLOWAT 0x1011 | 31 | #define SO_SNDLOWAT 0x1011 |
30 | #define SO_RCVTIMEO 0x1012 | 32 | #define SO_RCVTIMEO 0x1012 |
diff --git a/include/asm-arm/socket.h b/include/asm-arm/socket.h index 46d20585d951..3c51da6438c9 100644 --- a/include/asm-arm/socket.h +++ b/include/asm-arm/socket.h | |||
@@ -14,6 +14,8 @@ | |||
14 | #define SO_BROADCAST 6 | 14 | #define SO_BROADCAST 6 |
15 | #define SO_SNDBUF 7 | 15 | #define SO_SNDBUF 7 |
16 | #define SO_RCVBUF 8 | 16 | #define SO_RCVBUF 8 |
17 | #define SO_SNDBUFFORCE 32 | ||
18 | #define SO_RCVBUFFORCE 33 | ||
17 | #define SO_KEEPALIVE 9 | 19 | #define SO_KEEPALIVE 9 |
18 | #define SO_OOBINLINE 10 | 20 | #define SO_OOBINLINE 10 |
19 | #define SO_NO_CHECK 11 | 21 | #define SO_NO_CHECK 11 |
diff --git a/include/asm-arm26/socket.h b/include/asm-arm26/socket.h index 46d20585d951..3c51da6438c9 100644 --- a/include/asm-arm26/socket.h +++ b/include/asm-arm26/socket.h | |||
@@ -14,6 +14,8 @@ | |||
14 | #define SO_BROADCAST 6 | 14 | #define SO_BROADCAST 6 |
15 | #define SO_SNDBUF 7 | 15 | #define SO_SNDBUF 7 |
16 | #define SO_RCVBUF 8 | 16 | #define SO_RCVBUF 8 |
17 | #define SO_SNDBUFFORCE 32 | ||
18 | #define SO_RCVBUFFORCE 33 | ||
17 | #define SO_KEEPALIVE 9 | 19 | #define SO_KEEPALIVE 9 |
18 | #define SO_OOBINLINE 10 | 20 | #define SO_OOBINLINE 10 |
19 | #define SO_NO_CHECK 11 | 21 | #define SO_NO_CHECK 11 |
diff --git a/include/asm-cris/socket.h b/include/asm-cris/socket.h index f159b4f165f7..8b1da3e58c55 100644 --- a/include/asm-cris/socket.h +++ b/include/asm-cris/socket.h | |||
@@ -16,6 +16,8 @@ | |||
16 | #define SO_BROADCAST 6 | 16 | #define SO_BROADCAST 6 |
17 | #define SO_SNDBUF 7 | 17 | #define SO_SNDBUF 7 |
18 | #define SO_RCVBUF 8 | 18 | #define SO_RCVBUF 8 |
19 | #define SO_SNDBUFFORCE 32 | ||
20 | #define SO_RCVBUFFORCE 33 | ||
19 | #define SO_KEEPALIVE 9 | 21 | #define SO_KEEPALIVE 9 |
20 | #define SO_OOBINLINE 10 | 22 | #define SO_OOBINLINE 10 |
21 | #define SO_NO_CHECK 11 | 23 | #define SO_NO_CHECK 11 |
diff --git a/include/asm-frv/socket.h b/include/asm-frv/socket.h index c3be17c7de4b..7177f8b9817c 100644 --- a/include/asm-frv/socket.h +++ b/include/asm-frv/socket.h | |||
@@ -14,6 +14,8 @@ | |||
14 | #define SO_BROADCAST 6 | 14 | #define SO_BROADCAST 6 |
15 | #define SO_SNDBUF 7 | 15 | #define SO_SNDBUF 7 |
16 | #define SO_RCVBUF 8 | 16 | #define SO_RCVBUF 8 |
17 | #define SO_SNDBUFFORCE 32 | ||
18 | #define SO_RCVBUFFORCE 33 | ||
17 | #define SO_KEEPALIVE 9 | 19 | #define SO_KEEPALIVE 9 |
18 | #define SO_OOBINLINE 10 | 20 | #define SO_OOBINLINE 10 |
19 | #define SO_NO_CHECK 11 | 21 | #define SO_NO_CHECK 11 |
diff --git a/include/asm-h8300/socket.h b/include/asm-h8300/socket.h index af33b8525dcf..d98cf85bafc1 100644 --- a/include/asm-h8300/socket.h +++ b/include/asm-h8300/socket.h | |||
@@ -14,6 +14,8 @@ | |||
14 | #define SO_BROADCAST 6 | 14 | #define SO_BROADCAST 6 |
15 | #define SO_SNDBUF 7 | 15 | #define SO_SNDBUF 7 |
16 | #define SO_RCVBUF 8 | 16 | #define SO_RCVBUF 8 |
17 | #define SO_SNDBUFFORCE 32 | ||
18 | #define SO_RCVBUFFORCE 33 | ||
17 | #define SO_KEEPALIVE 9 | 19 | #define SO_KEEPALIVE 9 |
18 | #define SO_OOBINLINE 10 | 20 | #define SO_OOBINLINE 10 |
19 | #define SO_NO_CHECK 11 | 21 | #define SO_NO_CHECK 11 |
diff --git a/include/asm-i386/checksum.h b/include/asm-i386/checksum.h index f949e44c2a35..67d3630c4e89 100644 --- a/include/asm-i386/checksum.h +++ b/include/asm-i386/checksum.h | |||
@@ -83,7 +83,7 @@ static inline unsigned short ip_fast_csum(unsigned char * iph, | |||
83 | "adcl $0, %0 ;\n" | 83 | "adcl $0, %0 ;\n" |
84 | "notl %0 ;\n" | 84 | "notl %0 ;\n" |
85 | "2: ;\n" | 85 | "2: ;\n" |
86 | /* Since the input registers which are loaded with iph and ipl | 86 | /* Since the input registers which are loaded with iph and ihl |
87 | are modified, we must also specify them as outputs, or gcc | 87 | are modified, we must also specify them as outputs, or gcc |
88 | will assume they contain their original values. */ | 88 | will assume they contain their original values. */ |
89 | : "=r" (sum), "=r" (iph), "=r" (ihl) | 89 | : "=r" (sum), "=r" (iph), "=r" (ihl) |
diff --git a/include/asm-i386/socket.h b/include/asm-i386/socket.h index 07f6b38ad140..802ae76195b7 100644 --- a/include/asm-i386/socket.h +++ b/include/asm-i386/socket.h | |||
@@ -14,6 +14,8 @@ | |||
14 | #define SO_BROADCAST 6 | 14 | #define SO_BROADCAST 6 |
15 | #define SO_SNDBUF 7 | 15 | #define SO_SNDBUF 7 |
16 | #define SO_RCVBUF 8 | 16 | #define SO_RCVBUF 8 |
17 | #define SO_SNDBUFFORCE 32 | ||
18 | #define SO_RCVBUFFORCE 33 | ||
17 | #define SO_KEEPALIVE 9 | 19 | #define SO_KEEPALIVE 9 |
18 | #define SO_OOBINLINE 10 | 20 | #define SO_OOBINLINE 10 |
19 | #define SO_NO_CHECK 11 | 21 | #define SO_NO_CHECK 11 |
diff --git a/include/asm-ia64/socket.h b/include/asm-ia64/socket.h index 21a9f10d6baa..a255006fb7b5 100644 --- a/include/asm-ia64/socket.h +++ b/include/asm-ia64/socket.h | |||
@@ -23,6 +23,8 @@ | |||
23 | #define SO_BROADCAST 6 | 23 | #define SO_BROADCAST 6 |
24 | #define SO_SNDBUF 7 | 24 | #define SO_SNDBUF 7 |
25 | #define SO_RCVBUF 8 | 25 | #define SO_RCVBUF 8 |
26 | #define SO_SNDBUFFORCE 32 | ||
27 | #define SO_RCVBUFFORCE 33 | ||
26 | #define SO_KEEPALIVE 9 | 28 | #define SO_KEEPALIVE 9 |
27 | #define SO_OOBINLINE 10 | 29 | #define SO_OOBINLINE 10 |
28 | #define SO_NO_CHECK 11 | 30 | #define SO_NO_CHECK 11 |
diff --git a/include/asm-m32r/checksum.h b/include/asm-m32r/checksum.h index 99f37dbf2558..877ebf46e9ff 100644 --- a/include/asm-m32r/checksum.h +++ b/include/asm-m32r/checksum.h | |||
@@ -105,7 +105,7 @@ static inline unsigned short ip_fast_csum(unsigned char * iph, | |||
105 | " addx %0, %3 \n" | 105 | " addx %0, %3 \n" |
106 | " .fillinsn\n" | 106 | " .fillinsn\n" |
107 | "2: \n" | 107 | "2: \n" |
108 | /* Since the input registers which are loaded with iph and ipl | 108 | /* Since the input registers which are loaded with iph and ihl |
109 | are modified, we must also specify them as outputs, or gcc | 109 | are modified, we must also specify them as outputs, or gcc |
110 | will assume they contain their original values. */ | 110 | will assume they contain their original values. */ |
111 | : "=&r" (sum), "=r" (iph), "=r" (ihl), "=&r" (tmpreg0), "=&r" (tmpreg1) | 111 | : "=&r" (sum), "=r" (iph), "=r" (ihl), "=&r" (tmpreg0), "=&r" (tmpreg1) |
diff --git a/include/asm-m32r/socket.h b/include/asm-m32r/socket.h index 159519d99042..8b6680f223c0 100644 --- a/include/asm-m32r/socket.h +++ b/include/asm-m32r/socket.h | |||
@@ -14,6 +14,8 @@ | |||
14 | #define SO_BROADCAST 6 | 14 | #define SO_BROADCAST 6 |
15 | #define SO_SNDBUF 7 | 15 | #define SO_SNDBUF 7 |
16 | #define SO_RCVBUF 8 | 16 | #define SO_RCVBUF 8 |
17 | #define SO_SNDBUFFORCE 32 | ||
18 | #define SO_RCVBUFFORCE 33 | ||
17 | #define SO_KEEPALIVE 9 | 19 | #define SO_KEEPALIVE 9 |
18 | #define SO_OOBINLINE 10 | 20 | #define SO_OOBINLINE 10 |
19 | #define SO_NO_CHECK 11 | 21 | #define SO_NO_CHECK 11 |
diff --git a/include/asm-m68k/socket.h b/include/asm-m68k/socket.h index 8d0b9fc2d07e..f578ca4b776a 100644 --- a/include/asm-m68k/socket.h +++ b/include/asm-m68k/socket.h | |||
@@ -14,6 +14,8 @@ | |||
14 | #define SO_BROADCAST 6 | 14 | #define SO_BROADCAST 6 |
15 | #define SO_SNDBUF 7 | 15 | #define SO_SNDBUF 7 |
16 | #define SO_RCVBUF 8 | 16 | #define SO_RCVBUF 8 |
17 | #define SO_SNDBUFFORCE 32 | ||
18 | #define SO_RCVBUFFORCE 33 | ||
17 | #define SO_KEEPALIVE 9 | 19 | #define SO_KEEPALIVE 9 |
18 | #define SO_OOBINLINE 10 | 20 | #define SO_OOBINLINE 10 |
19 | #define SO_NO_CHECK 11 | 21 | #define SO_NO_CHECK 11 |
diff --git a/include/asm-mips/socket.h b/include/asm-mips/socket.h index 020b4db70ee5..d478a86294ee 100644 --- a/include/asm-mips/socket.h +++ b/include/asm-mips/socket.h | |||
@@ -37,6 +37,8 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ | |||
37 | #define SO_ERROR 0x1007 /* get error status and clear */ | 37 | #define SO_ERROR 0x1007 /* get error status and clear */ |
38 | #define SO_SNDBUF 0x1001 /* Send buffer size. */ | 38 | #define SO_SNDBUF 0x1001 /* Send buffer size. */ |
39 | #define SO_RCVBUF 0x1002 /* Receive buffer. */ | 39 | #define SO_RCVBUF 0x1002 /* Receive buffer. */ |
40 | #define SO_SNDBUFFORCE 0x100a | ||
41 | #define SO_RCVBUFFORCE 0x100b | ||
40 | #define SO_SNDLOWAT 0x1003 /* send low-water mark */ | 42 | #define SO_SNDLOWAT 0x1003 /* send low-water mark */ |
41 | #define SO_RCVLOWAT 0x1004 /* receive low-water mark */ | 43 | #define SO_RCVLOWAT 0x1004 /* receive low-water mark */ |
42 | #define SO_SNDTIMEO 0x1005 /* send timeout */ | 44 | #define SO_SNDTIMEO 0x1005 /* send timeout */ |
diff --git a/include/asm-parisc/socket.h b/include/asm-parisc/socket.h index 4a77996c1862..1bf54dc53c10 100644 --- a/include/asm-parisc/socket.h +++ b/include/asm-parisc/socket.h | |||
@@ -16,6 +16,8 @@ | |||
16 | /* To add :#define SO_REUSEPORT 0x0200 */ | 16 | /* To add :#define SO_REUSEPORT 0x0200 */ |
17 | #define SO_SNDBUF 0x1001 | 17 | #define SO_SNDBUF 0x1001 |
18 | #define SO_RCVBUF 0x1002 | 18 | #define SO_RCVBUF 0x1002 |
19 | #define SO_SNDBUFFORCE 0x100a | ||
20 | #define SO_RCVBUFFORCE 0x100b | ||
19 | #define SO_SNDLOWAT 0x1003 | 21 | #define SO_SNDLOWAT 0x1003 |
20 | #define SO_RCVLOWAT 0x1004 | 22 | #define SO_RCVLOWAT 0x1004 |
21 | #define SO_SNDTIMEO 0x1005 | 23 | #define SO_SNDTIMEO 0x1005 |
diff --git a/include/asm-ppc64/8253pit.h b/include/asm-powerpc/8253pit.h index 285f78488ccb..862708a749b0 100644 --- a/include/asm-ppc64/8253pit.h +++ b/include/asm-powerpc/8253pit.h | |||
@@ -5,6 +5,6 @@ | |||
5 | #ifndef _8253PIT_H | 5 | #ifndef _8253PIT_H |
6 | #define _8253PIT_H | 6 | #define _8253PIT_H |
7 | 7 | ||
8 | #define PIT_TICK_RATE 1193182UL | 8 | #define PIT_TICK_RATE 1193182UL |
9 | 9 | ||
10 | #endif | 10 | #endif |
diff --git a/include/asm-ppc/agp.h b/include/asm-powerpc/agp.h index ca9e423307f4..ca9e423307f4 100644 --- a/include/asm-ppc/agp.h +++ b/include/asm-powerpc/agp.h | |||
diff --git a/include/asm-powerpc/cputime.h b/include/asm-powerpc/cputime.h new file mode 100644 index 000000000000..6d68ad7e0ea3 --- /dev/null +++ b/include/asm-powerpc/cputime.h | |||
@@ -0,0 +1 @@ | |||
#include <asm-generic/cputime.h> | |||
diff --git a/include/asm-ppc/div64.h b/include/asm-powerpc/div64.h index 6cd978cefb28..6cd978cefb28 100644 --- a/include/asm-ppc/div64.h +++ b/include/asm-powerpc/div64.h | |||
diff --git a/include/asm-powerpc/emergency-restart.h b/include/asm-powerpc/emergency-restart.h new file mode 100644 index 000000000000..3711bd9d50bd --- /dev/null +++ b/include/asm-powerpc/emergency-restart.h | |||
@@ -0,0 +1 @@ | |||
#include <asm-generic/emergency-restart.h> | |||
diff --git a/include/asm-ppc/errno.h b/include/asm-powerpc/errno.h index 19f20bd41ae6..19f20bd41ae6 100644 --- a/include/asm-ppc/errno.h +++ b/include/asm-powerpc/errno.h | |||
diff --git a/include/asm-ppc/ioctl.h b/include/asm-powerpc/ioctl.h index 93c6acfdd0fd..93c6acfdd0fd 100644 --- a/include/asm-ppc/ioctl.h +++ b/include/asm-powerpc/ioctl.h | |||
diff --git a/include/asm-ppc/ioctls.h b/include/asm-powerpc/ioctls.h index f5b7f2b055e7..f5b7f2b055e7 100644 --- a/include/asm-ppc/ioctls.h +++ b/include/asm-powerpc/ioctls.h | |||
diff --git a/include/asm-ppc/ipc.h b/include/asm-powerpc/ipc.h index a46e3d9c2a3f..a46e3d9c2a3f 100644 --- a/include/asm-ppc/ipc.h +++ b/include/asm-powerpc/ipc.h | |||
diff --git a/include/asm-ppc/linkage.h b/include/asm-powerpc/linkage.h index 291c2d01c44f..291c2d01c44f 100644 --- a/include/asm-ppc/linkage.h +++ b/include/asm-powerpc/linkage.h | |||
diff --git a/include/asm-ppc64/local.h b/include/asm-powerpc/local.h index c11c530f74d0..c11c530f74d0 100644 --- a/include/asm-ppc64/local.h +++ b/include/asm-powerpc/local.h | |||
diff --git a/include/asm-ppc/namei.h b/include/asm-powerpc/namei.h index 29c9ec832133..29c9ec832133 100644 --- a/include/asm-ppc/namei.h +++ b/include/asm-powerpc/namei.h | |||
diff --git a/include/asm-powerpc/percpu.h b/include/asm-powerpc/percpu.h new file mode 100644 index 000000000000..06a959d67234 --- /dev/null +++ b/include/asm-powerpc/percpu.h | |||
@@ -0,0 +1 @@ | |||
#include <asm-generic/percpu.h> | |||
diff --git a/include/asm-ppc/poll.h b/include/asm-powerpc/poll.h index be5024913c62..be5024913c62 100644 --- a/include/asm-ppc/poll.h +++ b/include/asm-powerpc/poll.h | |||
diff --git a/include/asm-powerpc/resource.h b/include/asm-powerpc/resource.h new file mode 100644 index 000000000000..04bc4db8921b --- /dev/null +++ b/include/asm-powerpc/resource.h | |||
@@ -0,0 +1 @@ | |||
#include <asm-generic/resource.h> | |||
diff --git a/include/asm-ppc/shmparam.h b/include/asm-powerpc/shmparam.h index d6250602ae64..d6250602ae64 100644 --- a/include/asm-ppc/shmparam.h +++ b/include/asm-powerpc/shmparam.h | |||
diff --git a/include/asm-ppc/string.h b/include/asm-powerpc/string.h index 225575997392..225575997392 100644 --- a/include/asm-ppc/string.h +++ b/include/asm-powerpc/string.h | |||
diff --git a/include/asm-ppc/unaligned.h b/include/asm-powerpc/unaligned.h index 45520d9b85d1..45520d9b85d1 100644 --- a/include/asm-ppc/unaligned.h +++ b/include/asm-powerpc/unaligned.h | |||
diff --git a/include/asm-ppc/xor.h b/include/asm-powerpc/xor.h index c82eb12a5b18..c82eb12a5b18 100644 --- a/include/asm-ppc/xor.h +++ b/include/asm-powerpc/xor.h | |||
diff --git a/include/asm-ppc/8253pit.h b/include/asm-ppc/8253pit.h deleted file mode 100644 index 285f78488ccb..000000000000 --- a/include/asm-ppc/8253pit.h +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | /* | ||
2 | * 8253/8254 Programmable Interval Timer | ||
3 | */ | ||
4 | |||
5 | #ifndef _8253PIT_H | ||
6 | #define _8253PIT_H | ||
7 | |||
8 | #define PIT_TICK_RATE 1193182UL | ||
9 | |||
10 | #endif | ||
diff --git a/include/asm-ppc/cputime.h b/include/asm-ppc/cputime.h deleted file mode 100644 index 8e9faf5ce720..000000000000 --- a/include/asm-ppc/cputime.h +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | #ifndef __PPC_CPUTIME_H | ||
2 | #define __PPC_CPUTIME_H | ||
3 | |||
4 | #include <asm-generic/cputime.h> | ||
5 | |||
6 | #endif /* __PPC_CPUTIME_H */ | ||
diff --git a/include/asm-ppc/emergency-restart.h b/include/asm-ppc/emergency-restart.h deleted file mode 100644 index 108d8c48e42e..000000000000 --- a/include/asm-ppc/emergency-restart.h +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | #ifndef _ASM_EMERGENCY_RESTART_H | ||
2 | #define _ASM_EMERGENCY_RESTART_H | ||
3 | |||
4 | #include <asm-generic/emergency-restart.h> | ||
5 | |||
6 | #endif /* _ASM_EMERGENCY_RESTART_H */ | ||
diff --git a/include/asm-ppc/hdreg.h b/include/asm-ppc/hdreg.h deleted file mode 100644 index 7f7fd1af0af3..000000000000 --- a/include/asm-ppc/hdreg.h +++ /dev/null | |||
@@ -1 +0,0 @@ | |||
1 | #include <asm-generic/hdreg.h> | ||
diff --git a/include/asm-ppc/local.h b/include/asm-ppc/local.h deleted file mode 100644 index b08e3eced10e..000000000000 --- a/include/asm-ppc/local.h +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | #ifndef __PPC_LOCAL_H | ||
2 | #define __PPC_LOCAL_H | ||
3 | |||
4 | #include <asm-generic/local.h> | ||
5 | |||
6 | #endif /* __PPC_LOCAL_H */ | ||
diff --git a/include/asm-ppc/percpu.h b/include/asm-ppc/percpu.h deleted file mode 100644 index d66667cd5878..000000000000 --- a/include/asm-ppc/percpu.h +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | #ifndef __ARCH_PPC_PERCPU__ | ||
2 | #define __ARCH_PPC_PERCPU__ | ||
3 | |||
4 | #include <asm-generic/percpu.h> | ||
5 | |||
6 | #endif /* __ARCH_PPC_PERCPU__ */ | ||
diff --git a/include/asm-ppc/resource.h b/include/asm-ppc/resource.h deleted file mode 100644 index 86a1ea23a6ed..000000000000 --- a/include/asm-ppc/resource.h +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | #ifndef _PPC_RESOURCE_H | ||
2 | #define _PPC_RESOURCE_H | ||
3 | |||
4 | #include <asm-generic/resource.h> | ||
5 | |||
6 | #endif | ||
diff --git a/include/asm-ppc/socket.h b/include/asm-ppc/socket.h index 4134376b0f66..296e1a3469d0 100644 --- a/include/asm-ppc/socket.h +++ b/include/asm-ppc/socket.h | |||
@@ -20,6 +20,8 @@ | |||
20 | #define SO_BROADCAST 6 | 20 | #define SO_BROADCAST 6 |
21 | #define SO_SNDBUF 7 | 21 | #define SO_SNDBUF 7 |
22 | #define SO_RCVBUF 8 | 22 | #define SO_RCVBUF 8 |
23 | #define SO_SNDBUFFORCE 32 | ||
24 | #define SO_RCVBUFFORCE 33 | ||
23 | #define SO_KEEPALIVE 9 | 25 | #define SO_KEEPALIVE 9 |
24 | #define SO_OOBINLINE 10 | 26 | #define SO_OOBINLINE 10 |
25 | #define SO_NO_CHECK 11 | 27 | #define SO_NO_CHECK 11 |
diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h index 6d4e8e787058..84c24d4cdb71 100644 --- a/include/asm-ppc64/abs_addr.h +++ b/include/asm-ppc64/abs_addr.h | |||
@@ -16,93 +16,51 @@ | |||
16 | #include <asm/page.h> | 16 | #include <asm/page.h> |
17 | #include <asm/prom.h> | 17 | #include <asm/prom.h> |
18 | #include <asm/lmb.h> | 18 | #include <asm/lmb.h> |
19 | #include <asm/firmware.h> | ||
19 | 20 | ||
20 | typedef u32 msChunks_entry; | 21 | struct mschunks_map { |
21 | struct msChunks { | ||
22 | unsigned long num_chunks; | 22 | unsigned long num_chunks; |
23 | unsigned long chunk_size; | 23 | unsigned long chunk_size; |
24 | unsigned long chunk_shift; | 24 | unsigned long chunk_shift; |
25 | unsigned long chunk_mask; | 25 | unsigned long chunk_mask; |
26 | msChunks_entry *abs; | 26 | u32 *mapping; |
27 | }; | 27 | }; |
28 | 28 | ||
29 | extern struct msChunks msChunks; | 29 | extern struct mschunks_map mschunks_map; |
30 | 30 | ||
31 | extern unsigned long msChunks_alloc(unsigned long, unsigned long, unsigned long); | 31 | /* Chunks are 256 KB */ |
32 | extern unsigned long reloc_offset(void); | 32 | #define MSCHUNKS_CHUNK_SHIFT (18) |
33 | #define MSCHUNKS_CHUNK_SIZE (1UL << MSCHUNKS_CHUNK_SHIFT) | ||
34 | #define MSCHUNKS_OFFSET_MASK (MSCHUNKS_CHUNK_SIZE - 1) | ||
33 | 35 | ||
34 | #ifdef CONFIG_MSCHUNKS | 36 | static inline unsigned long chunk_to_addr(unsigned long chunk) |
35 | |||
36 | static inline unsigned long | ||
37 | chunk_to_addr(unsigned long chunk) | ||
38 | { | 37 | { |
39 | unsigned long offset = reloc_offset(); | 38 | return chunk << MSCHUNKS_CHUNK_SHIFT; |
40 | struct msChunks *_msChunks = PTRRELOC(&msChunks); | ||
41 | |||
42 | return chunk << _msChunks->chunk_shift; | ||
43 | } | 39 | } |
44 | 40 | ||
45 | static inline unsigned long | 41 | static inline unsigned long addr_to_chunk(unsigned long addr) |
46 | addr_to_chunk(unsigned long addr) | ||
47 | { | 42 | { |
48 | unsigned long offset = reloc_offset(); | 43 | return addr >> MSCHUNKS_CHUNK_SHIFT; |
49 | struct msChunks *_msChunks = PTRRELOC(&msChunks); | ||
50 | |||
51 | return addr >> _msChunks->chunk_shift; | ||
52 | } | 44 | } |
53 | 45 | ||
54 | static inline unsigned long | 46 | static inline unsigned long phys_to_abs(unsigned long pa) |
55 | chunk_offset(unsigned long addr) | ||
56 | { | 47 | { |
57 | unsigned long offset = reloc_offset(); | 48 | unsigned long chunk; |
58 | struct msChunks *_msChunks = PTRRELOC(&msChunks); | ||
59 | 49 | ||
60 | return addr & _msChunks->chunk_mask; | 50 | /* This is a no-op on non-iSeries */ |
61 | } | 51 | if (!firmware_has_feature(FW_FEATURE_ISERIES)) |
52 | return pa; | ||
62 | 53 | ||
63 | static inline unsigned long | 54 | chunk = addr_to_chunk(pa); |
64 | abs_chunk(unsigned long pchunk) | ||
65 | { | ||
66 | unsigned long offset = reloc_offset(); | ||
67 | struct msChunks *_msChunks = PTRRELOC(&msChunks); | ||
68 | if ( pchunk >= _msChunks->num_chunks ) { | ||
69 | return pchunk; | ||
70 | } | ||
71 | return PTRRELOC(_msChunks->abs)[pchunk]; | ||
72 | } | ||
73 | 55 | ||
74 | /* A macro so it can take pointers or unsigned long. */ | 56 | if (chunk < mschunks_map.num_chunks) |
75 | #define phys_to_abs(pa) \ | 57 | chunk = mschunks_map.mapping[chunk]; |
76 | ({ unsigned long _pa = (unsigned long)(pa); \ | ||
77 | chunk_to_addr(abs_chunk(addr_to_chunk(_pa))) + chunk_offset(_pa); \ | ||
78 | }) | ||
79 | 58 | ||
80 | static inline unsigned long | 59 | return chunk_to_addr(chunk) + (pa & MSCHUNKS_OFFSET_MASK); |
81 | physRpn_to_absRpn(unsigned long rpn) | ||
82 | { | ||
83 | unsigned long pa = rpn << PAGE_SHIFT; | ||
84 | unsigned long aa = phys_to_abs(pa); | ||
85 | return (aa >> PAGE_SHIFT); | ||
86 | } | 60 | } |
87 | 61 | ||
88 | /* A macro so it can take pointers or unsigned long. */ | ||
89 | #define abs_to_phys(aa) lmb_abs_to_phys((unsigned long)(aa)) | ||
90 | |||
91 | #else /* !CONFIG_MSCHUNKS */ | ||
92 | |||
93 | #define chunk_to_addr(chunk) ((unsigned long)(chunk)) | ||
94 | #define addr_to_chunk(addr) (addr) | ||
95 | #define chunk_offset(addr) (0) | ||
96 | #define abs_chunk(pchunk) (pchunk) | ||
97 | |||
98 | #define phys_to_abs(pa) (pa) | ||
99 | #define physRpn_to_absRpn(rpn) (rpn) | ||
100 | #define abs_to_phys(aa) (aa) | ||
101 | |||
102 | #endif /* !CONFIG_MSCHUNKS */ | ||
103 | |||
104 | /* Convenience macros */ | 62 | /* Convenience macros */ |
105 | #define virt_to_abs(va) phys_to_abs(__pa(va)) | 63 | #define virt_to_abs(va) phys_to_abs(__pa(va)) |
106 | #define abs_to_virt(aa) __va(abs_to_phys(aa)) | 64 | #define abs_to_virt(aa) __va(aa) |
107 | 65 | ||
108 | #endif /* _ABS_ADDR_H */ | 66 | #endif /* _ABS_ADDR_H */ |
diff --git a/include/asm-ppc64/agp.h b/include/asm-ppc64/agp.h deleted file mode 100644 index ca9e423307f4..000000000000 --- a/include/asm-ppc64/agp.h +++ /dev/null | |||
@@ -1,23 +0,0 @@ | |||
1 | #ifndef AGP_H | ||
2 | #define AGP_H 1 | ||
3 | |||
4 | #include <asm/io.h> | ||
5 | |||
6 | /* nothing much needed here */ | ||
7 | |||
8 | #define map_page_into_agp(page) | ||
9 | #define unmap_page_from_agp(page) | ||
10 | #define flush_agp_mappings() | ||
11 | #define flush_agp_cache() mb() | ||
12 | |||
13 | /* Convert a physical address to an address suitable for the GART. */ | ||
14 | #define phys_to_gart(x) (x) | ||
15 | #define gart_to_phys(x) (x) | ||
16 | |||
17 | /* GATT allocation. Returns/accepts GATT kernel virtual address. */ | ||
18 | #define alloc_gatt_pages(order) \ | ||
19 | ((char *)__get_free_pages(GFP_KERNEL, (order))) | ||
20 | #define free_gatt_pages(table, order) \ | ||
21 | free_pages((unsigned long)(table), (order)) | ||
22 | |||
23 | #endif | ||
diff --git a/include/asm-ppc64/cputable.h b/include/asm-ppc64/cputable.h index d67fa9e26079..ae6cf3830108 100644 --- a/include/asm-ppc64/cputable.h +++ b/include/asm-ppc64/cputable.h | |||
@@ -56,11 +56,6 @@ struct cpu_spec { | |||
56 | * BHT, SPD, etc... from head.S before branching to identify_machine | 56 | * BHT, SPD, etc... from head.S before branching to identify_machine |
57 | */ | 57 | */ |
58 | cpu_setup_t cpu_setup; | 58 | cpu_setup_t cpu_setup; |
59 | |||
60 | /* This is used to identify firmware features which are available | ||
61 | * to the kernel. | ||
62 | */ | ||
63 | unsigned long firmware_features; | ||
64 | }; | 59 | }; |
65 | 60 | ||
66 | extern struct cpu_spec cpu_specs[]; | 61 | extern struct cpu_spec cpu_specs[]; |
@@ -71,39 +66,6 @@ static inline unsigned long cpu_has_feature(unsigned long feature) | |||
71 | return cur_cpu_spec->cpu_features & feature; | 66 | return cur_cpu_spec->cpu_features & feature; |
72 | } | 67 | } |
73 | 68 | ||
74 | |||
75 | /* firmware feature bitmask values */ | ||
76 | #define FIRMWARE_MAX_FEATURES 63 | ||
77 | |||
78 | #define FW_FEATURE_PFT (1UL<<0) | ||
79 | #define FW_FEATURE_TCE (1UL<<1) | ||
80 | #define FW_FEATURE_SPRG0 (1UL<<2) | ||
81 | #define FW_FEATURE_DABR (1UL<<3) | ||
82 | #define FW_FEATURE_COPY (1UL<<4) | ||
83 | #define FW_FEATURE_ASR (1UL<<5) | ||
84 | #define FW_FEATURE_DEBUG (1UL<<6) | ||
85 | #define FW_FEATURE_TERM (1UL<<7) | ||
86 | #define FW_FEATURE_PERF (1UL<<8) | ||
87 | #define FW_FEATURE_DUMP (1UL<<9) | ||
88 | #define FW_FEATURE_INTERRUPT (1UL<<10) | ||
89 | #define FW_FEATURE_MIGRATE (1UL<<11) | ||
90 | #define FW_FEATURE_PERFMON (1UL<<12) | ||
91 | #define FW_FEATURE_CRQ (1UL<<13) | ||
92 | #define FW_FEATURE_VIO (1UL<<14) | ||
93 | #define FW_FEATURE_RDMA (1UL<<15) | ||
94 | #define FW_FEATURE_LLAN (1UL<<16) | ||
95 | #define FW_FEATURE_BULK (1UL<<17) | ||
96 | #define FW_FEATURE_XDABR (1UL<<18) | ||
97 | #define FW_FEATURE_MULTITCE (1UL<<19) | ||
98 | #define FW_FEATURE_SPLPAR (1UL<<20) | ||
99 | |||
100 | typedef struct { | ||
101 | unsigned long val; | ||
102 | char * name; | ||
103 | } firmware_feature_t; | ||
104 | |||
105 | extern firmware_feature_t firmware_features_table[]; | ||
106 | |||
107 | #endif /* __ASSEMBLY__ */ | 69 | #endif /* __ASSEMBLY__ */ |
108 | 70 | ||
109 | /* CPU kernel features */ | 71 | /* CPU kernel features */ |
@@ -140,10 +102,8 @@ extern firmware_feature_t firmware_features_table[]; | |||
140 | #define CPU_FTR_MMCRA_SIHV ASM_CONST(0x0000080000000000) | 102 | #define CPU_FTR_MMCRA_SIHV ASM_CONST(0x0000080000000000) |
141 | #define CPU_FTR_CTRL ASM_CONST(0x0000100000000000) | 103 | #define CPU_FTR_CTRL ASM_CONST(0x0000100000000000) |
142 | 104 | ||
143 | /* Platform firmware features */ | ||
144 | #define FW_FTR_ ASM_CONST(0x0000000000000001) | ||
145 | |||
146 | #ifndef __ASSEMBLY__ | 105 | #ifndef __ASSEMBLY__ |
106 | |||
147 | #define COMMON_USER_PPC64 (PPC_FEATURE_32 | PPC_FEATURE_64 | \ | 107 | #define COMMON_USER_PPC64 (PPC_FEATURE_32 | PPC_FEATURE_64 | \ |
148 | PPC_FEATURE_HAS_FPU | PPC_FEATURE_HAS_MMU) | 108 | PPC_FEATURE_HAS_FPU | PPC_FEATURE_HAS_MMU) |
149 | 109 | ||
@@ -156,10 +116,9 @@ extern firmware_feature_t firmware_features_table[]; | |||
156 | #define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_PPCAS_ARCH_V2_BASE) | 116 | #define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_PPCAS_ARCH_V2_BASE) |
157 | #else | 117 | #else |
158 | #define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_PPCAS_ARCH_V2_BASE | CPU_FTR_16M_PAGE) | 118 | #define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_PPCAS_ARCH_V2_BASE | CPU_FTR_16M_PAGE) |
159 | #endif | 119 | #endif /* CONFIG_PPC_ISERIES */ |
160 | 120 | ||
161 | #define COMMON_PPC64_FW (0) | 121 | #endif /* __ASSEMBLY */ |
162 | #endif | ||
163 | 122 | ||
164 | #ifdef __ASSEMBLY__ | 123 | #ifdef __ASSEMBLY__ |
165 | 124 | ||
diff --git a/include/asm-ppc64/cputime.h b/include/asm-ppc64/cputime.h deleted file mode 100644 index 8e9faf5ce720..000000000000 --- a/include/asm-ppc64/cputime.h +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | #ifndef __PPC_CPUTIME_H | ||
2 | #define __PPC_CPUTIME_H | ||
3 | |||
4 | #include <asm-generic/cputime.h> | ||
5 | |||
6 | #endif /* __PPC_CPUTIME_H */ | ||
diff --git a/include/asm-ppc64/div64.h b/include/asm-ppc64/div64.h deleted file mode 100644 index 6cd978cefb28..000000000000 --- a/include/asm-ppc64/div64.h +++ /dev/null | |||
@@ -1 +0,0 @@ | |||
1 | #include <asm-generic/div64.h> | ||
diff --git a/include/asm-ppc64/emergency-restart.h b/include/asm-ppc64/emergency-restart.h deleted file mode 100644 index 108d8c48e42e..000000000000 --- a/include/asm-ppc64/emergency-restart.h +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | #ifndef _ASM_EMERGENCY_RESTART_H | ||
2 | #define _ASM_EMERGENCY_RESTART_H | ||
3 | |||
4 | #include <asm-generic/emergency-restart.h> | ||
5 | |||
6 | #endif /* _ASM_EMERGENCY_RESTART_H */ | ||
diff --git a/include/asm-ppc64/errno.h b/include/asm-ppc64/errno.h deleted file mode 100644 index 69bc3b0c6cbe..000000000000 --- a/include/asm-ppc64/errno.h +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | #ifndef _PPC64_ERRNO_H | ||
2 | #define _PPC64_ERRNO_H | ||
3 | |||
4 | /* | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License | ||
7 | * as published by the Free Software Foundation; either version | ||
8 | * 2 of the License, or (at your option) any later version. | ||
9 | */ | ||
10 | |||
11 | #include <asm-generic/errno.h> | ||
12 | |||
13 | #undef EDEADLOCK | ||
14 | #define EDEADLOCK 58 /* File locking deadlock error */ | ||
15 | |||
16 | #define _LAST_ERRNO 516 | ||
17 | |||
18 | #endif | ||
diff --git a/include/asm-ppc64/firmware.h b/include/asm-ppc64/firmware.h new file mode 100644 index 000000000000..22bb85cf60af --- /dev/null +++ b/include/asm-ppc64/firmware.h | |||
@@ -0,0 +1,101 @@ | |||
1 | /* | ||
2 | * include/asm-ppc64/firmware.h | ||
3 | * | ||
4 | * Extracted from include/asm-ppc64/cputable.h | ||
5 | * | ||
6 | * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) | ||
7 | * | ||
8 | * Modifications for ppc64: | ||
9 | * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or | ||
12 | * modify it under the terms of the GNU General Public License | ||
13 | * as published by the Free Software Foundation; either version | ||
14 | * 2 of the License, or (at your option) any later version. | ||
15 | */ | ||
16 | #ifndef __ASM_PPC_FIRMWARE_H | ||
17 | #define __ASM_PPC_FIRMWARE_H | ||
18 | |||
19 | #ifdef __KERNEL__ | ||
20 | |||
21 | #ifndef __ASSEMBLY__ | ||
22 | |||
23 | /* firmware feature bitmask values */ | ||
24 | #define FIRMWARE_MAX_FEATURES 63 | ||
25 | |||
26 | #define FW_FEATURE_PFT (1UL<<0) | ||
27 | #define FW_FEATURE_TCE (1UL<<1) | ||
28 | #define FW_FEATURE_SPRG0 (1UL<<2) | ||
29 | #define FW_FEATURE_DABR (1UL<<3) | ||
30 | #define FW_FEATURE_COPY (1UL<<4) | ||
31 | #define FW_FEATURE_ASR (1UL<<5) | ||
32 | #define FW_FEATURE_DEBUG (1UL<<6) | ||
33 | #define FW_FEATURE_TERM (1UL<<7) | ||
34 | #define FW_FEATURE_PERF (1UL<<8) | ||
35 | #define FW_FEATURE_DUMP (1UL<<9) | ||
36 | #define FW_FEATURE_INTERRUPT (1UL<<10) | ||
37 | #define FW_FEATURE_MIGRATE (1UL<<11) | ||
38 | #define FW_FEATURE_PERFMON (1UL<<12) | ||
39 | #define FW_FEATURE_CRQ (1UL<<13) | ||
40 | #define FW_FEATURE_VIO (1UL<<14) | ||
41 | #define FW_FEATURE_RDMA (1UL<<15) | ||
42 | #define FW_FEATURE_LLAN (1UL<<16) | ||
43 | #define FW_FEATURE_BULK (1UL<<17) | ||
44 | #define FW_FEATURE_XDABR (1UL<<18) | ||
45 | #define FW_FEATURE_MULTITCE (1UL<<19) | ||
46 | #define FW_FEATURE_SPLPAR (1UL<<20) | ||
47 | #define FW_FEATURE_ISERIES (1UL<<21) | ||
48 | |||
49 | enum { | ||
50 | FW_FEATURE_PSERIES_POSSIBLE = FW_FEATURE_PFT | FW_FEATURE_TCE | | ||
51 | FW_FEATURE_SPRG0 | FW_FEATURE_DABR | FW_FEATURE_COPY | | ||
52 | FW_FEATURE_ASR | FW_FEATURE_DEBUG | FW_FEATURE_TERM | | ||
53 | FW_FEATURE_PERF | FW_FEATURE_DUMP | FW_FEATURE_INTERRUPT | | ||
54 | FW_FEATURE_MIGRATE | FW_FEATURE_PERFMON | FW_FEATURE_CRQ | | ||
55 | FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN | | ||
56 | FW_FEATURE_BULK | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE | | ||
57 | FW_FEATURE_SPLPAR, | ||
58 | FW_FEATURE_PSERIES_ALWAYS = 0, | ||
59 | FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES, | ||
60 | FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES, | ||
61 | FW_FEATURE_POSSIBLE = | ||
62 | #ifdef CONFIG_PPC_PSERIES | ||
63 | FW_FEATURE_PSERIES_POSSIBLE | | ||
64 | #endif | ||
65 | #ifdef CONFIG_PPC_ISERIES | ||
66 | FW_FEATURE_ISERIES_POSSIBLE | | ||
67 | #endif | ||
68 | 0, | ||
69 | FW_FEATURE_ALWAYS = | ||
70 | #ifdef CONFIG_PPC_PSERIES | ||
71 | FW_FEATURE_PSERIES_ALWAYS & | ||
72 | #endif | ||
73 | #ifdef CONFIG_PPC_ISERIES | ||
74 | FW_FEATURE_ISERIES_ALWAYS & | ||
75 | #endif | ||
76 | FW_FEATURE_POSSIBLE, | ||
77 | }; | ||
78 | |||
79 | /* This is used to identify firmware features which are available | ||
80 | * to the kernel. | ||
81 | */ | ||
82 | extern unsigned long ppc64_firmware_features; | ||
83 | |||
84 | static inline unsigned long firmware_has_feature(unsigned long feature) | ||
85 | { | ||
86 | return (FW_FEATURE_ALWAYS & feature) || | ||
87 | (FW_FEATURE_POSSIBLE & ppc64_firmware_features & feature); | ||
88 | } | ||
89 | |||
90 | #ifdef CONFIG_PPC_PSERIES | ||
91 | typedef struct { | ||
92 | unsigned long val; | ||
93 | char * name; | ||
94 | } firmware_feature_t; | ||
95 | |||
96 | extern firmware_feature_t firmware_features_table[]; | ||
97 | #endif | ||
98 | |||
99 | #endif /* __ASSEMBLY__ */ | ||
100 | #endif /* __KERNEL__ */ | ||
101 | #endif /* __ASM_PPC_FIRMWARE_H */ | ||
diff --git a/include/asm-ppc64/hdreg.h b/include/asm-ppc64/hdreg.h deleted file mode 100644 index 7f7fd1af0af3..000000000000 --- a/include/asm-ppc64/hdreg.h +++ /dev/null | |||
@@ -1 +0,0 @@ | |||
1 | #include <asm-generic/hdreg.h> | ||
diff --git a/include/asm-ppc64/imalloc.h b/include/asm-ppc64/imalloc.h index e46ff68a6e41..42adf7033a81 100644 --- a/include/asm-ppc64/imalloc.h +++ b/include/asm-ppc64/imalloc.h | |||
@@ -6,7 +6,7 @@ | |||
6 | */ | 6 | */ |
7 | #define PHBS_IO_BASE VMALLOC_END | 7 | #define PHBS_IO_BASE VMALLOC_END |
8 | #define IMALLOC_BASE (PHBS_IO_BASE + 0x80000000ul) /* Reserve 2 gigs for PHBs */ | 8 | #define IMALLOC_BASE (PHBS_IO_BASE + 0x80000000ul) /* Reserve 2 gigs for PHBs */ |
9 | #define IMALLOC_END (VMALLOC_START + EADDR_MASK) | 9 | #define IMALLOC_END (VMALLOC_START + PGTABLE_RANGE) |
10 | 10 | ||
11 | 11 | ||
12 | /* imalloc region types */ | 12 | /* imalloc region types */ |
diff --git a/include/asm-ppc64/ioctl.h b/include/asm-ppc64/ioctl.h deleted file mode 100644 index 42b8c5da7fbc..000000000000 --- a/include/asm-ppc64/ioctl.h +++ /dev/null | |||
@@ -1,74 +0,0 @@ | |||
1 | #ifndef _PPC64_IOCTL_H | ||
2 | #define _PPC64_IOCTL_H | ||
3 | |||
4 | |||
5 | /* | ||
6 | * This was copied from the alpha as it's a bit cleaner there. | ||
7 | * -- Cort | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License | ||
11 | * as published by the Free Software Foundation; either version | ||
12 | * 2 of the License, or (at your option) any later version. | ||
13 | */ | ||
14 | |||
15 | #define _IOC_NRBITS 8 | ||
16 | #define _IOC_TYPEBITS 8 | ||
17 | #define _IOC_SIZEBITS 13 | ||
18 | #define _IOC_DIRBITS 3 | ||
19 | |||
20 | #define _IOC_NRMASK ((1 << _IOC_NRBITS)-1) | ||
21 | #define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1) | ||
22 | #define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1) | ||
23 | #define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1) | ||
24 | |||
25 | #define _IOC_NRSHIFT 0 | ||
26 | #define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS) | ||
27 | #define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS) | ||
28 | #define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS) | ||
29 | |||
30 | /* | ||
31 | * Direction bits _IOC_NONE could be 0, but OSF/1 gives it a bit. | ||
32 | * And this turns out useful to catch old ioctl numbers in header | ||
33 | * files for us. | ||
34 | */ | ||
35 | #define _IOC_NONE 1U | ||
36 | #define _IOC_READ 2U | ||
37 | #define _IOC_WRITE 4U | ||
38 | |||
39 | #define _IOC(dir,type,nr,size) \ | ||
40 | (((dir) << _IOC_DIRSHIFT) | \ | ||
41 | ((type) << _IOC_TYPESHIFT) | \ | ||
42 | ((nr) << _IOC_NRSHIFT) | \ | ||
43 | ((size) << _IOC_SIZESHIFT)) | ||
44 | |||
45 | /* provoke compile error for invalid uses of size argument */ | ||
46 | extern unsigned int __invalid_size_argument_for_IOC; | ||
47 | #define _IOC_TYPECHECK(t) \ | ||
48 | ((sizeof(t) == sizeof(t[1]) && \ | ||
49 | sizeof(t) < (1 << _IOC_SIZEBITS)) ? \ | ||
50 | sizeof(t) : __invalid_size_argument_for_IOC) | ||
51 | |||
52 | /* used to create numbers */ | ||
53 | #define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) | ||
54 | #define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),(_IOC_TYPECHECK(size))) | ||
55 | #define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size))) | ||
56 | #define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size))) | ||
57 | #define _IOR_BAD(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size)) | ||
58 | #define _IOW_BAD(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size)) | ||
59 | #define _IOWR_BAD(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size)) | ||
60 | |||
61 | /* used to decode them.. */ | ||
62 | #define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) | ||
63 | #define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK) | ||
64 | #define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK) | ||
65 | #define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK) | ||
66 | |||
67 | /* various drivers, such as the pcmcia stuff, need these... */ | ||
68 | #define IOC_IN (_IOC_WRITE << _IOC_DIRSHIFT) | ||
69 | #define IOC_OUT (_IOC_READ << _IOC_DIRSHIFT) | ||
70 | #define IOC_INOUT ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT) | ||
71 | #define IOCSIZE_MASK (_IOC_SIZEMASK << _IOC_SIZESHIFT) | ||
72 | #define IOCSIZE_SHIFT (_IOC_SIZESHIFT) | ||
73 | |||
74 | #endif /* _PPC64_IOCTL_H */ | ||
diff --git a/include/asm-ppc64/ioctls.h b/include/asm-ppc64/ioctls.h deleted file mode 100644 index 48796bf3e4fc..000000000000 --- a/include/asm-ppc64/ioctls.h +++ /dev/null | |||
@@ -1,114 +0,0 @@ | |||
1 | #ifndef _ASM_PPC64_IOCTLS_H | ||
2 | #define _ASM_PPC64_IOCTLS_H | ||
3 | |||
4 | /* | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License | ||
7 | * as published by the Free Software Foundation; either version | ||
8 | * 2 of the License, or (at your option) any later version. | ||
9 | */ | ||
10 | |||
11 | #include <asm/ioctl.h> | ||
12 | |||
13 | #define FIOCLEX _IO('f', 1) | ||
14 | #define FIONCLEX _IO('f', 2) | ||
15 | #define FIOASYNC _IOW('f', 125, int) | ||
16 | #define FIONBIO _IOW('f', 126, int) | ||
17 | #define FIONREAD _IOR('f', 127, int) | ||
18 | #define TIOCINQ FIONREAD | ||
19 | #define FIOQSIZE _IOR('f', 128, loff_t) | ||
20 | |||
21 | #define TIOCGETP _IOR('t', 8, struct sgttyb) | ||
22 | #define TIOCSETP _IOW('t', 9, struct sgttyb) | ||
23 | #define TIOCSETN _IOW('t', 10, struct sgttyb) /* TIOCSETP wo flush */ | ||
24 | |||
25 | #define TIOCSETC _IOW('t', 17, struct tchars) | ||
26 | #define TIOCGETC _IOR('t', 18, struct tchars) | ||
27 | #define TCGETS _IOR('t', 19, struct termios) | ||
28 | #define TCSETS _IOW('t', 20, struct termios) | ||
29 | #define TCSETSW _IOW('t', 21, struct termios) | ||
30 | #define TCSETSF _IOW('t', 22, struct termios) | ||
31 | |||
32 | #define TCGETA _IOR('t', 23, struct termio) | ||
33 | #define TCSETA _IOW('t', 24, struct termio) | ||
34 | #define TCSETAW _IOW('t', 25, struct termio) | ||
35 | #define TCSETAF _IOW('t', 28, struct termio) | ||
36 | |||
37 | #define TCSBRK _IO('t', 29) | ||
38 | #define TCXONC _IO('t', 30) | ||
39 | #define TCFLSH _IO('t', 31) | ||
40 | |||
41 | #define TIOCSWINSZ _IOW('t', 103, struct winsize) | ||
42 | #define TIOCGWINSZ _IOR('t', 104, struct winsize) | ||
43 | #define TIOCSTART _IO('t', 110) /* start output, like ^Q */ | ||
44 | #define TIOCSTOP _IO('t', 111) /* stop output, like ^S */ | ||
45 | #define TIOCOUTQ _IOR('t', 115, int) /* output queue size */ | ||
46 | |||
47 | #define TIOCGLTC _IOR('t', 116, struct ltchars) | ||
48 | #define TIOCSLTC _IOW('t', 117, struct ltchars) | ||
49 | #define TIOCSPGRP _IOW('t', 118, int) | ||
50 | #define TIOCGPGRP _IOR('t', 119, int) | ||
51 | |||
52 | #define TIOCEXCL 0x540C | ||
53 | #define TIOCNXCL 0x540D | ||
54 | #define TIOCSCTTY 0x540E | ||
55 | |||
56 | #define TIOCSTI 0x5412 | ||
57 | #define TIOCMGET 0x5415 | ||
58 | #define TIOCMBIS 0x5416 | ||
59 | #define TIOCMBIC 0x5417 | ||
60 | #define TIOCMSET 0x5418 | ||
61 | # define TIOCM_LE 0x001 | ||
62 | # define TIOCM_DTR 0x002 | ||
63 | # define TIOCM_RTS 0x004 | ||
64 | # define TIOCM_ST 0x008 | ||
65 | # define TIOCM_SR 0x010 | ||
66 | # define TIOCM_CTS 0x020 | ||
67 | # define TIOCM_CAR 0x040 | ||
68 | # define TIOCM_RNG 0x080 | ||
69 | # define TIOCM_DSR 0x100 | ||
70 | # define TIOCM_CD TIOCM_CAR | ||
71 | # define TIOCM_RI TIOCM_RNG | ||
72 | |||
73 | #define TIOCGSOFTCAR 0x5419 | ||
74 | #define TIOCSSOFTCAR 0x541A | ||
75 | #define TIOCLINUX 0x541C | ||
76 | #define TIOCCONS 0x541D | ||
77 | #define TIOCGSERIAL 0x541E | ||
78 | #define TIOCSSERIAL 0x541F | ||
79 | #define TIOCPKT 0x5420 | ||
80 | # define TIOCPKT_DATA 0 | ||
81 | # define TIOCPKT_FLUSHREAD 1 | ||
82 | # define TIOCPKT_FLUSHWRITE 2 | ||
83 | # define TIOCPKT_STOP 4 | ||
84 | # define TIOCPKT_START 8 | ||
85 | # define TIOCPKT_NOSTOP 16 | ||
86 | # define TIOCPKT_DOSTOP 32 | ||
87 | |||
88 | |||
89 | #define TIOCNOTTY 0x5422 | ||
90 | #define TIOCSETD 0x5423 | ||
91 | #define TIOCGETD 0x5424 | ||
92 | #define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ | ||
93 | #define TIOCSBRK 0x5427 /* BSD compatibility */ | ||
94 | #define TIOCCBRK 0x5428 /* BSD compatibility */ | ||
95 | #define TIOCGSID 0x5429 /* Return the session ID of FD */ | ||
96 | #define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ | ||
97 | #define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */ | ||
98 | |||
99 | #define TIOCSERCONFIG 0x5453 | ||
100 | #define TIOCSERGWILD 0x5454 | ||
101 | #define TIOCSERSWILD 0x5455 | ||
102 | #define TIOCGLCKTRMIOS 0x5456 | ||
103 | #define TIOCSLCKTRMIOS 0x5457 | ||
104 | #define TIOCSERGSTRUCT 0x5458 /* For debugging only */ | ||
105 | #define TIOCSERGETLSR 0x5459 /* Get line status register */ | ||
106 | /* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ | ||
107 | # define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ | ||
108 | #define TIOCSERGETMULTI 0x545A /* Get multiport config */ | ||
109 | #define TIOCSERSETMULTI 0x545B /* Set multiport config */ | ||
110 | |||
111 | #define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ | ||
112 | #define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ | ||
113 | |||
114 | #endif /* _ASM_PPC64_IOCTLS_H */ | ||
diff --git a/include/asm-ppc64/iommu.h b/include/asm-ppc64/iommu.h index 729de5cc21d9..72dcf8116b04 100644 --- a/include/asm-ppc64/iommu.h +++ b/include/asm-ppc64/iommu.h | |||
@@ -104,9 +104,6 @@ extern void iommu_devnode_init_pSeries(struct device_node *dn); | |||
104 | 104 | ||
105 | #ifdef CONFIG_PPC_ISERIES | 105 | #ifdef CONFIG_PPC_ISERIES |
106 | 106 | ||
107 | /* Initializes tables for bio buses */ | ||
108 | extern void __init iommu_vio_init(void); | ||
109 | |||
110 | struct iSeries_Device_Node; | 107 | struct iSeries_Device_Node; |
111 | /* Creates table for an individual device node */ | 108 | /* Creates table for an individual device node */ |
112 | extern void iommu_devnode_init_iSeries(struct iSeries_Device_Node *dn); | 109 | extern void iommu_devnode_init_iSeries(struct iSeries_Device_Node *dn); |
diff --git a/include/asm-ppc64/ipc.h b/include/asm-ppc64/ipc.h deleted file mode 100644 index a46e3d9c2a3f..000000000000 --- a/include/asm-ppc64/ipc.h +++ /dev/null | |||
@@ -1 +0,0 @@ | |||
1 | #include <asm-generic/ipc.h> | ||
diff --git a/include/asm-ppc64/linkage.h b/include/asm-ppc64/linkage.h deleted file mode 100644 index 291c2d01c44f..000000000000 --- a/include/asm-ppc64/linkage.h +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | #ifndef __ASM_LINKAGE_H | ||
2 | #define __ASM_LINKAGE_H | ||
3 | |||
4 | /* Nothing to see here... */ | ||
5 | |||
6 | #endif | ||
diff --git a/include/asm-ppc64/lmb.h b/include/asm-ppc64/lmb.h index a6cbca21ac1d..cb368bf0f264 100644 --- a/include/asm-ppc64/lmb.h +++ b/include/asm-ppc64/lmb.h | |||
@@ -22,7 +22,6 @@ | |||
22 | 22 | ||
23 | struct lmb_property { | 23 | struct lmb_property { |
24 | unsigned long base; | 24 | unsigned long base; |
25 | unsigned long physbase; | ||
26 | unsigned long size; | 25 | unsigned long size; |
27 | }; | 26 | }; |
28 | 27 | ||
diff --git a/include/asm-ppc64/machdep.h b/include/asm-ppc64/machdep.h index f0ef06375947..ff2c9287d3b6 100644 --- a/include/asm-ppc64/machdep.h +++ b/include/asm-ppc64/machdep.h | |||
@@ -140,6 +140,9 @@ struct machdep_calls { | |||
140 | 140 | ||
141 | /* Idle loop for this platform, leave empty for default idle loop */ | 141 | /* Idle loop for this platform, leave empty for default idle loop */ |
142 | int (*idle_loop)(void); | 142 | int (*idle_loop)(void); |
143 | |||
144 | /* Function to enable pmcs for this platform, called once per cpu. */ | ||
145 | void (*enable_pmcs)(void); | ||
143 | }; | 146 | }; |
144 | 147 | ||
145 | extern int default_idle(void); | 148 | extern int default_idle(void); |
diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h index 70348a851313..ad36bb28de29 100644 --- a/include/asm-ppc64/mmu.h +++ b/include/asm-ppc64/mmu.h | |||
@@ -28,9 +28,12 @@ | |||
28 | #define STE_VSID_SHIFT 12 | 28 | #define STE_VSID_SHIFT 12 |
29 | 29 | ||
30 | /* Location of cpu0's segment table */ | 30 | /* Location of cpu0's segment table */ |
31 | #define STAB0_PAGE 0x9 | 31 | #define STAB0_PAGE 0x6 |
32 | #define STAB0_PHYS_ADDR (STAB0_PAGE<<PAGE_SHIFT) | 32 | #define STAB0_PHYS_ADDR (STAB0_PAGE<<PAGE_SHIFT) |
33 | #define STAB0_VIRT_ADDR (KERNELBASE+STAB0_PHYS_ADDR) | 33 | |
34 | #ifndef __ASSEMBLY__ | ||
35 | extern char initial_stab[]; | ||
36 | #endif /* ! __ASSEMBLY */ | ||
34 | 37 | ||
35 | /* | 38 | /* |
36 | * SLB | 39 | * SLB |
@@ -259,8 +262,10 @@ extern void stabs_alloc(void); | |||
259 | #define VSID_BITS 36 | 262 | #define VSID_BITS 36 |
260 | #define VSID_MODULUS ((1UL<<VSID_BITS)-1) | 263 | #define VSID_MODULUS ((1UL<<VSID_BITS)-1) |
261 | 264 | ||
262 | #define CONTEXT_BITS 20 | 265 | #define CONTEXT_BITS 19 |
263 | #define USER_ESID_BITS 15 | 266 | #define USER_ESID_BITS 16 |
267 | |||
268 | #define USER_VSID_RANGE (1UL << (USER_ESID_BITS + SID_SHIFT)) | ||
264 | 269 | ||
265 | /* | 270 | /* |
266 | * This macro generates asm code to compute the VSID scramble | 271 | * This macro generates asm code to compute the VSID scramble |
@@ -302,8 +307,7 @@ typedef unsigned long mm_context_id_t; | |||
302 | typedef struct { | 307 | typedef struct { |
303 | mm_context_id_t id; | 308 | mm_context_id_t id; |
304 | #ifdef CONFIG_HUGETLB_PAGE | 309 | #ifdef CONFIG_HUGETLB_PAGE |
305 | pgd_t *huge_pgdir; | 310 | u16 low_htlb_areas, high_htlb_areas; |
306 | u16 htlb_segs; /* bitmask */ | ||
307 | #endif | 311 | #endif |
308 | } mm_context_t; | 312 | } mm_context_t; |
309 | 313 | ||
diff --git a/include/asm-ppc64/naca.h b/include/asm-ppc64/naca.h index bfb7caa32eaf..d2afe6447597 100644 --- a/include/asm-ppc64/naca.h +++ b/include/asm-ppc64/naca.h | |||
@@ -12,8 +12,6 @@ | |||
12 | 12 | ||
13 | #include <asm/types.h> | 13 | #include <asm/types.h> |
14 | 14 | ||
15 | #ifndef __ASSEMBLY__ | ||
16 | |||
17 | struct naca_struct { | 15 | struct naca_struct { |
18 | /* Kernel only data - undefined for user space */ | 16 | /* Kernel only data - undefined for user space */ |
19 | void *xItVpdAreas; /* VPD Data 0x00 */ | 17 | void *xItVpdAreas; /* VPD Data 0x00 */ |
@@ -23,9 +21,4 @@ struct naca_struct { | |||
23 | 21 | ||
24 | extern struct naca_struct naca; | 22 | extern struct naca_struct naca; |
25 | 23 | ||
26 | #endif /* __ASSEMBLY__ */ | ||
27 | |||
28 | #define NACA_PAGE 0x4 | ||
29 | #define NACA_PHYS_ADDR (NACA_PAGE<<PAGE_SHIFT) | ||
30 | |||
31 | #endif /* _NACA_H */ | 24 | #endif /* _NACA_H */ |
diff --git a/include/asm-ppc64/namei.h b/include/asm-ppc64/namei.h deleted file mode 100644 index a1412a2d102a..000000000000 --- a/include/asm-ppc64/namei.h +++ /dev/null | |||
@@ -1,23 +0,0 @@ | |||
1 | /* | ||
2 | * linux/include/asm-ppc/namei.h | ||
3 | * Adapted from linux/include/asm-alpha/namei.h | ||
4 | * | ||
5 | * Included from linux/fs/namei.c | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | ||
12 | |||
13 | #ifndef __PPC64_NAMEI_H | ||
14 | #define __PPC64_NAMEI_H | ||
15 | |||
16 | /* This dummy routine maybe changed to something useful | ||
17 | * for /usr/gnemul/ emulation stuff. | ||
18 | * Look at asm-sparc/namei.h for details. | ||
19 | */ | ||
20 | |||
21 | #define __emul_prefix() NULL | ||
22 | |||
23 | #endif /* __PPC64_NAMEI_H */ | ||
diff --git a/include/asm-ppc64/page.h b/include/asm-ppc64/page.h index a5893a305a09..a79a08df62bd 100644 --- a/include/asm-ppc64/page.h +++ b/include/asm-ppc64/page.h | |||
@@ -37,39 +37,45 @@ | |||
37 | 37 | ||
38 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) | 38 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) |
39 | 39 | ||
40 | /* For 64-bit processes the hugepage range is 1T-1.5T */ | 40 | #define HTLB_AREA_SHIFT 40 |
41 | #define TASK_HPAGE_BASE ASM_CONST(0x0000010000000000) | 41 | #define HTLB_AREA_SIZE (1UL << HTLB_AREA_SHIFT) |
42 | #define TASK_HPAGE_END ASM_CONST(0x0000018000000000) | 42 | #define GET_HTLB_AREA(x) ((x) >> HTLB_AREA_SHIFT) |
43 | 43 | ||
44 | #define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \ | 44 | #define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \ |
45 | - (1U << GET_ESID(addr))) & 0xffff) | 45 | - (1U << GET_ESID(addr))) & 0xffff) |
46 | #define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \ | ||
47 | - (1U << GET_HTLB_AREA(addr))) & 0xffff) | ||
46 | 48 | ||
47 | #define ARCH_HAS_HUGEPAGE_ONLY_RANGE | 49 | #define ARCH_HAS_HUGEPAGE_ONLY_RANGE |
48 | #define ARCH_HAS_PREPARE_HUGEPAGE_RANGE | 50 | #define ARCH_HAS_PREPARE_HUGEPAGE_RANGE |
51 | #define ARCH_HAS_SETCLEAR_HUGE_PTE | ||
49 | 52 | ||
50 | #define touches_hugepage_low_range(mm, addr, len) \ | 53 | #define touches_hugepage_low_range(mm, addr, len) \ |
51 | (LOW_ESID_MASK((addr), (len)) & mm->context.htlb_segs) | 54 | (LOW_ESID_MASK((addr), (len)) & (mm)->context.low_htlb_areas) |
52 | #define touches_hugepage_high_range(addr, len) \ | 55 | #define touches_hugepage_high_range(mm, addr, len) \ |
53 | (((addr) > (TASK_HPAGE_BASE-(len))) && ((addr) < TASK_HPAGE_END)) | 56 | (HTLB_AREA_MASK((addr), (len)) & (mm)->context.high_htlb_areas) |
54 | 57 | ||
55 | #define __within_hugepage_low_range(addr, len, segmask) \ | 58 | #define __within_hugepage_low_range(addr, len, segmask) \ |
56 | ((LOW_ESID_MASK((addr), (len)) | (segmask)) == (segmask)) | 59 | ((LOW_ESID_MASK((addr), (len)) | (segmask)) == (segmask)) |
57 | #define within_hugepage_low_range(addr, len) \ | 60 | #define within_hugepage_low_range(addr, len) \ |
58 | __within_hugepage_low_range((addr), (len), \ | 61 | __within_hugepage_low_range((addr), (len), \ |
59 | current->mm->context.htlb_segs) | 62 | current->mm->context.low_htlb_areas) |
60 | #define within_hugepage_high_range(addr, len) (((addr) >= TASK_HPAGE_BASE) \ | 63 | #define __within_hugepage_high_range(addr, len, zonemask) \ |
61 | && ((addr)+(len) <= TASK_HPAGE_END) && ((addr)+(len) >= (addr))) | 64 | ((HTLB_AREA_MASK((addr), (len)) | (zonemask)) == (zonemask)) |
65 | #define within_hugepage_high_range(addr, len) \ | ||
66 | __within_hugepage_high_range((addr), (len), \ | ||
67 | current->mm->context.high_htlb_areas) | ||
62 | 68 | ||
63 | #define is_hugepage_only_range(mm, addr, len) \ | 69 | #define is_hugepage_only_range(mm, addr, len) \ |
64 | (touches_hugepage_high_range((addr), (len)) || \ | 70 | (touches_hugepage_high_range((mm), (addr), (len)) || \ |
65 | touches_hugepage_low_range((mm), (addr), (len))) | 71 | touches_hugepage_low_range((mm), (addr), (len))) |
66 | #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA | 72 | #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA |
67 | 73 | ||
68 | #define in_hugepage_area(context, addr) \ | 74 | #define in_hugepage_area(context, addr) \ |
69 | (cpu_has_feature(CPU_FTR_16M_PAGE) && \ | 75 | (cpu_has_feature(CPU_FTR_16M_PAGE) && \ |
70 | ( (((addr) >= TASK_HPAGE_BASE) && ((addr) < TASK_HPAGE_END)) || \ | 76 | ( ((1 << GET_HTLB_AREA(addr)) & (context).high_htlb_areas) || \ |
71 | ( ((addr) < 0x100000000L) && \ | 77 | ( ((addr) < 0x100000000L) && \ |
72 | ((1 << GET_ESID(addr)) & (context).htlb_segs) ) ) ) | 78 | ((1 << GET_ESID(addr)) & (context).low_htlb_areas) ) ) ) |
73 | 79 | ||
74 | #else /* !CONFIG_HUGETLB_PAGE */ | 80 | #else /* !CONFIG_HUGETLB_PAGE */ |
75 | 81 | ||
@@ -125,36 +131,42 @@ extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct pag | |||
125 | * Entries in the pte table are 64b, while entries in the pgd & pmd are 32b. | 131 | * Entries in the pte table are 64b, while entries in the pgd & pmd are 32b. |
126 | */ | 132 | */ |
127 | typedef struct { unsigned long pte; } pte_t; | 133 | typedef struct { unsigned long pte; } pte_t; |
128 | typedef struct { unsigned int pmd; } pmd_t; | 134 | typedef struct { unsigned long pmd; } pmd_t; |
129 | typedef struct { unsigned int pgd; } pgd_t; | 135 | typedef struct { unsigned long pud; } pud_t; |
136 | typedef struct { unsigned long pgd; } pgd_t; | ||
130 | typedef struct { unsigned long pgprot; } pgprot_t; | 137 | typedef struct { unsigned long pgprot; } pgprot_t; |
131 | 138 | ||
132 | #define pte_val(x) ((x).pte) | 139 | #define pte_val(x) ((x).pte) |
133 | #define pmd_val(x) ((x).pmd) | 140 | #define pmd_val(x) ((x).pmd) |
141 | #define pud_val(x) ((x).pud) | ||
134 | #define pgd_val(x) ((x).pgd) | 142 | #define pgd_val(x) ((x).pgd) |
135 | #define pgprot_val(x) ((x).pgprot) | 143 | #define pgprot_val(x) ((x).pgprot) |
136 | 144 | ||
137 | #define __pte(x) ((pte_t) { (x) } ) | 145 | #define __pte(x) ((pte_t) { (x) }) |
138 | #define __pmd(x) ((pmd_t) { (x) } ) | 146 | #define __pmd(x) ((pmd_t) { (x) }) |
139 | #define __pgd(x) ((pgd_t) { (x) } ) | 147 | #define __pud(x) ((pud_t) { (x) }) |
140 | #define __pgprot(x) ((pgprot_t) { (x) } ) | 148 | #define __pgd(x) ((pgd_t) { (x) }) |
149 | #define __pgprot(x) ((pgprot_t) { (x) }) | ||
141 | 150 | ||
142 | #else | 151 | #else |
143 | /* | 152 | /* |
144 | * .. while these make it easier on the compiler | 153 | * .. while these make it easier on the compiler |
145 | */ | 154 | */ |
146 | typedef unsigned long pte_t; | 155 | typedef unsigned long pte_t; |
147 | typedef unsigned int pmd_t; | 156 | typedef unsigned long pmd_t; |
148 | typedef unsigned int pgd_t; | 157 | typedef unsigned long pud_t; |
158 | typedef unsigned long pgd_t; | ||
149 | typedef unsigned long pgprot_t; | 159 | typedef unsigned long pgprot_t; |
150 | 160 | ||
151 | #define pte_val(x) (x) | 161 | #define pte_val(x) (x) |
152 | #define pmd_val(x) (x) | 162 | #define pmd_val(x) (x) |
163 | #define pud_val(x) (x) | ||
153 | #define pgd_val(x) (x) | 164 | #define pgd_val(x) (x) |
154 | #define pgprot_val(x) (x) | 165 | #define pgprot_val(x) (x) |
155 | 166 | ||
156 | #define __pte(x) (x) | 167 | #define __pte(x) (x) |
157 | #define __pmd(x) (x) | 168 | #define __pmd(x) (x) |
169 | #define __pud(x) (x) | ||
158 | #define __pgd(x) (x) | 170 | #define __pgd(x) (x) |
159 | #define __pgprot(x) (x) | 171 | #define __pgprot(x) (x) |
160 | 172 | ||
@@ -208,9 +220,6 @@ extern u64 ppc64_pft_size; /* Log 2 of page table size */ | |||
208 | #define USER_REGION_ID (0UL) | 220 | #define USER_REGION_ID (0UL) |
209 | #define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) | 221 | #define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) |
210 | 222 | ||
211 | #define __bpn_to_ba(x) ((((unsigned long)(x)) << PAGE_SHIFT) + KERNELBASE) | ||
212 | #define __ba_to_bpn(x) ((((unsigned long)(x)) & ~REGION_MASK) >> PAGE_SHIFT) | ||
213 | |||
214 | #define __va(x) ((void *)((unsigned long)(x) + KERNELBASE)) | 223 | #define __va(x) ((void *)((unsigned long)(x) + KERNELBASE)) |
215 | 224 | ||
216 | #ifdef CONFIG_DISCONTIGMEM | 225 | #ifdef CONFIG_DISCONTIGMEM |
diff --git a/include/asm-ppc64/param.h b/include/asm-ppc64/param.h index 1fad38dcf707..76c212d475b3 100644 --- a/include/asm-ppc64/param.h +++ b/include/asm-ppc64/param.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _ASM_PPC64_PARAM_H | 1 | #ifndef _ASM_PPC64_PARAM_H |
2 | #define _ASM_PPC64_PARAM_H | 2 | #define _ASM_PPC64_PARAM_H |
3 | 3 | ||
4 | #include <linux/config.h> | ||
5 | |||
4 | /* | 6 | /* |
5 | * This program is free software; you can redistribute it and/or | 7 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License | 8 | * modify it under the terms of the GNU General Public License |
@@ -9,7 +11,7 @@ | |||
9 | */ | 11 | */ |
10 | 12 | ||
11 | #ifdef __KERNEL__ | 13 | #ifdef __KERNEL__ |
12 | # define HZ 1000 /* Internal kernel timer frequency */ | 14 | # define HZ CONFIG_HZ /* Internal kernel timer frequency */ |
13 | # define USER_HZ 100 /* .. some user interfaces are in "ticks" */ | 15 | # define USER_HZ 100 /* .. some user interfaces are in "ticks" */ |
14 | # define CLOCKS_PER_SEC (USER_HZ) /* like times() */ | 16 | # define CLOCKS_PER_SEC (USER_HZ) /* like times() */ |
15 | #endif | 17 | #endif |
diff --git a/include/asm-ppc64/percpu.h b/include/asm-ppc64/percpu.h deleted file mode 100644 index 60a659a4ce1f..000000000000 --- a/include/asm-ppc64/percpu.h +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | #ifndef __ARCH_PPC64_PERCPU__ | ||
2 | #define __ARCH_PPC64_PERCPU__ | ||
3 | |||
4 | #include <asm-generic/percpu.h> | ||
5 | |||
6 | #endif /* __ARCH_PPC64_PERCPU__ */ | ||
diff --git a/include/asm-ppc64/pgalloc.h b/include/asm-ppc64/pgalloc.h index 4fc4b739b380..26bc49c1108d 100644 --- a/include/asm-ppc64/pgalloc.h +++ b/include/asm-ppc64/pgalloc.h | |||
@@ -6,7 +6,12 @@ | |||
6 | #include <linux/cpumask.h> | 6 | #include <linux/cpumask.h> |
7 | #include <linux/percpu.h> | 7 | #include <linux/percpu.h> |
8 | 8 | ||
9 | extern kmem_cache_t *zero_cache; | 9 | extern kmem_cache_t *pgtable_cache[]; |
10 | |||
11 | #define PTE_CACHE_NUM 0 | ||
12 | #define PMD_CACHE_NUM 1 | ||
13 | #define PUD_CACHE_NUM 1 | ||
14 | #define PGD_CACHE_NUM 0 | ||
10 | 15 | ||
11 | /* | 16 | /* |
12 | * This program is free software; you can redistribute it and/or | 17 | * This program is free software; you can redistribute it and/or |
@@ -15,30 +20,40 @@ extern kmem_cache_t *zero_cache; | |||
15 | * 2 of the License, or (at your option) any later version. | 20 | * 2 of the License, or (at your option) any later version. |
16 | */ | 21 | */ |
17 | 22 | ||
18 | static inline pgd_t * | 23 | static inline pgd_t *pgd_alloc(struct mm_struct *mm) |
19 | pgd_alloc(struct mm_struct *mm) | ||
20 | { | 24 | { |
21 | return kmem_cache_alloc(zero_cache, GFP_KERNEL); | 25 | return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL); |
22 | } | 26 | } |
23 | 27 | ||
24 | static inline void | 28 | static inline void pgd_free(pgd_t *pgd) |
25 | pgd_free(pgd_t *pgd) | ||
26 | { | 29 | { |
27 | kmem_cache_free(zero_cache, pgd); | 30 | kmem_cache_free(pgtable_cache[PGD_CACHE_NUM], pgd); |
31 | } | ||
32 | |||
33 | #define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD) | ||
34 | |||
35 | static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) | ||
36 | { | ||
37 | return kmem_cache_alloc(pgtable_cache[PUD_CACHE_NUM], | ||
38 | GFP_KERNEL|__GFP_REPEAT); | ||
39 | } | ||
40 | |||
41 | static inline void pud_free(pud_t *pud) | ||
42 | { | ||
43 | kmem_cache_free(pgtable_cache[PUD_CACHE_NUM], pud); | ||
28 | } | 44 | } |
29 | 45 | ||
30 | #define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD) | 46 | #define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD) |
31 | 47 | ||
32 | static inline pmd_t * | 48 | static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) |
33 | pmd_alloc_one(struct mm_struct *mm, unsigned long addr) | ||
34 | { | 49 | { |
35 | return kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); | 50 | return kmem_cache_alloc(pgtable_cache[PMD_CACHE_NUM], |
51 | GFP_KERNEL|__GFP_REPEAT); | ||
36 | } | 52 | } |
37 | 53 | ||
38 | static inline void | 54 | static inline void pmd_free(pmd_t *pmd) |
39 | pmd_free(pmd_t *pmd) | ||
40 | { | 55 | { |
41 | kmem_cache_free(zero_cache, pmd); | 56 | kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd); |
42 | } | 57 | } |
43 | 58 | ||
44 | #define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, pte) | 59 | #define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, pte) |
@@ -47,44 +62,58 @@ pmd_free(pmd_t *pmd) | |||
47 | 62 | ||
48 | static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) | 63 | static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) |
49 | { | 64 | { |
50 | return kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); | 65 | return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM], |
66 | GFP_KERNEL|__GFP_REPEAT); | ||
51 | } | 67 | } |
52 | 68 | ||
53 | static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) | 69 | static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) |
54 | { | 70 | { |
55 | pte_t *pte = kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); | 71 | return virt_to_page(pte_alloc_one_kernel(mm, address)); |
56 | if (pte) | ||
57 | return virt_to_page(pte); | ||
58 | return NULL; | ||
59 | } | 72 | } |
60 | 73 | ||
61 | static inline void pte_free_kernel(pte_t *pte) | 74 | static inline void pte_free_kernel(pte_t *pte) |
62 | { | 75 | { |
63 | kmem_cache_free(zero_cache, pte); | 76 | kmem_cache_free(pgtable_cache[PTE_CACHE_NUM], pte); |
64 | } | 77 | } |
65 | 78 | ||
66 | static inline void pte_free(struct page *ptepage) | 79 | static inline void pte_free(struct page *ptepage) |
67 | { | 80 | { |
68 | kmem_cache_free(zero_cache, page_address(ptepage)); | 81 | pte_free_kernel(page_address(ptepage)); |
69 | } | 82 | } |
70 | 83 | ||
71 | struct pte_freelist_batch | 84 | #define PGF_CACHENUM_MASK 0xf |
85 | |||
86 | typedef struct pgtable_free { | ||
87 | unsigned long val; | ||
88 | } pgtable_free_t; | ||
89 | |||
90 | static inline pgtable_free_t pgtable_free_cache(void *p, int cachenum, | ||
91 | unsigned long mask) | ||
72 | { | 92 | { |
73 | struct rcu_head rcu; | 93 | BUG_ON(cachenum > PGF_CACHENUM_MASK); |
74 | unsigned int index; | ||
75 | struct page * pages[0]; | ||
76 | }; | ||
77 | 94 | ||
78 | #define PTE_FREELIST_SIZE ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) / \ | 95 | return (pgtable_free_t){.val = ((unsigned long) p & ~mask) | cachenum}; |
79 | sizeof(struct page *)) | 96 | } |
80 | 97 | ||
81 | extern void pte_free_now(struct page *ptepage); | 98 | static inline void pgtable_free(pgtable_free_t pgf) |
82 | extern void pte_free_submit(struct pte_freelist_batch *batch); | 99 | { |
100 | void *p = (void *)(pgf.val & ~PGF_CACHENUM_MASK); | ||
101 | int cachenum = pgf.val & PGF_CACHENUM_MASK; | ||
83 | 102 | ||
84 | DECLARE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); | 103 | kmem_cache_free(pgtable_cache[cachenum], p); |
104 | } | ||
85 | 105 | ||
86 | void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage); | 106 | void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf); |
87 | #define __pmd_free_tlb(tlb, pmd) __pte_free_tlb(tlb, virt_to_page(pmd)) | 107 | |
108 | #define __pte_free_tlb(tlb, ptepage) \ | ||
109 | pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \ | ||
110 | PTE_CACHE_NUM, PTE_TABLE_SIZE-1)) | ||
111 | #define __pmd_free_tlb(tlb, pmd) \ | ||
112 | pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \ | ||
113 | PMD_CACHE_NUM, PMD_TABLE_SIZE-1)) | ||
114 | #define __pud_free_tlb(tlb, pmd) \ | ||
115 | pgtable_free_tlb(tlb, pgtable_free_cache(pud, \ | ||
116 | PUD_CACHE_NUM, PUD_TABLE_SIZE-1)) | ||
88 | 117 | ||
89 | #define check_pgt_cache() do { } while (0) | 118 | #define check_pgt_cache() do { } while (0) |
90 | 119 | ||
diff --git a/include/asm-ppc64/pgtable.h b/include/asm-ppc64/pgtable.h index 46cf61c2ff69..c83679c9d2b0 100644 --- a/include/asm-ppc64/pgtable.h +++ b/include/asm-ppc64/pgtable.h | |||
@@ -15,19 +15,24 @@ | |||
15 | #include <asm/tlbflush.h> | 15 | #include <asm/tlbflush.h> |
16 | #endif /* __ASSEMBLY__ */ | 16 | #endif /* __ASSEMBLY__ */ |
17 | 17 | ||
18 | #include <asm-generic/pgtable-nopud.h> | ||
19 | |||
20 | /* | 18 | /* |
21 | * Entries per page directory level. The PTE level must use a 64b record | 19 | * Entries per page directory level. The PTE level must use a 64b record |
22 | * for each page table entry. The PMD and PGD level use a 32b record for | 20 | * for each page table entry. The PMD and PGD level use a 32b record for |
23 | * each entry by assuming that each entry is page aligned. | 21 | * each entry by assuming that each entry is page aligned. |
24 | */ | 22 | */ |
25 | #define PTE_INDEX_SIZE 9 | 23 | #define PTE_INDEX_SIZE 9 |
26 | #define PMD_INDEX_SIZE 10 | 24 | #define PMD_INDEX_SIZE 7 |
27 | #define PGD_INDEX_SIZE 10 | 25 | #define PUD_INDEX_SIZE 7 |
26 | #define PGD_INDEX_SIZE 9 | ||
27 | |||
28 | #define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) | ||
29 | #define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) | ||
30 | #define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) | ||
31 | #define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) | ||
28 | 32 | ||
29 | #define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) | 33 | #define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) |
30 | #define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) | 34 | #define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) |
35 | #define PTRS_PER_PUD (1 << PMD_INDEX_SIZE) | ||
31 | #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) | 36 | #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) |
32 | 37 | ||
33 | /* PMD_SHIFT determines what a second-level page table entry can map */ | 38 | /* PMD_SHIFT determines what a second-level page table entry can map */ |
@@ -35,8 +40,13 @@ | |||
35 | #define PMD_SIZE (1UL << PMD_SHIFT) | 40 | #define PMD_SIZE (1UL << PMD_SHIFT) |
36 | #define PMD_MASK (~(PMD_SIZE-1)) | 41 | #define PMD_MASK (~(PMD_SIZE-1)) |
37 | 42 | ||
38 | /* PGDIR_SHIFT determines what a third-level page table entry can map */ | 43 | /* PUD_SHIFT determines what a third-level page table entry can map */ |
39 | #define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) | 44 | #define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) |
45 | #define PUD_SIZE (1UL << PUD_SHIFT) | ||
46 | #define PUD_MASK (~(PUD_SIZE-1)) | ||
47 | |||
48 | /* PGDIR_SHIFT determines what a fourth-level page table entry can map */ | ||
49 | #define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) | ||
40 | #define PGDIR_SIZE (1UL << PGDIR_SHIFT) | 50 | #define PGDIR_SIZE (1UL << PGDIR_SHIFT) |
41 | #define PGDIR_MASK (~(PGDIR_SIZE-1)) | 51 | #define PGDIR_MASK (~(PGDIR_SIZE-1)) |
42 | 52 | ||
@@ -45,15 +55,23 @@ | |||
45 | /* | 55 | /* |
46 | * Size of EA range mapped by our pagetables. | 56 | * Size of EA range mapped by our pagetables. |
47 | */ | 57 | */ |
48 | #define EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \ | 58 | #define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \ |
49 | PGD_INDEX_SIZE + PAGE_SHIFT) | 59 | PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT) |
50 | #define EADDR_MASK ((1UL << EADDR_SIZE) - 1) | 60 | #define PGTABLE_RANGE (1UL << PGTABLE_EADDR_SIZE) |
61 | |||
62 | #if TASK_SIZE_USER64 > PGTABLE_RANGE | ||
63 | #error TASK_SIZE_USER64 exceeds pagetable range | ||
64 | #endif | ||
65 | |||
66 | #if TASK_SIZE_USER64 > (1UL << (USER_ESID_BITS + SID_SHIFT)) | ||
67 | #error TASK_SIZE_USER64 exceeds user VSID range | ||
68 | #endif | ||
51 | 69 | ||
52 | /* | 70 | /* |
53 | * Define the address range of the vmalloc VM area. | 71 | * Define the address range of the vmalloc VM area. |
54 | */ | 72 | */ |
55 | #define VMALLOC_START (0xD000000000000000ul) | 73 | #define VMALLOC_START (0xD000000000000000ul) |
56 | #define VMALLOC_SIZE (0x10000000000UL) | 74 | #define VMALLOC_SIZE (0x80000000000UL) |
57 | #define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) | 75 | #define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) |
58 | 76 | ||
59 | /* | 77 | /* |
@@ -154,8 +172,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; | |||
154 | #ifndef __ASSEMBLY__ | 172 | #ifndef __ASSEMBLY__ |
155 | int hash_huge_page(struct mm_struct *mm, unsigned long access, | 173 | int hash_huge_page(struct mm_struct *mm, unsigned long access, |
156 | unsigned long ea, unsigned long vsid, int local); | 174 | unsigned long ea, unsigned long vsid, int local); |
157 | |||
158 | void hugetlb_mm_free_pgd(struct mm_struct *mm); | ||
159 | #endif /* __ASSEMBLY__ */ | 175 | #endif /* __ASSEMBLY__ */ |
160 | 176 | ||
161 | #define HAVE_ARCH_UNMAPPED_AREA | 177 | #define HAVE_ARCH_UNMAPPED_AREA |
@@ -163,7 +179,6 @@ void hugetlb_mm_free_pgd(struct mm_struct *mm); | |||
163 | #else | 179 | #else |
164 | 180 | ||
165 | #define hash_huge_page(mm,a,ea,vsid,local) -1 | 181 | #define hash_huge_page(mm,a,ea,vsid,local) -1 |
166 | #define hugetlb_mm_free_pgd(mm) do {} while (0) | ||
167 | 182 | ||
168 | #endif | 183 | #endif |
169 | 184 | ||
@@ -197,39 +212,45 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) | |||
197 | #define pte_pfn(x) ((unsigned long)((pte_val(x) >> PTE_SHIFT))) | 212 | #define pte_pfn(x) ((unsigned long)((pte_val(x) >> PTE_SHIFT))) |
198 | #define pte_page(x) pfn_to_page(pte_pfn(x)) | 213 | #define pte_page(x) pfn_to_page(pte_pfn(x)) |
199 | 214 | ||
200 | #define pmd_set(pmdp, ptep) \ | 215 | #define pmd_set(pmdp, ptep) ({BUG_ON((u64)ptep < KERNELBASE); pmd_val(*(pmdp)) = (unsigned long)(ptep);}) |
201 | (pmd_val(*(pmdp)) = __ba_to_bpn(ptep)) | ||
202 | #define pmd_none(pmd) (!pmd_val(pmd)) | 216 | #define pmd_none(pmd) (!pmd_val(pmd)) |
203 | #define pmd_bad(pmd) (pmd_val(pmd) == 0) | 217 | #define pmd_bad(pmd) (pmd_val(pmd) == 0) |
204 | #define pmd_present(pmd) (pmd_val(pmd) != 0) | 218 | #define pmd_present(pmd) (pmd_val(pmd) != 0) |
205 | #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) | 219 | #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) |
206 | #define pmd_page_kernel(pmd) (__bpn_to_ba(pmd_val(pmd))) | 220 | #define pmd_page_kernel(pmd) (pmd_val(pmd)) |
207 | #define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd)) | 221 | #define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd)) |
208 | 222 | ||
209 | #define pud_set(pudp, pmdp) (pud_val(*(pudp)) = (__ba_to_bpn(pmdp))) | 223 | #define pud_set(pudp, pmdp) (pud_val(*(pudp)) = (unsigned long)(pmdp)) |
210 | #define pud_none(pud) (!pud_val(pud)) | 224 | #define pud_none(pud) (!pud_val(pud)) |
211 | #define pud_bad(pud) ((pud_val(pud)) == 0UL) | 225 | #define pud_bad(pud) ((pud_val(pud)) == 0) |
212 | #define pud_present(pud) (pud_val(pud) != 0UL) | 226 | #define pud_present(pud) (pud_val(pud) != 0) |
213 | #define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) | 227 | #define pud_clear(pudp) (pud_val(*(pudp)) = 0) |
214 | #define pud_page(pud) (__bpn_to_ba(pud_val(pud))) | 228 | #define pud_page(pud) (pud_val(pud)) |
229 | |||
230 | #define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);}) | ||
231 | #define pgd_none(pgd) (!pgd_val(pgd)) | ||
232 | #define pgd_bad(pgd) (pgd_val(pgd) == 0) | ||
233 | #define pgd_present(pgd) (pgd_val(pgd) != 0) | ||
234 | #define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0) | ||
235 | #define pgd_page(pgd) (pgd_val(pgd)) | ||
215 | 236 | ||
216 | /* | 237 | /* |
217 | * Find an entry in a page-table-directory. We combine the address region | 238 | * Find an entry in a page-table-directory. We combine the address region |
218 | * (the high order N bits) and the pgd portion of the address. | 239 | * (the high order N bits) and the pgd portion of the address. |
219 | */ | 240 | */ |
220 | /* to avoid overflow in free_pgtables we don't use PTRS_PER_PGD here */ | 241 | /* to avoid overflow in free_pgtables we don't use PTRS_PER_PGD here */ |
221 | #define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & 0x7ff) | 242 | #define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & 0x1ff) |
222 | 243 | ||
223 | #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) | 244 | #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) |
224 | 245 | ||
225 | /* Find an entry in the second-level page table.. */ | 246 | #define pud_offset(pgdp, addr) \ |
247 | (((pud_t *) pgd_page(*(pgdp))) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) | ||
248 | |||
226 | #define pmd_offset(pudp,addr) \ | 249 | #define pmd_offset(pudp,addr) \ |
227 | ((pmd_t *) pud_page(*(pudp)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) | 250 | (((pmd_t *) pud_page(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) |
228 | 251 | ||
229 | /* Find an entry in the third-level page table.. */ | ||
230 | #define pte_offset_kernel(dir,addr) \ | 252 | #define pte_offset_kernel(dir,addr) \ |
231 | ((pte_t *) pmd_page_kernel(*(dir)) \ | 253 | (((pte_t *) pmd_page_kernel(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) |
232 | + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) | ||
233 | 254 | ||
234 | #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) | 255 | #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) |
235 | #define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) | 256 | #define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) |
@@ -458,23 +479,20 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, | |||
458 | #define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0) | 479 | #define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0) |
459 | 480 | ||
460 | #define pmd_ERROR(e) \ | 481 | #define pmd_ERROR(e) \ |
461 | printk("%s:%d: bad pmd %08x.\n", __FILE__, __LINE__, pmd_val(e)) | 482 | printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) |
483 | #define pud_ERROR(e) \ | ||
484 | printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pud_val(e)) | ||
462 | #define pgd_ERROR(e) \ | 485 | #define pgd_ERROR(e) \ |
463 | printk("%s:%d: bad pgd %08x.\n", __FILE__, __LINE__, pgd_val(e)) | 486 | printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) |
464 | 487 | ||
465 | extern pgd_t swapper_pg_dir[]; | 488 | extern pgd_t swapper_pg_dir[]; |
466 | 489 | ||
467 | extern void paging_init(void); | 490 | extern void paging_init(void); |
468 | 491 | ||
469 | /* | 492 | #ifdef CONFIG_HUGETLB_PAGE |
470 | * Because the huge pgtables are only 2 level, they can take | ||
471 | * at most around 4M, much less than one hugepage which the | ||
472 | * process is presumably entitled to use. So we don't bother | ||
473 | * freeing up the pagetables on unmap, and wait until | ||
474 | * destroy_context() to clean up the lot. | ||
475 | */ | ||
476 | #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ | 493 | #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ |
477 | do { } while (0) | 494 | free_pgd_range(tlb, addr, end, floor, ceiling) |
495 | #endif | ||
478 | 496 | ||
479 | /* | 497 | /* |
480 | * This gets called at the end of handling a page fault, when | 498 | * This gets called at the end of handling a page fault, when |
diff --git a/include/asm-ppc64/pmc.h b/include/asm-ppc64/pmc.h index c924748c0bea..d1d297dbccfe 100644 --- a/include/asm-ppc64/pmc.h +++ b/include/asm-ppc64/pmc.h | |||
@@ -26,4 +26,6 @@ typedef void (*perf_irq_t)(struct pt_regs *); | |||
26 | int reserve_pmc_hardware(perf_irq_t new_perf_irq); | 26 | int reserve_pmc_hardware(perf_irq_t new_perf_irq); |
27 | void release_pmc_hardware(void); | 27 | void release_pmc_hardware(void); |
28 | 28 | ||
29 | void power4_enable_pmcs(void); | ||
30 | |||
29 | #endif /* _PPC64_PMC_H */ | 31 | #endif /* _PPC64_PMC_H */ |
diff --git a/include/asm-ppc64/poll.h b/include/asm-ppc64/poll.h deleted file mode 100644 index 370fa3ba0db4..000000000000 --- a/include/asm-ppc64/poll.h +++ /dev/null | |||
@@ -1,32 +0,0 @@ | |||
1 | #ifndef __PPC64_POLL_H | ||
2 | #define __PPC64_POLL_H | ||
3 | |||
4 | /* | ||
5 | * Copyright (C) 2001 PPC64 Team, IBM Corp | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | ||
12 | |||
13 | #define POLLIN 0x0001 | ||
14 | #define POLLPRI 0x0002 | ||
15 | #define POLLOUT 0x0004 | ||
16 | #define POLLERR 0x0008 | ||
17 | #define POLLHUP 0x0010 | ||
18 | #define POLLNVAL 0x0020 | ||
19 | #define POLLRDNORM 0x0040 | ||
20 | #define POLLRDBAND 0x0080 | ||
21 | #define POLLWRNORM 0x0100 | ||
22 | #define POLLWRBAND 0x0200 | ||
23 | #define POLLMSG 0x0400 | ||
24 | #define POLLREMOVE 0x1000 | ||
25 | |||
26 | struct pollfd { | ||
27 | int fd; | ||
28 | short events; | ||
29 | short revents; | ||
30 | }; | ||
31 | |||
32 | #endif /* __PPC64_POLL_H */ | ||
diff --git a/include/asm-ppc64/processor.h b/include/asm-ppc64/processor.h index 352306cfb579..7bd4796f1236 100644 --- a/include/asm-ppc64/processor.h +++ b/include/asm-ppc64/processor.h | |||
@@ -268,6 +268,7 @@ | |||
268 | #define PV_970FX 0x003C | 268 | #define PV_970FX 0x003C |
269 | #define PV_630 0x0040 | 269 | #define PV_630 0x0040 |
270 | #define PV_630p 0x0041 | 270 | #define PV_630p 0x0041 |
271 | #define PV_970MP 0x0044 | ||
271 | #define PV_BE 0x0070 | 272 | #define PV_BE 0x0070 |
272 | 273 | ||
273 | /* Platforms supported by PPC64 */ | 274 | /* Platforms supported by PPC64 */ |
@@ -382,8 +383,8 @@ extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); | |||
382 | extern struct task_struct *last_task_used_math; | 383 | extern struct task_struct *last_task_used_math; |
383 | extern struct task_struct *last_task_used_altivec; | 384 | extern struct task_struct *last_task_used_altivec; |
384 | 385 | ||
385 | /* 64-bit user address space is 41-bits (2TBs user VM) */ | 386 | /* 64-bit user address space is 44-bits (16TB user VM) */ |
386 | #define TASK_SIZE_USER64 (0x0000020000000000UL) | 387 | #define TASK_SIZE_USER64 (0x0000100000000000UL) |
387 | 388 | ||
388 | /* | 389 | /* |
389 | * 32-bit user address space is 4GB - 1 page | 390 | * 32-bit user address space is 4GB - 1 page |
diff --git a/include/asm-ppc64/prom.h b/include/asm-ppc64/prom.h index 04b1a84f7ca3..dc5330b39509 100644 --- a/include/asm-ppc64/prom.h +++ b/include/asm-ppc64/prom.h | |||
@@ -22,13 +22,15 @@ | |||
22 | #define RELOC(x) (*PTRRELOC(&(x))) | 22 | #define RELOC(x) (*PTRRELOC(&(x))) |
23 | 23 | ||
24 | /* Definitions used by the flattened device tree */ | 24 | /* Definitions used by the flattened device tree */ |
25 | #define OF_DT_HEADER 0xd00dfeed /* 4: version, 4: total size */ | 25 | #define OF_DT_HEADER 0xd00dfeed /* marker */ |
26 | #define OF_DT_BEGIN_NODE 0x1 /* Start node: full name */ | 26 | #define OF_DT_BEGIN_NODE 0x1 /* Start of node, full name */ |
27 | #define OF_DT_END_NODE 0x2 /* End node */ | 27 | #define OF_DT_END_NODE 0x2 /* End node */ |
28 | #define OF_DT_PROP 0x3 /* Property: name off, size, content */ | 28 | #define OF_DT_PROP 0x3 /* Property: name off, size, |
29 | * content */ | ||
30 | #define OF_DT_NOP 0x4 /* nop */ | ||
29 | #define OF_DT_END 0x9 | 31 | #define OF_DT_END 0x9 |
30 | 32 | ||
31 | #define OF_DT_VERSION 1 | 33 | #define OF_DT_VERSION 0x10 |
32 | 34 | ||
33 | /* | 35 | /* |
34 | * This is what gets passed to the kernel by prom_init or kexec | 36 | * This is what gets passed to the kernel by prom_init or kexec |
@@ -54,7 +56,9 @@ struct boot_param_header | |||
54 | u32 version; /* format version */ | 56 | u32 version; /* format version */ |
55 | u32 last_comp_version; /* last compatible version */ | 57 | u32 last_comp_version; /* last compatible version */ |
56 | /* version 2 fields below */ | 58 | /* version 2 fields below */ |
57 | u32 boot_cpuid_phys; /* Which physical CPU id we're booting on */ | 59 | u32 boot_cpuid_phys; /* Physical CPU id we're booting on */ |
60 | /* version 3 fields below */ | ||
61 | u32 dt_strings_size; /* size of the DT strings block */ | ||
58 | }; | 62 | }; |
59 | 63 | ||
60 | 64 | ||
diff --git a/include/asm-ppc64/resource.h b/include/asm-ppc64/resource.h deleted file mode 100644 index add031b9dfd4..000000000000 --- a/include/asm-ppc64/resource.h +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | #ifndef _PPC64_RESOURCE_H | ||
2 | #define _PPC64_RESOURCE_H | ||
3 | |||
4 | #include <asm-generic/resource.h> | ||
5 | |||
6 | #endif /* _PPC64_RESOURCE_H */ | ||
diff --git a/include/asm-ppc64/shmparam.h b/include/asm-ppc64/shmparam.h deleted file mode 100644 index b2825ceff05e..000000000000 --- a/include/asm-ppc64/shmparam.h +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | #ifndef _PPC64_SHMPARAM_H | ||
2 | #define _PPC64_SHMPARAM_H | ||
3 | |||
4 | /* | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License | ||
7 | * as published by the Free Software Foundation; either version | ||
8 | * 2 of the License, or (at your option) any later version. | ||
9 | */ | ||
10 | |||
11 | #define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ | ||
12 | |||
13 | #endif /* _PPC64_SHMPARAM_H */ | ||
diff --git a/include/asm-ppc64/socket.h b/include/asm-ppc64/socket.h index 59e00dfc8b8e..9e1af8eb2d96 100644 --- a/include/asm-ppc64/socket.h +++ b/include/asm-ppc64/socket.h | |||
@@ -21,6 +21,8 @@ | |||
21 | #define SO_BROADCAST 6 | 21 | #define SO_BROADCAST 6 |
22 | #define SO_SNDBUF 7 | 22 | #define SO_SNDBUF 7 |
23 | #define SO_RCVBUF 8 | 23 | #define SO_RCVBUF 8 |
24 | #define SO_SNDBUFFORCE 32 | ||
25 | #define SO_RCVBUFFORCE 33 | ||
24 | #define SO_KEEPALIVE 9 | 26 | #define SO_KEEPALIVE 9 |
25 | #define SO_OOBINLINE 10 | 27 | #define SO_OOBINLINE 10 |
26 | #define SO_NO_CHECK 11 | 28 | #define SO_NO_CHECK 11 |
diff --git a/include/asm-ppc64/string.h b/include/asm-ppc64/string.h deleted file mode 100644 index eeca68ef1e91..000000000000 --- a/include/asm-ppc64/string.h +++ /dev/null | |||
@@ -1,35 +0,0 @@ | |||
1 | #ifndef _PPC64_STRING_H_ | ||
2 | #define _PPC64_STRING_H_ | ||
3 | |||
4 | /* | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License | ||
7 | * as published by the Free Software Foundation; either version | ||
8 | * 2 of the License, or (at your option) any later version. | ||
9 | */ | ||
10 | |||
11 | #define __HAVE_ARCH_STRCPY | ||
12 | #define __HAVE_ARCH_STRNCPY | ||
13 | #define __HAVE_ARCH_STRLEN | ||
14 | #define __HAVE_ARCH_STRCMP | ||
15 | #define __HAVE_ARCH_STRCAT | ||
16 | #define __HAVE_ARCH_MEMSET | ||
17 | #define __HAVE_ARCH_MEMCPY | ||
18 | #define __HAVE_ARCH_MEMMOVE | ||
19 | #define __HAVE_ARCH_MEMCMP | ||
20 | #define __HAVE_ARCH_MEMCHR | ||
21 | |||
22 | extern int strcasecmp(const char *, const char *); | ||
23 | extern int strncasecmp(const char *, const char *, int); | ||
24 | extern char * strcpy(char *,const char *); | ||
25 | extern char * strncpy(char *,const char *, __kernel_size_t); | ||
26 | extern __kernel_size_t strlen(const char *); | ||
27 | extern int strcmp(const char *,const char *); | ||
28 | extern char * strcat(char *, const char *); | ||
29 | extern void * memset(void *,int,__kernel_size_t); | ||
30 | extern void * memcpy(void *,const void *,__kernel_size_t); | ||
31 | extern void * memmove(void *,const void *,__kernel_size_t); | ||
32 | extern int memcmp(const void *,const void *,__kernel_size_t); | ||
33 | extern void * memchr(const void *,int,__kernel_size_t); | ||
34 | |||
35 | #endif /* _PPC64_STRING_H_ */ | ||
diff --git a/include/asm-ppc64/system.h b/include/asm-ppc64/system.h index 98d120ca8a91..b9e1835351e9 100644 --- a/include/asm-ppc64/system.h +++ b/include/asm-ppc64/system.h | |||
@@ -88,7 +88,7 @@ DEBUGGER_BOILERPLATE(debugger_dabr_match) | |||
88 | DEBUGGER_BOILERPLATE(debugger_fault_handler) | 88 | DEBUGGER_BOILERPLATE(debugger_fault_handler) |
89 | 89 | ||
90 | #ifdef CONFIG_XMON | 90 | #ifdef CONFIG_XMON |
91 | extern void xmon_init(void); | 91 | extern void xmon_init(int enable); |
92 | #endif | 92 | #endif |
93 | 93 | ||
94 | #else | 94 | #else |
@@ -302,5 +302,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) | |||
302 | 302 | ||
303 | #define arch_align_stack(x) (x) | 303 | #define arch_align_stack(x) (x) |
304 | 304 | ||
305 | extern unsigned long reloc_offset(void); | ||
306 | |||
305 | #endif /* __KERNEL__ */ | 307 | #endif /* __KERNEL__ */ |
306 | #endif | 308 | #endif |
diff --git a/include/asm-ppc64/unaligned.h b/include/asm-ppc64/unaligned.h deleted file mode 100644 index 636e93c4f379..000000000000 --- a/include/asm-ppc64/unaligned.h +++ /dev/null | |||
@@ -1,21 +0,0 @@ | |||
1 | #ifndef __PPC64_UNALIGNED_H | ||
2 | #define __PPC64_UNALIGNED_H | ||
3 | |||
4 | /* | ||
5 | * The PowerPC can do unaligned accesses itself in big endian mode. | ||
6 | * | ||
7 | * The strange macros are there to make sure these can't | ||
8 | * be misused in a way that makes them not work on other | ||
9 | * architectures where unaligned accesses aren't as simple. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or | ||
12 | * modify it under the terms of the GNU General Public License | ||
13 | * as published by the Free Software Foundation; either version | ||
14 | * 2 of the License, or (at your option) any later version. | ||
15 | */ | ||
16 | |||
17 | #define get_unaligned(ptr) (*(ptr)) | ||
18 | |||
19 | #define put_unaligned(val, ptr) ((void)( *(ptr) = (val) )) | ||
20 | |||
21 | #endif /* __PPC64_UNALIGNED_H */ | ||
diff --git a/include/asm-ppc64/vio.h b/include/asm-ppc64/vio.h index 20cd98ee6337..03f1b95f433b 100644 --- a/include/asm-ppc64/vio.h +++ b/include/asm-ppc64/vio.h | |||
@@ -19,13 +19,15 @@ | |||
19 | #include <linux/errno.h> | 19 | #include <linux/errno.h> |
20 | #include <linux/device.h> | 20 | #include <linux/device.h> |
21 | #include <linux/dma-mapping.h> | 21 | #include <linux/dma-mapping.h> |
22 | #include <linux/mod_devicetable.h> | ||
23 | |||
22 | #include <asm/hvcall.h> | 24 | #include <asm/hvcall.h> |
23 | #include <asm/prom.h> | ||
24 | #include <asm/scatterlist.h> | 25 | #include <asm/scatterlist.h> |
25 | /* | 26 | |
27 | /* | ||
26 | * Architecture-specific constants for drivers to | 28 | * Architecture-specific constants for drivers to |
27 | * extract attributes of the device using vio_get_attribute() | 29 | * extract attributes of the device using vio_get_attribute() |
28 | */ | 30 | */ |
29 | #define VETH_MAC_ADDR "local-mac-address" | 31 | #define VETH_MAC_ADDR "local-mac-address" |
30 | #define VETH_MCAST_FILTER_SIZE "ibm,mac-address-filters" | 32 | #define VETH_MCAST_FILTER_SIZE "ibm,mac-address-filters" |
31 | 33 | ||
@@ -37,64 +39,65 @@ | |||
37 | #define VIO_IRQ_DISABLE 0UL | 39 | #define VIO_IRQ_DISABLE 0UL |
38 | #define VIO_IRQ_ENABLE 1UL | 40 | #define VIO_IRQ_ENABLE 1UL |
39 | 41 | ||
40 | struct vio_dev; | ||
41 | struct vio_driver; | ||
42 | struct vio_device_id; | ||
43 | struct iommu_table; | 42 | struct iommu_table; |
44 | 43 | ||
45 | int vio_register_driver(struct vio_driver *drv); | 44 | /* |
46 | void vio_unregister_driver(struct vio_driver *drv); | 45 | * The vio_dev structure is used to describe virtual I/O devices. |
47 | 46 | */ | |
48 | #ifdef CONFIG_PPC_PSERIES | 47 | struct vio_dev { |
49 | struct vio_dev * __devinit vio_register_device_node( | 48 | struct iommu_table *iommu_table; /* vio_map_* uses this */ |
50 | struct device_node *node_vdev); | 49 | char *name; |
51 | #endif | ||
52 | void __devinit vio_unregister_device(struct vio_dev *dev); | ||
53 | struct vio_dev *vio_find_node(struct device_node *vnode); | ||
54 | |||
55 | const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length); | ||
56 | int vio_get_irq(struct vio_dev *dev); | ||
57 | int vio_enable_interrupts(struct vio_dev *dev); | ||
58 | int vio_disable_interrupts(struct vio_dev *dev); | ||
59 | |||
60 | extern struct dma_mapping_ops vio_dma_ops; | ||
61 | |||
62 | extern struct bus_type vio_bus_type; | ||
63 | |||
64 | struct vio_device_id { | ||
65 | char *type; | 50 | char *type; |
66 | char *compat; | 51 | uint32_t unit_address; |
52 | unsigned int irq; | ||
53 | struct device dev; | ||
67 | }; | 54 | }; |
68 | 55 | ||
69 | struct vio_driver { | 56 | struct vio_driver { |
70 | struct list_head node; | 57 | struct list_head node; |
71 | char *name; | 58 | char *name; |
72 | const struct vio_device_id *id_table; /* NULL if wants all devices */ | 59 | const struct vio_device_id *id_table; |
73 | int (*probe) (struct vio_dev *dev, const struct vio_device_id *id); /* New device inserted */ | 60 | int (*probe)(struct vio_dev *dev, const struct vio_device_id *id); |
74 | int (*remove) (struct vio_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */ | 61 | int (*remove)(struct vio_dev *dev); |
75 | unsigned long driver_data; | 62 | unsigned long driver_data; |
76 | |||
77 | struct device_driver driver; | 63 | struct device_driver driver; |
78 | }; | 64 | }; |
79 | 65 | ||
66 | struct vio_bus_ops { | ||
67 | int (*match)(const struct vio_device_id *id, const struct vio_dev *dev); | ||
68 | void (*unregister_device)(struct vio_dev *); | ||
69 | void (*release_device)(struct device *); | ||
70 | }; | ||
71 | |||
72 | extern struct dma_mapping_ops vio_dma_ops; | ||
73 | extern struct bus_type vio_bus_type; | ||
74 | extern struct vio_dev vio_bus_device; | ||
75 | |||
76 | extern int vio_register_driver(struct vio_driver *drv); | ||
77 | extern void vio_unregister_driver(struct vio_driver *drv); | ||
78 | |||
79 | extern struct vio_dev * __devinit vio_register_device(struct vio_dev *viodev); | ||
80 | extern void __devinit vio_unregister_device(struct vio_dev *dev); | ||
81 | |||
82 | extern int vio_bus_init(struct vio_bus_ops *); | ||
83 | |||
84 | #ifdef CONFIG_PPC_PSERIES | ||
85 | struct device_node; | ||
86 | |||
87 | extern struct vio_dev * __devinit vio_register_device_node( | ||
88 | struct device_node *node_vdev); | ||
89 | extern struct vio_dev *vio_find_node(struct device_node *vnode); | ||
90 | extern const void *vio_get_attribute(struct vio_dev *vdev, void *which, | ||
91 | int *length); | ||
92 | extern int vio_enable_interrupts(struct vio_dev *dev); | ||
93 | extern int vio_disable_interrupts(struct vio_dev *dev); | ||
94 | #endif | ||
95 | |||
80 | static inline struct vio_driver *to_vio_driver(struct device_driver *drv) | 96 | static inline struct vio_driver *to_vio_driver(struct device_driver *drv) |
81 | { | 97 | { |
82 | return container_of(drv, struct vio_driver, driver); | 98 | return container_of(drv, struct vio_driver, driver); |
83 | } | 99 | } |
84 | 100 | ||
85 | /* | ||
86 | * The vio_dev structure is used to describe virtual I/O devices. | ||
87 | */ | ||
88 | struct vio_dev { | ||
89 | struct iommu_table *iommu_table; /* vio_map_* uses this */ | ||
90 | char *name; | ||
91 | char *type; | ||
92 | uint32_t unit_address; | ||
93 | unsigned int irq; | ||
94 | |||
95 | struct device dev; | ||
96 | }; | ||
97 | |||
98 | static inline struct vio_dev *to_vio_dev(struct device *dev) | 101 | static inline struct vio_dev *to_vio_dev(struct device *dev) |
99 | { | 102 | { |
100 | return container_of(dev, struct vio_dev, dev); | 103 | return container_of(dev, struct vio_dev, dev); |
diff --git a/include/asm-ppc64/xor.h b/include/asm-ppc64/xor.h deleted file mode 100644 index c82eb12a5b18..000000000000 --- a/include/asm-ppc64/xor.h +++ /dev/null | |||
@@ -1 +0,0 @@ | |||
1 | #include <asm-generic/xor.h> | ||
diff --git a/include/asm-s390/socket.h b/include/asm-s390/socket.h index 0e96eeca4e6b..15a5298c8744 100644 --- a/include/asm-s390/socket.h +++ b/include/asm-s390/socket.h | |||
@@ -22,6 +22,8 @@ | |||
22 | #define SO_BROADCAST 6 | 22 | #define SO_BROADCAST 6 |
23 | #define SO_SNDBUF 7 | 23 | #define SO_SNDBUF 7 |
24 | #define SO_RCVBUF 8 | 24 | #define SO_RCVBUF 8 |
25 | #define SO_SNDBUFFORCE 32 | ||
26 | #define SO_RCVBUFFORCE 33 | ||
25 | #define SO_KEEPALIVE 9 | 27 | #define SO_KEEPALIVE 9 |
26 | #define SO_OOBINLINE 10 | 28 | #define SO_OOBINLINE 10 |
27 | #define SO_NO_CHECK 11 | 29 | #define SO_NO_CHECK 11 |
diff --git a/include/asm-sh/socket.h b/include/asm-sh/socket.h index dde696c3b4c7..553904ff9336 100644 --- a/include/asm-sh/socket.h +++ b/include/asm-sh/socket.h | |||
@@ -14,6 +14,8 @@ | |||
14 | #define SO_BROADCAST 6 | 14 | #define SO_BROADCAST 6 |
15 | #define SO_SNDBUF 7 | 15 | #define SO_SNDBUF 7 |
16 | #define SO_RCVBUF 8 | 16 | #define SO_RCVBUF 8 |
17 | #define SO_RCVBUFFORCE 32 | ||
18 | #define SO_SNDBUFFORCE 33 | ||
17 | #define SO_KEEPALIVE 9 | 19 | #define SO_KEEPALIVE 9 |
18 | #define SO_OOBINLINE 10 | 20 | #define SO_OOBINLINE 10 |
19 | #define SO_NO_CHECK 11 | 21 | #define SO_NO_CHECK 11 |
diff --git a/include/asm-sparc/processor.h b/include/asm-sparc/processor.h index 32c9699367cf..5a7a1a8d29ac 100644 --- a/include/asm-sparc/processor.h +++ b/include/asm-sparc/processor.h | |||
@@ -19,7 +19,6 @@ | |||
19 | #include <asm/ptrace.h> | 19 | #include <asm/ptrace.h> |
20 | #include <asm/head.h> | 20 | #include <asm/head.h> |
21 | #include <asm/signal.h> | 21 | #include <asm/signal.h> |
22 | #include <asm/segment.h> | ||
23 | #include <asm/btfixup.h> | 22 | #include <asm/btfixup.h> |
24 | #include <asm/page.h> | 23 | #include <asm/page.h> |
25 | 24 | ||
diff --git a/include/asm-sparc/segment.h b/include/asm-sparc/segment.h deleted file mode 100644 index a1b7ffc9eec9..000000000000 --- a/include/asm-sparc/segment.h +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | #ifndef __SPARC_SEGMENT_H | ||
2 | #define __SPARC_SEGMENT_H | ||
3 | |||
4 | /* Only here because we have some old header files that expect it.. */ | ||
5 | |||
6 | #endif | ||
diff --git a/include/asm-sparc/socket.h b/include/asm-sparc/socket.h index c1154e3ecfdf..09575b608adb 100644 --- a/include/asm-sparc/socket.h +++ b/include/asm-sparc/socket.h | |||
@@ -29,6 +29,8 @@ | |||
29 | 29 | ||
30 | #define SO_SNDBUF 0x1001 | 30 | #define SO_SNDBUF 0x1001 |
31 | #define SO_RCVBUF 0x1002 | 31 | #define SO_RCVBUF 0x1002 |
32 | #define SO_SNDBUFFORCE 0x100a | ||
33 | #define SO_RCVBUFFORCE 0x100b | ||
32 | #define SO_ERROR 0x1007 | 34 | #define SO_ERROR 0x1007 |
33 | #define SO_TYPE 0x1008 | 35 | #define SO_TYPE 0x1008 |
34 | 36 | ||
diff --git a/include/asm-sparc/system.h b/include/asm-sparc/system.h index 898562ebe94c..3557781a4bfd 100644 --- a/include/asm-sparc/system.h +++ b/include/asm-sparc/system.h | |||
@@ -9,7 +9,6 @@ | |||
9 | #include <linux/threads.h> /* NR_CPUS */ | 9 | #include <linux/threads.h> /* NR_CPUS */ |
10 | #include <linux/thread_info.h> | 10 | #include <linux/thread_info.h> |
11 | 11 | ||
12 | #include <asm/segment.h> | ||
13 | #include <asm/page.h> | 12 | #include <asm/page.h> |
14 | #include <asm/psr.h> | 13 | #include <asm/psr.h> |
15 | #include <asm/ptrace.h> | 14 | #include <asm/ptrace.h> |
diff --git a/include/asm-sparc64/atomic.h b/include/asm-sparc64/atomic.h index d80f3379669b..e175afcf2cde 100644 --- a/include/asm-sparc64/atomic.h +++ b/include/asm-sparc64/atomic.h | |||
@@ -72,10 +72,10 @@ extern int atomic64_sub_ret(int, atomic64_t *); | |||
72 | 72 | ||
73 | /* Atomic operations are already serializing */ | 73 | /* Atomic operations are already serializing */ |
74 | #ifdef CONFIG_SMP | 74 | #ifdef CONFIG_SMP |
75 | #define smp_mb__before_atomic_dec() membar("#StoreLoad | #LoadLoad") | 75 | #define smp_mb__before_atomic_dec() membar_storeload_loadload(); |
76 | #define smp_mb__after_atomic_dec() membar("#StoreLoad | #StoreStore") | 76 | #define smp_mb__after_atomic_dec() membar_storeload_storestore(); |
77 | #define smp_mb__before_atomic_inc() membar("#StoreLoad | #LoadLoad") | 77 | #define smp_mb__before_atomic_inc() membar_storeload_loadload(); |
78 | #define smp_mb__after_atomic_inc() membar("#StoreLoad | #StoreStore") | 78 | #define smp_mb__after_atomic_inc() membar_storeload_storestore(); |
79 | #else | 79 | #else |
80 | #define smp_mb__before_atomic_dec() barrier() | 80 | #define smp_mb__before_atomic_dec() barrier() |
81 | #define smp_mb__after_atomic_dec() barrier() | 81 | #define smp_mb__after_atomic_dec() barrier() |
diff --git a/include/asm-sparc64/bitops.h b/include/asm-sparc64/bitops.h index 9c5e71970287..6388b8376c50 100644 --- a/include/asm-sparc64/bitops.h +++ b/include/asm-sparc64/bitops.h | |||
@@ -72,8 +72,8 @@ static inline int __test_and_change_bit(int nr, volatile unsigned long *addr) | |||
72 | } | 72 | } |
73 | 73 | ||
74 | #ifdef CONFIG_SMP | 74 | #ifdef CONFIG_SMP |
75 | #define smp_mb__before_clear_bit() membar("#StoreLoad | #LoadLoad") | 75 | #define smp_mb__before_clear_bit() membar_storeload_loadload() |
76 | #define smp_mb__after_clear_bit() membar("#StoreLoad | #StoreStore") | 76 | #define smp_mb__after_clear_bit() membar_storeload_storestore() |
77 | #else | 77 | #else |
78 | #define smp_mb__before_clear_bit() barrier() | 78 | #define smp_mb__before_clear_bit() barrier() |
79 | #define smp_mb__after_clear_bit() barrier() | 79 | #define smp_mb__after_clear_bit() barrier() |
diff --git a/include/asm-sparc64/processor.h b/include/asm-sparc64/processor.h index d0bee2413560..3169f3e2237e 100644 --- a/include/asm-sparc64/processor.h +++ b/include/asm-sparc64/processor.h | |||
@@ -18,7 +18,6 @@ | |||
18 | #include <asm/a.out.h> | 18 | #include <asm/a.out.h> |
19 | #include <asm/pstate.h> | 19 | #include <asm/pstate.h> |
20 | #include <asm/ptrace.h> | 20 | #include <asm/ptrace.h> |
21 | #include <asm/segment.h> | ||
22 | #include <asm/page.h> | 21 | #include <asm/page.h> |
23 | 22 | ||
24 | /* The sparc has no problems with write protection */ | 23 | /* The sparc has no problems with write protection */ |
diff --git a/include/asm-sparc64/segment.h b/include/asm-sparc64/segment.h deleted file mode 100644 index b03e709fc945..000000000000 --- a/include/asm-sparc64/segment.h +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | #ifndef __SPARC64_SEGMENT_H | ||
2 | #define __SPARC64_SEGMENT_H | ||
3 | |||
4 | /* Only here because we have some old header files that expect it.. */ | ||
5 | |||
6 | #endif | ||
diff --git a/include/asm-sparc64/sfafsr.h b/include/asm-sparc64/sfafsr.h new file mode 100644 index 000000000000..2f792c20b53c --- /dev/null +++ b/include/asm-sparc64/sfafsr.h | |||
@@ -0,0 +1,82 @@ | |||
1 | #ifndef _SPARC64_SFAFSR_H | ||
2 | #define _SPARC64_SFAFSR_H | ||
3 | |||
4 | #include <asm/const.h> | ||
5 | |||
6 | /* Spitfire Asynchronous Fault Status register, ASI=0x4C VA<63:0>=0x0 */ | ||
7 | |||
8 | #define SFAFSR_ME (_AC(1,UL) << SFAFSR_ME_SHIFT) | ||
9 | #define SFAFSR_ME_SHIFT 32 | ||
10 | #define SFAFSR_PRIV (_AC(1,UL) << SFAFSR_PRIV_SHIFT) | ||
11 | #define SFAFSR_PRIV_SHIFT 31 | ||
12 | #define SFAFSR_ISAP (_AC(1,UL) << SFAFSR_ISAP_SHIFT) | ||
13 | #define SFAFSR_ISAP_SHIFT 30 | ||
14 | #define SFAFSR_ETP (_AC(1,UL) << SFAFSR_ETP_SHIFT) | ||
15 | #define SFAFSR_ETP_SHIFT 29 | ||
16 | #define SFAFSR_IVUE (_AC(1,UL) << SFAFSR_IVUE_SHIFT) | ||
17 | #define SFAFSR_IVUE_SHIFT 28 | ||
18 | #define SFAFSR_TO (_AC(1,UL) << SFAFSR_TO_SHIFT) | ||
19 | #define SFAFSR_TO_SHIFT 27 | ||
20 | #define SFAFSR_BERR (_AC(1,UL) << SFAFSR_BERR_SHIFT) | ||
21 | #define SFAFSR_BERR_SHIFT 26 | ||
22 | #define SFAFSR_LDP (_AC(1,UL) << SFAFSR_LDP_SHIFT) | ||
23 | #define SFAFSR_LDP_SHIFT 25 | ||
24 | #define SFAFSR_CP (_AC(1,UL) << SFAFSR_CP_SHIFT) | ||
25 | #define SFAFSR_CP_SHIFT 24 | ||
26 | #define SFAFSR_WP (_AC(1,UL) << SFAFSR_WP_SHIFT) | ||
27 | #define SFAFSR_WP_SHIFT 23 | ||
28 | #define SFAFSR_EDP (_AC(1,UL) << SFAFSR_EDP_SHIFT) | ||
29 | #define SFAFSR_EDP_SHIFT 22 | ||
30 | #define SFAFSR_UE (_AC(1,UL) << SFAFSR_UE_SHIFT) | ||
31 | #define SFAFSR_UE_SHIFT 21 | ||
32 | #define SFAFSR_CE (_AC(1,UL) << SFAFSR_CE_SHIFT) | ||
33 | #define SFAFSR_CE_SHIFT 20 | ||
34 | #define SFAFSR_ETS (_AC(0xf,UL) << SFAFSR_ETS_SHIFT) | ||
35 | #define SFAFSR_ETS_SHIFT 16 | ||
36 | #define SFAFSR_PSYND (_AC(0xffff,UL) << SFAFSR_PSYND_SHIFT) | ||
37 | #define SFAFSR_PSYND_SHIFT 0 | ||
38 | |||
39 | /* UDB Error Register, ASI=0x7f VA<63:0>=0x0(High),0x18(Low) for read | ||
40 | * ASI=0x77 VA<63:0>=0x0(High),0x18(Low) for write | ||
41 | */ | ||
42 | |||
43 | #define UDBE_UE (_AC(1,UL) << 9) | ||
44 | #define UDBE_CE (_AC(1,UL) << 8) | ||
45 | #define UDBE_E_SYNDR (_AC(0xff,UL) << 0) | ||
46 | |||
47 | /* The trap handlers for asynchronous errors encode the AFSR and | ||
48 | * other pieces of information into a 64-bit argument for C code | ||
49 | * encoded as follows: | ||
50 | * | ||
51 | * ----------------------------------------------- | ||
52 | * | UDB_H | UDB_L | TL>1 | TT | AFSR | | ||
53 | * ----------------------------------------------- | ||
54 | * 63 54 53 44 42 41 33 32 0 | ||
55 | * | ||
56 | * The AFAR is passed in unchanged. | ||
57 | */ | ||
58 | #define SFSTAT_UDBH_MASK (_AC(0x3ff,UL) << SFSTAT_UDBH_SHIFT) | ||
59 | #define SFSTAT_UDBH_SHIFT 54 | ||
60 | #define SFSTAT_UDBL_MASK (_AC(0x3ff,UL) << SFSTAT_UDBH_SHIFT) | ||
61 | #define SFSTAT_UDBL_SHIFT 44 | ||
62 | #define SFSTAT_TL_GT_ONE (_AC(1,UL) << SFSTAT_TL_GT_ONE_SHIFT) | ||
63 | #define SFSTAT_TL_GT_ONE_SHIFT 42 | ||
64 | #define SFSTAT_TRAP_TYPE (_AC(0x1FF,UL) << SFSTAT_TRAP_TYPE_SHIFT) | ||
65 | #define SFSTAT_TRAP_TYPE_SHIFT 33 | ||
66 | #define SFSTAT_AFSR_MASK (_AC(0x1ffffffff,UL) << SFSTAT_AFSR_SHIFT) | ||
67 | #define SFSTAT_AFSR_SHIFT 0 | ||
68 | |||
69 | /* ESTATE Error Enable Register, ASI=0x4b VA<63:0>=0x0 */ | ||
70 | #define ESTATE_ERR_CE 0x1 /* Correctable errors */ | ||
71 | #define ESTATE_ERR_NCE 0x2 /* TO, BERR, LDP, ETP, EDP, WP, UE, IVUE */ | ||
72 | #define ESTATE_ERR_ISAP 0x4 /* System address parity error */ | ||
73 | #define ESTATE_ERR_ALL (ESTATE_ERR_CE | \ | ||
74 | ESTATE_ERR_NCE | \ | ||
75 | ESTATE_ERR_ISAP) | ||
76 | |||
77 | /* The various trap types that report using the above state. */ | ||
78 | #define TRAP_TYPE_IAE 0x09 /* Instruction Access Error */ | ||
79 | #define TRAP_TYPE_DAE 0x32 /* Data Access Error */ | ||
80 | #define TRAP_TYPE_CEE 0x63 /* Correctable ECC Error */ | ||
81 | |||
82 | #endif /* _SPARC64_SFAFSR_H */ | ||
diff --git a/include/asm-sparc64/socket.h b/include/asm-sparc64/socket.h index 865547a23908..59987dad3359 100644 --- a/include/asm-sparc64/socket.h +++ b/include/asm-sparc64/socket.h | |||
@@ -29,6 +29,8 @@ | |||
29 | 29 | ||
30 | #define SO_SNDBUF 0x1001 | 30 | #define SO_SNDBUF 0x1001 |
31 | #define SO_RCVBUF 0x1002 | 31 | #define SO_RCVBUF 0x1002 |
32 | #define SO_SNDBUFFORCE 0x100a | ||
33 | #define SO_RCVBUFFORCE 0x100b | ||
32 | #define SO_ERROR 0x1007 | 34 | #define SO_ERROR 0x1007 |
33 | #define SO_TYPE 0x1008 | 35 | #define SO_TYPE 0x1008 |
34 | 36 | ||
diff --git a/include/asm-sparc64/spinlock.h b/include/asm-sparc64/spinlock.h index 9cb93a5c2b4f..a02c4370eb42 100644 --- a/include/asm-sparc64/spinlock.h +++ b/include/asm-sparc64/spinlock.h | |||
@@ -43,7 +43,7 @@ typedef struct { | |||
43 | #define spin_is_locked(lp) ((lp)->lock != 0) | 43 | #define spin_is_locked(lp) ((lp)->lock != 0) |
44 | 44 | ||
45 | #define spin_unlock_wait(lp) \ | 45 | #define spin_unlock_wait(lp) \ |
46 | do { membar("#LoadLoad"); \ | 46 | do { rmb(); \ |
47 | } while((lp)->lock) | 47 | } while((lp)->lock) |
48 | 48 | ||
49 | static inline void _raw_spin_lock(spinlock_t *lock) | 49 | static inline void _raw_spin_lock(spinlock_t *lock) |
@@ -129,15 +129,18 @@ typedef struct { | |||
129 | #define spin_is_locked(__lock) ((__lock)->lock != 0) | 129 | #define spin_is_locked(__lock) ((__lock)->lock != 0) |
130 | #define spin_unlock_wait(__lock) \ | 130 | #define spin_unlock_wait(__lock) \ |
131 | do { \ | 131 | do { \ |
132 | membar("#LoadLoad"); \ | 132 | rmb(); \ |
133 | } while((__lock)->lock) | 133 | } while((__lock)->lock) |
134 | 134 | ||
135 | extern void _do_spin_lock (spinlock_t *lock, char *str); | 135 | extern void _do_spin_lock(spinlock_t *lock, char *str, unsigned long caller); |
136 | extern void _do_spin_unlock (spinlock_t *lock); | 136 | extern void _do_spin_unlock(spinlock_t *lock); |
137 | extern int _do_spin_trylock (spinlock_t *lock); | 137 | extern int _do_spin_trylock(spinlock_t *lock, unsigned long caller); |
138 | 138 | ||
139 | #define _raw_spin_trylock(lp) _do_spin_trylock(lp) | 139 | #define _raw_spin_trylock(lp) \ |
140 | #define _raw_spin_lock(lock) _do_spin_lock(lock, "spin_lock") | 140 | _do_spin_trylock(lp, (unsigned long) __builtin_return_address(0)) |
141 | #define _raw_spin_lock(lock) \ | ||
142 | _do_spin_lock(lock, "spin_lock", \ | ||
143 | (unsigned long) __builtin_return_address(0)) | ||
141 | #define _raw_spin_unlock(lock) _do_spin_unlock(lock) | 144 | #define _raw_spin_unlock(lock) _do_spin_unlock(lock) |
142 | #define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock) | 145 | #define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock) |
143 | 146 | ||
@@ -279,37 +282,41 @@ typedef struct { | |||
279 | #define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0, 0xff, { } } | 282 | #define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0, 0xff, { } } |
280 | #define rwlock_init(lp) do { *(lp) = RW_LOCK_UNLOCKED; } while(0) | 283 | #define rwlock_init(lp) do { *(lp) = RW_LOCK_UNLOCKED; } while(0) |
281 | 284 | ||
282 | extern void _do_read_lock(rwlock_t *rw, char *str); | 285 | extern void _do_read_lock(rwlock_t *rw, char *str, unsigned long caller); |
283 | extern void _do_read_unlock(rwlock_t *rw, char *str); | 286 | extern void _do_read_unlock(rwlock_t *rw, char *str, unsigned long caller); |
284 | extern void _do_write_lock(rwlock_t *rw, char *str); | 287 | extern void _do_write_lock(rwlock_t *rw, char *str, unsigned long caller); |
285 | extern void _do_write_unlock(rwlock_t *rw); | 288 | extern void _do_write_unlock(rwlock_t *rw, unsigned long caller); |
286 | extern int _do_write_trylock(rwlock_t *rw, char *str); | 289 | extern int _do_write_trylock(rwlock_t *rw, char *str, unsigned long caller); |
287 | 290 | ||
288 | #define _raw_read_lock(lock) \ | 291 | #define _raw_read_lock(lock) \ |
289 | do { unsigned long flags; \ | 292 | do { unsigned long flags; \ |
290 | local_irq_save(flags); \ | 293 | local_irq_save(flags); \ |
291 | _do_read_lock(lock, "read_lock"); \ | 294 | _do_read_lock(lock, "read_lock", \ |
295 | (unsigned long) __builtin_return_address(0)); \ | ||
292 | local_irq_restore(flags); \ | 296 | local_irq_restore(flags); \ |
293 | } while(0) | 297 | } while(0) |
294 | 298 | ||
295 | #define _raw_read_unlock(lock) \ | 299 | #define _raw_read_unlock(lock) \ |
296 | do { unsigned long flags; \ | 300 | do { unsigned long flags; \ |
297 | local_irq_save(flags); \ | 301 | local_irq_save(flags); \ |
298 | _do_read_unlock(lock, "read_unlock"); \ | 302 | _do_read_unlock(lock, "read_unlock", \ |
303 | (unsigned long) __builtin_return_address(0)); \ | ||
299 | local_irq_restore(flags); \ | 304 | local_irq_restore(flags); \ |
300 | } while(0) | 305 | } while(0) |
301 | 306 | ||
302 | #define _raw_write_lock(lock) \ | 307 | #define _raw_write_lock(lock) \ |
303 | do { unsigned long flags; \ | 308 | do { unsigned long flags; \ |
304 | local_irq_save(flags); \ | 309 | local_irq_save(flags); \ |
305 | _do_write_lock(lock, "write_lock"); \ | 310 | _do_write_lock(lock, "write_lock", \ |
311 | (unsigned long) __builtin_return_address(0)); \ | ||
306 | local_irq_restore(flags); \ | 312 | local_irq_restore(flags); \ |
307 | } while(0) | 313 | } while(0) |
308 | 314 | ||
309 | #define _raw_write_unlock(lock) \ | 315 | #define _raw_write_unlock(lock) \ |
310 | do { unsigned long flags; \ | 316 | do { unsigned long flags; \ |
311 | local_irq_save(flags); \ | 317 | local_irq_save(flags); \ |
312 | _do_write_unlock(lock); \ | 318 | _do_write_unlock(lock, \ |
319 | (unsigned long) __builtin_return_address(0)); \ | ||
313 | local_irq_restore(flags); \ | 320 | local_irq_restore(flags); \ |
314 | } while(0) | 321 | } while(0) |
315 | 322 | ||
@@ -317,7 +324,8 @@ do { unsigned long flags; \ | |||
317 | ({ unsigned long flags; \ | 324 | ({ unsigned long flags; \ |
318 | int val; \ | 325 | int val; \ |
319 | local_irq_save(flags); \ | 326 | local_irq_save(flags); \ |
320 | val = _do_write_trylock(lock, "write_trylock"); \ | 327 | val = _do_write_trylock(lock, "write_trylock", \ |
328 | (unsigned long) __builtin_return_address(0)); \ | ||
321 | local_irq_restore(flags); \ | 329 | local_irq_restore(flags); \ |
322 | val; \ | 330 | val; \ |
323 | }) | 331 | }) |
diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h index ee4bdfc6b88f..5e94c05dc2fc 100644 --- a/include/asm-sparc64/system.h +++ b/include/asm-sparc64/system.h | |||
@@ -28,6 +28,14 @@ enum sparc_cpu { | |||
28 | #define ARCH_SUN4C_SUN4 0 | 28 | #define ARCH_SUN4C_SUN4 0 |
29 | #define ARCH_SUN4 0 | 29 | #define ARCH_SUN4 0 |
30 | 30 | ||
31 | extern void mb(void); | ||
32 | extern void rmb(void); | ||
33 | extern void wmb(void); | ||
34 | extern void membar_storeload(void); | ||
35 | extern void membar_storeload_storestore(void); | ||
36 | extern void membar_storeload_loadload(void); | ||
37 | extern void membar_storestore_loadstore(void); | ||
38 | |||
31 | #endif | 39 | #endif |
32 | 40 | ||
33 | #define setipl(__new_ipl) \ | 41 | #define setipl(__new_ipl) \ |
@@ -78,16 +86,11 @@ enum sparc_cpu { | |||
78 | 86 | ||
79 | #define nop() __asm__ __volatile__ ("nop") | 87 | #define nop() __asm__ __volatile__ ("nop") |
80 | 88 | ||
81 | #define membar(type) __asm__ __volatile__ ("membar " type : : : "memory") | ||
82 | #define mb() \ | ||
83 | membar("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad") | ||
84 | #define rmb() membar("#LoadLoad") | ||
85 | #define wmb() membar("#StoreStore") | ||
86 | #define read_barrier_depends() do { } while(0) | 89 | #define read_barrier_depends() do { } while(0) |
87 | #define set_mb(__var, __value) \ | 90 | #define set_mb(__var, __value) \ |
88 | do { __var = __value; membar("#StoreLoad | #StoreStore"); } while(0) | 91 | do { __var = __value; membar_storeload_storestore(); } while(0) |
89 | #define set_wmb(__var, __value) \ | 92 | #define set_wmb(__var, __value) \ |
90 | do { __var = __value; membar("#StoreStore"); } while(0) | 93 | do { __var = __value; wmb(); } while(0) |
91 | 94 | ||
92 | #ifdef CONFIG_SMP | 95 | #ifdef CONFIG_SMP |
93 | #define smp_mb() mb() | 96 | #define smp_mb() mb() |
diff --git a/include/asm-v850/socket.h b/include/asm-v850/socket.h index 213b852af53e..0240d366a0a4 100644 --- a/include/asm-v850/socket.h +++ b/include/asm-v850/socket.h | |||
@@ -14,6 +14,8 @@ | |||
14 | #define SO_BROADCAST 6 | 14 | #define SO_BROADCAST 6 |
15 | #define SO_SNDBUF 7 | 15 | #define SO_SNDBUF 7 |
16 | #define SO_RCVBUF 8 | 16 | #define SO_RCVBUF 8 |
17 | #define SO_SNDBUFFORCE 32 | ||
18 | #define SO_RCVBUFFORCE 33 | ||
17 | #define SO_KEEPALIVE 9 | 19 | #define SO_KEEPALIVE 9 |
18 | #define SO_OOBINLINE 10 | 20 | #define SO_OOBINLINE 10 |
19 | #define SO_NO_CHECK 11 | 21 | #define SO_NO_CHECK 11 |
diff --git a/include/asm-x86_64/checksum.h b/include/asm-x86_64/checksum.h index d01356f01448..989469e8e0b7 100644 --- a/include/asm-x86_64/checksum.h +++ b/include/asm-x86_64/checksum.h | |||
@@ -64,7 +64,7 @@ static inline unsigned short ip_fast_csum(unsigned char *iph, unsigned int ihl) | |||
64 | " adcl $0, %0\n" | 64 | " adcl $0, %0\n" |
65 | " notl %0\n" | 65 | " notl %0\n" |
66 | "2:" | 66 | "2:" |
67 | /* Since the input registers which are loaded with iph and ipl | 67 | /* Since the input registers which are loaded with iph and ihl |
68 | are modified, we must also specify them as outputs, or gcc | 68 | are modified, we must also specify them as outputs, or gcc |
69 | will assume they contain their original values. */ | 69 | will assume they contain their original values. */ |
70 | : "=r" (sum), "=r" (iph), "=r" (ihl) | 70 | : "=r" (sum), "=r" (iph), "=r" (ihl) |
diff --git a/include/asm-x86_64/socket.h b/include/asm-x86_64/socket.h index d9a252ea8210..f2cdbeae5d5b 100644 --- a/include/asm-x86_64/socket.h +++ b/include/asm-x86_64/socket.h | |||
@@ -14,6 +14,8 @@ | |||
14 | #define SO_BROADCAST 6 | 14 | #define SO_BROADCAST 6 |
15 | #define SO_SNDBUF 7 | 15 | #define SO_SNDBUF 7 |
16 | #define SO_RCVBUF 8 | 16 | #define SO_RCVBUF 8 |
17 | #define SO_SNDBUFFORCE 32 | ||
18 | #define SO_RCVBUFFORCE 33 | ||
17 | #define SO_KEEPALIVE 9 | 19 | #define SO_KEEPALIVE 9 |
18 | #define SO_OOBINLINE 10 | 20 | #define SO_OOBINLINE 10 |
19 | #define SO_NO_CHECK 11 | 21 | #define SO_NO_CHECK 11 |
diff --git a/include/asm-xtensa/socket.h b/include/asm-xtensa/socket.h index daccd05a14cd..00f83f3a6d72 100644 --- a/include/asm-xtensa/socket.h +++ b/include/asm-xtensa/socket.h | |||
@@ -24,6 +24,8 @@ | |||
24 | #define SO_BROADCAST 6 | 24 | #define SO_BROADCAST 6 |
25 | #define SO_SNDBUF 7 | 25 | #define SO_SNDBUF 7 |
26 | #define SO_RCVBUF 8 | 26 | #define SO_RCVBUF 8 |
27 | #define SO_SNDBUFFORCE 32 | ||
28 | #define SO_RCVBUFFORCE 33 | ||
27 | #define SO_KEEPALIVE 9 | 29 | #define SO_KEEPALIVE 9 |
28 | #define SO_OOBINLINE 10 | 30 | #define SO_OOBINLINE 10 |
29 | #define SO_NO_CHECK 11 | 31 | #define SO_NO_CHECK 11 |
diff --git a/include/linux/ata.h b/include/linux/ata.h index 9d25e9886d60..a5b74efab067 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h | |||
@@ -1,24 +1,29 @@ | |||
1 | 1 | ||
2 | /* | 2 | /* |
3 | Copyright 2003-2004 Red Hat, Inc. All rights reserved. | 3 | * Copyright 2003-2004 Red Hat, Inc. All rights reserved. |
4 | Copyright 2003-2004 Jeff Garzik | 4 | * Copyright 2003-2004 Jeff Garzik |
5 | 5 | * | |
6 | The contents of this file are subject to the Open | 6 | * |
7 | Software License version 1.1 that can be found at | 7 | * This program is free software; you can redistribute it and/or modify |
8 | http://www.opensource.org/licenses/osl-1.1.txt and is included herein | 8 | * it under the terms of the GNU General Public License as published by |
9 | by reference. | 9 | * the Free Software Foundation; either version 2, or (at your option) |
10 | 10 | * any later version. | |
11 | Alternatively, the contents of this file may be used under the terms | 11 | * |
12 | of the GNU General Public License version 2 (the "GPL") as distributed | 12 | * This program is distributed in the hope that it will be useful, |
13 | in the kernel source COPYING file, in which case the provisions of | 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | the GPL are applicable instead of the above. If you wish to allow | 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | the use of your version of this file only under the terms of the | 15 | * GNU General Public License for more details. |
16 | GPL and not to allow others to use your version of this file under | 16 | * |
17 | the OSL, indicate your decision by deleting the provisions above and | 17 | * You should have received a copy of the GNU General Public License |
18 | replace them with the notice and other provisions required by the GPL. | 18 | * along with this program; see the file COPYING. If not, write to |
19 | If you do not delete the provisions above, a recipient may use your | 19 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. |
20 | version of this file under either the OSL or the GPL. | 20 | * |
21 | 21 | * | |
22 | * libata documentation is available via 'make {ps|pdf}docs', | ||
23 | * as Documentation/DocBook/libata.* | ||
24 | * | ||
25 | * Hardware documentation available from http://www.t13.org/ | ||
26 | * | ||
22 | */ | 27 | */ |
23 | 28 | ||
24 | #ifndef __LINUX_ATA_H__ | 29 | #ifndef __LINUX_ATA_H__ |
diff --git a/include/linux/dccp.h b/include/linux/dccp.h new file mode 100644 index 000000000000..007c290f74d4 --- /dev/null +++ b/include/linux/dccp.h | |||
@@ -0,0 +1,456 @@ | |||
1 | #ifndef _LINUX_DCCP_H | ||
2 | #define _LINUX_DCCP_H | ||
3 | |||
4 | #include <linux/types.h> | ||
5 | #include <asm/byteorder.h> | ||
6 | |||
7 | /* Structure describing an Internet (DCCP) socket address. */ | ||
8 | struct sockaddr_dccp { | ||
9 | __u16 sdccp_family; /* Address family */ | ||
10 | __u16 sdccp_port; /* Port number */ | ||
11 | __u32 sdccp_addr; /* Internet address */ | ||
12 | __u32 sdccp_service; /* Service */ | ||
13 | /* Pad to size of `struct sockaddr': 16 bytes . */ | ||
14 | __u32 sdccp_pad; | ||
15 | }; | ||
16 | |||
17 | /** | ||
18 | * struct dccp_hdr - generic part of DCCP packet header | ||
19 | * | ||
20 | * @dccph_sport - Relevant port on the endpoint that sent this packet | ||
21 | * @dccph_dport - Relevant port on the other endpoint | ||
22 | * @dccph_doff - Data Offset from the start of the DCCP header, in 32-bit words | ||
23 | * @dccph_ccval - Used by the HC-Sender CCID | ||
24 | * @dccph_cscov - Parts of the packet that are covered by the Checksum field | ||
25 | * @dccph_checksum - Internet checksum, depends on dccph_cscov | ||
26 | * @dccph_x - 0 = 24 bit sequence number, 1 = 48 | ||
27 | * @dccph_type - packet type, see DCCP_PKT_ prefixed macros | ||
28 | * @dccph_seq - sequence number high or low order 24 bits, depends on dccph_x | ||
29 | */ | ||
30 | struct dccp_hdr { | ||
31 | __u16 dccph_sport, | ||
32 | dccph_dport; | ||
33 | __u8 dccph_doff; | ||
34 | #if defined(__LITTLE_ENDIAN_BITFIELD) | ||
35 | __u8 dccph_cscov:4, | ||
36 | dccph_ccval:4; | ||
37 | #elif defined(__BIG_ENDIAN_BITFIELD) | ||
38 | __u8 dccph_ccval:4, | ||
39 | dccph_cscov:4; | ||
40 | #else | ||
41 | #error "Adjust your <asm/byteorder.h> defines" | ||
42 | #endif | ||
43 | __u16 dccph_checksum; | ||
44 | #if defined(__LITTLE_ENDIAN_BITFIELD) | ||
45 | __u32 dccph_x:1, | ||
46 | dccph_type:4, | ||
47 | dccph_reserved:3, | ||
48 | dccph_seq:24; | ||
49 | #elif defined(__BIG_ENDIAN_BITFIELD) | ||
50 | __u32 dccph_reserved:3, | ||
51 | dccph_type:4, | ||
52 | dccph_x:1, | ||
53 | dccph_seq:24; | ||
54 | #else | ||
55 | #error "Adjust your <asm/byteorder.h> defines" | ||
56 | #endif | ||
57 | }; | ||
58 | |||
59 | /** | ||
60 | * struct dccp_hdr_ext - the low bits of a 48 bit seq packet | ||
61 | * | ||
62 | * @dccph_seq_low - low 24 bits of a 48 bit seq packet | ||
63 | */ | ||
64 | struct dccp_hdr_ext { | ||
65 | __u32 dccph_seq_low; | ||
66 | }; | ||
67 | |||
68 | /** | ||
69 | * struct dccp_hdr_request - Conection initiation request header | ||
70 | * | ||
71 | * @dccph_req_service - Service to which the client app wants to connect | ||
72 | * @dccph_req_options - list of options (must be a multiple of 32 bits | ||
73 | */ | ||
74 | struct dccp_hdr_request { | ||
75 | __u32 dccph_req_service; | ||
76 | }; | ||
77 | /** | ||
78 | * struct dccp_hdr_ack_bits - acknowledgment bits common to most packets | ||
79 | * | ||
80 | * @dccph_resp_ack_nr_high - 48 bit ack number high order bits, contains GSR | ||
81 | * @dccph_resp_ack_nr_low - 48 bit ack number low order bits, contains GSR | ||
82 | */ | ||
83 | struct dccp_hdr_ack_bits { | ||
84 | __u32 dccph_reserved1:8, | ||
85 | dccph_ack_nr_high:24; | ||
86 | __u32 dccph_ack_nr_low; | ||
87 | }; | ||
88 | /** | ||
89 | * struct dccp_hdr_response - Conection initiation response header | ||
90 | * | ||
91 | * @dccph_resp_ack_nr_high - 48 bit ack number high order bits, contains GSR | ||
92 | * @dccph_resp_ack_nr_low - 48 bit ack number low order bits, contains GSR | ||
93 | * @dccph_resp_service - Echoes the Service Code on a received DCCP-Request | ||
94 | * @dccph_resp_options - list of options (must be a multiple of 32 bits | ||
95 | */ | ||
96 | struct dccp_hdr_response { | ||
97 | struct dccp_hdr_ack_bits dccph_resp_ack; | ||
98 | __u32 dccph_resp_service; | ||
99 | }; | ||
100 | |||
101 | /** | ||
102 | * struct dccp_hdr_reset - Unconditionally shut down a connection | ||
103 | * | ||
104 | * @dccph_reset_service - Echoes the Service Code on a received DCCP-Request | ||
105 | * @dccph_reset_options - list of options (must be a multiple of 32 bits | ||
106 | */ | ||
107 | struct dccp_hdr_reset { | ||
108 | struct dccp_hdr_ack_bits dccph_reset_ack; | ||
109 | __u8 dccph_reset_code, | ||
110 | dccph_reset_data[3]; | ||
111 | }; | ||
112 | |||
113 | enum dccp_pkt_type { | ||
114 | DCCP_PKT_REQUEST = 0, | ||
115 | DCCP_PKT_RESPONSE, | ||
116 | DCCP_PKT_DATA, | ||
117 | DCCP_PKT_ACK, | ||
118 | DCCP_PKT_DATAACK, | ||
119 | DCCP_PKT_CLOSEREQ, | ||
120 | DCCP_PKT_CLOSE, | ||
121 | DCCP_PKT_RESET, | ||
122 | DCCP_PKT_SYNC, | ||
123 | DCCP_PKT_SYNCACK, | ||
124 | DCCP_PKT_INVALID, | ||
125 | }; | ||
126 | |||
127 | #define DCCP_NR_PKT_TYPES DCCP_PKT_INVALID | ||
128 | |||
129 | static inline unsigned int dccp_packet_hdr_len(const __u8 type) | ||
130 | { | ||
131 | if (type == DCCP_PKT_DATA) | ||
132 | return 0; | ||
133 | if (type == DCCP_PKT_DATAACK || | ||
134 | type == DCCP_PKT_ACK || | ||
135 | type == DCCP_PKT_SYNC || | ||
136 | type == DCCP_PKT_SYNCACK || | ||
137 | type == DCCP_PKT_CLOSE || | ||
138 | type == DCCP_PKT_CLOSEREQ) | ||
139 | return sizeof(struct dccp_hdr_ack_bits); | ||
140 | if (type == DCCP_PKT_REQUEST) | ||
141 | return sizeof(struct dccp_hdr_request); | ||
142 | if (type == DCCP_PKT_RESPONSE) | ||
143 | return sizeof(struct dccp_hdr_response); | ||
144 | return sizeof(struct dccp_hdr_reset); | ||
145 | } | ||
146 | enum dccp_reset_codes { | ||
147 | DCCP_RESET_CODE_UNSPECIFIED = 0, | ||
148 | DCCP_RESET_CODE_CLOSED, | ||
149 | DCCP_RESET_CODE_ABORTED, | ||
150 | DCCP_RESET_CODE_NO_CONNECTION, | ||
151 | DCCP_RESET_CODE_PACKET_ERROR, | ||
152 | DCCP_RESET_CODE_OPTION_ERROR, | ||
153 | DCCP_RESET_CODE_MANDATORY_ERROR, | ||
154 | DCCP_RESET_CODE_CONNECTION_REFUSED, | ||
155 | DCCP_RESET_CODE_BAD_SERVICE_CODE, | ||
156 | DCCP_RESET_CODE_TOO_BUSY, | ||
157 | DCCP_RESET_CODE_BAD_INIT_COOKIE, | ||
158 | DCCP_RESET_CODE_AGGRESSION_PENALTY, | ||
159 | }; | ||
160 | |||
161 | /* DCCP options */ | ||
162 | enum { | ||
163 | DCCPO_PADDING = 0, | ||
164 | DCCPO_MANDATORY = 1, | ||
165 | DCCPO_MIN_RESERVED = 3, | ||
166 | DCCPO_MAX_RESERVED = 31, | ||
167 | DCCPO_NDP_COUNT = 37, | ||
168 | DCCPO_ACK_VECTOR_0 = 38, | ||
169 | DCCPO_ACK_VECTOR_1 = 39, | ||
170 | DCCPO_TIMESTAMP = 41, | ||
171 | DCCPO_TIMESTAMP_ECHO = 42, | ||
172 | DCCPO_ELAPSED_TIME = 43, | ||
173 | DCCPO_MAX = 45, | ||
174 | DCCPO_MIN_CCID_SPECIFIC = 128, | ||
175 | DCCPO_MAX_CCID_SPECIFIC = 255, | ||
176 | }; | ||
177 | |||
178 | /* DCCP features */ | ||
179 | enum { | ||
180 | DCCPF_RESERVED = 0, | ||
181 | DCCPF_SEQUENCE_WINDOW = 3, | ||
182 | DCCPF_SEND_ACK_VECTOR = 6, | ||
183 | DCCPF_SEND_NDP_COUNT = 7, | ||
184 | /* 10-127 reserved */ | ||
185 | DCCPF_MIN_CCID_SPECIFIC = 128, | ||
186 | DCCPF_MAX_CCID_SPECIFIC = 255, | ||
187 | }; | ||
188 | |||
189 | /* DCCP socket options */ | ||
190 | #define DCCP_SOCKOPT_PACKET_SIZE 1 | ||
191 | |||
192 | #ifdef __KERNEL__ | ||
193 | |||
194 | #include <linux/in.h> | ||
195 | #include <linux/list.h> | ||
196 | #include <linux/uio.h> | ||
197 | #include <linux/workqueue.h> | ||
198 | |||
199 | #include <net/inet_connection_sock.h> | ||
200 | #include <net/inet_timewait_sock.h> | ||
201 | #include <net/sock.h> | ||
202 | #include <net/tcp_states.h> | ||
203 | #include <net/tcp.h> | ||
204 | |||
205 | enum dccp_state { | ||
206 | DCCP_OPEN = TCP_ESTABLISHED, | ||
207 | DCCP_REQUESTING = TCP_SYN_SENT, | ||
208 | DCCP_PARTOPEN = TCP_FIN_WAIT1, /* FIXME: | ||
209 | This mapping is horrible, but TCP has | ||
210 | no matching state for DCCP_PARTOPEN, | ||
211 | as TCP_SYN_RECV is already used by | ||
212 | DCCP_RESPOND, why don't stop using TCP | ||
213 | mapping of states? OK, now we don't use | ||
214 | sk_stream_sendmsg anymore, so doesn't | ||
215 | seem to exist any reason for us to | ||
216 | do the TCP mapping here */ | ||
217 | DCCP_LISTEN = TCP_LISTEN, | ||
218 | DCCP_RESPOND = TCP_SYN_RECV, | ||
219 | DCCP_CLOSING = TCP_CLOSING, | ||
220 | DCCP_TIME_WAIT = TCP_TIME_WAIT, | ||
221 | DCCP_CLOSED = TCP_CLOSE, | ||
222 | DCCP_MAX_STATES = TCP_MAX_STATES, | ||
223 | }; | ||
224 | |||
225 | #define DCCP_STATE_MASK 0xf | ||
226 | #define DCCP_ACTION_FIN (1<<7) | ||
227 | |||
228 | enum { | ||
229 | DCCPF_OPEN = TCPF_ESTABLISHED, | ||
230 | DCCPF_REQUESTING = TCPF_SYN_SENT, | ||
231 | DCCPF_PARTOPEN = TCPF_FIN_WAIT1, | ||
232 | DCCPF_LISTEN = TCPF_LISTEN, | ||
233 | DCCPF_RESPOND = TCPF_SYN_RECV, | ||
234 | DCCPF_CLOSING = TCPF_CLOSING, | ||
235 | DCCPF_TIME_WAIT = TCPF_TIME_WAIT, | ||
236 | DCCPF_CLOSED = TCPF_CLOSE, | ||
237 | }; | ||
238 | |||
239 | static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb) | ||
240 | { | ||
241 | return (struct dccp_hdr *)skb->h.raw; | ||
242 | } | ||
243 | |||
244 | static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb) | ||
245 | { | ||
246 | return (struct dccp_hdr_ext *)(skb->h.raw + sizeof(struct dccp_hdr)); | ||
247 | } | ||
248 | |||
249 | static inline unsigned int __dccp_basic_hdr_len(const struct dccp_hdr *dh) | ||
250 | { | ||
251 | return sizeof(*dh) + (dh->dccph_x ? sizeof(struct dccp_hdr_ext) : 0); | ||
252 | } | ||
253 | |||
254 | static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb) | ||
255 | { | ||
256 | const struct dccp_hdr *dh = dccp_hdr(skb); | ||
257 | return __dccp_basic_hdr_len(dh); | ||
258 | } | ||
259 | |||
260 | static inline __u64 dccp_hdr_seq(const struct sk_buff *skb) | ||
261 | { | ||
262 | const struct dccp_hdr *dh = dccp_hdr(skb); | ||
263 | #if defined(__LITTLE_ENDIAN_BITFIELD) | ||
264 | __u64 seq_nr = ntohl(dh->dccph_seq << 8); | ||
265 | #elif defined(__BIG_ENDIAN_BITFIELD) | ||
266 | __u64 seq_nr = ntohl(dh->dccph_seq); | ||
267 | #else | ||
268 | #error "Adjust your <asm/byteorder.h> defines" | ||
269 | #endif | ||
270 | |||
271 | if (dh->dccph_x != 0) | ||
272 | seq_nr = (seq_nr << 32) + ntohl(dccp_hdrx(skb)->dccph_seq_low); | ||
273 | |||
274 | return seq_nr; | ||
275 | } | ||
276 | |||
277 | static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb) | ||
278 | { | ||
279 | return (struct dccp_hdr_request *)(skb->h.raw + dccp_basic_hdr_len(skb)); | ||
280 | } | ||
281 | |||
282 | static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb) | ||
283 | { | ||
284 | return (struct dccp_hdr_ack_bits *)(skb->h.raw + dccp_basic_hdr_len(skb)); | ||
285 | } | ||
286 | |||
287 | static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb) | ||
288 | { | ||
289 | const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb); | ||
290 | #if defined(__LITTLE_ENDIAN_BITFIELD) | ||
291 | return (((u64)ntohl(dhack->dccph_ack_nr_high << 8)) << 32) + ntohl(dhack->dccph_ack_nr_low); | ||
292 | #elif defined(__BIG_ENDIAN_BITFIELD) | ||
293 | return (((u64)ntohl(dhack->dccph_ack_nr_high)) << 32) + ntohl(dhack->dccph_ack_nr_low); | ||
294 | #else | ||
295 | #error "Adjust your <asm/byteorder.h> defines" | ||
296 | #endif | ||
297 | } | ||
298 | |||
299 | static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb) | ||
300 | { | ||
301 | return (struct dccp_hdr_response *)(skb->h.raw + dccp_basic_hdr_len(skb)); | ||
302 | } | ||
303 | |||
304 | static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb) | ||
305 | { | ||
306 | return (struct dccp_hdr_reset *)(skb->h.raw + dccp_basic_hdr_len(skb)); | ||
307 | } | ||
308 | |||
309 | static inline unsigned int __dccp_hdr_len(const struct dccp_hdr *dh) | ||
310 | { | ||
311 | return __dccp_basic_hdr_len(dh) + | ||
312 | dccp_packet_hdr_len(dh->dccph_type); | ||
313 | } | ||
314 | |||
315 | static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) | ||
316 | { | ||
317 | return __dccp_hdr_len(dccp_hdr(skb)); | ||
318 | } | ||
319 | |||
320 | |||
321 | /* initial values for each feature */ | ||
322 | #define DCCPF_INITIAL_SEQUENCE_WINDOW 100 | ||
323 | /* FIXME: for now we're using CCID 3 (TFRC) */ | ||
324 | #define DCCPF_INITIAL_CCID 3 | ||
325 | #define DCCPF_INITIAL_SEND_ACK_VECTOR 0 | ||
326 | /* FIXME: for now we're default to 1 but it should really be 0 */ | ||
327 | #define DCCPF_INITIAL_SEND_NDP_COUNT 1 | ||
328 | |||
329 | #define DCCP_NDP_LIMIT 0xFFFFFF | ||
330 | |||
331 | /** | ||
332 | * struct dccp_options - option values for a DCCP connection | ||
333 | * @dccpo_sequence_window - Sequence Window Feature (section 7.5.2) | ||
334 | * @dccpo_ccid - Congestion Control Id (CCID) (section 10) | ||
335 | * @dccpo_send_ack_vector - Send Ack Vector Feature (section 11.5) | ||
336 | * @dccpo_send_ndp_count - Send NDP Count Feature (7.7.2) | ||
337 | */ | ||
338 | struct dccp_options { | ||
339 | __u64 dccpo_sequence_window; | ||
340 | __u8 dccpo_ccid; | ||
341 | __u8 dccpo_send_ack_vector; | ||
342 | __u8 dccpo_send_ndp_count; | ||
343 | }; | ||
344 | |||
345 | extern void __dccp_options_init(struct dccp_options *dccpo); | ||
346 | extern void dccp_options_init(struct dccp_options *dccpo); | ||
347 | extern int dccp_parse_options(struct sock *sk, struct sk_buff *skb); | ||
348 | |||
349 | struct dccp_request_sock { | ||
350 | struct inet_request_sock dreq_inet_rsk; | ||
351 | __u64 dreq_iss; | ||
352 | __u64 dreq_isr; | ||
353 | __u32 dreq_service; | ||
354 | }; | ||
355 | |||
356 | static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req) | ||
357 | { | ||
358 | return (struct dccp_request_sock *)req; | ||
359 | } | ||
360 | |||
361 | extern struct inet_timewait_death_row dccp_death_row; | ||
362 | |||
363 | /* Read about the ECN nonce to see why it is 253 */ | ||
364 | #define DCCP_MAX_ACK_VECTOR_LEN 253 | ||
365 | |||
366 | struct dccp_options_received { | ||
367 | u32 dccpor_ndp:24, | ||
368 | dccpor_ack_vector_len:8; | ||
369 | u32 dccpor_ack_vector_idx:10; | ||
370 | /* 22 bits hole, try to pack */ | ||
371 | u32 dccpor_timestamp; | ||
372 | u32 dccpor_timestamp_echo; | ||
373 | u32 dccpor_elapsed_time; | ||
374 | }; | ||
375 | |||
376 | struct ccid; | ||
377 | |||
378 | enum dccp_role { | ||
379 | DCCP_ROLE_UNDEFINED, | ||
380 | DCCP_ROLE_LISTEN, | ||
381 | DCCP_ROLE_CLIENT, | ||
382 | DCCP_ROLE_SERVER, | ||
383 | }; | ||
384 | |||
385 | /** | ||
386 | * struct dccp_sock - DCCP socket state | ||
387 | * | ||
388 | * @dccps_swl - sequence number window low | ||
389 | * @dccps_swh - sequence number window high | ||
390 | * @dccps_awl - acknowledgement number window low | ||
391 | * @dccps_awh - acknowledgement number window high | ||
392 | * @dccps_iss - initial sequence number sent | ||
393 | * @dccps_isr - initial sequence number received | ||
394 | * @dccps_osr - first OPEN sequence number received | ||
395 | * @dccps_gss - greatest sequence number sent | ||
396 | * @dccps_gsr - greatest valid sequence number received | ||
397 | * @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss | ||
398 | * @dccps_timestamp_time - time of latest TIMESTAMP option | ||
399 | * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option | ||
400 | * @dccps_ext_header_len - network protocol overhead (IP/IPv6 options) | ||
401 | * @dccps_pmtu_cookie - Last pmtu seen by socket | ||
402 | * @dccps_packet_size - Set thru setsockopt | ||
403 | * @dccps_role - Role of this sock, one of %dccp_role | ||
404 | * @dccps_ndp_count - number of Non Data Packets since last data packet | ||
405 | * @dccps_hc_rx_ackpkts - receiver half connection acked packets | ||
406 | */ | ||
407 | struct dccp_sock { | ||
408 | /* inet_connection_sock has to be the first member of dccp_sock */ | ||
409 | struct inet_connection_sock dccps_inet_connection; | ||
410 | __u64 dccps_swl; | ||
411 | __u64 dccps_swh; | ||
412 | __u64 dccps_awl; | ||
413 | __u64 dccps_awh; | ||
414 | __u64 dccps_iss; | ||
415 | __u64 dccps_isr; | ||
416 | __u64 dccps_osr; | ||
417 | __u64 dccps_gss; | ||
418 | __u64 dccps_gsr; | ||
419 | __u64 dccps_gar; | ||
420 | unsigned long dccps_service; | ||
421 | struct timeval dccps_timestamp_time; | ||
422 | __u32 dccps_timestamp_echo; | ||
423 | __u32 dccps_packet_size; | ||
424 | unsigned long dccps_ndp_count; | ||
425 | __u16 dccps_ext_header_len; | ||
426 | __u32 dccps_pmtu_cookie; | ||
427 | __u32 dccps_mss_cache; | ||
428 | struct dccp_options dccps_options; | ||
429 | struct dccp_ackpkts *dccps_hc_rx_ackpkts; | ||
430 | void *dccps_hc_rx_ccid_private; | ||
431 | void *dccps_hc_tx_ccid_private; | ||
432 | struct ccid *dccps_hc_rx_ccid; | ||
433 | struct ccid *dccps_hc_tx_ccid; | ||
434 | struct dccp_options_received dccps_options_received; | ||
435 | enum dccp_role dccps_role:2; | ||
436 | }; | ||
437 | |||
438 | static inline struct dccp_sock *dccp_sk(const struct sock *sk) | ||
439 | { | ||
440 | return (struct dccp_sock *)sk; | ||
441 | } | ||
442 | |||
443 | static inline const char *dccp_role(const struct sock *sk) | ||
444 | { | ||
445 | switch (dccp_sk(sk)->dccps_role) { | ||
446 | case DCCP_ROLE_UNDEFINED: return "undefined"; | ||
447 | case DCCP_ROLE_LISTEN: return "listen"; | ||
448 | case DCCP_ROLE_SERVER: return "server"; | ||
449 | case DCCP_ROLE_CLIENT: return "client"; | ||
450 | } | ||
451 | return NULL; | ||
452 | } | ||
453 | |||
454 | #endif /* __KERNEL__ */ | ||
455 | |||
456 | #endif /* _LINUX_DCCP_H */ | ||
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index d7021c391b2b..ed1440ea4c91 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h | |||
@@ -250,6 +250,12 @@ struct ethtool_stats { | |||
250 | u64 data[0]; | 250 | u64 data[0]; |
251 | }; | 251 | }; |
252 | 252 | ||
253 | struct ethtool_perm_addr { | ||
254 | u32 cmd; /* ETHTOOL_GPERMADDR */ | ||
255 | u32 size; | ||
256 | u8 data[0]; | ||
257 | }; | ||
258 | |||
253 | struct net_device; | 259 | struct net_device; |
254 | 260 | ||
255 | /* Some generic methods drivers may use in their ethtool_ops */ | 261 | /* Some generic methods drivers may use in their ethtool_ops */ |
@@ -261,6 +267,8 @@ u32 ethtool_op_get_sg(struct net_device *dev); | |||
261 | int ethtool_op_set_sg(struct net_device *dev, u32 data); | 267 | int ethtool_op_set_sg(struct net_device *dev, u32 data); |
262 | u32 ethtool_op_get_tso(struct net_device *dev); | 268 | u32 ethtool_op_get_tso(struct net_device *dev); |
263 | int ethtool_op_set_tso(struct net_device *dev, u32 data); | 269 | int ethtool_op_set_tso(struct net_device *dev, u32 data); |
270 | int ethtool_op_get_perm_addr(struct net_device *dev, | ||
271 | struct ethtool_perm_addr *addr, u8 *data); | ||
264 | 272 | ||
265 | /** | 273 | /** |
266 | * ðtool_ops - Alter and report network device settings | 274 | * ðtool_ops - Alter and report network device settings |
@@ -294,7 +302,8 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data); | |||
294 | * get_strings: Return a set of strings that describe the requested objects | 302 | * get_strings: Return a set of strings that describe the requested objects |
295 | * phys_id: Identify the device | 303 | * phys_id: Identify the device |
296 | * get_stats: Return statistics about the device | 304 | * get_stats: Return statistics about the device |
297 | * | 305 | * get_perm_addr: Gets the permanent hardware address |
306 | * | ||
298 | * Description: | 307 | * Description: |
299 | * | 308 | * |
300 | * get_settings: | 309 | * get_settings: |
@@ -352,6 +361,7 @@ struct ethtool_ops { | |||
352 | int (*phys_id)(struct net_device *, u32); | 361 | int (*phys_id)(struct net_device *, u32); |
353 | int (*get_stats_count)(struct net_device *); | 362 | int (*get_stats_count)(struct net_device *); |
354 | void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, u64 *); | 363 | void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, u64 *); |
364 | int (*get_perm_addr)(struct net_device *, struct ethtool_perm_addr *, u8 *); | ||
355 | int (*begin)(struct net_device *); | 365 | int (*begin)(struct net_device *); |
356 | void (*complete)(struct net_device *); | 366 | void (*complete)(struct net_device *); |
357 | }; | 367 | }; |
@@ -389,6 +399,7 @@ struct ethtool_ops { | |||
389 | #define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */ | 399 | #define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */ |
390 | #define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */ | 400 | #define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */ |
391 | #define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */ | 401 | #define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */ |
402 | #define ETHTOOL_GPERMADDR 0x00000020 /* Get permanent hardware address */ | ||
392 | 403 | ||
393 | /* compatibility with older code */ | 404 | /* compatibility with older code */ |
394 | #define SPARC_ETH_GSET ETHTOOL_GSET | 405 | #define SPARC_ETH_GSET ETHTOOL_GSET |
diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h index 9debe6bbe5f0..bab303dafd6e 100644 --- a/include/linux/hippidevice.h +++ b/include/linux/hippidevice.h | |||
@@ -26,8 +26,12 @@ | |||
26 | #include <linux/if_hippi.h> | 26 | #include <linux/if_hippi.h> |
27 | 27 | ||
28 | #ifdef __KERNEL__ | 28 | #ifdef __KERNEL__ |
29 | extern unsigned short hippi_type_trans(struct sk_buff *skb, | 29 | |
30 | struct net_device *dev); | 30 | struct hippi_cb { |
31 | __u32 ifield; | ||
32 | }; | ||
33 | |||
34 | extern __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev); | ||
31 | 35 | ||
32 | extern struct net_device *alloc_hippi_dev(int sizeof_priv); | 36 | extern struct net_device *alloc_hippi_dev(int sizeof_priv); |
33 | #endif | 37 | #endif |
diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index b5b58e9c054c..fc2d4c8225aa 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h | |||
@@ -110,6 +110,8 @@ static inline struct ethhdr *eth_hdr(const struct sk_buff *skb) | |||
110 | { | 110 | { |
111 | return (struct ethhdr *)skb->mac.raw; | 111 | return (struct ethhdr *)skb->mac.raw; |
112 | } | 112 | } |
113 | |||
114 | extern struct ctl_table ether_table[]; | ||
113 | #endif | 115 | #endif |
114 | 116 | ||
115 | #endif /* _LINUX_IF_ETHER_H */ | 117 | #endif /* _LINUX_IF_ETHER_H */ |
diff --git a/include/linux/if_fc.h b/include/linux/if_fc.h index 33330b458b95..376a34ea4723 100644 --- a/include/linux/if_fc.h +++ b/include/linux/if_fc.h | |||
@@ -44,7 +44,7 @@ struct fcllc { | |||
44 | __u8 ssap; /* source SAP */ | 44 | __u8 ssap; /* source SAP */ |
45 | __u8 llc; /* LLC control field */ | 45 | __u8 llc; /* LLC control field */ |
46 | __u8 protid[3]; /* protocol id */ | 46 | __u8 protid[3]; /* protocol id */ |
47 | __u16 ethertype; /* ether type field */ | 47 | __be16 ethertype; /* ether type field */ |
48 | }; | 48 | }; |
49 | 49 | ||
50 | #endif /* _LINUX_IF_FC_H */ | 50 | #endif /* _LINUX_IF_FC_H */ |
diff --git a/include/linux/if_fddi.h b/include/linux/if_fddi.h index a912818e6361..1288a161bc0b 100644 --- a/include/linux/if_fddi.h +++ b/include/linux/if_fddi.h | |||
@@ -85,7 +85,7 @@ struct fddi_snap_hdr | |||
85 | __u8 ssap; /* always 0xAA */ | 85 | __u8 ssap; /* always 0xAA */ |
86 | __u8 ctrl; /* always 0x03 */ | 86 | __u8 ctrl; /* always 0x03 */ |
87 | __u8 oui[FDDI_K_OUI_LEN]; /* organizational universal id */ | 87 | __u8 oui[FDDI_K_OUI_LEN]; /* organizational universal id */ |
88 | __u16 ethertype; /* packet type ID field */ | 88 | __be16 ethertype; /* packet type ID field */ |
89 | } __attribute__ ((packed)); | 89 | } __attribute__ ((packed)); |
90 | 90 | ||
91 | /* Define FDDI LLC frame header */ | 91 | /* Define FDDI LLC frame header */ |
diff --git a/include/linux/if_frad.h b/include/linux/if_frad.h index 3c94b1736570..511999c7eeda 100644 --- a/include/linux/if_frad.h +++ b/include/linux/if_frad.h | |||
@@ -191,10 +191,12 @@ struct frad_local | |||
191 | int buffer; /* current buffer for S508 firmware */ | 191 | int buffer; /* current buffer for S508 firmware */ |
192 | }; | 192 | }; |
193 | 193 | ||
194 | extern void dlci_ioctl_set(int (*hook)(unsigned int, void __user *)); | ||
195 | |||
196 | #endif /* __KERNEL__ */ | 194 | #endif /* __KERNEL__ */ |
197 | 195 | ||
198 | #endif /* CONFIG_DLCI || CONFIG_DLCI_MODULE */ | 196 | #endif /* CONFIG_DLCI || CONFIG_DLCI_MODULE */ |
199 | 197 | ||
198 | #ifdef __KERNEL__ | ||
199 | extern void dlci_ioctl_set(int (*hook)(unsigned int, void __user *)); | ||
200 | #endif | ||
201 | |||
200 | #endif | 202 | #endif |
diff --git a/include/linux/if_hippi.h b/include/linux/if_hippi.h index c8ca72c46f76..94d31ca7d71a 100644 --- a/include/linux/if_hippi.h +++ b/include/linux/if_hippi.h | |||
@@ -102,9 +102,9 @@ struct hippi_fp_hdr | |||
102 | #error "Please fix <asm/byteorder.h>" | 102 | #error "Please fix <asm/byteorder.h>" |
103 | #endif | 103 | #endif |
104 | #else | 104 | #else |
105 | __u32 fixed; | 105 | __be32 fixed; |
106 | #endif | 106 | #endif |
107 | __u32 d2_size; | 107 | __be32 d2_size; |
108 | } __attribute__ ((packed)); | 108 | } __attribute__ ((packed)); |
109 | 109 | ||
110 | struct hippi_le_hdr | 110 | struct hippi_le_hdr |
@@ -144,7 +144,7 @@ struct hippi_snap_hdr | |||
144 | __u8 ssap; /* always 0xAA */ | 144 | __u8 ssap; /* always 0xAA */ |
145 | __u8 ctrl; /* always 0x03 */ | 145 | __u8 ctrl; /* always 0x03 */ |
146 | __u8 oui[HIPPI_OUI_LEN]; /* organizational universal id (zero)*/ | 146 | __u8 oui[HIPPI_OUI_LEN]; /* organizational universal id (zero)*/ |
147 | __u16 ethertype; /* packet type ID field */ | 147 | __be16 ethertype; /* packet type ID field */ |
148 | } __attribute__ ((packed)); | 148 | } __attribute__ ((packed)); |
149 | 149 | ||
150 | struct hippi_hdr | 150 | struct hippi_hdr |
diff --git a/include/linux/if_tr.h b/include/linux/if_tr.h index 3fba9e2f5427..5502f597cf0e 100644 --- a/include/linux/if_tr.h +++ b/include/linux/if_tr.h | |||
@@ -43,12 +43,16 @@ struct trh_hdr { | |||
43 | }; | 43 | }; |
44 | 44 | ||
45 | #ifdef __KERNEL__ | 45 | #ifdef __KERNEL__ |
46 | #include <linux/config.h> | ||
46 | #include <linux/skbuff.h> | 47 | #include <linux/skbuff.h> |
47 | 48 | ||
48 | static inline struct trh_hdr *tr_hdr(const struct sk_buff *skb) | 49 | static inline struct trh_hdr *tr_hdr(const struct sk_buff *skb) |
49 | { | 50 | { |
50 | return (struct trh_hdr *)skb->mac.raw; | 51 | return (struct trh_hdr *)skb->mac.raw; |
51 | } | 52 | } |
53 | #ifdef CONFIG_SYSCTL | ||
54 | extern struct ctl_table tr_table[]; | ||
55 | #endif | ||
52 | #endif | 56 | #endif |
53 | 57 | ||
54 | /* This is an Token-Ring LLC structure */ | 58 | /* This is an Token-Ring LLC structure */ |
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 62a9d89dfbe2..17d0c0d40b0e 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h | |||
@@ -155,7 +155,6 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, | |||
155 | { | 155 | { |
156 | struct net_device_stats *stats; | 156 | struct net_device_stats *stats; |
157 | 157 | ||
158 | skb->real_dev = skb->dev; | ||
159 | skb->dev = grp->vlan_devices[vlan_tag & VLAN_VID_MASK]; | 158 | skb->dev = grp->vlan_devices[vlan_tag & VLAN_VID_MASK]; |
160 | if (skb->dev == NULL) { | 159 | if (skb->dev == NULL) { |
161 | dev_kfree_skb_any(skb); | 160 | dev_kfree_skb_any(skb); |
diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 0c31ef0b5bad..28f4f3b36950 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h | |||
@@ -129,6 +129,9 @@ struct igmpv3_query { | |||
129 | #include <linux/skbuff.h> | 129 | #include <linux/skbuff.h> |
130 | #include <linux/in.h> | 130 | #include <linux/in.h> |
131 | 131 | ||
132 | extern int sysctl_igmp_max_memberships; | ||
133 | extern int sysctl_igmp_max_msf; | ||
134 | |||
132 | struct ip_sf_socklist | 135 | struct ip_sf_socklist |
133 | { | 136 | { |
134 | unsigned int sl_max; | 137 | unsigned int sl_max; |
diff --git a/include/linux/in.h b/include/linux/in.h index fb88c66d748d..ba355384016a 100644 --- a/include/linux/in.h +++ b/include/linux/in.h | |||
@@ -32,6 +32,7 @@ enum { | |||
32 | IPPROTO_PUP = 12, /* PUP protocol */ | 32 | IPPROTO_PUP = 12, /* PUP protocol */ |
33 | IPPROTO_UDP = 17, /* User Datagram Protocol */ | 33 | IPPROTO_UDP = 17, /* User Datagram Protocol */ |
34 | IPPROTO_IDP = 22, /* XNS IDP protocol */ | 34 | IPPROTO_IDP = 22, /* XNS IDP protocol */ |
35 | IPPROTO_DCCP = 33, /* Datagram Congestion Control Protocol */ | ||
35 | IPPROTO_RSVP = 46, /* RSVP protocol */ | 36 | IPPROTO_RSVP = 46, /* RSVP protocol */ |
36 | IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */ | 37 | IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */ |
37 | 38 | ||
diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h new file mode 100644 index 000000000000..a4606e5810e5 --- /dev/null +++ b/include/linux/inet_diag.h | |||
@@ -0,0 +1,138 @@ | |||
1 | #ifndef _INET_DIAG_H_ | ||
2 | #define _INET_DIAG_H_ 1 | ||
3 | |||
4 | /* Just some random number */ | ||
5 | #define TCPDIAG_GETSOCK 18 | ||
6 | #define DCCPDIAG_GETSOCK 19 | ||
7 | |||
8 | #define INET_DIAG_GETSOCK_MAX 24 | ||
9 | |||
10 | /* Socket identity */ | ||
11 | struct inet_diag_sockid { | ||
12 | __u16 idiag_sport; | ||
13 | __u16 idiag_dport; | ||
14 | __u32 idiag_src[4]; | ||
15 | __u32 idiag_dst[4]; | ||
16 | __u32 idiag_if; | ||
17 | __u32 idiag_cookie[2]; | ||
18 | #define INET_DIAG_NOCOOKIE (~0U) | ||
19 | }; | ||
20 | |||
21 | /* Request structure */ | ||
22 | |||
23 | struct inet_diag_req { | ||
24 | __u8 idiag_family; /* Family of addresses. */ | ||
25 | __u8 idiag_src_len; | ||
26 | __u8 idiag_dst_len; | ||
27 | __u8 idiag_ext; /* Query extended information */ | ||
28 | |||
29 | struct inet_diag_sockid id; | ||
30 | |||
31 | __u32 idiag_states; /* States to dump */ | ||
32 | __u32 idiag_dbs; /* Tables to dump (NI) */ | ||
33 | }; | ||
34 | |||
35 | enum { | ||
36 | INET_DIAG_REQ_NONE, | ||
37 | INET_DIAG_REQ_BYTECODE, | ||
38 | }; | ||
39 | |||
40 | #define INET_DIAG_REQ_MAX INET_DIAG_REQ_BYTECODE | ||
41 | |||
42 | /* Bytecode is sequence of 4 byte commands followed by variable arguments. | ||
43 | * All the commands identified by "code" are conditional jumps forward: | ||
44 | * to offset cc+"yes" or to offset cc+"no". "yes" is supposed to be | ||
45 | * length of the command and its arguments. | ||
46 | */ | ||
47 | |||
48 | struct inet_diag_bc_op { | ||
49 | unsigned char code; | ||
50 | unsigned char yes; | ||
51 | unsigned short no; | ||
52 | }; | ||
53 | |||
54 | enum { | ||
55 | INET_DIAG_BC_NOP, | ||
56 | INET_DIAG_BC_JMP, | ||
57 | INET_DIAG_BC_S_GE, | ||
58 | INET_DIAG_BC_S_LE, | ||
59 | INET_DIAG_BC_D_GE, | ||
60 | INET_DIAG_BC_D_LE, | ||
61 | INET_DIAG_BC_AUTO, | ||
62 | INET_DIAG_BC_S_COND, | ||
63 | INET_DIAG_BC_D_COND, | ||
64 | }; | ||
65 | |||
66 | struct inet_diag_hostcond { | ||
67 | __u8 family; | ||
68 | __u8 prefix_len; | ||
69 | int port; | ||
70 | __u32 addr[0]; | ||
71 | }; | ||
72 | |||
73 | /* Base info structure. It contains socket identity (addrs/ports/cookie) | ||
74 | * and, alas, the information shown by netstat. */ | ||
75 | struct inet_diag_msg { | ||
76 | __u8 idiag_family; | ||
77 | __u8 idiag_state; | ||
78 | __u8 idiag_timer; | ||
79 | __u8 idiag_retrans; | ||
80 | |||
81 | struct inet_diag_sockid id; | ||
82 | |||
83 | __u32 idiag_expires; | ||
84 | __u32 idiag_rqueue; | ||
85 | __u32 idiag_wqueue; | ||
86 | __u32 idiag_uid; | ||
87 | __u32 idiag_inode; | ||
88 | }; | ||
89 | |||
90 | /* Extensions */ | ||
91 | |||
92 | enum { | ||
93 | INET_DIAG_NONE, | ||
94 | INET_DIAG_MEMINFO, | ||
95 | INET_DIAG_INFO, | ||
96 | INET_DIAG_VEGASINFO, | ||
97 | INET_DIAG_CONG, | ||
98 | }; | ||
99 | |||
100 | #define INET_DIAG_MAX INET_DIAG_CONG | ||
101 | |||
102 | |||
103 | /* INET_DIAG_MEM */ | ||
104 | |||
105 | struct inet_diag_meminfo { | ||
106 | __u32 idiag_rmem; | ||
107 | __u32 idiag_wmem; | ||
108 | __u32 idiag_fmem; | ||
109 | __u32 idiag_tmem; | ||
110 | }; | ||
111 | |||
112 | /* INET_DIAG_VEGASINFO */ | ||
113 | |||
114 | struct tcpvegas_info { | ||
115 | __u32 tcpv_enabled; | ||
116 | __u32 tcpv_rttcnt; | ||
117 | __u32 tcpv_rtt; | ||
118 | __u32 tcpv_minrtt; | ||
119 | }; | ||
120 | |||
121 | #ifdef __KERNEL__ | ||
122 | struct sock; | ||
123 | struct inet_hashinfo; | ||
124 | |||
125 | struct inet_diag_handler { | ||
126 | struct inet_hashinfo *idiag_hashinfo; | ||
127 | void (*idiag_get_info)(struct sock *sk, | ||
128 | struct inet_diag_msg *r, | ||
129 | void *info); | ||
130 | __u16 idiag_info_size; | ||
131 | __u16 idiag_type; | ||
132 | }; | ||
133 | |||
134 | extern int inet_diag_register(const struct inet_diag_handler *handler); | ||
135 | extern void inet_diag_unregister(const struct inet_diag_handler *handler); | ||
136 | #endif /* __KERNEL__ */ | ||
137 | |||
138 | #endif /* _INET_DIAG_H_ */ | ||
diff --git a/include/linux/ip.h b/include/linux/ip.h index 31e7cedd9f84..33e8a19a1a0f 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h | |||
@@ -196,6 +196,8 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, | |||
196 | #endif | 196 | #endif |
197 | #endif | 197 | #endif |
198 | 198 | ||
199 | extern int inet_sk_rebuild_header(struct sock *sk); | ||
200 | |||
199 | struct iphdr { | 201 | struct iphdr { |
200 | #if defined(__LITTLE_ENDIAN_BITFIELD) | 202 | #if defined(__LITTLE_ENDIAN_BITFIELD) |
201 | __u8 ihl:4, | 203 | __u8 ihl:4, |
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 6fcd6a0ade24..3c7dbc6a0a70 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h | |||
@@ -193,6 +193,11 @@ struct inet6_skb_parm { | |||
193 | 193 | ||
194 | #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) | 194 | #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) |
195 | 195 | ||
196 | static inline int inet6_iif(const struct sk_buff *skb) | ||
197 | { | ||
198 | return IP6CB(skb)->iif; | ||
199 | } | ||
200 | |||
196 | struct tcp6_request_sock { | 201 | struct tcp6_request_sock { |
197 | struct tcp_request_sock req; | 202 | struct tcp_request_sock req; |
198 | struct in6_addr loc_addr; | 203 | struct in6_addr loc_addr; |
@@ -308,6 +313,36 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, | |||
308 | 313 | ||
309 | #define __ipv6_only_sock(sk) (inet6_sk(sk)->ipv6only) | 314 | #define __ipv6_only_sock(sk) (inet6_sk(sk)->ipv6only) |
310 | #define ipv6_only_sock(sk) ((sk)->sk_family == PF_INET6 && __ipv6_only_sock(sk)) | 315 | #define ipv6_only_sock(sk) ((sk)->sk_family == PF_INET6 && __ipv6_only_sock(sk)) |
316 | |||
317 | #include <linux/tcp.h> | ||
318 | |||
319 | struct tcp6_timewait_sock { | ||
320 | struct tcp_timewait_sock tw_v6_sk; | ||
321 | struct in6_addr tw_v6_daddr; | ||
322 | struct in6_addr tw_v6_rcv_saddr; | ||
323 | }; | ||
324 | |||
325 | static inline struct tcp6_timewait_sock *tcp6_twsk(const struct sock *sk) | ||
326 | { | ||
327 | return (struct tcp6_timewait_sock *)sk; | ||
328 | } | ||
329 | |||
330 | static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk) | ||
331 | { | ||
332 | return likely(sk->sk_state != TCP_TIME_WAIT) ? | ||
333 | &inet6_sk(sk)->rcv_saddr : &tcp6_twsk(sk)->tw_v6_rcv_saddr; | ||
334 | } | ||
335 | |||
336 | static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) | ||
337 | { | ||
338 | return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; | ||
339 | } | ||
340 | |||
341 | static inline int inet_v6_ipv6only(const struct sock *sk) | ||
342 | { | ||
343 | return likely(sk->sk_state != TCP_TIME_WAIT) ? | ||
344 | ipv6_only_sock(sk) : inet_twsk(sk)->tw_ipv6only; | ||
345 | } | ||
311 | #else | 346 | #else |
312 | #define __ipv6_only_sock(sk) 0 | 347 | #define __ipv6_only_sock(sk) 0 |
313 | #define ipv6_only_sock(sk) 0 | 348 | #define ipv6_only_sock(sk) 0 |
@@ -322,8 +357,19 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) | |||
322 | return NULL; | 357 | return NULL; |
323 | } | 358 | } |
324 | 359 | ||
325 | #endif | 360 | #define __tcp_v6_rcv_saddr(__sk) NULL |
361 | #define tcp_v6_rcv_saddr(__sk) NULL | ||
362 | #define tcp_twsk_ipv6only(__sk) 0 | ||
363 | #define inet_v6_ipv6only(__sk) 0 | ||
364 | #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ | ||
326 | 365 | ||
327 | #endif | 366 | #define INET6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ |
367 | (((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ | ||
368 | ((__sk)->sk_family == AF_INET6) && \ | ||
369 | ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ | ||
370 | ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ | ||
371 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
328 | 372 | ||
329 | #endif | 373 | #endif /* __KERNEL__ */ |
374 | |||
375 | #endif /* _IPV6_H */ | ||
diff --git a/include/linux/libata.h b/include/linux/libata.h index 7c09540c52bc..fc05a9899288 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h | |||
@@ -1,23 +1,26 @@ | |||
1 | /* | 1 | /* |
2 | Copyright 2003-2004 Red Hat, Inc. All rights reserved. | 2 | * Copyright 2003-2005 Red Hat, Inc. All rights reserved. |
3 | Copyright 2003-2004 Jeff Garzik | 3 | * Copyright 2003-2005 Jeff Garzik |
4 | 4 | * | |
5 | The contents of this file are subject to the Open | 5 | * |
6 | Software License version 1.1 that can be found at | 6 | * This program is free software; you can redistribute it and/or modify |
7 | http://www.opensource.org/licenses/osl-1.1.txt and is included herein | 7 | * it under the terms of the GNU General Public License as published by |
8 | by reference. | 8 | * the Free Software Foundation; either version 2, or (at your option) |
9 | 9 | * any later version. | |
10 | Alternatively, the contents of this file may be used under the terms | 10 | * |
11 | of the GNU General Public License version 2 (the "GPL") as distributed | 11 | * This program is distributed in the hope that it will be useful, |
12 | in the kernel source COPYING file, in which case the provisions of | 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | the GPL are applicable instead of the above. If you wish to allow | 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | the use of your version of this file only under the terms of the | 14 | * GNU General Public License for more details. |
15 | GPL and not to allow others to use your version of this file under | 15 | * |
16 | the OSL, indicate your decision by deleting the provisions above and | 16 | * You should have received a copy of the GNU General Public License |
17 | replace them with the notice and other provisions required by the GPL. | 17 | * along with this program; see the file COPYING. If not, write to |
18 | If you do not delete the provisions above, a recipient may use your | 18 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. |
19 | version of this file under either the OSL or the GPL. | 19 | * |
20 | 20 | * | |
21 | * libata documentation is available via 'make {ps|pdf}docs', | ||
22 | * as Documentation/DocBook/libata.* | ||
23 | * | ||
21 | */ | 24 | */ |
22 | 25 | ||
23 | #ifndef __LINUX_LIBATA_H__ | 26 | #ifndef __LINUX_LIBATA_H__ |
diff --git a/include/linux/list.h b/include/linux/list.h index aab2db21b013..e6ec59682274 100644 --- a/include/linux/list.h +++ b/include/linux/list.h | |||
@@ -419,6 +419,20 @@ static inline void list_splice_init(struct list_head *list, | |||
419 | pos = n, n = list_entry(n->member.next, typeof(*n), member)) | 419 | pos = n, n = list_entry(n->member.next, typeof(*n), member)) |
420 | 420 | ||
421 | /** | 421 | /** |
422 | * list_for_each_entry_safe_continue - iterate over list of given type | ||
423 | * continuing after existing point safe against removal of list entry | ||
424 | * @pos: the type * to use as a loop counter. | ||
425 | * @n: another type * to use as temporary storage | ||
426 | * @head: the head for your list. | ||
427 | * @member: the name of the list_struct within the struct. | ||
428 | */ | ||
429 | #define list_for_each_entry_safe_continue(pos, n, head, member) \ | ||
430 | for (pos = list_entry(pos->member.next, typeof(*pos), member), \ | ||
431 | n = list_entry(pos->member.next, typeof(*pos), member); \ | ||
432 | &pos->member != (head); \ | ||
433 | pos = n, n = list_entry(n->member.next, typeof(*n), member)) | ||
434 | |||
435 | /** | ||
422 | * list_for_each_rcu - iterate over an rcu-protected list | 436 | * list_for_each_rcu - iterate over an rcu-protected list |
423 | * @pos: the &struct list_head to use as a loop counter. | 437 | * @pos: the &struct list_head to use as a loop counter. |
424 | * @head: the head for your list. | 438 | * @head: the head for your list. |
@@ -620,6 +634,57 @@ static inline void hlist_add_after(struct hlist_node *n, | |||
620 | next->next->pprev = &next->next; | 634 | next->next->pprev = &next->next; |
621 | } | 635 | } |
622 | 636 | ||
637 | /** | ||
638 | * hlist_add_before_rcu - adds the specified element to the specified hlist | ||
639 | * before the specified node while permitting racing traversals. | ||
640 | * @n: the new element to add to the hash list. | ||
641 | * @next: the existing element to add the new element before. | ||
642 | * | ||
643 | * The caller must take whatever precautions are necessary | ||
644 | * (such as holding appropriate locks) to avoid racing | ||
645 | * with another list-mutation primitive, such as hlist_add_head_rcu() | ||
646 | * or hlist_del_rcu(), running on this same list. | ||
647 | * However, it is perfectly legal to run concurrently with | ||
648 | * the _rcu list-traversal primitives, such as | ||
649 | * hlist_for_each_rcu(), used to prevent memory-consistency | ||
650 | * problems on Alpha CPUs. | ||
651 | */ | ||
652 | static inline void hlist_add_before_rcu(struct hlist_node *n, | ||
653 | struct hlist_node *next) | ||
654 | { | ||
655 | n->pprev = next->pprev; | ||
656 | n->next = next; | ||
657 | smp_wmb(); | ||
658 | next->pprev = &n->next; | ||
659 | *(n->pprev) = n; | ||
660 | } | ||
661 | |||
662 | /** | ||
663 | * hlist_add_after_rcu - adds the specified element to the specified hlist | ||
664 | * after the specified node while permitting racing traversals. | ||
665 | * @prev: the existing element to add the new element after. | ||
666 | * @n: the new element to add to the hash list. | ||
667 | * | ||
668 | * The caller must take whatever precautions are necessary | ||
669 | * (such as holding appropriate locks) to avoid racing | ||
670 | * with another list-mutation primitive, such as hlist_add_head_rcu() | ||
671 | * or hlist_del_rcu(), running on this same list. | ||
672 | * However, it is perfectly legal to run concurrently with | ||
673 | * the _rcu list-traversal primitives, such as | ||
674 | * hlist_for_each_rcu(), used to prevent memory-consistency | ||
675 | * problems on Alpha CPUs. | ||
676 | */ | ||
677 | static inline void hlist_add_after_rcu(struct hlist_node *prev, | ||
678 | struct hlist_node *n) | ||
679 | { | ||
680 | n->next = prev->next; | ||
681 | n->pprev = &prev->next; | ||
682 | smp_wmb(); | ||
683 | prev->next = n; | ||
684 | if (n->next) | ||
685 | n->next->pprev = &n->next; | ||
686 | } | ||
687 | |||
623 | #define hlist_entry(ptr, type, member) container_of(ptr,type,member) | 688 | #define hlist_entry(ptr, type, member) container_of(ptr,type,member) |
624 | 689 | ||
625 | #define hlist_for_each(pos, head) \ | 690 | #define hlist_for_each(pos, head) \ |
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 97bbccdbcca3..47da39ba3f03 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Device tables which are exported to userspace via | 2 | * Device tables which are exported to userspace via |
3 | * scripts/table2alias.c. You must keep that file in sync with this | 3 | * scripts/mod/file2alias.c. You must keep that file in sync with this |
4 | * header. | 4 | * header. |
5 | */ | 5 | */ |
6 | 6 | ||
@@ -190,6 +190,11 @@ struct of_device_id | |||
190 | #endif | 190 | #endif |
191 | }; | 191 | }; |
192 | 192 | ||
193 | /* VIO */ | ||
194 | struct vio_device_id { | ||
195 | char type[32]; | ||
196 | char compat[32]; | ||
197 | }; | ||
193 | 198 | ||
194 | /* PCMCIA */ | 199 | /* PCMCIA */ |
195 | 200 | ||
diff --git a/include/linux/net.h b/include/linux/net.h index 20cb226b2268..4e981585a89a 100644 --- a/include/linux/net.h +++ b/include/linux/net.h | |||
@@ -84,6 +84,7 @@ enum sock_type { | |||
84 | SOCK_RAW = 3, | 84 | SOCK_RAW = 3, |
85 | SOCK_RDM = 4, | 85 | SOCK_RDM = 4, |
86 | SOCK_SEQPACKET = 5, | 86 | SOCK_SEQPACKET = 5, |
87 | SOCK_DCCP = 6, | ||
87 | SOCK_PACKET = 10, | 88 | SOCK_PACKET = 10, |
88 | }; | 89 | }; |
89 | 90 | ||
@@ -282,5 +283,15 @@ static struct proto_ops name##_ops = { \ | |||
282 | #define MODULE_ALIAS_NETPROTO(proto) \ | 283 | #define MODULE_ALIAS_NETPROTO(proto) \ |
283 | MODULE_ALIAS("net-pf-" __stringify(proto)) | 284 | MODULE_ALIAS("net-pf-" __stringify(proto)) |
284 | 285 | ||
286 | #define MODULE_ALIAS_NET_PF_PROTO(pf, proto) \ | ||
287 | MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto)) | ||
288 | |||
289 | #ifdef CONFIG_SYSCTL | ||
290 | #include <linux/sysctl.h> | ||
291 | extern ctl_table net_table[]; | ||
292 | extern int net_msg_cost; | ||
293 | extern int net_msg_burst; | ||
294 | #endif | ||
295 | |||
285 | #endif /* __KERNEL__ */ | 296 | #endif /* __KERNEL__ */ |
286 | #endif /* _LINUX_NET_H */ | 297 | #endif /* _LINUX_NET_H */ |
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3a0ed7f9e801..7c717907896d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h | |||
@@ -244,6 +244,7 @@ struct netdev_boot_setup { | |||
244 | }; | 244 | }; |
245 | #define NETDEV_BOOT_SETUP_MAX 8 | 245 | #define NETDEV_BOOT_SETUP_MAX 8 |
246 | 246 | ||
247 | extern int __init netdev_boot_setup(char *str); | ||
247 | 248 | ||
248 | /* | 249 | /* |
249 | * The DEVICE structure. | 250 | * The DEVICE structure. |
@@ -336,6 +337,7 @@ struct net_device | |||
336 | /* Interface address info. */ | 337 | /* Interface address info. */ |
337 | unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ | 338 | unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ |
338 | unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */ | 339 | unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */ |
340 | unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */ | ||
339 | unsigned char addr_len; /* hardware address length */ | 341 | unsigned char addr_len; /* hardware address length */ |
340 | unsigned short dev_id; /* for shared network cards */ | 342 | unsigned short dev_id; /* for shared network cards */ |
341 | 343 | ||
@@ -497,10 +499,12 @@ static inline void *netdev_priv(struct net_device *dev) | |||
497 | #define SET_NETDEV_DEV(net, pdev) ((net)->class_dev.dev = (pdev)) | 499 | #define SET_NETDEV_DEV(net, pdev) ((net)->class_dev.dev = (pdev)) |
498 | 500 | ||
499 | struct packet_type { | 501 | struct packet_type { |
500 | __be16 type; /* This is really htons(ether_type). */ | 502 | __be16 type; /* This is really htons(ether_type). */ |
501 | struct net_device *dev; /* NULL is wildcarded here */ | 503 | struct net_device *dev; /* NULL is wildcarded here */ |
502 | int (*func) (struct sk_buff *, struct net_device *, | 504 | int (*func) (struct sk_buff *, |
503 | struct packet_type *); | 505 | struct net_device *, |
506 | struct packet_type *, | ||
507 | struct net_device *); | ||
504 | void *af_packet_priv; | 508 | void *af_packet_priv; |
505 | struct list_head list; | 509 | struct list_head list; |
506 | }; | 510 | }; |
@@ -671,6 +675,7 @@ extern void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); | |||
671 | extern void dev_init(void); | 675 | extern void dev_init(void); |
672 | 676 | ||
673 | extern int netdev_nit; | 677 | extern int netdev_nit; |
678 | extern int netdev_budget; | ||
674 | 679 | ||
675 | /* Called by rtnetlink.c:rtnl_unlock() */ | 680 | /* Called by rtnetlink.c:rtnl_unlock() */ |
676 | extern void netdev_run_todo(void); | 681 | extern void netdev_run_todo(void); |
@@ -697,19 +702,9 @@ static inline int netif_carrier_ok(const struct net_device *dev) | |||
697 | 702 | ||
698 | extern void __netdev_watchdog_up(struct net_device *dev); | 703 | extern void __netdev_watchdog_up(struct net_device *dev); |
699 | 704 | ||
700 | static inline void netif_carrier_on(struct net_device *dev) | 705 | extern void netif_carrier_on(struct net_device *dev); |
701 | { | ||
702 | if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) | ||
703 | linkwatch_fire_event(dev); | ||
704 | if (netif_running(dev)) | ||
705 | __netdev_watchdog_up(dev); | ||
706 | } | ||
707 | 706 | ||
708 | static inline void netif_carrier_off(struct net_device *dev) | 707 | extern void netif_carrier_off(struct net_device *dev); |
709 | { | ||
710 | if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) | ||
711 | linkwatch_fire_event(dev); | ||
712 | } | ||
713 | 708 | ||
714 | /* Hot-plugging. */ | 709 | /* Hot-plugging. */ |
715 | static inline int netif_device_present(struct net_device *dev) | 710 | static inline int netif_device_present(struct net_device *dev) |
@@ -916,6 +911,14 @@ extern int skb_checksum_help(struct sk_buff *skb, int inward); | |||
916 | extern void net_enable_timestamp(void); | 911 | extern void net_enable_timestamp(void); |
917 | extern void net_disable_timestamp(void); | 912 | extern void net_disable_timestamp(void); |
918 | 913 | ||
914 | #ifdef CONFIG_PROC_FS | ||
915 | extern void *dev_seq_start(struct seq_file *seq, loff_t *pos); | ||
916 | extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos); | ||
917 | extern void dev_seq_stop(struct seq_file *seq, void *v); | ||
918 | #endif | ||
919 | |||
920 | extern void linkwatch_run_queue(void); | ||
921 | |||
919 | #endif /* __KERNEL__ */ | 922 | #endif /* __KERNEL__ */ |
920 | 923 | ||
921 | #endif /* _LINUX_DEV_H */ | 924 | #endif /* _LINUX_DEV_H */ |
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 2e2045482cb1..be365e70ee99 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h | |||
@@ -21,10 +21,23 @@ | |||
21 | #define NF_STOP 5 | 21 | #define NF_STOP 5 |
22 | #define NF_MAX_VERDICT NF_STOP | 22 | #define NF_MAX_VERDICT NF_STOP |
23 | 23 | ||
24 | /* we overload the higher bits for encoding auxiliary data such as the queue | ||
25 | * number. Not nice, but better than additional function arguments. */ | ||
26 | #define NF_VERDICT_MASK 0x0000ffff | ||
27 | #define NF_VERDICT_BITS 16 | ||
28 | |||
29 | #define NF_VERDICT_QMASK 0xffff0000 | ||
30 | #define NF_VERDICT_QBITS 16 | ||
31 | |||
32 | #define NF_QUEUE_NR(x) (((x << NF_VERDICT_QBITS) & NF_VERDICT_QMASK) | NF_QUEUE) | ||
33 | |||
34 | /* only for userspace compatibility */ | ||
35 | #ifndef __KERNEL__ | ||
24 | /* Generic cache responses from hook functions. | 36 | /* Generic cache responses from hook functions. |
25 | <= 0x2000 is used for protocol-flags. */ | 37 | <= 0x2000 is used for protocol-flags. */ |
26 | #define NFC_UNKNOWN 0x4000 | 38 | #define NFC_UNKNOWN 0x4000 |
27 | #define NFC_ALTERED 0x8000 | 39 | #define NFC_ALTERED 0x8000 |
40 | #endif | ||
28 | 41 | ||
29 | #ifdef __KERNEL__ | 42 | #ifdef __KERNEL__ |
30 | #include <linux/config.h> | 43 | #include <linux/config.h> |
@@ -101,15 +114,51 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg); | |||
101 | 114 | ||
102 | extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; | 115 | extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; |
103 | 116 | ||
104 | typedef void nf_logfn(unsigned int hooknum, | 117 | /* those NF_LOG_* defines and struct nf_loginfo are legacy definitios that will |
118 | * disappear once iptables is replaced with pkttables. Please DO NOT use them | ||
119 | * for any new code! */ | ||
120 | #define NF_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ | ||
121 | #define NF_LOG_TCPOPT 0x02 /* Log TCP options */ | ||
122 | #define NF_LOG_IPOPT 0x04 /* Log IP options */ | ||
123 | #define NF_LOG_UID 0x08 /* Log UID owning local socket */ | ||
124 | #define NF_LOG_MASK 0x0f | ||
125 | |||
126 | #define NF_LOG_TYPE_LOG 0x01 | ||
127 | #define NF_LOG_TYPE_ULOG 0x02 | ||
128 | |||
129 | struct nf_loginfo { | ||
130 | u_int8_t type; | ||
131 | union { | ||
132 | struct { | ||
133 | u_int32_t copy_len; | ||
134 | u_int16_t group; | ||
135 | u_int16_t qthreshold; | ||
136 | } ulog; | ||
137 | struct { | ||
138 | u_int8_t level; | ||
139 | u_int8_t logflags; | ||
140 | } log; | ||
141 | } u; | ||
142 | }; | ||
143 | |||
144 | typedef void nf_logfn(unsigned int pf, | ||
145 | unsigned int hooknum, | ||
105 | const struct sk_buff *skb, | 146 | const struct sk_buff *skb, |
106 | const struct net_device *in, | 147 | const struct net_device *in, |
107 | const struct net_device *out, | 148 | const struct net_device *out, |
149 | const struct nf_loginfo *li, | ||
108 | const char *prefix); | 150 | const char *prefix); |
109 | 151 | ||
152 | struct nf_logger { | ||
153 | struct module *me; | ||
154 | nf_logfn *logfn; | ||
155 | char *name; | ||
156 | }; | ||
157 | |||
110 | /* Function to register/unregister log function. */ | 158 | /* Function to register/unregister log function. */ |
111 | int nf_log_register(int pf, nf_logfn *logfn); | 159 | int nf_log_register(int pf, struct nf_logger *logger); |
112 | void nf_log_unregister(int pf, nf_logfn *logfn); | 160 | int nf_log_unregister_pf(int pf); |
161 | void nf_log_unregister_logger(struct nf_logger *logger); | ||
113 | 162 | ||
114 | /* Calls the registered backend logging function */ | 163 | /* Calls the registered backend logging function */ |
115 | void nf_log_packet(int pf, | 164 | void nf_log_packet(int pf, |
@@ -117,6 +166,7 @@ void nf_log_packet(int pf, | |||
117 | const struct sk_buff *skb, | 166 | const struct sk_buff *skb, |
118 | const struct net_device *in, | 167 | const struct net_device *in, |
119 | const struct net_device *out, | 168 | const struct net_device *out, |
169 | struct nf_loginfo *li, | ||
120 | const char *fmt, ...); | 170 | const char *fmt, ...); |
121 | 171 | ||
122 | /* Activate hook; either okfn or kfree_skb called, unless a hook | 172 | /* Activate hook; either okfn or kfree_skb called, unless a hook |
@@ -175,11 +225,16 @@ int nf_getsockopt(struct sock *sk, int pf, int optval, char __user *opt, | |||
175 | int *len); | 225 | int *len); |
176 | 226 | ||
177 | /* Packet queuing */ | 227 | /* Packet queuing */ |
178 | typedef int (*nf_queue_outfn_t)(struct sk_buff *skb, | 228 | struct nf_queue_handler { |
179 | struct nf_info *info, void *data); | 229 | int (*outfn)(struct sk_buff *skb, struct nf_info *info, |
230 | unsigned int queuenum, void *data); | ||
231 | void *data; | ||
232 | char *name; | ||
233 | }; | ||
180 | extern int nf_register_queue_handler(int pf, | 234 | extern int nf_register_queue_handler(int pf, |
181 | nf_queue_outfn_t outfn, void *data); | 235 | struct nf_queue_handler *qh); |
182 | extern int nf_unregister_queue_handler(int pf); | 236 | extern int nf_unregister_queue_handler(int pf); |
237 | extern void nf_unregister_queue_handlers(struct nf_queue_handler *qh); | ||
183 | extern void nf_reinject(struct sk_buff *skb, | 238 | extern void nf_reinject(struct sk_buff *skb, |
184 | struct nf_info *info, | 239 | struct nf_info *info, |
185 | unsigned int verdict); | 240 | unsigned int verdict); |
@@ -190,6 +245,27 @@ extern void nf_ct_attach(struct sk_buff *, struct sk_buff *); | |||
190 | /* FIXME: Before cache is ever used, this must be implemented for real. */ | 245 | /* FIXME: Before cache is ever used, this must be implemented for real. */ |
191 | extern void nf_invalidate_cache(int pf); | 246 | extern void nf_invalidate_cache(int pf); |
192 | 247 | ||
248 | /* Call this before modifying an existing packet: ensures it is | ||
249 | modifiable and linear to the point you care about (writable_len). | ||
250 | Returns true or false. */ | ||
251 | extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len); | ||
252 | |||
253 | struct nf_queue_rerouter { | ||
254 | void (*save)(const struct sk_buff *skb, struct nf_info *info); | ||
255 | int (*reroute)(struct sk_buff **skb, const struct nf_info *info); | ||
256 | int rer_size; | ||
257 | }; | ||
258 | |||
259 | #define nf_info_reroute(x) ((void *)x + sizeof(struct nf_info)) | ||
260 | |||
261 | extern int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer); | ||
262 | extern int nf_unregister_queue_rerouter(int pf); | ||
263 | |||
264 | #ifdef CONFIG_PROC_FS | ||
265 | #include <linux/proc_fs.h> | ||
266 | extern struct proc_dir_entry *proc_net_netfilter; | ||
267 | #endif | ||
268 | |||
193 | #else /* !CONFIG_NETFILTER */ | 269 | #else /* !CONFIG_NETFILTER */ |
194 | #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) | 270 | #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) |
195 | static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} | 271 | static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} |
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h new file mode 100644 index 000000000000..1d5b10ae2399 --- /dev/null +++ b/include/linux/netfilter/nfnetlink.h | |||
@@ -0,0 +1,169 @@ | |||
1 | #ifndef _NFNETLINK_H | ||
2 | #define _NFNETLINK_H | ||
3 | #include <linux/types.h> | ||
4 | |||
5 | #ifndef __KERNEL__ | ||
6 | /* nfnetlink groups: Up to 32 maximum - backwards compatibility for userspace */ | ||
7 | #define NF_NETLINK_CONNTRACK_NEW 0x00000001 | ||
8 | #define NF_NETLINK_CONNTRACK_UPDATE 0x00000002 | ||
9 | #define NF_NETLINK_CONNTRACK_DESTROY 0x00000004 | ||
10 | #define NF_NETLINK_CONNTRACK_EXP_NEW 0x00000008 | ||
11 | #define NF_NETLINK_CONNTRACK_EXP_UPDATE 0x00000010 | ||
12 | #define NF_NETLINK_CONNTRACK_EXP_DESTROY 0x00000020 | ||
13 | #endif | ||
14 | |||
15 | enum nfnetlink_groups { | ||
16 | NFNLGRP_NONE, | ||
17 | #define NFNLGRP_NONE NFNLGRP_NONE | ||
18 | NFNLGRP_CONNTRACK_NEW, | ||
19 | #define NFNLGRP_CONNTRACK_NEW NFNLGRP_CONNTRACK_NEW | ||
20 | NFNLGRP_CONNTRACK_UPDATE, | ||
21 | #define NFNLGRP_CONNTRACK_UPDATE NFNLGRP_CONNTRACK_UPDATE | ||
22 | NFNLGRP_CONNTRACK_DESTROY, | ||
23 | #define NFNLGRP_CONNTRACK_DESTROY NFNLGRP_CONNTRACK_DESTROY | ||
24 | NFNLGRP_CONNTRACK_EXP_NEW, | ||
25 | #define NFNLGRP_CONNTRACK_EXP_NEW NFNLGRP_CONNTRACK_EXP_NEW | ||
26 | NFNLGRP_CONNTRACK_EXP_UPDATE, | ||
27 | #define NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_UPDATE | ||
28 | NFNLGRP_CONNTRACK_EXP_DESTROY, | ||
29 | #define NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_CONNTRACK_EXP_DESTROY | ||
30 | __NFNLGRP_MAX, | ||
31 | }; | ||
32 | #define NFNLGRP_MAX (__NFNLGRP_MAX - 1) | ||
33 | |||
34 | /* Generic structure for encapsulation optional netfilter information. | ||
35 | * It is reminiscent of sockaddr, but with sa_family replaced | ||
36 | * with attribute type. | ||
37 | * ! This should someday be put somewhere generic as now rtnetlink and | ||
38 | * ! nfnetlink use the same attributes methods. - J. Schulist. | ||
39 | */ | ||
40 | |||
41 | struct nfattr | ||
42 | { | ||
43 | u_int16_t nfa_len; | ||
44 | u_int16_t nfa_type; | ||
45 | } __attribute__ ((packed)); | ||
46 | |||
47 | /* FIXME: Shamelessly copy and pasted from rtnetlink.h, it's time | ||
48 | * to put this in a generic file */ | ||
49 | |||
50 | #define NFA_ALIGNTO 4 | ||
51 | #define NFA_ALIGN(len) (((len) + NFA_ALIGNTO - 1) & ~(NFA_ALIGNTO - 1)) | ||
52 | #define NFA_OK(nfa,len) ((len) > 0 && (nfa)->nfa_len >= sizeof(struct nfattr) \ | ||
53 | && (nfa)->nfa_len <= (len)) | ||
54 | #define NFA_NEXT(nfa,attrlen) ((attrlen) -= NFA_ALIGN((nfa)->nfa_len), \ | ||
55 | (struct nfattr *)(((char *)(nfa)) + NFA_ALIGN((nfa)->nfa_len))) | ||
56 | #define NFA_LENGTH(len) (NFA_ALIGN(sizeof(struct nfattr)) + (len)) | ||
57 | #define NFA_SPACE(len) NFA_ALIGN(NFA_LENGTH(len)) | ||
58 | #define NFA_DATA(nfa) ((void *)(((char *)(nfa)) + NFA_LENGTH(0))) | ||
59 | #define NFA_PAYLOAD(nfa) ((int)((nfa)->nfa_len) - NFA_LENGTH(0)) | ||
60 | #define NFA_NEST(skb, type) \ | ||
61 | ({ struct nfattr *__start = (struct nfattr *) (skb)->tail; \ | ||
62 | NFA_PUT(skb, type, 0, NULL); \ | ||
63 | __start; }) | ||
64 | #define NFA_NEST_END(skb, start) \ | ||
65 | ({ (start)->nfa_len = ((skb)->tail - (unsigned char *) (start)); \ | ||
66 | (skb)->len; }) | ||
67 | #define NFA_NEST_CANCEL(skb, start) \ | ||
68 | ({ if (start) \ | ||
69 | skb_trim(skb, (unsigned char *) (start) - (skb)->data); \ | ||
70 | -1; }) | ||
71 | |||
72 | /* General form of address family dependent message. | ||
73 | */ | ||
74 | struct nfgenmsg { | ||
75 | u_int8_t nfgen_family; /* AF_xxx */ | ||
76 | u_int8_t version; /* nfnetlink version */ | ||
77 | u_int16_t res_id; /* resource id */ | ||
78 | } __attribute__ ((packed)); | ||
79 | |||
80 | #define NFNETLINK_V0 0 | ||
81 | |||
82 | #define NFM_NFA(n) ((struct nfattr *)(((char *)(n)) \ | ||
83 | + NLMSG_ALIGN(sizeof(struct nfgenmsg)))) | ||
84 | #define NFM_PAYLOAD(n) NLMSG_PAYLOAD(n, sizeof(struct nfgenmsg)) | ||
85 | |||
86 | /* netfilter netlink message types are split in two pieces: | ||
87 | * 8 bit subsystem, 8bit operation. | ||
88 | */ | ||
89 | |||
90 | #define NFNL_SUBSYS_ID(x) ((x & 0xff00) >> 8) | ||
91 | #define NFNL_MSG_TYPE(x) (x & 0x00ff) | ||
92 | |||
93 | /* No enum here, otherwise __stringify() trick of MODULE_ALIAS_NFNL_SUBSYS() | ||
94 | * won't work anymore */ | ||
95 | #define NFNL_SUBSYS_NONE 0 | ||
96 | #define NFNL_SUBSYS_CTNETLINK 1 | ||
97 | #define NFNL_SUBSYS_CTNETLINK_EXP 2 | ||
98 | #define NFNL_SUBSYS_QUEUE 3 | ||
99 | #define NFNL_SUBSYS_ULOG 4 | ||
100 | #define NFNL_SUBSYS_COUNT 5 | ||
101 | |||
102 | #ifdef __KERNEL__ | ||
103 | |||
104 | #include <linux/netlink.h> | ||
105 | #include <linux/capability.h> | ||
106 | |||
107 | struct nfnl_callback | ||
108 | { | ||
109 | int (*call)(struct sock *nl, struct sk_buff *skb, | ||
110 | struct nlmsghdr *nlh, struct nfattr *cda[], int *errp); | ||
111 | kernel_cap_t cap_required; /* capabilities required for this msg */ | ||
112 | u_int16_t attr_count; /* number of nfattr's */ | ||
113 | }; | ||
114 | |||
115 | struct nfnetlink_subsystem | ||
116 | { | ||
117 | const char *name; | ||
118 | __u8 subsys_id; /* nfnetlink subsystem ID */ | ||
119 | __u8 cb_count; /* number of callbacks */ | ||
120 | struct nfnl_callback *cb; /* callback for individual types */ | ||
121 | }; | ||
122 | |||
123 | extern void __nfa_fill(struct sk_buff *skb, int attrtype, | ||
124 | int attrlen, const void *data); | ||
125 | #define NFA_PUT(skb, attrtype, attrlen, data) \ | ||
126 | ({ if (skb_tailroom(skb) < (int)NFA_SPACE(attrlen)) goto nfattr_failure; \ | ||
127 | __nfa_fill(skb, attrtype, attrlen, data); }) | ||
128 | |||
129 | extern struct semaphore nfnl_sem; | ||
130 | |||
131 | #define nfnl_shlock() down(&nfnl_sem) | ||
132 | #define nfnl_shlock_nowait() down_trylock(&nfnl_sem) | ||
133 | |||
134 | #define nfnl_shunlock() do { up(&nfnl_sem); \ | ||
135 | if(nfnl && nfnl->sk_receive_queue.qlen) \ | ||
136 | nfnl->sk_data_ready(nfnl, 0); \ | ||
137 | } while(0) | ||
138 | |||
139 | extern void nfnl_lock(void); | ||
140 | extern void nfnl_unlock(void); | ||
141 | |||
142 | extern int nfnetlink_subsys_register(struct nfnetlink_subsystem *n); | ||
143 | extern int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n); | ||
144 | |||
145 | extern int nfattr_parse(struct nfattr *tb[], int maxattr, | ||
146 | struct nfattr *nfa, int len); | ||
147 | |||
148 | #define nfattr_parse_nested(tb, max, nfa) \ | ||
149 | nfattr_parse((tb), (max), NFA_DATA((nfa)), NFA_PAYLOAD((nfa))) | ||
150 | |||
151 | #define nfattr_bad_size(tb, max, cta_min) \ | ||
152 | ({ int __i, __res = 0; \ | ||
153 | for (__i=0; __i<max; __i++) \ | ||
154 | if (tb[__i] && NFA_PAYLOAD(tb[__i]) < cta_min[__i]){ \ | ||
155 | __res = 1; \ | ||
156 | break; \ | ||
157 | } \ | ||
158 | __res; \ | ||
159 | }) | ||
160 | |||
161 | extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, | ||
162 | int echo); | ||
163 | extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags); | ||
164 | |||
165 | #define MODULE_ALIAS_NFNL_SUBSYS(subsys) \ | ||
166 | MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys)) | ||
167 | |||
168 | #endif /* __KERNEL__ */ | ||
169 | #endif /* _NFNETLINK_H */ | ||
diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h new file mode 100644 index 000000000000..5c55751c78e4 --- /dev/null +++ b/include/linux/netfilter/nfnetlink_conntrack.h | |||
@@ -0,0 +1,124 @@ | |||
1 | #ifndef _IPCONNTRACK_NETLINK_H | ||
2 | #define _IPCONNTRACK_NETLINK_H | ||
3 | #include <linux/netfilter/nfnetlink.h> | ||
4 | |||
5 | enum cntl_msg_types { | ||
6 | IPCTNL_MSG_CT_NEW, | ||
7 | IPCTNL_MSG_CT_GET, | ||
8 | IPCTNL_MSG_CT_DELETE, | ||
9 | IPCTNL_MSG_CT_GET_CTRZERO, | ||
10 | |||
11 | IPCTNL_MSG_MAX | ||
12 | }; | ||
13 | |||
14 | enum ctnl_exp_msg_types { | ||
15 | IPCTNL_MSG_EXP_NEW, | ||
16 | IPCTNL_MSG_EXP_GET, | ||
17 | IPCTNL_MSG_EXP_DELETE, | ||
18 | |||
19 | IPCTNL_MSG_EXP_MAX | ||
20 | }; | ||
21 | |||
22 | |||
23 | enum ctattr_type { | ||
24 | CTA_UNSPEC, | ||
25 | CTA_TUPLE_ORIG, | ||
26 | CTA_TUPLE_REPLY, | ||
27 | CTA_STATUS, | ||
28 | CTA_PROTOINFO, | ||
29 | CTA_HELP, | ||
30 | CTA_NAT, | ||
31 | CTA_TIMEOUT, | ||
32 | CTA_MARK, | ||
33 | CTA_COUNTERS_ORIG, | ||
34 | CTA_COUNTERS_REPLY, | ||
35 | CTA_USE, | ||
36 | CTA_ID, | ||
37 | __CTA_MAX | ||
38 | }; | ||
39 | #define CTA_MAX (__CTA_MAX - 1) | ||
40 | |||
41 | enum ctattr_tuple { | ||
42 | CTA_TUPLE_UNSPEC, | ||
43 | CTA_TUPLE_IP, | ||
44 | CTA_TUPLE_PROTO, | ||
45 | __CTA_TUPLE_MAX | ||
46 | }; | ||
47 | #define CTA_TUPLE_MAX (__CTA_TUPLE_MAX - 1) | ||
48 | |||
49 | enum ctattr_ip { | ||
50 | CTA_IP_UNSPEC, | ||
51 | CTA_IP_V4_SRC, | ||
52 | CTA_IP_V4_DST, | ||
53 | CTA_IP_V6_SRC, | ||
54 | CTA_IP_V6_DST, | ||
55 | __CTA_IP_MAX | ||
56 | }; | ||
57 | #define CTA_IP_MAX (__CTA_IP_MAX - 1) | ||
58 | |||
59 | enum ctattr_l4proto { | ||
60 | CTA_PROTO_UNSPEC, | ||
61 | CTA_PROTO_NUM, | ||
62 | CTA_PROTO_SRC_PORT, | ||
63 | CTA_PROTO_DST_PORT, | ||
64 | CTA_PROTO_ICMP_ID, | ||
65 | CTA_PROTO_ICMP_TYPE, | ||
66 | CTA_PROTO_ICMP_CODE, | ||
67 | __CTA_PROTO_MAX | ||
68 | }; | ||
69 | #define CTA_PROTO_MAX (__CTA_PROTO_MAX - 1) | ||
70 | |||
71 | enum ctattr_protoinfo { | ||
72 | CTA_PROTOINFO_UNSPEC, | ||
73 | CTA_PROTOINFO_TCP_STATE, | ||
74 | __CTA_PROTOINFO_MAX | ||
75 | }; | ||
76 | #define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1) | ||
77 | |||
78 | enum ctattr_counters { | ||
79 | CTA_COUNTERS_UNSPEC, | ||
80 | CTA_COUNTERS_PACKETS, | ||
81 | CTA_COUNTERS_BYTES, | ||
82 | __CTA_COUNTERS_MAX | ||
83 | }; | ||
84 | #define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1) | ||
85 | |||
86 | enum ctattr_nat { | ||
87 | CTA_NAT_UNSPEC, | ||
88 | CTA_NAT_MINIP, | ||
89 | CTA_NAT_MAXIP, | ||
90 | CTA_NAT_PROTO, | ||
91 | __CTA_NAT_MAX | ||
92 | }; | ||
93 | #define CTA_NAT_MAX (__CTA_NAT_MAX - 1) | ||
94 | |||
95 | enum ctattr_protonat { | ||
96 | CTA_PROTONAT_UNSPEC, | ||
97 | CTA_PROTONAT_PORT_MIN, | ||
98 | CTA_PROTONAT_PORT_MAX, | ||
99 | __CTA_PROTONAT_MAX | ||
100 | }; | ||
101 | #define CTA_PROTONAT_MAX (__CTA_PROTONAT_MAX - 1) | ||
102 | |||
103 | enum ctattr_expect { | ||
104 | CTA_EXPECT_UNSPEC, | ||
105 | CTA_EXPECT_MASTER, | ||
106 | CTA_EXPECT_TUPLE, | ||
107 | CTA_EXPECT_MASK, | ||
108 | CTA_EXPECT_TIMEOUT, | ||
109 | CTA_EXPECT_ID, | ||
110 | CTA_EXPECT_HELP_NAME, | ||
111 | __CTA_EXPECT_MAX | ||
112 | }; | ||
113 | #define CTA_EXPECT_MAX (__CTA_EXPECT_MAX - 1) | ||
114 | |||
115 | enum ctattr_help { | ||
116 | CTA_HELP_UNSPEC, | ||
117 | CTA_HELP_NAME, | ||
118 | __CTA_HELP_MAX | ||
119 | }; | ||
120 | #define CTA_HELP_MAX (__CTA_HELP_MAX - 1) | ||
121 | |||
122 | #define CTA_HELP_MAXNAMESIZE 32 | ||
123 | |||
124 | #endif /* _IPCONNTRACK_NETLINK_H */ | ||
diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h new file mode 100644 index 000000000000..b04b03880595 --- /dev/null +++ b/include/linux/netfilter/nfnetlink_log.h | |||
@@ -0,0 +1,88 @@ | |||
1 | #ifndef _NFNETLINK_LOG_H | ||
2 | #define _NFNETLINK_LOG_H | ||
3 | |||
4 | /* This file describes the netlink messages (i.e. 'protocol packets'), | ||
5 | * and not any kind of function definitions. It is shared between kernel and | ||
6 | * userspace. Don't put kernel specific stuff in here */ | ||
7 | |||
8 | #include <linux/types.h> | ||
9 | #include <linux/netfilter/nfnetlink.h> | ||
10 | |||
11 | enum nfulnl_msg_types { | ||
12 | NFULNL_MSG_PACKET, /* packet from kernel to userspace */ | ||
13 | NFULNL_MSG_CONFIG, /* connect to a particular queue */ | ||
14 | |||
15 | NFULNL_MSG_MAX | ||
16 | }; | ||
17 | |||
18 | struct nfulnl_msg_packet_hdr { | ||
19 | u_int16_t hw_protocol; /* hw protocol (network order) */ | ||
20 | u_int8_t hook; /* netfilter hook */ | ||
21 | u_int8_t _pad; | ||
22 | } __attribute__ ((packed)); | ||
23 | |||
24 | struct nfulnl_msg_packet_hw { | ||
25 | u_int16_t hw_addrlen; | ||
26 | u_int16_t _pad; | ||
27 | u_int8_t hw_addr[8]; | ||
28 | } __attribute__ ((packed)); | ||
29 | |||
30 | struct nfulnl_msg_packet_timestamp { | ||
31 | aligned_u64 sec; | ||
32 | aligned_u64 usec; | ||
33 | } __attribute__ ((packed)); | ||
34 | |||
35 | #define NFULNL_PREFIXLEN 30 /* just like old log target */ | ||
36 | |||
37 | enum nfulnl_attr_type { | ||
38 | NFULA_UNSPEC, | ||
39 | NFULA_PACKET_HDR, | ||
40 | NFULA_MARK, /* u_int32_t nfmark */ | ||
41 | NFULA_TIMESTAMP, /* nfulnl_msg_packet_timestamp */ | ||
42 | NFULA_IFINDEX_INDEV, /* u_int32_t ifindex */ | ||
43 | NFULA_IFINDEX_OUTDEV, /* u_int32_t ifindex */ | ||
44 | NFULA_IFINDEX_PHYSINDEV, /* u_int32_t ifindex */ | ||
45 | NFULA_IFINDEX_PHYSOUTDEV, /* u_int32_t ifindex */ | ||
46 | NFULA_HWADDR, /* nfulnl_msg_packet_hw */ | ||
47 | NFULA_PAYLOAD, /* opaque data payload */ | ||
48 | NFULA_PREFIX, /* string prefix */ | ||
49 | NFULA_UID, /* user id of socket */ | ||
50 | |||
51 | __NFULA_MAX | ||
52 | }; | ||
53 | #define NFULA_MAX (__NFULA_MAX - 1) | ||
54 | |||
55 | enum nfulnl_msg_config_cmds { | ||
56 | NFULNL_CFG_CMD_NONE, | ||
57 | NFULNL_CFG_CMD_BIND, | ||
58 | NFULNL_CFG_CMD_UNBIND, | ||
59 | NFULNL_CFG_CMD_PF_BIND, | ||
60 | NFULNL_CFG_CMD_PF_UNBIND, | ||
61 | }; | ||
62 | |||
63 | struct nfulnl_msg_config_cmd { | ||
64 | u_int8_t command; /* nfulnl_msg_config_cmds */ | ||
65 | } __attribute__ ((packed)); | ||
66 | |||
67 | struct nfulnl_msg_config_mode { | ||
68 | u_int32_t copy_range; | ||
69 | u_int8_t copy_mode; | ||
70 | u_int8_t _pad; | ||
71 | } __attribute__ ((packed)); | ||
72 | |||
73 | enum nfulnl_attr_config { | ||
74 | NFULA_CFG_UNSPEC, | ||
75 | NFULA_CFG_CMD, /* nfulnl_msg_config_cmd */ | ||
76 | NFULA_CFG_MODE, /* nfulnl_msg_config_mode */ | ||
77 | NFULA_CFG_NLBUFSIZ, /* u_int32_t buffer size */ | ||
78 | NFULA_CFG_TIMEOUT, /* u_int32_t in 1/100 s */ | ||
79 | NFULA_CFG_QTHRESH, /* u_int32_t */ | ||
80 | __NFULA_CFG_MAX | ||
81 | }; | ||
82 | #define NFULA_CFG_MAX (__NFULA_CFG_MAX -1) | ||
83 | |||
84 | #define NFULNL_COPY_NONE 0x00 | ||
85 | #define NFULNL_COPY_META 0x01 | ||
86 | #define NFULNL_COPY_PACKET 0x02 | ||
87 | |||
88 | #endif /* _NFNETLINK_LOG_H */ | ||
diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h new file mode 100644 index 000000000000..9e774373244c --- /dev/null +++ b/include/linux/netfilter/nfnetlink_queue.h | |||
@@ -0,0 +1,89 @@ | |||
1 | #ifndef _NFNETLINK_QUEUE_H | ||
2 | #define _NFNETLINK_QUEUE_H | ||
3 | |||
4 | #include <linux/types.h> | ||
5 | #include <linux/netfilter/nfnetlink.h> | ||
6 | |||
7 | enum nfqnl_msg_types { | ||
8 | NFQNL_MSG_PACKET, /* packet from kernel to userspace */ | ||
9 | NFQNL_MSG_VERDICT, /* verdict from userspace to kernel */ | ||
10 | NFQNL_MSG_CONFIG, /* connect to a particular queue */ | ||
11 | |||
12 | NFQNL_MSG_MAX | ||
13 | }; | ||
14 | |||
15 | struct nfqnl_msg_packet_hdr { | ||
16 | u_int32_t packet_id; /* unique ID of packet in queue */ | ||
17 | u_int16_t hw_protocol; /* hw protocol (network order) */ | ||
18 | u_int8_t hook; /* netfilter hook */ | ||
19 | } __attribute__ ((packed)); | ||
20 | |||
21 | struct nfqnl_msg_packet_hw { | ||
22 | u_int16_t hw_addrlen; | ||
23 | u_int16_t _pad; | ||
24 | u_int8_t hw_addr[8]; | ||
25 | } __attribute__ ((packed)); | ||
26 | |||
27 | struct nfqnl_msg_packet_timestamp { | ||
28 | aligned_u64 sec; | ||
29 | aligned_u64 usec; | ||
30 | } __attribute__ ((packed)); | ||
31 | |||
32 | enum nfqnl_attr_type { | ||
33 | NFQA_UNSPEC, | ||
34 | NFQA_PACKET_HDR, | ||
35 | NFQA_VERDICT_HDR, /* nfqnl_msg_verdict_hrd */ | ||
36 | NFQA_MARK, /* u_int32_t nfmark */ | ||
37 | NFQA_TIMESTAMP, /* nfqnl_msg_packet_timestamp */ | ||
38 | NFQA_IFINDEX_INDEV, /* u_int32_t ifindex */ | ||
39 | NFQA_IFINDEX_OUTDEV, /* u_int32_t ifindex */ | ||
40 | NFQA_IFINDEX_PHYSINDEV, /* u_int32_t ifindex */ | ||
41 | NFQA_IFINDEX_PHYSOUTDEV, /* u_int32_t ifindex */ | ||
42 | NFQA_HWADDR, /* nfqnl_msg_packet_hw */ | ||
43 | NFQA_PAYLOAD, /* opaque data payload */ | ||
44 | |||
45 | __NFQA_MAX | ||
46 | }; | ||
47 | #define NFQA_MAX (__NFQA_MAX - 1) | ||
48 | |||
49 | struct nfqnl_msg_verdict_hdr { | ||
50 | u_int32_t verdict; | ||
51 | u_int32_t id; | ||
52 | } __attribute__ ((packed)); | ||
53 | |||
54 | |||
55 | enum nfqnl_msg_config_cmds { | ||
56 | NFQNL_CFG_CMD_NONE, | ||
57 | NFQNL_CFG_CMD_BIND, | ||
58 | NFQNL_CFG_CMD_UNBIND, | ||
59 | NFQNL_CFG_CMD_PF_BIND, | ||
60 | NFQNL_CFG_CMD_PF_UNBIND, | ||
61 | }; | ||
62 | |||
63 | struct nfqnl_msg_config_cmd { | ||
64 | u_int8_t command; /* nfqnl_msg_config_cmds */ | ||
65 | u_int8_t _pad; | ||
66 | u_int16_t pf; /* AF_xxx for PF_[UN]BIND */ | ||
67 | } __attribute__ ((packed)); | ||
68 | |||
69 | enum nfqnl_config_mode { | ||
70 | NFQNL_COPY_NONE, | ||
71 | NFQNL_COPY_META, | ||
72 | NFQNL_COPY_PACKET, | ||
73 | }; | ||
74 | |||
75 | struct nfqnl_msg_config_params { | ||
76 | u_int32_t copy_range; | ||
77 | u_int8_t copy_mode; /* enum nfqnl_config_mode */ | ||
78 | } __attribute__ ((packed)); | ||
79 | |||
80 | |||
81 | enum nfqnl_attr_config { | ||
82 | NFQA_CFG_UNSPEC, | ||
83 | NFQA_CFG_CMD, /* nfqnl_msg_config_cmd */ | ||
84 | NFQA_CFG_PARAMS, /* nfqnl_msg_config_params */ | ||
85 | __NFQA_CFG_MAX | ||
86 | }; | ||
87 | #define NFQA_CFG_MAX (__NFQA_CFG_MAX-1) | ||
88 | |||
89 | #endif /* _NFNETLINK_QUEUE_H */ | ||
diff --git a/include/linux/netfilter_decnet.h b/include/linux/netfilter_decnet.h index 3064eec9cb8e..6f425369ee29 100644 --- a/include/linux/netfilter_decnet.h +++ b/include/linux/netfilter_decnet.h | |||
@@ -9,6 +9,8 @@ | |||
9 | 9 | ||
10 | #include <linux/netfilter.h> | 10 | #include <linux/netfilter.h> |
11 | 11 | ||
12 | /* only for userspace compatibility */ | ||
13 | #ifndef __KERNEL__ | ||
12 | /* IP Cache bits. */ | 14 | /* IP Cache bits. */ |
13 | /* Src IP address. */ | 15 | /* Src IP address. */ |
14 | #define NFC_DN_SRC 0x0001 | 16 | #define NFC_DN_SRC 0x0001 |
@@ -18,6 +20,7 @@ | |||
18 | #define NFC_DN_IF_IN 0x0004 | 20 | #define NFC_DN_IF_IN 0x0004 |
19 | /* Output device. */ | 21 | /* Output device. */ |
20 | #define NFC_DN_IF_OUT 0x0008 | 22 | #define NFC_DN_IF_OUT 0x0008 |
23 | #endif /* ! __KERNEL__ */ | ||
21 | 24 | ||
22 | /* DECnet Hooks */ | 25 | /* DECnet Hooks */ |
23 | /* After promisc drops, checksum checks. */ | 26 | /* After promisc drops, checksum checks. */ |
@@ -53,7 +56,21 @@ struct nf_dn_rtmsg { | |||
53 | 56 | ||
54 | #define NFDN_RTMSG(r) ((unsigned char *)(r) + NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg))) | 57 | #define NFDN_RTMSG(r) ((unsigned char *)(r) + NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg))) |
55 | 58 | ||
59 | #ifndef __KERNEL__ | ||
60 | /* backwards compatibility for userspace */ | ||
56 | #define DNRMG_L1_GROUP 0x01 | 61 | #define DNRMG_L1_GROUP 0x01 |
57 | #define DNRMG_L2_GROUP 0x02 | 62 | #define DNRMG_L2_GROUP 0x02 |
63 | #endif | ||
64 | |||
65 | enum { | ||
66 | DNRNG_NLGRP_NONE, | ||
67 | #define DNRNG_NLGRP_NONE DNRNG_NLGRP_NONE | ||
68 | DNRNG_NLGRP_L1, | ||
69 | #define DNRNG_NLGRP_L1 DNRNG_NLGRP_L1 | ||
70 | DNRNG_NLGRP_L2, | ||
71 | #define DNRNG_NLGRP_L2 DNRNG_NLGRP_L2 | ||
72 | __DNRNG_NLGRP_MAX | ||
73 | }; | ||
74 | #define DNRNG_NLGRP_MAX (__DNRNG_NLGRP_MAX - 1) | ||
58 | 75 | ||
59 | #endif /*__LINUX_DECNET_NETFILTER_H*/ | 76 | #endif /*__LINUX_DECNET_NETFILTER_H*/ |
diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 3ebc36afae1a..fdc4a9527343 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h | |||
@@ -8,6 +8,8 @@ | |||
8 | #include <linux/config.h> | 8 | #include <linux/config.h> |
9 | #include <linux/netfilter.h> | 9 | #include <linux/netfilter.h> |
10 | 10 | ||
11 | /* only for userspace compatibility */ | ||
12 | #ifndef __KERNEL__ | ||
11 | /* IP Cache bits. */ | 13 | /* IP Cache bits. */ |
12 | /* Src IP address. */ | 14 | /* Src IP address. */ |
13 | #define NFC_IP_SRC 0x0001 | 15 | #define NFC_IP_SRC 0x0001 |
@@ -35,6 +37,7 @@ | |||
35 | #define NFC_IP_DST_PT 0x0400 | 37 | #define NFC_IP_DST_PT 0x0400 |
36 | /* Something else about the proto */ | 38 | /* Something else about the proto */ |
37 | #define NFC_IP_PROTO_UNKNOWN 0x2000 | 39 | #define NFC_IP_PROTO_UNKNOWN 0x2000 |
40 | #endif /* ! __KERNEL__ */ | ||
38 | 41 | ||
39 | /* IP Hooks */ | 42 | /* IP Hooks */ |
40 | /* After promisc drops, checksum checks. */ | 43 | /* After promisc drops, checksum checks. */ |
@@ -77,11 +80,6 @@ enum nf_ip_hook_priorities { | |||
77 | #ifdef __KERNEL__ | 80 | #ifdef __KERNEL__ |
78 | extern int ip_route_me_harder(struct sk_buff **pskb); | 81 | extern int ip_route_me_harder(struct sk_buff **pskb); |
79 | 82 | ||
80 | /* Call this before modifying an existing IP packet: ensures it is | ||
81 | modifiable and linear to the point you care about (writable_len). | ||
82 | Returns true or false. */ | ||
83 | extern int skb_ip_make_writable(struct sk_buff **pskb, | ||
84 | unsigned int writable_len); | ||
85 | #endif /*__KERNEL__*/ | 83 | #endif /*__KERNEL__*/ |
86 | 84 | ||
87 | #endif /*__LINUX_IP_NETFILTER_H*/ | 85 | #endif /*__LINUX_IP_NETFILTER_H*/ |
diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 08fe5f7d14a0..088742befe49 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h | |||
@@ -65,6 +65,63 @@ enum ip_conntrack_status { | |||
65 | 65 | ||
66 | /* Both together */ | 66 | /* Both together */ |
67 | IPS_NAT_DONE_MASK = (IPS_DST_NAT_DONE | IPS_SRC_NAT_DONE), | 67 | IPS_NAT_DONE_MASK = (IPS_DST_NAT_DONE | IPS_SRC_NAT_DONE), |
68 | |||
69 | /* Connection is dying (removed from lists), can not be unset. */ | ||
70 | IPS_DYING_BIT = 9, | ||
71 | IPS_DYING = (1 << IPS_DYING_BIT), | ||
72 | }; | ||
73 | |||
74 | /* Connection tracking event bits */ | ||
75 | enum ip_conntrack_events | ||
76 | { | ||
77 | /* New conntrack */ | ||
78 | IPCT_NEW_BIT = 0, | ||
79 | IPCT_NEW = (1 << IPCT_NEW_BIT), | ||
80 | |||
81 | /* Expected connection */ | ||
82 | IPCT_RELATED_BIT = 1, | ||
83 | IPCT_RELATED = (1 << IPCT_RELATED_BIT), | ||
84 | |||
85 | /* Destroyed conntrack */ | ||
86 | IPCT_DESTROY_BIT = 2, | ||
87 | IPCT_DESTROY = (1 << IPCT_DESTROY_BIT), | ||
88 | |||
89 | /* Timer has been refreshed */ | ||
90 | IPCT_REFRESH_BIT = 3, | ||
91 | IPCT_REFRESH = (1 << IPCT_REFRESH_BIT), | ||
92 | |||
93 | /* Status has changed */ | ||
94 | IPCT_STATUS_BIT = 4, | ||
95 | IPCT_STATUS = (1 << IPCT_STATUS_BIT), | ||
96 | |||
97 | /* Update of protocol info */ | ||
98 | IPCT_PROTOINFO_BIT = 5, | ||
99 | IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT), | ||
100 | |||
101 | /* Volatile protocol info */ | ||
102 | IPCT_PROTOINFO_VOLATILE_BIT = 6, | ||
103 | IPCT_PROTOINFO_VOLATILE = (1 << IPCT_PROTOINFO_VOLATILE_BIT), | ||
104 | |||
105 | /* New helper for conntrack */ | ||
106 | IPCT_HELPER_BIT = 7, | ||
107 | IPCT_HELPER = (1 << IPCT_HELPER_BIT), | ||
108 | |||
109 | /* Update of helper info */ | ||
110 | IPCT_HELPINFO_BIT = 8, | ||
111 | IPCT_HELPINFO = (1 << IPCT_HELPINFO_BIT), | ||
112 | |||
113 | /* Volatile helper info */ | ||
114 | IPCT_HELPINFO_VOLATILE_BIT = 9, | ||
115 | IPCT_HELPINFO_VOLATILE = (1 << IPCT_HELPINFO_VOLATILE_BIT), | ||
116 | |||
117 | /* NAT info */ | ||
118 | IPCT_NATINFO_BIT = 10, | ||
119 | IPCT_NATINFO = (1 << IPCT_NATINFO_BIT), | ||
120 | }; | ||
121 | |||
122 | enum ip_conntrack_expect_events { | ||
123 | IPEXP_NEW_BIT = 0, | ||
124 | IPEXP_NEW = (1 << IPEXP_NEW_BIT), | ||
68 | }; | 125 | }; |
69 | 126 | ||
70 | #ifdef __KERNEL__ | 127 | #ifdef __KERNEL__ |
@@ -152,6 +209,9 @@ struct ip_conntrack | |||
152 | /* Current number of expected connections */ | 209 | /* Current number of expected connections */ |
153 | unsigned int expecting; | 210 | unsigned int expecting; |
154 | 211 | ||
212 | /* Unique ID that identifies this conntrack*/ | ||
213 | unsigned int id; | ||
214 | |||
155 | /* Helper, if any. */ | 215 | /* Helper, if any. */ |
156 | struct ip_conntrack_helper *helper; | 216 | struct ip_conntrack_helper *helper; |
157 | 217 | ||
@@ -171,7 +231,7 @@ struct ip_conntrack | |||
171 | #endif /* CONFIG_IP_NF_NAT_NEEDED */ | 231 | #endif /* CONFIG_IP_NF_NAT_NEEDED */ |
172 | 232 | ||
173 | #if defined(CONFIG_IP_NF_CONNTRACK_MARK) | 233 | #if defined(CONFIG_IP_NF_CONNTRACK_MARK) |
174 | unsigned long mark; | 234 | u_int32_t mark; |
175 | #endif | 235 | #endif |
176 | 236 | ||
177 | /* Traversed often, so hopefully in different cacheline to top */ | 237 | /* Traversed often, so hopefully in different cacheline to top */ |
@@ -200,6 +260,9 @@ struct ip_conntrack_expect | |||
200 | /* Usage count. */ | 260 | /* Usage count. */ |
201 | atomic_t use; | 261 | atomic_t use; |
202 | 262 | ||
263 | /* Unique ID */ | ||
264 | unsigned int id; | ||
265 | |||
203 | #ifdef CONFIG_IP_NF_NAT_NEEDED | 266 | #ifdef CONFIG_IP_NF_NAT_NEEDED |
204 | /* This is the original per-proto part, used to map the | 267 | /* This is the original per-proto part, used to map the |
205 | * expected connection the way the recipient expects. */ | 268 | * expected connection the way the recipient expects. */ |
@@ -239,7 +302,12 @@ ip_conntrack_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) | |||
239 | } | 302 | } |
240 | 303 | ||
241 | /* decrement reference count on a conntrack */ | 304 | /* decrement reference count on a conntrack */ |
242 | extern void ip_conntrack_put(struct ip_conntrack *ct); | 305 | static inline void |
306 | ip_conntrack_put(struct ip_conntrack *ct) | ||
307 | { | ||
308 | IP_NF_ASSERT(ct); | ||
309 | nf_conntrack_put(&ct->ct_general); | ||
310 | } | ||
243 | 311 | ||
244 | /* call to create an explicit dependency on ip_conntrack. */ | 312 | /* call to create an explicit dependency on ip_conntrack. */ |
245 | extern void need_ip_conntrack(void); | 313 | extern void need_ip_conntrack(void); |
@@ -274,12 +342,50 @@ extern void | |||
274 | ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *data), | 342 | ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *data), |
275 | void *data); | 343 | void *data); |
276 | 344 | ||
345 | extern struct ip_conntrack_helper * | ||
346 | __ip_conntrack_helper_find_byname(const char *); | ||
347 | extern struct ip_conntrack_helper * | ||
348 | ip_conntrack_helper_find_get(const struct ip_conntrack_tuple *tuple); | ||
349 | extern void ip_conntrack_helper_put(struct ip_conntrack_helper *helper); | ||
350 | |||
351 | extern struct ip_conntrack_protocol * | ||
352 | __ip_conntrack_proto_find(u_int8_t protocol); | ||
353 | extern struct ip_conntrack_protocol * | ||
354 | ip_conntrack_proto_find_get(u_int8_t protocol); | ||
355 | extern void ip_conntrack_proto_put(struct ip_conntrack_protocol *proto); | ||
356 | |||
357 | extern void ip_ct_remove_expectations(struct ip_conntrack *ct); | ||
358 | |||
359 | extern struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *, | ||
360 | struct ip_conntrack_tuple *); | ||
361 | |||
362 | extern void ip_conntrack_free(struct ip_conntrack *ct); | ||
363 | |||
364 | extern void ip_conntrack_hash_insert(struct ip_conntrack *ct); | ||
365 | |||
366 | extern struct ip_conntrack_expect * | ||
367 | __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple); | ||
368 | |||
369 | extern struct ip_conntrack_expect * | ||
370 | ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple); | ||
371 | |||
372 | extern struct ip_conntrack_tuple_hash * | ||
373 | __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, | ||
374 | const struct ip_conntrack *ignored_conntrack); | ||
375 | |||
376 | extern void ip_conntrack_flush(void); | ||
377 | |||
277 | /* It's confirmed if it is, or has been in the hash table. */ | 378 | /* It's confirmed if it is, or has been in the hash table. */ |
278 | static inline int is_confirmed(struct ip_conntrack *ct) | 379 | static inline int is_confirmed(struct ip_conntrack *ct) |
279 | { | 380 | { |
280 | return test_bit(IPS_CONFIRMED_BIT, &ct->status); | 381 | return test_bit(IPS_CONFIRMED_BIT, &ct->status); |
281 | } | 382 | } |
282 | 383 | ||
384 | static inline int is_dying(struct ip_conntrack *ct) | ||
385 | { | ||
386 | return test_bit(IPS_DYING_BIT, &ct->status); | ||
387 | } | ||
388 | |||
283 | extern unsigned int ip_conntrack_htable_size; | 389 | extern unsigned int ip_conntrack_htable_size; |
284 | 390 | ||
285 | struct ip_conntrack_stat | 391 | struct ip_conntrack_stat |
@@ -303,6 +409,85 @@ struct ip_conntrack_stat | |||
303 | 409 | ||
304 | #define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++) | 410 | #define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++) |
305 | 411 | ||
412 | #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS | ||
413 | #include <linux/notifier.h> | ||
414 | #include <linux/interrupt.h> | ||
415 | |||
416 | struct ip_conntrack_ecache { | ||
417 | struct ip_conntrack *ct; | ||
418 | unsigned int events; | ||
419 | }; | ||
420 | DECLARE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache); | ||
421 | |||
422 | #define CONNTRACK_ECACHE(x) (__get_cpu_var(ip_conntrack_ecache).x) | ||
423 | |||
424 | extern struct notifier_block *ip_conntrack_chain; | ||
425 | extern struct notifier_block *ip_conntrack_expect_chain; | ||
426 | |||
427 | static inline int ip_conntrack_register_notifier(struct notifier_block *nb) | ||
428 | { | ||
429 | return notifier_chain_register(&ip_conntrack_chain, nb); | ||
430 | } | ||
431 | |||
432 | static inline int ip_conntrack_unregister_notifier(struct notifier_block *nb) | ||
433 | { | ||
434 | return notifier_chain_unregister(&ip_conntrack_chain, nb); | ||
435 | } | ||
436 | |||
437 | static inline int | ||
438 | ip_conntrack_expect_register_notifier(struct notifier_block *nb) | ||
439 | { | ||
440 | return notifier_chain_register(&ip_conntrack_expect_chain, nb); | ||
441 | } | ||
442 | |||
443 | static inline int | ||
444 | ip_conntrack_expect_unregister_notifier(struct notifier_block *nb) | ||
445 | { | ||
446 | return notifier_chain_unregister(&ip_conntrack_expect_chain, nb); | ||
447 | } | ||
448 | |||
449 | extern void ip_ct_deliver_cached_events(const struct ip_conntrack *ct); | ||
450 | extern void __ip_ct_event_cache_init(struct ip_conntrack *ct); | ||
451 | |||
452 | static inline void | ||
453 | ip_conntrack_event_cache(enum ip_conntrack_events event, | ||
454 | const struct sk_buff *skb) | ||
455 | { | ||
456 | struct ip_conntrack *ct = (struct ip_conntrack *)skb->nfct; | ||
457 | struct ip_conntrack_ecache *ecache; | ||
458 | |||
459 | local_bh_disable(); | ||
460 | ecache = &__get_cpu_var(ip_conntrack_ecache); | ||
461 | if (ct != ecache->ct) | ||
462 | __ip_ct_event_cache_init(ct); | ||
463 | ecache->events |= event; | ||
464 | local_bh_enable(); | ||
465 | } | ||
466 | |||
467 | static inline void ip_conntrack_event(enum ip_conntrack_events event, | ||
468 | struct ip_conntrack *ct) | ||
469 | { | ||
470 | if (is_confirmed(ct) && !is_dying(ct)) | ||
471 | notifier_call_chain(&ip_conntrack_chain, event, ct); | ||
472 | } | ||
473 | |||
474 | static inline void | ||
475 | ip_conntrack_expect_event(enum ip_conntrack_expect_events event, | ||
476 | struct ip_conntrack_expect *exp) | ||
477 | { | ||
478 | notifier_call_chain(&ip_conntrack_expect_chain, event, exp); | ||
479 | } | ||
480 | #else /* CONFIG_IP_NF_CONNTRACK_EVENTS */ | ||
481 | static inline void ip_conntrack_event_cache(enum ip_conntrack_events event, | ||
482 | const struct sk_buff *skb) {} | ||
483 | static inline void ip_conntrack_event(enum ip_conntrack_events event, | ||
484 | struct ip_conntrack *ct) {} | ||
485 | static inline void ip_ct_deliver_cached_events(const struct ip_conntrack *ct) {} | ||
486 | static inline void | ||
487 | ip_conntrack_expect_event(enum ip_conntrack_expect_events event, | ||
488 | struct ip_conntrack_expect *exp) {} | ||
489 | #endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */ | ||
490 | |||
306 | #ifdef CONFIG_IP_NF_NAT_NEEDED | 491 | #ifdef CONFIG_IP_NF_NAT_NEEDED |
307 | static inline int ip_nat_initialized(struct ip_conntrack *conntrack, | 492 | static inline int ip_nat_initialized(struct ip_conntrack *conntrack, |
308 | enum ip_nat_manip_type manip) | 493 | enum ip_nat_manip_type manip) |
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h index 694aec9b4784..dc4d2a0575de 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_core.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h | |||
@@ -2,6 +2,9 @@ | |||
2 | #define _IP_CONNTRACK_CORE_H | 2 | #define _IP_CONNTRACK_CORE_H |
3 | #include <linux/netfilter.h> | 3 | #include <linux/netfilter.h> |
4 | 4 | ||
5 | #define MAX_IP_CT_PROTO 256 | ||
6 | extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO]; | ||
7 | |||
5 | /* This header is used to share core functionality between the | 8 | /* This header is used to share core functionality between the |
6 | standalone connection tracking module, and the compatibility layer's use | 9 | standalone connection tracking module, and the compatibility layer's use |
7 | of connection tracking. */ | 10 | of connection tracking. */ |
@@ -38,12 +41,19 @@ extern int __ip_conntrack_confirm(struct sk_buff **pskb); | |||
38 | /* Confirm a connection: returns NF_DROP if packet must be dropped. */ | 41 | /* Confirm a connection: returns NF_DROP if packet must be dropped. */ |
39 | static inline int ip_conntrack_confirm(struct sk_buff **pskb) | 42 | static inline int ip_conntrack_confirm(struct sk_buff **pskb) |
40 | { | 43 | { |
41 | if ((*pskb)->nfct | 44 | struct ip_conntrack *ct = (struct ip_conntrack *)(*pskb)->nfct; |
42 | && !is_confirmed((struct ip_conntrack *)(*pskb)->nfct)) | 45 | int ret = NF_ACCEPT; |
43 | return __ip_conntrack_confirm(pskb); | 46 | |
44 | return NF_ACCEPT; | 47 | if (ct) { |
48 | if (!is_confirmed(ct)) | ||
49 | ret = __ip_conntrack_confirm(pskb); | ||
50 | ip_ct_deliver_cached_events(ct); | ||
51 | } | ||
52 | return ret; | ||
45 | } | 53 | } |
46 | 54 | ||
55 | extern void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp); | ||
56 | |||
47 | extern struct list_head *ip_conntrack_hash; | 57 | extern struct list_head *ip_conntrack_hash; |
48 | extern struct list_head ip_conntrack_expect_list; | 58 | extern struct list_head ip_conntrack_expect_list; |
49 | extern rwlock_t ip_conntrack_lock; | 59 | extern rwlock_t ip_conntrack_lock; |
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_helper.h b/include/linux/netfilter_ipv4/ip_conntrack_helper.h index 3692daa93dec..8d69279ccfe4 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_helper.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_helper.h | |||
@@ -24,6 +24,8 @@ struct ip_conntrack_helper | |||
24 | int (*help)(struct sk_buff **pskb, | 24 | int (*help)(struct sk_buff **pskb, |
25 | struct ip_conntrack *ct, | 25 | struct ip_conntrack *ct, |
26 | enum ip_conntrack_info conntrackinfo); | 26 | enum ip_conntrack_info conntrackinfo); |
27 | |||
28 | int (*to_nfattr)(struct sk_buff *skb, const struct ip_conntrack *ct); | ||
27 | }; | 29 | }; |
28 | 30 | ||
29 | extern int ip_conntrack_helper_register(struct ip_conntrack_helper *); | 31 | extern int ip_conntrack_helper_register(struct ip_conntrack_helper *); |
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h index e20b57c5e1b7..b6b99be8632a 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #ifndef _IP_CONNTRACK_PROTOCOL_H | 2 | #ifndef _IP_CONNTRACK_PROTOCOL_H |
3 | #define _IP_CONNTRACK_PROTOCOL_H | 3 | #define _IP_CONNTRACK_PROTOCOL_H |
4 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 4 | #include <linux/netfilter_ipv4/ip_conntrack.h> |
5 | #include <linux/netfilter/nfnetlink_conntrack.h> | ||
5 | 6 | ||
6 | struct seq_file; | 7 | struct seq_file; |
7 | 8 | ||
@@ -47,22 +48,22 @@ struct ip_conntrack_protocol | |||
47 | int (*error)(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, | 48 | int (*error)(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, |
48 | unsigned int hooknum); | 49 | unsigned int hooknum); |
49 | 50 | ||
51 | /* convert protoinfo to nfnetink attributes */ | ||
52 | int (*to_nfattr)(struct sk_buff *skb, struct nfattr *nfa, | ||
53 | const struct ip_conntrack *ct); | ||
54 | |||
55 | int (*tuple_to_nfattr)(struct sk_buff *skb, | ||
56 | const struct ip_conntrack_tuple *t); | ||
57 | int (*nfattr_to_tuple)(struct nfattr *tb[], | ||
58 | struct ip_conntrack_tuple *t); | ||
59 | |||
50 | /* Module (if any) which this is connected to. */ | 60 | /* Module (if any) which this is connected to. */ |
51 | struct module *me; | 61 | struct module *me; |
52 | }; | 62 | }; |
53 | 63 | ||
54 | #define MAX_IP_CT_PROTO 256 | ||
55 | extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO]; | ||
56 | |||
57 | /* Protocol registration. */ | 64 | /* Protocol registration. */ |
58 | extern int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto); | 65 | extern int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto); |
59 | extern void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto); | 66 | extern void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto); |
60 | |||
61 | static inline struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol) | ||
62 | { | ||
63 | return ip_ct_protos[protocol]; | ||
64 | } | ||
65 | |||
66 | /* Existing built-in protocols */ | 67 | /* Existing built-in protocols */ |
67 | extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp; | 68 | extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp; |
68 | extern struct ip_conntrack_protocol ip_conntrack_protocol_udp; | 69 | extern struct ip_conntrack_protocol ip_conntrack_protocol_udp; |
@@ -73,6 +74,11 @@ extern int ip_conntrack_protocol_tcp_init(void); | |||
73 | /* Log invalid packets */ | 74 | /* Log invalid packets */ |
74 | extern unsigned int ip_ct_log_invalid; | 75 | extern unsigned int ip_ct_log_invalid; |
75 | 76 | ||
77 | extern int ip_ct_port_tuple_to_nfattr(struct sk_buff *, | ||
78 | const struct ip_conntrack_tuple *); | ||
79 | extern int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[], | ||
80 | struct ip_conntrack_tuple *); | ||
81 | |||
76 | #ifdef CONFIG_SYSCTL | 82 | #ifdef CONFIG_SYSCTL |
77 | #ifdef DEBUG_INVALID_PACKETS | 83 | #ifdef DEBUG_INVALID_PACKETS |
78 | #define LOG_INVALID(proto) \ | 84 | #define LOG_INVALID(proto) \ |
diff --git a/include/linux/netfilter_ipv4/ip_logging.h b/include/linux/netfilter_ipv4/ip_logging.h deleted file mode 100644 index 0c5c52cb6589..000000000000 --- a/include/linux/netfilter_ipv4/ip_logging.h +++ /dev/null | |||
@@ -1,20 +0,0 @@ | |||
1 | /* IPv4 macros for the internal logging interface. */ | ||
2 | #ifndef __IP_LOGGING_H | ||
3 | #define __IP_LOGGING_H | ||
4 | |||
5 | #ifdef __KERNEL__ | ||
6 | #include <linux/socket.h> | ||
7 | #include <linux/netfilter_logging.h> | ||
8 | |||
9 | #define nf_log_ip_packet(pskb,hooknum,in,out,fmt,args...) \ | ||
10 | nf_log_packet(AF_INET,pskb,hooknum,in,out,fmt,##args) | ||
11 | |||
12 | #define nf_log_ip(pfh,len,fmt,args...) \ | ||
13 | nf_log(AF_INET,pfh,len,fmt,##args) | ||
14 | |||
15 | #define nf_ip_log_register(logging) nf_log_register(AF_INET,logging) | ||
16 | #define nf_ip_log_unregister(logging) nf_log_unregister(AF_INET,logging) | ||
17 | |||
18 | #endif /*__KERNEL__*/ | ||
19 | |||
20 | #endif /*__IP_LOGGING_H*/ | ||
diff --git a/include/linux/netfilter_ipv4/ip_nat_protocol.h b/include/linux/netfilter_ipv4/ip_nat_protocol.h index 129708c22386..ef63aa991a06 100644 --- a/include/linux/netfilter_ipv4/ip_nat_protocol.h +++ b/include/linux/netfilter_ipv4/ip_nat_protocol.h | |||
@@ -4,6 +4,9 @@ | |||
4 | #include <linux/init.h> | 4 | #include <linux/init.h> |
5 | #include <linux/list.h> | 5 | #include <linux/list.h> |
6 | 6 | ||
7 | #include <linux/netfilter_ipv4/ip_nat.h> | ||
8 | #include <linux/netfilter/nfnetlink_conntrack.h> | ||
9 | |||
7 | struct iphdr; | 10 | struct iphdr; |
8 | struct ip_nat_range; | 11 | struct ip_nat_range; |
9 | 12 | ||
@@ -15,6 +18,8 @@ struct ip_nat_protocol | |||
15 | /* Protocol number. */ | 18 | /* Protocol number. */ |
16 | unsigned int protonum; | 19 | unsigned int protonum; |
17 | 20 | ||
21 | struct module *me; | ||
22 | |||
18 | /* Translate a packet to the target according to manip type. | 23 | /* Translate a packet to the target according to manip type. |
19 | Return true if succeeded. */ | 24 | Return true if succeeded. */ |
20 | int (*manip_pkt)(struct sk_buff **pskb, | 25 | int (*manip_pkt)(struct sk_buff **pskb, |
@@ -43,19 +48,20 @@ struct ip_nat_protocol | |||
43 | 48 | ||
44 | unsigned int (*print_range)(char *buffer, | 49 | unsigned int (*print_range)(char *buffer, |
45 | const struct ip_nat_range *range); | 50 | const struct ip_nat_range *range); |
46 | }; | ||
47 | 51 | ||
48 | #define MAX_IP_NAT_PROTO 256 | 52 | int (*range_to_nfattr)(struct sk_buff *skb, |
49 | extern struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO]; | 53 | const struct ip_nat_range *range); |
54 | |||
55 | int (*nfattr_to_range)(struct nfattr *tb[], | ||
56 | struct ip_nat_range *range); | ||
57 | }; | ||
50 | 58 | ||
51 | /* Protocol registration. */ | 59 | /* Protocol registration. */ |
52 | extern int ip_nat_protocol_register(struct ip_nat_protocol *proto); | 60 | extern int ip_nat_protocol_register(struct ip_nat_protocol *proto); |
53 | extern void ip_nat_protocol_unregister(struct ip_nat_protocol *proto); | 61 | extern void ip_nat_protocol_unregister(struct ip_nat_protocol *proto); |
54 | 62 | ||
55 | static inline struct ip_nat_protocol *ip_nat_find_proto(u_int8_t protocol) | 63 | extern struct ip_nat_protocol *ip_nat_proto_find_get(u_int8_t protocol); |
56 | { | 64 | extern void ip_nat_proto_put(struct ip_nat_protocol *proto); |
57 | return ip_nat_protos[protocol]; | ||
58 | } | ||
59 | 65 | ||
60 | /* Built-in protocols. */ | 66 | /* Built-in protocols. */ |
61 | extern struct ip_nat_protocol ip_nat_protocol_tcp; | 67 | extern struct ip_nat_protocol ip_nat_protocol_tcp; |
@@ -67,4 +73,9 @@ extern int init_protocols(void) __init; | |||
67 | extern void cleanup_protocols(void); | 73 | extern void cleanup_protocols(void); |
68 | extern struct ip_nat_protocol *find_nat_proto(u_int16_t protonum); | 74 | extern struct ip_nat_protocol *find_nat_proto(u_int16_t protonum); |
69 | 75 | ||
76 | extern int ip_nat_port_range_to_nfattr(struct sk_buff *skb, | ||
77 | const struct ip_nat_range *range); | ||
78 | extern int ip_nat_port_nfattr_to_range(struct nfattr *tb[], | ||
79 | struct ip_nat_range *range); | ||
80 | |||
70 | #endif /*_IP_NAT_PROTO_H*/ | 81 | #endif /*_IP_NAT_PROTO_H*/ |
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 12ce47808e7d..d19d65cf4530 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h | |||
@@ -109,7 +109,8 @@ struct ipt_counters | |||
109 | 109 | ||
110 | /* Values for "flag" field in struct ipt_ip (general ip structure). */ | 110 | /* Values for "flag" field in struct ipt_ip (general ip structure). */ |
111 | #define IPT_F_FRAG 0x01 /* Set if rule is a fragment rule */ | 111 | #define IPT_F_FRAG 0x01 /* Set if rule is a fragment rule */ |
112 | #define IPT_F_MASK 0x01 /* All possible flag bits mask. */ | 112 | #define IPT_F_GOTO 0x02 /* Set if jump is a goto */ |
113 | #define IPT_F_MASK 0x03 /* All possible flag bits mask. */ | ||
113 | 114 | ||
114 | /* Values for "inv" field in struct ipt_ip. */ | 115 | /* Values for "inv" field in struct ipt_ip. */ |
115 | #define IPT_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */ | 116 | #define IPT_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */ |
diff --git a/include/linux/netfilter_ipv4/ipt_LOG.h b/include/linux/netfilter_ipv4/ipt_LOG.h index d25f782e57d1..22d16177319b 100644 --- a/include/linux/netfilter_ipv4/ipt_LOG.h +++ b/include/linux/netfilter_ipv4/ipt_LOG.h | |||
@@ -1,6 +1,7 @@ | |||
1 | #ifndef _IPT_LOG_H | 1 | #ifndef _IPT_LOG_H |
2 | #define _IPT_LOG_H | 2 | #define _IPT_LOG_H |
3 | 3 | ||
4 | /* make sure not to change this without changing netfilter.h:NF_LOG_* (!) */ | ||
4 | #define IPT_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ | 5 | #define IPT_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ |
5 | #define IPT_LOG_TCPOPT 0x02 /* Log TCP options */ | 6 | #define IPT_LOG_TCPOPT 0x02 /* Log TCP options */ |
6 | #define IPT_LOG_IPOPT 0x04 /* Log IP options */ | 7 | #define IPT_LOG_IPOPT 0x04 /* Log IP options */ |
diff --git a/include/linux/netfilter_ipv4/ipt_NFQUEUE.h b/include/linux/netfilter_ipv4/ipt_NFQUEUE.h new file mode 100644 index 000000000000..b5b2943b0c66 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_NFQUEUE.h | |||
@@ -0,0 +1,16 @@ | |||
1 | /* iptables module for using NFQUEUE mechanism | ||
2 | * | ||
3 | * (C) 2005 Harald Welte <laforge@netfilter.org> | ||
4 | * | ||
5 | * This software is distributed under GNU GPL v2, 1991 | ||
6 | * | ||
7 | */ | ||
8 | #ifndef _IPT_NFQ_TARGET_H | ||
9 | #define _IPT_NFQ_TARGET_H | ||
10 | |||
11 | /* target info */ | ||
12 | struct ipt_NFQ_info { | ||
13 | u_int16_t queuenum; | ||
14 | }; | ||
15 | |||
16 | #endif /* _IPT_DSCP_TARGET_H */ | ||
diff --git a/include/linux/netfilter_ipv4/ipt_TTL.h b/include/linux/netfilter_ipv4/ipt_TTL.h new file mode 100644 index 000000000000..ee6611edc112 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_TTL.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* TTL modification module for IP tables | ||
2 | * (C) 2000 by Harald Welte <laforge@netfilter.org> */ | ||
3 | |||
4 | #ifndef _IPT_TTL_H | ||
5 | #define _IPT_TTL_H | ||
6 | |||
7 | enum { | ||
8 | IPT_TTL_SET = 0, | ||
9 | IPT_TTL_INC, | ||
10 | IPT_TTL_DEC | ||
11 | }; | ||
12 | |||
13 | #define IPT_TTL_MAXMODE IPT_TTL_DEC | ||
14 | |||
15 | struct ipt_TTL_info { | ||
16 | u_int8_t mode; | ||
17 | u_int8_t ttl; | ||
18 | }; | ||
19 | |||
20 | |||
21 | #endif | ||
diff --git a/include/linux/netfilter_ipv4/ipt_connbytes.h b/include/linux/netfilter_ipv4/ipt_connbytes.h new file mode 100644 index 000000000000..9e5532f8d8ac --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_connbytes.h | |||
@@ -0,0 +1,25 @@ | |||
1 | #ifndef _IPT_CONNBYTES_H | ||
2 | #define _IPT_CONNBYTES_H | ||
3 | |||
4 | enum ipt_connbytes_what { | ||
5 | IPT_CONNBYTES_PKTS, | ||
6 | IPT_CONNBYTES_BYTES, | ||
7 | IPT_CONNBYTES_AVGPKT, | ||
8 | }; | ||
9 | |||
10 | enum ipt_connbytes_direction { | ||
11 | IPT_CONNBYTES_DIR_ORIGINAL, | ||
12 | IPT_CONNBYTES_DIR_REPLY, | ||
13 | IPT_CONNBYTES_DIR_BOTH, | ||
14 | }; | ||
15 | |||
16 | struct ipt_connbytes_info | ||
17 | { | ||
18 | struct { | ||
19 | aligned_u64 from; /* count to be matched */ | ||
20 | aligned_u64 to; /* count to be matched */ | ||
21 | } count; | ||
22 | u_int8_t what; /* ipt_connbytes_what */ | ||
23 | u_int8_t direction; /* ipt_connbytes_direction */ | ||
24 | }; | ||
25 | #endif | ||
diff --git a/include/linux/netfilter_ipv4/ipt_dccp.h b/include/linux/netfilter_ipv4/ipt_dccp.h new file mode 100644 index 000000000000..3cb3a522e62b --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_dccp.h | |||
@@ -0,0 +1,23 @@ | |||
1 | #ifndef _IPT_DCCP_H_ | ||
2 | #define _IPT_DCCP_H_ | ||
3 | |||
4 | #define IPT_DCCP_SRC_PORTS 0x01 | ||
5 | #define IPT_DCCP_DEST_PORTS 0x02 | ||
6 | #define IPT_DCCP_TYPE 0x04 | ||
7 | #define IPT_DCCP_OPTION 0x08 | ||
8 | |||
9 | #define IPT_DCCP_VALID_FLAGS 0x0f | ||
10 | |||
11 | struct ipt_dccp_info { | ||
12 | u_int16_t dpts[2]; /* Min, Max */ | ||
13 | u_int16_t spts[2]; /* Min, Max */ | ||
14 | |||
15 | u_int16_t flags; | ||
16 | u_int16_t invflags; | ||
17 | |||
18 | u_int16_t typemask; | ||
19 | u_int8_t option; | ||
20 | }; | ||
21 | |||
22 | #endif /* _IPT_DCCP_H_ */ | ||
23 | |||
diff --git a/include/linux/netfilter_ipv4/ipt_string.h b/include/linux/netfilter_ipv4/ipt_string.h new file mode 100644 index 000000000000..a265f6e44eab --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_string.h | |||
@@ -0,0 +1,18 @@ | |||
1 | #ifndef _IPT_STRING_H | ||
2 | #define _IPT_STRING_H | ||
3 | |||
4 | #define IPT_STRING_MAX_PATTERN_SIZE 128 | ||
5 | #define IPT_STRING_MAX_ALGO_NAME_SIZE 16 | ||
6 | |||
7 | struct ipt_string_info | ||
8 | { | ||
9 | u_int16_t from_offset; | ||
10 | u_int16_t to_offset; | ||
11 | char algo[IPT_STRING_MAX_ALGO_NAME_SIZE]; | ||
12 | char pattern[IPT_STRING_MAX_PATTERN_SIZE]; | ||
13 | u_int8_t patlen; | ||
14 | u_int8_t invert; | ||
15 | struct ts_config __attribute__((aligned(8))) *config; | ||
16 | }; | ||
17 | |||
18 | #endif /*_IPT_STRING_H*/ | ||
diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index bee7a5ec7c66..edcc2c6eb5c7 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h | |||
@@ -10,6 +10,8 @@ | |||
10 | 10 | ||
11 | #include <linux/netfilter.h> | 11 | #include <linux/netfilter.h> |
12 | 12 | ||
13 | /* only for userspace compatibility */ | ||
14 | #ifndef __KERNEL__ | ||
13 | /* IP Cache bits. */ | 15 | /* IP Cache bits. */ |
14 | /* Src IP address. */ | 16 | /* Src IP address. */ |
15 | #define NFC_IP6_SRC 0x0001 | 17 | #define NFC_IP6_SRC 0x0001 |
@@ -38,6 +40,7 @@ | |||
38 | #define NFC_IP6_DST_PT 0x0400 | 40 | #define NFC_IP6_DST_PT 0x0400 |
39 | /* Something else about the proto */ | 41 | /* Something else about the proto */ |
40 | #define NFC_IP6_PROTO_UNKNOWN 0x2000 | 42 | #define NFC_IP6_PROTO_UNKNOWN 0x2000 |
43 | #endif /* ! __KERNEL__ */ | ||
41 | 44 | ||
42 | 45 | ||
43 | /* IP6 Hooks */ | 46 | /* IP6 Hooks */ |
@@ -68,4 +71,7 @@ enum nf_ip6_hook_priorities { | |||
68 | NF_IP6_PRI_LAST = INT_MAX, | 71 | NF_IP6_PRI_LAST = INT_MAX, |
69 | }; | 72 | }; |
70 | 73 | ||
74 | extern int ipv6_netfilter_init(void); | ||
75 | extern void ipv6_netfilter_fini(void); | ||
76 | |||
71 | #endif /*__LINUX_IP6_NETFILTER_H*/ | 77 | #endif /*__LINUX_IP6_NETFILTER_H*/ |
diff --git a/include/linux/netfilter_ipv6/ip6_logging.h b/include/linux/netfilter_ipv6/ip6_logging.h deleted file mode 100644 index a0b2ee3043aa..000000000000 --- a/include/linux/netfilter_ipv6/ip6_logging.h +++ /dev/null | |||
@@ -1,20 +0,0 @@ | |||
1 | /* IPv6 macros for the nternal logging interface. */ | ||
2 | #ifndef __IP6_LOGGING_H | ||
3 | #define __IP6_LOGGING_H | ||
4 | |||
5 | #ifdef __KERNEL__ | ||
6 | #include <linux/socket.h> | ||
7 | #include <linux/netfilter_logging.h> | ||
8 | |||
9 | #define nf_log_ip6_packet(pskb,hooknum,in,out,fmt,args...) \ | ||
10 | nf_log_packet(AF_INET6,pskb,hooknum,in,out,fmt,##args) | ||
11 | |||
12 | #define nf_log_ip6(pfh,len,fmt,args...) \ | ||
13 | nf_log(AF_INET6,pfh,len,fmt,##args) | ||
14 | |||
15 | #define nf_ip6_log_register(logging) nf_log_register(AF_INET6,logging) | ||
16 | #define nf_ip6_log_unregister(logging) nf_log_unregister(AF_INET6,logging) | ||
17 | |||
18 | #endif /*__KERNEL__*/ | ||
19 | |||
20 | #endif /*__IP6_LOGGING_H*/ | ||
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index f1ce3b009853..58c72a52dc65 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h | |||
@@ -111,7 +111,8 @@ struct ip6t_counters | |||
111 | #define IP6T_F_PROTO 0x01 /* Set if rule cares about upper | 111 | #define IP6T_F_PROTO 0x01 /* Set if rule cares about upper |
112 | protocols */ | 112 | protocols */ |
113 | #define IP6T_F_TOS 0x02 /* Match the TOS. */ | 113 | #define IP6T_F_TOS 0x02 /* Match the TOS. */ |
114 | #define IP6T_F_MASK 0x03 /* All possible flag bits mask. */ | 114 | #define IP6T_F_GOTO 0x04 /* Set if jump is a goto */ |
115 | #define IP6T_F_MASK 0x07 /* All possible flag bits mask. */ | ||
115 | 116 | ||
116 | /* Values for "inv" field in struct ip6t_ip6. */ | 117 | /* Values for "inv" field in struct ip6t_ip6. */ |
117 | #define IP6T_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */ | 118 | #define IP6T_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */ |
diff --git a/include/linux/netfilter_ipv6/ip6t_HL.h b/include/linux/netfilter_ipv6/ip6t_HL.h new file mode 100644 index 000000000000..afb7813d45ab --- /dev/null +++ b/include/linux/netfilter_ipv6/ip6t_HL.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /* Hop Limit modification module for ip6tables | ||
2 | * Maciej Soltysiak <solt@dns.toxicfilms.tv> | ||
3 | * Based on HW's TTL module */ | ||
4 | |||
5 | #ifndef _IP6T_HL_H | ||
6 | #define _IP6T_HL_H | ||
7 | |||
8 | enum { | ||
9 | IP6T_HL_SET = 0, | ||
10 | IP6T_HL_INC, | ||
11 | IP6T_HL_DEC | ||
12 | }; | ||
13 | |||
14 | #define IP6T_HL_MAXMODE IP6T_HL_DEC | ||
15 | |||
16 | struct ip6t_HL_info { | ||
17 | u_int8_t mode; | ||
18 | u_int8_t hop_limit; | ||
19 | }; | ||
20 | |||
21 | |||
22 | #endif | ||
diff --git a/include/linux/netfilter_ipv6/ip6t_LOG.h b/include/linux/netfilter_ipv6/ip6t_LOG.h index 42996a43bb39..9008ff5c40ae 100644 --- a/include/linux/netfilter_ipv6/ip6t_LOG.h +++ b/include/linux/netfilter_ipv6/ip6t_LOG.h | |||
@@ -1,6 +1,7 @@ | |||
1 | #ifndef _IP6T_LOG_H | 1 | #ifndef _IP6T_LOG_H |
2 | #define _IP6T_LOG_H | 2 | #define _IP6T_LOG_H |
3 | 3 | ||
4 | /* make sure not to change this without changing netfilter.h:NF_LOG_* (!) */ | ||
4 | #define IP6T_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ | 5 | #define IP6T_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ |
5 | #define IP6T_LOG_TCPOPT 0x02 /* Log TCP options */ | 6 | #define IP6T_LOG_TCPOPT 0x02 /* Log TCP options */ |
6 | #define IP6T_LOG_IPOPT 0x04 /* Log IP options */ | 7 | #define IP6T_LOG_IPOPT 0x04 /* Log IP options */ |
diff --git a/include/linux/netfilter_ipv6/ip6t_REJECT.h b/include/linux/netfilter_ipv6/ip6t_REJECT.h new file mode 100644 index 000000000000..6be6504162bb --- /dev/null +++ b/include/linux/netfilter_ipv6/ip6t_REJECT.h | |||
@@ -0,0 +1,18 @@ | |||
1 | #ifndef _IP6T_REJECT_H | ||
2 | #define _IP6T_REJECT_H | ||
3 | |||
4 | enum ip6t_reject_with { | ||
5 | IP6T_ICMP6_NO_ROUTE, | ||
6 | IP6T_ICMP6_ADM_PROHIBITED, | ||
7 | IP6T_ICMP6_NOT_NEIGHBOUR, | ||
8 | IP6T_ICMP6_ADDR_UNREACH, | ||
9 | IP6T_ICMP6_PORT_UNREACH, | ||
10 | IP6T_ICMP6_ECHOREPLY, | ||
11 | IP6T_TCP_RESET | ||
12 | }; | ||
13 | |||
14 | struct ip6t_reject_info { | ||
15 | u_int32_t with; /* reject type */ | ||
16 | }; | ||
17 | |||
18 | #endif /*_IP6T_REJECT_H*/ | ||
diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 6552b71bfa73..167518668936 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h | |||
@@ -8,7 +8,7 @@ | |||
8 | #define NETLINK_W1 1 /* 1-wire subsystem */ | 8 | #define NETLINK_W1 1 /* 1-wire subsystem */ |
9 | #define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */ | 9 | #define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */ |
10 | #define NETLINK_FIREWALL 3 /* Firewalling hook */ | 10 | #define NETLINK_FIREWALL 3 /* Firewalling hook */ |
11 | #define NETLINK_TCPDIAG 4 /* TCP socket monitoring */ | 11 | #define NETLINK_INET_DIAG 4 /* INET socket monitoring */ |
12 | #define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */ | 12 | #define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */ |
13 | #define NETLINK_XFRM 6 /* ipsec */ | 13 | #define NETLINK_XFRM 6 /* ipsec */ |
14 | #define NETLINK_SELINUX 7 /* SELinux event notifications */ | 14 | #define NETLINK_SELINUX 7 /* SELinux event notifications */ |
@@ -90,6 +90,15 @@ struct nlmsgerr | |||
90 | struct nlmsghdr msg; | 90 | struct nlmsghdr msg; |
91 | }; | 91 | }; |
92 | 92 | ||
93 | #define NETLINK_ADD_MEMBERSHIP 1 | ||
94 | #define NETLINK_DROP_MEMBERSHIP 2 | ||
95 | #define NETLINK_PKTINFO 3 | ||
96 | |||
97 | struct nl_pktinfo | ||
98 | { | ||
99 | __u32 group; | ||
100 | }; | ||
101 | |||
93 | #define NET_MAJOR 36 /* Major 36 is reserved for networking */ | 102 | #define NET_MAJOR 36 /* Major 36 is reserved for networking */ |
94 | 103 | ||
95 | enum { | 104 | enum { |
@@ -106,9 +115,8 @@ struct netlink_skb_parms | |||
106 | { | 115 | { |
107 | struct ucred creds; /* Skb credentials */ | 116 | struct ucred creds; /* Skb credentials */ |
108 | __u32 pid; | 117 | __u32 pid; |
109 | __u32 groups; | ||
110 | __u32 dst_pid; | 118 | __u32 dst_pid; |
111 | __u32 dst_groups; | 119 | __u32 dst_group; |
112 | kernel_cap_t eff_cap; | 120 | kernel_cap_t eff_cap; |
113 | __u32 loginuid; /* Login (audit) uid */ | 121 | __u32 loginuid; /* Login (audit) uid */ |
114 | }; | 122 | }; |
@@ -117,11 +125,11 @@ struct netlink_skb_parms | |||
117 | #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) | 125 | #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) |
118 | 126 | ||
119 | 127 | ||
120 | extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)); | 128 | extern struct sock *netlink_kernel_create(int unit, unsigned int groups, void (*input)(struct sock *sk, int len), struct module *module); |
121 | extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); | 129 | extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); |
122 | extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); | 130 | extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); |
123 | extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, | 131 | extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, |
124 | __u32 group, int allocation); | 132 | __u32 group, unsigned int __nocast allocation); |
125 | extern void netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code); | 133 | extern void netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code); |
126 | extern int netlink_register_notifier(struct notifier_block *nb); | 134 | extern int netlink_register_notifier(struct notifier_block *nb); |
127 | extern int netlink_unregister_notifier(struct notifier_block *nb); | 135 | extern int netlink_unregister_notifier(struct notifier_block *nb); |
diff --git a/include/linux/random.h b/include/linux/random.h index cc6703449916..7b2adb3322d5 100644 --- a/include/linux/random.h +++ b/include/linux/random.h | |||
@@ -59,6 +59,8 @@ extern __u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr, | |||
59 | __u16 sport, __u16 dport); | 59 | __u16 sport, __u16 dport); |
60 | extern __u32 secure_tcpv6_sequence_number(__u32 *saddr, __u32 *daddr, | 60 | extern __u32 secure_tcpv6_sequence_number(__u32 *saddr, __u32 *daddr, |
61 | __u16 sport, __u16 dport); | 61 | __u16 sport, __u16 dport); |
62 | extern u64 secure_dccp_sequence_number(__u32 saddr, __u32 daddr, | ||
63 | __u16 sport, __u16 dport); | ||
62 | 64 | ||
63 | #ifndef MODULE | 65 | #ifndef MODULE |
64 | extern struct file_operations random_fops, urandom_fops; | 66 | extern struct file_operations random_fops, urandom_fops; |
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 657c05ab8f9e..c231e9a08f0b 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h | |||
@@ -826,9 +826,8 @@ enum | |||
826 | #define TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg)))) | 826 | #define TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg)))) |
827 | #define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg)) | 827 | #define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg)) |
828 | 828 | ||
829 | 829 | #ifndef __KERNEL__ | |
830 | /* RTnetlink multicast groups */ | 830 | /* RTnetlink multicast groups - backwards compatibility for userspace */ |
831 | |||
832 | #define RTMGRP_LINK 1 | 831 | #define RTMGRP_LINK 1 |
833 | #define RTMGRP_NOTIFY 2 | 832 | #define RTMGRP_NOTIFY 2 |
834 | #define RTMGRP_NEIGH 4 | 833 | #define RTMGRP_NEIGH 4 |
@@ -847,6 +846,43 @@ enum | |||
847 | #define RTMGRP_DECnet_ROUTE 0x4000 | 846 | #define RTMGRP_DECnet_ROUTE 0x4000 |
848 | 847 | ||
849 | #define RTMGRP_IPV6_PREFIX 0x20000 | 848 | #define RTMGRP_IPV6_PREFIX 0x20000 |
849 | #endif | ||
850 | |||
851 | /* RTnetlink multicast groups */ | ||
852 | enum rtnetlink_groups { | ||
853 | RTNLGRP_NONE, | ||
854 | #define RTNLGRP_NONE RTNLGRP_NONE | ||
855 | RTNLGRP_LINK, | ||
856 | #define RTNLGRP_LINK RTNLGRP_LINK | ||
857 | RTNLGRP_NOTIFY, | ||
858 | #define RTNLGRP_NOTIFY RTNLGRP_NOTIFY | ||
859 | RTNLGRP_NEIGH, | ||
860 | #define RTNLGRP_NEIGH RTNLGRP_NEIGH | ||
861 | RTNLGRP_TC, | ||
862 | #define RTNLGRP_TC RTNLGRP_TC | ||
863 | RTNLGRP_IPV4_IFADDR, | ||
864 | #define RTNLGRP_IPV4_IFADDR RTNLGRP_IPV4_IFADDR | ||
865 | RTNLGRP_IPV4_MROUTE, | ||
866 | #define RTNLGRP_IPV4_MROUTE RTNLGRP_IPV4_MROUTE | ||
867 | RTNLGRP_IPV4_ROUTE, | ||
868 | #define RTNLGRP_IPV4_ROUTE RTNLGRP_IPV4_ROUTE | ||
869 | RTNLGRP_IPV6_IFADDR, | ||
870 | #define RTNLGRP_IPV6_IFADDR RTNLGRP_IPV6_IFADDR | ||
871 | RTNLGRP_IPV6_MROUTE, | ||
872 | #define RTNLGRP_IPV6_MROUTE RTNLGRP_IPV6_MROUTE | ||
873 | RTNLGRP_IPV6_ROUTE, | ||
874 | #define RTNLGRP_IPV6_ROUTE RTNLGRP_IPV6_ROUTE | ||
875 | RTNLGRP_IPV6_IFINFO, | ||
876 | #define RTNLGRP_IPV6_IFINFO RTNLGRP_IPV6_IFINFO | ||
877 | RTNLGRP_DECnet_IFADDR, | ||
878 | #define RTNLGRP_DECnet_IFADDR RTNLGRP_DECnet_IFADDR | ||
879 | RTNLGRP_DECnet_ROUTE, | ||
880 | #define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE | ||
881 | RTNLGRP_IPV6_PREFIX, | ||
882 | #define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX | ||
883 | __RTNLGRP_MAX | ||
884 | }; | ||
885 | #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) | ||
850 | 886 | ||
851 | /* TC action piece */ | 887 | /* TC action piece */ |
852 | struct tcamsg | 888 | struct tcamsg |
diff --git a/include/linux/security.h b/include/linux/security.h index b42095a68b1c..7aab6ab7c57f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h | |||
@@ -2727,7 +2727,8 @@ static inline int security_socket_getpeersec(struct socket *sock, char __user *o | |||
2727 | return security_ops->socket_getpeersec(sock, optval, optlen, len); | 2727 | return security_ops->socket_getpeersec(sock, optval, optlen, len); |
2728 | } | 2728 | } |
2729 | 2729 | ||
2730 | static inline int security_sk_alloc(struct sock *sk, int family, int priority) | 2730 | static inline int security_sk_alloc(struct sock *sk, int family, |
2731 | unsigned int __nocast priority) | ||
2731 | { | 2732 | { |
2732 | return security_ops->sk_alloc_security(sk, family, priority); | 2733 | return security_ops->sk_alloc_security(sk, family, priority); |
2733 | } | 2734 | } |
@@ -2844,7 +2845,8 @@ static inline int security_socket_getpeersec(struct socket *sock, char __user *o | |||
2844 | return -ENOPROTOOPT; | 2845 | return -ENOPROTOOPT; |
2845 | } | 2846 | } |
2846 | 2847 | ||
2847 | static inline int security_sk_alloc(struct sock *sk, int family, int priority) | 2848 | static inline int security_sk_alloc(struct sock *sk, int family, |
2849 | unsigned int __nocast priority) | ||
2848 | { | 2850 | { |
2849 | return 0; | 2851 | return 0; |
2850 | } | 2852 | } |
diff --git a/include/linux/selinux_netlink.h b/include/linux/selinux_netlink.h index 957e6ebca4e6..bbf489decd84 100644 --- a/include/linux/selinux_netlink.h +++ b/include/linux/selinux_netlink.h | |||
@@ -20,10 +20,21 @@ enum { | |||
20 | SELNL_MSG_MAX | 20 | SELNL_MSG_MAX |
21 | }; | 21 | }; |
22 | 22 | ||
23 | /* Multicast groups */ | 23 | #ifndef __KERNEL__ |
24 | /* Multicast groups - backwards compatiblility for userspace */ | ||
24 | #define SELNL_GRP_NONE 0x00000000 | 25 | #define SELNL_GRP_NONE 0x00000000 |
25 | #define SELNL_GRP_AVC 0x00000001 /* AVC notifications */ | 26 | #define SELNL_GRP_AVC 0x00000001 /* AVC notifications */ |
26 | #define SELNL_GRP_ALL 0xffffffff | 27 | #define SELNL_GRP_ALL 0xffffffff |
28 | #endif | ||
29 | |||
30 | enum selinux_nlgroups { | ||
31 | SELNLGRP_NONE, | ||
32 | #define SELNLGRP_NONE SELNLGRP_NONE | ||
33 | SELNLGRP_AVC, | ||
34 | #define SELNLGRP_AVC SELNLGRP_AVC | ||
35 | __SELNLGRP_MAX | ||
36 | }; | ||
37 | #define SELNLGRP_MAX (__SELNLGRP_MAX - 1) | ||
27 | 38 | ||
28 | /* Message structures */ | 39 | /* Message structures */ |
29 | struct selnl_msg_setenforce { | 40 | struct selnl_msg_setenforce { |
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 948527e42a60..42edce6abe23 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h | |||
@@ -155,16 +155,29 @@ struct skb_shared_info { | |||
155 | #define SKB_DATAREF_SHIFT 16 | 155 | #define SKB_DATAREF_SHIFT 16 |
156 | #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1) | 156 | #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1) |
157 | 157 | ||
158 | extern struct timeval skb_tv_base; | ||
159 | |||
160 | struct skb_timeval { | ||
161 | u32 off_sec; | ||
162 | u32 off_usec; | ||
163 | }; | ||
164 | |||
165 | |||
166 | enum { | ||
167 | SKB_FCLONE_UNAVAILABLE, | ||
168 | SKB_FCLONE_ORIG, | ||
169 | SKB_FCLONE_CLONE, | ||
170 | }; | ||
171 | |||
158 | /** | 172 | /** |
159 | * struct sk_buff - socket buffer | 173 | * struct sk_buff - socket buffer |
160 | * @next: Next buffer in list | 174 | * @next: Next buffer in list |
161 | * @prev: Previous buffer in list | 175 | * @prev: Previous buffer in list |
162 | * @list: List we are on | 176 | * @list: List we are on |
163 | * @sk: Socket we are owned by | 177 | * @sk: Socket we are owned by |
164 | * @stamp: Time we arrived | 178 | * @tstamp: Time we arrived stored as offset to skb_tv_base |
165 | * @dev: Device we arrived on/are leaving by | 179 | * @dev: Device we arrived on/are leaving by |
166 | * @input_dev: Device we arrived on | 180 | * @input_dev: Device we arrived on |
167 | * @real_dev: The real device we are using | ||
168 | * @h: Transport layer header | 181 | * @h: Transport layer header |
169 | * @nh: Network layer header | 182 | * @nh: Network layer header |
170 | * @mac: Link layer header | 183 | * @mac: Link layer header |
@@ -190,14 +203,11 @@ struct skb_shared_info { | |||
190 | * @end: End pointer | 203 | * @end: End pointer |
191 | * @destructor: Destruct function | 204 | * @destructor: Destruct function |
192 | * @nfmark: Can be used for communication between hooks | 205 | * @nfmark: Can be used for communication between hooks |
193 | * @nfcache: Cache info | ||
194 | * @nfct: Associated connection, if any | 206 | * @nfct: Associated connection, if any |
195 | * @nfctinfo: Relationship of this skb to the connection | 207 | * @nfctinfo: Relationship of this skb to the connection |
196 | * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c | 208 | * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c |
197 | * @private: Data which is private to the HIPPI implementation | ||
198 | * @tc_index: Traffic control index | 209 | * @tc_index: Traffic control index |
199 | * @tc_verd: traffic control verdict | 210 | * @tc_verd: traffic control verdict |
200 | * @tc_classid: traffic control classid | ||
201 | */ | 211 | */ |
202 | 212 | ||
203 | struct sk_buff { | 213 | struct sk_buff { |
@@ -205,12 +215,10 @@ struct sk_buff { | |||
205 | struct sk_buff *next; | 215 | struct sk_buff *next; |
206 | struct sk_buff *prev; | 216 | struct sk_buff *prev; |
207 | 217 | ||
208 | struct sk_buff_head *list; | ||
209 | struct sock *sk; | 218 | struct sock *sk; |
210 | struct timeval stamp; | 219 | struct skb_timeval tstamp; |
211 | struct net_device *dev; | 220 | struct net_device *dev; |
212 | struct net_device *input_dev; | 221 | struct net_device *input_dev; |
213 | struct net_device *real_dev; | ||
214 | 222 | ||
215 | union { | 223 | union { |
216 | struct tcphdr *th; | 224 | struct tcphdr *th; |
@@ -252,33 +260,28 @@ struct sk_buff { | |||
252 | __u8 local_df:1, | 260 | __u8 local_df:1, |
253 | cloned:1, | 261 | cloned:1, |
254 | ip_summed:2, | 262 | ip_summed:2, |
255 | nohdr:1; | 263 | nohdr:1, |
256 | /* 3 bits spare */ | 264 | nfctinfo:3; |
257 | __u8 pkt_type; | 265 | __u8 pkt_type:3, |
266 | fclone:2; | ||
258 | __be16 protocol; | 267 | __be16 protocol; |
259 | 268 | ||
260 | void (*destructor)(struct sk_buff *skb); | 269 | void (*destructor)(struct sk_buff *skb); |
261 | #ifdef CONFIG_NETFILTER | 270 | #ifdef CONFIG_NETFILTER |
262 | unsigned long nfmark; | 271 | __u32 nfmark; |
263 | __u32 nfcache; | ||
264 | __u32 nfctinfo; | ||
265 | struct nf_conntrack *nfct; | 272 | struct nf_conntrack *nfct; |
273 | #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) | ||
274 | __u8 ipvs_property:1; | ||
275 | #endif | ||
266 | #ifdef CONFIG_BRIDGE_NETFILTER | 276 | #ifdef CONFIG_BRIDGE_NETFILTER |
267 | struct nf_bridge_info *nf_bridge; | 277 | struct nf_bridge_info *nf_bridge; |
268 | #endif | 278 | #endif |
269 | #endif /* CONFIG_NETFILTER */ | 279 | #endif /* CONFIG_NETFILTER */ |
270 | #if defined(CONFIG_HIPPI) | ||
271 | union { | ||
272 | __u32 ifield; | ||
273 | } private; | ||
274 | #endif | ||
275 | #ifdef CONFIG_NET_SCHED | 280 | #ifdef CONFIG_NET_SCHED |
276 | __u32 tc_index; /* traffic control index */ | 281 | __u16 tc_index; /* traffic control index */ |
277 | #ifdef CONFIG_NET_CLS_ACT | 282 | #ifdef CONFIG_NET_CLS_ACT |
278 | __u32 tc_verd; /* traffic control verdict */ | 283 | __u16 tc_verd; /* traffic control verdict */ |
279 | __u32 tc_classid; /* traffic control classid */ | ||
280 | #endif | 284 | #endif |
281 | |||
282 | #endif | 285 | #endif |
283 | 286 | ||
284 | 287 | ||
@@ -300,8 +303,20 @@ struct sk_buff { | |||
300 | #include <asm/system.h> | 303 | #include <asm/system.h> |
301 | 304 | ||
302 | extern void __kfree_skb(struct sk_buff *skb); | 305 | extern void __kfree_skb(struct sk_buff *skb); |
303 | extern struct sk_buff *alloc_skb(unsigned int size, | 306 | extern struct sk_buff *__alloc_skb(unsigned int size, |
304 | unsigned int __nocast priority); | 307 | unsigned int __nocast priority, int fclone); |
308 | static inline struct sk_buff *alloc_skb(unsigned int size, | ||
309 | unsigned int __nocast priority) | ||
310 | { | ||
311 | return __alloc_skb(size, priority, 0); | ||
312 | } | ||
313 | |||
314 | static inline struct sk_buff *alloc_skb_fclone(unsigned int size, | ||
315 | unsigned int __nocast priority) | ||
316 | { | ||
317 | return __alloc_skb(size, priority, 1); | ||
318 | } | ||
319 | |||
305 | extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, | 320 | extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, |
306 | unsigned int size, | 321 | unsigned int size, |
307 | unsigned int __nocast priority); | 322 | unsigned int __nocast priority); |
@@ -597,7 +612,6 @@ static inline void __skb_queue_head(struct sk_buff_head *list, | |||
597 | { | 612 | { |
598 | struct sk_buff *prev, *next; | 613 | struct sk_buff *prev, *next; |
599 | 614 | ||
600 | newsk->list = list; | ||
601 | list->qlen++; | 615 | list->qlen++; |
602 | prev = (struct sk_buff *)list; | 616 | prev = (struct sk_buff *)list; |
603 | next = prev->next; | 617 | next = prev->next; |
@@ -622,7 +636,6 @@ static inline void __skb_queue_tail(struct sk_buff_head *list, | |||
622 | { | 636 | { |
623 | struct sk_buff *prev, *next; | 637 | struct sk_buff *prev, *next; |
624 | 638 | ||
625 | newsk->list = list; | ||
626 | list->qlen++; | 639 | list->qlen++; |
627 | next = (struct sk_buff *)list; | 640 | next = (struct sk_buff *)list; |
628 | prev = next->prev; | 641 | prev = next->prev; |
@@ -655,7 +668,6 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) | |||
655 | next->prev = prev; | 668 | next->prev = prev; |
656 | prev->next = next; | 669 | prev->next = next; |
657 | result->next = result->prev = NULL; | 670 | result->next = result->prev = NULL; |
658 | result->list = NULL; | ||
659 | } | 671 | } |
660 | return result; | 672 | return result; |
661 | } | 673 | } |
@@ -664,7 +676,7 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) | |||
664 | /* | 676 | /* |
665 | * Insert a packet on a list. | 677 | * Insert a packet on a list. |
666 | */ | 678 | */ |
667 | extern void skb_insert(struct sk_buff *old, struct sk_buff *newsk); | 679 | extern void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list); |
668 | static inline void __skb_insert(struct sk_buff *newsk, | 680 | static inline void __skb_insert(struct sk_buff *newsk, |
669 | struct sk_buff *prev, struct sk_buff *next, | 681 | struct sk_buff *prev, struct sk_buff *next, |
670 | struct sk_buff_head *list) | 682 | struct sk_buff_head *list) |
@@ -672,24 +684,23 @@ static inline void __skb_insert(struct sk_buff *newsk, | |||
672 | newsk->next = next; | 684 | newsk->next = next; |
673 | newsk->prev = prev; | 685 | newsk->prev = prev; |
674 | next->prev = prev->next = newsk; | 686 | next->prev = prev->next = newsk; |
675 | newsk->list = list; | ||
676 | list->qlen++; | 687 | list->qlen++; |
677 | } | 688 | } |
678 | 689 | ||
679 | /* | 690 | /* |
680 | * Place a packet after a given packet in a list. | 691 | * Place a packet after a given packet in a list. |
681 | */ | 692 | */ |
682 | extern void skb_append(struct sk_buff *old, struct sk_buff *newsk); | 693 | extern void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list); |
683 | static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk) | 694 | static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) |
684 | { | 695 | { |
685 | __skb_insert(newsk, old, old->next, old->list); | 696 | __skb_insert(newsk, old, old->next, list); |
686 | } | 697 | } |
687 | 698 | ||
688 | /* | 699 | /* |
689 | * remove sk_buff from list. _Must_ be called atomically, and with | 700 | * remove sk_buff from list. _Must_ be called atomically, and with |
690 | * the list known.. | 701 | * the list known.. |
691 | */ | 702 | */ |
692 | extern void skb_unlink(struct sk_buff *skb); | 703 | extern void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list); |
693 | static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) | 704 | static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) |
694 | { | 705 | { |
695 | struct sk_buff *next, *prev; | 706 | struct sk_buff *next, *prev; |
@@ -698,7 +709,6 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) | |||
698 | next = skb->next; | 709 | next = skb->next; |
699 | prev = skb->prev; | 710 | prev = skb->prev; |
700 | skb->next = skb->prev = NULL; | 711 | skb->next = skb->prev = NULL; |
701 | skb->list = NULL; | ||
702 | next->prev = prev; | 712 | next->prev = prev; |
703 | prev->next = next; | 713 | prev->next = next; |
704 | } | 714 | } |
@@ -1213,6 +1223,8 @@ extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); | |||
1213 | extern void skb_split(struct sk_buff *skb, | 1223 | extern void skb_split(struct sk_buff *skb, |
1214 | struct sk_buff *skb1, const u32 len); | 1224 | struct sk_buff *skb1, const u32 len); |
1215 | 1225 | ||
1226 | extern void skb_release_data(struct sk_buff *skb); | ||
1227 | |||
1216 | static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, | 1228 | static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, |
1217 | int len, void *buffer) | 1229 | int len, void *buffer) |
1218 | { | 1230 | { |
@@ -1230,6 +1242,42 @@ static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, | |||
1230 | extern void skb_init(void); | 1242 | extern void skb_init(void); |
1231 | extern void skb_add_mtu(int mtu); | 1243 | extern void skb_add_mtu(int mtu); |
1232 | 1244 | ||
1245 | /** | ||
1246 | * skb_get_timestamp - get timestamp from a skb | ||
1247 | * @skb: skb to get stamp from | ||
1248 | * @stamp: pointer to struct timeval to store stamp in | ||
1249 | * | ||
1250 | * Timestamps are stored in the skb as offsets to a base timestamp. | ||
1251 | * This function converts the offset back to a struct timeval and stores | ||
1252 | * it in stamp. | ||
1253 | */ | ||
1254 | static inline void skb_get_timestamp(struct sk_buff *skb, struct timeval *stamp) | ||
1255 | { | ||
1256 | stamp->tv_sec = skb->tstamp.off_sec; | ||
1257 | stamp->tv_usec = skb->tstamp.off_usec; | ||
1258 | if (skb->tstamp.off_sec) { | ||
1259 | stamp->tv_sec += skb_tv_base.tv_sec; | ||
1260 | stamp->tv_usec += skb_tv_base.tv_usec; | ||
1261 | } | ||
1262 | } | ||
1263 | |||
1264 | /** | ||
1265 | * skb_set_timestamp - set timestamp of a skb | ||
1266 | * @skb: skb to set stamp of | ||
1267 | * @stamp: pointer to struct timeval to get stamp from | ||
1268 | * | ||
1269 | * Timestamps are stored in the skb as offsets to a base timestamp. | ||
1270 | * This function converts a struct timeval to an offset and stores | ||
1271 | * it in the skb. | ||
1272 | */ | ||
1273 | static inline void skb_set_timestamp(struct sk_buff *skb, struct timeval *stamp) | ||
1274 | { | ||
1275 | skb->tstamp.off_sec = stamp->tv_sec - skb_tv_base.tv_sec; | ||
1276 | skb->tstamp.off_usec = stamp->tv_usec - skb_tv_base.tv_usec; | ||
1277 | } | ||
1278 | |||
1279 | extern void __net_timestamp(struct sk_buff *skb); | ||
1280 | |||
1233 | #ifdef CONFIG_NETFILTER | 1281 | #ifdef CONFIG_NETFILTER |
1234 | static inline void nf_conntrack_put(struct nf_conntrack *nfct) | 1282 | static inline void nf_conntrack_put(struct nf_conntrack *nfct) |
1235 | { | 1283 | { |
diff --git a/include/linux/socket.h b/include/linux/socket.h index a5c7d96e4d2e..1739c2d5b95b 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h | |||
@@ -26,6 +26,13 @@ struct __kernel_sockaddr_storage { | |||
26 | #include <linux/types.h> /* pid_t */ | 26 | #include <linux/types.h> /* pid_t */ |
27 | #include <linux/compiler.h> /* __user */ | 27 | #include <linux/compiler.h> /* __user */ |
28 | 28 | ||
29 | extern int sysctl_somaxconn; | ||
30 | extern void sock_init(void); | ||
31 | #ifdef CONFIG_PROC_FS | ||
32 | struct seq_file; | ||
33 | extern void socket_seq_show(struct seq_file *seq); | ||
34 | #endif | ||
35 | |||
29 | typedef unsigned short sa_family_t; | 36 | typedef unsigned short sa_family_t; |
30 | 37 | ||
31 | /* | 38 | /* |
@@ -271,6 +278,8 @@ struct ucred { | |||
271 | #define SOL_IRDA 266 | 278 | #define SOL_IRDA 266 |
272 | #define SOL_NETBEUI 267 | 279 | #define SOL_NETBEUI 267 |
273 | #define SOL_LLC 268 | 280 | #define SOL_LLC 268 |
281 | #define SOL_DCCP 269 | ||
282 | #define SOL_NETLINK 270 | ||
274 | 283 | ||
275 | /* IPX options */ | 284 | /* IPX options */ |
276 | #define IPX_TYPE 1 | 285 | #define IPX_TYPE 1 |
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index e4fd82e42104..ac4ca44c75ca 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h | |||
@@ -55,24 +55,6 @@ struct tcphdr { | |||
55 | __u16 urg_ptr; | 55 | __u16 urg_ptr; |
56 | }; | 56 | }; |
57 | 57 | ||
58 | |||
59 | enum { | ||
60 | TCP_ESTABLISHED = 1, | ||
61 | TCP_SYN_SENT, | ||
62 | TCP_SYN_RECV, | ||
63 | TCP_FIN_WAIT1, | ||
64 | TCP_FIN_WAIT2, | ||
65 | TCP_TIME_WAIT, | ||
66 | TCP_CLOSE, | ||
67 | TCP_CLOSE_WAIT, | ||
68 | TCP_LAST_ACK, | ||
69 | TCP_LISTEN, | ||
70 | TCP_CLOSING, /* now a valid state */ | ||
71 | |||
72 | TCP_MAX_STATES /* Leave at the end! */ | ||
73 | }; | ||
74 | |||
75 | #define TCP_STATE_MASK 0xF | ||
76 | #define TCP_ACTION_FIN (1 << 7) | 58 | #define TCP_ACTION_FIN (1 << 7) |
77 | 59 | ||
78 | enum { | 60 | enum { |
@@ -195,8 +177,9 @@ struct tcp_info | |||
195 | 177 | ||
196 | #include <linux/config.h> | 178 | #include <linux/config.h> |
197 | #include <linux/skbuff.h> | 179 | #include <linux/skbuff.h> |
198 | #include <linux/ip.h> | ||
199 | #include <net/sock.h> | 180 | #include <net/sock.h> |
181 | #include <net/inet_connection_sock.h> | ||
182 | #include <net/inet_timewait_sock.h> | ||
200 | 183 | ||
201 | /* This defines a selective acknowledgement block. */ | 184 | /* This defines a selective acknowledgement block. */ |
202 | struct tcp_sack_block { | 185 | struct tcp_sack_block { |
@@ -236,8 +219,8 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) | |||
236 | } | 219 | } |
237 | 220 | ||
238 | struct tcp_sock { | 221 | struct tcp_sock { |
239 | /* inet_sock has to be the first member of tcp_sock */ | 222 | /* inet_connection_sock has to be the first member of tcp_sock */ |
240 | struct inet_sock inet; | 223 | struct inet_connection_sock inet_conn; |
241 | int tcp_header_len; /* Bytes of tcp header to send */ | 224 | int tcp_header_len; /* Bytes of tcp header to send */ |
242 | 225 | ||
243 | /* | 226 | /* |
@@ -258,19 +241,6 @@ struct tcp_sock { | |||
258 | __u32 snd_sml; /* Last byte of the most recently transmitted small packet */ | 241 | __u32 snd_sml; /* Last byte of the most recently transmitted small packet */ |
259 | __u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ | 242 | __u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ |
260 | __u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ | 243 | __u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ |
261 | struct tcp_bind_bucket *bind_hash; | ||
262 | /* Delayed ACK control data */ | ||
263 | struct { | ||
264 | __u8 pending; /* ACK is pending */ | ||
265 | __u8 quick; /* Scheduled number of quick acks */ | ||
266 | __u8 pingpong; /* The session is interactive */ | ||
267 | __u8 blocked; /* Delayed ACK was blocked by socket lock*/ | ||
268 | __u32 ato; /* Predicted tick of soft clock */ | ||
269 | unsigned long timeout; /* Currently scheduled timeout */ | ||
270 | __u32 lrcvtime; /* timestamp of last received data packet*/ | ||
271 | __u16 last_seg_size; /* Size of last incoming segment */ | ||
272 | __u16 rcv_mss; /* MSS used for delayed ACK decisions */ | ||
273 | } ack; | ||
274 | 244 | ||
275 | /* Data for direct copy to user */ | 245 | /* Data for direct copy to user */ |
276 | struct { | 246 | struct { |
@@ -288,19 +258,15 @@ struct tcp_sock { | |||
288 | __u32 mss_cache; /* Cached effective mss, not including SACKS */ | 258 | __u32 mss_cache; /* Cached effective mss, not including SACKS */ |
289 | __u16 xmit_size_goal; /* Goal for segmenting output packets */ | 259 | __u16 xmit_size_goal; /* Goal for segmenting output packets */ |
290 | __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ | 260 | __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ |
291 | __u8 ca_state; /* State of fast-retransmit machine */ | ||
292 | __u8 retransmits; /* Number of unrecovered RTO timeouts. */ | ||
293 | 261 | ||
294 | __u16 advmss; /* Advertised MSS */ | ||
295 | __u32 window_clamp; /* Maximal window to advertise */ | 262 | __u32 window_clamp; /* Maximal window to advertise */ |
296 | __u32 rcv_ssthresh; /* Current window clamp */ | 263 | __u32 rcv_ssthresh; /* Current window clamp */ |
297 | 264 | ||
298 | __u32 frto_highmark; /* snd_nxt when RTO occurred */ | 265 | __u32 frto_highmark; /* snd_nxt when RTO occurred */ |
299 | __u8 reordering; /* Packet reordering metric. */ | 266 | __u8 reordering; /* Packet reordering metric. */ |
300 | __u8 frto_counter; /* Number of new acks after RTO */ | 267 | __u8 frto_counter; /* Number of new acks after RTO */ |
301 | 268 | __u8 nonagle; /* Disable Nagle algorithm? */ | |
302 | __u8 unused; | 269 | __u8 keepalive_probes; /* num of allowed keep alive probes */ |
303 | __u8 defer_accept; /* User waits for some data after accept() */ | ||
304 | 270 | ||
305 | /* RTT measurement */ | 271 | /* RTT measurement */ |
306 | __u32 srtt; /* smoothed round trip time << 3 */ | 272 | __u32 srtt; /* smoothed round trip time << 3 */ |
@@ -308,19 +274,13 @@ struct tcp_sock { | |||
308 | __u32 mdev_max; /* maximal mdev for the last rtt period */ | 274 | __u32 mdev_max; /* maximal mdev for the last rtt period */ |
309 | __u32 rttvar; /* smoothed mdev_max */ | 275 | __u32 rttvar; /* smoothed mdev_max */ |
310 | __u32 rtt_seq; /* sequence number to update rttvar */ | 276 | __u32 rtt_seq; /* sequence number to update rttvar */ |
311 | __u32 rto; /* retransmit timeout */ | ||
312 | 277 | ||
313 | __u32 packets_out; /* Packets which are "in flight" */ | 278 | __u32 packets_out; /* Packets which are "in flight" */ |
314 | __u32 left_out; /* Packets which leaved network */ | 279 | __u32 left_out; /* Packets which leaved network */ |
315 | __u32 retrans_out; /* Retransmitted packets out */ | 280 | __u32 retrans_out; /* Retransmitted packets out */ |
316 | __u8 backoff; /* backoff */ | ||
317 | /* | 281 | /* |
318 | * Options received (usually on last packet, some only on SYN packets). | 282 | * Options received (usually on last packet, some only on SYN packets). |
319 | */ | 283 | */ |
320 | __u8 nonagle; /* Disable Nagle algorithm? */ | ||
321 | __u8 keepalive_probes; /* num of allowed keep alive probes */ | ||
322 | |||
323 | __u8 probes_out; /* unanswered 0 window probes */ | ||
324 | struct tcp_options_received rx_opt; | 284 | struct tcp_options_received rx_opt; |
325 | 285 | ||
326 | /* | 286 | /* |
@@ -333,11 +293,6 @@ struct tcp_sock { | |||
333 | __u32 snd_cwnd_used; | 293 | __u32 snd_cwnd_used; |
334 | __u32 snd_cwnd_stamp; | 294 | __u32 snd_cwnd_stamp; |
335 | 295 | ||
336 | /* Two commonly used timers in both sender and receiver paths. */ | ||
337 | unsigned long timeout; | ||
338 | struct timer_list retransmit_timer; /* Resend (no ack) */ | ||
339 | struct timer_list delack_timer; /* Ack delay */ | ||
340 | |||
341 | struct sk_buff_head out_of_order_queue; /* Out of order segments go here */ | 296 | struct sk_buff_head out_of_order_queue; /* Out of order segments go here */ |
342 | 297 | ||
343 | struct tcp_func *af_specific; /* Operations which are AF_INET{4,6} specific */ | 298 | struct tcp_func *af_specific; /* Operations which are AF_INET{4,6} specific */ |
@@ -352,8 +307,7 @@ struct tcp_sock { | |||
352 | struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ | 307 | struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ |
353 | struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/ | 308 | struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/ |
354 | 309 | ||
355 | __u8 syn_retries; /* num of allowed syn retries */ | 310 | __u16 advmss; /* Advertised MSS */ |
356 | __u8 ecn_flags; /* ECN status bits. */ | ||
357 | __u16 prior_ssthresh; /* ssthresh saved at recovery start */ | 311 | __u16 prior_ssthresh; /* ssthresh saved at recovery start */ |
358 | __u32 lost_out; /* Lost packets */ | 312 | __u32 lost_out; /* Lost packets */ |
359 | __u32 sacked_out; /* SACK'd packets */ | 313 | __u32 sacked_out; /* SACK'd packets */ |
@@ -367,14 +321,12 @@ struct tcp_sock { | |||
367 | int undo_retrans; /* number of undoable retransmissions. */ | 321 | int undo_retrans; /* number of undoable retransmissions. */ |
368 | __u32 urg_seq; /* Seq of received urgent pointer */ | 322 | __u32 urg_seq; /* Seq of received urgent pointer */ |
369 | __u16 urg_data; /* Saved octet of OOB data and control flags */ | 323 | __u16 urg_data; /* Saved octet of OOB data and control flags */ |
370 | __u8 pending; /* Scheduled timer event */ | ||
371 | __u8 urg_mode; /* In urgent mode */ | 324 | __u8 urg_mode; /* In urgent mode */ |
325 | __u8 ecn_flags; /* ECN status bits. */ | ||
372 | __u32 snd_up; /* Urgent pointer */ | 326 | __u32 snd_up; /* Urgent pointer */ |
373 | 327 | ||
374 | __u32 total_retrans; /* Total retransmits for entire connection */ | 328 | __u32 total_retrans; /* Total retransmits for entire connection */ |
375 | 329 | ||
376 | struct request_sock_queue accept_queue; /* FIFO of established children */ | ||
377 | |||
378 | unsigned int keepalive_time; /* time before keep alive takes place */ | 330 | unsigned int keepalive_time; /* time before keep alive takes place */ |
379 | unsigned int keepalive_intvl; /* time interval between keep alive probes */ | 331 | unsigned int keepalive_intvl; /* time interval between keep alive probes */ |
380 | int linger2; | 332 | int linger2; |
@@ -394,11 +346,6 @@ struct tcp_sock { | |||
394 | __u32 seq; | 346 | __u32 seq; |
395 | __u32 time; | 347 | __u32 time; |
396 | } rcvq_space; | 348 | } rcvq_space; |
397 | |||
398 | /* Pluggable TCP congestion control hook */ | ||
399 | struct tcp_congestion_ops *ca_ops; | ||
400 | u32 ca_priv[16]; | ||
401 | #define TCP_CA_PRIV_SIZE (16*sizeof(u32)) | ||
402 | }; | 349 | }; |
403 | 350 | ||
404 | static inline struct tcp_sock *tcp_sk(const struct sock *sk) | 351 | static inline struct tcp_sock *tcp_sk(const struct sock *sk) |
@@ -406,9 +353,18 @@ static inline struct tcp_sock *tcp_sk(const struct sock *sk) | |||
406 | return (struct tcp_sock *)sk; | 353 | return (struct tcp_sock *)sk; |
407 | } | 354 | } |
408 | 355 | ||
409 | static inline void *tcp_ca(const struct tcp_sock *tp) | 356 | struct tcp_timewait_sock { |
357 | struct inet_timewait_sock tw_sk; | ||
358 | __u32 tw_rcv_nxt; | ||
359 | __u32 tw_snd_nxt; | ||
360 | __u32 tw_rcv_wnd; | ||
361 | __u32 tw_ts_recent; | ||
362 | long tw_ts_recent_stamp; | ||
363 | }; | ||
364 | |||
365 | static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) | ||
410 | { | 366 | { |
411 | return (void *) tp->ca_priv; | 367 | return (struct tcp_timewait_sock *)sk; |
412 | } | 368 | } |
413 | 369 | ||
414 | #endif | 370 | #endif |
diff --git a/include/linux/tcp_diag.h b/include/linux/tcp_diag.h deleted file mode 100644 index 7a5996743946..000000000000 --- a/include/linux/tcp_diag.h +++ /dev/null | |||
@@ -1,127 +0,0 @@ | |||
1 | #ifndef _TCP_DIAG_H_ | ||
2 | #define _TCP_DIAG_H_ 1 | ||
3 | |||
4 | /* Just some random number */ | ||
5 | #define TCPDIAG_GETSOCK 18 | ||
6 | |||
7 | /* Socket identity */ | ||
8 | struct tcpdiag_sockid | ||
9 | { | ||
10 | __u16 tcpdiag_sport; | ||
11 | __u16 tcpdiag_dport; | ||
12 | __u32 tcpdiag_src[4]; | ||
13 | __u32 tcpdiag_dst[4]; | ||
14 | __u32 tcpdiag_if; | ||
15 | __u32 tcpdiag_cookie[2]; | ||
16 | #define TCPDIAG_NOCOOKIE (~0U) | ||
17 | }; | ||
18 | |||
19 | /* Request structure */ | ||
20 | |||
21 | struct tcpdiagreq | ||
22 | { | ||
23 | __u8 tcpdiag_family; /* Family of addresses. */ | ||
24 | __u8 tcpdiag_src_len; | ||
25 | __u8 tcpdiag_dst_len; | ||
26 | __u8 tcpdiag_ext; /* Query extended information */ | ||
27 | |||
28 | struct tcpdiag_sockid id; | ||
29 | |||
30 | __u32 tcpdiag_states; /* States to dump */ | ||
31 | __u32 tcpdiag_dbs; /* Tables to dump (NI) */ | ||
32 | }; | ||
33 | |||
34 | enum | ||
35 | { | ||
36 | TCPDIAG_REQ_NONE, | ||
37 | TCPDIAG_REQ_BYTECODE, | ||
38 | }; | ||
39 | |||
40 | #define TCPDIAG_REQ_MAX TCPDIAG_REQ_BYTECODE | ||
41 | |||
42 | /* Bytecode is sequence of 4 byte commands followed by variable arguments. | ||
43 | * All the commands identified by "code" are conditional jumps forward: | ||
44 | * to offset cc+"yes" or to offset cc+"no". "yes" is supposed to be | ||
45 | * length of the command and its arguments. | ||
46 | */ | ||
47 | |||
48 | struct tcpdiag_bc_op | ||
49 | { | ||
50 | unsigned char code; | ||
51 | unsigned char yes; | ||
52 | unsigned short no; | ||
53 | }; | ||
54 | |||
55 | enum | ||
56 | { | ||
57 | TCPDIAG_BC_NOP, | ||
58 | TCPDIAG_BC_JMP, | ||
59 | TCPDIAG_BC_S_GE, | ||
60 | TCPDIAG_BC_S_LE, | ||
61 | TCPDIAG_BC_D_GE, | ||
62 | TCPDIAG_BC_D_LE, | ||
63 | TCPDIAG_BC_AUTO, | ||
64 | TCPDIAG_BC_S_COND, | ||
65 | TCPDIAG_BC_D_COND, | ||
66 | }; | ||
67 | |||
68 | struct tcpdiag_hostcond | ||
69 | { | ||
70 | __u8 family; | ||
71 | __u8 prefix_len; | ||
72 | int port; | ||
73 | __u32 addr[0]; | ||
74 | }; | ||
75 | |||
76 | /* Base info structure. It contains socket identity (addrs/ports/cookie) | ||
77 | * and, alas, the information shown by netstat. */ | ||
78 | struct tcpdiagmsg | ||
79 | { | ||
80 | __u8 tcpdiag_family; | ||
81 | __u8 tcpdiag_state; | ||
82 | __u8 tcpdiag_timer; | ||
83 | __u8 tcpdiag_retrans; | ||
84 | |||
85 | struct tcpdiag_sockid id; | ||
86 | |||
87 | __u32 tcpdiag_expires; | ||
88 | __u32 tcpdiag_rqueue; | ||
89 | __u32 tcpdiag_wqueue; | ||
90 | __u32 tcpdiag_uid; | ||
91 | __u32 tcpdiag_inode; | ||
92 | }; | ||
93 | |||
94 | /* Extensions */ | ||
95 | |||
96 | enum | ||
97 | { | ||
98 | TCPDIAG_NONE, | ||
99 | TCPDIAG_MEMINFO, | ||
100 | TCPDIAG_INFO, | ||
101 | TCPDIAG_VEGASINFO, | ||
102 | TCPDIAG_CONG, | ||
103 | }; | ||
104 | |||
105 | #define TCPDIAG_MAX TCPDIAG_CONG | ||
106 | |||
107 | |||
108 | /* TCPDIAG_MEM */ | ||
109 | |||
110 | struct tcpdiag_meminfo | ||
111 | { | ||
112 | __u32 tcpdiag_rmem; | ||
113 | __u32 tcpdiag_wmem; | ||
114 | __u32 tcpdiag_fmem; | ||
115 | __u32 tcpdiag_tmem; | ||
116 | }; | ||
117 | |||
118 | /* TCPDIAG_VEGASINFO */ | ||
119 | |||
120 | struct tcpvegas_info { | ||
121 | __u32 tcpv_enabled; | ||
122 | __u32 tcpv_rttcnt; | ||
123 | __u32 tcpv_rtt; | ||
124 | __u32 tcpv_minrtt; | ||
125 | }; | ||
126 | |||
127 | #endif /* _TCP_DIAG_H_ */ | ||
diff --git a/include/linux/types.h b/include/linux/types.h index dcb13f865df9..2b678c22ca4a 100644 --- a/include/linux/types.h +++ b/include/linux/types.h | |||
@@ -123,6 +123,9 @@ typedef __u64 u_int64_t; | |||
123 | typedef __s64 int64_t; | 123 | typedef __s64 int64_t; |
124 | #endif | 124 | #endif |
125 | 125 | ||
126 | /* this is a special 64bit data type that is 8-byte aligned */ | ||
127 | #define aligned_u64 unsigned long long __attribute__((aligned(8))) | ||
128 | |||
126 | /* | 129 | /* |
127 | * The type used for indexing onto a disc or disc partition. | 130 | * The type used for indexing onto a disc or disc partition. |
128 | * If required, asm/types.h can override it and define | 131 | * If required, asm/types.h can override it and define |
diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index f0d423300d84..0fb077d68441 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h | |||
@@ -258,9 +258,27 @@ struct xfrm_usersa_flush { | |||
258 | __u8 proto; | 258 | __u8 proto; |
259 | }; | 259 | }; |
260 | 260 | ||
261 | #ifndef __KERNEL__ | ||
262 | /* backwards compatibility for userspace */ | ||
261 | #define XFRMGRP_ACQUIRE 1 | 263 | #define XFRMGRP_ACQUIRE 1 |
262 | #define XFRMGRP_EXPIRE 2 | 264 | #define XFRMGRP_EXPIRE 2 |
263 | #define XFRMGRP_SA 4 | 265 | #define XFRMGRP_SA 4 |
264 | #define XFRMGRP_POLICY 8 | 266 | #define XFRMGRP_POLICY 8 |
267 | #endif | ||
268 | |||
269 | enum xfrm_nlgroups { | ||
270 | XFRMNLGRP_NONE, | ||
271 | #define XFRMNLGRP_NONE XFRMNLGRP_NONE | ||
272 | XFRMNLGRP_ACQUIRE, | ||
273 | #define XFRMNLGRP_ACQUIRE XFRMNLGRP_ACQUIRE | ||
274 | XFRMNLGRP_EXPIRE, | ||
275 | #define XFRMNLGRP_EXPIRE XFRMNLGRP_EXPIRE | ||
276 | XFRMNLGRP_SA, | ||
277 | #define XFRMNLGRP_SA XFRMNLGRP_SA | ||
278 | XFRMNLGRP_POLICY, | ||
279 | #define XFRMNLGRP_POLICY XFRMNLGRP_POLICY | ||
280 | __XFRMNLGRP_MAX | ||
281 | }; | ||
282 | #define XFRMNLGRP_MAX (__XFRMNLGRP_MAX - 1) | ||
265 | 283 | ||
266 | #endif /* _LINUX_XFRM_H */ | 284 | #endif /* _LINUX_XFRM_H */ |
diff --git a/include/net/act_api.h b/include/net/act_api.h index ed00a995f576..b55eb7c7f033 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h | |||
@@ -63,7 +63,7 @@ struct tc_action_ops | |||
63 | __u32 type; /* TBD to match kind */ | 63 | __u32 type; /* TBD to match kind */ |
64 | __u32 capab; /* capabilities includes 4 bit version */ | 64 | __u32 capab; /* capabilities includes 4 bit version */ |
65 | struct module *owner; | 65 | struct module *owner; |
66 | int (*act)(struct sk_buff **, struct tc_action *); | 66 | int (*act)(struct sk_buff **, struct tc_action *, struct tcf_result *); |
67 | int (*get_stats)(struct sk_buff *, struct tc_action *); | 67 | int (*get_stats)(struct sk_buff *, struct tc_action *); |
68 | int (*dump)(struct sk_buff *, struct tc_action *,int , int); | 68 | int (*dump)(struct sk_buff *, struct tc_action *,int , int); |
69 | int (*cleanup)(struct tc_action *, int bind); | 69 | int (*cleanup)(struct tc_action *, int bind); |
diff --git a/include/net/addrconf.h b/include/net/addrconf.h index a0ed93672176..750e2508dd90 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h | |||
@@ -45,6 +45,7 @@ struct prefix_info { | |||
45 | 45 | ||
46 | #ifdef __KERNEL__ | 46 | #ifdef __KERNEL__ |
47 | 47 | ||
48 | #include <linux/config.h> | ||
48 | #include <linux/netdevice.h> | 49 | #include <linux/netdevice.h> |
49 | #include <net/if_inet6.h> | 50 | #include <net/if_inet6.h> |
50 | #include <net/ipv6.h> | 51 | #include <net/ipv6.h> |
@@ -238,5 +239,10 @@ static inline int ipv6_addr_is_ll_all_routers(const struct in6_addr *addr) | |||
238 | addr->s6_addr32[3] == htonl(0x00000002)); | 239 | addr->s6_addr32[3] == htonl(0x00000002)); |
239 | } | 240 | } |
240 | 241 | ||
242 | #ifdef CONFIG_PROC_FS | ||
243 | extern int if6_proc_init(void); | ||
244 | extern void if6_proc_exit(void); | ||
245 | #endif | ||
246 | |||
241 | #endif | 247 | #endif |
242 | #endif | 248 | #endif |
diff --git a/include/net/af_unix.h b/include/net/af_unix.h index b60b3846b9d1..b5d785ab4a0e 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h | |||
@@ -1,5 +1,11 @@ | |||
1 | #ifndef __LINUX_NET_AFUNIX_H | 1 | #ifndef __LINUX_NET_AFUNIX_H |
2 | #define __LINUX_NET_AFUNIX_H | 2 | #define __LINUX_NET_AFUNIX_H |
3 | |||
4 | #include <linux/config.h> | ||
5 | #include <linux/socket.h> | ||
6 | #include <linux/un.h> | ||
7 | #include <net/sock.h> | ||
8 | |||
3 | extern void unix_inflight(struct file *fp); | 9 | extern void unix_inflight(struct file *fp); |
4 | extern void unix_notinflight(struct file *fp); | 10 | extern void unix_notinflight(struct file *fp); |
5 | extern void unix_gc(void); | 11 | extern void unix_gc(void); |
@@ -74,5 +80,14 @@ struct unix_sock { | |||
74 | wait_queue_head_t peer_wait; | 80 | wait_queue_head_t peer_wait; |
75 | }; | 81 | }; |
76 | #define unix_sk(__sk) ((struct unix_sock *)__sk) | 82 | #define unix_sk(__sk) ((struct unix_sock *)__sk) |
83 | |||
84 | #ifdef CONFIG_SYSCTL | ||
85 | extern int sysctl_unix_max_dgram_qlen; | ||
86 | extern void unix_sysctl_register(void); | ||
87 | extern void unix_sysctl_unregister(void); | ||
88 | #else | ||
89 | static inline void unix_sysctl_register(void) {} | ||
90 | static inline void unix_sysctl_unregister(void) {} | ||
91 | #endif | ||
77 | #endif | 92 | #endif |
78 | #endif | 93 | #endif |
diff --git a/include/net/arp.h b/include/net/arp.h index a1f09fad6a52..a13e30c35f42 100644 --- a/include/net/arp.h +++ b/include/net/arp.h | |||
@@ -11,7 +11,7 @@ extern struct neigh_table arp_tbl; | |||
11 | 11 | ||
12 | extern void arp_init(void); | 12 | extern void arp_init(void); |
13 | extern int arp_rcv(struct sk_buff *skb, struct net_device *dev, | 13 | extern int arp_rcv(struct sk_buff *skb, struct net_device *dev, |
14 | struct packet_type *pt); | 14 | struct packet_type *pt, struct net_device *orig_dev); |
15 | extern int arp_find(unsigned char *haddr, struct sk_buff *skb); | 15 | extern int arp_find(unsigned char *haddr, struct sk_buff *skb); |
16 | extern int arp_ioctl(unsigned int cmd, void __user *arg); | 16 | extern int arp_ioctl(unsigned int cmd, void __user *arg); |
17 | extern void arp_send(int type, int ptype, u32 dest_ip, | 17 | extern void arp_send(int type, int ptype, u32 dest_ip, |
diff --git a/include/net/ax25.h b/include/net/ax25.h index 3696f988a9f1..926eed543023 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h | |||
@@ -316,7 +316,7 @@ extern int ax25_protocol_is_registered(unsigned int); | |||
316 | 316 | ||
317 | /* ax25_in.c */ | 317 | /* ax25_in.c */ |
318 | extern int ax25_rx_iframe(ax25_cb *, struct sk_buff *); | 318 | extern int ax25_rx_iframe(ax25_cb *, struct sk_buff *); |
319 | extern int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *); | 319 | extern int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); |
320 | 320 | ||
321 | /* ax25_ip.c */ | 321 | /* ax25_ip.c */ |
322 | extern int ax25_encapsulate(struct sk_buff *, struct net_device *, unsigned short, void *, void *, unsigned int); | 322 | extern int ax25_encapsulate(struct sk_buff *, struct net_device *, unsigned short, void *, void *, unsigned int); |
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 06b24f637026..6dfa4a61ffd0 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h | |||
@@ -131,11 +131,12 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock); | |||
131 | 131 | ||
132 | /* Skb helpers */ | 132 | /* Skb helpers */ |
133 | struct bt_skb_cb { | 133 | struct bt_skb_cb { |
134 | int incoming; | 134 | __u8 pkt_type; |
135 | __u8 incoming; | ||
135 | }; | 136 | }; |
136 | #define bt_cb(skb) ((struct bt_skb_cb *)(skb->cb)) | 137 | #define bt_cb(skb) ((struct bt_skb_cb *)(skb->cb)) |
137 | 138 | ||
138 | static inline struct sk_buff *bt_skb_alloc(unsigned int len, int how) | 139 | static inline struct sk_buff *bt_skb_alloc(unsigned int len, unsigned int __nocast how) |
139 | { | 140 | { |
140 | struct sk_buff *skb; | 141 | struct sk_buff *skb; |
141 | 142 | ||
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 6f0706f4af68..371e7d3f2e6f 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h | |||
@@ -453,6 +453,15 @@ struct inquiry_info_with_rssi { | |||
453 | __u16 clock_offset; | 453 | __u16 clock_offset; |
454 | __s8 rssi; | 454 | __s8 rssi; |
455 | } __attribute__ ((packed)); | 455 | } __attribute__ ((packed)); |
456 | struct inquiry_info_with_rssi_and_pscan_mode { | ||
457 | bdaddr_t bdaddr; | ||
458 | __u8 pscan_rep_mode; | ||
459 | __u8 pscan_period_mode; | ||
460 | __u8 pscan_mode; | ||
461 | __u8 dev_class[3]; | ||
462 | __u16 clock_offset; | ||
463 | __s8 rssi; | ||
464 | } __attribute__ ((packed)); | ||
456 | 465 | ||
457 | #define HCI_EV_CONN_COMPLETE 0x03 | 466 | #define HCI_EV_CONN_COMPLETE 0x03 |
458 | struct hci_ev_conn_complete { | 467 | struct hci_ev_conn_complete { |
@@ -584,6 +593,12 @@ struct hci_ev_clock_offset { | |||
584 | __u16 clock_offset; | 593 | __u16 clock_offset; |
585 | } __attribute__ ((packed)); | 594 | } __attribute__ ((packed)); |
586 | 595 | ||
596 | #define HCI_EV_PSCAN_REP_MODE 0x20 | ||
597 | struct hci_ev_pscan_rep_mode { | ||
598 | bdaddr_t bdaddr; | ||
599 | __u8 pscan_rep_mode; | ||
600 | } __attribute__ ((packed)); | ||
601 | |||
587 | /* Internal events generated by Bluetooth stack */ | 602 | /* Internal events generated by Bluetooth stack */ |
588 | #define HCI_EV_STACK_INTERNAL 0xFD | 603 | #define HCI_EV_STACK_INTERNAL 0xFD |
589 | struct hci_ev_stack_internal { | 604 | struct hci_ev_stack_internal { |
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 6d63a47c731b..7f933f302078 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h | |||
@@ -404,7 +404,7 @@ static inline int hci_recv_frame(struct sk_buff *skb) | |||
404 | bt_cb(skb)->incoming = 1; | 404 | bt_cb(skb)->incoming = 1; |
405 | 405 | ||
406 | /* Time stamp */ | 406 | /* Time stamp */ |
407 | do_gettimeofday(&skb->stamp); | 407 | __net_timestamp(skb); |
408 | 408 | ||
409 | /* Queue frame for rx task */ | 409 | /* Queue frame for rx task */ |
410 | skb_queue_tail(&hdev->rx_q, skb); | 410 | skb_queue_tail(&hdev->rx_q, skb); |
diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 13669bad00b3..ffea9d54071f 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h | |||
@@ -80,9 +80,9 @@ | |||
80 | #define RFCOMM_RPN_STOP_15 1 | 80 | #define RFCOMM_RPN_STOP_15 1 |
81 | 81 | ||
82 | #define RFCOMM_RPN_PARITY_NONE 0x0 | 82 | #define RFCOMM_RPN_PARITY_NONE 0x0 |
83 | #define RFCOMM_RPN_PARITY_ODD 0x4 | 83 | #define RFCOMM_RPN_PARITY_ODD 0x1 |
84 | #define RFCOMM_RPN_PARITY_EVEN 0x5 | 84 | #define RFCOMM_RPN_PARITY_EVEN 0x3 |
85 | #define RFCOMM_RPN_PARITY_MARK 0x6 | 85 | #define RFCOMM_RPN_PARITY_MARK 0x5 |
86 | #define RFCOMM_RPN_PARITY_SPACE 0x7 | 86 | #define RFCOMM_RPN_PARITY_SPACE 0x7 |
87 | 87 | ||
88 | #define RFCOMM_RPN_FLOW_NONE 0x00 | 88 | #define RFCOMM_RPN_FLOW_NONE 0x00 |
@@ -223,8 +223,14 @@ struct rfcomm_dlc { | |||
223 | #define RFCOMM_CFC_DISABLED 0 | 223 | #define RFCOMM_CFC_DISABLED 0 |
224 | #define RFCOMM_CFC_ENABLED RFCOMM_MAX_CREDITS | 224 | #define RFCOMM_CFC_ENABLED RFCOMM_MAX_CREDITS |
225 | 225 | ||
226 | /* ---- RFCOMM SEND RPN ---- */ | ||
227 | int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci, | ||
228 | u8 bit_rate, u8 data_bits, u8 stop_bits, | ||
229 | u8 parity, u8 flow_ctrl_settings, | ||
230 | u8 xon_char, u8 xoff_char, u16 param_mask); | ||
231 | |||
226 | /* ---- RFCOMM DLCs (channels) ---- */ | 232 | /* ---- RFCOMM DLCs (channels) ---- */ |
227 | struct rfcomm_dlc *rfcomm_dlc_alloc(int prio); | 233 | struct rfcomm_dlc *rfcomm_dlc_alloc(unsigned int __nocast prio); |
228 | void rfcomm_dlc_free(struct rfcomm_dlc *d); | 234 | void rfcomm_dlc_free(struct rfcomm_dlc *d); |
229 | int rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, u8 channel); | 235 | int rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, u8 channel); |
230 | int rfcomm_dlc_close(struct rfcomm_dlc *d, int reason); | 236 | int rfcomm_dlc_close(struct rfcomm_dlc *d, int reason); |
diff --git a/include/net/datalink.h b/include/net/datalink.h index 5797ba3d2eb5..deb7ca75db48 100644 --- a/include/net/datalink.h +++ b/include/net/datalink.h | |||
@@ -9,7 +9,7 @@ struct datalink_proto { | |||
9 | unsigned short header_length; | 9 | unsigned short header_length; |
10 | 10 | ||
11 | int (*rcvfunc)(struct sk_buff *, struct net_device *, | 11 | int (*rcvfunc)(struct sk_buff *, struct net_device *, |
12 | struct packet_type *); | 12 | struct packet_type *, struct net_device *); |
13 | int (*request)(struct datalink_proto *, struct sk_buff *, | 13 | int (*request)(struct datalink_proto *, struct sk_buff *, |
14 | unsigned char *); | 14 | unsigned char *); |
15 | struct list_head node; | 15 | struct list_head node; |
diff --git a/include/net/dn.h b/include/net/dn.h index 5551c46db397..c1dbbd222793 100644 --- a/include/net/dn.h +++ b/include/net/dn.h | |||
@@ -3,6 +3,7 @@ | |||
3 | 3 | ||
4 | #include <linux/dn.h> | 4 | #include <linux/dn.h> |
5 | #include <net/sock.h> | 5 | #include <net/sock.h> |
6 | #include <net/tcp.h> | ||
6 | #include <asm/byteorder.h> | 7 | #include <asm/byteorder.h> |
7 | 8 | ||
8 | typedef unsigned short dn_address; | 9 | typedef unsigned short dn_address; |
diff --git a/include/net/icmp.h b/include/net/icmp.h index e5ef0d15fb45..6cdebeee5f96 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h | |||
@@ -57,4 +57,11 @@ static inline struct raw_sock *raw_sk(const struct sock *sk) | |||
57 | return (struct raw_sock *)sk; | 57 | return (struct raw_sock *)sk; |
58 | } | 58 | } |
59 | 59 | ||
60 | extern int sysctl_icmp_echo_ignore_all; | ||
61 | extern int sysctl_icmp_echo_ignore_broadcasts; | ||
62 | extern int sysctl_icmp_ignore_bogus_error_responses; | ||
63 | extern int sysctl_icmp_errors_use_inbound_ifaddr; | ||
64 | extern int sysctl_icmp_ratelimit; | ||
65 | extern int sysctl_icmp_ratemask; | ||
66 | |||
60 | #endif /* _ICMP_H */ | 67 | #endif /* _ICMP_H */ |
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h new file mode 100644 index 000000000000..03df3b157960 --- /dev/null +++ b/include/net/inet6_hashtables.h | |||
@@ -0,0 +1,130 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * Authors: Lotsa people, from code originally in tcp | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | */ | ||
13 | |||
14 | #ifndef _INET6_HASHTABLES_H | ||
15 | #define _INET6_HASHTABLES_H | ||
16 | |||
17 | #include <linux/config.h> | ||
18 | |||
19 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) | ||
20 | #include <linux/in6.h> | ||
21 | #include <linux/ipv6.h> | ||
22 | #include <linux/types.h> | ||
23 | |||
24 | #include <net/ipv6.h> | ||
25 | |||
26 | struct inet_hashinfo; | ||
27 | |||
28 | /* I have no idea if this is a good hash for v6 or not. -DaveM */ | ||
29 | static inline int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport, | ||
30 | const struct in6_addr *faddr, const u16 fport, | ||
31 | const int ehash_size) | ||
32 | { | ||
33 | int hashent = (lport ^ fport); | ||
34 | |||
35 | hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]); | ||
36 | hashent ^= hashent >> 16; | ||
37 | hashent ^= hashent >> 8; | ||
38 | return (hashent & (ehash_size - 1)); | ||
39 | } | ||
40 | |||
41 | static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size) | ||
42 | { | ||
43 | const struct inet_sock *inet = inet_sk(sk); | ||
44 | const struct ipv6_pinfo *np = inet6_sk(sk); | ||
45 | const struct in6_addr *laddr = &np->rcv_saddr; | ||
46 | const struct in6_addr *faddr = &np->daddr; | ||
47 | const __u16 lport = inet->num; | ||
48 | const __u16 fport = inet->dport; | ||
49 | return inet6_ehashfn(laddr, lport, faddr, fport, ehash_size); | ||
50 | } | ||
51 | |||
52 | /* | ||
53 | * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so | ||
54 | * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM | ||
55 | * | ||
56 | * The sockhash lock must be held as a reader here. | ||
57 | */ | ||
58 | static inline struct sock * | ||
59 | __inet6_lookup_established(struct inet_hashinfo *hashinfo, | ||
60 | const struct in6_addr *saddr, | ||
61 | const u16 sport, | ||
62 | const struct in6_addr *daddr, | ||
63 | const u16 hnum, | ||
64 | const int dif) | ||
65 | { | ||
66 | struct sock *sk; | ||
67 | const struct hlist_node *node; | ||
68 | const __u32 ports = INET_COMBINED_PORTS(sport, hnum); | ||
69 | /* Optimize here for direct hit, only listening connections can | ||
70 | * have wildcards anyways. | ||
71 | */ | ||
72 | const int hash = inet6_ehashfn(daddr, hnum, saddr, sport, | ||
73 | hashinfo->ehash_size); | ||
74 | struct inet_ehash_bucket *head = &hashinfo->ehash[hash]; | ||
75 | |||
76 | read_lock(&head->lock); | ||
77 | sk_for_each(sk, node, &head->chain) { | ||
78 | /* For IPV6 do the cheaper port and family tests first. */ | ||
79 | if (INET6_MATCH(sk, saddr, daddr, ports, dif)) | ||
80 | goto hit; /* You sunk my battleship! */ | ||
81 | } | ||
82 | /* Must check for a TIME_WAIT'er before going to listener hash. */ | ||
83 | sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { | ||
84 | const struct inet_timewait_sock *tw = inet_twsk(sk); | ||
85 | |||
86 | if(*((__u32 *)&(tw->tw_dport)) == ports && | ||
87 | sk->sk_family == PF_INET6) { | ||
88 | const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); | ||
89 | |||
90 | if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && | ||
91 | ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && | ||
92 | (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) | ||
93 | goto hit; | ||
94 | } | ||
95 | } | ||
96 | read_unlock(&head->lock); | ||
97 | return NULL; | ||
98 | |||
99 | hit: | ||
100 | sock_hold(sk); | ||
101 | read_unlock(&head->lock); | ||
102 | return sk; | ||
103 | } | ||
104 | |||
105 | extern struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, | ||
106 | const struct in6_addr *daddr, | ||
107 | const unsigned short hnum, | ||
108 | const int dif); | ||
109 | |||
110 | static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo, | ||
111 | const struct in6_addr *saddr, | ||
112 | const u16 sport, | ||
113 | const struct in6_addr *daddr, | ||
114 | const u16 hnum, | ||
115 | const int dif) | ||
116 | { | ||
117 | struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport, | ||
118 | daddr, hnum, dif); | ||
119 | if (sk) | ||
120 | return sk; | ||
121 | |||
122 | return inet6_lookup_listener(hashinfo, daddr, hnum, dif); | ||
123 | } | ||
124 | |||
125 | extern struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, | ||
126 | const struct in6_addr *saddr, const u16 sport, | ||
127 | const struct in6_addr *daddr, const u16 dport, | ||
128 | const int dif); | ||
129 | #endif /* defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) */ | ||
130 | #endif /* _INET6_HASHTABLES_H */ | ||
diff --git a/include/net/inet_common.h b/include/net/inet_common.h index fbc1f4d140d8..f943306ce5ff 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h | |||
@@ -8,6 +8,11 @@ extern struct proto_ops inet_dgram_ops; | |||
8 | * INET4 prototypes used by INET6 | 8 | * INET4 prototypes used by INET6 |
9 | */ | 9 | */ |
10 | 10 | ||
11 | struct msghdr; | ||
12 | struct sock; | ||
13 | struct sockaddr; | ||
14 | struct socket; | ||
15 | |||
11 | extern void inet_remove_sock(struct sock *sk1); | 16 | extern void inet_remove_sock(struct sock *sk1); |
12 | extern void inet_put_sock(unsigned short num, | 17 | extern void inet_put_sock(unsigned short num, |
13 | struct sock *sk); | 18 | struct sock *sk); |
@@ -29,7 +34,6 @@ extern unsigned int inet_poll(struct file * file, struct socket *sock, struct p | |||
29 | extern int inet_listen(struct socket *sock, int backlog); | 34 | extern int inet_listen(struct socket *sock, int backlog); |
30 | 35 | ||
31 | extern void inet_sock_destruct(struct sock *sk); | 36 | extern void inet_sock_destruct(struct sock *sk); |
32 | extern atomic_t inet_sock_nr; | ||
33 | 37 | ||
34 | extern int inet_bind(struct socket *sock, | 38 | extern int inet_bind(struct socket *sock, |
35 | struct sockaddr *uaddr, int addr_len); | 39 | struct sockaddr *uaddr, int addr_len); |
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h new file mode 100644 index 000000000000..651f824c1008 --- /dev/null +++ b/include/net/inet_connection_sock.h | |||
@@ -0,0 +1,276 @@ | |||
1 | /* | ||
2 | * NET Generic infrastructure for INET connection oriented protocols. | ||
3 | * | ||
4 | * Definitions for inet_connection_sock | ||
5 | * | ||
6 | * Authors: Many people, see the TCP sources | ||
7 | * | ||
8 | * From code originally in TCP | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License | ||
12 | * as published by the Free Software Foundation; either version | ||
13 | * 2 of the License, or (at your option) any later version. | ||
14 | */ | ||
15 | #ifndef _INET_CONNECTION_SOCK_H | ||
16 | #define _INET_CONNECTION_SOCK_H | ||
17 | |||
18 | #include <linux/ip.h> | ||
19 | #include <linux/string.h> | ||
20 | #include <linux/timer.h> | ||
21 | #include <net/request_sock.h> | ||
22 | |||
23 | #define INET_CSK_DEBUG 1 | ||
24 | |||
25 | /* Cancel timers, when they are not required. */ | ||
26 | #undef INET_CSK_CLEAR_TIMERS | ||
27 | |||
28 | struct inet_bind_bucket; | ||
29 | struct inet_hashinfo; | ||
30 | struct tcp_congestion_ops; | ||
31 | |||
/** inet_connection_sock - INET connection oriented sock
 *
 * @icsk_accept_queue:	   FIFO of established children
 * @icsk_bind_hash:	   Bind node
 * @icsk_timeout:	   Timeout
 * @icsk_retransmit_timer: Resend (no ack)
 * @icsk_delack_timer:	   Delayed ACK timer
 * @icsk_rto:		   Retransmit timeout
 * @icsk_ca_ops:	   Pluggable congestion control hook
 * @icsk_ca_state:	   Congestion control state
 * @icsk_retransmits:	   Number of unrecovered [RTO] timeouts
 * @icsk_pending:	   Scheduled timer event
 * @icsk_backoff:	   Backoff
 * @icsk_syn_retries:	   Number of allowed SYN (or equivalent) retries
 * @icsk_probes_out:	   unanswered 0 window probes
 * @icsk_ack:		   Delayed ACK control data
 * @icsk_ca_priv:	   Scratch area private to the congestion control module
 */
struct inet_connection_sock {
	/* inet_sock has to be the first member! */
	struct inet_sock	  icsk_inet;
	struct request_sock_queue icsk_accept_queue;
	struct inet_bind_bucket	  *icsk_bind_hash;
	unsigned long		  icsk_timeout;
	struct timer_list	  icsk_retransmit_timer;
	struct timer_list	  icsk_delack_timer;
	__u32			  icsk_rto;
	struct tcp_congestion_ops *icsk_ca_ops;
	__u8			  icsk_ca_state;
	__u8			  icsk_retransmits;
	__u8			  icsk_pending;
	__u8			  icsk_backoff;
	__u8			  icsk_syn_retries;
	__u8			  icsk_probes_out;
	/* 2 BYTES HOLE, TRY TO PACK! */
	struct {
		__u8		  pending;	 /* ACK is pending */
		__u8		  quick;	 /* Scheduled number of quick acks */
		__u8		  pingpong;	 /* The session is interactive */
		__u8		  blocked;	 /* Delayed ACK was blocked by socket lock */
		__u32		  ato;		 /* Predicted tick of soft clock */
		unsigned long	  timeout;	 /* Currently scheduled timeout */
		__u32		  lrcvtime;	 /* timestamp of last received data packet */
		__u16		  last_seg_size; /* Size of last incoming segment */
		__u16		  rcv_mss;	 /* MSS used for delayed ACK decisions */
	} icsk_ack;
	u32			  icsk_ca_priv[16];
#define ICSK_CA_PRIV_SIZE	(16 * sizeof(u32))
};
79 | |||
80 | #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ | ||
81 | #define ICSK_TIME_DACK 2 /* Delayed ack timer */ | ||
82 | #define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ | ||
83 | #define ICSK_TIME_KEEPOPEN 4 /* Keepalive timer */ | ||
84 | |||
/* Downcast from struct sock: safe because inet_connection_sock embeds
 * inet_sock (and therefore struct sock) as its first member. */
static inline struct inet_connection_sock *inet_csk(const struct sock *sk)
{
	return (struct inet_connection_sock *)sk;
}
89 | |||
/* Congestion-control private scratch area of @sk (ICSK_CA_PRIV_SIZE bytes). */
static inline void *inet_csk_ca(const struct sock *sk)
{
	return (void *)inet_csk(sk)->icsk_ca_priv;
}
94 | |||
95 | extern struct sock *inet_csk_clone(struct sock *sk, | ||
96 | const struct request_sock *req, | ||
97 | const unsigned int __nocast priority); | ||
98 | |||
99 | enum inet_csk_ack_state_t { | ||
100 | ICSK_ACK_SCHED = 1, | ||
101 | ICSK_ACK_TIMER = 2, | ||
102 | ICSK_ACK_PUSHED = 4 | ||
103 | }; | ||
104 | |||
105 | extern void inet_csk_init_xmit_timers(struct sock *sk, | ||
106 | void (*retransmit_handler)(unsigned long), | ||
107 | void (*delack_handler)(unsigned long), | ||
108 | void (*keepalive_handler)(unsigned long)); | ||
109 | extern void inet_csk_clear_xmit_timers(struct sock *sk); | ||
110 | |||
111 | static inline void inet_csk_schedule_ack(struct sock *sk) | ||
112 | { | ||
113 | inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED; | ||
114 | } | ||
115 | |||
116 | static inline int inet_csk_ack_scheduled(const struct sock *sk) | ||
117 | { | ||
118 | return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED; | ||
119 | } | ||
120 | |||
121 | static inline void inet_csk_delack_init(struct sock *sk) | ||
122 | { | ||
123 | memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack)); | ||
124 | } | ||
125 | |||
126 | extern void inet_csk_delete_keepalive_timer(struct sock *sk); | ||
127 | extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout); | ||
128 | |||
129 | #ifdef INET_CSK_DEBUG | ||
130 | extern const char inet_csk_timer_bug_msg[]; | ||
131 | #endif | ||
132 | |||
/*
 * Stop a pending timer event of type @what (ICSK_TIME_*).
 *
 * Only the bookkeeping is cleared unconditionally; the timers themselves
 * are stopped only when INET_CSK_CLEAR_TIMERS is defined (it normally is
 * not — see the #undef at the top of this file).
 */
static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
		/* Retransmit and zero-window-probe share one timer. */
		icsk->icsk_pending = 0;
#ifdef INET_CSK_CLEAR_TIMERS
		sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
#endif
	} else if (what == ICSK_TIME_DACK) {
		icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0;
#ifdef INET_CSK_CLEAR_TIMERS
		sk_stop_timer(sk, &icsk->icsk_delack_timer);
#endif
	}
#ifdef INET_CSK_DEBUG
	else {
		pr_debug("%s", inet_csk_timer_bug_msg);
	}
#endif
}
154 | |||
/*
 * Reset the retransmission timer: (re)arm the timer for event @what
 * (ICSK_TIME_*) to fire @when jiffies from now, clamped to @max_when.
 */
static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
					     unsigned long when,
					     const unsigned long max_when)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (when > max_when) {
#ifdef INET_CSK_DEBUG
		pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n",
			 sk, what, when, current_text_addr());
#endif
		when = max_when;
	}

	if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
		/* Retransmit and zero-window-probe share one timer;
		 * icsk_pending records which event is armed. */
		icsk->icsk_pending = what;
		icsk->icsk_timeout = jiffies + when;
		sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
	} else if (what == ICSK_TIME_DACK) {
		icsk->icsk_ack.pending |= ICSK_ACK_TIMER;
		icsk->icsk_ack.timeout = jiffies + when;
		sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
	}
#ifdef INET_CSK_DEBUG
	else {
		pr_debug("%s", inet_csk_timer_bug_msg);
	}
#endif
}
187 | |||
188 | extern struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); | ||
189 | |||
190 | extern struct request_sock *inet_csk_search_req(const struct sock *sk, | ||
191 | struct request_sock ***prevp, | ||
192 | const __u16 rport, | ||
193 | const __u32 raddr, | ||
194 | const __u32 laddr); | ||
195 | extern int inet_csk_get_port(struct inet_hashinfo *hashinfo, | ||
196 | struct sock *sk, unsigned short snum); | ||
197 | |||
198 | extern struct dst_entry* inet_csk_route_req(struct sock *sk, | ||
199 | const struct request_sock *req); | ||
200 | |||
201 | static inline void inet_csk_reqsk_queue_add(struct sock *sk, | ||
202 | struct request_sock *req, | ||
203 | struct sock *child) | ||
204 | { | ||
205 | reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); | ||
206 | } | ||
207 | |||
208 | extern void inet_csk_reqsk_queue_hash_add(struct sock *sk, | ||
209 | struct request_sock *req, | ||
210 | const unsigned timeout); | ||
211 | |||
212 | static inline void inet_csk_reqsk_queue_removed(struct sock *sk, | ||
213 | struct request_sock *req) | ||
214 | { | ||
215 | if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0) | ||
216 | inet_csk_delete_keepalive_timer(sk); | ||
217 | } | ||
218 | |||
219 | static inline void inet_csk_reqsk_queue_added(struct sock *sk, | ||
220 | const unsigned long timeout) | ||
221 | { | ||
222 | if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0) | ||
223 | inet_csk_reset_keepalive_timer(sk, timeout); | ||
224 | } | ||
225 | |||
226 | static inline int inet_csk_reqsk_queue_len(const struct sock *sk) | ||
227 | { | ||
228 | return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue); | ||
229 | } | ||
230 | |||
231 | static inline int inet_csk_reqsk_queue_young(const struct sock *sk) | ||
232 | { | ||
233 | return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue); | ||
234 | } | ||
235 | |||
236 | static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) | ||
237 | { | ||
238 | return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue); | ||
239 | } | ||
240 | |||
241 | static inline void inet_csk_reqsk_queue_unlink(struct sock *sk, | ||
242 | struct request_sock *req, | ||
243 | struct request_sock **prev) | ||
244 | { | ||
245 | reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev); | ||
246 | } | ||
247 | |||
/* Remove @req from listener @sk entirely: unlink it from the SYN table
 * (via @prev), drop the queue accounting, then free it.  Order matters:
 * @req must be unreachable before it is freed. */
static inline void inet_csk_reqsk_queue_drop(struct sock *sk,
					     struct request_sock *req,
					     struct request_sock **prev)
{
	inet_csk_reqsk_queue_unlink(sk, req, prev);
	inet_csk_reqsk_queue_removed(sk, req);
	reqsk_free(req);
}
256 | |||
257 | extern void inet_csk_reqsk_queue_prune(struct sock *parent, | ||
258 | const unsigned long interval, | ||
259 | const unsigned long timeout, | ||
260 | const unsigned long max_rto); | ||
261 | |||
262 | extern void inet_csk_destroy_sock(struct sock *sk); | ||
263 | |||
264 | /* | ||
265 | * LISTEN is a special case for poll.. | ||
266 | */ | ||
267 | static inline unsigned int inet_csk_listen_poll(const struct sock *sk) | ||
268 | { | ||
269 | return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ? | ||
270 | (POLLIN | POLLRDNORM) : 0; | ||
271 | } | ||
272 | |||
273 | extern int inet_csk_listen_start(struct sock *sk, const int nr_table_entries); | ||
274 | extern void inet_csk_listen_stop(struct sock *sk); | ||
275 | |||
276 | #endif /* _INET_CONNECTION_SOCK_H */ | ||
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h new file mode 100644 index 000000000000..646b6ea7fe26 --- /dev/null +++ b/include/net/inet_hashtables.h | |||
@@ -0,0 +1,427 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * Authors: Lotsa people, from code originally in tcp | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | */ | ||
13 | |||
14 | #ifndef _INET_HASHTABLES_H | ||
15 | #define _INET_HASHTABLES_H | ||
16 | |||
17 | #include <linux/config.h> | ||
18 | |||
19 | #include <linux/interrupt.h> | ||
20 | #include <linux/ipv6.h> | ||
21 | #include <linux/list.h> | ||
22 | #include <linux/slab.h> | ||
23 | #include <linux/socket.h> | ||
24 | #include <linux/spinlock.h> | ||
25 | #include <linux/types.h> | ||
26 | #include <linux/wait.h> | ||
27 | |||
28 | #include <net/inet_connection_sock.h> | ||
29 | #include <net/route.h> | ||
30 | #include <net/sock.h> | ||
31 | #include <net/tcp_states.h> | ||
32 | |||
33 | #include <asm/atomic.h> | ||
34 | #include <asm/byteorder.h> | ||
35 | |||
/* This is for all connections with a full identity, no wildcards.
 * New scheme, half the table is for TIME_WAIT, the other half is
 * for the rest. I'll experiment with dynamic table growth later.
 */
struct inet_ehash_bucket {
	rwlock_t	  lock;		/* protects chain below */
	struct hlist_head chain;	/* sockets hashed into this bucket */
} __attribute__((__aligned__(8)));
44 | |||
45 | /* There are a few simple rules, which allow for local port reuse by | ||
46 | * an application. In essence: | ||
47 | * | ||
48 | * 1) Sockets bound to different interfaces may share a local port. | ||
49 | * Failing that, goto test 2. | ||
50 | * 2) If all sockets have sk->sk_reuse set, and none of them are in | ||
51 | * TCP_LISTEN state, the port may be shared. | ||
52 | * Failing that, goto test 3. | ||
53 | * 3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local | ||
54 | * address, and none of them are the same, the port may be | ||
55 | * shared. | ||
56 | * Failing this, the port cannot be shared. | ||
57 | * | ||
58 | * The interesting point, is test #2. This is what an FTP server does | ||
59 | * all day. To optimize this case we use a specific flag bit defined | ||
60 | * below. As we add sockets to a bind bucket list, we perform a | ||
61 | * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN)) | ||
62 | * As long as all sockets added to a bind bucket pass this test, | ||
63 | * the flag bit will be set. | ||
64 | * The resulting situation is that tcp_v[46]_verify_bind() can just check | ||
65 | * for this flag bit, if it is set and the socket trying to bind has | ||
66 | * sk->sk_reuse set, we don't even have to walk the owners list at all, | ||
67 | * we return that it is ok to bind this socket to the requested local port. | ||
68 | * | ||
69 | * Sounds like a lot of work, but it is worth it. In a more naive | ||
70 | * implementation (ie. current FreeBSD etc.) the entire list of ports | ||
71 | * must be walked for each data port opened by an ftp server. Needless | ||
72 | * to say, this does not scale at all. With a couple thousand FTP | ||
73 | * users logged onto your box, isn't it nice to know that new data | ||
74 | * ports are created in O(1) time? I thought so. ;-) -DaveM | ||
75 | */ | ||
struct inet_bind_bucket {
	unsigned short		port;		/* local port this bucket represents */
	signed short		fastreuse;	/* non-zero while every owner has
						 * sk_reuse set and none listen;
						 * see the comment above */
	struct hlist_node	node;		/* link in the bhash chain */
	struct hlist_head	owners;		/* all sockets bound to this port */
};
82 | |||
83 | #define inet_bind_bucket_for_each(tb, node, head) \ | ||
84 | hlist_for_each_entry(tb, node, head, node) | ||
85 | |||
/* One chain of the bind (local port) hash table, with its lock. */
struct inet_bind_hashbucket {
	spinlock_t	  lock;		/* protects chain below */
	struct hlist_head chain;	/* inet_bind_bucket entries */
};
90 | |||
91 | /* This is for listening sockets, thus all sockets which possess wildcards. */ | ||
92 | #define INET_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ | ||
93 | |||
struct inet_hashinfo {
	/* This is for sockets with full identity only.  Sockets here will
	 * always be without wildcards and will have the following invariant:
	 *
	 *          TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE
	 *
	 * First half of the table is for sockets not in TIME_WAIT, second half
	 * is for TIME_WAIT sockets only.
	 */
	struct inet_ehash_bucket	*ehash;

	/* Ok, let's try this, I give up, we do need a local binding
	 * TCP hash as well as the others for fast bind/connect.
	 */
	struct inet_bind_hashbucket	*bhash;

	int				bhash_size;
	int				ehash_size;

	/* All sockets in TCP_LISTEN state will be in here.  This is the only
	 * table where wildcard'd TCP sockets can exist.  Hash function here
	 * is just local port number.
	 */
	struct hlist_head		listening_hash[INET_LHTABLE_SIZE];

	/* All the above members are written once at bootup and
	 * never written again _or_ are predominantly read-access.
	 *
	 * Now align to a new cache line as all the following members
	 * are often dirty.
	 */
	rwlock_t			lhash_lock ____cacheline_aligned;
	atomic_t			lhash_users;	/* readers inside listening_hash */
	wait_queue_head_t		lhash_wait;	/* writers wait here for lhash_users == 0 */
	spinlock_t			portalloc_lock;	/* serializes local port allocation */
	kmem_cache_t			*bind_bucket_cachep; /* allocator for inet_bind_bucket */
	int				port_rover;	/* last local port handed out */
};
132 | |||
133 | static inline int inet_ehashfn(const __u32 laddr, const __u16 lport, | ||
134 | const __u32 faddr, const __u16 fport, | ||
135 | const int ehash_size) | ||
136 | { | ||
137 | int h = (laddr ^ lport) ^ (faddr ^ fport); | ||
138 | h ^= h >> 16; | ||
139 | h ^= h >> 8; | ||
140 | return h & (ehash_size - 1); | ||
141 | } | ||
142 | |||
143 | static inline int inet_sk_ehashfn(const struct sock *sk, const int ehash_size) | ||
144 | { | ||
145 | const struct inet_sock *inet = inet_sk(sk); | ||
146 | const __u32 laddr = inet->rcv_saddr; | ||
147 | const __u16 lport = inet->num; | ||
148 | const __u32 faddr = inet->daddr; | ||
149 | const __u16 fport = inet->dport; | ||
150 | |||
151 | return inet_ehashfn(laddr, lport, faddr, fport, ehash_size); | ||
152 | } | ||
153 | |||
154 | extern struct inet_bind_bucket * | ||
155 | inet_bind_bucket_create(kmem_cache_t *cachep, | ||
156 | struct inet_bind_hashbucket *head, | ||
157 | const unsigned short snum); | ||
158 | extern void inet_bind_bucket_destroy(kmem_cache_t *cachep, | ||
159 | struct inet_bind_bucket *tb); | ||
160 | |||
161 | static inline int inet_bhashfn(const __u16 lport, const int bhash_size) | ||
162 | { | ||
163 | return lport & (bhash_size - 1); | ||
164 | } | ||
165 | |||
166 | extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, | ||
167 | const unsigned short snum); | ||
168 | |||
169 | /* These can have wildcards, don't try too hard. */ | ||
170 | static inline int inet_lhashfn(const unsigned short num) | ||
171 | { | ||
172 | return num & (INET_LHTABLE_SIZE - 1); | ||
173 | } | ||
174 | |||
175 | static inline int inet_sk_listen_hashfn(const struct sock *sk) | ||
176 | { | ||
177 | return inet_lhashfn(inet_sk(sk)->num); | ||
178 | } | ||
179 | |||
/* Caller must disable local BH processing. */
/* Make @child an owner of the bind bucket that listener @sk holds, so
 * the local port stays reserved for as long as the child lives. */
static inline void __inet_inherit_port(struct inet_hashinfo *table,
				       struct sock *sk, struct sock *child)
{
	/* Hash on the child's port; it equals the listener's, so this is
	 * also the bucket icsk_bind_hash of @sk lives in. */
	const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size);
	struct inet_bind_hashbucket *head = &table->bhash[bhash];
	struct inet_bind_bucket *tb;

	spin_lock(&head->lock);
	tb = inet_csk(sk)->icsk_bind_hash;
	sk_add_bind_node(child, &tb->owners);
	inet_csk(child)->icsk_bind_hash = tb;
	spin_unlock(&head->lock);
}
194 | |||
/* BH-safe wrapper around __inet_inherit_port(). */
static inline void inet_inherit_port(struct inet_hashinfo *table,
				     struct sock *sk, struct sock *child)
{
	local_bh_disable();
	__inet_inherit_port(table, sk, child);
	local_bh_enable();
}
202 | |||
203 | extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk); | ||
204 | |||
205 | extern void inet_listen_wlock(struct inet_hashinfo *hashinfo); | ||
206 | |||
/*
 * - We may sleep inside this lock.
 * - If sleeping is not required (or called from BH),
 *   use plain read_(un)lock(&inet_hashinfo.lhash_lock).
 */
static inline void inet_listen_lock(struct inet_hashinfo *hashinfo)
{
	/* Taking lhash_lock briefly serializes us against a writer in
	 * inet_listen_wlock(); once lhash_users is raised, that writer
	 * waits on lhash_wait until all such readers have finished. */
	read_lock(&hashinfo->lhash_lock);
	atomic_inc(&hashinfo->lhash_users);
	read_unlock(&hashinfo->lhash_lock);
}
219 | |||
/* Drop a reference taken by inet_listen_lock(); the last reader out
 * wakes any writer waiting in inet_listen_wlock(). */
static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo)
{
	if (atomic_dec_and_test(&hashinfo->lhash_users))
		wake_up(&hashinfo->lhash_wait);
}
225 | |||
/* Insert @sk into the listening hash (when @listen_possible and the
 * socket is in TCP_LISTEN) or into the established hash.  Caller must
 * have local BHs disabled. */
static inline void __inet_hash(struct inet_hashinfo *hashinfo,
			       struct sock *sk, const int listen_possible)
{
	struct hlist_head *list;
	rwlock_t *lock;

	BUG_TRAP(sk_unhashed(sk));
	if (listen_possible && sk->sk_state == TCP_LISTEN) {
		list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
		lock = &hashinfo->lhash_lock;
		/* Takes lhash_lock for writing, waiting out lhash_users. */
		inet_listen_wlock(hashinfo);
	} else {
		/* Cache the bucket index so unhash can find it again. */
		sk->sk_hashent = inet_sk_ehashfn(sk, hashinfo->ehash_size);
		list = &hashinfo->ehash[sk->sk_hashent].chain;
		lock = &hashinfo->ehash[sk->sk_hashent].lock;
		write_lock(lock);
	}
	__sk_add_node(sk, list);
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(lock);
	if (listen_possible && sk->sk_state == TCP_LISTEN)
		wake_up(&hashinfo->lhash_wait);
}
249 | |||
250 | static inline void inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk) | ||
251 | { | ||
252 | if (sk->sk_state != TCP_CLOSE) { | ||
253 | local_bh_disable(); | ||
254 | __inet_hash(hashinfo, sk, 1); | ||
255 | local_bh_enable(); | ||
256 | } | ||
257 | } | ||
258 | |||
/* Remove @sk from whichever hash table it sits in (listening or
 * established); a no-op if it is not hashed. */
static inline void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk)
{
	rwlock_t *lock;

	if (sk_unhashed(sk))
		goto out;

	if (sk->sk_state == TCP_LISTEN) {
		/* inet_listen_wlock() only takes the lock, so BHs are
		 * disabled separately; write_unlock_bh() below undoes
		 * both halves of this pairing. */
		local_bh_disable();
		inet_listen_wlock(hashinfo);
		lock = &hashinfo->lhash_lock;
	} else {
		/* sk_hashent was cached by __inet_hash(). */
		struct inet_ehash_bucket *head = &hashinfo->ehash[sk->sk_hashent];
		lock = &head->lock;
		write_lock_bh(&head->lock);
	}

	if (__sk_del_node_init(sk))
		sock_prot_dec_use(sk->sk_prot);
	write_unlock_bh(lock);
out:
	if (sk->sk_state == TCP_LISTEN)
		wake_up(&hashinfo->lhash_wait);
}
283 | |||
/* Input interface index of the route attached to @skb. */
static inline int inet_iif(const struct sk_buff *skb)
{
	return ((struct rtable *)skb->dst)->rt_iif;
}
288 | |||
289 | extern struct sock *__inet_lookup_listener(const struct hlist_head *head, | ||
290 | const u32 daddr, | ||
291 | const unsigned short hnum, | ||
292 | const int dif); | ||
293 | |||
/* Optimize the common listener case: when the chain has exactly one
 * socket on the right port with nothing ambiguous to compare, take it
 * directly instead of doing the full __inet_lookup_listener() scoring
 * walk.  A returned socket has its refcount held. */
static inline struct sock *
	inet_lookup_listener(struct inet_hashinfo *hashinfo,
			     const u32 daddr,
			     const unsigned short hnum, const int dif)
{
	struct sock *sk = NULL;
	const struct hlist_head *head;

	read_lock(&hashinfo->lhash_lock);
	head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
	if (!hlist_empty(head)) {
		const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));

		/* Sole chain entry (!sk_node.next), matching or wildcard
		 * local address, not IPv6-only, not device-bound: done. */
		if (inet->num == hnum && !sk->sk_node.next &&
		    (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
		    (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
		    !sk->sk_bound_dev_if)
			goto sherry_cache;
		sk = __inet_lookup_listener(head, daddr, hnum, dif);
	}
	if (sk) {
sherry_cache:
		sock_hold(sk);
	}
	read_unlock(&hashinfo->lhash_lock);
	return sk;
}
322 | |||
/* Socket demux engine toys. */
/* Pack both ports into one __u32, laid out so the pair compares equal
 * against the two 16-bit port fields stored back to back in the socket
 * (the *(__u32 *)&...dport access in the MATCH macros below). */
#ifdef __BIG_ENDIAN
#define INET_COMBINED_PORTS(__sport, __dport) \
	(((__u32)(__sport) << 16) | (__u32)(__dport))
#else /* __LITTLE_ENDIAN */
#define INET_COMBINED_PORTS(__sport, __dport) \
	(((__u32)(__dport) << 16) | (__u32)(__sport))
#endif

#if (BITS_PER_LONG == 64)
/* On 64-bit, fold both addresses into a single __u64 "cookie" so the
 * address pair is matched with one comparison.  NOTE(review): this
 * relies on daddr/rcv_saddr being adjacent in inet_sock (and the tw_
 * equivalents in inet_timewait_sock) — layout defined elsewhere. */
#ifdef __BIG_ENDIAN
#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
	const __u64 __name = (((__u64)(__saddr)) << 32) | ((__u64)(__daddr));
#else /* __LITTLE_ENDIAN */
#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
	const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr));
#endif /* __BIG_ENDIAN */
#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
	(((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \
	 ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \
	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
	(((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \
	 ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \
	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#else /* 32-bit arch */
/* 32-bit: no cookie; compare the two addresses individually. */
#define INET_ADDR_COOKIE(__name, __saddr, __daddr)
#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \
	((inet_sk(__sk)->daddr == (__saddr)) && \
	 (inet_sk(__sk)->rcv_saddr == (__daddr)) && \
	 ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \
	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \
	((inet_twsk(__sk)->tw_daddr == (__saddr)) && \
	 (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \
	 ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \
	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#endif /* 64-bit arch */
361 | |||
/*
 * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
 * not check it for lookups anymore, thanks Alexey. -DaveM
 *
 * Local BH must be disabled here.
 */
static inline struct sock *
	__inet_lookup_established(struct inet_hashinfo *hashinfo,
				  const u32 saddr, const u16 sport,
				  const u32 daddr, const u16 hnum,
				  const int dif)
{
	INET_ADDR_COOKIE(acookie, saddr, daddr)
	const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
	struct sock *sk;
	const struct hlist_node *node;
	/* Optimize here for direct hit, only listening connections can
	 * have wildcards anyways.
	 */
	const int hash = inet_ehashfn(daddr, hnum, saddr, sport, hashinfo->ehash_size);
	struct inet_ehash_bucket *head = &hashinfo->ehash[hash];

	read_lock(&head->lock);
	sk_for_each(sk, node, &head->chain) {
		if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif))
			goto hit; /* You sunk my battleship! */
	}

	/* Must check for a TIME_WAIT'er before going to listener hash.
	 * TIME_WAIT buckets live in the second half of ehash, offset by
	 * ehash_size from their live counterparts (same lock). */
	sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) {
		if (INET_TW_MATCH(sk, acookie, saddr, daddr, ports, dif))
			goto hit;
	}
	sk = NULL;
out:
	read_unlock(&head->lock);
	return sk;
hit:
	/* Grab a reference before dropping the bucket lock. */
	sock_hold(sk);
	goto out;
}
403 | |||
404 | static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo, | ||
405 | const u32 saddr, const u16 sport, | ||
406 | const u32 daddr, const u16 hnum, | ||
407 | const int dif) | ||
408 | { | ||
409 | struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport, daddr, | ||
410 | hnum, dif); | ||
411 | return sk ? : inet_lookup_listener(hashinfo, daddr, hnum, dif); | ||
412 | } | ||
413 | |||
/* BH-safe socket lookup.  Note @dport is in network byte order here,
 * unlike the host-order hnum taken by the __ variants. */
static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo,
				       const u32 saddr, const u16 sport,
				       const u32 daddr, const u16 dport,
				       const int dif)
{
	struct sock *sk;

	local_bh_disable();
	sk = __inet_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif);
	local_bh_enable();

	return sk;
}
427 | #endif /* _INET_HASHTABLES_H */ | ||
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h new file mode 100644 index 000000000000..3b070352e869 --- /dev/null +++ b/include/net/inet_timewait_sock.h | |||
@@ -0,0 +1,219 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * Definitions for a generic INET TIMEWAIT sock | ||
7 | * | ||
8 | * From code originally in net/tcp.h | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License | ||
12 | * as published by the Free Software Foundation; either version | ||
13 | * 2 of the License, or (at your option) any later version. | ||
14 | */ | ||
15 | #ifndef _INET_TIMEWAIT_SOCK_ | ||
16 | #define _INET_TIMEWAIT_SOCK_ | ||
17 | |||
18 | #include <linux/config.h> | ||
19 | |||
20 | #include <linux/ip.h> | ||
21 | #include <linux/list.h> | ||
22 | #include <linux/timer.h> | ||
23 | #include <linux/types.h> | ||
24 | #include <linux/workqueue.h> | ||
25 | |||
26 | #include <net/sock.h> | ||
27 | #include <net/tcp_states.h> | ||
28 | |||
29 | #include <asm/atomic.h> | ||
30 | |||
31 | struct inet_hashinfo; | ||
32 | |||
33 | #define INET_TWDR_RECYCLE_SLOTS_LOG 5 | ||
34 | #define INET_TWDR_RECYCLE_SLOTS (1 << INET_TWDR_RECYCLE_SLOTS_LOG) | ||
35 | |||
36 | /* | ||
37 | * If time > 4sec, it is "slow" path, no recycling is required, | ||
38 | * so that we select tick to get range about 4 seconds. | ||
39 | */ | ||
40 | #if HZ <= 16 || HZ > 4096 | ||
41 | # error Unsupported: HZ <= 16 or HZ > 4096 | ||
42 | #elif HZ <= 32 | ||
43 | # define INET_TWDR_RECYCLE_TICK (5 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | ||
44 | #elif HZ <= 64 | ||
45 | # define INET_TWDR_RECYCLE_TICK (6 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | ||
46 | #elif HZ <= 128 | ||
47 | # define INET_TWDR_RECYCLE_TICK (7 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | ||
48 | #elif HZ <= 256 | ||
49 | # define INET_TWDR_RECYCLE_TICK (8 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | ||
50 | #elif HZ <= 512 | ||
51 | # define INET_TWDR_RECYCLE_TICK (9 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | ||
52 | #elif HZ <= 1024 | ||
53 | # define INET_TWDR_RECYCLE_TICK (10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | ||
54 | #elif HZ <= 2048 | ||
55 | # define INET_TWDR_RECYCLE_TICK (11 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | ||
56 | #else | ||
57 | # define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | ||
58 | #endif | ||
59 | |||
60 | /* TIME_WAIT reaping mechanism. */ | ||
61 | #define INET_TWDR_TWKILL_SLOTS 8 /* Please keep this a power of 2. */ | ||
62 | |||
63 | #define INET_TWDR_TWKILL_QUOTA 100 | ||
64 | |||
65 | struct inet_timewait_death_row { | ||
66 | /* Short-time timewait calendar */ | ||
67 | int twcal_hand; | ||
68 | int twcal_jiffie; | ||
69 | struct timer_list twcal_timer; | ||
70 | struct hlist_head twcal_row[INET_TWDR_RECYCLE_SLOTS]; | ||
71 | |||
72 | spinlock_t death_lock; | ||
73 | int tw_count; | ||
74 | int period; | ||
75 | u32 thread_slots; | ||
76 | struct work_struct twkill_work; | ||
77 | struct timer_list tw_timer; | ||
78 | int slot; | ||
79 | struct hlist_head cells[INET_TWDR_TWKILL_SLOTS]; | ||
80 | struct inet_hashinfo *hashinfo; | ||
81 | int sysctl_tw_recycle; | ||
82 | int sysctl_max_tw_buckets; | ||
83 | }; | ||
84 | |||
85 | extern void inet_twdr_hangman(unsigned long data); | ||
86 | extern void inet_twdr_twkill_work(void *data); | ||
87 | extern void inet_twdr_twcal_tick(unsigned long data); | ||
88 | |||
89 | #if (BITS_PER_LONG == 64) | ||
90 | #define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8 | ||
91 | #else | ||
92 | #define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 4 | ||
93 | #endif | ||
94 | |||
95 | struct inet_bind_bucket; | ||
96 | |||
97 | /* | ||
98 | * This is a TIME_WAIT sock. It works around the memory consumption | ||
99 | * problems of sockets in such a state on heavily loaded servers, but | ||
100 | * without violating the protocol specification. | ||
101 | */ | ||
102 | struct inet_timewait_sock { | ||
103 | /* | ||
104 | * Now struct sock also uses sock_common, so please just | ||
105 | * don't add nothing before this first member (__tw_common) --acme | ||
106 | */ | ||
107 | struct sock_common __tw_common; | ||
108 | #define tw_family __tw_common.skc_family | ||
109 | #define tw_state __tw_common.skc_state | ||
110 | #define tw_reuse __tw_common.skc_reuse | ||
111 | #define tw_bound_dev_if __tw_common.skc_bound_dev_if | ||
112 | #define tw_node __tw_common.skc_node | ||
113 | #define tw_bind_node __tw_common.skc_bind_node | ||
114 | #define tw_refcnt __tw_common.skc_refcnt | ||
115 | #define tw_prot __tw_common.skc_prot | ||
116 | volatile unsigned char tw_substate; | ||
117 | /* 3 bits hole, try to pack */ | ||
118 | unsigned char tw_rcv_wscale; | ||
119 | /* Socket demultiplex comparisons on incoming packets. */ | ||
120 | /* these five are in inet_sock */ | ||
121 | __u16 tw_sport; | ||
122 | __u32 tw_daddr __attribute__((aligned(INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES))); | ||
123 | __u32 tw_rcv_saddr; | ||
124 | __u16 tw_dport; | ||
125 | __u16 tw_num; | ||
126 | /* And these are ours. */ | ||
127 | __u8 tw_ipv6only:1; | ||
128 | /* 31 bits hole, try to pack */ | ||
129 | int tw_hashent; | ||
130 | int tw_timeout; | ||
131 | unsigned long tw_ttd; | ||
132 | struct inet_bind_bucket *tw_tb; | ||
133 | struct hlist_node tw_death_node; | ||
134 | }; | ||
135 | |||
136 | static inline void inet_twsk_add_node(struct inet_timewait_sock *tw, | ||
137 | struct hlist_head *list) | ||
138 | { | ||
139 | hlist_add_head(&tw->tw_node, list); | ||
140 | } | ||
141 | |||
142 | static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, | ||
143 | struct hlist_head *list) | ||
144 | { | ||
145 | hlist_add_head(&tw->tw_bind_node, list); | ||
146 | } | ||
147 | |||
148 | static inline int inet_twsk_dead_hashed(const struct inet_timewait_sock *tw) | ||
149 | { | ||
150 | return tw->tw_death_node.pprev != NULL; | ||
151 | } | ||
152 | |||
153 | static inline void inet_twsk_dead_node_init(struct inet_timewait_sock *tw) | ||
154 | { | ||
155 | tw->tw_death_node.pprev = NULL; | ||
156 | } | ||
157 | |||
158 | static inline void __inet_twsk_del_dead_node(struct inet_timewait_sock *tw) | ||
159 | { | ||
160 | __hlist_del(&tw->tw_death_node); | ||
161 | inet_twsk_dead_node_init(tw); | ||
162 | } | ||
163 | |||
164 | static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw) | ||
165 | { | ||
166 | if (inet_twsk_dead_hashed(tw)) { | ||
167 | __inet_twsk_del_dead_node(tw); | ||
168 | return 1; | ||
169 | } | ||
170 | return 0; | ||
171 | } | ||
172 | |||
173 | #define inet_twsk_for_each(tw, node, head) \ | ||
174 | hlist_for_each_entry(tw, node, head, tw_node) | ||
175 | |||
176 | #define inet_twsk_for_each_inmate(tw, node, jail) \ | ||
177 | hlist_for_each_entry(tw, node, jail, tw_death_node) | ||
178 | |||
179 | #define inet_twsk_for_each_inmate_safe(tw, node, safe, jail) \ | ||
180 | hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node) | ||
181 | |||
182 | static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) | ||
183 | { | ||
184 | return (struct inet_timewait_sock *)sk; | ||
185 | } | ||
186 | |||
187 | static inline u32 inet_rcv_saddr(const struct sock *sk) | ||
188 | { | ||
189 | return likely(sk->sk_state != TCP_TIME_WAIT) ? | ||
190 | inet_sk(sk)->rcv_saddr : inet_twsk(sk)->tw_rcv_saddr; | ||
191 | } | ||
192 | |||
193 | static inline void inet_twsk_put(struct inet_timewait_sock *tw) | ||
194 | { | ||
195 | if (atomic_dec_and_test(&tw->tw_refcnt)) { | ||
196 | #ifdef SOCK_REFCNT_DEBUG | ||
197 | printk(KERN_DEBUG "%s timewait_sock %p released\n", | ||
198 | tw->tw_prot->name, tw); | ||
199 | #endif | ||
200 | kmem_cache_free(tw->tw_prot->twsk_slab, tw); | ||
201 | } | ||
202 | } | ||
203 | |||
204 | extern struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, | ||
205 | const int state); | ||
206 | |||
207 | extern void __inet_twsk_kill(struct inet_timewait_sock *tw, | ||
208 | struct inet_hashinfo *hashinfo); | ||
209 | |||
210 | extern void __inet_twsk_hashdance(struct inet_timewait_sock *tw, | ||
211 | struct sock *sk, | ||
212 | struct inet_hashinfo *hashinfo); | ||
213 | |||
214 | extern void inet_twsk_schedule(struct inet_timewait_sock *tw, | ||
215 | struct inet_timewait_death_row *twdr, | ||
216 | const int timeo, const int timewait_len); | ||
217 | extern void inet_twsk_deschedule(struct inet_timewait_sock *tw, | ||
218 | struct inet_timewait_death_row *twdr); | ||
219 | #endif /* _INET_TIMEWAIT_SOCK_ */ | ||
diff --git a/include/net/ip.h b/include/net/ip.h index 32360bbe143f..e4563bbee6ea 100644 --- a/include/net/ip.h +++ b/include/net/ip.h | |||
@@ -86,7 +86,7 @@ extern int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, | |||
86 | u32 saddr, u32 daddr, | 86 | u32 saddr, u32 daddr, |
87 | struct ip_options *opt); | 87 | struct ip_options *opt); |
88 | extern int ip_rcv(struct sk_buff *skb, struct net_device *dev, | 88 | extern int ip_rcv(struct sk_buff *skb, struct net_device *dev, |
89 | struct packet_type *pt); | 89 | struct packet_type *pt, struct net_device *orig_dev); |
90 | extern int ip_local_deliver(struct sk_buff *skb); | 90 | extern int ip_local_deliver(struct sk_buff *skb); |
91 | extern int ip_mr_input(struct sk_buff *skb); | 91 | extern int ip_mr_input(struct sk_buff *skb); |
92 | extern int ip_output(struct sk_buff *skb); | 92 | extern int ip_output(struct sk_buff *skb); |
@@ -140,8 +140,6 @@ struct ip_reply_arg { | |||
140 | void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, | 140 | void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, |
141 | unsigned int len); | 141 | unsigned int len); |
142 | 142 | ||
143 | extern int ip_finish_output(struct sk_buff *skb); | ||
144 | |||
145 | struct ipv4_config | 143 | struct ipv4_config |
146 | { | 144 | { |
147 | int log_martians; | 145 | int log_martians; |
@@ -165,6 +163,24 @@ extern int sysctl_local_port_range[2]; | |||
165 | extern int sysctl_ip_default_ttl; | 163 | extern int sysctl_ip_default_ttl; |
166 | extern int sysctl_ip_nonlocal_bind; | 164 | extern int sysctl_ip_nonlocal_bind; |
167 | 165 | ||
166 | /* From ip_fragment.c */ | ||
167 | extern int sysctl_ipfrag_high_thresh; | ||
168 | extern int sysctl_ipfrag_low_thresh; | ||
169 | extern int sysctl_ipfrag_time; | ||
170 | extern int sysctl_ipfrag_secret_interval; | ||
171 | |||
172 | /* From inetpeer.c */ | ||
173 | extern int inet_peer_threshold; | ||
174 | extern int inet_peer_minttl; | ||
175 | extern int inet_peer_maxttl; | ||
176 | extern int inet_peer_gc_mintime; | ||
177 | extern int inet_peer_gc_maxtime; | ||
178 | |||
179 | /* From ip_output.c */ | ||
180 | extern int sysctl_ip_dynaddr; | ||
181 | |||
182 | extern void ipfrag_init(void); | ||
183 | |||
168 | #ifdef CONFIG_INET | 184 | #ifdef CONFIG_INET |
169 | /* The function in 2.2 was invalid, producing wrong result for | 185 | /* The function in 2.2 was invalid, producing wrong result for |
170 | * check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. --ANK(000625) */ | 186 | * check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. --ANK(000625) */ |
@@ -319,7 +335,10 @@ extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt, u32 da | |||
319 | extern int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb); | 335 | extern int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb); |
320 | extern void ip_options_fragment(struct sk_buff *skb); | 336 | extern void ip_options_fragment(struct sk_buff *skb); |
321 | extern int ip_options_compile(struct ip_options *opt, struct sk_buff *skb); | 337 | extern int ip_options_compile(struct ip_options *opt, struct sk_buff *skb); |
322 | extern int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen, int user); | 338 | extern int ip_options_get(struct ip_options **optp, |
339 | unsigned char *data, int optlen); | ||
340 | extern int ip_options_get_from_user(struct ip_options **optp, | ||
341 | unsigned char __user *data, int optlen); | ||
323 | extern void ip_options_undo(struct ip_options * opt); | 342 | extern void ip_options_undo(struct ip_options * opt); |
324 | extern void ip_forward_options(struct sk_buff *skb); | 343 | extern void ip_forward_options(struct sk_buff *skb); |
325 | extern int ip_options_rcv_srr(struct sk_buff *skb); | 344 | extern int ip_options_rcv_srr(struct sk_buff *skb); |
@@ -350,5 +369,10 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, | |||
350 | void __user *oldval, size_t __user *oldlenp, | 369 | void __user *oldval, size_t __user *oldlenp, |
351 | void __user *newval, size_t newlen, | 370 | void __user *newval, size_t newlen, |
352 | void **context); | 371 | void **context); |
372 | #ifdef CONFIG_PROC_FS | ||
373 | extern int ip_misc_proc_init(void); | ||
374 | #endif | ||
375 | |||
376 | extern struct ctl_table ipv4_table[]; | ||
353 | 377 | ||
354 | #endif /* _IP_H */ | 378 | #endif /* _IP_H */ |
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f920706d526b..1f2e428ca364 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h | |||
@@ -12,7 +12,6 @@ | |||
12 | #include <net/flow.h> | 12 | #include <net/flow.h> |
13 | #include <net/ip6_fib.h> | 13 | #include <net/ip6_fib.h> |
14 | #include <net/sock.h> | 14 | #include <net/sock.h> |
15 | #include <linux/tcp.h> | ||
16 | #include <linux/ip.h> | 15 | #include <linux/ip.h> |
17 | #include <linux/ipv6.h> | 16 | #include <linux/ipv6.h> |
18 | 17 | ||
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index a4208a336ac0..14de4ebd1211 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h | |||
@@ -295,4 +295,9 @@ static inline void fib_res_put(struct fib_result *res) | |||
295 | #endif | 295 | #endif |
296 | } | 296 | } |
297 | 297 | ||
298 | #ifdef CONFIG_PROC_FS | ||
299 | extern int fib_proc_init(void); | ||
300 | extern void fib_proc_exit(void); | ||
301 | #endif | ||
302 | |||
298 | #endif /* _NET_FIB_H */ | 303 | #endif /* _NET_FIB_H */ |
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 52da5d26617a..7a3c43711a17 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h | |||
@@ -255,7 +255,6 @@ struct ip_vs_daemon_user { | |||
255 | #include <asm/atomic.h> /* for struct atomic_t */ | 255 | #include <asm/atomic.h> /* for struct atomic_t */ |
256 | #include <linux/netdevice.h> /* for struct neighbour */ | 256 | #include <linux/netdevice.h> /* for struct neighbour */ |
257 | #include <net/dst.h> /* for struct dst_entry */ | 257 | #include <net/dst.h> /* for struct dst_entry */ |
258 | #include <net/tcp.h> | ||
259 | #include <net/udp.h> | 258 | #include <net/udp.h> |
260 | #include <linux/compiler.h> | 259 | #include <linux/compiler.h> |
261 | 260 | ||
diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 69324465e8b3..3203eaff4bd4 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h | |||
@@ -104,6 +104,7 @@ struct frag_hdr { | |||
104 | 104 | ||
105 | #ifdef __KERNEL__ | 105 | #ifdef __KERNEL__ |
106 | 106 | ||
107 | #include <linux/config.h> | ||
107 | #include <net/sock.h> | 108 | #include <net/sock.h> |
108 | 109 | ||
109 | /* sysctls */ | 110 | /* sysctls */ |
@@ -145,7 +146,6 @@ DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6); | |||
145 | #define UDP6_INC_STATS(field) SNMP_INC_STATS(udp_stats_in6, field) | 146 | #define UDP6_INC_STATS(field) SNMP_INC_STATS(udp_stats_in6, field) |
146 | #define UDP6_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_stats_in6, field) | 147 | #define UDP6_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_stats_in6, field) |
147 | #define UDP6_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_stats_in6, field) | 148 | #define UDP6_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_stats_in6, field) |
148 | extern atomic_t inet6_sock_nr; | ||
149 | 149 | ||
150 | int snmp6_register_dev(struct inet6_dev *idev); | 150 | int snmp6_register_dev(struct inet6_dev *idev); |
151 | int snmp6_unregister_dev(struct inet6_dev *idev); | 151 | int snmp6_unregister_dev(struct inet6_dev *idev); |
@@ -346,7 +346,8 @@ static inline int ipv6_addr_any(const struct in6_addr *a) | |||
346 | 346 | ||
347 | extern int ipv6_rcv(struct sk_buff *skb, | 347 | extern int ipv6_rcv(struct sk_buff *skb, |
348 | struct net_device *dev, | 348 | struct net_device *dev, |
349 | struct packet_type *pt); | 349 | struct packet_type *pt, |
350 | struct net_device *orig_dev); | ||
350 | 351 | ||
351 | /* | 352 | /* |
352 | * upper-layer output functions | 353 | * upper-layer output functions |
@@ -464,8 +465,38 @@ extern int sysctl_ip6frag_low_thresh; | |||
464 | extern int sysctl_ip6frag_time; | 465 | extern int sysctl_ip6frag_time; |
465 | extern int sysctl_ip6frag_secret_interval; | 466 | extern int sysctl_ip6frag_secret_interval; |
466 | 467 | ||
467 | #endif /* __KERNEL__ */ | 468 | extern struct proto_ops inet6_stream_ops; |
468 | #endif /* _NET_IPV6_H */ | 469 | extern struct proto_ops inet6_dgram_ops; |
470 | |||
471 | extern int ip6_mc_source(int add, int omode, struct sock *sk, | ||
472 | struct group_source_req *pgsr); | ||
473 | extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf); | ||
474 | extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, | ||
475 | struct group_filter __user *optval, | ||
476 | int __user *optlen); | ||
477 | |||
478 | #ifdef CONFIG_PROC_FS | ||
479 | extern int ac6_proc_init(void); | ||
480 | extern void ac6_proc_exit(void); | ||
481 | extern int raw6_proc_init(void); | ||
482 | extern void raw6_proc_exit(void); | ||
483 | extern int tcp6_proc_init(void); | ||
484 | extern void tcp6_proc_exit(void); | ||
485 | extern int udp6_proc_init(void); | ||
486 | extern void udp6_proc_exit(void); | ||
487 | extern int ipv6_misc_proc_init(void); | ||
488 | extern void ipv6_misc_proc_exit(void); | ||
489 | |||
490 | extern struct rt6_statistics rt6_stats; | ||
491 | #endif | ||
469 | 492 | ||
493 | #ifdef CONFIG_SYSCTL | ||
494 | extern ctl_table ipv6_route_table[]; | ||
495 | extern ctl_table ipv6_icmp_table[]; | ||
470 | 496 | ||
497 | extern void ipv6_sysctl_register(void); | ||
498 | extern void ipv6_sysctl_unregister(void); | ||
499 | #endif | ||
471 | 500 | ||
501 | #endif /* __KERNEL__ */ | ||
502 | #endif /* _NET_IPV6_H */ | ||
diff --git a/include/net/llc.h b/include/net/llc.h index c9aed2a8b4e2..71769a5aeef3 100644 --- a/include/net/llc.h +++ b/include/net/llc.h | |||
@@ -46,7 +46,8 @@ struct llc_sap { | |||
46 | unsigned char f_bit; | 46 | unsigned char f_bit; |
47 | int (*rcv_func)(struct sk_buff *skb, | 47 | int (*rcv_func)(struct sk_buff *skb, |
48 | struct net_device *dev, | 48 | struct net_device *dev, |
49 | struct packet_type *pt); | 49 | struct packet_type *pt, |
50 | struct net_device *orig_dev); | ||
50 | struct llc_addr laddr; | 51 | struct llc_addr laddr; |
51 | struct list_head node; | 52 | struct list_head node; |
52 | struct { | 53 | struct { |
@@ -64,7 +65,7 @@ extern rwlock_t llc_sap_list_lock; | |||
64 | extern unsigned char llc_station_mac_sa[ETH_ALEN]; | 65 | extern unsigned char llc_station_mac_sa[ETH_ALEN]; |
65 | 66 | ||
66 | extern int llc_rcv(struct sk_buff *skb, struct net_device *dev, | 67 | extern int llc_rcv(struct sk_buff *skb, struct net_device *dev, |
67 | struct packet_type *pt); | 68 | struct packet_type *pt, struct net_device *orig_dev); |
68 | 69 | ||
69 | extern int llc_mac_hdr_init(struct sk_buff *skb, | 70 | extern int llc_mac_hdr_init(struct sk_buff *skb, |
70 | unsigned char *sa, unsigned char *da); | 71 | unsigned char *sa, unsigned char *da); |
@@ -78,7 +79,8 @@ extern void llc_set_station_handler(void (*handler)(struct sk_buff *skb)); | |||
78 | extern struct llc_sap *llc_sap_open(unsigned char lsap, | 79 | extern struct llc_sap *llc_sap_open(unsigned char lsap, |
79 | int (*rcv)(struct sk_buff *skb, | 80 | int (*rcv)(struct sk_buff *skb, |
80 | struct net_device *dev, | 81 | struct net_device *dev, |
81 | struct packet_type *pt)); | 82 | struct packet_type *pt, |
83 | struct net_device *orig_dev)); | ||
82 | extern void llc_sap_close(struct llc_sap *sap); | 84 | extern void llc_sap_close(struct llc_sap *sap); |
83 | 85 | ||
84 | extern struct llc_sap *llc_sap_find(unsigned char sap_value); | 86 | extern struct llc_sap *llc_sap_find(unsigned char sap_value); |
diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 89809891e5ab..34c07731933d 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h | |||
@@ -363,7 +363,14 @@ __neigh_lookup_errno(struct neigh_table *tbl, const void *pkey, | |||
363 | return neigh_create(tbl, pkey, dev); | 363 | return neigh_create(tbl, pkey, dev); |
364 | } | 364 | } |
365 | 365 | ||
366 | #define LOCALLY_ENQUEUED -2 | 366 | struct neighbour_cb { |
367 | unsigned long sched_next; | ||
368 | unsigned int flags; | ||
369 | }; | ||
370 | |||
371 | #define LOCALLY_ENQUEUED 0x1 | ||
372 | |||
373 | #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) | ||
367 | 374 | ||
368 | #endif | 375 | #endif |
369 | #endif | 376 | #endif |
diff --git a/include/net/p8022.h b/include/net/p8022.h index 3c99a86c3581..42e9fac51b31 100644 --- a/include/net/p8022.h +++ b/include/net/p8022.h | |||
@@ -4,7 +4,10 @@ extern struct datalink_proto * | |||
4 | register_8022_client(unsigned char type, | 4 | register_8022_client(unsigned char type, |
5 | int (*func)(struct sk_buff *skb, | 5 | int (*func)(struct sk_buff *skb, |
6 | struct net_device *dev, | 6 | struct net_device *dev, |
7 | struct packet_type *pt)); | 7 | struct packet_type *pt, |
8 | struct net_device *orig_dev)); | ||
8 | extern void unregister_8022_client(struct datalink_proto *proto); | 9 | extern void unregister_8022_client(struct datalink_proto *proto); |
9 | 10 | ||
11 | extern struct datalink_proto *make_8023_client(void); | ||
12 | extern void destroy_8023_client(struct datalink_proto *dl); | ||
10 | #endif | 13 | #endif |
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 4abda6aec05a..b902d24a3256 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h | |||
@@ -352,10 +352,10 @@ tcf_change_indev(struct tcf_proto *tp, char *indev, struct rtattr *indev_tlv) | |||
352 | static inline int | 352 | static inline int |
353 | tcf_match_indev(struct sk_buff *skb, char *indev) | 353 | tcf_match_indev(struct sk_buff *skb, char *indev) |
354 | { | 354 | { |
355 | if (0 != indev[0]) { | 355 | if (indev[0]) { |
356 | if (NULL == skb->input_dev) | 356 | if (!skb->input_dev) |
357 | return 0; | 357 | return 0; |
358 | else if (0 != strcmp(indev, skb->input_dev->name)) | 358 | if (strcmp(indev, skb->input_dev->name)) |
359 | return 0; | 359 | return 0; |
360 | } | 360 | } |
361 | 361 | ||
diff --git a/include/net/psnap.h b/include/net/psnap.h index 9c94e8f98b36..b2e01cc3fc8a 100644 --- a/include/net/psnap.h +++ b/include/net/psnap.h | |||
@@ -1,7 +1,7 @@ | |||
1 | #ifndef _NET_PSNAP_H | 1 | #ifndef _NET_PSNAP_H |
2 | #define _NET_PSNAP_H | 2 | #define _NET_PSNAP_H |
3 | 3 | ||
4 | extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *)); | 4 | extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *orig_dev)); |
5 | extern void unregister_snap_client(struct datalink_proto *proto); | 5 | extern void unregister_snap_client(struct datalink_proto *proto); |
6 | 6 | ||
7 | #endif | 7 | #endif |
diff --git a/include/net/raw.h b/include/net/raw.h index 1c411c45587a..f47917469b12 100644 --- a/include/net/raw.h +++ b/include/net/raw.h | |||
@@ -17,10 +17,10 @@ | |||
17 | #ifndef _RAW_H | 17 | #ifndef _RAW_H |
18 | #define _RAW_H | 18 | #define _RAW_H |
19 | 19 | ||
20 | #include <linux/config.h> | ||
20 | 21 | ||
21 | extern struct proto raw_prot; | 22 | extern struct proto raw_prot; |
22 | 23 | ||
23 | |||
24 | extern void raw_err(struct sock *, struct sk_buff *, u32 info); | 24 | extern void raw_err(struct sock *, struct sk_buff *, u32 info); |
25 | extern int raw_rcv(struct sock *, struct sk_buff *); | 25 | extern int raw_rcv(struct sock *, struct sk_buff *); |
26 | 26 | ||
@@ -37,6 +37,11 @@ extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, | |||
37 | unsigned long raddr, unsigned long laddr, | 37 | unsigned long raddr, unsigned long laddr, |
38 | int dif); | 38 | int dif); |
39 | 39 | ||
40 | extern void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash); | 40 | extern int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash); |
41 | |||
42 | #ifdef CONFIG_PROC_FS | ||
43 | extern int raw_proc_init(void); | ||
44 | extern void raw_proc_exit(void); | ||
45 | #endif | ||
41 | 46 | ||
42 | #endif /* _RAW_H */ | 47 | #endif /* _RAW_H */ |
diff --git a/include/net/rawv6.h b/include/net/rawv6.h index 23fd9a6a221a..14476a71725e 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h | |||
@@ -7,10 +7,11 @@ | |||
7 | extern struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE]; | 7 | extern struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE]; |
8 | extern rwlock_t raw_v6_lock; | 8 | extern rwlock_t raw_v6_lock; |
9 | 9 | ||
10 | extern void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr); | 10 | extern int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr); |
11 | 11 | ||
12 | extern struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, | 12 | extern struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, |
13 | struct in6_addr *loc_addr, struct in6_addr *rmt_addr); | 13 | struct in6_addr *loc_addr, struct in6_addr *rmt_addr, |
14 | int dif); | ||
14 | 15 | ||
15 | extern int rawv6_rcv(struct sock *sk, | 16 | extern int rawv6_rcv(struct sock *sk, |
16 | struct sk_buff *skb); | 17 | struct sk_buff *skb); |
diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 72fd6f5e86b1..b52cc52ffe39 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h | |||
@@ -89,6 +89,7 @@ struct listen_sock { | |||
89 | int qlen_young; | 89 | int qlen_young; |
90 | int clock_hand; | 90 | int clock_hand; |
91 | u32 hash_rnd; | 91 | u32 hash_rnd; |
92 | u32 nr_table_entries; | ||
92 | struct request_sock *syn_table[0]; | 93 | struct request_sock *syn_table[0]; |
93 | }; | 94 | }; |
94 | 95 | ||
@@ -96,6 +97,7 @@ struct listen_sock { | |||
96 | * | 97 | * |
97 | * @rskq_accept_head - FIFO head of established children | 98 | * @rskq_accept_head - FIFO head of established children |
98 | * @rskq_accept_tail - FIFO tail of established children | 99 | * @rskq_accept_tail - FIFO tail of established children |
100 | * @rskq_defer_accept - User waits for some data after accept() | ||
99 | * @syn_wait_lock - serializer | 101 | * @syn_wait_lock - serializer |
100 | * | 102 | * |
101 | * %syn_wait_lock is necessary only to avoid proc interface having to grab the main | 103 | * %syn_wait_lock is necessary only to avoid proc interface having to grab the main |
@@ -111,6 +113,8 @@ struct request_sock_queue { | |||
111 | struct request_sock *rskq_accept_head; | 113 | struct request_sock *rskq_accept_head; |
112 | struct request_sock *rskq_accept_tail; | 114 | struct request_sock *rskq_accept_tail; |
113 | rwlock_t syn_wait_lock; | 115 | rwlock_t syn_wait_lock; |
116 | u8 rskq_defer_accept; | ||
117 | /* 3 bytes hole, try to pack */ | ||
114 | struct listen_sock *listen_opt; | 118 | struct listen_sock *listen_opt; |
115 | }; | 119 | }; |
116 | 120 | ||
@@ -129,11 +133,13 @@ static inline struct listen_sock *reqsk_queue_yank_listen_sk(struct request_sock | |||
129 | return lopt; | 133 | return lopt; |
130 | } | 134 | } |
131 | 135 | ||
132 | static inline void reqsk_queue_destroy(struct request_sock_queue *queue) | 136 | static inline void __reqsk_queue_destroy(struct request_sock_queue *queue) |
133 | { | 137 | { |
134 | kfree(reqsk_queue_yank_listen_sk(queue)); | 138 | kfree(reqsk_queue_yank_listen_sk(queue)); |
135 | } | 139 | } |
136 | 140 | ||
141 | extern void reqsk_queue_destroy(struct request_sock_queue *queue); | ||
142 | |||
137 | static inline struct request_sock * | 143 | static inline struct request_sock * |
138 | reqsk_queue_yank_acceptq(struct request_sock_queue *queue) | 144 | reqsk_queue_yank_acceptq(struct request_sock_queue *queue) |
139 | { | 145 | { |
@@ -221,17 +227,17 @@ static inline int reqsk_queue_added(struct request_sock_queue *queue) | |||
221 | return prev_qlen; | 227 | return prev_qlen; |
222 | } | 228 | } |
223 | 229 | ||
224 | static inline int reqsk_queue_len(struct request_sock_queue *queue) | 230 | static inline int reqsk_queue_len(const struct request_sock_queue *queue) |
225 | { | 231 | { |
226 | return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0; | 232 | return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0; |
227 | } | 233 | } |
228 | 234 | ||
229 | static inline int reqsk_queue_len_young(struct request_sock_queue *queue) | 235 | static inline int reqsk_queue_len_young(const struct request_sock_queue *queue) |
230 | { | 236 | { |
231 | return queue->listen_opt->qlen_young; | 237 | return queue->listen_opt->qlen_young; |
232 | } | 238 | } |
233 | 239 | ||
234 | static inline int reqsk_queue_is_full(struct request_sock_queue *queue) | 240 | static inline int reqsk_queue_is_full(const struct request_sock_queue *queue) |
235 | { | 241 | { |
236 | return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log; | 242 | return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log; |
237 | } | 243 | } |
diff --git a/include/net/route.h b/include/net/route.h index c3cd069a9aca..dbe79ca67d31 100644 --- a/include/net/route.h +++ b/include/net/route.h | |||
@@ -105,10 +105,6 @@ struct rt_cache_stat | |||
105 | unsigned int out_hlist_search; | 105 | unsigned int out_hlist_search; |
106 | }; | 106 | }; |
107 | 107 | ||
108 | extern struct rt_cache_stat *rt_cache_stat; | ||
109 | #define RT_CACHE_STAT_INC(field) \ | ||
110 | (per_cpu_ptr(rt_cache_stat, raw_smp_processor_id())->field++) | ||
111 | |||
112 | extern struct ip_rt_acct *ip_rt_acct; | 108 | extern struct ip_rt_acct *ip_rt_acct; |
113 | 109 | ||
114 | struct in_device; | 110 | struct in_device; |
@@ -199,4 +195,6 @@ static inline struct inet_peer *rt_get_peer(struct rtable *rt) | |||
199 | return rt->peer; | 195 | return rt->peer; |
200 | } | 196 | } |
201 | 197 | ||
198 | extern ctl_table ipv4_route_table[]; | ||
199 | |||
202 | #endif /* _ROUTE_H */ | 200 | #endif /* _ROUTE_H */ |
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 5999e5684bbf..c51541ee0247 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h | |||
@@ -47,10 +47,10 @@ | |||
47 | #ifndef __sctp_constants_h__ | 47 | #ifndef __sctp_constants_h__ |
48 | #define __sctp_constants_h__ | 48 | #define __sctp_constants_h__ |
49 | 49 | ||
50 | #include <linux/tcp.h> /* For TCP states used in sctp_sock_state_t */ | ||
51 | #include <linux/sctp.h> | 50 | #include <linux/sctp.h> |
52 | #include <linux/ipv6.h> /* For ipv6hdr. */ | 51 | #include <linux/ipv6.h> /* For ipv6hdr. */ |
53 | #include <net/sctp/user.h> | 52 | #include <net/sctp/user.h> |
53 | #include <net/tcp_states.h> /* For TCP states used in sctp_sock_state_t */ | ||
54 | 54 | ||
55 | /* Value used for stream negotiation. */ | 55 | /* Value used for stream negotiation. */ |
56 | enum { SCTP_MAX_STREAM = 0xffff }; | 56 | enum { SCTP_MAX_STREAM = 0xffff }; |
diff --git a/include/net/sock.h b/include/net/sock.h index e9b1dbab90d0..312cb25cbd18 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -88,6 +88,7 @@ do { spin_lock_init(&((__sk)->sk_lock.slock)); \ | |||
88 | } while(0) | 88 | } while(0) |
89 | 89 | ||
90 | struct sock; | 90 | struct sock; |
91 | struct proto; | ||
91 | 92 | ||
92 | /** | 93 | /** |
93 | * struct sock_common - minimal network layer representation of sockets | 94 | * struct sock_common - minimal network layer representation of sockets |
@@ -98,10 +99,11 @@ struct sock; | |||
98 | * @skc_node: main hash linkage for various protocol lookup tables | 99 | * @skc_node: main hash linkage for various protocol lookup tables |
99 | * @skc_bind_node: bind hash linkage for various protocol lookup tables | 100 | * @skc_bind_node: bind hash linkage for various protocol lookup tables |
100 | * @skc_refcnt: reference count | 101 | * @skc_refcnt: reference count |
102 | * @skc_prot: protocol handlers inside a network family | ||
101 | * | 103 | * |
102 | * This is the minimal network layer representation of sockets, the header | 104 | * This is the minimal network layer representation of sockets, the header |
103 | * for struct sock and struct tcp_tw_bucket. | 105 | * for struct sock and struct inet_timewait_sock. |
104 | */ | 106 | */ |
105 | struct sock_common { | 107 | struct sock_common { |
106 | unsigned short skc_family; | 108 | unsigned short skc_family; |
107 | volatile unsigned char skc_state; | 109 | volatile unsigned char skc_state; |
@@ -110,11 +112,12 @@ struct sock_common { | |||
110 | struct hlist_node skc_node; | 112 | struct hlist_node skc_node; |
111 | struct hlist_node skc_bind_node; | 113 | struct hlist_node skc_bind_node; |
112 | atomic_t skc_refcnt; | 114 | atomic_t skc_refcnt; |
115 | struct proto *skc_prot; | ||
113 | }; | 116 | }; |
114 | 117 | ||
115 | /** | 118 | /** |
116 | * struct sock - network layer representation of sockets | 119 | * struct sock - network layer representation of sockets |
117 | * @__sk_common: shared layout with tcp_tw_bucket | 120 | * @__sk_common: shared layout with inet_timewait_sock |
118 | * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN | 121 | * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN |
119 | * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings | 122 | * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings |
120 | * @sk_lock: synchronizer | 123 | * @sk_lock: synchronizer |
@@ -136,11 +139,10 @@ struct sock_common { | |||
136 | * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets | 139 | * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets |
137 | * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) | 140 | * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) |
138 | * @sk_lingertime: %SO_LINGER l_linger setting | 141 | * @sk_lingertime: %SO_LINGER l_linger setting |
139 | * @sk_hashent: hash entry in several tables (e.g. tcp_ehash) | 142 | * @sk_hashent: hash entry in several tables (e.g. inet_hashinfo.ehash) |
140 | * @sk_backlog: always used with the per-socket spinlock held | 143 | * @sk_backlog: always used with the per-socket spinlock held |
141 | * @sk_callback_lock: used with the callbacks in the end of this struct | 144 | * @sk_callback_lock: used with the callbacks in the end of this struct |
142 | * @sk_error_queue: rarely used | 145 | * @sk_error_queue: rarely used |
143 | * @sk_prot: protocol handlers inside a network family | ||
144 | * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, IPV6_ADDRFORM for instance) | 146 | * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, IPV6_ADDRFORM for instance) |
145 | * @sk_err: last error | 147 | * @sk_err: last error |
146 | * @sk_err_soft: errors that don't cause failure but are the cause of a persistent failure not just 'timed out' | 148 | * @sk_err_soft: errors that don't cause failure but are the cause of a persistent failure not just 'timed out' |
@@ -173,7 +175,7 @@ struct sock_common { | |||
173 | */ | 175 | */ |
174 | struct sock { | 176 | struct sock { |
175 | /* | 177 | /* |
176 | * Now struct tcp_tw_bucket also uses sock_common, so please just | 178 | * Now struct inet_timewait_sock also uses sock_common, so please just |
177 | * don't add nothing before this first member (__sk_common) --acme | 179 | * don't add nothing before this first member (__sk_common) --acme |
178 | */ | 180 | */ |
179 | struct sock_common __sk_common; | 181 | struct sock_common __sk_common; |
@@ -184,6 +186,7 @@ struct sock { | |||
184 | #define sk_node __sk_common.skc_node | 186 | #define sk_node __sk_common.skc_node |
185 | #define sk_bind_node __sk_common.skc_bind_node | 187 | #define sk_bind_node __sk_common.skc_bind_node |
186 | #define sk_refcnt __sk_common.skc_refcnt | 188 | #define sk_refcnt __sk_common.skc_refcnt |
189 | #define sk_prot __sk_common.skc_prot | ||
187 | unsigned char sk_shutdown : 2, | 190 | unsigned char sk_shutdown : 2, |
188 | sk_no_check : 2, | 191 | sk_no_check : 2, |
189 | sk_userlocks : 4; | 192 | sk_userlocks : 4; |
@@ -218,7 +221,6 @@ struct sock { | |||
218 | struct sk_buff *tail; | 221 | struct sk_buff *tail; |
219 | } sk_backlog; | 222 | } sk_backlog; |
220 | struct sk_buff_head sk_error_queue; | 223 | struct sk_buff_head sk_error_queue; |
221 | struct proto *sk_prot; | ||
222 | struct proto *sk_prot_creator; | 224 | struct proto *sk_prot_creator; |
223 | rwlock_t sk_callback_lock; | 225 | rwlock_t sk_callback_lock; |
224 | int sk_err, | 226 | int sk_err, |
@@ -253,28 +255,28 @@ struct sock { | |||
253 | /* | 255 | /* |
254 | * Hashed lists helper routines | 256 | * Hashed lists helper routines |
255 | */ | 257 | */ |
256 | static inline struct sock *__sk_head(struct hlist_head *head) | 258 | static inline struct sock *__sk_head(const struct hlist_head *head) |
257 | { | 259 | { |
258 | return hlist_entry(head->first, struct sock, sk_node); | 260 | return hlist_entry(head->first, struct sock, sk_node); |
259 | } | 261 | } |
260 | 262 | ||
261 | static inline struct sock *sk_head(struct hlist_head *head) | 263 | static inline struct sock *sk_head(const struct hlist_head *head) |
262 | { | 264 | { |
263 | return hlist_empty(head) ? NULL : __sk_head(head); | 265 | return hlist_empty(head) ? NULL : __sk_head(head); |
264 | } | 266 | } |
265 | 267 | ||
266 | static inline struct sock *sk_next(struct sock *sk) | 268 | static inline struct sock *sk_next(const struct sock *sk) |
267 | { | 269 | { |
268 | return sk->sk_node.next ? | 270 | return sk->sk_node.next ? |
269 | hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL; | 271 | hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL; |
270 | } | 272 | } |
271 | 273 | ||
272 | static inline int sk_unhashed(struct sock *sk) | 274 | static inline int sk_unhashed(const struct sock *sk) |
273 | { | 275 | { |
274 | return hlist_unhashed(&sk->sk_node); | 276 | return hlist_unhashed(&sk->sk_node); |
275 | } | 277 | } |
276 | 278 | ||
277 | static inline int sk_hashed(struct sock *sk) | 279 | static inline int sk_hashed(const struct sock *sk) |
278 | { | 280 | { |
279 | return sk->sk_node.pprev != NULL; | 281 | return sk->sk_node.pprev != NULL; |
280 | } | 282 | } |
@@ -554,6 +556,10 @@ struct proto { | |||
554 | kmem_cache_t *slab; | 556 | kmem_cache_t *slab; |
555 | unsigned int obj_size; | 557 | unsigned int obj_size; |
556 | 558 | ||
559 | kmem_cache_t *twsk_slab; | ||
560 | unsigned int twsk_obj_size; | ||
561 | atomic_t *orphan_count; | ||
562 | |||
557 | struct request_sock_ops *rsk_prot; | 563 | struct request_sock_ops *rsk_prot; |
558 | 564 | ||
559 | struct module *owner; | 565 | struct module *owner; |
@@ -561,7 +567,9 @@ struct proto { | |||
561 | char name[32]; | 567 | char name[32]; |
562 | 568 | ||
563 | struct list_head node; | 569 | struct list_head node; |
564 | 570 | #ifdef SOCK_REFCNT_DEBUG | |
571 | atomic_t socks; | ||
572 | #endif | ||
565 | struct { | 573 | struct { |
566 | int inuse; | 574 | int inuse; |
567 | u8 __pad[SMP_CACHE_BYTES - sizeof(int)]; | 575 | u8 __pad[SMP_CACHE_BYTES - sizeof(int)]; |
@@ -571,6 +579,31 @@ struct proto { | |||
571 | extern int proto_register(struct proto *prot, int alloc_slab); | 579 | extern int proto_register(struct proto *prot, int alloc_slab); |
572 | extern void proto_unregister(struct proto *prot); | 580 | extern void proto_unregister(struct proto *prot); |
573 | 581 | ||
582 | #ifdef SOCK_REFCNT_DEBUG | ||
583 | static inline void sk_refcnt_debug_inc(struct sock *sk) | ||
584 | { | ||
585 | atomic_inc(&sk->sk_prot->socks); | ||
586 | } | ||
587 | |||
588 | static inline void sk_refcnt_debug_dec(struct sock *sk) | ||
589 | { | ||
590 | atomic_dec(&sk->sk_prot->socks); | ||
591 | printk(KERN_DEBUG "%s socket %p released, %d are still alive\n", | ||
592 | sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks)); | ||
593 | } | ||
594 | |||
595 | static inline void sk_refcnt_debug_release(const struct sock *sk) | ||
596 | { | ||
597 | if (atomic_read(&sk->sk_refcnt) != 1) | ||
598 | printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n", | ||
599 | sk->sk_prot->name, sk, atomic_read(&sk->sk_refcnt)); | ||
600 | } | ||
601 | #else /* SOCK_REFCNT_DEBUG */ | ||
602 | #define sk_refcnt_debug_inc(sk) do { } while (0) | ||
603 | #define sk_refcnt_debug_dec(sk) do { } while (0) | ||
604 | #define sk_refcnt_debug_release(sk) do { } while (0) | ||
605 | #endif /* SOCK_REFCNT_DEBUG */ | ||
606 | |||
574 | /* Called with local bh disabled */ | 607 | /* Called with local bh disabled */ |
575 | static __inline__ void sock_prot_inc_use(struct proto *prot) | 608 | static __inline__ void sock_prot_inc_use(struct proto *prot) |
576 | { | 609 | { |
@@ -582,6 +615,15 @@ static __inline__ void sock_prot_dec_use(struct proto *prot) | |||
582 | prot->stats[smp_processor_id()].inuse--; | 615 | prot->stats[smp_processor_id()].inuse--; |
583 | } | 616 | } |
584 | 617 | ||
618 | /* With per-bucket locks this operation is not-atomic, so that | ||
619 | * this version is not worse. | ||
620 | */ | ||
621 | static inline void __sk_prot_rehash(struct sock *sk) | ||
622 | { | ||
623 | sk->sk_prot->unhash(sk); | ||
624 | sk->sk_prot->hash(sk); | ||
625 | } | ||
626 | |||
585 | /* About 10 seconds */ | 627 | /* About 10 seconds */ |
586 | #define SOCK_DESTROY_TIME (10*HZ) | 628 | #define SOCK_DESTROY_TIME (10*HZ) |
587 | 629 | ||
@@ -693,6 +735,8 @@ extern struct sock *sk_alloc(int family, | |||
693 | unsigned int __nocast priority, | 735 | unsigned int __nocast priority, |
694 | struct proto *prot, int zero_it); | 736 | struct proto *prot, int zero_it); |
695 | extern void sk_free(struct sock *sk); | 737 | extern void sk_free(struct sock *sk); |
738 | extern struct sock *sk_clone(const struct sock *sk, | ||
739 | const unsigned int __nocast priority); | ||
696 | 740 | ||
697 | extern struct sk_buff *sock_wmalloc(struct sock *sk, | 741 | extern struct sk_buff *sock_wmalloc(struct sock *sk, |
698 | unsigned long size, int force, | 742 | unsigned long size, int force, |
@@ -986,6 +1030,16 @@ sk_dst_check(struct sock *sk, u32 cookie) | |||
986 | return dst; | 1030 | return dst; |
987 | } | 1031 | } |
988 | 1032 | ||
1033 | static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst) | ||
1034 | { | ||
1035 | __sk_dst_set(sk, dst); | ||
1036 | sk->sk_route_caps = dst->dev->features; | ||
1037 | if (sk->sk_route_caps & NETIF_F_TSO) { | ||
1038 | if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len) | ||
1039 | sk->sk_route_caps &= ~NETIF_F_TSO; | ||
1040 | } | ||
1041 | } | ||
1042 | |||
989 | static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb) | 1043 | static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb) |
990 | { | 1044 | { |
991 | sk->sk_wmem_queued += skb->truesize; | 1045 | sk->sk_wmem_queued += skb->truesize; |
@@ -1146,7 +1200,7 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk, | |||
1146 | int hdr_len; | 1200 | int hdr_len; |
1147 | 1201 | ||
1148 | hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header); | 1202 | hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header); |
1149 | skb = alloc_skb(size + hdr_len, gfp); | 1203 | skb = alloc_skb_fclone(size + hdr_len, gfp); |
1150 | if (skb) { | 1204 | if (skb) { |
1151 | skb->truesize += mem; | 1205 | skb->truesize += mem; |
1152 | if (sk->sk_forward_alloc >= (int)skb->truesize || | 1206 | if (sk->sk_forward_alloc >= (int)skb->truesize || |
@@ -1228,16 +1282,19 @@ static inline int sock_intr_errno(long timeo) | |||
1228 | static __inline__ void | 1282 | static __inline__ void |
1229 | sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) | 1283 | sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) |
1230 | { | 1284 | { |
1231 | struct timeval *stamp = &skb->stamp; | 1285 | struct timeval stamp; |
1286 | |||
1287 | skb_get_timestamp(skb, &stamp); | ||
1232 | if (sock_flag(sk, SOCK_RCVTSTAMP)) { | 1288 | if (sock_flag(sk, SOCK_RCVTSTAMP)) { |
1233 | /* Race occurred between timestamp enabling and packet | 1289 | /* Race occurred between timestamp enabling and packet |
1234 | receiving. Fill in the current time for now. */ | 1290 | receiving. Fill in the current time for now. */ |
1235 | if (stamp->tv_sec == 0) | 1291 | if (stamp.tv_sec == 0) |
1236 | do_gettimeofday(stamp); | 1292 | do_gettimeofday(&stamp); |
1293 | skb_set_timestamp(skb, &stamp); | ||
1237 | put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval), | 1294 | put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval), |
1238 | stamp); | 1295 | &stamp); |
1239 | } else | 1296 | } else |
1240 | sk->sk_stamp = *stamp; | 1297 | sk->sk_stamp = stamp; |
1241 | } | 1298 | } |
1242 | 1299 | ||
1243 | /** | 1300 | /** |
@@ -1262,11 +1319,11 @@ extern int sock_get_timestamp(struct sock *, struct timeval __user *); | |||
1262 | */ | 1319 | */ |
1263 | 1320 | ||
1264 | #if 0 | 1321 | #if 0 |
1265 | #define NETDEBUG(x) do { } while (0) | 1322 | #define NETDEBUG(fmt, args...) do { } while (0) |
1266 | #define LIMIT_NETDEBUG(x) do {} while(0) | 1323 | #define LIMIT_NETDEBUG(fmt, args...) do { } while(0) |
1267 | #else | 1324 | #else |
1268 | #define NETDEBUG(x) do { x; } while (0) | 1325 | #define NETDEBUG(fmt, args...) printk(fmt,##args) |
1269 | #define LIMIT_NETDEBUG(x) do { if (net_ratelimit()) { x; } } while(0) | 1326 | #define LIMIT_NETDEBUG(fmt, args...) do { if (net_ratelimit()) printk(fmt,##args); } while(0) |
1270 | #endif | 1327 | #endif |
1271 | 1328 | ||
1272 | /* | 1329 | /* |
@@ -1313,4 +1370,14 @@ static inline int siocdevprivate_ioctl(unsigned int fd, unsigned int cmd, unsign | |||
1313 | } | 1370 | } |
1314 | #endif | 1371 | #endif |
1315 | 1372 | ||
1373 | extern void sk_init(void); | ||
1374 | |||
1375 | #ifdef CONFIG_SYSCTL | ||
1376 | extern struct ctl_table core_table[]; | ||
1377 | extern int sysctl_optmem_max; | ||
1378 | #endif | ||
1379 | |||
1380 | extern __u32 sysctl_wmem_default; | ||
1381 | extern __u32 sysctl_rmem_default; | ||
1382 | |||
1316 | #endif /* _SOCK_H */ | 1383 | #endif /* _SOCK_H */ |
diff --git a/include/net/tcp.h b/include/net/tcp.h index 5010f0c5a56e..d6bcf1317a6a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
@@ -21,360 +21,29 @@ | |||
21 | #define TCP_DEBUG 1 | 21 | #define TCP_DEBUG 1 |
22 | #define FASTRETRANS_DEBUG 1 | 22 | #define FASTRETRANS_DEBUG 1 |
23 | 23 | ||
24 | /* Cancel timers, when they are not required. */ | ||
25 | #undef TCP_CLEAR_TIMERS | ||
26 | |||
27 | #include <linux/config.h> | 24 | #include <linux/config.h> |
28 | #include <linux/list.h> | 25 | #include <linux/list.h> |
29 | #include <linux/tcp.h> | 26 | #include <linux/tcp.h> |
30 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
31 | #include <linux/cache.h> | 28 | #include <linux/cache.h> |
32 | #include <linux/percpu.h> | 29 | #include <linux/percpu.h> |
30 | |||
31 | #include <net/inet_connection_sock.h> | ||
32 | #include <net/inet_timewait_sock.h> | ||
33 | #include <net/inet_hashtables.h> | ||
33 | #include <net/checksum.h> | 34 | #include <net/checksum.h> |
34 | #include <net/request_sock.h> | 35 | #include <net/request_sock.h> |
35 | #include <net/sock.h> | 36 | #include <net/sock.h> |
36 | #include <net/snmp.h> | 37 | #include <net/snmp.h> |
37 | #include <net/ip.h> | 38 | #include <net/ip.h> |
38 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) | 39 | #include <net/tcp_states.h> |
39 | #include <linux/ipv6.h> | ||
40 | #endif | ||
41 | #include <linux/seq_file.h> | ||
42 | |||
43 | /* This is for all connections with a full identity, no wildcards. | ||
44 | * New scheme, half the table is for TIME_WAIT, the other half is | ||
45 | * for the rest. I'll experiment with dynamic table growth later. | ||
46 | */ | ||
47 | struct tcp_ehash_bucket { | ||
48 | rwlock_t lock; | ||
49 | struct hlist_head chain; | ||
50 | } __attribute__((__aligned__(8))); | ||
51 | |||
52 | /* This is for listening sockets, thus all sockets which possess wildcards. */ | ||
53 | #define TCP_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ | ||
54 | |||
55 | /* There are a few simple rules, which allow for local port reuse by | ||
56 | * an application. In essence: | ||
57 | * | ||
58 | * 1) Sockets bound to different interfaces may share a local port. | ||
59 | * Failing that, goto test 2. | ||
60 | * 2) If all sockets have sk->sk_reuse set, and none of them are in | ||
61 | * TCP_LISTEN state, the port may be shared. | ||
62 | * Failing that, goto test 3. | ||
63 | * 3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local | ||
64 | * address, and none of them are the same, the port may be | ||
65 | * shared. | ||
66 | * Failing this, the port cannot be shared. | ||
67 | * | ||
68 | * The interesting point, is test #2. This is what an FTP server does | ||
69 | * all day. To optimize this case we use a specific flag bit defined | ||
70 | * below. As we add sockets to a bind bucket list, we perform a | ||
71 | * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN)) | ||
72 | * As long as all sockets added to a bind bucket pass this test, | ||
73 | * the flag bit will be set. | ||
74 | * The resulting situation is that tcp_v[46]_verify_bind() can just check | ||
75 | * for this flag bit, if it is set and the socket trying to bind has | ||
76 | * sk->sk_reuse set, we don't even have to walk the owners list at all, | ||
77 | * we return that it is ok to bind this socket to the requested local port. | ||
78 | * | ||
79 | * Sounds like a lot of work, but it is worth it. In a more naive | ||
80 | * implementation (ie. current FreeBSD etc.) the entire list of ports | ||
81 | * must be walked for each data port opened by an ftp server. Needless | ||
82 | * to say, this does not scale at all. With a couple thousand FTP | ||
83 | * users logged onto your box, isn't it nice to know that new data | ||
84 | * ports are created in O(1) time? I thought so. ;-) -DaveM | ||
85 | */ | ||
86 | struct tcp_bind_bucket { | ||
87 | unsigned short port; | ||
88 | signed short fastreuse; | ||
89 | struct hlist_node node; | ||
90 | struct hlist_head owners; | ||
91 | }; | ||
92 | |||
93 | #define tb_for_each(tb, node, head) hlist_for_each_entry(tb, node, head, node) | ||
94 | |||
95 | struct tcp_bind_hashbucket { | ||
96 | spinlock_t lock; | ||
97 | struct hlist_head chain; | ||
98 | }; | ||
99 | |||
100 | static inline struct tcp_bind_bucket *__tb_head(struct tcp_bind_hashbucket *head) | ||
101 | { | ||
102 | return hlist_entry(head->chain.first, struct tcp_bind_bucket, node); | ||
103 | } | ||
104 | |||
105 | static inline struct tcp_bind_bucket *tb_head(struct tcp_bind_hashbucket *head) | ||
106 | { | ||
107 | return hlist_empty(&head->chain) ? NULL : __tb_head(head); | ||
108 | } | ||
109 | |||
110 | extern struct tcp_hashinfo { | ||
111 | /* This is for sockets with full identity only. Sockets here will | ||
112 | * always be without wildcards and will have the following invariant: | ||
113 | * | ||
114 | * TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE | ||
115 | * | ||
116 | * First half of the table is for sockets not in TIME_WAIT, second half | ||
117 | * is for TIME_WAIT sockets only. | ||
118 | */ | ||
119 | struct tcp_ehash_bucket *__tcp_ehash; | ||
120 | |||
121 | /* Ok, let's try this, I give up, we do need a local binding | ||
122 | * TCP hash as well as the others for fast bind/connect. | ||
123 | */ | ||
124 | struct tcp_bind_hashbucket *__tcp_bhash; | ||
125 | 40 | ||
126 | int __tcp_bhash_size; | 41 | #include <linux/seq_file.h> |
127 | int __tcp_ehash_size; | ||
128 | |||
129 | /* All sockets in TCP_LISTEN state will be in here. This is the only | ||
130 | * table where wildcard'd TCP sockets can exist. Hash function here | ||
131 | * is just local port number. | ||
132 | */ | ||
133 | struct hlist_head __tcp_listening_hash[TCP_LHTABLE_SIZE]; | ||
134 | |||
135 | /* All the above members are written once at bootup and | ||
136 | * never written again _or_ are predominantly read-access. | ||
137 | * | ||
138 | * Now align to a new cache line as all the following members | ||
139 | * are often dirty. | ||
140 | */ | ||
141 | rwlock_t __tcp_lhash_lock ____cacheline_aligned; | ||
142 | atomic_t __tcp_lhash_users; | ||
143 | wait_queue_head_t __tcp_lhash_wait; | ||
144 | spinlock_t __tcp_portalloc_lock; | ||
145 | } tcp_hashinfo; | ||
146 | |||
147 | #define tcp_ehash (tcp_hashinfo.__tcp_ehash) | ||
148 | #define tcp_bhash (tcp_hashinfo.__tcp_bhash) | ||
149 | #define tcp_ehash_size (tcp_hashinfo.__tcp_ehash_size) | ||
150 | #define tcp_bhash_size (tcp_hashinfo.__tcp_bhash_size) | ||
151 | #define tcp_listening_hash (tcp_hashinfo.__tcp_listening_hash) | ||
152 | #define tcp_lhash_lock (tcp_hashinfo.__tcp_lhash_lock) | ||
153 | #define tcp_lhash_users (tcp_hashinfo.__tcp_lhash_users) | ||
154 | #define tcp_lhash_wait (tcp_hashinfo.__tcp_lhash_wait) | ||
155 | #define tcp_portalloc_lock (tcp_hashinfo.__tcp_portalloc_lock) | ||
156 | |||
157 | extern kmem_cache_t *tcp_bucket_cachep; | ||
158 | extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, | ||
159 | unsigned short snum); | ||
160 | extern void tcp_bucket_destroy(struct tcp_bind_bucket *tb); | ||
161 | extern void tcp_bucket_unlock(struct sock *sk); | ||
162 | extern int tcp_port_rover; | ||
163 | |||
164 | /* These are AF independent. */ | ||
165 | static __inline__ int tcp_bhashfn(__u16 lport) | ||
166 | { | ||
167 | return (lport & (tcp_bhash_size - 1)); | ||
168 | } | ||
169 | |||
170 | extern void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, | ||
171 | unsigned short snum); | ||
172 | |||
173 | #if (BITS_PER_LONG == 64) | ||
174 | #define TCP_ADDRCMP_ALIGN_BYTES 8 | ||
175 | #else | ||
176 | #define TCP_ADDRCMP_ALIGN_BYTES 4 | ||
177 | #endif | ||
178 | |||
179 | /* This is a TIME_WAIT bucket. It works around the memory consumption | ||
180 | * problems of sockets in such a state on heavily loaded servers, but | ||
181 | * without violating the protocol specification. | ||
182 | */ | ||
183 | struct tcp_tw_bucket { | ||
184 | /* | ||
185 | * Now struct sock also uses sock_common, so please just | ||
186 | * don't add nothing before this first member (__tw_common) --acme | ||
187 | */ | ||
188 | struct sock_common __tw_common; | ||
189 | #define tw_family __tw_common.skc_family | ||
190 | #define tw_state __tw_common.skc_state | ||
191 | #define tw_reuse __tw_common.skc_reuse | ||
192 | #define tw_bound_dev_if __tw_common.skc_bound_dev_if | ||
193 | #define tw_node __tw_common.skc_node | ||
194 | #define tw_bind_node __tw_common.skc_bind_node | ||
195 | #define tw_refcnt __tw_common.skc_refcnt | ||
196 | volatile unsigned char tw_substate; | ||
197 | unsigned char tw_rcv_wscale; | ||
198 | __u16 tw_sport; | ||
199 | /* Socket demultiplex comparisons on incoming packets. */ | ||
200 | /* these five are in inet_sock */ | ||
201 | __u32 tw_daddr | ||
202 | __attribute__((aligned(TCP_ADDRCMP_ALIGN_BYTES))); | ||
203 | __u32 tw_rcv_saddr; | ||
204 | __u16 tw_dport; | ||
205 | __u16 tw_num; | ||
206 | /* And these are ours. */ | ||
207 | int tw_hashent; | ||
208 | int tw_timeout; | ||
209 | __u32 tw_rcv_nxt; | ||
210 | __u32 tw_snd_nxt; | ||
211 | __u32 tw_rcv_wnd; | ||
212 | __u32 tw_ts_recent; | ||
213 | long tw_ts_recent_stamp; | ||
214 | unsigned long tw_ttd; | ||
215 | struct tcp_bind_bucket *tw_tb; | ||
216 | struct hlist_node tw_death_node; | ||
217 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
218 | struct in6_addr tw_v6_daddr; | ||
219 | struct in6_addr tw_v6_rcv_saddr; | ||
220 | int tw_v6_ipv6only; | ||
221 | #endif | ||
222 | }; | ||
223 | |||
224 | static __inline__ void tw_add_node(struct tcp_tw_bucket *tw, | ||
225 | struct hlist_head *list) | ||
226 | { | ||
227 | hlist_add_head(&tw->tw_node, list); | ||
228 | } | ||
229 | |||
230 | static __inline__ void tw_add_bind_node(struct tcp_tw_bucket *tw, | ||
231 | struct hlist_head *list) | ||
232 | { | ||
233 | hlist_add_head(&tw->tw_bind_node, list); | ||
234 | } | ||
235 | |||
236 | static inline int tw_dead_hashed(struct tcp_tw_bucket *tw) | ||
237 | { | ||
238 | return tw->tw_death_node.pprev != NULL; | ||
239 | } | ||
240 | |||
241 | static __inline__ void tw_dead_node_init(struct tcp_tw_bucket *tw) | ||
242 | { | ||
243 | tw->tw_death_node.pprev = NULL; | ||
244 | } | ||
245 | |||
246 | static __inline__ void __tw_del_dead_node(struct tcp_tw_bucket *tw) | ||
247 | { | ||
248 | __hlist_del(&tw->tw_death_node); | ||
249 | tw_dead_node_init(tw); | ||
250 | } | ||
251 | |||
252 | static __inline__ int tw_del_dead_node(struct tcp_tw_bucket *tw) | ||
253 | { | ||
254 | if (tw_dead_hashed(tw)) { | ||
255 | __tw_del_dead_node(tw); | ||
256 | return 1; | ||
257 | } | ||
258 | return 0; | ||
259 | } | ||
260 | |||
261 | #define tw_for_each(tw, node, head) \ | ||
262 | hlist_for_each_entry(tw, node, head, tw_node) | ||
263 | |||
264 | #define tw_for_each_inmate(tw, node, jail) \ | ||
265 | hlist_for_each_entry(tw, node, jail, tw_death_node) | ||
266 | |||
267 | #define tw_for_each_inmate_safe(tw, node, safe, jail) \ | ||
268 | hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node) | ||
269 | |||
270 | #define tcptw_sk(__sk) ((struct tcp_tw_bucket *)(__sk)) | ||
271 | |||
272 | static inline u32 tcp_v4_rcv_saddr(const struct sock *sk) | ||
273 | { | ||
274 | return likely(sk->sk_state != TCP_TIME_WAIT) ? | ||
275 | inet_sk(sk)->rcv_saddr : tcptw_sk(sk)->tw_rcv_saddr; | ||
276 | } | ||
277 | |||
278 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
279 | static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk) | ||
280 | { | ||
281 | return likely(sk->sk_state != TCP_TIME_WAIT) ? | ||
282 | &inet6_sk(sk)->rcv_saddr : &tcptw_sk(sk)->tw_v6_rcv_saddr; | ||
283 | } | ||
284 | |||
285 | static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) | ||
286 | { | ||
287 | return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; | ||
288 | } | ||
289 | |||
290 | #define tcptw_sk_ipv6only(__sk) (tcptw_sk(__sk)->tw_v6_ipv6only) | ||
291 | |||
292 | static inline int tcp_v6_ipv6only(const struct sock *sk) | ||
293 | { | ||
294 | return likely(sk->sk_state != TCP_TIME_WAIT) ? | ||
295 | ipv6_only_sock(sk) : tcptw_sk_ipv6only(sk); | ||
296 | } | ||
297 | #else | ||
298 | # define __tcp_v6_rcv_saddr(__sk) NULL | ||
299 | # define tcp_v6_rcv_saddr(__sk) NULL | ||
300 | # define tcptw_sk_ipv6only(__sk) 0 | ||
301 | # define tcp_v6_ipv6only(__sk) 0 | ||
302 | #endif | ||
303 | 42 | ||
304 | extern kmem_cache_t *tcp_timewait_cachep; | 43 | extern struct inet_hashinfo tcp_hashinfo; |
305 | |||
306 | static inline void tcp_tw_put(struct tcp_tw_bucket *tw) | ||
307 | { | ||
308 | if (atomic_dec_and_test(&tw->tw_refcnt)) { | ||
309 | #ifdef INET_REFCNT_DEBUG | ||
310 | printk(KERN_DEBUG "tw_bucket %p released\n", tw); | ||
311 | #endif | ||
312 | kmem_cache_free(tcp_timewait_cachep, tw); | ||
313 | } | ||
314 | } | ||
315 | 44 | ||
316 | extern atomic_t tcp_orphan_count; | 45 | extern atomic_t tcp_orphan_count; |
317 | extern int tcp_tw_count; | ||
318 | extern void tcp_time_wait(struct sock *sk, int state, int timeo); | 46 | extern void tcp_time_wait(struct sock *sk, int state, int timeo); |
319 | extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw); | ||
320 | |||
321 | |||
322 | /* Socket demux engine toys. */ | ||
323 | #ifdef __BIG_ENDIAN | ||
324 | #define TCP_COMBINED_PORTS(__sport, __dport) \ | ||
325 | (((__u32)(__sport)<<16) | (__u32)(__dport)) | ||
326 | #else /* __LITTLE_ENDIAN */ | ||
327 | #define TCP_COMBINED_PORTS(__sport, __dport) \ | ||
328 | (((__u32)(__dport)<<16) | (__u32)(__sport)) | ||
329 | #endif | ||
330 | |||
331 | #if (BITS_PER_LONG == 64) | ||
332 | #ifdef __BIG_ENDIAN | ||
333 | #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ | ||
334 | __u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr)); | ||
335 | #else /* __LITTLE_ENDIAN */ | ||
336 | #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ | ||
337 | __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr)); | ||
338 | #endif /* __BIG_ENDIAN */ | ||
339 | #define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ | ||
340 | (((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \ | ||
341 | ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ | ||
342 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
343 | #define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ | ||
344 | (((*((__u64 *)&(tcptw_sk(__sk)->tw_daddr))) == (__cookie)) && \ | ||
345 | ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \ | ||
346 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
347 | #else /* 32-bit arch */ | ||
348 | #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) | ||
349 | #define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ | ||
350 | ((inet_sk(__sk)->daddr == (__saddr)) && \ | ||
351 | (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ | ||
352 | ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ | ||
353 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
354 | #define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ | ||
355 | ((tcptw_sk(__sk)->tw_daddr == (__saddr)) && \ | ||
356 | (tcptw_sk(__sk)->tw_rcv_saddr == (__daddr)) && \ | ||
357 | ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \ | ||
358 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
359 | #endif /* 64-bit arch */ | ||
360 | |||
361 | #define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ | ||
362 | (((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ | ||
363 | ((__sk)->sk_family == AF_INET6) && \ | ||
364 | ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ | ||
365 | ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ | ||
366 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
367 | |||
368 | /* These can have wildcards, don't try too hard. */ | ||
369 | static __inline__ int tcp_lhashfn(unsigned short num) | ||
370 | { | ||
371 | return num & (TCP_LHTABLE_SIZE - 1); | ||
372 | } | ||
373 | |||
374 | static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) | ||
375 | { | ||
376 | return tcp_lhashfn(inet_sk(sk)->num); | ||
377 | } | ||
378 | 47 | ||
379 | #define MAX_TCP_HEADER (128 + MAX_HEADER) | 48 | #define MAX_TCP_HEADER (128 + MAX_HEADER) |
380 | 49 | ||
@@ -478,33 +147,6 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) | |||
478 | * timestamps. It must be less than | 147 | * timestamps. It must be less than |
479 | * minimal timewait lifetime. | 148 | * minimal timewait lifetime. |
480 | */ | 149 | */ |
481 | |||
482 | #define TCP_TW_RECYCLE_SLOTS_LOG 5 | ||
483 | #define TCP_TW_RECYCLE_SLOTS (1<<TCP_TW_RECYCLE_SLOTS_LOG) | ||
484 | |||
485 | /* If time > 4sec, it is "slow" path, no recycling is required, | ||
486 | so that we select tick to get range about 4 seconds. | ||
487 | */ | ||
488 | |||
489 | #if HZ <= 16 || HZ > 4096 | ||
490 | # error Unsupported: HZ <= 16 or HZ > 4096 | ||
491 | #elif HZ <= 32 | ||
492 | # define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG) | ||
493 | #elif HZ <= 64 | ||
494 | # define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG) | ||
495 | #elif HZ <= 128 | ||
496 | # define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG) | ||
497 | #elif HZ <= 256 | ||
498 | # define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG) | ||
499 | #elif HZ <= 512 | ||
500 | # define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG) | ||
501 | #elif HZ <= 1024 | ||
502 | # define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG) | ||
503 | #elif HZ <= 2048 | ||
504 | # define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG) | ||
505 | #else | ||
506 | # define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG) | ||
507 | #endif | ||
508 | /* | 150 | /* |
509 | * TCP option | 151 | * TCP option |
510 | */ | 152 | */ |
@@ -534,22 +176,18 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) | |||
534 | #define TCPOLEN_SACK_BASE_ALIGNED 4 | 176 | #define TCPOLEN_SACK_BASE_ALIGNED 4 |
535 | #define TCPOLEN_SACK_PERBLOCK 8 | 177 | #define TCPOLEN_SACK_PERBLOCK 8 |
536 | 178 | ||
537 | #define TCP_TIME_RETRANS 1 /* Retransmit timer */ | ||
538 | #define TCP_TIME_DACK 2 /* Delayed ack timer */ | ||
539 | #define TCP_TIME_PROBE0 3 /* Zero window probe timer */ | ||
540 | #define TCP_TIME_KEEPOPEN 4 /* Keepalive timer */ | ||
541 | |||
542 | /* Flags in tp->nonagle */ | 179 | /* Flags in tp->nonagle */ |
543 | #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ | 180 | #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ |
544 | #define TCP_NAGLE_CORK 2 /* Socket is corked */ | 181 | #define TCP_NAGLE_CORK 2 /* Socket is corked */ |
545 | #define TCP_NAGLE_PUSH 4 /* Cork is overriden for already queued data */ | 182 | #define TCP_NAGLE_PUSH 4 /* Cork is overriden for already queued data */ |
546 | 183 | ||
184 | extern struct inet_timewait_death_row tcp_death_row; | ||
185 | |||
547 | /* sysctl variables for tcp */ | 186 | /* sysctl variables for tcp */ |
548 | extern int sysctl_tcp_timestamps; | 187 | extern int sysctl_tcp_timestamps; |
549 | extern int sysctl_tcp_window_scaling; | 188 | extern int sysctl_tcp_window_scaling; |
550 | extern int sysctl_tcp_sack; | 189 | extern int sysctl_tcp_sack; |
551 | extern int sysctl_tcp_fin_timeout; | 190 | extern int sysctl_tcp_fin_timeout; |
552 | extern int sysctl_tcp_tw_recycle; | ||
553 | extern int sysctl_tcp_keepalive_time; | 191 | extern int sysctl_tcp_keepalive_time; |
554 | extern int sysctl_tcp_keepalive_probes; | 192 | extern int sysctl_tcp_keepalive_probes; |
555 | extern int sysctl_tcp_keepalive_intvl; | 193 | extern int sysctl_tcp_keepalive_intvl; |
@@ -564,7 +202,6 @@ extern int sysctl_tcp_stdurg; | |||
564 | extern int sysctl_tcp_rfc1337; | 202 | extern int sysctl_tcp_rfc1337; |
565 | extern int sysctl_tcp_abort_on_overflow; | 203 | extern int sysctl_tcp_abort_on_overflow; |
566 | extern int sysctl_tcp_max_orphans; | 204 | extern int sysctl_tcp_max_orphans; |
567 | extern int sysctl_tcp_max_tw_buckets; | ||
568 | extern int sysctl_tcp_fack; | 205 | extern int sysctl_tcp_fack; |
569 | extern int sysctl_tcp_reordering; | 206 | extern int sysctl_tcp_reordering; |
570 | extern int sysctl_tcp_ecn; | 207 | extern int sysctl_tcp_ecn; |
@@ -585,12 +222,6 @@ extern atomic_t tcp_memory_allocated; | |||
585 | extern atomic_t tcp_sockets_allocated; | 222 | extern atomic_t tcp_sockets_allocated; |
586 | extern int tcp_memory_pressure; | 223 | extern int tcp_memory_pressure; |
587 | 224 | ||
588 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
589 | #define TCP_INET_FAMILY(fam) ((fam) == AF_INET) | ||
590 | #else | ||
591 | #define TCP_INET_FAMILY(fam) 1 | ||
592 | #endif | ||
593 | |||
594 | /* | 225 | /* |
595 | * Pointers to address related TCP functions | 226 | * Pointers to address related TCP functions |
596 | * (i.e. things that depend on the address family) | 227 | * (i.e. things that depend on the address family) |
@@ -671,9 +302,6 @@ DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics); | |||
671 | #define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val) | 302 | #define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val) |
672 | #define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val) | 303 | #define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val) |
673 | 304 | ||
674 | extern void tcp_put_port(struct sock *sk); | ||
675 | extern void tcp_inherit_port(struct sock *sk, struct sock *child); | ||
676 | |||
677 | extern void tcp_v4_err(struct sk_buff *skb, u32); | 305 | extern void tcp_v4_err(struct sk_buff *skb, u32); |
678 | 306 | ||
679 | extern void tcp_shutdown (struct sock *sk, int how); | 307 | extern void tcp_shutdown (struct sock *sk, int how); |
@@ -682,7 +310,7 @@ extern int tcp_v4_rcv(struct sk_buff *skb); | |||
682 | 310 | ||
683 | extern int tcp_v4_remember_stamp(struct sock *sk); | 311 | extern int tcp_v4_remember_stamp(struct sock *sk); |
684 | 312 | ||
685 | extern int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw); | 313 | extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); |
686 | 314 | ||
687 | extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, | 315 | extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, |
688 | struct msghdr *msg, size_t size); | 316 | struct msghdr *msg, size_t size); |
@@ -704,42 +332,22 @@ extern int tcp_rcv_established(struct sock *sk, | |||
704 | 332 | ||
705 | extern void tcp_rcv_space_adjust(struct sock *sk); | 333 | extern void tcp_rcv_space_adjust(struct sock *sk); |
706 | 334 | ||
707 | enum tcp_ack_state_t | 335 | static inline void tcp_dec_quickack_mode(struct sock *sk, |
708 | { | 336 | const unsigned int pkts) |
709 | TCP_ACK_SCHED = 1, | ||
710 | TCP_ACK_TIMER = 2, | ||
711 | TCP_ACK_PUSHED= 4 | ||
712 | }; | ||
713 | |||
714 | static inline void tcp_schedule_ack(struct tcp_sock *tp) | ||
715 | { | 337 | { |
716 | tp->ack.pending |= TCP_ACK_SCHED; | 338 | struct inet_connection_sock *icsk = inet_csk(sk); |
717 | } | ||
718 | |||
719 | static inline int tcp_ack_scheduled(struct tcp_sock *tp) | ||
720 | { | ||
721 | return tp->ack.pending&TCP_ACK_SCHED; | ||
722 | } | ||
723 | |||
724 | static __inline__ void tcp_dec_quickack_mode(struct tcp_sock *tp, unsigned int pkts) | ||
725 | { | ||
726 | if (tp->ack.quick) { | ||
727 | if (pkts >= tp->ack.quick) { | ||
728 | tp->ack.quick = 0; | ||
729 | 339 | ||
340 | if (icsk->icsk_ack.quick) { | ||
341 | if (pkts >= icsk->icsk_ack.quick) { | ||
342 | icsk->icsk_ack.quick = 0; | ||
730 | /* Leaving quickack mode we deflate ATO. */ | 343 | /* Leaving quickack mode we deflate ATO. */ |
731 | tp->ack.ato = TCP_ATO_MIN; | 344 | icsk->icsk_ack.ato = TCP_ATO_MIN; |
732 | } else | 345 | } else |
733 | tp->ack.quick -= pkts; | 346 | icsk->icsk_ack.quick -= pkts; |
734 | } | 347 | } |
735 | } | 348 | } |
736 | 349 | ||
737 | extern void tcp_enter_quickack_mode(struct tcp_sock *tp); | 350 | extern void tcp_enter_quickack_mode(struct sock *sk); |
738 | |||
739 | static __inline__ void tcp_delack_init(struct tcp_sock *tp) | ||
740 | { | ||
741 | memset(&tp->ack, 0, sizeof(tp->ack)); | ||
742 | } | ||
743 | 351 | ||
744 | static inline void tcp_clear_options(struct tcp_options_received *rx_opt) | 352 | static inline void tcp_clear_options(struct tcp_options_received *rx_opt) |
745 | { | 353 | { |
@@ -755,10 +363,9 @@ enum tcp_tw_status | |||
755 | }; | 363 | }; |
756 | 364 | ||
757 | 365 | ||
758 | extern enum tcp_tw_status tcp_timewait_state_process(struct tcp_tw_bucket *tw, | 366 | extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, |
759 | struct sk_buff *skb, | 367 | struct sk_buff *skb, |
760 | struct tcphdr *th, | 368 | const struct tcphdr *th); |
761 | unsigned len); | ||
762 | 369 | ||
763 | extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, | 370 | extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, |
764 | struct request_sock *req, | 371 | struct request_sock *req, |
@@ -773,7 +380,6 @@ extern void tcp_update_metrics(struct sock *sk); | |||
773 | 380 | ||
774 | extern void tcp_close(struct sock *sk, | 381 | extern void tcp_close(struct sock *sk, |
775 | long timeout); | 382 | long timeout); |
776 | extern struct sock * tcp_accept(struct sock *sk, int flags, int *err); | ||
777 | extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait); | 383 | extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait); |
778 | 384 | ||
779 | extern int tcp_getsockopt(struct sock *sk, int level, | 385 | extern int tcp_getsockopt(struct sock *sk, int level, |
@@ -789,8 +395,6 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, | |||
789 | size_t len, int nonblock, | 395 | size_t len, int nonblock, |
790 | int flags, int *addr_len); | 396 | int flags, int *addr_len); |
791 | 397 | ||
792 | extern int tcp_listen_start(struct sock *sk); | ||
793 | |||
794 | extern void tcp_parse_options(struct sk_buff *skb, | 398 | extern void tcp_parse_options(struct sk_buff *skb, |
795 | struct tcp_options_received *opt_rx, | 399 | struct tcp_options_received *opt_rx, |
796 | int estab); | 400 | int estab); |
@@ -799,11 +403,6 @@ extern void tcp_parse_options(struct sk_buff *skb, | |||
799 | * TCP v4 functions exported for the inet6 API | 403 | * TCP v4 functions exported for the inet6 API |
800 | */ | 404 | */ |
801 | 405 | ||
802 | extern int tcp_v4_rebuild_header(struct sock *sk); | ||
803 | |||
804 | extern int tcp_v4_build_header(struct sock *sk, | ||
805 | struct sk_buff *skb); | ||
806 | |||
807 | extern void tcp_v4_send_check(struct sock *sk, | 406 | extern void tcp_v4_send_check(struct sock *sk, |
808 | struct tcphdr *th, int len, | 407 | struct tcphdr *th, int len, |
809 | struct sk_buff *skb); | 408 | struct sk_buff *skb); |
@@ -872,18 +471,15 @@ extern void tcp_cwnd_application_limited(struct sock *sk); | |||
872 | 471 | ||
873 | /* tcp_timer.c */ | 472 | /* tcp_timer.c */ |
874 | extern void tcp_init_xmit_timers(struct sock *); | 473 | extern void tcp_init_xmit_timers(struct sock *); |
875 | extern void tcp_clear_xmit_timers(struct sock *); | 474 | static inline void tcp_clear_xmit_timers(struct sock *sk) |
475 | { | ||
476 | inet_csk_clear_xmit_timers(sk); | ||
477 | } | ||
876 | 478 | ||
877 | extern void tcp_delete_keepalive_timer(struct sock *); | ||
878 | extern void tcp_reset_keepalive_timer(struct sock *, unsigned long); | ||
879 | extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); | 479 | extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); |
880 | extern unsigned int tcp_current_mss(struct sock *sk, int large); | 480 | extern unsigned int tcp_current_mss(struct sock *sk, int large); |
881 | 481 | ||
882 | #ifdef TCP_DEBUG | 482 | /* tcp.c */ |
883 | extern const char tcp_timer_bug_msg[]; | ||
884 | #endif | ||
885 | |||
886 | /* tcp_diag.c */ | ||
887 | extern void tcp_get_info(struct sock *, struct tcp_info *); | 483 | extern void tcp_get_info(struct sock *, struct tcp_info *); |
888 | 484 | ||
889 | /* Read 'sendfile()'-style from a TCP socket */ | 485 | /* Read 'sendfile()'-style from a TCP socket */ |
@@ -892,72 +488,6 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, | |||
892 | extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, | 488 | extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, |
893 | sk_read_actor_t recv_actor); | 489 | sk_read_actor_t recv_actor); |
894 | 490 | ||
895 | static inline void tcp_clear_xmit_timer(struct sock *sk, int what) | ||
896 | { | ||
897 | struct tcp_sock *tp = tcp_sk(sk); | ||
898 | |||
899 | switch (what) { | ||
900 | case TCP_TIME_RETRANS: | ||
901 | case TCP_TIME_PROBE0: | ||
902 | tp->pending = 0; | ||
903 | |||
904 | #ifdef TCP_CLEAR_TIMERS | ||
905 | sk_stop_timer(sk, &tp->retransmit_timer); | ||
906 | #endif | ||
907 | break; | ||
908 | case TCP_TIME_DACK: | ||
909 | tp->ack.blocked = 0; | ||
910 | tp->ack.pending = 0; | ||
911 | |||
912 | #ifdef TCP_CLEAR_TIMERS | ||
913 | sk_stop_timer(sk, &tp->delack_timer); | ||
914 | #endif | ||
915 | break; | ||
916 | default: | ||
917 | #ifdef TCP_DEBUG | ||
918 | printk(tcp_timer_bug_msg); | ||
919 | #endif | ||
920 | return; | ||
921 | }; | ||
922 | |||
923 | } | ||
924 | |||
925 | /* | ||
926 | * Reset the retransmission timer | ||
927 | */ | ||
928 | static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when) | ||
929 | { | ||
930 | struct tcp_sock *tp = tcp_sk(sk); | ||
931 | |||
932 | if (when > TCP_RTO_MAX) { | ||
933 | #ifdef TCP_DEBUG | ||
934 | printk(KERN_DEBUG "reset_xmit_timer sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, current_text_addr()); | ||
935 | #endif | ||
936 | when = TCP_RTO_MAX; | ||
937 | } | ||
938 | |||
939 | switch (what) { | ||
940 | case TCP_TIME_RETRANS: | ||
941 | case TCP_TIME_PROBE0: | ||
942 | tp->pending = what; | ||
943 | tp->timeout = jiffies+when; | ||
944 | sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout); | ||
945 | break; | ||
946 | |||
947 | case TCP_TIME_DACK: | ||
948 | tp->ack.pending |= TCP_ACK_TIMER; | ||
949 | tp->ack.timeout = jiffies+when; | ||
950 | sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout); | ||
951 | break; | ||
952 | |||
953 | default: | ||
954 | #ifdef TCP_DEBUG | ||
955 | printk(tcp_timer_bug_msg); | ||
956 | #endif | ||
957 | return; | ||
958 | }; | ||
959 | } | ||
960 | |||
961 | /* Initialize RCV_MSS value. | 491 | /* Initialize RCV_MSS value. |
962 | * RCV_MSS is an our guess about MSS used by the peer. | 492 | * RCV_MSS is an our guess about MSS used by the peer. |
963 | * We haven't any direct information about the MSS. | 493 | * We haven't any direct information about the MSS. |
@@ -975,7 +505,7 @@ static inline void tcp_initialize_rcv_mss(struct sock *sk) | |||
975 | hint = min(hint, TCP_MIN_RCVMSS); | 505 | hint = min(hint, TCP_MIN_RCVMSS); |
976 | hint = max(hint, TCP_MIN_MSS); | 506 | hint = max(hint, TCP_MIN_MSS); |
977 | 507 | ||
978 | tp->ack.rcv_mss = hint; | 508 | inet_csk(sk)->icsk_ack.rcv_mss = hint; |
979 | } | 509 | } |
980 | 510 | ||
981 | static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) | 511 | static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) |
@@ -1110,7 +640,8 @@ static inline void tcp_packets_out_inc(struct sock *sk, | |||
1110 | 640 | ||
1111 | tp->packets_out += tcp_skb_pcount(skb); | 641 | tp->packets_out += tcp_skb_pcount(skb); |
1112 | if (!orig) | 642 | if (!orig) |
1113 | tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); | 643 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
644 | inet_csk(sk)->icsk_rto, TCP_RTO_MAX); | ||
1114 | } | 645 | } |
1115 | 646 | ||
1116 | static inline void tcp_packets_out_dec(struct tcp_sock *tp, | 647 | static inline void tcp_packets_out_dec(struct tcp_sock *tp, |
@@ -1138,29 +669,29 @@ struct tcp_congestion_ops { | |||
1138 | struct list_head list; | 669 | struct list_head list; |
1139 | 670 | ||
1140 | /* initialize private data (optional) */ | 671 | /* initialize private data (optional) */ |
1141 | void (*init)(struct tcp_sock *tp); | 672 | void (*init)(struct sock *sk); |
1142 | /* cleanup private data (optional) */ | 673 | /* cleanup private data (optional) */ |
1143 | void (*release)(struct tcp_sock *tp); | 674 | void (*release)(struct sock *sk); |
1144 | 675 | ||
1145 | /* return slow start threshold (required) */ | 676 | /* return slow start threshold (required) */ |
1146 | u32 (*ssthresh)(struct tcp_sock *tp); | 677 | u32 (*ssthresh)(struct sock *sk); |
1147 | /* lower bound for congestion window (optional) */ | 678 | /* lower bound for congestion window (optional) */ |
1148 | u32 (*min_cwnd)(struct tcp_sock *tp); | 679 | u32 (*min_cwnd)(struct sock *sk); |
1149 | /* do new cwnd calculation (required) */ | 680 | /* do new cwnd calculation (required) */ |
1150 | void (*cong_avoid)(struct tcp_sock *tp, u32 ack, | 681 | void (*cong_avoid)(struct sock *sk, u32 ack, |
1151 | u32 rtt, u32 in_flight, int good_ack); | 682 | u32 rtt, u32 in_flight, int good_ack); |
1152 | /* round trip time sample per acked packet (optional) */ | 683 | /* round trip time sample per acked packet (optional) */ |
1153 | void (*rtt_sample)(struct tcp_sock *tp, u32 usrtt); | 684 | void (*rtt_sample)(struct sock *sk, u32 usrtt); |
1154 | /* call before changing ca_state (optional) */ | 685 | /* call before changing ca_state (optional) */ |
1155 | void (*set_state)(struct tcp_sock *tp, u8 new_state); | 686 | void (*set_state)(struct sock *sk, u8 new_state); |
1156 | /* call when cwnd event occurs (optional) */ | 687 | /* call when cwnd event occurs (optional) */ |
1157 | void (*cwnd_event)(struct tcp_sock *tp, enum tcp_ca_event ev); | 688 | void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev); |
1158 | /* new value of cwnd after loss (optional) */ | 689 | /* new value of cwnd after loss (optional) */ |
1159 | u32 (*undo_cwnd)(struct tcp_sock *tp); | 690 | u32 (*undo_cwnd)(struct sock *sk); |
1160 | /* hook for packet ack accounting (optional) */ | 691 | /* hook for packet ack accounting (optional) */ |
1161 | void (*pkts_acked)(struct tcp_sock *tp, u32 num_acked); | 692 | void (*pkts_acked)(struct sock *sk, u32 num_acked); |
1162 | /* get info for tcp_diag (optional) */ | 693 | /* get info for inet_diag (optional) */ |
1163 | void (*get_info)(struct tcp_sock *tp, u32 ext, struct sk_buff *skb); | 694 | void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb); |
1164 | 695 | ||
1165 | char name[TCP_CA_NAME_MAX]; | 696 | char name[TCP_CA_NAME_MAX]; |
1166 | struct module *owner; | 697 | struct module *owner; |
@@ -1169,30 +700,34 @@ struct tcp_congestion_ops { | |||
1169 | extern int tcp_register_congestion_control(struct tcp_congestion_ops *type); | 700 | extern int tcp_register_congestion_control(struct tcp_congestion_ops *type); |
1170 | extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); | 701 | extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); |
1171 | 702 | ||
1172 | extern void tcp_init_congestion_control(struct tcp_sock *tp); | 703 | extern void tcp_init_congestion_control(struct sock *sk); |
1173 | extern void tcp_cleanup_congestion_control(struct tcp_sock *tp); | 704 | extern void tcp_cleanup_congestion_control(struct sock *sk); |
1174 | extern int tcp_set_default_congestion_control(const char *name); | 705 | extern int tcp_set_default_congestion_control(const char *name); |
1175 | extern void tcp_get_default_congestion_control(char *name); | 706 | extern void tcp_get_default_congestion_control(char *name); |
1176 | extern int tcp_set_congestion_control(struct tcp_sock *tp, const char *name); | 707 | extern int tcp_set_congestion_control(struct sock *sk, const char *name); |
1177 | 708 | ||
1178 | extern struct tcp_congestion_ops tcp_init_congestion_ops; | 709 | extern struct tcp_congestion_ops tcp_init_congestion_ops; |
1179 | extern u32 tcp_reno_ssthresh(struct tcp_sock *tp); | 710 | extern u32 tcp_reno_ssthresh(struct sock *sk); |
1180 | extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, | 711 | extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, |
1181 | u32 rtt, u32 in_flight, int flag); | 712 | u32 rtt, u32 in_flight, int flag); |
1182 | extern u32 tcp_reno_min_cwnd(struct tcp_sock *tp); | 713 | extern u32 tcp_reno_min_cwnd(struct sock *sk); |
1183 | extern struct tcp_congestion_ops tcp_reno; | 714 | extern struct tcp_congestion_ops tcp_reno; |
1184 | 715 | ||
1185 | static inline void tcp_set_ca_state(struct tcp_sock *tp, u8 ca_state) | 716 | static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state) |
1186 | { | 717 | { |
1187 | if (tp->ca_ops->set_state) | 718 | struct inet_connection_sock *icsk = inet_csk(sk); |
1188 | tp->ca_ops->set_state(tp, ca_state); | 719 | |
1189 | tp->ca_state = ca_state; | 720 | if (icsk->icsk_ca_ops->set_state) |
721 | icsk->icsk_ca_ops->set_state(sk, ca_state); | ||
722 | icsk->icsk_ca_state = ca_state; | ||
1190 | } | 723 | } |
1191 | 724 | ||
1192 | static inline void tcp_ca_event(struct tcp_sock *tp, enum tcp_ca_event event) | 725 | static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) |
1193 | { | 726 | { |
1194 | if (tp->ca_ops->cwnd_event) | 727 | const struct inet_connection_sock *icsk = inet_csk(sk); |
1195 | tp->ca_ops->cwnd_event(tp, event); | 728 | |
729 | if (icsk->icsk_ca_ops->cwnd_event) | ||
730 | icsk->icsk_ca_ops->cwnd_event(sk, event); | ||
1196 | } | 731 | } |
1197 | 732 | ||
1198 | /* This determines how many packets are "in the network" to the best | 733 | /* This determines how many packets are "in the network" to the best |
@@ -1218,9 +753,10 @@ static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) | |||
1218 | * The exception is rate halving phase, when cwnd is decreasing towards | 753 | * The exception is rate halving phase, when cwnd is decreasing towards |
1219 | * ssthresh. | 754 | * ssthresh. |
1220 | */ | 755 | */ |
1221 | static inline __u32 tcp_current_ssthresh(struct tcp_sock *tp) | 756 | static inline __u32 tcp_current_ssthresh(const struct sock *sk) |
1222 | { | 757 | { |
1223 | if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery)) | 758 | const struct tcp_sock *tp = tcp_sk(sk); |
759 | if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery)) | ||
1224 | return tp->snd_ssthresh; | 760 | return tp->snd_ssthresh; |
1225 | else | 761 | else |
1226 | return max(tp->snd_ssthresh, | 762 | return max(tp->snd_ssthresh, |
@@ -1237,10 +773,13 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp) | |||
1237 | } | 773 | } |
1238 | 774 | ||
1239 | /* Set slow start threshold and cwnd not falling to slow start */ | 775 | /* Set slow start threshold and cwnd not falling to slow start */ |
1240 | static inline void __tcp_enter_cwr(struct tcp_sock *tp) | 776 | static inline void __tcp_enter_cwr(struct sock *sk) |
1241 | { | 777 | { |
778 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
779 | struct tcp_sock *tp = tcp_sk(sk); | ||
780 | |||
1242 | tp->undo_marker = 0; | 781 | tp->undo_marker = 0; |
1243 | tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); | 782 | tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); |
1244 | tp->snd_cwnd = min(tp->snd_cwnd, | 783 | tp->snd_cwnd = min(tp->snd_cwnd, |
1245 | tcp_packets_in_flight(tp) + 1U); | 784 | tcp_packets_in_flight(tp) + 1U); |
1246 | tp->snd_cwnd_cnt = 0; | 785 | tp->snd_cwnd_cnt = 0; |
@@ -1249,12 +788,14 @@ static inline void __tcp_enter_cwr(struct tcp_sock *tp) | |||
1249 | TCP_ECN_queue_cwr(tp); | 788 | TCP_ECN_queue_cwr(tp); |
1250 | } | 789 | } |
1251 | 790 | ||
1252 | static inline void tcp_enter_cwr(struct tcp_sock *tp) | 791 | static inline void tcp_enter_cwr(struct sock *sk) |
1253 | { | 792 | { |
793 | struct tcp_sock *tp = tcp_sk(sk); | ||
794 | |||
1254 | tp->prior_ssthresh = 0; | 795 | tp->prior_ssthresh = 0; |
1255 | if (tp->ca_state < TCP_CA_CWR) { | 796 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { |
1256 | __tcp_enter_cwr(tp); | 797 | __tcp_enter_cwr(sk); |
1257 | tcp_set_ca_state(tp, TCP_CA_CWR); | 798 | tcp_set_ca_state(sk, TCP_CA_CWR); |
1258 | } | 799 | } |
1259 | } | 800 | } |
1260 | 801 | ||
@@ -1277,8 +818,10 @@ static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss, | |||
1277 | 818 | ||
1278 | static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) | 819 | static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) |
1279 | { | 820 | { |
1280 | if (!tp->packets_out && !tp->pending) | 821 | const struct inet_connection_sock *icsk = inet_csk(sk); |
1281 | tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto); | 822 | if (!tp->packets_out && !icsk->icsk_pending) |
823 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, | ||
824 | icsk->icsk_rto, TCP_RTO_MAX); | ||
1282 | } | 825 | } |
1283 | 826 | ||
1284 | static __inline__ void tcp_push_pending_frames(struct sock *sk, | 827 | static __inline__ void tcp_push_pending_frames(struct sock *sk, |
@@ -1297,9 +840,6 @@ static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq) | |||
1297 | tp->snd_wl1 = seq; | 840 | tp->snd_wl1 = seq; |
1298 | } | 841 | } |
1299 | 842 | ||
1300 | extern void tcp_destroy_sock(struct sock *sk); | ||
1301 | |||
1302 | |||
1303 | /* | 843 | /* |
1304 | * Calculate(/check) TCP checksum | 844 | * Calculate(/check) TCP checksum |
1305 | */ | 845 | */ |
@@ -1359,8 +899,10 @@ static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb) | |||
1359 | tp->ucopy.memory = 0; | 899 | tp->ucopy.memory = 0; |
1360 | } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { | 900 | } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { |
1361 | wake_up_interruptible(sk->sk_sleep); | 901 | wake_up_interruptible(sk->sk_sleep); |
1362 | if (!tcp_ack_scheduled(tp)) | 902 | if (!inet_csk_ack_scheduled(sk)) |
1363 | tcp_reset_xmit_timer(sk, TCP_TIME_DACK, (3*TCP_RTO_MIN)/4); | 903 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, |
904 | (3 * TCP_RTO_MIN) / 4, | ||
905 | TCP_RTO_MAX); | ||
1364 | } | 906 | } |
1365 | return 1; | 907 | return 1; |
1366 | } | 908 | } |
@@ -1393,9 +935,9 @@ static __inline__ void tcp_set_state(struct sock *sk, int state) | |||
1393 | TCP_INC_STATS(TCP_MIB_ESTABRESETS); | 935 | TCP_INC_STATS(TCP_MIB_ESTABRESETS); |
1394 | 936 | ||
1395 | sk->sk_prot->unhash(sk); | 937 | sk->sk_prot->unhash(sk); |
1396 | if (tcp_sk(sk)->bind_hash && | 938 | if (inet_csk(sk)->icsk_bind_hash && |
1397 | !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) | 939 | !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) |
1398 | tcp_put_port(sk); | 940 | inet_put_port(&tcp_hashinfo, sk); |
1399 | /* fall through */ | 941 | /* fall through */ |
1400 | default: | 942 | default: |
1401 | if (oldstate==TCP_ESTABLISHED) | 943 | if (oldstate==TCP_ESTABLISHED) |
@@ -1422,7 +964,7 @@ static __inline__ void tcp_done(struct sock *sk) | |||
1422 | if (!sock_flag(sk, SOCK_DEAD)) | 964 | if (!sock_flag(sk, SOCK_DEAD)) |
1423 | sk->sk_state_change(sk); | 965 | sk->sk_state_change(sk); |
1424 | else | 966 | else |
1425 | tcp_destroy_sock(sk); | 967 | inet_csk_destroy_sock(sk); |
1426 | } | 968 | } |
1427 | 969 | ||
1428 | static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt) | 970 | static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt) |
@@ -1524,54 +1066,6 @@ static inline int tcp_full_space(const struct sock *sk) | |||
1524 | return tcp_win_from_space(sk->sk_rcvbuf); | 1066 | return tcp_win_from_space(sk->sk_rcvbuf); |
1525 | } | 1067 | } |
1526 | 1068 | ||
1527 | static inline void tcp_acceptq_queue(struct sock *sk, struct request_sock *req, | ||
1528 | struct sock *child) | ||
1529 | { | ||
1530 | reqsk_queue_add(&tcp_sk(sk)->accept_queue, req, sk, child); | ||
1531 | } | ||
1532 | |||
1533 | static inline void | ||
1534 | tcp_synq_removed(struct sock *sk, struct request_sock *req) | ||
1535 | { | ||
1536 | if (reqsk_queue_removed(&tcp_sk(sk)->accept_queue, req) == 0) | ||
1537 | tcp_delete_keepalive_timer(sk); | ||
1538 | } | ||
1539 | |||
1540 | static inline void tcp_synq_added(struct sock *sk) | ||
1541 | { | ||
1542 | if (reqsk_queue_added(&tcp_sk(sk)->accept_queue) == 0) | ||
1543 | tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT); | ||
1544 | } | ||
1545 | |||
1546 | static inline int tcp_synq_len(struct sock *sk) | ||
1547 | { | ||
1548 | return reqsk_queue_len(&tcp_sk(sk)->accept_queue); | ||
1549 | } | ||
1550 | |||
1551 | static inline int tcp_synq_young(struct sock *sk) | ||
1552 | { | ||
1553 | return reqsk_queue_len_young(&tcp_sk(sk)->accept_queue); | ||
1554 | } | ||
1555 | |||
1556 | static inline int tcp_synq_is_full(struct sock *sk) | ||
1557 | { | ||
1558 | return reqsk_queue_is_full(&tcp_sk(sk)->accept_queue); | ||
1559 | } | ||
1560 | |||
1561 | static inline void tcp_synq_unlink(struct tcp_sock *tp, struct request_sock *req, | ||
1562 | struct request_sock **prev) | ||
1563 | { | ||
1564 | reqsk_queue_unlink(&tp->accept_queue, req, prev); | ||
1565 | } | ||
1566 | |||
1567 | static inline void tcp_synq_drop(struct sock *sk, struct request_sock *req, | ||
1568 | struct request_sock **prev) | ||
1569 | { | ||
1570 | tcp_synq_unlink(tcp_sk(sk), req, prev); | ||
1571 | tcp_synq_removed(sk, req); | ||
1572 | reqsk_free(req); | ||
1573 | } | ||
1574 | |||
1575 | static __inline__ void tcp_openreq_init(struct request_sock *req, | 1069 | static __inline__ void tcp_openreq_init(struct request_sock *req, |
1576 | struct tcp_options_received *rx_opt, | 1070 | struct tcp_options_received *rx_opt, |
1577 | struct sk_buff *skb) | 1071 | struct sk_buff *skb) |
@@ -1593,27 +1087,6 @@ static __inline__ void tcp_openreq_init(struct request_sock *req, | |||
1593 | 1087 | ||
1594 | extern void tcp_enter_memory_pressure(void); | 1088 | extern void tcp_enter_memory_pressure(void); |
1595 | 1089 | ||
1596 | extern void tcp_listen_wlock(void); | ||
1597 | |||
1598 | /* - We may sleep inside this lock. | ||
1599 | * - If sleeping is not required (or called from BH), | ||
1600 | * use plain read_(un)lock(&tcp_lhash_lock). | ||
1601 | */ | ||
1602 | |||
1603 | static inline void tcp_listen_lock(void) | ||
1604 | { | ||
1605 | /* read_lock synchronizes to candidates to writers */ | ||
1606 | read_lock(&tcp_lhash_lock); | ||
1607 | atomic_inc(&tcp_lhash_users); | ||
1608 | read_unlock(&tcp_lhash_lock); | ||
1609 | } | ||
1610 | |||
1611 | static inline void tcp_listen_unlock(void) | ||
1612 | { | ||
1613 | if (atomic_dec_and_test(&tcp_lhash_users)) | ||
1614 | wake_up(&tcp_lhash_wait); | ||
1615 | } | ||
1616 | |||
1617 | static inline int keepalive_intvl_when(const struct tcp_sock *tp) | 1090 | static inline int keepalive_intvl_when(const struct tcp_sock *tp) |
1618 | { | 1091 | { |
1619 | return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl; | 1092 | return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl; |
@@ -1624,12 +1097,13 @@ static inline int keepalive_time_when(const struct tcp_sock *tp) | |||
1624 | return tp->keepalive_time ? : sysctl_tcp_keepalive_time; | 1097 | return tp->keepalive_time ? : sysctl_tcp_keepalive_time; |
1625 | } | 1098 | } |
1626 | 1099 | ||
1627 | static inline int tcp_fin_time(const struct tcp_sock *tp) | 1100 | static inline int tcp_fin_time(const struct sock *sk) |
1628 | { | 1101 | { |
1629 | int fin_timeout = tp->linger2 ? : sysctl_tcp_fin_timeout; | 1102 | int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout; |
1103 | const int rto = inet_csk(sk)->icsk_rto; | ||
1630 | 1104 | ||
1631 | if (fin_timeout < (tp->rto<<2) - (tp->rto>>1)) | 1105 | if (fin_timeout < (rto << 2) - (rto >> 1)) |
1632 | fin_timeout = (tp->rto<<2) - (tp->rto>>1); | 1106 | fin_timeout = (rto << 2) - (rto >> 1); |
1633 | 1107 | ||
1634 | return fin_timeout; | 1108 | return fin_timeout; |
1635 | } | 1109 | } |
@@ -1658,15 +1132,6 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int | |||
1658 | return 1; | 1132 | return 1; |
1659 | } | 1133 | } |
1660 | 1134 | ||
1661 | static inline void tcp_v4_setup_caps(struct sock *sk, struct dst_entry *dst) | ||
1662 | { | ||
1663 | sk->sk_route_caps = dst->dev->features; | ||
1664 | if (sk->sk_route_caps & NETIF_F_TSO) { | ||
1665 | if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len) | ||
1666 | sk->sk_route_caps &= ~NETIF_F_TSO; | ||
1667 | } | ||
1668 | } | ||
1669 | |||
1670 | #define TCP_CHECK_TIMER(sk) do { } while (0) | 1135 | #define TCP_CHECK_TIMER(sk) do { } while (0) |
1671 | 1136 | ||
1672 | static inline int tcp_use_frto(const struct sock *sk) | 1137 | static inline int tcp_use_frto(const struct sock *sk) |
@@ -1718,4 +1183,16 @@ struct tcp_iter_state { | |||
1718 | extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo); | 1183 | extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo); |
1719 | extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo); | 1184 | extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo); |
1720 | 1185 | ||
1186 | extern struct request_sock_ops tcp_request_sock_ops; | ||
1187 | |||
1188 | extern int tcp_v4_destroy_sock(struct sock *sk); | ||
1189 | |||
1190 | #ifdef CONFIG_PROC_FS | ||
1191 | extern int tcp4_proc_init(void); | ||
1192 | extern void tcp4_proc_exit(void); | ||
1193 | #endif | ||
1194 | |||
1195 | extern void tcp_v4_init(struct net_proto_family *ops); | ||
1196 | extern void tcp_init(void); | ||
1197 | |||
1721 | #endif /* _TCP_H */ | 1198 | #endif /* _TCP_H */ |
diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index 64980ee8c92a..c6b84397448d 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h | |||
@@ -88,7 +88,7 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb) | |||
88 | * it is surely retransmit. It is not in ECN RFC, | 88 | * it is surely retransmit. It is not in ECN RFC, |
89 | * but Linux follows this rule. */ | 89 | * but Linux follows this rule. */ |
90 | else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) | 90 | else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) |
91 | tcp_enter_quickack_mode(tp); | 91 | tcp_enter_quickack_mode((struct sock *)tp); |
92 | } | 92 | } |
93 | } | 93 | } |
94 | 94 | ||
diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h new file mode 100644 index 000000000000..b9d4176b2d15 --- /dev/null +++ b/include/net/tcp_states.h | |||
@@ -0,0 +1,34 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * Definitions for the TCP protocol sk_state field. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | */ | ||
13 | #ifndef _LINUX_TCP_STATES_H | ||
14 | #define _LINUX_TCP_STATES_H | ||
15 | |||
16 | enum { | ||
17 | TCP_ESTABLISHED = 1, | ||
18 | TCP_SYN_SENT, | ||
19 | TCP_SYN_RECV, | ||
20 | TCP_FIN_WAIT1, | ||
21 | TCP_FIN_WAIT2, | ||
22 | TCP_TIME_WAIT, | ||
23 | TCP_CLOSE, | ||
24 | TCP_CLOSE_WAIT, | ||
25 | TCP_LAST_ACK, | ||
26 | TCP_LISTEN, | ||
27 | TCP_CLOSING, /* Now a valid state */ | ||
28 | |||
29 | TCP_MAX_STATES /* Leave at the end! */ | ||
30 | }; | ||
31 | |||
32 | #define TCP_STATE_MASK 0xF | ||
33 | |||
34 | #endif /* _LINUX_TCP_STATES_H */ | ||
diff --git a/include/net/udp.h b/include/net/udp.h index ac229b761dbc..107b9d791a1f 100644 --- a/include/net/udp.h +++ b/include/net/udp.h | |||
@@ -94,6 +94,11 @@ struct udp_iter_state { | |||
94 | struct seq_operations seq_ops; | 94 | struct seq_operations seq_ops; |
95 | }; | 95 | }; |
96 | 96 | ||
97 | #ifdef CONFIG_PROC_FS | ||
97 | extern int udp_proc_register(struct udp_seq_afinfo *afinfo); | 98 | extern int udp_proc_register(struct udp_seq_afinfo *afinfo); |
98 | extern void udp_proc_unregister(struct udp_seq_afinfo *afinfo); | 99 | extern void udp_proc_unregister(struct udp_seq_afinfo *afinfo); |
100 | |||
101 | extern int udp4_proc_init(void); | ||
102 | extern void udp4_proc_exit(void); | ||
103 | #endif | ||
99 | #endif /* _UDP_H */ | 104 | #endif /* _UDP_H */ |
diff --git a/include/net/x25.h b/include/net/x25.h index 8b39b98876e8..fee62ff8c194 100644 --- a/include/net/x25.h +++ b/include/net/x25.h | |||
@@ -175,7 +175,7 @@ extern void x25_kill_by_neigh(struct x25_neigh *); | |||
175 | 175 | ||
176 | /* x25_dev.c */ | 176 | /* x25_dev.c */ |
177 | extern void x25_send_frame(struct sk_buff *, struct x25_neigh *); | 177 | extern void x25_send_frame(struct sk_buff *, struct x25_neigh *); |
178 | extern int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *); | 178 | extern int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); |
179 | extern void x25_establish_link(struct x25_neigh *); | 179 | extern void x25_establish_link(struct x25_neigh *); |
180 | extern void x25_terminate_link(struct x25_neigh *); | 180 | extern void x25_terminate_link(struct x25_neigh *); |
181 | 181 | ||
diff --git a/include/net/x25device.h b/include/net/x25device.h index d45ae883bd1d..1a318374faef 100644 --- a/include/net/x25device.h +++ b/include/net/x25device.h | |||
@@ -8,7 +8,6 @@ | |||
8 | static inline __be16 x25_type_trans(struct sk_buff *skb, struct net_device *dev) | 8 | static inline __be16 x25_type_trans(struct sk_buff *skb, struct net_device *dev) |
9 | { | 9 | { |
10 | skb->mac.raw = skb->data; | 10 | skb->mac.raw = skb->data; |
11 | skb->input_dev = skb->dev = dev; | ||
12 | skb->pkt_type = PACKET_HOST; | 11 | skb->pkt_type = PACKET_HOST; |
13 | 12 | ||
14 | return htons(ETH_P_X25); | 13 | return htons(ETH_P_X25); |
diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 868ef88ef971..a9d0d8c5dfbf 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h | |||
@@ -818,7 +818,6 @@ extern void xfrm6_init(void); | |||
818 | extern void xfrm6_fini(void); | 818 | extern void xfrm6_fini(void); |
819 | extern void xfrm_state_init(void); | 819 | extern void xfrm_state_init(void); |
820 | extern void xfrm4_state_init(void); | 820 | extern void xfrm4_state_init(void); |
821 | extern void xfrm4_state_fini(void); | ||
822 | extern void xfrm6_state_init(void); | 821 | extern void xfrm6_state_init(void); |
823 | extern void xfrm6_state_fini(void); | 822 | extern void xfrm6_state_fini(void); |
824 | 823 | ||
diff --git a/init/main.c b/init/main.c index c9c311cf1771..ff410063e4e1 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include <linux/rmap.h> | 47 | #include <linux/rmap.h> |
48 | #include <linux/mempolicy.h> | 48 | #include <linux/mempolicy.h> |
49 | #include <linux/key.h> | 49 | #include <linux/key.h> |
50 | #include <net/sock.h> | ||
50 | 51 | ||
51 | #include <asm/io.h> | 52 | #include <asm/io.h> |
52 | #include <asm/bugs.h> | 53 | #include <asm/bugs.h> |
@@ -80,7 +81,6 @@ | |||
80 | static int init(void *); | 81 | static int init(void *); |
81 | 82 | ||
82 | extern void init_IRQ(void); | 83 | extern void init_IRQ(void); |
83 | extern void sock_init(void); | ||
84 | extern void fork_init(unsigned long); | 84 | extern void fork_init(unsigned long); |
85 | extern void mca_init(void); | 85 | extern void mca_init(void); |
86 | extern void sbus_init(void); | 86 | extern void sbus_init(void); |
diff --git a/kernel/audit.c b/kernel/audit.c index ef35166fdc29..7f0699790d46 100644 --- a/kernel/audit.c +++ b/kernel/audit.c | |||
@@ -514,7 +514,8 @@ static int __init audit_init(void) | |||
514 | { | 514 | { |
515 | printk(KERN_INFO "audit: initializing netlink socket (%s)\n", | 515 | printk(KERN_INFO "audit: initializing netlink socket (%s)\n", |
516 | audit_default ? "enabled" : "disabled"); | 516 | audit_default ? "enabled" : "disabled"); |
517 | audit_sock = netlink_kernel_create(NETLINK_AUDIT, audit_receive); | 517 | audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive, |
518 | THIS_MODULE); | ||
518 | if (!audit_sock) | 519 | if (!audit_sock) |
519 | audit_panic("cannot initialize netlink socket"); | 520 | audit_panic("cannot initialize netlink socket"); |
520 | 521 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3e0bbee549ea..8e56e2495542 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/smp_lock.h> | 31 | #include <linux/smp_lock.h> |
32 | #include <linux/init.h> | 32 | #include <linux/init.h> |
33 | #include <linux/kernel.h> | 33 | #include <linux/kernel.h> |
34 | #include <linux/net.h> | ||
34 | #include <linux/sysrq.h> | 35 | #include <linux/sysrq.h> |
35 | #include <linux/highuid.h> | 36 | #include <linux/highuid.h> |
36 | #include <linux/writeback.h> | 37 | #include <linux/writeback.h> |
@@ -136,9 +137,6 @@ static struct ctl_table_header root_table_header = | |||
136 | 137 | ||
137 | static ctl_table kern_table[]; | 138 | static ctl_table kern_table[]; |
138 | static ctl_table vm_table[]; | 139 | static ctl_table vm_table[]; |
139 | #ifdef CONFIG_NET | ||
140 | extern ctl_table net_table[]; | ||
141 | #endif | ||
142 | static ctl_table proc_table[]; | 140 | static ctl_table proc_table[]; |
143 | static ctl_table fs_table[]; | 141 | static ctl_table fs_table[]; |
144 | static ctl_table debug_table[]; | 142 | static ctl_table debug_table[]; |
diff --git a/lib/Kconfig b/lib/Kconfig index eeb429a52152..e43197efeb9c 100644 --- a/lib/Kconfig +++ b/lib/Kconfig | |||
@@ -72,6 +72,9 @@ config TEXTSEARCH | |||
72 | config TEXTSEARCH_KMP | 72 | config TEXTSEARCH_KMP |
73 | tristate | 73 | tristate |
74 | 74 | ||
75 | config TEXTSEARCH_BM | ||
76 | tristate | ||
77 | |||
75 | config TEXTSEARCH_FSM | 78 | config TEXTSEARCH_FSM |
76 | tristate | 79 | tristate |
77 | 80 | ||
diff --git a/lib/Makefile b/lib/Makefile index f28d9031303c..52f83380f704 100644 --- a/lib/Makefile +++ b/lib/Makefile | |||
@@ -38,6 +38,7 @@ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ | |||
38 | 38 | ||
39 | obj-$(CONFIG_TEXTSEARCH) += textsearch.o | 39 | obj-$(CONFIG_TEXTSEARCH) += textsearch.o |
40 | obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o | 40 | obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o |
41 | obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o | ||
41 | obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o | 42 | obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o |
42 | 43 | ||
43 | hostprogs-y := gen_crc32table | 44 | hostprogs-y := gen_crc32table |
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 8e49d21057e4..04ca4429ddfa 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c | |||
@@ -93,6 +93,7 @@ static int send_uevent(const char *signal, const char *obj, | |||
93 | } | 93 | } |
94 | } | 94 | } |
95 | 95 | ||
96 | NETLINK_CB(skb).dst_group = 1; | ||
96 | return netlink_broadcast(uevent_sock, skb, 0, 1, gfp_mask); | 97 | return netlink_broadcast(uevent_sock, skb, 0, 1, gfp_mask); |
97 | } | 98 | } |
98 | 99 | ||
@@ -153,7 +154,8 @@ EXPORT_SYMBOL_GPL(kobject_uevent_atomic); | |||
153 | 154 | ||
154 | static int __init kobject_uevent_init(void) | 155 | static int __init kobject_uevent_init(void) |
155 | { | 156 | { |
156 | uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, NULL); | 157 | uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, 1, NULL, |
158 | THIS_MODULE); | ||
157 | 159 | ||
158 | if (!uevent_sock) { | 160 | if (!uevent_sock) { |
159 | printk(KERN_ERR | 161 | printk(KERN_ERR |
diff --git a/lib/ts_bm.c b/lib/ts_bm.c new file mode 100644 index 000000000000..2cc79112ecc3 --- /dev/null +++ b/lib/ts_bm.c | |||
@@ -0,0 +1,185 @@ | |||
1 | /* | ||
2 | * lib/ts_bm.c Boyer-Moore text search implementation | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation; either version | ||
7 | * 2 of the License, or (at your option) any later version. | ||
8 | * | ||
9 | * Authors: Pablo Neira Ayuso <pablo@eurodev.net> | ||
10 | * | ||
11 | * ========================================================================== | ||
12 | * | ||
13 | * Implements Boyer-Moore string matching algorithm: | ||
14 | * | ||
15 | * [1] A Fast String Searching Algorithm, R.S. Boyer and Moore. | ||
16 | * Communications of the Association for Computing Machinery, | ||
17 | * 20(10), 1977, pp. 762-772. | ||
18 | * http://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf | ||
19 | * | ||
20 | * [2] Handbook of Exact String Matching Algorithms, Thierry Lecroq, 2004 | ||
21 | * http://www-igm.univ-mlv.fr/~lecroq/string/string.pdf | ||
22 | * | ||
23 | * Note: Since Boyer-Moore (BM) performs searches for matchings from right | ||
24 | * to left, it's still possible that a matching could be spread over | ||
25 | * multiple blocks, in that case this algorithm won't find any coincidence. | ||
26 | * | ||
27 | * If you're willing to ensure that such thing won't ever happen, use the | ||
28 | * Knuth-Pratt-Morris (KMP) implementation instead. In conclusion, choose | ||
29 | * the proper string search algorithm depending on your setting. | ||
30 | * | ||
31 | * Say you're using the textsearch infrastructure for filtering, NIDS or | ||
32 | * any similar security focused purpose, then go KMP. Otherwise, if you | ||
33 | * really care about performance, say you're classifying packets to apply | ||
34 | * Quality of Service (QoS) policies, and you don't mind about possible | ||
35 | * matchings spread over multiple fragments, then go BM. | ||
36 | */ | ||
37 | |||
38 | #include <linux/config.h> | ||
39 | #include <linux/kernel.h> | ||
40 | #include <linux/module.h> | ||
41 | #include <linux/types.h> | ||
42 | #include <linux/string.h> | ||
43 | #include <linux/textsearch.h> | ||
44 | |||
45 | /* Alphabet size, use ASCII */ | ||
46 | #define ASIZE 256 | ||
47 | |||
48 | #if 0 | ||
49 | #define DEBUGP printk | ||
50 | #else | ||
51 | #define DEBUGP(args, format...) | ||
52 | #endif | ||
53 | |||
54 | struct ts_bm | ||
55 | { | ||
56 | u8 * pattern; | ||
57 | unsigned int patlen; | ||
58 | unsigned int bad_shift[ASIZE]; | ||
59 | unsigned int good_shift[0]; | ||
60 | }; | ||
61 | |||
62 | static unsigned int bm_find(struct ts_config *conf, struct ts_state *state) | ||
63 | { | ||
64 | struct ts_bm *bm = ts_config_priv(conf); | ||
65 | unsigned int i, text_len, consumed = state->offset; | ||
66 | const u8 *text; | ||
67 | int shift = bm->patlen, bs; | ||
68 | |||
69 | for (;;) { | ||
70 | text_len = conf->get_next_block(consumed, &text, conf, state); | ||
71 | |||
72 | if (unlikely(text_len == 0)) | ||
73 | break; | ||
74 | |||
75 | while (shift < text_len) { | ||
76 | DEBUGP("Searching in position %d (%c)\n", | ||
77 | shift, text[shift]); | ||
78 | for (i = 0; i < bm->patlen; i++) | ||
79 | if (text[shift-i] != bm->pattern[bm->patlen-1-i]) | ||
80 | goto next; | ||
81 | |||
82 | /* London calling... */ | ||
83 | DEBUGP("found!\n"); | ||
84 | return consumed += (shift-(bm->patlen-1)); | ||
85 | |||
86 | next: bs = bm->bad_shift[text[shift-i]]; | ||
87 | |||
88 | /* Now jumping to... */ | ||
89 | shift = max_t(int, shift-i+bs, shift+bm->good_shift[i]); | ||
90 | } | ||
91 | consumed += text_len; | ||
92 | } | ||
93 | |||
94 | return UINT_MAX; | ||
95 | } | ||
96 | |||
97 | static void compute_prefix_tbl(struct ts_bm *bm, const u8 *pattern, | ||
98 | unsigned int len) | ||
99 | { | ||
100 | int i, j, ended, l[ASIZE]; | ||
101 | |||
102 | for (i = 0; i < ASIZE; i++) | ||
103 | bm->bad_shift[i] = len; | ||
104 | for (i = 0; i < len - 1; i++) | ||
105 | bm->bad_shift[pattern[i]] = len - 1 - i; | ||
106 | |||
107 | /* Compute the good shift array, used to match reocurrences | ||
108 | * of a subpattern */ | ||
109 | for (i = 1; i < bm->patlen; i++) { | ||
110 | for (j = 0; j < bm->patlen && bm->pattern[bm->patlen - 1 - j] | ||
111 | == bm->pattern[bm->patlen - 1 - i - j]; j++); | ||
112 | l[i] = j; | ||
113 | } | ||
114 | |||
115 | bm->good_shift[0] = 1; | ||
116 | for (i = 1; i < bm->patlen; i++) | ||
117 | bm->good_shift[i] = bm->patlen; | ||
118 | for (i = bm->patlen - 1; i > 0; i--) | ||
119 | bm->good_shift[l[i]] = i; | ||
120 | ended = 0; | ||
121 | for (i = 0; i < bm->patlen; i++) { | ||
122 | if (l[i] == bm->patlen - 1 - i) | ||
123 | ended = i; | ||
124 | if (ended) | ||
125 | bm->good_shift[i] = ended; | ||
126 | } | ||
127 | } | ||
128 | |||
129 | static struct ts_config *bm_init(const void *pattern, unsigned int len, | ||
130 | int gfp_mask) | ||
131 | { | ||
132 | struct ts_config *conf; | ||
133 | struct ts_bm *bm; | ||
134 | unsigned int prefix_tbl_len = len * sizeof(unsigned int); | ||
135 | size_t priv_size = sizeof(*bm) + len + prefix_tbl_len; | ||
136 | |||
137 | conf = alloc_ts_config(priv_size, gfp_mask); | ||
138 | if (IS_ERR(conf)) | ||
139 | return conf; | ||
140 | |||
141 | bm = ts_config_priv(conf); | ||
142 | bm->patlen = len; | ||
143 | bm->pattern = (u8 *) bm->good_shift + prefix_tbl_len; | ||
144 | compute_prefix_tbl(bm, pattern, len); | ||
145 | memcpy(bm->pattern, pattern, len); | ||
146 | |||
147 | return conf; | ||
148 | } | ||
149 | |||
150 | static void *bm_get_pattern(struct ts_config *conf) | ||
151 | { | ||
152 | struct ts_bm *bm = ts_config_priv(conf); | ||
153 | return bm->pattern; | ||
154 | } | ||
155 | |||
156 | static unsigned int bm_get_pattern_len(struct ts_config *conf) | ||
157 | { | ||
158 | struct ts_bm *bm = ts_config_priv(conf); | ||
159 | return bm->patlen; | ||
160 | } | ||
161 | |||
162 | static struct ts_ops bm_ops = { | ||
163 | .name = "bm", | ||
164 | .find = bm_find, | ||
165 | .init = bm_init, | ||
166 | .get_pattern = bm_get_pattern, | ||
167 | .get_pattern_len = bm_get_pattern_len, | ||
168 | .owner = THIS_MODULE, | ||
169 | .list = LIST_HEAD_INIT(bm_ops.list) | ||
170 | }; | ||
171 | |||
172 | static int __init init_bm(void) | ||
173 | { | ||
174 | return textsearch_register(&bm_ops); | ||
175 | } | ||
176 | |||
177 | static void __exit exit_bm(void) | ||
178 | { | ||
179 | textsearch_unregister(&bm_ops); | ||
180 | } | ||
181 | |||
182 | MODULE_LICENSE("GPL"); | ||
183 | |||
184 | module_init(init_bm); | ||
185 | module_exit(exit_bm); | ||
diff --git a/mm/memory.c b/mm/memory.c index e046b7e4b530..a596c1172248 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -498,6 +498,17 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
498 | unsigned long addr = vma->vm_start; | 498 | unsigned long addr = vma->vm_start; |
499 | unsigned long end = vma->vm_end; | 499 | unsigned long end = vma->vm_end; |
500 | 500 | ||
501 | /* | ||
502 | * Don't copy ptes where a page fault will fill them correctly. | ||
503 | * Fork becomes much lighter when there are big shared or private | ||
504 | * readonly mappings. The tradeoff is that copy_page_range is more | ||
505 | * efficient than faulting. | ||
506 | */ | ||
507 | if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_RESERVED))) { | ||
508 | if (!vma->anon_vma) | ||
509 | return 0; | ||
510 | } | ||
511 | |||
501 | if (is_vm_hugetlb_page(vma)) | 512 | if (is_vm_hugetlb_page(vma)) |
502 | return copy_hugetlb_page_range(dst_mm, src_mm, vma); | 513 | return copy_hugetlb_page_range(dst_mm, src_mm, vma); |
503 | 514 | ||
diff --git a/net/802/fc.c b/net/802/fc.c index 640d34e026c2..282c4ab1abe6 100644 --- a/net/802/fc.c +++ b/net/802/fc.c | |||
@@ -87,7 +87,7 @@ static int fc_rebuild_header(struct sk_buff *skb) | |||
87 | struct fch_hdr *fch=(struct fch_hdr *)skb->data; | 87 | struct fch_hdr *fch=(struct fch_hdr *)skb->data; |
88 | struct fcllc *fcllc=(struct fcllc *)(skb->data+sizeof(struct fch_hdr)); | 88 | struct fcllc *fcllc=(struct fcllc *)(skb->data+sizeof(struct fch_hdr)); |
89 | if(fcllc->ethertype != htons(ETH_P_IP)) { | 89 | if(fcllc->ethertype != htons(ETH_P_IP)) { |
90 | printk("fc_rebuild_header: Don't know how to resolve type %04X addresses ?\n",(unsigned int)htons(fcllc->ethertype)); | 90 | printk("fc_rebuild_header: Don't know how to resolve type %04X addresses ?\n", ntohs(fcllc->ethertype)); |
91 | return 0; | 91 | return 0; |
92 | } | 92 | } |
93 | #ifdef CONFIG_INET | 93 | #ifdef CONFIG_INET |
diff --git a/net/802/fddi.c b/net/802/fddi.c index 5ce24c4bb840..ac242a4bc346 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c | |||
@@ -108,8 +108,8 @@ static int fddi_rebuild_header(struct sk_buff *skb) | |||
108 | else | 108 | else |
109 | #endif | 109 | #endif |
110 | { | 110 | { |
111 | printk("%s: Don't know how to resolve type %02X addresses.\n", | 111 | printk("%s: Don't know how to resolve type %04X addresses.\n", |
112 | skb->dev->name, htons(fddi->hdr.llc_snap.ethertype)); | 112 | skb->dev->name, ntohs(fddi->hdr.llc_snap.ethertype)); |
113 | return(0); | 113 | return(0); |
114 | } | 114 | } |
115 | } | 115 | } |
diff --git a/net/802/hippi.c b/net/802/hippi.c index 051e8af56a77..6d7fed3dd99a 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c | |||
@@ -51,6 +51,7 @@ static int hippi_header(struct sk_buff *skb, struct net_device *dev, | |||
51 | unsigned len) | 51 | unsigned len) |
52 | { | 52 | { |
53 | struct hippi_hdr *hip = (struct hippi_hdr *)skb_push(skb, HIPPI_HLEN); | 53 | struct hippi_hdr *hip = (struct hippi_hdr *)skb_push(skb, HIPPI_HLEN); |
54 | struct hippi_cb *hcb = (struct hippi_cb *) skb->cb; | ||
54 | 55 | ||
55 | if (!len){ | 56 | if (!len){ |
56 | len = skb->len - HIPPI_HLEN; | 57 | len = skb->len - HIPPI_HLEN; |
@@ -84,9 +85,10 @@ static int hippi_header(struct sk_buff *skb, struct net_device *dev, | |||
84 | if (daddr) | 85 | if (daddr) |
85 | { | 86 | { |
86 | memcpy(hip->le.dest_switch_addr, daddr + 3, 3); | 87 | memcpy(hip->le.dest_switch_addr, daddr + 3, 3); |
87 | memcpy(&skb->private.ifield, daddr + 2, 4); | 88 | memcpy(&hcb->ifield, daddr + 2, 4); |
88 | return HIPPI_HLEN; | 89 | return HIPPI_HLEN; |
89 | } | 90 | } |
91 | hcb->ifield = 0; | ||
90 | return -((int)HIPPI_HLEN); | 92 | return -((int)HIPPI_HLEN); |
91 | } | 93 | } |
92 | 94 | ||
@@ -122,7 +124,7 @@ static int hippi_rebuild_header(struct sk_buff *skb) | |||
122 | * Determine the packet's protocol ID. | 124 | * Determine the packet's protocol ID. |
123 | */ | 125 | */ |
124 | 126 | ||
125 | unsigned short hippi_type_trans(struct sk_buff *skb, struct net_device *dev) | 127 | __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev) |
126 | { | 128 | { |
127 | struct hippi_hdr *hip; | 129 | struct hippi_hdr *hip; |
128 | 130 | ||
diff --git a/net/802/p8022.c b/net/802/p8022.c index 5ae63416df6d..b24817c63ca8 100644 --- a/net/802/p8022.c +++ b/net/802/p8022.c | |||
@@ -35,7 +35,8 @@ static int p8022_request(struct datalink_proto *dl, struct sk_buff *skb, | |||
35 | struct datalink_proto *register_8022_client(unsigned char type, | 35 | struct datalink_proto *register_8022_client(unsigned char type, |
36 | int (*func)(struct sk_buff *skb, | 36 | int (*func)(struct sk_buff *skb, |
37 | struct net_device *dev, | 37 | struct net_device *dev, |
38 | struct packet_type *pt)) | 38 | struct packet_type *pt, |
39 | struct net_device *orig_dev)) | ||
39 | { | 40 | { |
40 | struct datalink_proto *proto; | 41 | struct datalink_proto *proto; |
41 | 42 | ||
diff --git a/net/802/p8023.c b/net/802/p8023.c index a0b61b40225f..6368d3dce444 100644 --- a/net/802/p8023.c +++ b/net/802/p8023.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/skbuff.h> | 20 | #include <linux/skbuff.h> |
21 | 21 | ||
22 | #include <net/datalink.h> | 22 | #include <net/datalink.h> |
23 | #include <net/p8022.h> | ||
23 | 24 | ||
24 | /* | 25 | /* |
25 | * Place an 802.3 header on a packet. The driver will do the mac | 26 | * Place an 802.3 header on a packet. The driver will do the mac |
diff --git a/net/802/psnap.c b/net/802/psnap.c index 1053821ddf93..ab80b1fab53c 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c | |||
@@ -47,7 +47,7 @@ static struct datalink_proto *find_snap_client(unsigned char *desc) | |||
47 | * A SNAP packet has arrived | 47 | * A SNAP packet has arrived |
48 | */ | 48 | */ |
49 | static int snap_rcv(struct sk_buff *skb, struct net_device *dev, | 49 | static int snap_rcv(struct sk_buff *skb, struct net_device *dev, |
50 | struct packet_type *pt) | 50 | struct packet_type *pt, struct net_device *orig_dev) |
51 | { | 51 | { |
52 | int rc = 1; | 52 | int rc = 1; |
53 | struct datalink_proto *proto; | 53 | struct datalink_proto *proto; |
@@ -61,7 +61,7 @@ static int snap_rcv(struct sk_buff *skb, struct net_device *dev, | |||
61 | /* Pass the frame on. */ | 61 | /* Pass the frame on. */ |
62 | skb->h.raw += 5; | 62 | skb->h.raw += 5; |
63 | skb_pull(skb, 5); | 63 | skb_pull(skb, 5); |
64 | rc = proto->rcvfunc(skb, dev, &snap_packet_type); | 64 | rc = proto->rcvfunc(skb, dev, &snap_packet_type, orig_dev); |
65 | } else { | 65 | } else { |
66 | skb->sk = NULL; | 66 | skb->sk = NULL; |
67 | kfree_skb(skb); | 67 | kfree_skb(skb); |
@@ -118,7 +118,8 @@ module_exit(snap_exit); | |||
118 | struct datalink_proto *register_snap_client(unsigned char *desc, | 118 | struct datalink_proto *register_snap_client(unsigned char *desc, |
119 | int (*rcvfunc)(struct sk_buff *, | 119 | int (*rcvfunc)(struct sk_buff *, |
120 | struct net_device *, | 120 | struct net_device *, |
121 | struct packet_type *)) | 121 | struct packet_type *, |
122 | struct net_device *)) | ||
122 | { | 123 | { |
123 | struct datalink_proto *proto = NULL; | 124 | struct datalink_proto *proto = NULL; |
124 | 125 | ||
diff --git a/net/802/sysctl_net_802.c b/net/802/sysctl_net_802.c index 36079630c49f..700129556c13 100644 --- a/net/802/sysctl_net_802.c +++ b/net/802/sysctl_net_802.c | |||
@@ -10,9 +10,10 @@ | |||
10 | * 2 of the License, or (at your option) any later version. | 10 | * 2 of the License, or (at your option) any later version. |
11 | */ | 11 | */ |
12 | 12 | ||
13 | #include <linux/config.h> | ||
13 | #include <linux/mm.h> | 14 | #include <linux/mm.h> |
15 | #include <linux/if_tr.h> | ||
14 | #include <linux/sysctl.h> | 16 | #include <linux/sysctl.h> |
15 | #include <linux/config.h> | ||
16 | 17 | ||
17 | #ifdef CONFIG_TR | 18 | #ifdef CONFIG_TR |
18 | extern int sysctl_tr_rif_timeout; | 19 | extern int sysctl_tr_rif_timeout; |
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 508b1fa14546..9ae3a14dd016 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h | |||
@@ -51,7 +51,7 @@ struct net_device *__find_vlan_dev(struct net_device* real_dev, | |||
51 | /* found in vlan_dev.c */ | 51 | /* found in vlan_dev.c */ |
52 | int vlan_dev_rebuild_header(struct sk_buff *skb); | 52 | int vlan_dev_rebuild_header(struct sk_buff *skb); |
53 | int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, | 53 | int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, |
54 | struct packet_type* ptype); | 54 | struct packet_type *ptype, struct net_device *orig_dev); |
55 | int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, | 55 | int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, |
56 | unsigned short type, void *daddr, void *saddr, | 56 | unsigned short type, void *daddr, void *saddr, |
57 | unsigned len); | 57 | unsigned len); |
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 49c487413518..145f5cde96cf 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c | |||
@@ -113,7 +113,7 @@ static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) | |||
113 | * | 113 | * |
114 | */ | 114 | */ |
115 | int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, | 115 | int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, |
116 | struct packet_type* ptype) | 116 | struct packet_type* ptype, struct net_device *orig_dev) |
117 | { | 117 | { |
118 | unsigned char *rawp = NULL; | 118 | unsigned char *rawp = NULL; |
119 | struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data); | 119 | struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data); |
diff --git a/net/Kconfig b/net/Kconfig index 40a31ba86d2c..c07aafb59a0f 100644 --- a/net/Kconfig +++ b/net/Kconfig | |||
@@ -147,6 +147,7 @@ source "net/bridge/netfilter/Kconfig" | |||
147 | 147 | ||
148 | endif | 148 | endif |
149 | 149 | ||
150 | source "net/dccp/Kconfig" | ||
150 | source "net/sctp/Kconfig" | 151 | source "net/sctp/Kconfig" |
151 | source "net/atm/Kconfig" | 152 | source "net/atm/Kconfig" |
152 | source "net/bridge/Kconfig" | 153 | source "net/bridge/Kconfig" |
@@ -205,6 +206,8 @@ config NET_PKTGEN | |||
205 | To compile this code as a module, choose M here: the | 206 | To compile this code as a module, choose M here: the |
206 | module will be called pktgen. | 207 | module will be called pktgen. |
207 | 208 | ||
209 | source "net/netfilter/Kconfig" | ||
210 | |||
208 | endmenu | 211 | endmenu |
209 | 212 | ||
210 | endmenu | 213 | endmenu |
diff --git a/net/Makefile b/net/Makefile index 8e2bdc025ab8..7e6eff206c81 100644 --- a/net/Makefile +++ b/net/Makefile | |||
@@ -16,6 +16,7 @@ obj-$(CONFIG_NET) += $(tmp-y) | |||
16 | obj-$(CONFIG_LLC) += llc/ | 16 | obj-$(CONFIG_LLC) += llc/ |
17 | obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ | 17 | obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ |
18 | obj-$(CONFIG_INET) += ipv4/ | 18 | obj-$(CONFIG_INET) += ipv4/ |
19 | obj-$(CONFIG_NETFILTER) += netfilter/ | ||
19 | obj-$(CONFIG_XFRM) += xfrm/ | 20 | obj-$(CONFIG_XFRM) += xfrm/ |
20 | obj-$(CONFIG_UNIX) += unix/ | 21 | obj-$(CONFIG_UNIX) += unix/ |
21 | ifneq ($(CONFIG_IPV6),) | 22 | ifneq ($(CONFIG_IPV6),) |
@@ -41,6 +42,7 @@ obj-$(CONFIG_ATM) += atm/ | |||
41 | obj-$(CONFIG_DECNET) += decnet/ | 42 | obj-$(CONFIG_DECNET) += decnet/ |
42 | obj-$(CONFIG_ECONET) += econet/ | 43 | obj-$(CONFIG_ECONET) += econet/ |
43 | obj-$(CONFIG_VLAN_8021Q) += 8021q/ | 44 | obj-$(CONFIG_VLAN_8021Q) += 8021q/ |
45 | obj-$(CONFIG_IP_DCCP) += dccp/ | ||
44 | obj-$(CONFIG_IP_SCTP) += sctp/ | 46 | obj-$(CONFIG_IP_SCTP) += sctp/ |
45 | 47 | ||
46 | ifeq ($(CONFIG_NET),y) | 48 | ifeq ($(CONFIG_NET),y) |
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index c34614ea5fce..7076097debc2 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c | |||
@@ -698,7 +698,7 @@ static void __aarp_resolved(struct aarp_entry **list, struct aarp_entry *a, | |||
698 | * frame. We currently only support Ethernet. | 698 | * frame. We currently only support Ethernet. |
699 | */ | 699 | */ |
700 | static int aarp_rcv(struct sk_buff *skb, struct net_device *dev, | 700 | static int aarp_rcv(struct sk_buff *skb, struct net_device *dev, |
701 | struct packet_type *pt) | 701 | struct packet_type *pt, struct net_device *orig_dev) |
702 | { | 702 | { |
703 | struct elapaarp *ea = aarp_hdr(skb); | 703 | struct elapaarp *ea = aarp_hdr(skb); |
704 | int hash, ret = 0; | 704 | int hash, ret = 0; |
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 192b529f86a4..1d31b3a3f1e5 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c | |||
@@ -53,12 +53,12 @@ | |||
53 | 53 | ||
54 | #include <linux/config.h> | 54 | #include <linux/config.h> |
55 | #include <linux/module.h> | 55 | #include <linux/module.h> |
56 | #include <linux/tcp.h> | ||
57 | #include <linux/if_arp.h> | 56 | #include <linux/if_arp.h> |
58 | #include <linux/termios.h> /* For TIOCOUTQ/INQ */ | 57 | #include <linux/termios.h> /* For TIOCOUTQ/INQ */ |
59 | #include <net/datalink.h> | 58 | #include <net/datalink.h> |
60 | #include <net/psnap.h> | 59 | #include <net/psnap.h> |
61 | #include <net/sock.h> | 60 | #include <net/sock.h> |
61 | #include <net/tcp_states.h> | ||
62 | #include <net/route.h> | 62 | #include <net/route.h> |
63 | #include <linux/atalk.h> | 63 | #include <linux/atalk.h> |
64 | 64 | ||
@@ -1390,7 +1390,7 @@ free_it: | |||
1390 | * [ie ARPHRD_ETHERTALK] | 1390 | * [ie ARPHRD_ETHERTALK] |
1391 | */ | 1391 | */ |
1392 | static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, | 1392 | static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, |
1393 | struct packet_type *pt) | 1393 | struct packet_type *pt, struct net_device *orig_dev) |
1394 | { | 1394 | { |
1395 | struct ddpehdr *ddp; | 1395 | struct ddpehdr *ddp; |
1396 | struct sock *sock; | 1396 | struct sock *sock; |
@@ -1482,7 +1482,7 @@ freeit: | |||
1482 | * header and append a long one. | 1482 | * header and append a long one. |
1483 | */ | 1483 | */ |
1484 | static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, | 1484 | static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, |
1485 | struct packet_type *pt) | 1485 | struct packet_type *pt, struct net_device *orig_dev) |
1486 | { | 1486 | { |
1487 | /* Expand any short form frames */ | 1487 | /* Expand any short form frames */ |
1488 | if (skb->mac.raw[2] == 1) { | 1488 | if (skb->mac.raw[2] == 1) { |
@@ -1528,7 +1528,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, | |||
1528 | } | 1528 | } |
1529 | skb->h.raw = skb->data; | 1529 | skb->h.raw = skb->data; |
1530 | 1530 | ||
1531 | return atalk_rcv(skb, dev, pt); | 1531 | return atalk_rcv(skb, dev, pt, orig_dev); |
1532 | freeit: | 1532 | freeit: |
1533 | kfree_skb(skb); | 1533 | kfree_skb(skb); |
1534 | return 0; | 1534 | return 0; |
diff --git a/net/atm/ipcommon.c b/net/atm/ipcommon.c index 181a3002d8ad..4b1faca5013f 100644 --- a/net/atm/ipcommon.c +++ b/net/atm/ipcommon.c | |||
@@ -34,7 +34,6 @@ | |||
34 | 34 | ||
35 | void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to) | 35 | void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to) |
36 | { | 36 | { |
37 | struct sk_buff *skb; | ||
38 | unsigned long flags; | 37 | unsigned long flags; |
39 | struct sk_buff *skb_from = (struct sk_buff *) from; | 38 | struct sk_buff *skb_from = (struct sk_buff *) from; |
40 | struct sk_buff *skb_to = (struct sk_buff *) to; | 39 | struct sk_buff *skb_to = (struct sk_buff *) to; |
@@ -47,8 +46,6 @@ void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to) | |||
47 | prev->next = skb_to; | 46 | prev->next = skb_to; |
48 | to->prev->next = from->next; | 47 | to->prev->next = from->next; |
49 | to->prev = from->prev; | 48 | to->prev = from->prev; |
50 | for (skb = from->next; skb != skb_to; skb = skb->next) | ||
51 | skb->list = to; | ||
52 | to->qlen += from->qlen; | 49 | to->qlen += from->qlen; |
53 | spin_unlock(&to->lock); | 50 | spin_unlock(&to->lock); |
54 | from->prev = skb_from; | 51 | from->prev = skb_from; |
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index a5c94f11547c..ea43dfb774e2 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c | |||
@@ -45,7 +45,7 @@ | |||
45 | #include <linux/sysctl.h> | 45 | #include <linux/sysctl.h> |
46 | #include <linux/init.h> | 46 | #include <linux/init.h> |
47 | #include <linux/spinlock.h> | 47 | #include <linux/spinlock.h> |
48 | #include <net/tcp.h> | 48 | #include <net/tcp_states.h> |
49 | #include <net/ip.h> | 49 | #include <net/ip.h> |
50 | #include <net/arp.h> | 50 | #include <net/arp.h> |
51 | 51 | ||
diff --git a/net/ax25/ax25_ds_in.c b/net/ax25/ax25_ds_in.c index 8adc0022cf58..edcaa897027c 100644 --- a/net/ax25/ax25_ds_in.c +++ b/net/ax25/ax25_ds_in.c | |||
@@ -22,8 +22,7 @@ | |||
22 | #include <linux/netdevice.h> | 22 | #include <linux/netdevice.h> |
23 | #include <linux/skbuff.h> | 23 | #include <linux/skbuff.h> |
24 | #include <net/sock.h> | 24 | #include <net/sock.h> |
25 | #include <net/ip.h> /* For ip_rcv */ | 25 | #include <net/tcp_states.h> |
26 | #include <net/tcp.h> | ||
27 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
28 | #include <asm/system.h> | 27 | #include <asm/system.h> |
29 | #include <linux/fcntl.h> | 28 | #include <linux/fcntl.h> |
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index 3a8b67316fc3..061083efc1dc 100644 --- a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c | |||
@@ -18,7 +18,7 @@ | |||
18 | #include <linux/string.h> | 18 | #include <linux/string.h> |
19 | #include <linux/sockios.h> | 19 | #include <linux/sockios.h> |
20 | #include <linux/net.h> | 20 | #include <linux/net.h> |
21 | #include <net/tcp.h> | 21 | #include <net/tcp_states.h> |
22 | #include <net/ax25.h> | 22 | #include <net/ax25.h> |
23 | #include <linux/inet.h> | 23 | #include <linux/inet.h> |
24 | #include <linux/netdevice.h> | 24 | #include <linux/netdevice.h> |
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 3dc808fde33f..810c9c76c2e0 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c | |||
@@ -9,7 +9,6 @@ | |||
9 | * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de) | 9 | * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de) |
10 | * Copyright (C) Hans-Joachim Hetscher DD8NE (dd8ne@bnv-bamberg.de) | 10 | * Copyright (C) Hans-Joachim Hetscher DD8NE (dd8ne@bnv-bamberg.de) |
11 | */ | 11 | */ |
12 | #include <linux/config.h> | ||
13 | #include <linux/errno.h> | 12 | #include <linux/errno.h> |
14 | #include <linux/types.h> | 13 | #include <linux/types.h> |
15 | #include <linux/socket.h> | 14 | #include <linux/socket.h> |
@@ -26,9 +25,7 @@ | |||
26 | #include <linux/skbuff.h> | 25 | #include <linux/skbuff.h> |
27 | #include <linux/netfilter.h> | 26 | #include <linux/netfilter.h> |
28 | #include <net/sock.h> | 27 | #include <net/sock.h> |
29 | #include <net/ip.h> /* For ip_rcv */ | 28 | #include <net/tcp_states.h> |
30 | #include <net/tcp.h> | ||
31 | #include <net/arp.h> /* For arp_rcv */ | ||
32 | #include <asm/uaccess.h> | 29 | #include <asm/uaccess.h> |
33 | #include <asm/system.h> | 30 | #include <asm/system.h> |
34 | #include <linux/fcntl.h> | 31 | #include <linux/fcntl.h> |
@@ -114,7 +111,6 @@ int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb) | |||
114 | 111 | ||
115 | pid = *skb->data; | 112 | pid = *skb->data; |
116 | 113 | ||
117 | #ifdef CONFIG_INET | ||
118 | if (pid == AX25_P_IP) { | 114 | if (pid == AX25_P_IP) { |
119 | /* working around a TCP bug to keep additional listeners | 115 | /* working around a TCP bug to keep additional listeners |
120 | * happy. TCP re-uses the buffer and destroys the original | 116 | * happy. TCP re-uses the buffer and destroys the original |
@@ -132,10 +128,9 @@ int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb) | |||
132 | skb->dev = ax25->ax25_dev->dev; | 128 | skb->dev = ax25->ax25_dev->dev; |
133 | skb->pkt_type = PACKET_HOST; | 129 | skb->pkt_type = PACKET_HOST; |
134 | skb->protocol = htons(ETH_P_IP); | 130 | skb->protocol = htons(ETH_P_IP); |
135 | ip_rcv(skb, skb->dev, NULL); /* Wrong ptype */ | 131 | netif_rx(skb); |
136 | return 1; | 132 | return 1; |
137 | } | 133 | } |
138 | #endif | ||
139 | if (pid == AX25_P_SEGMENT) { | 134 | if (pid == AX25_P_SEGMENT) { |
140 | skb_pull(skb, 1); /* Remove PID */ | 135 | skb_pull(skb, 1); /* Remove PID */ |
141 | return ax25_rx_fragment(ax25, skb); | 136 | return ax25_rx_fragment(ax25, skb); |
@@ -250,7 +245,6 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, | |||
250 | 245 | ||
251 | /* Now we are pointing at the pid byte */ | 246 | /* Now we are pointing at the pid byte */ |
252 | switch (skb->data[1]) { | 247 | switch (skb->data[1]) { |
253 | #ifdef CONFIG_INET | ||
254 | case AX25_P_IP: | 248 | case AX25_P_IP: |
255 | skb_pull(skb,2); /* drop PID/CTRL */ | 249 | skb_pull(skb,2); /* drop PID/CTRL */ |
256 | skb->h.raw = skb->data; | 250 | skb->h.raw = skb->data; |
@@ -258,7 +252,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, | |||
258 | skb->dev = dev; | 252 | skb->dev = dev; |
259 | skb->pkt_type = PACKET_HOST; | 253 | skb->pkt_type = PACKET_HOST; |
260 | skb->protocol = htons(ETH_P_IP); | 254 | skb->protocol = htons(ETH_P_IP); |
261 | ip_rcv(skb, dev, ptype); /* Note ptype here is the wrong one, fix me later */ | 255 | netif_rx(skb); |
262 | break; | 256 | break; |
263 | 257 | ||
264 | case AX25_P_ARP: | 258 | case AX25_P_ARP: |
@@ -268,9 +262,8 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, | |||
268 | skb->dev = dev; | 262 | skb->dev = dev; |
269 | skb->pkt_type = PACKET_HOST; | 263 | skb->pkt_type = PACKET_HOST; |
270 | skb->protocol = htons(ETH_P_ARP); | 264 | skb->protocol = htons(ETH_P_ARP); |
271 | arp_rcv(skb, dev, ptype); /* Note ptype here is wrong... */ | 265 | netif_rx(skb); |
272 | break; | 266 | break; |
273 | #endif | ||
274 | case AX25_P_TEXT: | 267 | case AX25_P_TEXT: |
275 | /* Now find a suitable dgram socket */ | 268 | /* Now find a suitable dgram socket */ |
276 | sk = ax25_get_socket(&dest, &src, SOCK_DGRAM); | 269 | sk = ax25_get_socket(&dest, &src, SOCK_DGRAM); |
@@ -454,7 +447,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, | |||
454 | * Receive an AX.25 frame via a SLIP interface. | 447 | * Receive an AX.25 frame via a SLIP interface. |
455 | */ | 448 | */ |
456 | int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev, | 449 | int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev, |
457 | struct packet_type *ptype) | 450 | struct packet_type *ptype, struct net_device *orig_dev) |
458 | { | 451 | { |
459 | skb->sk = NULL; /* Initially we don't know who it's for */ | 452 | skb->sk = NULL; /* Initially we don't know who it's for */ |
460 | skb->destructor = NULL; /* Who initializes this, dammit?! */ | 453 | skb->destructor = NULL; /* Who initializes this, dammit?! */ |
diff --git a/net/ax25/ax25_std_in.c b/net/ax25/ax25_std_in.c index 7131873322c4..f6ed283e9de8 100644 --- a/net/ax25/ax25_std_in.c +++ b/net/ax25/ax25_std_in.c | |||
@@ -29,8 +29,7 @@ | |||
29 | #include <linux/netdevice.h> | 29 | #include <linux/netdevice.h> |
30 | #include <linux/skbuff.h> | 30 | #include <linux/skbuff.h> |
31 | #include <net/sock.h> | 31 | #include <net/sock.h> |
32 | #include <net/ip.h> /* For ip_rcv */ | 32 | #include <net/tcp_states.h> |
33 | #include <net/tcp.h> | ||
34 | #include <asm/uaccess.h> | 33 | #include <asm/uaccess.h> |
35 | #include <asm/system.h> | 34 | #include <asm/system.h> |
36 | #include <linux/fcntl.h> | 35 | #include <linux/fcntl.h> |
diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c index 066897bc0749..a29c480a4dc1 100644 --- a/net/ax25/ax25_std_timer.c +++ b/net/ax25/ax25_std_timer.c | |||
@@ -24,7 +24,7 @@ | |||
24 | #include <linux/netdevice.h> | 24 | #include <linux/netdevice.h> |
25 | #include <linux/skbuff.h> | 25 | #include <linux/skbuff.h> |
26 | #include <net/sock.h> | 26 | #include <net/sock.h> |
27 | #include <net/tcp.h> | 27 | #include <net/tcp_states.h> |
28 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
29 | #include <asm/system.h> | 29 | #include <asm/system.h> |
30 | #include <linux/fcntl.h> | 30 | #include <linux/fcntl.h> |
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 99694b57f6f5..c41dbe5fadee 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c | |||
@@ -24,7 +24,7 @@ | |||
24 | #include <linux/netdevice.h> | 24 | #include <linux/netdevice.h> |
25 | #include <linux/skbuff.h> | 25 | #include <linux/skbuff.h> |
26 | #include <net/sock.h> | 26 | #include <net/sock.h> |
27 | #include <net/tcp.h> | 27 | #include <net/tcp_states.h> |
28 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
29 | #include <asm/system.h> | 29 | #include <asm/system.h> |
30 | #include <linux/fcntl.h> | 30 | #include <linux/fcntl.h> |
@@ -76,7 +76,7 @@ void ax25_requeue_frames(ax25_cb *ax25) | |||
76 | if (skb_prev == NULL) | 76 | if (skb_prev == NULL) |
77 | skb_queue_head(&ax25->write_queue, skb); | 77 | skb_queue_head(&ax25->write_queue, skb); |
78 | else | 78 | else |
79 | skb_append(skb_prev, skb); | 79 | skb_append(skb_prev, skb, &ax25->write_queue); |
80 | skb_prev = skb; | 80 | skb_prev = skb; |
81 | } | 81 | } |
82 | } | 82 | } |
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ffa26c10bfe8..55dc42eac92c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c | |||
@@ -191,7 +191,7 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) | |||
191 | 191 | ||
192 | /* Special commands */ | 192 | /* Special commands */ |
193 | while ((skb = skb_dequeue(&hdev->driver_init))) { | 193 | while ((skb = skb_dequeue(&hdev->driver_init))) { |
194 | skb->pkt_type = HCI_COMMAND_PKT; | 194 | bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; |
195 | skb->dev = (void *) hdev; | 195 | skb->dev = (void *) hdev; |
196 | skb_queue_tail(&hdev->cmd_q, skb); | 196 | skb_queue_tail(&hdev->cmd_q, skb); |
197 | hci_sched_cmd(hdev); | 197 | hci_sched_cmd(hdev); |
@@ -995,11 +995,11 @@ static int hci_send_frame(struct sk_buff *skb) | |||
995 | return -ENODEV; | 995 | return -ENODEV; |
996 | } | 996 | } |
997 | 997 | ||
998 | BT_DBG("%s type %d len %d", hdev->name, skb->pkt_type, skb->len); | 998 | BT_DBG("%s type %d len %d", hdev->name, bt_cb(skb)->pkt_type, skb->len); |
999 | 999 | ||
1000 | if (atomic_read(&hdev->promisc)) { | 1000 | if (atomic_read(&hdev->promisc)) { |
1001 | /* Time stamp */ | 1001 | /* Time stamp */ |
1002 | do_gettimeofday(&skb->stamp); | 1002 | __net_timestamp(skb); |
1003 | 1003 | ||
1004 | hci_send_to_sock(hdev, skb); | 1004 | hci_send_to_sock(hdev, skb); |
1005 | } | 1005 | } |
@@ -1034,7 +1034,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 ogf, __u16 ocf, __u32 plen, void *p | |||
1034 | 1034 | ||
1035 | BT_DBG("skb len %d", skb->len); | 1035 | BT_DBG("skb len %d", skb->len); |
1036 | 1036 | ||
1037 | skb->pkt_type = HCI_COMMAND_PKT; | 1037 | bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; |
1038 | skb->dev = (void *) hdev; | 1038 | skb->dev = (void *) hdev; |
1039 | skb_queue_tail(&hdev->cmd_q, skb); | 1039 | skb_queue_tail(&hdev->cmd_q, skb); |
1040 | hci_sched_cmd(hdev); | 1040 | hci_sched_cmd(hdev); |
@@ -1081,7 +1081,7 @@ int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags) | |||
1081 | BT_DBG("%s conn %p flags 0x%x", hdev->name, conn, flags); | 1081 | BT_DBG("%s conn %p flags 0x%x", hdev->name, conn, flags); |
1082 | 1082 | ||
1083 | skb->dev = (void *) hdev; | 1083 | skb->dev = (void *) hdev; |
1084 | skb->pkt_type = HCI_ACLDATA_PKT; | 1084 | bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT; |
1085 | hci_add_acl_hdr(skb, conn->handle, flags | ACL_START); | 1085 | hci_add_acl_hdr(skb, conn->handle, flags | ACL_START); |
1086 | 1086 | ||
1087 | if (!(list = skb_shinfo(skb)->frag_list)) { | 1087 | if (!(list = skb_shinfo(skb)->frag_list)) { |
@@ -1103,7 +1103,7 @@ int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags) | |||
1103 | skb = list; list = list->next; | 1103 | skb = list; list = list->next; |
1104 | 1104 | ||
1105 | skb->dev = (void *) hdev; | 1105 | skb->dev = (void *) hdev; |
1106 | skb->pkt_type = HCI_ACLDATA_PKT; | 1106 | bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT; |
1107 | hci_add_acl_hdr(skb, conn->handle, flags | ACL_CONT); | 1107 | hci_add_acl_hdr(skb, conn->handle, flags | ACL_CONT); |
1108 | 1108 | ||
1109 | BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len); | 1109 | BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len); |
@@ -1139,7 +1139,7 @@ int hci_send_sco(struct hci_conn *conn, struct sk_buff *skb) | |||
1139 | memcpy(skb->h.raw, &hdr, HCI_SCO_HDR_SIZE); | 1139 | memcpy(skb->h.raw, &hdr, HCI_SCO_HDR_SIZE); |
1140 | 1140 | ||
1141 | skb->dev = (void *) hdev; | 1141 | skb->dev = (void *) hdev; |
1142 | skb->pkt_type = HCI_SCODATA_PKT; | 1142 | bt_cb(skb)->pkt_type = HCI_SCODATA_PKT; |
1143 | skb_queue_tail(&conn->data_q, skb); | 1143 | skb_queue_tail(&conn->data_q, skb); |
1144 | hci_sched_tx(hdev); | 1144 | hci_sched_tx(hdev); |
1145 | return 0; | 1145 | return 0; |
@@ -1369,7 +1369,7 @@ void hci_rx_task(unsigned long arg) | |||
1369 | 1369 | ||
1370 | if (test_bit(HCI_INIT, &hdev->flags)) { | 1370 | if (test_bit(HCI_INIT, &hdev->flags)) { |
1371 | /* Don't process data packets in this states. */ | 1371 | /* Don't process data packets in this states. */ |
1372 | switch (skb->pkt_type) { | 1372 | switch (bt_cb(skb)->pkt_type) { |
1373 | case HCI_ACLDATA_PKT: | 1373 | case HCI_ACLDATA_PKT: |
1374 | case HCI_SCODATA_PKT: | 1374 | case HCI_SCODATA_PKT: |
1375 | kfree_skb(skb); | 1375 | kfree_skb(skb); |
@@ -1378,7 +1378,7 @@ void hci_rx_task(unsigned long arg) | |||
1378 | } | 1378 | } |
1379 | 1379 | ||
1380 | /* Process frame */ | 1380 | /* Process frame */ |
1381 | switch (skb->pkt_type) { | 1381 | switch (bt_cb(skb)->pkt_type) { |
1382 | case HCI_EVENT_PKT: | 1382 | case HCI_EVENT_PKT: |
1383 | hci_event_packet(hdev, skb); | 1383 | hci_event_packet(hdev, skb); |
1384 | break; | 1384 | break; |
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 46367bd129c3..d6da0939216d 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c | |||
@@ -484,14 +484,18 @@ static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff | |||
484 | /* Inquiry Result */ | 484 | /* Inquiry Result */ |
485 | static inline void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) | 485 | static inline void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) |
486 | { | 486 | { |
487 | struct inquiry_data data; | ||
487 | struct inquiry_info *info = (struct inquiry_info *) (skb->data + 1); | 488 | struct inquiry_info *info = (struct inquiry_info *) (skb->data + 1); |
488 | int num_rsp = *((__u8 *) skb->data); | 489 | int num_rsp = *((__u8 *) skb->data); |
489 | 490 | ||
490 | BT_DBG("%s num_rsp %d", hdev->name, num_rsp); | 491 | BT_DBG("%s num_rsp %d", hdev->name, num_rsp); |
491 | 492 | ||
493 | if (!num_rsp) | ||
494 | return; | ||
495 | |||
492 | hci_dev_lock(hdev); | 496 | hci_dev_lock(hdev); |
497 | |||
493 | for (; num_rsp; num_rsp--) { | 498 | for (; num_rsp; num_rsp--) { |
494 | struct inquiry_data data; | ||
495 | bacpy(&data.bdaddr, &info->bdaddr); | 499 | bacpy(&data.bdaddr, &info->bdaddr); |
496 | data.pscan_rep_mode = info->pscan_rep_mode; | 500 | data.pscan_rep_mode = info->pscan_rep_mode; |
497 | data.pscan_period_mode = info->pscan_period_mode; | 501 | data.pscan_period_mode = info->pscan_period_mode; |
@@ -502,30 +506,55 @@ static inline void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff * | |||
502 | info++; | 506 | info++; |
503 | hci_inquiry_cache_update(hdev, &data); | 507 | hci_inquiry_cache_update(hdev, &data); |
504 | } | 508 | } |
509 | |||
505 | hci_dev_unlock(hdev); | 510 | hci_dev_unlock(hdev); |
506 | } | 511 | } |
507 | 512 | ||
508 | /* Inquiry Result With RSSI */ | 513 | /* Inquiry Result With RSSI */ |
509 | static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct sk_buff *skb) | 514 | static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct sk_buff *skb) |
510 | { | 515 | { |
511 | struct inquiry_info_with_rssi *info = (struct inquiry_info_with_rssi *) (skb->data + 1); | 516 | struct inquiry_data data; |
512 | int num_rsp = *((__u8 *) skb->data); | 517 | int num_rsp = *((__u8 *) skb->data); |
513 | 518 | ||
514 | BT_DBG("%s num_rsp %d", hdev->name, num_rsp); | 519 | BT_DBG("%s num_rsp %d", hdev->name, num_rsp); |
515 | 520 | ||
521 | if (!num_rsp) | ||
522 | return; | ||
523 | |||
516 | hci_dev_lock(hdev); | 524 | hci_dev_lock(hdev); |
517 | for (; num_rsp; num_rsp--) { | 525 | |
518 | struct inquiry_data data; | 526 | if ((skb->len - 1) / num_rsp != sizeof(struct inquiry_info_with_rssi)) { |
519 | bacpy(&data.bdaddr, &info->bdaddr); | 527 | struct inquiry_info_with_rssi_and_pscan_mode *info = |
520 | data.pscan_rep_mode = info->pscan_rep_mode; | 528 | (struct inquiry_info_with_rssi_and_pscan_mode *) (skb->data + 1); |
521 | data.pscan_period_mode = info->pscan_period_mode; | 529 | |
522 | data.pscan_mode = 0x00; | 530 | for (; num_rsp; num_rsp--) { |
523 | memcpy(data.dev_class, info->dev_class, 3); | 531 | bacpy(&data.bdaddr, &info->bdaddr); |
524 | data.clock_offset = info->clock_offset; | 532 | data.pscan_rep_mode = info->pscan_rep_mode; |
525 | data.rssi = info->rssi; | 533 | data.pscan_period_mode = info->pscan_period_mode; |
526 | info++; | 534 | data.pscan_mode = info->pscan_mode; |
527 | hci_inquiry_cache_update(hdev, &data); | 535 | memcpy(data.dev_class, info->dev_class, 3); |
536 | data.clock_offset = info->clock_offset; | ||
537 | data.rssi = info->rssi; | ||
538 | info++; | ||
539 | hci_inquiry_cache_update(hdev, &data); | ||
540 | } | ||
541 | } else { | ||
542 | struct inquiry_info_with_rssi *info = | ||
543 | (struct inquiry_info_with_rssi *) (skb->data + 1); | ||
544 | |||
545 | for (; num_rsp; num_rsp--) { | ||
546 | bacpy(&data.bdaddr, &info->bdaddr); | ||
547 | data.pscan_rep_mode = info->pscan_rep_mode; | ||
548 | data.pscan_period_mode = info->pscan_period_mode; | ||
549 | data.pscan_mode = 0x00; | ||
550 | memcpy(data.dev_class, info->dev_class, 3); | ||
551 | data.clock_offset = info->clock_offset; | ||
552 | data.rssi = info->rssi; | ||
553 | info++; | ||
554 | hci_inquiry_cache_update(hdev, &data); | ||
555 | } | ||
528 | } | 556 | } |
557 | |||
529 | hci_dev_unlock(hdev); | 558 | hci_dev_unlock(hdev); |
530 | } | 559 | } |
531 | 560 | ||
@@ -865,6 +894,24 @@ static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *sk | |||
865 | hci_dev_unlock(hdev); | 894 | hci_dev_unlock(hdev); |
866 | } | 895 | } |
867 | 896 | ||
897 | /* Page Scan Repetition Mode */ | ||
898 | static inline void hci_pscan_rep_mode_evt(struct hci_dev *hdev, struct sk_buff *skb) | ||
899 | { | ||
900 | struct hci_ev_pscan_rep_mode *ev = (struct hci_ev_pscan_rep_mode *) skb->data; | ||
901 | struct inquiry_entry *ie; | ||
902 | |||
903 | BT_DBG("%s", hdev->name); | ||
904 | |||
905 | hci_dev_lock(hdev); | ||
906 | |||
907 | if ((ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr))) { | ||
908 | ie->data.pscan_rep_mode = ev->pscan_rep_mode; | ||
909 | ie->timestamp = jiffies; | ||
910 | } | ||
911 | |||
912 | hci_dev_unlock(hdev); | ||
913 | } | ||
914 | |||
868 | void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) | 915 | void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) |
869 | { | 916 | { |
870 | struct hci_event_hdr *hdr = (struct hci_event_hdr *) skb->data; | 917 | struct hci_event_hdr *hdr = (struct hci_event_hdr *) skb->data; |
@@ -937,6 +984,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) | |||
937 | hci_clock_offset_evt(hdev, skb); | 984 | hci_clock_offset_evt(hdev, skb); |
938 | break; | 985 | break; |
939 | 986 | ||
987 | case HCI_EV_PSCAN_REP_MODE: | ||
988 | hci_pscan_rep_mode_evt(hdev, skb); | ||
989 | break; | ||
990 | |||
940 | case HCI_EV_CMD_STATUS: | 991 | case HCI_EV_CMD_STATUS: |
941 | cs = (struct hci_ev_cmd_status *) skb->data; | 992 | cs = (struct hci_ev_cmd_status *) skb->data; |
942 | skb_pull(skb, sizeof(cs)); | 993 | skb_pull(skb, sizeof(cs)); |
@@ -1036,9 +1087,9 @@ void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) | |||
1036 | memcpy(ev->data, data, dlen); | 1087 | memcpy(ev->data, data, dlen); |
1037 | 1088 | ||
1038 | bt_cb(skb)->incoming = 1; | 1089 | bt_cb(skb)->incoming = 1; |
1039 | do_gettimeofday(&skb->stamp); | 1090 | __net_timestamp(skb); |
1040 | 1091 | ||
1041 | skb->pkt_type = HCI_EVENT_PKT; | 1092 | bt_cb(skb)->pkt_type = HCI_EVENT_PKT; |
1042 | skb->dev = (void *) hdev; | 1093 | skb->dev = (void *) hdev; |
1043 | hci_send_to_sock(hdev, skb); | 1094 | hci_send_to_sock(hdev, skb); |
1044 | kfree_skb(skb); | 1095 | kfree_skb(skb); |
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index ebdcce5e7ca0..32ef7975a139 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c | |||
@@ -110,11 +110,11 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) | |||
110 | /* Apply filter */ | 110 | /* Apply filter */ |
111 | flt = &hci_pi(sk)->filter; | 111 | flt = &hci_pi(sk)->filter; |
112 | 112 | ||
113 | if (!test_bit((skb->pkt_type == HCI_VENDOR_PKT) ? | 113 | if (!test_bit((bt_cb(skb)->pkt_type == HCI_VENDOR_PKT) ? |
114 | 0 : (skb->pkt_type & HCI_FLT_TYPE_BITS), &flt->type_mask)) | 114 | 0 : (bt_cb(skb)->pkt_type & HCI_FLT_TYPE_BITS), &flt->type_mask)) |
115 | continue; | 115 | continue; |
116 | 116 | ||
117 | if (skb->pkt_type == HCI_EVENT_PKT) { | 117 | if (bt_cb(skb)->pkt_type == HCI_EVENT_PKT) { |
118 | register int evt = (*(__u8 *)skb->data & HCI_FLT_EVENT_BITS); | 118 | register int evt = (*(__u8 *)skb->data & HCI_FLT_EVENT_BITS); |
119 | 119 | ||
120 | if (!hci_test_bit(evt, &flt->event_mask)) | 120 | if (!hci_test_bit(evt, &flt->event_mask)) |
@@ -131,7 +131,7 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) | |||
131 | continue; | 131 | continue; |
132 | 132 | ||
133 | /* Put type byte before the data */ | 133 | /* Put type byte before the data */ |
134 | memcpy(skb_push(nskb, 1), &nskb->pkt_type, 1); | 134 | memcpy(skb_push(nskb, 1), &bt_cb(nskb)->pkt_type, 1); |
135 | 135 | ||
136 | if (sock_queue_rcv_skb(sk, nskb)) | 136 | if (sock_queue_rcv_skb(sk, nskb)) |
137 | kfree_skb(nskb); | 137 | kfree_skb(nskb); |
@@ -327,11 +327,17 @@ static inline void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_ | |||
327 | { | 327 | { |
328 | __u32 mask = hci_pi(sk)->cmsg_mask; | 328 | __u32 mask = hci_pi(sk)->cmsg_mask; |
329 | 329 | ||
330 | if (mask & HCI_CMSG_DIR) | 330 | if (mask & HCI_CMSG_DIR) { |
331 | put_cmsg(msg, SOL_HCI, HCI_CMSG_DIR, sizeof(int), &bt_cb(skb)->incoming); | 331 | int incoming = bt_cb(skb)->incoming; |
332 | put_cmsg(msg, SOL_HCI, HCI_CMSG_DIR, sizeof(incoming), &incoming); | ||
333 | } | ||
334 | |||
335 | if (mask & HCI_CMSG_TSTAMP) { | ||
336 | struct timeval tv; | ||
332 | 337 | ||
333 | if (mask & HCI_CMSG_TSTAMP) | 338 | skb_get_timestamp(skb, &tv); |
334 | put_cmsg(msg, SOL_HCI, HCI_CMSG_TSTAMP, sizeof(skb->stamp), &skb->stamp); | 339 | put_cmsg(msg, SOL_HCI, HCI_CMSG_TSTAMP, sizeof(tv), &tv); |
340 | } | ||
335 | } | 341 | } |
336 | 342 | ||
337 | static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, | 343 | static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, |
@@ -405,11 +411,11 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, | |||
405 | goto drop; | 411 | goto drop; |
406 | } | 412 | } |
407 | 413 | ||
408 | skb->pkt_type = *((unsigned char *) skb->data); | 414 | bt_cb(skb)->pkt_type = *((unsigned char *) skb->data); |
409 | skb_pull(skb, 1); | 415 | skb_pull(skb, 1); |
410 | skb->dev = (void *) hdev; | 416 | skb->dev = (void *) hdev; |
411 | 417 | ||
412 | if (skb->pkt_type == HCI_COMMAND_PKT) { | 418 | if (bt_cb(skb)->pkt_type == HCI_COMMAND_PKT) { |
413 | u16 opcode = __le16_to_cpu(get_unaligned((u16 *)skb->data)); | 419 | u16 opcode = __le16_to_cpu(get_unaligned((u16 *)skb->data)); |
414 | u16 ogf = hci_opcode_ogf(opcode); | 420 | u16 ogf = hci_opcode_ogf(opcode); |
415 | u16 ocf = hci_opcode_ocf(opcode); | 421 | u16 ocf = hci_opcode_ocf(opcode); |
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 32fccfb5bfa5..d3d6bc547212 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c | |||
@@ -372,7 +372,7 @@ static struct proto l2cap_proto = { | |||
372 | .obj_size = sizeof(struct l2cap_pinfo) | 372 | .obj_size = sizeof(struct l2cap_pinfo) |
373 | }; | 373 | }; |
374 | 374 | ||
375 | static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, int prio) | 375 | static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, unsigned int __nocast prio) |
376 | { | 376 | { |
377 | struct sock *sk; | 377 | struct sock *sk; |
378 | 378 | ||
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 27bf5047cd33..173f46e8cdae 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c | |||
@@ -21,10 +21,6 @@ | |||
21 | SOFTWARE IS DISCLAIMED. | 21 | SOFTWARE IS DISCLAIMED. |
22 | */ | 22 | */ |
23 | 23 | ||
24 | /* | ||
25 | RPN support - Dirk Husemann <hud@zurich.ibm.com> | ||
26 | */ | ||
27 | |||
28 | /* | 24 | /* |
29 | * Bluetooth RFCOMM core. | 25 | * Bluetooth RFCOMM core. |
30 | * | 26 | * |
@@ -115,10 +111,10 @@ static void rfcomm_session_del(struct rfcomm_session *s); | |||
115 | #define __get_mcc_len(b) ((b & 0xfe) >> 1) | 111 | #define __get_mcc_len(b) ((b & 0xfe) >> 1) |
116 | 112 | ||
117 | /* RPN macros */ | 113 | /* RPN macros */ |
118 | #define __rpn_line_settings(data, stop, parity) ((data & 0x3) | ((stop & 0x1) << 2) | ((parity & 0x3) << 3)) | 114 | #define __rpn_line_settings(data, stop, parity) ((data & 0x3) | ((stop & 0x1) << 2) | ((parity & 0x7) << 3)) |
119 | #define __get_rpn_data_bits(line) ((line) & 0x3) | 115 | #define __get_rpn_data_bits(line) ((line) & 0x3) |
120 | #define __get_rpn_stop_bits(line) (((line) >> 2) & 0x1) | 116 | #define __get_rpn_stop_bits(line) (((line) >> 2) & 0x1) |
121 | #define __get_rpn_parity(line) (((line) >> 3) & 0x3) | 117 | #define __get_rpn_parity(line) (((line) >> 3) & 0x7) |
122 | 118 | ||
123 | static inline void rfcomm_schedule(uint event) | 119 | static inline void rfcomm_schedule(uint event) |
124 | { | 120 | { |
@@ -233,7 +229,7 @@ static void rfcomm_dlc_clear_state(struct rfcomm_dlc *d) | |||
233 | d->rx_credits = RFCOMM_DEFAULT_CREDITS; | 229 | d->rx_credits = RFCOMM_DEFAULT_CREDITS; |
234 | } | 230 | } |
235 | 231 | ||
236 | struct rfcomm_dlc *rfcomm_dlc_alloc(int prio) | 232 | struct rfcomm_dlc *rfcomm_dlc_alloc(unsigned int __nocast prio) |
237 | { | 233 | { |
238 | struct rfcomm_dlc *d = kmalloc(sizeof(*d), prio); | 234 | struct rfcomm_dlc *d = kmalloc(sizeof(*d), prio); |
239 | if (!d) | 235 | if (!d) |
@@ -780,10 +776,10 @@ static int rfcomm_send_pn(struct rfcomm_session *s, int cr, struct rfcomm_dlc *d | |||
780 | return rfcomm_send_frame(s, buf, ptr - buf); | 776 | return rfcomm_send_frame(s, buf, ptr - buf); |
781 | } | 777 | } |
782 | 778 | ||
783 | static int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci, | 779 | int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci, |
784 | u8 bit_rate, u8 data_bits, u8 stop_bits, | 780 | u8 bit_rate, u8 data_bits, u8 stop_bits, |
785 | u8 parity, u8 flow_ctrl_settings, | 781 | u8 parity, u8 flow_ctrl_settings, |
786 | u8 xon_char, u8 xoff_char, u16 param_mask) | 782 | u8 xon_char, u8 xoff_char, u16 param_mask) |
787 | { | 783 | { |
788 | struct rfcomm_hdr *hdr; | 784 | struct rfcomm_hdr *hdr; |
789 | struct rfcomm_mcc *mcc; | 785 | struct rfcomm_mcc *mcc; |
@@ -791,9 +787,9 @@ static int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci, | |||
791 | u8 buf[16], *ptr = buf; | 787 | u8 buf[16], *ptr = buf; |
792 | 788 | ||
793 | BT_DBG("%p cr %d dlci %d bit_r 0x%x data_b 0x%x stop_b 0x%x parity 0x%x" | 789 | BT_DBG("%p cr %d dlci %d bit_r 0x%x data_b 0x%x stop_b 0x%x parity 0x%x" |
794 | "flwc_s 0x%x xon_c 0x%x xoff_c 0x%x p_mask 0x%x", | 790 | " flwc_s 0x%x xon_c 0x%x xoff_c 0x%x p_mask 0x%x", |
795 | s, cr, dlci, bit_rate, data_bits, stop_bits, parity, | 791 | s, cr, dlci, bit_rate, data_bits, stop_bits, parity, |
796 | flow_ctrl_settings, xon_char, xoff_char, param_mask); | 792 | flow_ctrl_settings, xon_char, xoff_char, param_mask); |
797 | 793 | ||
798 | hdr = (void *) ptr; ptr += sizeof(*hdr); | 794 | hdr = (void *) ptr; ptr += sizeof(*hdr); |
799 | hdr->addr = __addr(s->initiator, 0); | 795 | hdr->addr = __addr(s->initiator, 0); |
@@ -1265,16 +1261,16 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ | |||
1265 | u8 xon_char = 0; | 1261 | u8 xon_char = 0; |
1266 | u8 xoff_char = 0; | 1262 | u8 xoff_char = 0; |
1267 | u16 rpn_mask = RFCOMM_RPN_PM_ALL; | 1263 | u16 rpn_mask = RFCOMM_RPN_PM_ALL; |
1268 | 1264 | ||
1269 | BT_DBG("dlci %d cr %d len 0x%x bitr 0x%x line 0x%x flow 0x%x xonc 0x%x xoffc 0x%x pm 0x%x", | 1265 | BT_DBG("dlci %d cr %d len 0x%x bitr 0x%x line 0x%x flow 0x%x xonc 0x%x xoffc 0x%x pm 0x%x", |
1270 | dlci, cr, len, rpn->bit_rate, rpn->line_settings, rpn->flow_ctrl, | 1266 | dlci, cr, len, rpn->bit_rate, rpn->line_settings, rpn->flow_ctrl, |
1271 | rpn->xon_char, rpn->xoff_char, rpn->param_mask); | 1267 | rpn->xon_char, rpn->xoff_char, rpn->param_mask); |
1272 | 1268 | ||
1273 | if (!cr) | 1269 | if (!cr) |
1274 | return 0; | 1270 | return 0; |
1275 | 1271 | ||
1276 | if (len == 1) { | 1272 | if (len == 1) { |
1277 | /* request: return default setting */ | 1273 | /* This is a request, return default settings */ |
1278 | bit_rate = RFCOMM_RPN_BR_115200; | 1274 | bit_rate = RFCOMM_RPN_BR_115200; |
1279 | data_bits = RFCOMM_RPN_DATA_8; | 1275 | data_bits = RFCOMM_RPN_DATA_8; |
1280 | stop_bits = RFCOMM_RPN_STOP_1; | 1276 | stop_bits = RFCOMM_RPN_STOP_1; |
@@ -1282,11 +1278,12 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ | |||
1282 | flow_ctrl = RFCOMM_RPN_FLOW_NONE; | 1278 | flow_ctrl = RFCOMM_RPN_FLOW_NONE; |
1283 | xon_char = RFCOMM_RPN_XON_CHAR; | 1279 | xon_char = RFCOMM_RPN_XON_CHAR; |
1284 | xoff_char = RFCOMM_RPN_XOFF_CHAR; | 1280 | xoff_char = RFCOMM_RPN_XOFF_CHAR; |
1285 | |||
1286 | goto rpn_out; | 1281 | goto rpn_out; |
1287 | } | 1282 | } |
1288 | /* check for sane values: ignore/accept bit_rate, 8 bits, 1 stop bit, no parity, | 1283 | |
1289 | no flow control lines, normal XON/XOFF chars */ | 1284 | /* Check for sane values, ignore/accept bit_rate, 8 bits, 1 stop bit, |
1285 | * no parity, no flow control lines, normal XON/XOFF chars */ | ||
1286 | |||
1290 | if (rpn->param_mask & RFCOMM_RPN_PM_BITRATE) { | 1287 | if (rpn->param_mask & RFCOMM_RPN_PM_BITRATE) { |
1291 | bit_rate = rpn->bit_rate; | 1288 | bit_rate = rpn->bit_rate; |
1292 | if (bit_rate != RFCOMM_RPN_BR_115200) { | 1289 | if (bit_rate != RFCOMM_RPN_BR_115200) { |
@@ -1295,6 +1292,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ | |||
1295 | rpn_mask ^= RFCOMM_RPN_PM_BITRATE; | 1292 | rpn_mask ^= RFCOMM_RPN_PM_BITRATE; |
1296 | } | 1293 | } |
1297 | } | 1294 | } |
1295 | |||
1298 | if (rpn->param_mask & RFCOMM_RPN_PM_DATA) { | 1296 | if (rpn->param_mask & RFCOMM_RPN_PM_DATA) { |
1299 | data_bits = __get_rpn_data_bits(rpn->line_settings); | 1297 | data_bits = __get_rpn_data_bits(rpn->line_settings); |
1300 | if (data_bits != RFCOMM_RPN_DATA_8) { | 1298 | if (data_bits != RFCOMM_RPN_DATA_8) { |
@@ -1303,6 +1301,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ | |||
1303 | rpn_mask ^= RFCOMM_RPN_PM_DATA; | 1301 | rpn_mask ^= RFCOMM_RPN_PM_DATA; |
1304 | } | 1302 | } |
1305 | } | 1303 | } |
1304 | |||
1306 | if (rpn->param_mask & RFCOMM_RPN_PM_STOP) { | 1305 | if (rpn->param_mask & RFCOMM_RPN_PM_STOP) { |
1307 | stop_bits = __get_rpn_stop_bits(rpn->line_settings); | 1306 | stop_bits = __get_rpn_stop_bits(rpn->line_settings); |
1308 | if (stop_bits != RFCOMM_RPN_STOP_1) { | 1307 | if (stop_bits != RFCOMM_RPN_STOP_1) { |
@@ -1311,6 +1310,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ | |||
1311 | rpn_mask ^= RFCOMM_RPN_PM_STOP; | 1310 | rpn_mask ^= RFCOMM_RPN_PM_STOP; |
1312 | } | 1311 | } |
1313 | } | 1312 | } |
1313 | |||
1314 | if (rpn->param_mask & RFCOMM_RPN_PM_PARITY) { | 1314 | if (rpn->param_mask & RFCOMM_RPN_PM_PARITY) { |
1315 | parity = __get_rpn_parity(rpn->line_settings); | 1315 | parity = __get_rpn_parity(rpn->line_settings); |
1316 | if (parity != RFCOMM_RPN_PARITY_NONE) { | 1316 | if (parity != RFCOMM_RPN_PARITY_NONE) { |
@@ -1319,6 +1319,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ | |||
1319 | rpn_mask ^= RFCOMM_RPN_PM_PARITY; | 1319 | rpn_mask ^= RFCOMM_RPN_PM_PARITY; |
1320 | } | 1320 | } |
1321 | } | 1321 | } |
1322 | |||
1322 | if (rpn->param_mask & RFCOMM_RPN_PM_FLOW) { | 1323 | if (rpn->param_mask & RFCOMM_RPN_PM_FLOW) { |
1323 | flow_ctrl = rpn->flow_ctrl; | 1324 | flow_ctrl = rpn->flow_ctrl; |
1324 | if (flow_ctrl != RFCOMM_RPN_FLOW_NONE) { | 1325 | if (flow_ctrl != RFCOMM_RPN_FLOW_NONE) { |
@@ -1327,6 +1328,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ | |||
1327 | rpn_mask ^= RFCOMM_RPN_PM_FLOW; | 1328 | rpn_mask ^= RFCOMM_RPN_PM_FLOW; |
1328 | } | 1329 | } |
1329 | } | 1330 | } |
1331 | |||
1330 | if (rpn->param_mask & RFCOMM_RPN_PM_XON) { | 1332 | if (rpn->param_mask & RFCOMM_RPN_PM_XON) { |
1331 | xon_char = rpn->xon_char; | 1333 | xon_char = rpn->xon_char; |
1332 | if (xon_char != RFCOMM_RPN_XON_CHAR) { | 1334 | if (xon_char != RFCOMM_RPN_XON_CHAR) { |
@@ -1335,6 +1337,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ | |||
1335 | rpn_mask ^= RFCOMM_RPN_PM_XON; | 1337 | rpn_mask ^= RFCOMM_RPN_PM_XON; |
1336 | } | 1338 | } |
1337 | } | 1339 | } |
1340 | |||
1338 | if (rpn->param_mask & RFCOMM_RPN_PM_XOFF) { | 1341 | if (rpn->param_mask & RFCOMM_RPN_PM_XOFF) { |
1339 | xoff_char = rpn->xoff_char; | 1342 | xoff_char = rpn->xoff_char; |
1340 | if (xoff_char != RFCOMM_RPN_XOFF_CHAR) { | 1343 | if (xoff_char != RFCOMM_RPN_XOFF_CHAR) { |
@@ -1345,9 +1348,8 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_ | |||
1345 | } | 1348 | } |
1346 | 1349 | ||
1347 | rpn_out: | 1350 | rpn_out: |
1348 | rfcomm_send_rpn(s, 0, dlci, | 1351 | rfcomm_send_rpn(s, 0, dlci, bit_rate, data_bits, stop_bits, |
1349 | bit_rate, data_bits, stop_bits, parity, flow_ctrl, | 1352 | parity, flow_ctrl, xon_char, xoff_char, rpn_mask); |
1350 | xon_char, xoff_char, rpn_mask); | ||
1351 | 1353 | ||
1352 | return 0; | 1354 | return 0; |
1353 | } | 1355 | } |
@@ -1358,14 +1360,13 @@ static int rfcomm_recv_rls(struct rfcomm_session *s, int cr, struct sk_buff *skb | |||
1358 | u8 dlci = __get_dlci(rls->dlci); | 1360 | u8 dlci = __get_dlci(rls->dlci); |
1359 | 1361 | ||
1360 | BT_DBG("dlci %d cr %d status 0x%x", dlci, cr, rls->status); | 1362 | BT_DBG("dlci %d cr %d status 0x%x", dlci, cr, rls->status); |
1361 | 1363 | ||
1362 | if (!cr) | 1364 | if (!cr) |
1363 | return 0; | 1365 | return 0; |
1364 | 1366 | ||
1365 | /* FIXME: We should probably do something with this | 1367 | /* We should probably do something with this information here. But |
1366 | information here. But for now it's sufficient just | 1368 | * for now it's sufficient just to reply -- Bluetooth 1.1 says it's |
1367 | to reply -- Bluetooth 1.1 says it's mandatory to | 1369 | * mandatory to recognise and respond to RLS */ |
1368 | recognise and respond to RLS */ | ||
1369 | 1370 | ||
1370 | rfcomm_send_rls(s, 0, dlci, rls->status); | 1371 | rfcomm_send_rls(s, 0, dlci, rls->status); |
1371 | 1372 | ||
@@ -1381,7 +1382,7 @@ static int rfcomm_recv_msc(struct rfcomm_session *s, int cr, struct sk_buff *skb | |||
1381 | BT_DBG("dlci %d cr %d v24 0x%x", dlci, cr, msc->v24_sig); | 1382 | BT_DBG("dlci %d cr %d v24 0x%x", dlci, cr, msc->v24_sig); |
1382 | 1383 | ||
1383 | d = rfcomm_dlc_get(s, dlci); | 1384 | d = rfcomm_dlc_get(s, dlci); |
1384 | if (!d) | 1385 | if (!d) |
1385 | return 0; | 1386 | return 0; |
1386 | 1387 | ||
1387 | if (cr) { | 1388 | if (cr) { |
@@ -1389,7 +1390,7 @@ static int rfcomm_recv_msc(struct rfcomm_session *s, int cr, struct sk_buff *skb | |||
1389 | set_bit(RFCOMM_TX_THROTTLED, &d->flags); | 1390 | set_bit(RFCOMM_TX_THROTTLED, &d->flags); |
1390 | else | 1391 | else |
1391 | clear_bit(RFCOMM_TX_THROTTLED, &d->flags); | 1392 | clear_bit(RFCOMM_TX_THROTTLED, &d->flags); |
1392 | 1393 | ||
1393 | rfcomm_dlc_lock(d); | 1394 | rfcomm_dlc_lock(d); |
1394 | if (d->modem_status) | 1395 | if (d->modem_status) |
1395 | d->modem_status(d, msc->v24_sig); | 1396 | d->modem_status(d, msc->v24_sig); |
@@ -1398,7 +1399,7 @@ static int rfcomm_recv_msc(struct rfcomm_session *s, int cr, struct sk_buff *skb | |||
1398 | rfcomm_send_msc(s, 0, dlci, msc->v24_sig); | 1399 | rfcomm_send_msc(s, 0, dlci, msc->v24_sig); |
1399 | 1400 | ||
1400 | d->mscex |= RFCOMM_MSCEX_RX; | 1401 | d->mscex |= RFCOMM_MSCEX_RX; |
1401 | } else | 1402 | } else |
1402 | d->mscex |= RFCOMM_MSCEX_TX; | 1403 | d->mscex |= RFCOMM_MSCEX_TX; |
1403 | 1404 | ||
1404 | return 0; | 1405 | return 0; |
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 63a123c5c41b..90e19eb6d3cc 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c | |||
@@ -284,7 +284,7 @@ static struct proto rfcomm_proto = { | |||
284 | .obj_size = sizeof(struct rfcomm_pinfo) | 284 | .obj_size = sizeof(struct rfcomm_pinfo) |
285 | }; | 285 | }; |
286 | 286 | ||
287 | static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, int prio) | 287 | static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, unsigned int __nocast prio) |
288 | { | 288 | { |
289 | struct rfcomm_dlc *d; | 289 | struct rfcomm_dlc *d; |
290 | struct sock *sk; | 290 | struct sock *sk; |
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 6304590fd36a..1bca860a6109 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c | |||
@@ -286,7 +286,7 @@ static inline void rfcomm_set_owner_w(struct sk_buff *skb, struct rfcomm_dev *de | |||
286 | skb->destructor = rfcomm_wfree; | 286 | skb->destructor = rfcomm_wfree; |
287 | } | 287 | } |
288 | 288 | ||
289 | static struct sk_buff *rfcomm_wmalloc(struct rfcomm_dev *dev, unsigned long size, int priority) | 289 | static struct sk_buff *rfcomm_wmalloc(struct rfcomm_dev *dev, unsigned long size, unsigned int __nocast priority) |
290 | { | 290 | { |
291 | if (atomic_read(&dev->wmem_alloc) < rfcomm_room(dev->dlc)) { | 291 | if (atomic_read(&dev->wmem_alloc) < rfcomm_room(dev->dlc)) { |
292 | struct sk_buff *skb = alloc_skb(size, priority); | 292 | struct sk_buff *skb = alloc_skb(size, priority); |
@@ -528,9 +528,14 @@ static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig) | |||
528 | struct rfcomm_dev *dev = dlc->owner; | 528 | struct rfcomm_dev *dev = dlc->owner; |
529 | if (!dev) | 529 | if (!dev) |
530 | return; | 530 | return; |
531 | 531 | ||
532 | BT_DBG("dlc %p dev %p v24_sig 0x%02x", dlc, dev, v24_sig); | 532 | BT_DBG("dlc %p dev %p v24_sig 0x%02x", dlc, dev, v24_sig); |
533 | 533 | ||
534 | if ((dev->modem_status & TIOCM_CD) && !(v24_sig & RFCOMM_V24_DV)) { | ||
535 | if (dev->tty && !C_CLOCAL(dev->tty)) | ||
536 | tty_hangup(dev->tty); | ||
537 | } | ||
538 | |||
534 | dev->modem_status = | 539 | dev->modem_status = |
535 | ((v24_sig & RFCOMM_V24_RTC) ? (TIOCM_DSR | TIOCM_DTR) : 0) | | 540 | ((v24_sig & RFCOMM_V24_RTC) ? (TIOCM_DSR | TIOCM_DTR) : 0) | |
536 | ((v24_sig & RFCOMM_V24_RTR) ? (TIOCM_RTS | TIOCM_CTS) : 0) | | 541 | ((v24_sig & RFCOMM_V24_RTR) ? (TIOCM_RTS | TIOCM_CTS) : 0) | |
@@ -740,20 +745,143 @@ static int rfcomm_tty_ioctl(struct tty_struct *tty, struct file *filp, unsigned | |||
740 | return -ENOIOCTLCMD; | 745 | return -ENOIOCTLCMD; |
741 | } | 746 | } |
742 | 747 | ||
743 | #define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK)) | ||
744 | |||
745 | static void rfcomm_tty_set_termios(struct tty_struct *tty, struct termios *old) | 748 | static void rfcomm_tty_set_termios(struct tty_struct *tty, struct termios *old) |
746 | { | 749 | { |
747 | BT_DBG("tty %p", tty); | 750 | struct termios *new = (struct termios *) tty->termios; |
751 | int old_baud_rate = tty_termios_baud_rate(old); | ||
752 | int new_baud_rate = tty_termios_baud_rate(new); | ||
748 | 753 | ||
749 | if ((tty->termios->c_cflag == old->c_cflag) && | 754 | u8 baud, data_bits, stop_bits, parity, x_on, x_off; |
750 | (RELEVANT_IFLAG(tty->termios->c_iflag) == RELEVANT_IFLAG(old->c_iflag))) | 755 | u16 changes = 0; |
751 | return; | 756 | |
757 | struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; | ||
758 | |||
759 | BT_DBG("tty %p termios %p", tty, old); | ||
760 | |||
761 | /* Handle turning off CRTSCTS */ | ||
762 | if ((old->c_cflag & CRTSCTS) && !(new->c_cflag & CRTSCTS)) | ||
763 | BT_DBG("Turning off CRTSCTS unsupported"); | ||
764 | |||
765 | /* Parity on/off and when on, odd/even */ | ||
766 | if (((old->c_cflag & PARENB) != (new->c_cflag & PARENB)) || | ||
767 | ((old->c_cflag & PARODD) != (new->c_cflag & PARODD)) ) { | ||
768 | changes |= RFCOMM_RPN_PM_PARITY; | ||
769 | BT_DBG("Parity change detected."); | ||
770 | } | ||
771 | |||
772 | /* Mark and space parity are not supported! */ | ||
773 | if (new->c_cflag & PARENB) { | ||
774 | if (new->c_cflag & PARODD) { | ||
775 | BT_DBG("Parity is ODD"); | ||
776 | parity = RFCOMM_RPN_PARITY_ODD; | ||
777 | } else { | ||
778 | BT_DBG("Parity is EVEN"); | ||
779 | parity = RFCOMM_RPN_PARITY_EVEN; | ||
780 | } | ||
781 | } else { | ||
782 | BT_DBG("Parity is OFF"); | ||
783 | parity = RFCOMM_RPN_PARITY_NONE; | ||
784 | } | ||
785 | |||
786 | /* Setting the x_on / x_off characters */ | ||
787 | if (old->c_cc[VSTOP] != new->c_cc[VSTOP]) { | ||
788 | BT_DBG("XOFF custom"); | ||
789 | x_on = new->c_cc[VSTOP]; | ||
790 | changes |= RFCOMM_RPN_PM_XON; | ||
791 | } else { | ||
792 | BT_DBG("XOFF default"); | ||
793 | x_on = RFCOMM_RPN_XON_CHAR; | ||
794 | } | ||
795 | |||
796 | if (old->c_cc[VSTART] != new->c_cc[VSTART]) { | ||
797 | BT_DBG("XON custom"); | ||
798 | x_off = new->c_cc[VSTART]; | ||
799 | changes |= RFCOMM_RPN_PM_XOFF; | ||
800 | } else { | ||
801 | BT_DBG("XON default"); | ||
802 | x_off = RFCOMM_RPN_XOFF_CHAR; | ||
803 | } | ||
804 | |||
805 | /* Handle setting of stop bits */ | ||
806 | if ((old->c_cflag & CSTOPB) != (new->c_cflag & CSTOPB)) | ||
807 | changes |= RFCOMM_RPN_PM_STOP; | ||
808 | |||
809 | /* POSIX does not support 1.5 stop bits and RFCOMM does not | ||
810 | * support 2 stop bits. So a request for 2 stop bits gets | ||
811 | * translated to 1.5 stop bits */ | ||
812 | if (new->c_cflag & CSTOPB) { | ||
813 | stop_bits = RFCOMM_RPN_STOP_15; | ||
814 | } else { | ||
815 | stop_bits = RFCOMM_RPN_STOP_1; | ||
816 | } | ||
817 | |||
818 | /* Handle number of data bits [5-8] */ | ||
819 | if ((old->c_cflag & CSIZE) != (new->c_cflag & CSIZE)) | ||
820 | changes |= RFCOMM_RPN_PM_DATA; | ||
821 | |||
822 | switch (new->c_cflag & CSIZE) { | ||
823 | case CS5: | ||
824 | data_bits = RFCOMM_RPN_DATA_5; | ||
825 | break; | ||
826 | case CS6: | ||
827 | data_bits = RFCOMM_RPN_DATA_6; | ||
828 | break; | ||
829 | case CS7: | ||
830 | data_bits = RFCOMM_RPN_DATA_7; | ||
831 | break; | ||
832 | case CS8: | ||
833 | data_bits = RFCOMM_RPN_DATA_8; | ||
834 | break; | ||
835 | default: | ||
836 | data_bits = RFCOMM_RPN_DATA_8; | ||
837 | break; | ||
838 | } | ||
839 | |||
840 | /* Handle baudrate settings */ | ||
841 | if (old_baud_rate != new_baud_rate) | ||
842 | changes |= RFCOMM_RPN_PM_BITRATE; | ||
752 | 843 | ||
753 | /* handle turning off CRTSCTS */ | 844 | switch (new_baud_rate) { |
754 | if ((old->c_cflag & CRTSCTS) && !(tty->termios->c_cflag & CRTSCTS)) { | 845 | case 2400: |
755 | BT_DBG("turning off CRTSCTS"); | 846 | baud = RFCOMM_RPN_BR_2400; |
847 | break; | ||
848 | case 4800: | ||
849 | baud = RFCOMM_RPN_BR_4800; | ||
850 | break; | ||
851 | case 7200: | ||
852 | baud = RFCOMM_RPN_BR_7200; | ||
853 | break; | ||
854 | case 9600: | ||
855 | baud = RFCOMM_RPN_BR_9600; | ||
856 | break; | ||
857 | case 19200: | ||
858 | baud = RFCOMM_RPN_BR_19200; | ||
859 | break; | ||
860 | case 38400: | ||
861 | baud = RFCOMM_RPN_BR_38400; | ||
862 | break; | ||
863 | case 57600: | ||
864 | baud = RFCOMM_RPN_BR_57600; | ||
865 | break; | ||
866 | case 115200: | ||
867 | baud = RFCOMM_RPN_BR_115200; | ||
868 | break; | ||
869 | case 230400: | ||
870 | baud = RFCOMM_RPN_BR_230400; | ||
871 | break; | ||
872 | default: | ||
873 | /* 9600 is standard accordinag to the RFCOMM specification */ | ||
874 | baud = RFCOMM_RPN_BR_9600; | ||
875 | break; | ||
876 | |||
756 | } | 877 | } |
878 | |||
879 | if (changes) | ||
880 | rfcomm_send_rpn(dev->dlc->session, 1, dev->dlc->dlci, baud, | ||
881 | data_bits, stop_bits, parity, | ||
882 | RFCOMM_RPN_FLOW_NONE, x_on, x_off, changes); | ||
883 | |||
884 | return; | ||
757 | } | 885 | } |
758 | 886 | ||
759 | static void rfcomm_tty_throttle(struct tty_struct *tty) | 887 | static void rfcomm_tty_throttle(struct tty_struct *tty) |
@@ -761,7 +889,7 @@ static void rfcomm_tty_throttle(struct tty_struct *tty) | |||
761 | struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; | 889 | struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; |
762 | 890 | ||
763 | BT_DBG("tty %p dev %p", tty, dev); | 891 | BT_DBG("tty %p dev %p", tty, dev); |
764 | 892 | ||
765 | rfcomm_dlc_throttle(dev->dlc); | 893 | rfcomm_dlc_throttle(dev->dlc); |
766 | } | 894 | } |
767 | 895 | ||
@@ -770,7 +898,7 @@ static void rfcomm_tty_unthrottle(struct tty_struct *tty) | |||
770 | struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; | 898 | struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; |
771 | 899 | ||
772 | BT_DBG("tty %p dev %p", tty, dev); | 900 | BT_DBG("tty %p dev %p", tty, dev); |
773 | 901 | ||
774 | rfcomm_dlc_unthrottle(dev->dlc); | 902 | rfcomm_dlc_unthrottle(dev->dlc); |
775 | } | 903 | } |
776 | 904 | ||
@@ -841,35 +969,35 @@ static int rfcomm_tty_tiocmget(struct tty_struct *tty, struct file *filp) | |||
841 | 969 | ||
842 | static int rfcomm_tty_tiocmset(struct tty_struct *tty, struct file *filp, unsigned int set, unsigned int clear) | 970 | static int rfcomm_tty_tiocmset(struct tty_struct *tty, struct file *filp, unsigned int set, unsigned int clear) |
843 | { | 971 | { |
844 | struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; | 972 | struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; |
845 | struct rfcomm_dlc *dlc = dev->dlc; | 973 | struct rfcomm_dlc *dlc = dev->dlc; |
846 | u8 v24_sig; | 974 | u8 v24_sig; |
847 | 975 | ||
848 | BT_DBG("tty %p dev %p set 0x%02x clear 0x%02x", tty, dev, set, clear); | 976 | BT_DBG("tty %p dev %p set 0x%02x clear 0x%02x", tty, dev, set, clear); |
849 | 977 | ||
850 | rfcomm_dlc_get_modem_status(dlc, &v24_sig); | 978 | rfcomm_dlc_get_modem_status(dlc, &v24_sig); |
851 | 979 | ||
852 | if (set & TIOCM_DSR || set & TIOCM_DTR) | 980 | if (set & TIOCM_DSR || set & TIOCM_DTR) |
853 | v24_sig |= RFCOMM_V24_RTC; | 981 | v24_sig |= RFCOMM_V24_RTC; |
854 | if (set & TIOCM_RTS || set & TIOCM_CTS) | 982 | if (set & TIOCM_RTS || set & TIOCM_CTS) |
855 | v24_sig |= RFCOMM_V24_RTR; | 983 | v24_sig |= RFCOMM_V24_RTR; |
856 | if (set & TIOCM_RI) | 984 | if (set & TIOCM_RI) |
857 | v24_sig |= RFCOMM_V24_IC; | 985 | v24_sig |= RFCOMM_V24_IC; |
858 | if (set & TIOCM_CD) | 986 | if (set & TIOCM_CD) |
859 | v24_sig |= RFCOMM_V24_DV; | 987 | v24_sig |= RFCOMM_V24_DV; |
860 | 988 | ||
861 | if (clear & TIOCM_DSR || clear & TIOCM_DTR) | 989 | if (clear & TIOCM_DSR || clear & TIOCM_DTR) |
862 | v24_sig &= ~RFCOMM_V24_RTC; | 990 | v24_sig &= ~RFCOMM_V24_RTC; |
863 | if (clear & TIOCM_RTS || clear & TIOCM_CTS) | 991 | if (clear & TIOCM_RTS || clear & TIOCM_CTS) |
864 | v24_sig &= ~RFCOMM_V24_RTR; | 992 | v24_sig &= ~RFCOMM_V24_RTR; |
865 | if (clear & TIOCM_RI) | 993 | if (clear & TIOCM_RI) |
866 | v24_sig &= ~RFCOMM_V24_IC; | 994 | v24_sig &= ~RFCOMM_V24_IC; |
867 | if (clear & TIOCM_CD) | 995 | if (clear & TIOCM_CD) |
868 | v24_sig &= ~RFCOMM_V24_DV; | 996 | v24_sig &= ~RFCOMM_V24_DV; |
869 | 997 | ||
870 | rfcomm_dlc_set_modem_status(dlc, v24_sig); | 998 | rfcomm_dlc_set_modem_status(dlc, v24_sig); |
871 | 999 | ||
872 | return 0; | 1000 | return 0; |
873 | } | 1001 | } |
874 | 1002 | ||
875 | /* ---- TTY structure ---- */ | 1003 | /* ---- TTY structure ---- */ |
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 746c11fc017e..ce7ab7dfa0b2 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c | |||
@@ -418,7 +418,7 @@ static struct proto sco_proto = { | |||
418 | .obj_size = sizeof(struct sco_pinfo) | 418 | .obj_size = sizeof(struct sco_pinfo) |
419 | }; | 419 | }; |
420 | 420 | ||
421 | static struct sock *sco_sock_alloc(struct socket *sock, int proto, int prio) | 421 | static struct sock *sco_sock_alloc(struct socket *sock, int proto, unsigned int __nocast prio) |
422 | { | 422 | { |
423 | struct sock *sk; | 423 | struct sock *sk; |
424 | 424 | ||
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index e6c2200b7ca3..24396b914d11 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c | |||
@@ -23,7 +23,7 @@ | |||
23 | #include <asm/atomic.h> | 23 | #include <asm/atomic.h> |
24 | #include "br_private.h" | 24 | #include "br_private.h" |
25 | 25 | ||
26 | static kmem_cache_t *br_fdb_cache; | 26 | static kmem_cache_t *br_fdb_cache __read_mostly; |
27 | static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, | 27 | static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, |
28 | const unsigned char *addr); | 28 | const unsigned char *addr); |
29 | 29 | ||
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c index 02c632b4d325..c93d35ab95c0 100644 --- a/net/bridge/netfilter/ebt_mark.c +++ b/net/bridge/netfilter/ebt_mark.c | |||
@@ -23,10 +23,9 @@ static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr, | |||
23 | { | 23 | { |
24 | struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data; | 24 | struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data; |
25 | 25 | ||
26 | if ((*pskb)->nfmark != info->mark) { | 26 | if ((*pskb)->nfmark != info->mark) |
27 | (*pskb)->nfmark = info->mark; | 27 | (*pskb)->nfmark = info->mark; |
28 | (*pskb)->nfcache |= NFC_ALTERED; | 28 | |
29 | } | ||
30 | return info->target; | 29 | return info->target; |
31 | } | 30 | } |
32 | 31 | ||
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 01af4fcef26d..aae26ae2e61f 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c | |||
@@ -78,8 +78,8 @@ static void ulog_send(unsigned int nlgroup) | |||
78 | if (ub->qlen > 1) | 78 | if (ub->qlen > 1) |
79 | ub->lastnlh->nlmsg_type = NLMSG_DONE; | 79 | ub->lastnlh->nlmsg_type = NLMSG_DONE; |
80 | 80 | ||
81 | NETLINK_CB(ub->skb).dst_groups = 1 << nlgroup; | 81 | NETLINK_CB(ub->skb).dst_group = nlgroup + 1; |
82 | netlink_broadcast(ebtulognl, ub->skb, 0, 1 << nlgroup, GFP_ATOMIC); | 82 | netlink_broadcast(ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC); |
83 | 83 | ||
84 | ub->qlen = 0; | 84 | ub->qlen = 0; |
85 | ub->skb = NULL; | 85 | ub->skb = NULL; |
@@ -162,7 +162,7 @@ static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr, | |||
162 | pm->version = EBT_ULOG_VERSION; | 162 | pm->version = EBT_ULOG_VERSION; |
163 | do_gettimeofday(&pm->stamp); | 163 | do_gettimeofday(&pm->stamp); |
164 | if (ub->qlen == 1) | 164 | if (ub->qlen == 1) |
165 | ub->skb->stamp = pm->stamp; | 165 | skb_set_timestamp(ub->skb, &pm->stamp); |
166 | pm->data_len = copy_len; | 166 | pm->data_len = copy_len; |
167 | pm->mark = skb->nfmark; | 167 | pm->mark = skb->nfmark; |
168 | pm->hook = hooknr; | 168 | pm->hook = hooknr; |
@@ -258,7 +258,8 @@ static int __init init(void) | |||
258 | spin_lock_init(&ulog_buffers[i].lock); | 258 | spin_lock_init(&ulog_buffers[i].lock); |
259 | } | 259 | } |
260 | 260 | ||
261 | ebtulognl = netlink_kernel_create(NETLINK_NFLOG, NULL); | 261 | ebtulognl = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS, |
262 | NULL, THIS_MODULE); | ||
262 | if (!ebtulognl) | 263 | if (!ebtulognl) |
263 | ret = -ENOMEM; | 264 | ret = -ENOMEM; |
264 | else if ((ret = ebt_register_watcher(&ulog))) | 265 | else if ((ret = ebt_register_watcher(&ulog))) |
diff --git a/net/core/Makefile b/net/core/Makefile index f5f5e58943e8..630da0f0579e 100644 --- a/net/core/Makefile +++ b/net/core/Makefile | |||
@@ -12,7 +12,6 @@ obj-y += dev.o ethtool.o dev_mcast.o dst.o \ | |||
12 | 12 | ||
13 | obj-$(CONFIG_XFRM) += flow.o | 13 | obj-$(CONFIG_XFRM) += flow.o |
14 | obj-$(CONFIG_SYSFS) += net-sysfs.o | 14 | obj-$(CONFIG_SYSFS) += net-sysfs.o |
15 | obj-$(CONFIG_NETFILTER) += netfilter.o | ||
16 | obj-$(CONFIG_NET_DIVERT) += dv.o | 15 | obj-$(CONFIG_NET_DIVERT) += dv.o |
17 | obj-$(CONFIG_NET_PKTGEN) += pktgen.o | 16 | obj-$(CONFIG_NET_PKTGEN) += pktgen.o |
18 | obj-$(CONFIG_NET_RADIO) += wireless.o | 17 | obj-$(CONFIG_NET_RADIO) += wireless.o |
diff --git a/net/core/datagram.c b/net/core/datagram.c index fcee054b6f75..da9bf71421a7 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c | |||
@@ -43,7 +43,6 @@ | |||
43 | #include <linux/errno.h> | 43 | #include <linux/errno.h> |
44 | #include <linux/sched.h> | 44 | #include <linux/sched.h> |
45 | #include <linux/inet.h> | 45 | #include <linux/inet.h> |
46 | #include <linux/tcp.h> | ||
47 | #include <linux/netdevice.h> | 46 | #include <linux/netdevice.h> |
48 | #include <linux/rtnetlink.h> | 47 | #include <linux/rtnetlink.h> |
49 | #include <linux/poll.h> | 48 | #include <linux/poll.h> |
@@ -51,9 +50,10 @@ | |||
51 | 50 | ||
52 | #include <net/protocol.h> | 51 | #include <net/protocol.h> |
53 | #include <linux/skbuff.h> | 52 | #include <linux/skbuff.h> |
54 | #include <net/sock.h> | ||
55 | #include <net/checksum.h> | ||
56 | 53 | ||
54 | #include <net/checksum.h> | ||
55 | #include <net/sock.h> | ||
56 | #include <net/tcp_states.h> | ||
57 | 57 | ||
58 | /* | 58 | /* |
59 | * Is a socket 'connection oriented' ? | 59 | * Is a socket 'connection oriented' ? |
diff --git a/net/core/dev.c b/net/core/dev.c index faf59b02c4bf..c01511e3d0c1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -267,10 +267,6 @@ void dev_add_pack(struct packet_type *pt) | |||
267 | spin_unlock_bh(&ptype_lock); | 267 | spin_unlock_bh(&ptype_lock); |
268 | } | 268 | } |
269 | 269 | ||
270 | extern void linkwatch_run_queue(void); | ||
271 | |||
272 | |||
273 | |||
274 | /** | 270 | /** |
275 | * __dev_remove_pack - remove packet handler | 271 | * __dev_remove_pack - remove packet handler |
276 | * @pt: packet type declaration | 272 | * @pt: packet type declaration |
@@ -1009,13 +1005,22 @@ void net_disable_timestamp(void) | |||
1009 | atomic_dec(&netstamp_needed); | 1005 | atomic_dec(&netstamp_needed); |
1010 | } | 1006 | } |
1011 | 1007 | ||
1012 | static inline void net_timestamp(struct timeval *stamp) | 1008 | void __net_timestamp(struct sk_buff *skb) |
1009 | { | ||
1010 | struct timeval tv; | ||
1011 | |||
1012 | do_gettimeofday(&tv); | ||
1013 | skb_set_timestamp(skb, &tv); | ||
1014 | } | ||
1015 | EXPORT_SYMBOL(__net_timestamp); | ||
1016 | |||
1017 | static inline void net_timestamp(struct sk_buff *skb) | ||
1013 | { | 1018 | { |
1014 | if (atomic_read(&netstamp_needed)) | 1019 | if (atomic_read(&netstamp_needed)) |
1015 | do_gettimeofday(stamp); | 1020 | __net_timestamp(skb); |
1016 | else { | 1021 | else { |
1017 | stamp->tv_sec = 0; | 1022 | skb->tstamp.off_sec = 0; |
1018 | stamp->tv_usec = 0; | 1023 | skb->tstamp.off_usec = 0; |
1019 | } | 1024 | } |
1020 | } | 1025 | } |
1021 | 1026 | ||
@@ -1027,7 +1032,8 @@ static inline void net_timestamp(struct timeval *stamp) | |||
1027 | void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) | 1032 | void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) |
1028 | { | 1033 | { |
1029 | struct packet_type *ptype; | 1034 | struct packet_type *ptype; |
1030 | net_timestamp(&skb->stamp); | 1035 | |
1036 | net_timestamp(skb); | ||
1031 | 1037 | ||
1032 | rcu_read_lock(); | 1038 | rcu_read_lock(); |
1033 | list_for_each_entry_rcu(ptype, &ptype_all, list) { | 1039 | list_for_each_entry_rcu(ptype, &ptype_all, list) { |
@@ -1058,7 +1064,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) | |||
1058 | 1064 | ||
1059 | skb2->h.raw = skb2->nh.raw; | 1065 | skb2->h.raw = skb2->nh.raw; |
1060 | skb2->pkt_type = PACKET_OUTGOING; | 1066 | skb2->pkt_type = PACKET_OUTGOING; |
1061 | ptype->func(skb2, skb->dev, ptype); | 1067 | ptype->func(skb2, skb->dev, ptype, skb->dev); |
1062 | } | 1068 | } |
1063 | } | 1069 | } |
1064 | rcu_read_unlock(); | 1070 | rcu_read_unlock(); |
@@ -1123,8 +1129,6 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) | |||
1123 | #define illegal_highdma(dev, skb) (0) | 1129 | #define illegal_highdma(dev, skb) (0) |
1124 | #endif | 1130 | #endif |
1125 | 1131 | ||
1126 | extern void skb_release_data(struct sk_buff *); | ||
1127 | |||
1128 | /* Keep head the same: replace data */ | 1132 | /* Keep head the same: replace data */ |
1129 | int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp_mask) | 1133 | int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp_mask) |
1130 | { | 1134 | { |
@@ -1379,8 +1383,8 @@ int netif_rx(struct sk_buff *skb) | |||
1379 | if (netpoll_rx(skb)) | 1383 | if (netpoll_rx(skb)) |
1380 | return NET_RX_DROP; | 1384 | return NET_RX_DROP; |
1381 | 1385 | ||
1382 | if (!skb->stamp.tv_sec) | 1386 | if (!skb->tstamp.off_sec) |
1383 | net_timestamp(&skb->stamp); | 1387 | net_timestamp(skb); |
1384 | 1388 | ||
1385 | /* | 1389 | /* |
1386 | * The code is rearranged so that the path is the most | 1390 | * The code is rearranged so that the path is the most |
@@ -1425,14 +1429,14 @@ int netif_rx_ni(struct sk_buff *skb) | |||
1425 | 1429 | ||
1426 | EXPORT_SYMBOL(netif_rx_ni); | 1430 | EXPORT_SYMBOL(netif_rx_ni); |
1427 | 1431 | ||
1428 | static __inline__ void skb_bond(struct sk_buff *skb) | 1432 | static inline struct net_device *skb_bond(struct sk_buff *skb) |
1429 | { | 1433 | { |
1430 | struct net_device *dev = skb->dev; | 1434 | struct net_device *dev = skb->dev; |
1431 | 1435 | ||
1432 | if (dev->master) { | 1436 | if (dev->master) |
1433 | skb->real_dev = skb->dev; | ||
1434 | skb->dev = dev->master; | 1437 | skb->dev = dev->master; |
1435 | } | 1438 | |
1439 | return dev; | ||
1436 | } | 1440 | } |
1437 | 1441 | ||
1438 | static void net_tx_action(struct softirq_action *h) | 1442 | static void net_tx_action(struct softirq_action *h) |
@@ -1482,10 +1486,11 @@ static void net_tx_action(struct softirq_action *h) | |||
1482 | } | 1486 | } |
1483 | 1487 | ||
1484 | static __inline__ int deliver_skb(struct sk_buff *skb, | 1488 | static __inline__ int deliver_skb(struct sk_buff *skb, |
1485 | struct packet_type *pt_prev) | 1489 | struct packet_type *pt_prev, |
1490 | struct net_device *orig_dev) | ||
1486 | { | 1491 | { |
1487 | atomic_inc(&skb->users); | 1492 | atomic_inc(&skb->users); |
1488 | return pt_prev->func(skb, skb->dev, pt_prev); | 1493 | return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); |
1489 | } | 1494 | } |
1490 | 1495 | ||
1491 | #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) | 1496 | #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) |
@@ -1496,7 +1501,8 @@ struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br, | |||
1496 | void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent); | 1501 | void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent); |
1497 | 1502 | ||
1498 | static __inline__ int handle_bridge(struct sk_buff **pskb, | 1503 | static __inline__ int handle_bridge(struct sk_buff **pskb, |
1499 | struct packet_type **pt_prev, int *ret) | 1504 | struct packet_type **pt_prev, int *ret, |
1505 | struct net_device *orig_dev) | ||
1500 | { | 1506 | { |
1501 | struct net_bridge_port *port; | 1507 | struct net_bridge_port *port; |
1502 | 1508 | ||
@@ -1505,14 +1511,14 @@ static __inline__ int handle_bridge(struct sk_buff **pskb, | |||
1505 | return 0; | 1511 | return 0; |
1506 | 1512 | ||
1507 | if (*pt_prev) { | 1513 | if (*pt_prev) { |
1508 | *ret = deliver_skb(*pskb, *pt_prev); | 1514 | *ret = deliver_skb(*pskb, *pt_prev, orig_dev); |
1509 | *pt_prev = NULL; | 1515 | *pt_prev = NULL; |
1510 | } | 1516 | } |
1511 | 1517 | ||
1512 | return br_handle_frame_hook(port, pskb); | 1518 | return br_handle_frame_hook(port, pskb); |
1513 | } | 1519 | } |
1514 | #else | 1520 | #else |
1515 | #define handle_bridge(skb, pt_prev, ret) (0) | 1521 | #define handle_bridge(skb, pt_prev, ret, orig_dev) (0) |
1516 | #endif | 1522 | #endif |
1517 | 1523 | ||
1518 | #ifdef CONFIG_NET_CLS_ACT | 1524 | #ifdef CONFIG_NET_CLS_ACT |
@@ -1534,17 +1540,14 @@ static int ing_filter(struct sk_buff *skb) | |||
1534 | __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd); | 1540 | __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd); |
1535 | if (MAX_RED_LOOP < ttl++) { | 1541 | if (MAX_RED_LOOP < ttl++) { |
1536 | printk("Redir loop detected Dropping packet (%s->%s)\n", | 1542 | printk("Redir loop detected Dropping packet (%s->%s)\n", |
1537 | skb->input_dev?skb->input_dev->name:"??",skb->dev->name); | 1543 | skb->input_dev->name, skb->dev->name); |
1538 | return TC_ACT_SHOT; | 1544 | return TC_ACT_SHOT; |
1539 | } | 1545 | } |
1540 | 1546 | ||
1541 | skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl); | 1547 | skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl); |
1542 | 1548 | ||
1543 | skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS); | 1549 | skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS); |
1544 | if (NULL == skb->input_dev) { | 1550 | |
1545 | skb->input_dev = skb->dev; | ||
1546 | printk("ing_filter: fixed %s out %s\n",skb->input_dev->name,skb->dev->name); | ||
1547 | } | ||
1548 | spin_lock(&dev->ingress_lock); | 1551 | spin_lock(&dev->ingress_lock); |
1549 | if ((q = dev->qdisc_ingress) != NULL) | 1552 | if ((q = dev->qdisc_ingress) != NULL) |
1550 | result = q->enqueue(skb, q); | 1553 | result = q->enqueue(skb, q); |
@@ -1559,6 +1562,7 @@ static int ing_filter(struct sk_buff *skb) | |||
1559 | int netif_receive_skb(struct sk_buff *skb) | 1562 | int netif_receive_skb(struct sk_buff *skb) |
1560 | { | 1563 | { |
1561 | struct packet_type *ptype, *pt_prev; | 1564 | struct packet_type *ptype, *pt_prev; |
1565 | struct net_device *orig_dev; | ||
1562 | int ret = NET_RX_DROP; | 1566 | int ret = NET_RX_DROP; |
1563 | unsigned short type; | 1567 | unsigned short type; |
1564 | 1568 | ||
@@ -1566,10 +1570,13 @@ int netif_receive_skb(struct sk_buff *skb) | |||
1566 | if (skb->dev->poll && netpoll_rx(skb)) | 1570 | if (skb->dev->poll && netpoll_rx(skb)) |
1567 | return NET_RX_DROP; | 1571 | return NET_RX_DROP; |
1568 | 1572 | ||
1569 | if (!skb->stamp.tv_sec) | 1573 | if (!skb->tstamp.off_sec) |
1570 | net_timestamp(&skb->stamp); | 1574 | net_timestamp(skb); |
1575 | |||
1576 | if (!skb->input_dev) | ||
1577 | skb->input_dev = skb->dev; | ||
1571 | 1578 | ||
1572 | skb_bond(skb); | 1579 | orig_dev = skb_bond(skb); |
1573 | 1580 | ||
1574 | __get_cpu_var(netdev_rx_stat).total++; | 1581 | __get_cpu_var(netdev_rx_stat).total++; |
1575 | 1582 | ||
@@ -1590,14 +1597,14 @@ int netif_receive_skb(struct sk_buff *skb) | |||
1590 | list_for_each_entry_rcu(ptype, &ptype_all, list) { | 1597 | list_for_each_entry_rcu(ptype, &ptype_all, list) { |
1591 | if (!ptype->dev || ptype->dev == skb->dev) { | 1598 | if (!ptype->dev || ptype->dev == skb->dev) { |
1592 | if (pt_prev) | 1599 | if (pt_prev) |
1593 | ret = deliver_skb(skb, pt_prev); | 1600 | ret = deliver_skb(skb, pt_prev, orig_dev); |
1594 | pt_prev = ptype; | 1601 | pt_prev = ptype; |
1595 | } | 1602 | } |
1596 | } | 1603 | } |
1597 | 1604 | ||
1598 | #ifdef CONFIG_NET_CLS_ACT | 1605 | #ifdef CONFIG_NET_CLS_ACT |
1599 | if (pt_prev) { | 1606 | if (pt_prev) { |
1600 | ret = deliver_skb(skb, pt_prev); | 1607 | ret = deliver_skb(skb, pt_prev, orig_dev); |
1601 | pt_prev = NULL; /* noone else should process this after*/ | 1608 | pt_prev = NULL; /* noone else should process this after*/ |
1602 | } else { | 1609 | } else { |
1603 | skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); | 1610 | skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); |
@@ -1616,7 +1623,7 @@ ncls: | |||
1616 | 1623 | ||
1617 | handle_diverter(skb); | 1624 | handle_diverter(skb); |
1618 | 1625 | ||
1619 | if (handle_bridge(&skb, &pt_prev, &ret)) | 1626 | if (handle_bridge(&skb, &pt_prev, &ret, orig_dev)) |
1620 | goto out; | 1627 | goto out; |
1621 | 1628 | ||
1622 | type = skb->protocol; | 1629 | type = skb->protocol; |
@@ -1624,13 +1631,13 @@ ncls: | |||
1624 | if (ptype->type == type && | 1631 | if (ptype->type == type && |
1625 | (!ptype->dev || ptype->dev == skb->dev)) { | 1632 | (!ptype->dev || ptype->dev == skb->dev)) { |
1626 | if (pt_prev) | 1633 | if (pt_prev) |
1627 | ret = deliver_skb(skb, pt_prev); | 1634 | ret = deliver_skb(skb, pt_prev, orig_dev); |
1628 | pt_prev = ptype; | 1635 | pt_prev = ptype; |
1629 | } | 1636 | } |
1630 | } | 1637 | } |
1631 | 1638 | ||
1632 | if (pt_prev) { | 1639 | if (pt_prev) { |
1633 | ret = pt_prev->func(skb, skb->dev, pt_prev); | 1640 | ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); |
1634 | } else { | 1641 | } else { |
1635 | kfree_skb(skb); | 1642 | kfree_skb(skb); |
1636 | /* Jamal, now you will not able to escape explaining | 1643 | /* Jamal, now you will not able to escape explaining |
diff --git a/net/core/ethtool.c b/net/core/ethtool.c index a3eeb88e1c81..289c1b5a8e4a 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c | |||
@@ -81,6 +81,18 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data) | |||
81 | return 0; | 81 | return 0; |
82 | } | 82 | } |
83 | 83 | ||
84 | int ethtool_op_get_perm_addr(struct net_device *dev, struct ethtool_perm_addr *addr, u8 *data) | ||
85 | { | ||
86 | unsigned char len = dev->addr_len; | ||
87 | if ( addr->size < len ) | ||
88 | return -ETOOSMALL; | ||
89 | |||
90 | addr->size = len; | ||
91 | memcpy(data, dev->perm_addr, len); | ||
92 | return 0; | ||
93 | } | ||
94 | |||
95 | |||
84 | /* Handlers for each ethtool command */ | 96 | /* Handlers for each ethtool command */ |
85 | 97 | ||
86 | static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) | 98 | static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) |
@@ -683,6 +695,39 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) | |||
683 | return ret; | 695 | return ret; |
684 | } | 696 | } |
685 | 697 | ||
698 | static int ethtool_get_perm_addr(struct net_device *dev, void *useraddr) | ||
699 | { | ||
700 | struct ethtool_perm_addr epaddr; | ||
701 | u8 *data; | ||
702 | int ret; | ||
703 | |||
704 | if (!dev->ethtool_ops->get_perm_addr) | ||
705 | return -EOPNOTSUPP; | ||
706 | |||
707 | if (copy_from_user(&epaddr,useraddr,sizeof(epaddr))) | ||
708 | return -EFAULT; | ||
709 | |||
710 | data = kmalloc(epaddr.size, GFP_USER); | ||
711 | if (!data) | ||
712 | return -ENOMEM; | ||
713 | |||
714 | ret = dev->ethtool_ops->get_perm_addr(dev,&epaddr,data); | ||
715 | if (ret) | ||
716 | return ret; | ||
717 | |||
718 | ret = -EFAULT; | ||
719 | if (copy_to_user(useraddr, &epaddr, sizeof(epaddr))) | ||
720 | goto out; | ||
721 | useraddr += sizeof(epaddr); | ||
722 | if (copy_to_user(useraddr, data, epaddr.size)) | ||
723 | goto out; | ||
724 | ret = 0; | ||
725 | |||
726 | out: | ||
727 | kfree(data); | ||
728 | return ret; | ||
729 | } | ||
730 | |||
686 | /* The main entry point in this file. Called from net/core/dev.c */ | 731 | /* The main entry point in this file. Called from net/core/dev.c */ |
687 | 732 | ||
688 | int dev_ethtool(struct ifreq *ifr) | 733 | int dev_ethtool(struct ifreq *ifr) |
@@ -806,6 +851,9 @@ int dev_ethtool(struct ifreq *ifr) | |||
806 | case ETHTOOL_GSTATS: | 851 | case ETHTOOL_GSTATS: |
807 | rc = ethtool_get_stats(dev, useraddr); | 852 | rc = ethtool_get_stats(dev, useraddr); |
808 | break; | 853 | break; |
854 | case ETHTOOL_GPERMADDR: | ||
855 | rc = ethtool_get_perm_addr(dev, useraddr); | ||
856 | break; | ||
809 | default: | 857 | default: |
810 | rc = -EOPNOTSUPP; | 858 | rc = -EOPNOTSUPP; |
811 | } | 859 | } |
@@ -826,6 +874,7 @@ int dev_ethtool(struct ifreq *ifr) | |||
826 | 874 | ||
827 | EXPORT_SYMBOL(dev_ethtool); | 875 | EXPORT_SYMBOL(dev_ethtool); |
828 | EXPORT_SYMBOL(ethtool_op_get_link); | 876 | EXPORT_SYMBOL(ethtool_op_get_link); |
877 | EXPORT_SYMBOL_GPL(ethtool_op_get_perm_addr); | ||
829 | EXPORT_SYMBOL(ethtool_op_get_sg); | 878 | EXPORT_SYMBOL(ethtool_op_get_sg); |
830 | EXPORT_SYMBOL(ethtool_op_get_tso); | 879 | EXPORT_SYMBOL(ethtool_op_get_tso); |
831 | EXPORT_SYMBOL(ethtool_op_get_tx_csum); | 880 | EXPORT_SYMBOL(ethtool_op_get_tx_csum); |
diff --git a/net/core/flow.c b/net/core/flow.c index f289570b15a3..7e95b39de9fd 100644 --- a/net/core/flow.c +++ b/net/core/flow.c | |||
@@ -42,7 +42,7 @@ static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL }; | |||
42 | 42 | ||
43 | #define flow_table(cpu) (per_cpu(flow_tables, cpu)) | 43 | #define flow_table(cpu) (per_cpu(flow_tables, cpu)) |
44 | 44 | ||
45 | static kmem_cache_t *flow_cachep; | 45 | static kmem_cache_t *flow_cachep __read_mostly; |
46 | 46 | ||
47 | static int flow_lwm, flow_hwm; | 47 | static int flow_lwm, flow_hwm; |
48 | 48 | ||
diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 1beb782ac41b..39fc55edf691 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c | |||
@@ -1217,7 +1217,7 @@ static void neigh_proxy_process(unsigned long arg) | |||
1217 | 1217 | ||
1218 | while (skb != (struct sk_buff *)&tbl->proxy_queue) { | 1218 | while (skb != (struct sk_buff *)&tbl->proxy_queue) { |
1219 | struct sk_buff *back = skb; | 1219 | struct sk_buff *back = skb; |
1220 | long tdif = back->stamp.tv_usec - now; | 1220 | long tdif = NEIGH_CB(back)->sched_next - now; |
1221 | 1221 | ||
1222 | skb = skb->next; | 1222 | skb = skb->next; |
1223 | if (tdif <= 0) { | 1223 | if (tdif <= 0) { |
@@ -1248,8 +1248,9 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, | |||
1248 | kfree_skb(skb); | 1248 | kfree_skb(skb); |
1249 | return; | 1249 | return; |
1250 | } | 1250 | } |
1251 | skb->stamp.tv_sec = LOCALLY_ENQUEUED; | 1251 | |
1252 | skb->stamp.tv_usec = sched_next; | 1252 | NEIGH_CB(skb)->sched_next = sched_next; |
1253 | NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED; | ||
1253 | 1254 | ||
1254 | spin_lock(&tbl->proxy_queue.lock); | 1255 | spin_lock(&tbl->proxy_queue.lock); |
1255 | if (del_timer(&tbl->proxy_timer)) { | 1256 | if (del_timer(&tbl->proxy_timer)) { |
@@ -2342,8 +2343,8 @@ void neigh_app_ns(struct neighbour *n) | |||
2342 | } | 2343 | } |
2343 | nlh = (struct nlmsghdr *)skb->data; | 2344 | nlh = (struct nlmsghdr *)skb->data; |
2344 | nlh->nlmsg_flags = NLM_F_REQUEST; | 2345 | nlh->nlmsg_flags = NLM_F_REQUEST; |
2345 | NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH; | 2346 | NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; |
2346 | netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC); | 2347 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); |
2347 | } | 2348 | } |
2348 | 2349 | ||
2349 | static void neigh_app_notify(struct neighbour *n) | 2350 | static void neigh_app_notify(struct neighbour *n) |
@@ -2360,8 +2361,8 @@ static void neigh_app_notify(struct neighbour *n) | |||
2360 | return; | 2361 | return; |
2361 | } | 2362 | } |
2362 | nlh = (struct nlmsghdr *)skb->data; | 2363 | nlh = (struct nlmsghdr *)skb->data; |
2363 | NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH; | 2364 | NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; |
2364 | netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC); | 2365 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); |
2365 | } | 2366 | } |
2366 | 2367 | ||
2367 | #endif /* CONFIG_ARPD */ | 2368 | #endif /* CONFIG_ARPD */ |
diff --git a/net/core/netfilter.c b/net/core/netfilter.c deleted file mode 100644 index 076c156d5eda..000000000000 --- a/net/core/netfilter.c +++ /dev/null | |||
@@ -1,648 +0,0 @@ | |||
1 | /* netfilter.c: look after the filters for various protocols. | ||
2 | * Heavily influenced by the old firewall.c by David Bonn and Alan Cox. | ||
3 | * | ||
4 | * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any | ||
5 | * way. | ||
6 | * | ||
7 | * Rusty Russell (C)2000 -- This code is GPL. | ||
8 | * | ||
9 | * February 2000: Modified by James Morris to have 1 queue per protocol. | ||
10 | * 15-Mar-2000: Added NF_REPEAT --RR. | ||
11 | * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik. | ||
12 | */ | ||
13 | #include <linux/config.h> | ||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/netfilter.h> | ||
16 | #include <net/protocol.h> | ||
17 | #include <linux/init.h> | ||
18 | #include <linux/skbuff.h> | ||
19 | #include <linux/wait.h> | ||
20 | #include <linux/module.h> | ||
21 | #include <linux/interrupt.h> | ||
22 | #include <linux/if.h> | ||
23 | #include <linux/netdevice.h> | ||
24 | #include <linux/inetdevice.h> | ||
25 | #include <linux/tcp.h> | ||
26 | #include <linux/udp.h> | ||
27 | #include <linux/icmp.h> | ||
28 | #include <net/sock.h> | ||
29 | #include <net/route.h> | ||
30 | #include <linux/ip.h> | ||
31 | |||
32 | /* In this code, we can be waiting indefinitely for userspace to | ||
33 | * service a packet if a hook returns NF_QUEUE. We could keep a count | ||
34 | * of skbuffs queued for userspace, and not deregister a hook unless | ||
35 | * this is zero, but that sucks. Now, we simply check when the | ||
36 | * packets come back: if the hook is gone, the packet is discarded. */ | ||
37 | #ifdef CONFIG_NETFILTER_DEBUG | ||
38 | #define NFDEBUG(format, args...) printk(format , ## args) | ||
39 | #else | ||
40 | #define NFDEBUG(format, args...) | ||
41 | #endif | ||
42 | |||
43 | /* Sockopts only registered and called from user context, so | ||
44 | net locking would be overkill. Also, [gs]etsockopt calls may | ||
45 | sleep. */ | ||
46 | static DECLARE_MUTEX(nf_sockopt_mutex); | ||
47 | |||
48 | struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; | ||
49 | static LIST_HEAD(nf_sockopts); | ||
50 | static DEFINE_SPINLOCK(nf_hook_lock); | ||
51 | |||
52 | /* | ||
53 | * A queue handler may be registered for each protocol. Each is protected by | ||
54 | * long term mutex. The handler must provide an an outfn() to accept packets | ||
55 | * for queueing and must reinject all packets it receives, no matter what. | ||
56 | */ | ||
57 | static struct nf_queue_handler_t { | ||
58 | nf_queue_outfn_t outfn; | ||
59 | void *data; | ||
60 | } queue_handler[NPROTO]; | ||
61 | static DEFINE_RWLOCK(queue_handler_lock); | ||
62 | |||
63 | int nf_register_hook(struct nf_hook_ops *reg) | ||
64 | { | ||
65 | struct list_head *i; | ||
66 | |||
67 | spin_lock_bh(&nf_hook_lock); | ||
68 | list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) { | ||
69 | if (reg->priority < ((struct nf_hook_ops *)i)->priority) | ||
70 | break; | ||
71 | } | ||
72 | list_add_rcu(®->list, i->prev); | ||
73 | spin_unlock_bh(&nf_hook_lock); | ||
74 | |||
75 | synchronize_net(); | ||
76 | return 0; | ||
77 | } | ||
78 | |||
79 | void nf_unregister_hook(struct nf_hook_ops *reg) | ||
80 | { | ||
81 | spin_lock_bh(&nf_hook_lock); | ||
82 | list_del_rcu(®->list); | ||
83 | spin_unlock_bh(&nf_hook_lock); | ||
84 | |||
85 | synchronize_net(); | ||
86 | } | ||
87 | |||
88 | /* Do exclusive ranges overlap? */ | ||
89 | static inline int overlap(int min1, int max1, int min2, int max2) | ||
90 | { | ||
91 | return max1 > min2 && min1 < max2; | ||
92 | } | ||
93 | |||
94 | /* Functions to register sockopt ranges (exclusive). */ | ||
95 | int nf_register_sockopt(struct nf_sockopt_ops *reg) | ||
96 | { | ||
97 | struct list_head *i; | ||
98 | int ret = 0; | ||
99 | |||
100 | if (down_interruptible(&nf_sockopt_mutex) != 0) | ||
101 | return -EINTR; | ||
102 | |||
103 | list_for_each(i, &nf_sockopts) { | ||
104 | struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i; | ||
105 | if (ops->pf == reg->pf | ||
106 | && (overlap(ops->set_optmin, ops->set_optmax, | ||
107 | reg->set_optmin, reg->set_optmax) | ||
108 | || overlap(ops->get_optmin, ops->get_optmax, | ||
109 | reg->get_optmin, reg->get_optmax))) { | ||
110 | NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n", | ||
111 | ops->set_optmin, ops->set_optmax, | ||
112 | ops->get_optmin, ops->get_optmax, | ||
113 | reg->set_optmin, reg->set_optmax, | ||
114 | reg->get_optmin, reg->get_optmax); | ||
115 | ret = -EBUSY; | ||
116 | goto out; | ||
117 | } | ||
118 | } | ||
119 | |||
120 | list_add(®->list, &nf_sockopts); | ||
121 | out: | ||
122 | up(&nf_sockopt_mutex); | ||
123 | return ret; | ||
124 | } | ||
125 | |||
126 | void nf_unregister_sockopt(struct nf_sockopt_ops *reg) | ||
127 | { | ||
128 | /* No point being interruptible: we're probably in cleanup_module() */ | ||
129 | restart: | ||
130 | down(&nf_sockopt_mutex); | ||
131 | if (reg->use != 0) { | ||
132 | /* To be woken by nf_sockopt call... */ | ||
133 | /* FIXME: Stuart Young's name appears gratuitously. */ | ||
134 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
135 | reg->cleanup_task = current; | ||
136 | up(&nf_sockopt_mutex); | ||
137 | schedule(); | ||
138 | goto restart; | ||
139 | } | ||
140 | list_del(®->list); | ||
141 | up(&nf_sockopt_mutex); | ||
142 | } | ||
143 | |||
144 | /* Call get/setsockopt() */ | ||
145 | static int nf_sockopt(struct sock *sk, int pf, int val, | ||
146 | char __user *opt, int *len, int get) | ||
147 | { | ||
148 | struct list_head *i; | ||
149 | struct nf_sockopt_ops *ops; | ||
150 | int ret; | ||
151 | |||
152 | if (down_interruptible(&nf_sockopt_mutex) != 0) | ||
153 | return -EINTR; | ||
154 | |||
155 | list_for_each(i, &nf_sockopts) { | ||
156 | ops = (struct nf_sockopt_ops *)i; | ||
157 | if (ops->pf == pf) { | ||
158 | if (get) { | ||
159 | if (val >= ops->get_optmin | ||
160 | && val < ops->get_optmax) { | ||
161 | ops->use++; | ||
162 | up(&nf_sockopt_mutex); | ||
163 | ret = ops->get(sk, val, opt, len); | ||
164 | goto out; | ||
165 | } | ||
166 | } else { | ||
167 | if (val >= ops->set_optmin | ||
168 | && val < ops->set_optmax) { | ||
169 | ops->use++; | ||
170 | up(&nf_sockopt_mutex); | ||
171 | ret = ops->set(sk, val, opt, *len); | ||
172 | goto out; | ||
173 | } | ||
174 | } | ||
175 | } | ||
176 | } | ||
177 | up(&nf_sockopt_mutex); | ||
178 | return -ENOPROTOOPT; | ||
179 | |||
180 | out: | ||
181 | down(&nf_sockopt_mutex); | ||
182 | ops->use--; | ||
183 | if (ops->cleanup_task) | ||
184 | wake_up_process(ops->cleanup_task); | ||
185 | up(&nf_sockopt_mutex); | ||
186 | return ret; | ||
187 | } | ||
188 | |||
189 | int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt, | ||
190 | int len) | ||
191 | { | ||
192 | return nf_sockopt(sk, pf, val, opt, &len, 0); | ||
193 | } | ||
194 | |||
195 | int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len) | ||
196 | { | ||
197 | return nf_sockopt(sk, pf, val, opt, len, 1); | ||
198 | } | ||
199 | |||
200 | static unsigned int nf_iterate(struct list_head *head, | ||
201 | struct sk_buff **skb, | ||
202 | int hook, | ||
203 | const struct net_device *indev, | ||
204 | const struct net_device *outdev, | ||
205 | struct list_head **i, | ||
206 | int (*okfn)(struct sk_buff *), | ||
207 | int hook_thresh) | ||
208 | { | ||
209 | unsigned int verdict; | ||
210 | |||
211 | /* | ||
212 | * The caller must not block between calls to this | ||
213 | * function because of risk of continuing from deleted element. | ||
214 | */ | ||
215 | list_for_each_continue_rcu(*i, head) { | ||
216 | struct nf_hook_ops *elem = (struct nf_hook_ops *)*i; | ||
217 | |||
218 | if (hook_thresh > elem->priority) | ||
219 | continue; | ||
220 | |||
221 | /* Optimization: we don't need to hold module | ||
222 | reference here, since function can't sleep. --RR */ | ||
223 | verdict = elem->hook(hook, skb, indev, outdev, okfn); | ||
224 | if (verdict != NF_ACCEPT) { | ||
225 | #ifdef CONFIG_NETFILTER_DEBUG | ||
226 | if (unlikely(verdict > NF_MAX_VERDICT)) { | ||
227 | NFDEBUG("Evil return from %p(%u).\n", | ||
228 | elem->hook, hook); | ||
229 | continue; | ||
230 | } | ||
231 | #endif | ||
232 | if (verdict != NF_REPEAT) | ||
233 | return verdict; | ||
234 | *i = (*i)->prev; | ||
235 | } | ||
236 | } | ||
237 | return NF_ACCEPT; | ||
238 | } | ||
239 | |||
240 | int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data) | ||
241 | { | ||
242 | int ret; | ||
243 | |||
244 | write_lock_bh(&queue_handler_lock); | ||
245 | if (queue_handler[pf].outfn) | ||
246 | ret = -EBUSY; | ||
247 | else { | ||
248 | queue_handler[pf].outfn = outfn; | ||
249 | queue_handler[pf].data = data; | ||
250 | ret = 0; | ||
251 | } | ||
252 | write_unlock_bh(&queue_handler_lock); | ||
253 | |||
254 | return ret; | ||
255 | } | ||
256 | |||
257 | /* The caller must flush their queue before this */ | ||
258 | int nf_unregister_queue_handler(int pf) | ||
259 | { | ||
260 | write_lock_bh(&queue_handler_lock); | ||
261 | queue_handler[pf].outfn = NULL; | ||
262 | queue_handler[pf].data = NULL; | ||
263 | write_unlock_bh(&queue_handler_lock); | ||
264 | |||
265 | return 0; | ||
266 | } | ||
267 | |||
268 | /* | ||
269 | * Any packet that leaves via this function must come back | ||
270 | * through nf_reinject(). | ||
271 | */ | ||
272 | static int nf_queue(struct sk_buff *skb, | ||
273 | struct list_head *elem, | ||
274 | int pf, unsigned int hook, | ||
275 | struct net_device *indev, | ||
276 | struct net_device *outdev, | ||
277 | int (*okfn)(struct sk_buff *)) | ||
278 | { | ||
279 | int status; | ||
280 | struct nf_info *info; | ||
281 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
282 | struct net_device *physindev = NULL; | ||
283 | struct net_device *physoutdev = NULL; | ||
284 | #endif | ||
285 | |||
286 | /* QUEUE == DROP if noone is waiting, to be safe. */ | ||
287 | read_lock(&queue_handler_lock); | ||
288 | if (!queue_handler[pf].outfn) { | ||
289 | read_unlock(&queue_handler_lock); | ||
290 | kfree_skb(skb); | ||
291 | return 1; | ||
292 | } | ||
293 | |||
294 | info = kmalloc(sizeof(*info), GFP_ATOMIC); | ||
295 | if (!info) { | ||
296 | if (net_ratelimit()) | ||
297 | printk(KERN_ERR "OOM queueing packet %p\n", | ||
298 | skb); | ||
299 | read_unlock(&queue_handler_lock); | ||
300 | kfree_skb(skb); | ||
301 | return 1; | ||
302 | } | ||
303 | |||
304 | *info = (struct nf_info) { | ||
305 | (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn }; | ||
306 | |||
307 | /* If it's going away, ignore hook. */ | ||
308 | if (!try_module_get(info->elem->owner)) { | ||
309 | read_unlock(&queue_handler_lock); | ||
310 | kfree(info); | ||
311 | return 0; | ||
312 | } | ||
313 | |||
314 | /* Bump dev refs so they don't vanish while packet is out */ | ||
315 | if (indev) dev_hold(indev); | ||
316 | if (outdev) dev_hold(outdev); | ||
317 | |||
318 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
319 | if (skb->nf_bridge) { | ||
320 | physindev = skb->nf_bridge->physindev; | ||
321 | if (physindev) dev_hold(physindev); | ||
322 | physoutdev = skb->nf_bridge->physoutdev; | ||
323 | if (physoutdev) dev_hold(physoutdev); | ||
324 | } | ||
325 | #endif | ||
326 | |||
327 | status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data); | ||
328 | read_unlock(&queue_handler_lock); | ||
329 | |||
330 | if (status < 0) { | ||
331 | /* James M doesn't say fuck enough. */ | ||
332 | if (indev) dev_put(indev); | ||
333 | if (outdev) dev_put(outdev); | ||
334 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
335 | if (physindev) dev_put(physindev); | ||
336 | if (physoutdev) dev_put(physoutdev); | ||
337 | #endif | ||
338 | module_put(info->elem->owner); | ||
339 | kfree(info); | ||
340 | kfree_skb(skb); | ||
341 | return 1; | ||
342 | } | ||
343 | return 1; | ||
344 | } | ||
345 | |||
346 | /* Returns 1 if okfn() needs to be executed by the caller, | ||
347 | * -EPERM for NF_DROP, 0 otherwise. */ | ||
348 | int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, | ||
349 | struct net_device *indev, | ||
350 | struct net_device *outdev, | ||
351 | int (*okfn)(struct sk_buff *), | ||
352 | int hook_thresh) | ||
353 | { | ||
354 | struct list_head *elem; | ||
355 | unsigned int verdict; | ||
356 | int ret = 0; | ||
357 | |||
358 | /* We may already have this, but read-locks nest anyway */ | ||
359 | rcu_read_lock(); | ||
360 | |||
361 | elem = &nf_hooks[pf][hook]; | ||
362 | next_hook: | ||
363 | verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev, | ||
364 | outdev, &elem, okfn, hook_thresh); | ||
365 | if (verdict == NF_ACCEPT || verdict == NF_STOP) { | ||
366 | ret = 1; | ||
367 | goto unlock; | ||
368 | } else if (verdict == NF_DROP) { | ||
369 | kfree_skb(*pskb); | ||
370 | ret = -EPERM; | ||
371 | } else if (verdict == NF_QUEUE) { | ||
372 | NFDEBUG("nf_hook: Verdict = QUEUE.\n"); | ||
373 | if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn)) | ||
374 | goto next_hook; | ||
375 | } | ||
376 | unlock: | ||
377 | rcu_read_unlock(); | ||
378 | return ret; | ||
379 | } | ||
380 | |||
381 | void nf_reinject(struct sk_buff *skb, struct nf_info *info, | ||
382 | unsigned int verdict) | ||
383 | { | ||
384 | struct list_head *elem = &info->elem->list; | ||
385 | struct list_head *i; | ||
386 | |||
387 | rcu_read_lock(); | ||
388 | |||
389 | /* Release those devices we held, or Alexey will kill me. */ | ||
390 | if (info->indev) dev_put(info->indev); | ||
391 | if (info->outdev) dev_put(info->outdev); | ||
392 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
393 | if (skb->nf_bridge) { | ||
394 | if (skb->nf_bridge->physindev) | ||
395 | dev_put(skb->nf_bridge->physindev); | ||
396 | if (skb->nf_bridge->physoutdev) | ||
397 | dev_put(skb->nf_bridge->physoutdev); | ||
398 | } | ||
399 | #endif | ||
400 | |||
401 | /* Drop reference to owner of hook which queued us. */ | ||
402 | module_put(info->elem->owner); | ||
403 | |||
404 | list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) { | ||
405 | if (i == elem) | ||
406 | break; | ||
407 | } | ||
408 | |||
409 | if (elem == &nf_hooks[info->pf][info->hook]) { | ||
410 | /* The module which sent it to userspace is gone. */ | ||
411 | NFDEBUG("%s: module disappeared, dropping packet.\n", | ||
412 | __FUNCTION__); | ||
413 | verdict = NF_DROP; | ||
414 | } | ||
415 | |||
416 | /* Continue traversal iff userspace said ok... */ | ||
417 | if (verdict == NF_REPEAT) { | ||
418 | elem = elem->prev; | ||
419 | verdict = NF_ACCEPT; | ||
420 | } | ||
421 | |||
422 | if (verdict == NF_ACCEPT) { | ||
423 | next_hook: | ||
424 | verdict = nf_iterate(&nf_hooks[info->pf][info->hook], | ||
425 | &skb, info->hook, | ||
426 | info->indev, info->outdev, &elem, | ||
427 | info->okfn, INT_MIN); | ||
428 | } | ||
429 | |||
430 | switch (verdict) { | ||
431 | case NF_ACCEPT: | ||
432 | info->okfn(skb); | ||
433 | break; | ||
434 | |||
435 | case NF_QUEUE: | ||
436 | if (!nf_queue(skb, elem, info->pf, info->hook, | ||
437 | info->indev, info->outdev, info->okfn)) | ||
438 | goto next_hook; | ||
439 | break; | ||
440 | } | ||
441 | rcu_read_unlock(); | ||
442 | |||
443 | if (verdict == NF_DROP) | ||
444 | kfree_skb(skb); | ||
445 | |||
446 | kfree(info); | ||
447 | return; | ||
448 | } | ||
449 | |||
450 | #ifdef CONFIG_INET | ||
451 | /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ | ||
452 | int ip_route_me_harder(struct sk_buff **pskb) | ||
453 | { | ||
454 | struct iphdr *iph = (*pskb)->nh.iph; | ||
455 | struct rtable *rt; | ||
456 | struct flowi fl = {}; | ||
457 | struct dst_entry *odst; | ||
458 | unsigned int hh_len; | ||
459 | |||
460 | /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause | ||
461 | * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook. | ||
462 | */ | ||
463 | if (inet_addr_type(iph->saddr) == RTN_LOCAL) { | ||
464 | fl.nl_u.ip4_u.daddr = iph->daddr; | ||
465 | fl.nl_u.ip4_u.saddr = iph->saddr; | ||
466 | fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); | ||
467 | fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0; | ||
468 | #ifdef CONFIG_IP_ROUTE_FWMARK | ||
469 | fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark; | ||
470 | #endif | ||
471 | fl.proto = iph->protocol; | ||
472 | if (ip_route_output_key(&rt, &fl) != 0) | ||
473 | return -1; | ||
474 | |||
475 | /* Drop old route. */ | ||
476 | dst_release((*pskb)->dst); | ||
477 | (*pskb)->dst = &rt->u.dst; | ||
478 | } else { | ||
479 | /* non-local src, find valid iif to satisfy | ||
480 | * rp-filter when calling ip_route_input. */ | ||
481 | fl.nl_u.ip4_u.daddr = iph->saddr; | ||
482 | if (ip_route_output_key(&rt, &fl) != 0) | ||
483 | return -1; | ||
484 | |||
485 | odst = (*pskb)->dst; | ||
486 | if (ip_route_input(*pskb, iph->daddr, iph->saddr, | ||
487 | RT_TOS(iph->tos), rt->u.dst.dev) != 0) { | ||
488 | dst_release(&rt->u.dst); | ||
489 | return -1; | ||
490 | } | ||
491 | dst_release(&rt->u.dst); | ||
492 | dst_release(odst); | ||
493 | } | ||
494 | |||
495 | if ((*pskb)->dst->error) | ||
496 | return -1; | ||
497 | |||
498 | /* Change in oif may mean change in hh_len. */ | ||
499 | hh_len = (*pskb)->dst->dev->hard_header_len; | ||
500 | if (skb_headroom(*pskb) < hh_len) { | ||
501 | struct sk_buff *nskb; | ||
502 | |||
503 | nskb = skb_realloc_headroom(*pskb, hh_len); | ||
504 | if (!nskb) | ||
505 | return -1; | ||
506 | if ((*pskb)->sk) | ||
507 | skb_set_owner_w(nskb, (*pskb)->sk); | ||
508 | kfree_skb(*pskb); | ||
509 | *pskb = nskb; | ||
510 | } | ||
511 | |||
512 | return 0; | ||
513 | } | ||
514 | EXPORT_SYMBOL(ip_route_me_harder); | ||
515 | |||
516 | int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len) | ||
517 | { | ||
518 | struct sk_buff *nskb; | ||
519 | |||
520 | if (writable_len > (*pskb)->len) | ||
521 | return 0; | ||
522 | |||
523 | /* Not exclusive use of packet? Must copy. */ | ||
524 | if (skb_shared(*pskb) || skb_cloned(*pskb)) | ||
525 | goto copy_skb; | ||
526 | |||
527 | return pskb_may_pull(*pskb, writable_len); | ||
528 | |||
529 | copy_skb: | ||
530 | nskb = skb_copy(*pskb, GFP_ATOMIC); | ||
531 | if (!nskb) | ||
532 | return 0; | ||
533 | BUG_ON(skb_is_nonlinear(nskb)); | ||
534 | |||
535 | /* Rest of kernel will get very unhappy if we pass it a | ||
536 | suddenly-orphaned skbuff */ | ||
537 | if ((*pskb)->sk) | ||
538 | skb_set_owner_w(nskb, (*pskb)->sk); | ||
539 | kfree_skb(*pskb); | ||
540 | *pskb = nskb; | ||
541 | return 1; | ||
542 | } | ||
543 | EXPORT_SYMBOL(skb_ip_make_writable); | ||
544 | #endif /*CONFIG_INET*/ | ||
545 | |||
546 | /* Internal logging interface, which relies on the real | ||
547 | LOG target modules */ | ||
548 | |||
549 | #define NF_LOG_PREFIXLEN 128 | ||
550 | |||
551 | static nf_logfn *nf_logging[NPROTO]; /* = NULL */ | ||
552 | static int reported = 0; | ||
553 | static DEFINE_SPINLOCK(nf_log_lock); | ||
554 | |||
555 | int nf_log_register(int pf, nf_logfn *logfn) | ||
556 | { | ||
557 | int ret = -EBUSY; | ||
558 | |||
559 | /* Any setup of logging members must be done before | ||
560 | * substituting pointer. */ | ||
561 | spin_lock(&nf_log_lock); | ||
562 | if (!nf_logging[pf]) { | ||
563 | rcu_assign_pointer(nf_logging[pf], logfn); | ||
564 | ret = 0; | ||
565 | } | ||
566 | spin_unlock(&nf_log_lock); | ||
567 | return ret; | ||
568 | } | ||
569 | |||
570 | void nf_log_unregister(int pf, nf_logfn *logfn) | ||
571 | { | ||
572 | spin_lock(&nf_log_lock); | ||
573 | if (nf_logging[pf] == logfn) | ||
574 | nf_logging[pf] = NULL; | ||
575 | spin_unlock(&nf_log_lock); | ||
576 | |||
577 | /* Give time to concurrent readers. */ | ||
578 | synchronize_net(); | ||
579 | } | ||
580 | |||
581 | void nf_log_packet(int pf, | ||
582 | unsigned int hooknum, | ||
583 | const struct sk_buff *skb, | ||
584 | const struct net_device *in, | ||
585 | const struct net_device *out, | ||
586 | const char *fmt, ...) | ||
587 | { | ||
588 | va_list args; | ||
589 | char prefix[NF_LOG_PREFIXLEN]; | ||
590 | nf_logfn *logfn; | ||
591 | |||
592 | rcu_read_lock(); | ||
593 | logfn = rcu_dereference(nf_logging[pf]); | ||
594 | if (logfn) { | ||
595 | va_start(args, fmt); | ||
596 | vsnprintf(prefix, sizeof(prefix), fmt, args); | ||
597 | va_end(args); | ||
598 | /* We must read logging before nf_logfn[pf] */ | ||
599 | logfn(hooknum, skb, in, out, prefix); | ||
600 | } else if (!reported) { | ||
601 | printk(KERN_WARNING "nf_log_packet: can\'t log yet, " | ||
602 | "no backend logging module loaded in!\n"); | ||
603 | reported++; | ||
604 | } | ||
605 | rcu_read_unlock(); | ||
606 | } | ||
607 | EXPORT_SYMBOL(nf_log_register); | ||
608 | EXPORT_SYMBOL(nf_log_unregister); | ||
609 | EXPORT_SYMBOL(nf_log_packet); | ||
610 | |||
611 | /* This does not belong here, but locally generated errors need it if connection | ||
612 | tracking in use: without this, connection may not be in hash table, and hence | ||
613 | manufactured ICMP or RST packets will not be associated with it. */ | ||
614 | void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *); | ||
615 | |||
616 | void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) | ||
617 | { | ||
618 | void (*attach)(struct sk_buff *, struct sk_buff *); | ||
619 | |||
620 | if (skb->nfct && (attach = ip_ct_attach) != NULL) { | ||
621 | mb(); /* Just to be sure: must be read before executing this */ | ||
622 | attach(new, skb); | ||
623 | } | ||
624 | } | ||
625 | |||
626 | void __init netfilter_init(void) | ||
627 | { | ||
628 | int i, h; | ||
629 | |||
630 | for (i = 0; i < NPROTO; i++) { | ||
631 | for (h = 0; h < NF_MAX_HOOKS; h++) | ||
632 | INIT_LIST_HEAD(&nf_hooks[i][h]); | ||
633 | } | ||
634 | } | ||
635 | |||
636 | EXPORT_SYMBOL(ip_ct_attach); | ||
637 | EXPORT_SYMBOL(nf_ct_attach); | ||
638 | EXPORT_SYMBOL(nf_getsockopt); | ||
639 | EXPORT_SYMBOL(nf_hook_slow); | ||
640 | EXPORT_SYMBOL(nf_hooks); | ||
641 | EXPORT_SYMBOL(nf_register_hook); | ||
642 | EXPORT_SYMBOL(nf_register_queue_handler); | ||
643 | EXPORT_SYMBOL(nf_register_sockopt); | ||
644 | EXPORT_SYMBOL(nf_reinject); | ||
645 | EXPORT_SYMBOL(nf_setsockopt); | ||
646 | EXPORT_SYMBOL(nf_unregister_hook); | ||
647 | EXPORT_SYMBOL(nf_unregister_queue_handler); | ||
648 | EXPORT_SYMBOL(nf_unregister_sockopt); | ||
diff --git a/net/core/request_sock.c b/net/core/request_sock.c index bb55675f0685..b8203de5ff07 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c | |||
@@ -32,7 +32,6 @@ | |||
32 | * Further increasing requires to change hash table size. | 32 | * Further increasing requires to change hash table size. |
33 | */ | 33 | */ |
34 | int sysctl_max_syn_backlog = 256; | 34 | int sysctl_max_syn_backlog = 256; |
35 | EXPORT_SYMBOL(sysctl_max_syn_backlog); | ||
36 | 35 | ||
37 | int reqsk_queue_alloc(struct request_sock_queue *queue, | 36 | int reqsk_queue_alloc(struct request_sock_queue *queue, |
38 | const int nr_table_entries) | 37 | const int nr_table_entries) |
@@ -53,6 +52,8 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, | |||
53 | get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); | 52 | get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); |
54 | rwlock_init(&queue->syn_wait_lock); | 53 | rwlock_init(&queue->syn_wait_lock); |
55 | queue->rskq_accept_head = queue->rskq_accept_head = NULL; | 54 | queue->rskq_accept_head = queue->rskq_accept_head = NULL; |
55 | queue->rskq_defer_accept = 0; | ||
56 | lopt->nr_table_entries = nr_table_entries; | ||
56 | 57 | ||
57 | write_lock_bh(&queue->syn_wait_lock); | 58 | write_lock_bh(&queue->syn_wait_lock); |
58 | queue->listen_opt = lopt; | 59 | queue->listen_opt = lopt; |
@@ -62,3 +63,28 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, | |||
62 | } | 63 | } |
63 | 64 | ||
64 | EXPORT_SYMBOL(reqsk_queue_alloc); | 65 | EXPORT_SYMBOL(reqsk_queue_alloc); |
66 | |||
67 | void reqsk_queue_destroy(struct request_sock_queue *queue) | ||
68 | { | ||
69 | /* make all the listen_opt local to us */ | ||
70 | struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue); | ||
71 | |||
72 | if (lopt->qlen != 0) { | ||
73 | int i; | ||
74 | |||
75 | for (i = 0; i < lopt->nr_table_entries; i++) { | ||
76 | struct request_sock *req; | ||
77 | |||
78 | while ((req = lopt->syn_table[i]) != NULL) { | ||
79 | lopt->syn_table[i] = req->dl_next; | ||
80 | lopt->qlen--; | ||
81 | reqsk_free(req); | ||
82 | } | ||
83 | } | ||
84 | } | ||
85 | |||
86 | BUG_TRAP(lopt->qlen == 0); | ||
87 | kfree(lopt); | ||
88 | } | ||
89 | |||
90 | EXPORT_SYMBOL(reqsk_queue_destroy); | ||
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4b1bb30e6381..9bed7569ce3f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c | |||
@@ -148,7 +148,7 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) | |||
148 | { | 148 | { |
149 | int err = 0; | 149 | int err = 0; |
150 | 150 | ||
151 | NETLINK_CB(skb).dst_groups = group; | 151 | NETLINK_CB(skb).dst_group = group; |
152 | if (echo) | 152 | if (echo) |
153 | atomic_inc(&skb->users); | 153 | atomic_inc(&skb->users); |
154 | netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL); | 154 | netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL); |
@@ -458,8 +458,8 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) | |||
458 | kfree_skb(skb); | 458 | kfree_skb(skb); |
459 | return; | 459 | return; |
460 | } | 460 | } |
461 | NETLINK_CB(skb).dst_groups = RTMGRP_LINK; | 461 | NETLINK_CB(skb).dst_group = RTNLGRP_LINK; |
462 | netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_KERNEL); | 462 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL); |
463 | } | 463 | } |
464 | 464 | ||
465 | static int rtnetlink_done(struct netlink_callback *cb) | 465 | static int rtnetlink_done(struct netlink_callback *cb) |
@@ -708,7 +708,8 @@ void __init rtnetlink_init(void) | |||
708 | if (!rta_buf) | 708 | if (!rta_buf) |
709 | panic("rtnetlink_init: cannot allocate rta_buf\n"); | 709 | panic("rtnetlink_init: cannot allocate rta_buf\n"); |
710 | 710 | ||
711 | rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv); | 711 | rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv, |
712 | THIS_MODULE); | ||
712 | if (rtnl == NULL) | 713 | if (rtnl == NULL) |
713 | panic("rtnetlink_init: cannot initialize rtnetlink\n"); | 714 | panic("rtnetlink_init: cannot initialize rtnetlink\n"); |
714 | netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); | 715 | netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); |
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7eab867ede59..f80a28785610 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -68,7 +68,10 @@ | |||
68 | #include <asm/uaccess.h> | 68 | #include <asm/uaccess.h> |
69 | #include <asm/system.h> | 69 | #include <asm/system.h> |
70 | 70 | ||
71 | static kmem_cache_t *skbuff_head_cache; | 71 | static kmem_cache_t *skbuff_head_cache __read_mostly; |
72 | static kmem_cache_t *skbuff_fclone_cache __read_mostly; | ||
73 | |||
74 | struct timeval __read_mostly skb_tv_base; | ||
72 | 75 | ||
73 | /* | 76 | /* |
74 | * Keep out-of-line to prevent kernel bloat. | 77 | * Keep out-of-line to prevent kernel bloat. |
@@ -118,7 +121,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) | |||
118 | */ | 121 | */ |
119 | 122 | ||
120 | /** | 123 | /** |
121 | * alloc_skb - allocate a network buffer | 124 | * __alloc_skb - allocate a network buffer |
122 | * @size: size to allocate | 125 | * @size: size to allocate |
123 | * @gfp_mask: allocation mask | 126 | * @gfp_mask: allocation mask |
124 | * | 127 | * |
@@ -129,14 +132,20 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) | |||
129 | * Buffers may only be allocated from interrupts using a @gfp_mask of | 132 | * Buffers may only be allocated from interrupts using a @gfp_mask of |
130 | * %GFP_ATOMIC. | 133 | * %GFP_ATOMIC. |
131 | */ | 134 | */ |
132 | struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask) | 135 | struct sk_buff *__alloc_skb(unsigned int size, unsigned int __nocast gfp_mask, |
136 | int fclone) | ||
133 | { | 137 | { |
134 | struct sk_buff *skb; | 138 | struct sk_buff *skb; |
135 | u8 *data; | 139 | u8 *data; |
136 | 140 | ||
137 | /* Get the HEAD */ | 141 | /* Get the HEAD */ |
138 | skb = kmem_cache_alloc(skbuff_head_cache, | 142 | if (fclone) |
139 | gfp_mask & ~__GFP_DMA); | 143 | skb = kmem_cache_alloc(skbuff_fclone_cache, |
144 | gfp_mask & ~__GFP_DMA); | ||
145 | else | ||
146 | skb = kmem_cache_alloc(skbuff_head_cache, | ||
147 | gfp_mask & ~__GFP_DMA); | ||
148 | |||
140 | if (!skb) | 149 | if (!skb) |
141 | goto out; | 150 | goto out; |
142 | 151 | ||
@@ -153,7 +162,15 @@ struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask) | |||
153 | skb->data = data; | 162 | skb->data = data; |
154 | skb->tail = data; | 163 | skb->tail = data; |
155 | skb->end = data + size; | 164 | skb->end = data + size; |
165 | if (fclone) { | ||
166 | struct sk_buff *child = skb + 1; | ||
167 | atomic_t *fclone_ref = (atomic_t *) (child + 1); | ||
156 | 168 | ||
169 | skb->fclone = SKB_FCLONE_ORIG; | ||
170 | atomic_set(fclone_ref, 1); | ||
171 | |||
172 | child->fclone = SKB_FCLONE_UNAVAILABLE; | ||
173 | } | ||
157 | atomic_set(&(skb_shinfo(skb)->dataref), 1); | 174 | atomic_set(&(skb_shinfo(skb)->dataref), 1); |
158 | skb_shinfo(skb)->nr_frags = 0; | 175 | skb_shinfo(skb)->nr_frags = 0; |
159 | skb_shinfo(skb)->tso_size = 0; | 176 | skb_shinfo(skb)->tso_size = 0; |
@@ -266,8 +283,34 @@ void skb_release_data(struct sk_buff *skb) | |||
266 | */ | 283 | */ |
267 | void kfree_skbmem(struct sk_buff *skb) | 284 | void kfree_skbmem(struct sk_buff *skb) |
268 | { | 285 | { |
286 | struct sk_buff *other; | ||
287 | atomic_t *fclone_ref; | ||
288 | |||
269 | skb_release_data(skb); | 289 | skb_release_data(skb); |
270 | kmem_cache_free(skbuff_head_cache, skb); | 290 | switch (skb->fclone) { |
291 | case SKB_FCLONE_UNAVAILABLE: | ||
292 | kmem_cache_free(skbuff_head_cache, skb); | ||
293 | break; | ||
294 | |||
295 | case SKB_FCLONE_ORIG: | ||
296 | fclone_ref = (atomic_t *) (skb + 2); | ||
297 | if (atomic_dec_and_test(fclone_ref)) | ||
298 | kmem_cache_free(skbuff_fclone_cache, skb); | ||
299 | break; | ||
300 | |||
301 | case SKB_FCLONE_CLONE: | ||
302 | fclone_ref = (atomic_t *) (skb + 1); | ||
303 | other = skb - 1; | ||
304 | |||
305 | /* The clone portion is available for | ||
306 | * fast-cloning again. | ||
307 | */ | ||
308 | skb->fclone = SKB_FCLONE_UNAVAILABLE; | ||
309 | |||
310 | if (atomic_dec_and_test(fclone_ref)) | ||
311 | kmem_cache_free(skbuff_fclone_cache, other); | ||
312 | break; | ||
313 | }; | ||
271 | } | 314 | } |
272 | 315 | ||
273 | /** | 316 | /** |
@@ -281,8 +324,6 @@ void kfree_skbmem(struct sk_buff *skb) | |||
281 | 324 | ||
282 | void __kfree_skb(struct sk_buff *skb) | 325 | void __kfree_skb(struct sk_buff *skb) |
283 | { | 326 | { |
284 | BUG_ON(skb->list != NULL); | ||
285 | |||
286 | dst_release(skb->dst); | 327 | dst_release(skb->dst); |
287 | #ifdef CONFIG_XFRM | 328 | #ifdef CONFIG_XFRM |
288 | secpath_put(skb->sp); | 329 | secpath_put(skb->sp); |
@@ -302,7 +343,6 @@ void __kfree_skb(struct sk_buff *skb) | |||
302 | skb->tc_index = 0; | 343 | skb->tc_index = 0; |
303 | #ifdef CONFIG_NET_CLS_ACT | 344 | #ifdef CONFIG_NET_CLS_ACT |
304 | skb->tc_verd = 0; | 345 | skb->tc_verd = 0; |
305 | skb->tc_classid = 0; | ||
306 | #endif | 346 | #endif |
307 | #endif | 347 | #endif |
308 | 348 | ||
@@ -325,19 +365,27 @@ void __kfree_skb(struct sk_buff *skb) | |||
325 | 365 | ||
326 | struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) | 366 | struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) |
327 | { | 367 | { |
328 | struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); | 368 | struct sk_buff *n; |
329 | 369 | ||
330 | if (!n) | 370 | n = skb + 1; |
331 | return NULL; | 371 | if (skb->fclone == SKB_FCLONE_ORIG && |
372 | n->fclone == SKB_FCLONE_UNAVAILABLE) { | ||
373 | atomic_t *fclone_ref = (atomic_t *) (n + 1); | ||
374 | n->fclone = SKB_FCLONE_CLONE; | ||
375 | atomic_inc(fclone_ref); | ||
376 | } else { | ||
377 | n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); | ||
378 | if (!n) | ||
379 | return NULL; | ||
380 | n->fclone = SKB_FCLONE_UNAVAILABLE; | ||
381 | } | ||
332 | 382 | ||
333 | #define C(x) n->x = skb->x | 383 | #define C(x) n->x = skb->x |
334 | 384 | ||
335 | n->next = n->prev = NULL; | 385 | n->next = n->prev = NULL; |
336 | n->list = NULL; | ||
337 | n->sk = NULL; | 386 | n->sk = NULL; |
338 | C(stamp); | 387 | C(tstamp); |
339 | C(dev); | 388 | C(dev); |
340 | C(real_dev); | ||
341 | C(h); | 389 | C(h); |
342 | C(nh); | 390 | C(nh); |
343 | C(mac); | 391 | C(mac); |
@@ -361,7 +409,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) | |||
361 | n->destructor = NULL; | 409 | n->destructor = NULL; |
362 | #ifdef CONFIG_NETFILTER | 410 | #ifdef CONFIG_NETFILTER |
363 | C(nfmark); | 411 | C(nfmark); |
364 | C(nfcache); | ||
365 | C(nfct); | 412 | C(nfct); |
366 | nf_conntrack_get(skb->nfct); | 413 | nf_conntrack_get(skb->nfct); |
367 | C(nfctinfo); | 414 | C(nfctinfo); |
@@ -370,9 +417,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) | |||
370 | nf_bridge_get(skb->nf_bridge); | 417 | nf_bridge_get(skb->nf_bridge); |
371 | #endif | 418 | #endif |
372 | #endif /*CONFIG_NETFILTER*/ | 419 | #endif /*CONFIG_NETFILTER*/ |
373 | #if defined(CONFIG_HIPPI) | ||
374 | C(private); | ||
375 | #endif | ||
376 | #ifdef CONFIG_NET_SCHED | 420 | #ifdef CONFIG_NET_SCHED |
377 | C(tc_index); | 421 | C(tc_index); |
378 | #ifdef CONFIG_NET_CLS_ACT | 422 | #ifdef CONFIG_NET_CLS_ACT |
@@ -380,7 +424,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) | |||
380 | n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd); | 424 | n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd); |
381 | n->tc_verd = CLR_TC_MUNGED(n->tc_verd); | 425 | n->tc_verd = CLR_TC_MUNGED(n->tc_verd); |
382 | C(input_dev); | 426 | C(input_dev); |
383 | C(tc_classid); | ||
384 | #endif | 427 | #endif |
385 | 428 | ||
386 | #endif | 429 | #endif |
@@ -404,10 +447,8 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) | |||
404 | */ | 447 | */ |
405 | unsigned long offset = new->data - old->data; | 448 | unsigned long offset = new->data - old->data; |
406 | 449 | ||
407 | new->list = NULL; | ||
408 | new->sk = NULL; | 450 | new->sk = NULL; |
409 | new->dev = old->dev; | 451 | new->dev = old->dev; |
410 | new->real_dev = old->real_dev; | ||
411 | new->priority = old->priority; | 452 | new->priority = old->priority; |
412 | new->protocol = old->protocol; | 453 | new->protocol = old->protocol; |
413 | new->dst = dst_clone(old->dst); | 454 | new->dst = dst_clone(old->dst); |
@@ -419,12 +460,12 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) | |||
419 | new->mac.raw = old->mac.raw + offset; | 460 | new->mac.raw = old->mac.raw + offset; |
420 | memcpy(new->cb, old->cb, sizeof(old->cb)); | 461 | memcpy(new->cb, old->cb, sizeof(old->cb)); |
421 | new->local_df = old->local_df; | 462 | new->local_df = old->local_df; |
463 | new->fclone = SKB_FCLONE_UNAVAILABLE; | ||
422 | new->pkt_type = old->pkt_type; | 464 | new->pkt_type = old->pkt_type; |
423 | new->stamp = old->stamp; | 465 | new->tstamp = old->tstamp; |
424 | new->destructor = NULL; | 466 | new->destructor = NULL; |
425 | #ifdef CONFIG_NETFILTER | 467 | #ifdef CONFIG_NETFILTER |
426 | new->nfmark = old->nfmark; | 468 | new->nfmark = old->nfmark; |
427 | new->nfcache = old->nfcache; | ||
428 | new->nfct = old->nfct; | 469 | new->nfct = old->nfct; |
429 | nf_conntrack_get(old->nfct); | 470 | nf_conntrack_get(old->nfct); |
430 | new->nfctinfo = old->nfctinfo; | 471 | new->nfctinfo = old->nfctinfo; |
@@ -1344,50 +1385,43 @@ void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) | |||
1344 | __skb_queue_tail(list, newsk); | 1385 | __skb_queue_tail(list, newsk); |
1345 | spin_unlock_irqrestore(&list->lock, flags); | 1386 | spin_unlock_irqrestore(&list->lock, flags); |
1346 | } | 1387 | } |
1388 | |||
1347 | /** | 1389 | /** |
1348 | * skb_unlink - remove a buffer from a list | 1390 | * skb_unlink - remove a buffer from a list |
1349 | * @skb: buffer to remove | 1391 | * @skb: buffer to remove |
1392 | * @list: list to use | ||
1350 | * | 1393 | * |
1351 | * Place a packet after a given packet in a list. The list locks are taken | 1394 | * Remove a packet from a list. The list locks are taken and this |
1352 | * and this function is atomic with respect to other list locked calls | 1395 | * function is atomic with respect to other list locked calls |
1353 | * | 1396 | * |
1354 | * Works even without knowing the list it is sitting on, which can be | 1397 | * You must know what list the SKB is on. |
1355 | * handy at times. It also means that THE LIST MUST EXIST when you | ||
1356 | * unlink. Thus a list must have its contents unlinked before it is | ||
1357 | * destroyed. | ||
1358 | */ | 1398 | */ |
1359 | void skb_unlink(struct sk_buff *skb) | 1399 | void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) |
1360 | { | 1400 | { |
1361 | struct sk_buff_head *list = skb->list; | 1401 | unsigned long flags; |
1362 | |||
1363 | if (list) { | ||
1364 | unsigned long flags; | ||
1365 | 1402 | ||
1366 | spin_lock_irqsave(&list->lock, flags); | 1403 | spin_lock_irqsave(&list->lock, flags); |
1367 | if (skb->list == list) | 1404 | __skb_unlink(skb, list); |
1368 | __skb_unlink(skb, skb->list); | 1405 | spin_unlock_irqrestore(&list->lock, flags); |
1369 | spin_unlock_irqrestore(&list->lock, flags); | ||
1370 | } | ||
1371 | } | 1406 | } |
1372 | 1407 | ||
1373 | |||
1374 | /** | 1408 | /** |
1375 | * skb_append - append a buffer | 1409 | * skb_append - append a buffer |
1376 | * @old: buffer to insert after | 1410 | * @old: buffer to insert after |
1377 | * @newsk: buffer to insert | 1411 | * @newsk: buffer to insert |
1412 | * @list: list to use | ||
1378 | * | 1413 | * |
1379 | * Place a packet after a given packet in a list. The list locks are taken | 1414 | * Place a packet after a given packet in a list. The list locks are taken |
1380 | * and this function is atomic with respect to other list locked calls. | 1415 | * and this function is atomic with respect to other list locked calls. |
1381 | * A buffer cannot be placed on two lists at the same time. | 1416 | * A buffer cannot be placed on two lists at the same time. |
1382 | */ | 1417 | */ |
1383 | 1418 | void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) | |
1384 | void skb_append(struct sk_buff *old, struct sk_buff *newsk) | ||
1385 | { | 1419 | { |
1386 | unsigned long flags; | 1420 | unsigned long flags; |
1387 | 1421 | ||
1388 | spin_lock_irqsave(&old->list->lock, flags); | 1422 | spin_lock_irqsave(&list->lock, flags); |
1389 | __skb_append(old, newsk); | 1423 | __skb_append(old, newsk, list); |
1390 | spin_unlock_irqrestore(&old->list->lock, flags); | 1424 | spin_unlock_irqrestore(&list->lock, flags); |
1391 | } | 1425 | } |
1392 | 1426 | ||
1393 | 1427 | ||
@@ -1395,19 +1429,21 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk) | |||
1395 | * skb_insert - insert a buffer | 1429 | * skb_insert - insert a buffer |
1396 | * @old: buffer to insert before | 1430 | * @old: buffer to insert before |
1397 | * @newsk: buffer to insert | 1431 | * @newsk: buffer to insert |
1432 | * @list: list to use | ||
1433 | * | ||
1434 | * Place a packet before a given packet in a list. The list locks are | ||
1435 | * taken and this function is atomic with respect to other list locked | ||
1436 | * calls. | ||
1398 | * | 1437 | * |
1399 | * Place a packet before a given packet in a list. The list locks are taken | ||
1400 | * and this function is atomic with respect to other list locked calls | ||
1401 | * A buffer cannot be placed on two lists at the same time. | 1438 | * A buffer cannot be placed on two lists at the same time. |
1402 | */ | 1439 | */ |
1403 | 1440 | void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) | |
1404 | void skb_insert(struct sk_buff *old, struct sk_buff *newsk) | ||
1405 | { | 1441 | { |
1406 | unsigned long flags; | 1442 | unsigned long flags; |
1407 | 1443 | ||
1408 | spin_lock_irqsave(&old->list->lock, flags); | 1444 | spin_lock_irqsave(&list->lock, flags); |
1409 | __skb_insert(newsk, old->prev, old, old->list); | 1445 | __skb_insert(newsk, old->prev, old, list); |
1410 | spin_unlock_irqrestore(&old->list->lock, flags); | 1446 | spin_unlock_irqrestore(&list->lock, flags); |
1411 | } | 1447 | } |
1412 | 1448 | ||
1413 | #if 0 | 1449 | #if 0 |
@@ -1663,12 +1699,23 @@ void __init skb_init(void) | |||
1663 | NULL, NULL); | 1699 | NULL, NULL); |
1664 | if (!skbuff_head_cache) | 1700 | if (!skbuff_head_cache) |
1665 | panic("cannot create skbuff cache"); | 1701 | panic("cannot create skbuff cache"); |
1702 | |||
1703 | skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", | ||
1704 | (2*sizeof(struct sk_buff)) + | ||
1705 | sizeof(atomic_t), | ||
1706 | 0, | ||
1707 | SLAB_HWCACHE_ALIGN, | ||
1708 | NULL, NULL); | ||
1709 | if (!skbuff_fclone_cache) | ||
1710 | panic("cannot create skbuff cache"); | ||
1711 | |||
1712 | do_gettimeofday(&skb_tv_base); | ||
1666 | } | 1713 | } |
1667 | 1714 | ||
1668 | EXPORT_SYMBOL(___pskb_trim); | 1715 | EXPORT_SYMBOL(___pskb_trim); |
1669 | EXPORT_SYMBOL(__kfree_skb); | 1716 | EXPORT_SYMBOL(__kfree_skb); |
1670 | EXPORT_SYMBOL(__pskb_pull_tail); | 1717 | EXPORT_SYMBOL(__pskb_pull_tail); |
1671 | EXPORT_SYMBOL(alloc_skb); | 1718 | EXPORT_SYMBOL(__alloc_skb); |
1672 | EXPORT_SYMBOL(pskb_copy); | 1719 | EXPORT_SYMBOL(pskb_copy); |
1673 | EXPORT_SYMBOL(pskb_expand_head); | 1720 | EXPORT_SYMBOL(pskb_expand_head); |
1674 | EXPORT_SYMBOL(skb_checksum); | 1721 | EXPORT_SYMBOL(skb_checksum); |
@@ -1696,3 +1743,4 @@ EXPORT_SYMBOL(skb_prepare_seq_read); | |||
1696 | EXPORT_SYMBOL(skb_seq_read); | 1743 | EXPORT_SYMBOL(skb_seq_read); |
1697 | EXPORT_SYMBOL(skb_abort_seq_read); | 1744 | EXPORT_SYMBOL(skb_abort_seq_read); |
1698 | EXPORT_SYMBOL(skb_find_text); | 1745 | EXPORT_SYMBOL(skb_find_text); |
1746 | EXPORT_SYMBOL(skb_tv_base); | ||
diff --git a/net/core/sock.c b/net/core/sock.c index 12f6d9a2a522..ccd10fd65682 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -260,7 +260,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, | |||
260 | 260 | ||
261 | if (val > sysctl_wmem_max) | 261 | if (val > sysctl_wmem_max) |
262 | val = sysctl_wmem_max; | 262 | val = sysctl_wmem_max; |
263 | 263 | set_sndbuf: | |
264 | sk->sk_userlocks |= SOCK_SNDBUF_LOCK; | 264 | sk->sk_userlocks |= SOCK_SNDBUF_LOCK; |
265 | if ((val * 2) < SOCK_MIN_SNDBUF) | 265 | if ((val * 2) < SOCK_MIN_SNDBUF) |
266 | sk->sk_sndbuf = SOCK_MIN_SNDBUF; | 266 | sk->sk_sndbuf = SOCK_MIN_SNDBUF; |
@@ -274,6 +274,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname, | |||
274 | sk->sk_write_space(sk); | 274 | sk->sk_write_space(sk); |
275 | break; | 275 | break; |
276 | 276 | ||
277 | case SO_SNDBUFFORCE: | ||
278 | if (!capable(CAP_NET_ADMIN)) { | ||
279 | ret = -EPERM; | ||
280 | break; | ||
281 | } | ||
282 | goto set_sndbuf; | ||
283 | |||
277 | case SO_RCVBUF: | 284 | case SO_RCVBUF: |
278 | /* Don't error on this BSD doesn't and if you think | 285 | /* Don't error on this BSD doesn't and if you think |
279 | about it this is right. Otherwise apps have to | 286 | about it this is right. Otherwise apps have to |
@@ -282,7 +289,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, | |||
282 | 289 | ||
283 | if (val > sysctl_rmem_max) | 290 | if (val > sysctl_rmem_max) |
284 | val = sysctl_rmem_max; | 291 | val = sysctl_rmem_max; |
285 | 292 | set_rcvbuf: | |
286 | sk->sk_userlocks |= SOCK_RCVBUF_LOCK; | 293 | sk->sk_userlocks |= SOCK_RCVBUF_LOCK; |
287 | /* FIXME: is this lower bound the right one? */ | 294 | /* FIXME: is this lower bound the right one? */ |
288 | if ((val * 2) < SOCK_MIN_RCVBUF) | 295 | if ((val * 2) < SOCK_MIN_RCVBUF) |
@@ -291,6 +298,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname, | |||
291 | sk->sk_rcvbuf = val * 2; | 298 | sk->sk_rcvbuf = val * 2; |
292 | break; | 299 | break; |
293 | 300 | ||
301 | case SO_RCVBUFFORCE: | ||
302 | if (!capable(CAP_NET_ADMIN)) { | ||
303 | ret = -EPERM; | ||
304 | break; | ||
305 | } | ||
306 | goto set_rcvbuf; | ||
307 | |||
294 | case SO_KEEPALIVE: | 308 | case SO_KEEPALIVE: |
295 | #ifdef CONFIG_INET | 309 | #ifdef CONFIG_INET |
296 | if (sk->sk_protocol == IPPROTO_TCP) | 310 | if (sk->sk_protocol == IPPROTO_TCP) |
@@ -686,6 +700,80 @@ void sk_free(struct sock *sk) | |||
686 | module_put(owner); | 700 | module_put(owner); |
687 | } | 701 | } |
688 | 702 | ||
703 | struct sock *sk_clone(const struct sock *sk, const unsigned int __nocast priority) | ||
704 | { | ||
705 | struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0); | ||
706 | |||
707 | if (newsk != NULL) { | ||
708 | struct sk_filter *filter; | ||
709 | |||
710 | memcpy(newsk, sk, sk->sk_prot->obj_size); | ||
711 | |||
712 | /* SANITY */ | ||
713 | sk_node_init(&newsk->sk_node); | ||
714 | sock_lock_init(newsk); | ||
715 | bh_lock_sock(newsk); | ||
716 | |||
717 | atomic_set(&newsk->sk_rmem_alloc, 0); | ||
718 | atomic_set(&newsk->sk_wmem_alloc, 0); | ||
719 | atomic_set(&newsk->sk_omem_alloc, 0); | ||
720 | skb_queue_head_init(&newsk->sk_receive_queue); | ||
721 | skb_queue_head_init(&newsk->sk_write_queue); | ||
722 | |||
723 | rwlock_init(&newsk->sk_dst_lock); | ||
724 | rwlock_init(&newsk->sk_callback_lock); | ||
725 | |||
726 | newsk->sk_dst_cache = NULL; | ||
727 | newsk->sk_wmem_queued = 0; | ||
728 | newsk->sk_forward_alloc = 0; | ||
729 | newsk->sk_send_head = NULL; | ||
730 | newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; | ||
731 | newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; | ||
732 | |||
733 | sock_reset_flag(newsk, SOCK_DONE); | ||
734 | skb_queue_head_init(&newsk->sk_error_queue); | ||
735 | |||
736 | filter = newsk->sk_filter; | ||
737 | if (filter != NULL) | ||
738 | sk_filter_charge(newsk, filter); | ||
739 | |||
740 | if (unlikely(xfrm_sk_clone_policy(newsk))) { | ||
741 | /* It is still raw copy of parent, so invalidate | ||
742 | * destructor and make plain sk_free() */ | ||
743 | newsk->sk_destruct = NULL; | ||
744 | sk_free(newsk); | ||
745 | newsk = NULL; | ||
746 | goto out; | ||
747 | } | ||
748 | |||
749 | newsk->sk_err = 0; | ||
750 | newsk->sk_priority = 0; | ||
751 | atomic_set(&newsk->sk_refcnt, 2); | ||
752 | |||
753 | /* | ||
754 | * Increment the counter in the same struct proto as the master | ||
755 | * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that | ||
756 | * is the same as sk->sk_prot->socks, as this field was copied | ||
757 | * with memcpy). | ||
758 | * | ||
759 | * This _changes_ the previous behaviour, where | ||
760 | * tcp_create_openreq_child always was incrementing the | ||
761 | * equivalent to tcp_prot->socks (inet_sock_nr), so this have | ||
762 | * to be taken into account in all callers. -acme | ||
763 | */ | ||
764 | sk_refcnt_debug_inc(newsk); | ||
765 | newsk->sk_socket = NULL; | ||
766 | newsk->sk_sleep = NULL; | ||
767 | |||
768 | if (newsk->sk_prot->sockets_allocated) | ||
769 | atomic_inc(newsk->sk_prot->sockets_allocated); | ||
770 | } | ||
771 | out: | ||
772 | return newsk; | ||
773 | } | ||
774 | |||
775 | EXPORT_SYMBOL_GPL(sk_clone); | ||
776 | |||
689 | void __init sk_init(void) | 777 | void __init sk_init(void) |
690 | { | 778 | { |
691 | if (num_physpages <= 4096) { | 779 | if (num_physpages <= 4096) { |
@@ -1353,11 +1441,7 @@ void sk_common_release(struct sock *sk) | |||
1353 | 1441 | ||
1354 | xfrm_sk_free_policy(sk); | 1442 | xfrm_sk_free_policy(sk); |
1355 | 1443 | ||
1356 | #ifdef INET_REFCNT_DEBUG | 1444 | sk_refcnt_debug_release(sk); |
1357 | if (atomic_read(&sk->sk_refcnt) != 1) | ||
1358 | printk(KERN_DEBUG "Destruction of the socket %p delayed, c=%d\n", | ||
1359 | sk, atomic_read(&sk->sk_refcnt)); | ||
1360 | #endif | ||
1361 | sock_put(sk); | 1445 | sock_put(sk); |
1362 | } | 1446 | } |
1363 | 1447 | ||
@@ -1368,7 +1452,8 @@ static LIST_HEAD(proto_list); | |||
1368 | 1452 | ||
1369 | int proto_register(struct proto *prot, int alloc_slab) | 1453 | int proto_register(struct proto *prot, int alloc_slab) |
1370 | { | 1454 | { |
1371 | char *request_sock_slab_name; | 1455 | char *request_sock_slab_name = NULL; |
1456 | char *timewait_sock_slab_name; | ||
1372 | int rc = -ENOBUFS; | 1457 | int rc = -ENOBUFS; |
1373 | 1458 | ||
1374 | if (alloc_slab) { | 1459 | if (alloc_slab) { |
@@ -1399,6 +1484,23 @@ int proto_register(struct proto *prot, int alloc_slab) | |||
1399 | goto out_free_request_sock_slab_name; | 1484 | goto out_free_request_sock_slab_name; |
1400 | } | 1485 | } |
1401 | } | 1486 | } |
1487 | |||
1488 | if (prot->twsk_obj_size) { | ||
1489 | static const char mask[] = "tw_sock_%s"; | ||
1490 | |||
1491 | timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); | ||
1492 | |||
1493 | if (timewait_sock_slab_name == NULL) | ||
1494 | goto out_free_request_sock_slab; | ||
1495 | |||
1496 | sprintf(timewait_sock_slab_name, mask, prot->name); | ||
1497 | prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name, | ||
1498 | prot->twsk_obj_size, | ||
1499 | 0, SLAB_HWCACHE_ALIGN, | ||
1500 | NULL, NULL); | ||
1501 | if (prot->twsk_slab == NULL) | ||
1502 | goto out_free_timewait_sock_slab_name; | ||
1503 | } | ||
1402 | } | 1504 | } |
1403 | 1505 | ||
1404 | write_lock(&proto_list_lock); | 1506 | write_lock(&proto_list_lock); |
@@ -1407,6 +1509,13 @@ int proto_register(struct proto *prot, int alloc_slab) | |||
1407 | rc = 0; | 1509 | rc = 0; |
1408 | out: | 1510 | out: |
1409 | return rc; | 1511 | return rc; |
1512 | out_free_timewait_sock_slab_name: | ||
1513 | kfree(timewait_sock_slab_name); | ||
1514 | out_free_request_sock_slab: | ||
1515 | if (prot->rsk_prot && prot->rsk_prot->slab) { | ||
1516 | kmem_cache_destroy(prot->rsk_prot->slab); | ||
1517 | prot->rsk_prot->slab = NULL; | ||
1518 | } | ||
1410 | out_free_request_sock_slab_name: | 1519 | out_free_request_sock_slab_name: |
1411 | kfree(request_sock_slab_name); | 1520 | kfree(request_sock_slab_name); |
1412 | out_free_sock_slab: | 1521 | out_free_sock_slab: |
@@ -1434,6 +1543,14 @@ void proto_unregister(struct proto *prot) | |||
1434 | prot->rsk_prot->slab = NULL; | 1543 | prot->rsk_prot->slab = NULL; |
1435 | } | 1544 | } |
1436 | 1545 | ||
1546 | if (prot->twsk_slab != NULL) { | ||
1547 | const char *name = kmem_cache_name(prot->twsk_slab); | ||
1548 | |||
1549 | kmem_cache_destroy(prot->twsk_slab); | ||
1550 | kfree(name); | ||
1551 | prot->twsk_slab = NULL; | ||
1552 | } | ||
1553 | |||
1437 | list_del(&prot->node); | 1554 | list_del(&prot->node); |
1438 | write_unlock(&proto_list_lock); | 1555 | write_unlock(&proto_list_lock); |
1439 | } | 1556 | } |
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 8f817ad9f546..2f278c8e4743 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c | |||
@@ -9,23 +9,18 @@ | |||
9 | #include <linux/sysctl.h> | 9 | #include <linux/sysctl.h> |
10 | #include <linux/config.h> | 10 | #include <linux/config.h> |
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/socket.h> | ||
13 | #include <net/sock.h> | ||
12 | 14 | ||
13 | #ifdef CONFIG_SYSCTL | 15 | #ifdef CONFIG_SYSCTL |
14 | 16 | ||
15 | extern int netdev_max_backlog; | 17 | extern int netdev_max_backlog; |
16 | extern int netdev_budget; | ||
17 | extern int weight_p; | 18 | extern int weight_p; |
18 | extern int net_msg_cost; | ||
19 | extern int net_msg_burst; | ||
20 | 19 | ||
21 | extern __u32 sysctl_wmem_max; | 20 | extern __u32 sysctl_wmem_max; |
22 | extern __u32 sysctl_rmem_max; | 21 | extern __u32 sysctl_rmem_max; |
23 | extern __u32 sysctl_wmem_default; | ||
24 | extern __u32 sysctl_rmem_default; | ||
25 | 22 | ||
26 | extern int sysctl_core_destroy_delay; | 23 | extern int sysctl_core_destroy_delay; |
27 | extern int sysctl_optmem_max; | ||
28 | extern int sysctl_somaxconn; | ||
29 | 24 | ||
30 | #ifdef CONFIG_NET_DIVERT | 25 | #ifdef CONFIG_NET_DIVERT |
31 | extern char sysctl_divert_version[]; | 26 | extern char sysctl_divert_version[]; |
diff --git a/net/core/utils.c b/net/core/utils.c index 88eb8b68e26b..7b5970fc9e40 100644 --- a/net/core/utils.c +++ b/net/core/utils.c | |||
@@ -16,7 +16,9 @@ | |||
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/jiffies.h> | 17 | #include <linux/jiffies.h> |
18 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
19 | #include <linux/inet.h> | ||
19 | #include <linux/mm.h> | 20 | #include <linux/mm.h> |
21 | #include <linux/net.h> | ||
20 | #include <linux/string.h> | 22 | #include <linux/string.h> |
21 | #include <linux/types.h> | 23 | #include <linux/types.h> |
22 | #include <linux/random.h> | 24 | #include <linux/random.h> |
diff --git a/net/core/wireless.c b/net/core/wireless.c index 3ff5639c0b78..5caae2399f3a 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c | |||
@@ -571,10 +571,6 @@ static int wireless_seq_show(struct seq_file *seq, void *v) | |||
571 | return 0; | 571 | return 0; |
572 | } | 572 | } |
573 | 573 | ||
574 | extern void *dev_seq_start(struct seq_file *seq, loff_t *pos); | ||
575 | extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos); | ||
576 | extern void dev_seq_stop(struct seq_file *seq, void *v); | ||
577 | |||
578 | static struct seq_operations wireless_seq_ops = { | 574 | static struct seq_operations wireless_seq_ops = { |
579 | .start = dev_seq_start, | 575 | .start = dev_seq_start, |
580 | .next = dev_seq_next, | 576 | .next = dev_seq_next, |
@@ -1144,8 +1140,8 @@ static inline void rtmsg_iwinfo(struct net_device * dev, | |||
1144 | kfree_skb(skb); | 1140 | kfree_skb(skb); |
1145 | return; | 1141 | return; |
1146 | } | 1142 | } |
1147 | NETLINK_CB(skb).dst_groups = RTMGRP_LINK; | 1143 | NETLINK_CB(skb).dst_group = RTNLGRP_LINK; |
1148 | netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_ATOMIC); | 1144 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC); |
1149 | } | 1145 | } |
1150 | #endif /* WE_EVENT_NETLINK */ | 1146 | #endif /* WE_EVENT_NETLINK */ |
1151 | 1147 | ||
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig new file mode 100644 index 000000000000..187ac182e24b --- /dev/null +++ b/net/dccp/Kconfig | |||
@@ -0,0 +1,50 @@ | |||
1 | menu "DCCP Configuration (EXPERIMENTAL)" | ||
2 | depends on INET && EXPERIMENTAL | ||
3 | |||
4 | config IP_DCCP | ||
5 | tristate "The DCCP Protocol (EXPERIMENTAL)" | ||
6 | ---help--- | ||
7 | Datagram Congestion Control Protocol | ||
8 | |||
9 | From draft-ietf-dccp-spec-11 <http://www.icir.org/kohler/dcp/draft-ietf-dccp-spec-11.txt>. | ||
10 | |||
11 | The Datagram Congestion Control Protocol (DCCP) is a transport | ||
12 | protocol that implements bidirectional, unicast connections of | ||
13 | congestion-controlled, unreliable datagrams. It should be suitable | ||
14 | for use by applications such as streaming media, Internet telephony, | ||
15 | and on-line games | ||
16 | |||
17 | To compile this protocol support as a module, choose M here: the | ||
18 | module will be called dccp. | ||
19 | |||
20 | If in doubt, say N. | ||
21 | |||
22 | config INET_DCCP_DIAG | ||
23 | depends on IP_DCCP && INET_DIAG | ||
24 | def_tristate y if (IP_DCCP = y && INET_DIAG = y) | ||
25 | def_tristate m | ||
26 | |||
27 | source "net/dccp/ccids/Kconfig" | ||
28 | |||
29 | menu "DCCP Kernel Hacking" | ||
30 | depends on IP_DCCP && DEBUG_KERNEL=y | ||
31 | |||
32 | config IP_DCCP_DEBUG | ||
33 | bool "DCCP debug messages" | ||
34 | ---help--- | ||
35 | Only use this if you're hacking DCCP. | ||
36 | |||
37 | Just say N. | ||
38 | |||
39 | config IP_DCCP_UNLOAD_HACK | ||
40 | depends on IP_DCCP=m && IP_DCCP_CCID3=m | ||
41 | bool "DCCP control sock unload hack" | ||
42 | ---help--- | ||
43 | Enable this to be able to unload the dccp module when the it | ||
44 | has only one refcount held, the control sock one. Just execute | ||
45 | "rmmod dccp_ccid3 dccp" | ||
46 | |||
47 | Just say N. | ||
48 | endmenu | ||
49 | |||
50 | endmenu | ||
diff --git a/net/dccp/Makefile b/net/dccp/Makefile new file mode 100644 index 000000000000..fb97bb042455 --- /dev/null +++ b/net/dccp/Makefile | |||
@@ -0,0 +1,10 @@ | |||
1 | obj-$(CONFIG_IP_DCCP) += dccp.o | ||
2 | |||
3 | dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ | ||
4 | timer.o | ||
5 | |||
6 | obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o | ||
7 | |||
8 | dccp_diag-y := diag.o | ||
9 | |||
10 | obj-y += ccids/ | ||
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c new file mode 100644 index 000000000000..9d8fc0e289ea --- /dev/null +++ b/net/dccp/ccid.c | |||
@@ -0,0 +1,139 @@ | |||
1 | /* | ||
2 | * net/dccp/ccid.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
6 | * | ||
7 | * CCID infrastructure | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify it | ||
10 | * under the terms of the GNU General Public License version 2 as | ||
11 | * published by the Free Software Foundation. | ||
12 | */ | ||
13 | |||
14 | #include "ccid.h" | ||
15 | |||
16 | static struct ccid *ccids[CCID_MAX]; | ||
17 | #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) | ||
18 | static atomic_t ccids_lockct = ATOMIC_INIT(0); | ||
19 | static DEFINE_SPINLOCK(ccids_lock); | ||
20 | |||
21 | /* | ||
22 | * The strategy is: modifications ccids vector are short, do not sleep and | ||
23 | * veeery rare, but read access should be free of any exclusive locks. | ||
24 | */ | ||
25 | static void ccids_write_lock(void) | ||
26 | { | ||
27 | spin_lock(&ccids_lock); | ||
28 | while (atomic_read(&ccids_lockct) != 0) { | ||
29 | spin_unlock(&ccids_lock); | ||
30 | yield(); | ||
31 | spin_lock(&ccids_lock); | ||
32 | } | ||
33 | } | ||
34 | |||
35 | static inline void ccids_write_unlock(void) | ||
36 | { | ||
37 | spin_unlock(&ccids_lock); | ||
38 | } | ||
39 | |||
40 | static inline void ccids_read_lock(void) | ||
41 | { | ||
42 | atomic_inc(&ccids_lockct); | ||
43 | spin_unlock_wait(&ccids_lock); | ||
44 | } | ||
45 | |||
46 | static inline void ccids_read_unlock(void) | ||
47 | { | ||
48 | atomic_dec(&ccids_lockct); | ||
49 | } | ||
50 | |||
51 | #else | ||
52 | #define ccids_write_lock() do { } while(0) | ||
53 | #define ccids_write_unlock() do { } while(0) | ||
54 | #define ccids_read_lock() do { } while(0) | ||
55 | #define ccids_read_unlock() do { } while(0) | ||
56 | #endif | ||
57 | |||
58 | int ccid_register(struct ccid *ccid) | ||
59 | { | ||
60 | int err; | ||
61 | |||
62 | if (ccid->ccid_init == NULL) | ||
63 | return -1; | ||
64 | |||
65 | ccids_write_lock(); | ||
66 | err = -EEXIST; | ||
67 | if (ccids[ccid->ccid_id] == NULL) { | ||
68 | ccids[ccid->ccid_id] = ccid; | ||
69 | err = 0; | ||
70 | } | ||
71 | ccids_write_unlock(); | ||
72 | if (err == 0) | ||
73 | pr_info("CCID: Registered CCID %d (%s)\n", | ||
74 | ccid->ccid_id, ccid->ccid_name); | ||
75 | return err; | ||
76 | } | ||
77 | |||
78 | EXPORT_SYMBOL_GPL(ccid_register); | ||
79 | |||
80 | int ccid_unregister(struct ccid *ccid) | ||
81 | { | ||
82 | ccids_write_lock(); | ||
83 | ccids[ccid->ccid_id] = NULL; | ||
84 | ccids_write_unlock(); | ||
85 | pr_info("CCID: Unregistered CCID %d (%s)\n", | ||
86 | ccid->ccid_id, ccid->ccid_name); | ||
87 | return 0; | ||
88 | } | ||
89 | |||
90 | EXPORT_SYMBOL_GPL(ccid_unregister); | ||
91 | |||
92 | struct ccid *ccid_init(unsigned char id, struct sock *sk) | ||
93 | { | ||
94 | struct ccid *ccid; | ||
95 | |||
96 | #ifdef CONFIG_KMOD | ||
97 | if (ccids[id] == NULL) | ||
98 | request_module("net-dccp-ccid-%d", id); | ||
99 | #endif | ||
100 | ccids_read_lock(); | ||
101 | |||
102 | ccid = ccids[id]; | ||
103 | if (ccid == NULL) | ||
104 | goto out; | ||
105 | |||
106 | if (!try_module_get(ccid->ccid_owner)) | ||
107 | goto out_err; | ||
108 | |||
109 | if (ccid->ccid_init(sk) != 0) | ||
110 | goto out_module_put; | ||
111 | out: | ||
112 | ccids_read_unlock(); | ||
113 | return ccid; | ||
114 | out_module_put: | ||
115 | module_put(ccid->ccid_owner); | ||
116 | out_err: | ||
117 | ccid = NULL; | ||
118 | goto out; | ||
119 | } | ||
120 | |||
121 | EXPORT_SYMBOL_GPL(ccid_init); | ||
122 | |||
123 | void ccid_exit(struct ccid *ccid, struct sock *sk) | ||
124 | { | ||
125 | if (ccid == NULL) | ||
126 | return; | ||
127 | |||
128 | ccids_read_lock(); | ||
129 | |||
130 | if (ccids[ccid->ccid_id] != NULL) { | ||
131 | if (ccid->ccid_exit != NULL) | ||
132 | ccid->ccid_exit(sk); | ||
133 | module_put(ccid->ccid_owner); | ||
134 | } | ||
135 | |||
136 | ccids_read_unlock(); | ||
137 | } | ||
138 | |||
139 | EXPORT_SYMBOL_GPL(ccid_exit); | ||
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h new file mode 100644 index 000000000000..962f1e9e2f7e --- /dev/null +++ b/net/dccp/ccid.h | |||
@@ -0,0 +1,180 @@ | |||
1 | #ifndef _CCID_H | ||
2 | #define _CCID_H | ||
3 | /* | ||
4 | * net/dccp/ccid.h | ||
5 | * | ||
6 | * An implementation of the DCCP protocol | ||
7 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
8 | * | ||
9 | * CCID infrastructure | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify it | ||
12 | * under the terms of the GNU General Public License version 2 as | ||
13 | * published by the Free Software Foundation. | ||
14 | */ | ||
15 | |||
16 | #include <net/sock.h> | ||
17 | #include <linux/dccp.h> | ||
18 | #include <linux/list.h> | ||
19 | #include <linux/module.h> | ||
20 | |||
21 | #define CCID_MAX 255 | ||
22 | |||
23 | struct ccid { | ||
24 | unsigned char ccid_id; | ||
25 | const char *ccid_name; | ||
26 | struct module *ccid_owner; | ||
27 | int (*ccid_init)(struct sock *sk); | ||
28 | void (*ccid_exit)(struct sock *sk); | ||
29 | int (*ccid_hc_rx_init)(struct sock *sk); | ||
30 | int (*ccid_hc_tx_init)(struct sock *sk); | ||
31 | void (*ccid_hc_rx_exit)(struct sock *sk); | ||
32 | void (*ccid_hc_tx_exit)(struct sock *sk); | ||
33 | void (*ccid_hc_rx_packet_recv)(struct sock *sk, | ||
34 | struct sk_buff *skb); | ||
35 | int (*ccid_hc_rx_parse_options)(struct sock *sk, | ||
36 | unsigned char option, | ||
37 | unsigned char len, u16 idx, | ||
38 | unsigned char* value); | ||
39 | void (*ccid_hc_rx_insert_options)(struct sock *sk, | ||
40 | struct sk_buff *skb); | ||
41 | void (*ccid_hc_tx_insert_options)(struct sock *sk, | ||
42 | struct sk_buff *skb); | ||
43 | void (*ccid_hc_tx_packet_recv)(struct sock *sk, | ||
44 | struct sk_buff *skb); | ||
45 | int (*ccid_hc_tx_parse_options)(struct sock *sk, | ||
46 | unsigned char option, | ||
47 | unsigned char len, u16 idx, | ||
48 | unsigned char* value); | ||
49 | int (*ccid_hc_tx_send_packet)(struct sock *sk, | ||
50 | struct sk_buff *skb, int len); | ||
51 | void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more, | ||
52 | int len); | ||
53 | void (*ccid_hc_rx_get_info)(struct sock *sk, | ||
54 | struct tcp_info *info); | ||
55 | void (*ccid_hc_tx_get_info)(struct sock *sk, | ||
56 | struct tcp_info *info); | ||
57 | }; | ||
58 | |||
59 | extern int ccid_register(struct ccid *ccid); | ||
60 | extern int ccid_unregister(struct ccid *ccid); | ||
61 | |||
62 | extern struct ccid *ccid_init(unsigned char id, struct sock *sk); | ||
63 | extern void ccid_exit(struct ccid *ccid, struct sock *sk); | ||
64 | |||
65 | static inline void __ccid_get(struct ccid *ccid) | ||
66 | { | ||
67 | __module_get(ccid->ccid_owner); | ||
68 | } | ||
69 | |||
70 | static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk, | ||
71 | struct sk_buff *skb, int len) | ||
72 | { | ||
73 | int rc = 0; | ||
74 | if (ccid->ccid_hc_tx_send_packet != NULL) | ||
75 | rc = ccid->ccid_hc_tx_send_packet(sk, skb, len); | ||
76 | return rc; | ||
77 | } | ||
78 | |||
79 | static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk, | ||
80 | int more, int len) | ||
81 | { | ||
82 | if (ccid->ccid_hc_tx_packet_sent != NULL) | ||
83 | ccid->ccid_hc_tx_packet_sent(sk, more, len); | ||
84 | } | ||
85 | |||
86 | static inline int ccid_hc_rx_init(struct ccid *ccid, struct sock *sk) | ||
87 | { | ||
88 | int rc = 0; | ||
89 | if (ccid->ccid_hc_rx_init != NULL) | ||
90 | rc = ccid->ccid_hc_rx_init(sk); | ||
91 | return rc; | ||
92 | } | ||
93 | |||
94 | static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk) | ||
95 | { | ||
96 | int rc = 0; | ||
97 | if (ccid->ccid_hc_tx_init != NULL) | ||
98 | rc = ccid->ccid_hc_tx_init(sk); | ||
99 | return rc; | ||
100 | } | ||
101 | |||
102 | static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk) | ||
103 | { | ||
104 | if (ccid->ccid_hc_rx_exit != NULL && | ||
105 | dccp_sk(sk)->dccps_hc_rx_ccid_private != NULL) | ||
106 | ccid->ccid_hc_rx_exit(sk); | ||
107 | } | ||
108 | |||
109 | static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk) | ||
110 | { | ||
111 | if (ccid->ccid_hc_tx_exit != NULL && | ||
112 | dccp_sk(sk)->dccps_hc_tx_ccid_private != NULL) | ||
113 | ccid->ccid_hc_tx_exit(sk); | ||
114 | } | ||
115 | |||
116 | static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk, | ||
117 | struct sk_buff *skb) | ||
118 | { | ||
119 | if (ccid->ccid_hc_rx_packet_recv != NULL) | ||
120 | ccid->ccid_hc_rx_packet_recv(sk, skb); | ||
121 | } | ||
122 | |||
123 | static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk, | ||
124 | struct sk_buff *skb) | ||
125 | { | ||
126 | if (ccid->ccid_hc_tx_packet_recv != NULL) | ||
127 | ccid->ccid_hc_tx_packet_recv(sk, skb); | ||
128 | } | ||
129 | |||
130 | static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, | ||
131 | unsigned char option, | ||
132 | unsigned char len, u16 idx, | ||
133 | unsigned char* value) | ||
134 | { | ||
135 | int rc = 0; | ||
136 | if (ccid->ccid_hc_tx_parse_options != NULL) | ||
137 | rc = ccid->ccid_hc_tx_parse_options(sk, option, len, idx, | ||
138 | value); | ||
139 | return rc; | ||
140 | } | ||
141 | |||
142 | static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, | ||
143 | unsigned char option, | ||
144 | unsigned char len, u16 idx, | ||
145 | unsigned char* value) | ||
146 | { | ||
147 | int rc = 0; | ||
148 | if (ccid->ccid_hc_rx_parse_options != NULL) | ||
149 | rc = ccid->ccid_hc_rx_parse_options(sk, option, len, idx, value); | ||
150 | return rc; | ||
151 | } | ||
152 | |||
153 | static inline void ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk, | ||
154 | struct sk_buff *skb) | ||
155 | { | ||
156 | if (ccid->ccid_hc_tx_insert_options != NULL) | ||
157 | ccid->ccid_hc_tx_insert_options(sk, skb); | ||
158 | } | ||
159 | |||
160 | static inline void ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk, | ||
161 | struct sk_buff *skb) | ||
162 | { | ||
163 | if (ccid->ccid_hc_rx_insert_options != NULL) | ||
164 | ccid->ccid_hc_rx_insert_options(sk, skb); | ||
165 | } | ||
166 | |||
167 | static inline void ccid_hc_rx_get_info(struct ccid *ccid, struct sock *sk, | ||
168 | struct tcp_info *info) | ||
169 | { | ||
170 | if (ccid->ccid_hc_rx_get_info != NULL) | ||
171 | ccid->ccid_hc_rx_get_info(sk, info); | ||
172 | } | ||
173 | |||
174 | static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk, | ||
175 | struct tcp_info *info) | ||
176 | { | ||
177 | if (ccid->ccid_hc_tx_get_info != NULL) | ||
178 | ccid->ccid_hc_tx_get_info(sk, info); | ||
179 | } | ||
180 | #endif /* _CCID_H */ | ||
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig new file mode 100644 index 000000000000..7684d83946a4 --- /dev/null +++ b/net/dccp/ccids/Kconfig | |||
@@ -0,0 +1,29 @@ | |||
1 | menu "DCCP CCIDs Configuration (EXPERIMENTAL)" | ||
2 | depends on IP_DCCP && EXPERIMENTAL | ||
3 | |||
4 | config IP_DCCP_CCID3 | ||
5 | tristate "CCID3 (TFRC) (EXPERIMENTAL)" | ||
6 | depends on IP_DCCP | ||
7 | ---help--- | ||
8 | CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based | ||
9 | rate-controlled congestion control mechanism. TFRC is designed to | ||
10 | be reasonably fair when competing for bandwidth with TCP-like flows, | ||
11 | where a flow is "reasonably fair" if its sending rate is generally | ||
12 | within a factor of two of the sending rate of a TCP flow under the | ||
13 | same conditions. However, TFRC has a much lower variation of | ||
14 | throughput over time compared with TCP, which makes CCID 3 more | ||
15 | suitable than CCID 2 for applications such streaming media where a | ||
16 | relatively smooth sending rate is of importance. | ||
17 | |||
18 | CCID 3 is further described in [CCID 3 PROFILE]. The TFRC | ||
19 | congestion control algorithms were initially described in RFC 3448. | ||
20 | |||
21 | This text was extracted from draft-ietf-dccp-spec-11.txt. | ||
22 | |||
23 | If in doubt, say M. | ||
24 | |||
25 | config IP_DCCP_TFRC_LIB | ||
26 | depends on IP_DCCP_CCID3 | ||
27 | def_tristate IP_DCCP_CCID3 | ||
28 | |||
29 | endmenu | ||
diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile new file mode 100644 index 000000000000..956f79f50743 --- /dev/null +++ b/net/dccp/ccids/Makefile | |||
@@ -0,0 +1,5 @@ | |||
1 | obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o | ||
2 | |||
3 | dccp_ccid3-y := ccid3.o | ||
4 | |||
5 | obj-y += lib/ | ||
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c new file mode 100644 index 000000000000..7bf3b3a91e97 --- /dev/null +++ b/net/dccp/ccids/ccid3.c | |||
@@ -0,0 +1,1221 @@ | |||
1 | /* | ||
2 | * net/dccp/ccids/ccid3.c | ||
3 | * | ||
4 | * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. | ||
5 | * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> | ||
6 | * | ||
7 | * An implementation of the DCCP protocol | ||
8 | * | ||
9 | * This code has been developed by the University of Waikato WAND | ||
10 | * research group. For further information please see http://www.wand.net.nz/ | ||
11 | * | ||
12 | * This code also uses code from Lulea University, rereleased as GPL by its | ||
13 | * authors: | ||
14 | * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon | ||
15 | * | ||
16 | * Changes to meet Linux coding standards, to make it meet latest ccid3 draft | ||
17 | * and to make it work as a loadable module in the DCCP stack written by | ||
18 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. | ||
19 | * | ||
20 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
21 | * | ||
22 | * This program is free software; you can redistribute it and/or modify | ||
23 | * it under the terms of the GNU General Public License as published by | ||
24 | * the Free Software Foundation; either version 2 of the License, or | ||
25 | * (at your option) any later version. | ||
26 | * | ||
27 | * This program is distributed in the hope that it will be useful, | ||
28 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
29 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
30 | * GNU General Public License for more details. | ||
31 | * | ||
32 | * You should have received a copy of the GNU General Public License | ||
33 | * along with this program; if not, write to the Free Software | ||
34 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
35 | */ | ||
36 | |||
37 | #include <linux/config.h> | ||
38 | #include "../ccid.h" | ||
39 | #include "../dccp.h" | ||
40 | #include "lib/packet_history.h" | ||
41 | #include "lib/loss_interval.h" | ||
42 | #include "lib/tfrc.h" | ||
43 | #include "ccid3.h" | ||
44 | |||
45 | /* | ||
46 | * Reason for maths with 10 here is to avoid 32 bit overflow when a is big. | ||
47 | */ | ||
48 | static inline u32 usecs_div(const u32 a, const u32 b) | ||
49 | { | ||
50 | const u32 tmp = a * (USEC_PER_SEC / 10); | ||
51 | return b > 20 ? tmp / (b / 10) : tmp; | ||
52 | } | ||
53 | |||
54 | static int ccid3_debug; | ||
55 | |||
56 | #ifdef CCID3_DEBUG | ||
57 | #define ccid3_pr_debug(format, a...) \ | ||
58 | do { if (ccid3_debug) \ | ||
59 | printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \ | ||
60 | } while (0) | ||
61 | #else | ||
62 | #define ccid3_pr_debug(format, a...) | ||
63 | #endif | ||
64 | |||
65 | static struct dccp_tx_hist *ccid3_tx_hist; | ||
66 | static struct dccp_rx_hist *ccid3_rx_hist; | ||
67 | static struct dccp_li_hist *ccid3_li_hist; | ||
68 | |||
69 | static int ccid3_init(struct sock *sk) | ||
70 | { | ||
71 | ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); | ||
72 | return 0; | ||
73 | } | ||
74 | |||
75 | static void ccid3_exit(struct sock *sk) | ||
76 | { | ||
77 | ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); | ||
78 | } | ||
79 | |||
80 | /* TFRC sender states */ | ||
81 | enum ccid3_hc_tx_states { | ||
82 | TFRC_SSTATE_NO_SENT = 1, | ||
83 | TFRC_SSTATE_NO_FBACK, | ||
84 | TFRC_SSTATE_FBACK, | ||
85 | TFRC_SSTATE_TERM, | ||
86 | }; | ||
87 | |||
88 | #ifdef CCID3_DEBUG | ||
89 | static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) | ||
90 | { | ||
91 | static char *ccid3_state_names[] = { | ||
92 | [TFRC_SSTATE_NO_SENT] = "NO_SENT", | ||
93 | [TFRC_SSTATE_NO_FBACK] = "NO_FBACK", | ||
94 | [TFRC_SSTATE_FBACK] = "FBACK", | ||
95 | [TFRC_SSTATE_TERM] = "TERM", | ||
96 | }; | ||
97 | |||
98 | return ccid3_state_names[state]; | ||
99 | } | ||
100 | #endif | ||
101 | |||
102 | static inline void ccid3_hc_tx_set_state(struct sock *sk, | ||
103 | enum ccid3_hc_tx_states state) | ||
104 | { | ||
105 | struct dccp_sock *dp = dccp_sk(sk); | ||
106 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
107 | enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state; | ||
108 | |||
109 | ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", | ||
110 | dccp_role(sk), sk, ccid3_tx_state_name(oldstate), | ||
111 | ccid3_tx_state_name(state)); | ||
112 | WARN_ON(state == oldstate); | ||
113 | hctx->ccid3hctx_state = state; | ||
114 | } | ||
115 | |||
116 | /* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */ | ||
117 | static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx) | ||
118 | { | ||
119 | /* | ||
120 | * If no feedback spec says t_ipi is 1 second (set elsewhere and then | ||
121 | * doubles after every no feedback timer (separate function) | ||
122 | */ | ||
123 | if (hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK) | ||
124 | hctx->ccid3hctx_t_ipi = usecs_div(hctx->ccid3hctx_s, | ||
125 | hctx->ccid3hctx_x); | ||
126 | } | ||
127 | |||
128 | /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ | ||
129 | static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx) | ||
130 | { | ||
131 | hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, | ||
132 | TFRC_OPSYS_HALF_TIME_GRAN); | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | * Update X by | ||
137 | * If (p > 0) | ||
138 | * x_calc = calcX(s, R, p); | ||
139 | * X = max(min(X_calc, 2 * X_recv), s / t_mbi); | ||
140 | * Else | ||
141 | * If (now - tld >= R) | ||
142 | * X = max(min(2 * X, 2 * X_recv), s / R); | ||
143 | * tld = now; | ||
144 | */ | ||
145 | static void ccid3_hc_tx_update_x(struct sock *sk) | ||
146 | { | ||
147 | struct dccp_sock *dp = dccp_sk(sk); | ||
148 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
149 | |||
150 | /* To avoid large error in calcX */ | ||
151 | if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) { | ||
152 | hctx->ccid3hctx_x_calc = tfrc_calc_x(hctx->ccid3hctx_s, | ||
153 | hctx->ccid3hctx_rtt, | ||
154 | hctx->ccid3hctx_p); | ||
155 | hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc, | ||
156 | 2 * hctx->ccid3hctx_x_recv), | ||
157 | (hctx->ccid3hctx_s / | ||
158 | TFRC_MAX_BACK_OFF_TIME)); | ||
159 | } else { | ||
160 | struct timeval now; | ||
161 | |||
162 | do_gettimeofday(&now); | ||
163 | if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) >= | ||
164 | hctx->ccid3hctx_rtt) { | ||
165 | hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_recv, | ||
166 | hctx->ccid3hctx_x) * 2, | ||
167 | usecs_div(hctx->ccid3hctx_s, | ||
168 | hctx->ccid3hctx_rtt)); | ||
169 | hctx->ccid3hctx_t_ld = now; | ||
170 | } | ||
171 | } | ||
172 | } | ||
173 | |||
174 | static void ccid3_hc_tx_no_feedback_timer(unsigned long data) | ||
175 | { | ||
176 | struct sock *sk = (struct sock *)data; | ||
177 | struct dccp_sock *dp = dccp_sk(sk); | ||
178 | unsigned long next_tmout = 0; | ||
179 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
180 | |||
181 | bh_lock_sock(sk); | ||
182 | if (sock_owned_by_user(sk)) { | ||
183 | /* Try again later. */ | ||
184 | /* XXX: set some sensible MIB */ | ||
185 | sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, | ||
186 | jiffies + HZ / 5); | ||
187 | goto out; | ||
188 | } | ||
189 | |||
190 | ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk, | ||
191 | ccid3_tx_state_name(hctx->ccid3hctx_state)); | ||
192 | |||
193 | switch (hctx->ccid3hctx_state) { | ||
194 | case TFRC_SSTATE_TERM: | ||
195 | goto out; | ||
196 | case TFRC_SSTATE_NO_FBACK: | ||
197 | /* Halve send rate */ | ||
198 | hctx->ccid3hctx_x /= 2; | ||
199 | if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s / | ||
200 | TFRC_MAX_BACK_OFF_TIME)) | ||
201 | hctx->ccid3hctx_x = (hctx->ccid3hctx_s / | ||
202 | TFRC_MAX_BACK_OFF_TIME); | ||
203 | |||
204 | ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d " | ||
205 | "bytes/s\n", | ||
206 | dccp_role(sk), sk, | ||
207 | ccid3_tx_state_name(hctx->ccid3hctx_state), | ||
208 | hctx->ccid3hctx_x); | ||
209 | next_tmout = max_t(u32, 2 * usecs_div(hctx->ccid3hctx_s, | ||
210 | hctx->ccid3hctx_x), | ||
211 | TFRC_INITIAL_TIMEOUT); | ||
212 | /* | ||
213 | * FIXME - not sure above calculation is correct. See section | ||
214 | * 5 of CCID3 11 should adjust tx_t_ipi and double that to | ||
215 | * achieve it really | ||
216 | */ | ||
217 | break; | ||
218 | case TFRC_SSTATE_FBACK: | ||
219 | /* | ||
220 | * Check if IDLE since last timeout and recv rate is less than | ||
221 | * 4 packets per RTT | ||
222 | */ | ||
223 | if (!hctx->ccid3hctx_idle || | ||
224 | (hctx->ccid3hctx_x_recv >= | ||
225 | 4 * usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt))) { | ||
226 | ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", | ||
227 | dccp_role(sk), sk, | ||
228 | ccid3_tx_state_name(hctx->ccid3hctx_state)); | ||
229 | /* Halve sending rate */ | ||
230 | |||
231 | /* If (X_calc > 2 * X_recv) | ||
232 | * X_recv = max(X_recv / 2, s / (2 * t_mbi)); | ||
233 | * Else | ||
234 | * X_recv = X_calc / 4; | ||
235 | */ | ||
236 | BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P && | ||
237 | hctx->ccid3hctx_x_calc == 0); | ||
238 | |||
239 | /* check also if p is zero -> x_calc is infinity? */ | ||
240 | if (hctx->ccid3hctx_p < TFRC_SMALLEST_P || | ||
241 | hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv) | ||
242 | hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2, | ||
243 | hctx->ccid3hctx_s / (2 * TFRC_MAX_BACK_OFF_TIME)); | ||
244 | else | ||
245 | hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4; | ||
246 | |||
247 | /* Update sending rate */ | ||
248 | ccid3_hc_tx_update_x(sk); | ||
249 | } | ||
250 | /* | ||
251 | * Schedule no feedback timer to expire in | ||
252 | * max(4 * R, 2 * s / X) | ||
253 | */ | ||
254 | next_tmout = max_t(u32, hctx->ccid3hctx_t_rto, | ||
255 | 2 * usecs_div(hctx->ccid3hctx_s, | ||
256 | hctx->ccid3hctx_x)); | ||
257 | break; | ||
258 | default: | ||
259 | printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", | ||
260 | __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); | ||
261 | dump_stack(); | ||
262 | goto out; | ||
263 | } | ||
264 | |||
265 | sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, | ||
266 | jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout))); | ||
267 | hctx->ccid3hctx_idle = 1; | ||
268 | out: | ||
269 | bh_unlock_sock(sk); | ||
270 | sock_put(sk); | ||
271 | } | ||
272 | |||
273 | static int ccid3_hc_tx_send_packet(struct sock *sk, | ||
274 | struct sk_buff *skb, int len) | ||
275 | { | ||
276 | struct dccp_sock *dp = dccp_sk(sk); | ||
277 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
278 | struct dccp_tx_hist_entry *new_packet; | ||
279 | struct timeval now; | ||
280 | long delay; | ||
281 | int rc = -ENOTCONN; | ||
282 | |||
283 | /* Check if pure ACK or Terminating*/ | ||
284 | |||
285 | /* | ||
286 | * XXX: We only call this function for DATA and DATAACK, on, these | ||
287 | * packets can have zero length, but why the comment about "pure ACK"? | ||
288 | */ | ||
289 | if (hctx == NULL || len == 0 || | ||
290 | hctx->ccid3hctx_state == TFRC_SSTATE_TERM) | ||
291 | goto out; | ||
292 | |||
293 | /* See if last packet allocated was not sent */ | ||
294 | new_packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); | ||
295 | if (new_packet == NULL || new_packet->dccphtx_sent) { | ||
296 | new_packet = dccp_tx_hist_entry_new(ccid3_tx_hist, | ||
297 | SLAB_ATOMIC); | ||
298 | |||
299 | rc = -ENOBUFS; | ||
300 | if (new_packet == NULL) { | ||
301 | ccid3_pr_debug("%s, sk=%p, not enough mem to add " | ||
302 | "to history, send refused\n", | ||
303 | dccp_role(sk), sk); | ||
304 | goto out; | ||
305 | } | ||
306 | |||
307 | dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet); | ||
308 | } | ||
309 | |||
310 | do_gettimeofday(&now); | ||
311 | |||
312 | switch (hctx->ccid3hctx_state) { | ||
313 | case TFRC_SSTATE_NO_SENT: | ||
314 | ccid3_pr_debug("%s, sk=%p, first packet(%llu)\n", | ||
315 | dccp_role(sk), sk, dp->dccps_gss); | ||
316 | |||
317 | hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer; | ||
318 | hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk; | ||
319 | sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, | ||
320 | jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)); | ||
321 | hctx->ccid3hctx_last_win_count = 0; | ||
322 | hctx->ccid3hctx_t_last_win_count = now; | ||
323 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); | ||
324 | hctx->ccid3hctx_t_ipi = TFRC_INITIAL_TIMEOUT; | ||
325 | |||
326 | /* Set nominal send time for initial packet */ | ||
327 | hctx->ccid3hctx_t_nom = now; | ||
328 | timeval_add_usecs(&hctx->ccid3hctx_t_nom, | ||
329 | hctx->ccid3hctx_t_ipi); | ||
330 | ccid3_calc_new_delta(hctx); | ||
331 | rc = 0; | ||
332 | break; | ||
333 | case TFRC_SSTATE_NO_FBACK: | ||
334 | case TFRC_SSTATE_FBACK: | ||
335 | delay = (timeval_delta(&now, &hctx->ccid3hctx_t_nom) - | ||
336 | hctx->ccid3hctx_delta); | ||
337 | ccid3_pr_debug("send_packet delay=%ld\n", delay); | ||
338 | delay /= -1000; | ||
339 | /* divide by -1000 is to convert to ms and get sign right */ | ||
340 | rc = delay > 0 ? delay : 0; | ||
341 | break; | ||
342 | default: | ||
343 | printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", | ||
344 | __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); | ||
345 | dump_stack(); | ||
346 | rc = -EINVAL; | ||
347 | break; | ||
348 | } | ||
349 | |||
350 | /* Can we send? if so add options and add to packet history */ | ||
351 | if (rc == 0) | ||
352 | new_packet->dccphtx_ccval = | ||
353 | DCCP_SKB_CB(skb)->dccpd_ccval = | ||
354 | hctx->ccid3hctx_last_win_count; | ||
355 | out: | ||
356 | return rc; | ||
357 | } | ||
358 | |||
359 | static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) | ||
360 | { | ||
361 | struct dccp_sock *dp = dccp_sk(sk); | ||
362 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
363 | struct timeval now; | ||
364 | |||
365 | BUG_ON(hctx == NULL); | ||
366 | |||
367 | if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { | ||
368 | ccid3_pr_debug("%s, sk=%p, while state is TFRC_SSTATE_TERM!\n", | ||
369 | dccp_role(sk), sk); | ||
370 | return; | ||
371 | } | ||
372 | |||
373 | do_gettimeofday(&now); | ||
374 | |||
375 | /* check if we have sent a data packet */ | ||
376 | if (len > 0) { | ||
377 | unsigned long quarter_rtt; | ||
378 | struct dccp_tx_hist_entry *packet; | ||
379 | |||
380 | packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); | ||
381 | if (packet == NULL) { | ||
382 | printk(KERN_CRIT "%s: packet doesn't exists in " | ||
383 | "history!\n", __FUNCTION__); | ||
384 | return; | ||
385 | } | ||
386 | if (packet->dccphtx_sent) { | ||
387 | printk(KERN_CRIT "%s: no unsent packet in history!\n", | ||
388 | __FUNCTION__); | ||
389 | return; | ||
390 | } | ||
391 | packet->dccphtx_tstamp = now; | ||
392 | packet->dccphtx_seqno = dp->dccps_gss; | ||
393 | /* | ||
394 | * Check if win_count have changed | ||
395 | * Algorithm in "8.1. Window Counter Valuer" in | ||
396 | * draft-ietf-dccp-ccid3-11.txt | ||
397 | */ | ||
398 | quarter_rtt = timeval_delta(&now, &hctx->ccid3hctx_t_last_win_count); | ||
399 | if (likely(hctx->ccid3hctx_rtt > 8)) | ||
400 | quarter_rtt /= hctx->ccid3hctx_rtt / 4; | ||
401 | |||
402 | if (quarter_rtt > 0) { | ||
403 | hctx->ccid3hctx_t_last_win_count = now; | ||
404 | hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count + | ||
405 | min_t(unsigned long, quarter_rtt, 5)) % 16; | ||
406 | ccid3_pr_debug("%s, sk=%p, window changed from " | ||
407 | "%u to %u!\n", | ||
408 | dccp_role(sk), sk, | ||
409 | packet->dccphtx_ccval, | ||
410 | hctx->ccid3hctx_last_win_count); | ||
411 | } | ||
412 | |||
413 | hctx->ccid3hctx_idle = 0; | ||
414 | packet->dccphtx_rtt = hctx->ccid3hctx_rtt; | ||
415 | packet->dccphtx_sent = 1; | ||
416 | } else | ||
417 | ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n", | ||
418 | dccp_role(sk), sk, dp->dccps_gss); | ||
419 | |||
420 | switch (hctx->ccid3hctx_state) { | ||
421 | case TFRC_SSTATE_NO_SENT: | ||
422 | /* if first wasn't pure ack */ | ||
423 | if (len != 0) | ||
424 | printk(KERN_CRIT "%s: %s, First packet sent is noted " | ||
425 | "as a data packet\n", | ||
426 | __FUNCTION__, dccp_role(sk)); | ||
427 | return; | ||
428 | case TFRC_SSTATE_NO_FBACK: | ||
429 | case TFRC_SSTATE_FBACK: | ||
430 | if (len > 0) { | ||
431 | hctx->ccid3hctx_t_nom = now; | ||
432 | ccid3_calc_new_t_ipi(hctx); | ||
433 | ccid3_calc_new_delta(hctx); | ||
434 | timeval_add_usecs(&hctx->ccid3hctx_t_nom, | ||
435 | hctx->ccid3hctx_t_ipi); | ||
436 | } | ||
437 | break; | ||
438 | default: | ||
439 | printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", | ||
440 | __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); | ||
441 | dump_stack(); | ||
442 | break; | ||
443 | } | ||
444 | } | ||
445 | |||
446 | static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | ||
447 | { | ||
448 | struct dccp_sock *dp = dccp_sk(sk); | ||
449 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
450 | struct ccid3_options_received *opt_recv; | ||
451 | struct dccp_tx_hist_entry *packet; | ||
452 | unsigned long next_tmout; | ||
453 | u32 t_elapsed; | ||
454 | u32 pinv; | ||
455 | u32 x_recv; | ||
456 | u32 r_sample; | ||
457 | |||
458 | if (hctx == NULL) | ||
459 | return; | ||
460 | |||
461 | if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { | ||
462 | ccid3_pr_debug("%s, sk=%p, received a packet when " | ||
463 | "terminating!\n", dccp_role(sk), sk); | ||
464 | return; | ||
465 | } | ||
466 | |||
467 | /* we are only interested in ACKs */ | ||
468 | if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || | ||
469 | DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) | ||
470 | return; | ||
471 | |||
472 | opt_recv = &hctx->ccid3hctx_options_received; | ||
473 | |||
474 | t_elapsed = dp->dccps_options_received.dccpor_elapsed_time; | ||
475 | x_recv = opt_recv->ccid3or_receive_rate; | ||
476 | pinv = opt_recv->ccid3or_loss_event_rate; | ||
477 | |||
478 | switch (hctx->ccid3hctx_state) { | ||
479 | case TFRC_SSTATE_NO_SENT: | ||
480 | /* FIXME: what to do here? */ | ||
481 | return; | ||
482 | case TFRC_SSTATE_NO_FBACK: | ||
483 | case TFRC_SSTATE_FBACK: | ||
484 | /* Calculate new round trip sample by | ||
485 | * R_sample = (now - t_recvdata) - t_delay */ | ||
486 | /* get t_recvdata from history */ | ||
487 | packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist, | ||
488 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
489 | if (packet == NULL) { | ||
490 | ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't " | ||
491 | "exist in history!\n", | ||
492 | dccp_role(sk), sk, | ||
493 | DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
494 | dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); | ||
495 | return; | ||
496 | } | ||
497 | |||
498 | /* Update RTT */ | ||
499 | r_sample = timeval_now_delta(&packet->dccphtx_tstamp); | ||
500 | /* FIXME: */ | ||
501 | // r_sample -= usecs_to_jiffies(t_elapsed * 10); | ||
502 | |||
503 | /* Update RTT estimate by | ||
504 | * If (No feedback recv) | ||
505 | * R = R_sample; | ||
506 | * Else | ||
507 | * R = q * R + (1 - q) * R_sample; | ||
508 | * | ||
509 | * q is a constant, RFC 3448 recomments 0.9 | ||
510 | */ | ||
511 | if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { | ||
512 | ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); | ||
513 | hctx->ccid3hctx_rtt = r_sample; | ||
514 | } else | ||
515 | hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 + | ||
516 | r_sample / 10; | ||
517 | |||
518 | ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, " | ||
519 | "r_sample=%us\n", dccp_role(sk), sk, | ||
520 | hctx->ccid3hctx_rtt, r_sample); | ||
521 | |||
522 | /* Update timeout interval */ | ||
523 | hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, | ||
524 | USEC_PER_SEC); | ||
525 | |||
526 | /* Update receive rate */ | ||
527 | hctx->ccid3hctx_x_recv = x_recv;/* X_recv in bytes per sec */ | ||
528 | |||
529 | /* Update loss event rate */ | ||
530 | if (pinv == ~0 || pinv == 0) | ||
531 | hctx->ccid3hctx_p = 0; | ||
532 | else { | ||
533 | hctx->ccid3hctx_p = 1000000 / pinv; | ||
534 | |||
535 | if (hctx->ccid3hctx_p < TFRC_SMALLEST_P) { | ||
536 | hctx->ccid3hctx_p = TFRC_SMALLEST_P; | ||
537 | ccid3_pr_debug("%s, sk=%p, Smallest p used!\n", | ||
538 | dccp_role(sk), sk); | ||
539 | } | ||
540 | } | ||
541 | |||
542 | /* unschedule no feedback timer */ | ||
543 | sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); | ||
544 | |||
545 | /* Update sending rate */ | ||
546 | ccid3_hc_tx_update_x(sk); | ||
547 | |||
548 | /* Update next send time */ | ||
549 | timeval_sub_usecs(&hctx->ccid3hctx_t_nom, | ||
550 | hctx->ccid3hctx_t_ipi); | ||
551 | ccid3_calc_new_t_ipi(hctx); | ||
552 | timeval_add_usecs(&hctx->ccid3hctx_t_nom, | ||
553 | hctx->ccid3hctx_t_ipi); | ||
554 | ccid3_calc_new_delta(hctx); | ||
555 | |||
556 | /* remove all packets older than the one acked from history */ | ||
557 | dccp_tx_hist_purge_older(ccid3_tx_hist, | ||
558 | &hctx->ccid3hctx_hist, packet); | ||
559 | /* | ||
560 | * As we have calculated new ipi, delta, t_nom it is possible that | ||
561 | * we now can send a packet, so wake up dccp_wait_for_ccids. | ||
562 | */ | ||
563 | sk->sk_write_space(sk); | ||
564 | |||
565 | /* | ||
566 | * Schedule no feedback timer to expire in | ||
567 | * max(4 * R, 2 * s / X) | ||
568 | */ | ||
569 | next_tmout = max(hctx->ccid3hctx_t_rto, | ||
570 | 2 * usecs_div(hctx->ccid3hctx_s, | ||
571 | hctx->ccid3hctx_x)); | ||
572 | |||
573 | ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to " | ||
574 | "expire in %lu jiffies (%luus)\n", | ||
575 | dccp_role(sk), sk, | ||
576 | usecs_to_jiffies(next_tmout), next_tmout); | ||
577 | |||
578 | sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, | ||
579 | jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout))); | ||
580 | |||
581 | /* set idle flag */ | ||
582 | hctx->ccid3hctx_idle = 1; | ||
583 | break; | ||
584 | default: | ||
585 | printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", | ||
586 | __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); | ||
587 | dump_stack(); | ||
588 | break; | ||
589 | } | ||
590 | } | ||
591 | |||
592 | static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb) | ||
593 | { | ||
594 | const struct dccp_sock *dp = dccp_sk(sk); | ||
595 | struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
596 | |||
597 | if (hctx == NULL || !(sk->sk_state == DCCP_OPEN || | ||
598 | sk->sk_state == DCCP_PARTOPEN)) | ||
599 | return; | ||
600 | |||
601 | DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; | ||
602 | } | ||
603 | |||
/*
 * ccid3_hc_tx_parse_options - parse CCID3-specific options on the TX side
 * @sk:     socket owning the TX half-connection
 * @option: option type (TFRC_OPT_*)
 * @len:    length of the option value in bytes
 * @idx:    index of the option value within the packet
 * @value:  pointer to the raw option value
 *
 * Records the loss event rate, loss intervals and receive rate options
 * into ccid3hctx_options_received for later use by the feedback path.
 * Returns 0 on success (including for unrecognized option types, which
 * fall out of the switch untouched), -EINVAL on a malformed option.
 */
static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
				     unsigned char len, u16 idx,
				     unsigned char *value)
{
	int rc = 0;
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
	struct ccid3_options_received *opt_recv;

	if (hctx == NULL)
		return 0;

	opt_recv = &hctx->ccid3hctx_options_received;

	/* First option seen for a new sequence number: reset stored state */
	if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
		opt_recv->ccid3or_seqno = dp->dccps_gsr;
		opt_recv->ccid3or_loss_event_rate = ~0;
		opt_recv->ccid3or_loss_intervals_idx = 0;
		opt_recv->ccid3or_loss_intervals_len = 0;
		opt_recv->ccid3or_receive_rate = 0;
	}

	switch (option) {
	case TFRC_OPT_LOSS_EVENT_RATE:
		if (len != 4) {
			ccid3_pr_debug("%s, sk=%p, invalid len for "
				       "TFRC_OPT_LOSS_EVENT_RATE\n",
				       dccp_role(sk), sk);
			rc = -EINVAL;
		} else {
			/* NOTE(review): u32 load through a cast pointer -
			 * assumes the option payload is suitably aligned;
			 * verify on strict-alignment architectures */
			opt_recv->ccid3or_loss_event_rate = ntohl(*(u32 *)value);
			ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n",
				       dccp_role(sk), sk,
				       opt_recv->ccid3or_loss_event_rate);
		}
		break;
	case TFRC_OPT_LOSS_INTERVALS:
		/* Only position and length are recorded; the interval data
		 * itself is not interpreted here */
		opt_recv->ccid3or_loss_intervals_idx = idx;
		opt_recv->ccid3or_loss_intervals_len = len;
		ccid3_pr_debug("%s, sk=%p, LOSS_INTERVALS=(%u, %u)\n",
			       dccp_role(sk), sk,
			       opt_recv->ccid3or_loss_intervals_idx,
			       opt_recv->ccid3or_loss_intervals_len);
		break;
	case TFRC_OPT_RECEIVE_RATE:
		if (len != 4) {
			ccid3_pr_debug("%s, sk=%p, invalid len for "
				       "TFRC_OPT_RECEIVE_RATE\n",
				       dccp_role(sk), sk);
			rc = -EINVAL;
		} else {
			/* NOTE(review): same alignment assumption as above */
			opt_recv->ccid3or_receive_rate = ntohl(*(u32 *)value);
			ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n",
				       dccp_role(sk), sk,
				       opt_recv->ccid3or_receive_rate);
		}
		break;
	}

	return rc;
}
665 | |||
/*
 * ccid3_hc_tx_init - allocate and initialize the CCID3 TX half-connection
 *
 * Starts in the NO_SENT state with a transmission rate of one packet
 * per second.  Returns 0 on success or -ENOMEM.
 */
static int ccid3_hc_tx_init(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_tx_sock *hctx;

	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);

	hctx = dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx),
						      gfp_any());
	if (hctx == NULL)
		return -ENOMEM;

	memset(hctx, 0, sizeof(*hctx));

	/* Use the connection's packet size if sane, else the TFRC default */
	if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE &&
	    dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE)
		hctx->ccid3hctx_s = dp->dccps_packet_size;
	else
		hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE;

	/* Set transmission rate to 1 packet per second */
	hctx->ccid3hctx_x = hctx->ccid3hctx_s;
	hctx->ccid3hctx_t_rto = USEC_PER_SEC;
	hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT;
	INIT_LIST_HEAD(&hctx->ccid3hctx_hist);
	init_timer(&hctx->ccid3hctx_no_feedback_timer);

	return 0;
}
695 | |||
/*
 * ccid3_hc_tx_exit - tear down the TX half-connection
 *
 * Moves to the TERM state first (so late callers see it and bail out),
 * then stops the no-feedback timer, purges the packet history and
 * frees the private state.  Order matters here.
 */
static void ccid3_hc_tx_exit(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;

	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
	BUG_ON(hctx == NULL);

	ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
	sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);

	/* Empty packet history */
	dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist);

	kfree(dp->dccps_hc_tx_ccid_private);
	dp->dccps_hc_tx_ccid_private = NULL;
}
713 | |||
714 | /* | ||
715 | * RX Half Connection methods | ||
716 | */ | ||
717 | |||
/* TFRC receiver states */
enum ccid3_hc_rx_states {
	TFRC_RSTATE_NO_DATA = 1,	/* no data packet received yet */
	TFRC_RSTATE_DATA,		/* data seen, feedback being generated */
	TFRC_RSTATE_TERM = 127,		/* half-connection being torn down */
};
724 | |||
#ifdef CCID3_DEBUG
/* Map an RX state to its printable name (debug builds only).
 * NOTE(review): no bounds check - assumes a valid enum value;
 * out-of-range states would index past the table. */
static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
{
	static char *ccid3_rx_state_names[] = {
	[TFRC_RSTATE_NO_DATA] = "NO_DATA",
	[TFRC_RSTATE_DATA]    = "DATA",
	[TFRC_RSTATE_TERM]    = "TERM",
	};

	return ccid3_rx_state_names[state];
}
#endif
737 | |||
/*
 * Transition the RX half-connection state machine, logging the change.
 * Same-state transitions are flagged with WARN_ON but still stored.
 */
static inline void ccid3_hc_rx_set_state(struct sock *sk,
					 enum ccid3_hc_rx_states state)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
	enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state;

	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
		       dccp_role(sk), sk, ccid3_rx_state_name(oldstate),
		       ccid3_rx_state_name(state));
	WARN_ON(state == oldstate);
	hcrx->ccid3hcrx_state = state;
}
751 | |||
/*
 * ccid3_hc_rx_send_feedback - send a TFRC feedback packet
 *
 * Computes X_recv from the bytes received since the last feedback,
 * snapshots the newest data packet's window counter, sequence number
 * and elapsed time, resets the byte counter, then sends an ACK that
 * will carry the feedback options (via ccid3_hc_rx_insert_options).
 */
static void ccid3_hc_rx_send_feedback(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
	struct dccp_rx_hist_entry *packet;
	struct timeval now;

	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);

	do_gettimeofday(&now);

	switch (hcrx->ccid3hcrx_state) {
	case TFRC_RSTATE_NO_DATA:
		hcrx->ccid3hcrx_x_recv = 0;
		break;
	case TFRC_RSTATE_DATA: {
		/* X_recv = bytes received / time since last feedback */
		const u32 delta = timeval_delta(&now,
					&hcrx->ccid3hcrx_tstamp_last_feedback);

		hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv *
					  USEC_PER_SEC);
		if (likely(delta > 1))
			hcrx->ccid3hcrx_x_recv /= delta;
	}
		break;
	default:
		printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
		       __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state);
		dump_stack();
		return;
	}

	packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist);
	if (packet == NULL) {
		printk(KERN_CRIT "%s: %s, sk=%p, no data packet in history!\n",
		       __FUNCTION__, dccp_role(sk), sk);
		dump_stack();
		return;
	}

	hcrx->ccid3hcrx_tstamp_last_feedback = now;
	hcrx->ccid3hcrx_last_counter = packet->dccphrx_ccval;
	hcrx->ccid3hcrx_seqno_last_counter = packet->dccphrx_seqno;
	hcrx->ccid3hcrx_bytes_recv = 0;

	/* Convert to multiples of 10us */
	hcrx->ccid3hcrx_elapsed_time =
		timeval_delta(&now, &packet->dccphrx_tstamp) / 10;
	/* Loss event rate is sent as its inverse; ~0 means "no loss" */
	if (hcrx->ccid3hcrx_p == 0)
		hcrx->ccid3hcrx_pinv = ~0;
	else
		hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
	dccp_send_ack(sk);
}
806 | |||
/*
 * ccid3_hc_rx_insert_options - add CCID3 feedback options to an outgoing skb
 *
 * Stamps the packet with the last window counter and, on packets that
 * carry an ACK, inserts the elapsed time, timestamp, loss event rate
 * and receive rate options for the sender's rate computation.
 */
static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	u32 x_recv, pinv;
	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;

	if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN ||
			      sk->sk_state == DCCP_PARTOPEN))
		return;

	DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter;

	/* The remaining options only make sense on ACK-bearing packets */
	if (dccp_packet_without_ack(skb))
		return;

	if (hcrx->ccid3hcrx_elapsed_time != 0)
		dccp_insert_option_elapsed_time(sk, skb,
						hcrx->ccid3hcrx_elapsed_time);
	dccp_insert_option_timestamp(sk, skb);
	/* Option values go on the wire in network byte order */
	x_recv = htonl(hcrx->ccid3hcrx_x_recv);
	pinv = htonl(hcrx->ccid3hcrx_pinv);
	dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
			   &pinv, sizeof(pinv));
	dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE,
			   &x_recv, sizeof(x_recv));
}
833 | |||
834 | /* calculate first loss interval | ||
835 | * | ||
836 | * returns estimated loss interval in usecs */ | ||
837 | |||
838 | static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) | ||
839 | { | ||
840 | struct dccp_sock *dp = dccp_sk(sk); | ||
841 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
842 | struct dccp_rx_hist_entry *entry, *next, *tail = NULL; | ||
843 | u32 rtt, delta, x_recv, fval, p, tmp2; | ||
844 | struct timeval tstamp = { 0, }; | ||
845 | int interval = 0; | ||
846 | int win_count = 0; | ||
847 | int step = 0; | ||
848 | u64 tmp1; | ||
849 | |||
850 | list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, | ||
851 | dccphrx_node) { | ||
852 | if (dccp_rx_hist_entry_data_packet(entry)) { | ||
853 | tail = entry; | ||
854 | |||
855 | switch (step) { | ||
856 | case 0: | ||
857 | tstamp = entry->dccphrx_tstamp; | ||
858 | win_count = entry->dccphrx_ccval; | ||
859 | step = 1; | ||
860 | break; | ||
861 | case 1: | ||
862 | interval = win_count - entry->dccphrx_ccval; | ||
863 | if (interval < 0) | ||
864 | interval += TFRC_WIN_COUNT_LIMIT; | ||
865 | if (interval > 4) | ||
866 | goto found; | ||
867 | break; | ||
868 | } | ||
869 | } | ||
870 | } | ||
871 | |||
872 | if (step == 0) { | ||
873 | printk(KERN_CRIT "%s: %s, sk=%p, packet history contains no " | ||
874 | "data packets!\n", | ||
875 | __FUNCTION__, dccp_role(sk), sk); | ||
876 | return ~0; | ||
877 | } | ||
878 | |||
879 | if (interval == 0) { | ||
880 | ccid3_pr_debug("%s, sk=%p, Could not find a win_count " | ||
881 | "interval > 0. Defaulting to 1\n", | ||
882 | dccp_role(sk), sk); | ||
883 | interval = 1; | ||
884 | } | ||
885 | found: | ||
886 | rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval; | ||
887 | ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n", | ||
888 | dccp_role(sk), sk, rtt); | ||
889 | if (rtt == 0) | ||
890 | rtt = 1; | ||
891 | |||
892 | delta = timeval_now_delta(&hcrx->ccid3hcrx_tstamp_last_feedback); | ||
893 | x_recv = hcrx->ccid3hcrx_bytes_recv * USEC_PER_SEC; | ||
894 | if (likely(delta > 1)) | ||
895 | x_recv /= delta; | ||
896 | |||
897 | tmp1 = (u64)x_recv * (u64)rtt; | ||
898 | do_div(tmp1,10000000); | ||
899 | tmp2 = (u32)tmp1; | ||
900 | fval = (hcrx->ccid3hcrx_s * 100000) / tmp2; | ||
901 | /* do not alter order above or you will get overflow on 32 bit */ | ||
902 | p = tfrc_calc_x_reverse_lookup(fval); | ||
903 | ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied " | ||
904 | "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); | ||
905 | |||
906 | if (p == 0) | ||
907 | return ~0; | ||
908 | else | ||
909 | return 1000000 / p; | ||
910 | } | ||
911 | |||
/*
 * ccid3_hc_rx_update_li - update the loss interval history after loss
 * @sk:       socket owning the RX half-connection
 * @seq_loss: sequence number where loss was detected, or
 *            DCCP_MAX_SEQNO + 1 when no loss was found
 * @win_loss: window counter value associated with the loss
 *
 * Only creates the very first loss interval; its length is estimated
 * from the current receive rate via ccid3_hc_rx_calc_first_li().
 */
static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;

	if (seq_loss != DCCP_MAX_SEQNO + 1 &&
	    list_empty(&hcrx->ccid3hcrx_li_hist)) {
		struct dccp_li_hist_entry *li_tail;

		li_tail = dccp_li_hist_interval_new(ccid3_li_hist,
						    &hcrx->ccid3hcrx_li_hist,
						    seq_loss, win_loss);
		if (li_tail == NULL)
			return;
		li_tail->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
	}
	/* FIXME: find end of interval */
}
930 | |||
931 | static void ccid3_hc_rx_detect_loss(struct sock *sk) | ||
932 | { | ||
933 | struct dccp_sock *dp = dccp_sk(sk); | ||
934 | struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
935 | u8 win_loss; | ||
936 | const u64 seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist, | ||
937 | &hcrx->ccid3hcrx_li_hist, | ||
938 | &win_loss); | ||
939 | |||
940 | ccid3_hc_rx_update_li(sk, seq_loss, win_loss); | ||
941 | } | ||
942 | |||
/*
 * ccid3_hc_rx_packet_recv - main RX-side packet processing
 *
 * Updates the receiver RTT estimate from timestamp echoes, adds the
 * packet to the history, sends the initial feedback on first data,
 * sends periodic feedback at most once per RTT, and - when the history
 * insert signals a loss - recomputes the loss event rate and sends
 * immediate feedback if it increased.
 */
static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
	const struct dccp_options_received *opt_recv;
	struct dccp_rx_hist_entry *packet;
	struct timeval now;
	u8 win_count;
	u32 p_prev;
	int ins;

	if (hcrx == NULL)
		return;

	BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA ||
		 hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA));

	opt_recv = &dp->dccps_options_received;

	switch (DCCP_SKB_CB(skb)->dccpd_type) {
	case DCCP_PKT_ACK:
		if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
			return;
		/* fall through */
	case DCCP_PKT_DATAACK:
		if (opt_recv->dccpor_timestamp_echo == 0)
			break;
		/* RTT sample from the echoed timestamp minus the peer's
		 * hold time; wire values are in 10us units */
		p_prev = hcrx->ccid3hcrx_rtt;
		do_gettimeofday(&now);
		hcrx->ccid3hcrx_rtt = timeval_usecs(&now) -
				     (opt_recv->dccpor_timestamp_echo -
				      opt_recv->dccpor_elapsed_time) * 10;
		if (p_prev != hcrx->ccid3hcrx_rtt)
			ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n",
				       dccp_role(sk), hcrx->ccid3hcrx_rtt,
				       opt_recv->dccpor_elapsed_time);
		break;
	case DCCP_PKT_DATA:
		break;
	default:
		ccid3_pr_debug("%s, sk=%p, not DATA/DATAACK/ACK packet(%s)\n",
			       dccp_role(sk), sk,
			       dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
		return;
	}

	packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp,
					skb, SLAB_ATOMIC);
	if (packet == NULL) {
		ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet "
			       "to history (consider it lost)!",
			       dccp_role(sk), sk);
		return;
	}

	win_count = packet->dccphrx_ccval;

	/* non-zero ins indicates the insert detected a loss event */
	ins = dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist,
				      &hcrx->ccid3hcrx_li_hist, packet);

	if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK)
		return;

	switch (hcrx->ccid3hcrx_state) {
	case TFRC_RSTATE_NO_DATA:
		ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial "
			       "feedback\n",
			       dccp_role(sk), sk,
			       dccp_state_name(sk->sk_state), skb);
		ccid3_hc_rx_send_feedback(sk);
		ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
		return;
	case TFRC_RSTATE_DATA:
		/* count payload bytes only (skb minus DCCP header) */
		hcrx->ccid3hcrx_bytes_recv += skb->len -
					      dccp_hdr(skb)->dccph_doff * 4;
		if (ins != 0)
			break;

		/* No loss: send feedback at most once per RTT */
		do_gettimeofday(&now);
		if (timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) >=
		    hcrx->ccid3hcrx_rtt) {
			hcrx->ccid3hcrx_tstamp_last_ack = now;
			ccid3_hc_rx_send_feedback(sk);
		}
		return;
	default:
		printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
		       __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state);
		dump_stack();
		return;
	}

	/* Dealing with packet loss */
	ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n",
		       dccp_role(sk), sk, dccp_state_name(sk->sk_state));

	ccid3_hc_rx_detect_loss(sk);
	p_prev = hcrx->ccid3hcrx_p;

	/* Calculate loss event rate */
	if (!list_empty(&hcrx->ccid3hcrx_li_hist))
		/* Scaling up by 1000000 as fixed decimal */
		hcrx->ccid3hcrx_p = 1000000 / dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist);

	/* Immediate feedback when the loss event rate increased */
	if (hcrx->ccid3hcrx_p > p_prev) {
		ccid3_hc_rx_send_feedback(sk);
		return;
	}
}
1051 | |||
/*
 * ccid3_hc_rx_init - allocate and initialize the CCID3 RX half-connection
 *
 * Starts in the NO_DATA state.  Returns 0 on success or -ENOMEM.
 */
static int ccid3_hc_rx_init(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_rx_sock *hcrx;

	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);

	hcrx = dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx),
						      gfp_any());
	if (hcrx == NULL)
		return -ENOMEM;

	memset(hcrx, 0, sizeof(*hcrx));

	/* Use the connection's packet size if sane, else the TFRC default */
	if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE &&
	    dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE)
		hcrx->ccid3hcrx_s = dp->dccps_packet_size;
	else
		hcrx->ccid3hcrx_s = TFRC_STD_PACKET_SIZE;

	hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
	INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist);
	INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist);
	/*
	 * XXX this seems to be paranoid, need to think more about this, for
	 * now start with something different than zero. -acme
	 */
	hcrx->ccid3hcrx_rtt = USEC_PER_SEC / 5;
	return 0;
}
1082 | |||
/*
 * ccid3_hc_rx_exit - tear down the RX half-connection
 *
 * Enters the TERM state before purging both histories, then frees
 * the private state.
 */
static void ccid3_hc_rx_exit(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;

	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);

	if (hcrx == NULL)
		return;

	ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);

	/* Empty packet history */
	dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist);

	/* Empty loss interval history */
	dccp_li_hist_purge(ccid3_li_hist, &hcrx->ccid3hcrx_li_hist);

	kfree(dp->dccps_hc_rx_ccid_private);
	dp->dccps_hc_rx_ccid_private = NULL;
}
1104 | |||
1105 | static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) | ||
1106 | { | ||
1107 | const struct dccp_sock *dp = dccp_sk(sk); | ||
1108 | const struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; | ||
1109 | |||
1110 | if (hcrx == NULL) | ||
1111 | return; | ||
1112 | |||
1113 | info->tcpi_ca_state = hcrx->ccid3hcrx_state; | ||
1114 | info->tcpi_options |= TCPI_OPT_TIMESTAMPS; | ||
1115 | info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt; | ||
1116 | } | ||
1117 | |||
1118 | static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) | ||
1119 | { | ||
1120 | const struct dccp_sock *dp = dccp_sk(sk); | ||
1121 | const struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; | ||
1122 | |||
1123 | if (hctx == NULL) | ||
1124 | return; | ||
1125 | |||
1126 | info->tcpi_rto = hctx->ccid3hctx_t_rto; | ||
1127 | info->tcpi_rtt = hctx->ccid3hctx_rtt; | ||
1128 | } | ||
1129 | |||
/* CCID3 operations table registered with the DCCP core */
static struct ccid ccid3 = {
	.ccid_id		   = 3,
	.ccid_name		   = "ccid3",
	.ccid_owner		   = THIS_MODULE,
	.ccid_init		   = ccid3_init,
	.ccid_exit		   = ccid3_exit,
	.ccid_hc_tx_init	   = ccid3_hc_tx_init,
	.ccid_hc_tx_exit	   = ccid3_hc_tx_exit,
	.ccid_hc_tx_send_packet	   = ccid3_hc_tx_send_packet,
	.ccid_hc_tx_packet_sent	   = ccid3_hc_tx_packet_sent,
	.ccid_hc_tx_packet_recv	   = ccid3_hc_tx_packet_recv,
	.ccid_hc_tx_insert_options = ccid3_hc_tx_insert_options,
	.ccid_hc_tx_parse_options  = ccid3_hc_tx_parse_options,
	.ccid_hc_rx_init	   = ccid3_hc_rx_init,
	.ccid_hc_rx_exit	   = ccid3_hc_rx_exit,
	.ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options,
	.ccid_hc_rx_packet_recv	   = ccid3_hc_rx_packet_recv,
	.ccid_hc_rx_get_info	   = ccid3_hc_rx_get_info,
	.ccid_hc_tx_get_info	   = ccid3_hc_tx_get_info,
};
1150 | |||
/* Expose the debug flag as a read-only module parameter */
module_param(ccid3_debug, int, 0444);
MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
1153 | |||
/*
 * ccid3_module_init - create the shared RX/TX/loss-interval history
 * caches and register the CCID with the DCCP core.  On failure the
 * already-created caches are torn down in reverse order (goto chain).
 */
static __init int ccid3_module_init(void)
{
	int rc = -ENOBUFS;

	ccid3_rx_hist = dccp_rx_hist_new("ccid3");
	if (ccid3_rx_hist == NULL)
		goto out;

	ccid3_tx_hist = dccp_tx_hist_new("ccid3");
	if (ccid3_tx_hist == NULL)
		goto out_free_rx;

	ccid3_li_hist = dccp_li_hist_new("ccid3");
	if (ccid3_li_hist == NULL)
		goto out_free_tx;

	rc = ccid_register(&ccid3);
	if (rc != 0)
		goto out_free_loss_interval_history;
out:
	return rc;

out_free_loss_interval_history:
	dccp_li_hist_delete(ccid3_li_hist);
	ccid3_li_hist = NULL;
out_free_tx:
	dccp_tx_hist_delete(ccid3_tx_hist);
	ccid3_tx_hist = NULL;
out_free_rx:
	dccp_rx_hist_delete(ccid3_rx_hist);
	ccid3_rx_hist = NULL;
	goto out;
}
module_init(ccid3_module_init);
1188 | |||
/*
 * ccid3_module_exit - unregister the CCID and release the shared
 * history caches.  Unregistration comes first so no new users can
 * reach the caches while they are being deleted.
 */
static __exit void ccid3_module_exit(void)
{
#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
	/*
	 * Hack to use while developing, so that we get rid of the control
	 * sock, that is what keeps a refcount on dccp.ko -acme
	 */
	extern void dccp_ctl_sock_exit(void);

	dccp_ctl_sock_exit();
#endif
	ccid_unregister(&ccid3);

	if (ccid3_tx_hist != NULL) {
		dccp_tx_hist_delete(ccid3_tx_hist);
		ccid3_tx_hist = NULL;
	}
	if (ccid3_rx_hist != NULL) {
		dccp_rx_hist_delete(ccid3_rx_hist);
		ccid3_rx_hist = NULL;
	}
	if (ccid3_li_hist != NULL) {
		dccp_li_hist_delete(ccid3_li_hist);
		ccid3_li_hist = NULL;
	}
}
module_exit(ccid3_module_exit);
1216 | |||
/* Module metadata */
MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, "
	      "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID");
MODULE_LICENSE("GPL");
MODULE_ALIAS("net-dccp-ccid-3");
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h new file mode 100644 index 000000000000..ee8cbace6630 --- /dev/null +++ b/net/dccp/ccids/ccid3.h | |||
@@ -0,0 +1,137 @@ | |||
1 | /* | ||
2 | * net/dccp/ccids/ccid3.h | ||
3 | * | ||
4 | * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. | ||
5 | * | ||
6 | * An implementation of the DCCP protocol | ||
7 | * | ||
8 | * This code has been developed by the University of Waikato WAND | ||
9 | * research group. For further information please see http://www.wand.net.nz/ | ||
10 | * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz | ||
11 | * | ||
12 | * This code also uses code from Lulea University, rereleased as GPL by its | ||
13 | * authors: | ||
14 | * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon | ||
15 | * | ||
16 | * Changes to meet Linux coding standards, to make it meet latest ccid3 draft | ||
17 | * and to make it work as a loadable module in the DCCP stack written by | ||
18 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. | ||
19 | * | ||
20 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
21 | * | ||
22 | * This program is free software; you can redistribute it and/or modify | ||
23 | * it under the terms of the GNU General Public License as published by | ||
24 | * the Free Software Foundation; either version 2 of the License, or | ||
25 | * (at your option) any later version. | ||
26 | * | ||
27 | * This program is distributed in the hope that it will be useful, | ||
28 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
29 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
30 | * GNU General Public License for more details. | ||
31 | * | ||
32 | * You should have received a copy of the GNU General Public License | ||
33 | * along with this program; if not, write to the Free Software | ||
34 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
35 | */ | ||
36 | #ifndef _DCCP_CCID3_H_ | ||
37 | #define _DCCP_CCID3_H_ | ||
38 | |||
39 | #include <linux/config.h> | ||
40 | #include <linux/list.h> | ||
41 | #include <linux/time.h> | ||
42 | #include <linux/types.h> | ||
43 | |||
/* Packet size bounds (bytes) used to sanitize the connection's s */
#define TFRC_MIN_PACKET_SIZE	   16
#define TFRC_STD_PACKET_SIZE	  256
#define TFRC_MAX_PACKET_SIZE	65535

/* Two seconds as per CCID3 spec */
#define TFRC_INITIAL_TIMEOUT	   (2 * USEC_PER_SEC)

/* In usecs - half the scheduling granularity as per RFC3448 4.6 */
#define TFRC_OPSYS_HALF_TIME_GRAN	(USEC_PER_SEC / (2 * HZ))

/* In seconds */
#define TFRC_MAX_BACK_OFF_TIME	64

/* Smallest usable loss event rate, scaled by 1000000 */
#define TFRC_SMALLEST_P		   40

/* CCID3-specific DCCP option types */
enum ccid3_options {
	TFRC_OPT_LOSS_EVENT_RATE = 192,
	TFRC_OPT_LOSS_INTERVALS	 = 193,
	TFRC_OPT_RECEIVE_RATE	 = 194,
};
64 | |||
/* Per-packet snapshot of the CCID3 options parsed on the TX side;
 * reset whenever a new sequence number (GSR) is seen */
struct ccid3_options_received {
	u64 ccid3or_seqno:48,		     /* seqno the options belong to */
	    ccid3or_loss_intervals_idx:16;   /* offset of loss intervals opt */
	u16 ccid3or_loss_intervals_len;	     /* length of loss intervals opt */
	u32 ccid3or_loss_event_rate;	     /* inverse loss rate, ~0 = none */
	u32 ccid3or_receive_rate;	     /* X_recv in bytes/sec */
};
72 | |||
/** struct ccid3_hc_tx_sock - CCID3 sender half connection sock
 *
 * @ccid3hctx_state - Sender state
 * @ccid3hctx_x - Current sending rate
 * @ccid3hctx_x_recv - Receive rate
 * @ccid3hctx_x_calc - Calculated send (?) rate
 * @ccid3hctx_s - Packet size
 * @ccid3hctx_rtt - Estimate of current round trip time in usecs
 * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
 * @ccid3hctx_last_win_count - Last window counter sent
 * @ccid3hctx_t_last_win_count - Timestamp of earliest packet
 *				 with last_win_count value sent
 * @ccid3hctx_no_feedback_timer - Handle to no feedback timer
 * @ccid3hctx_idle - FIXME
 * @ccid3hctx_t_ld - Time last doubled during slow start
 * @ccid3hctx_t_nom - Nominal send time of next packet
 * @ccid3hctx_t_rto - No-feedback timeout interval in usecs
 * @ccid3hctx_t_ipi - Interpacket (send) interval
 * @ccid3hctx_delta - Send timer delta
 * @ccid3hctx_hist - Packet history
 * @ccid3hctx_options_received - Last parsed CCID3 options
 */
struct ccid3_hc_tx_sock {
	u32			ccid3hctx_x;
	u32			ccid3hctx_x_recv;
	u32			ccid3hctx_x_calc;
	u16			ccid3hctx_s;
	u32			ccid3hctx_rtt;
	u32			ccid3hctx_p;
	u8			ccid3hctx_state;
	u8			ccid3hctx_last_win_count;
	u8			ccid3hctx_idle;
	struct timeval		ccid3hctx_t_last_win_count;
	struct timer_list	ccid3hctx_no_feedback_timer;
	struct timeval		ccid3hctx_t_ld;
	struct timeval		ccid3hctx_t_nom;
	u32			ccid3hctx_t_rto;
	u32			ccid3hctx_t_ipi;
	u32			ccid3hctx_delta;
	struct list_head	ccid3hctx_hist;
	struct ccid3_options_received ccid3hctx_options_received;
};
113 | |||
/** struct ccid3_hc_rx_sock - CCID3 receiver half connection sock
 *
 * @ccid3hcrx_seqno_last_counter - Seqno of the packet that last advanced
 *				   the window counter
 * @ccid3hcrx_state - Receiver state (enum ccid3_hc_rx_states)
 * @ccid3hcrx_last_counter - Last window counter seen (4-bit CCVal)
 * @ccid3hcrx_rtt - Receiver RTT estimate in usecs
 * @ccid3hcrx_p - Loss event rate scaled by 1000000
 * @ccid3hcrx_bytes_recv - Bytes received since the last feedback
 * @ccid3hcrx_tstamp_last_feedback - When the last feedback was sent
 * @ccid3hcrx_tstamp_last_ack - When the last periodic ACK was sent
 * @ccid3hcrx_hist - Packet history
 * @ccid3hcrx_li_hist - Loss interval history
 * @ccid3hcrx_s - Packet size
 * @ccid3hcrx_pinv - Inverse loss event rate sent on the wire, ~0 = none
 * @ccid3hcrx_elapsed_time - Hold time of the acked packet, 10us units
 * @ccid3hcrx_x_recv - Receive rate in bytes/sec
 */
struct ccid3_hc_rx_sock {
	u64			ccid3hcrx_seqno_last_counter:48,
				ccid3hcrx_state:8,
				ccid3hcrx_last_counter:4;
	unsigned long		ccid3hcrx_rtt;
	u32			ccid3hcrx_p;
	u32			ccid3hcrx_bytes_recv;
	struct timeval		ccid3hcrx_tstamp_last_feedback;
	struct timeval		ccid3hcrx_tstamp_last_ack;
	struct list_head	ccid3hcrx_hist;
	struct list_head	ccid3hcrx_li_hist;
	u16			ccid3hcrx_s;
	u32			ccid3hcrx_pinv;
	u32			ccid3hcrx_elapsed_time;
	u32			ccid3hcrx_x_recv;
};
130 | |||
/* Safe field accessors: yield 0 when the half-connection private data
 * has not been allocated (or was already freed) */
#define ccid3_hc_tx_field(s,field) (s->dccps_hc_tx_ccid_private == NULL ? 0 : \
	((struct ccid3_hc_tx_sock *)s->dccps_hc_tx_ccid_private)->ccid3hctx_##field)

#define ccid3_hc_rx_field(s,field) (s->dccps_hc_rx_ccid_private == NULL ? 0 : \
	((struct ccid3_hc_rx_sock *)s->dccps_hc_rx_ccid_private)->ccid3hcrx_##field)
136 | |||
137 | #endif /* _DCCP_CCID3_H_ */ | ||
diff --git a/net/dccp/ccids/lib/Makefile b/net/dccp/ccids/lib/Makefile new file mode 100644 index 000000000000..5f940a6cbaca --- /dev/null +++ b/net/dccp/ccids/lib/Makefile | |||
@@ -0,0 +1,3 @@ | |||
1 | obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o | ||
2 | |||
3 | dccp_tfrc_lib-y := loss_interval.o packet_history.o tfrc_equation.o | ||
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c new file mode 100644 index 000000000000..4c01a54143ad --- /dev/null +++ b/net/dccp/ccids/lib/loss_interval.c | |||
@@ -0,0 +1,144 @@ | |||
1 | /* | ||
2 | * net/dccp/ccids/lib/loss_interval.c | ||
3 | * | ||
4 | * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. | ||
5 | * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> | ||
6 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | */ | ||
13 | |||
14 | #include <linux/config.h> | ||
15 | #include <linux/module.h> | ||
16 | |||
17 | #include "loss_interval.h" | ||
18 | |||
19 | struct dccp_li_hist *dccp_li_hist_new(const char *name) | ||
20 | { | ||
21 | struct dccp_li_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); | ||
22 | static const char dccp_li_hist_mask[] = "li_hist_%s"; | ||
23 | char *slab_name; | ||
24 | |||
25 | if (hist == NULL) | ||
26 | goto out; | ||
27 | |||
28 | slab_name = kmalloc(strlen(name) + sizeof(dccp_li_hist_mask) - 1, | ||
29 | GFP_ATOMIC); | ||
30 | if (slab_name == NULL) | ||
31 | goto out_free_hist; | ||
32 | |||
33 | sprintf(slab_name, dccp_li_hist_mask, name); | ||
34 | hist->dccplih_slab = kmem_cache_create(slab_name, | ||
35 | sizeof(struct dccp_li_hist_entry), | ||
36 | 0, SLAB_HWCACHE_ALIGN, | ||
37 | NULL, NULL); | ||
38 | if (hist->dccplih_slab == NULL) | ||
39 | goto out_free_slab_name; | ||
40 | out: | ||
41 | return hist; | ||
42 | out_free_slab_name: | ||
43 | kfree(slab_name); | ||
44 | out_free_hist: | ||
45 | kfree(hist); | ||
46 | hist = NULL; | ||
47 | goto out; | ||
48 | } | ||
49 | |||
50 | EXPORT_SYMBOL_GPL(dccp_li_hist_new); | ||
51 | |||
52 | void dccp_li_hist_delete(struct dccp_li_hist *hist) | ||
53 | { | ||
54 | const char* name = kmem_cache_name(hist->dccplih_slab); | ||
55 | |||
56 | kmem_cache_destroy(hist->dccplih_slab); | ||
57 | kfree(name); | ||
58 | kfree(hist); | ||
59 | } | ||
60 | |||
61 | EXPORT_SYMBOL_GPL(dccp_li_hist_delete); | ||
62 | |||
63 | void dccp_li_hist_purge(struct dccp_li_hist *hist, struct list_head *list) | ||
64 | { | ||
65 | struct dccp_li_hist_entry *entry, *next; | ||
66 | |||
67 | list_for_each_entry_safe(entry, next, list, dccplih_node) { | ||
68 | list_del_init(&entry->dccplih_node); | ||
69 | kmem_cache_free(hist->dccplih_slab, entry); | ||
70 | } | ||
71 | } | ||
72 | |||
73 | EXPORT_SYMBOL_GPL(dccp_li_hist_purge); | ||
74 | |||
75 | /* Weights used to calculate loss event rate */ | ||
76 | /* | ||
77 | * These are integers as per section 8 of RFC3448. We can then divide by 4 * | ||
78 | * when we use it. | ||
79 | */ | ||
80 | static const int dccp_li_hist_w[DCCP_LI_HIST_IVAL_F_LENGTH] = { | ||
81 | 4, 4, 4, 4, 3, 2, 1, 1, | ||
82 | }; | ||
83 | |||
84 | u32 dccp_li_hist_calc_i_mean(struct list_head *list) | ||
85 | { | ||
86 | struct dccp_li_hist_entry *li_entry, *li_next; | ||
87 | int i = 0; | ||
88 | u32 i_tot; | ||
89 | u32 i_tot0 = 0; | ||
90 | u32 i_tot1 = 0; | ||
91 | u32 w_tot = 0; | ||
92 | |||
93 | list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) { | ||
94 | if (i < DCCP_LI_HIST_IVAL_F_LENGTH) { | ||
95 | i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i]; | ||
96 | w_tot += dccp_li_hist_w[i]; | ||
97 | } | ||
98 | |||
99 | if (i != 0) | ||
100 | i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1]; | ||
101 | |||
102 | if (++i > DCCP_LI_HIST_IVAL_F_LENGTH) | ||
103 | break; | ||
104 | } | ||
105 | |||
106 | if (i != DCCP_LI_HIST_IVAL_F_LENGTH) | ||
107 | return 0; | ||
108 | |||
109 | i_tot = max(i_tot0, i_tot1); | ||
110 | |||
111 | /* FIXME: Why do we do this? -Ian McDonald */ | ||
112 | if (i_tot * 4 < w_tot) | ||
113 | i_tot = w_tot * 4; | ||
114 | |||
115 | return i_tot * 4 / w_tot; | ||
116 | } | ||
117 | |||
118 | EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean); | ||
119 | |||
120 | struct dccp_li_hist_entry *dccp_li_hist_interval_new(struct dccp_li_hist *hist, | ||
121 | struct list_head *list, | ||
122 | const u64 seq_loss, | ||
123 | const u8 win_loss) | ||
124 | { | ||
125 | struct dccp_li_hist_entry *tail = NULL, *entry; | ||
126 | int i; | ||
127 | |||
128 | for (i = 0; i <= DCCP_LI_HIST_IVAL_F_LENGTH; ++i) { | ||
129 | entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC); | ||
130 | if (entry == NULL) { | ||
131 | dccp_li_hist_purge(hist, list); | ||
132 | return NULL; | ||
133 | } | ||
134 | if (tail == NULL) | ||
135 | tail = entry; | ||
136 | list_add(&entry->dccplih_node, list); | ||
137 | } | ||
138 | |||
139 | entry->dccplih_seqno = seq_loss; | ||
140 | entry->dccplih_win_count = win_loss; | ||
141 | return tail; | ||
142 | } | ||
143 | |||
144 | EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new); | ||
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h new file mode 100644 index 000000000000..13ad47ba1420 --- /dev/null +++ b/net/dccp/ccids/lib/loss_interval.h | |||
@@ -0,0 +1,61 @@ | |||
1 | #ifndef _DCCP_LI_HIST_ | ||
2 | #define _DCCP_LI_HIST_ | ||
3 | /* | ||
4 | * net/dccp/ccids/lib/loss_interval.h | ||
5 | * | ||
6 | * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. | ||
7 | * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> | ||
8 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify it | ||
11 | * under the terms of the GNU General Public License as published by the Free | ||
12 | * Software Foundation; either version 2 of the License, or (at your option) | ||
13 | * any later version. | ||
14 | */ | ||
15 | |||
16 | #include <linux/config.h> | ||
17 | #include <linux/list.h> | ||
18 | #include <linux/slab.h> | ||
19 | #include <linux/time.h> | ||
20 | |||
21 | #define DCCP_LI_HIST_IVAL_F_LENGTH 8 | ||
22 | |||
23 | struct dccp_li_hist { | ||
24 | kmem_cache_t *dccplih_slab; | ||
25 | }; | ||
26 | |||
27 | extern struct dccp_li_hist *dccp_li_hist_new(const char *name); | ||
28 | extern void dccp_li_hist_delete(struct dccp_li_hist *hist); | ||
29 | |||
30 | struct dccp_li_hist_entry { | ||
31 | struct list_head dccplih_node; | ||
32 | u64 dccplih_seqno:48, | ||
33 | dccplih_win_count:4; | ||
34 | u32 dccplih_interval; | ||
35 | }; | ||
36 | |||
37 | static inline struct dccp_li_hist_entry * | ||
38 | dccp_li_hist_entry_new(struct dccp_li_hist *hist, | ||
39 | const unsigned int __nocast prio) | ||
40 | { | ||
41 | return kmem_cache_alloc(hist->dccplih_slab, prio); | ||
42 | } | ||
43 | |||
44 | static inline void dccp_li_hist_entry_delete(struct dccp_li_hist *hist, | ||
45 | struct dccp_li_hist_entry *entry) | ||
46 | { | ||
47 | if (entry != NULL) | ||
48 | kmem_cache_free(hist->dccplih_slab, entry); | ||
49 | } | ||
50 | |||
51 | extern void dccp_li_hist_purge(struct dccp_li_hist *hist, | ||
52 | struct list_head *list); | ||
53 | |||
54 | extern u32 dccp_li_hist_calc_i_mean(struct list_head *list); | ||
55 | |||
56 | extern struct dccp_li_hist_entry * | ||
57 | dccp_li_hist_interval_new(struct dccp_li_hist *hist, | ||
58 | struct list_head *list, | ||
59 | const u64 seq_loss, | ||
60 | const u8 win_loss); | ||
61 | #endif /* _DCCP_LI_HIST_ */ | ||
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c new file mode 100644 index 000000000000..d3f9d2053830 --- /dev/null +++ b/net/dccp/ccids/lib/packet_history.c | |||
@@ -0,0 +1,398 @@ | |||
1 | /* | ||
2 | * net/dccp/ccids/lib/packet_history.c | ||
3 | * | ||
4 | * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. | ||
5 | * | ||
6 | * An implementation of the DCCP protocol | ||
7 | * | ||
8 | * This code has been developed by the University of Waikato WAND | ||
9 | * research group. For further information please see http://www.wand.net.nz/ | ||
10 | * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz | ||
11 | * | ||
12 | * This code also uses code from Lulea University, rereleased as GPL by its | ||
13 | * authors: | ||
14 | * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon | ||
15 | * | ||
16 | * Changes to meet Linux coding standards, to make it meet latest ccid3 draft | ||
17 | * and to make it work as a loadable module in the DCCP stack written by | ||
18 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. | ||
19 | * | ||
20 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
21 | * | ||
22 | * This program is free software; you can redistribute it and/or modify | ||
23 | * it under the terms of the GNU General Public License as published by | ||
24 | * the Free Software Foundation; either version 2 of the License, or | ||
25 | * (at your option) any later version. | ||
26 | * | ||
27 | * This program is distributed in the hope that it will be useful, | ||
28 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
29 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
30 | * GNU General Public License for more details. | ||
31 | * | ||
32 | * You should have received a copy of the GNU General Public License | ||
33 | * along with this program; if not, write to the Free Software | ||
34 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
35 | */ | ||
36 | |||
37 | #include <linux/config.h> | ||
38 | #include <linux/module.h> | ||
39 | #include <linux/string.h> | ||
40 | |||
41 | #include "packet_history.h" | ||
42 | |||
43 | struct dccp_rx_hist *dccp_rx_hist_new(const char *name) | ||
44 | { | ||
45 | struct dccp_rx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); | ||
46 | static const char dccp_rx_hist_mask[] = "rx_hist_%s"; | ||
47 | char *slab_name; | ||
48 | |||
49 | if (hist == NULL) | ||
50 | goto out; | ||
51 | |||
52 | slab_name = kmalloc(strlen(name) + sizeof(dccp_rx_hist_mask) - 1, | ||
53 | GFP_ATOMIC); | ||
54 | if (slab_name == NULL) | ||
55 | goto out_free_hist; | ||
56 | |||
57 | sprintf(slab_name, dccp_rx_hist_mask, name); | ||
58 | hist->dccprxh_slab = kmem_cache_create(slab_name, | ||
59 | sizeof(struct dccp_rx_hist_entry), | ||
60 | 0, SLAB_HWCACHE_ALIGN, | ||
61 | NULL, NULL); | ||
62 | if (hist->dccprxh_slab == NULL) | ||
63 | goto out_free_slab_name; | ||
64 | out: | ||
65 | return hist; | ||
66 | out_free_slab_name: | ||
67 | kfree(slab_name); | ||
68 | out_free_hist: | ||
69 | kfree(hist); | ||
70 | hist = NULL; | ||
71 | goto out; | ||
72 | } | ||
73 | |||
74 | EXPORT_SYMBOL_GPL(dccp_rx_hist_new); | ||
75 | |||
76 | void dccp_rx_hist_delete(struct dccp_rx_hist *hist) | ||
77 | { | ||
78 | const char* name = kmem_cache_name(hist->dccprxh_slab); | ||
79 | |||
80 | kmem_cache_destroy(hist->dccprxh_slab); | ||
81 | kfree(name); | ||
82 | kfree(hist); | ||
83 | } | ||
84 | |||
85 | EXPORT_SYMBOL_GPL(dccp_rx_hist_delete); | ||
86 | |||
87 | void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list) | ||
88 | { | ||
89 | struct dccp_rx_hist_entry *entry, *next; | ||
90 | |||
91 | list_for_each_entry_safe(entry, next, list, dccphrx_node) { | ||
92 | list_del_init(&entry->dccphrx_node); | ||
93 | kmem_cache_free(hist->dccprxh_slab, entry); | ||
94 | } | ||
95 | } | ||
96 | |||
97 | EXPORT_SYMBOL_GPL(dccp_rx_hist_purge); | ||
98 | |||
99 | struct dccp_rx_hist_entry * | ||
100 | dccp_rx_hist_find_data_packet(const struct list_head *list) | ||
101 | { | ||
102 | struct dccp_rx_hist_entry *entry, *packet = NULL; | ||
103 | |||
104 | list_for_each_entry(entry, list, dccphrx_node) | ||
105 | if (entry->dccphrx_type == DCCP_PKT_DATA || | ||
106 | entry->dccphrx_type == DCCP_PKT_DATAACK) { | ||
107 | packet = entry; | ||
108 | break; | ||
109 | } | ||
110 | |||
111 | return packet; | ||
112 | } | ||
113 | |||
114 | EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet); | ||
115 | |||
116 | int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, | ||
117 | struct list_head *rx_list, | ||
118 | struct list_head *li_list, | ||
119 | struct dccp_rx_hist_entry *packet) | ||
120 | { | ||
121 | struct dccp_rx_hist_entry *entry, *next, *iter; | ||
122 | u8 num_later = 0; | ||
123 | |||
124 | iter = dccp_rx_hist_head(rx_list); | ||
125 | if (iter == NULL) | ||
126 | dccp_rx_hist_add_entry(rx_list, packet); | ||
127 | else { | ||
128 | const u64 seqno = packet->dccphrx_seqno; | ||
129 | |||
130 | if (after48(seqno, iter->dccphrx_seqno)) | ||
131 | dccp_rx_hist_add_entry(rx_list, packet); | ||
132 | else { | ||
133 | if (dccp_rx_hist_entry_data_packet(iter)) | ||
134 | num_later = 1; | ||
135 | |||
136 | list_for_each_entry_continue(iter, rx_list, | ||
137 | dccphrx_node) { | ||
138 | if (after48(seqno, iter->dccphrx_seqno)) { | ||
139 | dccp_rx_hist_add_entry(&iter->dccphrx_node, | ||
140 | packet); | ||
141 | goto trim_history; | ||
142 | } | ||
143 | |||
144 | if (dccp_rx_hist_entry_data_packet(iter)) | ||
145 | num_later++; | ||
146 | |||
147 | if (num_later == TFRC_RECV_NUM_LATE_LOSS) { | ||
148 | dccp_rx_hist_entry_delete(hist, packet); | ||
149 | return 1; | ||
150 | } | ||
151 | } | ||
152 | |||
153 | if (num_later < TFRC_RECV_NUM_LATE_LOSS) | ||
154 | dccp_rx_hist_add_entry(rx_list, packet); | ||
155 | /* | ||
156 | * FIXME: else what? should we destroy the packet | ||
157 | * like above? | ||
158 | */ | ||
159 | } | ||
160 | } | ||
161 | |||
162 | trim_history: | ||
163 | /* | ||
164 | * Trim history (remove all packets after the NUM_LATE_LOSS + 1 | ||
165 | * data packets) | ||
166 | */ | ||
167 | num_later = TFRC_RECV_NUM_LATE_LOSS + 1; | ||
168 | |||
169 | if (!list_empty(li_list)) { | ||
170 | list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { | ||
171 | if (num_later == 0) { | ||
172 | list_del_init(&entry->dccphrx_node); | ||
173 | dccp_rx_hist_entry_delete(hist, entry); | ||
174 | } else if (dccp_rx_hist_entry_data_packet(entry)) | ||
175 | --num_later; | ||
176 | } | ||
177 | } else { | ||
178 | int step = 0; | ||
179 | u8 win_count = 0; /* Not needed, but lets shut up gcc */ | ||
180 | int tmp; | ||
181 | /* | ||
182 | * We have no loss interval history so we need at least one | ||
183 | * RTT's worth of data packets to approximate the RTT. | ||
184 | */ | ||
185 | list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { | ||
186 | if (num_later == 0) { | ||
187 | switch (step) { | ||
188 | case 0: | ||
189 | step = 1; | ||
190 | /* OK, find next data packet */ | ||
191 | num_later = 1; | ||
192 | break; | ||
193 | case 1: | ||
194 | step = 2; | ||
195 | /* OK, find next data packet */ | ||
196 | num_later = 1; | ||
197 | win_count = entry->dccphrx_ccval; | ||
198 | break; | ||
199 | case 2: | ||
200 | tmp = win_count - entry->dccphrx_ccval; | ||
201 | if (tmp < 0) | ||
202 | tmp += TFRC_WIN_COUNT_LIMIT; | ||
203 | if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) { | ||
204 | /* | ||
205 | * We have found a packet older | ||
206 | * than one RTT; remove the rest | ||
207 | */ | ||
208 | step = 3; | ||
209 | } else /* OK, find next data packet */ | ||
210 | num_later = 1; | ||
211 | break; | ||
212 | case 3: | ||
213 | list_del_init(&entry->dccphrx_node); | ||
214 | dccp_rx_hist_entry_delete(hist, entry); | ||
215 | break; | ||
216 | } | ||
217 | } else if (dccp_rx_hist_entry_data_packet(entry)) | ||
218 | --num_later; | ||
219 | } | ||
220 | } | ||
221 | |||
222 | return 0; | ||
223 | } | ||
224 | |||
225 | EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet); | ||
226 | |||
227 | u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, | ||
228 | struct list_head *li_list, u8 *win_loss) | ||
229 | { | ||
230 | struct dccp_rx_hist_entry *entry, *next, *packet; | ||
231 | struct dccp_rx_hist_entry *a_loss = NULL; | ||
232 | struct dccp_rx_hist_entry *b_loss = NULL; | ||
233 | u64 seq_loss = DCCP_MAX_SEQNO + 1; | ||
234 | u8 num_later = TFRC_RECV_NUM_LATE_LOSS; | ||
235 | |||
236 | list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { | ||
237 | if (num_later == 0) { | ||
238 | b_loss = entry; | ||
239 | break; | ||
240 | } else if (dccp_rx_hist_entry_data_packet(entry)) | ||
241 | --num_later; | ||
242 | } | ||
243 | |||
244 | if (b_loss == NULL) | ||
245 | goto out; | ||
246 | |||
247 | num_later = 1; | ||
248 | list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) { | ||
249 | if (num_later == 0) { | ||
250 | a_loss = entry; | ||
251 | break; | ||
252 | } else if (dccp_rx_hist_entry_data_packet(entry)) | ||
253 | --num_later; | ||
254 | } | ||
255 | |||
256 | if (a_loss == NULL) { | ||
257 | if (list_empty(li_list)) { | ||
258 | /* no loss events have occurred yet */ | ||
259 | LIMIT_NETDEBUG("%s: TODO: find a lost data packet by " | ||
260 | "comparing to initial seqno\n", | ||
261 | __FUNCTION__); | ||
262 | goto out; | ||
263 | } else { | ||
264 | LIMIT_NETDEBUG("%s: Less than 4 data pkts in history!", | ||
265 | __FUNCTION__); | ||
266 | goto out; | ||
267 | } | ||
268 | } | ||
269 | |||
270 | /* Locate a lost data packet */ | ||
271 | entry = packet = b_loss; | ||
272 | list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) { | ||
273 | u64 delta = dccp_delta_seqno(entry->dccphrx_seqno, | ||
274 | packet->dccphrx_seqno); | ||
275 | |||
276 | if (delta != 0) { | ||
277 | if (dccp_rx_hist_entry_data_packet(packet)) | ||
278 | --delta; | ||
279 | /* | ||
280 | * FIXME: check this, probably this % usage is because | ||
281 | * in earlier drafts the ndp count was just 8 bits | ||
282 | * long, but now it can be up to 24 bits long. | ||
283 | */ | ||
284 | #if 0 | ||
285 | if (delta % DCCP_NDP_LIMIT != | ||
286 | (packet->dccphrx_ndp - | ||
287 | entry->dccphrx_ndp) % DCCP_NDP_LIMIT) | ||
288 | #endif | ||
289 | if (delta != packet->dccphrx_ndp - entry->dccphrx_ndp) { | ||
290 | seq_loss = entry->dccphrx_seqno; | ||
291 | dccp_inc_seqno(&seq_loss); | ||
292 | } | ||
293 | } | ||
294 | packet = entry; | ||
295 | if (packet == a_loss) | ||
296 | break; | ||
297 | } | ||
298 | out: | ||
299 | if (seq_loss != DCCP_MAX_SEQNO + 1) | ||
300 | *win_loss = a_loss->dccphrx_ccval; | ||
301 | else | ||
302 | *win_loss = 0; /* Paranoia */ | ||
303 | |||
304 | return seq_loss; | ||
305 | } | ||
306 | |||
307 | EXPORT_SYMBOL_GPL(dccp_rx_hist_detect_loss); | ||
308 | |||
309 | struct dccp_tx_hist *dccp_tx_hist_new(const char *name) | ||
310 | { | ||
311 | struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); | ||
312 | static const char dccp_tx_hist_mask[] = "tx_hist_%s"; | ||
313 | char *slab_name; | ||
314 | |||
315 | if (hist == NULL) | ||
316 | goto out; | ||
317 | |||
318 | slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1, | ||
319 | GFP_ATOMIC); | ||
320 | if (slab_name == NULL) | ||
321 | goto out_free_hist; | ||
322 | |||
323 | sprintf(slab_name, dccp_tx_hist_mask, name); | ||
324 | hist->dccptxh_slab = kmem_cache_create(slab_name, | ||
325 | sizeof(struct dccp_tx_hist_entry), | ||
326 | 0, SLAB_HWCACHE_ALIGN, | ||
327 | NULL, NULL); | ||
328 | if (hist->dccptxh_slab == NULL) | ||
329 | goto out_free_slab_name; | ||
330 | out: | ||
331 | return hist; | ||
332 | out_free_slab_name: | ||
333 | kfree(slab_name); | ||
334 | out_free_hist: | ||
335 | kfree(hist); | ||
336 | hist = NULL; | ||
337 | goto out; | ||
338 | } | ||
339 | |||
340 | EXPORT_SYMBOL_GPL(dccp_tx_hist_new); | ||
341 | |||
342 | void dccp_tx_hist_delete(struct dccp_tx_hist *hist) | ||
343 | { | ||
344 | const char* name = kmem_cache_name(hist->dccptxh_slab); | ||
345 | |||
346 | kmem_cache_destroy(hist->dccptxh_slab); | ||
347 | kfree(name); | ||
348 | kfree(hist); | ||
349 | } | ||
350 | |||
351 | EXPORT_SYMBOL_GPL(dccp_tx_hist_delete); | ||
352 | |||
353 | struct dccp_tx_hist_entry * | ||
354 | dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq) | ||
355 | { | ||
356 | struct dccp_tx_hist_entry *packet = NULL, *entry; | ||
357 | |||
358 | list_for_each_entry(entry, list, dccphtx_node) | ||
359 | if (entry->dccphtx_seqno == seq) { | ||
360 | packet = entry; | ||
361 | break; | ||
362 | } | ||
363 | |||
364 | return packet; | ||
365 | } | ||
366 | |||
367 | EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry); | ||
368 | |||
369 | void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, | ||
370 | struct list_head *list, | ||
371 | struct dccp_tx_hist_entry *packet) | ||
372 | { | ||
373 | struct dccp_tx_hist_entry *next; | ||
374 | |||
375 | list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) { | ||
376 | list_del_init(&packet->dccphtx_node); | ||
377 | dccp_tx_hist_entry_delete(hist, packet); | ||
378 | } | ||
379 | } | ||
380 | |||
381 | EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older); | ||
382 | |||
383 | void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list) | ||
384 | { | ||
385 | struct dccp_tx_hist_entry *entry, *next; | ||
386 | |||
387 | list_for_each_entry_safe(entry, next, list, dccphtx_node) { | ||
388 | list_del_init(&entry->dccphtx_node); | ||
389 | dccp_tx_hist_entry_delete(hist, entry); | ||
390 | } | ||
391 | } | ||
392 | |||
393 | EXPORT_SYMBOL_GPL(dccp_tx_hist_purge); | ||
394 | |||
395 | MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, " | ||
396 | "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>"); | ||
397 | MODULE_DESCRIPTION("DCCP TFRC library"); | ||
398 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h new file mode 100644 index 000000000000..fb90a91aa93d --- /dev/null +++ b/net/dccp/ccids/lib/packet_history.h | |||
@@ -0,0 +1,199 @@ | |||
1 | /* | ||
2 | * net/dccp/ccids/lib/packet_history.h | ||
3 | * | ||
4 | * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. | ||
5 | * | ||
6 | * An implementation of the DCCP protocol | ||
7 | * | ||
8 | * This code has been developed by the University of Waikato WAND | ||
9 | * research group. For further information please see http://www.wand.net.nz/ | ||
10 | * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz | ||
11 | * | ||
12 | * This code also uses code from Lulea University, rereleased as GPL by its | ||
13 | * authors: | ||
14 | * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon | ||
15 | * | ||
16 | * Changes to meet Linux coding standards, to make it meet latest ccid3 draft | ||
17 | * and to make it work as a loadable module in the DCCP stack written by | ||
18 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. | ||
19 | * | ||
20 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
21 | * | ||
22 | * This program is free software; you can redistribute it and/or modify | ||
23 | * it under the terms of the GNU General Public License as published by | ||
24 | * the Free Software Foundation; either version 2 of the License, or | ||
25 | * (at your option) any later version. | ||
26 | * | ||
27 | * This program is distributed in the hope that it will be useful, | ||
28 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
29 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
30 | * GNU General Public License for more details. | ||
31 | * | ||
32 | * You should have received a copy of the GNU General Public License | ||
33 | * along with this program; if not, write to the Free Software | ||
34 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
35 | */ | ||
36 | |||
37 | #ifndef _DCCP_PKT_HIST_ | ||
38 | #define _DCCP_PKT_HIST_ | ||
39 | |||
40 | #include <linux/config.h> | ||
41 | #include <linux/list.h> | ||
42 | #include <linux/slab.h> | ||
43 | #include <linux/time.h> | ||
44 | |||
45 | #include "../../dccp.h" | ||
46 | |||
47 | /* Number of later packets received before one is considered lost */ | ||
48 | #define TFRC_RECV_NUM_LATE_LOSS 3 | ||
49 | |||
50 | #define TFRC_WIN_COUNT_PER_RTT 4 | ||
51 | #define TFRC_WIN_COUNT_LIMIT 16 | ||
52 | |||
53 | struct dccp_tx_hist_entry { | ||
54 | struct list_head dccphtx_node; | ||
55 | u64 dccphtx_seqno:48, | ||
56 | dccphtx_ccval:4, | ||
57 | dccphtx_sent:1; | ||
58 | u32 dccphtx_rtt; | ||
59 | struct timeval dccphtx_tstamp; | ||
60 | }; | ||
61 | |||
62 | struct dccp_rx_hist_entry { | ||
63 | struct list_head dccphrx_node; | ||
64 | u64 dccphrx_seqno:48, | ||
65 | dccphrx_ccval:4, | ||
66 | dccphrx_type:4; | ||
67 | u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */ | ||
68 | struct timeval dccphrx_tstamp; | ||
69 | }; | ||
70 | |||
71 | struct dccp_tx_hist { | ||
72 | kmem_cache_t *dccptxh_slab; | ||
73 | }; | ||
74 | |||
75 | extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name); | ||
76 | extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist); | ||
77 | |||
78 | struct dccp_rx_hist { | ||
79 | kmem_cache_t *dccprxh_slab; | ||
80 | }; | ||
81 | |||
82 | extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name); | ||
83 | extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist); | ||
84 | extern struct dccp_rx_hist_entry * | ||
85 | dccp_rx_hist_find_data_packet(const struct list_head *list); | ||
86 | |||
87 | static inline struct dccp_tx_hist_entry * | ||
88 | dccp_tx_hist_entry_new(struct dccp_tx_hist *hist, | ||
89 | const unsigned int __nocast prio) | ||
90 | { | ||
91 | struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab, | ||
92 | prio); | ||
93 | |||
94 | if (entry != NULL) | ||
95 | entry->dccphtx_sent = 0; | ||
96 | |||
97 | return entry; | ||
98 | } | ||
99 | |||
100 | static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist, | ||
101 | struct dccp_tx_hist_entry *entry) | ||
102 | { | ||
103 | if (entry != NULL) | ||
104 | kmem_cache_free(hist->dccptxh_slab, entry); | ||
105 | } | ||
106 | |||
107 | extern struct dccp_tx_hist_entry * | ||
108 | dccp_tx_hist_find_entry(const struct list_head *list, | ||
109 | const u64 seq); | ||
110 | |||
111 | static inline void dccp_tx_hist_add_entry(struct list_head *list, | ||
112 | struct dccp_tx_hist_entry *entry) | ||
113 | { | ||
114 | list_add(&entry->dccphtx_node, list); | ||
115 | } | ||
116 | |||
117 | extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, | ||
118 | struct list_head *list, | ||
119 | struct dccp_tx_hist_entry *next); | ||
120 | |||
121 | extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist, | ||
122 | struct list_head *list); | ||
123 | |||
124 | static inline struct dccp_tx_hist_entry * | ||
125 | dccp_tx_hist_head(struct list_head *list) | ||
126 | { | ||
127 | struct dccp_tx_hist_entry *head = NULL; | ||
128 | |||
129 | if (!list_empty(list)) | ||
130 | head = list_entry(list->next, struct dccp_tx_hist_entry, | ||
131 | dccphtx_node); | ||
132 | return head; | ||
133 | } | ||
134 | |||
135 | static inline struct dccp_rx_hist_entry * | ||
136 | dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, | ||
137 | const u32 ndp, | ||
138 | const struct sk_buff *skb, | ||
139 | const unsigned int __nocast prio) | ||
140 | { | ||
141 | struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab, | ||
142 | prio); | ||
143 | |||
144 | if (entry != NULL) { | ||
145 | const struct dccp_hdr *dh = dccp_hdr(skb); | ||
146 | |||
147 | entry->dccphrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; | ||
148 | entry->dccphrx_ccval = dh->dccph_ccval; | ||
149 | entry->dccphrx_type = dh->dccph_type; | ||
150 | entry->dccphrx_ndp = ndp; | ||
151 | do_gettimeofday(&(entry->dccphrx_tstamp)); | ||
152 | } | ||
153 | |||
154 | return entry; | ||
155 | } | ||
156 | |||
157 | static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist, | ||
158 | struct dccp_rx_hist_entry *entry) | ||
159 | { | ||
160 | if (entry != NULL) | ||
161 | kmem_cache_free(hist->dccprxh_slab, entry); | ||
162 | } | ||
163 | |||
164 | extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist, | ||
165 | struct list_head *list); | ||
166 | |||
167 | static inline void dccp_rx_hist_add_entry(struct list_head *list, | ||
168 | struct dccp_rx_hist_entry *entry) | ||
169 | { | ||
170 | list_add(&entry->dccphrx_node, list); | ||
171 | } | ||
172 | |||
173 | static inline struct dccp_rx_hist_entry * | ||
174 | dccp_rx_hist_head(struct list_head *list) | ||
175 | { | ||
176 | struct dccp_rx_hist_entry *head = NULL; | ||
177 | |||
178 | if (!list_empty(list)) | ||
179 | head = list_entry(list->next, struct dccp_rx_hist_entry, | ||
180 | dccphrx_node); | ||
181 | return head; | ||
182 | } | ||
183 | |||
184 | static inline int | ||
185 | dccp_rx_hist_entry_data_packet(const struct dccp_rx_hist_entry *entry) | ||
186 | { | ||
187 | return entry->dccphrx_type == DCCP_PKT_DATA || | ||
188 | entry->dccphrx_type == DCCP_PKT_DATAACK; | ||
189 | } | ||
190 | |||
191 | extern int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, | ||
192 | struct list_head *rx_list, | ||
193 | struct list_head *li_list, | ||
194 | struct dccp_rx_hist_entry *packet); | ||
195 | |||
196 | extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, | ||
197 | struct list_head *li_list, u8 *win_loss); | ||
198 | |||
199 | #endif /* _DCCP_PKT_HIST_ */ | ||
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h new file mode 100644 index 000000000000..130c4c40cfe3 --- /dev/null +++ b/net/dccp/ccids/lib/tfrc.h | |||
@@ -0,0 +1,22 @@ | |||
1 | #ifndef _TFRC_H_ | ||
2 | #define _TFRC_H_ | ||
3 | /* | ||
4 | * net/dccp/ccids/lib/tfrc.h | ||
5 | * | ||
6 | * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. | ||
7 | * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> | ||
8 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
9 | * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation; either version 2 of the License, or | ||
14 | * (at your option) any later version. | ||
15 | */ | ||
16 | |||
17 | #include <linux/types.h> | ||
18 | |||
19 | extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); | ||
20 | extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); | ||
21 | |||
22 | #endif /* _TFRC_H_ */ | ||
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c new file mode 100644 index 000000000000..d2b5933b4510 --- /dev/null +++ b/net/dccp/ccids/lib/tfrc_equation.c | |||
@@ -0,0 +1,644 @@ | |||
1 | /* | ||
2 | * net/dccp/ccids/lib/tfrc_equation.c | ||
3 | * | ||
4 | * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. | ||
5 | * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> | ||
6 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
7 | * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | */ | ||
14 | |||
15 | #include <linux/config.h> | ||
16 | #include <linux/module.h> | ||
17 | |||
18 | #include <asm/bug.h> | ||
19 | #include <asm/div64.h> | ||
20 | |||
21 | #include "tfrc.h" | ||
22 | |||
23 | #define TFRC_CALC_X_ARRSIZE 500 | ||
24 | |||
25 | #define TFRC_CALC_X_SPLIT 50000 | ||
26 | /* equivalent to 0.05 */ | ||
27 | |||
28 | static const u32 tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE][2] = { | ||
29 | { 37172, 8172 }, | ||
30 | { 53499, 11567 }, | ||
31 | { 66664, 14180 }, | ||
32 | { 78298, 16388 }, | ||
33 | { 89021, 18339 }, | ||
34 | { 99147, 20108 }, | ||
35 | { 108858, 21738 }, | ||
36 | { 118273, 23260 }, | ||
37 | { 127474, 24693 }, | ||
38 | { 136520, 26052 }, | ||
39 | { 145456, 27348 }, | ||
40 | { 154316, 28589 }, | ||
41 | { 163130, 29783 }, | ||
42 | { 171919, 30935 }, | ||
43 | { 180704, 32049 }, | ||
44 | { 189502, 33130 }, | ||
45 | { 198328, 34180 }, | ||
46 | { 207194, 35202 }, | ||
47 | { 216114, 36198 }, | ||
48 | { 225097, 37172 }, | ||
49 | { 234153, 38123 }, | ||
50 | { 243294, 39055 }, | ||
51 | { 252527, 39968 }, | ||
52 | { 261861, 40864 }, | ||
53 | { 271305, 41743 }, | ||
54 | { 280866, 42607 }, | ||
55 | { 290553, 43457 }, | ||
56 | { 300372, 44293 }, | ||
57 | { 310333, 45117 }, | ||
58 | { 320441, 45929 }, | ||
59 | { 330705, 46729 }, | ||
60 | { 341131, 47518 }, | ||
61 | { 351728, 48297 }, | ||
62 | { 362501, 49066 }, | ||
63 | { 373460, 49826 }, | ||
64 | { 384609, 50577 }, | ||
65 | { 395958, 51320 }, | ||
66 | { 407513, 52054 }, | ||
67 | { 419281, 52780 }, | ||
68 | { 431270, 53499 }, | ||
69 | { 443487, 54211 }, | ||
70 | { 455940, 54916 }, | ||
71 | { 468635, 55614 }, | ||
72 | { 481581, 56306 }, | ||
73 | { 494785, 56991 }, | ||
74 | { 508254, 57671 }, | ||
75 | { 521996, 58345 }, | ||
76 | { 536019, 59014 }, | ||
77 | { 550331, 59677 }, | ||
78 | { 564939, 60335 }, | ||
79 | { 579851, 60988 }, | ||
80 | { 595075, 61636 }, | ||
81 | { 610619, 62279 }, | ||
82 | { 626491, 62918 }, | ||
83 | { 642700, 63553 }, | ||
84 | { 659253, 64183 }, | ||
85 | { 676158, 64809 }, | ||
86 | { 693424, 65431 }, | ||
87 | { 711060, 66050 }, | ||
88 | { 729073, 66664 }, | ||
89 | { 747472, 67275 }, | ||
90 | { 766266, 67882 }, | ||
91 | { 785464, 68486 }, | ||
92 | { 805073, 69087 }, | ||
93 | { 825103, 69684 }, | ||
94 | { 845562, 70278 }, | ||
95 | { 866460, 70868 }, | ||
96 | { 887805, 71456 }, | ||
97 | { 909606, 72041 }, | ||
98 | { 931873, 72623 }, | ||
99 | { 954614, 73202 }, | ||
100 | { 977839, 73778 }, | ||
101 | { 1001557, 74352 }, | ||
102 | { 1025777, 74923 }, | ||
103 | { 1050508, 75492 }, | ||
104 | { 1075761, 76058 }, | ||
105 | { 1101544, 76621 }, | ||
106 | { 1127867, 77183 }, | ||
107 | { 1154739, 77741 }, | ||
108 | { 1182172, 78298 }, | ||
109 | { 1210173, 78852 }, | ||
110 | { 1238753, 79405 }, | ||
111 | { 1267922, 79955 }, | ||
112 | { 1297689, 80503 }, | ||
113 | { 1328066, 81049 }, | ||
114 | { 1359060, 81593 }, | ||
115 | { 1390684, 82135 }, | ||
116 | { 1422947, 82675 }, | ||
117 | { 1455859, 83213 }, | ||
118 | { 1489430, 83750 }, | ||
119 | { 1523671, 84284 }, | ||
120 | { 1558593, 84817 }, | ||
121 | { 1594205, 85348 }, | ||
122 | { 1630518, 85878 }, | ||
123 | { 1667543, 86406 }, | ||
124 | { 1705290, 86932 }, | ||
125 | { 1743770, 87457 }, | ||
126 | { 1782994, 87980 }, | ||
127 | { 1822973, 88501 }, | ||
128 | { 1863717, 89021 }, | ||
129 | { 1905237, 89540 }, | ||
130 | { 1947545, 90057 }, | ||
131 | { 1990650, 90573 }, | ||
132 | { 2034566, 91087 }, | ||
133 | { 2079301, 91600 }, | ||
134 | { 2124869, 92111 }, | ||
135 | { 2171279, 92622 }, | ||
136 | { 2218543, 93131 }, | ||
137 | { 2266673, 93639 }, | ||
138 | { 2315680, 94145 }, | ||
139 | { 2365575, 94650 }, | ||
140 | { 2416371, 95154 }, | ||
141 | { 2468077, 95657 }, | ||
142 | { 2520707, 96159 }, | ||
143 | { 2574271, 96660 }, | ||
144 | { 2628782, 97159 }, | ||
145 | { 2684250, 97658 }, | ||
146 | { 2740689, 98155 }, | ||
147 | { 2798110, 98651 }, | ||
148 | { 2856524, 99147 }, | ||
149 | { 2915944, 99641 }, | ||
150 | { 2976382, 100134 }, | ||
151 | { 3037850, 100626 }, | ||
152 | { 3100360, 101117 }, | ||
153 | { 3163924, 101608 }, | ||
154 | { 3228554, 102097 }, | ||
155 | { 3294263, 102586 }, | ||
156 | { 3361063, 103073 }, | ||
157 | { 3428966, 103560 }, | ||
158 | { 3497984, 104045 }, | ||
159 | { 3568131, 104530 }, | ||
160 | { 3639419, 105014 }, | ||
161 | { 3711860, 105498 }, | ||
162 | { 3785467, 105980 }, | ||
163 | { 3860253, 106462 }, | ||
164 | { 3936229, 106942 }, | ||
165 | { 4013410, 107422 }, | ||
166 | { 4091808, 107902 }, | ||
167 | { 4171435, 108380 }, | ||
168 | { 4252306, 108858 }, | ||
169 | { 4334431, 109335 }, | ||
170 | { 4417825, 109811 }, | ||
171 | { 4502501, 110287 }, | ||
172 | { 4588472, 110762 }, | ||
173 | { 4675750, 111236 }, | ||
174 | { 4764349, 111709 }, | ||
175 | { 4854283, 112182 }, | ||
176 | { 4945564, 112654 }, | ||
177 | { 5038206, 113126 }, | ||
178 | { 5132223, 113597 }, | ||
179 | { 5227627, 114067 }, | ||
180 | { 5324432, 114537 }, | ||
181 | { 5422652, 115006 }, | ||
182 | { 5522299, 115474 }, | ||
183 | { 5623389, 115942 }, | ||
184 | { 5725934, 116409 }, | ||
185 | { 5829948, 116876 }, | ||
186 | { 5935446, 117342 }, | ||
187 | { 6042439, 117808 }, | ||
188 | { 6150943, 118273 }, | ||
189 | { 6260972, 118738 }, | ||
190 | { 6372538, 119202 }, | ||
191 | { 6485657, 119665 }, | ||
192 | { 6600342, 120128 }, | ||
193 | { 6716607, 120591 }, | ||
194 | { 6834467, 121053 }, | ||
195 | { 6953935, 121514 }, | ||
196 | { 7075025, 121976 }, | ||
197 | { 7197752, 122436 }, | ||
198 | { 7322131, 122896 }, | ||
199 | { 7448175, 123356 }, | ||
200 | { 7575898, 123815 }, | ||
201 | { 7705316, 124274 }, | ||
202 | { 7836442, 124733 }, | ||
203 | { 7969291, 125191 }, | ||
204 | { 8103877, 125648 }, | ||
205 | { 8240216, 126105 }, | ||
206 | { 8378321, 126562 }, | ||
207 | { 8518208, 127018 }, | ||
208 | { 8659890, 127474 }, | ||
209 | { 8803384, 127930 }, | ||
210 | { 8948702, 128385 }, | ||
211 | { 9095861, 128840 }, | ||
212 | { 9244875, 129294 }, | ||
213 | { 9395760, 129748 }, | ||
214 | { 9548529, 130202 }, | ||
215 | { 9703198, 130655 }, | ||
216 | { 9859782, 131108 }, | ||
217 | { 10018296, 131561 }, | ||
218 | { 10178755, 132014 }, | ||
219 | { 10341174, 132466 }, | ||
220 | { 10505569, 132917 }, | ||
221 | { 10671954, 133369 }, | ||
222 | { 10840345, 133820 }, | ||
223 | { 11010757, 134271 }, | ||
224 | { 11183206, 134721 }, | ||
225 | { 11357706, 135171 }, | ||
226 | { 11534274, 135621 }, | ||
227 | { 11712924, 136071 }, | ||
228 | { 11893673, 136520 }, | ||
229 | { 12076536, 136969 }, | ||
230 | { 12261527, 137418 }, | ||
231 | { 12448664, 137867 }, | ||
232 | { 12637961, 138315 }, | ||
233 | { 12829435, 138763 }, | ||
234 | { 13023101, 139211 }, | ||
235 | { 13218974, 139658 }, | ||
236 | { 13417071, 140106 }, | ||
237 | { 13617407, 140553 }, | ||
238 | { 13819999, 140999 }, | ||
239 | { 14024862, 141446 }, | ||
240 | { 14232012, 141892 }, | ||
241 | { 14441465, 142339 }, | ||
242 | { 14653238, 142785 }, | ||
243 | { 14867346, 143230 }, | ||
244 | { 15083805, 143676 }, | ||
245 | { 15302632, 144121 }, | ||
246 | { 15523842, 144566 }, | ||
247 | { 15747453, 145011 }, | ||
248 | { 15973479, 145456 }, | ||
249 | { 16201939, 145900 }, | ||
250 | { 16432847, 146345 }, | ||
251 | { 16666221, 146789 }, | ||
252 | { 16902076, 147233 }, | ||
253 | { 17140429, 147677 }, | ||
254 | { 17381297, 148121 }, | ||
255 | { 17624696, 148564 }, | ||
256 | { 17870643, 149007 }, | ||
257 | { 18119154, 149451 }, | ||
258 | { 18370247, 149894 }, | ||
259 | { 18623936, 150336 }, | ||
260 | { 18880241, 150779 }, | ||
261 | { 19139176, 151222 }, | ||
262 | { 19400759, 151664 }, | ||
263 | { 19665007, 152107 }, | ||
264 | { 19931936, 152549 }, | ||
265 | { 20201564, 152991 }, | ||
266 | { 20473907, 153433 }, | ||
267 | { 20748982, 153875 }, | ||
268 | { 21026807, 154316 }, | ||
269 | { 21307399, 154758 }, | ||
270 | { 21590773, 155199 }, | ||
271 | { 21876949, 155641 }, | ||
272 | { 22165941, 156082 }, | ||
273 | { 22457769, 156523 }, | ||
274 | { 22752449, 156964 }, | ||
275 | { 23049999, 157405 }, | ||
276 | { 23350435, 157846 }, | ||
277 | { 23653774, 158287 }, | ||
278 | { 23960036, 158727 }, | ||
279 | { 24269236, 159168 }, | ||
280 | { 24581392, 159608 }, | ||
281 | { 24896521, 160049 }, | ||
282 | { 25214642, 160489 }, | ||
283 | { 25535772, 160929 }, | ||
284 | { 25859927, 161370 }, | ||
285 | { 26187127, 161810 }, | ||
286 | { 26517388, 162250 }, | ||
287 | { 26850728, 162690 }, | ||
288 | { 27187165, 163130 }, | ||
289 | { 27526716, 163569 }, | ||
290 | { 27869400, 164009 }, | ||
291 | { 28215234, 164449 }, | ||
292 | { 28564236, 164889 }, | ||
293 | { 28916423, 165328 }, | ||
294 | { 29271815, 165768 }, | ||
295 | { 29630428, 166208 }, | ||
296 | { 29992281, 166647 }, | ||
297 | { 30357392, 167087 }, | ||
298 | { 30725779, 167526 }, | ||
299 | { 31097459, 167965 }, | ||
300 | { 31472452, 168405 }, | ||
301 | { 31850774, 168844 }, | ||
302 | { 32232445, 169283 }, | ||
303 | { 32617482, 169723 }, | ||
304 | { 33005904, 170162 }, | ||
305 | { 33397730, 170601 }, | ||
306 | { 33792976, 171041 }, | ||
307 | { 34191663, 171480 }, | ||
308 | { 34593807, 171919 }, | ||
309 | { 34999428, 172358 }, | ||
310 | { 35408544, 172797 }, | ||
311 | { 35821174, 173237 }, | ||
312 | { 36237335, 173676 }, | ||
313 | { 36657047, 174115 }, | ||
314 | { 37080329, 174554 }, | ||
315 | { 37507197, 174993 }, | ||
316 | { 37937673, 175433 }, | ||
317 | { 38371773, 175872 }, | ||
318 | { 38809517, 176311 }, | ||
319 | { 39250924, 176750 }, | ||
320 | { 39696012, 177190 }, | ||
321 | { 40144800, 177629 }, | ||
322 | { 40597308, 178068 }, | ||
323 | { 41053553, 178507 }, | ||
324 | { 41513554, 178947 }, | ||
325 | { 41977332, 179386 }, | ||
326 | { 42444904, 179825 }, | ||
327 | { 42916290, 180265 }, | ||
328 | { 43391509, 180704 }, | ||
329 | { 43870579, 181144 }, | ||
330 | { 44353520, 181583 }, | ||
331 | { 44840352, 182023 }, | ||
332 | { 45331092, 182462 }, | ||
333 | { 45825761, 182902 }, | ||
334 | { 46324378, 183342 }, | ||
335 | { 46826961, 183781 }, | ||
336 | { 47333531, 184221 }, | ||
337 | { 47844106, 184661 }, | ||
338 | { 48358706, 185101 }, | ||
339 | { 48877350, 185541 }, | ||
340 | { 49400058, 185981 }, | ||
341 | { 49926849, 186421 }, | ||
342 | { 50457743, 186861 }, | ||
343 | { 50992759, 187301 }, | ||
344 | { 51531916, 187741 }, | ||
345 | { 52075235, 188181 }, | ||
346 | { 52622735, 188622 }, | ||
347 | { 53174435, 189062 }, | ||
348 | { 53730355, 189502 }, | ||
349 | { 54290515, 189943 }, | ||
350 | { 54854935, 190383 }, | ||
351 | { 55423634, 190824 }, | ||
352 | { 55996633, 191265 }, | ||
353 | { 56573950, 191706 }, | ||
354 | { 57155606, 192146 }, | ||
355 | { 57741621, 192587 }, | ||
356 | { 58332014, 193028 }, | ||
357 | { 58926806, 193470 }, | ||
358 | { 59526017, 193911 }, | ||
359 | { 60129666, 194352 }, | ||
360 | { 60737774, 194793 }, | ||
361 | { 61350361, 195235 }, | ||
362 | { 61967446, 195677 }, | ||
363 | { 62589050, 196118 }, | ||
364 | { 63215194, 196560 }, | ||
365 | { 63845897, 197002 }, | ||
366 | { 64481179, 197444 }, | ||
367 | { 65121061, 197886 }, | ||
368 | { 65765563, 198328 }, | ||
369 | { 66414705, 198770 }, | ||
370 | { 67068508, 199213 }, | ||
371 | { 67726992, 199655 }, | ||
372 | { 68390177, 200098 }, | ||
373 | { 69058085, 200540 }, | ||
374 | { 69730735, 200983 }, | ||
375 | { 70408147, 201426 }, | ||
376 | { 71090343, 201869 }, | ||
377 | { 71777343, 202312 }, | ||
378 | { 72469168, 202755 }, | ||
379 | { 73165837, 203199 }, | ||
380 | { 73867373, 203642 }, | ||
381 | { 74573795, 204086 }, | ||
382 | { 75285124, 204529 }, | ||
383 | { 76001380, 204973 }, | ||
384 | { 76722586, 205417 }, | ||
385 | { 77448761, 205861 }, | ||
386 | { 78179926, 206306 }, | ||
387 | { 78916102, 206750 }, | ||
388 | { 79657310, 207194 }, | ||
389 | { 80403571, 207639 }, | ||
390 | { 81154906, 208084 }, | ||
391 | { 81911335, 208529 }, | ||
392 | { 82672880, 208974 }, | ||
393 | { 83439562, 209419 }, | ||
394 | { 84211402, 209864 }, | ||
395 | { 84988421, 210309 }, | ||
396 | { 85770640, 210755 }, | ||
397 | { 86558080, 211201 }, | ||
398 | { 87350762, 211647 }, | ||
399 | { 88148708, 212093 }, | ||
400 | { 88951938, 212539 }, | ||
401 | { 89760475, 212985 }, | ||
402 | { 90574339, 213432 }, | ||
403 | { 91393551, 213878 }, | ||
404 | { 92218133, 214325 }, | ||
405 | { 93048107, 214772 }, | ||
406 | { 93883493, 215219 }, | ||
407 | { 94724314, 215666 }, | ||
408 | { 95570590, 216114 }, | ||
409 | { 96422343, 216561 }, | ||
410 | { 97279594, 217009 }, | ||
411 | { 98142366, 217457 }, | ||
412 | { 99010679, 217905 }, | ||
413 | { 99884556, 218353 }, | ||
414 | { 100764018, 218801 }, | ||
415 | { 101649086, 219250 }, | ||
416 | { 102539782, 219698 }, | ||
417 | { 103436128, 220147 }, | ||
418 | { 104338146, 220596 }, | ||
419 | { 105245857, 221046 }, | ||
420 | { 106159284, 221495 }, | ||
421 | { 107078448, 221945 }, | ||
422 | { 108003370, 222394 }, | ||
423 | { 108934074, 222844 }, | ||
424 | { 109870580, 223294 }, | ||
425 | { 110812910, 223745 }, | ||
426 | { 111761087, 224195 }, | ||
427 | { 112715133, 224646 }, | ||
428 | { 113675069, 225097 }, | ||
429 | { 114640918, 225548 }, | ||
430 | { 115612702, 225999 }, | ||
431 | { 116590442, 226450 }, | ||
432 | { 117574162, 226902 }, | ||
433 | { 118563882, 227353 }, | ||
434 | { 119559626, 227805 }, | ||
435 | { 120561415, 228258 }, | ||
436 | { 121569272, 228710 }, | ||
437 | { 122583219, 229162 }, | ||
438 | { 123603278, 229615 }, | ||
439 | { 124629471, 230068 }, | ||
440 | { 125661822, 230521 }, | ||
441 | { 126700352, 230974 }, | ||
442 | { 127745083, 231428 }, | ||
443 | { 128796039, 231882 }, | ||
444 | { 129853241, 232336 }, | ||
445 | { 130916713, 232790 }, | ||
446 | { 131986475, 233244 }, | ||
447 | { 133062553, 233699 }, | ||
448 | { 134144966, 234153 }, | ||
449 | { 135233739, 234608 }, | ||
450 | { 136328894, 235064 }, | ||
451 | { 137430453, 235519 }, | ||
452 | { 138538440, 235975 }, | ||
453 | { 139652876, 236430 }, | ||
454 | { 140773786, 236886 }, | ||
455 | { 141901190, 237343 }, | ||
456 | { 143035113, 237799 }, | ||
457 | { 144175576, 238256 }, | ||
458 | { 145322604, 238713 }, | ||
459 | { 146476218, 239170 }, | ||
460 | { 147636442, 239627 }, | ||
461 | { 148803298, 240085 }, | ||
462 | { 149976809, 240542 }, | ||
463 | { 151156999, 241000 }, | ||
464 | { 152343890, 241459 }, | ||
465 | { 153537506, 241917 }, | ||
466 | { 154737869, 242376 }, | ||
467 | { 155945002, 242835 }, | ||
468 | { 157158929, 243294 }, | ||
469 | { 158379673, 243753 }, | ||
470 | { 159607257, 244213 }, | ||
471 | { 160841704, 244673 }, | ||
472 | { 162083037, 245133 }, | ||
473 | { 163331279, 245593 }, | ||
474 | { 164586455, 246054 }, | ||
475 | { 165848586, 246514 }, | ||
476 | { 167117696, 246975 }, | ||
477 | { 168393810, 247437 }, | ||
478 | { 169676949, 247898 }, | ||
479 | { 170967138, 248360 }, | ||
480 | { 172264399, 248822 }, | ||
481 | { 173568757, 249284 }, | ||
482 | { 174880235, 249747 }, | ||
483 | { 176198856, 250209 }, | ||
484 | { 177524643, 250672 }, | ||
485 | { 178857621, 251136 }, | ||
486 | { 180197813, 251599 }, | ||
487 | { 181545242, 252063 }, | ||
488 | { 182899933, 252527 }, | ||
489 | { 184261908, 252991 }, | ||
490 | { 185631191, 253456 }, | ||
491 | { 187007807, 253920 }, | ||
492 | { 188391778, 254385 }, | ||
493 | { 189783129, 254851 }, | ||
494 | { 191181884, 255316 }, | ||
495 | { 192588065, 255782 }, | ||
496 | { 194001698, 256248 }, | ||
497 | { 195422805, 256714 }, | ||
498 | { 196851411, 257181 }, | ||
499 | { 198287540, 257648 }, | ||
500 | { 199731215, 258115 }, | ||
501 | { 201182461, 258582 }, | ||
502 | { 202641302, 259050 }, | ||
503 | { 204107760, 259518 }, | ||
504 | { 205581862, 259986 }, | ||
505 | { 207063630, 260454 }, | ||
506 | { 208553088, 260923 }, | ||
507 | { 210050262, 261392 }, | ||
508 | { 211555174, 261861 }, | ||
509 | { 213067849, 262331 }, | ||
510 | { 214588312, 262800 }, | ||
511 | { 216116586, 263270 }, | ||
512 | { 217652696, 263741 }, | ||
513 | { 219196666, 264211 }, | ||
514 | { 220748520, 264682 }, | ||
515 | { 222308282, 265153 }, | ||
516 | { 223875978, 265625 }, | ||
517 | { 225451630, 266097 }, | ||
518 | { 227035265, 266569 }, | ||
519 | { 228626905, 267041 }, | ||
520 | { 230226576, 267514 }, | ||
521 | { 231834302, 267986 }, | ||
522 | { 233450107, 268460 }, | ||
523 | { 235074016, 268933 }, | ||
524 | { 236706054, 269407 }, | ||
525 | { 238346244, 269881 }, | ||
526 | { 239994613, 270355 }, | ||
527 | { 241651183, 270830 }, | ||
528 | { 243315981, 271305 } | ||
529 | }; | ||
530 | |||
531 | /* Calculate the send rate as per section 3.1 of RFC3448 | ||
532 | |||
533 | Returns send rate in bytes per second | ||
534 | |||
   Integer maths and lookups are used because floating point is not allowed
   in the kernel
536 | |||
537 | The function for Xcalc as per section 3.1 of RFC3448 is: | ||
538 | |||
539 | X = s | ||
540 | ------------------------------------------------------------- | ||
541 | R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2))) | ||
542 | |||
543 | where | ||
   X is the transmit rate in bytes/second
545 | s is the packet size in bytes | ||
546 | R is the round trip time in seconds | ||
547 | p is the loss event rate, between 0 and 1.0, of the number of loss events | ||
548 | as a fraction of the number of packets transmitted | ||
549 | t_RTO is the TCP retransmission timeout value in seconds | ||
550 | b is the number of packets acknowledged by a single TCP acknowledgement | ||
551 | |||
552 | we can assume that b = 1 and t_RTO is 4 * R. With this the equation becomes: | ||
553 | |||
554 | X = s | ||
555 | ----------------------------------------------------------------------- | ||
556 | R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2))) | ||
557 | |||
558 | |||
559 | which we can break down into: | ||
560 | |||
561 | X = s | ||
562 | -------- | ||
563 | R * f(p) | ||
564 | |||
565 | where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p)) | ||
566 | |||
567 | Function parameters: | ||
568 | s - bytes | ||
569 | R - RTT in usecs | ||
570 | p - loss rate (decimal fraction multiplied by 1,000,000) | ||
571 | |||
572 | Returns Xcalc in bytes per second | ||
573 | |||
   DON'T alter this code unless you run test cases against it, as the code
   has been manipulated to stop underflow/overflow.
576 | |||
577 | */ | ||
578 | u32 tfrc_calc_x(u16 s, u32 R, u32 p) | ||
579 | { | ||
580 | int index; | ||
581 | u32 f; | ||
582 | u64 tmp1, tmp2; | ||
583 | |||
584 | if (p < TFRC_CALC_X_SPLIT) | ||
585 | index = (p / (TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE)) - 1; | ||
586 | else | ||
587 | index = (p / (1000000 / TFRC_CALC_X_ARRSIZE)) - 1; | ||
588 | |||
589 | if (index < 0) | ||
590 | /* p should be 0 unless there is a bug in my code */ | ||
591 | index = 0; | ||
592 | |||
593 | if (R == 0) | ||
594 | R = 1; /* RTT can't be zero or else divide by zero */ | ||
595 | |||
596 | BUG_ON(index >= TFRC_CALC_X_ARRSIZE); | ||
597 | |||
598 | if (p >= TFRC_CALC_X_SPLIT) | ||
599 | f = tfrc_calc_x_lookup[index][0]; | ||
600 | else | ||
601 | f = tfrc_calc_x_lookup[index][1]; | ||
602 | |||
603 | tmp1 = ((u64)s * 100000000); | ||
604 | tmp2 = ((u64)R * (u64)f); | ||
605 | do_div(tmp2, 10000); | ||
606 | do_div(tmp1, tmp2); | ||
607 | /* Don't alter above math unless you test due to overflow on 32 bit */ | ||
608 | |||
609 | return (u32)tmp1; | ||
610 | } | ||
611 | |||
612 | EXPORT_SYMBOL_GPL(tfrc_calc_x); | ||
613 | |||
614 | /* | ||
615 | * args: fvalue - function value to match | ||
616 | * returns: p closest to that value | ||
617 | * | ||
618 | * both fvalue and p are multiplied by 1,000,000 to use ints | ||
619 | */ | ||
620 | u32 tfrc_calc_x_reverse_lookup(u32 fvalue) | ||
621 | { | ||
622 | int ctr = 0; | ||
623 | int small; | ||
624 | |||
625 | if (fvalue < tfrc_calc_x_lookup[0][1]) | ||
626 | return 0; | ||
627 | |||
628 | if (fvalue <= tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][1]) | ||
629 | small = 1; | ||
630 | else if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0]) | ||
631 | return 1000000; | ||
632 | else | ||
633 | small = 0; | ||
634 | |||
635 | while (fvalue > tfrc_calc_x_lookup[ctr][small]) | ||
636 | ctr++; | ||
637 | |||
638 | if (small) | ||
639 | return TFRC_CALC_X_SPLIT * ctr / TFRC_CALC_X_ARRSIZE; | ||
640 | else | ||
641 | return 1000000 * ctr / TFRC_CALC_X_ARRSIZE; | ||
642 | } | ||
643 | |||
644 | EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup); | ||
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h new file mode 100644 index 000000000000..33456c0d5937 --- /dev/null +++ b/net/dccp/dccp.h | |||
@@ -0,0 +1,493 @@ | |||
1 | #ifndef _DCCP_H | ||
2 | #define _DCCP_H | ||
3 | /* | ||
4 | * net/dccp/dccp.h | ||
5 | * | ||
6 | * An implementation of the DCCP protocol | ||
7 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
8 | * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify it | ||
11 | * under the terms of the GNU General Public License version 2 as | ||
12 | * published by the Free Software Foundation. | ||
13 | */ | ||
14 | |||
15 | #include <linux/config.h> | ||
16 | #include <linux/dccp.h> | ||
17 | #include <net/snmp.h> | ||
18 | #include <net/sock.h> | ||
19 | #include <net/tcp.h> | ||
20 | |||
21 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
22 | extern int dccp_debug; | ||
23 | |||
24 | #define dccp_pr_debug(format, a...) \ | ||
25 | do { if (dccp_debug) \ | ||
26 | printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \ | ||
27 | } while (0) | ||
28 | #define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) \ | ||
29 | printk(format, ##a); } while (0) | ||
30 | #else | ||
31 | #define dccp_pr_debug(format, a...) | ||
32 | #define dccp_pr_debug_cat(format, a...) | ||
33 | #endif | ||
34 | |||
35 | extern struct inet_hashinfo dccp_hashinfo; | ||
36 | |||
37 | extern atomic_t dccp_orphan_count; | ||
38 | extern int dccp_tw_count; | ||
39 | extern void dccp_tw_deschedule(struct inet_timewait_sock *tw); | ||
40 | |||
41 | extern void dccp_time_wait(struct sock *sk, int state, int timeo); | ||
42 | |||
43 | /* FIXME: Right size this */ | ||
44 | #define DCCP_MAX_OPT_LEN 128 | ||
45 | |||
46 | #define DCCP_MAX_PACKET_HDR 32 | ||
47 | |||
48 | #define MAX_DCCP_HEADER (DCCP_MAX_PACKET_HDR + DCCP_MAX_OPT_LEN + MAX_HEADER) | ||
49 | |||
50 | #define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT | ||
51 | * state, about 60 seconds */ | ||
52 | |||
53 | /* draft-ietf-dccp-spec-11.txt initial RTO value */ | ||
54 | #define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ)) | ||
55 | |||
56 | /* Maximal interval between probes for local resources. */ | ||
57 | #define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U)) | ||
58 | |||
59 | #define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */ | ||
60 | |||
61 | extern struct proto dccp_v4_prot; | ||
62 | |||
63 | /* is seq1 < seq2 ? */ | ||
64 | static inline int before48(const u64 seq1, const u64 seq2) | ||
65 | { | ||
66 | return (s64)((seq1 << 16) - (seq2 << 16)) < 0; | ||
67 | } | ||
68 | |||
69 | /* is seq1 > seq2 ? */ | ||
70 | static inline int after48(const u64 seq1, const u64 seq2) | ||
71 | { | ||
72 | return (s64)((seq2 << 16) - (seq1 << 16)) < 0; | ||
73 | } | ||
74 | |||
75 | /* is seq2 <= seq1 <= seq3 ? */ | ||
76 | static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3) | ||
77 | { | ||
78 | return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16); | ||
79 | } | ||
80 | |||
81 | static inline u64 max48(const u64 seq1, const u64 seq2) | ||
82 | { | ||
83 | return after48(seq1, seq2) ? seq1 : seq2; | ||
84 | } | ||
85 | |||
/*
 * SNMP-style MIB counter indices for DCCP statistics (see the
 * DCCP_INC_STATS* macros below).  NOTE(review): the meanings of the
 * uncommented counters are inferred from their names only — confirm
 * against the DCCP MIB specification before relying on them.
 */
enum {
	DCCP_MIB_NUM = 0,
	DCCP_MIB_ACTIVEOPENS,			/* ActiveOpens */
	DCCP_MIB_ESTABRESETS,			/* EstabResets */
	DCCP_MIB_CURRESTAB,			/* CurrEstab */
	DCCP_MIB_OUTSEGS,			/* OutSegs */
	DCCP_MIB_OUTRSTS,
	DCCP_MIB_ABORTONTIMEOUT,
	DCCP_MIB_TIMEOUTS,
	DCCP_MIB_ABORTFAILED,
	DCCP_MIB_PASSIVEOPENS,
	DCCP_MIB_ATTEMPTFAILS,
	DCCP_MIB_OUTDATAGRAMS,
	DCCP_MIB_INERRS,
	DCCP_MIB_OPTMANDATORYERROR,
	DCCP_MIB_INVALIDOPT,
	__DCCP_MIB_MAX
};

#define DCCP_MIB_MAX	__DCCP_MIB_MAX
/* Counter array, aligned like the other protocol MIBs in <net/snmp.h>. */
struct dccp_mib {
	unsigned long	mibs[DCCP_MIB_MAX];
} __SNMP_MIB_ALIGN__;
109 | |||
110 | DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); | ||
111 | #define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) | ||
112 | #define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field) | ||
113 | #define DCCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(dccp_statistics, field) | ||
114 | #define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field) | ||
115 | #define DCCP_ADD_STATS_BH(field, val) \ | ||
116 | SNMP_ADD_STATS_BH(dccp_statistics, field, val) | ||
117 | #define DCCP_ADD_STATS_USER(field, val) \ | ||
118 | SNMP_ADD_STATS_USER(dccp_statistics, field, val) | ||
119 | |||
120 | extern int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb); | ||
121 | extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb); | ||
122 | |||
123 | extern int dccp_send_response(struct sock *sk); | ||
124 | extern void dccp_send_ack(struct sock *sk); | ||
125 | extern void dccp_send_delayed_ack(struct sock *sk); | ||
126 | extern void dccp_send_sync(struct sock *sk, const u64 seq, | ||
127 | const enum dccp_pkt_type pkt_type); | ||
128 | |||
129 | extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo); | ||
130 | extern void dccp_write_space(struct sock *sk); | ||
131 | |||
132 | extern void dccp_init_xmit_timers(struct sock *sk); | ||
/* Thin wrapper: stop this socket's inet_connection_sock xmit timers. */
static inline void dccp_clear_xmit_timers(struct sock *sk)
{
	inet_csk_clear_xmit_timers(sk);
}
137 | |||
138 | extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu); | ||
139 | |||
140 | extern const char *dccp_packet_name(const int type); | ||
141 | extern const char *dccp_state_name(const int state); | ||
142 | |||
/*
 * Transition @sk to @state, keeping the CurrEstab/EstabResets MIB
 * counters in step and unhashing the socket when it reaches
 * DCCP_CLOSED.  Note the deliberate fall-through from DCCP_CLOSED
 * into the default counter adjustment, and that sk_state is written
 * only at the very end (see the comment there).
 */
static inline void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
		      dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	/* Transitioning to the state we are already in is a caller bug. */
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		/* Remove from lookup tables and release the bound port,
		 * unless the user explicitly locked the binding. */
		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(&dccp_hashinfo, sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}
177 | |||
/*
 * Final teardown: move the socket to DCCP_CLOSED, stop its timers,
 * mark both directions shut down, then either wake a still-attached
 * user (sk_state_change) or — if the socket is already orphaned
 * (SOCK_DEAD) — destroy it immediately.
 */
static inline void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}
190 | |||
/*
 * Minimal request_sock initialisation for an incoming connection
 * request: record the peer's source port and start with nothing
 * acked and a zero receive window.  @dp is currently unused.
 */
static inline void dccp_openreq_init(struct request_sock *req,
				     struct dccp_sock *dp,
				     struct sk_buff *skb)
{
	/*
	 * FIXME: fill in the other req fields from the DCCP options
	 * received
	 */
	inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
	inet_rsk(req)->acked = 0;
	req->rcv_wnd = 0;
}
203 | |||
204 | extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb); | ||
205 | |||
206 | extern struct sock *dccp_create_openreq_child(struct sock *sk, | ||
207 | const struct request_sock *req, | ||
208 | const struct sk_buff *skb); | ||
209 | |||
210 | extern int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); | ||
211 | |||
212 | extern void dccp_v4_err(struct sk_buff *skb, u32); | ||
213 | |||
214 | extern int dccp_v4_rcv(struct sk_buff *skb); | ||
215 | |||
216 | extern struct sock *dccp_v4_request_recv_sock(struct sock *sk, | ||
217 | struct sk_buff *skb, | ||
218 | struct request_sock *req, | ||
219 | struct dst_entry *dst); | ||
220 | extern struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, | ||
221 | struct request_sock *req, | ||
222 | struct request_sock **prev); | ||
223 | |||
224 | extern int dccp_child_process(struct sock *parent, struct sock *child, | ||
225 | struct sk_buff *skb); | ||
226 | extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | ||
227 | struct dccp_hdr *dh, unsigned len); | ||
228 | extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, | ||
229 | const struct dccp_hdr *dh, const unsigned len); | ||
230 | |||
231 | extern void dccp_close(struct sock *sk, long timeout); | ||
232 | extern struct sk_buff *dccp_make_response(struct sock *sk, | ||
233 | struct dst_entry *dst, | ||
234 | struct request_sock *req); | ||
235 | extern struct sk_buff *dccp_make_reset(struct sock *sk, | ||
236 | struct dst_entry *dst, | ||
237 | enum dccp_reset_codes code); | ||
238 | |||
239 | extern int dccp_connect(struct sock *sk); | ||
240 | extern int dccp_disconnect(struct sock *sk, int flags); | ||
241 | extern int dccp_getsockopt(struct sock *sk, int level, int optname, | ||
242 | char __user *optval, int __user *optlen); | ||
243 | extern int dccp_setsockopt(struct sock *sk, int level, int optname, | ||
244 | char __user *optval, int optlen); | ||
245 | extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); | ||
246 | extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, | ||
247 | struct msghdr *msg, size_t size); | ||
248 | extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, | ||
249 | struct msghdr *msg, size_t len, int nonblock, | ||
250 | int flags, int *addr_len); | ||
251 | extern void dccp_shutdown(struct sock *sk, int how); | ||
252 | |||
253 | extern int dccp_v4_checksum(const struct sk_buff *skb, | ||
254 | const u32 saddr, const u32 daddr); | ||
255 | |||
256 | extern int dccp_v4_send_reset(struct sock *sk, | ||
257 | enum dccp_reset_codes code); | ||
258 | extern void dccp_send_close(struct sock *sk, const int active); | ||
259 | |||
/*
 * Per-packet DCCP state carried in skb->cb[] while the packet
 * traverses the stack; accessed via the DCCP_SKB_CB() macro below.
 */
struct dccp_skb_cb {
	__u8 dccpd_type;	/* packet type, one of DCCP_PKT_* */
	__u8 dccpd_reset_code;
	__u8 dccpd_service;
	__u8 dccpd_ccval;
	__u64 dccpd_seq;	/* 48-bit sequence number of this packet */
	__u64 dccpd_ack_seq;	/* NOTE(review): presumably set to
				 * DCCP_PKT_WITHOUT_ACK_SEQ when the packet
				 * has no ack field — confirm with callers */
	int dccpd_opt_len;
};

#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
271 | |||
272 | static inline int dccp_non_data_packet(const struct sk_buff *skb) | ||
273 | { | ||
274 | const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; | ||
275 | |||
276 | return type == DCCP_PKT_ACK || | ||
277 | type == DCCP_PKT_CLOSE || | ||
278 | type == DCCP_PKT_CLOSEREQ || | ||
279 | type == DCCP_PKT_RESET || | ||
280 | type == DCCP_PKT_SYNC || | ||
281 | type == DCCP_PKT_SYNCACK; | ||
282 | } | ||
283 | |||
284 | static inline int dccp_packet_without_ack(const struct sk_buff *skb) | ||
285 | { | ||
286 | const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; | ||
287 | |||
288 | return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST; | ||
289 | } | ||
290 | |||
291 | #define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1) | ||
292 | #define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2) | ||
293 | |||
294 | static inline void dccp_set_seqno(u64 *seqno, u64 value) | ||
295 | { | ||
296 | if (value > DCCP_MAX_SEQNO) | ||
297 | value -= DCCP_MAX_SEQNO + 1; | ||
298 | *seqno = value; | ||
299 | } | ||
300 | |||
301 | static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2) | ||
302 | { | ||
303 | return ((seqno2 << 16) - (seqno1 << 16)) >> 16; | ||
304 | } | ||
305 | |||
306 | static inline void dccp_inc_seqno(u64 *seqno) | ||
307 | { | ||
308 | if (++*seqno > DCCP_MAX_SEQNO) | ||
309 | *seqno = 0; | ||
310 | } | ||
311 | |||
312 | static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss) | ||
313 | { | ||
314 | struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh + | ||
315 | sizeof(*dh)); | ||
316 | |||
317 | #if defined(__LITTLE_ENDIAN_BITFIELD) | ||
318 | dh->dccph_seq = htonl((gss >> 32)) >> 8; | ||
319 | #elif defined(__BIG_ENDIAN_BITFIELD) | ||
320 | dh->dccph_seq = htonl((gss >> 32)); | ||
321 | #else | ||
322 | #error "Adjust your <asm/byteorder.h> defines" | ||
323 | #endif | ||
324 | dhx->dccph_seq_low = htonl(gss & 0xffffffff); | ||
325 | } | ||
326 | |||
327 | static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack, | ||
328 | const u64 gsr) | ||
329 | { | ||
330 | #if defined(__LITTLE_ENDIAN_BITFIELD) | ||
331 | dhack->dccph_ack_nr_high = htonl((gsr >> 32)) >> 8; | ||
332 | #elif defined(__BIG_ENDIAN_BITFIELD) | ||
333 | dhack->dccph_ack_nr_high = htonl((gsr >> 32)); | ||
334 | #else | ||
335 | #error "Adjust your <asm/byteorder.h> defines" | ||
336 | #endif | ||
337 | dhack->dccph_ack_nr_low = htonl(gsr & 0xffffffff); | ||
338 | } | ||
339 | |||
340 | static inline void dccp_update_gsr(struct sock *sk, u64 seq) | ||
341 | { | ||
342 | struct dccp_sock *dp = dccp_sk(sk); | ||
343 | |||
344 | dp->dccps_gsr = seq; | ||
345 | dccp_set_seqno(&dp->dccps_swl, | ||
346 | (dp->dccps_gsr + 1 - | ||
347 | (dp->dccps_options.dccpo_sequence_window / 4))); | ||
348 | dccp_set_seqno(&dp->dccps_swh, | ||
349 | (dp->dccps_gsr + | ||
350 | (3 * dp->dccps_options.dccpo_sequence_window) / 4)); | ||
351 | } | ||
352 | |||
353 | static inline void dccp_update_gss(struct sock *sk, u64 seq) | ||
354 | { | ||
355 | struct dccp_sock *dp = dccp_sk(sk); | ||
356 | |||
357 | dp->dccps_awh = dp->dccps_gss = seq; | ||
358 | dccp_set_seqno(&dp->dccps_awl, | ||
359 | (dp->dccps_gss - | ||
360 | dp->dccps_options.dccpo_sequence_window + 1)); | ||
361 | } | ||
362 | |||
363 | extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb); | ||
364 | extern void dccp_insert_option_elapsed_time(struct sock *sk, | ||
365 | struct sk_buff *skb, | ||
366 | u32 elapsed_time); | ||
367 | extern void dccp_insert_option_timestamp(struct sock *sk, | ||
368 | struct sk_buff *skb); | ||
369 | extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb, | ||
370 | unsigned char option, | ||
371 | const void *value, unsigned char len); | ||
372 | |||
373 | extern struct socket *dccp_ctl_socket; | ||
374 | |||
375 | #define DCCP_ACKPKTS_STATE_RECEIVED 0 | ||
376 | #define DCCP_ACKPKTS_STATE_ECN_MARKED (1 << 6) | ||
377 | #define DCCP_ACKPKTS_STATE_NOT_RECEIVED (3 << 6) | ||
378 | |||
379 | #define DCCP_ACKPKTS_STATE_MASK 0xC0 /* 11000000 */ | ||
380 | #define DCCP_ACKPKTS_LEN_MASK 0x3F /* 00111111 */ | ||
381 | |||
382 | /** struct dccp_ackpkts - acknowledgeable packets | ||
383 | * | ||
384 | * This data structure is the one defined in the DCCP draft | ||
385 | * Appendix A. | ||
386 | * | ||
387 | * @dccpap_buf_head - circular buffer head | ||
388 | * @dccpap_buf_tail - circular buffer tail | ||
389 | * @dccpap_buf_ackno - ack # of the most recent packet acknowledgeable in the | ||
390 | * buffer (i.e. %dccpap_buf_head) | ||
391 | * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked | ||
392 | * by the buffer with State 0 | ||
393 | * | ||
394 | * Additionally, the HC-Receiver must keep some information about the | ||
395 | * Ack Vectors it has recently sent. For each packet sent carrying an | ||
396 | * Ack Vector, it remembers four variables: | ||
397 | * | ||
398 | * @dccpap_ack_seqno - the Sequence Number used for the packet | ||
399 | * (HC-Receiver seqno) | ||
400 | * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement. | ||
401 | * @dccpap_ack_ackno - the Acknowledgement Number used for the packet | ||
402 | * (HC-Sender seqno) | ||
403 | * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. | ||
404 | * | ||
405 | * @dccpap_buf_len - circular buffer length | ||
406 | * @dccpap_time - the time in usecs | ||
407 | * @dccpap_buf - circular buffer of acknowledgeable packets | ||
408 | */ | ||
409 | struct dccp_ackpkts { | ||
410 | unsigned int dccpap_buf_head; | ||
411 | unsigned int dccpap_buf_tail; | ||
412 | u64 dccpap_buf_ackno; | ||
413 | u64 dccpap_ack_seqno; | ||
414 | u64 dccpap_ack_ackno; | ||
415 | unsigned int dccpap_ack_ptr; | ||
416 | unsigned int dccpap_buf_vector_len; | ||
417 | unsigned int dccpap_ack_vector_len; | ||
418 | unsigned int dccpap_buf_len; | ||
419 | struct timeval dccpap_time; | ||
420 | u8 dccpap_buf_nonce; | ||
421 | u8 dccpap_ack_nonce; | ||
422 | u8 dccpap_buf[0]; | ||
423 | }; | ||
424 | |||
425 | extern struct dccp_ackpkts * | ||
426 | dccp_ackpkts_alloc(unsigned int len, | ||
427 | const unsigned int __nocast priority); | ||
428 | extern void dccp_ackpkts_free(struct dccp_ackpkts *ap); | ||
429 | extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state); | ||
430 | extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, | ||
431 | struct sock *sk, u64 ackno); | ||
432 | |||
433 | static inline suseconds_t timeval_usecs(const struct timeval *tv) | ||
434 | { | ||
435 | return tv->tv_sec * USEC_PER_SEC + tv->tv_usec; | ||
436 | } | ||
437 | |||
438 | static inline suseconds_t timeval_delta(const struct timeval *large, | ||
439 | const struct timeval *small) | ||
440 | { | ||
441 | time_t secs = large->tv_sec - small->tv_sec; | ||
442 | suseconds_t usecs = large->tv_usec - small->tv_usec; | ||
443 | |||
444 | if (usecs < 0) { | ||
445 | secs--; | ||
446 | usecs += USEC_PER_SEC; | ||
447 | } | ||
448 | return secs * USEC_PER_SEC + usecs; | ||
449 | } | ||
450 | |||
451 | static inline void timeval_add_usecs(struct timeval *tv, | ||
452 | const suseconds_t usecs) | ||
453 | { | ||
454 | tv->tv_usec += usecs; | ||
455 | while (tv->tv_usec >= USEC_PER_SEC) { | ||
456 | tv->tv_sec++; | ||
457 | tv->tv_usec -= USEC_PER_SEC; | ||
458 | } | ||
459 | } | ||
460 | |||
461 | static inline void timeval_sub_usecs(struct timeval *tv, | ||
462 | const suseconds_t usecs) | ||
463 | { | ||
464 | tv->tv_usec -= usecs; | ||
465 | while (tv->tv_usec < 0) { | ||
466 | tv->tv_sec--; | ||
467 | tv->tv_usec += USEC_PER_SEC; | ||
468 | } | ||
469 | } | ||
470 | |||
471 | /* | ||
472 | * Returns the difference in usecs between timeval | ||
473 | * passed in and current time | ||
474 | */ | ||
475 | static inline suseconds_t timeval_now_delta(const struct timeval *tv) | ||
476 | { | ||
477 | struct timeval now; | ||
478 | do_gettimeofday(&now); | ||
479 | return timeval_delta(&now, tv); | ||
480 | } | ||
481 | |||
482 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
483 | extern void dccp_ackvector_print(const u64 ackno, | ||
484 | const unsigned char *vector, int len); | ||
485 | extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap); | ||
486 | #else | ||
487 | static inline void dccp_ackvector_print(const u64 ackno, | ||
488 | const unsigned char *vector, | ||
489 | int len) { } | ||
490 | static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { } | ||
491 | #endif | ||
492 | |||
493 | #endif /* _DCCP_H */ | ||
diff --git a/net/dccp/diag.c b/net/dccp/diag.c new file mode 100644 index 000000000000..f675d8e642d3 --- /dev/null +++ b/net/dccp/diag.c | |||
@@ -0,0 +1,71 @@ | |||
1 | /* | ||
2 | * net/dccp/diag.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@mandriva.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify it | ||
8 | * under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | #include <linux/config.h> | ||
13 | |||
14 | #include <linux/module.h> | ||
15 | #include <linux/inet_diag.h> | ||
16 | |||
17 | #include "ccid.h" | ||
18 | #include "dccp.h" | ||
19 | |||
20 | static void dccp_get_info(struct sock *sk, struct tcp_info *info) | ||
21 | { | ||
22 | struct dccp_sock *dp = dccp_sk(sk); | ||
23 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
24 | |||
25 | memset(info, 0, sizeof(*info)); | ||
26 | |||
27 | info->tcpi_state = sk->sk_state; | ||
28 | info->tcpi_retransmits = icsk->icsk_retransmits; | ||
29 | info->tcpi_probes = icsk->icsk_probes_out; | ||
30 | info->tcpi_backoff = icsk->icsk_backoff; | ||
31 | info->tcpi_pmtu = dp->dccps_pmtu_cookie; | ||
32 | |||
33 | if (dp->dccps_options.dccpo_send_ack_vector) | ||
34 | info->tcpi_options |= TCPI_OPT_SACK; | ||
35 | |||
36 | ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info); | ||
37 | ccid_hc_tx_get_info(dp->dccps_hc_tx_ccid, sk, info); | ||
38 | } | ||
39 | |||
40 | static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, | ||
41 | void *_info) | ||
42 | { | ||
43 | r->idiag_rqueue = r->idiag_wqueue = 0; | ||
44 | |||
45 | if (_info != NULL) | ||
46 | dccp_get_info(sk, _info); | ||
47 | } | ||
48 | |||
49 | static struct inet_diag_handler dccp_diag_handler = { | ||
50 | .idiag_hashinfo = &dccp_hashinfo, | ||
51 | .idiag_get_info = dccp_diag_get_info, | ||
52 | .idiag_type = DCCPDIAG_GETSOCK, | ||
53 | .idiag_info_size = sizeof(struct tcp_info), | ||
54 | }; | ||
55 | |||
56 | static int __init dccp_diag_init(void) | ||
57 | { | ||
58 | return inet_diag_register(&dccp_diag_handler); | ||
59 | } | ||
60 | |||
61 | static void __exit dccp_diag_fini(void) | ||
62 | { | ||
63 | inet_diag_unregister(&dccp_diag_handler); | ||
64 | } | ||
65 | |||
66 | module_init(dccp_diag_init); | ||
67 | module_exit(dccp_diag_fini); | ||
68 | |||
69 | MODULE_LICENSE("GPL"); | ||
70 | MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>"); | ||
71 | MODULE_DESCRIPTION("DCCP inet_diag handler"); | ||
diff --git a/net/dccp/input.c b/net/dccp/input.c new file mode 100644 index 000000000000..ef29cef1dafe --- /dev/null +++ b/net/dccp/input.c | |||
@@ -0,0 +1,600 @@ | |||
1 | /* | ||
2 | * net/dccp/input.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | ||
12 | |||
13 | #include <linux/config.h> | ||
14 | #include <linux/dccp.h> | ||
15 | #include <linux/skbuff.h> | ||
16 | |||
17 | #include <net/sock.h> | ||
18 | |||
19 | #include "ccid.h" | ||
20 | #include "dccp.h" | ||
21 | |||
22 | static void dccp_fin(struct sock *sk, struct sk_buff *skb) | ||
23 | { | ||
24 | sk->sk_shutdown |= RCV_SHUTDOWN; | ||
25 | sock_set_flag(sk, SOCK_DONE); | ||
26 | __skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4); | ||
27 | __skb_queue_tail(&sk->sk_receive_queue, skb); | ||
28 | skb_set_owner_r(skb, sk); | ||
29 | sk->sk_data_ready(sk, 0); | ||
30 | } | ||
31 | |||
32 | static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb) | ||
33 | { | ||
34 | dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED); | ||
35 | dccp_fin(sk, skb); | ||
36 | dccp_set_state(sk, DCCP_CLOSED); | ||
37 | sk_wake_async(sk, 1, POLL_HUP); | ||
38 | } | ||
39 | |||
40 | static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) | ||
41 | { | ||
42 | /* | ||
43 | * Step 7: Check for unexpected packet types | ||
44 | * If (S.is_server and P.type == CloseReq) | ||
45 | * Send Sync packet acknowledging P.seqno | ||
46 | * Drop packet and return | ||
47 | */ | ||
48 | if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) { | ||
49 | dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); | ||
50 | return; | ||
51 | } | ||
52 | |||
53 | dccp_set_state(sk, DCCP_CLOSING); | ||
54 | dccp_send_close(sk, 0); | ||
55 | } | ||
56 | |||
57 | static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) | ||
58 | { | ||
59 | struct dccp_sock *dp = dccp_sk(sk); | ||
60 | |||
61 | if (dp->dccps_options.dccpo_send_ack_vector) | ||
62 | dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk, | ||
63 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
64 | } | ||
65 | |||
66 | static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) | ||
67 | { | ||
68 | const struct dccp_hdr *dh = dccp_hdr(skb); | ||
69 | struct dccp_sock *dp = dccp_sk(sk); | ||
70 | u64 lswl, lawl; | ||
71 | |||
72 | /* | ||
73 | * Step 5: Prepare sequence numbers for Sync | ||
74 | * If P.type == Sync or P.type == SyncAck, | ||
75 | * If S.AWL <= P.ackno <= S.AWH and P.seqno >= S.SWL, | ||
76 | * / * P is valid, so update sequence number variables | ||
77 | * accordingly. After this update, P will pass the tests | ||
78 | * in Step 6. A SyncAck is generated if necessary in | ||
79 | * Step 15 * / | ||
80 | * Update S.GSR, S.SWL, S.SWH | ||
81 | * Otherwise, | ||
82 | * Drop packet and return | ||
83 | */ | ||
84 | if (dh->dccph_type == DCCP_PKT_SYNC || | ||
85 | dh->dccph_type == DCCP_PKT_SYNCACK) { | ||
86 | if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
87 | dp->dccps_awl, dp->dccps_awh) && | ||
88 | !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl)) | ||
89 | dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); | ||
90 | else | ||
91 | return -1; | ||
92 | } | ||
93 | |||
94 | /* | ||
95 | * Step 6: Check sequence numbers | ||
96 | * Let LSWL = S.SWL and LAWL = S.AWL | ||
97 | * If P.type == CloseReq or P.type == Close or P.type == Reset, | ||
98 | * LSWL := S.GSR + 1, LAWL := S.GAR | ||
99 | * If LSWL <= P.seqno <= S.SWH | ||
100 | * and (P.ackno does not exist or LAWL <= P.ackno <= S.AWH), | ||
101 | * Update S.GSR, S.SWL, S.SWH | ||
102 | * If P.type != Sync, | ||
103 | * Update S.GAR | ||
104 | * Otherwise, | ||
105 | * Send Sync packet acknowledging P.seqno | ||
106 | * Drop packet and return | ||
107 | */ | ||
108 | lswl = dp->dccps_swl; | ||
109 | lawl = dp->dccps_awl; | ||
110 | |||
111 | if (dh->dccph_type == DCCP_PKT_CLOSEREQ || | ||
112 | dh->dccph_type == DCCP_PKT_CLOSE || | ||
113 | dh->dccph_type == DCCP_PKT_RESET) { | ||
114 | lswl = dp->dccps_gsr; | ||
115 | dccp_inc_seqno(&lswl); | ||
116 | lawl = dp->dccps_gar; | ||
117 | } | ||
118 | |||
119 | if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) && | ||
120 | (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ || | ||
121 | between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
122 | lawl, dp->dccps_awh))) { | ||
123 | dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); | ||
124 | |||
125 | if (dh->dccph_type != DCCP_PKT_SYNC && | ||
126 | (DCCP_SKB_CB(skb)->dccpd_ack_seq != | ||
127 | DCCP_PKT_WITHOUT_ACK_SEQ)) | ||
128 | dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq; | ||
129 | } else { | ||
130 | LIMIT_NETDEBUG(KERN_WARNING "DCCP: Step 6 failed for %s packet, " | ||
131 | "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and " | ||
132 | "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), " | ||
133 | "sending SYNC...\n", | ||
134 | dccp_packet_name(dh->dccph_type), | ||
135 | (unsigned long long) lswl, | ||
136 | (unsigned long long) | ||
137 | DCCP_SKB_CB(skb)->dccpd_seq, | ||
138 | (unsigned long long) dp->dccps_swh, | ||
139 | (DCCP_SKB_CB(skb)->dccpd_ack_seq == | ||
140 | DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist" : "exists", | ||
141 | (unsigned long long) lawl, | ||
142 | (unsigned long long) | ||
143 | DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
144 | (unsigned long long) dp->dccps_awh); | ||
145 | dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); | ||
146 | return -1; | ||
147 | } | ||
148 | |||
149 | return 0; | ||
150 | } | ||
151 | |||
152 | int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, | ||
153 | const struct dccp_hdr *dh, const unsigned len) | ||
154 | { | ||
155 | struct dccp_sock *dp = dccp_sk(sk); | ||
156 | |||
157 | if (dccp_check_seqno(sk, skb)) | ||
158 | goto discard; | ||
159 | |||
160 | if (dccp_parse_options(sk, skb)) | ||
161 | goto discard; | ||
162 | |||
163 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) | ||
164 | dccp_event_ack_recv(sk, skb); | ||
165 | |||
166 | /* | ||
167 | * FIXME: check ECN to see if we should use | ||
168 | * DCCP_ACKPKTS_STATE_ECN_MARKED | ||
169 | */ | ||
170 | if (dp->dccps_options.dccpo_send_ack_vector) { | ||
171 | struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; | ||
172 | |||
173 | if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, | ||
174 | DCCP_SKB_CB(skb)->dccpd_seq, | ||
175 | DCCP_ACKPKTS_STATE_RECEIVED)) { | ||
176 | LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable " | ||
177 | "packets buffer full!\n"); | ||
178 | ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
179 | inet_csk_schedule_ack(sk); | ||
180 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, | ||
181 | TCP_DELACK_MIN, | ||
182 | DCCP_RTO_MAX); | ||
183 | goto discard; | ||
184 | } | ||
185 | |||
186 | /* | ||
187 | * FIXME: this activation is probably wrong, have to study more | ||
188 | * TCP delack machinery and how it fits into DCCP draft, but | ||
189 | * for now it kinda "works" 8) | ||
190 | */ | ||
191 | if (!inet_csk_ack_scheduled(sk)) { | ||
192 | inet_csk_schedule_ack(sk); | ||
193 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ, | ||
194 | DCCP_RTO_MAX); | ||
195 | } | ||
196 | } | ||
197 | |||
198 | ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); | ||
199 | ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); | ||
200 | |||
201 | switch (dccp_hdr(skb)->dccph_type) { | ||
202 | case DCCP_PKT_DATAACK: | ||
203 | case DCCP_PKT_DATA: | ||
204 | /* | ||
205 | * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED | ||
206 | * option if it is. | ||
207 | */ | ||
208 | __skb_pull(skb, dh->dccph_doff * 4); | ||
209 | __skb_queue_tail(&sk->sk_receive_queue, skb); | ||
210 | skb_set_owner_r(skb, sk); | ||
211 | sk->sk_data_ready(sk, 0); | ||
212 | return 0; | ||
213 | case DCCP_PKT_ACK: | ||
214 | goto discard; | ||
215 | case DCCP_PKT_RESET: | ||
216 | /* | ||
217 | * Step 9: Process Reset | ||
218 | * If P.type == Reset, | ||
219 | * Tear down connection | ||
220 | * S.state := TIMEWAIT | ||
221 | * Set TIMEWAIT timer | ||
222 | * Drop packet and return | ||
223 | */ | ||
224 | dccp_fin(sk, skb); | ||
225 | dccp_time_wait(sk, DCCP_TIME_WAIT, 0); | ||
226 | return 0; | ||
227 | case DCCP_PKT_CLOSEREQ: | ||
228 | dccp_rcv_closereq(sk, skb); | ||
229 | goto discard; | ||
230 | case DCCP_PKT_CLOSE: | ||
231 | dccp_rcv_close(sk, skb); | ||
232 | return 0; | ||
233 | case DCCP_PKT_REQUEST: | ||
234 | /* Step 7 | ||
235 | * or (S.is_server and P.type == Response) | ||
236 | * or (S.is_client and P.type == Request) | ||
237 | * or (S.state >= OPEN and P.type == Request | ||
238 | * and P.seqno >= S.OSR) | ||
239 | * or (S.state >= OPEN and P.type == Response | ||
240 | * and P.seqno >= S.OSR) | ||
241 | * or (S.state == RESPOND and P.type == Data), | ||
242 | * Send Sync packet acknowledging P.seqno | ||
243 | * Drop packet and return | ||
244 | */ | ||
245 | if (dp->dccps_role != DCCP_ROLE_LISTEN) | ||
246 | goto send_sync; | ||
247 | goto check_seq; | ||
248 | case DCCP_PKT_RESPONSE: | ||
249 | if (dp->dccps_role != DCCP_ROLE_CLIENT) | ||
250 | goto send_sync; | ||
251 | check_seq: | ||
252 | if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) { | ||
253 | send_sync: | ||
254 | dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, | ||
255 | DCCP_PKT_SYNC); | ||
256 | } | ||
257 | break; | ||
258 | case DCCP_PKT_SYNC: | ||
259 | dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, | ||
260 | DCCP_PKT_SYNCACK); | ||
261 | /* | ||
262 | * From the draft: | ||
263 | * | ||
264 | * As with DCCP-Ack packets, DCCP-Sync and DCCP-SyncAck packets | ||
265 | * MAY have non-zero-length application data areas, whose | ||
266 | * contents * receivers MUST ignore. | ||
267 | */ | ||
268 | goto discard; | ||
269 | } | ||
270 | |||
271 | DCCP_INC_STATS_BH(DCCP_MIB_INERRS); | ||
272 | discard: | ||
273 | __kfree_skb(skb); | ||
274 | return 0; | ||
275 | } | ||
276 | |||
277 | static int dccp_rcv_request_sent_state_process(struct sock *sk, | ||
278 | struct sk_buff *skb, | ||
279 | const struct dccp_hdr *dh, | ||
280 | const unsigned len) | ||
281 | { | ||
282 | /* | ||
283 | * Step 4: Prepare sequence numbers in REQUEST | ||
284 | * If S.state == REQUEST, | ||
285 | * If (P.type == Response or P.type == Reset) | ||
286 | * and S.AWL <= P.ackno <= S.AWH, | ||
287 | * / * Set sequence number variables corresponding to the | ||
288 | * other endpoint, so P will pass the tests in Step 6 * / | ||
289 | * Set S.GSR, S.ISR, S.SWL, S.SWH | ||
290 | * / * Response processing continues in Step 10; Reset | ||
291 | * processing continues in Step 9 * / | ||
292 | */ | ||
293 | if (dh->dccph_type == DCCP_PKT_RESPONSE) { | ||
294 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
295 | struct dccp_sock *dp = dccp_sk(sk); | ||
296 | |||
297 | /* Stop the REQUEST timer */ | ||
298 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); | ||
299 | BUG_TRAP(sk->sk_send_head != NULL); | ||
300 | __kfree_skb(sk->sk_send_head); | ||
301 | sk->sk_send_head = NULL; | ||
302 | |||
303 | if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
304 | dp->dccps_awl, dp->dccps_awh)) { | ||
305 | dccp_pr_debug("invalid ackno: S.AWL=%llu, " | ||
306 | "P.ackno=%llu, S.AWH=%llu \n", | ||
307 | (unsigned long long)dp->dccps_awl, | ||
308 | (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq, | ||
309 | (unsigned long long)dp->dccps_awh); | ||
310 | goto out_invalid_packet; | ||
311 | } | ||
312 | |||
313 | dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; | ||
314 | dccp_update_gsr(sk, dp->dccps_isr); | ||
315 | /* | ||
316 | * SWL and AWL are initially adjusted so that they are not less than | ||
317 | * the initial Sequence Numbers received and sent, respectively: | ||
318 | * SWL := max(GSR + 1 - floor(W/4), ISR), | ||
319 | * AWL := max(GSS - W' + 1, ISS). | ||
320 | * These adjustments MUST be applied only at the beginning of the | ||
321 | * connection. | ||
322 | * | ||
323 | * AWL was adjusted in dccp_v4_connect -acme | ||
324 | */ | ||
325 | dccp_set_seqno(&dp->dccps_swl, | ||
326 | max48(dp->dccps_swl, dp->dccps_isr)); | ||
327 | |||
328 | if (ccid_hc_rx_init(dp->dccps_hc_rx_ccid, sk) != 0 || | ||
329 | ccid_hc_tx_init(dp->dccps_hc_tx_ccid, sk) != 0) { | ||
330 | ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); | ||
331 | ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); | ||
332 | /* FIXME: send appropriate RESET code */ | ||
333 | goto out_invalid_packet; | ||
334 | } | ||
335 | |||
336 | dccp_sync_mss(sk, dp->dccps_pmtu_cookie); | ||
337 | |||
338 | /* | ||
339 | * Step 10: Process REQUEST state (second part) | ||
340 | * If S.state == REQUEST, | ||
341 | * / * If we get here, P is a valid Response from the | ||
342 | * server (see Step 4), and we should move to | ||
343 | * PARTOPEN state. PARTOPEN means send an Ack, | ||
344 | * don't send Data packets, retransmit Acks | ||
345 | * periodically, and always include any Init Cookie | ||
346 | * from the Response * / | ||
347 | * S.state := PARTOPEN | ||
348 | * Set PARTOPEN timer | ||
349 | * Continue with S.state == PARTOPEN | ||
350 | * / * Step 12 will send the Ack completing the | ||
351 | * three-way handshake * / | ||
352 | */ | ||
353 | dccp_set_state(sk, DCCP_PARTOPEN); | ||
354 | |||
355 | /* Make sure socket is routed, for correct metrics. */ | ||
356 | inet_sk_rebuild_header(sk); | ||
357 | |||
358 | if (!sock_flag(sk, SOCK_DEAD)) { | ||
359 | sk->sk_state_change(sk); | ||
360 | sk_wake_async(sk, 0, POLL_OUT); | ||
361 | } | ||
362 | |||
363 | if (sk->sk_write_pending || icsk->icsk_ack.pingpong || | ||
364 | icsk->icsk_accept_queue.rskq_defer_accept) { | ||
365 | /* Save one ACK. Data will be ready after | ||
366 | * several ticks, if write_pending is set. | ||
367 | * | ||
368 | * It may be deleted, but with this feature tcpdumps | ||
369 | * look so _wonderfully_ clever, that I was not able | ||
370 | * to stand against the temptation 8) --ANK | ||
371 | */ | ||
372 | /* | ||
373 | * OK, in DCCP we can as well do a similar trick, its | ||
374 | * even in the draft, but there is no need for us to | ||
375 | * schedule an ack here, as dccp_sendmsg does this for | ||
376 | * us, also stated in the draft. -acme | ||
377 | */ | ||
378 | __kfree_skb(skb); | ||
379 | return 0; | ||
380 | } | ||
381 | dccp_send_ack(sk); | ||
382 | return -1; | ||
383 | } | ||
384 | |||
385 | out_invalid_packet: | ||
386 | return 1; /* dccp_v4_do_rcv will send a reset, but... | ||
387 | FIXME: the reset code should be | ||
388 | DCCP_RESET_CODE_PACKET_ERROR */ | ||
389 | } | ||
390 | |||
391 | static int dccp_rcv_respond_partopen_state_process(struct sock *sk, | ||
392 | struct sk_buff *skb, | ||
393 | const struct dccp_hdr *dh, | ||
394 | const unsigned len) | ||
395 | { | ||
396 | int queued = 0; | ||
397 | |||
398 | switch (dh->dccph_type) { | ||
399 | case DCCP_PKT_RESET: | ||
400 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); | ||
401 | break; | ||
402 | case DCCP_PKT_DATAACK: | ||
403 | case DCCP_PKT_ACK: | ||
404 | /* | ||
405 | * FIXME: we should be reseting the PARTOPEN (DELACK) timer | ||
406 | * here but only if we haven't used the DELACK timer for | ||
407 | * something else, like sending a delayed ack for a TIMESTAMP | ||
408 | * echo, etc, for now were not clearing it, sending an extra | ||
409 | * ACK when there is nothing else to do in DELACK is not a big | ||
410 | * deal after all. | ||
411 | */ | ||
412 | |||
413 | /* Stop the PARTOPEN timer */ | ||
414 | if (sk->sk_state == DCCP_PARTOPEN) | ||
415 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); | ||
416 | |||
417 | dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq; | ||
418 | dccp_set_state(sk, DCCP_OPEN); | ||
419 | |||
420 | if (dh->dccph_type == DCCP_PKT_DATAACK) { | ||
421 | dccp_rcv_established(sk, skb, dh, len); | ||
422 | queued = 1; /* packet was queued | ||
423 | (by dccp_rcv_established) */ | ||
424 | } | ||
425 | break; | ||
426 | } | ||
427 | |||
428 | return queued; | ||
429 | } | ||
430 | |||
431 | int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | ||
432 | struct dccp_hdr *dh, unsigned len) | ||
433 | { | ||
434 | struct dccp_sock *dp = dccp_sk(sk); | ||
435 | const int old_state = sk->sk_state; | ||
436 | int queued = 0; | ||
437 | |||
438 | /* | ||
439 | * Step 3: Process LISTEN state | ||
440 | * (Continuing from dccp_v4_do_rcv and dccp_v6_do_rcv) | ||
441 | * | ||
442 | * If S.state == LISTEN, | ||
443 | * If P.type == Request or P contains a valid Init Cookie | ||
444 | * option, | ||
445 | * * Must scan the packet's options to check for an Init | ||
446 | * Cookie. Only the Init Cookie is processed here, | ||
447 | * however; other options are processed in Step 8. This | ||
448 | * scan need only be performed if the endpoint uses Init | ||
449 | * Cookies * | ||
450 | * * Generate a new socket and switch to that socket * | ||
451 | * Set S := new socket for this port pair | ||
452 | * S.state = RESPOND | ||
453 | * Choose S.ISS (initial seqno) or set from Init Cookie | ||
454 | * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie | ||
455 | * Continue with S.state == RESPOND | ||
456 | * * A Response packet will be generated in Step 11 * | ||
457 | * Otherwise, | ||
458 | * Generate Reset(No Connection) unless P.type == Reset | ||
459 | * Drop packet and return | ||
460 | * | ||
461 | * NOTE: the check for the packet types is done in | ||
462 | * dccp_rcv_state_process | ||
463 | */ | ||
464 | if (sk->sk_state == DCCP_LISTEN) { | ||
465 | if (dh->dccph_type == DCCP_PKT_REQUEST) { | ||
466 | if (dccp_v4_conn_request(sk, skb) < 0) | ||
467 | return 1; | ||
468 | |||
469 | /* FIXME: do congestion control initialization */ | ||
470 | goto discard; | ||
471 | } | ||
472 | if (dh->dccph_type == DCCP_PKT_RESET) | ||
473 | goto discard; | ||
474 | |||
475 | /* Caller (dccp_v4_do_rcv) will send Reset(No Connection)*/ | ||
476 | return 1; | ||
477 | } | ||
478 | |||
479 | if (sk->sk_state != DCCP_REQUESTING) { | ||
480 | if (dccp_check_seqno(sk, skb)) | ||
481 | goto discard; | ||
482 | |||
483 | /* | ||
484 | * Step 8: Process options and mark acknowledgeable | ||
485 | */ | ||
486 | if (dccp_parse_options(sk, skb)) | ||
487 | goto discard; | ||
488 | |||
489 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != | ||
490 | DCCP_PKT_WITHOUT_ACK_SEQ) | ||
491 | dccp_event_ack_recv(sk, skb); | ||
492 | |||
493 | ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); | ||
494 | ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); | ||
495 | |||
496 | /* | ||
497 | * FIXME: check ECN to see if we should use | ||
498 | * DCCP_ACKPKTS_STATE_ECN_MARKED | ||
499 | */ | ||
500 | if (dp->dccps_options.dccpo_send_ack_vector) { | ||
501 | if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, | ||
502 | DCCP_SKB_CB(skb)->dccpd_seq, | ||
503 | DCCP_ACKPKTS_STATE_RECEIVED)) | ||
504 | goto discard; | ||
505 | /* | ||
506 | * FIXME: this activation is probably wrong, have to | ||
507 | * study more TCP delack machinery and how it fits into | ||
508 | * DCCP draft, but for now it kinda "works" 8) | ||
509 | */ | ||
510 | if ((dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno == | ||
511 | DCCP_MAX_SEQNO + 1) && | ||
512 | !inet_csk_ack_scheduled(sk)) { | ||
513 | inet_csk_schedule_ack(sk); | ||
514 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, | ||
515 | TCP_DELACK_MIN, | ||
516 | DCCP_RTO_MAX); | ||
517 | } | ||
518 | } | ||
519 | } | ||
520 | |||
521 | /* | ||
522 | * Step 9: Process Reset | ||
523 | * If P.type == Reset, | ||
524 | * Tear down connection | ||
525 | * S.state := TIMEWAIT | ||
526 | * Set TIMEWAIT timer | ||
527 | * Drop packet and return | ||
528 | */ | ||
529 | if (dh->dccph_type == DCCP_PKT_RESET) { | ||
530 | /* | ||
531 | * Queue the equivalent of TCP fin so that dccp_recvmsg | ||
532 | * exits the loop | ||
533 | */ | ||
534 | dccp_fin(sk, skb); | ||
535 | dccp_time_wait(sk, DCCP_TIME_WAIT, 0); | ||
536 | return 0; | ||
537 | /* | ||
538 | * Step 7: Check for unexpected packet types | ||
539 | * If (S.is_server and P.type == CloseReq) | ||
540 | * or (S.is_server and P.type == Response) | ||
541 | * or (S.is_client and P.type == Request) | ||
542 | * or (S.state == RESPOND and P.type == Data), | ||
543 | * Send Sync packet acknowledging P.seqno | ||
544 | * Drop packet and return | ||
545 | */ | ||
546 | } else if ((dp->dccps_role != DCCP_ROLE_CLIENT && | ||
547 | (dh->dccph_type == DCCP_PKT_RESPONSE || | ||
548 | dh->dccph_type == DCCP_PKT_CLOSEREQ)) || | ||
549 | (dp->dccps_role == DCCP_ROLE_CLIENT && | ||
550 | dh->dccph_type == DCCP_PKT_REQUEST) || | ||
551 | (sk->sk_state == DCCP_RESPOND && | ||
552 | dh->dccph_type == DCCP_PKT_DATA)) { | ||
553 | dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, | ||
554 | DCCP_PKT_SYNC); | ||
555 | goto discard; | ||
556 | } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { | ||
557 | dccp_rcv_closereq(sk, skb); | ||
558 | goto discard; | ||
559 | } else if (dh->dccph_type == DCCP_PKT_CLOSE) { | ||
560 | dccp_rcv_close(sk, skb); | ||
561 | return 0; | ||
562 | } | ||
563 | |||
564 | switch (sk->sk_state) { | ||
565 | case DCCP_CLOSED: | ||
566 | return 1; | ||
567 | |||
568 | case DCCP_REQUESTING: | ||
569 | /* FIXME: do congestion control initialization */ | ||
570 | |||
571 | queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len); | ||
572 | if (queued >= 0) | ||
573 | return queued; | ||
574 | |||
575 | __kfree_skb(skb); | ||
576 | return 0; | ||
577 | |||
578 | case DCCP_RESPOND: | ||
579 | case DCCP_PARTOPEN: | ||
580 | queued = dccp_rcv_respond_partopen_state_process(sk, skb, | ||
581 | dh, len); | ||
582 | break; | ||
583 | } | ||
584 | |||
585 | if (dh->dccph_type == DCCP_PKT_ACK || | ||
586 | dh->dccph_type == DCCP_PKT_DATAACK) { | ||
587 | switch (old_state) { | ||
588 | case DCCP_PARTOPEN: | ||
589 | sk->sk_state_change(sk); | ||
590 | sk_wake_async(sk, 0, POLL_OUT); | ||
591 | break; | ||
592 | } | ||
593 | } | ||
594 | |||
595 | if (!queued) { | ||
596 | discard: | ||
597 | __kfree_skb(skb); | ||
598 | } | ||
599 | return 0; | ||
600 | } | ||
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c new file mode 100644 index 000000000000..3fc75dbee4b8 --- /dev/null +++ b/net/dccp/ipv4.c | |||
@@ -0,0 +1,1356 @@ | |||
1 | /* | ||
2 | * net/dccp/ipv4.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | ||
12 | |||
13 | #include <linux/config.h> | ||
14 | #include <linux/dccp.h> | ||
15 | #include <linux/icmp.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/skbuff.h> | ||
18 | #include <linux/random.h> | ||
19 | |||
20 | #include <net/icmp.h> | ||
21 | #include <net/inet_hashtables.h> | ||
22 | #include <net/sock.h> | ||
23 | #include <net/tcp_states.h> | ||
24 | #include <net/xfrm.h> | ||
25 | |||
26 | #include "ccid.h" | ||
27 | #include "dccp.h" | ||
28 | |||
/*
 * Global socket hash tables (bind / established / listening) shared by
 * all DCCP/IPv4 sockets, the DCCP counterpart of tcp_hashinfo.  Only
 * the locks, the listen-hash waitqueue and the ephemeral-port rover are
 * initialized statically; the table memory (ehash/bhash) is presumably
 * allocated at protocol init time -- not visible in this file chunk.
 */
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait	= __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
	.portalloc_lock	= SPIN_LOCK_UNLOCKED,
	/* start ephemeral port search just below the first user port */
	.port_rover	= 1024 - 1,
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);
38 | |||
/*
 * Bind @sk to local port @snum (0 = auto-select) using the generic
 * inet_connection_sock allocator over DCCP's bind hash table.
 */
static int dccp_v4_get_port(struct sock *sk, const unsigned short snum)
{
	return inet_csk_get_port(&dccp_hashinfo, sk, snum);
}
43 | |||
/* Insert @sk into DCCP's listening or established hash table. */
static void dccp_v4_hash(struct sock *sk)
{
	inet_hash(&dccp_hashinfo, sk);
}
48 | |||
/* Remove @sk from DCCP's socket hash tables. */
static void dccp_v4_unhash(struct sock *sk)
{
	inet_unhash(&dccp_hashinfo, sk);
}
53 | |||
/* called with local bh disabled */
/*
 * Verify that the 4-tuple @sk is about to use with local port @lport is
 * unique in the established and TIME-WAIT chains, and if so insert @sk
 * into the established hash under the bucket lock.
 *
 * Returns 0 on success; a TIME-WAIT socket holding the same identity is
 * either handed back through *twp (caller kills it) or descheduled and
 * released here when @twp is NULL.  Returns -EADDRNOTAVAIL when the
 * identity is already in use by a live socket.
 */
static int __dccp_v4_check_established(struct sock *sk, const __u16 lport,
				       struct inet_timewait_sock **twp)
{
	struct inet_sock *inet = inet_sk(sk);
	/* Lookup is keyed from the peer's point of view: our receive
	 * address is the "daddr" of incoming packets. */
	const u32 daddr = inet->rcv_saddr;
	const u32 saddr = inet->daddr;
	const int dif = sk->sk_bound_dev_if;
	INET_ADDR_COOKIE(acookie, saddr, daddr)
	const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
	const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport,
				      dccp_hashinfo.ehash_size);
	struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash];
	const struct sock *sk2;
	const struct hlist_node *node;
	struct inet_timewait_sock *tw;

	write_lock(&head->lock);

	/* Check TIME-WAIT sockets first.  The TW half of the table
	 * lives ehash_size buckets past the established half. */
	sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) {
		tw = inet_twsk(sk2);

		if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif))
			goto not_unique;
	}
	tw = NULL;

	/* And established part... */
	sk_for_each(sk2, node, &head->chain) {
		if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif))
			goto not_unique;
	}

	/* Must record num and sport now. Otherwise we will see
	 * in hash table socket with a funny identity. */
	inet->num = lport;
	inet->sport = htons(lport);
	sk->sk_hashent = hash;
	BUG_TRAP(sk_unhashed(sk));
	__sk_add_node(sk, &head->chain);
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(&head->lock);

	if (twp != NULL) {
		/* Hand the conflicting TW socket to the caller, who
		 * deschedules and releases it (see dccp_v4_hash_connect). */
		*twp = tw;
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
	} else if (tw != NULL) {
		/* Silly. Should hash-dance instead... */
		inet_twsk_deschedule(tw, &dccp_death_row);
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

		inet_twsk_put(tw);
	}

	return 0;

not_unique:
	write_unlock(&head->lock);
	return -EADDRNOTAVAIL;
}
115 | |||
/*
 * Bind a port for a connect operation and hash it.
 *
 * If no local port has been chosen yet (snum == 0) an ephemeral port is
 * searched for, proving 4-tuple uniqueness via
 * __dccp_v4_check_established(); otherwise the pre-bound port is kept
 * and uniqueness is re-checked only when the bind bucket is shared.
 * Mirrors the TCP connect-time hashing logic of this kernel era.
 */
static int dccp_v4_hash_connect(struct sock *sk)
{
	const unsigned short snum = inet_sk(sk)->num;
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	int ret;

	if (snum == 0) {
		int rover;
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int remaining = (high - low) + 1;
		struct hlist_node *node;
		struct inet_timewait_sock *tw = NULL;

		local_bh_disable();

		/* TODO. Actually it is not so bad idea to remove
		 * dccp_hashinfo.portalloc_lock before next submission to
		 * Linus.
		 * As soon as we touch this place at all it is time to think.
		 *
		 * Now it protects single _advisory_ variable
		 * dccp_hashinfo.port_rover, hence it is mostly useless.
		 * Code will work nicely if we just delete it, but
		 * I am afraid in contented case it will work not better or
		 * even worse: another cpu just will hit the same bucket
		 * and spin there.
		 * So some cpu salt could remove both contention and
		 * memory pingpong. Any ideas how to do this in a nice way?
		 */
		spin_lock(&dccp_hashinfo.portalloc_lock);
		rover = dccp_hashinfo.port_rover;

		do {
			rover++;
			if ((rover < low) || (rover > high))
				rover = low;	/* wrap around the range */
			head = &dccp_hashinfo.bhash[inet_bhashfn(rover,
					    dccp_hashinfo.bhash_size)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 * unique enough.
			 */
			inet_bind_bucket_for_each(tb, node, &head->chain) {
				if (tb->port == rover) {
					BUG_TRAP(!hlist_empty(&tb->owners));
					/* fastreuse >= 0: bucket owned via
					 * ordinary bind() -- skip it for
					 * autobind (TODO confirm semantics
					 * against inet_csk_get_port) */
					if (tb->fastreuse >= 0)
						goto next_port;
					if (!__dccp_v4_check_established(sk,
									 rover,
									 &tw))
						goto ok;
					goto next_port;
				}
			}

			/* Port bucket does not exist yet: claim it. */
			tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep,
						     head, rover);
			if (tb == NULL) {
				spin_unlock(&head->lock);
				break;	/* OOM: give up the search */
			}
			tb->fastreuse = -1;	/* connect-allocated bucket */
			goto ok;

		next_port:
			spin_unlock(&head->lock);
		} while (--remaining > 0);
		dccp_hashinfo.port_rover = rover;
		spin_unlock(&dccp_hashinfo.portalloc_lock);

		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		/* All locks still held and bhs disabled */
		dccp_hashinfo.port_rover = rover;
		spin_unlock(&dccp_hashinfo.portalloc_lock);

		inet_bind_hash(sk, tb, rover);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->sport = htons(rover);
			__inet_hash(&dccp_hashinfo, sk, 0);
		}
		spin_unlock(&head->lock);

		if (tw != NULL) {
			/* We recycled a TIME-WAIT identity; kill the old
			 * twsk now that we own the hash entry. */
			inet_twsk_deschedule(tw, &dccp_death_row);
			inet_twsk_put(tw);
		}

		ret = 0;
		goto out;
	}

	/* Port was already chosen by bind(): just hash the socket,
	 * re-checking uniqueness if we share the bind bucket. */
	head = &dccp_hashinfo.bhash[inet_bhashfn(snum,
						 dccp_hashinfo.bhash_size)];
	tb = inet_csk(sk)->icsk_bind_hash;
	spin_lock_bh(&head->lock);
	if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) {
		/* Sole owner of the port: no conflict possible. */
		__inet_hash(&dccp_hashinfo, sk, 0);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = __dccp_v4_check_established(sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}
235 | |||
/*
 * Active open: connect a DCCP/IPv4 socket to the peer given in @uaddr.
 * Mirrors tcp_v4_connect(): resolve a route, fill in the socket's
 * address/port identity, hash it, initialize sequence-number state and
 * transmit the DCCP-Request via dccp_connect().
 *
 * Returns 0 or a negative errno; on failure the socket is moved back to
 * DCCP_CLOSED (which unhashes it and releases the port).
 */
static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
			   int addr_len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct dccp_sock *dp = dccp_sk(sk);
	const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct rtable *rt;
	u32 daddr, nexthop;
	int tmp;
	int err;

	dp->dccps_role = DCCP_ROLE_CLIENT;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	if (inet->opt != NULL && inet->opt->srr) {
		/* Source routing: route to the first hop, not the
		 * final destination. */
		if (daddr == 0)
			return -EINVAL;
		nexthop = inet->opt->faddr;
	}

	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			       IPPROTO_DCCP,
			       inet->sport, usin->sin_port, sk);
	if (tmp < 0)
		return tmp;

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (inet->opt == NULL || !inet->opt->srr)
		daddr = rt->rt_dst;

	if (inet->saddr == 0)
		inet->saddr = rt->rt_src;	/* source chosen by routing */
	inet->rcv_saddr = inet->saddr;

	inet->dport = usin->sin_port;
	inet->daddr = daddr;

	dp->dccps_ext_header_len = 0;
	if (inet->opt != NULL)
		dp->dccps_ext_header_len = inet->opt->optlen;
	/*
	 * Socket identity is still unknown (sport may be zero).
	 * However we set state to DCCP_REQUESTING and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	dccp_set_state(sk, DCCP_REQUESTING);
	err = dccp_v4_hash_connect(sk);
	if (err != 0)
		goto failure;

	err = ip_route_newports(&rt, inet->sport, inet->dport, sk);
	if (err != 0)
		goto failure;

	/* OK, now commit destination to socket. */
	sk_setup_caps(sk, &rt->u.dst);

	dp->dccps_gar =
		dp->dccps_iss = secure_dccp_sequence_number(inet->saddr,
							    inet->daddr,
							    inet->sport,
							    usin->sin_port);
	dccp_update_gss(sk, dp->dccps_iss);

	/*
	 * SWL and AWL are initially adjusted so that they are not less than
	 * the initial Sequence Numbers received and sent, respectively:
	 *	SWL := max(GSR + 1 - floor(W/4), ISR),
	 *	AWL := max(GSS - W' + 1, ISS).
	 * These adjustments MUST be applied only at the beginning of the
	 * connection.
	 */
	dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss));

	inet->id = dp->dccps_iss ^ jiffies;

	err = dccp_connect(sk);
	/* sk_setup_caps() attached the route to the socket, so do not
	 * release it on the failure path below. */
	rt = NULL;
	if (err != 0)
		goto failure;
out:
	return err;
failure:
	/*
	 * This unhashes the socket and releases the local port, if necessary.
	 */
	dccp_set_state(sk, DCCP_CLOSED);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->dport = 0;
	goto out;
}
340 | |||
/*
 * This routine does path mtu discovery as defined in RFC1191.
 *
 * Called from dccp_v4_err() on ICMP_FRAG_NEEDED: propagate the reported
 * @mtu into the cached route and, if the socket's cached PMTU is now
 * too large, re-sync the MSS and probe with a DCCP-Sync.
 */
static inline void dccp_do_pmtu_discovery(struct sock *sk,
					  const struct iphdr *iph,
					  u32 mtu)
{
	struct dst_entry *dst;
	const struct inet_sock *inet = inet_sk(sk);
	const struct dccp_sock *dp = dccp_sk(sk);

	/* We are not interested in DCCP_LISTEN and request_socks (RESPONSEs
	 * send out by Linux are always < 576bytes so they should go through
	 * unfragmented).
	 */
	if (sk->sk_state == DCCP_LISTEN)
		return;

	/* We don't check in the destentry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet_to_big packets
	 * are send back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	dst->ops->update_pmtu(dst, mtu);

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	/* Re-read: update_pmtu() may have clamped the route's MTU. */
	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    dp->dccps_pmtu_cookie > mtu) {
		dccp_sync_mss(sk, mtu);

		/*
		 * From: draft-ietf-dccp-spec-11.txt
		 *
		 *	DCCP-Sync packets are the best choice for upward
		 *	probing, since DCCP-Sync probes do not risk application
		 *	data loss.
		 */
		dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
	} /* else let the usual retransmit timer handle it */
}
392 | |||
393 | static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb) | ||
394 | { | ||
395 | int err; | ||
396 | struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; | ||
397 | const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) + | ||
398 | sizeof(struct dccp_hdr_ext) + | ||
399 | sizeof(struct dccp_hdr_ack_bits); | ||
400 | struct sk_buff *skb; | ||
401 | |||
402 | if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) | ||
403 | return; | ||
404 | |||
405 | skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC); | ||
406 | if (skb == NULL) | ||
407 | return; | ||
408 | |||
409 | /* Reserve space for headers. */ | ||
410 | skb_reserve(skb, MAX_DCCP_HEADER); | ||
411 | |||
412 | skb->dst = dst_clone(rxskb->dst); | ||
413 | |||
414 | skb->h.raw = skb_push(skb, dccp_hdr_ack_len); | ||
415 | dh = dccp_hdr(skb); | ||
416 | memset(dh, 0, dccp_hdr_ack_len); | ||
417 | |||
418 | /* Build DCCP header and checksum it. */ | ||
419 | dh->dccph_type = DCCP_PKT_ACK; | ||
420 | dh->dccph_sport = rxdh->dccph_dport; | ||
421 | dh->dccph_dport = rxdh->dccph_sport; | ||
422 | dh->dccph_doff = dccp_hdr_ack_len / 4; | ||
423 | dh->dccph_x = 1; | ||
424 | |||
425 | dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); | ||
426 | dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), | ||
427 | DCCP_SKB_CB(rxskb)->dccpd_seq); | ||
428 | |||
429 | bh_lock_sock(dccp_ctl_socket->sk); | ||
430 | err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk, | ||
431 | rxskb->nh.iph->daddr, | ||
432 | rxskb->nh.iph->saddr, NULL); | ||
433 | bh_unlock_sock(dccp_ctl_socket->sk); | ||
434 | |||
435 | if (err == NET_XMIT_CN || err == 0) { | ||
436 | DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); | ||
437 | DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); | ||
438 | } | ||
439 | } | ||
440 | |||
/*
 * Ack a packet on behalf of a pending connection request.  @req
 * carries no state we need here, so simply reply to the skb via the
 * control socket (presumably installed as the request_sock_ops
 * ->send_ack hook -- confirm against the ops table, not visible here).
 */
static void dccp_v4_reqsk_send_ack(struct sk_buff *skb,
				   struct request_sock *req)
{
	dccp_v4_ctl_send_ack(skb);
}
446 | |||
/*
 * Transmit a DCCP-Response for connection request @req.  Routes the
 * reply when no @dst was supplied, builds the packet with
 * dccp_make_response() and sends it using the request's addresses and
 * IP options.  Returns 0 on success (congestion notification counts as
 * success), -1 or a transmit error otherwise.  Releases @dst.
 */
static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
				 struct dst_entry *dst)
{
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */

	if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto out;

	skb = dccp_make_response(sk, dst, req);
	if (skb != NULL) {
		const struct inet_request_sock *ireq = inet_rsk(req);

		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		/* Congestion-notified transmit is not a hard failure. */
		if (err == NET_XMIT_CN)
			err = 0;
	}

out:
	dst_release(dst);
	return err;
}
473 | |||
474 | /* | ||
475 | * This routine is called by the ICMP module when it gets some sort of error | ||
476 | * condition. If err < 0 then the socket should be closed and the error | ||
477 | * returned to the user. If err > 0 it's just the icmp type << 8 | icmp code. | ||
478 | * After adjustment header points to the first 8 bytes of the tcp header. We | ||
479 | * need to find the appropriate port. | ||
480 | * | ||
481 | * The locking strategy used here is very "optimistic". When someone else | ||
482 | * accesses the socket the ICMP is just dropped and for some paths there is no | ||
483 | * check at all. A more general error queue to queue errors for later handling | ||
484 | * is probably better. | ||
485 | */ | ||
486 | void dccp_v4_err(struct sk_buff *skb, u32 info) | ||
487 | { | ||
488 | const struct iphdr *iph = (struct iphdr *)skb->data; | ||
489 | const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + | ||
490 | (iph->ihl << 2)); | ||
491 | struct dccp_sock *dp; | ||
492 | struct inet_sock *inet; | ||
493 | const int type = skb->h.icmph->type; | ||
494 | const int code = skb->h.icmph->code; | ||
495 | struct sock *sk; | ||
496 | __u64 seq; | ||
497 | int err; | ||
498 | |||
499 | if (skb->len < (iph->ihl << 2) + 8) { | ||
500 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); | ||
501 | return; | ||
502 | } | ||
503 | |||
504 | sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport, | ||
505 | iph->saddr, dh->dccph_sport, inet_iif(skb)); | ||
506 | if (sk == NULL) { | ||
507 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); | ||
508 | return; | ||
509 | } | ||
510 | |||
511 | if (sk->sk_state == DCCP_TIME_WAIT) { | ||
512 | inet_twsk_put((struct inet_timewait_sock *)sk); | ||
513 | return; | ||
514 | } | ||
515 | |||
516 | bh_lock_sock(sk); | ||
517 | /* If too many ICMPs get dropped on busy | ||
518 | * servers this needs to be solved differently. | ||
519 | */ | ||
520 | if (sock_owned_by_user(sk)) | ||
521 | NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); | ||
522 | |||
523 | if (sk->sk_state == DCCP_CLOSED) | ||
524 | goto out; | ||
525 | |||
526 | dp = dccp_sk(sk); | ||
527 | seq = dccp_hdr_seq(skb); | ||
528 | if (sk->sk_state != DCCP_LISTEN && | ||
529 | !between48(seq, dp->dccps_swl, dp->dccps_swh)) { | ||
530 | NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS); | ||
531 | goto out; | ||
532 | } | ||
533 | |||
534 | switch (type) { | ||
535 | case ICMP_SOURCE_QUENCH: | ||
536 | /* Just silently ignore these. */ | ||
537 | goto out; | ||
538 | case ICMP_PARAMETERPROB: | ||
539 | err = EPROTO; | ||
540 | break; | ||
541 | case ICMP_DEST_UNREACH: | ||
542 | if (code > NR_ICMP_UNREACH) | ||
543 | goto out; | ||
544 | |||
545 | if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ | ||
546 | if (!sock_owned_by_user(sk)) | ||
547 | dccp_do_pmtu_discovery(sk, iph, info); | ||
548 | goto out; | ||
549 | } | ||
550 | |||
551 | err = icmp_err_convert[code].errno; | ||
552 | break; | ||
553 | case ICMP_TIME_EXCEEDED: | ||
554 | err = EHOSTUNREACH; | ||
555 | break; | ||
556 | default: | ||
557 | goto out; | ||
558 | } | ||
559 | |||
560 | switch (sk->sk_state) { | ||
561 | struct request_sock *req , **prev; | ||
562 | case DCCP_LISTEN: | ||
563 | if (sock_owned_by_user(sk)) | ||
564 | goto out; | ||
565 | req = inet_csk_search_req(sk, &prev, dh->dccph_dport, | ||
566 | iph->daddr, iph->saddr); | ||
567 | if (!req) | ||
568 | goto out; | ||
569 | |||
570 | /* | ||
571 | * ICMPs are not backlogged, hence we cannot get an established | ||
572 | * socket here. | ||
573 | */ | ||
574 | BUG_TRAP(!req->sk); | ||
575 | |||
576 | if (seq != dccp_rsk(req)->dreq_iss) { | ||
577 | NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); | ||
578 | goto out; | ||
579 | } | ||
580 | /* | ||
581 | * Still in RESPOND, just remove it silently. | ||
582 | * There is no good way to pass the error to the newly | ||
583 | * created socket, and POSIX does not want network | ||
584 | * errors returned from accept(). | ||
585 | */ | ||
586 | inet_csk_reqsk_queue_drop(sk, req, prev); | ||
587 | goto out; | ||
588 | |||
589 | case DCCP_REQUESTING: | ||
590 | case DCCP_RESPOND: | ||
591 | if (!sock_owned_by_user(sk)) { | ||
592 | DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); | ||
593 | sk->sk_err = err; | ||
594 | |||
595 | sk->sk_error_report(sk); | ||
596 | |||
597 | dccp_done(sk); | ||
598 | } else | ||
599 | sk->sk_err_soft = err; | ||
600 | goto out; | ||
601 | } | ||
602 | |||
603 | /* If we've already connected we will keep trying | ||
604 | * until we time out, or the user gives up. | ||
605 | * | ||
606 | * rfc1122 4.2.3.9 allows to consider as hard errors | ||
607 | * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, | ||
608 | * but it is obsoleted by pmtu discovery). | ||
609 | * | ||
610 | * Note, that in modern internet, where routing is unreliable | ||
611 | * and in each dark corner broken firewalls sit, sending random | ||
612 | * errors ordered by their masters even this two messages finally lose | ||
613 | * their original sense (even Linux sends invalid PORT_UNREACHs) | ||
614 | * | ||
615 | * Now we are in compliance with RFCs. | ||
616 | * --ANK (980905) | ||
617 | */ | ||
618 | |||
619 | inet = inet_sk(sk); | ||
620 | if (!sock_owned_by_user(sk) && inet->recverr) { | ||
621 | sk->sk_err = err; | ||
622 | sk->sk_error_report(sk); | ||
623 | } else /* Only an error on timeout */ | ||
624 | sk->sk_err_soft = err; | ||
625 | out: | ||
626 | bh_unlock_sock(sk); | ||
627 | sock_put(sk); | ||
628 | } | ||
629 | |||
/*
 * Send a DCCP-Reset with reset code @code on the connected socket @sk,
 * then tear down the CCID rx/tx halves since a Reset ends the
 * connection.  Returns 0 on success or a negative error.
 */
int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code)
{
	struct sk_buff *skb;
	/*
	 * FIXME: what if rebuild_header fails?
	 * Should we be doing a rebuild_header here?
	 */
	int err = inet_sk_rebuild_header(sk);

	if (err != 0)
		return err;

	skb = dccp_make_reset(sk, sk->sk_dst_cache, code);
	if (skb != NULL) {
		const struct dccp_sock *dp = dccp_sk(sk);
		const struct inet_sock *inet = inet_sk(sk);

		err = ip_build_and_send_pkt(skb, sk,
					    inet->saddr, inet->daddr, NULL);
		/* Congestion-notified transmit is not a hard failure. */
		if (err == NET_XMIT_CN)
			err = 0;

		ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
		ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
	}

	return err;
}
658 | |||
659 | static inline u64 dccp_v4_init_sequence(const struct sock *sk, | ||
660 | const struct sk_buff *skb) | ||
661 | { | ||
662 | return secure_dccp_sequence_number(skb->nh.iph->daddr, | ||
663 | skb->nh.iph->saddr, | ||
664 | dccp_hdr(skb)->dccph_dport, | ||
665 | dccp_hdr(skb)->dccph_sport); | ||
666 | } | ||
667 | |||
/*
 * Handle a DCCP-Request arriving on a listening socket: allocate a
 * request_sock, record the peer's identity and initial sequence
 * numbers, send a DCCP-Response and queue the request awaiting the
 * final Ack.  Returns 0 on success, -1 when the request is dropped.
 */
int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct inet_request_sock *ireq;
	/* NOTE(review): full dccp_sock on the stack, apparently used
	 * only as scratch for dccp_openreq_init() defaults -- confirm
	 * it is fully initialized there before any field is read. */
	struct dccp_sock dp;
	struct request_sock *req;
	struct dccp_request_sock *dreq;
	const __u32 saddr = skb->nh.iph->saddr;
	const __u32 daddr = skb->nh.iph->daddr;
	struct dst_entry *dst = NULL;

	/* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */
	if (((struct rtable *)skb->dst)->rt_flags &
	    (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/*
	 * TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk))
		goto drop;

	/*
	 * Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = reqsk_alloc(sk->sk_prot->rsk_prot);
	if (req == NULL)
		goto drop;

	/* FIXME: process options */

	dccp_openreq_init(req, &dp, skb);

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	/* FIXME: Merge Aristeu's option parsing code when ready */
	req->rcv_wnd = 100; /* Fake, option parsing will get the
			       right value */
	ireq->opt = NULL;

	/*
	 * Step 3: Process LISTEN state
	 *
	 *	Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
	 *
	 * In fact we defer setting S.GSR, S.SWL, S.SWH to
	 * dccp_create_openreq_child.
	 */
	dreq = dccp_rsk(req);
	dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq;
	dreq->dreq_iss = dccp_v4_init_sequence(sk, skb);
	dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service;

	if (dccp_v4_send_response(sk, req, dst))
		goto drop_and_free;

	inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
	return 0;

drop_and_free:
	/*
	 * FIXME: should be reqsk_free after implementing req->rsk_ops
	 */
	__reqsk_free(req);
drop:
	DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
	return -1;
}
744 | |||
/*
 * The three way handshake has completed - we got a valid ACK or DATAACK -
 * now create the new socket.
 *
 * This is the equivalent of TCP's tcp_v4_syn_recv_sock
 *
 * Returns the new child socket hashed into the established table, or
 * NULL when the accept queue is full, routing fails or allocation
 * fails (the respective LISTEN MIB counter is bumped).
 */
struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
				       struct request_sock *req,
				       struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct dccp_sock *newdp;
	struct sock *newsk;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto exit;

	newsk = dccp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto exit;

	sk_setup_caps(newsk, dst);

	/* Copy the connection identity from the request into the child. */
	newdp = dccp_sk(newsk);
	newinet = inet_sk(newsk);
	ireq = inet_rsk(req);
	newinet->daddr = ireq->rmt_addr;
	newinet->rcv_saddr = ireq->loc_addr;
	newinet->saddr = ireq->loc_addr;
	newinet->opt = ireq->opt;
	ireq->opt = NULL;	/* ownership moved to the child socket */
	newinet->mc_index = inet_iif(skb);
	newinet->mc_ttl = skb->nh.iph->ttl;
	newinet->id = jiffies;

	dccp_sync_mss(newsk, dst_mtu(dst));

	__inet_hash(&dccp_hashinfo, newsk, 0);
	__inet_inherit_port(&dccp_hashinfo, sk, newsk);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
exit:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	dst_release(dst);
	return NULL;
}
798 | |||
/*
 * Demultiplex a packet that arrived on listening socket @sk: match it
 * to a pending connection request (handing off to dccp_check_req), an
 * already-established child socket (returned bh-locked), or fall back
 * to the listening socket itself.  Returns NULL for TIME-WAIT hits.
 */
static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	const struct dccp_hdr *dh = dccp_hdr(skb);
	const struct iphdr *iph = skb->nh.iph;
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev,
						       dh->dccph_sport,
						       iph->saddr, iph->daddr);
	if (req != NULL)
		return dccp_check_req(sk, skb, req, prev);

	nsk = __inet_lookup_established(&dccp_hashinfo,
					iph->saddr, dh->dccph_sport,
					iph->daddr, ntohs(dh->dccph_dport),
					inet_iif(skb));
	if (nsk != NULL) {
		if (nsk->sk_state != DCCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put((struct inet_timewait_sock *)nsk);
		return NULL;
	}

	return sk;
}
827 | |||
828 | int dccp_v4_checksum(const struct sk_buff *skb, const u32 saddr, | ||
829 | const u32 daddr) | ||
830 | { | ||
831 | const struct dccp_hdr* dh = dccp_hdr(skb); | ||
832 | int checksum_len; | ||
833 | u32 tmp; | ||
834 | |||
835 | if (dh->dccph_cscov == 0) | ||
836 | checksum_len = skb->len; | ||
837 | else { | ||
838 | checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); | ||
839 | checksum_len = checksum_len < skb->len ? checksum_len : | ||
840 | skb->len; | ||
841 | } | ||
842 | |||
843 | tmp = csum_partial((unsigned char *)dh, checksum_len, 0); | ||
844 | return csum_tcpudp_magic(saddr, daddr, checksum_len, | ||
845 | IPPROTO_DCCP, tmp); | ||
846 | } | ||
847 | |||
848 | static int dccp_v4_verify_checksum(struct sk_buff *skb, | ||
849 | const u32 saddr, const u32 daddr) | ||
850 | { | ||
851 | struct dccp_hdr *dh = dccp_hdr(skb); | ||
852 | int checksum_len; | ||
853 | u32 tmp; | ||
854 | |||
855 | if (dh->dccph_cscov == 0) | ||
856 | checksum_len = skb->len; | ||
857 | else { | ||
858 | checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); | ||
859 | checksum_len = checksum_len < skb->len ? checksum_len : | ||
860 | skb->len; | ||
861 | } | ||
862 | tmp = csum_partial((unsigned char *)dh, checksum_len, 0); | ||
863 | return csum_tcpudp_magic(saddr, daddr, checksum_len, | ||
864 | IPPROTO_DCCP, tmp) == 0 ? 0 : -1; | ||
865 | } | ||
866 | |||
/*
 * Find an output route for a reply to @skb sent from control socket
 * @sk: the flow swaps the received packet's addresses and ports.
 * Returns the dst_entry (caller must release) or NULL on routing
 * failure (OUTNOROUTES is bumped).
 */
static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
					   struct sk_buff *skb)
{
	struct rtable *rt;
	/* Reply flow: daddr/saddr and sport/dport of @skb swapped. */
	struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif,
			    .nl_u = { .ip4_u =
				      { .daddr = skb->nh.iph->saddr,
					.saddr = skb->nh.iph->daddr,
					.tos = RT_CONN_FLAGS(sk) } },
			    .proto = sk->sk_protocol,
			    .uli_u = { .ports =
				       { .sport = dccp_hdr(skb)->dccph_dport,
					 .dport = dccp_hdr(skb)->dccph_sport }
				     }
			  };

	if (ip_route_output_flow(&rt, &fl, sk, 0)) {
		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
		return NULL;
	}

	return &rt->u.dst;
}
890 | |||
/*
 * Send a DCCP-Reset in reply to @rxskb on the shared control socket,
 * for packets that have no associated connection socket.  Per the spec
 * a Reset is never sent in response to a Reset.
 */
static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
{
	int err;
	struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
	const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
				       sizeof(struct dccp_hdr_ext) +
				       sizeof(struct dccp_hdr_reset);
	struct sk_buff *skb;
	struct dst_entry *dst;
	u64 seqno;

	/* Never send a reset in response to a reset. */
	if (rxdh->dccph_type == DCCP_PKT_RESET)
		return;

	/* Only answer packets that were addressed to this host. */
	if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
		return;

	dst = dccp_v4_route_skb(dccp_ctl_socket->sk, rxskb);
	if (dst == NULL)
		return;

	skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
	if (skb == NULL)
		goto out;

	/* Reserve space for headers. */
	skb_reserve(skb, MAX_DCCP_HEADER);
	skb->dst = dst_clone(dst);

	skb->h.raw = skb_push(skb, dccp_hdr_reset_len);
	dh = dccp_hdr(skb);
	memset(dh, 0, dccp_hdr_reset_len);

	/* Build DCCP header and checksum it. */
	dh->dccph_type = DCCP_PKT_RESET;
	dh->dccph_sport = rxdh->dccph_dport;	/* swap the ports */
	dh->dccph_dport = rxdh->dccph_sport;
	dh->dccph_doff = dccp_hdr_reset_len / 4;
	dh->dccph_x = 1;	/* 48-bit extended sequence numbers */
	dccp_hdr_reset(skb)->dccph_reset_code =
				DCCP_SKB_CB(rxskb)->dccpd_reset_code;

	/* See "8.3.1. Abnormal Termination" in draft-ietf-dccp-spec-11 */
	seqno = 0;
	if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
		dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1);

	dccp_hdr_set_seq(dh, seqno);
	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
			 DCCP_SKB_CB(rxskb)->dccpd_seq);

	dh->dccph_checksum = dccp_v4_checksum(skb, rxskb->nh.iph->saddr,
					      rxskb->nh.iph->daddr);

	bh_lock_sock(dccp_ctl_socket->sk);
	err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk,
				    rxskb->nh.iph->daddr,
				    rxskb->nh.iph->saddr, NULL);
	bh_unlock_sock(dccp_ctl_socket->sk);

	if (err == NET_XMIT_CN || err == 0) {
		DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
		DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
	}
out:
	dst_release(dst);
}
959 | |||
/*
 * dccp_v4_do_rcv - per-socket receive entry point (socket lock held)
 * @sk:  the socket the packet was demultiplexed to
 * @skb: the received packet
 *
 * Called from dccp_v4_rcv() (or via the backlog) with the socket locked.
 * Fast-paths the DCCP_OPEN state, handles LISTEN via dccp_v4_hnd_req(),
 * and funnels everything else through dccp_rcv_state_process().  On
 * failure a Reset(No Connection) is generated and the skb is freed.
 * Always returns 0 (the skb is consumed on every path).
 */
int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct dccp_hdr *dh = dccp_hdr(skb);

	if (sk->sk_state == DCCP_OPEN) { /* Fast path */
		if (dccp_rcv_established(sk, skb, dh, skb->len))
			goto reset;
		return 0;
	}

	/*
	 *  Step 3: Process LISTEN state
	 *     If S.state == LISTEN,
	 *	  If P.type == Request or P contains a valid Init Cookie
	 *	  	option,
	 *	     * Must scan the packet's options to check for an Init
	 *		Cookie.  Only the Init Cookie is processed here,
	 *		however; other options are processed in Step 8.  This
	 *		scan need only be performed if the endpoint uses Init
	 *		Cookies *
	 *	     * Generate a new socket and switch to that socket *
	 *	     Set S := new socket for this port pair
	 *	     S.state = RESPOND
	 *	     Choose S.ISS (initial seqno) or set from Init Cookie
	 *	     Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
	 *	     Continue with S.state == RESPOND
	 *	     * A Response packet will be generated in Step 11 *
	 *	  Otherwise,
	 *	     Generate Reset(No Connection) unless P.type == Reset
	 *	     Drop packet and return
	 *
	 * NOTE: the check for the packet types is done in
	 *	 dccp_rcv_state_process
	 */
	if (sk->sk_state == DCCP_LISTEN) {
		struct sock *nsk = dccp_v4_hnd_req(sk, skb);

		if (nsk == NULL)
			goto discard;

		/* nsk != sk means a child socket was created/found for a
		 * RESPOND-state request; hand the packet to it. */
		if (nsk != sk) {
			if (dccp_child_process(sk, nsk, skb))
				goto reset;
			return 0;
		}
	}

	if (dccp_rcv_state_process(sk, skb, dh, skb->len))
		goto reset;
	return 0;

reset:
	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
	dccp_v4_ctl_send_reset(skb);
discard:
	kfree_skb(skb);
	return 0;
}
1018 | |||
/*
 * dccp_invalid_packet - RFC "Step 1" header sanity checks
 * @skb: received packet
 *
 * Returns 1 (drop) if the packet fails any basic validity test, 0 if it
 * looks like a well-formed DCCP packet.  The pskb_may_pull() calls may
 * reallocate skb data, which is why dccp_hdr() is re-evaluated after the
 * second pull before the header is examined again.
 */
static inline int dccp_invalid_packet(struct sk_buff *skb)
{
	const struct dccp_hdr *dh;

	/* Only packets addressed to this host are of interest. */
	if (skb->pkt_type != PACKET_HOST)
		return 1;

	/* Ensure the fixed-size generic header is in linear data. */
	if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) {
		LIMIT_NETDEBUG(KERN_WARNING "DCCP: pskb_may_pull failed\n");
		return 1;
	}

	dh = dccp_hdr(skb);

	/* If the packet type is not understood, drop packet and return */
	if (dh->dccph_type >= DCCP_PKT_INVALID) {
		LIMIT_NETDEBUG(KERN_WARNING "DCCP: invalid packet type\n");
		return 1;
	}

	/*
	 * If P.Data Offset is too small for packet type, or too large for
	 * packet, drop packet and return
	 */
	if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) {
		LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) "
					    "too small 1\n",
			       dh->dccph_doff);
		return 1;
	}

	/* "too large for packet": the full doff-declared header must fit. */
	if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) {
		LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) "
					    "too small 2\n",
			       dh->dccph_doff);
		return 1;
	}

	/* Re-read: pskb_may_pull above may have moved skb->data. */
	dh = dccp_hdr(skb);

	/*
	 * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet
	 * has short sequence numbers), drop packet and return
	 */
	if (dh->dccph_x == 0 &&
	    dh->dccph_type != DCCP_PKT_DATA &&
	    dh->dccph_type != DCCP_PKT_ACK &&
	    dh->dccph_type != DCCP_PKT_DATAACK) {
		LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.type (%s) not Data, Ack "
					    "nor DataAck and P.X == 0\n",
			       dccp_packet_name(dh->dccph_type));
		return 1;
	}

	/* If the header checksum is incorrect, drop packet and return */
	if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr,
				    skb->nh.iph->daddr) < 0) {
		LIMIT_NETDEBUG(KERN_WARNING "DCCP: header checksum is "
					    "incorrect\n");
		return 1;
	}

	return 0;
}
1083 | |||
1084 | /* this is called when real data arrives */ | ||
1085 | int dccp_v4_rcv(struct sk_buff *skb) | ||
1086 | { | ||
1087 | const struct dccp_hdr *dh; | ||
1088 | struct sock *sk; | ||
1089 | int rc; | ||
1090 | |||
1091 | /* Step 1: Check header basics: */ | ||
1092 | |||
1093 | if (dccp_invalid_packet(skb)) | ||
1094 | goto discard_it; | ||
1095 | |||
1096 | dh = dccp_hdr(skb); | ||
1097 | #if 0 | ||
1098 | /* | ||
1099 | * Use something like this to simulate some DATA/DATAACK loss to test | ||
1100 | * dccp_ackpkts_add, you'll get something like this on a session that | ||
1101 | * sends 10 DATA/DATAACK packets: | ||
1102 | * | ||
1103 | * ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1| | ||
1104 | * | ||
1105 | * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet | ||
1106 | * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets | ||
1107 | * with the same state | ||
1108 | * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet | ||
1109 | * | ||
1110 | * So... | ||
1111 | * | ||
1112 | * 281473596467422 was received | ||
1113 | * 281473596467421 was not received | ||
1114 | * 281473596467420 was received | ||
1115 | * 281473596467419 was not received | ||
1116 | * 281473596467418 was received | ||
1117 | * 281473596467417 was not received | ||
1118 | * 281473596467416 was received | ||
1119 | * 281473596467415 was not received | ||
1120 | * 281473596467414 was received | ||
1121 | * 281473596467413 was received (this one was the 3way handshake | ||
1122 | * RESPONSE) | ||
1123 | * | ||
1124 | */ | ||
1125 | if (dh->dccph_type == DCCP_PKT_DATA || | ||
1126 | dh->dccph_type == DCCP_PKT_DATAACK) { | ||
1127 | static int discard = 0; | ||
1128 | |||
1129 | if (discard) { | ||
1130 | discard = 0; | ||
1131 | goto discard_it; | ||
1132 | } | ||
1133 | discard = 1; | ||
1134 | } | ||
1135 | #endif | ||
1136 | DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); | ||
1137 | DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; | ||
1138 | |||
1139 | dccp_pr_debug("%8.8s " | ||
1140 | "src=%u.%u.%u.%u@%-5d " | ||
1141 | "dst=%u.%u.%u.%u@%-5d seq=%llu", | ||
1142 | dccp_packet_name(dh->dccph_type), | ||
1143 | NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport), | ||
1144 | NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport), | ||
1145 | (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); | ||
1146 | |||
1147 | if (dccp_packet_without_ack(skb)) { | ||
1148 | DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ; | ||
1149 | dccp_pr_debug_cat("\n"); | ||
1150 | } else { | ||
1151 | DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); | ||
1152 | dccp_pr_debug_cat(", ack=%llu\n", | ||
1153 | (unsigned long long) | ||
1154 | DCCP_SKB_CB(skb)->dccpd_ack_seq); | ||
1155 | } | ||
1156 | |||
1157 | /* Step 2: | ||
1158 | * Look up flow ID in table and get corresponding socket */ | ||
1159 | sk = __inet_lookup(&dccp_hashinfo, | ||
1160 | skb->nh.iph->saddr, dh->dccph_sport, | ||
1161 | skb->nh.iph->daddr, ntohs(dh->dccph_dport), | ||
1162 | inet_iif(skb)); | ||
1163 | |||
1164 | /* | ||
1165 | * Step 2: | ||
1166 | * If no socket ... | ||
1167 | * Generate Reset(No Connection) unless P.type == Reset | ||
1168 | * Drop packet and return | ||
1169 | */ | ||
1170 | if (sk == NULL) { | ||
1171 | dccp_pr_debug("failed to look up flow ID in table and " | ||
1172 | "get corresponding socket\n"); | ||
1173 | goto no_dccp_socket; | ||
1174 | } | ||
1175 | |||
1176 | /* | ||
1177 | * Step 2: | ||
1178 | * ... or S.state == TIMEWAIT, | ||
1179 | * Generate Reset(No Connection) unless P.type == Reset | ||
1180 | * Drop packet and return | ||
1181 | */ | ||
1182 | |||
1183 | if (sk->sk_state == DCCP_TIME_WAIT) { | ||
1184 | dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: " | ||
1185 | "do_time_wait\n"); | ||
1186 | goto do_time_wait; | ||
1187 | } | ||
1188 | |||
1189 | if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { | ||
1190 | dccp_pr_debug("xfrm4_policy_check failed\n"); | ||
1191 | goto discard_and_relse; | ||
1192 | } | ||
1193 | |||
1194 | if (sk_filter(sk, skb, 0)) { | ||
1195 | dccp_pr_debug("sk_filter failed\n"); | ||
1196 | goto discard_and_relse; | ||
1197 | } | ||
1198 | |||
1199 | skb->dev = NULL; | ||
1200 | |||
1201 | bh_lock_sock(sk); | ||
1202 | rc = 0; | ||
1203 | if (!sock_owned_by_user(sk)) | ||
1204 | rc = dccp_v4_do_rcv(sk, skb); | ||
1205 | else | ||
1206 | sk_add_backlog(sk, skb); | ||
1207 | bh_unlock_sock(sk); | ||
1208 | |||
1209 | sock_put(sk); | ||
1210 | return rc; | ||
1211 | |||
1212 | no_dccp_socket: | ||
1213 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) | ||
1214 | goto discard_it; | ||
1215 | /* | ||
1216 | * Step 2: | ||
1217 | * Generate Reset(No Connection) unless P.type == Reset | ||
1218 | * Drop packet and return | ||
1219 | */ | ||
1220 | if (dh->dccph_type != DCCP_PKT_RESET) { | ||
1221 | DCCP_SKB_CB(skb)->dccpd_reset_code = | ||
1222 | DCCP_RESET_CODE_NO_CONNECTION; | ||
1223 | dccp_v4_ctl_send_reset(skb); | ||
1224 | } | ||
1225 | |||
1226 | discard_it: | ||
1227 | /* Discard frame. */ | ||
1228 | kfree_skb(skb); | ||
1229 | return 0; | ||
1230 | |||
1231 | discard_and_relse: | ||
1232 | sock_put(sk); | ||
1233 | goto discard_it; | ||
1234 | |||
1235 | do_time_wait: | ||
1236 | inet_twsk_put((struct inet_timewait_sock *)sk); | ||
1237 | goto no_dccp_socket; | ||
1238 | } | ||
1239 | |||
/*
 * dccp_v4_init_sock - initialize a freshly created DCCP socket
 * @sk: the new socket
 *
 * Sets up default options, the optional ack-vector buffer, the CCID
 * control blocks, timers and initial state.  Returns 0 or -ENOMEM.
 *
 * NOTE(review): dccp_ctl_socket_init is a function-static flag used so
 * the very first socket created (the module's control socket) skips
 * CCID setup; it is not lock-protected — presumably safe because the
 * control socket is created during module init before any others, but
 * worth confirming.
 */
static int dccp_v4_init_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	static int dccp_ctl_socket_init = 1;

	dccp_options_init(&dp->dccps_options);

	if (dp->dccps_options.dccpo_send_ack_vector) {
		dp->dccps_hc_rx_ackpkts =
			dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN,
					   GFP_KERNEL);

		if (dp->dccps_hc_rx_ackpkts == NULL)
			return -ENOMEM;
	}

	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * lets leave it here, later the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(!dccp_ctl_socket_init)) {
		dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid,
						 sk);
		dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid,
						 sk);
	    	if (dp->dccps_hc_rx_ccid == NULL ||
		    dp->dccps_hc_tx_ccid == NULL) {
			/* Partial failure: tear down whichever succeeded
			 * (ccid_exit/dccp_ackpkts_free tolerate NULL). */
			ccid_exit(dp->dccps_hc_rx_ccid, sk);
			ccid_exit(dp->dccps_hc_tx_ccid, sk);
			dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
			dp->dccps_hc_rx_ackpkts = NULL;
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else
		dccp_ctl_socket_init = 0;

	dccp_init_xmit_timers(sk);
	inet_csk(sk)->icsk_rto = DCCP_TIMEOUT_INIT;
	sk->sk_state = DCCP_CLOSED;
	sk->sk_write_space = dccp_write_space;
	dp->dccps_mss_cache = 536;
	dp->dccps_role = DCCP_ROLE_UNDEFINED;

	return 0;
}
1289 | |||
/*
 * dccp_v4_destroy_sock - release per-socket DCCP resources at destruction
 * @sk: the socket being destroyed
 *
 * Frees the pending retransmit skb, drops the bind-bucket reference and
 * tears down the ack-vector buffer and both CCID control blocks.
 */
static int dccp_v4_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(&dccp_hashinfo, sk);

	/* hc_{rx,tx}_exit undoes hc_{rx,tx}_init; ccid_exit drops the CCID. */
	ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
	dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
	dp->dccps_hc_rx_ackpkts = NULL;
	ccid_exit(dp->dccps_hc_rx_ccid, sk);
	ccid_exit(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	return 0;
}
1317 | |||
/* Free the IP options copied into the request sock at connection-request
 * time (kfree(NULL) is a no-op when none were present). */
static void dccp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}
1322 | |||
/* Operations for embryonic (request_sock) DCCP connections. */
static struct request_sock_ops dccp_request_sock_ops = {
	.family		= PF_INET,
	.obj_size	= sizeof(struct dccp_request_sock),
	.rtx_syn_ack	= dccp_v4_send_response,	/* (re)send RESPONSE */
	.send_ack	= dccp_v4_reqsk_send_ack,
	.destructor	= dccp_v4_reqsk_destructor,
	.send_reset	= dccp_v4_ctl_send_reset,
};
1331 | |||
/* The DCCP/IPv4 protocol descriptor registered with the socket layer. */
struct proto dccp_v4_prot = {
	.name			= "DCCP",
	.owner			= THIS_MODULE,
	.close			= dccp_close,
	.connect		= dccp_v4_connect,
	.disconnect		= dccp_disconnect,
	.ioctl			= dccp_ioctl,
	.init			= dccp_v4_init_sock,
	.setsockopt		= dccp_setsockopt,
	.getsockopt		= dccp_getsockopt,
	.sendmsg		= dccp_sendmsg,
	.recvmsg		= dccp_recvmsg,
	.backlog_rcv		= dccp_v4_do_rcv,
	.hash			= dccp_v4_hash,
	.unhash			= dccp_v4_unhash,
	.accept			= inet_csk_accept,
	.get_port		= dccp_v4_get_port,
	.shutdown		= dccp_shutdown,
	.destroy		= dccp_v4_destroy_sock,
	.orphan_count		= &dccp_orphan_count,
	.max_header		= MAX_DCCP_HEADER,
	.obj_size		= sizeof(struct dccp_sock),
	.rsk_prot		= &dccp_request_sock_ops,
	.twsk_obj_size		= sizeof(struct inet_timewait_sock),
};
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c new file mode 100644 index 000000000000..ce5dff4ac22e --- /dev/null +++ b/net/dccp/minisocks.c | |||
@@ -0,0 +1,264 @@ | |||
1 | /* | ||
2 | * net/dccp/minisocks.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | ||
12 | |||
13 | #include <linux/config.h> | ||
14 | #include <linux/dccp.h> | ||
15 | #include <linux/skbuff.h> | ||
16 | #include <linux/timer.h> | ||
17 | |||
18 | #include <net/sock.h> | ||
19 | #include <net/xfrm.h> | ||
20 | #include <net/inet_timewait_sock.h> | ||
21 | |||
22 | #include "ccid.h" | ||
23 | #include "dccp.h" | ||
24 | |||
/* TIME_WAIT bookkeeping shared with the generic inet_timewait machinery:
 * bucket limits, the slow "hangman" timer/workqueue and the short-time
 * calendar timer all reference this structure. */
struct inet_timewait_death_row dccp_death_row = {
	.sysctl_max_tw_buckets = NR_FILE * 2,
	.period		= DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
	.death_lock	= SPIN_LOCK_UNLOCKED,
	.hashinfo	= &dccp_hashinfo,
	.tw_timer	= TIMER_INITIALIZER(inet_twdr_hangman, 0,
					    (unsigned long)&dccp_death_row),
	.twkill_work	= __WORK_INITIALIZER(dccp_death_row.twkill_work,
					     inet_twdr_twkill_work,
					     &dccp_death_row),
/* Short-time timewait calendar */

	.twcal_hand	= -1,
	.twcal_timer	= TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
					    (unsigned long)&dccp_death_row),
};
41 | |||
/*
 * dccp_time_wait - move a closing socket into a timewait bucket
 * @sk:    the socket entering a timewait-like state
 * @state: timewait substate (e.g. DCCP_TIME_WAIT)
 * @timeo: requested timeout; clamped to at least the RTO-derived minimum,
 *         and forced to DCCP_TIMEWAIT_LEN for DCCP_TIME_WAIT proper
 *
 * If no bucket can be allocated (table full / OOM) the socket is simply
 * closed non-gracefully.  Always finishes by dccp_done(sk).
 */
void dccp_time_wait(struct sock *sk, int state, int timeo)
{
	struct inet_timewait_sock *tw = NULL;

	if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets)
		tw = inet_twsk_alloc(sk, state);

	if (tw != NULL) {
		const struct inet_connection_sock *icsk = inet_csk(sk);
		/* rto = 3.5 * icsk_rto, computed with shifts */
		const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);

		/* Linkage updates. */
		__inet_twsk_hashdance(tw, sk, &dccp_hashinfo);

		/* Get the TIME_WAIT timeout firing. */
		if (timeo < rto)
			timeo = rto;

		tw->tw_timeout = DCCP_TIMEWAIT_LEN;
		if (state == DCCP_TIME_WAIT)
			timeo = DCCP_TIMEWAIT_LEN;

		inet_twsk_schedule(tw, &dccp_death_row, timeo,
				   DCCP_TIMEWAIT_LEN);
		/* Drop the reference inet_twsk_alloc() gave us; the hash
		 * and timer hold their own. */
		inet_twsk_put(tw);
	} else {
		/* Sorry, if we're out of memory, just CLOSE this
		 * socket up.  We've got bigger problems than
		 * non-graceful socket closings.
		 */
		LIMIT_NETDEBUG(KERN_INFO "DCCP: time wait bucket "
				         "table overflow\n");
	}

	dccp_done(sk);
}
78 | |||
79 | struct sock *dccp_create_openreq_child(struct sock *sk, | ||
80 | const struct request_sock *req, | ||
81 | const struct sk_buff *skb) | ||
82 | { | ||
83 | /* | ||
84 | * Step 3: Process LISTEN state | ||
85 | * | ||
86 | * // Generate a new socket and switch to that socket | ||
87 | * Set S := new socket for this port pair | ||
88 | */ | ||
89 | struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC); | ||
90 | |||
91 | if (newsk != NULL) { | ||
92 | const struct dccp_request_sock *dreq = dccp_rsk(req); | ||
93 | struct inet_connection_sock *newicsk = inet_csk(sk); | ||
94 | struct dccp_sock *newdp = dccp_sk(newsk); | ||
95 | |||
96 | newdp->dccps_hc_rx_ackpkts = NULL; | ||
97 | newdp->dccps_role = DCCP_ROLE_SERVER; | ||
98 | newicsk->icsk_rto = DCCP_TIMEOUT_INIT; | ||
99 | |||
100 | if (newdp->dccps_options.dccpo_send_ack_vector) { | ||
101 | newdp->dccps_hc_rx_ackpkts = | ||
102 | dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, | ||
103 | GFP_ATOMIC); | ||
104 | /* | ||
105 | * XXX: We're using the same CCIDs set on the parent, | ||
106 | * i.e. sk_clone copied the master sock and left the | ||
107 | * CCID pointers for this child, that is why we do the | ||
108 | * __ccid_get calls. | ||
109 | */ | ||
110 | if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL)) | ||
111 | goto out_free; | ||
112 | } | ||
113 | |||
114 | if (unlikely(ccid_hc_rx_init(newdp->dccps_hc_rx_ccid, | ||
115 | newsk) != 0 || | ||
116 | ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, | ||
117 | newsk) != 0)) { | ||
118 | dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts); | ||
119 | ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk); | ||
120 | ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk); | ||
121 | out_free: | ||
122 | /* It is still raw copy of parent, so invalidate | ||
123 | * destructor and make plain sk_free() */ | ||
124 | newsk->sk_destruct = NULL; | ||
125 | sk_free(newsk); | ||
126 | return NULL; | ||
127 | } | ||
128 | |||
129 | __ccid_get(newdp->dccps_hc_rx_ccid); | ||
130 | __ccid_get(newdp->dccps_hc_tx_ccid); | ||
131 | |||
132 | /* | ||
133 | * Step 3: Process LISTEN state | ||
134 | * | ||
135 | * Choose S.ISS (initial seqno) or set from Init Cookie | ||
136 | * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init | ||
137 | * Cookie | ||
138 | */ | ||
139 | |||
140 | /* See dccp_v4_conn_request */ | ||
141 | newdp->dccps_options.dccpo_sequence_window = req->rcv_wnd; | ||
142 | |||
143 | newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr; | ||
144 | dccp_update_gsr(newsk, dreq->dreq_isr); | ||
145 | |||
146 | newdp->dccps_iss = dreq->dreq_iss; | ||
147 | dccp_update_gss(newsk, dreq->dreq_iss); | ||
148 | |||
149 | /* | ||
150 | * SWL and AWL are initially adjusted so that they are not less than | ||
151 | * the initial Sequence Numbers received and sent, respectively: | ||
152 | * SWL := max(GSR + 1 - floor(W/4), ISR), | ||
153 | * AWL := max(GSS - W' + 1, ISS). | ||
154 | * These adjustments MUST be applied only at the beginning of the | ||
155 | * connection. | ||
156 | */ | ||
157 | dccp_set_seqno(&newdp->dccps_swl, | ||
158 | max48(newdp->dccps_swl, newdp->dccps_isr)); | ||
159 | dccp_set_seqno(&newdp->dccps_awl, | ||
160 | max48(newdp->dccps_awl, newdp->dccps_iss)); | ||
161 | |||
162 | dccp_init_xmit_timers(newsk); | ||
163 | |||
164 | DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS); | ||
165 | } | ||
166 | return newsk; | ||
167 | } | ||
168 | |||
/*
 * Process an incoming packet for RESPOND sockets represented
 * as an request_sock.
 *
 * Returns the newly created child socket on a valid completing ACK /
 * DATAACK, or NULL when the packet was a retransmitted REQUEST (answered
 * with another RESPONSE), invalid (answered with a Reset and the request
 * dropped), or the accept queue overflowed.
 */
struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
			    struct request_sock *req,
			    struct request_sock **prev)
{
	struct sock *child = NULL;

	/* Check for retransmitted REQUEST */
	if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
		if (after48(DCCP_SKB_CB(skb)->dccpd_seq,
			    dccp_rsk(req)->dreq_isr)) {
			struct dccp_request_sock *dreq = dccp_rsk(req);

			dccp_pr_debug("Retransmitted REQUEST\n");
			/* Send another RESPONSE packet */
			/* Bump our ISS and track the peer's new seqno
			 * before retransmitting via rtx_syn_ack. */
			dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1);
			dccp_set_seqno(&dreq->dreq_isr,
				       DCCP_SKB_CB(skb)->dccpd_seq);
			req->rsk_ops->rtx_syn_ack(sk, req, NULL);
		}
		/* Network Duplicate, discard packet */
		return NULL;
	}

	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;

	/* Only ACK or DATAACK may complete the handshake. */
	if (dccp_hdr(skb)->dccph_type != DCCP_PKT_ACK &&
	    dccp_hdr(skb)->dccph_type != DCCP_PKT_DATAACK)
		goto drop;

	/* Invalid ACK */
	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) {
		dccp_pr_debug("Invalid ACK number: ack_seq=%llu, "
			      "dreq_iss=%llu\n",
			      (unsigned long long)
			      DCCP_SKB_CB(skb)->dccpd_ack_seq,
			      (unsigned long long)
			      dccp_rsk(req)->dreq_iss);
		goto drop;
	}

	child = dccp_v4_request_recv_sock(sk, skb, req, NULL);
	if (child == NULL)
		goto listen_overflow;

	/* FIXME: deal with options */

	/* Move the request from the pending hash to the accept queue. */
	inet_csk_reqsk_queue_unlink(sk, req, prev);
	inet_csk_reqsk_queue_removed(sk, req);
	inet_csk_reqsk_queue_add(sk, req, child);
out:
	return child;
listen_overflow:
	dccp_pr_debug("listen_overflow!\n");
	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
drop:
	if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
		req->rsk_ops->send_reset(skb);

	inet_csk_reqsk_queue_drop(sk, req, prev);
	goto out;
}
234 | |||
/*
 * Queue segment on the new socket if the new socket is active,
 * otherwise we just shortcircuit this and continue with
 * the new socket.
 *
 * Called with bh_lock_sock(child) held by the caller; this function
 * releases that lock and drops the child reference before returning.
 * Returns the dccp_rcv_state_process() result (0 when deferred to the
 * backlog).
 */
int dccp_child_process(struct sock *parent, struct sock *child,
		       struct sk_buff *skb)
{
	int ret = 0;
	const int state = child->sk_state;

	if (!sock_owned_by_user(child)) {
		ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb),
					     skb->len);

		/* Wakeup parent, send SIGIO */
		if (state == DCCP_RESPOND && child->sk_state != state)
			parent->sk_data_ready(parent, 0);
	} else {
		/* Alas, it is possible again, because we do lookup
		 * in main socket hash table and lock on listening
		 * socket does not protect us more.
		 */
		sk_add_backlog(child, skb);
	}

	bh_unlock_sock(child);
	sock_put(child);
	return ret;
}
diff --git a/net/dccp/options.c b/net/dccp/options.c new file mode 100644 index 000000000000..382c5894acb2 --- /dev/null +++ b/net/dccp/options.c | |||
@@ -0,0 +1,855 @@ | |||
1 | /* | ||
2 | * net/dccp/options.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Copyright (c) 2005 Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org> | ||
6 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net> | ||
7 | * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License | ||
11 | * as published by the Free Software Foundation; either version | ||
12 | * 2 of the License, or (at your option) any later version. | ||
13 | */ | ||
14 | #include <linux/config.h> | ||
15 | #include <linux/dccp.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/types.h> | ||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/skbuff.h> | ||
20 | |||
21 | #include "ccid.h" | ||
22 | #include "dccp.h" | ||
23 | |||
24 | static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, | ||
25 | struct sock *sk, | ||
26 | const u64 ackno, | ||
27 | const unsigned char len, | ||
28 | const unsigned char *vector); | ||
29 | |||
/* stores the default values for new connection. may be changed with sysctl */
static const struct dccp_options dccpo_default_values = {
	.dccpo_sequence_window	  = DCCPF_INITIAL_SEQUENCE_WINDOW,
	.dccpo_ccid		  = DCCPF_INITIAL_CCID,
	.dccpo_send_ack_vector	  = DCCPF_INITIAL_SEND_ACK_VECTOR,
	.dccpo_send_ndp_count	  = DCCPF_INITIAL_SEND_NDP_COUNT,
};
37 | |||
38 | void dccp_options_init(struct dccp_options *dccpo) | ||
39 | { | ||
40 | memcpy(dccpo, &dccpo_default_values, sizeof(*dccpo)); | ||
41 | } | ||
42 | |||
43 | static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len) | ||
44 | { | ||
45 | u32 value = 0; | ||
46 | |||
47 | if (len > 3) | ||
48 | value += *bf++ << 24; | ||
49 | if (len > 2) | ||
50 | value += *bf++ << 16; | ||
51 | if (len > 1) | ||
52 | value += *bf++ << 8; | ||
53 | if (len > 0) | ||
54 | value += *bf; | ||
55 | |||
56 | return value; | ||
57 | } | ||
58 | |||
/*
 * dccp_parse_options - walk the option area of a received DCCP packet
 * @sk:  receiving socket
 * @skb: the packet; options live between dccp_hdr_len() and doff*4
 *
 * Fills dp->dccps_options_received with whatever recognized options are
 * present, dispatches CCID option ranges (128-191 HC-Sender, 192-255
 * HC-Receiver) to the respective CCID parser, and ignores unknown
 * options.  Returns 0 on success or -1 on a malformed option, in which
 * case the skb's reset code is set to DCCP_RESET_CODE_OPTION_ERROR.
 *
 * NOTE(review): the *(u16 *)/*(u32 *) loads below assume option payloads
 * are sufficiently aligned — worth confirming on strict-alignment
 * architectures.
 */
int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
{
	struct dccp_sock *dp = dccp_sk(sk);
#ifdef CONFIG_IP_DCCP_DEBUG
	const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
					"CLIENT rx opt: " : "server rx opt: ";
#endif
	const struct dccp_hdr *dh = dccp_hdr(skb);
	const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type;
	unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
	unsigned char *opt_ptr = options;
	const unsigned char *opt_end = (unsigned char *)dh +
					(dh->dccph_doff * 4);
	struct dccp_options_received *opt_recv = &dp->dccps_options_received;
	unsigned char opt, len;
	unsigned char *value;

	memset(opt_recv, 0, sizeof(*opt_recv));

	while (opt_ptr != opt_end) {
		opt   = *opt_ptr++;
		len   = 0;
		value = NULL;

		/* Check if this isn't a single byte option */
		if (opt > DCCPO_MAX_RESERVED) {
			if (opt_ptr == opt_end)
				goto out_invalid_option;

			len = *opt_ptr++;
			if (len < 3)
				goto out_invalid_option;
			/*
			 * Remove the type and len fields, leaving
			 * just the value size
			 */
			len	-= 2;
			value	= opt_ptr;
			opt_ptr += len;

			/* Declared length must not run past the option area. */
			if (opt_ptr > opt_end)
				goto out_invalid_option;
		}

		switch (opt) {
		case DCCPO_PADDING:
			break;
		case DCCPO_NDP_COUNT:
			if (len > 3)
				goto out_invalid_option;

			opt_recv->dccpor_ndp = dccp_decode_value_var(value, len);
			dccp_pr_debug("%sNDP count=%d\n", debug_prefix,
				      opt_recv->dccpor_ndp);
			break;
		case DCCPO_ACK_VECTOR_0:
			if (len > DCCP_MAX_ACK_VECTOR_LEN)
				goto out_invalid_option;

			/* Ack vectors on DATA packets carry no ackno to
			 * check against, so skip them. */
			if (pkt_type == DCCP_PKT_DATA)
				continue;

			opt_recv->dccpor_ack_vector_len = len;
			opt_recv->dccpor_ack_vector_idx = value - options;

			dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n",
				      debug_prefix, len,
				      (unsigned long long)
				      DCCP_SKB_CB(skb)->dccpd_ack_seq);
			dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq,
					     value, len);
			dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts,
							 sk,
						 DCCP_SKB_CB(skb)->dccpd_ack_seq,
							 len, value);
			break;
		case DCCPO_TIMESTAMP:
			if (len != 4)
				goto out_invalid_option;

			opt_recv->dccpor_timestamp = ntohl(*(u32 *)value);

			/* Remember the peer's timestamp and when we saw it
			 * so we can echo it back with an elapsed time. */
			dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp;
			do_gettimeofday(&dp->dccps_timestamp_time);

			dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n",
				      debug_prefix, opt_recv->dccpor_timestamp,
				      (unsigned long long)
				      DCCP_SKB_CB(skb)->dccpd_ack_seq);
			break;
		case DCCPO_TIMESTAMP_ECHO:
			/* 4 bytes echo, optionally + 2 or 4 bytes elapsed. */
			if (len != 4 && len != 6 && len != 8)
				goto out_invalid_option;

			opt_recv->dccpor_timestamp_echo = ntohl(*(u32 *)value);

			dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, ",
				      debug_prefix,
				      opt_recv->dccpor_timestamp_echo,
				      len + 2,
				      (unsigned long long)
				      DCCP_SKB_CB(skb)->dccpd_ack_seq);

			if (len > 4) {
				if (len == 6)
					opt_recv->dccpor_elapsed_time =
						 ntohs(*(u16 *)(value + 4));
				else
					opt_recv->dccpor_elapsed_time =
						 ntohl(*(u32 *)(value + 4));

				dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n",
					      debug_prefix,
					      opt_recv->dccpor_elapsed_time);
			}
			break;
		case DCCPO_ELAPSED_TIME:
			if (len != 2 && len != 4)
				goto out_invalid_option;

			if (pkt_type == DCCP_PKT_DATA)
				continue;

			if (len == 2)
				opt_recv->dccpor_elapsed_time =
							ntohs(*(u16 *)value);
			else
				opt_recv->dccpor_elapsed_time =
							ntohl(*(u32 *)value);

			dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix,
				      opt_recv->dccpor_elapsed_time);
			break;
			/*
			 * From draft-ietf-dccp-spec-11.txt:
			 *
			 *	Option numbers 128 through 191 are for
			 *	options sent from the HC-Sender to the
			 *	HC-Receiver; option numbers 192 through 255
			 *	are for options sent from the HC-Receiver to
			 *	the HC-Sender.
			 */
		case 128 ... 191: {
			const u16 idx = value - options;

			if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk,
						     opt, len, idx,
						     value) != 0)
				goto out_invalid_option;
		}
			break;
		case 192 ... 255: {
			const u16 idx = value - options;

			if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
						     opt, len, idx,
						     value) != 0)
				goto out_invalid_option;
		}
			break;
		default:
			pr_info("DCCP(%p): option %d(len=%d) not "
				"implemented, ignoring\n",
				sk, opt, len);
			break;
	        }
	}

	return 0;

out_invalid_option:
	DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR;
	pr_info("DCCP(%p): invalid option %d, len=%d\n", sk, opt, len);
	return -1;
}
235 | |||
236 | static void dccp_encode_value_var(const u32 value, unsigned char *to, | ||
237 | const unsigned int len) | ||
238 | { | ||
239 | if (len > 3) | ||
240 | *to++ = (value & 0xFF000000) >> 24; | ||
241 | if (len > 2) | ||
242 | *to++ = (value & 0xFF0000) >> 16; | ||
243 | if (len > 1) | ||
244 | *to++ = (value & 0xFF00) >> 8; | ||
245 | if (len > 0) | ||
246 | *to++ = (value & 0xFF); | ||
247 | } | ||
248 | |||
static inline int dccp_ndp_len(const int ndp)
{
	/* Bytes needed to encode the NDP count: 1, 2 or 3. */
	if (likely(ndp <= 0xFF))
		return 1;
	return ndp <= 0xFFFF ? 2 : 3;
}
253 | |||
254 | void dccp_insert_option(struct sock *sk, struct sk_buff *skb, | ||
255 | const unsigned char option, | ||
256 | const void *value, const unsigned char len) | ||
257 | { | ||
258 | unsigned char *to; | ||
259 | |||
260 | if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN) { | ||
261 | LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert " | ||
262 | "%d option!\n", option); | ||
263 | return; | ||
264 | } | ||
265 | |||
266 | DCCP_SKB_CB(skb)->dccpd_opt_len += len + 2; | ||
267 | |||
268 | to = skb_push(skb, len + 2); | ||
269 | *to++ = option; | ||
270 | *to++ = len + 2; | ||
271 | |||
272 | memcpy(to, value, len); | ||
273 | } | ||
274 | |||
275 | EXPORT_SYMBOL_GPL(dccp_insert_option); | ||
276 | |||
/*
 * Insert an NDP Count option reporting the run of non-data packets that
 * preceded this one, and update the running counter for the packet
 * being sent now.
 */
static void dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb)
{
	struct dccp_sock *dp = dccp_sk(sk);
	/* Snapshot the count *before* accounting for this packet */
	int ndp = dp->dccps_ndp_count;

	if (dccp_non_data_packet(skb))
		++dp->dccps_ndp_count;
	else
		dp->dccps_ndp_count = 0;

	if (ndp > 0) {
		unsigned char *ptr;
		const int ndp_len = dccp_ndp_len(ndp);
		const int len = ndp_len + 2;	/* + type/length octets */

		/* Option area is full: drop the option rather than overflow */
		if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
			return;

		DCCP_SKB_CB(skb)->dccpd_opt_len += len;

		ptr = skb_push(skb, len);
		*ptr++ = DCCPO_NDP_COUNT;
		*ptr++ = len;
		/* Variable-length big-endian encoding of the count */
		dccp_encode_value_var(ndp, ptr, ndp_len);
	}
}
303 | |||
304 | static inline int dccp_elapsed_time_len(const u32 elapsed_time) | ||
305 | { | ||
306 | return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4; | ||
307 | } | ||
308 | |||
/*
 * Insert an Elapsed Time option (@elapsed_time in 10-microsecond units)
 * into @skb.  Nothing is inserted when @elapsed_time is zero.
 */
void dccp_insert_option_elapsed_time(struct sock *sk,
				     struct sk_buff *skb,
				     u32 elapsed_time)
{
#ifdef CONFIG_IP_DCCP_DEBUG
	struct dccp_sock *dp = dccp_sk(sk);
	const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
					"CLIENT TX opt: " : "server TX opt: ";
#endif
	/* Payload is 2 or 4 bytes depending on magnitude (0 = omit) */
	const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
	const int len = 2 + elapsed_time_len;	/* + type/length octets */
	unsigned char *to;

	if (elapsed_time_len == 0)
		return;

	/* Option area is full: drop the option rather than overflow */
	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
		LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to "
			       "insert elapsed time!\n");
		return;
	}

	DCCP_SKB_CB(skb)->dccpd_opt_len += len;

	to = skb_push(skb, len);
	*to++ = DCCPO_ELAPSED_TIME;
	*to++ = len;

	/* Payload is big-endian, 16 or 32 bits wide */
	if (elapsed_time_len == 2) {
		const u16 var16 = htons((u16)elapsed_time);
		memcpy(to, &var16, 2);
	} else {
		const u32 var32 = htonl(elapsed_time);
		memcpy(to, &var32, 4);
	}

	dccp_pr_debug("%sELAPSED_TIME=%u, len=%d, seqno=%llu\n",
		      debug_prefix, elapsed_time,
		      len,
		      (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
}
350 | |||
351 | EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); | ||
352 | |||
/*
 * Insert an Ack Vector option (type 0) holding the receiver's current
 * ack vector buffer, preceded (when nonzero) by an Elapsed Time option,
 * and record it as the single outstanding ack record.
 */
static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb)
{
	struct dccp_sock *dp = dccp_sk(sk);
#ifdef CONFIG_IP_DCCP_DEBUG
	const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
					"CLIENT TX opt: " : "server TX opt: ";
#endif
	struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
	int len = ap->dccpap_buf_vector_len + 2;	/* + type/len octets */
	/* Time since the buffer was last updated, in 10us units */
	const u32 elapsed_time = timeval_now_delta(&ap->dccpap_time) / 10;
	unsigned char *to, *from;

	if (elapsed_time != 0)
		dccp_insert_option_elapsed_time(sk, skb, elapsed_time);

	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
		LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to "
			       "insert ACK Vector!\n");
		return;
	}

	/*
	 * XXX: now we have just one ack vector sent record, so
	 * we have to wait for it to be cleared.
	 *
	 * Of course this is not acceptable, but this is just for
	 * basic testing now.
	 */
	if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1)
		return;

	DCCP_SKB_CB(skb)->dccpd_opt_len += len;

	to = skb_push(skb, len);
	*to++ = DCCPO_ACK_VECTOR_0;
	*to++ = len;

	/* From here on, len is reused as the payload byte count */
	len = ap->dccpap_buf_vector_len;
	from = ap->dccpap_buf + ap->dccpap_buf_head;

	/* Check if buf_head wraps */
	if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) {
		/* Copy the tail segment first, then restart at buffer start */
		const unsigned int tailsize = (ap->dccpap_buf_len -
					       ap->dccpap_buf_head);

		memcpy(to, from, tailsize);
		to += tailsize;
		len -= tailsize;
		from = ap->dccpap_buf;
	}

	memcpy(to, from, len);
	/*
	 * From draft-ietf-dccp-spec-11.txt:
	 *
	 * For each acknowledgement it sends, the HC-Receiver will add an
	 * acknowledgement record.  ack_seqno will equal the HC-Receiver
	 * sequence number it used for the ack packet; ack_ptr will equal
	 * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
	 * equal buf_nonce.
	 *
	 * This implemention uses just one ack record for now.
	 */
	ap->dccpap_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
	ap->dccpap_ack_ptr = ap->dccpap_buf_head;
	ap->dccpap_ack_ackno = ap->dccpap_buf_ackno;
	ap->dccpap_ack_nonce = ap->dccpap_buf_nonce;
	ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len;

	dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, "
		      "ack_ackno=%llu\n",
		      debug_prefix, ap->dccpap_ack_vector_len,
		      (unsigned long long) ap->dccpap_ack_seqno,
		      (unsigned long long) ap->dccpap_ack_ackno);
}
428 | |||
429 | void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) | ||
430 | { | ||
431 | struct timeval tv; | ||
432 | u32 now; | ||
433 | |||
434 | do_gettimeofday(&tv); | ||
435 | now = (tv.tv_sec * USEC_PER_SEC + tv.tv_usec) / 10; | ||
436 | /* yes this will overflow but that is the point as we want a | ||
437 | * 10 usec 32 bit timer which mean it wraps every 11.9 hours */ | ||
438 | |||
439 | now = htonl(now); | ||
440 | dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now)); | ||
441 | } | ||
442 | |||
443 | EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp); | ||
444 | |||
/*
 * Insert a Timestamp Echo option carrying the last received Timestamp
 * value plus, when nonzero, the elapsed time since it was received.
 * Clears the pending echo state afterwards.
 */
static void dccp_insert_option_timestamp_echo(struct sock *sk,
					      struct sk_buff *skb)
{
	struct dccp_sock *dp = dccp_sk(sk);
#ifdef CONFIG_IP_DCCP_DEBUG
	const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
					"CLIENT TX opt: " : "server TX opt: ";
#endif
	u32 tstamp_echo;
	/* Time since the echoed timestamp arrived, in 10us units */
	const u32 elapsed_time =
			timeval_now_delta(&dp->dccps_timestamp_time) / 10;
	const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
	/* type + length + 4-byte echo + optional elapsed-time tail */
	const int len = 6 + elapsed_time_len;
	unsigned char *to;

	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
		LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert "
			       "timestamp echo!\n");
		return;
	}

	DCCP_SKB_CB(skb)->dccpd_opt_len += len;

	to = skb_push(skb, len);
	*to++ = DCCPO_TIMESTAMP_ECHO;
	*to++ = len;

	tstamp_echo = htonl(dp->dccps_timestamp_echo);
	memcpy(to, &tstamp_echo, 4);
	to += 4;

	/* Optional elapsed-time tail: 2 or 4 bytes, big-endian */
	if (elapsed_time_len == 2) {
		const u16 var16 = htons((u16)elapsed_time);
		memcpy(to, &var16, 2);
	} else if (elapsed_time_len == 4) {
		const u32 var32 = htonl(elapsed_time);
		memcpy(to, &var32, 4);
	}

	dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, seqno=%llu\n",
		      debug_prefix, dp->dccps_timestamp_echo,
		      len,
		      (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);

	/* Echo sent: reset so the same timestamp is not echoed twice */
	dp->dccps_timestamp_echo = 0;
	dp->dccps_timestamp_time.tv_sec = 0;
	dp->dccps_timestamp_time.tv_usec = 0;
}
493 | |||
494 | void dccp_insert_options(struct sock *sk, struct sk_buff *skb) | ||
495 | { | ||
496 | struct dccp_sock *dp = dccp_sk(sk); | ||
497 | |||
498 | DCCP_SKB_CB(skb)->dccpd_opt_len = 0; | ||
499 | |||
500 | if (dp->dccps_options.dccpo_send_ndp_count) | ||
501 | dccp_insert_option_ndp(sk, skb); | ||
502 | |||
503 | if (!dccp_packet_without_ack(skb)) { | ||
504 | if (dp->dccps_options.dccpo_send_ack_vector && | ||
505 | (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != | ||
506 | DCCP_MAX_SEQNO + 1)) | ||
507 | dccp_insert_option_ack_vector(sk, skb); | ||
508 | |||
509 | if (dp->dccps_timestamp_echo != 0) | ||
510 | dccp_insert_option_timestamp_echo(sk, skb); | ||
511 | } | ||
512 | |||
513 | ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb); | ||
514 | ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb); | ||
515 | |||
516 | /* XXX: insert other options when appropriate */ | ||
517 | |||
518 | if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) { | ||
519 | /* The length of all options has to be a multiple of 4 */ | ||
520 | int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4; | ||
521 | |||
522 | if (padding != 0) { | ||
523 | padding = 4 - padding; | ||
524 | memset(skb_push(skb, padding), 0, padding); | ||
525 | DCCP_SKB_CB(skb)->dccpd_opt_len += padding; | ||
526 | } | ||
527 | } | ||
528 | } | ||
529 | |||
530 | struct dccp_ackpkts *dccp_ackpkts_alloc(const unsigned int len, | ||
531 | const unsigned int __nocast priority) | ||
532 | { | ||
533 | struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority); | ||
534 | |||
535 | if (ap != NULL) { | ||
536 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
537 | memset(ap->dccpap_buf, 0xFF, len); | ||
538 | #endif | ||
539 | ap->dccpap_buf_len = len; | ||
540 | ap->dccpap_buf_head = | ||
541 | ap->dccpap_buf_tail = | ||
542 | ap->dccpap_buf_len - 1; | ||
543 | ap->dccpap_buf_ackno = | ||
544 | ap->dccpap_ack_ackno = | ||
545 | ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; | ||
546 | ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0; | ||
547 | ap->dccpap_ack_ptr = 0; | ||
548 | ap->dccpap_time.tv_sec = 0; | ||
549 | ap->dccpap_time.tv_usec = 0; | ||
550 | ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0; | ||
551 | } | ||
552 | |||
553 | return ap; | ||
554 | } | ||
555 | |||
556 | void dccp_ackpkts_free(struct dccp_ackpkts *ap) | ||
557 | { | ||
558 | if (ap != NULL) { | ||
559 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
560 | memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len); | ||
561 | #endif | ||
562 | kfree(ap); | ||
563 | } | ||
564 | } | ||
565 | |||
566 | static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap, | ||
567 | const unsigned int index) | ||
568 | { | ||
569 | return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK; | ||
570 | } | ||
571 | |||
572 | static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap, | ||
573 | const unsigned int index) | ||
574 | { | ||
575 | return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK; | ||
576 | } | ||
577 | |||
/*
 * If several packets are missing, the HC-Receiver may prefer to enter multiple
 * bytes with run length 0, rather than a single byte with a larger run length;
 * this simplifies table updates if one of the missing packets arrives.
 *
 * Moves buf_head back by @packets entries, filling the gap with
 * NOT_RECEIVED markers and storing @state at the new head.  Returns 0
 * on success or -ENOBUFS when the buffer cannot hold @packets more.
 */
static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap,
						  const unsigned int packets,
						  const unsigned char state)
{
	unsigned int gap;
	signed long new_head;

	if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len)
		return -ENOBUFS;

	/* All but the newest entry are gap fillers */
	gap = packets - 1;
	new_head = ap->dccpap_buf_head - packets;

	if (new_head < 0) {
		/* Head moved past index 0: wrap around the circular buffer */
		if (gap > 0) {
			/* Fill the portion of the gap at the buffer start */
			memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED,
			       gap + new_head + 1);
			/* Remaining gap entries sit before the buffer end */
			gap = -new_head;
		}
		new_head += ap->dccpap_buf_len;
	}

	ap->dccpap_buf_head = new_head;

	if (gap > 0)
		memset(ap->dccpap_buf + ap->dccpap_buf_head + 1,
		       DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap);

	ap->dccpap_buf[ap->dccpap_buf_head] = state;
	ap->dccpap_buf_vector_len += packets;
	return 0;
}
615 | |||
616 | /* | ||
617 | * Implements the draft-ietf-dccp-spec-11.txt Appendix A | ||
618 | */ | ||
619 | int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) | ||
620 | { | ||
621 | /* | ||
622 | * Check at the right places if the buffer is full, if it is, tell the | ||
623 | * caller to start dropping packets till the HC-Sender acks our ACK | ||
624 | * vectors, when we will free up space in dccpap_buf. | ||
625 | * | ||
626 | * We may well decide to do buffer compression, etc, but for now lets | ||
627 | * just drop. | ||
628 | * | ||
629 | * From Appendix A: | ||
630 | * | ||
631 | * Of course, the circular buffer may overflow, either when the | ||
632 | * HC-Sender is sending data at a very high rate, when the | ||
633 | * HC-Receiver's acknowledgements are not reaching the HC-Sender, | ||
634 | * or when the HC-Sender is forgetting to acknowledge those acks | ||
635 | * (so the HC-Receiver is unable to clean up old state). In this | ||
636 | * case, the HC-Receiver should either compress the buffer (by | ||
637 | * increasing run lengths when possible), transfer its state to | ||
638 | * a larger buffer, or, as a last resort, drop all received | ||
639 | * packets, without processing them whatsoever, until its buffer | ||
640 | * shrinks again. | ||
641 | */ | ||
642 | |||
643 | /* See if this is the first ackno being inserted */ | ||
644 | if (ap->dccpap_buf_vector_len == 0) { | ||
645 | ap->dccpap_buf[ap->dccpap_buf_head] = state; | ||
646 | ap->dccpap_buf_vector_len = 1; | ||
647 | } else if (after48(ackno, ap->dccpap_buf_ackno)) { | ||
648 | const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno, | ||
649 | ackno); | ||
650 | |||
651 | /* | ||
652 | * Look if the state of this packet is the same as the | ||
653 | * previous ackno and if so if we can bump the head len. | ||
654 | */ | ||
655 | if (delta == 1 && | ||
656 | dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state && | ||
657 | (dccp_ackpkts_len(ap, ap->dccpap_buf_head) < | ||
658 | DCCP_ACKPKTS_LEN_MASK)) | ||
659 | ap->dccpap_buf[ap->dccpap_buf_head]++; | ||
660 | else if (dccp_ackpkts_set_buf_head_state(ap, delta, state)) | ||
661 | return -ENOBUFS; | ||
662 | } else { | ||
663 | /* | ||
664 | * A.1.2. Old Packets | ||
665 | * | ||
666 | * When a packet with Sequence Number S arrives, and | ||
667 | * S <= buf_ackno, the HC-Receiver will scan the table | ||
668 | * for the byte corresponding to S. (Indexing structures | ||
669 | * could reduce the complexity of this scan.) | ||
670 | */ | ||
671 | u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno); | ||
672 | unsigned int index = ap->dccpap_buf_head; | ||
673 | |||
674 | while (1) { | ||
675 | const u8 len = dccp_ackpkts_len(ap, index); | ||
676 | const u8 state = dccp_ackpkts_state(ap, index); | ||
677 | /* | ||
678 | * valid packets not yet in dccpap_buf have a reserved | ||
679 | * entry, with a len equal to 0. | ||
680 | */ | ||
681 | if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED && | ||
682 | len == 0 && delta == 0) { /* Found our | ||
683 | reserved seat! */ | ||
684 | dccp_pr_debug("Found %llu reserved seat!\n", | ||
685 | (unsigned long long) ackno); | ||
686 | ap->dccpap_buf[index] = state; | ||
687 | goto out; | ||
688 | } | ||
689 | /* len == 0 means one packet */ | ||
690 | if (delta < len + 1) | ||
691 | goto out_duplicate; | ||
692 | |||
693 | delta -= len + 1; | ||
694 | if (++index == ap->dccpap_buf_len) | ||
695 | index = 0; | ||
696 | } | ||
697 | } | ||
698 | |||
699 | ap->dccpap_buf_ackno = ackno; | ||
700 | do_gettimeofday(&ap->dccpap_time); | ||
701 | out: | ||
702 | dccp_pr_debug(""); | ||
703 | dccp_ackpkts_print(ap); | ||
704 | return 0; | ||
705 | |||
706 | out_duplicate: | ||
707 | /* Duplicate packet */ | ||
708 | dccp_pr_debug("Received a dup or already considered lost " | ||
709 | "packet: %llu\n", (unsigned long long) ackno); | ||
710 | return -EILSEQ; | ||
711 | } | ||
712 | |||
713 | #ifdef CONFIG_IP_DCCP_DEBUG | ||
714 | void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, | ||
715 | int len) | ||
716 | { | ||
717 | if (!dccp_debug) | ||
718 | return; | ||
719 | |||
720 | printk("ACK vector len=%d, ackno=%llu |", len, | ||
721 | (unsigned long long) ackno); | ||
722 | |||
723 | while (len--) { | ||
724 | const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; | ||
725 | const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); | ||
726 | |||
727 | printk("%d,%d|", state, rl); | ||
728 | ++vector; | ||
729 | } | ||
730 | |||
731 | printk("\n"); | ||
732 | } | ||
733 | |||
734 | void dccp_ackpkts_print(const struct dccp_ackpkts *ap) | ||
735 | { | ||
736 | dccp_ackvector_print(ap->dccpap_buf_ackno, | ||
737 | ap->dccpap_buf + ap->dccpap_buf_head, | ||
738 | ap->dccpap_buf_vector_len); | ||
739 | } | ||
740 | #endif | ||
741 | |||
/*
 * Discard the portion of the ack vector covered by the single
 * outstanding ack record, once the peer has acknowledged it.
 * (The name looks like a typo for "throw"; kept as-is since it is the
 * function's identifier.)
 */
static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap)
{
	/*
	 * As we're keeping track of the ack vector size
	 * (dccpap_buf_vector_len) and the sent ack vector size
	 * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but
	 * keep this code here as in the future we'll implement a vector of
	 * ack records, as suggested in draft-ietf-dccp-spec-11.txt
	 * Appendix A. -acme
	 */
#if 0
	ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1;
	if (ap->dccpap_buf_tail >= ap->dccpap_buf_len)
		ap->dccpap_buf_tail -= ap->dccpap_buf_len;
#endif
	ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len;
}
759 | |||
/*
 * Called with the ackno of a received packet: when it acknowledges the
 * packet that carried our outstanding ack vector record, the acked part
 * of the vector is thrown away and the record slot freed.
 */
void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk,
				  u64 ackno)
{
	/* Check if we actually sent an ACK vector */
	if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
		return;

	if (ackno == ap->dccpap_ack_seqno) {
#ifdef CONFIG_IP_DCCP_DEBUG
		struct dccp_sock *dp = dccp_sk(sk);
		const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
					"CLIENT rx ack: " : "server rx ack: ";
#endif
		dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, "
			      "ack_ackno=%llu, ACKED!\n",
			      debug_prefix, 1,
			      (unsigned long long) ap->dccpap_ack_seqno,
			      (unsigned long long) ap->dccpap_ack_ackno);
		dccp_ackpkts_trow_away_ack_record(ap);
		/* Free the single record slot for the next ack vector */
		ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
	}
}
782 | |||
/*
 * Process a received Ack Vector: if one of its run-length entries
 * covers the sequence number of the packet that carried our outstanding
 * ack vector record, and that packet was actually received, discard the
 * record (it has been acknowledged).
 */
static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
					     struct sock *sk, u64 ackno,
					     const unsigned char len,
					     const unsigned char *vector)
{
	unsigned char i;

	/* Check if we actually sent an ACK vector */
	if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
		return;
	/*
	 * We're in the receiver half connection, so if the received an ACK
	 * vector ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're
	 * not interested.
	 *
	 * Extra explanation with example:
	 *
	 * if we received an ACK vector with ackno 50, it can only be acking
	 * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent).
	 */
	/* dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); */
	if (before48(ackno, ap->dccpap_ack_seqno)) {
		/* dccp_pr_debug_cat("yes\n"); */
		return;
	}
	/* dccp_pr_debug_cat("no\n"); */

	/* Walk the run-length entries from newest (ackno) backwards */
	i = len;
	while (i--) {
		const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);
		u64 ackno_end_rl;

		/* This entry covers [ackno_end_rl, ackno] */
		dccp_set_seqno(&ackno_end_rl, ackno - rl);

		/*
		 * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl,
		 * ap->dccpap_ack_seqno, ackno);
		 */
		if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) {
			const u8 state = (*vector &
					  DCCP_ACKPKTS_STATE_MASK) >> 6;
			/* dccp_pr_debug_cat("yes\n"); */

			if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) {
#ifdef CONFIG_IP_DCCP_DEBUG
				struct dccp_sock *dp = dccp_sk(sk);
				const char *debug_prefix =
					dp->dccps_role == DCCP_ROLE_CLIENT ?
					"CLIENT rx ack: " : "server rx ack: ";
#endif
				dccp_pr_debug("%sACK vector 0, len=%d, "
					      "ack_seqno=%llu, ack_ackno=%llu, "
					      "ACKED!\n",
					      debug_prefix, len,
					      (unsigned long long)
					      ap->dccpap_ack_seqno,
					      (unsigned long long)
					      ap->dccpap_ack_ackno);
				dccp_ackpkts_trow_away_ack_record(ap);
			}
			/*
			 * If dccpap_ack_seqno was not received, no problem
			 * we'll send another ACK vector.
			 */
			ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
			break;
		}
		/* dccp_pr_debug_cat("no\n"); */

		dccp_set_seqno(&ackno, ackno_end_rl - 1);
		++vector;
	}
}
diff --git a/net/dccp/output.c b/net/dccp/output.c new file mode 100644 index 000000000000..28de157a4326 --- /dev/null +++ b/net/dccp/output.c | |||
@@ -0,0 +1,528 @@ | |||
1 | /* | ||
2 | * net/dccp/output.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | ||
12 | |||
13 | #include <linux/config.h> | ||
14 | #include <linux/dccp.h> | ||
15 | #include <linux/skbuff.h> | ||
16 | |||
17 | #include <net/sock.h> | ||
18 | |||
19 | #include "ccid.h" | ||
20 | #include "dccp.h" | ||
21 | |||
/* An ack has just been transmitted: cancel the delayed-ack timer. */
static inline void dccp_event_ack_sent(struct sock *sk)
{
	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}
26 | |||
/*
 * All SKB's seen here are completely headerless. It is our
 * job to build the DCCP header, and pass the packet down to
 * IP so it can do the same plus pass the packet off to the
 * device.
 *
 * Returns 0 on success (NET_XMIT_CN is mapped to 0), a negative errno
 * otherwise, or -ENOBUFS when @skb is NULL.
 */
int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
{
	if (likely(skb != NULL)) {
		const struct inet_sock *inet = inet_sk(sk);
		struct dccp_sock *dp = dccp_sk(sk);
		struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
		struct dccp_hdr *dh;
		/* XXX For now we're using only 48 bits sequence numbers */
		const int dccp_header_size = sizeof(*dh) +
					     sizeof(struct dccp_hdr_ext) +
					  dccp_packet_hdr_len(dcb->dccpd_type);
		int err, set_ack = 1;
		u64 ackno = dp->dccps_gsr;

		dccp_inc_seqno(&dp->dccps_gss);

		switch (dcb->dccpd_type) {
		case DCCP_PKT_DATA:
			/* Pure data packets carry no acknowledgement */
			set_ack = 0;
			break;
		case DCCP_PKT_SYNC:
		case DCCP_PKT_SYNCACK:
			/* Sync/SyncAck echo the seqno stashed by the caller */
			ackno = dcb->dccpd_seq;
			break;
		}

		dcb->dccpd_seq = dp->dccps_gss;
		/* Options must be inserted before the header is pushed */
		dccp_insert_options(sk, skb);

		skb->h.raw = skb_push(skb, dccp_header_size);
		dh = dccp_hdr(skb);
		/*
		 * Data packets are not cloned as they are never retransmitted
		 */
		/*
		 * NOTE(review): the guard below looks inverted relative to
		 * the comment above (owner is set only when the skb IS
		 * cloned) -- confirm the intended condition.
		 */
		if (skb_cloned(skb))
			skb_set_owner_w(skb, sk);

		/* Build DCCP header and checksum it. */
		memset(dh, 0, dccp_header_size);
		dh->dccph_type = dcb->dccpd_type;
		dh->dccph_sport = inet->sport;
		dh->dccph_dport = inet->dport;
		dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4;
		dh->dccph_ccval = dcb->dccpd_ccval;
		/* XXX For now we're using only 48 bits sequence numbers */
		dh->dccph_x = 1;

		dp->dccps_awh = dp->dccps_gss;
		dccp_hdr_set_seq(dh, dp->dccps_gss);
		if (set_ack)
			dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno);

		/* Type-specific trailing header fields */
		switch (dcb->dccpd_type) {
		case DCCP_PKT_REQUEST:
			dccp_hdr_request(skb)->dccph_req_service =
							dcb->dccpd_service;
			break;
		case DCCP_PKT_RESET:
			dccp_hdr_reset(skb)->dccph_reset_code =
							dcb->dccpd_reset_code;
			break;
		}

		dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr,
						      inet->daddr);

		if (set_ack)
			dccp_event_ack_sent(sk);

		DCCP_INC_STATS(DCCP_MIB_OUTSEGS);

		err = ip_queue_xmit(skb, 0);
		if (err <= 0)
			return err;

		/* NET_XMIT_CN is special. It does not guarantee,
		 * that this packet is lost. It tells that device
		 * is about to start to drop packets or already
		 * drops some packets of the same priority and
		 * invokes us to send less aggressively.
		 */
		return err == NET_XMIT_CN ? 0 : err;
	}
	return -ENOBUFS;
}
118 | |||
119 | unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) | ||
120 | { | ||
121 | struct dccp_sock *dp = dccp_sk(sk); | ||
122 | int mss_now; | ||
123 | |||
124 | /* | ||
125 | * FIXME: we really should be using the af_specific thing to support | ||
126 | * IPv6. | ||
127 | * mss_now = pmtu - tp->af_specific->net_header_len - | ||
128 | * sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext); | ||
129 | */ | ||
130 | mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) - | ||
131 | sizeof(struct dccp_hdr_ext); | ||
132 | |||
133 | /* Now subtract optional transport overhead */ | ||
134 | mss_now -= dp->dccps_ext_header_len; | ||
135 | |||
136 | /* | ||
137 | * FIXME: this should come from the CCID infrastructure, where, say, | ||
138 | * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets | ||
139 | * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED | ||
140 | * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to | ||
141 | * make it a multiple of 4 | ||
142 | */ | ||
143 | |||
144 | mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4; | ||
145 | |||
146 | /* And store cached results */ | ||
147 | dp->dccps_pmtu_cookie = pmtu; | ||
148 | dp->dccps_mss_cache = mss_now; | ||
149 | |||
150 | return mss_now; | ||
151 | } | ||
152 | |||
153 | void dccp_write_space(struct sock *sk) | ||
154 | { | ||
155 | read_lock(&sk->sk_callback_lock); | ||
156 | |||
157 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | ||
158 | wake_up_interruptible(sk->sk_sleep); | ||
159 | /* Should agree with poll, otherwise some programs break */ | ||
160 | if (sock_writeable(sk)) | ||
161 | sk_wake_async(sk, 2, POLL_OUT); | ||
162 | |||
163 | read_unlock(&sk->sk_callback_lock); | ||
164 | } | ||
165 | |||
/**
 * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet
 * @sk: socket to wait for
 * @timeo: for how long
 *
 * Sleeps, in delay increments suggested by the CCID, until the CCID
 * allows @skb to be sent, the timeout expires, a signal arrives, or the
 * socket errors/shuts down.  Returns 0 when the packet may be sent, or
 * a negative errno (-EPIPE, -EAGAIN, or a signal-derived code).
 */
static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb,
			      long *timeo)
{
	struct dccp_sock *dp = dccp_sk(sk);
	DEFINE_WAIT(wait);
	long delay;
	int rc;

	while (1) {
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);

		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
			goto do_error;
		if (!*timeo)
			goto do_nonblock;
		if (signal_pending(current))
			goto do_interrupted;

		/* rc > 0 is a hint: retry in rc milliseconds */
		rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
					    skb->len);
		if (rc <= 0)
			break;
		delay = msecs_to_jiffies(rc);
		if (delay > *timeo || delay < 0)
			goto do_nonblock;

		/* Drop the socket lock while sleeping */
		sk->sk_write_pending++;
		release_sock(sk);
		*timeo -= schedule_timeout(delay);
		lock_sock(sk);
		sk->sk_write_pending--;
	}
out:
	finish_wait(sk->sk_sleep, &wait);
	return rc;

do_error:
	rc = -EPIPE;
	goto out;
do_nonblock:
	rc = -EAGAIN;
	goto out;
do_interrupted:
	rc = sock_intr_errno(*timeo);
	goto out;
}
217 | |||
/*
 * dccp_write_xmit - hand a data skb to the CCID and transmit it
 * @sk: socket, locked by the caller
 * @skb: data packet to send
 * @timeo: send-timeout budget, consumed if the CCID makes us wait
 *
 * Asks the tx CCID for permission (blocking via dccp_wait_for_ccid()
 * when it answers "wait"), picks DATA vs DATAACK depending on whether
 * an ack is pending, then transmits and notifies the CCID.
 */
int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
					 skb->len);

	/* err > 0 means "wait err milliseconds" -- see
	 * dccp_wait_for_ccid(). */
	if (err > 0)
		err = dccp_wait_for_ccid(sk, skb, timeo);

	if (err == 0) {
		const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
		struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
		/* Save len now: skb may not be ours after
		 * dccp_transmit_skb(). */
		const int len = skb->len;

		if (sk->sk_state == DCCP_PARTOPEN) {
			/* See 8.1.5. Handshake Completion */
			inet_csk_schedule_ack(sk);
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  inet_csk(sk)->icsk_rto,
						  DCCP_RTO_MAX);
			dcb->dccpd_type = DCCP_PKT_DATAACK;
			/*
			 * FIXME: we really should have a
			 * dccps_ack_pending or use icsk.
			 */
		} else if (inet_csk_ack_scheduled(sk) ||
			   dp->dccps_timestamp_echo != 0 ||
			   (dp->dccps_options.dccpo_send_ack_vector &&
			    ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 &&
			    ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1))
			/* An ack is pending: piggyback it on the data. */
			dcb->dccpd_type = DCCP_PKT_DATAACK;
		else
			dcb->dccpd_type = DCCP_PKT_DATA;

		err = dccp_transmit_skb(sk, skb);
		ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
	}

	return err;
}
258 | |||
259 | int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | ||
260 | { | ||
261 | if (inet_sk_rebuild_header(sk) != 0) | ||
262 | return -EHOSTUNREACH; /* Routing failure or similar. */ | ||
263 | |||
264 | return dccp_transmit_skb(sk, (skb_cloned(skb) ? | ||
265 | pskb_copy(skb, GFP_ATOMIC): | ||
266 | skb_clone(skb, GFP_ATOMIC))); | ||
267 | } | ||
268 | |||
/*
 * dccp_make_response - build a RESPONSE packet answering a connection
 * request
 * @sk: listening socket
 * @dst: route to the peer (a reference is taken via dst_clone)
 * @req: the pending connection request
 *
 * Allocates the skb, inserts options, then pushes the generic, extended
 * (48-bit sequence numbers, dccph_x = 1) and ack headers.  Returns the
 * ready-to-send skb, or NULL on allocation failure.
 */
struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
				   struct request_sock *req)
{
	struct dccp_hdr *dh;
	const int dccp_header_size = sizeof(struct dccp_hdr) +
				     sizeof(struct dccp_hdr_ext) +
				     sizeof(struct dccp_hdr_response);
	struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
					   dccp_header_size, 1,
					   GFP_ATOMIC);
	if (skb == NULL)
		return NULL;

	/* Reserve space for headers. */
	skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);

	skb->dst = dst_clone(dst);
	skb->csum = 0;

	/* Options must go in before the headers are pushed below. */
	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
	DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss;
	dccp_insert_options(sk, skb);

	skb->h.raw = skb_push(skb, dccp_header_size);

	dh = dccp_hdr(skb);
	memset(dh, 0, dccp_header_size);

	dh->dccph_sport = inet_sk(sk)->sport;
	dh->dccph_dport = inet_rsk(req)->rmt_port;
	/* Data offset is in 32-bit words and covers header + options. */
	dh->dccph_doff = (dccp_header_size +
			  DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
	dh->dccph_type = DCCP_PKT_RESPONSE;
	dh->dccph_x = 1;
	/* Seq = our initial seqno, ack = peer's initial seqno. */
	dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss);
	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr);

	dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr,
					      inet_rsk(req)->rmt_addr);

	DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
	return skb;
}
312 | |||
/*
 * dccp_make_reset - build a RESET packet carrying @code
 * @sk: socket the reset is sent from
 * @dst: route to the peer (a reference is taken via dst_clone)
 * @code: reset reason to place in the reset-specific header
 *
 * Consumes one sequence number (dccps_gss is advanced).  Returns the
 * ready-to-send skb, or NULL on allocation failure.
 */
struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
				const enum dccp_reset_codes code)
{
	struct dccp_hdr *dh;
	struct dccp_sock *dp = dccp_sk(sk);
	const int dccp_header_size = sizeof(struct dccp_hdr) +
				     sizeof(struct dccp_hdr_ext) +
				     sizeof(struct dccp_hdr_reset);
	struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
					   dccp_header_size, 1,
					   GFP_ATOMIC);
	if (skb == NULL)
		return NULL;

	/* Reserve space for headers. */
	skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);

	skb->dst = dst_clone(dst);
	skb->csum = 0;

	/* A reset consumes a sequence number of its own. */
	dccp_inc_seqno(&dp->dccps_gss);

	DCCP_SKB_CB(skb)->dccpd_reset_code = code;
	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET;
	DCCP_SKB_CB(skb)->dccpd_seq = dp->dccps_gss;
	dccp_insert_options(sk, skb);

	skb->h.raw = skb_push(skb, dccp_header_size);

	dh = dccp_hdr(skb);
	memset(dh, 0, dccp_header_size);

	dh->dccph_sport = inet_sk(sk)->sport;
	dh->dccph_dport = inet_sk(sk)->dport;
	/* Data offset is in 32-bit words and covers header + options. */
	dh->dccph_doff = (dccp_header_size +
			  DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
	dh->dccph_type = DCCP_PKT_RESET;
	dh->dccph_x = 1;
	dccp_hdr_set_seq(dh, dp->dccps_gss);
	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr);

	dccp_hdr_reset(skb)->dccph_reset_code = code;

	dh->dccph_checksum = dccp_v4_checksum(skb, inet_sk(sk)->saddr,
					      inet_sk(sk)->daddr);

	DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
	return skb;
}
363 | |||
/*
 * Do all connect socket setups that can be done AF independent.
 * Called from dccp_connect() after the AF-specific setup is complete:
 * clears stale error/DONE state, seeds the MSS from the route's MTU
 * and resets the retransmit counter.
 */
static inline void dccp_connect_init(struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_get(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	sk->sk_err = 0;
	sock_reset_flag(sk, SOCK_DONE);

	dccp_sync_mss(sk, dst_mtu(dst));

	/*
	 * FIXME: set dp->{dccps_swh,dccps_swl}, with
	 * something like dccp_inc_seq
	 */

	icsk->icsk_retransmits = 0;
}
384 | |||
/*
 * dccp_connect - start the active-open handshake by sending a REQUEST
 *
 * Builds the REQUEST skb, keeps the original at sk_send_head for
 * retransmission and transmits a clone, then arms the retransmit
 * timer.  Returns 0 or -ENOBUFS if the skb cannot be allocated.
 */
int dccp_connect(struct sock *sk)
{
	struct sk_buff *skb;
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_connect_init(sk);

	/* + 15 allows the data pointer to be aligned after skb_reserve. */
	skb = alloc_skb(MAX_DCCP_HEADER + 15, sk->sk_allocation);
	if (unlikely(skb == NULL))
		return -ENOBUFS;

	/* Reserve space for headers. */
	skb_reserve(skb, MAX_DCCP_HEADER);

	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
	/* FIXME: set service to something meaningful, coming
	 * from userspace*/
	DCCP_SKB_CB(skb)->dccpd_service = 0;
	skb->csum = 0;
	skb_set_owner_w(skb, sk);

	/* Queue the original; transmit a clone so the original survives
	 * for retransmission. */
	BUG_TRAP(sk->sk_send_head == NULL);
	sk->sk_send_head = skb;
	dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
	DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);

	/* Timer for repeating the REQUEST until an answer. */
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
				  icsk->icsk_rto, DCCP_RTO_MAX);
	return 0;
}
416 | |||
/*
 * dccp_send_ack - send a pure ACK immediately
 *
 * On allocation failure falls back to (re)arming the delayed-ack timer
 * so the ack is retried later rather than lost.
 */
void dccp_send_ack(struct sock *sk)
{
	/* If we have been reset, we may not send again. */
	if (sk->sk_state != DCCP_CLOSED) {
		struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);

		if (skb == NULL) {
			/* No memory: retry via the delayed-ack machinery. */
			inet_csk_schedule_ack(sk);
			inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  TCP_DELACK_MAX,
						  DCCP_RTO_MAX);
			return;
		}

		/* Reserve space for headers */
		skb_reserve(skb, MAX_DCCP_HEADER);
		skb->csum = 0;
		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK;
		skb_set_owner_w(skb, sk);
		dccp_transmit_skb(sk, skb);
	}
}
440 | |||
441 | EXPORT_SYMBOL_GPL(dccp_send_ack); | ||
442 | |||
/*
 * dccp_send_delayed_ack - schedule an ack for later, or send it now if
 * the delack timer was blocked.
 */
void dccp_send_delayed_ack(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	/*
	 * FIXME: tune this timer. elapsed time fixes the skew, so no problem
	 * with using 2s, and active senders also piggyback the ACK into a
	 * DATAACK packet, so this is really for quiescent senders.
	 */
	unsigned long timeout = jiffies + 2 * HZ;

	/* Use new timeout only if there wasn't a older one earlier. */
	if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
		/* If delack timer was blocked or is about to expire,
		 * send ACK now.
		 *
		 * FIXME: check the "about to expire" part
		 */
		if (icsk->icsk_ack.blocked) {
			dccp_send_ack(sk);
			return;
		}

		/* Keep the earlier deadline if it comes first. */
		if (!time_before(timeout, icsk->icsk_ack.timeout))
			timeout = icsk->icsk_ack.timeout;
	}
	icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
	icsk->icsk_ack.timeout = timeout;
	sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
}
472 | |||
/*
 * dccp_send_sync - send a SYNC or SYNCACK acknowledging @seq
 * @sk: socket to send from
 * @seq: sequence number being synced
 * @pkt_type: DCCP_PKT_SYNC or DCCP_PKT_SYNCACK
 */
void dccp_send_sync(struct sock *sk, const u64 seq,
		    const enum dccp_pkt_type pkt_type)
{
	/*
	 * We are not putting this on the write queue, so
	 * dccp_transmit_skb() will set the ownership to this
	 * sock.
	 */
	struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);

	if (skb == NULL)
		/* FIXME: how to make sure the sync is sent? */
		return;

	/* Reserve space for headers and prepare control bits. */
	skb_reserve(skb, MAX_DCCP_HEADER);
	skb->csum = 0;
	DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
	DCCP_SKB_CB(skb)->dccpd_seq = seq;

	skb_set_owner_w(skb, sk);
	dccp_transmit_skb(sk, skb);
}
496 | |||
/*
 * Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This
 * cannot be allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under
 * any circumstances.
 */
void dccp_send_close(struct sock *sk, const int active)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	/* Active close may sleep; passive close may run in softirq
	 * context, hence GFP_ATOMIC. */
	const unsigned int prio = active ? GFP_KERNEL : GFP_ATOMIC;

	skb = alloc_skb(sk->sk_prot->max_header, prio);
	/* NOTE(review): silently returning here contradicts the "cannot
	 * be allowed to fail" comment above -- confirm intended. */
	if (skb == NULL)
		return;

	/* Reserve space for headers and prepare control bits. */
	skb_reserve(skb, sk->sk_prot->max_header);
	skb->csum = 0;
	/* The client sends CLOSE, the server sends CLOSEREQ. */
	DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ?
					DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ;

	skb_set_owner_w(skb, sk);
	if (active) {
		/* Queue the original for retransmit, send a clone. */
		BUG_TRAP(sk->sk_send_head == NULL);
		sk->sk_send_head = skb;
		dccp_transmit_skb(sk, skb_clone(skb, prio));
	} else
		dccp_transmit_skb(sk, skb);

	ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c new file mode 100644 index 000000000000..18a0e69c9dc7 --- /dev/null +++ b/net/dccp/proto.c | |||
@@ -0,0 +1,826 @@ | |||
1 | /* | ||
2 | * net/dccp/proto.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify it | ||
8 | * under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | #include <linux/config.h> | ||
13 | #include <linux/dccp.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/types.h> | ||
16 | #include <linux/sched.h> | ||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/skbuff.h> | ||
19 | #include <linux/netdevice.h> | ||
20 | #include <linux/in.h> | ||
21 | #include <linux/if_arp.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/random.h> | ||
24 | #include <net/checksum.h> | ||
25 | |||
26 | #include <net/inet_common.h> | ||
27 | #include <net/ip.h> | ||
28 | #include <net/protocol.h> | ||
29 | #include <net/sock.h> | ||
30 | #include <net/xfrm.h> | ||
31 | |||
32 | #include <asm/semaphore.h> | ||
33 | #include <linux/spinlock.h> | ||
34 | #include <linux/timer.h> | ||
35 | #include <linux/delay.h> | ||
36 | #include <linux/poll.h> | ||
37 | #include <linux/dccp.h> | ||
38 | |||
39 | #include "ccid.h" | ||
40 | #include "dccp.h" | ||
41 | |||
/* Per-CPU DCCP MIB (SNMP) counters; allocated in init_dccp_v4_mibs(). */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

/* Sockets whose owning process is gone but that are not yet destroyed. */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

/* IPv4 receive and ICMP-error hooks for IPPROTO_DCCP. */
static struct net_protocol dccp_protocol = {
	.handler	= dccp_v4_rcv,
	.err_handler	= dccp_v4_err,
};
50 | |||
51 | const char *dccp_packet_name(const int type) | ||
52 | { | ||
53 | static const char *dccp_packet_names[] = { | ||
54 | [DCCP_PKT_REQUEST] = "REQUEST", | ||
55 | [DCCP_PKT_RESPONSE] = "RESPONSE", | ||
56 | [DCCP_PKT_DATA] = "DATA", | ||
57 | [DCCP_PKT_ACK] = "ACK", | ||
58 | [DCCP_PKT_DATAACK] = "DATAACK", | ||
59 | [DCCP_PKT_CLOSEREQ] = "CLOSEREQ", | ||
60 | [DCCP_PKT_CLOSE] = "CLOSE", | ||
61 | [DCCP_PKT_RESET] = "RESET", | ||
62 | [DCCP_PKT_SYNC] = "SYNC", | ||
63 | [DCCP_PKT_SYNCACK] = "SYNCACK", | ||
64 | }; | ||
65 | |||
66 | if (type >= DCCP_NR_PKT_TYPES) | ||
67 | return "INVALID"; | ||
68 | else | ||
69 | return dccp_packet_names[type]; | ||
70 | } | ||
71 | |||
72 | EXPORT_SYMBOL_GPL(dccp_packet_name); | ||
73 | |||
74 | const char *dccp_state_name(const int state) | ||
75 | { | ||
76 | static char *dccp_state_names[] = { | ||
77 | [DCCP_OPEN] = "OPEN", | ||
78 | [DCCP_REQUESTING] = "REQUESTING", | ||
79 | [DCCP_PARTOPEN] = "PARTOPEN", | ||
80 | [DCCP_LISTEN] = "LISTEN", | ||
81 | [DCCP_RESPOND] = "RESPOND", | ||
82 | [DCCP_CLOSING] = "CLOSING", | ||
83 | [DCCP_TIME_WAIT] = "TIME_WAIT", | ||
84 | [DCCP_CLOSED] = "CLOSED", | ||
85 | }; | ||
86 | |||
87 | if (state >= DCCP_MAX_STATES) | ||
88 | return "INVALID STATE!"; | ||
89 | else | ||
90 | return dccp_state_names[state]; | ||
91 | } | ||
92 | |||
93 | EXPORT_SYMBOL_GPL(dccp_state_name); | ||
94 | |||
95 | static inline int dccp_listen_start(struct sock *sk) | ||
96 | { | ||
97 | dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN; | ||
98 | return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); | ||
99 | } | ||
100 | |||
/*
 * dccp_disconnect - abort the connection and return the socket to
 * CLOSED, purging queues and resetting connection state.  Modeled on
 * the ABORT function of RFC793.  Always returns 0.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/* ABORT function of RFC793 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	/* FIXME: do the active reset thing */
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	/* Drop any queued-but-unsent packet (REQUEST/CLOSE). */
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	/* Forget the source address unless the user explicitly bound it. */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}
142 | |||
/*
 * Wait for a DCCP event.
 *
 * Note that we don't need to lock the socket, as the upper poll layers
 * take care of normal races (between the test and the event) and we don't
 * go look at any of the socket buffers directly.
 */
static unsigned int dccp_poll(struct file *file, struct socket *sock,
			      poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by another threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM;

	/* Connected? DCCPF_* are (1 << state) masks of sk_state. */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}
198 | |||
/* No DCCP-specific ioctls are implemented yet. */
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	dccp_pr_debug("entry\n");
	return -ENOIOCTLCMD;
}
204 | |||
205 | int dccp_setsockopt(struct sock *sk, int level, int optname, | ||
206 | char __user *optval, int optlen) | ||
207 | { | ||
208 | struct dccp_sock *dp; | ||
209 | int err; | ||
210 | int val; | ||
211 | |||
212 | if (level != SOL_DCCP) | ||
213 | return ip_setsockopt(sk, level, optname, optval, optlen); | ||
214 | |||
215 | if (optlen < sizeof(int)) | ||
216 | return -EINVAL; | ||
217 | |||
218 | if (get_user(val, (int __user *)optval)) | ||
219 | return -EFAULT; | ||
220 | |||
221 | lock_sock(sk); | ||
222 | |||
223 | dp = dccp_sk(sk); | ||
224 | err = 0; | ||
225 | |||
226 | switch (optname) { | ||
227 | case DCCP_SOCKOPT_PACKET_SIZE: | ||
228 | dp->dccps_packet_size = val; | ||
229 | break; | ||
230 | default: | ||
231 | err = -ENOPROTOOPT; | ||
232 | break; | ||
233 | } | ||
234 | |||
235 | release_sock(sk); | ||
236 | return err; | ||
237 | } | ||
238 | |||
239 | int dccp_getsockopt(struct sock *sk, int level, int optname, | ||
240 | char __user *optval, int __user *optlen) | ||
241 | { | ||
242 | struct dccp_sock *dp; | ||
243 | int val, len; | ||
244 | |||
245 | if (level != SOL_DCCP) | ||
246 | return ip_getsockopt(sk, level, optname, optval, optlen); | ||
247 | |||
248 | if (get_user(len, optlen)) | ||
249 | return -EFAULT; | ||
250 | |||
251 | len = min_t(unsigned int, len, sizeof(int)); | ||
252 | if (len < 0) | ||
253 | return -EINVAL; | ||
254 | |||
255 | dp = dccp_sk(sk); | ||
256 | |||
257 | switch (optname) { | ||
258 | case DCCP_SOCKOPT_PACKET_SIZE: | ||
259 | val = dp->dccps_packet_size; | ||
260 | break; | ||
261 | default: | ||
262 | return -ENOPROTOOPT; | ||
263 | } | ||
264 | |||
265 | if (put_user(len, optlen) || copy_to_user(optval, &val, len)) | ||
266 | return -EFAULT; | ||
267 | |||
268 | return 0; | ||
269 | } | ||
270 | |||
/*
 * dccp_sendmsg - send one datagram (one skb per message, no coalescing)
 *
 * Returns the number of bytes queued/sent, or a negative errno.
 * Messages larger than the cached MSS are rejected with -EMSGSIZE.
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	/* NOTE(review): dccps_mss_cache is read before taking the socket
	 * lock -- confirm this race is benign. */
	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);
	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	/* Drop the lock while allocating: sock_alloc_send_skb may sleep.
	 * NOTE(review): socket state may change while unlocked. */
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_write_xmit(sk, skb, &timeo);
	/*
	 * XXX we don't use sk_write_queue, so just discard the packet.
	 * Current plan however is to _use_ sk_write_queue with
	 * an algorith similar to tcp_sendmsg, where the main difference
	 * is that in DCCP we have to respect packet boundaries, so
	 * no coalescing of skbs.
	 *
	 * This bug was _quickly_ found & fixed by just looking at an OSTRA
	 * generated callgraph 8) -acme
	 */
	if (rc != 0)
		goto out_discard;
out_release:
	release_sock(sk);
	/* gcc extension: rc ? rc : len -- errno on failure, bytes sent
	 * on success. */
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}
328 | |||
/*
 * dccp_recvmsg - receive one datagram
 *
 * Loops over the receive queue: DATA/DATAACK packets are delivered to
 * the user, RESET/CLOSE terminate with a 0-byte read, anything else is
 * dropped.  Blocks via sk_wait_data() when the queue is empty and the
 * socket state allows.
 *
 * NOTE(review): @len is size_t but also carries negative errnos and is
 * returned through an int -- works but fragile; confirm against later
 * cleanups.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		if (dh->dccph_type == DCCP_PKT_DATA ||
		    dh->dccph_type == DCCP_PKT_DATAACK)
			goto found_ok_skb;

		if (dh->dccph_type == DCCP_PKT_RESET ||
		    dh->dccph_type == DCCP_PKT_CLOSE) {
			dccp_pr_debug("found fin ok!\n");
			len = 0;
			goto found_fin_ok;
		}
		dccp_pr_debug("packet_type=%s\n",
			      dccp_packet_name(dh->dccph_type));
		/* NOTE(review): non-data packets are eaten even with
		 * MSG_PEEK, unlike the found_fin_ok path -- confirm. */
		sk_eat_skb(sk, skb);
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		/* Queue empty: sleep until data arrives or timeo expires. */
		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		/* Deliver at most skb->len bytes; flag truncation. */
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}
425 | |||
/*
 * inet_dccp_listen - listen() entry point for DCCP sockets
 *
 * Moves a CLOSED socket to LISTEN; if the socket is already listening
 * only the backlog is adjusted.  Returns 0 or -EINVAL/an error from
 * dccp_listen_start().
 */
static int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}
461 | |||
/*
 * State transition table for dccp_close_state(): indexed by the current
 * sk_state, yields the state to move to, with DCCP_ACTION_FIN or'ed in
 * when a CLOSE/CLOSEREQ must be sent.
 */
static const unsigned char dccp_new_state[] = {
	/* current state:   new state:      action:	*/
	[0]		  = DCCP_CLOSED,
	[DCCP_OPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_REQUESTING] = DCCP_CLOSED,
	[DCCP_PARTOPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_LISTEN]	  = DCCP_CLOSED,
	[DCCP_RESPOND]	  = DCCP_CLOSED,
	[DCCP_CLOSING]	  = DCCP_CLOSED,
	[DCCP_TIME_WAIT]  = DCCP_CLOSED,
	[DCCP_CLOSED]	  = DCCP_CLOSED,
};
474 | |||
475 | static int dccp_close_state(struct sock *sk) | ||
476 | { | ||
477 | const int next = dccp_new_state[sk->sk_state]; | ||
478 | const int ns = next & DCCP_STATE_MASK; | ||
479 | |||
480 | if (ns != sk->sk_state) | ||
481 | dccp_set_state(sk, ns); | ||
482 | |||
483 | return next & DCCP_ACTION_FIN; | ||
484 | } | ||
485 | |||
/*
 * dccp_close - close() entry point: send CLOSE/CLOSEREQ if needed,
 * orphan the socket and either destroy it (already CLOSED) or leave it
 * to the protocol (timers/incoming packets) to finish the teardown.
 */
void dccp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/*
	 * We need to flush the recv. buffs. We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	/* FIXME: check for unread data */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		__kfree_skb(skb);
	}

	if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (dccp_close_state(sk)) {
		/* The table said a CLOSE/CLOSEREQ is required. */
		dccp_send_close(sk, 1);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * The last release_sock may have processed the CLOSE or RESET
	 * packet moving sock to CLOSED state, if not we have to fire
	 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
	 * in draft-ietf-dccp-spec-11. -acme
	 */
	if (sk->sk_state == DCCP_CLOSING) {
		/* FIXME: should start at 2 * RTT */
		/* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  inet_csk(sk)->icsk_rto,
					  DCCP_RTO_MAX);
#if 0
		/* Yeah, we should use sk->sk_prot->orphan_count, etc */
		dccp_set_state(sk, DCCP_CLOSED);
#endif
	}

	atomic_inc(sk->sk_prot->orphan_count);
	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}
566 | |||
/* shutdown() is not implemented yet; only logs entry. */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}
571 | |||
/*
 * socket-layer operations for PF_INET/SOCK_DCCP: mostly the generic
 * inet/sock_common handlers, with DCCP-specific poll and listen.
 */
static struct proto_ops inet_dccp_ops = {
	.family		= PF_INET,
	.owner		= THIS_MODULE,
	.release	= inet_release,
	.bind		= inet_bind,
	.connect	= inet_stream_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= inet_accept,
	.getname	= inet_getname,
	/* FIXME: work on tcp_poll to rename it to inet_csk_poll */
	.poll		= dccp_poll,
	.ioctl		= inet_ioctl,
	/* FIXME: work on inet_listen to rename it to sock_common_listen */
	.listen		= inet_dccp_listen,
	.shutdown	= inet_shutdown,
	.setsockopt	= sock_common_setsockopt,
	.getsockopt	= sock_common_getsockopt,
	.sendmsg	= inet_sendmsg,
	.recvmsg	= sock_common_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage,
};
594 | |||
extern struct net_proto_family inet_family_ops;

/* Registration entry wiring SOCK_DCCP/IPPROTO_DCCP to our prot/ops. */
static struct inet_protosw dccp_v4_protosw = {
	.type		= SOCK_DCCP,
	.protocol	= IPPROTO_DCCP,
	.prot		= &dccp_v4_prot,
	.ops		= &inet_dccp_ops,
	.capability	= -1,	/* no capability required to create one */
	.no_check	= 0,
	.flags		= 0,
};
606 | |||
/*
 * This is the global socket data structure used for responding to
 * the Out-of-the-blue (OOTB) packets. A control sock will be created
 * for this socket at the initialization time.
 */
struct socket *dccp_ctl_socket;

/* __initdata: freed after boot, only used by dccp_ctl_sock_init(). */
static char dccp_ctl_socket_err_msg[] __initdata =
	KERN_ERR "DCCP: Failed to create the control socket.\n";
616 | |||
617 | static int __init dccp_ctl_sock_init(void) | ||
618 | { | ||
619 | int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP, | ||
620 | &dccp_ctl_socket); | ||
621 | if (rc < 0) | ||
622 | printk(dccp_ctl_socket_err_msg); | ||
623 | else { | ||
624 | dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC; | ||
625 | inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1; | ||
626 | |||
627 | /* Unhash it so that IP input processing does not even | ||
628 | * see it, we do not wish this socket to see incoming | ||
629 | * packets. | ||
630 | */ | ||
631 | dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk); | ||
632 | } | ||
633 | |||
634 | return rc; | ||
635 | } | ||
636 | |||
637 | #ifdef CONFIG_IP_DCCP_UNLOAD_HACK | ||
638 | void dccp_ctl_sock_exit(void) | ||
639 | { | ||
640 | if (dccp_ctl_socket != NULL) { | ||
641 | sock_release(dccp_ctl_socket); | ||
642 | dccp_ctl_socket = NULL; | ||
643 | } | ||
644 | } | ||
645 | |||
646 | EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit); | ||
647 | #endif | ||
648 | |||
649 | static int __init init_dccp_v4_mibs(void) | ||
650 | { | ||
651 | int rc = -ENOMEM; | ||
652 | |||
653 | dccp_statistics[0] = alloc_percpu(struct dccp_mib); | ||
654 | if (dccp_statistics[0] == NULL) | ||
655 | goto out; | ||
656 | |||
657 | dccp_statistics[1] = alloc_percpu(struct dccp_mib); | ||
658 | if (dccp_statistics[1] == NULL) | ||
659 | goto out_free_one; | ||
660 | |||
661 | rc = 0; | ||
662 | out: | ||
663 | return rc; | ||
664 | out_free_one: | ||
665 | free_percpu(dccp_statistics[0]); | ||
666 | dccp_statistics[0] = NULL; | ||
667 | goto out; | ||
668 | |||
669 | } | ||
670 | |||
/* Number of established-hash buckets requested at load time; 0 means
 * auto-size from available memory in dccp_init().  Read-only (0444). */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/* Debug-message toggle, exposed read-only via module params. */
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
#endif
680 | |||
681 | static int __init dccp_init(void) | ||
682 | { | ||
683 | unsigned long goal; | ||
684 | int ehash_order, bhash_order, i; | ||
685 | int rc = proto_register(&dccp_v4_prot, 1); | ||
686 | |||
687 | if (rc) | ||
688 | goto out; | ||
689 | |||
690 | dccp_hashinfo.bind_bucket_cachep = | ||
691 | kmem_cache_create("dccp_bind_bucket", | ||
692 | sizeof(struct inet_bind_bucket), 0, | ||
693 | SLAB_HWCACHE_ALIGN, NULL, NULL); | ||
694 | if (!dccp_hashinfo.bind_bucket_cachep) | ||
695 | goto out_proto_unregister; | ||
696 | |||
697 | /* | ||
698 | * Size and allocate the main established and bind bucket | ||
699 | * hash tables. | ||
700 | * | ||
701 | * The methodology is similar to that of the buffer cache. | ||
702 | */ | ||
703 | if (num_physpages >= (128 * 1024)) | ||
704 | goal = num_physpages >> (21 - PAGE_SHIFT); | ||
705 | else | ||
706 | goal = num_physpages >> (23 - PAGE_SHIFT); | ||
707 | |||
708 | if (thash_entries) | ||
709 | goal = (thash_entries * | ||
710 | sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT; | ||
711 | for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++) | ||
712 | ; | ||
713 | do { | ||
714 | dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE / | ||
715 | sizeof(struct inet_ehash_bucket); | ||
716 | dccp_hashinfo.ehash_size >>= 1; | ||
717 | while (dccp_hashinfo.ehash_size & | ||
718 | (dccp_hashinfo.ehash_size - 1)) | ||
719 | dccp_hashinfo.ehash_size--; | ||
720 | dccp_hashinfo.ehash = (struct inet_ehash_bucket *) | ||
721 | __get_free_pages(GFP_ATOMIC, ehash_order); | ||
722 | } while (!dccp_hashinfo.ehash && --ehash_order > 0); | ||
723 | |||
724 | if (!dccp_hashinfo.ehash) { | ||
725 | printk(KERN_CRIT "Failed to allocate DCCP " | ||
726 | "established hash table\n"); | ||
727 | goto out_free_bind_bucket_cachep; | ||
728 | } | ||
729 | |||
730 | for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) { | ||
731 | rwlock_init(&dccp_hashinfo.ehash[i].lock); | ||
732 | INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); | ||
733 | } | ||
734 | |||
735 | bhash_order = ehash_order; | ||
736 | |||
737 | do { | ||
738 | dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE / | ||
739 | sizeof(struct inet_bind_hashbucket); | ||
740 | if ((dccp_hashinfo.bhash_size > (64 * 1024)) && | ||
741 | bhash_order > 0) | ||
742 | continue; | ||
743 | dccp_hashinfo.bhash = (struct inet_bind_hashbucket *) | ||
744 | __get_free_pages(GFP_ATOMIC, bhash_order); | ||
745 | } while (!dccp_hashinfo.bhash && --bhash_order >= 0); | ||
746 | |||
747 | if (!dccp_hashinfo.bhash) { | ||
748 | printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n"); | ||
749 | goto out_free_dccp_ehash; | ||
750 | } | ||
751 | |||
752 | for (i = 0; i < dccp_hashinfo.bhash_size; i++) { | ||
753 | spin_lock_init(&dccp_hashinfo.bhash[i].lock); | ||
754 | INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain); | ||
755 | } | ||
756 | |||
757 | if (init_dccp_v4_mibs()) | ||
758 | goto out_free_dccp_bhash; | ||
759 | |||
760 | rc = -EAGAIN; | ||
761 | if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP)) | ||
762 | goto out_free_dccp_v4_mibs; | ||
763 | |||
764 | inet_register_protosw(&dccp_v4_protosw); | ||
765 | |||
766 | rc = dccp_ctl_sock_init(); | ||
767 | if (rc) | ||
768 | goto out_unregister_protosw; | ||
769 | out: | ||
770 | return rc; | ||
771 | out_unregister_protosw: | ||
772 | inet_unregister_protosw(&dccp_v4_protosw); | ||
773 | inet_del_protocol(&dccp_protocol, IPPROTO_DCCP); | ||
774 | out_free_dccp_v4_mibs: | ||
775 | free_percpu(dccp_statistics[0]); | ||
776 | free_percpu(dccp_statistics[1]); | ||
777 | dccp_statistics[0] = dccp_statistics[1] = NULL; | ||
778 | out_free_dccp_bhash: | ||
779 | free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); | ||
780 | dccp_hashinfo.bhash = NULL; | ||
781 | out_free_dccp_ehash: | ||
782 | free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); | ||
783 | dccp_hashinfo.ehash = NULL; | ||
784 | out_free_bind_bucket_cachep: | ||
785 | kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); | ||
786 | dccp_hashinfo.bind_bucket_cachep = NULL; | ||
787 | out_proto_unregister: | ||
788 | proto_unregister(&dccp_v4_prot); | ||
789 | goto out; | ||
790 | } | ||
791 | |||
/* Kept in __exitdata: only printed from the __exit path below. */
static const char dccp_del_proto_err_msg[] __exitdata =
	KERN_ERR "can't remove dccp net_protocol\n";

/*
 * Module teardown: undo dccp_init() in reverse order — unregister the
 * protosw, detach from IPv4 input, free the MIB counters and both hash
 * tables, destroy the bind-bucket cache, unregister the proto.
 */
static void __exit dccp_fini(void)
{
	inet_unregister_protosw(&dccp_v4_protosw);

	if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
		printk(dccp_del_proto_err_msg);

	free_percpu(dccp_statistics[0]);
	free_percpu(dccp_statistics[1]);
	/* Recompute the page order from the bucket counts saved in
	 * dccp_hashinfo; must match the orders used in dccp_init(). */
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	proto_unregister(&dccp_v4_prot);
}
813 | |||
814 | module_init(dccp_init); | ||
815 | module_exit(dccp_fini); | ||
816 | |||
817 | /* | ||
818 | * __stringify doesn't likes enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33) | ||
819 | * values directly, Also cover the case where the protocol is not specified, | ||
820 | * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP | ||
821 | */ | ||
822 | MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6"); | ||
823 | MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6"); | ||
824 | MODULE_LICENSE("GPL"); | ||
825 | MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>"); | ||
826 | MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol"); | ||
diff --git a/net/dccp/timer.c b/net/dccp/timer.c new file mode 100644 index 000000000000..aa34b576e228 --- /dev/null +++ b/net/dccp/timer.c | |||
@@ -0,0 +1,255 @@ | |||
1 | /* | ||
2 | * net/dccp/timer.c | ||
3 | * | ||
4 | * An implementation of the DCCP protocol | ||
5 | * Arnaldo Carvalho de Melo <acme@conectiva.com.br> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | ||
12 | |||
13 | #include <linux/config.h> | ||
14 | #include <linux/dccp.h> | ||
15 | #include <linux/skbuff.h> | ||
16 | |||
17 | #include "dccp.h" | ||
18 | |||
/* Handlers installed on the three inet_connection_sock timers below. */
static void dccp_write_timer(unsigned long data);
static void dccp_keepalive_timer(unsigned long data);
static void dccp_delack_timer(unsigned long data);

/*
 * Attach the DCCP retransmit, delayed-ACK and keepalive handlers to
 * the socket's inet_connection_sock timers.
 */
void dccp_init_xmit_timers(struct sock *sk)
{
	inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
				  &dccp_keepalive_timer);
}
28 | |||
/*
 * Abort the connection after a fatal write timeout: report ETIMEDOUT
 * (or a previously recorded soft error) to the user, send a
 * RESET(aborted) to the peer, mark the socket done and bump the
 * abort-on-timeout MIB counter.
 */
static void dccp_write_err(struct sock *sk)
{
	sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
	sk->sk_error_report(sk);

	dccp_v4_send_reset(sk, DCCP_RESET_CODE_ABORTED);
	dccp_done(sk);
	DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT);
}
38 | |||
/* A write timeout has occurred. Process the after effects.
 *
 * Decides whether the socket has retransmitted too many times and must
 * be aborted.  During connection setup (REQUESTING/PARTOPEN) the retry
 * budget comes from icsk_syn_retries (hard-coded fallback of 3 pending
 * a sysctl); otherwise a hard-coded 15 is used (FIXMEs below mirror
 * TCP's sysctls).  Returns 1 if the socket was aborted via
 * dccp_write_err(), 0 if retransmission may continue.
 */
static int dccp_write_timeout(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	int retry_until;

	if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) {
		/* Already retransmitted at least once: the current route
		 * may be bad, so advise the dst cache away from it. */
		if (icsk->icsk_retransmits != 0)
			dst_negative_advice(&sk->sk_dst_cache);
		retry_until = icsk->icsk_syn_retries ? :
			/* FIXME! */ 3 /* FIXME! sysctl_tcp_syn_retries */;
	} else {
		if (icsk->icsk_retransmits >=
		    /* FIXME! sysctl_tcp_retries1 */ 5 /* FIXME! */) {
			/* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu
			   black hole detection. :-(

			   It is place to make it. It is not made. I do not want
			   to make it. It is disguisting. It does not work in any
			   case. Let me to cite the same draft, which requires for
			   us to implement this:

   "The one security concern raised by this memo is that ICMP black holes
   are often caused by over-zealous security administrators who block
   all ICMP messages.  It is vitally important that those who design and
   deploy security systems understand the impact of strict filtering on
   upper-layer protocols.  The safest web site in the world is worthless
   if most TCP implementations cannot transfer data from it.  It would
   be far nicer to have all of the black holes fixed rather than fixing
   all of the TCP implementations."

			   Golden words :-).
			 */

			dst_negative_advice(&sk->sk_dst_cache);
		}

		retry_until = /* FIXME! */ 15 /* FIXME! sysctl_tcp_retries2 */;
		/*
		 * FIXME: see tcp_write_timout and tcp_out_of_resources
		 */
	}

	if (icsk->icsk_retransmits >= retry_until) {
		/* Has it gone just too far? */
		dccp_write_err(sk);
		return 1;
	}
	return 0;
}
89 | |||
/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff.
 *
 * Delayed-ACK timer handler.  If the socket is locked by a user
 * process the work is deferred by re-arming the timer; otherwise, if
 * an ACK is still scheduled and its deadline has passed, one is sent
 * and the ATO (ACK timeout) is adjusted as in TCP.
 */
static void dccp_delack_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;
	struct inet_connection_sock *icsk = inet_csk(sk);

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later. */
		icsk->icsk_ack.blocked = 1;
		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
		sk_reset_timer(sk, &icsk->icsk_delack_timer,
			       jiffies + TCP_DELACK_MIN);
		goto out;
	}

	/* Nothing to do if the connection is closed or no delayed ACK
	 * is pending any more. */
	if (sk->sk_state == DCCP_CLOSED ||
	    !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
		goto out;
	/* Timer fired early relative to the recorded deadline: re-arm. */
	if (time_after(icsk->icsk_ack.timeout, jiffies)) {
		sk_reset_timer(sk, &icsk->icsk_delack_timer,
			       icsk->icsk_ack.timeout);
		goto out;
	}

	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;

	if (inet_csk_ack_scheduled(sk)) {
		if (!icsk->icsk_ack.pingpong) {
			/* Delayed ACK missed: inflate ATO. */
			icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1,
						 icsk->icsk_rto);
		} else {
			/* Delayed ACK missed: leave pingpong mode and
			 * deflate ATO.
			 */
			icsk->icsk_ack.pingpong = 0;
			icsk->icsk_ack.ato = TCP_ATO_MIN;
		}
		dccp_send_ack(sk);
		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
	}
out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
136 | |||
/*
 * The DCCP retransmit timer.
 *
 * Called from dccp_write_timer() with the socket bh-locked.  Aborts
 * the sock if dccp_write_timeout() says the retry budget is exhausted;
 * otherwise retransmits sk_send_head and backs off the RTO
 * exponentially (capped at DCCP_RTO_MAX).
 */
static void dccp_retransmit_timer(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	/*
	 * sk->sk_send_head has to have one skb with
	 * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP
	 * packet types (REQUEST, RESPONSE, the ACK in the 3way handshake
	 * (PARTOPEN timer), etc).
	 */
	BUG_TRAP(sk->sk_send_head != NULL);

	/*
	 * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was
	 * sent, no need to retransmit, this sock is dead.
	 */
	if (dccp_write_timeout(sk))
		goto out;

	/*
	 * We want to know the number of packets retransmitted, not the
	 * total number of retransmissions of clones of original packets.
	 */
	if (icsk->icsk_retransmits == 0)
		DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS);

	if (dccp_retransmit_skb(sk, sk->sk_send_head) < 0) {
		/*
		 * Retransmission failed because of local congestion,
		 * do not backoff.
		 */
		if (icsk->icsk_retransmits == 0)
			icsk->icsk_retransmits = 1;
		/* Probe again soon rather than doubling the RTO. */
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  min(icsk->icsk_rto,
					      TCP_RESOURCE_PROBE_INTERVAL),
					  DCCP_RTO_MAX);
		goto out;
	}

	/* Successful (re)queue: exponential backoff, capped at DCCP_RTO_MAX. */
	icsk->icsk_backoff++;
	icsk->icsk_retransmits++;

	icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX);
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto,
				  DCCP_RTO_MAX);
	if (icsk->icsk_retransmits > 3 /* FIXME: sysctl_dccp_retries1 */)
		__sk_dst_reset(sk);
out:;
}
190 | |||
/*
 * Write/retransmit timer handler (installed by dccp_init_xmit_timers).
 * Defers to a short re-arm when the socket is user-locked, re-arms if
 * the deadline has not yet passed, and otherwise dispatches on the
 * pending event (only ICSK_TIME_RETRANS is handled here).
 */
static void dccp_write_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;
	struct inet_connection_sock *icsk = inet_csk(sk);
	int event = 0;

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later */
		sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
			       jiffies + (HZ / 20));
		goto out;
	}

	if (sk->sk_state == DCCP_CLOSED || !icsk->icsk_pending)
		goto out;

	/* Fired before the recorded deadline: re-arm for the deadline. */
	if (time_after(icsk->icsk_timeout, jiffies)) {
		sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
			       icsk->icsk_timeout);
		goto out;
	}

	/* Consume the pending-event flag before handling it. */
	event = icsk->icsk_pending;
	icsk->icsk_pending = 0;

	switch (event) {
	case ICSK_TIME_RETRANS:
		dccp_retransmit_timer(sk);
		break;
	}
out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
226 | |||
/*
 * Timer for listening sockets: prune the request-sock (pending
 * connection) queue, retransmitting or dropping half-open requests.
 */
static void dccp_response_timer(struct sock *sk)
{
	inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT,
				   DCCP_RTO_MAX);
}
235 | |||
236 | static void dccp_keepalive_timer(unsigned long data) | ||
237 | { | ||
238 | struct sock *sk = (struct sock *)data; | ||
239 | |||
240 | /* Only process if socket is not in use. */ | ||
241 | bh_lock_sock(sk); | ||
242 | if (sock_owned_by_user(sk)) { | ||
243 | /* Try again later. */ | ||
244 | inet_csk_reset_keepalive_timer(sk, HZ / 20); | ||
245 | goto out; | ||
246 | } | ||
247 | |||
248 | if (sk->sk_state == DCCP_LISTEN) { | ||
249 | dccp_response_timer(sk); | ||
250 | goto out; | ||
251 | } | ||
252 | out: | ||
253 | bh_unlock_sock(sk); | ||
254 | sock_put(sk); | ||
255 | } | ||
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index acdd18e6adb2..621680f127af 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c | |||
@@ -118,7 +118,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat | |||
118 | #include <linux/netfilter.h> | 118 | #include <linux/netfilter.h> |
119 | #include <linux/seq_file.h> | 119 | #include <linux/seq_file.h> |
120 | #include <net/sock.h> | 120 | #include <net/sock.h> |
121 | #include <net/tcp.h> | 121 | #include <net/tcp_states.h> |
122 | #include <net/flow.h> | 122 | #include <net/flow.h> |
123 | #include <asm/system.h> | 123 | #include <asm/system.h> |
124 | #include <asm/ioctls.h> | 124 | #include <asm/ioctls.h> |
@@ -1763,7 +1763,7 @@ static int dn_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
1763 | nskb = skb->next; | 1763 | nskb = skb->next; |
1764 | 1764 | ||
1765 | if (skb->len == 0) { | 1765 | if (skb->len == 0) { |
1766 | skb_unlink(skb); | 1766 | skb_unlink(skb, queue); |
1767 | kfree_skb(skb); | 1767 | kfree_skb(skb); |
1768 | /* | 1768 | /* |
1769 | * N.B. Don't refer to skb or cb after this point | 1769 | * N.B. Don't refer to skb or cb after this point |
@@ -2064,7 +2064,7 @@ static struct notifier_block dn_dev_notifier = { | |||
2064 | .notifier_call = dn_device_event, | 2064 | .notifier_call = dn_device_event, |
2065 | }; | 2065 | }; |
2066 | 2066 | ||
2067 | extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *); | 2067 | extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); |
2068 | 2068 | ||
2069 | static struct packet_type dn_dix_packet_type = { | 2069 | static struct packet_type dn_dix_packet_type = { |
2070 | .type = __constant_htons(ETH_P_DNA_RT), | 2070 | .type = __constant_htons(ETH_P_DNA_RT), |
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 00233ecbc9cb..5610bb16dbf9 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c | |||
@@ -752,16 +752,16 @@ static void rtmsg_ifa(int event, struct dn_ifaddr *ifa) | |||
752 | 752 | ||
753 | skb = alloc_skb(size, GFP_KERNEL); | 753 | skb = alloc_skb(size, GFP_KERNEL); |
754 | if (!skb) { | 754 | if (!skb) { |
755 | netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, ENOBUFS); | 755 | netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, ENOBUFS); |
756 | return; | 756 | return; |
757 | } | 757 | } |
758 | if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) { | 758 | if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) { |
759 | kfree_skb(skb); | 759 | kfree_skb(skb); |
760 | netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, EINVAL); | 760 | netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, EINVAL); |
761 | return; | 761 | return; |
762 | } | 762 | } |
763 | NETLINK_CB(skb).dst_groups = RTMGRP_DECnet_IFADDR; | 763 | NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_IFADDR; |
764 | netlink_broadcast(rtnl, skb, 0, RTMGRP_DECnet_IFADDR, GFP_KERNEL); | 764 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_DECnet_IFADDR, GFP_KERNEL); |
765 | } | 765 | } |
766 | 766 | ||
767 | static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) | 767 | static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) |
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 202dbde9850d..369f25b60f3f 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c | |||
@@ -60,7 +60,7 @@ | |||
60 | #include <linux/inet.h> | 60 | #include <linux/inet.h> |
61 | #include <linux/route.h> | 61 | #include <linux/route.h> |
62 | #include <net/sock.h> | 62 | #include <net/sock.h> |
63 | #include <net/tcp.h> | 63 | #include <net/tcp_states.h> |
64 | #include <asm/system.h> | 64 | #include <asm/system.h> |
65 | #include <linux/fcntl.h> | 65 | #include <linux/fcntl.h> |
66 | #include <linux/mm.h> | 66 | #include <linux/mm.h> |
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index 8cce1fdbda90..e0bebf4bbcad 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c | |||
@@ -479,7 +479,7 @@ int dn_nsp_check_xmit_queue(struct sock *sk, struct sk_buff *skb, struct sk_buff | |||
479 | xmit_count = cb2->xmit_count; | 479 | xmit_count = cb2->xmit_count; |
480 | segnum = cb2->segnum; | 480 | segnum = cb2->segnum; |
481 | /* Remove and drop ack'ed packet */ | 481 | /* Remove and drop ack'ed packet */ |
482 | skb_unlink(ack); | 482 | skb_unlink(ack, q); |
483 | kfree_skb(ack); | 483 | kfree_skb(ack); |
484 | ack = NULL; | 484 | ack = NULL; |
485 | 485 | ||
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 2399fa8a3f86..2c915f305be3 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c | |||
@@ -572,7 +572,7 @@ static int dn_route_ptp_hello(struct sk_buff *skb) | |||
572 | return NET_RX_SUCCESS; | 572 | return NET_RX_SUCCESS; |
573 | } | 573 | } |
574 | 574 | ||
575 | int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) | 575 | int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) |
576 | { | 576 | { |
577 | struct dn_skb_cb *cb; | 577 | struct dn_skb_cb *cb; |
578 | unsigned char flags = 0; | 578 | unsigned char flags = 0; |
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 28ba5777a25a..eeba56f99323 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c | |||
@@ -79,7 +79,7 @@ for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_n | |||
79 | static DEFINE_RWLOCK(dn_fib_tables_lock); | 79 | static DEFINE_RWLOCK(dn_fib_tables_lock); |
80 | struct dn_fib_table *dn_fib_tables[RT_TABLE_MAX + 1]; | 80 | struct dn_fib_table *dn_fib_tables[RT_TABLE_MAX + 1]; |
81 | 81 | ||
82 | static kmem_cache_t *dn_hash_kmem; | 82 | static kmem_cache_t *dn_hash_kmem __read_mostly; |
83 | static int dn_fib_hash_zombies; | 83 | static int dn_fib_hash_zombies; |
84 | 84 | ||
85 | static inline dn_fib_idx_t dn_hash(dn_fib_key_t key, struct dn_zone *dz) | 85 | static inline dn_fib_idx_t dn_hash(dn_fib_key_t key, struct dn_zone *dz) |
@@ -349,10 +349,10 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, int tb_id, | |||
349 | kfree_skb(skb); | 349 | kfree_skb(skb); |
350 | return; | 350 | return; |
351 | } | 351 | } |
352 | NETLINK_CB(skb).dst_groups = RTMGRP_DECnet_ROUTE; | 352 | NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_ROUTE; |
353 | if (nlh->nlmsg_flags & NLM_F_ECHO) | 353 | if (nlh->nlmsg_flags & NLM_F_ECHO) |
354 | atomic_inc(&skb->users); | 354 | atomic_inc(&skb->users); |
355 | netlink_broadcast(rtnl, skb, pid, RTMGRP_DECnet_ROUTE, GFP_KERNEL); | 355 | netlink_broadcast(rtnl, skb, pid, RTNLGRP_DECnet_ROUTE, GFP_KERNEL); |
356 | if (nlh->nlmsg_flags & NLM_F_ECHO) | 356 | if (nlh->nlmsg_flags & NLM_F_ECHO) |
357 | netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); | 357 | netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); |
358 | } | 358 | } |
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 284a9998e53d..1ab94c6e22ed 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/netfilter.h> | 19 | #include <linux/netfilter.h> |
20 | #include <linux/spinlock.h> | 20 | #include <linux/spinlock.h> |
21 | #include <linux/netlink.h> | 21 | #include <linux/netlink.h> |
22 | #include <linux/netfilter_decnet.h> | ||
22 | 23 | ||
23 | #include <net/sock.h> | 24 | #include <net/sock.h> |
24 | #include <net/flow.h> | 25 | #include <net/flow.h> |
@@ -71,10 +72,10 @@ static void dnrmg_send_peer(struct sk_buff *skb) | |||
71 | 72 | ||
72 | switch(flags & DN_RT_CNTL_MSK) { | 73 | switch(flags & DN_RT_CNTL_MSK) { |
73 | case DN_RT_PKT_L1RT: | 74 | case DN_RT_PKT_L1RT: |
74 | group = DNRMG_L1_GROUP; | 75 | group = DNRNG_NLGRP_L1; |
75 | break; | 76 | break; |
76 | case DN_RT_PKT_L2RT: | 77 | case DN_RT_PKT_L2RT: |
77 | group = DNRMG_L2_GROUP; | 78 | group = DNRNG_NLGRP_L2; |
78 | break; | 79 | break; |
79 | default: | 80 | default: |
80 | return; | 81 | return; |
@@ -83,7 +84,7 @@ static void dnrmg_send_peer(struct sk_buff *skb) | |||
83 | skb2 = dnrmg_build_message(skb, &status); | 84 | skb2 = dnrmg_build_message(skb, &status); |
84 | if (skb2 == NULL) | 85 | if (skb2 == NULL) |
85 | return; | 86 | return; |
86 | NETLINK_CB(skb2).dst_groups = group; | 87 | NETLINK_CB(skb2).dst_group = group; |
87 | netlink_broadcast(dnrmg, skb2, 0, group, GFP_ATOMIC); | 88 | netlink_broadcast(dnrmg, skb2, 0, group, GFP_ATOMIC); |
88 | } | 89 | } |
89 | 90 | ||
@@ -138,7 +139,8 @@ static int __init init(void) | |||
138 | { | 139 | { |
139 | int rv = 0; | 140 | int rv = 0; |
140 | 141 | ||
141 | dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, dnrmg_receive_user_sk); | 142 | dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, DNRNG_NLGRP_MAX, |
143 | dnrmg_receive_user_sk, THIS_MODULE); | ||
142 | if (dnrmg == NULL) { | 144 | if (dnrmg == NULL) { |
143 | printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); | 145 | printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); |
144 | return -ENOMEM; | 146 | return -ENOMEM; |
@@ -162,6 +164,7 @@ static void __exit fini(void) | |||
162 | MODULE_DESCRIPTION("DECnet Routing Message Grabulator"); | 164 | MODULE_DESCRIPTION("DECnet Routing Message Grabulator"); |
163 | MODULE_AUTHOR("Steven Whitehouse <steve@chygwyn.com>"); | 165 | MODULE_AUTHOR("Steven Whitehouse <steve@chygwyn.com>"); |
164 | MODULE_LICENSE("GPL"); | 166 | MODULE_LICENSE("GPL"); |
167 | MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_DNRTMSG); | ||
165 | 168 | ||
166 | module_init(init); | 169 | module_init(init); |
167 | module_exit(fini); | 170 | module_exit(fini); |
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index de691e119e17..4a62093eb343 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c | |||
@@ -159,7 +159,7 @@ static int econet_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
159 | err = memcpy_toiovec(msg->msg_iov, skb->data, copied); | 159 | err = memcpy_toiovec(msg->msg_iov, skb->data, copied); |
160 | if (err) | 160 | if (err) |
161 | goto out_free; | 161 | goto out_free; |
162 | sk->sk_stamp = skb->stamp; | 162 | skb_get_timestamp(skb, &sk->sk_stamp); |
163 | 163 | ||
164 | if (msg->msg_name) | 164 | if (msg->msg_name) |
165 | memcpy(msg->msg_name, skb->cb, msg->msg_namelen); | 165 | memcpy(msg->msg_name, skb->cb, msg->msg_namelen); |
@@ -869,7 +869,7 @@ static void aun_tx_ack(unsigned long seq, int result) | |||
869 | 869 | ||
870 | foundit: | 870 | foundit: |
871 | tx_result(skb->sk, eb->cookie, result); | 871 | tx_result(skb->sk, eb->cookie, result); |
872 | skb_unlink(skb); | 872 | skb_unlink(skb, &aun_queue); |
873 | spin_unlock_irqrestore(&aun_queue_lock, flags); | 873 | spin_unlock_irqrestore(&aun_queue_lock, flags); |
874 | kfree_skb(skb); | 874 | kfree_skb(skb); |
875 | } | 875 | } |
@@ -947,7 +947,7 @@ static void ab_cleanup(unsigned long h) | |||
947 | { | 947 | { |
948 | tx_result(skb->sk, eb->cookie, | 948 | tx_result(skb->sk, eb->cookie, |
949 | ECTYPE_TRANSMIT_NOT_PRESENT); | 949 | ECTYPE_TRANSMIT_NOT_PRESENT); |
950 | skb_unlink(skb); | 950 | skb_unlink(skb, &aun_queue); |
951 | kfree_skb(skb); | 951 | kfree_skb(skb); |
952 | } | 952 | } |
953 | skb = newskb; | 953 | skb = newskb; |
@@ -1009,7 +1009,7 @@ release: | |||
1009 | * Receive an Econet frame from a device. | 1009 | * Receive an Econet frame from a device. |
1010 | */ | 1010 | */ |
1011 | 1011 | ||
1012 | static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) | 1012 | static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) |
1013 | { | 1013 | { |
1014 | struct ec_framehdr *hdr; | 1014 | struct ec_framehdr *hdr; |
1015 | struct sock *sk; | 1015 | struct sock *sk; |
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index f6dbfb99b14d..87a052a9a84f 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c | |||
@@ -62,8 +62,6 @@ | |||
62 | #include <asm/system.h> | 62 | #include <asm/system.h> |
63 | #include <asm/checksum.h> | 63 | #include <asm/checksum.h> |
64 | 64 | ||
65 | extern int __init netdev_boot_setup(char *str); | ||
66 | |||
67 | __setup("ether=", netdev_boot_setup); | 65 | __setup("ether=", netdev_boot_setup); |
68 | 66 | ||
69 | /* | 67 | /* |
@@ -163,7 +161,6 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) | |||
163 | skb->mac.raw=skb->data; | 161 | skb->mac.raw=skb->data; |
164 | skb_pull(skb,ETH_HLEN); | 162 | skb_pull(skb,ETH_HLEN); |
165 | eth = eth_hdr(skb); | 163 | eth = eth_hdr(skb); |
166 | skb->input_dev = dev; | ||
167 | 164 | ||
168 | if(*eth->h_dest&1) | 165 | if(*eth->h_dest&1) |
169 | { | 166 | { |
diff --git a/net/ethernet/sysctl_net_ether.c b/net/ethernet/sysctl_net_ether.c index b81a6d532342..66b39fc342d2 100644 --- a/net/ethernet/sysctl_net_ether.c +++ b/net/ethernet/sysctl_net_ether.c | |||
@@ -7,6 +7,7 @@ | |||
7 | 7 | ||
8 | #include <linux/mm.h> | 8 | #include <linux/mm.h> |
9 | #include <linux/sysctl.h> | 9 | #include <linux/sysctl.h> |
10 | #include <linux/if_ether.h> | ||
10 | 11 | ||
11 | ctl_table ether_table[] = { | 12 | ctl_table ether_table[] = { |
12 | {0} | 13 | {0} |
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 0b3d9f1d8069..e55136ae09f4 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig | |||
@@ -413,20 +413,19 @@ config INET_TUNNEL | |||
413 | 413 | ||
414 | If unsure, say Y. | 414 | If unsure, say Y. |
415 | 415 | ||
416 | config IP_TCPDIAG | 416 | config INET_DIAG |
417 | tristate "IP: TCP socket monitoring interface" | 417 | tristate "INET: socket monitoring interface" |
418 | default y | 418 | default y |
419 | ---help--- | 419 | ---help--- |
420 | Support for TCP socket monitoring interface used by native Linux | 420 | Support for INET (TCP, DCCP, etc) socket monitoring interface used by |
421 | tools such as ss. ss is included in iproute2, currently downloadable | 421 | native Linux tools such as ss. ss is included in iproute2, currently |
422 | at <http://developer.osdl.org/dev/iproute2>. If you want IPv6 support | 422 | downloadable at <http://developer.osdl.org/dev/iproute2>. |
423 | and have selected IPv6 as a module, you need to build this as a | ||
424 | module too. | ||
425 | 423 | ||
426 | If unsure, say Y. | 424 | If unsure, say Y. |
427 | 425 | ||
428 | config IP_TCPDIAG_IPV6 | 426 | config INET_TCP_DIAG |
429 | def_bool (IP_TCPDIAG=y && IPV6=y) || (IP_TCPDIAG=m && IPV6) | 427 | depends on INET_DIAG |
428 | def_tristate INET_DIAG | ||
430 | 429 | ||
431 | config TCP_CONG_ADVANCED | 430 | config TCP_CONG_ADVANCED |
432 | bool "TCP: advanced congestion control" | 431 | bool "TCP: advanced congestion control" |
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 55dc6cca1e7b..f0435d00db6b 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile | |||
@@ -4,11 +4,12 @@ | |||
4 | 4 | ||
5 | obj-y := route.o inetpeer.o protocol.o \ | 5 | obj-y := route.o inetpeer.o protocol.o \ |
6 | ip_input.o ip_fragment.o ip_forward.o ip_options.o \ | 6 | ip_input.o ip_fragment.o ip_forward.o ip_options.o \ |
7 | ip_output.o ip_sockglue.o \ | 7 | ip_output.o ip_sockglue.o inet_hashtables.o \ |
8 | inet_timewait_sock.o inet_connection_sock.o \ | ||
8 | tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ | 9 | tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ |
9 | tcp_minisocks.o tcp_cong.o \ | 10 | tcp_minisocks.o tcp_cong.o \ |
10 | datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ | 11 | datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ |
11 | sysctl_net_ipv4.o fib_frontend.o fib_semantics.o | 12 | sysctl_net_ipv4.o fib_frontend.o fib_semantics.o netfilter.o |
12 | 13 | ||
13 | obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o | 14 | obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o |
14 | obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o | 15 | obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o |
@@ -29,8 +30,9 @@ obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o | |||
29 | obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o | 30 | obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o |
30 | obj-$(CONFIG_NETFILTER) += netfilter/ | 31 | obj-$(CONFIG_NETFILTER) += netfilter/ |
31 | obj-$(CONFIG_IP_VS) += ipvs/ | 32 | obj-$(CONFIG_IP_VS) += ipvs/ |
32 | obj-$(CONFIG_IP_TCPDIAG) += tcp_diag.o | 33 | obj-$(CONFIG_INET_DIAG) += inet_diag.o |
33 | obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o | 34 | obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o |
35 | obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o | ||
34 | obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o | 36 | obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o |
35 | obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o | 37 | obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o |
36 | obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o | 38 | obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o |
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 163ae4068b5f..bf147f8db399 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -99,6 +99,7 @@ | |||
99 | #include <net/arp.h> | 99 | #include <net/arp.h> |
100 | #include <net/route.h> | 100 | #include <net/route.h> |
101 | #include <net/ip_fib.h> | 101 | #include <net/ip_fib.h> |
102 | #include <net/inet_connection_sock.h> | ||
102 | #include <net/tcp.h> | 103 | #include <net/tcp.h> |
103 | #include <net/udp.h> | 104 | #include <net/udp.h> |
104 | #include <linux/skbuff.h> | 105 | #include <linux/skbuff.h> |
@@ -112,11 +113,7 @@ | |||
112 | #include <linux/mroute.h> | 113 | #include <linux/mroute.h> |
113 | #endif | 114 | #endif |
114 | 115 | ||
115 | DEFINE_SNMP_STAT(struct linux_mib, net_statistics); | 116 | DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly; |
116 | |||
117 | #ifdef INET_REFCNT_DEBUG | ||
118 | atomic_t inet_sock_nr; | ||
119 | #endif | ||
120 | 117 | ||
121 | extern void ip_mc_drop_socket(struct sock *sk); | 118 | extern void ip_mc_drop_socket(struct sock *sk); |
122 | 119 | ||
@@ -153,11 +150,7 @@ void inet_sock_destruct(struct sock *sk) | |||
153 | if (inet->opt) | 150 | if (inet->opt) |
154 | kfree(inet->opt); | 151 | kfree(inet->opt); |
155 | dst_release(sk->sk_dst_cache); | 152 | dst_release(sk->sk_dst_cache); |
156 | #ifdef INET_REFCNT_DEBUG | 153 | sk_refcnt_debug_dec(sk); |
157 | atomic_dec(&inet_sock_nr); | ||
158 | printk(KERN_DEBUG "INET socket %p released, %d are still alive\n", | ||
159 | sk, atomic_read(&inet_sock_nr)); | ||
160 | #endif | ||
161 | } | 154 | } |
162 | 155 | ||
163 | /* | 156 | /* |
@@ -210,7 +203,7 @@ int inet_listen(struct socket *sock, int backlog) | |||
210 | * we can only allow the backlog to be adjusted. | 203 | * we can only allow the backlog to be adjusted. |
211 | */ | 204 | */ |
212 | if (old_state != TCP_LISTEN) { | 205 | if (old_state != TCP_LISTEN) { |
213 | err = tcp_listen_start(sk); | 206 | err = inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); |
214 | if (err) | 207 | if (err) |
215 | goto out; | 208 | goto out; |
216 | } | 209 | } |
@@ -235,12 +228,14 @@ static int inet_create(struct socket *sock, int protocol) | |||
235 | struct proto *answer_prot; | 228 | struct proto *answer_prot; |
236 | unsigned char answer_flags; | 229 | unsigned char answer_flags; |
237 | char answer_no_check; | 230 | char answer_no_check; |
238 | int err; | 231 | int try_loading_module = 0; |
232 | int err = -ESOCKTNOSUPPORT; | ||
239 | 233 | ||
240 | sock->state = SS_UNCONNECTED; | 234 | sock->state = SS_UNCONNECTED; |
241 | 235 | ||
242 | /* Look for the requested type/protocol pair. */ | 236 | /* Look for the requested type/protocol pair. */ |
243 | answer = NULL; | 237 | answer = NULL; |
238 | lookup_protocol: | ||
244 | rcu_read_lock(); | 239 | rcu_read_lock(); |
245 | list_for_each_rcu(p, &inetsw[sock->type]) { | 240 | list_for_each_rcu(p, &inetsw[sock->type]) { |
246 | answer = list_entry(p, struct inet_protosw, list); | 241 | answer = list_entry(p, struct inet_protosw, list); |
@@ -261,9 +256,28 @@ static int inet_create(struct socket *sock, int protocol) | |||
261 | answer = NULL; | 256 | answer = NULL; |
262 | } | 257 | } |
263 | 258 | ||
264 | err = -ESOCKTNOSUPPORT; | 259 | if (unlikely(answer == NULL)) { |
265 | if (!answer) | 260 | if (try_loading_module < 2) { |
266 | goto out_rcu_unlock; | 261 | rcu_read_unlock(); |
262 | /* | ||
263 | * Be more specific, e.g. net-pf-2-proto-132-type-1 | ||
264 | * (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM) | ||
265 | */ | ||
266 | if (++try_loading_module == 1) | ||
267 | request_module("net-pf-%d-proto-%d-type-%d", | ||
268 | PF_INET, protocol, sock->type); | ||
269 | /* | ||
270 | * Fall back to generic, e.g. net-pf-2-proto-132 | ||
271 | * (net-pf-PF_INET-proto-IPPROTO_SCTP) | ||
272 | */ | ||
273 | else | ||
274 | request_module("net-pf-%d-proto-%d", | ||
275 | PF_INET, protocol); | ||
276 | goto lookup_protocol; | ||
277 | } else | ||
278 | goto out_rcu_unlock; | ||
279 | } | ||
280 | |||
267 | err = -EPERM; | 281 | err = -EPERM; |
268 | if (answer->capability > 0 && !capable(answer->capability)) | 282 | if (answer->capability > 0 && !capable(answer->capability)) |
269 | goto out_rcu_unlock; | 283 | goto out_rcu_unlock; |
@@ -317,9 +331,7 @@ static int inet_create(struct socket *sock, int protocol) | |||
317 | inet->mc_index = 0; | 331 | inet->mc_index = 0; |
318 | inet->mc_list = NULL; | 332 | inet->mc_list = NULL; |
319 | 333 | ||
320 | #ifdef INET_REFCNT_DEBUG | 334 | sk_refcnt_debug_inc(sk); |
321 | atomic_inc(&inet_sock_nr); | ||
322 | #endif | ||
323 | 335 | ||
324 | if (inet->num) { | 336 | if (inet->num) { |
325 | /* It assumes that any protocol which allows | 337 | /* It assumes that any protocol which allows |
@@ -847,10 +859,6 @@ static struct net_proto_family inet_family_ops = { | |||
847 | .owner = THIS_MODULE, | 859 | .owner = THIS_MODULE, |
848 | }; | 860 | }; |
849 | 861 | ||
850 | |||
851 | extern void tcp_init(void); | ||
852 | extern void tcp_v4_init(struct net_proto_family *); | ||
853 | |||
854 | /* Upon startup we insert all the elements in inetsw_array[] into | 862 | /* Upon startup we insert all the elements in inetsw_array[] into |
855 | * the linked list inetsw. | 863 | * the linked list inetsw. |
856 | */ | 864 | */ |
@@ -961,6 +969,119 @@ void inet_unregister_protosw(struct inet_protosw *p) | |||
961 | } | 969 | } |
962 | } | 970 | } |
963 | 971 | ||
972 | /* | ||
973 | * Shall we try to damage output packets if routing dev changes? | ||
974 | */ | ||
975 | |||
976 | int sysctl_ip_dynaddr; | ||
977 | |||
978 | static int inet_sk_reselect_saddr(struct sock *sk) | ||
979 | { | ||
980 | struct inet_sock *inet = inet_sk(sk); | ||
981 | int err; | ||
982 | struct rtable *rt; | ||
983 | __u32 old_saddr = inet->saddr; | ||
984 | __u32 new_saddr; | ||
985 | __u32 daddr = inet->daddr; | ||
986 | |||
987 | if (inet->opt && inet->opt->srr) | ||
988 | daddr = inet->opt->faddr; | ||
989 | |||
990 | /* Query new route. */ | ||
991 | err = ip_route_connect(&rt, daddr, 0, | ||
992 | RT_CONN_FLAGS(sk), | ||
993 | sk->sk_bound_dev_if, | ||
994 | sk->sk_protocol, | ||
995 | inet->sport, inet->dport, sk); | ||
996 | if (err) | ||
997 | return err; | ||
998 | |||
999 | sk_setup_caps(sk, &rt->u.dst); | ||
1000 | |||
1001 | new_saddr = rt->rt_src; | ||
1002 | |||
1003 | if (new_saddr == old_saddr) | ||
1004 | return 0; | ||
1005 | |||
1006 | if (sysctl_ip_dynaddr > 1) { | ||
1007 | printk(KERN_INFO "%s(): shifting inet->" | ||
1008 | "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n", | ||
1009 | __FUNCTION__, | ||
1010 | NIPQUAD(old_saddr), | ||
1011 | NIPQUAD(new_saddr)); | ||
1012 | } | ||
1013 | |||
1014 | inet->saddr = inet->rcv_saddr = new_saddr; | ||
1015 | |||
1016 | /* | ||
1017 | * XXX The only one ugly spot where we need to | ||
1018 | * XXX really change the sockets identity after | ||
1019 | * XXX it has entered the hashes. -DaveM | ||
1020 | * | ||
1021 | * Besides that, it does not check for connection | ||
1022 | * uniqueness. Wait for troubles. | ||
1023 | */ | ||
1024 | __sk_prot_rehash(sk); | ||
1025 | return 0; | ||
1026 | } | ||
1027 | |||
1028 | int inet_sk_rebuild_header(struct sock *sk) | ||
1029 | { | ||
1030 | struct inet_sock *inet = inet_sk(sk); | ||
1031 | struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); | ||
1032 | u32 daddr; | ||
1033 | int err; | ||
1034 | |||
1035 | /* Route is OK, nothing to do. */ | ||
1036 | if (rt) | ||
1037 | return 0; | ||
1038 | |||
1039 | /* Reroute. */ | ||
1040 | daddr = inet->daddr; | ||
1041 | if (inet->opt && inet->opt->srr) | ||
1042 | daddr = inet->opt->faddr; | ||
1043 | { | ||
1044 | struct flowi fl = { | ||
1045 | .oif = sk->sk_bound_dev_if, | ||
1046 | .nl_u = { | ||
1047 | .ip4_u = { | ||
1048 | .daddr = daddr, | ||
1049 | .saddr = inet->saddr, | ||
1050 | .tos = RT_CONN_FLAGS(sk), | ||
1051 | }, | ||
1052 | }, | ||
1053 | .proto = sk->sk_protocol, | ||
1054 | .uli_u = { | ||
1055 | .ports = { | ||
1056 | .sport = inet->sport, | ||
1057 | .dport = inet->dport, | ||
1058 | }, | ||
1059 | }, | ||
1060 | }; | ||
1061 | |||
1062 | err = ip_route_output_flow(&rt, &fl, sk, 0); | ||
1063 | } | ||
1064 | if (!err) | ||
1065 | sk_setup_caps(sk, &rt->u.dst); | ||
1066 | else { | ||
1067 | /* Routing failed... */ | ||
1068 | sk->sk_route_caps = 0; | ||
1069 | /* | ||
1070 | * Other protocols have to map its equivalent state to TCP_SYN_SENT. | ||
1071 | * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme | ||
1072 | */ | ||
1073 | if (!sysctl_ip_dynaddr || | ||
1074 | sk->sk_state != TCP_SYN_SENT || | ||
1075 | (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || | ||
1076 | (err = inet_sk_reselect_saddr(sk)) != 0) | ||
1077 | sk->sk_err_soft = -err; | ||
1078 | } | ||
1079 | |||
1080 | return err; | ||
1081 | } | ||
1082 | |||
1083 | EXPORT_SYMBOL(inet_sk_rebuild_header); | ||
1084 | |||
964 | #ifdef CONFIG_IP_MULTICAST | 1085 | #ifdef CONFIG_IP_MULTICAST |
965 | static struct net_protocol igmp_protocol = { | 1086 | static struct net_protocol igmp_protocol = { |
966 | .handler = igmp_rcv, | 1087 | .handler = igmp_rcv, |
@@ -1007,7 +1128,6 @@ static int __init init_ipv4_mibs(void) | |||
1007 | } | 1128 | } |
1008 | 1129 | ||
1009 | static int ipv4_proc_init(void); | 1130 | static int ipv4_proc_init(void); |
1010 | extern void ipfrag_init(void); | ||
1011 | 1131 | ||
1012 | /* | 1132 | /* |
1013 | * IP protocol layer initialiser | 1133 | * IP protocol layer initialiser |
@@ -1128,19 +1248,10 @@ module_init(inet_init); | |||
1128 | /* ------------------------------------------------------------------------ */ | 1248 | /* ------------------------------------------------------------------------ */ |
1129 | 1249 | ||
1130 | #ifdef CONFIG_PROC_FS | 1250 | #ifdef CONFIG_PROC_FS |
1131 | extern int fib_proc_init(void); | ||
1132 | extern void fib_proc_exit(void); | ||
1133 | #ifdef CONFIG_IP_FIB_TRIE | 1251 | #ifdef CONFIG_IP_FIB_TRIE |
1134 | extern int fib_stat_proc_init(void); | 1252 | extern int fib_stat_proc_init(void); |
1135 | extern void fib_stat_proc_exit(void); | 1253 | extern void fib_stat_proc_exit(void); |
1136 | #endif | 1254 | #endif |
1137 | extern int ip_misc_proc_init(void); | ||
1138 | extern int raw_proc_init(void); | ||
1139 | extern void raw_proc_exit(void); | ||
1140 | extern int tcp4_proc_init(void); | ||
1141 | extern void tcp4_proc_exit(void); | ||
1142 | extern int udp4_proc_init(void); | ||
1143 | extern void udp4_proc_exit(void); | ||
1144 | 1255 | ||
1145 | static int __init ipv4_proc_init(void) | 1256 | static int __init ipv4_proc_init(void) |
1146 | { | 1257 | { |
@@ -1205,7 +1316,3 @@ EXPORT_SYMBOL(inet_stream_ops); | |||
1205 | EXPORT_SYMBOL(inet_unregister_protosw); | 1316 | EXPORT_SYMBOL(inet_unregister_protosw); |
1206 | EXPORT_SYMBOL(net_statistics); | 1317 | EXPORT_SYMBOL(net_statistics); |
1207 | EXPORT_SYMBOL(sysctl_ip_nonlocal_bind); | 1318 | EXPORT_SYMBOL(sysctl_ip_nonlocal_bind); |
1208 | |||
1209 | #ifdef INET_REFCNT_DEBUG | ||
1210 | EXPORT_SYMBOL(inet_sock_nr); | ||
1211 | #endif | ||
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index a642fd612853..8bf312bdea13 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c | |||
@@ -700,7 +700,7 @@ void arp_send(int type, int ptype, u32 dest_ip, | |||
700 | static void parp_redo(struct sk_buff *skb) | 700 | static void parp_redo(struct sk_buff *skb) |
701 | { | 701 | { |
702 | nf_reset(skb); | 702 | nf_reset(skb); |
703 | arp_rcv(skb, skb->dev, NULL); | 703 | arp_rcv(skb, skb->dev, NULL, skb->dev); |
704 | } | 704 | } |
705 | 705 | ||
706 | /* | 706 | /* |
@@ -865,7 +865,7 @@ static int arp_process(struct sk_buff *skb) | |||
865 | if (n) | 865 | if (n) |
866 | neigh_release(n); | 866 | neigh_release(n); |
867 | 867 | ||
868 | if (skb->stamp.tv_sec == LOCALLY_ENQUEUED || | 868 | if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED || |
869 | skb->pkt_type == PACKET_HOST || | 869 | skb->pkt_type == PACKET_HOST || |
870 | in_dev->arp_parms->proxy_delay == 0) { | 870 | in_dev->arp_parms->proxy_delay == 0) { |
871 | arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); | 871 | arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); |
@@ -927,7 +927,7 @@ out: | |||
927 | * Receive an arp request from the device layer. | 927 | * Receive an arp request from the device layer. |
928 | */ | 928 | */ |
929 | 929 | ||
930 | int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) | 930 | int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) |
931 | { | 931 | { |
932 | struct arphdr *arp; | 932 | struct arphdr *arp; |
933 | 933 | ||
@@ -948,6 +948,8 @@ int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) | |||
948 | if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) | 948 | if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) |
949 | goto out_of_mem; | 949 | goto out_of_mem; |
950 | 950 | ||
951 | memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); | ||
952 | |||
951 | return NF_HOOK(NF_ARP, NF_ARP_IN, skb, dev, NULL, arp_process); | 953 | return NF_HOOK(NF_ARP, NF_ARP_IN, skb, dev, NULL, arp_process); |
952 | 954 | ||
953 | freeskb: | 955 | freeskb: |
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index b1db561f2542..c1b42b5257f8 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c | |||
@@ -16,9 +16,10 @@ | |||
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/ip.h> | 17 | #include <linux/ip.h> |
18 | #include <linux/in.h> | 18 | #include <linux/in.h> |
19 | #include <net/ip.h> | ||
19 | #include <net/sock.h> | 20 | #include <net/sock.h> |
20 | #include <net/tcp.h> | ||
21 | #include <net/route.h> | 21 | #include <net/route.h> |
22 | #include <net/tcp_states.h> | ||
22 | 23 | ||
23 | int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | 24 | int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) |
24 | { | 25 | { |
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index d8a10e3dd77d..ba2895ae8151 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
@@ -1111,13 +1111,12 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa) | |||
1111 | struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); | 1111 | struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); |
1112 | 1112 | ||
1113 | if (!skb) | 1113 | if (!skb) |
1114 | netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, ENOBUFS); | 1114 | netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, ENOBUFS); |
1115 | else if (inet_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) { | 1115 | else if (inet_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) { |
1116 | kfree_skb(skb); | 1116 | kfree_skb(skb); |
1117 | netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, EINVAL); | 1117 | netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, EINVAL); |
1118 | } else { | 1118 | } else { |
1119 | NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_IFADDR; | 1119 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_IFADDR, GFP_KERNEL); |
1120 | netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV4_IFADDR, GFP_KERNEL); | ||
1121 | } | 1120 | } |
1122 | } | 1121 | } |
1123 | 1122 | ||
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index ba57446d5d1f..b31ffc5053d2 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c | |||
@@ -331,8 +331,8 @@ static void esp4_err(struct sk_buff *skb, u32 info) | |||
331 | x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); | 331 | x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); |
332 | if (!x) | 332 | if (!x) |
333 | return; | 333 | return; |
334 | NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", | 334 | NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", |
335 | ntohl(esph->spi), ntohl(iph->daddr))); | 335 | ntohl(esph->spi), ntohl(iph->daddr)); |
336 | xfrm_state_put(x); | 336 | xfrm_state_put(x); |
337 | } | 337 | } |
338 | 338 | ||
@@ -395,10 +395,10 @@ static int esp_init_state(struct xfrm_state *x) | |||
395 | 395 | ||
396 | if (aalg_desc->uinfo.auth.icv_fullbits/8 != | 396 | if (aalg_desc->uinfo.auth.icv_fullbits/8 != |
397 | crypto_tfm_alg_digestsize(esp->auth.tfm)) { | 397 | crypto_tfm_alg_digestsize(esp->auth.tfm)) { |
398 | NETDEBUG(printk(KERN_INFO "ESP: %s digestsize %u != %hu\n", | 398 | NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n", |
399 | x->aalg->alg_name, | 399 | x->aalg->alg_name, |
400 | crypto_tfm_alg_digestsize(esp->auth.tfm), | 400 | crypto_tfm_alg_digestsize(esp->auth.tfm), |
401 | aalg_desc->uinfo.auth.icv_fullbits/8)); | 401 | aalg_desc->uinfo.auth.icv_fullbits/8); |
402 | goto error; | 402 | goto error; |
403 | } | 403 | } |
404 | 404 | ||
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index cd8e45ab9580..4e1379f71269 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
@@ -558,16 +558,15 @@ static void nl_fib_input(struct sock *sk, int len) | |||
558 | nl_fib_lookup(frn, tb); | 558 | nl_fib_lookup(frn, tb); |
559 | 559 | ||
560 | pid = nlh->nlmsg_pid; /*pid of sending process */ | 560 | pid = nlh->nlmsg_pid; /*pid of sending process */ |
561 | NETLINK_CB(skb).groups = 0; /* not in mcast group */ | ||
562 | NETLINK_CB(skb).pid = 0; /* from kernel */ | 561 | NETLINK_CB(skb).pid = 0; /* from kernel */ |
563 | NETLINK_CB(skb).dst_pid = pid; | 562 | NETLINK_CB(skb).dst_pid = pid; |
564 | NETLINK_CB(skb).dst_groups = 0; /* unicast */ | 563 | NETLINK_CB(skb).dst_group = 0; /* unicast */ |
565 | netlink_unicast(sk, skb, pid, MSG_DONTWAIT); | 564 | netlink_unicast(sk, skb, pid, MSG_DONTWAIT); |
566 | } | 565 | } |
567 | 566 | ||
568 | static void nl_fib_lookup_init(void) | 567 | static void nl_fib_lookup_init(void) |
569 | { | 568 | { |
570 | netlink_kernel_create(NETLINK_FIB_LOOKUP, nl_fib_input); | 569 | netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE); |
571 | } | 570 | } |
572 | 571 | ||
573 | static void fib_disable_ip(struct net_device *dev, int force) | 572 | static void fib_disable_ip(struct net_device *dev, int force) |
@@ -662,5 +661,4 @@ void __init ip_fib_init(void) | |||
662 | } | 661 | } |
663 | 662 | ||
664 | EXPORT_SYMBOL(inet_addr_type); | 663 | EXPORT_SYMBOL(inet_addr_type); |
665 | EXPORT_SYMBOL(ip_dev_find); | ||
666 | EXPORT_SYMBOL(ip_rt_ioctl); | 664 | EXPORT_SYMBOL(ip_rt_ioctl); |
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index b10d6bb5ef3d..2a8c9afc3695 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c | |||
@@ -45,8 +45,8 @@ | |||
45 | 45 | ||
46 | #include "fib_lookup.h" | 46 | #include "fib_lookup.h" |
47 | 47 | ||
48 | static kmem_cache_t *fn_hash_kmem; | 48 | static kmem_cache_t *fn_hash_kmem __read_mostly; |
49 | static kmem_cache_t *fn_alias_kmem; | 49 | static kmem_cache_t *fn_alias_kmem __read_mostly; |
50 | 50 | ||
51 | struct fib_node { | 51 | struct fib_node { |
52 | struct hlist_node fn_hash; | 52 | struct hlist_node fn_hash; |
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index b729d97cfa93..ef6609ea0eb7 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h | |||
@@ -7,6 +7,7 @@ | |||
7 | 7 | ||
8 | struct fib_alias { | 8 | struct fib_alias { |
9 | struct list_head fa_list; | 9 | struct list_head fa_list; |
10 | struct rcu_head rcu; | ||
10 | struct fib_info *fa_info; | 11 | struct fib_info *fa_info; |
11 | u8 fa_tos; | 12 | u8 fa_tos; |
12 | u8 fa_type; | 13 | u8 fa_type; |
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e278cb9d0075..d41219e8037c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -290,10 +290,10 @@ void rtmsg_fib(int event, u32 key, struct fib_alias *fa, | |||
290 | kfree_skb(skb); | 290 | kfree_skb(skb); |
291 | return; | 291 | return; |
292 | } | 292 | } |
293 | NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE; | 293 | NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE; |
294 | if (n->nlmsg_flags&NLM_F_ECHO) | 294 | if (n->nlmsg_flags&NLM_F_ECHO) |
295 | atomic_inc(&skb->users); | 295 | atomic_inc(&skb->users); |
296 | netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL); | 296 | netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL); |
297 | if (n->nlmsg_flags&NLM_F_ECHO) | 297 | if (n->nlmsg_flags&NLM_F_ECHO) |
298 | netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); | 298 | netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); |
299 | } | 299 | } |
@@ -854,6 +854,7 @@ failure: | |||
854 | return NULL; | 854 | return NULL; |
855 | } | 855 | } |
856 | 856 | ||
857 | /* Note! fib_semantic_match intentionally uses RCU list functions. */ | ||
857 | int fib_semantic_match(struct list_head *head, const struct flowi *flp, | 858 | int fib_semantic_match(struct list_head *head, const struct flowi *flp, |
858 | struct fib_result *res, __u32 zone, __u32 mask, | 859 | struct fib_result *res, __u32 zone, __u32 mask, |
859 | int prefixlen) | 860 | int prefixlen) |
@@ -861,7 +862,7 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp, | |||
861 | struct fib_alias *fa; | 862 | struct fib_alias *fa; |
862 | int nh_sel = 0; | 863 | int nh_sel = 0; |
863 | 864 | ||
864 | list_for_each_entry(fa, head, fa_list) { | 865 | list_for_each_entry_rcu(fa, head, fa_list) { |
865 | int err; | 866 | int err; |
866 | 867 | ||
867 | if (fa->fa_tos && | 868 | if (fa->fa_tos && |
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 45efd5f4741b..b2dea4e5da77 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -43,7 +43,7 @@ | |||
43 | * 2 of the License, or (at your option) any later version. | 43 | * 2 of the License, or (at your option) any later version. |
44 | */ | 44 | */ |
45 | 45 | ||
46 | #define VERSION "0.325" | 46 | #define VERSION "0.402" |
47 | 47 | ||
48 | #include <linux/config.h> | 48 | #include <linux/config.h> |
49 | #include <asm/uaccess.h> | 49 | #include <asm/uaccess.h> |
@@ -62,6 +62,7 @@ | |||
62 | #include <linux/netdevice.h> | 62 | #include <linux/netdevice.h> |
63 | #include <linux/if_arp.h> | 63 | #include <linux/if_arp.h> |
64 | #include <linux/proc_fs.h> | 64 | #include <linux/proc_fs.h> |
65 | #include <linux/rcupdate.h> | ||
65 | #include <linux/skbuff.h> | 66 | #include <linux/skbuff.h> |
66 | #include <linux/netlink.h> | 67 | #include <linux/netlink.h> |
67 | #include <linux/init.h> | 68 | #include <linux/init.h> |
@@ -77,56 +78,55 @@ | |||
77 | #undef CONFIG_IP_FIB_TRIE_STATS | 78 | #undef CONFIG_IP_FIB_TRIE_STATS |
78 | #define MAX_CHILDS 16384 | 79 | #define MAX_CHILDS 16384 |
79 | 80 | ||
80 | #define EXTRACT(p, n, str) ((str)<<(p)>>(32-(n))) | ||
81 | #define KEYLENGTH (8*sizeof(t_key)) | 81 | #define KEYLENGTH (8*sizeof(t_key)) |
82 | #define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l)) | 82 | #define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l)) |
83 | #define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset)) | 83 | #define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset)) |
84 | 84 | ||
85 | static DEFINE_RWLOCK(fib_lock); | ||
86 | |||
87 | typedef unsigned int t_key; | 85 | typedef unsigned int t_key; |
88 | 86 | ||
89 | #define T_TNODE 0 | 87 | #define T_TNODE 0 |
90 | #define T_LEAF 1 | 88 | #define T_LEAF 1 |
91 | #define NODE_TYPE_MASK 0x1UL | 89 | #define NODE_TYPE_MASK 0x1UL |
92 | #define NODE_PARENT(_node) \ | 90 | #define NODE_PARENT(node) \ |
93 | ((struct tnode *)((_node)->_parent & ~NODE_TYPE_MASK)) | 91 | ((struct tnode *)rcu_dereference(((node)->parent & ~NODE_TYPE_MASK))) |
94 | #define NODE_SET_PARENT(_node, _ptr) \ | 92 | |
95 | ((_node)->_parent = (((unsigned long)(_ptr)) | \ | 93 | #define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK) |
96 | ((_node)->_parent & NODE_TYPE_MASK))) | 94 | |
97 | #define NODE_INIT_PARENT(_node, _type) \ | 95 | #define NODE_SET_PARENT(node, ptr) \ |
98 | ((_node)->_parent = (_type)) | 96 | rcu_assign_pointer((node)->parent, \ |
99 | #define NODE_TYPE(_node) \ | 97 | ((unsigned long)(ptr)) | NODE_TYPE(node)) |
100 | ((_node)->_parent & NODE_TYPE_MASK) | 98 | |
101 | 99 | #define IS_TNODE(n) (!(n->parent & T_LEAF)) | |
102 | #define IS_TNODE(n) (!(n->_parent & T_LEAF)) | 100 | #define IS_LEAF(n) (n->parent & T_LEAF) |
103 | #define IS_LEAF(n) (n->_parent & T_LEAF) | ||
104 | 101 | ||
105 | struct node { | 102 | struct node { |
106 | t_key key; | 103 | t_key key; |
107 | unsigned long _parent; | 104 | unsigned long parent; |
108 | }; | 105 | }; |
109 | 106 | ||
110 | struct leaf { | 107 | struct leaf { |
111 | t_key key; | 108 | t_key key; |
112 | unsigned long _parent; | 109 | unsigned long parent; |
113 | struct hlist_head list; | 110 | struct hlist_head list; |
111 | struct rcu_head rcu; | ||
114 | }; | 112 | }; |
115 | 113 | ||
116 | struct leaf_info { | 114 | struct leaf_info { |
117 | struct hlist_node hlist; | 115 | struct hlist_node hlist; |
116 | struct rcu_head rcu; | ||
118 | int plen; | 117 | int plen; |
119 | struct list_head falh; | 118 | struct list_head falh; |
120 | }; | 119 | }; |
121 | 120 | ||
122 | struct tnode { | 121 | struct tnode { |
123 | t_key key; | 122 | t_key key; |
124 | unsigned long _parent; | 123 | unsigned long parent; |
125 | unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */ | 124 | unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */ |
126 | unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */ | 125 | unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */ |
127 | unsigned short full_children; /* KEYLENGTH bits needed */ | 126 | unsigned short full_children; /* KEYLENGTH bits needed */ |
128 | unsigned short empty_children; /* KEYLENGTH bits needed */ | 127 | unsigned short empty_children; /* KEYLENGTH bits needed */ |
129 | struct node *child[0]; | 128 | struct rcu_head rcu; |
129 | struct node *child[0]; | ||
130 | }; | 130 | }; |
131 | 131 | ||
132 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 132 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
@@ -150,77 +150,45 @@ struct trie_stat { | |||
150 | }; | 150 | }; |
151 | 151 | ||
152 | struct trie { | 152 | struct trie { |
153 | struct node *trie; | 153 | struct node *trie; |
154 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 154 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
155 | struct trie_use_stats stats; | 155 | struct trie_use_stats stats; |
156 | #endif | 156 | #endif |
157 | int size; | 157 | int size; |
158 | unsigned int revision; | 158 | unsigned int revision; |
159 | }; | 159 | }; |
160 | 160 | ||
161 | static int trie_debug = 0; | ||
162 | |||
163 | static int tnode_full(struct tnode *tn, struct node *n); | ||
164 | static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); | 161 | static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); |
165 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull); | 162 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull); |
166 | static int tnode_child_length(struct tnode *tn); | ||
167 | static struct node *resize(struct trie *t, struct tnode *tn); | 163 | static struct node *resize(struct trie *t, struct tnode *tn); |
168 | static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err); | 164 | static struct tnode *inflate(struct trie *t, struct tnode *tn); |
169 | static struct tnode *halve(struct trie *t, struct tnode *tn, int *err); | 165 | static struct tnode *halve(struct trie *t, struct tnode *tn); |
170 | static void tnode_free(struct tnode *tn); | 166 | static void tnode_free(struct tnode *tn); |
171 | static void trie_dump_seq(struct seq_file *seq, struct trie *t); | 167 | static void trie_dump_seq(struct seq_file *seq, struct trie *t); |
172 | extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); | ||
173 | extern int fib_detect_death(struct fib_info *fi, int order, | ||
174 | struct fib_info **last_resort, int *last_idx, int *dflt); | ||
175 | |||
176 | extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, int z, int tb_id, | ||
177 | struct nlmsghdr *n, struct netlink_skb_parms *req); | ||
178 | 168 | ||
179 | static kmem_cache_t *fn_alias_kmem; | 169 | static kmem_cache_t *fn_alias_kmem __read_mostly; |
180 | static struct trie *trie_local = NULL, *trie_main = NULL; | 170 | static struct trie *trie_local = NULL, *trie_main = NULL; |
181 | 171 | ||
182 | static void trie_bug(char *err) | 172 | |
183 | { | 173 | /* rcu_read_lock needs to be hold by caller from readside */ |
184 | printk("Trie Bug: %s\n", err); | ||
185 | BUG(); | ||
186 | } | ||
187 | 174 | ||
188 | static inline struct node *tnode_get_child(struct tnode *tn, int i) | 175 | static inline struct node *tnode_get_child(struct tnode *tn, int i) |
189 | { | 176 | { |
190 | if (i >= 1<<tn->bits) | 177 | BUG_ON(i >= 1 << tn->bits); |
191 | trie_bug("tnode_get_child"); | ||
192 | 178 | ||
193 | return tn->child[i]; | 179 | return rcu_dereference(tn->child[i]); |
194 | } | 180 | } |
195 | 181 | ||
196 | static inline int tnode_child_length(struct tnode *tn) | 182 | static inline int tnode_child_length(const struct tnode *tn) |
197 | { | 183 | { |
198 | return 1<<tn->bits; | 184 | return 1 << tn->bits; |
199 | } | 185 | } |
200 | 186 | ||
201 | /* | ||
202 | _________________________________________________________________ | ||
203 | | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | | ||
204 | ---------------------------------------------------------------- | ||
205 | 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 | ||
206 | |||
207 | _________________________________________________________________ | ||
208 | | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | | ||
209 | ----------------------------------------------------------------- | ||
210 | 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | ||
211 | |||
212 | tp->pos = 7 | ||
213 | tp->bits = 3 | ||
214 | n->pos = 15 | ||
215 | n->bits=4 | ||
216 | KEYLENGTH=32 | ||
217 | */ | ||
218 | |||
219 | static inline t_key tkey_extract_bits(t_key a, int offset, int bits) | 187 | static inline t_key tkey_extract_bits(t_key a, int offset, int bits) |
220 | { | 188 | { |
221 | if (offset < KEYLENGTH) | 189 | if (offset < KEYLENGTH) |
222 | return ((t_key)(a << offset)) >> (KEYLENGTH - bits); | 190 | return ((t_key)(a << offset)) >> (KEYLENGTH - bits); |
223 | else | 191 | else |
224 | return 0; | 192 | return 0; |
225 | } | 193 | } |
226 | 194 | ||
@@ -233,8 +201,8 @@ static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b) | |||
233 | { | 201 | { |
234 | if (bits == 0 || offset >= KEYLENGTH) | 202 | if (bits == 0 || offset >= KEYLENGTH) |
235 | return 1; | 203 | return 1; |
236 | bits = bits > KEYLENGTH ? KEYLENGTH : bits; | 204 | bits = bits > KEYLENGTH ? KEYLENGTH : bits; |
237 | return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0; | 205 | return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0; |
238 | } | 206 | } |
239 | 207 | ||
240 | static inline int tkey_mismatch(t_key a, int offset, t_key b) | 208 | static inline int tkey_mismatch(t_key a, int offset, t_key b) |
@@ -249,14 +217,6 @@ static inline int tkey_mismatch(t_key a, int offset, t_key b) | |||
249 | return i; | 217 | return i; |
250 | } | 218 | } |
251 | 219 | ||
252 | /* Candiate for fib_semantics */ | ||
253 | |||
254 | static void fn_free_alias(struct fib_alias *fa) | ||
255 | { | ||
256 | fib_release_info(fa->fa_info); | ||
257 | kmem_cache_free(fn_alias_kmem, fa); | ||
258 | } | ||
259 | |||
260 | /* | 220 | /* |
261 | To understand this stuff, an understanding of keys and all their bits is | 221 | To understand this stuff, an understanding of keys and all their bits is |
262 | necessary. Every node in the trie has a key associated with it, but not | 222 | necessary. Every node in the trie has a key associated with it, but not |
@@ -295,7 +255,7 @@ static void fn_free_alias(struct fib_alias *fa) | |||
295 | tp->pos = 7 | 255 | tp->pos = 7 |
296 | tp->bits = 3 | 256 | tp->bits = 3 |
297 | n->pos = 15 | 257 | n->pos = 15 |
298 | n->bits=4 | 258 | n->bits = 4 |
299 | 259 | ||
300 | First, let's just ignore the bits that come before the parent tp, that is | 260 | First, let's just ignore the bits that come before the parent tp, that is |
301 | the bits from 0 to (tp->pos-1). They are *known* but at this point we do | 261 | the bits from 0 to (tp->pos-1). They are *known* but at this point we do |
@@ -320,60 +280,65 @@ static void fn_free_alias(struct fib_alias *fa) | |||
320 | 280 | ||
321 | */ | 281 | */ |
322 | 282 | ||
323 | static void check_tnode(struct tnode *tn) | 283 | static inline void check_tnode(const struct tnode *tn) |
324 | { | 284 | { |
325 | if (tn && tn->pos+tn->bits > 32) { | 285 | WARN_ON(tn && tn->pos+tn->bits > 32); |
326 | printk("TNODE ERROR tn=%p, pos=%d, bits=%d\n", tn, tn->pos, tn->bits); | ||
327 | } | ||
328 | } | 286 | } |
329 | 287 | ||
330 | static int halve_threshold = 25; | 288 | static int halve_threshold = 25; |
331 | static int inflate_threshold = 50; | 289 | static int inflate_threshold = 50; |
332 | 290 | ||
333 | static struct leaf *leaf_new(void) | 291 | |
292 | static void __alias_free_mem(struct rcu_head *head) | ||
334 | { | 293 | { |
335 | struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL); | 294 | struct fib_alias *fa = container_of(head, struct fib_alias, rcu); |
336 | if (l) { | 295 | kmem_cache_free(fn_alias_kmem, fa); |
337 | NODE_INIT_PARENT(l, T_LEAF); | ||
338 | INIT_HLIST_HEAD(&l->list); | ||
339 | } | ||
340 | return l; | ||
341 | } | 296 | } |
342 | 297 | ||
343 | static struct leaf_info *leaf_info_new(int plen) | 298 | static inline void alias_free_mem_rcu(struct fib_alias *fa) |
344 | { | 299 | { |
345 | struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL); | 300 | call_rcu(&fa->rcu, __alias_free_mem); |
346 | if (li) { | 301 | } |
347 | li->plen = plen; | 302 | |
348 | INIT_LIST_HEAD(&li->falh); | 303 | static void __leaf_free_rcu(struct rcu_head *head) |
349 | } | 304 | { |
350 | return li; | 305 | kfree(container_of(head, struct leaf, rcu)); |
306 | } | ||
307 | |||
308 | static inline void free_leaf(struct leaf *leaf) | ||
309 | { | ||
310 | call_rcu(&leaf->rcu, __leaf_free_rcu); | ||
351 | } | 311 | } |
352 | 312 | ||
353 | static inline void free_leaf(struct leaf *l) | 313 | static void __leaf_info_free_rcu(struct rcu_head *head) |
354 | { | 314 | { |
355 | kfree(l); | 315 | kfree(container_of(head, struct leaf_info, rcu)); |
356 | } | 316 | } |
357 | 317 | ||
358 | static inline void free_leaf_info(struct leaf_info *li) | 318 | static inline void free_leaf_info(struct leaf_info *leaf) |
359 | { | 319 | { |
360 | kfree(li); | 320 | call_rcu(&leaf->rcu, __leaf_info_free_rcu); |
361 | } | 321 | } |
362 | 322 | ||
363 | static struct tnode *tnode_alloc(unsigned int size) | 323 | static struct tnode *tnode_alloc(unsigned int size) |
364 | { | 324 | { |
365 | if (size <= PAGE_SIZE) { | 325 | struct page *pages; |
366 | return kmalloc(size, GFP_KERNEL); | 326 | |
367 | } else { | 327 | if (size <= PAGE_SIZE) |
368 | return (struct tnode *) | 328 | return kcalloc(size, 1, GFP_KERNEL); |
369 | __get_free_pages(GFP_KERNEL, get_order(size)); | 329 | |
370 | } | 330 | pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size)); |
331 | if (!pages) | ||
332 | return NULL; | ||
333 | |||
334 | return page_address(pages); | ||
371 | } | 335 | } |
372 | 336 | ||
373 | static void __tnode_free(struct tnode *tn) | 337 | static void __tnode_free_rcu(struct rcu_head *head) |
374 | { | 338 | { |
339 | struct tnode *tn = container_of(head, struct tnode, rcu); | ||
375 | unsigned int size = sizeof(struct tnode) + | 340 | unsigned int size = sizeof(struct tnode) + |
376 | (1<<tn->bits) * sizeof(struct node *); | 341 | (1 << tn->bits) * sizeof(struct node *); |
377 | 342 | ||
378 | if (size <= PAGE_SIZE) | 343 | if (size <= PAGE_SIZE) |
379 | kfree(tn); | 344 | kfree(tn); |
@@ -381,15 +346,40 @@ static void __tnode_free(struct tnode *tn) | |||
381 | free_pages((unsigned long)tn, get_order(size)); | 346 | free_pages((unsigned long)tn, get_order(size)); |
382 | } | 347 | } |
383 | 348 | ||
349 | static inline void tnode_free(struct tnode *tn) | ||
350 | { | ||
351 | call_rcu(&tn->rcu, __tnode_free_rcu); | ||
352 | } | ||
353 | |||
354 | static struct leaf *leaf_new(void) | ||
355 | { | ||
356 | struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL); | ||
357 | if (l) { | ||
358 | l->parent = T_LEAF; | ||
359 | INIT_HLIST_HEAD(&l->list); | ||
360 | } | ||
361 | return l; | ||
362 | } | ||
363 | |||
364 | static struct leaf_info *leaf_info_new(int plen) | ||
365 | { | ||
366 | struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL); | ||
367 | if (li) { | ||
368 | li->plen = plen; | ||
369 | INIT_LIST_HEAD(&li->falh); | ||
370 | } | ||
371 | return li; | ||
372 | } | ||
373 | |||
384 | static struct tnode* tnode_new(t_key key, int pos, int bits) | 374 | static struct tnode* tnode_new(t_key key, int pos, int bits) |
385 | { | 375 | { |
386 | int nchildren = 1<<bits; | 376 | int nchildren = 1<<bits; |
387 | int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *); | 377 | int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *); |
388 | struct tnode *tn = tnode_alloc(sz); | 378 | struct tnode *tn = tnode_alloc(sz); |
389 | 379 | ||
390 | if (tn) { | 380 | if (tn) { |
391 | memset(tn, 0, sz); | 381 | memset(tn, 0, sz); |
392 | NODE_INIT_PARENT(tn, T_TNODE); | 382 | tn->parent = T_TNODE; |
393 | tn->pos = pos; | 383 | tn->pos = pos; |
394 | tn->bits = bits; | 384 | tn->bits = bits; |
395 | tn->key = key; | 385 | tn->key = key; |
@@ -397,38 +387,17 @@ static struct tnode* tnode_new(t_key key, int pos, int bits) | |||
397 | tn->empty_children = 1<<bits; | 387 | tn->empty_children = 1<<bits; |
398 | } | 388 | } |
399 | 389 | ||
400 | if (trie_debug > 0) | 390 | pr_debug("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode), |
401 | printk("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode), | 391 | (unsigned int) (sizeof(struct node) * 1<<bits)); |
402 | (unsigned int) (sizeof(struct node) * 1<<bits)); | ||
403 | return tn; | 392 | return tn; |
404 | } | 393 | } |
405 | 394 | ||
406 | static void tnode_free(struct tnode *tn) | ||
407 | { | ||
408 | if (!tn) { | ||
409 | trie_bug("tnode_free\n"); | ||
410 | } | ||
411 | if (IS_LEAF(tn)) { | ||
412 | free_leaf((struct leaf *)tn); | ||
413 | if (trie_debug > 0 ) | ||
414 | printk("FL %p \n", tn); | ||
415 | } | ||
416 | else if (IS_TNODE(tn)) { | ||
417 | __tnode_free(tn); | ||
418 | if (trie_debug > 0 ) | ||
419 | printk("FT %p \n", tn); | ||
420 | } | ||
421 | else { | ||
422 | trie_bug("tnode_free\n"); | ||
423 | } | ||
424 | } | ||
425 | |||
426 | /* | 395 | /* |
427 | * Check whether a tnode 'n' is "full", i.e. it is an internal node | 396 | * Check whether a tnode 'n' is "full", i.e. it is an internal node |
428 | * and no bits are skipped. See discussion in dyntree paper p. 6 | 397 | * and no bits are skipped. See discussion in dyntree paper p. 6 |
429 | */ | 398 | */ |
430 | 399 | ||
431 | static inline int tnode_full(struct tnode *tn, struct node *n) | 400 | static inline int tnode_full(const struct tnode *tn, const struct node *n) |
432 | { | 401 | { |
433 | if (n == NULL || IS_LEAF(n)) | 402 | if (n == NULL || IS_LEAF(n)) |
434 | return 0; | 403 | return 0; |
@@ -448,15 +417,11 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i, struct nod | |||
448 | 417 | ||
449 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull) | 418 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull) |
450 | { | 419 | { |
451 | struct node *chi; | 420 | struct node *chi = tn->child[i]; |
452 | int isfull; | 421 | int isfull; |
453 | 422 | ||
454 | if (i >= 1<<tn->bits) { | 423 | BUG_ON(i >= 1<<tn->bits); |
455 | printk("bits=%d, i=%d\n", tn->bits, i); | 424 | |
456 | trie_bug("tnode_put_child_reorg bits"); | ||
457 | } | ||
458 | write_lock_bh(&fib_lock); | ||
459 | chi = tn->child[i]; | ||
460 | 425 | ||
461 | /* update emptyChildren */ | 426 | /* update emptyChildren */ |
462 | if (n == NULL && chi != NULL) | 427 | if (n == NULL && chi != NULL) |
@@ -465,33 +430,32 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int w | |||
465 | tn->empty_children--; | 430 | tn->empty_children--; |
466 | 431 | ||
467 | /* update fullChildren */ | 432 | /* update fullChildren */ |
468 | if (wasfull == -1) | 433 | if (wasfull == -1) |
469 | wasfull = tnode_full(tn, chi); | 434 | wasfull = tnode_full(tn, chi); |
470 | 435 | ||
471 | isfull = tnode_full(tn, n); | 436 | isfull = tnode_full(tn, n); |
472 | if (wasfull && !isfull) | 437 | if (wasfull && !isfull) |
473 | tn->full_children--; | 438 | tn->full_children--; |
474 | |||
475 | else if (!wasfull && isfull) | 439 | else if (!wasfull && isfull) |
476 | tn->full_children++; | 440 | tn->full_children++; |
441 | |||
477 | if (n) | 442 | if (n) |
478 | NODE_SET_PARENT(n, tn); | 443 | NODE_SET_PARENT(n, tn); |
479 | 444 | ||
480 | tn->child[i] = n; | 445 | rcu_assign_pointer(tn->child[i], n); |
481 | write_unlock_bh(&fib_lock); | ||
482 | } | 446 | } |
483 | 447 | ||
484 | static struct node *resize(struct trie *t, struct tnode *tn) | 448 | static struct node *resize(struct trie *t, struct tnode *tn) |
485 | { | 449 | { |
486 | int i; | 450 | int i; |
487 | int err = 0; | 451 | int err = 0; |
452 | struct tnode *old_tn; | ||
488 | 453 | ||
489 | if (!tn) | 454 | if (!tn) |
490 | return NULL; | 455 | return NULL; |
491 | 456 | ||
492 | if (trie_debug) | 457 | pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", |
493 | printk("In tnode_resize %p inflate_threshold=%d threshold=%d\n", | 458 | tn, inflate_threshold, halve_threshold); |
494 | tn, inflate_threshold, halve_threshold); | ||
495 | 459 | ||
496 | /* No children */ | 460 | /* No children */ |
497 | if (tn->empty_children == tnode_child_length(tn)) { | 461 | if (tn->empty_children == tnode_child_length(tn)) { |
@@ -501,20 +465,16 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
501 | /* One child */ | 465 | /* One child */ |
502 | if (tn->empty_children == tnode_child_length(tn) - 1) | 466 | if (tn->empty_children == tnode_child_length(tn) - 1) |
503 | for (i = 0; i < tnode_child_length(tn); i++) { | 467 | for (i = 0; i < tnode_child_length(tn); i++) { |
468 | struct node *n; | ||
504 | 469 | ||
505 | write_lock_bh(&fib_lock); | 470 | n = tn->child[i]; |
506 | if (tn->child[i] != NULL) { | 471 | if (!n) |
507 | 472 | continue; | |
508 | /* compress one level */ | ||
509 | struct node *n = tn->child[i]; | ||
510 | if (n) | ||
511 | NODE_INIT_PARENT(n, NODE_TYPE(n)); | ||
512 | 473 | ||
513 | write_unlock_bh(&fib_lock); | 474 | /* compress one level */ |
514 | tnode_free(tn); | 475 | NODE_SET_PARENT(n, NULL); |
515 | return n; | 476 | tnode_free(tn); |
516 | } | 477 | return n; |
517 | write_unlock_bh(&fib_lock); | ||
518 | } | 478 | } |
519 | /* | 479 | /* |
520 | * Double as long as the resulting node has a number of | 480 | * Double as long as the resulting node has a number of |
@@ -566,16 +526,16 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
566 | * | 526 | * |
567 | * expand not_to_be_doubled and to_be_doubled, and shorten: | 527 | * expand not_to_be_doubled and to_be_doubled, and shorten: |
568 | * 100 * (tnode_child_length(tn) - tn->empty_children + | 528 | * 100 * (tnode_child_length(tn) - tn->empty_children + |
569 | * tn->full_children ) >= inflate_threshold * new_child_length | 529 | * tn->full_children) >= inflate_threshold * new_child_length |
570 | * | 530 | * |
571 | * expand new_child_length: | 531 | * expand new_child_length: |
572 | * 100 * (tnode_child_length(tn) - tn->empty_children + | 532 | * 100 * (tnode_child_length(tn) - tn->empty_children + |
573 | * tn->full_children ) >= | 533 | * tn->full_children) >= |
574 | * inflate_threshold * tnode_child_length(tn) * 2 | 534 | * inflate_threshold * tnode_child_length(tn) * 2 |
575 | * | 535 | * |
576 | * shorten again: | 536 | * shorten again: |
577 | * 50 * (tn->full_children + tnode_child_length(tn) - | 537 | * 50 * (tn->full_children + tnode_child_length(tn) - |
578 | * tn->empty_children ) >= inflate_threshold * | 538 | * tn->empty_children) >= inflate_threshold * |
579 | * tnode_child_length(tn) | 539 | * tnode_child_length(tn) |
580 | * | 540 | * |
581 | */ | 541 | */ |
@@ -587,9 +547,10 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
587 | 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= | 547 | 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= |
588 | inflate_threshold * tnode_child_length(tn))) { | 548 | inflate_threshold * tnode_child_length(tn))) { |
589 | 549 | ||
590 | tn = inflate(t, tn, &err); | 550 | old_tn = tn; |
591 | 551 | tn = inflate(t, tn); | |
592 | if (err) { | 552 | if (IS_ERR(tn)) { |
553 | tn = old_tn; | ||
593 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 554 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
594 | t->stats.resize_node_skipped++; | 555 | t->stats.resize_node_skipped++; |
595 | #endif | 556 | #endif |
@@ -609,9 +570,10 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
609 | 100 * (tnode_child_length(tn) - tn->empty_children) < | 570 | 100 * (tnode_child_length(tn) - tn->empty_children) < |
610 | halve_threshold * tnode_child_length(tn)) { | 571 | halve_threshold * tnode_child_length(tn)) { |
611 | 572 | ||
612 | tn = halve(t, tn, &err); | 573 | old_tn = tn; |
613 | 574 | tn = halve(t, tn); | |
614 | if (err) { | 575 | if (IS_ERR(tn)) { |
576 | tn = old_tn; | ||
615 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 577 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
616 | t->stats.resize_node_skipped++; | 578 | t->stats.resize_node_skipped++; |
617 | #endif | 579 | #endif |
@@ -621,44 +583,37 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
621 | 583 | ||
622 | 584 | ||
623 | /* Only one child remains */ | 585 | /* Only one child remains */ |
624 | |||
625 | if (tn->empty_children == tnode_child_length(tn) - 1) | 586 | if (tn->empty_children == tnode_child_length(tn) - 1) |
626 | for (i = 0; i < tnode_child_length(tn); i++) { | 587 | for (i = 0; i < tnode_child_length(tn); i++) { |
627 | 588 | struct node *n; | |
628 | write_lock_bh(&fib_lock); | 589 | |
629 | if (tn->child[i] != NULL) { | 590 | n = tn->child[i]; |
630 | /* compress one level */ | 591 | if (!n) |
631 | struct node *n = tn->child[i]; | 592 | continue; |
632 | 593 | ||
633 | if (n) | 594 | /* compress one level */ |
634 | NODE_INIT_PARENT(n, NODE_TYPE(n)); | 595 | |
635 | 596 | NODE_SET_PARENT(n, NULL); | |
636 | write_unlock_bh(&fib_lock); | 597 | tnode_free(tn); |
637 | tnode_free(tn); | 598 | return n; |
638 | return n; | ||
639 | } | ||
640 | write_unlock_bh(&fib_lock); | ||
641 | } | 599 | } |
642 | 600 | ||
643 | return (struct node *) tn; | 601 | return (struct node *) tn; |
644 | } | 602 | } |
645 | 603 | ||
646 | static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err) | 604 | static struct tnode *inflate(struct trie *t, struct tnode *tn) |
647 | { | 605 | { |
648 | struct tnode *inode; | 606 | struct tnode *inode; |
649 | struct tnode *oldtnode = tn; | 607 | struct tnode *oldtnode = tn; |
650 | int olen = tnode_child_length(tn); | 608 | int olen = tnode_child_length(tn); |
651 | int i; | 609 | int i; |
652 | 610 | ||
653 | if (trie_debug) | 611 | pr_debug("In inflate\n"); |
654 | printk("In inflate\n"); | ||
655 | 612 | ||
656 | tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); | 613 | tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); |
657 | 614 | ||
658 | if (!tn) { | 615 | if (!tn) |
659 | *err = -ENOMEM; | 616 | return ERR_PTR(-ENOMEM); |
660 | return oldtnode; | ||
661 | } | ||
662 | 617 | ||
663 | /* | 618 | /* |
664 | * Preallocate and store tnodes before the actual work so we | 619 | * Preallocate and store tnodes before the actual work so we |
@@ -666,8 +621,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err) | |||
666 | * fails. In case of failure we return the oldnode and inflate | 621 | * fails. In case of failure we return the oldnode and inflate |
667 | * of tnode is ignored. | 622 | * of tnode is ignored. |
668 | */ | 623 | */ |
669 | 624 | ||
670 | for(i = 0; i < olen; i++) { | 625 | for (i = 0; i < olen; i++) { |
671 | struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i); | 626 | struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i); |
672 | 627 | ||
673 | if (inode && | 628 | if (inode && |
@@ -675,46 +630,30 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err) | |||
675 | inode->pos == oldtnode->pos + oldtnode->bits && | 630 | inode->pos == oldtnode->pos + oldtnode->bits && |
676 | inode->bits > 1) { | 631 | inode->bits > 1) { |
677 | struct tnode *left, *right; | 632 | struct tnode *left, *right; |
678 | |||
679 | t_key m = TKEY_GET_MASK(inode->pos, 1); | 633 | t_key m = TKEY_GET_MASK(inode->pos, 1); |
680 | 634 | ||
681 | left = tnode_new(inode->key&(~m), inode->pos + 1, | 635 | left = tnode_new(inode->key&(~m), inode->pos + 1, |
682 | inode->bits - 1); | 636 | inode->bits - 1); |
637 | if (!left) | ||
638 | goto nomem; | ||
683 | 639 | ||
684 | if (!left) { | ||
685 | *err = -ENOMEM; | ||
686 | break; | ||
687 | } | ||
688 | |||
689 | right = tnode_new(inode->key|m, inode->pos + 1, | 640 | right = tnode_new(inode->key|m, inode->pos + 1, |
690 | inode->bits - 1); | 641 | inode->bits - 1); |
691 | 642 | ||
692 | if (!right) { | 643 | if (!right) { |
693 | *err = -ENOMEM; | 644 | tnode_free(left); |
694 | break; | 645 | goto nomem; |
695 | } | 646 | } |
696 | 647 | ||
697 | put_child(t, tn, 2*i, (struct node *) left); | 648 | put_child(t, tn, 2*i, (struct node *) left); |
698 | put_child(t, tn, 2*i+1, (struct node *) right); | 649 | put_child(t, tn, 2*i+1, (struct node *) right); |
699 | } | 650 | } |
700 | } | 651 | } |
701 | 652 | ||
702 | if (*err) { | 653 | for (i = 0; i < olen; i++) { |
703 | int size = tnode_child_length(tn); | ||
704 | int j; | ||
705 | |||
706 | for(j = 0; j < size; j++) | ||
707 | if (tn->child[j]) | ||
708 | tnode_free((struct tnode *)tn->child[j]); | ||
709 | |||
710 | tnode_free(tn); | ||
711 | |||
712 | *err = -ENOMEM; | ||
713 | return oldtnode; | ||
714 | } | ||
715 | |||
716 | for(i = 0; i < olen; i++) { | ||
717 | struct node *node = tnode_get_child(oldtnode, i); | 654 | struct node *node = tnode_get_child(oldtnode, i); |
655 | struct tnode *left, *right; | ||
656 | int size, j; | ||
718 | 657 | ||
719 | /* An empty child */ | 658 | /* An empty child */ |
720 | if (node == NULL) | 659 | if (node == NULL) |
@@ -740,76 +679,82 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err) | |||
740 | put_child(t, tn, 2*i+1, inode->child[1]); | 679 | put_child(t, tn, 2*i+1, inode->child[1]); |
741 | 680 | ||
742 | tnode_free(inode); | 681 | tnode_free(inode); |
682 | continue; | ||
743 | } | 683 | } |
744 | 684 | ||
745 | /* An internal node with more than two children */ | 685 | /* An internal node with more than two children */ |
746 | else { | 686 | |
747 | struct tnode *left, *right; | 687 | /* We will replace this node 'inode' with two new |
748 | int size, j; | 688 | * ones, 'left' and 'right', each with half of the |
749 | 689 | * original children. The two new nodes will have | |
750 | /* We will replace this node 'inode' with two new | 690 | * a position one bit further down the key and this |
751 | * ones, 'left' and 'right', each with half of the | 691 | * means that the "significant" part of their keys |
752 | * original children. The two new nodes will have | 692 | * (see the discussion near the top of this file) |
753 | * a position one bit further down the key and this | 693 | * will differ by one bit, which will be "0" in |
754 | * means that the "significant" part of their keys | 694 | * left's key and "1" in right's key. Since we are |
755 | * (see the discussion near the top of this file) | 695 | * moving the key position by one step, the bit that |
756 | * will differ by one bit, which will be "0" in | 696 | * we are moving away from - the bit at position |
757 | * left's key and "1" in right's key. Since we are | 697 | * (inode->pos) - is the one that will differ between |
758 | * moving the key position by one step, the bit that | 698 | * left and right. So... we synthesize that bit in the |
759 | * we are moving away from - the bit at position | 699 | * two new keys. |
760 | * (inode->pos) - is the one that will differ between | 700 | * The mask 'm' below will be a single "one" bit at |
761 | * left and right. So... we synthesize that bit in the | 701 | * the position (inode->pos) |
762 | * two new keys. | 702 | */ |
763 | * The mask 'm' below will be a single "one" bit at | ||
764 | * the position (inode->pos) | ||
765 | */ | ||
766 | |||
767 | /* Use the old key, but set the new significant | ||
768 | * bit to zero. | ||
769 | */ | ||
770 | 703 | ||
771 | left = (struct tnode *) tnode_get_child(tn, 2*i); | 704 | /* Use the old key, but set the new significant |
772 | put_child(t, tn, 2*i, NULL); | 705 | * bit to zero. |
706 | */ | ||
773 | 707 | ||
774 | if (!left) | 708 | left = (struct tnode *) tnode_get_child(tn, 2*i); |
775 | BUG(); | 709 | put_child(t, tn, 2*i, NULL); |
776 | 710 | ||
777 | right = (struct tnode *) tnode_get_child(tn, 2*i+1); | 711 | BUG_ON(!left); |
778 | put_child(t, tn, 2*i+1, NULL); | ||
779 | 712 | ||
780 | if (!right) | 713 | right = (struct tnode *) tnode_get_child(tn, 2*i+1); |
781 | BUG(); | 714 | put_child(t, tn, 2*i+1, NULL); |
782 | 715 | ||
783 | size = tnode_child_length(left); | 716 | BUG_ON(!right); |
784 | for(j = 0; j < size; j++) { | ||
785 | put_child(t, left, j, inode->child[j]); | ||
786 | put_child(t, right, j, inode->child[j + size]); | ||
787 | } | ||
788 | put_child(t, tn, 2*i, resize(t, left)); | ||
789 | put_child(t, tn, 2*i+1, resize(t, right)); | ||
790 | 717 | ||
791 | tnode_free(inode); | 718 | size = tnode_child_length(left); |
719 | for (j = 0; j < size; j++) { | ||
720 | put_child(t, left, j, inode->child[j]); | ||
721 | put_child(t, right, j, inode->child[j + size]); | ||
792 | } | 722 | } |
723 | put_child(t, tn, 2*i, resize(t, left)); | ||
724 | put_child(t, tn, 2*i+1, resize(t, right)); | ||
725 | |||
726 | tnode_free(inode); | ||
793 | } | 727 | } |
794 | tnode_free(oldtnode); | 728 | tnode_free(oldtnode); |
795 | return tn; | 729 | return tn; |
730 | nomem: | ||
731 | { | ||
732 | int size = tnode_child_length(tn); | ||
733 | int j; | ||
734 | |||
735 | for (j = 0; j < size; j++) | ||
736 | if (tn->child[j]) | ||
737 | tnode_free((struct tnode *)tn->child[j]); | ||
738 | |||
739 | tnode_free(tn); | ||
740 | |||
741 | return ERR_PTR(-ENOMEM); | ||
742 | } | ||
796 | } | 743 | } |
797 | 744 | ||
798 | static struct tnode *halve(struct trie *t, struct tnode *tn, int *err) | 745 | static struct tnode *halve(struct trie *t, struct tnode *tn) |
799 | { | 746 | { |
800 | struct tnode *oldtnode = tn; | 747 | struct tnode *oldtnode = tn; |
801 | struct node *left, *right; | 748 | struct node *left, *right; |
802 | int i; | 749 | int i; |
803 | int olen = tnode_child_length(tn); | 750 | int olen = tnode_child_length(tn); |
804 | 751 | ||
805 | if (trie_debug) printk("In halve\n"); | 752 | pr_debug("In halve\n"); |
806 | 753 | ||
807 | tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); | 754 | tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); |
808 | 755 | ||
809 | if (!tn) { | 756 | if (!tn) |
810 | *err = -ENOMEM; | 757 | return ERR_PTR(-ENOMEM); |
811 | return oldtnode; | ||
812 | } | ||
813 | 758 | ||
814 | /* | 759 | /* |
815 | * Preallocate and store tnodes before the actual work so we | 760 | * Preallocate and store tnodes before the actual work so we |
@@ -818,38 +763,27 @@ static struct tnode *halve(struct trie *t, struct tnode *tn, int *err) | |||
818 | * of tnode is ignored. | 763 | * of tnode is ignored. |
819 | */ | 764 | */ |
820 | 765 | ||
821 | for(i = 0; i < olen; i += 2) { | 766 | for (i = 0; i < olen; i += 2) { |
822 | left = tnode_get_child(oldtnode, i); | 767 | left = tnode_get_child(oldtnode, i); |
823 | right = tnode_get_child(oldtnode, i+1); | 768 | right = tnode_get_child(oldtnode, i+1); |
824 | 769 | ||
825 | /* Two nonempty children */ | 770 | /* Two nonempty children */ |
826 | if (left && right) { | 771 | if (left && right) { |
827 | struct tnode *newBinNode = | 772 | struct tnode *newn; |
828 | tnode_new(left->key, tn->pos + tn->bits, 1); | ||
829 | 773 | ||
830 | if (!newBinNode) { | 774 | newn = tnode_new(left->key, tn->pos + tn->bits, 1); |
831 | *err = -ENOMEM; | ||
832 | break; | ||
833 | } | ||
834 | put_child(t, tn, i/2, (struct node *)newBinNode); | ||
835 | } | ||
836 | } | ||
837 | 775 | ||
838 | if (*err) { | 776 | if (!newn) |
839 | int size = tnode_child_length(tn); | 777 | goto nomem; |
840 | int j; | ||
841 | 778 | ||
842 | for(j = 0; j < size; j++) | 779 | put_child(t, tn, i/2, (struct node *)newn); |
843 | if (tn->child[j]) | 780 | } |
844 | tnode_free((struct tnode *)tn->child[j]); | ||
845 | 781 | ||
846 | tnode_free(tn); | ||
847 | |||
848 | *err = -ENOMEM; | ||
849 | return oldtnode; | ||
850 | } | 782 | } |
851 | 783 | ||
852 | for(i = 0; i < olen; i += 2) { | 784 | for (i = 0; i < olen; i += 2) { |
785 | struct tnode *newBinNode; | ||
786 | |||
853 | left = tnode_get_child(oldtnode, i); | 787 | left = tnode_get_child(oldtnode, i); |
854 | right = tnode_get_child(oldtnode, i+1); | 788 | right = tnode_get_child(oldtnode, i+1); |
855 | 789 | ||
@@ -858,88 +792,99 @@ static struct tnode *halve(struct trie *t, struct tnode *tn, int *err) | |||
858 | if (right == NULL) /* Both are empty */ | 792 | if (right == NULL) /* Both are empty */ |
859 | continue; | 793 | continue; |
860 | put_child(t, tn, i/2, right); | 794 | put_child(t, tn, i/2, right); |
861 | } else if (right == NULL) | 795 | continue; |
796 | } | ||
797 | |||
798 | if (right == NULL) { | ||
862 | put_child(t, tn, i/2, left); | 799 | put_child(t, tn, i/2, left); |
800 | continue; | ||
801 | } | ||
863 | 802 | ||
864 | /* Two nonempty children */ | 803 | /* Two nonempty children */ |
865 | else { | 804 | newBinNode = (struct tnode *) tnode_get_child(tn, i/2); |
866 | struct tnode *newBinNode = | 805 | put_child(t, tn, i/2, NULL); |
867 | (struct tnode *) tnode_get_child(tn, i/2); | 806 | put_child(t, newBinNode, 0, left); |
868 | put_child(t, tn, i/2, NULL); | 807 | put_child(t, newBinNode, 1, right); |
869 | 808 | put_child(t, tn, i/2, resize(t, newBinNode)); | |
870 | if (!newBinNode) | ||
871 | BUG(); | ||
872 | |||
873 | put_child(t, newBinNode, 0, left); | ||
874 | put_child(t, newBinNode, 1, right); | ||
875 | put_child(t, tn, i/2, resize(t, newBinNode)); | ||
876 | } | ||
877 | } | 809 | } |
878 | tnode_free(oldtnode); | 810 | tnode_free(oldtnode); |
879 | return tn; | 811 | return tn; |
812 | nomem: | ||
813 | { | ||
814 | int size = tnode_child_length(tn); | ||
815 | int j; | ||
816 | |||
817 | for (j = 0; j < size; j++) | ||
818 | if (tn->child[j]) | ||
819 | tnode_free((struct tnode *)tn->child[j]); | ||
820 | |||
821 | tnode_free(tn); | ||
822 | |||
823 | return ERR_PTR(-ENOMEM); | ||
824 | } | ||
880 | } | 825 | } |
881 | 826 | ||
882 | static void *trie_init(struct trie *t) | 827 | static void trie_init(struct trie *t) |
883 | { | 828 | { |
884 | if (t) { | 829 | if (!t) |
885 | t->size = 0; | 830 | return; |
886 | t->trie = NULL; | 831 | |
887 | t->revision = 0; | 832 | t->size = 0; |
833 | rcu_assign_pointer(t->trie, NULL); | ||
834 | t->revision = 0; | ||
888 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 835 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
889 | memset(&t->stats, 0, sizeof(struct trie_use_stats)); | 836 | memset(&t->stats, 0, sizeof(struct trie_use_stats)); |
890 | #endif | 837 | #endif |
891 | } | ||
892 | return t; | ||
893 | } | 838 | } |
894 | 839 | ||
840 | /* readside most use rcu_read_lock currently dump routines | ||
841 | via get_fa_head and dump */ | ||
842 | |||
895 | static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen) | 843 | static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen) |
896 | { | 844 | { |
897 | struct hlist_node *node; | 845 | struct hlist_node *node; |
898 | struct leaf_info *li; | 846 | struct leaf_info *li; |
899 | 847 | ||
900 | hlist_for_each_entry(li, node, head, hlist) { | 848 | hlist_for_each_entry_rcu(li, node, head, hlist) |
901 | if (li->plen == plen) | 849 | if (li->plen == plen) |
902 | return li; | 850 | return li; |
903 | } | 851 | |
904 | return NULL; | 852 | return NULL; |
905 | } | 853 | } |
906 | 854 | ||
907 | static inline struct list_head * get_fa_head(struct leaf *l, int plen) | 855 | static inline struct list_head * get_fa_head(struct leaf *l, int plen) |
908 | { | 856 | { |
909 | struct list_head *fa_head = NULL; | ||
910 | struct leaf_info *li = find_leaf_info(&l->list, plen); | 857 | struct leaf_info *li = find_leaf_info(&l->list, plen); |
911 | 858 | ||
912 | if (li) | 859 | if (!li) |
913 | fa_head = &li->falh; | 860 | return NULL; |
914 | 861 | ||
915 | return fa_head; | 862 | return &li->falh; |
916 | } | 863 | } |
917 | 864 | ||
918 | static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) | 865 | static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) |
919 | { | 866 | { |
920 | struct leaf_info *li = NULL, *last = NULL; | 867 | struct leaf_info *li = NULL, *last = NULL; |
921 | struct hlist_node *node, *tmp; | 868 | struct hlist_node *node; |
922 | 869 | ||
923 | write_lock_bh(&fib_lock); | 870 | if (hlist_empty(head)) { |
924 | 871 | hlist_add_head_rcu(&new->hlist, head); | |
925 | if (hlist_empty(head)) | 872 | } else { |
926 | hlist_add_head(&new->hlist, head); | 873 | hlist_for_each_entry(li, node, head, hlist) { |
927 | else { | 874 | if (new->plen > li->plen) |
928 | hlist_for_each_entry_safe(li, node, tmp, head, hlist) { | 875 | break; |
929 | 876 | ||
930 | if (new->plen > li->plen) | 877 | last = li; |
931 | break; | 878 | } |
932 | 879 | if (last) | |
933 | last = li; | 880 | hlist_add_after_rcu(&last->hlist, &new->hlist); |
934 | } | 881 | else |
935 | if (last) | 882 | hlist_add_before_rcu(&new->hlist, &li->hlist); |
936 | hlist_add_after(&last->hlist, &new->hlist); | 883 | } |
937 | else | ||
938 | hlist_add_before(&new->hlist, &li->hlist); | ||
939 | } | ||
940 | write_unlock_bh(&fib_lock); | ||
941 | } | 884 | } |
942 | 885 | ||
886 | /* rcu_read_lock needs to be hold by caller from readside */ | ||
887 | |||
943 | static struct leaf * | 888 | static struct leaf * |
944 | fib_find_node(struct trie *t, u32 key) | 889 | fib_find_node(struct trie *t, u32 key) |
945 | { | 890 | { |
@@ -948,61 +893,43 @@ fib_find_node(struct trie *t, u32 key) | |||
948 | struct node *n; | 893 | struct node *n; |
949 | 894 | ||
950 | pos = 0; | 895 | pos = 0; |
951 | n = t->trie; | 896 | n = rcu_dereference(t->trie); |
952 | 897 | ||
953 | while (n != NULL && NODE_TYPE(n) == T_TNODE) { | 898 | while (n != NULL && NODE_TYPE(n) == T_TNODE) { |
954 | tn = (struct tnode *) n; | 899 | tn = (struct tnode *) n; |
955 | 900 | ||
956 | check_tnode(tn); | 901 | check_tnode(tn); |
957 | 902 | ||
958 | if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { | 903 | if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { |
959 | pos=tn->pos + tn->bits; | 904 | pos = tn->pos + tn->bits; |
960 | n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); | 905 | n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); |
961 | } | 906 | } else |
962 | else | ||
963 | break; | 907 | break; |
964 | } | 908 | } |
965 | /* Case we have found a leaf. Compare prefixes */ | 909 | /* Case we have found a leaf. Compare prefixes */ |
966 | 910 | ||
967 | if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { | 911 | if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) |
968 | struct leaf *l = (struct leaf *) n; | 912 | return (struct leaf *)n; |
969 | return l; | 913 | |
970 | } | ||
971 | return NULL; | 914 | return NULL; |
972 | } | 915 | } |
973 | 916 | ||
974 | static struct node *trie_rebalance(struct trie *t, struct tnode *tn) | 917 | static struct node *trie_rebalance(struct trie *t, struct tnode *tn) |
975 | { | 918 | { |
976 | int i = 0; | ||
977 | int wasfull; | 919 | int wasfull; |
978 | t_key cindex, key; | 920 | t_key cindex, key; |
979 | struct tnode *tp = NULL; | 921 | struct tnode *tp = NULL; |
980 | 922 | ||
981 | if (!tn) | ||
982 | BUG(); | ||
983 | |||
984 | key = tn->key; | 923 | key = tn->key; |
985 | i = 0; | ||
986 | 924 | ||
987 | while (tn != NULL && NODE_PARENT(tn) != NULL) { | 925 | while (tn != NULL && NODE_PARENT(tn) != NULL) { |
988 | 926 | ||
989 | if (i > 10) { | ||
990 | printk("Rebalance tn=%p \n", tn); | ||
991 | if (tn) printk("tn->parent=%p \n", NODE_PARENT(tn)); | ||
992 | |||
993 | printk("Rebalance tp=%p \n", tp); | ||
994 | if (tp) printk("tp->parent=%p \n", NODE_PARENT(tp)); | ||
995 | } | ||
996 | |||
997 | if (i > 12) BUG(); | ||
998 | i++; | ||
999 | |||
1000 | tp = NODE_PARENT(tn); | 927 | tp = NODE_PARENT(tn); |
1001 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 928 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
1002 | wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); | 929 | wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); |
1003 | tn = (struct tnode *) resize (t, (struct tnode *)tn); | 930 | tn = (struct tnode *) resize (t, (struct tnode *)tn); |
1004 | tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull); | 931 | tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull); |
1005 | 932 | ||
1006 | if (!NODE_PARENT(tn)) | 933 | if (!NODE_PARENT(tn)) |
1007 | break; | 934 | break; |
1008 | 935 | ||
@@ -1015,6 +942,8 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn) | |||
1015 | return (struct node*) tn; | 942 | return (struct node*) tn; |
1016 | } | 943 | } |
1017 | 944 | ||
945 | /* only used from updater-side */ | ||
946 | |||
1018 | static struct list_head * | 947 | static struct list_head * |
1019 | fib_insert_node(struct trie *t, int *err, u32 key, int plen) | 948 | fib_insert_node(struct trie *t, int *err, u32 key, int plen) |
1020 | { | 949 | { |
@@ -1050,20 +979,16 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) | |||
1050 | 979 | ||
1051 | while (n != NULL && NODE_TYPE(n) == T_TNODE) { | 980 | while (n != NULL && NODE_TYPE(n) == T_TNODE) { |
1052 | tn = (struct tnode *) n; | 981 | tn = (struct tnode *) n; |
1053 | 982 | ||
1054 | check_tnode(tn); | 983 | check_tnode(tn); |
1055 | 984 | ||
1056 | if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { | 985 | if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { |
1057 | tp = tn; | 986 | tp = tn; |
1058 | pos=tn->pos + tn->bits; | 987 | pos = tn->pos + tn->bits; |
1059 | n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); | 988 | n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); |
1060 | 989 | ||
1061 | if (n && NODE_PARENT(n) != tn) { | 990 | BUG_ON(n && NODE_PARENT(n) != tn); |
1062 | printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n)); | 991 | } else |
1063 | BUG(); | ||
1064 | } | ||
1065 | } | ||
1066 | else | ||
1067 | break; | 992 | break; |
1068 | } | 993 | } |
1069 | 994 | ||
@@ -1073,17 +998,15 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) | |||
1073 | * tp is n's (parent) ----> NULL or TNODE | 998 | * tp is n's (parent) ----> NULL or TNODE |
1074 | */ | 999 | */ |
1075 | 1000 | ||
1076 | if (tp && IS_LEAF(tp)) | 1001 | BUG_ON(tp && IS_LEAF(tp)); |
1077 | BUG(); | ||
1078 | |||
1079 | 1002 | ||
1080 | /* Case 1: n is a leaf. Compare prefixes */ | 1003 | /* Case 1: n is a leaf. Compare prefixes */ |
1081 | 1004 | ||
1082 | if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { | 1005 | if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { |
1083 | struct leaf *l = ( struct leaf *) n; | 1006 | struct leaf *l = (struct leaf *) n; |
1084 | 1007 | ||
1085 | li = leaf_info_new(plen); | 1008 | li = leaf_info_new(plen); |
1086 | 1009 | ||
1087 | if (!li) { | 1010 | if (!li) { |
1088 | *err = -ENOMEM; | 1011 | *err = -ENOMEM; |
1089 | goto err; | 1012 | goto err; |
@@ -1113,35 +1036,29 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) | |||
1113 | fa_head = &li->falh; | 1036 | fa_head = &li->falh; |
1114 | insert_leaf_info(&l->list, li); | 1037 | insert_leaf_info(&l->list, li); |
1115 | 1038 | ||
1116 | /* Case 2: n is NULL, and will just insert a new leaf */ | ||
1117 | if (t->trie && n == NULL) { | 1039 | if (t->trie && n == NULL) { |
1040 | /* Case 2: n is NULL, and will just insert a new leaf */ | ||
1118 | 1041 | ||
1119 | NODE_SET_PARENT(l, tp); | 1042 | NODE_SET_PARENT(l, tp); |
1120 | |||
1121 | if (!tp) | ||
1122 | BUG(); | ||
1123 | 1043 | ||
1124 | else { | 1044 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
1125 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 1045 | put_child(t, (struct tnode *)tp, cindex, (struct node *)l); |
1126 | put_child(t, (struct tnode *)tp, cindex, (struct node *)l); | 1046 | } else { |
1127 | } | 1047 | /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ |
1128 | } | ||
1129 | /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ | ||
1130 | else { | ||
1131 | /* | 1048 | /* |
1132 | * Add a new tnode here | 1049 | * Add a new tnode here |
1133 | * first tnode need some special handling | 1050 | * first tnode need some special handling |
1134 | */ | 1051 | */ |
1135 | 1052 | ||
1136 | if (tp) | 1053 | if (tp) |
1137 | pos=tp->pos+tp->bits; | 1054 | pos = tp->pos+tp->bits; |
1138 | else | 1055 | else |
1139 | pos=0; | 1056 | pos = 0; |
1057 | |||
1140 | if (n) { | 1058 | if (n) { |
1141 | newpos = tkey_mismatch(key, pos, n->key); | 1059 | newpos = tkey_mismatch(key, pos, n->key); |
1142 | tn = tnode_new(n->key, newpos, 1); | 1060 | tn = tnode_new(n->key, newpos, 1); |
1143 | } | 1061 | } else { |
1144 | else { | ||
1145 | newpos = 0; | 1062 | newpos = 0; |
1146 | tn = tnode_new(key, newpos, 1); /* First tnode */ | 1063 | tn = tnode_new(key, newpos, 1); /* First tnode */ |
1147 | } | 1064 | } |
@@ -1151,32 +1068,33 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) | |||
1151 | tnode_free((struct tnode *) l); | 1068 | tnode_free((struct tnode *) l); |
1152 | *err = -ENOMEM; | 1069 | *err = -ENOMEM; |
1153 | goto err; | 1070 | goto err; |
1154 | } | 1071 | } |
1155 | 1072 | ||
1156 | NODE_SET_PARENT(tn, tp); | 1073 | NODE_SET_PARENT(tn, tp); |
1157 | 1074 | ||
1158 | missbit=tkey_extract_bits(key, newpos, 1); | 1075 | missbit = tkey_extract_bits(key, newpos, 1); |
1159 | put_child(t, tn, missbit, (struct node *)l); | 1076 | put_child(t, tn, missbit, (struct node *)l); |
1160 | put_child(t, tn, 1-missbit, n); | 1077 | put_child(t, tn, 1-missbit, n); |
1161 | 1078 | ||
1162 | if (tp) { | 1079 | if (tp) { |
1163 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 1080 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
1164 | put_child(t, (struct tnode *)tp, cindex, (struct node *)tn); | 1081 | put_child(t, (struct tnode *)tp, cindex, (struct node *)tn); |
1165 | } | 1082 | } else { |
1166 | else { | 1083 | rcu_assign_pointer(t->trie, (struct node *)tn); /* First tnode */ |
1167 | t->trie = (struct node*) tn; /* First tnode */ | ||
1168 | tp = tn; | 1084 | tp = tn; |
1169 | } | 1085 | } |
1170 | } | 1086 | } |
1171 | if (tp && tp->pos+tp->bits > 32) { | 1087 | |
1088 | if (tp && tp->pos + tp->bits > 32) | ||
1172 | printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", | 1089 | printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", |
1173 | tp, tp->pos, tp->bits, key, plen); | 1090 | tp, tp->pos, tp->bits, key, plen); |
1174 | } | 1091 | |
1175 | /* Rebalance the trie */ | 1092 | /* Rebalance the trie */ |
1176 | t->trie = trie_rebalance(t, tp); | 1093 | |
1094 | rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); | ||
1177 | done: | 1095 | done: |
1178 | t->revision++; | 1096 | t->revision++; |
1179 | err:; | 1097 | err: |
1180 | return fa_head; | 1098 | return fa_head; |
1181 | } | 1099 | } |
1182 | 1100 | ||
@@ -1204,17 +1122,18 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1204 | 1122 | ||
1205 | key = ntohl(key); | 1123 | key = ntohl(key); |
1206 | 1124 | ||
1207 | if (trie_debug) | 1125 | pr_debug("Insert table=%d %08x/%d\n", tb->tb_id, key, plen); |
1208 | printk("Insert table=%d %08x/%d\n", tb->tb_id, key, plen); | ||
1209 | 1126 | ||
1210 | mask = ntohl( inet_make_mask(plen) ); | 1127 | mask = ntohl(inet_make_mask(plen)); |
1211 | 1128 | ||
1212 | if (key & ~mask) | 1129 | if (key & ~mask) |
1213 | return -EINVAL; | 1130 | return -EINVAL; |
1214 | 1131 | ||
1215 | key = key & mask; | 1132 | key = key & mask; |
1216 | 1133 | ||
1217 | if ((fi = fib_create_info(r, rta, nlhdr, &err)) == NULL) | 1134 | fi = fib_create_info(r, rta, nlhdr, &err); |
1135 | |||
1136 | if (!fi) | ||
1218 | goto err; | 1137 | goto err; |
1219 | 1138 | ||
1220 | l = fib_find_node(t, key); | 1139 | l = fib_find_node(t, key); |
@@ -1236,8 +1155,7 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1236 | * and we need to allocate a new one of those as well. | 1155 | * and we need to allocate a new one of those as well. |
1237 | */ | 1156 | */ |
1238 | 1157 | ||
1239 | if (fa && | 1158 | if (fa && fa->fa_info->fib_priority == fi->fib_priority) { |
1240 | fa->fa_info->fib_priority == fi->fib_priority) { | ||
1241 | struct fib_alias *fa_orig; | 1159 | struct fib_alias *fa_orig; |
1242 | 1160 | ||
1243 | err = -EEXIST; | 1161 | err = -EEXIST; |
@@ -1248,22 +1166,27 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1248 | struct fib_info *fi_drop; | 1166 | struct fib_info *fi_drop; |
1249 | u8 state; | 1167 | u8 state; |
1250 | 1168 | ||
1251 | write_lock_bh(&fib_lock); | 1169 | err = -ENOBUFS; |
1170 | new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL); | ||
1171 | if (new_fa == NULL) | ||
1172 | goto out; | ||
1252 | 1173 | ||
1253 | fi_drop = fa->fa_info; | 1174 | fi_drop = fa->fa_info; |
1254 | fa->fa_info = fi; | 1175 | new_fa->fa_tos = fa->fa_tos; |
1255 | fa->fa_type = type; | 1176 | new_fa->fa_info = fi; |
1256 | fa->fa_scope = r->rtm_scope; | 1177 | new_fa->fa_type = type; |
1178 | new_fa->fa_scope = r->rtm_scope; | ||
1257 | state = fa->fa_state; | 1179 | state = fa->fa_state; |
1258 | fa->fa_state &= ~FA_S_ACCESSED; | 1180 | new_fa->fa_state &= ~FA_S_ACCESSED; |
1259 | 1181 | ||
1260 | write_unlock_bh(&fib_lock); | 1182 | list_replace_rcu(&fa->fa_list, &new_fa->fa_list); |
1183 | alias_free_mem_rcu(fa); | ||
1261 | 1184 | ||
1262 | fib_release_info(fi_drop); | 1185 | fib_release_info(fi_drop); |
1263 | if (state & FA_S_ACCESSED) | 1186 | if (state & FA_S_ACCESSED) |
1264 | rt_cache_flush(-1); | 1187 | rt_cache_flush(-1); |
1265 | 1188 | ||
1266 | goto succeeded; | 1189 | goto succeeded; |
1267 | } | 1190 | } |
1268 | /* Error if we find a perfect match which | 1191 | /* Error if we find a perfect match which |
1269 | * uses the same scope, type, and nexthop | 1192 | * uses the same scope, type, and nexthop |
@@ -1285,7 +1208,7 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1285 | fa = fa_orig; | 1208 | fa = fa_orig; |
1286 | } | 1209 | } |
1287 | err = -ENOENT; | 1210 | err = -ENOENT; |
1288 | if (!(nlhdr->nlmsg_flags&NLM_F_CREATE)) | 1211 | if (!(nlhdr->nlmsg_flags & NLM_F_CREATE)) |
1289 | goto out; | 1212 | goto out; |
1290 | 1213 | ||
1291 | err = -ENOBUFS; | 1214 | err = -ENOBUFS; |
@@ -1298,9 +1221,6 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1298 | new_fa->fa_type = type; | 1221 | new_fa->fa_type = type; |
1299 | new_fa->fa_scope = r->rtm_scope; | 1222 | new_fa->fa_scope = r->rtm_scope; |
1300 | new_fa->fa_state = 0; | 1223 | new_fa->fa_state = 0; |
1301 | #if 0 | ||
1302 | new_fa->dst = NULL; | ||
1303 | #endif | ||
1304 | /* | 1224 | /* |
1305 | * Insert new entry to the list. | 1225 | * Insert new entry to the list. |
1306 | */ | 1226 | */ |
@@ -1312,12 +1232,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1312 | goto out_free_new_fa; | 1232 | goto out_free_new_fa; |
1313 | } | 1233 | } |
1314 | 1234 | ||
1315 | write_lock_bh(&fib_lock); | 1235 | list_add_tail_rcu(&new_fa->fa_list, |
1316 | 1236 | (fa ? &fa->fa_list : fa_head)); | |
1317 | list_add_tail(&new_fa->fa_list, | ||
1318 | (fa ? &fa->fa_list : fa_head)); | ||
1319 | |||
1320 | write_unlock_bh(&fib_lock); | ||
1321 | 1237 | ||
1322 | rt_cache_flush(-1); | 1238 | rt_cache_flush(-1); |
1323 | rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req); | 1239 | rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req); |
@@ -1328,11 +1244,14 @@ out_free_new_fa: | |||
1328 | kmem_cache_free(fn_alias_kmem, new_fa); | 1244 | kmem_cache_free(fn_alias_kmem, new_fa); |
1329 | out: | 1245 | out: |
1330 | fib_release_info(fi); | 1246 | fib_release_info(fi); |
1331 | err:; | 1247 | err: |
1332 | return err; | 1248 | return err; |
1333 | } | 1249 | } |
1334 | 1250 | ||
1335 | static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp, | 1251 | |
1252 | /* should be clalled with rcu_read_lock */ | ||
1253 | static inline int check_leaf(struct trie *t, struct leaf *l, | ||
1254 | t_key key, int *plen, const struct flowi *flp, | ||
1336 | struct fib_result *res) | 1255 | struct fib_result *res) |
1337 | { | 1256 | { |
1338 | int err, i; | 1257 | int err, i; |
@@ -1341,8 +1260,7 @@ static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *pl | |||
1341 | struct hlist_head *hhead = &l->list; | 1260 | struct hlist_head *hhead = &l->list; |
1342 | struct hlist_node *node; | 1261 | struct hlist_node *node; |
1343 | 1262 | ||
1344 | hlist_for_each_entry(li, node, hhead, hlist) { | 1263 | hlist_for_each_entry_rcu(li, node, hhead, hlist) { |
1345 | |||
1346 | i = li->plen; | 1264 | i = li->plen; |
1347 | mask = ntohl(inet_make_mask(i)); | 1265 | mask = ntohl(inet_make_mask(i)); |
1348 | if (l->key != (key & mask)) | 1266 | if (l->key != (key & mask)) |
@@ -1370,13 +1288,17 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result | |||
1370 | struct node *n; | 1288 | struct node *n; |
1371 | struct tnode *pn; | 1289 | struct tnode *pn; |
1372 | int pos, bits; | 1290 | int pos, bits; |
1373 | t_key key=ntohl(flp->fl4_dst); | 1291 | t_key key = ntohl(flp->fl4_dst); |
1374 | int chopped_off; | 1292 | int chopped_off; |
1375 | t_key cindex = 0; | 1293 | t_key cindex = 0; |
1376 | int current_prefix_length = KEYLENGTH; | 1294 | int current_prefix_length = KEYLENGTH; |
1377 | n = t->trie; | 1295 | struct tnode *cn; |
1296 | t_key node_prefix, key_prefix, pref_mismatch; | ||
1297 | int mp; | ||
1298 | |||
1299 | rcu_read_lock(); | ||
1378 | 1300 | ||
1379 | read_lock(&fib_lock); | 1301 | n = rcu_dereference(t->trie); |
1380 | if (!n) | 1302 | if (!n) |
1381 | goto failed; | 1303 | goto failed; |
1382 | 1304 | ||
@@ -1393,8 +1315,7 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result | |||
1393 | pn = (struct tnode *) n; | 1315 | pn = (struct tnode *) n; |
1394 | chopped_off = 0; | 1316 | chopped_off = 0; |
1395 | 1317 | ||
1396 | while (pn) { | 1318 | while (pn) { |
1397 | |||
1398 | pos = pn->pos; | 1319 | pos = pn->pos; |
1399 | bits = pn->bits; | 1320 | bits = pn->bits; |
1400 | 1321 | ||
@@ -1410,130 +1331,129 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result | |||
1410 | goto backtrace; | 1331 | goto backtrace; |
1411 | } | 1332 | } |
1412 | 1333 | ||
1413 | if (IS_TNODE(n)) { | 1334 | if (IS_LEAF(n)) { |
1335 | if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0) | ||
1336 | goto found; | ||
1337 | else | ||
1338 | goto backtrace; | ||
1339 | } | ||
1340 | |||
1414 | #define HL_OPTIMIZE | 1341 | #define HL_OPTIMIZE |
1415 | #ifdef HL_OPTIMIZE | 1342 | #ifdef HL_OPTIMIZE |
1416 | struct tnode *cn = (struct tnode *)n; | 1343 | cn = (struct tnode *)n; |
1417 | t_key node_prefix, key_prefix, pref_mismatch; | ||
1418 | int mp; | ||
1419 | 1344 | ||
1420 | /* | 1345 | /* |
1421 | * It's a tnode, and we can do some extra checks here if we | 1346 | * It's a tnode, and we can do some extra checks here if we |
1422 | * like, to avoid descending into a dead-end branch. | 1347 | * like, to avoid descending into a dead-end branch. |
1423 | * This tnode is in the parent's child array at index | 1348 | * This tnode is in the parent's child array at index |
1424 | * key[p_pos..p_pos+p_bits] but potentially with some bits | 1349 | * key[p_pos..p_pos+p_bits] but potentially with some bits |
1425 | * chopped off, so in reality the index may be just a | 1350 | * chopped off, so in reality the index may be just a |
1426 | * subprefix, padded with zero at the end. | 1351 | * subprefix, padded with zero at the end. |
1427 | * We can also take a look at any skipped bits in this | 1352 | * We can also take a look at any skipped bits in this |
1428 | * tnode - everything up to p_pos is supposed to be ok, | 1353 | * tnode - everything up to p_pos is supposed to be ok, |
1429 | * and the non-chopped bits of the index (se previous | 1354 | * and the non-chopped bits of the index (se previous |
1430 | * paragraph) are also guaranteed ok, but the rest is | 1355 | * paragraph) are also guaranteed ok, but the rest is |
1431 | * considered unknown. | 1356 | * considered unknown. |
1432 | * | 1357 | * |
1433 | * The skipped bits are key[pos+bits..cn->pos]. | 1358 | * The skipped bits are key[pos+bits..cn->pos]. |
1434 | */ | 1359 | */ |
1435 | |||
1436 | /* If current_prefix_length < pos+bits, we are already doing | ||
1437 | * actual prefix matching, which means everything from | ||
1438 | * pos+(bits-chopped_off) onward must be zero along some | ||
1439 | * branch of this subtree - otherwise there is *no* valid | ||
1440 | * prefix present. Here we can only check the skipped | ||
1441 | * bits. Remember, since we have already indexed into the | ||
1442 | * parent's child array, we know that the bits we chopped of | ||
1443 | * *are* zero. | ||
1444 | */ | ||
1445 | 1360 | ||
1446 | /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */ | 1361 | /* If current_prefix_length < pos+bits, we are already doing |
1447 | 1362 | * actual prefix matching, which means everything from | |
1448 | if (current_prefix_length < pos+bits) { | 1363 | * pos+(bits-chopped_off) onward must be zero along some |
1449 | if (tkey_extract_bits(cn->key, current_prefix_length, | 1364 | * branch of this subtree - otherwise there is *no* valid |
1450 | cn->pos - current_prefix_length) != 0 || | 1365 | * prefix present. Here we can only check the skipped |
1451 | !(cn->child[0])) | 1366 | * bits. Remember, since we have already indexed into the |
1452 | goto backtrace; | 1367 | * parent's child array, we know that the bits we chopped of |
1453 | } | 1368 | * *are* zero. |
1369 | */ | ||
1454 | 1370 | ||
1455 | /* | 1371 | /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */ |
1456 | * If chopped_off=0, the index is fully validated and we | ||
1457 | * only need to look at the skipped bits for this, the new, | ||
1458 | * tnode. What we actually want to do is to find out if | ||
1459 | * these skipped bits match our key perfectly, or if we will | ||
1460 | * have to count on finding a matching prefix further down, | ||
1461 | * because if we do, we would like to have some way of | ||
1462 | * verifying the existence of such a prefix at this point. | ||
1463 | */ | ||
1464 | 1372 | ||
1465 | /* The only thing we can do at this point is to verify that | 1373 | if (current_prefix_length < pos+bits) { |
1466 | * any such matching prefix can indeed be a prefix to our | 1374 | if (tkey_extract_bits(cn->key, current_prefix_length, |
1467 | * key, and if the bits in the node we are inspecting that | 1375 | cn->pos - current_prefix_length) != 0 || |
1468 | * do not match our key are not ZERO, this cannot be true. | 1376 | !(cn->child[0])) |
1469 | * Thus, find out where there is a mismatch (before cn->pos) | 1377 | goto backtrace; |
1470 | * and verify that all the mismatching bits are zero in the | 1378 | } |
1471 | * new tnode's key. | ||
1472 | */ | ||
1473 | 1379 | ||
1474 | /* Note: We aren't very concerned about the piece of the key | 1380 | /* |
1475 | * that precede pn->pos+pn->bits, since these have already been | 1381 | * If chopped_off=0, the index is fully validated and we |
1476 | * checked. The bits after cn->pos aren't checked since these are | 1382 | * only need to look at the skipped bits for this, the new, |
1477 | * by definition "unknown" at this point. Thus, what we want to | 1383 | * tnode. What we actually want to do is to find out if |
1478 | * see is if we are about to enter the "prefix matching" state, | 1384 | * these skipped bits match our key perfectly, or if we will |
1479 | * and in that case verify that the skipped bits that will prevail | 1385 | * have to count on finding a matching prefix further down, |
1480 | * throughout this subtree are zero, as they have to be if we are | 1386 | * because if we do, we would like to have some way of |
1481 | * to find a matching prefix. | 1387 | * verifying the existence of such a prefix at this point. |
1482 | */ | 1388 | */ |
1483 | 1389 | ||
1484 | node_prefix = MASK_PFX(cn->key, cn->pos); | 1390 | /* The only thing we can do at this point is to verify that |
1485 | key_prefix = MASK_PFX(key, cn->pos); | 1391 | * any such matching prefix can indeed be a prefix to our |
1486 | pref_mismatch = key_prefix^node_prefix; | 1392 | * key, and if the bits in the node we are inspecting that |
1487 | mp = 0; | 1393 | * do not match our key are not ZERO, this cannot be true. |
1394 | * Thus, find out where there is a mismatch (before cn->pos) | ||
1395 | * and verify that all the mismatching bits are zero in the | ||
1396 | * new tnode's key. | ||
1397 | */ | ||
1488 | 1398 | ||
1489 | /* In short: If skipped bits in this node do not match the search | 1399 | /* Note: We aren't very concerned about the piece of the key |
1490 | * key, enter the "prefix matching" state.directly. | 1400 | * that precede pn->pos+pn->bits, since these have already been |
1491 | */ | 1401 | * checked. The bits after cn->pos aren't checked since these are |
1492 | if (pref_mismatch) { | 1402 | * by definition "unknown" at this point. Thus, what we want to |
1493 | while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) { | 1403 | * see is if we are about to enter the "prefix matching" state, |
1494 | mp++; | 1404 | * and in that case verify that the skipped bits that will prevail |
1495 | pref_mismatch = pref_mismatch <<1; | 1405 | * throughout this subtree are zero, as they have to be if we are |
1496 | } | 1406 | * to find a matching prefix. |
1497 | key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp); | 1407 | */ |
1498 | 1408 | ||
1499 | if (key_prefix != 0) | 1409 | node_prefix = MASK_PFX(cn->key, cn->pos); |
1500 | goto backtrace; | 1410 | key_prefix = MASK_PFX(key, cn->pos); |
1501 | 1411 | pref_mismatch = key_prefix^node_prefix; | |
1502 | if (current_prefix_length >= cn->pos) | 1412 | mp = 0; |
1503 | current_prefix_length=mp; | 1413 | |
1504 | } | 1414 | /* In short: If skipped bits in this node do not match the search |
1505 | #endif | 1415 | * key, enter the "prefix matching" state.directly. |
1506 | pn = (struct tnode *)n; /* Descend */ | 1416 | */ |
1507 | chopped_off = 0; | 1417 | if (pref_mismatch) { |
1508 | continue; | 1418 | while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) { |
1419 | mp++; | ||
1420 | pref_mismatch = pref_mismatch <<1; | ||
1421 | } | ||
1422 | key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp); | ||
1423 | |||
1424 | if (key_prefix != 0) | ||
1425 | goto backtrace; | ||
1426 | |||
1427 | if (current_prefix_length >= cn->pos) | ||
1428 | current_prefix_length = mp; | ||
1509 | } | 1429 | } |
1510 | if (IS_LEAF(n)) { | 1430 | #endif |
1511 | if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0) | 1431 | pn = (struct tnode *)n; /* Descend */ |
1512 | goto found; | 1432 | chopped_off = 0; |
1513 | } | 1433 | continue; |
1434 | |||
1514 | backtrace: | 1435 | backtrace: |
1515 | chopped_off++; | 1436 | chopped_off++; |
1516 | 1437 | ||
1517 | /* As zero don't change the child key (cindex) */ | 1438 | /* As zero don't change the child key (cindex) */ |
1518 | while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1)))) { | 1439 | while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1)))) |
1519 | chopped_off++; | 1440 | chopped_off++; |
1520 | } | ||
1521 | 1441 | ||
1522 | /* Decrease current_... with bits chopped off */ | 1442 | /* Decrease current_... with bits chopped off */ |
1523 | if (current_prefix_length > pn->pos + pn->bits - chopped_off) | 1443 | if (current_prefix_length > pn->pos + pn->bits - chopped_off) |
1524 | current_prefix_length = pn->pos + pn->bits - chopped_off; | 1444 | current_prefix_length = pn->pos + pn->bits - chopped_off; |
1525 | 1445 | ||
1526 | /* | 1446 | /* |
1527 | * Either we do the actual chop off according or if we have | 1447 | * Either we do the actual chop off according or if we have |
1528 | * chopped off all bits in this tnode walk up to our parent. | 1448 | * chopped off all bits in this tnode walk up to our parent. |
1529 | */ | 1449 | */ |
1530 | 1450 | ||
1531 | if (chopped_off <= pn->bits) | 1451 | if (chopped_off <= pn->bits) { |
1532 | cindex &= ~(1 << (chopped_off-1)); | 1452 | cindex &= ~(1 << (chopped_off-1)); |
1533 | else { | 1453 | } else { |
1534 | if (NODE_PARENT(pn) == NULL) | 1454 | if (NODE_PARENT(pn) == NULL) |
1535 | goto failed; | 1455 | goto failed; |
1536 | 1456 | ||
1537 | /* Get Child's index */ | 1457 | /* Get Child's index */ |
1538 | cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits); | 1458 | cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits); |
1539 | pn = NODE_PARENT(pn); | 1459 | pn = NODE_PARENT(pn); |
@@ -1548,10 +1468,11 @@ backtrace: | |||
1548 | failed: | 1468 | failed: |
1549 | ret = 1; | 1469 | ret = 1; |
1550 | found: | 1470 | found: |
1551 | read_unlock(&fib_lock); | 1471 | rcu_read_unlock(); |
1552 | return ret; | 1472 | return ret; |
1553 | } | 1473 | } |
1554 | 1474 | ||
1475 | /* only called from updater side */ | ||
1555 | static int trie_leaf_remove(struct trie *t, t_key key) | 1476 | static int trie_leaf_remove(struct trie *t, t_key key) |
1556 | { | 1477 | { |
1557 | t_key cindex; | 1478 | t_key cindex; |
@@ -1559,24 +1480,20 @@ static int trie_leaf_remove(struct trie *t, t_key key) | |||
1559 | struct node *n = t->trie; | 1480 | struct node *n = t->trie; |
1560 | struct leaf *l; | 1481 | struct leaf *l; |
1561 | 1482 | ||
1562 | if (trie_debug) | 1483 | pr_debug("entering trie_leaf_remove(%p)\n", n); |
1563 | printk("entering trie_leaf_remove(%p)\n", n); | ||
1564 | 1484 | ||
1565 | /* Note that in the case skipped bits, those bits are *not* checked! | 1485 | /* Note that in the case skipped bits, those bits are *not* checked! |
1566 | * When we finish this, we will have NULL or a T_LEAF, and the | 1486 | * When we finish this, we will have NULL or a T_LEAF, and the |
1567 | * T_LEAF may or may not match our key. | 1487 | * T_LEAF may or may not match our key. |
1568 | */ | 1488 | */ |
1569 | 1489 | ||
1570 | while (n != NULL && IS_TNODE(n)) { | 1490 | while (n != NULL && IS_TNODE(n)) { |
1571 | struct tnode *tn = (struct tnode *) n; | 1491 | struct tnode *tn = (struct tnode *) n; |
1572 | check_tnode(tn); | 1492 | check_tnode(tn); |
1573 | n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits)); | 1493 | n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits)); |
1574 | 1494 | ||
1575 | if (n && NODE_PARENT(n) != tn) { | 1495 | BUG_ON(n && NODE_PARENT(n) != tn); |
1576 | printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n)); | 1496 | } |
1577 | BUG(); | ||
1578 | } | ||
1579 | } | ||
1580 | l = (struct leaf *) n; | 1497 | l = (struct leaf *) n; |
1581 | 1498 | ||
1582 | if (!n || !tkey_equals(l->key, key)) | 1499 | if (!n || !tkey_equals(l->key, key)) |
@@ -1590,23 +1507,24 @@ static int trie_leaf_remove(struct trie *t, t_key key) | |||
1590 | t->revision++; | 1507 | t->revision++; |
1591 | t->size--; | 1508 | t->size--; |
1592 | 1509 | ||
1510 | preempt_disable(); | ||
1593 | tp = NODE_PARENT(n); | 1511 | tp = NODE_PARENT(n); |
1594 | tnode_free((struct tnode *) n); | 1512 | tnode_free((struct tnode *) n); |
1595 | 1513 | ||
1596 | if (tp) { | 1514 | if (tp) { |
1597 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 1515 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
1598 | put_child(t, (struct tnode *)tp, cindex, NULL); | 1516 | put_child(t, (struct tnode *)tp, cindex, NULL); |
1599 | t->trie = trie_rebalance(t, tp); | 1517 | rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); |
1600 | } | 1518 | } else |
1601 | else | 1519 | rcu_assign_pointer(t->trie, NULL); |
1602 | t->trie = NULL; | 1520 | preempt_enable(); |
1603 | 1521 | ||
1604 | return 1; | 1522 | return 1; |
1605 | } | 1523 | } |
1606 | 1524 | ||
1607 | static int | 1525 | static int |
1608 | fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | 1526 | fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, |
1609 | struct nlmsghdr *nlhdr, struct netlink_skb_parms *req) | 1527 | struct nlmsghdr *nlhdr, struct netlink_skb_parms *req) |
1610 | { | 1528 | { |
1611 | struct trie *t = (struct trie *) tb->tb_data; | 1529 | struct trie *t = (struct trie *) tb->tb_data; |
1612 | u32 key, mask; | 1530 | u32 key, mask; |
@@ -1615,6 +1533,8 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1615 | struct fib_alias *fa, *fa_to_delete; | 1533 | struct fib_alias *fa, *fa_to_delete; |
1616 | struct list_head *fa_head; | 1534 | struct list_head *fa_head; |
1617 | struct leaf *l; | 1535 | struct leaf *l; |
1536 | struct leaf_info *li; | ||
1537 | |||
1618 | 1538 | ||
1619 | if (plen > 32) | 1539 | if (plen > 32) |
1620 | return -EINVAL; | 1540 | return -EINVAL; |
@@ -1624,7 +1544,7 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1624 | memcpy(&key, rta->rta_dst, 4); | 1544 | memcpy(&key, rta->rta_dst, 4); |
1625 | 1545 | ||
1626 | key = ntohl(key); | 1546 | key = ntohl(key); |
1627 | mask = ntohl( inet_make_mask(plen) ); | 1547 | mask = ntohl(inet_make_mask(plen)); |
1628 | 1548 | ||
1629 | if (key & ~mask) | 1549 | if (key & ~mask) |
1630 | return -EINVAL; | 1550 | return -EINVAL; |
@@ -1641,11 +1561,11 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1641 | if (!fa) | 1561 | if (!fa) |
1642 | return -ESRCH; | 1562 | return -ESRCH; |
1643 | 1563 | ||
1644 | if (trie_debug) | 1564 | pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); |
1645 | printk("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); | ||
1646 | 1565 | ||
1647 | fa_to_delete = NULL; | 1566 | fa_to_delete = NULL; |
1648 | fa_head = fa->fa_list.prev; | 1567 | fa_head = fa->fa_list.prev; |
1568 | |||
1649 | list_for_each_entry(fa, fa_head, fa_list) { | 1569 | list_for_each_entry(fa, fa_head, fa_list) { |
1650 | struct fib_info *fi = fa->fa_info; | 1570 | struct fib_info *fi = fa->fa_info; |
1651 | 1571 | ||
@@ -1664,39 +1584,31 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1664 | } | 1584 | } |
1665 | } | 1585 | } |
1666 | 1586 | ||
1667 | if (fa_to_delete) { | 1587 | if (!fa_to_delete) |
1668 | int kill_li = 0; | 1588 | return -ESRCH; |
1669 | struct leaf_info *li; | ||
1670 | |||
1671 | fa = fa_to_delete; | ||
1672 | rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req); | ||
1673 | 1589 | ||
1674 | l = fib_find_node(t, key); | 1590 | fa = fa_to_delete; |
1675 | li = find_leaf_info(&l->list, plen); | 1591 | rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req); |
1676 | 1592 | ||
1677 | write_lock_bh(&fib_lock); | 1593 | l = fib_find_node(t, key); |
1594 | li = find_leaf_info(&l->list, plen); | ||
1678 | 1595 | ||
1679 | list_del(&fa->fa_list); | 1596 | list_del_rcu(&fa->fa_list); |
1680 | 1597 | ||
1681 | if (list_empty(fa_head)) { | 1598 | if (list_empty(fa_head)) { |
1682 | hlist_del(&li->hlist); | 1599 | hlist_del_rcu(&li->hlist); |
1683 | kill_li = 1; | 1600 | free_leaf_info(li); |
1684 | } | 1601 | } |
1685 | write_unlock_bh(&fib_lock); | ||
1686 | |||
1687 | if (kill_li) | ||
1688 | free_leaf_info(li); | ||
1689 | 1602 | ||
1690 | if (hlist_empty(&l->list)) | 1603 | if (hlist_empty(&l->list)) |
1691 | trie_leaf_remove(t, key); | 1604 | trie_leaf_remove(t, key); |
1692 | 1605 | ||
1693 | if (fa->fa_state & FA_S_ACCESSED) | 1606 | if (fa->fa_state & FA_S_ACCESSED) |
1694 | rt_cache_flush(-1); | 1607 | rt_cache_flush(-1); |
1695 | 1608 | ||
1696 | fn_free_alias(fa); | 1609 | fib_release_info(fa->fa_info); |
1697 | return 0; | 1610 | alias_free_mem_rcu(fa); |
1698 | } | 1611 | return 0; |
1699 | return -ESRCH; | ||
1700 | } | 1612 | } |
1701 | 1613 | ||
1702 | static int trie_flush_list(struct trie *t, struct list_head *head) | 1614 | static int trie_flush_list(struct trie *t, struct list_head *head) |
@@ -1706,14 +1618,11 @@ static int trie_flush_list(struct trie *t, struct list_head *head) | |||
1706 | 1618 | ||
1707 | list_for_each_entry_safe(fa, fa_node, head, fa_list) { | 1619 | list_for_each_entry_safe(fa, fa_node, head, fa_list) { |
1708 | struct fib_info *fi = fa->fa_info; | 1620 | struct fib_info *fi = fa->fa_info; |
1709 | |||
1710 | if (fi && (fi->fib_flags&RTNH_F_DEAD)) { | ||
1711 | |||
1712 | write_lock_bh(&fib_lock); | ||
1713 | list_del(&fa->fa_list); | ||
1714 | write_unlock_bh(&fib_lock); | ||
1715 | 1621 | ||
1716 | fn_free_alias(fa); | 1622 | if (fi && (fi->fib_flags & RTNH_F_DEAD)) { |
1623 | list_del_rcu(&fa->fa_list); | ||
1624 | fib_release_info(fa->fa_info); | ||
1625 | alias_free_mem_rcu(fa); | ||
1717 | found++; | 1626 | found++; |
1718 | } | 1627 | } |
1719 | } | 1628 | } |
@@ -1728,37 +1637,34 @@ static int trie_flush_leaf(struct trie *t, struct leaf *l) | |||
1728 | struct leaf_info *li = NULL; | 1637 | struct leaf_info *li = NULL; |
1729 | 1638 | ||
1730 | hlist_for_each_entry_safe(li, node, tmp, lih, hlist) { | 1639 | hlist_for_each_entry_safe(li, node, tmp, lih, hlist) { |
1731 | |||
1732 | found += trie_flush_list(t, &li->falh); | 1640 | found += trie_flush_list(t, &li->falh); |
1733 | 1641 | ||
1734 | if (list_empty(&li->falh)) { | 1642 | if (list_empty(&li->falh)) { |
1735 | 1643 | hlist_del_rcu(&li->hlist); | |
1736 | write_lock_bh(&fib_lock); | ||
1737 | hlist_del(&li->hlist); | ||
1738 | write_unlock_bh(&fib_lock); | ||
1739 | |||
1740 | free_leaf_info(li); | 1644 | free_leaf_info(li); |
1741 | } | 1645 | } |
1742 | } | 1646 | } |
1743 | return found; | 1647 | return found; |
1744 | } | 1648 | } |
1745 | 1649 | ||
1650 | /* rcu_read_lock needs to be hold by caller from readside */ | ||
1651 | |||
1746 | static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf) | 1652 | static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf) |
1747 | { | 1653 | { |
1748 | struct node *c = (struct node *) thisleaf; | 1654 | struct node *c = (struct node *) thisleaf; |
1749 | struct tnode *p; | 1655 | struct tnode *p; |
1750 | int idx; | 1656 | int idx; |
1657 | struct node *trie = rcu_dereference(t->trie); | ||
1751 | 1658 | ||
1752 | if (c == NULL) { | 1659 | if (c == NULL) { |
1753 | if (t->trie == NULL) | 1660 | if (trie == NULL) |
1754 | return NULL; | 1661 | return NULL; |
1755 | 1662 | ||
1756 | if (IS_LEAF(t->trie)) /* trie w. just a leaf */ | 1663 | if (IS_LEAF(trie)) /* trie w. just a leaf */ |
1757 | return (struct leaf *) t->trie; | 1664 | return (struct leaf *) trie; |
1758 | 1665 | ||
1759 | p = (struct tnode*) t->trie; /* Start */ | 1666 | p = (struct tnode*) trie; /* Start */ |
1760 | } | 1667 | } else |
1761 | else | ||
1762 | p = (struct tnode *) NODE_PARENT(c); | 1668 | p = (struct tnode *) NODE_PARENT(c); |
1763 | 1669 | ||
1764 | while (p) { | 1670 | while (p) { |
@@ -1771,29 +1677,31 @@ static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf) | |||
1771 | pos = 0; | 1677 | pos = 0; |
1772 | 1678 | ||
1773 | last = 1 << p->bits; | 1679 | last = 1 << p->bits; |
1774 | for(idx = pos; idx < last ; idx++) { | 1680 | for (idx = pos; idx < last ; idx++) { |
1775 | if (p->child[idx]) { | 1681 | c = rcu_dereference(p->child[idx]); |
1776 | 1682 | ||
1777 | /* Decend if tnode */ | 1683 | if (!c) |
1778 | 1684 | continue; | |
1779 | while (IS_TNODE(p->child[idx])) { | 1685 | |
1780 | p = (struct tnode*) p->child[idx]; | 1686 | /* Decend if tnode */ |
1781 | idx = 0; | 1687 | while (IS_TNODE(c)) { |
1782 | 1688 | p = (struct tnode *) c; | |
1783 | /* Rightmost non-NULL branch */ | 1689 | idx = 0; |
1784 | if (p && IS_TNODE(p)) | 1690 | |
1785 | while (p->child[idx] == NULL && idx < (1 << p->bits)) idx++; | 1691 | /* Rightmost non-NULL branch */ |
1786 | 1692 | if (p && IS_TNODE(p)) | |
1787 | /* Done with this tnode? */ | 1693 | while (!(c = rcu_dereference(p->child[idx])) |
1788 | if (idx >= (1 << p->bits) || p->child[idx] == NULL ) | 1694 | && idx < (1<<p->bits)) idx++; |
1789 | goto up; | 1695 | |
1790 | } | 1696 | /* Done with this tnode? */ |
1791 | return (struct leaf*) p->child[idx]; | 1697 | if (idx >= (1 << p->bits) || !c) |
1698 | goto up; | ||
1792 | } | 1699 | } |
1700 | return (struct leaf *) c; | ||
1793 | } | 1701 | } |
1794 | up: | 1702 | up: |
1795 | /* No more children go up one step */ | 1703 | /* No more children go up one step */ |
1796 | c = (struct node*) p; | 1704 | c = (struct node *) p; |
1797 | p = (struct tnode *) NODE_PARENT(p); | 1705 | p = (struct tnode *) NODE_PARENT(p); |
1798 | } | 1706 | } |
1799 | return NULL; /* Ready. Root of trie */ | 1707 | return NULL; /* Ready. Root of trie */ |
@@ -1807,23 +1715,24 @@ static int fn_trie_flush(struct fib_table *tb) | |||
1807 | 1715 | ||
1808 | t->revision++; | 1716 | t->revision++; |
1809 | 1717 | ||
1810 | for (h=0; (l = nextleaf(t, l)) != NULL; h++) { | 1718 | rcu_read_lock(); |
1719 | for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { | ||
1811 | found += trie_flush_leaf(t, l); | 1720 | found += trie_flush_leaf(t, l); |
1812 | 1721 | ||
1813 | if (ll && hlist_empty(&ll->list)) | 1722 | if (ll && hlist_empty(&ll->list)) |
1814 | trie_leaf_remove(t, ll->key); | 1723 | trie_leaf_remove(t, ll->key); |
1815 | ll = l; | 1724 | ll = l; |
1816 | } | 1725 | } |
1726 | rcu_read_unlock(); | ||
1817 | 1727 | ||
1818 | if (ll && hlist_empty(&ll->list)) | 1728 | if (ll && hlist_empty(&ll->list)) |
1819 | trie_leaf_remove(t, ll->key); | 1729 | trie_leaf_remove(t, ll->key); |
1820 | 1730 | ||
1821 | if (trie_debug) | 1731 | pr_debug("trie_flush found=%d\n", found); |
1822 | printk("trie_flush found=%d\n", found); | ||
1823 | return found; | 1732 | return found; |
1824 | } | 1733 | } |
1825 | 1734 | ||
1826 | static int trie_last_dflt=-1; | 1735 | static int trie_last_dflt = -1; |
1827 | 1736 | ||
1828 | static void | 1737 | static void |
1829 | fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) | 1738 | fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) |
@@ -1840,7 +1749,7 @@ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib | |||
1840 | last_resort = NULL; | 1749 | last_resort = NULL; |
1841 | order = -1; | 1750 | order = -1; |
1842 | 1751 | ||
1843 | read_lock(&fib_lock); | 1752 | rcu_read_lock(); |
1844 | 1753 | ||
1845 | l = fib_find_node(t, 0); | 1754 | l = fib_find_node(t, 0); |
1846 | if (!l) | 1755 | if (!l) |
@@ -1853,20 +1762,20 @@ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib | |||
1853 | if (list_empty(fa_head)) | 1762 | if (list_empty(fa_head)) |
1854 | goto out; | 1763 | goto out; |
1855 | 1764 | ||
1856 | list_for_each_entry(fa, fa_head, fa_list) { | 1765 | list_for_each_entry_rcu(fa, fa_head, fa_list) { |
1857 | struct fib_info *next_fi = fa->fa_info; | 1766 | struct fib_info *next_fi = fa->fa_info; |
1858 | 1767 | ||
1859 | if (fa->fa_scope != res->scope || | 1768 | if (fa->fa_scope != res->scope || |
1860 | fa->fa_type != RTN_UNICAST) | 1769 | fa->fa_type != RTN_UNICAST) |
1861 | continue; | 1770 | continue; |
1862 | 1771 | ||
1863 | if (next_fi->fib_priority > res->fi->fib_priority) | 1772 | if (next_fi->fib_priority > res->fi->fib_priority) |
1864 | break; | 1773 | break; |
1865 | if (!next_fi->fib_nh[0].nh_gw || | 1774 | if (!next_fi->fib_nh[0].nh_gw || |
1866 | next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) | 1775 | next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) |
1867 | continue; | 1776 | continue; |
1868 | fa->fa_state |= FA_S_ACCESSED; | 1777 | fa->fa_state |= FA_S_ACCESSED; |
1869 | 1778 | ||
1870 | if (fi == NULL) { | 1779 | if (fi == NULL) { |
1871 | if (next_fi != res->fi) | 1780 | if (next_fi != res->fi) |
1872 | break; | 1781 | break; |
@@ -1904,7 +1813,7 @@ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib | |||
1904 | } | 1813 | } |
1905 | trie_last_dflt = last_idx; | 1814 | trie_last_dflt = last_idx; |
1906 | out:; | 1815 | out:; |
1907 | read_unlock(&fib_lock); | 1816 | rcu_read_unlock(); |
1908 | } | 1817 | } |
1909 | 1818 | ||
1910 | static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb, | 1819 | static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb, |
@@ -1913,12 +1822,14 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi | |||
1913 | int i, s_i; | 1822 | int i, s_i; |
1914 | struct fib_alias *fa; | 1823 | struct fib_alias *fa; |
1915 | 1824 | ||
1916 | u32 xkey=htonl(key); | 1825 | u32 xkey = htonl(key); |
1917 | 1826 | ||
1918 | s_i=cb->args[3]; | 1827 | s_i = cb->args[3]; |
1919 | i = 0; | 1828 | i = 0; |
1920 | 1829 | ||
1921 | list_for_each_entry(fa, fah, fa_list) { | 1830 | /* rcu_read_lock is hold by caller */ |
1831 | |||
1832 | list_for_each_entry_rcu(fa, fah, fa_list) { | ||
1922 | if (i < s_i) { | 1833 | if (i < s_i) { |
1923 | i++; | 1834 | i++; |
1924 | continue; | 1835 | continue; |
@@ -1946,10 +1857,10 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi | |||
1946 | fa->fa_info, 0) < 0) { | 1857 | fa->fa_info, 0) < 0) { |
1947 | cb->args[3] = i; | 1858 | cb->args[3] = i; |
1948 | return -1; | 1859 | return -1; |
1949 | } | 1860 | } |
1950 | i++; | 1861 | i++; |
1951 | } | 1862 | } |
1952 | cb->args[3]=i; | 1863 | cb->args[3] = i; |
1953 | return skb->len; | 1864 | return skb->len; |
1954 | } | 1865 | } |
1955 | 1866 | ||
@@ -1959,10 +1870,10 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str | |||
1959 | int h, s_h; | 1870 | int h, s_h; |
1960 | struct list_head *fa_head; | 1871 | struct list_head *fa_head; |
1961 | struct leaf *l = NULL; | 1872 | struct leaf *l = NULL; |
1962 | s_h=cb->args[2]; | ||
1963 | 1873 | ||
1964 | for (h=0; (l = nextleaf(t, l)) != NULL; h++) { | 1874 | s_h = cb->args[2]; |
1965 | 1875 | ||
1876 | for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { | ||
1966 | if (h < s_h) | 1877 | if (h < s_h) |
1967 | continue; | 1878 | continue; |
1968 | if (h > s_h) | 1879 | if (h > s_h) |
@@ -1970,7 +1881,7 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str | |||
1970 | sizeof(cb->args) - 3*sizeof(cb->args[0])); | 1881 | sizeof(cb->args) - 3*sizeof(cb->args[0])); |
1971 | 1882 | ||
1972 | fa_head = get_fa_head(l, plen); | 1883 | fa_head = get_fa_head(l, plen); |
1973 | 1884 | ||
1974 | if (!fa_head) | 1885 | if (!fa_head) |
1975 | continue; | 1886 | continue; |
1976 | 1887 | ||
@@ -1978,11 +1889,11 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str | |||
1978 | continue; | 1889 | continue; |
1979 | 1890 | ||
1980 | if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) { | 1891 | if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) { |
1981 | cb->args[2]=h; | 1892 | cb->args[2] = h; |
1982 | return -1; | 1893 | return -1; |
1983 | } | 1894 | } |
1984 | } | 1895 | } |
1985 | cb->args[2]=h; | 1896 | cb->args[2] = h; |
1986 | return skb->len; | 1897 | return skb->len; |
1987 | } | 1898 | } |
1988 | 1899 | ||
@@ -1993,25 +1904,24 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin | |||
1993 | 1904 | ||
1994 | s_m = cb->args[1]; | 1905 | s_m = cb->args[1]; |
1995 | 1906 | ||
1996 | read_lock(&fib_lock); | 1907 | rcu_read_lock(); |
1997 | for (m=0; m<=32; m++) { | 1908 | for (m = 0; m <= 32; m++) { |
1998 | |||
1999 | if (m < s_m) | 1909 | if (m < s_m) |
2000 | continue; | 1910 | continue; |
2001 | if (m > s_m) | 1911 | if (m > s_m) |
2002 | memset(&cb->args[2], 0, | 1912 | memset(&cb->args[2], 0, |
2003 | sizeof(cb->args) - 2*sizeof(cb->args[0])); | 1913 | sizeof(cb->args) - 2*sizeof(cb->args[0])); |
2004 | 1914 | ||
2005 | if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) { | 1915 | if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) { |
2006 | cb->args[1] = m; | 1916 | cb->args[1] = m; |
2007 | goto out; | 1917 | goto out; |
2008 | } | 1918 | } |
2009 | } | 1919 | } |
2010 | read_unlock(&fib_lock); | 1920 | rcu_read_unlock(); |
2011 | cb->args[1] = m; | 1921 | cb->args[1] = m; |
2012 | return skb->len; | 1922 | return skb->len; |
2013 | out: | 1923 | out: |
2014 | read_unlock(&fib_lock); | 1924 | rcu_read_unlock(); |
2015 | return -1; | 1925 | return -1; |
2016 | } | 1926 | } |
2017 | 1927 | ||
@@ -2051,9 +1961,9 @@ struct fib_table * __init fib_hash_init(int id) | |||
2051 | trie_init(t); | 1961 | trie_init(t); |
2052 | 1962 | ||
2053 | if (id == RT_TABLE_LOCAL) | 1963 | if (id == RT_TABLE_LOCAL) |
2054 | trie_local = t; | 1964 | trie_local = t; |
2055 | else if (id == RT_TABLE_MAIN) | 1965 | else if (id == RT_TABLE_MAIN) |
2056 | trie_main = t; | 1966 | trie_main = t; |
2057 | 1967 | ||
2058 | if (id == RT_TABLE_LOCAL) | 1968 | if (id == RT_TABLE_LOCAL) |
2059 | printk("IPv4 FIB: Using LC-trie version %s\n", VERSION); | 1969 | printk("IPv4 FIB: Using LC-trie version %s\n", VERSION); |
@@ -2065,7 +1975,8 @@ struct fib_table * __init fib_hash_init(int id) | |||
2065 | 1975 | ||
2066 | static void putspace_seq(struct seq_file *seq, int n) | 1976 | static void putspace_seq(struct seq_file *seq, int n) |
2067 | { | 1977 | { |
2068 | while (n--) seq_printf(seq, " "); | 1978 | while (n--) |
1979 | seq_printf(seq, " "); | ||
2069 | } | 1980 | } |
2070 | 1981 | ||
2071 | static void printbin_seq(struct seq_file *seq, unsigned int v, int bits) | 1982 | static void printbin_seq(struct seq_file *seq, unsigned int v, int bits) |
@@ -2086,29 +1997,22 @@ static void printnode_seq(struct seq_file *seq, int indent, struct node *n, | |||
2086 | seq_printf(seq, "%d/", cindex); | 1997 | seq_printf(seq, "%d/", cindex); |
2087 | printbin_seq(seq, cindex, bits); | 1998 | printbin_seq(seq, cindex, bits); |
2088 | seq_printf(seq, ": "); | 1999 | seq_printf(seq, ": "); |
2089 | } | 2000 | } else |
2090 | else | ||
2091 | seq_printf(seq, "<root>: "); | 2001 | seq_printf(seq, "<root>: "); |
2092 | seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n); | 2002 | seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n); |
2093 | 2003 | ||
2094 | if (IS_LEAF(n)) | ||
2095 | seq_printf(seq, "key=%d.%d.%d.%d\n", | ||
2096 | n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256); | ||
2097 | else { | ||
2098 | int plen = ((struct tnode *)n)->pos; | ||
2099 | t_key prf=MASK_PFX(n->key, plen); | ||
2100 | seq_printf(seq, "key=%d.%d.%d.%d/%d\n", | ||
2101 | prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen); | ||
2102 | } | ||
2103 | if (IS_LEAF(n)) { | 2004 | if (IS_LEAF(n)) { |
2104 | struct leaf *l=(struct leaf *)n; | 2005 | struct leaf *l = (struct leaf *)n; |
2105 | struct fib_alias *fa; | 2006 | struct fib_alias *fa; |
2106 | int i; | 2007 | int i; |
2107 | for (i=32; i>=0; i--) | 2008 | |
2108 | if (find_leaf_info(&l->list, i)) { | 2009 | seq_printf(seq, "key=%d.%d.%d.%d\n", |
2109 | 2010 | n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256); | |
2011 | |||
2012 | for (i = 32; i >= 0; i--) | ||
2013 | if (find_leaf_info(&l->list, i)) { | ||
2110 | struct list_head *fa_head = get_fa_head(l, i); | 2014 | struct list_head *fa_head = get_fa_head(l, i); |
2111 | 2015 | ||
2112 | if (!fa_head) | 2016 | if (!fa_head) |
2113 | continue; | 2017 | continue; |
2114 | 2018 | ||
@@ -2118,17 +2022,16 @@ static void printnode_seq(struct seq_file *seq, int indent, struct node *n, | |||
2118 | putspace_seq(seq, indent+2); | 2022 | putspace_seq(seq, indent+2); |
2119 | seq_printf(seq, "{/%d...dumping}\n", i); | 2023 | seq_printf(seq, "{/%d...dumping}\n", i); |
2120 | 2024 | ||
2121 | 2025 | list_for_each_entry_rcu(fa, fa_head, fa_list) { | |
2122 | list_for_each_entry(fa, fa_head, fa_list) { | ||
2123 | putspace_seq(seq, indent+2); | 2026 | putspace_seq(seq, indent+2); |
2124 | if (fa->fa_info->fib_nh == NULL) { | ||
2125 | seq_printf(seq, "Error _fib_nh=NULL\n"); | ||
2126 | continue; | ||
2127 | } | ||
2128 | if (fa->fa_info == NULL) { | 2027 | if (fa->fa_info == NULL) { |
2129 | seq_printf(seq, "Error fa_info=NULL\n"); | 2028 | seq_printf(seq, "Error fa_info=NULL\n"); |
2130 | continue; | 2029 | continue; |
2131 | } | 2030 | } |
2031 | if (fa->fa_info->fib_nh == NULL) { | ||
2032 | seq_printf(seq, "Error _fib_nh=NULL\n"); | ||
2033 | continue; | ||
2034 | } | ||
2132 | 2035 | ||
2133 | seq_printf(seq, "{type=%d scope=%d TOS=%d}\n", | 2036 | seq_printf(seq, "{type=%d scope=%d TOS=%d}\n", |
2134 | fa->fa_type, | 2037 | fa->fa_type, |
@@ -2136,11 +2039,16 @@ static void printnode_seq(struct seq_file *seq, int indent, struct node *n, | |||
2136 | fa->fa_tos); | 2039 | fa->fa_tos); |
2137 | } | 2040 | } |
2138 | } | 2041 | } |
2139 | } | 2042 | } else { |
2140 | else if (IS_TNODE(n)) { | ||
2141 | struct tnode *tn = (struct tnode *)n; | 2043 | struct tnode *tn = (struct tnode *)n; |
2044 | int plen = ((struct tnode *)n)->pos; | ||
2045 | t_key prf = MASK_PFX(n->key, plen); | ||
2046 | |||
2047 | seq_printf(seq, "key=%d.%d.%d.%d/%d\n", | ||
2048 | prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen); | ||
2049 | |||
2142 | putspace_seq(seq, indent); seq_printf(seq, "| "); | 2050 | putspace_seq(seq, indent); seq_printf(seq, "| "); |
2143 | seq_printf(seq, "{key prefix=%08x/", tn->key&TKEY_GET_MASK(0, tn->pos)); | 2051 | seq_printf(seq, "{key prefix=%08x/", tn->key & TKEY_GET_MASK(0, tn->pos)); |
2144 | printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos); | 2052 | printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos); |
2145 | seq_printf(seq, "}\n"); | 2053 | seq_printf(seq, "}\n"); |
2146 | putspace_seq(seq, indent); seq_printf(seq, "| "); | 2054 | putspace_seq(seq, indent); seq_printf(seq, "| "); |
@@ -2154,194 +2062,196 @@ static void printnode_seq(struct seq_file *seq, int indent, struct node *n, | |||
2154 | 2062 | ||
2155 | static void trie_dump_seq(struct seq_file *seq, struct trie *t) | 2063 | static void trie_dump_seq(struct seq_file *seq, struct trie *t) |
2156 | { | 2064 | { |
2157 | struct node *n = t->trie; | 2065 | struct node *n; |
2158 | int cindex=0; | 2066 | int cindex = 0; |
2159 | int indent=1; | 2067 | int indent = 1; |
2160 | int pend=0; | 2068 | int pend = 0; |
2161 | int depth = 0; | 2069 | int depth = 0; |
2070 | struct tnode *tn; | ||
2162 | 2071 | ||
2163 | read_lock(&fib_lock); | 2072 | rcu_read_lock(); |
2164 | 2073 | n = rcu_dereference(t->trie); | |
2165 | seq_printf(seq, "------ trie_dump of t=%p ------\n", t); | 2074 | seq_printf(seq, "------ trie_dump of t=%p ------\n", t); |
2166 | if (n) { | ||
2167 | printnode_seq(seq, indent, n, pend, cindex, 0); | ||
2168 | if (IS_TNODE(n)) { | ||
2169 | struct tnode *tn = (struct tnode *)n; | ||
2170 | pend = tn->pos+tn->bits; | ||
2171 | putspace_seq(seq, indent); seq_printf(seq, "\\--\n"); | ||
2172 | indent += 3; | ||
2173 | depth++; | ||
2174 | |||
2175 | while (tn && cindex < (1 << tn->bits)) { | ||
2176 | if (tn->child[cindex]) { | ||
2177 | |||
2178 | /* Got a child */ | ||
2179 | |||
2180 | printnode_seq(seq, indent, tn->child[cindex], pend, cindex, tn->bits); | ||
2181 | if (IS_LEAF(tn->child[cindex])) { | ||
2182 | cindex++; | ||
2183 | |||
2184 | } | ||
2185 | else { | ||
2186 | /* | ||
2187 | * New tnode. Decend one level | ||
2188 | */ | ||
2189 | |||
2190 | depth++; | ||
2191 | n = tn->child[cindex]; | ||
2192 | tn = (struct tnode *)n; | ||
2193 | pend = tn->pos+tn->bits; | ||
2194 | putspace_seq(seq, indent); seq_printf(seq, "\\--\n"); | ||
2195 | indent+=3; | ||
2196 | cindex=0; | ||
2197 | } | ||
2198 | } | ||
2199 | else | ||
2200 | cindex++; | ||
2201 | 2075 | ||
2076 | if (!n) { | ||
2077 | seq_printf(seq, "------ trie is empty\n"); | ||
2078 | |||
2079 | rcu_read_unlock(); | ||
2080 | return; | ||
2081 | } | ||
2082 | |||
2083 | printnode_seq(seq, indent, n, pend, cindex, 0); | ||
2084 | |||
2085 | if (!IS_TNODE(n)) { | ||
2086 | rcu_read_unlock(); | ||
2087 | return; | ||
2088 | } | ||
2089 | |||
2090 | tn = (struct tnode *)n; | ||
2091 | pend = tn->pos+tn->bits; | ||
2092 | putspace_seq(seq, indent); seq_printf(seq, "\\--\n"); | ||
2093 | indent += 3; | ||
2094 | depth++; | ||
2095 | |||
2096 | while (tn && cindex < (1 << tn->bits)) { | ||
2097 | struct node *child = rcu_dereference(tn->child[cindex]); | ||
2098 | if (!child) | ||
2099 | cindex++; | ||
2100 | else { | ||
2101 | /* Got a child */ | ||
2102 | printnode_seq(seq, indent, child, pend, | ||
2103 | cindex, tn->bits); | ||
2104 | |||
2105 | if (IS_LEAF(child)) | ||
2106 | cindex++; | ||
2107 | |||
2108 | else { | ||
2202 | /* | 2109 | /* |
2203 | * Test if we are done | 2110 | * New tnode. Decend one level |
2204 | */ | 2111 | */ |
2205 | |||
2206 | while (cindex >= (1 << tn->bits)) { | ||
2207 | 2112 | ||
2208 | /* | 2113 | depth++; |
2209 | * Move upwards and test for root | 2114 | n = child; |
2210 | * pop off all traversed nodes | 2115 | tn = (struct tnode *)n; |
2211 | */ | 2116 | pend = tn->pos+tn->bits; |
2212 | 2117 | putspace_seq(seq, indent); | |
2213 | if (NODE_PARENT(tn) == NULL) { | 2118 | seq_printf(seq, "\\--\n"); |
2214 | tn = NULL; | 2119 | indent += 3; |
2215 | n = NULL; | 2120 | cindex = 0; |
2216 | break; | ||
2217 | } | ||
2218 | else { | ||
2219 | cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); | ||
2220 | tn = NODE_PARENT(tn); | ||
2221 | cindex++; | ||
2222 | n = (struct node *)tn; | ||
2223 | pend = tn->pos+tn->bits; | ||
2224 | indent-=3; | ||
2225 | depth--; | ||
2226 | } | ||
2227 | } | ||
2228 | } | 2121 | } |
2229 | } | 2122 | } |
2230 | else n = NULL; | ||
2231 | } | ||
2232 | else seq_printf(seq, "------ trie is empty\n"); | ||
2233 | 2123 | ||
2234 | read_unlock(&fib_lock); | 2124 | /* |
2125 | * Test if we are done | ||
2126 | */ | ||
2127 | |||
2128 | while (cindex >= (1 << tn->bits)) { | ||
2129 | /* | ||
2130 | * Move upwards and test for root | ||
2131 | * pop off all traversed nodes | ||
2132 | */ | ||
2133 | |||
2134 | if (NODE_PARENT(tn) == NULL) { | ||
2135 | tn = NULL; | ||
2136 | break; | ||
2137 | } | ||
2138 | |||
2139 | cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); | ||
2140 | cindex++; | ||
2141 | tn = NODE_PARENT(tn); | ||
2142 | pend = tn->pos + tn->bits; | ||
2143 | indent -= 3; | ||
2144 | depth--; | ||
2145 | } | ||
2146 | } | ||
2147 | rcu_read_unlock(); | ||
2235 | } | 2148 | } |
2236 | 2149 | ||
2237 | static struct trie_stat *trie_stat_new(void) | 2150 | static struct trie_stat *trie_stat_new(void) |
2238 | { | 2151 | { |
2239 | struct trie_stat *s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL); | 2152 | struct trie_stat *s; |
2240 | int i; | 2153 | int i; |
2241 | 2154 | ||
2242 | if (s) { | 2155 | s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL); |
2243 | s->totdepth = 0; | 2156 | if (!s) |
2244 | s->maxdepth = 0; | 2157 | return NULL; |
2245 | s->tnodes = 0; | 2158 | |
2246 | s->leaves = 0; | 2159 | s->totdepth = 0; |
2247 | s->nullpointers = 0; | 2160 | s->maxdepth = 0; |
2248 | 2161 | s->tnodes = 0; | |
2249 | for(i=0; i< MAX_CHILDS; i++) | 2162 | s->leaves = 0; |
2250 | s->nodesizes[i] = 0; | 2163 | s->nullpointers = 0; |
2251 | } | 2164 | |
2165 | for (i = 0; i < MAX_CHILDS; i++) | ||
2166 | s->nodesizes[i] = 0; | ||
2167 | |||
2252 | return s; | 2168 | return s; |
2253 | } | 2169 | } |
2254 | 2170 | ||
2255 | static struct trie_stat *trie_collect_stats(struct trie *t) | 2171 | static struct trie_stat *trie_collect_stats(struct trie *t) |
2256 | { | 2172 | { |
2257 | struct node *n = t->trie; | 2173 | struct node *n; |
2258 | struct trie_stat *s = trie_stat_new(); | 2174 | struct trie_stat *s = trie_stat_new(); |
2259 | int cindex = 0; | 2175 | int cindex = 0; |
2260 | int indent = 1; | ||
2261 | int pend = 0; | 2176 | int pend = 0; |
2262 | int depth = 0; | 2177 | int depth = 0; |
2263 | 2178 | ||
2264 | read_lock(&fib_lock); | 2179 | if (!s) |
2180 | return NULL; | ||
2265 | 2181 | ||
2266 | if (s) { | 2182 | rcu_read_lock(); |
2267 | if (n) { | 2183 | n = rcu_dereference(t->trie); |
2268 | if (IS_TNODE(n)) { | ||
2269 | struct tnode *tn = (struct tnode *)n; | ||
2270 | pend = tn->pos+tn->bits; | ||
2271 | indent += 3; | ||
2272 | s->nodesizes[tn->bits]++; | ||
2273 | depth++; | ||
2274 | 2184 | ||
2275 | while (tn && cindex < (1 << tn->bits)) { | 2185 | if (!n) |
2276 | if (tn->child[cindex]) { | 2186 | return s; |
2277 | /* Got a child */ | 2187 | |
2278 | 2188 | if (IS_TNODE(n)) { | |
2279 | if (IS_LEAF(tn->child[cindex])) { | 2189 | struct tnode *tn = (struct tnode *)n; |
2280 | cindex++; | 2190 | pend = tn->pos+tn->bits; |
2281 | 2191 | s->nodesizes[tn->bits]++; | |
2282 | /* stats */ | 2192 | depth++; |
2283 | if (depth > s->maxdepth) | 2193 | |
2284 | s->maxdepth = depth; | 2194 | while (tn && cindex < (1 << tn->bits)) { |
2285 | s->totdepth += depth; | 2195 | struct node *ch = rcu_dereference(tn->child[cindex]); |
2286 | s->leaves++; | 2196 | if (ch) { |
2287 | } | ||
2288 | |||
2289 | else { | ||
2290 | /* | ||
2291 | * New tnode. Decend one level | ||
2292 | */ | ||
2293 | |||
2294 | s->tnodes++; | ||
2295 | s->nodesizes[tn->bits]++; | ||
2296 | depth++; | ||
2297 | |||
2298 | n = tn->child[cindex]; | ||
2299 | tn = (struct tnode *)n; | ||
2300 | pend = tn->pos+tn->bits; | ||
2301 | |||
2302 | indent += 3; | ||
2303 | cindex = 0; | ||
2304 | } | ||
2305 | } | ||
2306 | else { | ||
2307 | cindex++; | ||
2308 | s->nullpointers++; | ||
2309 | } | ||
2310 | 2197 | ||
2198 | /* Got a child */ | ||
2199 | |||
2200 | if (IS_LEAF(tn->child[cindex])) { | ||
2201 | cindex++; | ||
2202 | |||
2203 | /* stats */ | ||
2204 | if (depth > s->maxdepth) | ||
2205 | s->maxdepth = depth; | ||
2206 | s->totdepth += depth; | ||
2207 | s->leaves++; | ||
2208 | } else { | ||
2311 | /* | 2209 | /* |
2312 | * Test if we are done | 2210 | * New tnode. Decend one level |
2313 | */ | 2211 | */ |
2314 | 2212 | ||
2315 | while (cindex >= (1 << tn->bits)) { | 2213 | s->tnodes++; |
2316 | 2214 | s->nodesizes[tn->bits]++; | |
2317 | /* | 2215 | depth++; |
2318 | * Move upwards and test for root | 2216 | |
2319 | * pop off all traversed nodes | 2217 | n = ch; |
2320 | */ | 2218 | tn = (struct tnode *)n; |
2321 | 2219 | pend = tn->pos+tn->bits; | |
2322 | 2220 | ||
2323 | if (NODE_PARENT(tn) == NULL) { | 2221 | cindex = 0; |
2324 | tn = NULL; | ||
2325 | n = NULL; | ||
2326 | break; | ||
2327 | } | ||
2328 | else { | ||
2329 | cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); | ||
2330 | tn = NODE_PARENT(tn); | ||
2331 | cindex++; | ||
2332 | n = (struct node *)tn; | ||
2333 | pend = tn->pos+tn->bits; | ||
2334 | indent -= 3; | ||
2335 | depth--; | ||
2336 | } | ||
2337 | } | ||
2338 | } | 2222 | } |
2223 | } else { | ||
2224 | cindex++; | ||
2225 | s->nullpointers++; | ||
2339 | } | 2226 | } |
2340 | else n = NULL; | 2227 | |
2228 | /* | ||
2229 | * Test if we are done | ||
2230 | */ | ||
2231 | |||
2232 | while (cindex >= (1 << tn->bits)) { | ||
2233 | /* | ||
2234 | * Move upwards and test for root | ||
2235 | * pop off all traversed nodes | ||
2236 | */ | ||
2237 | |||
2238 | if (NODE_PARENT(tn) == NULL) { | ||
2239 | tn = NULL; | ||
2240 | n = NULL; | ||
2241 | break; | ||
2242 | } | ||
2243 | |||
2244 | cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); | ||
2245 | tn = NODE_PARENT(tn); | ||
2246 | cindex++; | ||
2247 | n = (struct node *)tn; | ||
2248 | pend = tn->pos+tn->bits; | ||
2249 | depth--; | ||
2250 | } | ||
2341 | } | 2251 | } |
2342 | } | 2252 | } |
2343 | 2253 | ||
2344 | read_unlock(&fib_lock); | 2254 | rcu_read_unlock(); |
2345 | return s; | 2255 | return s; |
2346 | } | 2256 | } |
2347 | 2257 | ||
@@ -2359,17 +2269,22 @@ static struct fib_alias *fib_triestat_get_next(struct seq_file *seq) | |||
2359 | 2269 | ||
2360 | static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos) | 2270 | static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos) |
2361 | { | 2271 | { |
2362 | void *v = NULL; | 2272 | if (!ip_fib_main_table) |
2273 | return NULL; | ||
2363 | 2274 | ||
2364 | if (ip_fib_main_table) | 2275 | if (*pos) |
2365 | v = *pos ? fib_triestat_get_next(seq) : SEQ_START_TOKEN; | 2276 | return fib_triestat_get_next(seq); |
2366 | return v; | 2277 | else |
2278 | return SEQ_START_TOKEN; | ||
2367 | } | 2279 | } |
2368 | 2280 | ||
2369 | static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 2281 | static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
2370 | { | 2282 | { |
2371 | ++*pos; | 2283 | ++*pos; |
2372 | return v == SEQ_START_TOKEN ? fib_triestat_get_first(seq) : fib_triestat_get_next(seq); | 2284 | if (v == SEQ_START_TOKEN) |
2285 | return fib_triestat_get_first(seq); | ||
2286 | else | ||
2287 | return fib_triestat_get_next(seq); | ||
2373 | } | 2288 | } |
2374 | 2289 | ||
2375 | static void fib_triestat_seq_stop(struct seq_file *seq, void *v) | 2290 | static void fib_triestat_seq_stop(struct seq_file *seq, void *v) |
@@ -2388,22 +2303,22 @@ static void collect_and_show(struct trie *t, struct seq_file *seq) | |||
2388 | { | 2303 | { |
2389 | int bytes = 0; /* How many bytes are used, a ref is 4 bytes */ | 2304 | int bytes = 0; /* How many bytes are used, a ref is 4 bytes */ |
2390 | int i, max, pointers; | 2305 | int i, max, pointers; |
2391 | struct trie_stat *stat; | 2306 | struct trie_stat *stat; |
2392 | int avdepth; | 2307 | int avdepth; |
2393 | 2308 | ||
2394 | stat = trie_collect_stats(t); | 2309 | stat = trie_collect_stats(t); |
2395 | 2310 | ||
2396 | bytes=0; | 2311 | bytes = 0; |
2397 | seq_printf(seq, "trie=%p\n", t); | 2312 | seq_printf(seq, "trie=%p\n", t); |
2398 | 2313 | ||
2399 | if (stat) { | 2314 | if (stat) { |
2400 | if (stat->leaves) | 2315 | if (stat->leaves) |
2401 | avdepth=stat->totdepth*100 / stat->leaves; | 2316 | avdepth = stat->totdepth*100 / stat->leaves; |
2402 | else | 2317 | else |
2403 | avdepth=0; | 2318 | avdepth = 0; |
2404 | seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100 ); | 2319 | seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100); |
2405 | seq_printf(seq, "Max depth: %4d\n", stat->maxdepth); | 2320 | seq_printf(seq, "Max depth: %4d\n", stat->maxdepth); |
2406 | 2321 | ||
2407 | seq_printf(seq, "Leaves: %d\n", stat->leaves); | 2322 | seq_printf(seq, "Leaves: %d\n", stat->leaves); |
2408 | bytes += sizeof(struct leaf) * stat->leaves; | 2323 | bytes += sizeof(struct leaf) * stat->leaves; |
2409 | seq_printf(seq, "Internal nodes: %d\n", stat->tnodes); | 2324 | seq_printf(seq, "Internal nodes: %d\n", stat->tnodes); |
@@ -2455,11 +2370,9 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) | |||
2455 | 2370 | ||
2456 | if (trie_main) | 2371 | if (trie_main) |
2457 | collect_and_show(trie_main, seq); | 2372 | collect_and_show(trie_main, seq); |
2458 | } | 2373 | } else { |
2459 | else { | 2374 | snprintf(bf, sizeof(bf), "*\t%08X\t%08X", 200, 400); |
2460 | snprintf(bf, sizeof(bf), | 2375 | |
2461 | "*\t%08X\t%08X", 200, 400); | ||
2462 | |||
2463 | seq_printf(seq, "%-127s\n", bf); | 2376 | seq_printf(seq, "%-127s\n", bf); |
2464 | } | 2377 | } |
2465 | return 0; | 2378 | return 0; |
@@ -2520,22 +2433,27 @@ static struct fib_alias *fib_trie_get_next(struct seq_file *seq) | |||
2520 | 2433 | ||
2521 | static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) | 2434 | static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) |
2522 | { | 2435 | { |
2523 | void *v = NULL; | 2436 | if (!ip_fib_main_table) |
2437 | return NULL; | ||
2524 | 2438 | ||
2525 | if (ip_fib_main_table) | 2439 | if (*pos) |
2526 | v = *pos ? fib_trie_get_next(seq) : SEQ_START_TOKEN; | 2440 | return fib_trie_get_next(seq); |
2527 | return v; | 2441 | else |
2442 | return SEQ_START_TOKEN; | ||
2528 | } | 2443 | } |
2529 | 2444 | ||
2530 | static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 2445 | static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
2531 | { | 2446 | { |
2532 | ++*pos; | 2447 | ++*pos; |
2533 | return v == SEQ_START_TOKEN ? fib_trie_get_first(seq) : fib_trie_get_next(seq); | 2448 | if (v == SEQ_START_TOKEN) |
2449 | return fib_trie_get_first(seq); | ||
2450 | else | ||
2451 | return fib_trie_get_next(seq); | ||
2452 | |||
2534 | } | 2453 | } |
2535 | 2454 | ||
2536 | static void fib_trie_seq_stop(struct seq_file *seq, void *v) | 2455 | static void fib_trie_seq_stop(struct seq_file *seq, void *v) |
2537 | { | 2456 | { |
2538 | |||
2539 | } | 2457 | } |
2540 | 2458 | ||
2541 | /* | 2459 | /* |
@@ -2555,9 +2473,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) | |||
2555 | 2473 | ||
2556 | if (trie_main) | 2474 | if (trie_main) |
2557 | trie_dump_seq(seq, trie_main); | 2475 | trie_dump_seq(seq, trie_main); |
2558 | } | 2476 | } else { |
2559 | |||
2560 | else { | ||
2561 | snprintf(bf, sizeof(bf), | 2477 | snprintf(bf, sizeof(bf), |
2562 | "*\t%08X\t%08X", 200, 400); | 2478 | "*\t%08X\t%08X", 200, 400); |
2563 | seq_printf(seq, "%-127s\n", bf); | 2479 | seq_printf(seq, "%-127s\n", bf); |
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index badfc5849973..24eb56ae1b5a 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
@@ -114,7 +114,7 @@ struct icmp_bxm { | |||
114 | /* | 114 | /* |
115 | * Statistics | 115 | * Statistics |
116 | */ | 116 | */ |
117 | DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics); | 117 | DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics) __read_mostly; |
118 | 118 | ||
119 | /* An array of errno for error messages from dest unreach. */ | 119 | /* An array of errno for error messages from dest unreach. */ |
120 | /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */ | 120 | /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */ |
@@ -627,11 +627,10 @@ static void icmp_unreach(struct sk_buff *skb) | |||
627 | break; | 627 | break; |
628 | case ICMP_FRAG_NEEDED: | 628 | case ICMP_FRAG_NEEDED: |
629 | if (ipv4_config.no_pmtu_disc) { | 629 | if (ipv4_config.no_pmtu_disc) { |
630 | LIMIT_NETDEBUG( | 630 | LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: " |
631 | printk(KERN_INFO "ICMP: %u.%u.%u.%u: " | ||
632 | "fragmentation needed " | 631 | "fragmentation needed " |
633 | "and DF set.\n", | 632 | "and DF set.\n", |
634 | NIPQUAD(iph->daddr))); | 633 | NIPQUAD(iph->daddr)); |
635 | } else { | 634 | } else { |
636 | info = ip_rt_frag_needed(iph, | 635 | info = ip_rt_frag_needed(iph, |
637 | ntohs(icmph->un.frag.mtu)); | 636 | ntohs(icmph->un.frag.mtu)); |
@@ -640,10 +639,9 @@ static void icmp_unreach(struct sk_buff *skb) | |||
640 | } | 639 | } |
641 | break; | 640 | break; |
642 | case ICMP_SR_FAILED: | 641 | case ICMP_SR_FAILED: |
643 | LIMIT_NETDEBUG( | 642 | LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: Source " |
644 | printk(KERN_INFO "ICMP: %u.%u.%u.%u: Source " | ||
645 | "Route Failed.\n", | 643 | "Route Failed.\n", |
646 | NIPQUAD(iph->daddr))); | 644 | NIPQUAD(iph->daddr)); |
647 | break; | 645 | break; |
648 | default: | 646 | default: |
649 | break; | 647 | break; |
@@ -936,7 +934,7 @@ int icmp_rcv(struct sk_buff *skb) | |||
936 | case CHECKSUM_HW: | 934 | case CHECKSUM_HW: |
937 | if (!(u16)csum_fold(skb->csum)) | 935 | if (!(u16)csum_fold(skb->csum)) |
938 | break; | 936 | break; |
939 | LIMIT_NETDEBUG(printk(KERN_DEBUG "icmp v4 hw csum failure\n")); | 937 | LIMIT_NETDEBUG(KERN_DEBUG "icmp v4 hw csum failure\n"); |
940 | case CHECKSUM_NONE: | 938 | case CHECKSUM_NONE: |
941 | if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) | 939 | if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) |
942 | goto error; | 940 | goto error; |
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 5088f90835ae..44607f4767b8 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c | |||
@@ -904,7 +904,7 @@ int igmp_rcv(struct sk_buff *skb) | |||
904 | case IGMP_MTRACE_RESP: | 904 | case IGMP_MTRACE_RESP: |
905 | break; | 905 | break; |
906 | default: | 906 | default: |
907 | NETDEBUG(printk(KERN_DEBUG "New IGMP type=%d, why we do not know about it?\n", ih->type)); | 907 | NETDEBUG(KERN_DEBUG "New IGMP type=%d, why we do not know about it?\n", ih->type); |
908 | } | 908 | } |
909 | in_dev_put(in_dev); | 909 | in_dev_put(in_dev); |
910 | kfree_skb(skb); | 910 | kfree_skb(skb); |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c new file mode 100644 index 000000000000..fe3c6d3d0c91 --- /dev/null +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -0,0 +1,641 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * Support for INET connection oriented protocols. | ||
7 | * | ||
8 | * Authors: See the TCP sources | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License | ||
12 | * as published by the Free Software Foundation; either version | ||
13 | * 2 of the License, or(at your option) any later version. | ||
14 | */ | ||
15 | |||
16 | #include <linux/config.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/jhash.h> | ||
19 | |||
20 | #include <net/inet_connection_sock.h> | ||
21 | #include <net/inet_hashtables.h> | ||
22 | #include <net/inet_timewait_sock.h> | ||
23 | #include <net/ip.h> | ||
24 | #include <net/route.h> | ||
25 | #include <net/tcp_states.h> | ||
26 | #include <net/xfrm.h> | ||
27 | |||
28 | #ifdef INET_CSK_DEBUG | ||
29 | const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; | ||
30 | EXPORT_SYMBOL(inet_csk_timer_bug_msg); | ||
31 | #endif | ||
32 | |||
33 | /* | ||
34 | * This array holds the first and last local port number. | ||
35 | * For high-usage systems, use sysctl to change this to | ||
36 | * 32768-61000 | ||
37 | */ | ||
38 | int sysctl_local_port_range[2] = { 1024, 4999 }; | ||
39 | |||
40 | static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) | ||
41 | { | ||
42 | const u32 sk_rcv_saddr = inet_rcv_saddr(sk); | ||
43 | struct sock *sk2; | ||
44 | struct hlist_node *node; | ||
45 | int reuse = sk->sk_reuse; | ||
46 | |||
47 | sk_for_each_bound(sk2, node, &tb->owners) { | ||
48 | if (sk != sk2 && | ||
49 | !inet_v6_ipv6only(sk2) && | ||
50 | (!sk->sk_bound_dev_if || | ||
51 | !sk2->sk_bound_dev_if || | ||
52 | sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { | ||
53 | if (!reuse || !sk2->sk_reuse || | ||
54 | sk2->sk_state == TCP_LISTEN) { | ||
55 | const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); | ||
56 | if (!sk2_rcv_saddr || !sk_rcv_saddr || | ||
57 | sk2_rcv_saddr == sk_rcv_saddr) | ||
58 | break; | ||
59 | } | ||
60 | } | ||
61 | } | ||
62 | return node != NULL; | ||
63 | } | ||
64 | |||
65 | /* Obtain a reference to a local port for the given sock, | ||
66 | * if snum is zero it means select any available local port. | ||
67 | */ | ||
68 | int inet_csk_get_port(struct inet_hashinfo *hashinfo, | ||
69 | struct sock *sk, unsigned short snum) | ||
70 | { | ||
71 | struct inet_bind_hashbucket *head; | ||
72 | struct hlist_node *node; | ||
73 | struct inet_bind_bucket *tb; | ||
74 | int ret; | ||
75 | |||
76 | local_bh_disable(); | ||
77 | if (!snum) { | ||
78 | int low = sysctl_local_port_range[0]; | ||
79 | int high = sysctl_local_port_range[1]; | ||
80 | int remaining = (high - low) + 1; | ||
81 | int rover; | ||
82 | |||
83 | spin_lock(&hashinfo->portalloc_lock); | ||
84 | if (hashinfo->port_rover < low) | ||
85 | rover = low; | ||
86 | else | ||
87 | rover = hashinfo->port_rover; | ||
88 | do { | ||
89 | rover++; | ||
90 | if (rover > high) | ||
91 | rover = low; | ||
92 | head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; | ||
93 | spin_lock(&head->lock); | ||
94 | inet_bind_bucket_for_each(tb, node, &head->chain) | ||
95 | if (tb->port == rover) | ||
96 | goto next; | ||
97 | break; | ||
98 | next: | ||
99 | spin_unlock(&head->lock); | ||
100 | } while (--remaining > 0); | ||
101 | hashinfo->port_rover = rover; | ||
102 | spin_unlock(&hashinfo->portalloc_lock); | ||
103 | |||
104 | /* Exhausted local port range during search? It is not | ||
105 | * possible for us to be holding one of the bind hash | ||
106 | * locks if this test triggers, because if 'remaining' | ||
107 | * drops to zero, we broke out of the do/while loop at | ||
108 | * the top level, not from the 'break;' statement. | ||
109 | */ | ||
110 | ret = 1; | ||
111 | if (remaining <= 0) | ||
112 | goto fail; | ||
113 | |||
114 | /* OK, here is the one we will use. HEAD is | ||
115 | * non-NULL and we hold it's mutex. | ||
116 | */ | ||
117 | snum = rover; | ||
118 | } else { | ||
119 | head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; | ||
120 | spin_lock(&head->lock); | ||
121 | inet_bind_bucket_for_each(tb, node, &head->chain) | ||
122 | if (tb->port == snum) | ||
123 | goto tb_found; | ||
124 | } | ||
125 | tb = NULL; | ||
126 | goto tb_not_found; | ||
127 | tb_found: | ||
128 | if (!hlist_empty(&tb->owners)) { | ||
129 | if (sk->sk_reuse > 1) | ||
130 | goto success; | ||
131 | if (tb->fastreuse > 0 && | ||
132 | sk->sk_reuse && sk->sk_state != TCP_LISTEN) { | ||
133 | goto success; | ||
134 | } else { | ||
135 | ret = 1; | ||
136 | if (inet_csk_bind_conflict(sk, tb)) | ||
137 | goto fail_unlock; | ||
138 | } | ||
139 | } | ||
140 | tb_not_found: | ||
141 | ret = 1; | ||
142 | if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL) | ||
143 | goto fail_unlock; | ||
144 | if (hlist_empty(&tb->owners)) { | ||
145 | if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) | ||
146 | tb->fastreuse = 1; | ||
147 | else | ||
148 | tb->fastreuse = 0; | ||
149 | } else if (tb->fastreuse && | ||
150 | (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) | ||
151 | tb->fastreuse = 0; | ||
152 | success: | ||
153 | if (!inet_csk(sk)->icsk_bind_hash) | ||
154 | inet_bind_hash(sk, tb, snum); | ||
155 | BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); | ||
156 | ret = 0; | ||
157 | |||
158 | fail_unlock: | ||
159 | spin_unlock(&head->lock); | ||
160 | fail: | ||
161 | local_bh_enable(); | ||
162 | return ret; | ||
163 | } | ||
164 | |||
165 | EXPORT_SYMBOL_GPL(inet_csk_get_port); | ||
166 | |||
167 | /* | ||
168 | * Wait for an incoming connection, avoid race conditions. This must be called | ||
169 | * with the socket locked. | ||
170 | */ | ||
171 | static int inet_csk_wait_for_connect(struct sock *sk, long timeo) | ||
172 | { | ||
173 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
174 | DEFINE_WAIT(wait); | ||
175 | int err; | ||
176 | |||
177 | /* | ||
178 | * True wake-one mechanism for incoming connections: only | ||
179 | * one process gets woken up, not the 'whole herd'. | ||
180 | * Since we do not 'race & poll' for established sockets | ||
181 | * anymore, the common case will execute the loop only once. | ||
182 | * | ||
183 | * Subtle issue: "add_wait_queue_exclusive()" will be added | ||
184 | * after any current non-exclusive waiters, and we know that | ||
185 | * it will always _stay_ after any new non-exclusive waiters | ||
186 | * because all non-exclusive waiters are added at the | ||
187 | * beginning of the wait-queue. As such, it's ok to "drop" | ||
188 | * our exclusiveness temporarily when we get woken up without | ||
189 | * having to remove and re-insert us on the wait queue. | ||
190 | */ | ||
191 | for (;;) { | ||
192 | prepare_to_wait_exclusive(sk->sk_sleep, &wait, | ||
193 | TASK_INTERRUPTIBLE); | ||
194 | release_sock(sk); | ||
195 | if (reqsk_queue_empty(&icsk->icsk_accept_queue)) | ||
196 | timeo = schedule_timeout(timeo); | ||
197 | lock_sock(sk); | ||
198 | err = 0; | ||
199 | if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) | ||
200 | break; | ||
201 | err = -EINVAL; | ||
202 | if (sk->sk_state != TCP_LISTEN) | ||
203 | break; | ||
204 | err = sock_intr_errno(timeo); | ||
205 | if (signal_pending(current)) | ||
206 | break; | ||
207 | err = -EAGAIN; | ||
208 | if (!timeo) | ||
209 | break; | ||
210 | } | ||
211 | finish_wait(sk->sk_sleep, &wait); | ||
212 | return err; | ||
213 | } | ||
214 | |||
215 | /* | ||
216 | * This will accept the next outstanding connection. | ||
217 | */ | ||
218 | struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) | ||
219 | { | ||
220 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
221 | struct sock *newsk; | ||
222 | int error; | ||
223 | |||
224 | lock_sock(sk); | ||
225 | |||
226 | /* We need to make sure that this socket is listening, | ||
227 | * and that it has something pending. | ||
228 | */ | ||
229 | error = -EINVAL; | ||
230 | if (sk->sk_state != TCP_LISTEN) | ||
231 | goto out_err; | ||
232 | |||
233 | /* Find already established connection */ | ||
234 | if (reqsk_queue_empty(&icsk->icsk_accept_queue)) { | ||
235 | long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); | ||
236 | |||
237 | /* If this is a non blocking socket don't sleep */ | ||
238 | error = -EAGAIN; | ||
239 | if (!timeo) | ||
240 | goto out_err; | ||
241 | |||
242 | error = inet_csk_wait_for_connect(sk, timeo); | ||
243 | if (error) | ||
244 | goto out_err; | ||
245 | } | ||
246 | |||
247 | newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); | ||
248 | BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); | ||
249 | out: | ||
250 | release_sock(sk); | ||
251 | return newsk; | ||
252 | out_err: | ||
253 | newsk = NULL; | ||
254 | *err = error; | ||
255 | goto out; | ||
256 | } | ||
257 | |||
258 | EXPORT_SYMBOL(inet_csk_accept); | ||
259 | |||
260 | /* | ||
261 | * Using different timers for retransmit, delayed acks and probes | ||
262 | * We may wish use just one timer maintaining a list of expire jiffies | ||
263 | * to optimize. | ||
264 | */ | ||
265 | void inet_csk_init_xmit_timers(struct sock *sk, | ||
266 | void (*retransmit_handler)(unsigned long), | ||
267 | void (*delack_handler)(unsigned long), | ||
268 | void (*keepalive_handler)(unsigned long)) | ||
269 | { | ||
270 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
271 | |||
272 | init_timer(&icsk->icsk_retransmit_timer); | ||
273 | init_timer(&icsk->icsk_delack_timer); | ||
274 | init_timer(&sk->sk_timer); | ||
275 | |||
276 | icsk->icsk_retransmit_timer.function = retransmit_handler; | ||
277 | icsk->icsk_delack_timer.function = delack_handler; | ||
278 | sk->sk_timer.function = keepalive_handler; | ||
279 | |||
280 | icsk->icsk_retransmit_timer.data = | ||
281 | icsk->icsk_delack_timer.data = | ||
282 | sk->sk_timer.data = (unsigned long)sk; | ||
283 | |||
284 | icsk->icsk_pending = icsk->icsk_ack.pending = 0; | ||
285 | } | ||
286 | |||
287 | EXPORT_SYMBOL(inet_csk_init_xmit_timers); | ||
288 | |||
289 | void inet_csk_clear_xmit_timers(struct sock *sk) | ||
290 | { | ||
291 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
292 | |||
293 | icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0; | ||
294 | |||
295 | sk_stop_timer(sk, &icsk->icsk_retransmit_timer); | ||
296 | sk_stop_timer(sk, &icsk->icsk_delack_timer); | ||
297 | sk_stop_timer(sk, &sk->sk_timer); | ||
298 | } | ||
299 | |||
300 | EXPORT_SYMBOL(inet_csk_clear_xmit_timers); | ||
301 | |||
302 | void inet_csk_delete_keepalive_timer(struct sock *sk) | ||
303 | { | ||
304 | sk_stop_timer(sk, &sk->sk_timer); | ||
305 | } | ||
306 | |||
307 | EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); | ||
308 | |||
309 | void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) | ||
310 | { | ||
311 | sk_reset_timer(sk, &sk->sk_timer, jiffies + len); | ||
312 | } | ||
313 | |||
314 | EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); | ||
315 | |||
316 | struct dst_entry* inet_csk_route_req(struct sock *sk, | ||
317 | const struct request_sock *req) | ||
318 | { | ||
319 | struct rtable *rt; | ||
320 | const struct inet_request_sock *ireq = inet_rsk(req); | ||
321 | struct ip_options *opt = inet_rsk(req)->opt; | ||
322 | struct flowi fl = { .oif = sk->sk_bound_dev_if, | ||
323 | .nl_u = { .ip4_u = | ||
324 | { .daddr = ((opt && opt->srr) ? | ||
325 | opt->faddr : | ||
326 | ireq->rmt_addr), | ||
327 | .saddr = ireq->loc_addr, | ||
328 | .tos = RT_CONN_FLAGS(sk) } }, | ||
329 | .proto = sk->sk_protocol, | ||
330 | .uli_u = { .ports = | ||
331 | { .sport = inet_sk(sk)->sport, | ||
332 | .dport = ireq->rmt_port } } }; | ||
333 | |||
334 | if (ip_route_output_flow(&rt, &fl, sk, 0)) { | ||
335 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | ||
336 | return NULL; | ||
337 | } | ||
338 | if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) { | ||
339 | ip_rt_put(rt); | ||
340 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | ||
341 | return NULL; | ||
342 | } | ||
343 | return &rt->u.dst; | ||
344 | } | ||
345 | |||
346 | EXPORT_SYMBOL_GPL(inet_csk_route_req); | ||
347 | |||
348 | static inline u32 inet_synq_hash(const u32 raddr, const u16 rport, | ||
349 | const u32 rnd, const u16 synq_hsize) | ||
350 | { | ||
351 | return jhash_2words(raddr, (u32)rport, rnd) & (synq_hsize - 1); | ||
352 | } | ||
353 | |||
354 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
355 | #define AF_INET_FAMILY(fam) ((fam) == AF_INET) | ||
356 | #else | ||
357 | #define AF_INET_FAMILY(fam) 1 | ||
358 | #endif | ||
359 | |||
360 | struct request_sock *inet_csk_search_req(const struct sock *sk, | ||
361 | struct request_sock ***prevp, | ||
362 | const __u16 rport, const __u32 raddr, | ||
363 | const __u32 laddr) | ||
364 | { | ||
365 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
366 | struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; | ||
367 | struct request_sock *req, **prev; | ||
368 | |||
369 | for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd, | ||
370 | lopt->nr_table_entries)]; | ||
371 | (req = *prev) != NULL; | ||
372 | prev = &req->dl_next) { | ||
373 | const struct inet_request_sock *ireq = inet_rsk(req); | ||
374 | |||
375 | if (ireq->rmt_port == rport && | ||
376 | ireq->rmt_addr == raddr && | ||
377 | ireq->loc_addr == laddr && | ||
378 | AF_INET_FAMILY(req->rsk_ops->family)) { | ||
379 | BUG_TRAP(!req->sk); | ||
380 | *prevp = prev; | ||
381 | break; | ||
382 | } | ||
383 | } | ||
384 | |||
385 | return req; | ||
386 | } | ||
387 | |||
388 | EXPORT_SYMBOL_GPL(inet_csk_search_req); | ||
389 | |||
390 | void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, | ||
391 | const unsigned timeout) | ||
392 | { | ||
393 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
394 | struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; | ||
395 | const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, | ||
396 | lopt->hash_rnd, lopt->nr_table_entries); | ||
397 | |||
398 | reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); | ||
399 | inet_csk_reqsk_queue_added(sk, timeout); | ||
400 | } | ||
401 | |||
402 | /* Only thing we need from tcp.h */ | ||
403 | extern int sysctl_tcp_synack_retries; | ||
404 | |||
405 | EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); | ||
406 | |||
407 | void inet_csk_reqsk_queue_prune(struct sock *parent, | ||
408 | const unsigned long interval, | ||
409 | const unsigned long timeout, | ||
410 | const unsigned long max_rto) | ||
411 | { | ||
412 | struct inet_connection_sock *icsk = inet_csk(parent); | ||
413 | struct request_sock_queue *queue = &icsk->icsk_accept_queue; | ||
414 | struct listen_sock *lopt = queue->listen_opt; | ||
415 | int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; | ||
416 | int thresh = max_retries; | ||
417 | unsigned long now = jiffies; | ||
418 | struct request_sock **reqp, *req; | ||
419 | int i, budget; | ||
420 | |||
421 | if (lopt == NULL || lopt->qlen == 0) | ||
422 | return; | ||
423 | |||
424 | /* Normally all the openreqs are young and become mature | ||
425 | * (i.e. converted to established socket) for first timeout. | ||
426 | * If synack was not acknowledged for 3 seconds, it means | ||
427 | * one of the following things: synack was lost, ack was lost, | ||
428 | * rtt is high or nobody planned to ack (i.e. synflood). | ||
429 | * When server is a bit loaded, queue is populated with old | ||
430 | * open requests, reducing effective size of queue. | ||
431 | * When server is well loaded, queue size reduces to zero | ||
432 | * after several minutes of work. It is not synflood, | ||
433 | * it is normal operation. The solution is pruning | ||
434 | * too old entries overriding normal timeout, when | ||
435 | * situation becomes dangerous. | ||
436 | * | ||
437 | * Essentially, we reserve half of room for young | ||
438 | * embrions; and abort old ones without pity, if old | ||
439 | * ones are about to clog our table. | ||
440 | */ | ||
441 | if (lopt->qlen>>(lopt->max_qlen_log-1)) { | ||
442 | int young = (lopt->qlen_young<<1); | ||
443 | |||
444 | while (thresh > 2) { | ||
445 | if (lopt->qlen < young) | ||
446 | break; | ||
447 | thresh--; | ||
448 | young <<= 1; | ||
449 | } | ||
450 | } | ||
451 | |||
452 | if (queue->rskq_defer_accept) | ||
453 | max_retries = queue->rskq_defer_accept; | ||
454 | |||
455 | budget = 2 * (lopt->nr_table_entries / (timeout / interval)); | ||
456 | i = lopt->clock_hand; | ||
457 | |||
458 | do { | ||
459 | reqp=&lopt->syn_table[i]; | ||
460 | while ((req = *reqp) != NULL) { | ||
461 | if (time_after_eq(now, req->expires)) { | ||
462 | if ((req->retrans < thresh || | ||
463 | (inet_rsk(req)->acked && req->retrans < max_retries)) | ||
464 | && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) { | ||
465 | unsigned long timeo; | ||
466 | |||
467 | if (req->retrans++ == 0) | ||
468 | lopt->qlen_young--; | ||
469 | timeo = min((timeout << req->retrans), max_rto); | ||
470 | req->expires = now + timeo; | ||
471 | reqp = &req->dl_next; | ||
472 | continue; | ||
473 | } | ||
474 | |||
475 | /* Drop this request */ | ||
476 | inet_csk_reqsk_queue_unlink(parent, req, reqp); | ||
477 | reqsk_queue_removed(queue, req); | ||
478 | reqsk_free(req); | ||
479 | continue; | ||
480 | } | ||
481 | reqp = &req->dl_next; | ||
482 | } | ||
483 | |||
484 | i = (i + 1) & (lopt->nr_table_entries - 1); | ||
485 | |||
486 | } while (--budget > 0); | ||
487 | |||
488 | lopt->clock_hand = i; | ||
489 | |||
490 | if (lopt->qlen) | ||
491 | inet_csk_reset_keepalive_timer(parent, interval); | ||
492 | } | ||
493 | |||
494 | EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); | ||
495 | |||
496 | struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, | ||
497 | const unsigned int __nocast priority) | ||
498 | { | ||
499 | struct sock *newsk = sk_clone(sk, priority); | ||
500 | |||
501 | if (newsk != NULL) { | ||
502 | struct inet_connection_sock *newicsk = inet_csk(newsk); | ||
503 | |||
504 | newsk->sk_state = TCP_SYN_RECV; | ||
505 | newicsk->icsk_bind_hash = NULL; | ||
506 | |||
507 | inet_sk(newsk)->dport = inet_rsk(req)->rmt_port; | ||
508 | newsk->sk_write_space = sk_stream_write_space; | ||
509 | |||
510 | newicsk->icsk_retransmits = 0; | ||
511 | newicsk->icsk_backoff = 0; | ||
512 | newicsk->icsk_probes_out = 0; | ||
513 | |||
514 | /* Deinitialize accept_queue to trap illegal accesses. */ | ||
515 | memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); | ||
516 | } | ||
517 | return newsk; | ||
518 | } | ||
519 | |||
520 | EXPORT_SYMBOL_GPL(inet_csk_clone); | ||
521 | |||
522 | /* | ||
523 | * At this point, there should be no process reference to this | ||
524 | * socket, and thus no user references at all. Therefore we | ||
525 | * can assume the socket waitqueue is inactive and nobody will | ||
526 | * try to jump onto it. | ||
527 | */ | ||
528 | void inet_csk_destroy_sock(struct sock *sk) | ||
529 | { | ||
530 | BUG_TRAP(sk->sk_state == TCP_CLOSE); | ||
531 | BUG_TRAP(sock_flag(sk, SOCK_DEAD)); | ||
532 | |||
533 | /* It cannot be in hash table! */ | ||
534 | BUG_TRAP(sk_unhashed(sk)); | ||
535 | |||
536 | /* If it has not 0 inet_sk(sk)->num, it must be bound */ | ||
537 | BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash); | ||
538 | |||
539 | sk->sk_prot->destroy(sk); | ||
540 | |||
541 | sk_stream_kill_queues(sk); | ||
542 | |||
543 | xfrm_sk_free_policy(sk); | ||
544 | |||
545 | sk_refcnt_debug_release(sk); | ||
546 | |||
547 | atomic_dec(sk->sk_prot->orphan_count); | ||
548 | sock_put(sk); | ||
549 | } | ||
550 | |||
551 | EXPORT_SYMBOL(inet_csk_destroy_sock); | ||
552 | |||
553 | int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) | ||
554 | { | ||
555 | struct inet_sock *inet = inet_sk(sk); | ||
556 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
557 | int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries); | ||
558 | |||
559 | if (rc != 0) | ||
560 | return rc; | ||
561 | |||
562 | sk->sk_max_ack_backlog = 0; | ||
563 | sk->sk_ack_backlog = 0; | ||
564 | inet_csk_delack_init(sk); | ||
565 | |||
566 | /* There is race window here: we announce ourselves listening, | ||
567 | * but this transition is still not validated by get_port(). | ||
568 | * It is OK, because this socket enters to hash table only | ||
569 | * after validation is complete. | ||
570 | */ | ||
571 | sk->sk_state = TCP_LISTEN; | ||
572 | if (!sk->sk_prot->get_port(sk, inet->num)) { | ||
573 | inet->sport = htons(inet->num); | ||
574 | |||
575 | sk_dst_reset(sk); | ||
576 | sk->sk_prot->hash(sk); | ||
577 | |||
578 | return 0; | ||
579 | } | ||
580 | |||
581 | sk->sk_state = TCP_CLOSE; | ||
582 | __reqsk_queue_destroy(&icsk->icsk_accept_queue); | ||
583 | return -EADDRINUSE; | ||
584 | } | ||
585 | |||
586 | EXPORT_SYMBOL_GPL(inet_csk_listen_start); | ||
587 | |||
588 | /* | ||
589 | * This routine closes sockets which have been at least partially | ||
590 | * opened, but not yet accepted. | ||
591 | */ | ||
592 | void inet_csk_listen_stop(struct sock *sk) | ||
593 | { | ||
594 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
595 | struct request_sock *acc_req; | ||
596 | struct request_sock *req; | ||
597 | |||
598 | inet_csk_delete_keepalive_timer(sk); | ||
599 | |||
600 | /* make all the listen_opt local to us */ | ||
601 | acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue); | ||
602 | |||
603 | /* Following specs, it would be better either to send FIN | ||
604 | * (and enter FIN-WAIT-1, it is normal close) | ||
605 | * or to send active reset (abort). | ||
606 | * Certainly, it is pretty dangerous while synflood, but it is | ||
607 | * bad justification for our negligence 8) | ||
608 | * To be honest, we are not able to make either | ||
609 | * of the variants now. --ANK | ||
610 | */ | ||
611 | reqsk_queue_destroy(&icsk->icsk_accept_queue); | ||
612 | |||
613 | while ((req = acc_req) != NULL) { | ||
614 | struct sock *child = req->sk; | ||
615 | |||
616 | acc_req = req->dl_next; | ||
617 | |||
618 | local_bh_disable(); | ||
619 | bh_lock_sock(child); | ||
620 | BUG_TRAP(!sock_owned_by_user(child)); | ||
621 | sock_hold(child); | ||
622 | |||
623 | sk->sk_prot->disconnect(child, O_NONBLOCK); | ||
624 | |||
625 | sock_orphan(child); | ||
626 | |||
627 | atomic_inc(sk->sk_prot->orphan_count); | ||
628 | |||
629 | inet_csk_destroy_sock(child); | ||
630 | |||
631 | bh_unlock_sock(child); | ||
632 | local_bh_enable(); | ||
633 | sock_put(child); | ||
634 | |||
635 | sk_acceptq_removed(sk); | ||
636 | __reqsk_free(req); | ||
637 | } | ||
638 | BUG_TRAP(!sk->sk_ack_backlog); | ||
639 | } | ||
640 | |||
641 | EXPORT_SYMBOL_GPL(inet_csk_listen_stop); | ||
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c new file mode 100644 index 000000000000..71f3c7350c6e --- /dev/null +++ b/net/ipv4/inet_diag.c | |||
@@ -0,0 +1,868 @@ | |||
1 | /* | ||
2 | * inet_diag.c Module for monitoring INET transport protocols sockets. | ||
3 | * | ||
4 | * Version: $Id: inet_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $ | ||
5 | * | ||
6 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | */ | ||
13 | |||
14 | #include <linux/config.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/types.h> | ||
17 | #include <linux/fcntl.h> | ||
18 | #include <linux/random.h> | ||
19 | #include <linux/cache.h> | ||
20 | #include <linux/init.h> | ||
21 | #include <linux/time.h> | ||
22 | |||
23 | #include <net/icmp.h> | ||
24 | #include <net/tcp.h> | ||
25 | #include <net/ipv6.h> | ||
26 | #include <net/inet_common.h> | ||
27 | #include <net/inet_connection_sock.h> | ||
28 | #include <net/inet_hashtables.h> | ||
29 | #include <net/inet_timewait_sock.h> | ||
30 | #include <net/inet6_hashtables.h> | ||
31 | |||
32 | #include <linux/inet.h> | ||
33 | #include <linux/stddef.h> | ||
34 | |||
35 | #include <linux/inet_diag.h> | ||
36 | |||
37 | static const struct inet_diag_handler **inet_diag_table; | ||
38 | |||
/* Flattened view of one socket's addressing, used as input to the
 * bytecode comparison engine (inet_diag_bc_run). saddr/daddr point at
 * one u32 (IPv4) or four u32s (IPv6); ports are host byte order.
 */
struct inet_diag_entry {
	u32 *saddr;
	u32 *daddr;
	u16 sport;
	u16 dport;
	u16 family;		/* AF_INET or AF_INET6 */
	u16 userlocks;		/* sk->sk_userlocks, for INET_DIAG_BC_AUTO */
};
47 | |||
48 | static struct sock *idiagnl; | ||
49 | |||
50 | #define INET_DIAG_PUT(skb, attrtype, attrlen) \ | ||
51 | RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) | ||
52 | |||
/*
 * Fill one netlink reply describing socket @sk into @skb.
 *
 * @ext:		bitmask of (1 << (INET_DIAG_* - 1)) optional attributes
 *			requested by userspace.
 * @pid, @seq, @nlmsg_flags: netlink addressing for the reply.
 * @unlh:		request header; its nlmsg_type selects the protocol
 *			handler from inet_diag_table.
 *
 * Returns skb->len on success, -1 when the message does not fit (any
 * partially written data is trimmed back off @skb).
 *
 * NOTE(review): NLMSG_PUT and INET_DIAG_PUT jump to the nlmsg_failure /
 * rtattr_failure labels at the bottom on overflow - control flow here is
 * not linear.
 */
static int inet_diag_fill(struct sk_buff *skb, struct sock *sk,
			int ext, u32 pid, u32 seq, u16 nlmsg_flags,
			const struct nlmsghdr *unlh)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	void *info = NULL;
	struct inet_diag_meminfo *minfo = NULL;
	unsigned char *b = skb->tail;	/* rollback point for failure paths */
	const struct inet_diag_handler *handler;

	handler = inet_diag_table[unlh->nlmsg_type];
	BUG_ON(handler == NULL);

	nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r));
	nlh->nlmsg_flags = nlmsg_flags;

	r = NLMSG_DATA(nlh);
	/* TIME_WAIT sockets have no meminfo/info/congestion state to report. */
	if (sk->sk_state != TCP_TIME_WAIT) {
		if (ext & (1 << (INET_DIAG_MEMINFO - 1)))
			minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO,
					      sizeof(*minfo));
		if (ext & (1 << (INET_DIAG_INFO - 1)))
			info = INET_DIAG_PUT(skb, INET_DIAG_INFO,
					     handler->idiag_info_size);

		if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
			size_t len = strlen(icsk->icsk_ca_ops->name);
			strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
			       icsk->icsk_ca_ops->name);
		}
	}
	r->idiag_family = sk->sk_family;
	r->idiag_state = sk->sk_state;
	r->idiag_timer = 0;
	r->idiag_retrans = 0;

	r->id.idiag_if = sk->sk_bound_dev_if;
	/* The cookie is the kernel pointer split into two 32-bit halves;
	 * the double shift (>> 31 >> 1) avoids UB on 32-bit, where a
	 * plain >> 32 would shift by the full word width.
	 */
	r->id.idiag_cookie[0] = (u32)(unsigned long)sk;
	r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);

	if (r->idiag_state == TCP_TIME_WAIT) {
		/* Timewait minisockets: report from the inet_timewait_sock
		 * layout and return early - the fields below don't exist.
		 */
		const struct inet_timewait_sock *tw = inet_twsk(sk);
		long tmo = tw->tw_ttd - jiffies;
		if (tmo < 0)
			tmo = 0;

		r->id.idiag_sport = tw->tw_sport;
		r->id.idiag_dport = tw->tw_dport;
		r->id.idiag_src[0] = tw->tw_rcv_saddr;
		r->id.idiag_dst[0] = tw->tw_daddr;
		r->idiag_state = tw->tw_substate;
		r->idiag_timer = 3;
		r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ;	/* ms left */
		r->idiag_rqueue = 0;
		r->idiag_wqueue = 0;
		r->idiag_uid = 0;
		r->idiag_inode = 0;
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
		if (r->idiag_family == AF_INET6) {
			const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);

			ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
				       &tcp6tw->tw_v6_rcv_saddr);
			ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
				       &tcp6tw->tw_v6_daddr);
		}
#endif
		nlh->nlmsg_len = skb->tail - b;
		return skb->len;
	}

	r->id.idiag_sport = inet->sport;
	r->id.idiag_dport = inet->dport;
	r->id.idiag_src[0] = inet->rcv_saddr;
	r->id.idiag_dst[0] = inet->daddr;

#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
	if (r->idiag_family == AF_INET6) {
		struct ipv6_pinfo *np = inet6_sk(sk);

		ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
			       &np->rcv_saddr);
		ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
			       &np->daddr);
	}
#endif

	/* Remaining jiffies until @tmo, rounded up, in milliseconds. */
#define EXPIRES_IN_MS(tmo)  ((tmo - jiffies) * 1000 + HZ - 1) / HZ

	/* Encode which timer is pending: 1=retransmit, 4=zero-window probe,
	 * 2=keepalive (sk_timer), 0=none.
	 */
	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		r->idiag_timer = 1;
		r->idiag_retrans = icsk->icsk_retransmits;
		r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout);
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		r->idiag_timer = 4;
		r->idiag_retrans = icsk->icsk_probes_out;
		r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout);
	} else if (timer_pending(&sk->sk_timer)) {
		r->idiag_timer = 2;
		r->idiag_retrans = icsk->icsk_probes_out;
		r->idiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires);
	} else {
		r->idiag_timer = 0;
		r->idiag_expires = 0;
	}
#undef EXPIRES_IN_MS

	r->idiag_uid = sock_i_uid(sk);
	r->idiag_inode = sock_i_ino(sk);

	if (minfo) {
		minfo->idiag_rmem = atomic_read(&sk->sk_rmem_alloc);
		minfo->idiag_wmem = sk->sk_wmem_queued;
		minfo->idiag_fmem = sk->sk_forward_alloc;
		minfo->idiag_tmem = atomic_read(&sk->sk_wmem_alloc);
	}

	/* Protocol-specific info attribute (e.g. tcp_info). */
	handler->idiag_get_info(sk, r, info);

	if (sk->sk_state < TCP_TIME_WAIT &&
	    icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info)
		icsk->icsk_ca_ops->get_info(sk, ext, skb);

	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

rtattr_failure:
nlmsg_failure:
	/* Message overflowed @skb: undo everything written since @b. */
	skb_trim(skb, b - skb->data);
	return -1;
}
187 | |||
/*
 * Handle a non-dump request: look up exactly one socket by its
 * (saddr, sport, daddr, dport, ifindex) tuple and unicast a single
 * reply back to the requester.
 *
 * Returns 0 on success, -EINVAL on bad family, -ENOENT if no socket
 * matches, -ESTALE if the caller supplied a cookie that does not match
 * the socket found, -ENOMEM on allocation failure.
 */
static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh)
{
	int err;
	struct sock *sk;
	struct inet_diag_req *req = NLMSG_DATA(nlh);
	struct sk_buff *rep;
	struct inet_hashinfo *hashinfo;
	const struct inet_diag_handler *handler;

	handler = inet_diag_table[nlh->nlmsg_type];
	BUG_ON(handler == NULL);
	hashinfo = handler->idiag_hashinfo;

	/* Lookup takes a reference on the socket; dropped at "out". */
	if (req->idiag_family == AF_INET) {
		sk = inet_lookup(hashinfo, req->id.idiag_dst[0],
				 req->id.idiag_dport, req->id.idiag_src[0],
				 req->id.idiag_sport, req->id.idiag_if);
	}
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
	else if (req->idiag_family == AF_INET6) {
		sk = inet6_lookup(hashinfo,
				  (struct in6_addr *)req->id.idiag_dst,
				  req->id.idiag_dport,
				  (struct in6_addr *)req->id.idiag_src,
				  req->id.idiag_sport,
				  req->id.idiag_if);
	}
#endif
	else {
		return -EINVAL;
	}

	if (sk == NULL)
		return -ENOENT;

	/* If a cookie was supplied (not NOCOOKIE), it must match the
	 * kernel pointer of the socket we found - guards against the
	 * tuple having been reused by a different socket.
	 */
	err = -ESTALE;
	if ((req->id.idiag_cookie[0] != INET_DIAG_NOCOOKIE ||
	     req->id.idiag_cookie[1] != INET_DIAG_NOCOOKIE) &&
	    ((u32)(unsigned long)sk != req->id.idiag_cookie[0] ||
	     (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.idiag_cookie[1]))
		goto out;

	err = -ENOMEM;
	/* Worst-case reply size: base msg + meminfo + protocol info + slack. */
	rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) +
				     sizeof(struct inet_diag_meminfo) +
				     handler->idiag_info_size + 64)),
			GFP_KERNEL);
	if (!rep)
		goto out;

	/* Cannot fail: rep was sized for the largest possible message. */
	if (inet_diag_fill(rep, sk, req->idiag_ext,
			   NETLINK_CB(in_skb).pid,
			   nlh->nlmsg_seq, 0, nlh) <= 0)
		BUG();

	err = netlink_unicast(idiagnl, rep, NETLINK_CB(in_skb).pid,
			      MSG_DONTWAIT);
	if (err > 0)
		err = 0;

out:
	/* Release the lookup reference; timewait sockets use their own
	 * refcounting primitive.
	 */
	if (sk) {
		if (sk->sk_state == TCP_TIME_WAIT)
			inet_twsk_put((struct inet_timewait_sock *)sk);
		else
			sock_put(sk);
	}
	return err;
}
257 | |||
258 | static int bitstring_match(const u32 *a1, const u32 *a2, int bits) | ||
259 | { | ||
260 | int words = bits >> 5; | ||
261 | |||
262 | bits &= 0x1f; | ||
263 | |||
264 | if (words) { | ||
265 | if (memcmp(a1, a2, words << 2)) | ||
266 | return 0; | ||
267 | } | ||
268 | if (bits) { | ||
269 | __u32 w1, w2; | ||
270 | __u32 mask; | ||
271 | |||
272 | w1 = a1[words]; | ||
273 | w2 = a2[words]; | ||
274 | |||
275 | mask = htonl((0xffffffff) << (32 - bits)); | ||
276 | |||
277 | if ((w1 ^ w2) & mask) | ||
278 | return 0; | ||
279 | } | ||
280 | |||
281 | return 1; | ||
282 | } | ||
283 | |||
284 | |||
285 | static int inet_diag_bc_run(const void *bc, int len, | ||
286 | const struct inet_diag_entry *entry) | ||
287 | { | ||
288 | while (len > 0) { | ||
289 | int yes = 1; | ||
290 | const struct inet_diag_bc_op *op = bc; | ||
291 | |||
292 | switch (op->code) { | ||
293 | case INET_DIAG_BC_NOP: | ||
294 | break; | ||
295 | case INET_DIAG_BC_JMP: | ||
296 | yes = 0; | ||
297 | break; | ||
298 | case INET_DIAG_BC_S_GE: | ||
299 | yes = entry->sport >= op[1].no; | ||
300 | break; | ||
301 | case INET_DIAG_BC_S_LE: | ||
302 | yes = entry->dport <= op[1].no; | ||
303 | break; | ||
304 | case INET_DIAG_BC_D_GE: | ||
305 | yes = entry->dport >= op[1].no; | ||
306 | break; | ||
307 | case INET_DIAG_BC_D_LE: | ||
308 | yes = entry->dport <= op[1].no; | ||
309 | break; | ||
310 | case INET_DIAG_BC_AUTO: | ||
311 | yes = !(entry->userlocks & SOCK_BINDPORT_LOCK); | ||
312 | break; | ||
313 | case INET_DIAG_BC_S_COND: | ||
314 | case INET_DIAG_BC_D_COND: { | ||
315 | struct inet_diag_hostcond *cond; | ||
316 | u32 *addr; | ||
317 | |||
318 | cond = (struct inet_diag_hostcond *)(op + 1); | ||
319 | if (cond->port != -1 && | ||
320 | cond->port != (op->code == INET_DIAG_BC_S_COND ? | ||
321 | entry->sport : entry->dport)) { | ||
322 | yes = 0; | ||
323 | break; | ||
324 | } | ||
325 | |||
326 | if (cond->prefix_len == 0) | ||
327 | break; | ||
328 | |||
329 | if (op->code == INET_DIAG_BC_S_COND) | ||
330 | addr = entry->saddr; | ||
331 | else | ||
332 | addr = entry->daddr; | ||
333 | |||
334 | if (bitstring_match(addr, cond->addr, cond->prefix_len)) | ||
335 | break; | ||
336 | if (entry->family == AF_INET6 && | ||
337 | cond->family == AF_INET) { | ||
338 | if (addr[0] == 0 && addr[1] == 0 && | ||
339 | addr[2] == htonl(0xffff) && | ||
340 | bitstring_match(addr + 3, cond->addr, | ||
341 | cond->prefix_len)) | ||
342 | break; | ||
343 | } | ||
344 | yes = 0; | ||
345 | break; | ||
346 | } | ||
347 | } | ||
348 | |||
349 | if (yes) { | ||
350 | len -= op->yes; | ||
351 | bc += op->yes; | ||
352 | } else { | ||
353 | len -= op->no; | ||
354 | bc += op->no; | ||
355 | } | ||
356 | } | ||
357 | return (len == 0); | ||
358 | } | ||
359 | |||
360 | static int valid_cc(const void *bc, int len, int cc) | ||
361 | { | ||
362 | while (len >= 0) { | ||
363 | const struct inet_diag_bc_op *op = bc; | ||
364 | |||
365 | if (cc > len) | ||
366 | return 0; | ||
367 | if (cc == len) | ||
368 | return 1; | ||
369 | if (op->yes < 4) | ||
370 | return 0; | ||
371 | len -= op->yes; | ||
372 | bc += op->yes; | ||
373 | } | ||
374 | return 0; | ||
375 | } | ||
376 | |||
/*
 * Validate userspace filter bytecode before it is ever executed.
 * Ensures every opcode is known, every jump offset advances (>= 4) and
 * stays within the program, so inet_diag_bc_run() cannot loop forever
 * or walk out of bounds. Returns 0 if valid, -EINVAL otherwise.
 */
static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
{
	const unsigned char *bc = bytecode;
	int len = bytecode_len;

	while (len > 0) {
		struct inet_diag_bc_op *op = (struct inet_diag_bc_op *)bc;

		switch (op->code) {
		case INET_DIAG_BC_AUTO:
		case INET_DIAG_BC_S_COND:
		case INET_DIAG_BC_D_COND:
		case INET_DIAG_BC_S_GE:
		case INET_DIAG_BC_S_LE:
		case INET_DIAG_BC_D_GE:
		case INET_DIAG_BC_D_LE:
			if (op->yes < 4 || op->yes > len + 4)
				return -EINVAL;
			/* fallthrough: conditional ops also validate op->no */
		case INET_DIAG_BC_JMP:
			if (op->no < 4 || op->no > len + 4)
				return -EINVAL;
			/* A "no" jump into the middle of the program must
			 * land on an op boundary reachable via yes-chains.
			 */
			if (op->no < len &&
			    !valid_cc(bytecode, bytecode_len, len - op->no))
				return -EINVAL;
			break;
		case INET_DIAG_BC_NOP:
			if (op->yes < 4 || op->yes > len + 4)
				return -EINVAL;
			break;
		default:
			return -EINVAL;
		}
		bc += op->yes;
		len -= op->yes;
	}
	/* Program is valid only if it consumed its length exactly. */
	return len == 0 ? 0 : -EINVAL;
}
415 | |||
/*
 * Dump-path helper: run the optional filter bytecode (appended after
 * the request struct) against @sk, and emit a reply for it if the
 * filter accepts. Returns 0 when filtered out, otherwise the result
 * of inet_diag_fill() (< 0 means "skb full, stop the dump here").
 */
static int inet_diag_dump_sock(struct sk_buff *skb, struct sock *sk,
			       struct netlink_callback *cb)
{
	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);

	/* Anything beyond the fixed request struct is filter bytecode. */
	if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
		struct inet_diag_entry entry;
		struct rtattr *bc = (struct rtattr *)(r + 1);
		struct inet_sock *inet = inet_sk(sk);

		entry.family = sk->sk_family;
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
		if (entry.family == AF_INET6) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			entry.saddr = np->rcv_saddr.s6_addr32;
			entry.daddr = np->daddr.s6_addr32;
		} else
#endif
		{
			entry.saddr = &inet->rcv_saddr;
			entry.daddr = &inet->daddr;
		}
		entry.sport = inet->num;
		entry.dport = ntohs(inet->dport);
		entry.userlocks = sk->sk_userlocks;

		if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
			return 0;
	}

	return inet_diag_fill(skb, sk, r->idiag_ext, NETLINK_CB(cb->skb).pid,
			      cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
}
450 | |||
/*
 * Emit one reply for an embryonic (SYN_RECV) connection request @req
 * hanging off listening socket @sk. Mirrors inet_diag_fill() but pulls
 * addressing from the request_sock instead of a full socket.
 *
 * Returns skb->len on success, -1 if the message did not fit.
 * NOTE(review): NLMSG_PUT jumps to nlmsg_failure on overflow.
 */
static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
			      struct request_sock *req,
			      u32 pid, u32 seq,
			      const struct nlmsghdr *unlh)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct inet_sock *inet = inet_sk(sk);
	unsigned char *b = skb->tail;	/* rollback point on failure */
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	long tmo;

	nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r));
	nlh->nlmsg_flags = NLM_F_MULTI;
	r = NLMSG_DATA(nlh);

	r->idiag_family = sk->sk_family;
	r->idiag_state = TCP_SYN_RECV;
	r->idiag_timer = 1;		/* SYN-ACK retransmit timer */
	r->idiag_retrans = req->retrans;

	r->id.idiag_if = sk->sk_bound_dev_if;
	/* Cookie identifies the request_sock itself, not the listener;
	 * the double shift avoids UB shifting a 32-bit long by 32.
	 */
	r->id.idiag_cookie[0] = (u32)(unsigned long)req;
	r->id.idiag_cookie[1] = (u32)(((unsigned long)req >> 31) >> 1);

	tmo = req->expires - jiffies;
	if (tmo < 0)
		tmo = 0;

	r->id.idiag_sport = inet->sport;
	r->id.idiag_dport = ireq->rmt_port;
	r->id.idiag_src[0] = ireq->loc_addr;
	r->id.idiag_dst[0] = ireq->rmt_addr;
	r->idiag_expires = jiffies_to_msecs(tmo);
	r->idiag_rqueue = 0;
	r->idiag_wqueue = 0;
	r->idiag_uid = sock_i_uid(sk);
	r->idiag_inode = 0;
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
	if (r->idiag_family == AF_INET6) {
		ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
			       &tcp6_rsk(req)->loc_addr);
		ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
			       &tcp6_rsk(req)->rmt_addr);
	}
#endif
	nlh->nlmsg_len = skb->tail - b;

	return skb->len;

nlmsg_failure:
	/* Message overflowed @skb: undo everything written since @b. */
	skb_trim(skb, b - skb->data);
	return -1;
}
505 | |||
/*
 * Dump all pending (SYN_RECV) connection requests of listener @sk.
 *
 * Resumable: cb->args[3] is the syn-table bucket to resume at (offset
 * by +1 so 0 means "start fresh") and cb->args[4] the request index
 * within that bucket. Both are written back when the skb fills up.
 *
 * Returns 0 on completion, or the negative result of the fill that
 * did not fit.
 */
static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
			       struct netlink_callback *cb)
{
	struct inet_diag_entry entry;
	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct listen_sock *lopt;
	struct rtattr *bc = NULL;
	struct inet_sock *inet = inet_sk(sk);
	int j, s_j;
	int reqnum, s_reqnum;
	int err = 0;

	s_j = cb->args[3];
	s_reqnum = cb->args[4];

	if (s_j > 0)
		s_j--;	/* undo the +1 bias stored on suspend */

	entry.family = sk->sk_family;

	/* Protects the listener's SYN table while we walk it. */
	read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);

	lopt = icsk->icsk_accept_queue.listen_opt;
	if (!lopt || !lopt->qlen)
		goto out;

	/* Anything beyond the fixed request struct is filter bytecode. */
	if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
		bc = (struct rtattr *)(r + 1);
		entry.sport = inet->num;
		entry.userlocks = sk->sk_userlocks;
	}

	for (j = s_j; j < lopt->nr_table_entries; j++) {
		struct request_sock *req, *head = lopt->syn_table[j];

		reqnum = 0;
		for (req = head; req; reqnum++, req = req->dl_next) {
			struct inet_request_sock *ireq = inet_rsk(req);

			if (reqnum < s_reqnum)
				continue;	/* already dumped last round */
			if (r->id.idiag_dport != ireq->rmt_port &&
			    r->id.idiag_dport)
				continue;

			if (bc) {
				/* Point the filter entry at this request's
				 * addresses (v6 variants when applicable).
				 */
				entry.saddr =
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
					(entry.family == AF_INET6) ?
					tcp6_rsk(req)->loc_addr.s6_addr32 :
#endif
					&ireq->loc_addr;
				entry.daddr =
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
					(entry.family == AF_INET6) ?
					tcp6_rsk(req)->rmt_addr.s6_addr32 :
#endif
					&ireq->rmt_addr;
				entry.dport = ntohs(ireq->rmt_port);

				if (!inet_diag_bc_run(RTA_DATA(bc),
						      RTA_PAYLOAD(bc), &entry))
					continue;
			}

			err = inet_diag_fill_req(skb, sk, req,
						 NETLINK_CB(cb->skb).pid,
						 cb->nlh->nlmsg_seq, cb->nlh);
			if (err < 0) {
				/* skb full: record resume position (+1 bias
				 * on the bucket) and bail out.
				 */
				cb->args[3] = j + 1;
				cb->args[4] = reqnum;
				goto out;
			}
		}

		s_reqnum = 0;	/* only skip within the resumed bucket */
	}

out:
	read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);

	return err;
}
590 | |||
/*
 * Netlink dump callback: walk the protocol's listening hash and then
 * its established (+ timewait) hash, emitting one message per socket
 * that passes the request's state mask, port filters and bytecode.
 *
 * Resume state in cb->args:
 *   [0] phase (0 = listening hash, 1 = established hash)
 *   [1] bucket index, [2] position within bucket
 *   [3]/[4] request-dump cursor (see inet_diag_dump_reqs)
 *
 * Returns skb->len; the netlink core calls back until a pass adds
 * nothing more.
 */
static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int i, num;
	int s_i, s_num;
	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
	const struct inet_diag_handler *handler;
	struct inet_hashinfo *hashinfo;

	handler = inet_diag_table[cb->nlh->nlmsg_type];
	BUG_ON(handler == NULL);
	hashinfo = handler->idiag_hashinfo;

	s_i = cb->args[1];
	s_num = num = cb->args[2];

	/* Phase 0: listening sockets (and their SYN_RECV requests). */
	if (cb->args[0] == 0) {
		if (!(r->idiag_states & (TCPF_LISTEN | TCPF_SYN_RECV)))
			goto skip_listen_ht;

		inet_listen_lock(hashinfo);
		for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
			struct sock *sk;
			struct hlist_node *node;

			num = 0;
			sk_for_each(sk, node, &hashinfo->listening_hash[i]) {
				struct inet_sock *inet = inet_sk(sk);

				if (num < s_num) {
					num++;	/* skip entries already sent */
					continue;
				}

				if (r->id.idiag_sport != inet->sport &&
				    r->id.idiag_sport)
					goto next_listen;

				/* Skip the listener itself if LISTEN wasn't
				 * requested, a dport filter is set (listeners
				 * have none), or we're resuming mid-request-
				 * dump for this listener.
				 */
				if (!(r->idiag_states & TCPF_LISTEN) ||
				    r->id.idiag_dport ||
				    cb->args[3] > 0)
					goto syn_recv;

				if (inet_diag_dump_sock(skb, sk, cb) < 0) {
					inet_listen_unlock(hashinfo);
					goto done;
				}

syn_recv:
				if (!(r->idiag_states & TCPF_SYN_RECV))
					goto next_listen;

				if (inet_diag_dump_reqs(skb, sk, cb) < 0) {
					inet_listen_unlock(hashinfo);
					goto done;
				}

next_listen:
				cb->args[3] = 0;
				cb->args[4] = 0;
				++num;
			}

			s_num = 0;
			cb->args[3] = 0;
			cb->args[4] = 0;
		}
		inet_listen_unlock(hashinfo);
skip_listen_ht:
		cb->args[0] = 1;	/* advance to phase 1 */
		s_i = num = s_num = 0;
	}

	/* Nothing left to do if only LISTEN/SYN_RECV states were wanted. */
	if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV)))
		return skb->len;

	/* Phase 1: established hash; the second half of ehash (offset by
	 * ehash_size) holds the TIME_WAIT sockets.
	 */
	for (i = s_i; i < hashinfo->ehash_size; i++) {
		struct inet_ehash_bucket *head = &hashinfo->ehash[i];
		struct sock *sk;
		struct hlist_node *node;

		if (i > s_i)
			s_num = 0;	/* skip-count applies only to resumed bucket */

		read_lock_bh(&head->lock);

		num = 0;
		sk_for_each(sk, node, &head->chain) {
			struct inet_sock *inet = inet_sk(sk);

			if (num < s_num)
				goto next_normal;
			if (!(r->idiag_states & (1 << sk->sk_state)))
				goto next_normal;
			if (r->id.idiag_sport != inet->sport &&
			    r->id.idiag_sport)
				goto next_normal;
			if (r->id.idiag_dport != inet->dport && r->id.idiag_dport)
				goto next_normal;
			if (inet_diag_dump_sock(skb, sk, cb) < 0) {
				read_unlock_bh(&head->lock);
				goto done;
			}
next_normal:
			++num;
		}

		if (r->idiag_states & TCPF_TIME_WAIT) {
			/* num keeps counting across both chains so the
			 * resume cursor stays unambiguous.
			 */
			sk_for_each(sk, node,
			    &hashinfo->ehash[i + hashinfo->ehash_size].chain) {
				struct inet_sock *inet = inet_sk(sk);

				if (num < s_num)
					goto next_dying;
				if (r->id.idiag_sport != inet->sport &&
				    r->id.idiag_sport)
					goto next_dying;
				if (r->id.idiag_dport != inet->dport &&
				    r->id.idiag_dport)
					goto next_dying;
				if (inet_diag_dump_sock(skb, sk, cb) < 0) {
					read_unlock_bh(&head->lock);
					goto done;
				}
next_dying:
				++num;
			}
		}
		read_unlock_bh(&head->lock);
	}

done:
	/* Persist resume cursor for the next dump invocation. */
	cb->args[1] = i;
	cb->args[2] = num;
	return skb->len;
}
726 | |||
/* Dump-completion callback required by netlink_dump_start();
 * nothing to clean up here.
 */
static int inet_diag_dump_done(struct netlink_callback *cb)
{
	return 0;
}
731 | |||
732 | |||
/*
 * Dispatch one validated netlink message: start a dump for NLM_F_DUMP
 * requests (after auditing any attached filter bytecode), otherwise
 * answer an exact single-socket query.
 *
 * Returns 0 for ignored messages, a negative errno on error, or the
 * result of the dump/query.
 */
static __inline__ int
inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
		return 0;	/* not a request: silently ignore */

	if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX)
		goto err_inval;

	if (inet_diag_table[nlh->nlmsg_type] == NULL)
		return -ENOENT;	/* no handler registered for this protocol */

	if (NLMSG_LENGTH(sizeof(struct inet_diag_req)) > skb->len)
		goto err_inval;

	if (nlh->nlmsg_flags&NLM_F_DUMP) {
		/* Anything beyond the fixed request struct must be a
		 * well-formed, in-bounds bytecode attribute; audit it
		 * before it can ever be executed.
		 */
		if (nlh->nlmsg_len >
		    (4 + NLMSG_SPACE(sizeof(struct inet_diag_req)))) {
			struct rtattr *rta = (void *)(NLMSG_DATA(nlh) +
						 sizeof(struct inet_diag_req));
			if (rta->rta_type != INET_DIAG_REQ_BYTECODE ||
			    rta->rta_len < 8 ||
			    rta->rta_len >
			    (nlh->nlmsg_len -
			     NLMSG_SPACE(sizeof(struct inet_diag_req))))
				goto err_inval;
			if (inet_diag_bc_audit(RTA_DATA(rta), RTA_PAYLOAD(rta)))
				goto err_inval;
		}
		return netlink_dump_start(idiagnl, skb, nlh,
					  inet_diag_dump,
					  inet_diag_dump_done);
	} else {
		return inet_diag_get_exact(skb, nlh);
	}

err_inval:
	return -EINVAL;
}
772 | |||
773 | |||
774 | static inline void inet_diag_rcv_skb(struct sk_buff *skb) | ||
775 | { | ||
776 | int err; | ||
777 | struct nlmsghdr * nlh; | ||
778 | |||
779 | if (skb->len >= NLMSG_SPACE(0)) { | ||
780 | nlh = (struct nlmsghdr *)skb->data; | ||
781 | if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) | ||
782 | return; | ||
783 | err = inet_diag_rcv_msg(skb, nlh); | ||
784 | if (err || nlh->nlmsg_flags & NLM_F_ACK) | ||
785 | netlink_ack(skb, nlh, err); | ||
786 | } | ||
787 | } | ||
788 | |||
789 | static void inet_diag_rcv(struct sock *sk, int len) | ||
790 | { | ||
791 | struct sk_buff *skb; | ||
792 | unsigned int qlen = skb_queue_len(&sk->sk_receive_queue); | ||
793 | |||
794 | while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) { | ||
795 | inet_diag_rcv_skb(skb); | ||
796 | kfree_skb(skb); | ||
797 | } | ||
798 | } | ||
799 | |||
800 | static DEFINE_SPINLOCK(inet_diag_register_lock); | ||
801 | |||
802 | int inet_diag_register(const struct inet_diag_handler *h) | ||
803 | { | ||
804 | const __u16 type = h->idiag_type; | ||
805 | int err = -EINVAL; | ||
806 | |||
807 | if (type >= INET_DIAG_GETSOCK_MAX) | ||
808 | goto out; | ||
809 | |||
810 | spin_lock(&inet_diag_register_lock); | ||
811 | err = -EEXIST; | ||
812 | if (inet_diag_table[type] == NULL) { | ||
813 | inet_diag_table[type] = h; | ||
814 | err = 0; | ||
815 | } | ||
816 | spin_unlock(&inet_diag_register_lock); | ||
817 | out: | ||
818 | return err; | ||
819 | } | ||
820 | EXPORT_SYMBOL_GPL(inet_diag_register); | ||
821 | |||
822 | void inet_diag_unregister(const struct inet_diag_handler *h) | ||
823 | { | ||
824 | const __u16 type = h->idiag_type; | ||
825 | |||
826 | if (type >= INET_DIAG_GETSOCK_MAX) | ||
827 | return; | ||
828 | |||
829 | spin_lock(&inet_diag_register_lock); | ||
830 | inet_diag_table[type] = NULL; | ||
831 | spin_unlock(&inet_diag_register_lock); | ||
832 | |||
833 | synchronize_rcu(); | ||
834 | } | ||
835 | EXPORT_SYMBOL_GPL(inet_diag_unregister); | ||
836 | |||
837 | static int __init inet_diag_init(void) | ||
838 | { | ||
839 | const int inet_diag_table_size = (INET_DIAG_GETSOCK_MAX * | ||
840 | sizeof(struct inet_diag_handler *)); | ||
841 | int err = -ENOMEM; | ||
842 | |||
843 | inet_diag_table = kmalloc(inet_diag_table_size, GFP_KERNEL); | ||
844 | if (!inet_diag_table) | ||
845 | goto out; | ||
846 | |||
847 | memset(inet_diag_table, 0, inet_diag_table_size); | ||
848 | idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv, | ||
849 | THIS_MODULE); | ||
850 | if (idiagnl == NULL) | ||
851 | goto out_free_table; | ||
852 | err = 0; | ||
853 | out: | ||
854 | return err; | ||
855 | out_free_table: | ||
856 | kfree(inet_diag_table); | ||
857 | goto out; | ||
858 | } | ||
859 | |||
/* Module teardown: close the netlink socket and free the dispatch
 * table (all handlers must have unregistered by now).
 */
static void __exit inet_diag_exit(void)
{
	sock_release(idiagnl->sk_socket);
	kfree(inet_diag_table);
}
865 | |||
866 | module_init(inet_diag_init); | ||
867 | module_exit(inet_diag_exit); | ||
868 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c new file mode 100644 index 000000000000..e8d29fe736d2 --- /dev/null +++ b/net/ipv4/inet_hashtables.c | |||
@@ -0,0 +1,165 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * Generic INET transport hashtables | ||
7 | * | ||
8 | * Authors: Lotsa people, from code originally in tcp | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License | ||
12 | * as published by the Free Software Foundation; either version | ||
13 | * 2 of the License, or (at your option) any later version. | ||
14 | */ | ||
15 | |||
16 | #include <linux/config.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/sched.h> | ||
19 | #include <linux/slab.h> | ||
20 | #include <linux/wait.h> | ||
21 | |||
22 | #include <net/inet_connection_sock.h> | ||
23 | #include <net/inet_hashtables.h> | ||
24 | |||
25 | /* | ||
26 | * Allocate and initialize a new local port bind bucket. | ||
27 | * The bindhash mutex for snum's hash chain must be held here. | ||
28 | */ | ||
29 | struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep, | ||
30 | struct inet_bind_hashbucket *head, | ||
31 | const unsigned short snum) | ||
32 | { | ||
33 | struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, SLAB_ATOMIC); | ||
34 | |||
35 | if (tb != NULL) { | ||
36 | tb->port = snum; | ||
37 | tb->fastreuse = 0; | ||
38 | INIT_HLIST_HEAD(&tb->owners); | ||
39 | hlist_add_head(&tb->node, &head->chain); | ||
40 | } | ||
41 | return tb; | ||
42 | } | ||
43 | |||
44 | EXPORT_SYMBOL(inet_bind_bucket_create); | ||
45 | |||
46 | /* | ||
47 | * Caller must hold hashbucket lock for this tb with local BH disabled | ||
48 | */ | ||
49 | void inet_bind_bucket_destroy(kmem_cache_t *cachep, struct inet_bind_bucket *tb) | ||
50 | { | ||
51 | if (hlist_empty(&tb->owners)) { | ||
52 | __hlist_del(&tb->node); | ||
53 | kmem_cache_free(cachep, tb); | ||
54 | } | ||
55 | } | ||
56 | |||
/* Bind socket @sk to local port @snum: record the port, add the socket
 * to the bucket's owner list and remember the bucket for later release.
 * NOTE(review): presumably called with the bucket's chain lock held,
 * like the other bind-bucket operations in this file - confirm at
 * call sites.
 */
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
		    const unsigned short snum)
{
	inet_sk(sk)->num = snum;
	sk_add_bind_node(sk, &tb->owners);
	inet_csk(sk)->icsk_bind_hash = tb;
}

EXPORT_SYMBOL(inet_bind_hash);
66 | |||
/*
 * Get rid of any references to a local port held by the given sock.
 * Unlinks @sk from its bind bucket, clears its local port, and frees
 * the bucket if @sk was its last owner. Caller must have BHs disabled
 * (see inet_put_port below).
 */
static void __inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk)
{
	const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size);
	struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
	struct inet_bind_bucket *tb;

	spin_lock(&head->lock);
	tb = inet_csk(sk)->icsk_bind_hash;
	__sk_del_bind_node(sk);
	inet_csk(sk)->icsk_bind_hash = NULL;
	inet_sk(sk)->num = 0;
	/* Frees tb only if sk was its last owner. */
	inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
	spin_unlock(&head->lock);
}
84 | |||
/* BH-safe wrapper around __inet_put_port() for process context. */
void inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk)
{
	local_bh_disable();
	__inet_put_port(hashinfo, sk);
	local_bh_enable();
}

EXPORT_SYMBOL(inet_put_port);
93 | |||
/*
 * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
 * Look, when several writers sleep and reader wakes them up, all but one
 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
 * this, _but_ remember, it adds useless work on UP machines (wake up each
 * exclusive lock release). It should be ifdefed really.
 */
/* Acquire the listening-hash write lock, waiting until all lockless
 * readers (tracked by lhash_users) have drained. Returns with the
 * write lock held. NOTE(review): the entry write_lock() is the plain
 * variant while the retry loop uses the _bh variants - this assumes
 * the caller already has BHs disabled; confirm at call sites.
 */
void inet_listen_wlock(struct inet_hashinfo *hashinfo)
{
	write_lock(&hashinfo->lhash_lock);

	if (atomic_read(&hashinfo->lhash_users)) {
		DEFINE_WAIT(wait);

		for (;;) {
			prepare_to_wait_exclusive(&hashinfo->lhash_wait,
						  &wait, TASK_UNINTERRUPTIBLE);
			if (!atomic_read(&hashinfo->lhash_users))
				break;
			/* Drop the lock so readers can finish, then retry. */
			write_unlock_bh(&hashinfo->lhash_lock);
			schedule();
			write_lock_bh(&hashinfo->lhash_lock);
		}

		finish_wait(&hashinfo->lhash_wait, &wait);
	}
}

EXPORT_SYMBOL(inet_listen_wlock);
123 | |||
124 | /* | ||
125 | * Don't inline this cruft. Here are some nice properties to exploit here. The | ||
126 | * BSD API does not allow a listening sock to specify the remote port nor the | ||
127 | * remote address for the connection. So always assume those are both | ||
128 | * wildcarded during the search since they can never be otherwise. | ||
129 | */ | ||
130 | struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr, | ||
131 | const unsigned short hnum, const int dif) | ||
132 | { | ||
133 | struct sock *result = NULL, *sk; | ||
134 | const struct hlist_node *node; | ||
135 | int hiscore = -1; | ||
136 | |||
137 | sk_for_each(sk, node, head) { | ||
138 | const struct inet_sock *inet = inet_sk(sk); | ||
139 | |||
140 | if (inet->num == hnum && !ipv6_only_sock(sk)) { | ||
141 | const __u32 rcv_saddr = inet->rcv_saddr; | ||
142 | int score = sk->sk_family == PF_INET ? 1 : 0; | ||
143 | |||
144 | if (rcv_saddr) { | ||
145 | if (rcv_saddr != daddr) | ||
146 | continue; | ||
147 | score += 2; | ||
148 | } | ||
149 | if (sk->sk_bound_dev_if) { | ||
150 | if (sk->sk_bound_dev_if != dif) | ||
151 | continue; | ||
152 | score += 2; | ||
153 | } | ||
154 | if (score == 5) | ||
155 | return sk; | ||
156 | if (score > hiscore) { | ||
157 | hiscore = score; | ||
158 | result = sk; | ||
159 | } | ||
160 | } | ||
161 | } | ||
162 | return result; | ||
163 | } | ||
164 | |||
165 | EXPORT_SYMBOL_GPL(__inet_lookup_listener); | ||
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c new file mode 100644 index 000000000000..4d1502a49852 --- /dev/null +++ b/net/ipv4/inet_timewait_sock.c | |||
@@ -0,0 +1,384 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * Generic TIME_WAIT sockets functions | ||
7 | * | ||
8 | * From code orinally in TCP | ||
9 | */ | ||
10 | |||
11 | #include <linux/config.h> | ||
12 | |||
13 | #include <net/inet_hashtables.h> | ||
14 | #include <net/inet_timewait_sock.h> | ||
15 | #include <net/ip.h> | ||
16 | |||
17 | /* Must be called with locally disabled BHs. */ | ||
18 | void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo) | ||
19 | { | ||
20 | struct inet_bind_hashbucket *bhead; | ||
21 | struct inet_bind_bucket *tb; | ||
22 | /* Unlink from established hashes. */ | ||
23 | struct inet_ehash_bucket *ehead = &hashinfo->ehash[tw->tw_hashent]; | ||
24 | |||
25 | write_lock(&ehead->lock); | ||
26 | if (hlist_unhashed(&tw->tw_node)) { | ||
27 | write_unlock(&ehead->lock); | ||
28 | return; | ||
29 | } | ||
30 | __hlist_del(&tw->tw_node); | ||
31 | sk_node_init(&tw->tw_node); | ||
32 | write_unlock(&ehead->lock); | ||
33 | |||
34 | /* Disassociate with bind bucket. */ | ||
35 | bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)]; | ||
36 | spin_lock(&bhead->lock); | ||
37 | tb = tw->tw_tb; | ||
38 | __hlist_del(&tw->tw_bind_node); | ||
39 | tw->tw_tb = NULL; | ||
40 | inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); | ||
41 | spin_unlock(&bhead->lock); | ||
42 | #ifdef SOCK_REFCNT_DEBUG | ||
43 | if (atomic_read(&tw->tw_refcnt) != 1) { | ||
44 | printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n", | ||
45 | tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); | ||
46 | } | ||
47 | #endif | ||
48 | inet_twsk_put(tw); | ||
49 | } | ||
50 | |||
51 | EXPORT_SYMBOL_GPL(__inet_twsk_kill); | ||
52 | |||
53 | /* | ||
54 | * Enter the time wait state. This is called with locally disabled BH. | ||
55 | * Essentially we whip up a timewait bucket, copy the relevant info into it | ||
56 | * from the SK, and mess with hash chains and list linkage. | ||
57 | */ | ||
58 | void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, | ||
59 | struct inet_hashinfo *hashinfo) | ||
60 | { | ||
61 | const struct inet_sock *inet = inet_sk(sk); | ||
62 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
63 | struct inet_ehash_bucket *ehead = &hashinfo->ehash[sk->sk_hashent]; | ||
64 | struct inet_bind_hashbucket *bhead; | ||
65 | /* Step 1: Put TW into bind hash. Original socket stays there too. | ||
66 | Note, that any socket with inet->num != 0 MUST be bound in | ||
67 | binding cache, even if it is closed. | ||
68 | */ | ||
69 | bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)]; | ||
70 | spin_lock(&bhead->lock); | ||
71 | tw->tw_tb = icsk->icsk_bind_hash; | ||
72 | BUG_TRAP(icsk->icsk_bind_hash); | ||
73 | inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); | ||
74 | spin_unlock(&bhead->lock); | ||
75 | |||
76 | write_lock(&ehead->lock); | ||
77 | |||
78 | /* Step 2: Remove SK from established hash. */ | ||
79 | if (__sk_del_node_init(sk)) | ||
80 | sock_prot_dec_use(sk->sk_prot); | ||
81 | |||
82 | /* Step 3: Hash TW into TIMEWAIT half of established hash table. */ | ||
83 | inet_twsk_add_node(tw, &(ehead + hashinfo->ehash_size)->chain); | ||
84 | atomic_inc(&tw->tw_refcnt); | ||
85 | |||
86 | write_unlock(&ehead->lock); | ||
87 | } | ||
88 | |||
89 | EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); | ||
90 | |||
91 | struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) | ||
92 | { | ||
93 | struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab, | ||
94 | SLAB_ATOMIC); | ||
95 | if (tw != NULL) { | ||
96 | const struct inet_sock *inet = inet_sk(sk); | ||
97 | |||
98 | /* Give us an identity. */ | ||
99 | tw->tw_daddr = inet->daddr; | ||
100 | tw->tw_rcv_saddr = inet->rcv_saddr; | ||
101 | tw->tw_bound_dev_if = sk->sk_bound_dev_if; | ||
102 | tw->tw_num = inet->num; | ||
103 | tw->tw_state = TCP_TIME_WAIT; | ||
104 | tw->tw_substate = state; | ||
105 | tw->tw_sport = inet->sport; | ||
106 | tw->tw_dport = inet->dport; | ||
107 | tw->tw_family = sk->sk_family; | ||
108 | tw->tw_reuse = sk->sk_reuse; | ||
109 | tw->tw_hashent = sk->sk_hashent; | ||
110 | tw->tw_ipv6only = 0; | ||
111 | tw->tw_prot = sk->sk_prot_creator; | ||
112 | atomic_set(&tw->tw_refcnt, 1); | ||
113 | inet_twsk_dead_node_init(tw); | ||
114 | } | ||
115 | |||
116 | return tw; | ||
117 | } | ||
118 | |||
119 | EXPORT_SYMBOL_GPL(inet_twsk_alloc); | ||
120 | |||
121 | /* Returns non-zero if quota exceeded. */ | ||
122 | static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, | ||
123 | const int slot) | ||
124 | { | ||
125 | struct inet_timewait_sock *tw; | ||
126 | struct hlist_node *node; | ||
127 | unsigned int killed; | ||
128 | int ret; | ||
129 | |||
130 | /* NOTE: compare this to previous version where lock | ||
131 | * was released after detaching chain. It was racy, | ||
132 | * because tw buckets are scheduled in not serialized context | ||
133 | * in 2.3 (with netfilter), and with softnet it is common, because | ||
134 | * soft irqs are not sequenced. | ||
135 | */ | ||
136 | killed = 0; | ||
137 | ret = 0; | ||
138 | rescan: | ||
139 | inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) { | ||
140 | __inet_twsk_del_dead_node(tw); | ||
141 | spin_unlock(&twdr->death_lock); | ||
142 | __inet_twsk_kill(tw, twdr->hashinfo); | ||
143 | inet_twsk_put(tw); | ||
144 | killed++; | ||
145 | spin_lock(&twdr->death_lock); | ||
146 | if (killed > INET_TWDR_TWKILL_QUOTA) { | ||
147 | ret = 1; | ||
148 | break; | ||
149 | } | ||
150 | |||
151 | /* While we dropped twdr->death_lock, another cpu may have | ||
152 | * killed off the next TW bucket in the list, therefore | ||
153 | * do a fresh re-read of the hlist head node with the | ||
154 | * lock reacquired. We still use the hlist traversal | ||
155 | * macro in order to get the prefetches. | ||
156 | */ | ||
157 | goto rescan; | ||
158 | } | ||
159 | |||
160 | twdr->tw_count -= killed; | ||
161 | NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed); | ||
162 | |||
163 | return ret; | ||
164 | } | ||
165 | |||
166 | void inet_twdr_hangman(unsigned long data) | ||
167 | { | ||
168 | struct inet_timewait_death_row *twdr; | ||
169 | int unsigned need_timer; | ||
170 | |||
171 | twdr = (struct inet_timewait_death_row *)data; | ||
172 | spin_lock(&twdr->death_lock); | ||
173 | |||
174 | if (twdr->tw_count == 0) | ||
175 | goto out; | ||
176 | |||
177 | need_timer = 0; | ||
178 | if (inet_twdr_do_twkill_work(twdr, twdr->slot)) { | ||
179 | twdr->thread_slots |= (1 << twdr->slot); | ||
180 | mb(); | ||
181 | schedule_work(&twdr->twkill_work); | ||
182 | need_timer = 1; | ||
183 | } else { | ||
184 | /* We purged the entire slot, anything left? */ | ||
185 | if (twdr->tw_count) | ||
186 | need_timer = 1; | ||
187 | } | ||
188 | twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1)); | ||
189 | if (need_timer) | ||
190 | mod_timer(&twdr->tw_timer, jiffies + twdr->period); | ||
191 | out: | ||
192 | spin_unlock(&twdr->death_lock); | ||
193 | } | ||
194 | |||
195 | EXPORT_SYMBOL_GPL(inet_twdr_hangman); | ||
196 | |||
197 | extern void twkill_slots_invalid(void); | ||
198 | |||
199 | void inet_twdr_twkill_work(void *data) | ||
200 | { | ||
201 | struct inet_timewait_death_row *twdr = data; | ||
202 | int i; | ||
203 | |||
204 | if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8)) | ||
205 | twkill_slots_invalid(); | ||
206 | |||
207 | while (twdr->thread_slots) { | ||
208 | spin_lock_bh(&twdr->death_lock); | ||
209 | for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) { | ||
210 | if (!(twdr->thread_slots & (1 << i))) | ||
211 | continue; | ||
212 | |||
213 | while (inet_twdr_do_twkill_work(twdr, i) != 0) { | ||
214 | if (need_resched()) { | ||
215 | spin_unlock_bh(&twdr->death_lock); | ||
216 | schedule(); | ||
217 | spin_lock_bh(&twdr->death_lock); | ||
218 | } | ||
219 | } | ||
220 | |||
221 | twdr->thread_slots &= ~(1 << i); | ||
222 | } | ||
223 | spin_unlock_bh(&twdr->death_lock); | ||
224 | } | ||
225 | } | ||
226 | |||
227 | EXPORT_SYMBOL_GPL(inet_twdr_twkill_work); | ||
228 | |||
229 | /* These are always called from BH context. See callers in | ||
230 | * tcp_input.c to verify this. | ||
231 | */ | ||
232 | |||
233 | /* This is for handling early-kills of TIME_WAIT sockets. */ | ||
234 | void inet_twsk_deschedule(struct inet_timewait_sock *tw, | ||
235 | struct inet_timewait_death_row *twdr) | ||
236 | { | ||
237 | spin_lock(&twdr->death_lock); | ||
238 | if (inet_twsk_del_dead_node(tw)) { | ||
239 | inet_twsk_put(tw); | ||
240 | if (--twdr->tw_count == 0) | ||
241 | del_timer(&twdr->tw_timer); | ||
242 | } | ||
243 | spin_unlock(&twdr->death_lock); | ||
244 | __inet_twsk_kill(tw, twdr->hashinfo); | ||
245 | } | ||
246 | |||
247 | EXPORT_SYMBOL(inet_twsk_deschedule); | ||
248 | |||
249 | void inet_twsk_schedule(struct inet_timewait_sock *tw, | ||
250 | struct inet_timewait_death_row *twdr, | ||
251 | const int timeo, const int timewait_len) | ||
252 | { | ||
253 | struct hlist_head *list; | ||
254 | int slot; | ||
255 | |||
256 | /* timeout := RTO * 3.5 | ||
257 | * | ||
258 | * 3.5 = 1+2+0.5 to wait for two retransmits. | ||
259 | * | ||
260 | * RATIONALE: if FIN arrived and we entered TIME-WAIT state, | ||
261 | * our ACK acking that FIN can be lost. If N subsequent retransmitted | ||
262 | * FINs (or previous seqments) are lost (probability of such event | ||
263 | * is p^(N+1), where p is probability to lose single packet and | ||
264 | * time to detect the loss is about RTO*(2^N - 1) with exponential | ||
265 | * backoff). Normal timewait length is calculated so, that we | ||
266 | * waited at least for one retransmitted FIN (maximal RTO is 120sec). | ||
267 | * [ BTW Linux. following BSD, violates this requirement waiting | ||
268 | * only for 60sec, we should wait at least for 240 secs. | ||
269 | * Well, 240 consumes too much of resources 8) | ||
270 | * ] | ||
271 | * This interval is not reduced to catch old duplicate and | ||
272 | * responces to our wandering segments living for two MSLs. | ||
273 | * However, if we use PAWS to detect | ||
274 | * old duplicates, we can reduce the interval to bounds required | ||
275 | * by RTO, rather than MSL. So, if peer understands PAWS, we | ||
276 | * kill tw bucket after 3.5*RTO (it is important that this number | ||
277 | * is greater than TS tick!) and detect old duplicates with help | ||
278 | * of PAWS. | ||
279 | */ | ||
280 | slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK; | ||
281 | |||
282 | spin_lock(&twdr->death_lock); | ||
283 | |||
284 | /* Unlink it, if it was scheduled */ | ||
285 | if (inet_twsk_del_dead_node(tw)) | ||
286 | twdr->tw_count--; | ||
287 | else | ||
288 | atomic_inc(&tw->tw_refcnt); | ||
289 | |||
290 | if (slot >= INET_TWDR_RECYCLE_SLOTS) { | ||
291 | /* Schedule to slow timer */ | ||
292 | if (timeo >= timewait_len) { | ||
293 | slot = INET_TWDR_TWKILL_SLOTS - 1; | ||
294 | } else { | ||
295 | slot = (timeo + twdr->period - 1) / twdr->period; | ||
296 | if (slot >= INET_TWDR_TWKILL_SLOTS) | ||
297 | slot = INET_TWDR_TWKILL_SLOTS - 1; | ||
298 | } | ||
299 | tw->tw_ttd = jiffies + timeo; | ||
300 | slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1); | ||
301 | list = &twdr->cells[slot]; | ||
302 | } else { | ||
303 | tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK); | ||
304 | |||
305 | if (twdr->twcal_hand < 0) { | ||
306 | twdr->twcal_hand = 0; | ||
307 | twdr->twcal_jiffie = jiffies; | ||
308 | twdr->twcal_timer.expires = twdr->twcal_jiffie + | ||
309 | (slot << INET_TWDR_RECYCLE_TICK); | ||
310 | add_timer(&twdr->twcal_timer); | ||
311 | } else { | ||
312 | if (time_after(twdr->twcal_timer.expires, | ||
313 | jiffies + (slot << INET_TWDR_RECYCLE_TICK))) | ||
314 | mod_timer(&twdr->twcal_timer, | ||
315 | jiffies + (slot << INET_TWDR_RECYCLE_TICK)); | ||
316 | slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1); | ||
317 | } | ||
318 | list = &twdr->twcal_row[slot]; | ||
319 | } | ||
320 | |||
321 | hlist_add_head(&tw->tw_death_node, list); | ||
322 | |||
323 | if (twdr->tw_count++ == 0) | ||
324 | mod_timer(&twdr->tw_timer, jiffies + twdr->period); | ||
325 | spin_unlock(&twdr->death_lock); | ||
326 | } | ||
327 | |||
328 | EXPORT_SYMBOL_GPL(inet_twsk_schedule); | ||
329 | |||
330 | void inet_twdr_twcal_tick(unsigned long data) | ||
331 | { | ||
332 | struct inet_timewait_death_row *twdr; | ||
333 | int n, slot; | ||
334 | unsigned long j; | ||
335 | unsigned long now = jiffies; | ||
336 | int killed = 0; | ||
337 | int adv = 0; | ||
338 | |||
339 | twdr = (struct inet_timewait_death_row *)data; | ||
340 | |||
341 | spin_lock(&twdr->death_lock); | ||
342 | if (twdr->twcal_hand < 0) | ||
343 | goto out; | ||
344 | |||
345 | slot = twdr->twcal_hand; | ||
346 | j = twdr->twcal_jiffie; | ||
347 | |||
348 | for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) { | ||
349 | if (time_before_eq(j, now)) { | ||
350 | struct hlist_node *node, *safe; | ||
351 | struct inet_timewait_sock *tw; | ||
352 | |||
353 | inet_twsk_for_each_inmate_safe(tw, node, safe, | ||
354 | &twdr->twcal_row[slot]) { | ||
355 | __inet_twsk_del_dead_node(tw); | ||
356 | __inet_twsk_kill(tw, twdr->hashinfo); | ||
357 | inet_twsk_put(tw); | ||
358 | killed++; | ||
359 | } | ||
360 | } else { | ||
361 | if (!adv) { | ||
362 | adv = 1; | ||
363 | twdr->twcal_jiffie = j; | ||
364 | twdr->twcal_hand = slot; | ||
365 | } | ||
366 | |||
367 | if (!hlist_empty(&twdr->twcal_row[slot])) { | ||
368 | mod_timer(&twdr->twcal_timer, j); | ||
369 | goto out; | ||
370 | } | ||
371 | } | ||
372 | j += 1 << INET_TWDR_RECYCLE_TICK; | ||
373 | slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1); | ||
374 | } | ||
375 | twdr->twcal_hand = -1; | ||
376 | |||
377 | out: | ||
378 | if ((twdr->tw_count -= killed) == 0) | ||
379 | del_timer(&twdr->tw_timer); | ||
380 | NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed); | ||
381 | spin_unlock(&twdr->death_lock); | ||
382 | } | ||
383 | |||
384 | EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); | ||
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index ab18a853d7ce..f84ba9c96551 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/kernel.h> | 20 | #include <linux/kernel.h> |
21 | #include <linux/mm.h> | 21 | #include <linux/mm.h> |
22 | #include <linux/net.h> | 22 | #include <linux/net.h> |
23 | #include <net/ip.h> | ||
23 | #include <net/inetpeer.h> | 24 | #include <net/inetpeer.h> |
24 | 25 | ||
25 | /* | 26 | /* |
@@ -72,7 +73,7 @@ | |||
72 | /* Exported for inet_getid inline function. */ | 73 | /* Exported for inet_getid inline function. */ |
73 | DEFINE_SPINLOCK(inet_peer_idlock); | 74 | DEFINE_SPINLOCK(inet_peer_idlock); |
74 | 75 | ||
75 | static kmem_cache_t *peer_cachep; | 76 | static kmem_cache_t *peer_cachep __read_mostly; |
76 | 77 | ||
77 | #define node_height(x) x->avl_height | 78 | #define node_height(x) x->avl_height |
78 | static struct inet_peer peer_fake_node = { | 79 | static struct inet_peer peer_fake_node = { |
@@ -459,5 +460,3 @@ static void peer_check_expire(unsigned long dummy) | |||
459 | peer_total / inet_peer_threshold * HZ; | 460 | peer_total / inet_peer_threshold * HZ; |
460 | add_timer(&peer_periodic_timer); | 461 | add_timer(&peer_periodic_timer); |
461 | } | 462 | } |
462 | |||
463 | EXPORT_SYMBOL(inet_peer_idlock); | ||
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 77094aac6c28..0923add122b4 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c | |||
@@ -76,16 +76,12 @@ int ip_forward(struct sk_buff *skb) | |||
76 | * that reaches zero, we must reply an ICMP control message telling | 76 | * that reaches zero, we must reply an ICMP control message telling |
77 | * that the packet's lifetime expired. | 77 | * that the packet's lifetime expired. |
78 | */ | 78 | */ |
79 | 79 | if (skb->nh.iph->ttl <= 1) | |
80 | iph = skb->nh.iph; | ||
81 | |||
82 | if (iph->ttl <= 1) | ||
83 | goto too_many_hops; | 80 | goto too_many_hops; |
84 | 81 | ||
85 | if (!xfrm4_route_forward(skb)) | 82 | if (!xfrm4_route_forward(skb)) |
86 | goto drop; | 83 | goto drop; |
87 | 84 | ||
88 | iph = skb->nh.iph; | ||
89 | rt = (struct rtable*)skb->dst; | 85 | rt = (struct rtable*)skb->dst; |
90 | 86 | ||
91 | if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) | 87 | if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) |
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index eb377ae15305..9e6e683cc34d 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
@@ -377,7 +377,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user) | |||
377 | return ip_frag_intern(hash, qp); | 377 | return ip_frag_intern(hash, qp); |
378 | 378 | ||
379 | out_nomem: | 379 | out_nomem: |
380 | LIMIT_NETDEBUG(printk(KERN_ERR "ip_frag_create: no memory left !\n")); | 380 | LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n"); |
381 | return NULL; | 381 | return NULL; |
382 | } | 382 | } |
383 | 383 | ||
@@ -533,7 +533,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
533 | if (skb->dev) | 533 | if (skb->dev) |
534 | qp->iif = skb->dev->ifindex; | 534 | qp->iif = skb->dev->ifindex; |
535 | skb->dev = NULL; | 535 | skb->dev = NULL; |
536 | qp->stamp = skb->stamp; | 536 | skb_get_timestamp(skb, &qp->stamp); |
537 | qp->meat += skb->len; | 537 | qp->meat += skb->len; |
538 | atomic_add(skb->truesize, &ip_frag_mem); | 538 | atomic_add(skb->truesize, &ip_frag_mem); |
539 | if (offset == 0) | 539 | if (offset == 0) |
@@ -615,7 +615,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) | |||
615 | 615 | ||
616 | head->next = NULL; | 616 | head->next = NULL; |
617 | head->dev = dev; | 617 | head->dev = dev; |
618 | head->stamp = qp->stamp; | 618 | skb_set_timestamp(head, &qp->stamp); |
619 | 619 | ||
620 | iph = head->nh.iph; | 620 | iph = head->nh.iph; |
621 | iph->frag_off = 0; | 621 | iph->frag_off = 0; |
@@ -625,8 +625,8 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) | |||
625 | return head; | 625 | return head; |
626 | 626 | ||
627 | out_nomem: | 627 | out_nomem: |
628 | LIMIT_NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing " | 628 | LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing " |
629 | "queue %p\n", qp)); | 629 | "queue %p\n", qp); |
630 | goto out_fail; | 630 | goto out_fail; |
631 | out_oversize: | 631 | out_oversize: |
632 | if (net_ratelimit()) | 632 | if (net_ratelimit()) |
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index c703528e0bcd..473d0f2b2e0d 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c | |||
@@ -150,7 +150,7 @@ | |||
150 | * SNMP management statistics | 150 | * SNMP management statistics |
151 | */ | 151 | */ |
152 | 152 | ||
153 | DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics); | 153 | DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics) __read_mostly; |
154 | 154 | ||
155 | /* | 155 | /* |
156 | * Process Router Attention IP option | 156 | * Process Router Attention IP option |
@@ -225,8 +225,8 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb) | |||
225 | /* If there maybe a raw socket we must check - if not we | 225 | /* If there maybe a raw socket we must check - if not we |
226 | * don't care less | 226 | * don't care less |
227 | */ | 227 | */ |
228 | if (raw_sk) | 228 | if (raw_sk && !raw_v4_input(skb, skb->nh.iph, hash)) |
229 | raw_v4_input(skb, skb->nh.iph, hash); | 229 | raw_sk = NULL; |
230 | 230 | ||
231 | if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) { | 231 | if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) { |
232 | int ret; | 232 | int ret; |
@@ -279,18 +279,70 @@ int ip_local_deliver(struct sk_buff *skb) | |||
279 | ip_local_deliver_finish); | 279 | ip_local_deliver_finish); |
280 | } | 280 | } |
281 | 281 | ||
282 | static inline int ip_rcv_finish(struct sk_buff *skb) | 282 | static inline int ip_rcv_options(struct sk_buff *skb) |
283 | { | 283 | { |
284 | struct ip_options *opt; | ||
285 | struct iphdr *iph; | ||
284 | struct net_device *dev = skb->dev; | 286 | struct net_device *dev = skb->dev; |
287 | |||
288 | /* It looks as overkill, because not all | ||
289 | IP options require packet mangling. | ||
290 | But it is the easiest for now, especially taking | ||
291 | into account that combination of IP options | ||
292 | and running sniffer is extremely rare condition. | ||
293 | --ANK (980813) | ||
294 | */ | ||
295 | if (skb_cow(skb, skb_headroom(skb))) { | ||
296 | IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); | ||
297 | goto drop; | ||
298 | } | ||
299 | |||
300 | iph = skb->nh.iph; | ||
301 | |||
302 | if (ip_options_compile(NULL, skb)) { | ||
303 | IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); | ||
304 | goto drop; | ||
305 | } | ||
306 | |||
307 | opt = &(IPCB(skb)->opt); | ||
308 | if (unlikely(opt->srr)) { | ||
309 | struct in_device *in_dev = in_dev_get(dev); | ||
310 | if (in_dev) { | ||
311 | if (!IN_DEV_SOURCE_ROUTE(in_dev)) { | ||
312 | if (IN_DEV_LOG_MARTIANS(in_dev) && | ||
313 | net_ratelimit()) | ||
314 | printk(KERN_INFO "source route option " | ||
315 | "%u.%u.%u.%u -> %u.%u.%u.%u\n", | ||
316 | NIPQUAD(iph->saddr), | ||
317 | NIPQUAD(iph->daddr)); | ||
318 | in_dev_put(in_dev); | ||
319 | goto drop; | ||
320 | } | ||
321 | |||
322 | in_dev_put(in_dev); | ||
323 | } | ||
324 | |||
325 | if (ip_options_rcv_srr(skb)) | ||
326 | goto drop; | ||
327 | } | ||
328 | |||
329 | return 0; | ||
330 | drop: | ||
331 | return -1; | ||
332 | } | ||
333 | |||
334 | static inline int ip_rcv_finish(struct sk_buff *skb) | ||
335 | { | ||
285 | struct iphdr *iph = skb->nh.iph; | 336 | struct iphdr *iph = skb->nh.iph; |
286 | int err; | ||
287 | 337 | ||
288 | /* | 338 | /* |
289 | * Initialise the virtual path cache for the packet. It describes | 339 | * Initialise the virtual path cache for the packet. It describes |
290 | * how the packet travels inside Linux networking. | 340 | * how the packet travels inside Linux networking. |
291 | */ | 341 | */ |
292 | if (skb->dst == NULL) { | 342 | if (likely(skb->dst == NULL)) { |
293 | if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { | 343 | int err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, |
344 | skb->dev); | ||
345 | if (unlikely(err)) { | ||
294 | if (err == -EHOSTUNREACH) | 346 | if (err == -EHOSTUNREACH) |
295 | IP_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); | 347 | IP_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); |
296 | goto drop; | 348 | goto drop; |
@@ -298,7 +350,7 @@ static inline int ip_rcv_finish(struct sk_buff *skb) | |||
298 | } | 350 | } |
299 | 351 | ||
300 | #ifdef CONFIG_NET_CLS_ROUTE | 352 | #ifdef CONFIG_NET_CLS_ROUTE |
301 | if (skb->dst->tclassid) { | 353 | if (unlikely(skb->dst->tclassid)) { |
302 | struct ip_rt_acct *st = ip_rt_acct + 256*smp_processor_id(); | 354 | struct ip_rt_acct *st = ip_rt_acct + 256*smp_processor_id(); |
303 | u32 idx = skb->dst->tclassid; | 355 | u32 idx = skb->dst->tclassid; |
304 | st[idx&0xFF].o_packets++; | 356 | st[idx&0xFF].o_packets++; |
@@ -308,48 +360,11 @@ static inline int ip_rcv_finish(struct sk_buff *skb) | |||
308 | } | 360 | } |
309 | #endif | 361 | #endif |
310 | 362 | ||
311 | if (iph->ihl > 5) { | 363 | if (iph->ihl > 5 && ip_rcv_options(skb)) |
312 | struct ip_options *opt; | 364 | goto drop; |
313 | |||
314 | /* It looks as overkill, because not all | ||
315 | IP options require packet mangling. | ||
316 | But it is the easiest for now, especially taking | ||
317 | into account that combination of IP options | ||
318 | and running sniffer is extremely rare condition. | ||
319 | --ANK (980813) | ||
320 | */ | ||
321 | |||
322 | if (skb_cow(skb, skb_headroom(skb))) { | ||
323 | IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); | ||
324 | goto drop; | ||
325 | } | ||
326 | iph = skb->nh.iph; | ||
327 | |||
328 | if (ip_options_compile(NULL, skb)) | ||
329 | goto inhdr_error; | ||
330 | |||
331 | opt = &(IPCB(skb)->opt); | ||
332 | if (opt->srr) { | ||
333 | struct in_device *in_dev = in_dev_get(dev); | ||
334 | if (in_dev) { | ||
335 | if (!IN_DEV_SOURCE_ROUTE(in_dev)) { | ||
336 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) | ||
337 | printk(KERN_INFO "source route option %u.%u.%u.%u -> %u.%u.%u.%u\n", | ||
338 | NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); | ||
339 | in_dev_put(in_dev); | ||
340 | goto drop; | ||
341 | } | ||
342 | in_dev_put(in_dev); | ||
343 | } | ||
344 | if (ip_options_rcv_srr(skb)) | ||
345 | goto drop; | ||
346 | } | ||
347 | } | ||
348 | 365 | ||
349 | return dst_input(skb); | 366 | return dst_input(skb); |
350 | 367 | ||
351 | inhdr_error: | ||
352 | IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); | ||
353 | drop: | 368 | drop: |
354 | kfree_skb(skb); | 369 | kfree_skb(skb); |
355 | return NET_RX_DROP; | 370 | return NET_RX_DROP; |
@@ -358,9 +373,10 @@ drop: | |||
358 | /* | 373 | /* |
359 | * Main IP Receive routine. | 374 | * Main IP Receive routine. |
360 | */ | 375 | */ |
361 | int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) | 376 | int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) |
362 | { | 377 | { |
363 | struct iphdr *iph; | 378 | struct iphdr *iph; |
379 | u32 len; | ||
364 | 380 | ||
365 | /* When the interface is in promisc. mode, drop all the crap | 381 | /* When the interface is in promisc. mode, drop all the crap |
366 | * that it receives, do not try to analyse it. | 382 | * that it receives, do not try to analyse it. |
@@ -392,29 +408,27 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) | |||
392 | */ | 408 | */ |
393 | 409 | ||
394 | if (iph->ihl < 5 || iph->version != 4) | 410 | if (iph->ihl < 5 || iph->version != 4) |
395 | goto inhdr_error; | 411 | goto inhdr_error; |
396 | 412 | ||
397 | if (!pskb_may_pull(skb, iph->ihl*4)) | 413 | if (!pskb_may_pull(skb, iph->ihl*4)) |
398 | goto inhdr_error; | 414 | goto inhdr_error; |
399 | 415 | ||
400 | iph = skb->nh.iph; | 416 | iph = skb->nh.iph; |
401 | 417 | ||
402 | if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) | 418 | if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) |
403 | goto inhdr_error; | 419 | goto inhdr_error; |
404 | 420 | ||
405 | { | 421 | len = ntohs(iph->tot_len); |
406 | __u32 len = ntohs(iph->tot_len); | 422 | if (skb->len < len || len < (iph->ihl*4)) |
407 | if (skb->len < len || len < (iph->ihl<<2)) | 423 | goto inhdr_error; |
408 | goto inhdr_error; | ||
409 | 424 | ||
410 | /* Our transport medium may have padded the buffer out. Now we know it | 425 | /* Our transport medium may have padded the buffer out. Now we know it |
411 | * is IP we can trim to the true length of the frame. | 426 | * is IP we can trim to the true length of the frame. |
412 | * Note this now means skb->len holds ntohs(iph->tot_len). | 427 | * Note this now means skb->len holds ntohs(iph->tot_len). |
413 | */ | 428 | */ |
414 | if (pskb_trim_rcsum(skb, len)) { | 429 | if (pskb_trim_rcsum(skb, len)) { |
415 | IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); | 430 | IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); |
416 | goto drop; | 431 | goto drop; |
417 | } | ||
418 | } | 432 | } |
419 | 433 | ||
420 | return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL, | 434 | return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL, |
@@ -428,5 +442,4 @@ out: | |||
428 | return NET_RX_DROP; | 442 | return NET_RX_DROP; |
429 | } | 443 | } |
430 | 444 | ||
431 | EXPORT_SYMBOL(ip_rcv); | ||
432 | EXPORT_SYMBOL(ip_statistics); | 445 | EXPORT_SYMBOL(ip_statistics); |
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 6d89f3f3e701..bce4e875193b 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c | |||
@@ -489,23 +489,18 @@ void ip_options_undo(struct ip_options * opt) | |||
489 | } | 489 | } |
490 | } | 490 | } |
491 | 491 | ||
492 | int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen, int user) | 492 | static struct ip_options *ip_options_get_alloc(const int optlen) |
493 | { | 493 | { |
494 | struct ip_options *opt; | 494 | struct ip_options *opt = kmalloc(sizeof(*opt) + ((optlen + 3) & ~3), |
495 | GFP_KERNEL); | ||
496 | if (opt) | ||
497 | memset(opt, 0, sizeof(*opt)); | ||
498 | return opt; | ||
499 | } | ||
495 | 500 | ||
496 | opt = kmalloc(sizeof(struct ip_options)+((optlen+3)&~3), GFP_KERNEL); | 501 | static int ip_options_get_finish(struct ip_options **optp, |
497 | if (!opt) | 502 | struct ip_options *opt, int optlen) |
498 | return -ENOMEM; | 503 | { |
499 | memset(opt, 0, sizeof(struct ip_options)); | ||
500 | if (optlen) { | ||
501 | if (user) { | ||
502 | if (copy_from_user(opt->__data, data, optlen)) { | ||
503 | kfree(opt); | ||
504 | return -EFAULT; | ||
505 | } | ||
506 | } else | ||
507 | memcpy(opt->__data, data, optlen); | ||
508 | } | ||
509 | while (optlen & 3) | 504 | while (optlen & 3) |
510 | opt->__data[optlen++] = IPOPT_END; | 505 | opt->__data[optlen++] = IPOPT_END; |
511 | opt->optlen = optlen; | 506 | opt->optlen = optlen; |
@@ -521,6 +516,30 @@ int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen, in | |||
521 | return 0; | 516 | return 0; |
522 | } | 517 | } |
523 | 518 | ||
519 | int ip_options_get_from_user(struct ip_options **optp, unsigned char __user *data, int optlen) | ||
520 | { | ||
521 | struct ip_options *opt = ip_options_get_alloc(optlen); | ||
522 | |||
523 | if (!opt) | ||
524 | return -ENOMEM; | ||
525 | if (optlen && copy_from_user(opt->__data, data, optlen)) { | ||
526 | kfree(opt); | ||
527 | return -EFAULT; | ||
528 | } | ||
529 | return ip_options_get_finish(optp, opt, optlen); | ||
530 | } | ||
531 | |||
532 | int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen) | ||
533 | { | ||
534 | struct ip_options *opt = ip_options_get_alloc(optlen); | ||
535 | |||
536 | if (!opt) | ||
537 | return -ENOMEM; | ||
538 | if (optlen) | ||
539 | memcpy(opt->__data, data, optlen); | ||
540 | return ip_options_get_finish(optp, opt, optlen); | ||
541 | } | ||
542 | |||
524 | void ip_forward_options(struct sk_buff *skb) | 543 | void ip_forward_options(struct sk_buff *skb) |
525 | { | 544 | { |
526 | struct ip_options * opt = &(IPCB(skb)->opt); | 545 | struct ip_options * opt = &(IPCB(skb)->opt); |
@@ -620,6 +639,3 @@ int ip_options_rcv_srr(struct sk_buff *skb) | |||
620 | } | 639 | } |
621 | return 0; | 640 | return 0; |
622 | } | 641 | } |
623 | |||
624 | EXPORT_SYMBOL(ip_options_compile); | ||
625 | EXPORT_SYMBOL(ip_options_undo); | ||
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 80d13103b2b0..3f1a263e1249 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -69,13 +69,10 @@ | |||
69 | #include <net/ip.h> | 69 | #include <net/ip.h> |
70 | #include <net/protocol.h> | 70 | #include <net/protocol.h> |
71 | #include <net/route.h> | 71 | #include <net/route.h> |
72 | #include <net/tcp.h> | ||
73 | #include <net/udp.h> | ||
74 | #include <linux/skbuff.h> | 72 | #include <linux/skbuff.h> |
75 | #include <net/sock.h> | 73 | #include <net/sock.h> |
76 | #include <net/arp.h> | 74 | #include <net/arp.h> |
77 | #include <net/icmp.h> | 75 | #include <net/icmp.h> |
78 | #include <net/raw.h> | ||
79 | #include <net/checksum.h> | 76 | #include <net/checksum.h> |
80 | #include <net/inetpeer.h> | 77 | #include <net/inetpeer.h> |
81 | #include <net/checksum.h> | 78 | #include <net/checksum.h> |
@@ -84,12 +81,8 @@ | |||
84 | #include <linux/netfilter_bridge.h> | 81 | #include <linux/netfilter_bridge.h> |
85 | #include <linux/mroute.h> | 82 | #include <linux/mroute.h> |
86 | #include <linux/netlink.h> | 83 | #include <linux/netlink.h> |
84 | #include <linux/tcp.h> | ||
87 | 85 | ||
88 | /* | ||
89 | * Shall we try to damage output packets if routing dev changes? | ||
90 | */ | ||
91 | |||
92 | int sysctl_ip_dynaddr; | ||
93 | int sysctl_ip_default_ttl = IPDEFTTL; | 86 | int sysctl_ip_default_ttl = IPDEFTTL; |
94 | 87 | ||
95 | /* Generate a checksum for an outgoing IP datagram. */ | 88 | /* Generate a checksum for an outgoing IP datagram. */ |
@@ -165,6 +158,8 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, | |||
165 | dst_output); | 158 | dst_output); |
166 | } | 159 | } |
167 | 160 | ||
161 | EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); | ||
162 | |||
168 | static inline int ip_finish_output2(struct sk_buff *skb) | 163 | static inline int ip_finish_output2(struct sk_buff *skb) |
169 | { | 164 | { |
170 | struct dst_entry *dst = skb->dst; | 165 | struct dst_entry *dst = skb->dst; |
@@ -205,7 +200,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) | |||
205 | return -EINVAL; | 200 | return -EINVAL; |
206 | } | 201 | } |
207 | 202 | ||
208 | int ip_finish_output(struct sk_buff *skb) | 203 | static inline int ip_finish_output(struct sk_buff *skb) |
209 | { | 204 | { |
210 | struct net_device *dev = skb->dst->dev; | 205 | struct net_device *dev = skb->dst->dev; |
211 | 206 | ||
@@ -329,8 +324,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) | |||
329 | if (ip_route_output_flow(&rt, &fl, sk, 0)) | 324 | if (ip_route_output_flow(&rt, &fl, sk, 0)) |
330 | goto no_route; | 325 | goto no_route; |
331 | } | 326 | } |
332 | __sk_dst_set(sk, &rt->u.dst); | 327 | sk_setup_caps(sk, &rt->u.dst); |
333 | tcp_v4_setup_caps(sk, &rt->u.dst); | ||
334 | } | 328 | } |
335 | skb->dst = dst_clone(&rt->u.dst); | 329 | skb->dst = dst_clone(&rt->u.dst); |
336 | 330 | ||
@@ -392,7 +386,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) | |||
392 | #endif | 386 | #endif |
393 | #ifdef CONFIG_NETFILTER | 387 | #ifdef CONFIG_NETFILTER |
394 | to->nfmark = from->nfmark; | 388 | to->nfmark = from->nfmark; |
395 | to->nfcache = from->nfcache; | ||
396 | /* Connection association is same as pre-frag packet */ | 389 | /* Connection association is same as pre-frag packet */ |
397 | nf_conntrack_put(to->nfct); | 390 | nf_conntrack_put(to->nfct); |
398 | to->nfct = from->nfct; | 391 | to->nfct = from->nfct; |
@@ -580,7 +573,7 @@ slow_path: | |||
580 | */ | 573 | */ |
581 | 574 | ||
582 | if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) { | 575 | if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) { |
583 | NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n")); | 576 | NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n"); |
584 | err = -ENOMEM; | 577 | err = -ENOMEM; |
585 | goto fail; | 578 | goto fail; |
586 | } | 579 | } |
@@ -1329,12 +1322,7 @@ void __init ip_init(void) | |||
1329 | #endif | 1322 | #endif |
1330 | } | 1323 | } |
1331 | 1324 | ||
1332 | EXPORT_SYMBOL(ip_finish_output); | ||
1333 | EXPORT_SYMBOL(ip_fragment); | 1325 | EXPORT_SYMBOL(ip_fragment); |
1334 | EXPORT_SYMBOL(ip_generic_getfrag); | 1326 | EXPORT_SYMBOL(ip_generic_getfrag); |
1335 | EXPORT_SYMBOL(ip_queue_xmit); | 1327 | EXPORT_SYMBOL(ip_queue_xmit); |
1336 | EXPORT_SYMBOL(ip_send_check); | 1328 | EXPORT_SYMBOL(ip_send_check); |
1337 | |||
1338 | #ifdef CONFIG_SYSCTL | ||
1339 | EXPORT_SYMBOL(sysctl_ip_default_ttl); | ||
1340 | #endif | ||
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ff4bd067b397..2f0b47da5b37 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c | |||
@@ -153,7 +153,7 @@ int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc) | |||
153 | switch (cmsg->cmsg_type) { | 153 | switch (cmsg->cmsg_type) { |
154 | case IP_RETOPTS: | 154 | case IP_RETOPTS: |
155 | err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); | 155 | err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); |
156 | err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40, 0); | 156 | err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40); |
157 | if (err) | 157 | if (err) |
158 | return err; | 158 | return err; |
159 | break; | 159 | break; |
@@ -425,7 +425,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
425 | struct ip_options * opt = NULL; | 425 | struct ip_options * opt = NULL; |
426 | if (optlen > 40 || optlen < 0) | 426 | if (optlen > 40 || optlen < 0) |
427 | goto e_inval; | 427 | goto e_inval; |
428 | err = ip_options_get(&opt, optval, optlen, 1); | 428 | err = ip_options_get_from_user(&opt, optval, optlen); |
429 | if (err) | 429 | if (err) |
430 | break; | 430 | break; |
431 | if (sk->sk_type == SOCK_STREAM) { | 431 | if (sk->sk_type == SOCK_STREAM) { |
@@ -614,7 +614,6 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
614 | } | 614 | } |
615 | case IP_MSFILTER: | 615 | case IP_MSFILTER: |
616 | { | 616 | { |
617 | extern int sysctl_optmem_max; | ||
618 | extern int sysctl_igmp_max_msf; | 617 | extern int sysctl_igmp_max_msf; |
619 | struct ip_msfilter *msf; | 618 | struct ip_msfilter *msf; |
620 | 619 | ||
@@ -769,7 +768,6 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
769 | } | 768 | } |
770 | case MCAST_MSFILTER: | 769 | case MCAST_MSFILTER: |
771 | { | 770 | { |
772 | extern int sysctl_optmem_max; | ||
773 | extern int sysctl_igmp_max_msf; | 771 | extern int sysctl_igmp_max_msf; |
774 | struct sockaddr_in *psin; | 772 | struct sockaddr_in *psin; |
775 | struct ip_msfilter *msf = NULL; | 773 | struct ip_msfilter *msf = NULL; |
@@ -1090,7 +1088,5 @@ int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
1090 | 1088 | ||
1091 | EXPORT_SYMBOL(ip_cmsg_recv); | 1089 | EXPORT_SYMBOL(ip_cmsg_recv); |
1092 | 1090 | ||
1093 | #ifdef CONFIG_IP_SCTP_MODULE | ||
1094 | EXPORT_SYMBOL(ip_getsockopt); | 1091 | EXPORT_SYMBOL(ip_getsockopt); |
1095 | EXPORT_SYMBOL(ip_setsockopt); | 1092 | EXPORT_SYMBOL(ip_setsockopt); |
1096 | #endif | ||
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 7ded6e60f43a..dcb7ee6c4858 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c | |||
@@ -214,8 +214,8 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) | |||
214 | spi, IPPROTO_COMP, AF_INET); | 214 | spi, IPPROTO_COMP, AF_INET); |
215 | if (!x) | 215 | if (!x) |
216 | return; | 216 | return; |
217 | NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%u.%u.%u.%u\n", | 217 | NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%u.%u.%u.%u\n", |
218 | spi, NIPQUAD(iph->daddr))); | 218 | spi, NIPQUAD(iph->daddr)); |
219 | xfrm_state_put(x); | 219 | xfrm_state_put(x); |
220 | } | 220 | } |
221 | 221 | ||
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index d2bf8e1930a3..63e106605f28 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c | |||
@@ -393,7 +393,7 @@ static int __init ic_defaults(void) | |||
393 | 393 | ||
394 | #ifdef IPCONFIG_RARP | 394 | #ifdef IPCONFIG_RARP |
395 | 395 | ||
396 | static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt); | 396 | static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); |
397 | 397 | ||
398 | static struct packet_type rarp_packet_type __initdata = { | 398 | static struct packet_type rarp_packet_type __initdata = { |
399 | .type = __constant_htons(ETH_P_RARP), | 399 | .type = __constant_htons(ETH_P_RARP), |
@@ -414,7 +414,7 @@ static inline void ic_rarp_cleanup(void) | |||
414 | * Process received RARP packet. | 414 | * Process received RARP packet. |
415 | */ | 415 | */ |
416 | static int __init | 416 | static int __init |
417 | ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) | 417 | ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) |
418 | { | 418 | { |
419 | struct arphdr *rarp; | 419 | struct arphdr *rarp; |
420 | unsigned char *rarp_ptr; | 420 | unsigned char *rarp_ptr; |
@@ -555,7 +555,7 @@ struct bootp_pkt { /* BOOTP packet format */ | |||
555 | #define DHCPRELEASE 7 | 555 | #define DHCPRELEASE 7 |
556 | #define DHCPINFORM 8 | 556 | #define DHCPINFORM 8 |
557 | 557 | ||
558 | static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt); | 558 | static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); |
559 | 559 | ||
560 | static struct packet_type bootp_packet_type __initdata = { | 560 | static struct packet_type bootp_packet_type __initdata = { |
561 | .type = __constant_htons(ETH_P_IP), | 561 | .type = __constant_htons(ETH_P_IP), |
@@ -823,7 +823,7 @@ static void __init ic_do_bootp_ext(u8 *ext) | |||
823 | /* | 823 | /* |
824 | * Receive BOOTP reply. | 824 | * Receive BOOTP reply. |
825 | */ | 825 | */ |
826 | static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) | 826 | static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) |
827 | { | 827 | { |
828 | struct bootp_pkt *b; | 828 | struct bootp_pkt *b; |
829 | struct iphdr *h; | 829 | struct iphdr *h; |
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index dc806b578427..9dbf5909f3a6 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
@@ -103,7 +103,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock); | |||
103 | In this case data path is free of exclusive locks at all. | 103 | In this case data path is free of exclusive locks at all. |
104 | */ | 104 | */ |
105 | 105 | ||
106 | static kmem_cache_t *mrt_cachep; | 106 | static kmem_cache_t *mrt_cachep __read_mostly; |
107 | 107 | ||
108 | static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); | 108 | static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); |
109 | static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert); | 109 | static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert); |
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index d9212addd193..6e092dadb388 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/in.h> | 26 | #include <linux/in.h> |
27 | #include <linux/ip.h> | 27 | #include <linux/ip.h> |
28 | #include <net/protocol.h> | 28 | #include <net/protocol.h> |
29 | #include <net/tcp.h> | ||
29 | #include <asm/system.h> | 30 | #include <asm/system.h> |
30 | #include <linux/stat.h> | 31 | #include <linux/stat.h> |
31 | #include <linux/proc_fs.h> | 32 | #include <linux/proc_fs.h> |
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index d0145a8b1551..e11952ea17af 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c | |||
@@ -40,7 +40,7 @@ | |||
40 | static struct list_head *ip_vs_conn_tab; | 40 | static struct list_head *ip_vs_conn_tab; |
41 | 41 | ||
42 | /* SLAB cache for IPVS connections */ | 42 | /* SLAB cache for IPVS connections */ |
43 | static kmem_cache_t *ip_vs_conn_cachep; | 43 | static kmem_cache_t *ip_vs_conn_cachep __read_mostly; |
44 | 44 | ||
45 | /* counter for current IPVS connections */ | 45 | /* counter for current IPVS connections */ |
46 | static atomic_t ip_vs_conn_count = ATOMIC_INIT(0); | 46 | static atomic_t ip_vs_conn_count = ATOMIC_INIT(0); |
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 5fb257dd07cb..3ac7eeca04ac 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c | |||
@@ -22,6 +22,7 @@ | |||
22 | * | 22 | * |
23 | * Changes: | 23 | * Changes: |
24 | * Paul `Rusty' Russell properly handle non-linear skbs | 24 | * Paul `Rusty' Russell properly handle non-linear skbs |
25 | * Harald Welte don't use nfcache | ||
25 | * | 26 | * |
26 | */ | 27 | */ |
27 | 28 | ||
@@ -529,7 +530,7 @@ static unsigned int ip_vs_post_routing(unsigned int hooknum, | |||
529 | const struct net_device *out, | 530 | const struct net_device *out, |
530 | int (*okfn)(struct sk_buff *)) | 531 | int (*okfn)(struct sk_buff *)) |
531 | { | 532 | { |
532 | if (!((*pskb)->nfcache & NFC_IPVS_PROPERTY)) | 533 | if (!((*pskb)->ipvs_property)) |
533 | return NF_ACCEPT; | 534 | return NF_ACCEPT; |
534 | 535 | ||
535 | /* The packet was sent from IPVS, exit this chain */ | 536 | /* The packet was sent from IPVS, exit this chain */ |
@@ -701,7 +702,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) | |||
701 | /* do the statistics and put it back */ | 702 | /* do the statistics and put it back */ |
702 | ip_vs_out_stats(cp, skb); | 703 | ip_vs_out_stats(cp, skb); |
703 | 704 | ||
704 | skb->nfcache |= NFC_IPVS_PROPERTY; | 705 | skb->ipvs_property = 1; |
705 | verdict = NF_ACCEPT; | 706 | verdict = NF_ACCEPT; |
706 | 707 | ||
707 | out: | 708 | out: |
@@ -739,7 +740,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, | |||
739 | 740 | ||
740 | EnterFunction(11); | 741 | EnterFunction(11); |
741 | 742 | ||
742 | if (skb->nfcache & NFC_IPVS_PROPERTY) | 743 | if (skb->ipvs_property) |
743 | return NF_ACCEPT; | 744 | return NF_ACCEPT; |
744 | 745 | ||
745 | iph = skb->nh.iph; | 746 | iph = skb->nh.iph; |
@@ -821,7 +822,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, | |||
821 | ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); | 822 | ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); |
822 | ip_vs_conn_put(cp); | 823 | ip_vs_conn_put(cp); |
823 | 824 | ||
824 | skb->nfcache |= NFC_IPVS_PROPERTY; | 825 | skb->ipvs_property = 1; |
825 | 826 | ||
826 | LeaveFunction(11); | 827 | LeaveFunction(11); |
827 | return NF_ACCEPT; | 828 | return NF_ACCEPT; |
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 7d99ede2ef79..2d66848e7aa0 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c | |||
@@ -1598,7 +1598,7 @@ static ctl_table vs_table[] = { | |||
1598 | { .ctl_name = 0 } | 1598 | { .ctl_name = 0 } |
1599 | }; | 1599 | }; |
1600 | 1600 | ||
1601 | static ctl_table ipv4_table[] = { | 1601 | static ctl_table ipvs_ipv4_table[] = { |
1602 | { | 1602 | { |
1603 | .ctl_name = NET_IPV4, | 1603 | .ctl_name = NET_IPV4, |
1604 | .procname = "ipv4", | 1604 | .procname = "ipv4", |
@@ -1613,7 +1613,7 @@ static ctl_table vs_root_table[] = { | |||
1613 | .ctl_name = CTL_NET, | 1613 | .ctl_name = CTL_NET, |
1614 | .procname = "net", | 1614 | .procname = "net", |
1615 | .mode = 0555, | 1615 | .mode = 0555, |
1616 | .child = ipv4_table, | 1616 | .child = ipvs_ipv4_table, |
1617 | }, | 1617 | }, |
1618 | { .ctl_name = 0 } | 1618 | { .ctl_name = 0 } |
1619 | }; | 1619 | }; |
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index c035838b780a..561cda326fa8 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c | |||
@@ -131,7 +131,7 @@ static ctl_table vs_table[] = { | |||
131 | { .ctl_name = 0 } | 131 | { .ctl_name = 0 } |
132 | }; | 132 | }; |
133 | 133 | ||
134 | static ctl_table ipv4_table[] = { | 134 | static ctl_table ipvs_ipv4_table[] = { |
135 | { | 135 | { |
136 | .ctl_name = NET_IPV4, | 136 | .ctl_name = NET_IPV4, |
137 | .procname = "ipv4", | 137 | .procname = "ipv4", |
@@ -146,7 +146,7 @@ static ctl_table lblc_root_table[] = { | |||
146 | .ctl_name = CTL_NET, | 146 | .ctl_name = CTL_NET, |
147 | .procname = "net", | 147 | .procname = "net", |
148 | .mode = 0555, | 148 | .mode = 0555, |
149 | .child = ipv4_table | 149 | .child = ipvs_ipv4_table |
150 | }, | 150 | }, |
151 | { .ctl_name = 0 } | 151 | { .ctl_name = 0 } |
152 | }; | 152 | }; |
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index 22b5dd55d271..ce456dbf09a5 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c | |||
@@ -320,7 +320,7 @@ static ctl_table vs_table[] = { | |||
320 | { .ctl_name = 0 } | 320 | { .ctl_name = 0 } |
321 | }; | 321 | }; |
322 | 322 | ||
323 | static ctl_table ipv4_table[] = { | 323 | static ctl_table ipvs_ipv4_table[] = { |
324 | { | 324 | { |
325 | .ctl_name = NET_IPV4, | 325 | .ctl_name = NET_IPV4, |
326 | .procname = "ipv4", | 326 | .procname = "ipv4", |
@@ -335,7 +335,7 @@ static ctl_table lblcr_root_table[] = { | |||
335 | .ctl_name = CTL_NET, | 335 | .ctl_name = CTL_NET, |
336 | .procname = "net", | 336 | .procname = "net", |
337 | .mode = 0555, | 337 | .mode = 0555, |
338 | .child = ipv4_table | 338 | .child = ipvs_ipv4_table |
339 | }, | 339 | }, |
340 | { .ctl_name = 0 } | 340 | { .ctl_name = 0 } |
341 | }; | 341 | }; |
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index e65de675da74..c19408973c09 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c | |||
@@ -604,14 +604,14 @@ void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp) | |||
604 | } | 604 | } |
605 | 605 | ||
606 | 606 | ||
607 | static void tcp_init(struct ip_vs_protocol *pp) | 607 | static void ip_vs_tcp_init(struct ip_vs_protocol *pp) |
608 | { | 608 | { |
609 | IP_VS_INIT_HASH_TABLE(tcp_apps); | 609 | IP_VS_INIT_HASH_TABLE(tcp_apps); |
610 | pp->timeout_table = tcp_timeouts; | 610 | pp->timeout_table = tcp_timeouts; |
611 | } | 611 | } |
612 | 612 | ||
613 | 613 | ||
614 | static void tcp_exit(struct ip_vs_protocol *pp) | 614 | static void ip_vs_tcp_exit(struct ip_vs_protocol *pp) |
615 | { | 615 | { |
616 | } | 616 | } |
617 | 617 | ||
@@ -621,8 +621,8 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { | |||
621 | .protocol = IPPROTO_TCP, | 621 | .protocol = IPPROTO_TCP, |
622 | .dont_defrag = 0, | 622 | .dont_defrag = 0, |
623 | .appcnt = ATOMIC_INIT(0), | 623 | .appcnt = ATOMIC_INIT(0), |
624 | .init = tcp_init, | 624 | .init = ip_vs_tcp_init, |
625 | .exit = tcp_exit, | 625 | .exit = ip_vs_tcp_exit, |
626 | .register_app = tcp_register_app, | 626 | .register_app = tcp_register_app, |
627 | .unregister_app = tcp_unregister_app, | 627 | .unregister_app = tcp_unregister_app, |
628 | .conn_schedule = tcp_conn_schedule, | 628 | .conn_schedule = tcp_conn_schedule, |
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index a8512a3fd08a..3b87482049cf 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c | |||
@@ -127,7 +127,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) | |||
127 | 127 | ||
128 | #define IP_VS_XMIT(skb, rt) \ | 128 | #define IP_VS_XMIT(skb, rt) \ |
129 | do { \ | 129 | do { \ |
130 | (skb)->nfcache |= NFC_IPVS_PROPERTY; \ | 130 | (skb)->ipvs_property = 1; \ |
131 | (skb)->ip_summed = CHECKSUM_NONE; \ | 131 | (skb)->ip_summed = CHECKSUM_NONE; \ |
132 | NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL, \ | 132 | NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL, \ |
133 | (rt)->u.dst.dev, dst_output); \ | 133 | (rt)->u.dst.dev, dst_output); \ |
diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c index c9cf8726051d..db67373f9b34 100644 --- a/net/ipv4/multipath_drr.c +++ b/net/ipv4/multipath_drr.c | |||
@@ -107,7 +107,7 @@ static int drr_dev_event(struct notifier_block *this, | |||
107 | return NOTIFY_DONE; | 107 | return NOTIFY_DONE; |
108 | } | 108 | } |
109 | 109 | ||
110 | struct notifier_block drr_dev_notifier = { | 110 | static struct notifier_block drr_dev_notifier = { |
111 | .notifier_call = drr_dev_event, | 111 | .notifier_call = drr_dev_event, |
112 | }; | 112 | }; |
113 | 113 | ||
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c new file mode 100644 index 000000000000..ae0779d82c5d --- /dev/null +++ b/net/ipv4/netfilter.c | |||
@@ -0,0 +1,139 @@ | |||
1 | /* IPv4 specific functions of netfilter core */ | ||
2 | |||
3 | #include <linux/config.h> | ||
4 | #ifdef CONFIG_NETFILTER | ||
5 | |||
6 | #include <linux/kernel.h> | ||
7 | #include <linux/netfilter.h> | ||
8 | #include <linux/netfilter_ipv4.h> | ||
9 | |||
10 | #include <linux/tcp.h> | ||
11 | #include <linux/udp.h> | ||
12 | #include <linux/icmp.h> | ||
13 | #include <net/route.h> | ||
14 | #include <linux/ip.h> | ||
15 | |||
16 | /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ | ||
17 | int ip_route_me_harder(struct sk_buff **pskb) | ||
18 | { | ||
19 | struct iphdr *iph = (*pskb)->nh.iph; | ||
20 | struct rtable *rt; | ||
21 | struct flowi fl = {}; | ||
22 | struct dst_entry *odst; | ||
23 | unsigned int hh_len; | ||
24 | |||
25 | /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause | ||
26 | * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook. | ||
27 | */ | ||
28 | if (inet_addr_type(iph->saddr) == RTN_LOCAL) { | ||
29 | fl.nl_u.ip4_u.daddr = iph->daddr; | ||
30 | fl.nl_u.ip4_u.saddr = iph->saddr; | ||
31 | fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); | ||
32 | fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0; | ||
33 | #ifdef CONFIG_IP_ROUTE_FWMARK | ||
34 | fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark; | ||
35 | #endif | ||
36 | fl.proto = iph->protocol; | ||
37 | if (ip_route_output_key(&rt, &fl) != 0) | ||
38 | return -1; | ||
39 | |||
40 | /* Drop old route. */ | ||
41 | dst_release((*pskb)->dst); | ||
42 | (*pskb)->dst = &rt->u.dst; | ||
43 | } else { | ||
44 | /* non-local src, find valid iif to satisfy | ||
45 | * rp-filter when calling ip_route_input. */ | ||
46 | fl.nl_u.ip4_u.daddr = iph->saddr; | ||
47 | if (ip_route_output_key(&rt, &fl) != 0) | ||
48 | return -1; | ||
49 | |||
50 | odst = (*pskb)->dst; | ||
51 | if (ip_route_input(*pskb, iph->daddr, iph->saddr, | ||
52 | RT_TOS(iph->tos), rt->u.dst.dev) != 0) { | ||
53 | dst_release(&rt->u.dst); | ||
54 | return -1; | ||
55 | } | ||
56 | dst_release(&rt->u.dst); | ||
57 | dst_release(odst); | ||
58 | } | ||
59 | |||
60 | if ((*pskb)->dst->error) | ||
61 | return -1; | ||
62 | |||
63 | /* Change in oif may mean change in hh_len. */ | ||
64 | hh_len = (*pskb)->dst->dev->hard_header_len; | ||
65 | if (skb_headroom(*pskb) < hh_len) { | ||
66 | struct sk_buff *nskb; | ||
67 | |||
68 | nskb = skb_realloc_headroom(*pskb, hh_len); | ||
69 | if (!nskb) | ||
70 | return -1; | ||
71 | if ((*pskb)->sk) | ||
72 | skb_set_owner_w(nskb, (*pskb)->sk); | ||
73 | kfree_skb(*pskb); | ||
74 | *pskb = nskb; | ||
75 | } | ||
76 | |||
77 | return 0; | ||
78 | } | ||
79 | EXPORT_SYMBOL(ip_route_me_harder); | ||
80 | |||
81 | /* | ||
82 | * Extra routing may needed on local out, as the QUEUE target never | ||
83 | * returns control to the table. | ||
84 | */ | ||
85 | |||
86 | struct ip_rt_info { | ||
87 | u_int32_t daddr; | ||
88 | u_int32_t saddr; | ||
89 | u_int8_t tos; | ||
90 | }; | ||
91 | |||
92 | static void queue_save(const struct sk_buff *skb, struct nf_info *info) | ||
93 | { | ||
94 | struct ip_rt_info *rt_info = nf_info_reroute(info); | ||
95 | |||
96 | if (info->hook == NF_IP_LOCAL_OUT) { | ||
97 | const struct iphdr *iph = skb->nh.iph; | ||
98 | |||
99 | rt_info->tos = iph->tos; | ||
100 | rt_info->daddr = iph->daddr; | ||
101 | rt_info->saddr = iph->saddr; | ||
102 | } | ||
103 | } | ||
104 | |||
105 | static int queue_reroute(struct sk_buff **pskb, const struct nf_info *info) | ||
106 | { | ||
107 | const struct ip_rt_info *rt_info = nf_info_reroute(info); | ||
108 | |||
109 | if (info->hook == NF_IP_LOCAL_OUT) { | ||
110 | struct iphdr *iph = (*pskb)->nh.iph; | ||
111 | |||
112 | if (!(iph->tos == rt_info->tos | ||
113 | && iph->daddr == rt_info->daddr | ||
114 | && iph->saddr == rt_info->saddr)) | ||
115 | return ip_route_me_harder(pskb); | ||
116 | } | ||
117 | return 0; | ||
118 | } | ||
119 | |||
120 | static struct nf_queue_rerouter ip_reroute = { | ||
121 | .rer_size = sizeof(struct ip_rt_info), | ||
122 | .save = queue_save, | ||
123 | .reroute = queue_reroute, | ||
124 | }; | ||
125 | |||
126 | static int init(void) | ||
127 | { | ||
128 | return nf_register_queue_rerouter(PF_INET, &ip_reroute); | ||
129 | } | ||
130 | |||
131 | static void fini(void) | ||
132 | { | ||
133 | nf_unregister_queue_rerouter(PF_INET); | ||
134 | } | ||
135 | |||
136 | module_init(init); | ||
137 | module_exit(fini); | ||
138 | |||
139 | #endif /* CONFIG_NETFILTER */ | ||
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 46d4cb1c06f0..e046f5521814 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig | |||
@@ -40,6 +40,16 @@ config IP_NF_CONNTRACK_MARK | |||
40 | of packets, but this mark value is kept in the conntrack session | 40 | of packets, but this mark value is kept in the conntrack session |
41 | instead of the individual packets. | 41 | instead of the individual packets. |
42 | 42 | ||
43 | config IP_NF_CONNTRACK_EVENTS | ||
44 | bool "Connection tracking events" | ||
45 | depends on IP_NF_CONNTRACK | ||
46 | help | ||
47 | If this option is enabled, the connection tracking code will | ||
48 | provide a notifier chain that can be used by other kernel code | ||
49 | to get notified about changes in the connection tracking state. | ||
50 | |||
51 | IF unsure, say `N'. | ||
52 | |||
43 | config IP_NF_CT_PROTO_SCTP | 53 | config IP_NF_CT_PROTO_SCTP |
44 | tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' | 54 | tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' |
45 | depends on IP_NF_CONNTRACK && EXPERIMENTAL | 55 | depends on IP_NF_CONNTRACK && EXPERIMENTAL |
@@ -100,11 +110,15 @@ config IP_NF_AMANDA | |||
100 | To compile it as a module, choose M here. If unsure, say Y. | 110 | To compile it as a module, choose M here. If unsure, say Y. |
101 | 111 | ||
102 | config IP_NF_QUEUE | 112 | config IP_NF_QUEUE |
103 | tristate "Userspace queueing via NETLINK" | 113 | tristate "IP Userspace queueing via NETLINK (OBSOLETE)" |
104 | help | 114 | help |
105 | Netfilter has the ability to queue packets to user space: the | 115 | Netfilter has the ability to queue packets to user space: the |
106 | netlink device can be used to access them using this driver. | 116 | netlink device can be used to access them using this driver. |
107 | 117 | ||
118 | This option enables the old IPv4-only "ip_queue" implementation | ||
119 | which has been obsoleted by the new "nfnetlink_queue" code (see | ||
120 | CONFIG_NETFILTER_NETLINK_QUEUE). | ||
121 | |||
108 | To compile it as a module, choose M here. If unsure, say N. | 122 | To compile it as a module, choose M here. If unsure, say N. |
109 | 123 | ||
110 | config IP_NF_IPTABLES | 124 | config IP_NF_IPTABLES |
@@ -340,6 +354,17 @@ config IP_NF_MATCH_SCTP | |||
340 | If you want to compile it as a module, say M here and read | 354 | If you want to compile it as a module, say M here and read |
341 | <file:Documentation/modules.txt>. If unsure, say `N'. | 355 | <file:Documentation/modules.txt>. If unsure, say `N'. |
342 | 356 | ||
357 | config IP_NF_MATCH_DCCP | ||
358 | tristate 'DCCP protocol match support' | ||
359 | depends on IP_NF_IPTABLES | ||
360 | help | ||
361 | With this option enabled, you will be able to use the iptables | ||
362 | `dccp' match in order to match on DCCP source/destination ports | ||
363 | and DCCP flags. | ||
364 | |||
365 | If you want to compile it as a module, say M here and read | ||
366 | <file:Documentation/modules.txt>. If unsure, say `N'. | ||
367 | |||
343 | config IP_NF_MATCH_COMMENT | 368 | config IP_NF_MATCH_COMMENT |
344 | tristate 'comment match support' | 369 | tristate 'comment match support' |
345 | depends on IP_NF_IPTABLES | 370 | depends on IP_NF_IPTABLES |
@@ -361,6 +386,16 @@ config IP_NF_MATCH_CONNMARK | |||
361 | <file:Documentation/modules.txt>. The module will be called | 386 | <file:Documentation/modules.txt>. The module will be called |
362 | ipt_connmark.o. If unsure, say `N'. | 387 | ipt_connmark.o. If unsure, say `N'. |
363 | 388 | ||
389 | config IP_NF_MATCH_CONNBYTES | ||
390 | tristate 'Connection byte/packet counter match support' | ||
391 | depends on IP_NF_CT_ACCT && IP_NF_IPTABLES | ||
392 | help | ||
393 | This option adds a `connbytes' match, which allows you to match the | ||
394 | number of bytes and/or packets for each direction within a connection. | ||
395 | |||
396 | If you want to compile it as a module, say M here and read | ||
397 | <file:Documentation/modules.txt>. If unsure, say `N'. | ||
398 | |||
364 | config IP_NF_MATCH_HASHLIMIT | 399 | config IP_NF_MATCH_HASHLIMIT |
365 | tristate 'hashlimit match support' | 400 | tristate 'hashlimit match support' |
366 | depends on IP_NF_IPTABLES | 401 | depends on IP_NF_IPTABLES |
@@ -375,6 +410,19 @@ config IP_NF_MATCH_HASHLIMIT | |||
375 | destination IP' or `500pps from any given source IP' with a single | 410 | destination IP' or `500pps from any given source IP' with a single |
376 | IPtables rule. | 411 | IPtables rule. |
377 | 412 | ||
413 | config IP_NF_MATCH_STRING | ||
414 | tristate 'string match support' | ||
415 | depends on IP_NF_IPTABLES | ||
416 | select TEXTSEARCH | ||
417 | select TEXTSEARCH_KMP | ||
418 | select TEXTSEARCH_BM | ||
419 | select TEXTSEARCH_FSM | ||
420 | help | ||
421 | This option adds a `string' match, which allows you to look for | ||
422 | pattern matchings in packets. | ||
423 | |||
424 | To compile it as a module, choose M here. If unsure, say N. | ||
425 | |||
378 | # `filter', generic and specific targets | 426 | # `filter', generic and specific targets |
379 | config IP_NF_FILTER | 427 | config IP_NF_FILTER |
380 | tristate "Packet filtering" | 428 | tristate "Packet filtering" |
@@ -616,6 +664,20 @@ config IP_NF_TARGET_CLASSIFY | |||
616 | 664 | ||
617 | To compile it as a module, choose M here. If unsure, say N. | 665 | To compile it as a module, choose M here. If unsure, say N. |
618 | 666 | ||
667 | config IP_NF_TARGET_TTL | ||
668 | tristate 'TTL target support' | ||
669 | depends on IP_NF_MANGLE | ||
670 | help | ||
671 | This option adds a `TTL' target, which enables the user to modify | ||
672 | the TTL value of the IP header. | ||
673 | |||
674 | While it is safe to decrement/lower the TTL, this target also enables | ||
675 | functionality to increment and set the TTL value of the IP header to | ||
676 | arbitrary values. This is EXTREMELY DANGEROUS since you can easily | ||
677 | create immortal packets that loop forever on the network. | ||
678 | |||
679 | To compile it as a module, choose M here. If unsure, say N. | ||
680 | |||
619 | config IP_NF_TARGET_CONNMARK | 681 | config IP_NF_TARGET_CONNMARK |
620 | tristate 'CONNMARK target support' | 682 | tristate 'CONNMARK target support' |
621 | depends on IP_NF_CONNTRACK_MARK && IP_NF_MANGLE | 683 | depends on IP_NF_CONNTRACK_MARK && IP_NF_MANGLE |
@@ -692,5 +754,11 @@ config IP_NF_ARP_MANGLE | |||
692 | Allows altering the ARP packet payload: source and destination | 754 | Allows altering the ARP packet payload: source and destination |
693 | hardware and network addresses. | 755 | hardware and network addresses. |
694 | 756 | ||
757 | config IP_NF_CONNTRACK_NETLINK | ||
758 | tristate 'Connection tracking netlink interface' | ||
759 | depends on IP_NF_CONNTRACK && NETFILTER_NETLINK | ||
760 | help | ||
761 | This option enables support for a netlink-based userspace interface | ||
762 | |||
695 | endmenu | 763 | endmenu |
696 | 764 | ||
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 45796d5924dd..a7bd38f50522 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile | |||
@@ -9,6 +9,10 @@ iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helpe | |||
9 | # connection tracking | 9 | # connection tracking |
10 | obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o | 10 | obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o |
11 | 11 | ||
12 | # conntrack netlink interface | ||
13 | obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o | ||
14 | |||
15 | |||
12 | # SCTP protocol connection tracking | 16 | # SCTP protocol connection tracking |
13 | obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o | 17 | obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o |
14 | 18 | ||
@@ -38,6 +42,7 @@ obj-$(CONFIG_IP_NF_MATCH_HELPER) += ipt_helper.o | |||
38 | obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o | 42 | obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o |
39 | obj-$(CONFIG_IP_NF_MATCH_HASHLIMIT) += ipt_hashlimit.o | 43 | obj-$(CONFIG_IP_NF_MATCH_HASHLIMIT) += ipt_hashlimit.o |
40 | obj-$(CONFIG_IP_NF_MATCH_SCTP) += ipt_sctp.o | 44 | obj-$(CONFIG_IP_NF_MATCH_SCTP) += ipt_sctp.o |
45 | obj-$(CONFIG_IP_NF_MATCH_DCCP) += ipt_dccp.o | ||
41 | obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o | 46 | obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o |
42 | obj-$(CONFIG_IP_NF_MATCH_MAC) += ipt_mac.o | 47 | obj-$(CONFIG_IP_NF_MATCH_MAC) += ipt_mac.o |
43 | obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o | 48 | obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o |
@@ -54,11 +59,13 @@ obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o | |||
54 | obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_state.o | 59 | obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_state.o |
55 | obj-$(CONFIG_IP_NF_MATCH_CONNMARK) += ipt_connmark.o | 60 | obj-$(CONFIG_IP_NF_MATCH_CONNMARK) += ipt_connmark.o |
56 | obj-$(CONFIG_IP_NF_MATCH_CONNTRACK) += ipt_conntrack.o | 61 | obj-$(CONFIG_IP_NF_MATCH_CONNTRACK) += ipt_conntrack.o |
62 | obj-$(CONFIG_IP_NF_MATCH_CONNBYTES) += ipt_connbytes.o | ||
57 | obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o | 63 | obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o |
58 | obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o | 64 | obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o |
59 | obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o | 65 | obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o |
60 | obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o | 66 | obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o |
61 | obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o | 67 | obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o |
68 | obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o | ||
62 | 69 | ||
63 | # targets | 70 | # targets |
64 | obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o | 71 | obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o |
@@ -78,6 +85,7 @@ obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o | |||
78 | obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o | 85 | obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o |
79 | obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o | 86 | obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o |
80 | obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o | 87 | obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o |
88 | obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o | ||
81 | 89 | ||
82 | # generic ARP tables | 90 | # generic ARP tables |
83 | obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o | 91 | obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o |
@@ -87,3 +95,4 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o | |||
87 | obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o | 95 | obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o |
88 | 96 | ||
89 | obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o | 97 | obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o |
98 | obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ipt_NFQUEUE.o | ||
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c index 01e1b58322a9..be4c9eb3243f 100644 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ b/net/ipv4/netfilter/ip_conntrack_amanda.c | |||
@@ -40,7 +40,7 @@ MODULE_PARM_DESC(master_timeout, "timeout for the master connection"); | |||
40 | static char *conns[] = { "DATA ", "MESG ", "INDEX " }; | 40 | static char *conns[] = { "DATA ", "MESG ", "INDEX " }; |
41 | 41 | ||
42 | /* This is slow, but it's simple. --RR */ | 42 | /* This is slow, but it's simple. --RR */ |
43 | static char amanda_buffer[65536]; | 43 | static char *amanda_buffer; |
44 | static DEFINE_SPINLOCK(amanda_buffer_lock); | 44 | static DEFINE_SPINLOCK(amanda_buffer_lock); |
45 | 45 | ||
46 | unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb, | 46 | unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb, |
@@ -153,11 +153,25 @@ static struct ip_conntrack_helper amanda_helper = { | |||
153 | static void __exit fini(void) | 153 | static void __exit fini(void) |
154 | { | 154 | { |
155 | ip_conntrack_helper_unregister(&amanda_helper); | 155 | ip_conntrack_helper_unregister(&amanda_helper); |
156 | kfree(amanda_buffer); | ||
156 | } | 157 | } |
157 | 158 | ||
158 | static int __init init(void) | 159 | static int __init init(void) |
159 | { | 160 | { |
160 | return ip_conntrack_helper_register(&amanda_helper); | 161 | int ret; |
162 | |||
163 | amanda_buffer = kmalloc(65536, GFP_KERNEL); | ||
164 | if (!amanda_buffer) | ||
165 | return -ENOMEM; | ||
166 | |||
167 | ret = ip_conntrack_helper_register(&amanda_helper); | ||
168 | if (ret < 0) { | ||
169 | kfree(amanda_buffer); | ||
170 | return ret; | ||
171 | } | ||
172 | return 0; | ||
173 | |||
174 | |||
161 | } | 175 | } |
162 | 176 | ||
163 | module_init(init); | 177 | module_init(init); |
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index a7f0c821a9b2..a0648600190e 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/err.h> | 37 | #include <linux/err.h> |
38 | #include <linux/percpu.h> | 38 | #include <linux/percpu.h> |
39 | #include <linux/moduleparam.h> | 39 | #include <linux/moduleparam.h> |
40 | #include <linux/notifier.h> | ||
40 | 41 | ||
41 | /* ip_conntrack_lock protects the main hash table, protocol/helper/expected | 42 | /* ip_conntrack_lock protects the main hash table, protocol/helper/expected |
42 | registrations, conntrack timers*/ | 43 | registrations, conntrack timers*/ |
@@ -49,7 +50,7 @@ | |||
49 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | 50 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> |
50 | #include <linux/netfilter_ipv4/listhelp.h> | 51 | #include <linux/netfilter_ipv4/listhelp.h> |
51 | 52 | ||
52 | #define IP_CONNTRACK_VERSION "2.1" | 53 | #define IP_CONNTRACK_VERSION "2.3" |
53 | 54 | ||
54 | #if 0 | 55 | #if 0 |
55 | #define DEBUGP printk | 56 | #define DEBUGP printk |
@@ -69,22 +70,81 @@ static LIST_HEAD(helpers); | |||
69 | unsigned int ip_conntrack_htable_size = 0; | 70 | unsigned int ip_conntrack_htable_size = 0; |
70 | int ip_conntrack_max; | 71 | int ip_conntrack_max; |
71 | struct list_head *ip_conntrack_hash; | 72 | struct list_head *ip_conntrack_hash; |
72 | static kmem_cache_t *ip_conntrack_cachep; | 73 | static kmem_cache_t *ip_conntrack_cachep __read_mostly; |
73 | static kmem_cache_t *ip_conntrack_expect_cachep; | 74 | static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly; |
74 | struct ip_conntrack ip_conntrack_untracked; | 75 | struct ip_conntrack ip_conntrack_untracked; |
75 | unsigned int ip_ct_log_invalid; | 76 | unsigned int ip_ct_log_invalid; |
76 | static LIST_HEAD(unconfirmed); | 77 | static LIST_HEAD(unconfirmed); |
77 | static int ip_conntrack_vmalloc; | 78 | static int ip_conntrack_vmalloc; |
78 | 79 | ||
79 | DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); | 80 | static unsigned int ip_conntrack_next_id = 1; |
81 | static unsigned int ip_conntrack_expect_next_id = 1; | ||
82 | #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS | ||
83 | struct notifier_block *ip_conntrack_chain; | ||
84 | struct notifier_block *ip_conntrack_expect_chain; | ||
85 | |||
86 | DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache); | ||
80 | 87 | ||
81 | void | 88 | /* deliver cached events and clear cache entry - must be called with locally |
82 | ip_conntrack_put(struct ip_conntrack *ct) | 89 | * disabled softirqs */ |
90 | static inline void | ||
91 | __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache) | ||
83 | { | 92 | { |
84 | IP_NF_ASSERT(ct); | 93 | DEBUGP("ecache: delivering events for %p\n", ecache->ct); |
85 | nf_conntrack_put(&ct->ct_general); | 94 | if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events) |
95 | notifier_call_chain(&ip_conntrack_chain, ecache->events, | ||
96 | ecache->ct); | ||
97 | ecache->events = 0; | ||
98 | ip_conntrack_put(ecache->ct); | ||
99 | ecache->ct = NULL; | ||
86 | } | 100 | } |
87 | 101 | ||
102 | /* Deliver all cached events for a particular conntrack. This is called | ||
103 | * by code prior to async packet handling or freeing the skb */ | ||
104 | void ip_ct_deliver_cached_events(const struct ip_conntrack *ct) | ||
105 | { | ||
106 | struct ip_conntrack_ecache *ecache; | ||
107 | |||
108 | local_bh_disable(); | ||
109 | ecache = &__get_cpu_var(ip_conntrack_ecache); | ||
110 | if (ecache->ct == ct) | ||
111 | __ip_ct_deliver_cached_events(ecache); | ||
112 | local_bh_enable(); | ||
113 | } | ||
114 | |||
115 | void __ip_ct_event_cache_init(struct ip_conntrack *ct) | ||
116 | { | ||
117 | struct ip_conntrack_ecache *ecache; | ||
118 | |||
119 | /* take care of delivering potentially old events */ | ||
120 | ecache = &__get_cpu_var(ip_conntrack_ecache); | ||
121 | BUG_ON(ecache->ct == ct); | ||
122 | if (ecache->ct) | ||
123 | __ip_ct_deliver_cached_events(ecache); | ||
124 | /* initialize for this conntrack/packet */ | ||
125 | ecache->ct = ct; | ||
126 | nf_conntrack_get(&ct->ct_general); | ||
127 | } | ||
128 | |||
129 | /* flush the event cache - touches other CPU's data and must not be called while | ||
130 | * packets are still passing through the code */ | ||
131 | static void ip_ct_event_cache_flush(void) | ||
132 | { | ||
133 | struct ip_conntrack_ecache *ecache; | ||
134 | int cpu; | ||
135 | |||
136 | for_each_cpu(cpu) { | ||
137 | ecache = &per_cpu(ip_conntrack_ecache, cpu); | ||
138 | if (ecache->ct) | ||
139 | ip_conntrack_put(ecache->ct); | ||
140 | } | ||
141 | } | ||
142 | #else | ||
143 | static inline void ip_ct_event_cache_flush(void) {} | ||
144 | #endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */ | ||
145 | |||
146 | DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); | ||
147 | |||
88 | static int ip_conntrack_hash_rnd_initted; | 148 | static int ip_conntrack_hash_rnd_initted; |
89 | static unsigned int ip_conntrack_hash_rnd; | 149 | static unsigned int ip_conntrack_hash_rnd; |
90 | 150 | ||
@@ -144,6 +204,13 @@ static void unlink_expect(struct ip_conntrack_expect *exp) | |||
144 | list_del(&exp->list); | 204 | list_del(&exp->list); |
145 | CONNTRACK_STAT_INC(expect_delete); | 205 | CONNTRACK_STAT_INC(expect_delete); |
146 | exp->master->expecting--; | 206 | exp->master->expecting--; |
207 | ip_conntrack_expect_put(exp); | ||
208 | } | ||
209 | |||
210 | void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp) | ||
211 | { | ||
212 | unlink_expect(exp); | ||
213 | ip_conntrack_expect_put(exp); | ||
147 | } | 214 | } |
148 | 215 | ||
149 | static void expectation_timed_out(unsigned long ul_expect) | 216 | static void expectation_timed_out(unsigned long ul_expect) |
@@ -156,6 +223,33 @@ static void expectation_timed_out(unsigned long ul_expect) | |||
156 | ip_conntrack_expect_put(exp); | 223 | ip_conntrack_expect_put(exp); |
157 | } | 224 | } |
158 | 225 | ||
226 | struct ip_conntrack_expect * | ||
227 | __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) | ||
228 | { | ||
229 | struct ip_conntrack_expect *i; | ||
230 | |||
231 | list_for_each_entry(i, &ip_conntrack_expect_list, list) { | ||
232 | if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) { | ||
233 | atomic_inc(&i->use); | ||
234 | return i; | ||
235 | } | ||
236 | } | ||
237 | return NULL; | ||
238 | } | ||
239 | |||
240 | /* Just find a expectation corresponding to a tuple. */ | ||
241 | struct ip_conntrack_expect * | ||
242 | ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple) | ||
243 | { | ||
244 | struct ip_conntrack_expect *i; | ||
245 | |||
246 | read_lock_bh(&ip_conntrack_lock); | ||
247 | i = __ip_conntrack_expect_find(tuple); | ||
248 | read_unlock_bh(&ip_conntrack_lock); | ||
249 | |||
250 | return i; | ||
251 | } | ||
252 | |||
159 | /* If an expectation for this connection is found, it gets delete from | 253 | /* If an expectation for this connection is found, it gets delete from |
160 | * global list then returned. */ | 254 | * global list then returned. */ |
161 | static struct ip_conntrack_expect * | 255 | static struct ip_conntrack_expect * |
@@ -180,7 +274,7 @@ find_expectation(const struct ip_conntrack_tuple *tuple) | |||
180 | } | 274 | } |
181 | 275 | ||
182 | /* delete all expectations for this conntrack */ | 276 | /* delete all expectations for this conntrack */ |
183 | static void remove_expectations(struct ip_conntrack *ct) | 277 | void ip_ct_remove_expectations(struct ip_conntrack *ct) |
184 | { | 278 | { |
185 | struct ip_conntrack_expect *i, *tmp; | 279 | struct ip_conntrack_expect *i, *tmp; |
186 | 280 | ||
@@ -210,7 +304,7 @@ clean_from_lists(struct ip_conntrack *ct) | |||
210 | LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); | 304 | LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); |
211 | 305 | ||
212 | /* Destroy all pending expectations */ | 306 | /* Destroy all pending expectations */ |
213 | remove_expectations(ct); | 307 | ip_ct_remove_expectations(ct); |
214 | } | 308 | } |
215 | 309 | ||
216 | static void | 310 | static void |
@@ -223,10 +317,13 @@ destroy_conntrack(struct nf_conntrack *nfct) | |||
223 | IP_NF_ASSERT(atomic_read(&nfct->use) == 0); | 317 | IP_NF_ASSERT(atomic_read(&nfct->use) == 0); |
224 | IP_NF_ASSERT(!timer_pending(&ct->timeout)); | 318 | IP_NF_ASSERT(!timer_pending(&ct->timeout)); |
225 | 319 | ||
320 | ip_conntrack_event(IPCT_DESTROY, ct); | ||
321 | set_bit(IPS_DYING_BIT, &ct->status); | ||
322 | |||
226 | /* To make sure we don't get any weird locking issues here: | 323 | /* To make sure we don't get any weird locking issues here: |
227 | * destroy_conntrack() MUST NOT be called with a write lock | 324 | * destroy_conntrack() MUST NOT be called with a write lock |
228 | * to ip_conntrack_lock!!! -HW */ | 325 | * to ip_conntrack_lock!!! -HW */ |
229 | proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); | 326 | proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); |
230 | if (proto && proto->destroy) | 327 | if (proto && proto->destroy) |
231 | proto->destroy(ct); | 328 | proto->destroy(ct); |
232 | 329 | ||
@@ -238,7 +335,7 @@ destroy_conntrack(struct nf_conntrack *nfct) | |||
238 | * except TFTP can create an expectation on the first packet, | 335 | * except TFTP can create an expectation on the first packet, |
239 | * before connection is in the list, so we need to clean here, | 336 | * before connection is in the list, so we need to clean here, |
240 | * too. */ | 337 | * too. */ |
241 | remove_expectations(ct); | 338 | ip_ct_remove_expectations(ct); |
242 | 339 | ||
243 | /* We overload first tuple to link into unconfirmed list. */ | 340 | /* We overload first tuple to link into unconfirmed list. */ |
244 | if (!is_confirmed(ct)) { | 341 | if (!is_confirmed(ct)) { |
@@ -253,8 +350,7 @@ destroy_conntrack(struct nf_conntrack *nfct) | |||
253 | ip_conntrack_put(ct->master); | 350 | ip_conntrack_put(ct->master); |
254 | 351 | ||
255 | DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct); | 352 | DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct); |
256 | kmem_cache_free(ip_conntrack_cachep, ct); | 353 | ip_conntrack_free(ct); |
257 | atomic_dec(&ip_conntrack_count); | ||
258 | } | 354 | } |
259 | 355 | ||
260 | static void death_by_timeout(unsigned long ul_conntrack) | 356 | static void death_by_timeout(unsigned long ul_conntrack) |
@@ -280,7 +376,7 @@ conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i, | |||
280 | && ip_ct_tuple_equal(tuple, &i->tuple); | 376 | && ip_ct_tuple_equal(tuple, &i->tuple); |
281 | } | 377 | } |
282 | 378 | ||
283 | static struct ip_conntrack_tuple_hash * | 379 | struct ip_conntrack_tuple_hash * |
284 | __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, | 380 | __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, |
285 | const struct ip_conntrack *ignored_conntrack) | 381 | const struct ip_conntrack *ignored_conntrack) |
286 | { | 382 | { |
@@ -315,6 +411,29 @@ ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple, | |||
315 | return h; | 411 | return h; |
316 | } | 412 | } |
317 | 413 | ||
414 | static void __ip_conntrack_hash_insert(struct ip_conntrack *ct, | ||
415 | unsigned int hash, | ||
416 | unsigned int repl_hash) | ||
417 | { | ||
418 | ct->id = ++ip_conntrack_next_id; | ||
419 | list_prepend(&ip_conntrack_hash[hash], | ||
420 | &ct->tuplehash[IP_CT_DIR_ORIGINAL].list); | ||
421 | list_prepend(&ip_conntrack_hash[repl_hash], | ||
422 | &ct->tuplehash[IP_CT_DIR_REPLY].list); | ||
423 | } | ||
424 | |||
425 | void ip_conntrack_hash_insert(struct ip_conntrack *ct) | ||
426 | { | ||
427 | unsigned int hash, repl_hash; | ||
428 | |||
429 | hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | ||
430 | repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); | ||
431 | |||
432 | write_lock_bh(&ip_conntrack_lock); | ||
433 | __ip_conntrack_hash_insert(ct, hash, repl_hash); | ||
434 | write_unlock_bh(&ip_conntrack_lock); | ||
435 | } | ||
436 | |||
318 | /* Confirm a connection given skb; places it in hash table */ | 437 | /* Confirm a connection given skb; places it in hash table */ |
319 | int | 438 | int |
320 | __ip_conntrack_confirm(struct sk_buff **pskb) | 439 | __ip_conntrack_confirm(struct sk_buff **pskb) |
@@ -361,10 +480,7 @@ __ip_conntrack_confirm(struct sk_buff **pskb) | |||
361 | /* Remove from unconfirmed list */ | 480 | /* Remove from unconfirmed list */ |
362 | list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); | 481 | list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); |
363 | 482 | ||
364 | list_prepend(&ip_conntrack_hash[hash], | 483 | __ip_conntrack_hash_insert(ct, hash, repl_hash); |
365 | &ct->tuplehash[IP_CT_DIR_ORIGINAL]); | ||
366 | list_prepend(&ip_conntrack_hash[repl_hash], | ||
367 | &ct->tuplehash[IP_CT_DIR_REPLY]); | ||
368 | /* Timer relative to confirmation time, not original | 484 | /* Timer relative to confirmation time, not original |
369 | setting time, otherwise we'd get timer wrap in | 485 | setting time, otherwise we'd get timer wrap in |
370 | weird delay cases. */ | 486 | weird delay cases. */ |
@@ -374,6 +490,16 @@ __ip_conntrack_confirm(struct sk_buff **pskb) | |||
374 | set_bit(IPS_CONFIRMED_BIT, &ct->status); | 490 | set_bit(IPS_CONFIRMED_BIT, &ct->status); |
375 | CONNTRACK_STAT_INC(insert); | 491 | CONNTRACK_STAT_INC(insert); |
376 | write_unlock_bh(&ip_conntrack_lock); | 492 | write_unlock_bh(&ip_conntrack_lock); |
493 | if (ct->helper) | ||
494 | ip_conntrack_event_cache(IPCT_HELPER, *pskb); | ||
495 | #ifdef CONFIG_IP_NF_NAT_NEEDED | ||
496 | if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || | ||
497 | test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) | ||
498 | ip_conntrack_event_cache(IPCT_NATINFO, *pskb); | ||
499 | #endif | ||
500 | ip_conntrack_event_cache(master_ct(ct) ? | ||
501 | IPCT_RELATED : IPCT_NEW, *pskb); | ||
502 | |||
377 | return NF_ACCEPT; | 503 | return NF_ACCEPT; |
378 | } | 504 | } |
379 | 505 | ||
@@ -438,34 +564,84 @@ static inline int helper_cmp(const struct ip_conntrack_helper *i, | |||
438 | return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); | 564 | return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); |
439 | } | 565 | } |
440 | 566 | ||
441 | static struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple) | 567 | static struct ip_conntrack_helper * |
568 | __ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple) | ||
442 | { | 569 | { |
443 | return LIST_FIND(&helpers, helper_cmp, | 570 | return LIST_FIND(&helpers, helper_cmp, |
444 | struct ip_conntrack_helper *, | 571 | struct ip_conntrack_helper *, |
445 | tuple); | 572 | tuple); |
446 | } | 573 | } |
447 | 574 | ||
448 | /* Allocate a new conntrack: we return -ENOMEM if classification | 575 | struct ip_conntrack_helper * |
449 | failed due to stress. Otherwise it really is unclassifiable. */ | 576 | ip_conntrack_helper_find_get( const struct ip_conntrack_tuple *tuple) |
450 | static struct ip_conntrack_tuple_hash * | 577 | { |
451 | init_conntrack(const struct ip_conntrack_tuple *tuple, | 578 | struct ip_conntrack_helper *helper; |
452 | struct ip_conntrack_protocol *protocol, | 579 | |
453 | struct sk_buff *skb) | 580 | /* need ip_conntrack_lock to assure that helper exists until |
581 | * try_module_get() is called */ | ||
582 | read_lock_bh(&ip_conntrack_lock); | ||
583 | |||
584 | helper = __ip_conntrack_helper_find(tuple); | ||
585 | if (helper) { | ||
586 | /* need to increase module usage count to assure helper will | ||
587 | * not go away while the caller is e.g. busy putting a | ||
588 | * conntrack in the hash that uses the helper */ | ||
589 | if (!try_module_get(helper->me)) | ||
590 | helper = NULL; | ||
591 | } | ||
592 | |||
593 | read_unlock_bh(&ip_conntrack_lock); | ||
594 | |||
595 | return helper; | ||
596 | } | ||
597 | |||
598 | void ip_conntrack_helper_put(struct ip_conntrack_helper *helper) | ||
599 | { | ||
600 | module_put(helper->me); | ||
601 | } | ||
602 | |||
603 | struct ip_conntrack_protocol * | ||
604 | __ip_conntrack_proto_find(u_int8_t protocol) | ||
605 | { | ||
606 | return ip_ct_protos[protocol]; | ||
607 | } | ||
608 | |||
609 | /* this is guaranteed to always return a valid protocol helper, since | ||
610 | * it falls back to generic_protocol */ | ||
611 | struct ip_conntrack_protocol * | ||
612 | ip_conntrack_proto_find_get(u_int8_t protocol) | ||
613 | { | ||
614 | struct ip_conntrack_protocol *p; | ||
615 | |||
616 | preempt_disable(); | ||
617 | p = __ip_conntrack_proto_find(protocol); | ||
618 | if (p) { | ||
619 | if (!try_module_get(p->me)) | ||
620 | p = &ip_conntrack_generic_protocol; | ||
621 | } | ||
622 | preempt_enable(); | ||
623 | |||
624 | return p; | ||
625 | } | ||
626 | |||
627 | void ip_conntrack_proto_put(struct ip_conntrack_protocol *p) | ||
628 | { | ||
629 | module_put(p->me); | ||
630 | } | ||
631 | |||
632 | struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, | ||
633 | struct ip_conntrack_tuple *repl) | ||
454 | { | 634 | { |
455 | struct ip_conntrack *conntrack; | 635 | struct ip_conntrack *conntrack; |
456 | struct ip_conntrack_tuple repl_tuple; | ||
457 | size_t hash; | ||
458 | struct ip_conntrack_expect *exp; | ||
459 | 636 | ||
460 | if (!ip_conntrack_hash_rnd_initted) { | 637 | if (!ip_conntrack_hash_rnd_initted) { |
461 | get_random_bytes(&ip_conntrack_hash_rnd, 4); | 638 | get_random_bytes(&ip_conntrack_hash_rnd, 4); |
462 | ip_conntrack_hash_rnd_initted = 1; | 639 | ip_conntrack_hash_rnd_initted = 1; |
463 | } | 640 | } |
464 | 641 | ||
465 | hash = hash_conntrack(tuple); | ||
466 | |||
467 | if (ip_conntrack_max | 642 | if (ip_conntrack_max |
468 | && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) { | 643 | && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) { |
644 | unsigned int hash = hash_conntrack(orig); | ||
469 | /* Try dropping from this hash chain. */ | 645 | /* Try dropping from this hash chain. */ |
470 | if (!early_drop(&ip_conntrack_hash[hash])) { | 646 | if (!early_drop(&ip_conntrack_hash[hash])) { |
471 | if (net_ratelimit()) | 647 | if (net_ratelimit()) |
@@ -476,11 +652,6 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, | |||
476 | } | 652 | } |
477 | } | 653 | } |
478 | 654 | ||
479 | if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) { | ||
480 | DEBUGP("Can't invert tuple.\n"); | ||
481 | return NULL; | ||
482 | } | ||
483 | |||
484 | conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); | 655 | conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); |
485 | if (!conntrack) { | 656 | if (!conntrack) { |
486 | DEBUGP("Can't allocate conntrack.\n"); | 657 | DEBUGP("Can't allocate conntrack.\n"); |
@@ -490,17 +661,50 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, | |||
490 | memset(conntrack, 0, sizeof(*conntrack)); | 661 | memset(conntrack, 0, sizeof(*conntrack)); |
491 | atomic_set(&conntrack->ct_general.use, 1); | 662 | atomic_set(&conntrack->ct_general.use, 1); |
492 | conntrack->ct_general.destroy = destroy_conntrack; | 663 | conntrack->ct_general.destroy = destroy_conntrack; |
493 | conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple; | 664 | conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; |
494 | conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple; | 665 | conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; |
495 | if (!protocol->new(conntrack, skb)) { | ||
496 | kmem_cache_free(ip_conntrack_cachep, conntrack); | ||
497 | return NULL; | ||
498 | } | ||
499 | /* Don't set timer yet: wait for confirmation */ | 666 | /* Don't set timer yet: wait for confirmation */ |
500 | init_timer(&conntrack->timeout); | 667 | init_timer(&conntrack->timeout); |
501 | conntrack->timeout.data = (unsigned long)conntrack; | 668 | conntrack->timeout.data = (unsigned long)conntrack; |
502 | conntrack->timeout.function = death_by_timeout; | 669 | conntrack->timeout.function = death_by_timeout; |
503 | 670 | ||
671 | atomic_inc(&ip_conntrack_count); | ||
672 | |||
673 | return conntrack; | ||
674 | } | ||
675 | |||
676 | void | ||
677 | ip_conntrack_free(struct ip_conntrack *conntrack) | ||
678 | { | ||
679 | atomic_dec(&ip_conntrack_count); | ||
680 | kmem_cache_free(ip_conntrack_cachep, conntrack); | ||
681 | } | ||
682 | |||
683 | /* Allocate a new conntrack: we return -ENOMEM if classification | ||
684 | * failed due to stress. Otherwise it really is unclassifiable */ | ||
685 | static struct ip_conntrack_tuple_hash * | ||
686 | init_conntrack(struct ip_conntrack_tuple *tuple, | ||
687 | struct ip_conntrack_protocol *protocol, | ||
688 | struct sk_buff *skb) | ||
689 | { | ||
690 | struct ip_conntrack *conntrack; | ||
691 | struct ip_conntrack_tuple repl_tuple; | ||
692 | struct ip_conntrack_expect *exp; | ||
693 | |||
694 | if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) { | ||
695 | DEBUGP("Can't invert tuple.\n"); | ||
696 | return NULL; | ||
697 | } | ||
698 | |||
699 | conntrack = ip_conntrack_alloc(tuple, &repl_tuple); | ||
700 | if (conntrack == NULL || IS_ERR(conntrack)) | ||
701 | return (struct ip_conntrack_tuple_hash *)conntrack; | ||
702 | |||
703 | if (!protocol->new(conntrack, skb)) { | ||
704 | ip_conntrack_free(conntrack); | ||
705 | return NULL; | ||
706 | } | ||
707 | |||
504 | write_lock_bh(&ip_conntrack_lock); | 708 | write_lock_bh(&ip_conntrack_lock); |
505 | exp = find_expectation(tuple); | 709 | exp = find_expectation(tuple); |
506 | 710 | ||
@@ -521,7 +725,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, | |||
521 | nf_conntrack_get(&conntrack->master->ct_general); | 725 | nf_conntrack_get(&conntrack->master->ct_general); |
522 | CONNTRACK_STAT_INC(expect_new); | 726 | CONNTRACK_STAT_INC(expect_new); |
523 | } else { | 727 | } else { |
524 | conntrack->helper = ip_ct_find_helper(&repl_tuple); | 728 | conntrack->helper = __ip_conntrack_helper_find(&repl_tuple); |
525 | 729 | ||
526 | CONNTRACK_STAT_INC(new); | 730 | CONNTRACK_STAT_INC(new); |
527 | } | 731 | } |
@@ -529,7 +733,6 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, | |||
529 | /* Overload tuple linked list to put us in unconfirmed list. */ | 733 | /* Overload tuple linked list to put us in unconfirmed list. */ |
530 | list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed); | 734 | list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed); |
531 | 735 | ||
532 | atomic_inc(&ip_conntrack_count); | ||
533 | write_unlock_bh(&ip_conntrack_lock); | 736 | write_unlock_bh(&ip_conntrack_lock); |
534 | 737 | ||
535 | if (exp) { | 738 | if (exp) { |
@@ -607,7 +810,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum, | |||
607 | struct ip_conntrack *ct; | 810 | struct ip_conntrack *ct; |
608 | enum ip_conntrack_info ctinfo; | 811 | enum ip_conntrack_info ctinfo; |
609 | struct ip_conntrack_protocol *proto; | 812 | struct ip_conntrack_protocol *proto; |
610 | int set_reply; | 813 | int set_reply = 0; |
611 | int ret; | 814 | int ret; |
612 | 815 | ||
613 | /* Previously seen (loopback or untracked)? Ignore. */ | 816 | /* Previously seen (loopback or untracked)? Ignore. */ |
@@ -625,9 +828,6 @@ unsigned int ip_conntrack_in(unsigned int hooknum, | |||
625 | return NF_DROP; | 828 | return NF_DROP; |
626 | } | 829 | } |
627 | 830 | ||
628 | /* FIXME: Do this right please. --RR */ | ||
629 | (*pskb)->nfcache |= NFC_UNKNOWN; | ||
630 | |||
631 | /* Doesn't cover locally-generated broadcast, so not worth it. */ | 831 | /* Doesn't cover locally-generated broadcast, so not worth it. */ |
632 | #if 0 | 832 | #if 0 |
633 | /* Ignore broadcast: no `connection'. */ | 833 | /* Ignore broadcast: no `connection'. */ |
@@ -643,7 +843,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum, | |||
643 | } | 843 | } |
644 | #endif | 844 | #endif |
645 | 845 | ||
646 | proto = ip_ct_find_proto((*pskb)->nh.iph->protocol); | 846 | proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol); |
647 | 847 | ||
648 | /* It may be an special packet, error, unclean... | 848 | /* It may be an special packet, error, unclean... |
649 | * inverse of the return code tells to the netfilter | 849 | * inverse of the return code tells to the netfilter |
@@ -679,8 +879,8 @@ unsigned int ip_conntrack_in(unsigned int hooknum, | |||
679 | return -ret; | 879 | return -ret; |
680 | } | 880 | } |
681 | 881 | ||
682 | if (set_reply) | 882 | if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) |
683 | set_bit(IPS_SEEN_REPLY_BIT, &ct->status); | 883 | ip_conntrack_event_cache(IPCT_STATUS, *pskb); |
684 | 884 | ||
685 | return ret; | 885 | return ret; |
686 | } | 886 | } |
@@ -689,7 +889,7 @@ int invert_tuplepr(struct ip_conntrack_tuple *inverse, | |||
689 | const struct ip_conntrack_tuple *orig) | 889 | const struct ip_conntrack_tuple *orig) |
690 | { | 890 | { |
691 | return ip_ct_invert_tuple(inverse, orig, | 891 | return ip_ct_invert_tuple(inverse, orig, |
692 | ip_ct_find_proto(orig->dst.protonum)); | 892 | __ip_conntrack_proto_find(orig->dst.protonum)); |
693 | } | 893 | } |
694 | 894 | ||
695 | /* Would two expected things clash? */ | 895 | /* Would two expected things clash? */ |
@@ -769,6 +969,8 @@ static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp) | |||
769 | exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ; | 969 | exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ; |
770 | add_timer(&exp->timeout); | 970 | add_timer(&exp->timeout); |
771 | 971 | ||
972 | exp->id = ++ip_conntrack_expect_next_id; | ||
973 | atomic_inc(&exp->use); | ||
772 | CONNTRACK_STAT_INC(expect_create); | 974 | CONNTRACK_STAT_INC(expect_create); |
773 | } | 975 | } |
774 | 976 | ||
@@ -827,6 +1029,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect) | |||
827 | evict_oldest_expect(expect->master); | 1029 | evict_oldest_expect(expect->master); |
828 | 1030 | ||
829 | ip_conntrack_expect_insert(expect); | 1031 | ip_conntrack_expect_insert(expect); |
1032 | ip_conntrack_expect_event(IPEXP_NEW, expect); | ||
830 | ret = 0; | 1033 | ret = 0; |
831 | out: | 1034 | out: |
832 | write_unlock_bh(&ip_conntrack_lock); | 1035 | write_unlock_bh(&ip_conntrack_lock); |
@@ -847,7 +1050,7 @@ void ip_conntrack_alter_reply(struct ip_conntrack *conntrack, | |||
847 | 1050 | ||
848 | conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; | 1051 | conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; |
849 | if (!conntrack->master && conntrack->expecting == 0) | 1052 | if (!conntrack->master && conntrack->expecting == 0) |
850 | conntrack->helper = ip_ct_find_helper(newreply); | 1053 | conntrack->helper = __ip_conntrack_helper_find(newreply); |
851 | write_unlock_bh(&ip_conntrack_lock); | 1054 | write_unlock_bh(&ip_conntrack_lock); |
852 | } | 1055 | } |
853 | 1056 | ||
@@ -861,11 +1064,26 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me) | |||
861 | return 0; | 1064 | return 0; |
862 | } | 1065 | } |
863 | 1066 | ||
1067 | struct ip_conntrack_helper * | ||
1068 | __ip_conntrack_helper_find_byname(const char *name) | ||
1069 | { | ||
1070 | struct ip_conntrack_helper *h; | ||
1071 | |||
1072 | list_for_each_entry(h, &helpers, list) { | ||
1073 | if (!strcmp(h->name, name)) | ||
1074 | return h; | ||
1075 | } | ||
1076 | |||
1077 | return NULL; | ||
1078 | } | ||
1079 | |||
864 | static inline int unhelp(struct ip_conntrack_tuple_hash *i, | 1080 | static inline int unhelp(struct ip_conntrack_tuple_hash *i, |
865 | const struct ip_conntrack_helper *me) | 1081 | const struct ip_conntrack_helper *me) |
866 | { | 1082 | { |
867 | if (tuplehash_to_ctrack(i)->helper == me) | 1083 | if (tuplehash_to_ctrack(i)->helper == me) { |
1084 | ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i)); | ||
868 | tuplehash_to_ctrack(i)->helper = NULL; | 1085 | tuplehash_to_ctrack(i)->helper = NULL; |
1086 | } | ||
869 | return 0; | 1087 | return 0; |
870 | } | 1088 | } |
871 | 1089 | ||
@@ -927,12 +1145,46 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct, | |||
927 | if (del_timer(&ct->timeout)) { | 1145 | if (del_timer(&ct->timeout)) { |
928 | ct->timeout.expires = jiffies + extra_jiffies; | 1146 | ct->timeout.expires = jiffies + extra_jiffies; |
929 | add_timer(&ct->timeout); | 1147 | add_timer(&ct->timeout); |
1148 | ip_conntrack_event_cache(IPCT_REFRESH, skb); | ||
930 | } | 1149 | } |
931 | ct_add_counters(ct, ctinfo, skb); | 1150 | ct_add_counters(ct, ctinfo, skb); |
932 | write_unlock_bh(&ip_conntrack_lock); | 1151 | write_unlock_bh(&ip_conntrack_lock); |
933 | } | 1152 | } |
934 | } | 1153 | } |
935 | 1154 | ||
1155 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
1156 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
1157 | /* Generic function for tcp/udp/sctp/dccp and alike. This needs to be | ||
1158 | * in ip_conntrack_core, since we don't want the protocols to autoload | ||
1159 | * or depend on ctnetlink */ | ||
1160 | int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb, | ||
1161 | const struct ip_conntrack_tuple *tuple) | ||
1162 | { | ||
1163 | NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t), | ||
1164 | &tuple->src.u.tcp.port); | ||
1165 | NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t), | ||
1166 | &tuple->dst.u.tcp.port); | ||
1167 | return 0; | ||
1168 | |||
1169 | nfattr_failure: | ||
1170 | return -1; | ||
1171 | } | ||
1172 | |||
1173 | int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[], | ||
1174 | struct ip_conntrack_tuple *t) | ||
1175 | { | ||
1176 | if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1]) | ||
1177 | return -EINVAL; | ||
1178 | |||
1179 | t->src.u.tcp.port = | ||
1180 | *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]); | ||
1181 | t->dst.u.tcp.port = | ||
1182 | *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]); | ||
1183 | |||
1184 | return 0; | ||
1185 | } | ||
1186 | #endif | ||
1187 | |||
936 | /* Returns new sk_buff, or NULL */ | 1188 | /* Returns new sk_buff, or NULL */ |
937 | struct sk_buff * | 1189 | struct sk_buff * |
938 | ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user) | 1190 | ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user) |
@@ -943,10 +1195,8 @@ ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user) | |||
943 | skb = ip_defrag(skb, user); | 1195 | skb = ip_defrag(skb, user); |
944 | local_bh_enable(); | 1196 | local_bh_enable(); |
945 | 1197 | ||
946 | if (skb) { | 1198 | if (skb) |
947 | ip_send_check(skb->nh.iph); | 1199 | ip_send_check(skb->nh.iph); |
948 | skb->nfcache |= NFC_ALTERED; | ||
949 | } | ||
950 | return skb; | 1200 | return skb; |
951 | } | 1201 | } |
952 | 1202 | ||
@@ -1096,16 +1346,14 @@ static void free_conntrack_hash(void) | |||
1096 | * ip_conntrack_htable_size)); | 1346 | * ip_conntrack_htable_size)); |
1097 | } | 1347 | } |
1098 | 1348 | ||
1099 | /* Mishearing the voices in his head, our hero wonders how he's | 1349 | void ip_conntrack_flush() |
1100 | supposed to kill the mall. */ | ||
1101 | void ip_conntrack_cleanup(void) | ||
1102 | { | 1350 | { |
1103 | ip_ct_attach = NULL; | ||
1104 | /* This makes sure all current packets have passed through | 1351 | /* This makes sure all current packets have passed through |
1105 | netfilter framework. Roll on, two-stage module | 1352 | netfilter framework. Roll on, two-stage module |
1106 | delete... */ | 1353 | delete... */ |
1107 | synchronize_net(); | 1354 | synchronize_net(); |
1108 | 1355 | ||
1356 | ip_ct_event_cache_flush(); | ||
1109 | i_see_dead_people: | 1357 | i_see_dead_people: |
1110 | ip_ct_iterate_cleanup(kill_all, NULL); | 1358 | ip_ct_iterate_cleanup(kill_all, NULL); |
1111 | if (atomic_read(&ip_conntrack_count) != 0) { | 1359 | if (atomic_read(&ip_conntrack_count) != 0) { |
@@ -1115,7 +1363,14 @@ void ip_conntrack_cleanup(void) | |||
1115 | /* wait until all references to ip_conntrack_untracked are dropped */ | 1363 | /* wait until all references to ip_conntrack_untracked are dropped */ |
1116 | while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1) | 1364 | while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1) |
1117 | schedule(); | 1365 | schedule(); |
1366 | } | ||
1118 | 1367 | ||
1368 | /* Mishearing the voices in his head, our hero wonders how he's | ||
1369 | supposed to kill the mall. */ | ||
1370 | void ip_conntrack_cleanup(void) | ||
1371 | { | ||
1372 | ip_ct_attach = NULL; | ||
1373 | ip_conntrack_flush(); | ||
1119 | kmem_cache_destroy(ip_conntrack_cachep); | 1374 | kmem_cache_destroy(ip_conntrack_cachep); |
1120 | kmem_cache_destroy(ip_conntrack_expect_cachep); | 1375 | kmem_cache_destroy(ip_conntrack_expect_cachep); |
1121 | free_conntrack_hash(); | 1376 | free_conntrack_hash(); |
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c index 7a3b773be3f9..3a2627db1729 100644 --- a/net/ipv4/netfilter/ip_conntrack_ftp.c +++ b/net/ipv4/netfilter/ip_conntrack_ftp.c | |||
@@ -25,8 +25,7 @@ MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); | |||
25 | MODULE_DESCRIPTION("ftp connection tracking helper"); | 25 | MODULE_DESCRIPTION("ftp connection tracking helper"); |
26 | 26 | ||
27 | /* This is slow, but it's simple. --RR */ | 27 | /* This is slow, but it's simple. --RR */ |
28 | static char ftp_buffer[65536]; | 28 | static char *ftp_buffer; |
29 | |||
30 | static DEFINE_SPINLOCK(ip_ftp_lock); | 29 | static DEFINE_SPINLOCK(ip_ftp_lock); |
31 | 30 | ||
32 | #define MAX_PORTS 8 | 31 | #define MAX_PORTS 8 |
@@ -262,7 +261,8 @@ static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir) | |||
262 | } | 261 | } |
263 | 262 | ||
264 | /* We don't update if it's older than what we have. */ | 263 | /* We don't update if it's older than what we have. */ |
265 | static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir) | 264 | static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir, |
265 | struct sk_buff *skb) | ||
266 | { | 266 | { |
267 | unsigned int i, oldest = NUM_SEQ_TO_REMEMBER; | 267 | unsigned int i, oldest = NUM_SEQ_TO_REMEMBER; |
268 | 268 | ||
@@ -276,10 +276,13 @@ static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir) | |||
276 | oldest = i; | 276 | oldest = i; |
277 | } | 277 | } |
278 | 278 | ||
279 | if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) | 279 | if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) { |
280 | info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq; | 280 | info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq; |
281 | else if (oldest != NUM_SEQ_TO_REMEMBER) | 281 | ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); |
282 | } else if (oldest != NUM_SEQ_TO_REMEMBER) { | ||
282 | info->seq_aft_nl[dir][oldest] = nl_seq; | 283 | info->seq_aft_nl[dir][oldest] = nl_seq; |
284 | ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); | ||
285 | } | ||
283 | } | 286 | } |
284 | 287 | ||
285 | static int help(struct sk_buff **pskb, | 288 | static int help(struct sk_buff **pskb, |
@@ -439,7 +442,7 @@ out_update_nl: | |||
439 | /* Now if this ends in \n, update ftp info. Seq may have been | 442 | /* Now if this ends in \n, update ftp info. Seq may have been |
440 | * adjusted by NAT code. */ | 443 | * adjusted by NAT code. */ |
441 | if (ends_in_nl) | 444 | if (ends_in_nl) |
442 | update_nl_seq(seq, ct_ftp_info,dir); | 445 | update_nl_seq(seq, ct_ftp_info,dir, *pskb); |
443 | out: | 446 | out: |
444 | spin_unlock_bh(&ip_ftp_lock); | 447 | spin_unlock_bh(&ip_ftp_lock); |
445 | return ret; | 448 | return ret; |
@@ -457,6 +460,8 @@ static void fini(void) | |||
457 | ports[i]); | 460 | ports[i]); |
458 | ip_conntrack_helper_unregister(&ftp[i]); | 461 | ip_conntrack_helper_unregister(&ftp[i]); |
459 | } | 462 | } |
463 | |||
464 | kfree(ftp_buffer); | ||
460 | } | 465 | } |
461 | 466 | ||
462 | static int __init init(void) | 467 | static int __init init(void) |
@@ -464,6 +469,10 @@ static int __init init(void) | |||
464 | int i, ret; | 469 | int i, ret; |
465 | char *tmpname; | 470 | char *tmpname; |
466 | 471 | ||
472 | ftp_buffer = kmalloc(65536, GFP_KERNEL); | ||
473 | if (!ftp_buffer) | ||
474 | return -ENOMEM; | ||
475 | |||
467 | if (ports_c == 0) | 476 | if (ports_c == 0) |
468 | ports[ports_c++] = FTP_PORT; | 477 | ports[ports_c++] = FTP_PORT; |
469 | 478 | ||
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c index 4a28f297d502..25438eec21a1 100644 --- a/net/ipv4/netfilter/ip_conntrack_irc.c +++ b/net/ipv4/netfilter/ip_conntrack_irc.c | |||
@@ -39,7 +39,7 @@ static int ports_c; | |||
39 | static int max_dcc_channels = 8; | 39 | static int max_dcc_channels = 8; |
40 | static unsigned int dcc_timeout = 300; | 40 | static unsigned int dcc_timeout = 300; |
41 | /* This is slow, but it's simple. --RR */ | 41 | /* This is slow, but it's simple. --RR */ |
42 | static char irc_buffer[65536]; | 42 | static char *irc_buffer; |
43 | static DEFINE_SPINLOCK(irc_buffer_lock); | 43 | static DEFINE_SPINLOCK(irc_buffer_lock); |
44 | 44 | ||
45 | unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb, | 45 | unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb, |
@@ -257,6 +257,10 @@ static int __init init(void) | |||
257 | printk("ip_conntrack_irc: dcc_timeout must be a positive integer\n"); | 257 | printk("ip_conntrack_irc: dcc_timeout must be a positive integer\n"); |
258 | return -EBUSY; | 258 | return -EBUSY; |
259 | } | 259 | } |
260 | |||
261 | irc_buffer = kmalloc(65536, GFP_KERNEL); | ||
262 | if (!irc_buffer) | ||
263 | return -ENOMEM; | ||
260 | 264 | ||
261 | /* If no port given, default to standard irc port */ | 265 | /* If no port given, default to standard irc port */ |
262 | if (ports_c == 0) | 266 | if (ports_c == 0) |
@@ -304,6 +308,7 @@ static void fini(void) | |||
304 | ports[i]); | 308 | ports[i]); |
305 | ip_conntrack_helper_unregister(&irc_helpers[i]); | 309 | ip_conntrack_helper_unregister(&irc_helpers[i]); |
306 | } | 310 | } |
311 | kfree(irc_buffer); | ||
307 | } | 312 | } |
308 | 313 | ||
309 | module_init(init); | 314 | module_init(init); |
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c new file mode 100644 index 000000000000..a4e9278db4ed --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c | |||
@@ -0,0 +1,1579 @@ | |||
1 | /* Connection tracking via netlink socket. Allows for user space | ||
2 | * protocol helpers and general trouble making from userspace. | ||
3 | * | ||
4 | * (C) 2001 by Jay Schulist <jschlst@samba.org> | ||
5 | * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org> | ||
6 | * (C) 2003 by Patrick Mchardy <kaber@trash.net> | ||
7 | * (C) 2005 by Pablo Neira Ayuso <pablo@eurodev.net> | ||
8 | * | ||
9 | * I've reworked this stuff to use attributes instead of conntrack | ||
10 | * structures. 5.44 am. I need more tea. --pablo 05/07/11. | ||
11 | * | ||
12 | * Initial connection tracking via netlink development funded and | ||
13 | * generally made possible by Network Robots, Inc. (www.networkrobots.com) | ||
14 | * | ||
15 | * Further development of this code funded by Astaro AG (http://www.astaro.com) | ||
16 | * | ||
17 | * This software may be used and distributed according to the terms | ||
18 | * of the GNU General Public License, incorporated herein by reference. | ||
19 | */ | ||
20 | |||
21 | #include <linux/init.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/types.h> | ||
25 | #include <linux/timer.h> | ||
26 | #include <linux/skbuff.h> | ||
27 | #include <linux/errno.h> | ||
28 | #include <linux/netlink.h> | ||
29 | #include <linux/spinlock.h> | ||
30 | #include <linux/notifier.h> | ||
31 | #include <linux/rtnetlink.h> | ||
32 | |||
33 | #include <linux/netfilter.h> | ||
34 | #include <linux/netfilter_ipv4.h> | ||
35 | #include <linux/netfilter_ipv4/ip_tables.h> | ||
36 | #include <linux/netfilter_ipv4/ip_conntrack.h> | ||
37 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | ||
38 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | ||
39 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> | ||
40 | #include <linux/netfilter_ipv4/ip_nat_protocol.h> | ||
41 | |||
42 | #include <linux/netfilter/nfnetlink.h> | ||
43 | #include <linux/netfilter/nfnetlink_conntrack.h> | ||
44 | |||
45 | MODULE_LICENSE("GPL"); | ||
46 | |||
47 | static char __initdata version[] = "0.90"; | ||
48 | |||
49 | #if 0 | ||
50 | #define DEBUGP printk | ||
51 | #else | ||
52 | #define DEBUGP(format, args...) | ||
53 | #endif | ||
54 | |||
55 | |||
56 | static inline int | ||
57 | ctnetlink_dump_tuples_proto(struct sk_buff *skb, | ||
58 | const struct ip_conntrack_tuple *tuple) | ||
59 | { | ||
60 | struct ip_conntrack_protocol *proto; | ||
61 | |||
62 | NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum); | ||
63 | |||
64 | proto = ip_conntrack_proto_find_get(tuple->dst.protonum); | ||
65 | if (proto && proto->tuple_to_nfattr) | ||
66 | return proto->tuple_to_nfattr(skb, tuple); | ||
67 | |||
68 | return 0; | ||
69 | |||
70 | nfattr_failure: | ||
71 | return -1; | ||
72 | } | ||
73 | |||
74 | static inline int | ||
75 | ctnetlink_dump_tuples(struct sk_buff *skb, | ||
76 | const struct ip_conntrack_tuple *tuple) | ||
77 | { | ||
78 | struct nfattr *nest_parms; | ||
79 | |||
80 | nest_parms = NFA_NEST(skb, CTA_TUPLE_IP); | ||
81 | NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), &tuple->src.ip); | ||
82 | NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t), &tuple->dst.ip); | ||
83 | NFA_NEST_END(skb, nest_parms); | ||
84 | |||
85 | nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO); | ||
86 | ctnetlink_dump_tuples_proto(skb, tuple); | ||
87 | NFA_NEST_END(skb, nest_parms); | ||
88 | |||
89 | return 0; | ||
90 | |||
91 | nfattr_failure: | ||
92 | return -1; | ||
93 | } | ||
94 | |||
95 | static inline int | ||
96 | ctnetlink_dump_status(struct sk_buff *skb, const struct ip_conntrack *ct) | ||
97 | { | ||
98 | u_int32_t status = htonl((u_int32_t) ct->status); | ||
99 | NFA_PUT(skb, CTA_STATUS, sizeof(status), &status); | ||
100 | return 0; | ||
101 | |||
102 | nfattr_failure: | ||
103 | return -1; | ||
104 | } | ||
105 | |||
106 | static inline int | ||
107 | ctnetlink_dump_timeout(struct sk_buff *skb, const struct ip_conntrack *ct) | ||
108 | { | ||
109 | long timeout_l = ct->timeout.expires - jiffies; | ||
110 | u_int32_t timeout; | ||
111 | |||
112 | if (timeout_l < 0) | ||
113 | timeout = 0; | ||
114 | else | ||
115 | timeout = htonl(timeout_l / HZ); | ||
116 | |||
117 | NFA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout); | ||
118 | return 0; | ||
119 | |||
120 | nfattr_failure: | ||
121 | return -1; | ||
122 | } | ||
123 | |||
124 | static inline int | ||
125 | ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct ip_conntrack *ct) | ||
126 | { | ||
127 | struct ip_conntrack_protocol *proto = ip_conntrack_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); | ||
128 | |||
129 | struct nfattr *nest_proto; | ||
130 | int ret; | ||
131 | |||
132 | if (!proto || !proto->to_nfattr) | ||
133 | return 0; | ||
134 | |||
135 | nest_proto = NFA_NEST(skb, CTA_PROTOINFO); | ||
136 | |||
137 | ret = proto->to_nfattr(skb, nest_proto, ct); | ||
138 | |||
139 | ip_conntrack_proto_put(proto); | ||
140 | |||
141 | NFA_NEST_END(skb, nest_proto); | ||
142 | |||
143 | return ret; | ||
144 | |||
145 | nfattr_failure: | ||
146 | return -1; | ||
147 | } | ||
148 | |||
149 | static inline int | ||
150 | ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct ip_conntrack *ct) | ||
151 | { | ||
152 | struct nfattr *nest_helper; | ||
153 | |||
154 | if (!ct->helper) | ||
155 | return 0; | ||
156 | |||
157 | nest_helper = NFA_NEST(skb, CTA_HELP); | ||
158 | NFA_PUT(skb, CTA_HELP_NAME, CTA_HELP_MAXNAMESIZE, &ct->helper->name); | ||
159 | |||
160 | if (ct->helper->to_nfattr) | ||
161 | ct->helper->to_nfattr(skb, ct); | ||
162 | |||
163 | NFA_NEST_END(skb, nest_helper); | ||
164 | |||
165 | return 0; | ||
166 | |||
167 | nfattr_failure: | ||
168 | return -1; | ||
169 | } | ||
170 | |||
171 | #ifdef CONFIG_IP_NF_CT_ACCT | ||
172 | static inline int | ||
173 | ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct, | ||
174 | enum ip_conntrack_dir dir) | ||
175 | { | ||
176 | enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; | ||
177 | struct nfattr *nest_count = NFA_NEST(skb, type); | ||
178 | u_int64_t tmp; | ||
179 | |||
180 | tmp = cpu_to_be64(ct->counters[dir].packets); | ||
181 | NFA_PUT(skb, CTA_COUNTERS_PACKETS, sizeof(u_int64_t), &tmp); | ||
182 | |||
183 | tmp = cpu_to_be64(ct->counters[dir].bytes); | ||
184 | NFA_PUT(skb, CTA_COUNTERS_BYTES, sizeof(u_int64_t), &tmp); | ||
185 | |||
186 | NFA_NEST_END(skb, nest_count); | ||
187 | |||
188 | return 0; | ||
189 | |||
190 | nfattr_failure: | ||
191 | return -1; | ||
192 | } | ||
193 | #else | ||
194 | #define ctnetlink_dump_counters(a, b, c) (0) | ||
195 | #endif | ||
196 | |||
197 | #ifdef CONFIG_IP_NF_CONNTRACK_MARK | ||
198 | static inline int | ||
199 | ctnetlink_dump_mark(struct sk_buff *skb, const struct ip_conntrack *ct) | ||
200 | { | ||
201 | u_int32_t mark = htonl(ct->mark); | ||
202 | |||
203 | NFA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark); | ||
204 | return 0; | ||
205 | |||
206 | nfattr_failure: | ||
207 | return -1; | ||
208 | } | ||
209 | #else | ||
210 | #define ctnetlink_dump_mark(a, b) (0) | ||
211 | #endif | ||
212 | |||
213 | static inline int | ||
214 | ctnetlink_dump_id(struct sk_buff *skb, const struct ip_conntrack *ct) | ||
215 | { | ||
216 | u_int32_t id = htonl(ct->id); | ||
217 | NFA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id); | ||
218 | return 0; | ||
219 | |||
220 | nfattr_failure: | ||
221 | return -1; | ||
222 | } | ||
223 | |||
224 | static inline int | ||
225 | ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct) | ||
226 | { | ||
227 | unsigned int use = htonl(atomic_read(&ct->ct_general.use)); | ||
228 | |||
229 | NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use); | ||
230 | return 0; | ||
231 | |||
232 | nfattr_failure: | ||
233 | return -1; | ||
234 | } | ||
235 | |||
236 | #define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple) | ||
237 | |||
238 | static int | ||
239 | ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, | ||
240 | int event, int nowait, | ||
241 | const struct ip_conntrack *ct) | ||
242 | { | ||
243 | struct nlmsghdr *nlh; | ||
244 | struct nfgenmsg *nfmsg; | ||
245 | struct nfattr *nest_parms; | ||
246 | unsigned char *b; | ||
247 | |||
248 | b = skb->tail; | ||
249 | |||
250 | event |= NFNL_SUBSYS_CTNETLINK << 8; | ||
251 | nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg)); | ||
252 | nfmsg = NLMSG_DATA(nlh); | ||
253 | |||
254 | nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0; | ||
255 | nfmsg->nfgen_family = AF_INET; | ||
256 | nfmsg->version = NFNETLINK_V0; | ||
257 | nfmsg->res_id = 0; | ||
258 | |||
259 | nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG); | ||
260 | if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) | ||
261 | goto nfattr_failure; | ||
262 | NFA_NEST_END(skb, nest_parms); | ||
263 | |||
264 | nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY); | ||
265 | if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) | ||
266 | goto nfattr_failure; | ||
267 | NFA_NEST_END(skb, nest_parms); | ||
268 | |||
269 | if (ctnetlink_dump_status(skb, ct) < 0 || | ||
270 | ctnetlink_dump_timeout(skb, ct) < 0 || | ||
271 | ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || | ||
272 | ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || | ||
273 | ctnetlink_dump_protoinfo(skb, ct) < 0 || | ||
274 | ctnetlink_dump_helpinfo(skb, ct) < 0 || | ||
275 | ctnetlink_dump_mark(skb, ct) < 0 || | ||
276 | ctnetlink_dump_id(skb, ct) < 0 || | ||
277 | ctnetlink_dump_use(skb, ct) < 0) | ||
278 | goto nfattr_failure; | ||
279 | |||
280 | nlh->nlmsg_len = skb->tail - b; | ||
281 | return skb->len; | ||
282 | |||
283 | nlmsg_failure: | ||
284 | nfattr_failure: | ||
285 | skb_trim(skb, b - skb->data); | ||
286 | return -1; | ||
287 | } | ||
288 | |||
289 | #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS | ||
290 | static int ctnetlink_conntrack_event(struct notifier_block *this, | ||
291 | unsigned long events, void *ptr) | ||
292 | { | ||
293 | struct nlmsghdr *nlh; | ||
294 | struct nfgenmsg *nfmsg; | ||
295 | struct nfattr *nest_parms; | ||
296 | struct ip_conntrack *ct = (struct ip_conntrack *)ptr; | ||
297 | struct sk_buff *skb; | ||
298 | unsigned int type; | ||
299 | unsigned char *b; | ||
300 | unsigned int flags = 0, group; | ||
301 | |||
302 | /* ignore our fake conntrack entry */ | ||
303 | if (ct == &ip_conntrack_untracked) | ||
304 | return NOTIFY_DONE; | ||
305 | |||
306 | if (events & IPCT_DESTROY) { | ||
307 | type = IPCTNL_MSG_CT_DELETE; | ||
308 | group = NFNLGRP_CONNTRACK_DESTROY; | ||
309 | goto alloc_skb; | ||
310 | } | ||
311 | if (events & (IPCT_NEW | IPCT_RELATED)) { | ||
312 | type = IPCTNL_MSG_CT_NEW; | ||
313 | flags = NLM_F_CREATE|NLM_F_EXCL; | ||
314 | /* dump everything */ | ||
315 | events = ~0UL; | ||
316 | group = NFNLGRP_CONNTRACK_NEW; | ||
317 | goto alloc_skb; | ||
318 | } | ||
319 | if (events & (IPCT_STATUS | | ||
320 | IPCT_PROTOINFO | | ||
321 | IPCT_HELPER | | ||
322 | IPCT_HELPINFO | | ||
323 | IPCT_NATINFO)) { | ||
324 | type = IPCTNL_MSG_CT_NEW; | ||
325 | group = NFNLGRP_CONNTRACK_UPDATE; | ||
326 | goto alloc_skb; | ||
327 | } | ||
328 | |||
329 | return NOTIFY_DONE; | ||
330 | |||
331 | alloc_skb: | ||
332 | /* FIXME: Check if there are any listeners before, don't hurt performance */ | ||
333 | |||
334 | skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); | ||
335 | if (!skb) | ||
336 | return NOTIFY_DONE; | ||
337 | |||
338 | b = skb->tail; | ||
339 | |||
340 | type |= NFNL_SUBSYS_CTNETLINK << 8; | ||
341 | nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg)); | ||
342 | nfmsg = NLMSG_DATA(nlh); | ||
343 | |||
344 | nlh->nlmsg_flags = flags; | ||
345 | nfmsg->nfgen_family = AF_INET; | ||
346 | nfmsg->version = NFNETLINK_V0; | ||
347 | nfmsg->res_id = 0; | ||
348 | |||
349 | nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG); | ||
350 | if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) | ||
351 | goto nfattr_failure; | ||
352 | NFA_NEST_END(skb, nest_parms); | ||
353 | |||
354 | nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY); | ||
355 | if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) | ||
356 | goto nfattr_failure; | ||
357 | NFA_NEST_END(skb, nest_parms); | ||
358 | |||
359 | /* NAT stuff is now a status flag */ | ||
360 | if ((events & IPCT_STATUS || events & IPCT_NATINFO) | ||
361 | && ctnetlink_dump_status(skb, ct) < 0) | ||
362 | goto nfattr_failure; | ||
363 | if (events & IPCT_REFRESH | ||
364 | && ctnetlink_dump_timeout(skb, ct) < 0) | ||
365 | goto nfattr_failure; | ||
366 | if (events & IPCT_PROTOINFO | ||
367 | && ctnetlink_dump_protoinfo(skb, ct) < 0) | ||
368 | goto nfattr_failure; | ||
369 | if (events & IPCT_HELPINFO | ||
370 | && ctnetlink_dump_helpinfo(skb, ct) < 0) | ||
371 | goto nfattr_failure; | ||
372 | |||
373 | if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || | ||
374 | ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) | ||
375 | goto nfattr_failure; | ||
376 | |||
377 | nlh->nlmsg_len = skb->tail - b; | ||
378 | nfnetlink_send(skb, 0, group, 0); | ||
379 | return NOTIFY_DONE; | ||
380 | |||
381 | nlmsg_failure: | ||
382 | nfattr_failure: | ||
383 | kfree_skb(skb); | ||
384 | return NOTIFY_DONE; | ||
385 | } | ||
386 | #endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */ | ||
387 | |||
388 | static int ctnetlink_done(struct netlink_callback *cb) | ||
389 | { | ||
390 | DEBUGP("entered %s\n", __FUNCTION__); | ||
391 | return 0; | ||
392 | } | ||
393 | |||
394 | static int | ||
395 | ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) | ||
396 | { | ||
397 | struct ip_conntrack *ct = NULL; | ||
398 | struct ip_conntrack_tuple_hash *h; | ||
399 | struct list_head *i; | ||
400 | u_int32_t *id = (u_int32_t *) &cb->args[1]; | ||
401 | |||
402 | DEBUGP("entered %s, last bucket=%lu id=%u\n", __FUNCTION__, | ||
403 | cb->args[0], *id); | ||
404 | |||
405 | read_lock_bh(&ip_conntrack_lock); | ||
406 | for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) { | ||
407 | list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) { | ||
408 | h = (struct ip_conntrack_tuple_hash *) i; | ||
409 | if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) | ||
410 | continue; | ||
411 | ct = tuplehash_to_ctrack(h); | ||
412 | if (ct->id <= *id) | ||
413 | continue; | ||
414 | if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, | ||
415 | cb->nlh->nlmsg_seq, | ||
416 | IPCTNL_MSG_CT_NEW, | ||
417 | 1, ct) < 0) | ||
418 | goto out; | ||
419 | *id = ct->id; | ||
420 | } | ||
421 | } | ||
422 | out: | ||
423 | read_unlock_bh(&ip_conntrack_lock); | ||
424 | |||
425 | DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); | ||
426 | |||
427 | return skb->len; | ||
428 | } | ||
429 | |||
430 | #ifdef CONFIG_IP_NF_CT_ACCT | ||
431 | static int | ||
432 | ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb) | ||
433 | { | ||
434 | struct ip_conntrack *ct = NULL; | ||
435 | struct ip_conntrack_tuple_hash *h; | ||
436 | struct list_head *i; | ||
437 | u_int32_t *id = (u_int32_t *) &cb->args[1]; | ||
438 | |||
439 | DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__, | ||
440 | cb->args[0], *id); | ||
441 | |||
442 | write_lock_bh(&ip_conntrack_lock); | ||
443 | for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) { | ||
444 | list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) { | ||
445 | h = (struct ip_conntrack_tuple_hash *) i; | ||
446 | if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) | ||
447 | continue; | ||
448 | ct = tuplehash_to_ctrack(h); | ||
449 | if (ct->id <= *id) | ||
450 | continue; | ||
451 | if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, | ||
452 | cb->nlh->nlmsg_seq, | ||
453 | IPCTNL_MSG_CT_NEW, | ||
454 | 1, ct) < 0) | ||
455 | goto out; | ||
456 | *id = ct->id; | ||
457 | |||
458 | memset(&ct->counters, 0, sizeof(ct->counters)); | ||
459 | } | ||
460 | } | ||
461 | out: | ||
462 | write_unlock_bh(&ip_conntrack_lock); | ||
463 | |||
464 | DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); | ||
465 | |||
466 | return skb->len; | ||
467 | } | ||
468 | #endif | ||
469 | |||
470 | static const int cta_min_ip[CTA_IP_MAX] = { | ||
471 | [CTA_IP_V4_SRC-1] = sizeof(u_int32_t), | ||
472 | [CTA_IP_V4_DST-1] = sizeof(u_int32_t), | ||
473 | }; | ||
474 | |||
475 | static inline int | ||
476 | ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple) | ||
477 | { | ||
478 | struct nfattr *tb[CTA_IP_MAX]; | ||
479 | |||
480 | DEBUGP("entered %s\n", __FUNCTION__); | ||
481 | |||
482 | |||
483 | if (nfattr_parse_nested(tb, CTA_IP_MAX, attr) < 0) | ||
484 | goto nfattr_failure; | ||
485 | |||
486 | if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip)) | ||
487 | return -EINVAL; | ||
488 | |||
489 | if (!tb[CTA_IP_V4_SRC-1]) | ||
490 | return -EINVAL; | ||
491 | tuple->src.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]); | ||
492 | |||
493 | if (!tb[CTA_IP_V4_DST-1]) | ||
494 | return -EINVAL; | ||
495 | tuple->dst.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]); | ||
496 | |||
497 | DEBUGP("leaving\n"); | ||
498 | |||
499 | return 0; | ||
500 | |||
501 | nfattr_failure: | ||
502 | return -1; | ||
503 | } | ||
504 | |||
505 | static const int cta_min_proto[CTA_PROTO_MAX] = { | ||
506 | [CTA_PROTO_NUM-1] = sizeof(u_int16_t), | ||
507 | [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t), | ||
508 | [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t), | ||
509 | [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t), | ||
510 | [CTA_PROTO_ICMP_CODE-1] = sizeof(u_int8_t), | ||
511 | [CTA_PROTO_ICMP_ID-1] = sizeof(u_int16_t), | ||
512 | }; | ||
513 | |||
514 | static inline int | ||
515 | ctnetlink_parse_tuple_proto(struct nfattr *attr, | ||
516 | struct ip_conntrack_tuple *tuple) | ||
517 | { | ||
518 | struct nfattr *tb[CTA_PROTO_MAX]; | ||
519 | struct ip_conntrack_protocol *proto; | ||
520 | int ret = 0; | ||
521 | |||
522 | DEBUGP("entered %s\n", __FUNCTION__); | ||
523 | |||
524 | if (nfattr_parse_nested(tb, CTA_PROTO_MAX, attr) < 0) | ||
525 | goto nfattr_failure; | ||
526 | |||
527 | if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) | ||
528 | return -EINVAL; | ||
529 | |||
530 | if (!tb[CTA_PROTO_NUM-1]) | ||
531 | return -EINVAL; | ||
532 | tuple->dst.protonum = *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]); | ||
533 | |||
534 | proto = ip_conntrack_proto_find_get(tuple->dst.protonum); | ||
535 | |||
536 | if (likely(proto && proto->nfattr_to_tuple)) { | ||
537 | ret = proto->nfattr_to_tuple(tb, tuple); | ||
538 | ip_conntrack_proto_put(proto); | ||
539 | } | ||
540 | |||
541 | return ret; | ||
542 | |||
543 | nfattr_failure: | ||
544 | return -1; | ||
545 | } | ||
546 | |||
547 | static inline int | ||
548 | ctnetlink_parse_tuple(struct nfattr *cda[], struct ip_conntrack_tuple *tuple, | ||
549 | enum ctattr_tuple type) | ||
550 | { | ||
551 | struct nfattr *tb[CTA_TUPLE_MAX]; | ||
552 | int err; | ||
553 | |||
554 | DEBUGP("entered %s\n", __FUNCTION__); | ||
555 | |||
556 | memset(tuple, 0, sizeof(*tuple)); | ||
557 | |||
558 | if (nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]) < 0) | ||
559 | goto nfattr_failure; | ||
560 | |||
561 | if (!tb[CTA_TUPLE_IP-1]) | ||
562 | return -EINVAL; | ||
563 | |||
564 | err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP-1], tuple); | ||
565 | if (err < 0) | ||
566 | return err; | ||
567 | |||
568 | if (!tb[CTA_TUPLE_PROTO-1]) | ||
569 | return -EINVAL; | ||
570 | |||
571 | err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO-1], tuple); | ||
572 | if (err < 0) | ||
573 | return err; | ||
574 | |||
575 | /* orig and expect tuples get DIR_ORIGINAL */ | ||
576 | if (type == CTA_TUPLE_REPLY) | ||
577 | tuple->dst.dir = IP_CT_DIR_REPLY; | ||
578 | else | ||
579 | tuple->dst.dir = IP_CT_DIR_ORIGINAL; | ||
580 | |||
581 | DUMP_TUPLE(tuple); | ||
582 | |||
583 | DEBUGP("leaving\n"); | ||
584 | |||
585 | return 0; | ||
586 | |||
587 | nfattr_failure: | ||
588 | return -1; | ||
589 | } | ||
590 | |||
591 | #ifdef CONFIG_IP_NF_NAT_NEEDED | ||
592 | static const int cta_min_protonat[CTA_PROTONAT_MAX] = { | ||
593 | [CTA_PROTONAT_PORT_MIN-1] = sizeof(u_int16_t), | ||
594 | [CTA_PROTONAT_PORT_MAX-1] = sizeof(u_int16_t), | ||
595 | }; | ||
596 | |||
/* Parse the nested CTA_NAT_PROTO attribute into the protocol-specific
 * part of @range.  Protocols without a lookup entry or without an
 * nfattr_to_range handler are silently accepted (return 0 with @range
 * untouched).  Returns -1 on malformed attributes. */
static int ctnetlink_parse_nat_proto(struct nfattr *attr,
				     const struct ip_conntrack *ct,
				     struct ip_nat_range *range)
{
	struct nfattr *tb[CTA_PROTONAT_MAX];
	struct ip_nat_protocol *npt;

	DEBUGP("entered %s\n", __FUNCTION__);

	if (nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr) < 0)
		goto nfattr_failure;

	/* reject attributes too short for their declared type */
	if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat))
		goto nfattr_failure;

	/* look up the l4 NAT protocol of the original direction; this
	 * takes a reference that must be released via ip_nat_proto_put() */
	npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
	if (!npt)
		return 0;

	if (!npt->nfattr_to_range) {
		/* protocol has no attribute parser: nothing more to do */
		ip_nat_proto_put(npt);
		return 0;
	}

	/* nfattr_to_range returns 1 if it parsed, 0 if not, neg. on error */
	if (npt->nfattr_to_range(tb, range) > 0)
		range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;

	ip_nat_proto_put(npt);

	DEBUGP("leaving\n");
	return 0;

nfattr_failure:
	return -1;
}
633 | |||
/* Parse the nested CTA_NAT attribute into @range: optional minimum and
 * maximum IPs (a missing maximum collapses to a single-address range)
 * plus an optional per-protocol port range. */
static inline int
ctnetlink_parse_nat(struct nfattr *cda[],
		    const struct ip_conntrack *ct, struct ip_nat_range *range)
{
	struct nfattr *tb[CTA_NAT_MAX];
	int err;

	DEBUGP("entered %s\n", __FUNCTION__);

	memset(range, 0, sizeof(*range));

	if (nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]) < 0)
		goto nfattr_failure;

	if (tb[CTA_NAT_MINIP-1])
		range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]);

	/* no explicit maximum means "map to exactly min_ip" */
	if (!tb[CTA_NAT_MAXIP-1])
		range->max_ip = range->min_ip;
	else
		range->max_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MAXIP-1]);

	/* only request address mapping if an address was actually given */
	if (range->min_ip)
		range->flags |= IP_NAT_RANGE_MAP_IPS;

	if (!tb[CTA_NAT_PROTO-1])
		return 0;

	err = ctnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range);
	if (err < 0)
		return err;

	DEBUGP("leaving\n");
	return 0;

nfattr_failure:
	return -1;
}
672 | #endif | ||
673 | |||
674 | static inline int | ||
675 | ctnetlink_parse_help(struct nfattr *attr, char **helper_name) | ||
676 | { | ||
677 | struct nfattr *tb[CTA_HELP_MAX]; | ||
678 | |||
679 | DEBUGP("entered %s\n", __FUNCTION__); | ||
680 | |||
681 | if (nfattr_parse_nested(tb, CTA_HELP_MAX, attr) < 0) | ||
682 | goto nfattr_failure; | ||
683 | |||
684 | if (!tb[CTA_HELP_NAME-1]) | ||
685 | return -EINVAL; | ||
686 | |||
687 | *helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]); | ||
688 | |||
689 | return 0; | ||
690 | |||
691 | nfattr_failure: | ||
692 | return -1; | ||
693 | } | ||
694 | |||
/* IPCTNL_MSG_CT_DELETE handler: destroy a single conntrack looked up by
 * its original or reply tuple (optionally cross-checked against CTA_ID),
 * or flush the whole table when no tuple attribute is present. */
static int
ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
{
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack_tuple tuple;
	struct ip_conntrack *ct;
	int err = 0;

	DEBUGP("entered %s\n", __FUNCTION__);

	if (cda[CTA_TUPLE_ORIG-1])
		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
	else if (cda[CTA_TUPLE_REPLY-1])
		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY);
	else {
		/* Flush the whole table */
		ip_conntrack_flush();
		return 0;
	}

	if (err < 0)
		return err;

	/* takes a reference on the conntrack; dropped on all paths below */
	h = ip_conntrack_find_get(&tuple, NULL);
	if (!h) {
		DEBUGP("tuple not found in conntrack hash\n");
		return -ENOENT;
	}

	ct = tuplehash_to_ctrack(h);

	/* optional check of the entry's unique id to avoid deleting a
	 * recycled entry that merely reuses the same tuple */
	if (cda[CTA_ID-1]) {
		u_int32_t id = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_ID-1]));
		if (ct->id != id) {
			ip_conntrack_put(ct);
			return -ENOENT;
		}
	}
	if (del_timer(&ct->timeout)) {
		/* timer was still pending: invoke the timeout handler by
		 * hand, which unlinks and destroys the conntrack */
		ip_conntrack_put(ct);
		ct->timeout.function((unsigned long)ct);
		return 0;
	}
	/* timer already fired: the entry is on its way out anyway */
	ip_conntrack_put(ct);
	DEBUGP("leaving\n");

	return 0;
}
744 | |||
/* IPCTNL_MSG_CT_GET handler: either start a full table dump
 * (NLM_F_DUMP) or look up a single conntrack by tuple, build a message
 * for it and unicast it back to the requester. */
static int
ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
{
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack_tuple tuple;
	struct ip_conntrack *ct;
	struct sk_buff *skb2 = NULL;
	int err = 0;

	DEBUGP("entered %s\n", __FUNCTION__);

	if (nlh->nlmsg_flags & NLM_F_DUMP) {
		struct nfgenmsg *msg = NLMSG_DATA(nlh);
		u32 rlen;

		/* this module only handles IPv4 conntrack */
		if (msg->nfgen_family != AF_INET)
			return -EAFNOSUPPORT;

		/* CTRZERO additionally zeroes counters while dumping,
		 * which only exists with accounting compiled in */
		if (NFNL_MSG_TYPE(nlh->nlmsg_type) ==
		    IPCTNL_MSG_CT_GET_CTRZERO) {
#ifdef CONFIG_IP_NF_CT_ACCT
			if ((*errp = netlink_dump_start(ctnl, skb, nlh,
						ctnetlink_dump_table_w,
						ctnetlink_done)) != 0)
				return -EINVAL;
#else
			return -ENOTSUPP;
#endif
		} else {
			if ((*errp = netlink_dump_start(ctnl, skb, nlh,
							ctnetlink_dump_table,
						        ctnetlink_done)) != 0)
				return -EINVAL;
		}

		/* consume this request from the receive skb */
		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
		if (rlen > skb->len)
			rlen = skb->len;
		skb_pull(skb, rlen);
		return 0;
	}

	if (cda[CTA_TUPLE_ORIG-1])
		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
	else if (cda[CTA_TUPLE_REPLY-1])
		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY);
	else
		return -EINVAL;

	if (err < 0)
		return err;

	/* takes a reference on the conntrack; dropped once the reply is
	 * built */
	h = ip_conntrack_find_get(&tuple, NULL);
	if (!h) {
		DEBUGP("tuple not found in conntrack hash");
		return -ENOENT;
	}
	DEBUGP("tuple found\n");
	ct = tuplehash_to_ctrack(h);

	err = -ENOMEM;
	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
	if (!skb2) {
		ip_conntrack_put(ct);
		return -ENOMEM;
	}
	NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid;

	err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq,
				  IPCTNL_MSG_CT_NEW, 1, ct);
	ip_conntrack_put(ct);
	if (err <= 0)
		goto out;

	/* netlink_unicast consumes skb2 on success */
	err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
	if (err < 0)
		goto out;

	DEBUGP("leaving\n");
	return 0;

out:
	/* NOTE(review): the specific error held in 'err' is discarded and
	 * a generic -1 returned — confirm callers don't need the errno */
	if (skb2)
		kfree_skb(skb2);
	return -1;
}
832 | |||
/* Apply a userspace-supplied CTA_STATUS word to @ct, optionally
 * configuring NAT from a CTA_NAT attribute.  Kernel-owned bits
 * (EXPECTED/CONFIRMED/DYING) must not differ from the current value,
 * and SEEN_REPLY/ASSURED may only be set, never cleared.
 * Note: callers must guarantee cda[CTA_STATUS-1] is present — it is
 * dereferenced unconditionally below. */
static inline int
ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[])
{
	/* d collects the bits that differ between old and requested status */
	unsigned long d, status = *(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1]);
	d = ct->status ^ status;

	if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
		/* unchangeable */
		return -EINVAL;

	if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY))
		/* SEEN_REPLY bit can only be set */
		return -EINVAL;


	if (d & IPS_ASSURED && !(status & IPS_ASSURED))
		/* ASSURED bit can only be set */
		return -EINVAL;

	if (cda[CTA_NAT-1]) {
#ifndef CONFIG_IP_NF_NAT_NEEDED
		return -EINVAL;
#else
		unsigned int hooknum;
		struct ip_nat_range range;

		if (ctnetlink_parse_nat(cda, ct, &range) < 0)
			return -EINVAL;

		/* NOTE(review): htons() applied to values for display —
		 * looks like it should be ntohs(); debug output only,
		 * confirm intent */
		DEBUGP("NAT: %u.%u.%u.%u-%u.%u.%u.%u:%u-%u\n",
		       NIPQUAD(range.min_ip), NIPQUAD(range.max_ip),
		       htons(range.min.all), htons(range.max.all));

		/* This is tricky but it works. ip_nat_setup_info needs the
		 * hook number as parameter, so let's do the correct
		 * conversion and run away */
		if (status & IPS_SRC_NAT_DONE)
			hooknum = NF_IP_POST_ROUTING; /* IP_NAT_MANIP_SRC */
		else if (status & IPS_DST_NAT_DONE)
			hooknum = NF_IP_PRE_ROUTING;  /* IP_NAT_MANIP_DST */
		else
			return -EINVAL; /* Missing NAT flags */

		DEBUGP("NAT status: %lu\n",
		       status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK));

		/* refuse to re-NAT a direction that is already set up */
		if (ip_nat_initialized(ct, hooknum))
			return -EEXIST;
		ip_nat_setup_info(ct, &range, hooknum);

		DEBUGP("NAT status after setup_info: %lu\n",
		       ct->status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK));
#endif
	}

	/* Be careful here, modifying NAT bits can screw up things,
	 * so don't let users modify them directly if they don't pass
	 * ip_nat_range. */
	ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
	return 0;
}
894 | |||
895 | |||
896 | static inline int | ||
897 | ctnetlink_change_helper(struct ip_conntrack *ct, struct nfattr *cda[]) | ||
898 | { | ||
899 | struct ip_conntrack_helper *helper; | ||
900 | char *helpname; | ||
901 | int err; | ||
902 | |||
903 | DEBUGP("entered %s\n", __FUNCTION__); | ||
904 | |||
905 | /* don't change helper of sibling connections */ | ||
906 | if (ct->master) | ||
907 | return -EINVAL; | ||
908 | |||
909 | err = ctnetlink_parse_help(cda[CTA_HELP-1], &helpname); | ||
910 | if (err < 0) | ||
911 | return err; | ||
912 | |||
913 | helper = __ip_conntrack_helper_find_byname(helpname); | ||
914 | if (!helper) { | ||
915 | if (!strcmp(helpname, "")) | ||
916 | helper = NULL; | ||
917 | else | ||
918 | return -EINVAL; | ||
919 | } | ||
920 | |||
921 | if (ct->helper) { | ||
922 | if (!helper) { | ||
923 | /* we had a helper before ... */ | ||
924 | ip_ct_remove_expectations(ct); | ||
925 | ct->helper = NULL; | ||
926 | } else { | ||
927 | /* need to zero data of old helper */ | ||
928 | memset(&ct->help, 0, sizeof(ct->help)); | ||
929 | } | ||
930 | } | ||
931 | |||
932 | ct->helper = helper; | ||
933 | |||
934 | return 0; | ||
935 | } | ||
936 | |||
937 | static inline int | ||
938 | ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[]) | ||
939 | { | ||
940 | u_int32_t timeout = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1])); | ||
941 | |||
942 | if (!del_timer(&ct->timeout)) | ||
943 | return -ETIME; | ||
944 | |||
945 | ct->timeout.expires = jiffies + timeout * HZ; | ||
946 | add_timer(&ct->timeout); | ||
947 | |||
948 | return 0; | ||
949 | } | ||
950 | |||
951 | static int | ||
952 | ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[]) | ||
953 | { | ||
954 | int err; | ||
955 | |||
956 | DEBUGP("entered %s\n", __FUNCTION__); | ||
957 | |||
958 | if (cda[CTA_HELP-1]) { | ||
959 | err = ctnetlink_change_helper(ct, cda); | ||
960 | if (err < 0) | ||
961 | return err; | ||
962 | } | ||
963 | |||
964 | if (cda[CTA_TIMEOUT-1]) { | ||
965 | err = ctnetlink_change_timeout(ct, cda); | ||
966 | if (err < 0) | ||
967 | return err; | ||
968 | } | ||
969 | |||
970 | if (cda[CTA_STATUS-1]) { | ||
971 | err = ctnetlink_change_status(ct, cda); | ||
972 | if (err < 0) | ||
973 | return err; | ||
974 | } | ||
975 | |||
976 | DEBUGP("all done\n"); | ||
977 | return 0; | ||
978 | } | ||
979 | |||
980 | static int | ||
981 | ctnetlink_create_conntrack(struct nfattr *cda[], | ||
982 | struct ip_conntrack_tuple *otuple, | ||
983 | struct ip_conntrack_tuple *rtuple) | ||
984 | { | ||
985 | struct ip_conntrack *ct; | ||
986 | int err = -EINVAL; | ||
987 | |||
988 | DEBUGP("entered %s\n", __FUNCTION__); | ||
989 | |||
990 | ct = ip_conntrack_alloc(otuple, rtuple); | ||
991 | if (ct == NULL || IS_ERR(ct)) | ||
992 | return -ENOMEM; | ||
993 | |||
994 | if (!cda[CTA_TIMEOUT-1]) | ||
995 | goto err; | ||
996 | ct->timeout.expires = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1])); | ||
997 | |||
998 | ct->timeout.expires = jiffies + ct->timeout.expires * HZ; | ||
999 | ct->status |= IPS_CONFIRMED; | ||
1000 | |||
1001 | err = ctnetlink_change_status(ct, cda); | ||
1002 | if (err < 0) | ||
1003 | goto err; | ||
1004 | |||
1005 | ct->helper = ip_conntrack_helper_find_get(rtuple); | ||
1006 | |||
1007 | add_timer(&ct->timeout); | ||
1008 | ip_conntrack_hash_insert(ct); | ||
1009 | |||
1010 | if (ct->helper) | ||
1011 | ip_conntrack_helper_put(ct->helper); | ||
1012 | |||
1013 | DEBUGP("conntrack with id %u inserted\n", ct->id); | ||
1014 | return 0; | ||
1015 | |||
1016 | err: | ||
1017 | ip_conntrack_free(ct); | ||
1018 | return err; | ||
1019 | } | ||
1020 | |||
/* IPCTNL_MSG_CT_NEW handler: create a new conntrack (NLM_F_CREATE) or
 * update an existing one found by its orig/reply tuple.  NAT
 * configuration is only accepted for newly-created entries. */
static int
ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
{
	struct ip_conntrack_tuple otuple, rtuple;
	struct ip_conntrack_tuple_hash *h = NULL;
	int err = 0;

	DEBUGP("entered %s\n", __FUNCTION__);

	if (cda[CTA_TUPLE_ORIG-1]) {
		err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG);
		if (err < 0)
			return err;
	}

	if (cda[CTA_TUPLE_REPLY-1]) {
		err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY);
		if (err < 0)
			return err;
	}

	write_lock_bh(&ip_conntrack_lock);
	if (cda[CTA_TUPLE_ORIG-1])
		h = __ip_conntrack_find(&otuple, NULL);
	else if (cda[CTA_TUPLE_REPLY-1])
		h = __ip_conntrack_find(&rtuple, NULL);

	if (h == NULL) {
		/* creation runs outside the lock */
		write_unlock_bh(&ip_conntrack_lock);
		DEBUGP("no such conntrack, create new\n");
		err = -ENOENT;
		/* NOTE(review): if only one of the two tuple attributes
		 * was supplied, the other tuple is passed down
		 * uninitialized — confirm userspace must send both for
		 * create */
		if (nlh->nlmsg_flags & NLM_F_CREATE)
			err = ctnetlink_create_conntrack(cda, &otuple, &rtuple);
		return err;
	}
	/* implicit 'else' */

	/* we only allow nat config for new conntracks */
	if (cda[CTA_NAT-1]) {
		err = -EINVAL;
		goto out_unlock;
	}

	/* We manipulate the conntrack inside the global conntrack table lock,
	 * so there's no need to increase the refcount */
	DEBUGP("conntrack found\n");
	err = -EEXIST;
	if (!(nlh->nlmsg_flags & NLM_F_EXCL))
		err = ctnetlink_change_conntrack(tuplehash_to_ctrack(h), cda);

out_unlock:
	write_unlock_bh(&ip_conntrack_lock);
	return err;
}
1076 | |||
1077 | /*********************************************************************** | ||
1078 | * EXPECT | ||
1079 | ***********************************************************************/ | ||
1080 | |||
1081 | static inline int | ||
1082 | ctnetlink_exp_dump_tuple(struct sk_buff *skb, | ||
1083 | const struct ip_conntrack_tuple *tuple, | ||
1084 | enum ctattr_expect type) | ||
1085 | { | ||
1086 | struct nfattr *nest_parms = NFA_NEST(skb, type); | ||
1087 | |||
1088 | if (ctnetlink_dump_tuples(skb, tuple) < 0) | ||
1089 | goto nfattr_failure; | ||
1090 | |||
1091 | NFA_NEST_END(skb, nest_parms); | ||
1092 | |||
1093 | return 0; | ||
1094 | |||
1095 | nfattr_failure: | ||
1096 | return -1; | ||
1097 | } | ||
1098 | |||
/* Emit the attributes describing one expectation: its tuple, mask,
 * master tuple, remaining timeout and unique id.  NFA_PUT and the dump
 * helpers jump to nfattr_failure when the skb runs out of room. */
static inline int
ctnetlink_exp_dump_expect(struct sk_buff *skb,
                          const struct ip_conntrack_expect *exp)
{
	struct ip_conntrack *master = exp->master;
	/* remaining lifetime in seconds, network byte order */
	u_int32_t timeout = htonl((exp->timeout.expires - jiffies) / HZ);
	u_int32_t id = htonl(exp->id);

	if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0)
		goto nfattr_failure;
	if (ctnetlink_exp_dump_tuple(skb, &exp->mask, CTA_EXPECT_MASK) < 0)
		goto nfattr_failure;
	if (ctnetlink_exp_dump_tuple(skb,
				 &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				 CTA_EXPECT_MASTER) < 0)
		goto nfattr_failure;

	NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout);
	NFA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id);

	return 0;

nfattr_failure:
	return -1;
}
1124 | |||
/* Build one complete expectation netlink message (nlmsghdr + nfgenmsg +
 * attributes) into @skb.  On overflow the partially-built message is
 * trimmed away again.  Returns the new skb length, or -1 on failure. */
static int
ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
		        int event,
			int nowait,
			const struct ip_conntrack_expect *exp)
{
	struct nlmsghdr *nlh;
	struct nfgenmsg *nfmsg;
	unsigned char *b;

	/* remember the message start so we can compute the final length
	 * or roll back on failure */
	b = skb->tail;

	event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
	nfmsg = NLMSG_DATA(nlh);

	nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
	nfmsg->nfgen_family = AF_INET;
	nfmsg->version = NFNETLINK_V0;
	nfmsg->res_id = 0;

	if (ctnetlink_exp_dump_expect(skb, exp) < 0)
		goto nfattr_failure;

	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
nfattr_failure:
	/* undo the partially-built message */
	skb_trim(skb, b - skb->data);
	return -1;
}
1157 | |||
1158 | #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS | ||
1159 | static int ctnetlink_expect_event(struct notifier_block *this, | ||
1160 | unsigned long events, void *ptr) | ||
1161 | { | ||
1162 | struct nlmsghdr *nlh; | ||
1163 | struct nfgenmsg *nfmsg; | ||
1164 | struct ip_conntrack_expect *exp = (struct ip_conntrack_expect *)ptr; | ||
1165 | struct sk_buff *skb; | ||
1166 | unsigned int type; | ||
1167 | unsigned char *b; | ||
1168 | int flags = 0; | ||
1169 | u16 proto; | ||
1170 | |||
1171 | if (events & IPEXP_NEW) { | ||
1172 | type = IPCTNL_MSG_EXP_NEW; | ||
1173 | flags = NLM_F_CREATE|NLM_F_EXCL; | ||
1174 | } else | ||
1175 | return NOTIFY_DONE; | ||
1176 | |||
1177 | skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); | ||
1178 | if (!skb) | ||
1179 | return NOTIFY_DONE; | ||
1180 | |||
1181 | b = skb->tail; | ||
1182 | |||
1183 | type |= NFNL_SUBSYS_CTNETLINK << 8; | ||
1184 | nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg)); | ||
1185 | nfmsg = NLMSG_DATA(nlh); | ||
1186 | |||
1187 | nlh->nlmsg_flags = flags; | ||
1188 | nfmsg->nfgen_family = AF_INET; | ||
1189 | nfmsg->version = NFNETLINK_V0; | ||
1190 | nfmsg->res_id = 0; | ||
1191 | |||
1192 | if (ctnetlink_exp_dump_expect(skb, exp) < 0) | ||
1193 | goto nfattr_failure; | ||
1194 | |||
1195 | nlh->nlmsg_len = skb->tail - b; | ||
1196 | proto = exp->tuple.dst.protonum; | ||
1197 | nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0); | ||
1198 | return NOTIFY_DONE; | ||
1199 | |||
1200 | nlmsg_failure: | ||
1201 | nfattr_failure: | ||
1202 | kfree_skb(skb); | ||
1203 | return NOTIFY_DONE; | ||
1204 | } | ||
1205 | #endif | ||
1206 | |||
1207 | static int | ||
1208 | ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) | ||
1209 | { | ||
1210 | struct ip_conntrack_expect *exp = NULL; | ||
1211 | struct list_head *i; | ||
1212 | u_int32_t *id = (u_int32_t *) &cb->args[0]; | ||
1213 | |||
1214 | DEBUGP("entered %s, last id=%llu\n", __FUNCTION__, *id); | ||
1215 | |||
1216 | read_lock_bh(&ip_conntrack_lock); | ||
1217 | list_for_each_prev(i, &ip_conntrack_expect_list) { | ||
1218 | exp = (struct ip_conntrack_expect *) i; | ||
1219 | if (exp->id <= *id) | ||
1220 | continue; | ||
1221 | if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid, | ||
1222 | cb->nlh->nlmsg_seq, | ||
1223 | IPCTNL_MSG_EXP_NEW, | ||
1224 | 1, exp) < 0) | ||
1225 | goto out; | ||
1226 | *id = exp->id; | ||
1227 | } | ||
1228 | out: | ||
1229 | read_unlock_bh(&ip_conntrack_lock); | ||
1230 | |||
1231 | DEBUGP("leaving, last id=%llu\n", *id); | ||
1232 | |||
1233 | return skb->len; | ||
1234 | } | ||
1235 | |||
/* IPCTNL_MSG_EXP_GET handler: either dump the whole expectation table
 * (NLM_F_DUMP) or look up a single expectation by its master tuple and
 * unicast it back to the requester. */
static int
ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
{
	struct ip_conntrack_tuple tuple;
	struct ip_conntrack_expect *exp;
	struct sk_buff *skb2;
	int err = 0;

	DEBUGP("entered %s\n", __FUNCTION__);

	if (nlh->nlmsg_flags & NLM_F_DUMP) {
		struct nfgenmsg *msg = NLMSG_DATA(nlh);
		u32 rlen;

		/* this module only handles IPv4 */
		if (msg->nfgen_family != AF_INET)
			return -EAFNOSUPPORT;

		if ((*errp = netlink_dump_start(ctnl, skb, nlh,
		    				ctnetlink_exp_dump_table,
						ctnetlink_done)) != 0)
			return -EINVAL;
		/* consume this request from the receive skb */
		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
		if (rlen > skb->len)
			rlen = skb->len;
		skb_pull(skb, rlen);
		return 0;
	}

	if (cda[CTA_EXPECT_MASTER-1])
		err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER);
	else
		return -EINVAL;

	if (err < 0)
		return err;

	/* takes a reference; released on every exit path below */
	exp = ip_conntrack_expect_find_get(&tuple);
	if (!exp)
		return -ENOENT;

	err = -ENOMEM;
	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb2)
		goto out;
	NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid;

	err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid,
				      nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
				      1, exp);
	if (err <= 0)
		goto out;

	ip_conntrack_expect_put(exp);

	/* netlink_unicast consumes skb2 on success */
	err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
	if (err < 0)
		goto free;

	return err;

out:
	ip_conntrack_expect_put(exp);
free:
	if (skb2)
		kfree_skb(skb2);
	return err;
}
1304 | |||
1305 | static int | ||
1306 | ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, | ||
1307 | struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) | ||
1308 | { | ||
1309 | struct ip_conntrack_expect *exp, *tmp; | ||
1310 | struct ip_conntrack_tuple tuple; | ||
1311 | struct ip_conntrack_helper *h; | ||
1312 | int err; | ||
1313 | |||
1314 | if (cda[CTA_EXPECT_TUPLE-1]) { | ||
1315 | /* delete a single expect by tuple */ | ||
1316 | err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE); | ||
1317 | if (err < 0) | ||
1318 | return err; | ||
1319 | |||
1320 | /* bump usage count to 2 */ | ||
1321 | exp = ip_conntrack_expect_find_get(&tuple); | ||
1322 | if (!exp) | ||
1323 | return -ENOENT; | ||
1324 | |||
1325 | if (cda[CTA_EXPECT_ID-1]) { | ||
1326 | u_int32_t id = | ||
1327 | *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]); | ||
1328 | if (exp->id != ntohl(id)) { | ||
1329 | ip_conntrack_expect_put(exp); | ||
1330 | return -ENOENT; | ||
1331 | } | ||
1332 | } | ||
1333 | |||
1334 | /* after list removal, usage count == 1 */ | ||
1335 | ip_conntrack_unexpect_related(exp); | ||
1336 | /* have to put what we 'get' above. | ||
1337 | * after this line usage count == 0 */ | ||
1338 | ip_conntrack_expect_put(exp); | ||
1339 | } else if (cda[CTA_EXPECT_HELP_NAME-1]) { | ||
1340 | char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]); | ||
1341 | |||
1342 | /* delete all expectations for this helper */ | ||
1343 | write_lock_bh(&ip_conntrack_lock); | ||
1344 | h = __ip_conntrack_helper_find_byname(name); | ||
1345 | if (!h) { | ||
1346 | write_unlock_bh(&ip_conntrack_lock); | ||
1347 | return -EINVAL; | ||
1348 | } | ||
1349 | list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, | ||
1350 | list) { | ||
1351 | if (exp->master->helper == h | ||
1352 | && del_timer(&exp->timeout)) | ||
1353 | __ip_ct_expect_unlink_destroy(exp); | ||
1354 | } | ||
1355 | write_unlock(&ip_conntrack_lock); | ||
1356 | } else { | ||
1357 | /* This basically means we have to flush everything*/ | ||
1358 | write_lock_bh(&ip_conntrack_lock); | ||
1359 | list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, | ||
1360 | list) { | ||
1361 | if (del_timer(&exp->timeout)) | ||
1362 | __ip_ct_expect_unlink_destroy(exp); | ||
1363 | } | ||
1364 | write_unlock_bh(&ip_conntrack_lock); | ||
1365 | } | ||
1366 | |||
1367 | return 0; | ||
1368 | } | ||
/* Changing an existing expectation is not implemented; callers always
 * get -EOPNOTSUPP. */
static int
ctnetlink_change_expect(struct ip_conntrack_expect *x, struct nfattr *cda[])
{
	return -EOPNOTSUPP;
}
1374 | |||
/* Build and register a new expectation from the CTA_EXPECT_* tuple,
 * mask and master attributes.  The master conntrack must already exist
 * and have a helper attached. */
static int
ctnetlink_create_expect(struct nfattr *cda[])
{
	struct ip_conntrack_tuple tuple, mask, master_tuple;
	struct ip_conntrack_tuple_hash *h = NULL;
	struct ip_conntrack_expect *exp;
	struct ip_conntrack *ct;
	int err = 0;

	DEBUGP("entered %s\n", __FUNCTION__);

	/* caller guarantees that those three CTA_EXPECT_* exist */
	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
	if (err < 0)
		return err;
	err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK);
	if (err < 0)
		return err;
	err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER);
	if (err < 0)
		return err;

	/* Look for master conntrack of this expectation */
	h = ip_conntrack_find_get(&master_tuple, NULL);
	if (!h)
		return -ENOENT;
	ct = tuplehash_to_ctrack(h);

	if (!ct->helper) {
		/* such conntrack hasn't got any helper, abort */
		err = -EINVAL;
		goto out;
	}

	exp = ip_conntrack_expect_alloc(ct);
	if (!exp) {
		err = -ENOMEM;
		goto out;
	}

	exp->expectfn = NULL;
	exp->master = ct;
	memcpy(&exp->tuple, &tuple, sizeof(struct ip_conntrack_tuple));
	memcpy(&exp->mask, &mask, sizeof(struct ip_conntrack_tuple));

	err = ip_conntrack_expect_related(exp);
	/* drop the allocation reference; the expect list holds its own */
	ip_conntrack_expect_put(exp);

out:
	/* release the reference taken by ip_conntrack_find_get() */
	ip_conntrack_put(tuplehash_to_ctrack(h));
	return err;
}
1427 | |||
/* IPCTNL_MSG_EXP_NEW handler: create (NLM_F_CREATE) or change an
 * expectation.  Tuple, mask and master attributes are all mandatory. */
static int
ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
{
	struct ip_conntrack_tuple tuple;
	struct ip_conntrack_expect *exp;
	int err = 0;

	DEBUGP("entered %s\n", __FUNCTION__);

	if (!cda[CTA_EXPECT_TUPLE-1]
	    || !cda[CTA_EXPECT_MASK-1]
	    || !cda[CTA_EXPECT_MASTER-1])
		return -EINVAL;

	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
	if (err < 0)
		return err;

	write_lock_bh(&ip_conntrack_lock);
	exp = __ip_conntrack_expect_find(&tuple);

	if (!exp) {
		/* not found — creation runs outside the lock */
		write_unlock_bh(&ip_conntrack_lock);
		err = -ENOENT;
		if (nlh->nlmsg_flags & NLM_F_CREATE)
			err = ctnetlink_create_expect(cda);
		return err;
	}

	err = -EEXIST;
	if (!(nlh->nlmsg_flags & NLM_F_EXCL))
		err = ctnetlink_change_expect(exp, cda);
	write_unlock_bh(&ip_conntrack_lock);

	DEBUGP("leaving\n");

	return err;
}
1467 | |||
1468 | #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS | ||
/* notifier forwarding conntrack events to nfnetlink listeners */
static struct notifier_block ctnl_notifier = {
	.notifier_call	= ctnetlink_conntrack_event,
};

/* notifier forwarding new-expectation events to nfnetlink listeners */
static struct notifier_block ctnl_notifier_exp = {
	.notifier_call	= ctnetlink_expect_event,
};
1476 | #endif | ||
1477 | |||
/* message handlers of the "conntrack" subsystem; every operation
 * requires CAP_NET_ADMIN */
static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
	[IPCTNL_MSG_CT_NEW]		= { .call = ctnetlink_new_conntrack,
					    .attr_count = CTA_MAX,
					    .cap_required = CAP_NET_ADMIN },
	[IPCTNL_MSG_CT_GET] 		= { .call = ctnetlink_get_conntrack,
					    .attr_count = CTA_MAX,
					    .cap_required = CAP_NET_ADMIN },
	[IPCTNL_MSG_CT_DELETE]  	= { .call = ctnetlink_del_conntrack,
					    .attr_count = CTA_MAX,
					    .cap_required = CAP_NET_ADMIN },
	[IPCTNL_MSG_CT_GET_CTRZERO] 	= { .call = ctnetlink_get_conntrack,
					    .attr_count = CTA_MAX,
					    .cap_required = CAP_NET_ADMIN },
};
1492 | |||
/* message handlers of the "conntrack_expect" subsystem; every operation
 * requires CAP_NET_ADMIN */
static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
	[IPCTNL_MSG_EXP_GET]		= { .call = ctnetlink_get_expect,
					    .attr_count = CTA_EXPECT_MAX,
					    .cap_required = CAP_NET_ADMIN },
	[IPCTNL_MSG_EXP_NEW]		= { .call = ctnetlink_new_expect,
					    .attr_count = CTA_EXPECT_MAX,
					    .cap_required = CAP_NET_ADMIN },
	[IPCTNL_MSG_EXP_DELETE]		= { .call = ctnetlink_del_expect,
					    .attr_count = CTA_EXPECT_MAX,
					    .cap_required = CAP_NET_ADMIN },
};
1504 | |||
/* nfnetlink subsystem descriptor for conntrack messages */
static struct nfnetlink_subsystem ctnl_subsys = {
	.name				= "conntrack",
	.subsys_id			= NFNL_SUBSYS_CTNETLINK,
	.cb_count			= IPCTNL_MSG_MAX,
	.cb				= ctnl_cb,
};
1511 | |||
/* nfnetlink subsystem descriptor for expectation messages */
static struct nfnetlink_subsystem ctnl_exp_subsys = {
	.name				= "conntrack_expect",
	.subsys_id			= NFNL_SUBSYS_CTNETLINK_EXP,
	.cb_count			= IPCTNL_MSG_EXP_MAX,
	.cb				= ctnl_exp_cb,
};
1518 | |||
/* Module load: register both nfnetlink subsystems and, when event
 * support is compiled in, the conntrack and expectation event
 * notifiers.  Any failure unwinds the prior registrations in reverse
 * order via the goto chain below. */
static int __init ctnetlink_init(void)
{
	int ret;

	printk("ctnetlink v%s: registering with nfnetlink.\n", version);
	ret = nfnetlink_subsys_register(&ctnl_subsys);
	if (ret < 0) {
		printk("ctnetlink_init: cannot register with nfnetlink.\n");
		goto err_out;
	}

	ret = nfnetlink_subsys_register(&ctnl_exp_subsys);
	if (ret < 0) {
		printk("ctnetlink_init: cannot register exp with nfnetlink.\n");
		goto err_unreg_subsys;
	}

#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
	ret = ip_conntrack_register_notifier(&ctnl_notifier);
	if (ret < 0) {
		printk("ctnetlink_init: cannot register notifier.\n");
		goto err_unreg_exp_subsys;
	}

	ret = ip_conntrack_expect_register_notifier(&ctnl_notifier_exp);
	if (ret < 0) {
		printk("ctnetlink_init: cannot expect register notifier.\n");
		goto err_unreg_notifier;
	}
#endif

	return 0;

	/* error unwinding: undo registrations in reverse order */
#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
err_unreg_notifier:
	ip_conntrack_unregister_notifier(&ctnl_notifier);
err_unreg_exp_subsys:
	nfnetlink_subsys_unregister(&ctnl_exp_subsys);
#endif
err_unreg_subsys:
	nfnetlink_subsys_unregister(&ctnl_subsys);
err_out:
	return ret;
}
1563 | |||
1564 | static void __exit ctnetlink_exit(void) | ||
1565 | { | ||
1566 | printk("ctnetlink: unregistering from nfnetlink.\n"); | ||
1567 | |||
1568 | #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS | ||
1569 | ip_conntrack_unregister_notifier(&ctnl_notifier_exp); | ||
1570 | ip_conntrack_unregister_notifier(&ctnl_notifier); | ||
1571 | #endif | ||
1572 | |||
1573 | nfnetlink_subsys_unregister(&ctnl_exp_subsys); | ||
1574 | nfnetlink_subsys_unregister(&ctnl_subsys); | ||
1575 | return; | ||
1576 | } | ||
1577 | |||
/* module entry/exit hooks */
module_init(ctnetlink_init);
module_exit(ctnetlink_exit);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index 602c74db3252..838d1d69b36e 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c | |||
@@ -102,22 +102,24 @@ static int icmp_packet(struct ip_conntrack *ct, | |||
102 | ct->timeout.function((unsigned long)ct); | 102 | ct->timeout.function((unsigned long)ct); |
103 | } else { | 103 | } else { |
104 | atomic_inc(&ct->proto.icmp.count); | 104 | atomic_inc(&ct->proto.icmp.count); |
105 | ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); | ||
105 | ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout); | 106 | ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout); |
106 | } | 107 | } |
107 | 108 | ||
108 | return NF_ACCEPT; | 109 | return NF_ACCEPT; |
109 | } | 110 | } |
110 | 111 | ||
112 | static u_int8_t valid_new[] = { | ||
113 | [ICMP_ECHO] = 1, | ||
114 | [ICMP_TIMESTAMP] = 1, | ||
115 | [ICMP_INFO_REQUEST] = 1, | ||
116 | [ICMP_ADDRESS] = 1 | ||
117 | }; | ||
118 | |||
111 | /* Called when a new connection for this protocol found. */ | 119 | /* Called when a new connection for this protocol found. */ |
112 | static int icmp_new(struct ip_conntrack *conntrack, | 120 | static int icmp_new(struct ip_conntrack *conntrack, |
113 | const struct sk_buff *skb) | 121 | const struct sk_buff *skb) |
114 | { | 122 | { |
115 | static u_int8_t valid_new[] | ||
116 | = { [ICMP_ECHO] = 1, | ||
117 | [ICMP_TIMESTAMP] = 1, | ||
118 | [ICMP_INFO_REQUEST] = 1, | ||
119 | [ICMP_ADDRESS] = 1 }; | ||
120 | |||
121 | if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) | 123 | if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) |
122 | || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { | 124 | || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { |
123 | /* Can't create a new ICMP `conn' with this. */ | 125 | /* Can't create a new ICMP `conn' with this. */ |
@@ -158,11 +160,12 @@ icmp_error_message(struct sk_buff *skb, | |||
158 | return NF_ACCEPT; | 160 | return NF_ACCEPT; |
159 | } | 161 | } |
160 | 162 | ||
161 | innerproto = ip_ct_find_proto(inside->ip.protocol); | 163 | innerproto = ip_conntrack_proto_find_get(inside->ip.protocol); |
162 | dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp) + inside->ip.ihl*4; | 164 | dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp) + inside->ip.ihl*4; |
163 | /* Are they talking about one of our connections? */ | 165 | /* Are they talking about one of our connections? */ |
164 | if (!ip_ct_get_tuple(&inside->ip, skb, dataoff, &origtuple, innerproto)) { | 166 | if (!ip_ct_get_tuple(&inside->ip, skb, dataoff, &origtuple, innerproto)) { |
165 | DEBUGP("icmp_error: ! get_tuple p=%u", inside->ip.protocol); | 167 | DEBUGP("icmp_error: ! get_tuple p=%u", inside->ip.protocol); |
168 | ip_conntrack_proto_put(innerproto); | ||
166 | return NF_ACCEPT; | 169 | return NF_ACCEPT; |
167 | } | 170 | } |
168 | 171 | ||
@@ -170,8 +173,10 @@ icmp_error_message(struct sk_buff *skb, | |||
170 | been preserved inside the ICMP. */ | 173 | been preserved inside the ICMP. */ |
171 | if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) { | 174 | if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) { |
172 | DEBUGP("icmp_error_track: Can't invert tuple\n"); | 175 | DEBUGP("icmp_error_track: Can't invert tuple\n"); |
176 | ip_conntrack_proto_put(innerproto); | ||
173 | return NF_ACCEPT; | 177 | return NF_ACCEPT; |
174 | } | 178 | } |
179 | ip_conntrack_proto_put(innerproto); | ||
175 | 180 | ||
176 | *ctinfo = IP_CT_RELATED; | 181 | *ctinfo = IP_CT_RELATED; |
177 | 182 | ||
@@ -212,7 +217,7 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, | |||
212 | icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih); | 217 | icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih); |
213 | if (icmph == NULL) { | 218 | if (icmph == NULL) { |
214 | if (LOG_INVALID(IPPROTO_ICMP)) | 219 | if (LOG_INVALID(IPPROTO_ICMP)) |
215 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, | 220 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, |
216 | "ip_ct_icmp: short packet "); | 221 | "ip_ct_icmp: short packet "); |
217 | return -NF_ACCEPT; | 222 | return -NF_ACCEPT; |
218 | } | 223 | } |
@@ -226,13 +231,13 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, | |||
226 | if (!(u16)csum_fold(skb->csum)) | 231 | if (!(u16)csum_fold(skb->csum)) |
227 | break; | 232 | break; |
228 | if (LOG_INVALID(IPPROTO_ICMP)) | 233 | if (LOG_INVALID(IPPROTO_ICMP)) |
229 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, | 234 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, |
230 | "ip_ct_icmp: bad HW ICMP checksum "); | 235 | "ip_ct_icmp: bad HW ICMP checksum "); |
231 | return -NF_ACCEPT; | 236 | return -NF_ACCEPT; |
232 | case CHECKSUM_NONE: | 237 | case CHECKSUM_NONE: |
233 | if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) { | 238 | if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) { |
234 | if (LOG_INVALID(IPPROTO_ICMP)) | 239 | if (LOG_INVALID(IPPROTO_ICMP)) |
235 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, | 240 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, |
236 | "ip_ct_icmp: bad ICMP checksum "); | 241 | "ip_ct_icmp: bad ICMP checksum "); |
237 | return -NF_ACCEPT; | 242 | return -NF_ACCEPT; |
238 | } | 243 | } |
@@ -249,7 +254,7 @@ checksum_skipped: | |||
249 | */ | 254 | */ |
250 | if (icmph->type > NR_ICMP_TYPES) { | 255 | if (icmph->type > NR_ICMP_TYPES) { |
251 | if (LOG_INVALID(IPPROTO_ICMP)) | 256 | if (LOG_INVALID(IPPROTO_ICMP)) |
252 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, | 257 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, |
253 | "ip_ct_icmp: invalid ICMP type "); | 258 | "ip_ct_icmp: invalid ICMP type "); |
254 | return -NF_ACCEPT; | 259 | return -NF_ACCEPT; |
255 | } | 260 | } |
@@ -265,6 +270,47 @@ checksum_skipped: | |||
265 | return icmp_error_message(skb, ctinfo, hooknum); | 270 | return icmp_error_message(skb, ctinfo, hooknum); |
266 | } | 271 | } |
267 | 272 | ||
273 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
274 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
275 | static int icmp_tuple_to_nfattr(struct sk_buff *skb, | ||
276 | const struct ip_conntrack_tuple *t) | ||
277 | { | ||
278 | NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t), | ||
279 | &t->src.u.icmp.id); | ||
280 | NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t), | ||
281 | &t->dst.u.icmp.type); | ||
282 | NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t), | ||
283 | &t->dst.u.icmp.code); | ||
284 | |||
285 | if (t->dst.u.icmp.type >= sizeof(valid_new) | ||
286 | || !valid_new[t->dst.u.icmp.type]) | ||
287 | return -EINVAL; | ||
288 | |||
289 | return 0; | ||
290 | |||
291 | nfattr_failure: | ||
292 | return -1; | ||
293 | } | ||
294 | |||
295 | static int icmp_nfattr_to_tuple(struct nfattr *tb[], | ||
296 | struct ip_conntrack_tuple *tuple) | ||
297 | { | ||
298 | if (!tb[CTA_PROTO_ICMP_TYPE-1] | ||
299 | || !tb[CTA_PROTO_ICMP_CODE-1] | ||
300 | || !tb[CTA_PROTO_ICMP_ID-1]) | ||
301 | return -1; | ||
302 | |||
303 | tuple->dst.u.icmp.type = | ||
304 | *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]); | ||
305 | tuple->dst.u.icmp.code = | ||
306 | *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]); | ||
307 | tuple->src.u.icmp.id = | ||
308 | *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); | ||
309 | |||
310 | return 0; | ||
311 | } | ||
312 | #endif | ||
313 | |||
268 | struct ip_conntrack_protocol ip_conntrack_protocol_icmp = | 314 | struct ip_conntrack_protocol ip_conntrack_protocol_icmp = |
269 | { | 315 | { |
270 | .proto = IPPROTO_ICMP, | 316 | .proto = IPPROTO_ICMP, |
@@ -276,4 +322,9 @@ struct ip_conntrack_protocol ip_conntrack_protocol_icmp = | |||
276 | .packet = icmp_packet, | 322 | .packet = icmp_packet, |
277 | .new = icmp_new, | 323 | .new = icmp_new, |
278 | .error = icmp_error, | 324 | .error = icmp_error, |
325 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
326 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
327 | .tuple_to_nfattr = icmp_tuple_to_nfattr, | ||
328 | .nfattr_to_tuple = icmp_nfattr_to_tuple, | ||
329 | #endif | ||
279 | }; | 330 | }; |
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c index 31d75390bf12..a875f35e576d 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c | |||
@@ -404,6 +404,8 @@ static int sctp_packet(struct ip_conntrack *conntrack, | |||
404 | } | 404 | } |
405 | 405 | ||
406 | conntrack->proto.sctp.state = newconntrack; | 406 | conntrack->proto.sctp.state = newconntrack; |
407 | if (oldsctpstate != newconntrack) | ||
408 | ip_conntrack_event_cache(IPCT_PROTOINFO, skb); | ||
407 | write_unlock_bh(&sctp_lock); | 409 | write_unlock_bh(&sctp_lock); |
408 | } | 410 | } |
409 | 411 | ||
@@ -503,7 +505,12 @@ static struct ip_conntrack_protocol ip_conntrack_protocol_sctp = { | |||
503 | .packet = sctp_packet, | 505 | .packet = sctp_packet, |
504 | .new = sctp_new, | 506 | .new = sctp_new, |
505 | .destroy = NULL, | 507 | .destroy = NULL, |
506 | .me = THIS_MODULE | 508 | .me = THIS_MODULE, |
509 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
510 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
511 | .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, | ||
512 | .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, | ||
513 | #endif | ||
507 | }; | 514 | }; |
508 | 515 | ||
509 | #ifdef CONFIG_SYSCTL | 516 | #ifdef CONFIG_SYSCTL |
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 809dfed766d4..f23ef1f88c46 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c | |||
@@ -336,6 +336,23 @@ static int tcp_print_conntrack(struct seq_file *s, | |||
336 | return seq_printf(s, "%s ", tcp_conntrack_names[state]); | 336 | return seq_printf(s, "%s ", tcp_conntrack_names[state]); |
337 | } | 337 | } |
338 | 338 | ||
339 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
340 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
341 | static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, | ||
342 | const struct ip_conntrack *ct) | ||
343 | { | ||
344 | read_lock_bh(&tcp_lock); | ||
345 | NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), | ||
346 | &ct->proto.tcp.state); | ||
347 | read_unlock_bh(&tcp_lock); | ||
348 | |||
349 | return 0; | ||
350 | |||
351 | nfattr_failure: | ||
352 | return -1; | ||
353 | } | ||
354 | #endif | ||
355 | |||
339 | static unsigned int get_conntrack_index(const struct tcphdr *tcph) | 356 | static unsigned int get_conntrack_index(const struct tcphdr *tcph) |
340 | { | 357 | { |
341 | if (tcph->rst) return TCP_RST_SET; | 358 | if (tcph->rst) return TCP_RST_SET; |
@@ -699,7 +716,7 @@ static int tcp_in_window(struct ip_ct_tcp *state, | |||
699 | res = 1; | 716 | res = 1; |
700 | } else { | 717 | } else { |
701 | if (LOG_INVALID(IPPROTO_TCP)) | 718 | if (LOG_INVALID(IPPROTO_TCP)) |
702 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, | 719 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, |
703 | "ip_ct_tcp: %s ", | 720 | "ip_ct_tcp: %s ", |
704 | before(seq, sender->td_maxend + 1) ? | 721 | before(seq, sender->td_maxend + 1) ? |
705 | after(end, sender->td_end - receiver->td_maxwin - 1) ? | 722 | after(end, sender->td_end - receiver->td_maxwin - 1) ? |
@@ -798,7 +815,7 @@ static int tcp_error(struct sk_buff *skb, | |||
798 | sizeof(_tcph), &_tcph); | 815 | sizeof(_tcph), &_tcph); |
799 | if (th == NULL) { | 816 | if (th == NULL) { |
800 | if (LOG_INVALID(IPPROTO_TCP)) | 817 | if (LOG_INVALID(IPPROTO_TCP)) |
801 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, | 818 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, |
802 | "ip_ct_tcp: short packet "); | 819 | "ip_ct_tcp: short packet "); |
803 | return -NF_ACCEPT; | 820 | return -NF_ACCEPT; |
804 | } | 821 | } |
@@ -806,7 +823,7 @@ static int tcp_error(struct sk_buff *skb, | |||
806 | /* Not whole TCP header or malformed packet */ | 823 | /* Not whole TCP header or malformed packet */ |
807 | if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { | 824 | if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { |
808 | if (LOG_INVALID(IPPROTO_TCP)) | 825 | if (LOG_INVALID(IPPROTO_TCP)) |
809 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, | 826 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, |
810 | "ip_ct_tcp: truncated/malformed packet "); | 827 | "ip_ct_tcp: truncated/malformed packet "); |
811 | return -NF_ACCEPT; | 828 | return -NF_ACCEPT; |
812 | } | 829 | } |
@@ -823,7 +840,7 @@ static int tcp_error(struct sk_buff *skb, | |||
823 | skb->ip_summed == CHECKSUM_HW ? skb->csum | 840 | skb->ip_summed == CHECKSUM_HW ? skb->csum |
824 | : skb_checksum(skb, iph->ihl*4, tcplen, 0))) { | 841 | : skb_checksum(skb, iph->ihl*4, tcplen, 0))) { |
825 | if (LOG_INVALID(IPPROTO_TCP)) | 842 | if (LOG_INVALID(IPPROTO_TCP)) |
826 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, | 843 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, |
827 | "ip_ct_tcp: bad TCP checksum "); | 844 | "ip_ct_tcp: bad TCP checksum "); |
828 | return -NF_ACCEPT; | 845 | return -NF_ACCEPT; |
829 | } | 846 | } |
@@ -832,7 +849,7 @@ static int tcp_error(struct sk_buff *skb, | |||
832 | tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR)); | 849 | tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR)); |
833 | if (!tcp_valid_flags[tcpflags]) { | 850 | if (!tcp_valid_flags[tcpflags]) { |
834 | if (LOG_INVALID(IPPROTO_TCP)) | 851 | if (LOG_INVALID(IPPROTO_TCP)) |
835 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, | 852 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, |
836 | "ip_ct_tcp: invalid TCP flag combination "); | 853 | "ip_ct_tcp: invalid TCP flag combination "); |
837 | return -NF_ACCEPT; | 854 | return -NF_ACCEPT; |
838 | } | 855 | } |
@@ -880,8 +897,9 @@ static int tcp_packet(struct ip_conntrack *conntrack, | |||
880 | */ | 897 | */ |
881 | write_unlock_bh(&tcp_lock); | 898 | write_unlock_bh(&tcp_lock); |
882 | if (LOG_INVALID(IPPROTO_TCP)) | 899 | if (LOG_INVALID(IPPROTO_TCP)) |
883 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, | 900 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, |
884 | "ip_ct_tcp: killing out of sync session "); | 901 | NULL, "ip_ct_tcp: " |
902 | "killing out of sync session "); | ||
885 | if (del_timer(&conntrack->timeout)) | 903 | if (del_timer(&conntrack->timeout)) |
886 | conntrack->timeout.function((unsigned long) | 904 | conntrack->timeout.function((unsigned long) |
887 | conntrack); | 905 | conntrack); |
@@ -895,7 +913,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, | |||
895 | 913 | ||
896 | write_unlock_bh(&tcp_lock); | 914 | write_unlock_bh(&tcp_lock); |
897 | if (LOG_INVALID(IPPROTO_TCP)) | 915 | if (LOG_INVALID(IPPROTO_TCP)) |
898 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, | 916 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, |
899 | "ip_ct_tcp: invalid packet ignored "); | 917 | "ip_ct_tcp: invalid packet ignored "); |
900 | return NF_ACCEPT; | 918 | return NF_ACCEPT; |
901 | case TCP_CONNTRACK_MAX: | 919 | case TCP_CONNTRACK_MAX: |
@@ -905,7 +923,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, | |||
905 | old_state); | 923 | old_state); |
906 | write_unlock_bh(&tcp_lock); | 924 | write_unlock_bh(&tcp_lock); |
907 | if (LOG_INVALID(IPPROTO_TCP)) | 925 | if (LOG_INVALID(IPPROTO_TCP)) |
908 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, | 926 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, |
909 | "ip_ct_tcp: invalid state "); | 927 | "ip_ct_tcp: invalid state "); |
910 | return -NF_ACCEPT; | 928 | return -NF_ACCEPT; |
911 | case TCP_CONNTRACK_SYN_SENT: | 929 | case TCP_CONNTRACK_SYN_SENT: |
@@ -926,7 +944,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, | |||
926 | write_unlock_bh(&tcp_lock); | 944 | write_unlock_bh(&tcp_lock); |
927 | if (LOG_INVALID(IPPROTO_TCP)) | 945 | if (LOG_INVALID(IPPROTO_TCP)) |
928 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, | 946 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, |
929 | "ip_ct_tcp: invalid SYN"); | 947 | NULL, "ip_ct_tcp: invalid SYN"); |
930 | return -NF_ACCEPT; | 948 | return -NF_ACCEPT; |
931 | } | 949 | } |
932 | case TCP_CONNTRACK_CLOSE: | 950 | case TCP_CONNTRACK_CLOSE: |
@@ -973,6 +991,10 @@ static int tcp_packet(struct ip_conntrack *conntrack, | |||
973 | ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state]; | 991 | ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state]; |
974 | write_unlock_bh(&tcp_lock); | 992 | write_unlock_bh(&tcp_lock); |
975 | 993 | ||
994 | ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); | ||
995 | if (new_state != old_state) | ||
996 | ip_conntrack_event_cache(IPCT_PROTOINFO, skb); | ||
997 | |||
976 | if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { | 998 | if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { |
977 | /* If only reply is a RST, we can consider ourselves not to | 999 | /* If only reply is a RST, we can consider ourselves not to |
978 | have an established connection: this is a fairly common | 1000 | have an established connection: this is a fairly common |
@@ -1096,4 +1118,10 @@ struct ip_conntrack_protocol ip_conntrack_protocol_tcp = | |||
1096 | .packet = tcp_packet, | 1118 | .packet = tcp_packet, |
1097 | .new = tcp_new, | 1119 | .new = tcp_new, |
1098 | .error = tcp_error, | 1120 | .error = tcp_error, |
1121 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
1122 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
1123 | .to_nfattr = tcp_to_nfattr, | ||
1124 | .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, | ||
1125 | .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, | ||
1126 | #endif | ||
1099 | }; | 1127 | }; |
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c index 8c1eaba098d4..f2dcac7c7660 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c | |||
@@ -73,7 +73,8 @@ static int udp_packet(struct ip_conntrack *conntrack, | |||
73 | ip_ct_refresh_acct(conntrack, ctinfo, skb, | 73 | ip_ct_refresh_acct(conntrack, ctinfo, skb, |
74 | ip_ct_udp_timeout_stream); | 74 | ip_ct_udp_timeout_stream); |
75 | /* Also, more likely to be important, and not a probe */ | 75 | /* Also, more likely to be important, and not a probe */ |
76 | set_bit(IPS_ASSURED_BIT, &conntrack->status); | 76 | if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status)) |
77 | ip_conntrack_event_cache(IPCT_STATUS, skb); | ||
77 | } else | 78 | } else |
78 | ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout); | 79 | ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout); |
79 | 80 | ||
@@ -97,7 +98,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, | |||
97 | hdr = skb_header_pointer(skb, iph->ihl*4, sizeof(_hdr), &_hdr); | 98 | hdr = skb_header_pointer(skb, iph->ihl*4, sizeof(_hdr), &_hdr); |
98 | if (hdr == NULL) { | 99 | if (hdr == NULL) { |
99 | if (LOG_INVALID(IPPROTO_UDP)) | 100 | if (LOG_INVALID(IPPROTO_UDP)) |
100 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, | 101 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, |
101 | "ip_ct_udp: short packet "); | 102 | "ip_ct_udp: short packet "); |
102 | return -NF_ACCEPT; | 103 | return -NF_ACCEPT; |
103 | } | 104 | } |
@@ -105,7 +106,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, | |||
105 | /* Truncated/malformed packets */ | 106 | /* Truncated/malformed packets */ |
106 | if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { | 107 | if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { |
107 | if (LOG_INVALID(IPPROTO_UDP)) | 108 | if (LOG_INVALID(IPPROTO_UDP)) |
108 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, | 109 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, |
109 | "ip_ct_udp: truncated/malformed packet "); | 110 | "ip_ct_udp: truncated/malformed packet "); |
110 | return -NF_ACCEPT; | 111 | return -NF_ACCEPT; |
111 | } | 112 | } |
@@ -125,7 +126,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, | |||
125 | skb->ip_summed == CHECKSUM_HW ? skb->csum | 126 | skb->ip_summed == CHECKSUM_HW ? skb->csum |
126 | : skb_checksum(skb, iph->ihl*4, udplen, 0))) { | 127 | : skb_checksum(skb, iph->ihl*4, udplen, 0))) { |
127 | if (LOG_INVALID(IPPROTO_UDP)) | 128 | if (LOG_INVALID(IPPROTO_UDP)) |
128 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, | 129 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, |
129 | "ip_ct_udp: bad UDP checksum "); | 130 | "ip_ct_udp: bad UDP checksum "); |
130 | return -NF_ACCEPT; | 131 | return -NF_ACCEPT; |
131 | } | 132 | } |
@@ -144,4 +145,9 @@ struct ip_conntrack_protocol ip_conntrack_protocol_udp = | |||
144 | .packet = udp_packet, | 145 | .packet = udp_packet, |
145 | .new = udp_new, | 146 | .new = udp_new, |
146 | .error = udp_error, | 147 | .error = udp_error, |
148 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
149 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
150 | .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, | ||
151 | .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, | ||
152 | #endif | ||
147 | }; | 153 | }; |
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index 61798c46e91d..ee5895afd0c3 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c | |||
@@ -5,7 +5,7 @@ | |||
5 | */ | 5 | */ |
6 | 6 | ||
7 | /* (C) 1999-2001 Paul `Rusty' Russell | 7 | /* (C) 1999-2001 Paul `Rusty' Russell |
8 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | 8 | * (C) 2002-2005 Netfilter Core Team <coreteam@netfilter.org> |
9 | * | 9 | * |
10 | * This program is free software; you can redistribute it and/or modify | 10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License version 2 as | 11 | * it under the terms of the GNU General Public License version 2 as |
@@ -147,8 +147,7 @@ static int ct_seq_show(struct seq_file *s, void *v) | |||
147 | if (DIRECTION(hash)) | 147 | if (DIRECTION(hash)) |
148 | return 0; | 148 | return 0; |
149 | 149 | ||
150 | proto = ip_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] | 150 | proto = __ip_conntrack_proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); |
151 | .tuple.dst.protonum); | ||
152 | IP_NF_ASSERT(proto); | 151 | IP_NF_ASSERT(proto); |
153 | 152 | ||
154 | if (seq_printf(s, "%-8s %u %ld ", | 153 | if (seq_printf(s, "%-8s %u %ld ", |
@@ -185,7 +184,7 @@ static int ct_seq_show(struct seq_file *s, void *v) | |||
185 | return -ENOSPC; | 184 | return -ENOSPC; |
186 | 185 | ||
187 | #if defined(CONFIG_IP_NF_CONNTRACK_MARK) | 186 | #if defined(CONFIG_IP_NF_CONNTRACK_MARK) |
188 | if (seq_printf(s, "mark=%lu ", conntrack->mark)) | 187 | if (seq_printf(s, "mark=%u ", conntrack->mark)) |
189 | return -ENOSPC; | 188 | return -ENOSPC; |
190 | #endif | 189 | #endif |
191 | 190 | ||
@@ -283,7 +282,7 @@ static int exp_seq_show(struct seq_file *s, void *v) | |||
283 | seq_printf(s, "proto=%u ", expect->tuple.dst.protonum); | 282 | seq_printf(s, "proto=%u ", expect->tuple.dst.protonum); |
284 | 283 | ||
285 | print_tuple(s, &expect->tuple, | 284 | print_tuple(s, &expect->tuple, |
286 | ip_ct_find_proto(expect->tuple.dst.protonum)); | 285 | __ip_conntrack_proto_find(expect->tuple.dst.protonum)); |
287 | return seq_putc(s, '\n'); | 286 | return seq_putc(s, '\n'); |
288 | } | 287 | } |
289 | 288 | ||
@@ -889,6 +888,7 @@ static int init_or_cleanup(int init) | |||
889 | return ret; | 888 | return ret; |
890 | 889 | ||
891 | cleanup: | 890 | cleanup: |
891 | synchronize_net(); | ||
892 | #ifdef CONFIG_SYSCTL | 892 | #ifdef CONFIG_SYSCTL |
893 | unregister_sysctl_table(ip_ct_sysctl_header); | 893 | unregister_sysctl_table(ip_ct_sysctl_header); |
894 | cleanup_localinops: | 894 | cleanup_localinops: |
@@ -971,6 +971,14 @@ void need_ip_conntrack(void) | |||
971 | { | 971 | { |
972 | } | 972 | } |
973 | 973 | ||
974 | #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS | ||
975 | EXPORT_SYMBOL_GPL(ip_conntrack_chain); | ||
976 | EXPORT_SYMBOL_GPL(ip_conntrack_expect_chain); | ||
977 | EXPORT_SYMBOL_GPL(ip_conntrack_register_notifier); | ||
978 | EXPORT_SYMBOL_GPL(ip_conntrack_unregister_notifier); | ||
979 | EXPORT_SYMBOL_GPL(__ip_ct_event_cache_init); | ||
980 | EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache); | ||
981 | #endif | ||
974 | EXPORT_SYMBOL(ip_conntrack_protocol_register); | 982 | EXPORT_SYMBOL(ip_conntrack_protocol_register); |
975 | EXPORT_SYMBOL(ip_conntrack_protocol_unregister); | 983 | EXPORT_SYMBOL(ip_conntrack_protocol_unregister); |
976 | EXPORT_SYMBOL(ip_ct_get_tuple); | 984 | EXPORT_SYMBOL(ip_ct_get_tuple); |
@@ -982,12 +990,16 @@ EXPORT_SYMBOL(ip_conntrack_helper_register); | |||
982 | EXPORT_SYMBOL(ip_conntrack_helper_unregister); | 990 | EXPORT_SYMBOL(ip_conntrack_helper_unregister); |
983 | EXPORT_SYMBOL(ip_ct_iterate_cleanup); | 991 | EXPORT_SYMBOL(ip_ct_iterate_cleanup); |
984 | EXPORT_SYMBOL(ip_ct_refresh_acct); | 992 | EXPORT_SYMBOL(ip_ct_refresh_acct); |
985 | EXPORT_SYMBOL(ip_ct_protos); | 993 | |
986 | EXPORT_SYMBOL(ip_ct_find_proto); | ||
987 | EXPORT_SYMBOL(ip_conntrack_expect_alloc); | 994 | EXPORT_SYMBOL(ip_conntrack_expect_alloc); |
988 | EXPORT_SYMBOL(ip_conntrack_expect_put); | 995 | EXPORT_SYMBOL(ip_conntrack_expect_put); |
996 | EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get); | ||
989 | EXPORT_SYMBOL(ip_conntrack_expect_related); | 997 | EXPORT_SYMBOL(ip_conntrack_expect_related); |
990 | EXPORT_SYMBOL(ip_conntrack_unexpect_related); | 998 | EXPORT_SYMBOL(ip_conntrack_unexpect_related); |
999 | EXPORT_SYMBOL_GPL(ip_conntrack_expect_list); | ||
1000 | EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find); | ||
1001 | EXPORT_SYMBOL_GPL(__ip_ct_expect_unlink_destroy); | ||
1002 | |||
991 | EXPORT_SYMBOL(ip_conntrack_tuple_taken); | 1003 | EXPORT_SYMBOL(ip_conntrack_tuple_taken); |
992 | EXPORT_SYMBOL(ip_ct_gather_frags); | 1004 | EXPORT_SYMBOL(ip_ct_gather_frags); |
993 | EXPORT_SYMBOL(ip_conntrack_htable_size); | 1005 | EXPORT_SYMBOL(ip_conntrack_htable_size); |
@@ -995,7 +1007,28 @@ EXPORT_SYMBOL(ip_conntrack_lock); | |||
995 | EXPORT_SYMBOL(ip_conntrack_hash); | 1007 | EXPORT_SYMBOL(ip_conntrack_hash); |
996 | EXPORT_SYMBOL(ip_conntrack_untracked); | 1008 | EXPORT_SYMBOL(ip_conntrack_untracked); |
997 | EXPORT_SYMBOL_GPL(ip_conntrack_find_get); | 1009 | EXPORT_SYMBOL_GPL(ip_conntrack_find_get); |
998 | EXPORT_SYMBOL_GPL(ip_conntrack_put); | ||
999 | #ifdef CONFIG_IP_NF_NAT_NEEDED | 1010 | #ifdef CONFIG_IP_NF_NAT_NEEDED |
1000 | EXPORT_SYMBOL(ip_conntrack_tcp_update); | 1011 | EXPORT_SYMBOL(ip_conntrack_tcp_update); |
1001 | #endif | 1012 | #endif |
1013 | |||
1014 | EXPORT_SYMBOL_GPL(ip_conntrack_flush); | ||
1015 | EXPORT_SYMBOL_GPL(__ip_conntrack_find); | ||
1016 | |||
1017 | EXPORT_SYMBOL_GPL(ip_conntrack_alloc); | ||
1018 | EXPORT_SYMBOL_GPL(ip_conntrack_free); | ||
1019 | EXPORT_SYMBOL_GPL(ip_conntrack_hash_insert); | ||
1020 | |||
1021 | EXPORT_SYMBOL_GPL(ip_ct_remove_expectations); | ||
1022 | |||
1023 | EXPORT_SYMBOL_GPL(ip_conntrack_helper_find_get); | ||
1024 | EXPORT_SYMBOL_GPL(ip_conntrack_helper_put); | ||
1025 | EXPORT_SYMBOL_GPL(__ip_conntrack_helper_find_byname); | ||
1026 | |||
1027 | EXPORT_SYMBOL_GPL(ip_conntrack_proto_find_get); | ||
1028 | EXPORT_SYMBOL_GPL(ip_conntrack_proto_put); | ||
1029 | EXPORT_SYMBOL_GPL(__ip_conntrack_proto_find); | ||
1030 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
1031 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
1032 | EXPORT_SYMBOL_GPL(ip_ct_port_tuple_to_nfattr); | ||
1033 | EXPORT_SYMBOL_GPL(ip_ct_port_nfattr_to_tuple); | ||
1034 | #endif | ||
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 739b6dde1c82..1adedb743f60 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c | |||
@@ -47,8 +47,39 @@ DEFINE_RWLOCK(ip_nat_lock); | |||
47 | static unsigned int ip_nat_htable_size; | 47 | static unsigned int ip_nat_htable_size; |
48 | 48 | ||
49 | static struct list_head *bysource; | 49 | static struct list_head *bysource; |
50 | |||
51 | #define MAX_IP_NAT_PROTO 256 | ||
50 | struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO]; | 52 | struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO]; |
51 | 53 | ||
54 | static inline struct ip_nat_protocol * | ||
55 | __ip_nat_proto_find(u_int8_t protonum) | ||
56 | { | ||
57 | return ip_nat_protos[protonum]; | ||
58 | } | ||
59 | |||
60 | struct ip_nat_protocol * | ||
61 | ip_nat_proto_find_get(u_int8_t protonum) | ||
62 | { | ||
63 | struct ip_nat_protocol *p; | ||
64 | |||
65 | /* we need to disable preemption to make sure 'p' doesn't get | ||
66 | * removed until we've grabbed the reference */ | ||
67 | preempt_disable(); | ||
68 | p = __ip_nat_proto_find(protonum); | ||
69 | if (p) { | ||
70 | if (!try_module_get(p->me)) | ||
71 | p = &ip_nat_unknown_protocol; | ||
72 | } | ||
73 | preempt_enable(); | ||
74 | |||
75 | return p; | ||
76 | } | ||
77 | |||
78 | void | ||
79 | ip_nat_proto_put(struct ip_nat_protocol *p) | ||
80 | { | ||
81 | module_put(p->me); | ||
82 | } | ||
52 | 83 | ||
53 | /* We keep an extra hash for each conntrack, for fast searching. */ | 84 | /* We keep an extra hash for each conntrack, for fast searching. */ |
54 | static inline unsigned int | 85 | static inline unsigned int |
@@ -103,7 +134,8 @@ static int | |||
103 | in_range(const struct ip_conntrack_tuple *tuple, | 134 | in_range(const struct ip_conntrack_tuple *tuple, |
104 | const struct ip_nat_range *range) | 135 | const struct ip_nat_range *range) |
105 | { | 136 | { |
106 | struct ip_nat_protocol *proto = ip_nat_find_proto(tuple->dst.protonum); | 137 | struct ip_nat_protocol *proto = |
138 | __ip_nat_proto_find(tuple->dst.protonum); | ||
107 | 139 | ||
108 | /* If we are supposed to map IPs, then we must be in the | 140 | /* If we are supposed to map IPs, then we must be in the |
109 | range specified, otherwise let this drag us onto a new src IP. */ | 141 | range specified, otherwise let this drag us onto a new src IP. */ |
@@ -216,8 +248,7 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple, | |||
216 | struct ip_conntrack *conntrack, | 248 | struct ip_conntrack *conntrack, |
217 | enum ip_nat_manip_type maniptype) | 249 | enum ip_nat_manip_type maniptype) |
218 | { | 250 | { |
219 | struct ip_nat_protocol *proto | 251 | struct ip_nat_protocol *proto; |
220 | = ip_nat_find_proto(orig_tuple->dst.protonum); | ||
221 | 252 | ||
222 | /* 1) If this srcip/proto/src-proto-part is currently mapped, | 253 | /* 1) If this srcip/proto/src-proto-part is currently mapped, |
223 | and that same mapping gives a unique tuple within the given | 254 | and that same mapping gives a unique tuple within the given |
@@ -242,14 +273,20 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple, | |||
242 | /* 3) The per-protocol part of the manip is made to map into | 273 | /* 3) The per-protocol part of the manip is made to map into |
243 | the range to make a unique tuple. */ | 274 | the range to make a unique tuple. */ |
244 | 275 | ||
276 | proto = ip_nat_proto_find_get(orig_tuple->dst.protonum); | ||
277 | |||
245 | /* Only bother mapping if it's not already in range and unique */ | 278 | /* Only bother mapping if it's not already in range and unique */ |
246 | if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) | 279 | if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) |
247 | || proto->in_range(tuple, maniptype, &range->min, &range->max)) | 280 | || proto->in_range(tuple, maniptype, &range->min, &range->max)) |
248 | && !ip_nat_used_tuple(tuple, conntrack)) | 281 | && !ip_nat_used_tuple(tuple, conntrack)) { |
282 | ip_nat_proto_put(proto); | ||
249 | return; | 283 | return; |
284 | } | ||
250 | 285 | ||
251 | /* Last change: get protocol to try to obtain unique tuple. */ | 286 | /* Last change: get protocol to try to obtain unique tuple. */ |
252 | proto->unique_tuple(tuple, range, maniptype, conntrack); | 287 | proto->unique_tuple(tuple, range, maniptype, conntrack); |
288 | |||
289 | ip_nat_proto_put(proto); | ||
253 | } | 290 | } |
254 | 291 | ||
255 | unsigned int | 292 | unsigned int |
@@ -320,17 +357,20 @@ manip_pkt(u_int16_t proto, | |||
320 | enum ip_nat_manip_type maniptype) | 357 | enum ip_nat_manip_type maniptype) |
321 | { | 358 | { |
322 | struct iphdr *iph; | 359 | struct iphdr *iph; |
360 | struct ip_nat_protocol *p; | ||
323 | 361 | ||
324 | (*pskb)->nfcache |= NFC_ALTERED; | 362 | if (!skb_make_writable(pskb, iphdroff + sizeof(*iph))) |
325 | if (!skb_ip_make_writable(pskb, iphdroff + sizeof(*iph))) | ||
326 | return 0; | 363 | return 0; |
327 | 364 | ||
328 | iph = (void *)(*pskb)->data + iphdroff; | 365 | iph = (void *)(*pskb)->data + iphdroff; |
329 | 366 | ||
330 | /* Manipulate protcol part. */ | 367 | /* Manipulate protcol part. */ |
331 | if (!ip_nat_find_proto(proto)->manip_pkt(pskb, iphdroff, | 368 | p = ip_nat_proto_find_get(proto); |
332 | target, maniptype)) | 369 | if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) { |
370 | ip_nat_proto_put(p); | ||
333 | return 0; | 371 | return 0; |
372 | } | ||
373 | ip_nat_proto_put(p); | ||
334 | 374 | ||
335 | iph = (void *)(*pskb)->data + iphdroff; | 375 | iph = (void *)(*pskb)->data + iphdroff; |
336 | 376 | ||
@@ -391,7 +431,7 @@ int icmp_reply_translation(struct sk_buff **pskb, | |||
391 | struct ip_conntrack_tuple inner, target; | 431 | struct ip_conntrack_tuple inner, target; |
392 | int hdrlen = (*pskb)->nh.iph->ihl * 4; | 432 | int hdrlen = (*pskb)->nh.iph->ihl * 4; |
393 | 433 | ||
394 | if (!skb_ip_make_writable(pskb, hdrlen + sizeof(*inside))) | 434 | if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) |
395 | return 0; | 435 | return 0; |
396 | 436 | ||
397 | inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; | 437 | inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; |
@@ -426,7 +466,8 @@ int icmp_reply_translation(struct sk_buff **pskb, | |||
426 | 466 | ||
427 | if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 + | 467 | if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 + |
428 | sizeof(struct icmphdr) + inside->ip.ihl*4, | 468 | sizeof(struct icmphdr) + inside->ip.ihl*4, |
429 | &inner, ip_ct_find_proto(inside->ip.protocol))) | 469 | &inner, |
470 | __ip_conntrack_proto_find(inside->ip.protocol))) | ||
430 | return 0; | 471 | return 0; |
431 | 472 | ||
432 | /* Change inner back to look like incoming packet. We do the | 473 | /* Change inner back to look like incoming packet. We do the |
@@ -496,6 +537,49 @@ void ip_nat_protocol_unregister(struct ip_nat_protocol *proto) | |||
496 | synchronize_net(); | 537 | synchronize_net(); |
497 | } | 538 | } |
498 | 539 | ||
540 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
541 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
542 | int | ||
543 | ip_nat_port_range_to_nfattr(struct sk_buff *skb, | ||
544 | const struct ip_nat_range *range) | ||
545 | { | ||
546 | NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(u_int16_t), | ||
547 | &range->min.tcp.port); | ||
548 | NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(u_int16_t), | ||
549 | &range->max.tcp.port); | ||
550 | |||
551 | return 0; | ||
552 | |||
553 | nfattr_failure: | ||
554 | return -1; | ||
555 | } | ||
556 | |||
557 | int | ||
558 | ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range) | ||
559 | { | ||
560 | int ret = 0; | ||
561 | |||
562 | /* we have to return whether we actually parsed something or not */ | ||
563 | |||
564 | if (tb[CTA_PROTONAT_PORT_MIN-1]) { | ||
565 | ret = 1; | ||
566 | range->min.tcp.port = | ||
567 | *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]); | ||
568 | } | ||
569 | |||
570 | if (!tb[CTA_PROTONAT_PORT_MAX-1]) { | ||
571 | if (ret) | ||
572 | range->max.tcp.port = range->min.tcp.port; | ||
573 | } else { | ||
574 | ret = 1; | ||
575 | range->max.tcp.port = | ||
576 | *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]); | ||
577 | } | ||
578 | |||
579 | return ret; | ||
580 | } | ||
581 | #endif | ||
582 | |||
499 | int __init ip_nat_init(void) | 583 | int __init ip_nat_init(void) |
500 | { | 584 | { |
501 | size_t i; | 585 | size_t i; |
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index 158f34f32c04..d2dd5d313556 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c | |||
@@ -168,7 +168,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, | |||
168 | struct tcphdr *tcph; | 168 | struct tcphdr *tcph; |
169 | int datalen; | 169 | int datalen; |
170 | 170 | ||
171 | if (!skb_ip_make_writable(pskb, (*pskb)->len)) | 171 | if (!skb_make_writable(pskb, (*pskb)->len)) |
172 | return 0; | 172 | return 0; |
173 | 173 | ||
174 | if (rep_len > match_len | 174 | if (rep_len > match_len |
@@ -228,7 +228,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, | |||
228 | match_offset + match_len) | 228 | match_offset + match_len) |
229 | return 0; | 229 | return 0; |
230 | 230 | ||
231 | if (!skb_ip_make_writable(pskb, (*pskb)->len)) | 231 | if (!skb_make_writable(pskb, (*pskb)->len)) |
232 | return 0; | 232 | return 0; |
233 | 233 | ||
234 | if (rep_len > match_len | 234 | if (rep_len > match_len |
@@ -315,7 +315,7 @@ ip_nat_sack_adjust(struct sk_buff **pskb, | |||
315 | optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr); | 315 | optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr); |
316 | optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4; | 316 | optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4; |
317 | 317 | ||
318 | if (!skb_ip_make_writable(pskb, optend)) | 318 | if (!skb_make_writable(pskb, optend)) |
319 | return 0; | 319 | return 0; |
320 | 320 | ||
321 | dir = CTINFO2DIR(ctinfo); | 321 | dir = CTINFO2DIR(ctinfo); |
@@ -363,7 +363,7 @@ ip_nat_seq_adjust(struct sk_buff **pskb, | |||
363 | this_way = &ct->nat.info.seq[dir]; | 363 | this_way = &ct->nat.info.seq[dir]; |
364 | other_way = &ct->nat.info.seq[!dir]; | 364 | other_way = &ct->nat.info.seq[!dir]; |
365 | 365 | ||
366 | if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) | 366 | if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) |
367 | return 0; | 367 | return 0; |
368 | 368 | ||
369 | tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; | 369 | tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; |
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c index 6596c9ee1655..938719043999 100644 --- a/net/ipv4/netfilter/ip_nat_proto_icmp.c +++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c | |||
@@ -62,7 +62,7 @@ icmp_manip_pkt(struct sk_buff **pskb, | |||
62 | struct icmphdr *hdr; | 62 | struct icmphdr *hdr; |
63 | unsigned int hdroff = iphdroff + iph->ihl*4; | 63 | unsigned int hdroff = iphdroff + iph->ihl*4; |
64 | 64 | ||
65 | if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr))) | 65 | if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) |
66 | return 0; | 66 | return 0; |
67 | 67 | ||
68 | hdr = (struct icmphdr *)((*pskb)->data + hdroff); | 68 | hdr = (struct icmphdr *)((*pskb)->data + hdroff); |
@@ -106,11 +106,18 @@ icmp_print_range(char *buffer, const struct ip_nat_range *range) | |||
106 | else return 0; | 106 | else return 0; |
107 | } | 107 | } |
108 | 108 | ||
109 | struct ip_nat_protocol ip_nat_protocol_icmp | 109 | struct ip_nat_protocol ip_nat_protocol_icmp = { |
110 | = { "ICMP", IPPROTO_ICMP, | 110 | .name = "ICMP", |
111 | icmp_manip_pkt, | 111 | .protonum = IPPROTO_ICMP, |
112 | icmp_in_range, | 112 | .me = THIS_MODULE, |
113 | icmp_unique_tuple, | 113 | .manip_pkt = icmp_manip_pkt, |
114 | icmp_print, | 114 | .in_range = icmp_in_range, |
115 | icmp_print_range | 115 | .unique_tuple = icmp_unique_tuple, |
116 | .print = icmp_print, | ||
117 | .print_range = icmp_print_range, | ||
118 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
119 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
120 | .range_to_nfattr = ip_nat_port_range_to_nfattr, | ||
121 | .nfattr_to_range = ip_nat_port_nfattr_to_range, | ||
122 | #endif | ||
116 | }; | 123 | }; |
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c index a98e36d2b3c6..1d381bf68574 100644 --- a/net/ipv4/netfilter/ip_nat_proto_tcp.c +++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/ip.h> | 12 | #include <linux/ip.h> |
13 | #include <linux/tcp.h> | 13 | #include <linux/tcp.h> |
14 | #include <linux/if.h> | 14 | #include <linux/if.h> |
15 | #include <linux/netfilter/nfnetlink_conntrack.h> | ||
15 | #include <linux/netfilter_ipv4/ip_nat.h> | 16 | #include <linux/netfilter_ipv4/ip_nat.h> |
16 | #include <linux/netfilter_ipv4/ip_nat_rule.h> | 17 | #include <linux/netfilter_ipv4/ip_nat_rule.h> |
17 | #include <linux/netfilter_ipv4/ip_nat_protocol.h> | 18 | #include <linux/netfilter_ipv4/ip_nat_protocol.h> |
@@ -102,7 +103,7 @@ tcp_manip_pkt(struct sk_buff **pskb, | |||
102 | if ((*pskb)->len >= hdroff + sizeof(struct tcphdr)) | 103 | if ((*pskb)->len >= hdroff + sizeof(struct tcphdr)) |
103 | hdrsize = sizeof(struct tcphdr); | 104 | hdrsize = sizeof(struct tcphdr); |
104 | 105 | ||
105 | if (!skb_ip_make_writable(pskb, hdroff + hdrsize)) | 106 | if (!skb_make_writable(pskb, hdroff + hdrsize)) |
106 | return 0; | 107 | return 0; |
107 | 108 | ||
108 | iph = (struct iphdr *)((*pskb)->data + iphdroff); | 109 | iph = (struct iphdr *)((*pskb)->data + iphdroff); |
@@ -169,11 +170,18 @@ tcp_print_range(char *buffer, const struct ip_nat_range *range) | |||
169 | else return 0; | 170 | else return 0; |
170 | } | 171 | } |
171 | 172 | ||
172 | struct ip_nat_protocol ip_nat_protocol_tcp | 173 | struct ip_nat_protocol ip_nat_protocol_tcp = { |
173 | = { "TCP", IPPROTO_TCP, | 174 | .name = "TCP", |
174 | tcp_manip_pkt, | 175 | .protonum = IPPROTO_TCP, |
175 | tcp_in_range, | 176 | .me = THIS_MODULE, |
176 | tcp_unique_tuple, | 177 | .manip_pkt = tcp_manip_pkt, |
177 | tcp_print, | 178 | .in_range = tcp_in_range, |
178 | tcp_print_range | 179 | .unique_tuple = tcp_unique_tuple, |
180 | .print = tcp_print, | ||
181 | .print_range = tcp_print_range, | ||
182 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
183 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
184 | .range_to_nfattr = ip_nat_port_range_to_nfattr, | ||
185 | .nfattr_to_range = ip_nat_port_nfattr_to_range, | ||
186 | #endif | ||
179 | }; | 187 | }; |
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c index 9f66e5625664..c4906e1aa24a 100644 --- a/net/ipv4/netfilter/ip_nat_proto_udp.c +++ b/net/ipv4/netfilter/ip_nat_proto_udp.c | |||
@@ -94,7 +94,7 @@ udp_manip_pkt(struct sk_buff **pskb, | |||
94 | u32 oldip, newip; | 94 | u32 oldip, newip; |
95 | u16 *portptr, newport; | 95 | u16 *portptr, newport; |
96 | 96 | ||
97 | if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr))) | 97 | if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) |
98 | return 0; | 98 | return 0; |
99 | 99 | ||
100 | iph = (struct iphdr *)((*pskb)->data + iphdroff); | 100 | iph = (struct iphdr *)((*pskb)->data + iphdroff); |
@@ -156,11 +156,18 @@ udp_print_range(char *buffer, const struct ip_nat_range *range) | |||
156 | else return 0; | 156 | else return 0; |
157 | } | 157 | } |
158 | 158 | ||
159 | struct ip_nat_protocol ip_nat_protocol_udp | 159 | struct ip_nat_protocol ip_nat_protocol_udp = { |
160 | = { "UDP", IPPROTO_UDP, | 160 | .name = "UDP", |
161 | udp_manip_pkt, | 161 | .protonum = IPPROTO_UDP, |
162 | udp_in_range, | 162 | .me = THIS_MODULE, |
163 | udp_unique_tuple, | 163 | .manip_pkt = udp_manip_pkt, |
164 | udp_print, | 164 | .in_range = udp_in_range, |
165 | udp_print_range | 165 | .unique_tuple = udp_unique_tuple, |
166 | .print = udp_print, | ||
167 | .print_range = udp_print_range, | ||
168 | #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ | ||
169 | defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) | ||
170 | .range_to_nfattr = ip_nat_port_range_to_nfattr, | ||
171 | .nfattr_to_range = ip_nat_port_nfattr_to_range, | ||
172 | #endif | ||
166 | }; | 173 | }; |
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c index f5525bd58d16..99bbef56f84e 100644 --- a/net/ipv4/netfilter/ip_nat_proto_unknown.c +++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c | |||
@@ -61,10 +61,11 @@ unknown_print_range(char *buffer, const struct ip_nat_range *range) | |||
61 | } | 61 | } |
62 | 62 | ||
63 | struct ip_nat_protocol ip_nat_unknown_protocol = { | 63 | struct ip_nat_protocol ip_nat_unknown_protocol = { |
64 | "unknown", 0, | 64 | .name = "unknown", |
65 | unknown_manip_pkt, | 65 | .me = THIS_MODULE, |
66 | unknown_in_range, | 66 | .manip_pkt = unknown_manip_pkt, |
67 | unknown_unique_tuple, | 67 | .in_range = unknown_in_range, |
68 | unknown_print, | 68 | .unique_tuple = unknown_unique_tuple, |
69 | unknown_print_range | 69 | .print = unknown_print, |
70 | .print_range = unknown_print_range | ||
70 | }; | 71 | }; |
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c index 2a48b6e635ae..93b2c5111bb2 100644 --- a/net/ipv4/netfilter/ip_nat_snmp_basic.c +++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c | |||
@@ -1275,7 +1275,7 @@ static int help(struct sk_buff **pskb, | |||
1275 | return NF_DROP; | 1275 | return NF_DROP; |
1276 | } | 1276 | } |
1277 | 1277 | ||
1278 | if (!skb_ip_make_writable(pskb, (*pskb)->len)) | 1278 | if (!skb_make_writable(pskb, (*pskb)->len)) |
1279 | return NF_DROP; | 1279 | return NF_DROP; |
1280 | 1280 | ||
1281 | spin_lock_bh(&snmp_lock); | 1281 | spin_lock_bh(&snmp_lock); |
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 91d5ea1dbbc9..89db052add81 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c | |||
@@ -73,8 +73,6 @@ ip_nat_fn(unsigned int hooknum, | |||
73 | IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off | 73 | IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off |
74 | & htons(IP_MF|IP_OFFSET))); | 74 | & htons(IP_MF|IP_OFFSET))); |
75 | 75 | ||
76 | (*pskb)->nfcache |= NFC_UNKNOWN; | ||
77 | |||
78 | /* If we had a hardware checksum before, it's now invalid */ | 76 | /* If we had a hardware checksum before, it's now invalid */ |
79 | if ((*pskb)->ip_summed == CHECKSUM_HW) | 77 | if ((*pskb)->ip_summed == CHECKSUM_HW) |
80 | if (skb_checksum_help(*pskb, (out == NULL))) | 78 | if (skb_checksum_help(*pskb, (out == NULL))) |
@@ -396,6 +394,8 @@ module_exit(fini); | |||
396 | EXPORT_SYMBOL(ip_nat_setup_info); | 394 | EXPORT_SYMBOL(ip_nat_setup_info); |
397 | EXPORT_SYMBOL(ip_nat_protocol_register); | 395 | EXPORT_SYMBOL(ip_nat_protocol_register); |
398 | EXPORT_SYMBOL(ip_nat_protocol_unregister); | 396 | EXPORT_SYMBOL(ip_nat_protocol_unregister); |
397 | EXPORT_SYMBOL_GPL(ip_nat_proto_find_get); | ||
398 | EXPORT_SYMBOL_GPL(ip_nat_proto_put); | ||
399 | EXPORT_SYMBOL(ip_nat_cheat_check); | 399 | EXPORT_SYMBOL(ip_nat_cheat_check); |
400 | EXPORT_SYMBOL(ip_nat_mangle_tcp_packet); | 400 | EXPORT_SYMBOL(ip_nat_mangle_tcp_packet); |
401 | EXPORT_SYMBOL(ip_nat_mangle_udp_packet); | 401 | EXPORT_SYMBOL(ip_nat_mangle_udp_packet); |
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index c6baa8174389..d54f14d926f6 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c | |||
@@ -43,17 +43,10 @@ | |||
43 | #define NET_IPQ_QMAX 2088 | 43 | #define NET_IPQ_QMAX 2088 |
44 | #define NET_IPQ_QMAX_NAME "ip_queue_maxlen" | 44 | #define NET_IPQ_QMAX_NAME "ip_queue_maxlen" |
45 | 45 | ||
46 | struct ipq_rt_info { | ||
47 | __u8 tos; | ||
48 | __u32 daddr; | ||
49 | __u32 saddr; | ||
50 | }; | ||
51 | |||
52 | struct ipq_queue_entry { | 46 | struct ipq_queue_entry { |
53 | struct list_head list; | 47 | struct list_head list; |
54 | struct nf_info *info; | 48 | struct nf_info *info; |
55 | struct sk_buff *skb; | 49 | struct sk_buff *skb; |
56 | struct ipq_rt_info rt_info; | ||
57 | }; | 50 | }; |
58 | 51 | ||
59 | typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); | 52 | typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); |
@@ -247,8 +240,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) | |||
247 | 240 | ||
248 | pmsg->packet_id = (unsigned long )entry; | 241 | pmsg->packet_id = (unsigned long )entry; |
249 | pmsg->data_len = data_len; | 242 | pmsg->data_len = data_len; |
250 | pmsg->timestamp_sec = entry->skb->stamp.tv_sec; | 243 | pmsg->timestamp_sec = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec; |
251 | pmsg->timestamp_usec = entry->skb->stamp.tv_usec; | 244 | pmsg->timestamp_usec = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec; |
252 | pmsg->mark = entry->skb->nfmark; | 245 | pmsg->mark = entry->skb->nfmark; |
253 | pmsg->hook = entry->info->hook; | 246 | pmsg->hook = entry->info->hook; |
254 | pmsg->hw_protocol = entry->skb->protocol; | 247 | pmsg->hw_protocol = entry->skb->protocol; |
@@ -287,7 +280,8 @@ nlmsg_failure: | |||
287 | } | 280 | } |
288 | 281 | ||
289 | static int | 282 | static int |
290 | ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) | 283 | ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, |
284 | unsigned int queuenum, void *data) | ||
291 | { | 285 | { |
292 | int status = -EINVAL; | 286 | int status = -EINVAL; |
293 | struct sk_buff *nskb; | 287 | struct sk_buff *nskb; |
@@ -305,14 +299,6 @@ ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) | |||
305 | entry->info = info; | 299 | entry->info = info; |
306 | entry->skb = skb; | 300 | entry->skb = skb; |
307 | 301 | ||
308 | if (entry->info->hook == NF_IP_LOCAL_OUT) { | ||
309 | struct iphdr *iph = skb->nh.iph; | ||
310 | |||
311 | entry->rt_info.tos = iph->tos; | ||
312 | entry->rt_info.daddr = iph->daddr; | ||
313 | entry->rt_info.saddr = iph->saddr; | ||
314 | } | ||
315 | |||
316 | nskb = ipq_build_packet_message(entry, &status); | 302 | nskb = ipq_build_packet_message(entry, &status); |
317 | if (nskb == NULL) | 303 | if (nskb == NULL) |
318 | goto err_out_free; | 304 | goto err_out_free; |
@@ -388,24 +374,11 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) | |||
388 | } | 374 | } |
389 | skb_put(e->skb, diff); | 375 | skb_put(e->skb, diff); |
390 | } | 376 | } |
391 | if (!skb_ip_make_writable(&e->skb, v->data_len)) | 377 | if (!skb_make_writable(&e->skb, v->data_len)) |
392 | return -ENOMEM; | 378 | return -ENOMEM; |
393 | memcpy(e->skb->data, v->payload, v->data_len); | 379 | memcpy(e->skb->data, v->payload, v->data_len); |
394 | e->skb->ip_summed = CHECKSUM_NONE; | 380 | e->skb->ip_summed = CHECKSUM_NONE; |
395 | e->skb->nfcache |= NFC_ALTERED; | 381 | |
396 | |||
397 | /* | ||
398 | * Extra routing may needed on local out, as the QUEUE target never | ||
399 | * returns control to the table. | ||
400 | */ | ||
401 | if (e->info->hook == NF_IP_LOCAL_OUT) { | ||
402 | struct iphdr *iph = e->skb->nh.iph; | ||
403 | |||
404 | if (!(iph->tos == e->rt_info.tos | ||
405 | && iph->daddr == e->rt_info.daddr | ||
406 | && iph->saddr == e->rt_info.saddr)) | ||
407 | return ip_route_me_harder(&e->skb); | ||
408 | } | ||
409 | return 0; | 382 | return 0; |
410 | } | 383 | } |
411 | 384 | ||
@@ -683,6 +656,11 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length) | |||
683 | } | 656 | } |
684 | #endif /* CONFIG_PROC_FS */ | 657 | #endif /* CONFIG_PROC_FS */ |
685 | 658 | ||
659 | static struct nf_queue_handler nfqh = { | ||
660 | .name = "ip_queue", | ||
661 | .outfn = &ipq_enqueue_packet, | ||
662 | }; | ||
663 | |||
686 | static int | 664 | static int |
687 | init_or_cleanup(int init) | 665 | init_or_cleanup(int init) |
688 | { | 666 | { |
@@ -693,7 +671,8 @@ init_or_cleanup(int init) | |||
693 | goto cleanup; | 671 | goto cleanup; |
694 | 672 | ||
695 | netlink_register_notifier(&ipq_nl_notifier); | 673 | netlink_register_notifier(&ipq_nl_notifier); |
696 | ipqnl = netlink_kernel_create(NETLINK_FIREWALL, ipq_rcv_sk); | 674 | ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk, |
675 | THIS_MODULE); | ||
697 | if (ipqnl == NULL) { | 676 | if (ipqnl == NULL) { |
698 | printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); | 677 | printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); |
699 | goto cleanup_netlink_notifier; | 678 | goto cleanup_netlink_notifier; |
@@ -710,7 +689,7 @@ init_or_cleanup(int init) | |||
710 | register_netdevice_notifier(&ipq_dev_notifier); | 689 | register_netdevice_notifier(&ipq_dev_notifier); |
711 | ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0); | 690 | ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0); |
712 | 691 | ||
713 | status = nf_register_queue_handler(PF_INET, ipq_enqueue_packet, NULL); | 692 | status = nf_register_queue_handler(PF_INET, &nfqh); |
714 | if (status < 0) { | 693 | if (status < 0) { |
715 | printk(KERN_ERR "ip_queue: failed to register queue handler\n"); | 694 | printk(KERN_ERR "ip_queue: failed to register queue handler\n"); |
716 | goto cleanup_sysctl; | 695 | goto cleanup_sysctl; |
@@ -718,7 +697,7 @@ init_or_cleanup(int init) | |||
718 | return status; | 697 | return status; |
719 | 698 | ||
720 | cleanup: | 699 | cleanup: |
721 | nf_unregister_queue_handler(PF_INET); | 700 | nf_unregister_queue_handlers(&nfqh); |
722 | synchronize_net(); | 701 | synchronize_net(); |
723 | ipq_flush(NF_DROP); | 702 | ipq_flush(NF_DROP); |
724 | 703 | ||
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index c88dfcd38c56..eef99a1b5de6 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -312,7 +312,6 @@ ipt_do_table(struct sk_buff **pskb, | |||
312 | do { | 312 | do { |
313 | IP_NF_ASSERT(e); | 313 | IP_NF_ASSERT(e); |
314 | IP_NF_ASSERT(back); | 314 | IP_NF_ASSERT(back); |
315 | (*pskb)->nfcache |= e->nfcache; | ||
316 | if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) { | 315 | if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) { |
317 | struct ipt_entry_target *t; | 316 | struct ipt_entry_target *t; |
318 | 317 | ||
@@ -341,8 +340,8 @@ ipt_do_table(struct sk_buff **pskb, | |||
341 | back->comefrom); | 340 | back->comefrom); |
342 | continue; | 341 | continue; |
343 | } | 342 | } |
344 | if (table_base + v | 343 | if (table_base + v != (void *)e + e->next_offset |
345 | != (void *)e + e->next_offset) { | 344 | && !(e->ip.flags & IPT_F_GOTO)) { |
346 | /* Save old back ptr in next entry */ | 345 | /* Save old back ptr in next entry */ |
347 | struct ipt_entry *next | 346 | struct ipt_entry *next |
348 | = (void *)e + e->next_offset; | 347 | = (void *)e + e->next_offset; |
diff --git a/net/ipv4/netfilter/ipt_CLASSIFY.c b/net/ipv4/netfilter/ipt_CLASSIFY.c index 9842e6e23184..dab78d8bd494 100644 --- a/net/ipv4/netfilter/ipt_CLASSIFY.c +++ b/net/ipv4/netfilter/ipt_CLASSIFY.c | |||
@@ -32,10 +32,8 @@ target(struct sk_buff **pskb, | |||
32 | { | 32 | { |
33 | const struct ipt_classify_target_info *clinfo = targinfo; | 33 | const struct ipt_classify_target_info *clinfo = targinfo; |
34 | 34 | ||
35 | if((*pskb)->priority != clinfo->priority) { | 35 | if((*pskb)->priority != clinfo->priority) |
36 | (*pskb)->priority = clinfo->priority; | 36 | (*pskb)->priority = clinfo->priority; |
37 | (*pskb)->nfcache |= NFC_ALTERED; | ||
38 | } | ||
39 | 37 | ||
40 | return IPT_CONTINUE; | 38 | return IPT_CONTINUE; |
41 | } | 39 | } |
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 6706d3a1bc4f..2d05cafec221 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
@@ -367,7 +367,7 @@ target(struct sk_buff **pskb, | |||
367 | #ifdef DEBUG_CLUSTERP | 367 | #ifdef DEBUG_CLUSTERP |
368 | DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | 368 | DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); |
369 | #endif | 369 | #endif |
370 | DEBUGP("hash=%u ct_hash=%lu ", hash, ct->mark); | 370 | DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark); |
371 | if (!clusterip_responsible(cipinfo->config, hash)) { | 371 | if (!clusterip_responsible(cipinfo->config, hash)) { |
372 | DEBUGP("not responsible\n"); | 372 | DEBUGP("not responsible\n"); |
373 | return NF_DROP; | 373 | return NF_DROP; |
diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c index 30ddd3e18eb7..134638021339 100644 --- a/net/ipv4/netfilter/ipt_CONNMARK.c +++ b/net/ipv4/netfilter/ipt_CONNMARK.c | |||
@@ -40,9 +40,9 @@ target(struct sk_buff **pskb, | |||
40 | void *userinfo) | 40 | void *userinfo) |
41 | { | 41 | { |
42 | const struct ipt_connmark_target_info *markinfo = targinfo; | 42 | const struct ipt_connmark_target_info *markinfo = targinfo; |
43 | unsigned long diff; | 43 | u_int32_t diff; |
44 | unsigned long nfmark; | 44 | u_int32_t nfmark; |
45 | unsigned long newmark; | 45 | u_int32_t newmark; |
46 | 46 | ||
47 | enum ip_conntrack_info ctinfo; | 47 | enum ip_conntrack_info ctinfo; |
48 | struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo); | 48 | struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo); |
@@ -61,10 +61,8 @@ target(struct sk_buff **pskb, | |||
61 | case IPT_CONNMARK_RESTORE: | 61 | case IPT_CONNMARK_RESTORE: |
62 | nfmark = (*pskb)->nfmark; | 62 | nfmark = (*pskb)->nfmark; |
63 | diff = (ct->mark ^ nfmark) & markinfo->mask; | 63 | diff = (ct->mark ^ nfmark) & markinfo->mask; |
64 | if (diff != 0) { | 64 | if (diff != 0) |
65 | (*pskb)->nfmark = nfmark ^ diff; | 65 | (*pskb)->nfmark = nfmark ^ diff; |
66 | (*pskb)->nfcache |= NFC_ALTERED; | ||
67 | } | ||
68 | break; | 66 | break; |
69 | } | 67 | } |
70 | } | 68 | } |
@@ -94,6 +92,11 @@ checkentry(const char *tablename, | |||
94 | } | 92 | } |
95 | } | 93 | } |
96 | 94 | ||
95 | if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) { | ||
96 | printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n"); | ||
97 | return 0; | ||
98 | } | ||
99 | |||
97 | return 1; | 100 | return 1; |
98 | } | 101 | } |
99 | 102 | ||
diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c index 3ea4509099f9..6e319570a28c 100644 --- a/net/ipv4/netfilter/ipt_DSCP.c +++ b/net/ipv4/netfilter/ipt_DSCP.c | |||
@@ -39,7 +39,7 @@ target(struct sk_buff **pskb, | |||
39 | if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) { | 39 | if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) { |
40 | u_int16_t diffs[2]; | 40 | u_int16_t diffs[2]; |
41 | 41 | ||
42 | if (!skb_ip_make_writable(pskb, sizeof(struct iphdr))) | 42 | if (!skb_make_writable(pskb, sizeof(struct iphdr))) |
43 | return NF_DROP; | 43 | return NF_DROP; |
44 | 44 | ||
45 | diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; | 45 | diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; |
@@ -51,7 +51,6 @@ target(struct sk_buff **pskb, | |||
51 | sizeof(diffs), | 51 | sizeof(diffs), |
52 | (*pskb)->nh.iph->check | 52 | (*pskb)->nh.iph->check |
53 | ^ 0xFFFF)); | 53 | ^ 0xFFFF)); |
54 | (*pskb)->nfcache |= NFC_ALTERED; | ||
55 | } | 54 | } |
56 | return IPT_CONTINUE; | 55 | return IPT_CONTINUE; |
57 | } | 56 | } |
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 94a0ce1c1c9d..a1319693f648 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c | |||
@@ -31,7 +31,7 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) | |||
31 | != (einfo->ip_ect & IPT_ECN_IP_MASK)) { | 31 | != (einfo->ip_ect & IPT_ECN_IP_MASK)) { |
32 | u_int16_t diffs[2]; | 32 | u_int16_t diffs[2]; |
33 | 33 | ||
34 | if (!skb_ip_make_writable(pskb, sizeof(struct iphdr))) | 34 | if (!skb_make_writable(pskb, sizeof(struct iphdr))) |
35 | return 0; | 35 | return 0; |
36 | 36 | ||
37 | diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; | 37 | diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; |
@@ -43,7 +43,6 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) | |||
43 | sizeof(diffs), | 43 | sizeof(diffs), |
44 | (*pskb)->nh.iph->check | 44 | (*pskb)->nh.iph->check |
45 | ^0xFFFF)); | 45 | ^0xFFFF)); |
46 | (*pskb)->nfcache |= NFC_ALTERED; | ||
47 | } | 46 | } |
48 | return 1; | 47 | return 1; |
49 | } | 48 | } |
@@ -67,7 +66,7 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) | |||
67 | tcph->cwr == einfo->proto.tcp.cwr))) | 66 | tcph->cwr == einfo->proto.tcp.cwr))) |
68 | return 1; | 67 | return 1; |
69 | 68 | ||
70 | if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) | 69 | if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) |
71 | return 0; | 70 | return 0; |
72 | tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4; | 71 | tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4; |
73 | 72 | ||
@@ -87,7 +86,6 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) | |||
87 | tcph->check = csum_fold(csum_partial((char *)diffs, | 86 | tcph->check = csum_fold(csum_partial((char *)diffs, |
88 | sizeof(diffs), | 87 | sizeof(diffs), |
89 | tcph->check^0xFFFF)); | 88 | tcph->check^0xFFFF)); |
90 | (*pskb)->nfcache |= NFC_ALTERED; | ||
91 | return 1; | 89 | return 1; |
92 | } | 90 | } |
93 | 91 | ||
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index ef08733d26da..92ed050fac69 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c | |||
@@ -27,10 +27,6 @@ MODULE_LICENSE("GPL"); | |||
27 | MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); | 27 | MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); |
28 | MODULE_DESCRIPTION("iptables syslog logging module"); | 28 | MODULE_DESCRIPTION("iptables syslog logging module"); |
29 | 29 | ||
30 | static unsigned int nflog = 1; | ||
31 | module_param(nflog, int, 0400); | ||
32 | MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); | ||
33 | |||
34 | #if 0 | 30 | #if 0 |
35 | #define DEBUGP printk | 31 | #define DEBUGP printk |
36 | #else | 32 | #else |
@@ -41,11 +37,17 @@ MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); | |||
41 | static DEFINE_SPINLOCK(log_lock); | 37 | static DEFINE_SPINLOCK(log_lock); |
42 | 38 | ||
43 | /* One level of recursion won't kill us */ | 39 | /* One level of recursion won't kill us */ |
44 | static void dump_packet(const struct ipt_log_info *info, | 40 | static void dump_packet(const struct nf_loginfo *info, |
45 | const struct sk_buff *skb, | 41 | const struct sk_buff *skb, |
46 | unsigned int iphoff) | 42 | unsigned int iphoff) |
47 | { | 43 | { |
48 | struct iphdr _iph, *ih; | 44 | struct iphdr _iph, *ih; |
45 | unsigned int logflags; | ||
46 | |||
47 | if (info->type == NF_LOG_TYPE_LOG) | ||
48 | logflags = info->u.log.logflags; | ||
49 | else | ||
50 | logflags = NF_LOG_MASK; | ||
49 | 51 | ||
50 | ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); | 52 | ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); |
51 | if (ih == NULL) { | 53 | if (ih == NULL) { |
@@ -76,7 +78,7 @@ static void dump_packet(const struct ipt_log_info *info, | |||
76 | if (ntohs(ih->frag_off) & IP_OFFSET) | 78 | if (ntohs(ih->frag_off) & IP_OFFSET) |
77 | printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); | 79 | printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); |
78 | 80 | ||
79 | if ((info->logflags & IPT_LOG_IPOPT) | 81 | if ((logflags & IPT_LOG_IPOPT) |
80 | && ih->ihl * 4 > sizeof(struct iphdr)) { | 82 | && ih->ihl * 4 > sizeof(struct iphdr)) { |
81 | unsigned char _opt[4 * 15 - sizeof(struct iphdr)], *op; | 83 | unsigned char _opt[4 * 15 - sizeof(struct iphdr)], *op; |
82 | unsigned int i, optsize; | 84 | unsigned int i, optsize; |
@@ -119,7 +121,7 @@ static void dump_packet(const struct ipt_log_info *info, | |||
119 | printk("SPT=%u DPT=%u ", | 121 | printk("SPT=%u DPT=%u ", |
120 | ntohs(th->source), ntohs(th->dest)); | 122 | ntohs(th->source), ntohs(th->dest)); |
121 | /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ | 123 | /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ |
122 | if (info->logflags & IPT_LOG_TCPSEQ) | 124 | if (logflags & IPT_LOG_TCPSEQ) |
123 | printk("SEQ=%u ACK=%u ", | 125 | printk("SEQ=%u ACK=%u ", |
124 | ntohl(th->seq), ntohl(th->ack_seq)); | 126 | ntohl(th->seq), ntohl(th->ack_seq)); |
125 | /* Max length: 13 "WINDOW=65535 " */ | 127 | /* Max length: 13 "WINDOW=65535 " */ |
@@ -146,7 +148,7 @@ static void dump_packet(const struct ipt_log_info *info, | |||
146 | /* Max length: 11 "URGP=65535 " */ | 148 | /* Max length: 11 "URGP=65535 " */ |
147 | printk("URGP=%u ", ntohs(th->urg_ptr)); | 149 | printk("URGP=%u ", ntohs(th->urg_ptr)); |
148 | 150 | ||
149 | if ((info->logflags & IPT_LOG_TCPOPT) | 151 | if ((logflags & IPT_LOG_TCPOPT) |
150 | && th->doff * 4 > sizeof(struct tcphdr)) { | 152 | && th->doff * 4 > sizeof(struct tcphdr)) { |
151 | unsigned char _opt[4 * 15 - sizeof(struct tcphdr)]; | 153 | unsigned char _opt[4 * 15 - sizeof(struct tcphdr)]; |
152 | unsigned char *op; | 154 | unsigned char *op; |
@@ -328,7 +330,7 @@ static void dump_packet(const struct ipt_log_info *info, | |||
328 | } | 330 | } |
329 | 331 | ||
330 | /* Max length: 15 "UID=4294967295 " */ | 332 | /* Max length: 15 "UID=4294967295 " */ |
331 | if ((info->logflags & IPT_LOG_UID) && !iphoff && skb->sk) { | 333 | if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) { |
332 | read_lock_bh(&skb->sk->sk_callback_lock); | 334 | read_lock_bh(&skb->sk->sk_callback_lock); |
333 | if (skb->sk->sk_socket && skb->sk->sk_socket->file) | 335 | if (skb->sk->sk_socket && skb->sk->sk_socket->file) |
334 | printk("UID=%u ", skb->sk->sk_socket->file->f_uid); | 336 | printk("UID=%u ", skb->sk->sk_socket->file->f_uid); |
@@ -349,19 +351,31 @@ static void dump_packet(const struct ipt_log_info *info, | |||
349 | /* maxlen = 230+ 91 + 230 + 252 = 803 */ | 351 | /* maxlen = 230+ 91 + 230 + 252 = 803 */ |
350 | } | 352 | } |
351 | 353 | ||
354 | struct nf_loginfo default_loginfo = { | ||
355 | .type = NF_LOG_TYPE_LOG, | ||
356 | .u = { | ||
357 | .log = { | ||
358 | .level = 0, | ||
359 | .logflags = NF_LOG_MASK, | ||
360 | }, | ||
361 | }, | ||
362 | }; | ||
363 | |||
352 | static void | 364 | static void |
353 | ipt_log_packet(unsigned int hooknum, | 365 | ipt_log_packet(unsigned int pf, |
366 | unsigned int hooknum, | ||
354 | const struct sk_buff *skb, | 367 | const struct sk_buff *skb, |
355 | const struct net_device *in, | 368 | const struct net_device *in, |
356 | const struct net_device *out, | 369 | const struct net_device *out, |
357 | const struct ipt_log_info *loginfo, | 370 | const struct nf_loginfo *loginfo, |
358 | const char *level_string, | ||
359 | const char *prefix) | 371 | const char *prefix) |
360 | { | 372 | { |
373 | if (!loginfo) | ||
374 | loginfo = &default_loginfo; | ||
375 | |||
361 | spin_lock_bh(&log_lock); | 376 | spin_lock_bh(&log_lock); |
362 | printk(level_string); | 377 | printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, |
363 | printk("%sIN=%s OUT=%s ", | 378 | prefix, |
364 | prefix == NULL ? loginfo->prefix : prefix, | ||
365 | in ? in->name : "", | 379 | in ? in->name : "", |
366 | out ? out->name : ""); | 380 | out ? out->name : ""); |
367 | #ifdef CONFIG_BRIDGE_NETFILTER | 381 | #ifdef CONFIG_BRIDGE_NETFILTER |
@@ -405,28 +419,15 @@ ipt_log_target(struct sk_buff **pskb, | |||
405 | void *userinfo) | 419 | void *userinfo) |
406 | { | 420 | { |
407 | const struct ipt_log_info *loginfo = targinfo; | 421 | const struct ipt_log_info *loginfo = targinfo; |
408 | char level_string[4] = "< >"; | 422 | struct nf_loginfo li; |
409 | 423 | ||
410 | level_string[1] = '0' + (loginfo->level % 8); | 424 | li.type = NF_LOG_TYPE_LOG; |
411 | ipt_log_packet(hooknum, *pskb, in, out, loginfo, level_string, NULL); | 425 | li.u.log.level = loginfo->level; |
426 | li.u.log.logflags = loginfo->logflags; | ||
412 | 427 | ||
413 | return IPT_CONTINUE; | 428 | nf_log_packet(PF_INET, hooknum, *pskb, in, out, &li, loginfo->prefix); |
414 | } | ||
415 | 429 | ||
416 | static void | 430 | return IPT_CONTINUE; |
417 | ipt_logfn(unsigned int hooknum, | ||
418 | const struct sk_buff *skb, | ||
419 | const struct net_device *in, | ||
420 | const struct net_device *out, | ||
421 | const char *prefix) | ||
422 | { | ||
423 | struct ipt_log_info loginfo = { | ||
424 | .level = 0, | ||
425 | .logflags = IPT_LOG_MASK, | ||
426 | .prefix = "" | ||
427 | }; | ||
428 | |||
429 | ipt_log_packet(hooknum, skb, in, out, &loginfo, KERN_WARNING, prefix); | ||
430 | } | 431 | } |
431 | 432 | ||
432 | static int ipt_log_checkentry(const char *tablename, | 433 | static int ipt_log_checkentry(const char *tablename, |
@@ -464,20 +465,29 @@ static struct ipt_target ipt_log_reg = { | |||
464 | .me = THIS_MODULE, | 465 | .me = THIS_MODULE, |
465 | }; | 466 | }; |
466 | 467 | ||
468 | static struct nf_logger ipt_log_logger ={ | ||
469 | .name = "ipt_LOG", | ||
470 | .logfn = &ipt_log_packet, | ||
471 | .me = THIS_MODULE, | ||
472 | }; | ||
473 | |||
467 | static int __init init(void) | 474 | static int __init init(void) |
468 | { | 475 | { |
469 | if (ipt_register_target(&ipt_log_reg)) | 476 | if (ipt_register_target(&ipt_log_reg)) |
470 | return -EINVAL; | 477 | return -EINVAL; |
471 | if (nflog) | 478 | if (nf_log_register(PF_INET, &ipt_log_logger) < 0) { |
472 | nf_log_register(PF_INET, &ipt_logfn); | 479 | printk(KERN_WARNING "ipt_LOG: not logging via system console " |
480 | "since somebody else already registered for PF_INET\n"); | ||
481 | /* we cannot make module load fail here, since otherwise | ||
482 | * iptables userspace would abort */ | ||
483 | } | ||
473 | 484 | ||
474 | return 0; | 485 | return 0; |
475 | } | 486 | } |
476 | 487 | ||
477 | static void __exit fini(void) | 488 | static void __exit fini(void) |
478 | { | 489 | { |
479 | if (nflog) | 490 | nf_log_unregister_logger(&ipt_log_logger); |
480 | nf_log_unregister(PF_INET, &ipt_logfn); | ||
481 | ipt_unregister_target(&ipt_log_reg); | 491 | ipt_unregister_target(&ipt_log_reg); |
482 | } | 492 | } |
483 | 493 | ||
diff --git a/net/ipv4/netfilter/ipt_MARK.c b/net/ipv4/netfilter/ipt_MARK.c index 33c6f9b63b8d..52b4f2c296bf 100644 --- a/net/ipv4/netfilter/ipt_MARK.c +++ b/net/ipv4/netfilter/ipt_MARK.c | |||
@@ -29,10 +29,9 @@ target_v0(struct sk_buff **pskb, | |||
29 | { | 29 | { |
30 | const struct ipt_mark_target_info *markinfo = targinfo; | 30 | const struct ipt_mark_target_info *markinfo = targinfo; |
31 | 31 | ||
32 | if((*pskb)->nfmark != markinfo->mark) { | 32 | if((*pskb)->nfmark != markinfo->mark) |
33 | (*pskb)->nfmark = markinfo->mark; | 33 | (*pskb)->nfmark = markinfo->mark; |
34 | (*pskb)->nfcache |= NFC_ALTERED; | 34 | |
35 | } | ||
36 | return IPT_CONTINUE; | 35 | return IPT_CONTINUE; |
37 | } | 36 | } |
38 | 37 | ||
@@ -61,10 +60,9 @@ target_v1(struct sk_buff **pskb, | |||
61 | break; | 60 | break; |
62 | } | 61 | } |
63 | 62 | ||
64 | if((*pskb)->nfmark != mark) { | 63 | if((*pskb)->nfmark != mark) |
65 | (*pskb)->nfmark = mark; | 64 | (*pskb)->nfmark = mark; |
66 | (*pskb)->nfcache |= NFC_ALTERED; | 65 | |
67 | } | ||
68 | return IPT_CONTINUE; | 66 | return IPT_CONTINUE; |
69 | } | 67 | } |
70 | 68 | ||
@@ -76,6 +74,8 @@ checkentry_v0(const char *tablename, | |||
76 | unsigned int targinfosize, | 74 | unsigned int targinfosize, |
77 | unsigned int hook_mask) | 75 | unsigned int hook_mask) |
78 | { | 76 | { |
77 | struct ipt_mark_target_info *markinfo = targinfo; | ||
78 | |||
79 | if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info))) { | 79 | if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info))) { |
80 | printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n", | 80 | printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n", |
81 | targinfosize, | 81 | targinfosize, |
@@ -88,6 +88,11 @@ checkentry_v0(const char *tablename, | |||
88 | return 0; | 88 | return 0; |
89 | } | 89 | } |
90 | 90 | ||
91 | if (markinfo->mark > 0xffffffff) { | ||
92 | printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); | ||
93 | return 0; | ||
94 | } | ||
95 | |||
91 | return 1; | 96 | return 1; |
92 | } | 97 | } |
93 | 98 | ||
@@ -120,6 +125,11 @@ checkentry_v1(const char *tablename, | |||
120 | return 0; | 125 | return 0; |
121 | } | 126 | } |
122 | 127 | ||
128 | if (markinfo->mark > 0xffffffff) { | ||
129 | printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); | ||
130 | return 0; | ||
131 | } | ||
132 | |||
123 | return 1; | 133 | return 1; |
124 | } | 134 | } |
125 | 135 | ||
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 91e74502c3d3..2f3e181c8e97 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c | |||
@@ -86,11 +86,6 @@ masquerade_target(struct sk_buff **pskb, | |||
86 | 86 | ||
87 | IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING); | 87 | IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING); |
88 | 88 | ||
89 | /* FIXME: For the moment, don't do local packets, breaks | ||
90 | testsuite for 2.3.49 --RR */ | ||
91 | if ((*pskb)->sk) | ||
92 | return NF_ACCEPT; | ||
93 | |||
94 | ct = ip_conntrack_get(*pskb, &ctinfo); | 89 | ct = ip_conntrack_get(*pskb, &ctinfo); |
95 | IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED | 90 | IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED |
96 | || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); | 91 | || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); |
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 06254b29d034..e6e7b6095363 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c | |||
@@ -46,7 +46,8 @@ check(const char *tablename, | |||
46 | DEBUGP(MODULENAME":check: size %u.\n", targinfosize); | 46 | DEBUGP(MODULENAME":check: size %u.\n", targinfosize); |
47 | return 0; | 47 | return 0; |
48 | } | 48 | } |
49 | if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_POST_ROUTING))) { | 49 | if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_POST_ROUTING) | |
50 | (1 << NF_IP_LOCAL_OUT))) { | ||
50 | DEBUGP(MODULENAME":check: bad hooks %x.\n", hook_mask); | 51 | DEBUGP(MODULENAME":check: bad hooks %x.\n", hook_mask); |
51 | return 0; | 52 | return 0; |
52 | } | 53 | } |
@@ -76,12 +77,13 @@ target(struct sk_buff **pskb, | |||
76 | struct ip_nat_range newrange; | 77 | struct ip_nat_range newrange; |
77 | 78 | ||
78 | IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING | 79 | IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING |
79 | || hooknum == NF_IP_POST_ROUTING); | 80 | || hooknum == NF_IP_POST_ROUTING |
81 | || hooknum == NF_IP_LOCAL_OUT); | ||
80 | ct = ip_conntrack_get(*pskb, &ctinfo); | 82 | ct = ip_conntrack_get(*pskb, &ctinfo); |
81 | 83 | ||
82 | netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); | 84 | netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); |
83 | 85 | ||
84 | if (hooknum == NF_IP_PRE_ROUTING) | 86 | if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT) |
85 | new_ip = (*pskb)->nh.iph->daddr & ~netmask; | 87 | new_ip = (*pskb)->nh.iph->daddr & ~netmask; |
86 | else | 88 | else |
87 | new_ip = (*pskb)->nh.iph->saddr & ~netmask; | 89 | new_ip = (*pskb)->nh.iph->saddr & ~netmask; |
diff --git a/net/ipv4/netfilter/ipt_NFQUEUE.c b/net/ipv4/netfilter/ipt_NFQUEUE.c new file mode 100644 index 000000000000..3cedc9be8807 --- /dev/null +++ b/net/ipv4/netfilter/ipt_NFQUEUE.c | |||
@@ -0,0 +1,70 @@ | |||
1 | /* iptables module for using new netfilter netlink queue | ||
2 | * | ||
3 | * (C) 2005 by Harald Welte <laforge@netfilter.org> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | */ | ||
10 | |||
11 | #include <linux/module.h> | ||
12 | #include <linux/skbuff.h> | ||
13 | |||
14 | #include <linux/netfilter.h> | ||
15 | #include <linux/netfilter_ipv4/ip_tables.h> | ||
16 | #include <linux/netfilter_ipv4/ipt_NFQUEUE.h> | ||
17 | |||
18 | MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); | ||
19 | MODULE_DESCRIPTION("iptables NFQUEUE target"); | ||
20 | MODULE_LICENSE("GPL"); | ||
21 | |||
22 | static unsigned int | ||
23 | target(struct sk_buff **pskb, | ||
24 | const struct net_device *in, | ||
25 | const struct net_device *out, | ||
26 | unsigned int hooknum, | ||
27 | const void *targinfo, | ||
28 | void *userinfo) | ||
29 | { | ||
30 | const struct ipt_NFQ_info *tinfo = targinfo; | ||
31 | |||
32 | return NF_QUEUE_NR(tinfo->queuenum); | ||
33 | } | ||
34 | |||
35 | static int | ||
36 | checkentry(const char *tablename, | ||
37 | const struct ipt_entry *e, | ||
38 | void *targinfo, | ||
39 | unsigned int targinfosize, | ||
40 | unsigned int hook_mask) | ||
41 | { | ||
42 | if (targinfosize != IPT_ALIGN(sizeof(struct ipt_NFQ_info))) { | ||
43 | printk(KERN_WARNING "NFQUEUE: targinfosize %u != %Zu\n", | ||
44 | targinfosize, | ||
45 | IPT_ALIGN(sizeof(struct ipt_NFQ_info))); | ||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | return 1; | ||
50 | } | ||
51 | |||
52 | static struct ipt_target ipt_NFQ_reg = { | ||
53 | .name = "NFQUEUE", | ||
54 | .target = target, | ||
55 | .checkentry = checkentry, | ||
56 | .me = THIS_MODULE, | ||
57 | }; | ||
58 | |||
59 | static int __init init(void) | ||
60 | { | ||
61 | return ipt_register_target(&ipt_NFQ_reg); | ||
62 | } | ||
63 | |||
64 | static void __exit fini(void) | ||
65 | { | ||
66 | ipt_unregister_target(&ipt_NFQ_reg); | ||
67 | } | ||
68 | |||
69 | module_init(init); | ||
70 | module_exit(fini); | ||
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 915696446020..f115a84a4ac6 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c | |||
@@ -156,7 +156,6 @@ static void send_reset(struct sk_buff *oldskb, int hook) | |||
156 | 156 | ||
157 | /* This packet will not be the same as the other: clear nf fields */ | 157 | /* This packet will not be the same as the other: clear nf fields */ |
158 | nf_reset(nskb); | 158 | nf_reset(nskb); |
159 | nskb->nfcache = 0; | ||
160 | nskb->nfmark = 0; | 159 | nskb->nfmark = 0; |
161 | #ifdef CONFIG_BRIDGE_NETFILTER | 160 | #ifdef CONFIG_BRIDGE_NETFILTER |
162 | nf_bridge_put(nskb->nf_bridge); | 161 | nf_bridge_put(nskb->nf_bridge); |
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index 7b84a254440e..8db70d6908c3 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c | |||
@@ -58,7 +58,7 @@ ipt_tcpmss_target(struct sk_buff **pskb, | |||
58 | unsigned int i; | 58 | unsigned int i; |
59 | u_int8_t *opt; | 59 | u_int8_t *opt; |
60 | 60 | ||
61 | if (!skb_ip_make_writable(pskb, (*pskb)->len)) | 61 | if (!skb_make_writable(pskb, (*pskb)->len)) |
62 | return NF_DROP; | 62 | return NF_DROP; |
63 | 63 | ||
64 | if ((*pskb)->ip_summed == CHECKSUM_HW && | 64 | if ((*pskb)->ip_summed == CHECKSUM_HW && |
@@ -190,7 +190,6 @@ ipt_tcpmss_target(struct sk_buff **pskb, | |||
190 | newmss); | 190 | newmss); |
191 | 191 | ||
192 | retmodified: | 192 | retmodified: |
193 | (*pskb)->nfcache |= NFC_UNKNOWN | NFC_ALTERED; | ||
194 | return IPT_CONTINUE; | 193 | return IPT_CONTINUE; |
195 | } | 194 | } |
196 | 195 | ||
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 85c70d240f8b..deadb36d4428 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c | |||
@@ -33,7 +33,7 @@ target(struct sk_buff **pskb, | |||
33 | if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { | 33 | if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { |
34 | u_int16_t diffs[2]; | 34 | u_int16_t diffs[2]; |
35 | 35 | ||
36 | if (!skb_ip_make_writable(pskb, sizeof(struct iphdr))) | 36 | if (!skb_make_writable(pskb, sizeof(struct iphdr))) |
37 | return NF_DROP; | 37 | return NF_DROP; |
38 | 38 | ||
39 | diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; | 39 | diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; |
@@ -46,7 +46,6 @@ target(struct sk_buff **pskb, | |||
46 | sizeof(diffs), | 46 | sizeof(diffs), |
47 | (*pskb)->nh.iph->check | 47 | (*pskb)->nh.iph->check |
48 | ^0xFFFF)); | 48 | ^0xFFFF)); |
49 | (*pskb)->nfcache |= NFC_ALTERED; | ||
50 | } | 49 | } |
51 | return IPT_CONTINUE; | 50 | return IPT_CONTINUE; |
52 | } | 51 | } |
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c new file mode 100644 index 000000000000..b9ae6a9382f3 --- /dev/null +++ b/net/ipv4/netfilter/ipt_TTL.c | |||
@@ -0,0 +1,119 @@ | |||
1 | /* TTL modification target for IP tables | ||
2 | * (C) 2000,2005 by Harald Welte <laforge@netfilter.org> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | #include <linux/module.h> | ||
11 | #include <linux/skbuff.h> | ||
12 | #include <linux/ip.h> | ||
13 | #include <net/checksum.h> | ||
14 | |||
15 | #include <linux/netfilter_ipv4/ip_tables.h> | ||
16 | #include <linux/netfilter_ipv4/ipt_TTL.h> | ||
17 | |||
18 | MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); | ||
19 | MODULE_DESCRIPTION("IP tables TTL modification module"); | ||
20 | MODULE_LICENSE("GPL"); | ||
21 | |||
22 | static unsigned int | ||
23 | ipt_ttl_target(struct sk_buff **pskb, const struct net_device *in, | ||
24 | const struct net_device *out, unsigned int hooknum, | ||
25 | const void *targinfo, void *userinfo) | ||
26 | { | ||
27 | struct iphdr *iph; | ||
28 | const struct ipt_TTL_info *info = targinfo; | ||
29 | u_int16_t diffs[2]; | ||
30 | int new_ttl; | ||
31 | |||
32 | if (!skb_make_writable(pskb, (*pskb)->len)) | ||
33 | return NF_DROP; | ||
34 | |||
35 | iph = (*pskb)->nh.iph; | ||
36 | |||
37 | switch (info->mode) { | ||
38 | case IPT_TTL_SET: | ||
39 | new_ttl = info->ttl; | ||
40 | break; | ||
41 | case IPT_TTL_INC: | ||
42 | new_ttl = iph->ttl + info->ttl; | ||
43 | if (new_ttl > 255) | ||
44 | new_ttl = 255; | ||
45 | break; | ||
46 | case IPT_TTL_DEC: | ||
47 | new_ttl = iph->ttl - info->ttl; | ||
48 | if (new_ttl < 0) | ||
49 | new_ttl = 0; | ||
50 | break; | ||
51 | default: | ||
52 | new_ttl = iph->ttl; | ||
53 | break; | ||
54 | } | ||
55 | |||
56 | if (new_ttl != iph->ttl) { | ||
57 | diffs[0] = htons(((unsigned)iph->ttl) << 8) ^ 0xFFFF; | ||
58 | iph->ttl = new_ttl; | ||
59 | diffs[1] = htons(((unsigned)iph->ttl) << 8); | ||
60 | iph->check = csum_fold(csum_partial((char *)diffs, | ||
61 | sizeof(diffs), | ||
62 | iph->check^0xFFFF)); | ||
63 | } | ||
64 | |||
65 | return IPT_CONTINUE; | ||
66 | } | ||
67 | |||
68 | static int ipt_ttl_checkentry(const char *tablename, | ||
69 | const struct ipt_entry *e, | ||
70 | void *targinfo, | ||
71 | unsigned int targinfosize, | ||
72 | unsigned int hook_mask) | ||
73 | { | ||
74 | struct ipt_TTL_info *info = targinfo; | ||
75 | |||
76 | if (targinfosize != IPT_ALIGN(sizeof(struct ipt_TTL_info))) { | ||
77 | printk(KERN_WARNING "ipt_TTL: targinfosize %u != %Zu\n", | ||
78 | targinfosize, | ||
79 | IPT_ALIGN(sizeof(struct ipt_TTL_info))); | ||
80 | return 0; | ||
81 | } | ||
82 | |||
83 | if (strcmp(tablename, "mangle")) { | ||
84 | printk(KERN_WARNING "ipt_TTL: can only be called from " | ||
85 | "\"mangle\" table, not \"%s\"\n", tablename); | ||
86 | return 0; | ||
87 | } | ||
88 | |||
89 | if (info->mode > IPT_TTL_MAXMODE) { | ||
90 | printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", | ||
91 | info->mode); | ||
92 | return 0; | ||
93 | } | ||
94 | |||
95 | if ((info->mode != IPT_TTL_SET) && (info->ttl == 0)) | ||
96 | return 0; | ||
97 | |||
98 | return 1; | ||
99 | } | ||
100 | |||
101 | static struct ipt_target ipt_TTL = { | ||
102 | .name = "TTL", | ||
103 | .target = ipt_ttl_target, | ||
104 | .checkentry = ipt_ttl_checkentry, | ||
105 | .me = THIS_MODULE, | ||
106 | }; | ||
107 | |||
108 | static int __init init(void) | ||
109 | { | ||
110 | return ipt_register_target(&ipt_TTL); | ||
111 | } | ||
112 | |||
113 | static void __exit fini(void) | ||
114 | { | ||
115 | ipt_unregister_target(&ipt_TTL); | ||
116 | } | ||
117 | |||
118 | module_init(init); | ||
119 | module_exit(fini); | ||
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 52a0076302a7..e2c14f3cb2fc 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c | |||
@@ -62,6 +62,7 @@ | |||
62 | MODULE_LICENSE("GPL"); | 62 | MODULE_LICENSE("GPL"); |
63 | MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); | 63 | MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); |
64 | MODULE_DESCRIPTION("iptables userspace logging module"); | 64 | MODULE_DESCRIPTION("iptables userspace logging module"); |
65 | MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG); | ||
65 | 66 | ||
66 | #define ULOG_NL_EVENT 111 /* Harald's favorite number */ | 67 | #define ULOG_NL_EVENT 111 /* Harald's favorite number */ |
67 | #define ULOG_MAXNLGROUPS 32 /* numer of nlgroups */ | 68 | #define ULOG_MAXNLGROUPS 32 /* numer of nlgroups */ |
@@ -115,10 +116,10 @@ static void ulog_send(unsigned int nlgroupnum) | |||
115 | if (ub->qlen > 1) | 116 | if (ub->qlen > 1) |
116 | ub->lastnlh->nlmsg_type = NLMSG_DONE; | 117 | ub->lastnlh->nlmsg_type = NLMSG_DONE; |
117 | 118 | ||
118 | NETLINK_CB(ub->skb).dst_groups = (1 << nlgroupnum); | 119 | NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1; |
119 | DEBUGP("ipt_ULOG: throwing %d packets to netlink mask %u\n", | 120 | DEBUGP("ipt_ULOG: throwing %d packets to netlink group %u\n", |
120 | ub->qlen, nlgroupnum); | 121 | ub->qlen, nlgroupnum + 1); |
121 | netlink_broadcast(nflognl, ub->skb, 0, (1 << nlgroupnum), GFP_ATOMIC); | 122 | netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC); |
122 | 123 | ||
123 | ub->qlen = 0; | 124 | ub->qlen = 0; |
124 | ub->skb = NULL; | 125 | ub->skb = NULL; |
@@ -219,13 +220,13 @@ static void ipt_ulog_packet(unsigned int hooknum, | |||
219 | pm = NLMSG_DATA(nlh); | 220 | pm = NLMSG_DATA(nlh); |
220 | 221 | ||
221 | /* We might not have a timestamp, get one */ | 222 | /* We might not have a timestamp, get one */ |
222 | if (skb->stamp.tv_sec == 0) | 223 | if (skb->tstamp.off_sec == 0) |
223 | do_gettimeofday((struct timeval *)&skb->stamp); | 224 | __net_timestamp((struct sk_buff *)skb); |
224 | 225 | ||
225 | /* copy hook, prefix, timestamp, payload, etc. */ | 226 | /* copy hook, prefix, timestamp, payload, etc. */ |
226 | pm->data_len = copy_len; | 227 | pm->data_len = copy_len; |
227 | pm->timestamp_sec = skb->stamp.tv_sec; | 228 | pm->timestamp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec; |
228 | pm->timestamp_usec = skb->stamp.tv_usec; | 229 | pm->timestamp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec; |
229 | pm->mark = skb->nfmark; | 230 | pm->mark = skb->nfmark; |
230 | pm->hook = hooknum; | 231 | pm->hook = hooknum; |
231 | if (prefix != NULL) | 232 | if (prefix != NULL) |
@@ -303,18 +304,27 @@ static unsigned int ipt_ulog_target(struct sk_buff **pskb, | |||
303 | return IPT_CONTINUE; | 304 | return IPT_CONTINUE; |
304 | } | 305 | } |
305 | 306 | ||
306 | static void ipt_logfn(unsigned int hooknum, | 307 | static void ipt_logfn(unsigned int pf, |
308 | unsigned int hooknum, | ||
307 | const struct sk_buff *skb, | 309 | const struct sk_buff *skb, |
308 | const struct net_device *in, | 310 | const struct net_device *in, |
309 | const struct net_device *out, | 311 | const struct net_device *out, |
312 | const struct nf_loginfo *li, | ||
310 | const char *prefix) | 313 | const char *prefix) |
311 | { | 314 | { |
312 | struct ipt_ulog_info loginfo = { | 315 | struct ipt_ulog_info loginfo; |
313 | .nl_group = ULOG_DEFAULT_NLGROUP, | 316 | |
314 | .copy_range = 0, | 317 | if (!li || li->type != NF_LOG_TYPE_ULOG) { |
315 | .qthreshold = ULOG_DEFAULT_QTHRESHOLD, | 318 | loginfo.nl_group = ULOG_DEFAULT_NLGROUP; |
316 | .prefix = "" | 319 | loginfo.copy_range = 0; |
317 | }; | 320 | loginfo.qthreshold = ULOG_DEFAULT_QTHRESHOLD; |
321 | loginfo.prefix[0] = '\0'; | ||
322 | } else { | ||
323 | loginfo.nl_group = li->u.ulog.group; | ||
324 | loginfo.copy_range = li->u.ulog.copy_len; | ||
325 | loginfo.qthreshold = li->u.ulog.qthreshold; | ||
326 | strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix)); | ||
327 | } | ||
318 | 328 | ||
319 | ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); | 329 | ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); |
320 | } | 330 | } |
@@ -354,6 +364,12 @@ static struct ipt_target ipt_ulog_reg = { | |||
354 | .me = THIS_MODULE, | 364 | .me = THIS_MODULE, |
355 | }; | 365 | }; |
356 | 366 | ||
367 | static struct nf_logger ipt_ulog_logger = { | ||
368 | .name = "ipt_ULOG", | ||
369 | .logfn = &ipt_logfn, | ||
370 | .me = THIS_MODULE, | ||
371 | }; | ||
372 | |||
357 | static int __init init(void) | 373 | static int __init init(void) |
358 | { | 374 | { |
359 | int i; | 375 | int i; |
@@ -372,7 +388,8 @@ static int __init init(void) | |||
372 | ulog_buffers[i].timer.data = i; | 388 | ulog_buffers[i].timer.data = i; |
373 | } | 389 | } |
374 | 390 | ||
375 | nflognl = netlink_kernel_create(NETLINK_NFLOG, NULL); | 391 | nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL, |
392 | THIS_MODULE); | ||
376 | if (!nflognl) | 393 | if (!nflognl) |
377 | return -ENOMEM; | 394 | return -ENOMEM; |
378 | 395 | ||
@@ -381,7 +398,7 @@ static int __init init(void) | |||
381 | return -EINVAL; | 398 | return -EINVAL; |
382 | } | 399 | } |
383 | if (nflog) | 400 | if (nflog) |
384 | nf_log_register(PF_INET, &ipt_logfn); | 401 | nf_log_register(PF_INET, &ipt_ulog_logger); |
385 | 402 | ||
386 | return 0; | 403 | return 0; |
387 | } | 404 | } |
@@ -394,7 +411,7 @@ static void __exit fini(void) | |||
394 | DEBUGP("ipt_ULOG: cleanup_module\n"); | 411 | DEBUGP("ipt_ULOG: cleanup_module\n"); |
395 | 412 | ||
396 | if (nflog) | 413 | if (nflog) |
397 | nf_log_unregister(PF_INET, &ipt_logfn); | 414 | nf_log_unregister_logger(&ipt_ulog_logger); |
398 | ipt_unregister_target(&ipt_ulog_reg); | 415 | ipt_unregister_target(&ipt_ulog_reg); |
399 | sock_release(nflognl->sk_socket); | 416 | sock_release(nflognl->sk_socket); |
400 | 417 | ||
diff --git a/net/ipv4/netfilter/ipt_connbytes.c b/net/ipv4/netfilter/ipt_connbytes.c new file mode 100644 index 000000000000..df4a42c6da22 --- /dev/null +++ b/net/ipv4/netfilter/ipt_connbytes.c | |||
@@ -0,0 +1,162 @@ | |||
1 | /* Kernel module to match connection tracking byte counter. | ||
2 | * GPL (C) 2002 Martin Devera (devik@cdi.cz). | ||
3 | * | ||
4 | * 2004-07-20 Harald Welte <laforge@netfilter.org> | ||
5 | * - reimplemented to use per-connection accounting counters | ||
6 | * - add functionality to match number of packets | ||
7 | * - add functionality to match average packet size | ||
8 | * - add support to match directions seperately | ||
9 | * | ||
10 | */ | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/skbuff.h> | ||
13 | #include <linux/netfilter_ipv4/ip_conntrack.h> | ||
14 | #include <linux/netfilter_ipv4/ip_tables.h> | ||
15 | #include <linux/netfilter_ipv4/ipt_connbytes.h> | ||
16 | |||
17 | #include <asm/div64.h> | ||
18 | #include <asm/bitops.h> | ||
19 | |||
20 | MODULE_LICENSE("GPL"); | ||
21 | MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); | ||
22 | MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection"); | ||
23 | |||
24 | /* 64bit divisor, dividend and result. dynamic precision */ | ||
25 | static u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor) | ||
26 | { | ||
27 | u_int32_t d = divisor; | ||
28 | |||
29 | if (divisor > 0xffffffffULL) { | ||
30 | unsigned int shift = fls(divisor >> 32); | ||
31 | |||
32 | d = divisor >> shift; | ||
33 | dividend >>= shift; | ||
34 | } | ||
35 | |||
36 | do_div(dividend, d); | ||
37 | return dividend; | ||
38 | } | ||
39 | |||
40 | static int | ||
41 | match(const struct sk_buff *skb, | ||
42 | const struct net_device *in, | ||
43 | const struct net_device *out, | ||
44 | const void *matchinfo, | ||
45 | int offset, | ||
46 | int *hotdrop) | ||
47 | { | ||
48 | const struct ipt_connbytes_info *sinfo = matchinfo; | ||
49 | enum ip_conntrack_info ctinfo; | ||
50 | struct ip_conntrack *ct; | ||
51 | u_int64_t what = 0; /* initialize to make gcc happy */ | ||
52 | |||
53 | if (!(ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo))) | ||
54 | return 0; /* no match */ | ||
55 | |||
56 | switch (sinfo->what) { | ||
57 | case IPT_CONNBYTES_PKTS: | ||
58 | switch (sinfo->direction) { | ||
59 | case IPT_CONNBYTES_DIR_ORIGINAL: | ||
60 | what = ct->counters[IP_CT_DIR_ORIGINAL].packets; | ||
61 | break; | ||
62 | case IPT_CONNBYTES_DIR_REPLY: | ||
63 | what = ct->counters[IP_CT_DIR_REPLY].packets; | ||
64 | break; | ||
65 | case IPT_CONNBYTES_DIR_BOTH: | ||
66 | what = ct->counters[IP_CT_DIR_ORIGINAL].packets; | ||
67 | what += ct->counters[IP_CT_DIR_REPLY].packets; | ||
68 | break; | ||
69 | } | ||
70 | break; | ||
71 | case IPT_CONNBYTES_BYTES: | ||
72 | switch (sinfo->direction) { | ||
73 | case IPT_CONNBYTES_DIR_ORIGINAL: | ||
74 | what = ct->counters[IP_CT_DIR_ORIGINAL].bytes; | ||
75 | break; | ||
76 | case IPT_CONNBYTES_DIR_REPLY: | ||
77 | what = ct->counters[IP_CT_DIR_REPLY].bytes; | ||
78 | break; | ||
79 | case IPT_CONNBYTES_DIR_BOTH: | ||
80 | what = ct->counters[IP_CT_DIR_ORIGINAL].bytes; | ||
81 | what += ct->counters[IP_CT_DIR_REPLY].bytes; | ||
82 | break; | ||
83 | } | ||
84 | break; | ||
85 | case IPT_CONNBYTES_AVGPKT: | ||
86 | switch (sinfo->direction) { | ||
87 | case IPT_CONNBYTES_DIR_ORIGINAL: | ||
88 | what = div64_64(ct->counters[IP_CT_DIR_ORIGINAL].bytes, | ||
89 | ct->counters[IP_CT_DIR_ORIGINAL].packets); | ||
90 | break; | ||
91 | case IPT_CONNBYTES_DIR_REPLY: | ||
92 | what = div64_64(ct->counters[IP_CT_DIR_REPLY].bytes, | ||
93 | ct->counters[IP_CT_DIR_REPLY].packets); | ||
94 | break; | ||
95 | case IPT_CONNBYTES_DIR_BOTH: | ||
96 | { | ||
97 | u_int64_t bytes; | ||
98 | u_int64_t pkts; | ||
99 | bytes = ct->counters[IP_CT_DIR_ORIGINAL].bytes + | ||
100 | ct->counters[IP_CT_DIR_REPLY].bytes; | ||
101 | pkts = ct->counters[IP_CT_DIR_ORIGINAL].packets+ | ||
102 | ct->counters[IP_CT_DIR_REPLY].packets; | ||
103 | |||
104 | /* FIXME_THEORETICAL: what to do if sum | ||
105 | * overflows ? */ | ||
106 | |||
107 | what = div64_64(bytes, pkts); | ||
108 | } | ||
109 | break; | ||
110 | } | ||
111 | break; | ||
112 | } | ||
113 | |||
114 | if (sinfo->count.to) | ||
115 | return (what <= sinfo->count.to && what >= sinfo->count.from); | ||
116 | else | ||
117 | return (what >= sinfo->count.from); | ||
118 | } | ||
119 | |||
120 | static int check(const char *tablename, | ||
121 | const struct ipt_ip *ip, | ||
122 | void *matchinfo, | ||
123 | unsigned int matchsize, | ||
124 | unsigned int hook_mask) | ||
125 | { | ||
126 | const struct ipt_connbytes_info *sinfo = matchinfo; | ||
127 | |||
128 | if (matchsize != IPT_ALIGN(sizeof(struct ipt_connbytes_info))) | ||
129 | return 0; | ||
130 | |||
131 | if (sinfo->what != IPT_CONNBYTES_PKTS && | ||
132 | sinfo->what != IPT_CONNBYTES_BYTES && | ||
133 | sinfo->what != IPT_CONNBYTES_AVGPKT) | ||
134 | return 0; | ||
135 | |||
136 | if (sinfo->direction != IPT_CONNBYTES_DIR_ORIGINAL && | ||
137 | sinfo->direction != IPT_CONNBYTES_DIR_REPLY && | ||
138 | sinfo->direction != IPT_CONNBYTES_DIR_BOTH) | ||
139 | return 0; | ||
140 | |||
141 | return 1; | ||
142 | } | ||
143 | |||
144 | static struct ipt_match state_match = { | ||
145 | .name = "connbytes", | ||
146 | .match = &match, | ||
147 | .checkentry = &check, | ||
148 | .me = THIS_MODULE | ||
149 | }; | ||
150 | |||
151 | static int __init init(void) | ||
152 | { | ||
153 | return ipt_register_match(&state_match); | ||
154 | } | ||
155 | |||
156 | static void __exit fini(void) | ||
157 | { | ||
158 | ipt_unregister_match(&state_match); | ||
159 | } | ||
160 | |||
161 | module_init(init); | ||
162 | module_exit(fini); | ||
diff --git a/net/ipv4/netfilter/ipt_connmark.c b/net/ipv4/netfilter/ipt_connmark.c index 2706f96cea55..bf8de47ce004 100644 --- a/net/ipv4/netfilter/ipt_connmark.c +++ b/net/ipv4/netfilter/ipt_connmark.c | |||
@@ -54,9 +54,16 @@ checkentry(const char *tablename, | |||
54 | unsigned int matchsize, | 54 | unsigned int matchsize, |
55 | unsigned int hook_mask) | 55 | unsigned int hook_mask) |
56 | { | 56 | { |
57 | struct ipt_connmark_info *cm = | ||
58 | (struct ipt_connmark_info *)matchinfo; | ||
57 | if (matchsize != IPT_ALIGN(sizeof(struct ipt_connmark_info))) | 59 | if (matchsize != IPT_ALIGN(sizeof(struct ipt_connmark_info))) |
58 | return 0; | 60 | return 0; |
59 | 61 | ||
62 | if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) { | ||
63 | printk(KERN_WARNING "connmark: only support 32bit mark\n"); | ||
64 | return 0; | ||
65 | } | ||
66 | |||
60 | return 1; | 67 | return 1; |
61 | } | 68 | } |
62 | 69 | ||
diff --git a/net/ipv4/netfilter/ipt_dccp.c b/net/ipv4/netfilter/ipt_dccp.c new file mode 100644 index 000000000000..ad3278bba6c1 --- /dev/null +++ b/net/ipv4/netfilter/ipt_dccp.c | |||
@@ -0,0 +1,176 @@ | |||
1 | /* | ||
2 | * iptables module for DCCP protocol header matching | ||
3 | * | ||
4 | * (C) 2005 by Harald Welte <laforge@netfilter.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/module.h> | ||
12 | #include <linux/skbuff.h> | ||
13 | #include <linux/spinlock.h> | ||
14 | #include <net/ip.h> | ||
15 | #include <linux/dccp.h> | ||
16 | |||
17 | #include <linux/netfilter_ipv4/ip_tables.h> | ||
18 | #include <linux/netfilter_ipv4/ipt_dccp.h> | ||
19 | |||
20 | #define DCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \ | ||
21 | || (!!((invflag) & (option)) ^ (cond))) | ||
22 | |||
23 | static unsigned char *dccp_optbuf; | ||
24 | static DEFINE_SPINLOCK(dccp_buflock); | ||
25 | |||
26 | static inline int | ||
27 | dccp_find_option(u_int8_t option, | ||
28 | const struct sk_buff *skb, | ||
29 | const struct dccp_hdr *dh, | ||
30 | int *hotdrop) | ||
31 | { | ||
32 | /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ | ||
33 | unsigned char *op; | ||
34 | unsigned int optoff = __dccp_hdr_len(dh); | ||
35 | unsigned int optlen = dh->dccph_doff*4 - __dccp_hdr_len(dh); | ||
36 | unsigned int i; | ||
37 | |||
38 | if (dh->dccph_doff * 4 < __dccp_hdr_len(dh)) { | ||
39 | *hotdrop = 1; | ||
40 | return 0; | ||
41 | } | ||
42 | |||
43 | if (!optlen) | ||
44 | return 0; | ||
45 | |||
46 | spin_lock_bh(&dccp_buflock); | ||
47 | op = skb_header_pointer(skb, | ||
48 | skb->nh.iph->ihl*4 + optoff, | ||
49 | optlen, dccp_optbuf); | ||
50 | if (op == NULL) { | ||
51 | /* If we don't have the whole header, drop packet. */ | ||
52 | spin_unlock_bh(&dccp_buflock); | ||
53 | *hotdrop = 1; | ||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | for (i = 0; i < optlen; ) { | ||
58 | if (op[i] == option) { | ||
59 | spin_unlock_bh(&dccp_buflock); | ||
60 | return 1; | ||
61 | } | ||
62 | |||
63 | if (op[i] < 2) | ||
64 | i++; | ||
65 | else | ||
66 | i += op[i+1]?:1; | ||
67 | } | ||
68 | |||
69 | spin_unlock_bh(&dccp_buflock); | ||
70 | return 0; | ||
71 | } | ||
72 | |||
73 | |||
74 | static inline int | ||
75 | match_types(const struct dccp_hdr *dh, u_int16_t typemask) | ||
76 | { | ||
77 | return (typemask & (1 << dh->dccph_type)); | ||
78 | } | ||
79 | |||
80 | static inline int | ||
81 | match_option(u_int8_t option, const struct sk_buff *skb, | ||
82 | const struct dccp_hdr *dh, int *hotdrop) | ||
83 | { | ||
84 | return dccp_find_option(option, skb, dh, hotdrop); | ||
85 | } | ||
86 | |||
87 | static int | ||
88 | match(const struct sk_buff *skb, | ||
89 | const struct net_device *in, | ||
90 | const struct net_device *out, | ||
91 | const void *matchinfo, | ||
92 | int offset, | ||
93 | int *hotdrop) | ||
94 | { | ||
95 | const struct ipt_dccp_info *info = | ||
96 | (const struct ipt_dccp_info *)matchinfo; | ||
97 | struct dccp_hdr _dh, *dh; | ||
98 | |||
99 | if (offset) | ||
100 | return 0; | ||
101 | |||
102 | dh = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_dh), &_dh); | ||
103 | if (dh == NULL) { | ||
104 | *hotdrop = 1; | ||
105 | return 0; | ||
106 | } | ||
107 | |||
108 | return DCCHECK(((ntohs(dh->dccph_sport) >= info->spts[0]) | ||
109 | && (ntohs(dh->dccph_sport) <= info->spts[1])), | ||
110 | IPT_DCCP_SRC_PORTS, info->flags, info->invflags) | ||
111 | && DCCHECK(((ntohs(dh->dccph_dport) >= info->dpts[0]) | ||
112 | && (ntohs(dh->dccph_dport) <= info->dpts[1])), | ||
113 | IPT_DCCP_DEST_PORTS, info->flags, info->invflags) | ||
114 | && DCCHECK(match_types(dh, info->typemask), | ||
115 | IPT_DCCP_TYPE, info->flags, info->invflags) | ||
116 | && DCCHECK(match_option(info->option, skb, dh, hotdrop), | ||
117 | IPT_DCCP_OPTION, info->flags, info->invflags); | ||
118 | } | ||
119 | |||
120 | static int | ||
121 | checkentry(const char *tablename, | ||
122 | const struct ipt_ip *ip, | ||
123 | void *matchinfo, | ||
124 | unsigned int matchsize, | ||
125 | unsigned int hook_mask) | ||
126 | { | ||
127 | const struct ipt_dccp_info *info; | ||
128 | |||
129 | info = (const struct ipt_dccp_info *)matchinfo; | ||
130 | |||
131 | return ip->proto == IPPROTO_DCCP | ||
132 | && !(ip->invflags & IPT_INV_PROTO) | ||
133 | && matchsize == IPT_ALIGN(sizeof(struct ipt_dccp_info)) | ||
134 | && !(info->flags & ~IPT_DCCP_VALID_FLAGS) | ||
135 | && !(info->invflags & ~IPT_DCCP_VALID_FLAGS) | ||
136 | && !(info->invflags & ~info->flags); | ||
137 | } | ||
138 | |||
139 | static struct ipt_match dccp_match = | ||
140 | { | ||
141 | .name = "dccp", | ||
142 | .match = &match, | ||
143 | .checkentry = &checkentry, | ||
144 | .me = THIS_MODULE, | ||
145 | }; | ||
146 | |||
147 | static int __init init(void) | ||
148 | { | ||
149 | int ret; | ||
150 | |||
151 | /* doff is 8 bits, so the maximum option size is (4*256). Don't put | ||
152 | * this in BSS since DaveM is worried about locked TLB's for kernel | ||
153 | * BSS. */ | ||
154 | dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL); | ||
155 | if (!dccp_optbuf) | ||
156 | return -ENOMEM; | ||
157 | ret = ipt_register_match(&dccp_match); | ||
158 | if (ret) | ||
159 | kfree(dccp_optbuf); | ||
160 | |||
161 | return ret; | ||
162 | } | ||
163 | |||
164 | static void __exit fini(void) | ||
165 | { | ||
166 | ipt_unregister_match(&dccp_match); | ||
167 | kfree(dccp_optbuf); | ||
168 | } | ||
169 | |||
170 | module_init(init); | ||
171 | module_exit(fini); | ||
172 | |||
173 | MODULE_LICENSE("GPL"); | ||
174 | MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); | ||
175 | MODULE_DESCRIPTION("Match for DCCP protocol packets"); | ||
176 | |||
diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c index 564b49bfebcf..2dd1cccbdab9 100644 --- a/net/ipv4/netfilter/ipt_hashlimit.c +++ b/net/ipv4/netfilter/ipt_hashlimit.c | |||
@@ -94,7 +94,7 @@ struct ipt_hashlimit_htable { | |||
94 | static DEFINE_SPINLOCK(hashlimit_lock); /* protects htables list */ | 94 | static DEFINE_SPINLOCK(hashlimit_lock); /* protects htables list */ |
95 | static DECLARE_MUTEX(hlimit_mutex); /* additional checkentry protection */ | 95 | static DECLARE_MUTEX(hlimit_mutex); /* additional checkentry protection */ |
96 | static HLIST_HEAD(hashlimit_htables); | 96 | static HLIST_HEAD(hashlimit_htables); |
97 | static kmem_cache_t *hashlimit_cachep; | 97 | static kmem_cache_t *hashlimit_cachep __read_mostly; |
98 | 98 | ||
99 | static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b) | 99 | static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b) |
100 | { | 100 | { |
diff --git a/net/ipv4/netfilter/ipt_mark.c b/net/ipv4/netfilter/ipt_mark.c index 8955728127b9..00bef6cdd3f8 100644 --- a/net/ipv4/netfilter/ipt_mark.c +++ b/net/ipv4/netfilter/ipt_mark.c | |||
@@ -37,9 +37,16 @@ checkentry(const char *tablename, | |||
37 | unsigned int matchsize, | 37 | unsigned int matchsize, |
38 | unsigned int hook_mask) | 38 | unsigned int hook_mask) |
39 | { | 39 | { |
40 | struct ipt_mark_info *minfo = (struct ipt_mark_info *) matchinfo; | ||
41 | |||
40 | if (matchsize != IPT_ALIGN(sizeof(struct ipt_mark_info))) | 42 | if (matchsize != IPT_ALIGN(sizeof(struct ipt_mark_info))) |
41 | return 0; | 43 | return 0; |
42 | 44 | ||
45 | if (minfo->mark > 0xffffffff || minfo->mask > 0xffffffff) { | ||
46 | printk(KERN_WARNING "mark: only supports 32bit mark\n"); | ||
47 | return 0; | ||
48 | } | ||
49 | |||
43 | return 1; | 50 | return 1; |
44 | } | 51 | } |
45 | 52 | ||
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c index 3b9065e06381..c1889f88262b 100644 --- a/net/ipv4/netfilter/ipt_owner.c +++ b/net/ipv4/netfilter/ipt_owner.c | |||
@@ -21,106 +21,6 @@ MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); | |||
21 | MODULE_DESCRIPTION("iptables owner match"); | 21 | MODULE_DESCRIPTION("iptables owner match"); |
22 | 22 | ||
23 | static int | 23 | static int |
24 | match_comm(const struct sk_buff *skb, const char *comm) | ||
25 | { | ||
26 | struct task_struct *g, *p; | ||
27 | struct files_struct *files; | ||
28 | int i; | ||
29 | |||
30 | read_lock(&tasklist_lock); | ||
31 | do_each_thread(g, p) { | ||
32 | if(strncmp(p->comm, comm, sizeof(p->comm))) | ||
33 | continue; | ||
34 | |||
35 | task_lock(p); | ||
36 | files = p->files; | ||
37 | if(files) { | ||
38 | spin_lock(&files->file_lock); | ||
39 | for (i=0; i < files->max_fds; i++) { | ||
40 | if (fcheck_files(files, i) == | ||
41 | skb->sk->sk_socket->file) { | ||
42 | spin_unlock(&files->file_lock); | ||
43 | task_unlock(p); | ||
44 | read_unlock(&tasklist_lock); | ||
45 | return 1; | ||
46 | } | ||
47 | } | ||
48 | spin_unlock(&files->file_lock); | ||
49 | } | ||
50 | task_unlock(p); | ||
51 | } while_each_thread(g, p); | ||
52 | read_unlock(&tasklist_lock); | ||
53 | return 0; | ||
54 | } | ||
55 | |||
56 | static int | ||
57 | match_pid(const struct sk_buff *skb, pid_t pid) | ||
58 | { | ||
59 | struct task_struct *p; | ||
60 | struct files_struct *files; | ||
61 | int i; | ||
62 | |||
63 | read_lock(&tasklist_lock); | ||
64 | p = find_task_by_pid(pid); | ||
65 | if (!p) | ||
66 | goto out; | ||
67 | task_lock(p); | ||
68 | files = p->files; | ||
69 | if(files) { | ||
70 | spin_lock(&files->file_lock); | ||
71 | for (i=0; i < files->max_fds; i++) { | ||
72 | if (fcheck_files(files, i) == | ||
73 | skb->sk->sk_socket->file) { | ||
74 | spin_unlock(&files->file_lock); | ||
75 | task_unlock(p); | ||
76 | read_unlock(&tasklist_lock); | ||
77 | return 1; | ||
78 | } | ||
79 | } | ||
80 | spin_unlock(&files->file_lock); | ||
81 | } | ||
82 | task_unlock(p); | ||
83 | out: | ||
84 | read_unlock(&tasklist_lock); | ||
85 | return 0; | ||
86 | } | ||
87 | |||
88 | static int | ||
89 | match_sid(const struct sk_buff *skb, pid_t sid) | ||
90 | { | ||
91 | struct task_struct *g, *p; | ||
92 | struct file *file = skb->sk->sk_socket->file; | ||
93 | int i, found=0; | ||
94 | |||
95 | read_lock(&tasklist_lock); | ||
96 | do_each_thread(g, p) { | ||
97 | struct files_struct *files; | ||
98 | if (p->signal->session != sid) | ||
99 | continue; | ||
100 | |||
101 | task_lock(p); | ||
102 | files = p->files; | ||
103 | if (files) { | ||
104 | spin_lock(&files->file_lock); | ||
105 | for (i=0; i < files->max_fds; i++) { | ||
106 | if (fcheck_files(files, i) == file) { | ||
107 | found = 1; | ||
108 | break; | ||
109 | } | ||
110 | } | ||
111 | spin_unlock(&files->file_lock); | ||
112 | } | ||
113 | task_unlock(p); | ||
114 | if (found) | ||
115 | goto out; | ||
116 | } while_each_thread(g, p); | ||
117 | out: | ||
118 | read_unlock(&tasklist_lock); | ||
119 | |||
120 | return found; | ||
121 | } | ||
122 | |||
123 | static int | ||
124 | match(const struct sk_buff *skb, | 24 | match(const struct sk_buff *skb, |
125 | const struct net_device *in, | 25 | const struct net_device *in, |
126 | const struct net_device *out, | 26 | const struct net_device *out, |
@@ -145,24 +45,6 @@ match(const struct sk_buff *skb, | |||
145 | return 0; | 45 | return 0; |
146 | } | 46 | } |
147 | 47 | ||
148 | if(info->match & IPT_OWNER_PID) { | ||
149 | if (!match_pid(skb, info->pid) ^ | ||
150 | !!(info->invert & IPT_OWNER_PID)) | ||
151 | return 0; | ||
152 | } | ||
153 | |||
154 | if(info->match & IPT_OWNER_SID) { | ||
155 | if (!match_sid(skb, info->sid) ^ | ||
156 | !!(info->invert & IPT_OWNER_SID)) | ||
157 | return 0; | ||
158 | } | ||
159 | |||
160 | if(info->match & IPT_OWNER_COMM) { | ||
161 | if (!match_comm(skb, info->comm) ^ | ||
162 | !!(info->invert & IPT_OWNER_COMM)) | ||
163 | return 0; | ||
164 | } | ||
165 | |||
166 | return 1; | 48 | return 1; |
167 | } | 49 | } |
168 | 50 | ||
@@ -173,6 +55,8 @@ checkentry(const char *tablename, | |||
173 | unsigned int matchsize, | 55 | unsigned int matchsize, |
174 | unsigned int hook_mask) | 56 | unsigned int hook_mask) |
175 | { | 57 | { |
58 | const struct ipt_owner_info *info = matchinfo; | ||
59 | |||
176 | if (hook_mask | 60 | if (hook_mask |
177 | & ~((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING))) { | 61 | & ~((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING))) { |
178 | printk("ipt_owner: only valid for LOCAL_OUT or POST_ROUTING.\n"); | 62 | printk("ipt_owner: only valid for LOCAL_OUT or POST_ROUTING.\n"); |
@@ -184,15 +68,13 @@ checkentry(const char *tablename, | |||
184 | IPT_ALIGN(sizeof(struct ipt_owner_info))); | 68 | IPT_ALIGN(sizeof(struct ipt_owner_info))); |
185 | return 0; | 69 | return 0; |
186 | } | 70 | } |
187 | #ifdef CONFIG_SMP | 71 | |
188 | /* files->file_lock can not be used in a BH */ | 72 | if (info->match & (IPT_OWNER_PID|IPT_OWNER_SID|IPT_OWNER_COMM)) { |
189 | if (((struct ipt_owner_info *)matchinfo)->match | 73 | printk("ipt_owner: pid, sid and command matching " |
190 | & (IPT_OWNER_PID|IPT_OWNER_SID|IPT_OWNER_COMM)) { | 74 | "not supported anymore\n"); |
191 | printk("ipt_owner: pid, sid and command matching is broken " | ||
192 | "on SMP.\n"); | ||
193 | return 0; | 75 | return 0; |
194 | } | 76 | } |
195 | #endif | 77 | |
196 | return 1; | 78 | return 1; |
197 | } | 79 | } |
198 | 80 | ||
diff --git a/net/ipv4/netfilter/ipt_string.c b/net/ipv4/netfilter/ipt_string.c new file mode 100644 index 000000000000..b5def204d798 --- /dev/null +++ b/net/ipv4/netfilter/ipt_string.c | |||
@@ -0,0 +1,91 @@ | |||
1 | /* String matching match for iptables | ||
2 | * | ||
3 | * (C) 2005 Pablo Neira Ayuso <pablo@eurodev.net> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/init.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/skbuff.h> | ||
14 | #include <linux/netfilter_ipv4/ip_tables.h> | ||
15 | #include <linux/netfilter_ipv4/ipt_string.h> | ||
16 | #include <linux/textsearch.h> | ||
17 | |||
18 | MODULE_AUTHOR("Pablo Neira Ayuso <pablo@eurodev.net>"); | ||
19 | MODULE_DESCRIPTION("IP tables string match module"); | ||
20 | MODULE_LICENSE("GPL"); | ||
21 | |||
22 | static int match(const struct sk_buff *skb, | ||
23 | const struct net_device *in, | ||
24 | const struct net_device *out, | ||
25 | const void *matchinfo, | ||
26 | int offset, | ||
27 | int *hotdrop) | ||
28 | { | ||
29 | struct ts_state state; | ||
30 | struct ipt_string_info *conf = (struct ipt_string_info *) matchinfo; | ||
31 | |||
32 | memset(&state, 0, sizeof(struct ts_state)); | ||
33 | |||
34 | return (skb_find_text((struct sk_buff *)skb, conf->from_offset, | ||
35 | conf->to_offset, conf->config, &state) | ||
36 | != UINT_MAX) && !conf->invert; | ||
37 | } | ||
38 | |||
39 | #define STRING_TEXT_PRIV(m) ((struct ipt_string_info *) m) | ||
40 | |||
41 | static int checkentry(const char *tablename, | ||
42 | const struct ipt_ip *ip, | ||
43 | void *matchinfo, | ||
44 | unsigned int matchsize, | ||
45 | unsigned int hook_mask) | ||
46 | { | ||
47 | struct ipt_string_info *conf = matchinfo; | ||
48 | struct ts_config *ts_conf; | ||
49 | |||
50 | if (matchsize != IPT_ALIGN(sizeof(struct ipt_string_info))) | ||
51 | return 0; | ||
52 | |||
53 | /* Damn, can't handle this case properly with iptables... */ | ||
54 | if (conf->from_offset > conf->to_offset) | ||
55 | return 0; | ||
56 | |||
57 | ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen, | ||
58 | GFP_KERNEL, TS_AUTOLOAD); | ||
59 | if (IS_ERR(ts_conf)) | ||
60 | return 0; | ||
61 | |||
62 | conf->config = ts_conf; | ||
63 | |||
64 | return 1; | ||
65 | } | ||
66 | |||
67 | static void destroy(void *matchinfo, unsigned int matchsize) | ||
68 | { | ||
69 | textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config); | ||
70 | } | ||
71 | |||
72 | static struct ipt_match string_match = { | ||
73 | .name = "string", | ||
74 | .match = match, | ||
75 | .checkentry = checkentry, | ||
76 | .destroy = destroy, | ||
77 | .me = THIS_MODULE | ||
78 | }; | ||
79 | |||
80 | static int __init init(void) | ||
81 | { | ||
82 | return ipt_register_match(&string_match); | ||
83 | } | ||
84 | |||
85 | static void __exit fini(void) | ||
86 | { | ||
87 | ipt_unregister_match(&string_match); | ||
88 | } | ||
89 | |||
90 | module_init(init); | ||
91 | module_exit(fini); | ||
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 912bbcc7f415..f7943ba1f43c 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -59,13 +59,10 @@ static int fold_prot_inuse(struct proto *proto) | |||
59 | */ | 59 | */ |
60 | static int sockstat_seq_show(struct seq_file *seq, void *v) | 60 | static int sockstat_seq_show(struct seq_file *seq, void *v) |
61 | { | 61 | { |
62 | /* From net/socket.c */ | ||
63 | extern void socket_seq_show(struct seq_file *seq); | ||
64 | |||
65 | socket_seq_show(seq); | 62 | socket_seq_show(seq); |
66 | seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", | 63 | seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", |
67 | fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), | 64 | fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), |
68 | tcp_tw_count, atomic_read(&tcp_sockets_allocated), | 65 | tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), |
69 | atomic_read(&tcp_memory_allocated)); | 66 | atomic_read(&tcp_memory_allocated)); |
70 | seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); | 67 | seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); |
71 | seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); | 68 | seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); |
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 0db405a869f2..291831e792af 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c | |||
@@ -40,7 +40,6 @@ | |||
40 | #include <linux/timer.h> | 40 | #include <linux/timer.h> |
41 | #include <net/ip.h> | 41 | #include <net/ip.h> |
42 | #include <net/protocol.h> | 42 | #include <net/protocol.h> |
43 | #include <net/tcp.h> | ||
44 | #include <linux/skbuff.h> | 43 | #include <linux/skbuff.h> |
45 | #include <net/sock.h> | 44 | #include <net/sock.h> |
46 | #include <net/icmp.h> | 45 | #include <net/icmp.h> |
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index d1835b1bc8c4..304bb0a1d4f0 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -59,7 +59,6 @@ | |||
59 | #include <linux/netdevice.h> | 59 | #include <linux/netdevice.h> |
60 | #include <linux/in_route.h> | 60 | #include <linux/in_route.h> |
61 | #include <linux/route.h> | 61 | #include <linux/route.h> |
62 | #include <linux/tcp.h> | ||
63 | #include <linux/skbuff.h> | 62 | #include <linux/skbuff.h> |
64 | #include <net/dst.h> | 63 | #include <net/dst.h> |
65 | #include <net/sock.h> | 64 | #include <net/sock.h> |
@@ -71,6 +70,7 @@ | |||
71 | #include <net/udp.h> | 70 | #include <net/udp.h> |
72 | #include <net/raw.h> | 71 | #include <net/raw.h> |
73 | #include <net/snmp.h> | 72 | #include <net/snmp.h> |
73 | #include <net/tcp_states.h> | ||
74 | #include <net/inet_common.h> | 74 | #include <net/inet_common.h> |
75 | #include <net/checksum.h> | 75 | #include <net/checksum.h> |
76 | #include <net/xfrm.h> | 76 | #include <net/xfrm.h> |
@@ -150,10 +150,11 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) | |||
150 | * RFC 1122: SHOULD pass TOS value up to the transport layer. | 150 | * RFC 1122: SHOULD pass TOS value up to the transport layer. |
151 | * -> It does. And not only TOS, but all IP header. | 151 | * -> It does. And not only TOS, but all IP header. |
152 | */ | 152 | */ |
153 | void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) | 153 | int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) |
154 | { | 154 | { |
155 | struct sock *sk; | 155 | struct sock *sk; |
156 | struct hlist_head *head; | 156 | struct hlist_head *head; |
157 | int delivered = 0; | ||
157 | 158 | ||
158 | read_lock(&raw_v4_lock); | 159 | read_lock(&raw_v4_lock); |
159 | head = &raw_v4_htable[hash]; | 160 | head = &raw_v4_htable[hash]; |
@@ -164,6 +165,7 @@ void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) | |||
164 | skb->dev->ifindex); | 165 | skb->dev->ifindex); |
165 | 166 | ||
166 | while (sk) { | 167 | while (sk) { |
168 | delivered = 1; | ||
167 | if (iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) { | 169 | if (iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) { |
168 | struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); | 170 | struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); |
169 | 171 | ||
@@ -177,6 +179,7 @@ void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) | |||
177 | } | 179 | } |
178 | out: | 180 | out: |
179 | read_unlock(&raw_v4_lock); | 181 | read_unlock(&raw_v4_lock); |
182 | return delivered; | ||
180 | } | 183 | } |
181 | 184 | ||
182 | void raw_err (struct sock *sk, struct sk_buff *skb, u32 info) | 185 | void raw_err (struct sock *sk, struct sk_buff *skb, u32 info) |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d675ff80b04d..8c0b14e3beec 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -240,7 +240,9 @@ static unsigned rt_hash_mask; | |||
240 | static int rt_hash_log; | 240 | static int rt_hash_log; |
241 | static unsigned int rt_hash_rnd; | 241 | static unsigned int rt_hash_rnd; |
242 | 242 | ||
243 | struct rt_cache_stat *rt_cache_stat; | 243 | static struct rt_cache_stat *rt_cache_stat; |
244 | #define RT_CACHE_STAT_INC(field) \ | ||
245 | (per_cpu_ptr(rt_cache_stat, raw_smp_processor_id())->field++) | ||
244 | 246 | ||
245 | static int rt_intern_hash(unsigned hash, struct rtable *rth, | 247 | static int rt_intern_hash(unsigned hash, struct rtable *rth, |
246 | struct rtable **res); | 248 | struct rtable **res); |
@@ -2600,6 +2602,8 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp) | |||
2600 | return ip_route_output_slow(rp, flp); | 2602 | return ip_route_output_slow(rp, flp); |
2601 | } | 2603 | } |
2602 | 2604 | ||
2605 | EXPORT_SYMBOL_GPL(__ip_route_output_key); | ||
2606 | |||
2603 | int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags) | 2607 | int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags) |
2604 | { | 2608 | { |
2605 | int err; | 2609 | int err; |
@@ -2618,6 +2622,8 @@ int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, | |||
2618 | return 0; | 2622 | return 0; |
2619 | } | 2623 | } |
2620 | 2624 | ||
2625 | EXPORT_SYMBOL_GPL(ip_route_output_flow); | ||
2626 | |||
2621 | int ip_route_output_key(struct rtable **rp, struct flowi *flp) | 2627 | int ip_route_output_key(struct rtable **rp, struct flowi *flp) |
2622 | { | 2628 | { |
2623 | return ip_route_output_flow(rp, flp, NULL, 0); | 2629 | return ip_route_output_flow(rp, flp, NULL, 0); |
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 72d014442185..a34e60ea48a1 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
@@ -169,8 +169,6 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) | |||
169 | return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; | 169 | return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; |
170 | } | 170 | } |
171 | 171 | ||
172 | extern struct request_sock_ops tcp_request_sock_ops; | ||
173 | |||
174 | static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, | 172 | static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, |
175 | struct request_sock *req, | 173 | struct request_sock *req, |
176 | struct dst_entry *dst) | 174 | struct dst_entry *dst) |
@@ -180,7 +178,7 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, | |||
180 | 178 | ||
181 | child = tp->af_specific->syn_recv_sock(sk, skb, req, dst); | 179 | child = tp->af_specific->syn_recv_sock(sk, skb, req, dst); |
182 | if (child) | 180 | if (child) |
183 | tcp_acceptq_queue(sk, req, child); | 181 | inet_csk_reqsk_queue_add(sk, req, child); |
184 | else | 182 | else |
185 | reqsk_free(req); | 183 | reqsk_free(req); |
186 | 184 | ||
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e32894532416..652685623519 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -11,7 +11,9 @@ | |||
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/sysctl.h> | 12 | #include <linux/sysctl.h> |
13 | #include <linux/config.h> | 13 | #include <linux/config.h> |
14 | #include <linux/igmp.h> | ||
14 | #include <net/snmp.h> | 15 | #include <net/snmp.h> |
16 | #include <net/icmp.h> | ||
15 | #include <net/ip.h> | 17 | #include <net/ip.h> |
16 | #include <net/route.h> | 18 | #include <net/route.h> |
17 | #include <net/tcp.h> | 19 | #include <net/tcp.h> |
@@ -19,36 +21,6 @@ | |||
19 | /* From af_inet.c */ | 21 | /* From af_inet.c */ |
20 | extern int sysctl_ip_nonlocal_bind; | 22 | extern int sysctl_ip_nonlocal_bind; |
21 | 23 | ||
22 | /* From icmp.c */ | ||
23 | extern int sysctl_icmp_echo_ignore_all; | ||
24 | extern int sysctl_icmp_echo_ignore_broadcasts; | ||
25 | extern int sysctl_icmp_ignore_bogus_error_responses; | ||
26 | extern int sysctl_icmp_errors_use_inbound_ifaddr; | ||
27 | |||
28 | /* From ip_fragment.c */ | ||
29 | extern int sysctl_ipfrag_low_thresh; | ||
30 | extern int sysctl_ipfrag_high_thresh; | ||
31 | extern int sysctl_ipfrag_time; | ||
32 | extern int sysctl_ipfrag_secret_interval; | ||
33 | |||
34 | /* From ip_output.c */ | ||
35 | extern int sysctl_ip_dynaddr; | ||
36 | |||
37 | /* From icmp.c */ | ||
38 | extern int sysctl_icmp_ratelimit; | ||
39 | extern int sysctl_icmp_ratemask; | ||
40 | |||
41 | /* From igmp.c */ | ||
42 | extern int sysctl_igmp_max_memberships; | ||
43 | extern int sysctl_igmp_max_msf; | ||
44 | |||
45 | /* From inetpeer.c */ | ||
46 | extern int inet_peer_threshold; | ||
47 | extern int inet_peer_minttl; | ||
48 | extern int inet_peer_maxttl; | ||
49 | extern int inet_peer_gc_mintime; | ||
50 | extern int inet_peer_gc_maxtime; | ||
51 | |||
52 | #ifdef CONFIG_SYSCTL | 24 | #ifdef CONFIG_SYSCTL |
53 | static int tcp_retr1_max = 255; | 25 | static int tcp_retr1_max = 255; |
54 | static int ip_local_port_range_min[] = { 1, 1 }; | 26 | static int ip_local_port_range_min[] = { 1, 1 }; |
@@ -57,8 +29,6 @@ static int ip_local_port_range_max[] = { 65535, 65535 }; | |||
57 | 29 | ||
58 | struct ipv4_config ipv4_config; | 30 | struct ipv4_config ipv4_config; |
59 | 31 | ||
60 | extern ctl_table ipv4_route_table[]; | ||
61 | |||
62 | #ifdef CONFIG_SYSCTL | 32 | #ifdef CONFIG_SYSCTL |
63 | 33 | ||
64 | static | 34 | static |
@@ -136,10 +106,11 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * | |||
136 | return ret; | 106 | return ret; |
137 | } | 107 | } |
138 | 108 | ||
139 | int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, int nlen, | 109 | static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, |
140 | void __user *oldval, size_t __user *oldlenp, | 110 | int nlen, void __user *oldval, |
141 | void __user *newval, size_t newlen, | 111 | size_t __user *oldlenp, |
142 | void **context) | 112 | void __user *newval, size_t newlen, |
113 | void **context) | ||
143 | { | 114 | { |
144 | char val[TCP_CA_NAME_MAX]; | 115 | char val[TCP_CA_NAME_MAX]; |
145 | ctl_table tbl = { | 116 | ctl_table tbl = { |
@@ -259,7 +230,7 @@ ctl_table ipv4_table[] = { | |||
259 | { | 230 | { |
260 | .ctl_name = NET_TCP_MAX_TW_BUCKETS, | 231 | .ctl_name = NET_TCP_MAX_TW_BUCKETS, |
261 | .procname = "tcp_max_tw_buckets", | 232 | .procname = "tcp_max_tw_buckets", |
262 | .data = &sysctl_tcp_max_tw_buckets, | 233 | .data = &tcp_death_row.sysctl_max_tw_buckets, |
263 | .maxlen = sizeof(int), | 234 | .maxlen = sizeof(int), |
264 | .mode = 0644, | 235 | .mode = 0644, |
265 | .proc_handler = &proc_dointvec | 236 | .proc_handler = &proc_dointvec |
@@ -363,7 +334,7 @@ ctl_table ipv4_table[] = { | |||
363 | { | 334 | { |
364 | .ctl_name = NET_TCP_TW_RECYCLE, | 335 | .ctl_name = NET_TCP_TW_RECYCLE, |
365 | .procname = "tcp_tw_recycle", | 336 | .procname = "tcp_tw_recycle", |
366 | .data = &sysctl_tcp_tw_recycle, | 337 | .data = &tcp_death_row.sysctl_tw_recycle, |
367 | .maxlen = sizeof(int), | 338 | .maxlen = sizeof(int), |
368 | .mode = 0644, | 339 | .mode = 0644, |
369 | .proc_handler = &proc_dointvec | 340 | .proc_handler = &proc_dointvec |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 69b1fcf70077..02fdda68718d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -269,13 +269,12 @@ | |||
269 | 269 | ||
270 | int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; | 270 | int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; |
271 | 271 | ||
272 | DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics); | 272 | DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly; |
273 | |||
274 | kmem_cache_t *tcp_bucket_cachep; | ||
275 | kmem_cache_t *tcp_timewait_cachep; | ||
276 | 273 | ||
277 | atomic_t tcp_orphan_count = ATOMIC_INIT(0); | 274 | atomic_t tcp_orphan_count = ATOMIC_INIT(0); |
278 | 275 | ||
276 | EXPORT_SYMBOL_GPL(tcp_orphan_count); | ||
277 | |||
279 | int sysctl_tcp_mem[3]; | 278 | int sysctl_tcp_mem[3]; |
280 | int sysctl_tcp_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 }; | 279 | int sysctl_tcp_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 }; |
281 | int sysctl_tcp_rmem[3] = { 4 * 1024, 87380, 87380 * 2 }; | 280 | int sysctl_tcp_rmem[3] = { 4 * 1024, 87380, 87380 * 2 }; |
@@ -311,15 +310,6 @@ void tcp_enter_memory_pressure(void) | |||
311 | EXPORT_SYMBOL(tcp_enter_memory_pressure); | 310 | EXPORT_SYMBOL(tcp_enter_memory_pressure); |
312 | 311 | ||
313 | /* | 312 | /* |
314 | * LISTEN is a special case for poll.. | ||
315 | */ | ||
316 | static __inline__ unsigned int tcp_listen_poll(struct sock *sk, | ||
317 | poll_table *wait) | ||
318 | { | ||
319 | return !reqsk_queue_empty(&tcp_sk(sk)->accept_queue) ? (POLLIN | POLLRDNORM) : 0; | ||
320 | } | ||
321 | |||
322 | /* | ||
323 | * Wait for a TCP event. | 313 | * Wait for a TCP event. |
324 | * | 314 | * |
325 | * Note that we don't need to lock the socket, as the upper poll layers | 315 | * Note that we don't need to lock the socket, as the upper poll layers |
@@ -334,7 +324,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
334 | 324 | ||
335 | poll_wait(file, sk->sk_sleep, wait); | 325 | poll_wait(file, sk->sk_sleep, wait); |
336 | if (sk->sk_state == TCP_LISTEN) | 326 | if (sk->sk_state == TCP_LISTEN) |
337 | return tcp_listen_poll(sk, wait); | 327 | return inet_csk_listen_poll(sk); |
338 | 328 | ||
339 | /* Socket is not locked. We are protected from async events | 329 | /* Socket is not locked. We are protected from async events |
340 | by poll logic and correct handling of state changes | 330 | by poll logic and correct handling of state changes |
@@ -457,109 +447,6 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
457 | return put_user(answ, (int __user *)arg); | 447 | return put_user(answ, (int __user *)arg); |
458 | } | 448 | } |
459 | 449 | ||
460 | |||
461 | int tcp_listen_start(struct sock *sk) | ||
462 | { | ||
463 | struct inet_sock *inet = inet_sk(sk); | ||
464 | struct tcp_sock *tp = tcp_sk(sk); | ||
465 | int rc = reqsk_queue_alloc(&tp->accept_queue, TCP_SYNQ_HSIZE); | ||
466 | |||
467 | if (rc != 0) | ||
468 | return rc; | ||
469 | |||
470 | sk->sk_max_ack_backlog = 0; | ||
471 | sk->sk_ack_backlog = 0; | ||
472 | tcp_delack_init(tp); | ||
473 | |||
474 | /* There is race window here: we announce ourselves listening, | ||
475 | * but this transition is still not validated by get_port(). | ||
476 | * It is OK, because this socket enters to hash table only | ||
477 | * after validation is complete. | ||
478 | */ | ||
479 | sk->sk_state = TCP_LISTEN; | ||
480 | if (!sk->sk_prot->get_port(sk, inet->num)) { | ||
481 | inet->sport = htons(inet->num); | ||
482 | |||
483 | sk_dst_reset(sk); | ||
484 | sk->sk_prot->hash(sk); | ||
485 | |||
486 | return 0; | ||
487 | } | ||
488 | |||
489 | sk->sk_state = TCP_CLOSE; | ||
490 | reqsk_queue_destroy(&tp->accept_queue); | ||
491 | return -EADDRINUSE; | ||
492 | } | ||
493 | |||
494 | /* | ||
495 | * This routine closes sockets which have been at least partially | ||
496 | * opened, but not yet accepted. | ||
497 | */ | ||
498 | |||
499 | static void tcp_listen_stop (struct sock *sk) | ||
500 | { | ||
501 | struct tcp_sock *tp = tcp_sk(sk); | ||
502 | struct listen_sock *lopt; | ||
503 | struct request_sock *acc_req; | ||
504 | struct request_sock *req; | ||
505 | int i; | ||
506 | |||
507 | tcp_delete_keepalive_timer(sk); | ||
508 | |||
509 | /* make all the listen_opt local to us */ | ||
510 | lopt = reqsk_queue_yank_listen_sk(&tp->accept_queue); | ||
511 | acc_req = reqsk_queue_yank_acceptq(&tp->accept_queue); | ||
512 | |||
513 | if (lopt->qlen) { | ||
514 | for (i = 0; i < TCP_SYNQ_HSIZE; i++) { | ||
515 | while ((req = lopt->syn_table[i]) != NULL) { | ||
516 | lopt->syn_table[i] = req->dl_next; | ||
517 | lopt->qlen--; | ||
518 | reqsk_free(req); | ||
519 | |||
520 | /* Following specs, it would be better either to send FIN | ||
521 | * (and enter FIN-WAIT-1, it is normal close) | ||
522 | * or to send active reset (abort). | ||
523 | * Certainly, it is pretty dangerous while synflood, but it is | ||
524 | * bad justification for our negligence 8) | ||
525 | * To be honest, we are not able to make either | ||
526 | * of the variants now. --ANK | ||
527 | */ | ||
528 | } | ||
529 | } | ||
530 | } | ||
531 | BUG_TRAP(!lopt->qlen); | ||
532 | |||
533 | kfree(lopt); | ||
534 | |||
535 | while ((req = acc_req) != NULL) { | ||
536 | struct sock *child = req->sk; | ||
537 | |||
538 | acc_req = req->dl_next; | ||
539 | |||
540 | local_bh_disable(); | ||
541 | bh_lock_sock(child); | ||
542 | BUG_TRAP(!sock_owned_by_user(child)); | ||
543 | sock_hold(child); | ||
544 | |||
545 | tcp_disconnect(child, O_NONBLOCK); | ||
546 | |||
547 | sock_orphan(child); | ||
548 | |||
549 | atomic_inc(&tcp_orphan_count); | ||
550 | |||
551 | tcp_destroy_sock(child); | ||
552 | |||
553 | bh_unlock_sock(child); | ||
554 | local_bh_enable(); | ||
555 | sock_put(child); | ||
556 | |||
557 | sk_acceptq_removed(sk); | ||
558 | __reqsk_free(req); | ||
559 | } | ||
560 | BUG_TRAP(!sk->sk_ack_backlog); | ||
561 | } | ||
562 | |||
563 | static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) | 450 | static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) |
564 | { | 451 | { |
565 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; | 452 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; |
@@ -975,7 +862,7 @@ do_fault: | |||
975 | if (!skb->len) { | 862 | if (!skb->len) { |
976 | if (sk->sk_send_head == skb) | 863 | if (sk->sk_send_head == skb) |
977 | sk->sk_send_head = NULL; | 864 | sk->sk_send_head = NULL; |
978 | __skb_unlink(skb, skb->list); | 865 | __skb_unlink(skb, &sk->sk_write_queue); |
979 | sk_stream_free_skb(sk, skb); | 866 | sk_stream_free_skb(sk, skb); |
980 | } | 867 | } |
981 | 868 | ||
@@ -1057,20 +944,21 @@ static void cleanup_rbuf(struct sock *sk, int copied) | |||
1057 | BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)); | 944 | BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)); |
1058 | #endif | 945 | #endif |
1059 | 946 | ||
1060 | if (tcp_ack_scheduled(tp)) { | 947 | if (inet_csk_ack_scheduled(sk)) { |
948 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
1061 | /* Delayed ACKs frequently hit locked sockets during bulk | 949 | /* Delayed ACKs frequently hit locked sockets during bulk |
1062 | * receive. */ | 950 | * receive. */ |
1063 | if (tp->ack.blocked || | 951 | if (icsk->icsk_ack.blocked || |
1064 | /* Once-per-two-segments ACK was not sent by tcp_input.c */ | 952 | /* Once-per-two-segments ACK was not sent by tcp_input.c */ |
1065 | tp->rcv_nxt - tp->rcv_wup > tp->ack.rcv_mss || | 953 | tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss || |
1066 | /* | 954 | /* |
1067 | * If this read emptied read buffer, we send ACK, if | 955 | * If this read emptied read buffer, we send ACK, if |
1068 | * connection is not bidirectional, user drained | 956 | * connection is not bidirectional, user drained |
1069 | * receive buffer and there was a small segment | 957 | * receive buffer and there was a small segment |
1070 | * in queue. | 958 | * in queue. |
1071 | */ | 959 | */ |
1072 | (copied > 0 && (tp->ack.pending & TCP_ACK_PUSHED) && | 960 | (copied > 0 && (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && |
1073 | !tp->ack.pingpong && !atomic_read(&sk->sk_rmem_alloc))) | 961 | !icsk->icsk_ack.pingpong && !atomic_read(&sk->sk_rmem_alloc))) |
1074 | time_to_ack = 1; | 962 | time_to_ack = 1; |
1075 | } | 963 | } |
1076 | 964 | ||
@@ -1572,40 +1460,6 @@ void tcp_shutdown(struct sock *sk, int how) | |||
1572 | } | 1460 | } |
1573 | } | 1461 | } |
1574 | 1462 | ||
1575 | /* | ||
1576 | * At this point, there should be no process reference to this | ||
1577 | * socket, and thus no user references at all. Therefore we | ||
1578 | * can assume the socket waitqueue is inactive and nobody will | ||
1579 | * try to jump onto it. | ||
1580 | */ | ||
1581 | void tcp_destroy_sock(struct sock *sk) | ||
1582 | { | ||
1583 | BUG_TRAP(sk->sk_state == TCP_CLOSE); | ||
1584 | BUG_TRAP(sock_flag(sk, SOCK_DEAD)); | ||
1585 | |||
1586 | /* It cannot be in hash table! */ | ||
1587 | BUG_TRAP(sk_unhashed(sk)); | ||
1588 | |||
1589 | /* If it has not 0 inet_sk(sk)->num, it must be bound */ | ||
1590 | BUG_TRAP(!inet_sk(sk)->num || tcp_sk(sk)->bind_hash); | ||
1591 | |||
1592 | sk->sk_prot->destroy(sk); | ||
1593 | |||
1594 | sk_stream_kill_queues(sk); | ||
1595 | |||
1596 | xfrm_sk_free_policy(sk); | ||
1597 | |||
1598 | #ifdef INET_REFCNT_DEBUG | ||
1599 | if (atomic_read(&sk->sk_refcnt) != 1) { | ||
1600 | printk(KERN_DEBUG "Destruction TCP %p delayed, c=%d\n", | ||
1601 | sk, atomic_read(&sk->sk_refcnt)); | ||
1602 | } | ||
1603 | #endif | ||
1604 | |||
1605 | atomic_dec(&tcp_orphan_count); | ||
1606 | sock_put(sk); | ||
1607 | } | ||
1608 | |||
1609 | void tcp_close(struct sock *sk, long timeout) | 1463 | void tcp_close(struct sock *sk, long timeout) |
1610 | { | 1464 | { |
1611 | struct sk_buff *skb; | 1465 | struct sk_buff *skb; |
@@ -1618,7 +1472,7 @@ void tcp_close(struct sock *sk, long timeout) | |||
1618 | tcp_set_state(sk, TCP_CLOSE); | 1472 | tcp_set_state(sk, TCP_CLOSE); |
1619 | 1473 | ||
1620 | /* Special case. */ | 1474 | /* Special case. */ |
1621 | tcp_listen_stop(sk); | 1475 | inet_csk_listen_stop(sk); |
1622 | 1476 | ||
1623 | goto adjudge_to_death; | 1477 | goto adjudge_to_death; |
1624 | } | 1478 | } |
@@ -1721,12 +1575,12 @@ adjudge_to_death: | |||
1721 | tcp_send_active_reset(sk, GFP_ATOMIC); | 1575 | tcp_send_active_reset(sk, GFP_ATOMIC); |
1722 | NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER); | 1576 | NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER); |
1723 | } else { | 1577 | } else { |
1724 | int tmo = tcp_fin_time(tp); | 1578 | const int tmo = tcp_fin_time(sk); |
1725 | 1579 | ||
1726 | if (tmo > TCP_TIMEWAIT_LEN) { | 1580 | if (tmo > TCP_TIMEWAIT_LEN) { |
1727 | tcp_reset_keepalive_timer(sk, tcp_fin_time(tp)); | 1581 | inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk)); |
1728 | } else { | 1582 | } else { |
1729 | atomic_inc(&tcp_orphan_count); | 1583 | atomic_inc(sk->sk_prot->orphan_count); |
1730 | tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); | 1584 | tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); |
1731 | goto out; | 1585 | goto out; |
1732 | } | 1586 | } |
@@ -1734,7 +1588,7 @@ adjudge_to_death: | |||
1734 | } | 1588 | } |
1735 | if (sk->sk_state != TCP_CLOSE) { | 1589 | if (sk->sk_state != TCP_CLOSE) { |
1736 | sk_stream_mem_reclaim(sk); | 1590 | sk_stream_mem_reclaim(sk); |
1737 | if (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans || | 1591 | if (atomic_read(sk->sk_prot->orphan_count) > sysctl_tcp_max_orphans || |
1738 | (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && | 1592 | (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && |
1739 | atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) { | 1593 | atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) { |
1740 | if (net_ratelimit()) | 1594 | if (net_ratelimit()) |
@@ -1745,10 +1599,10 @@ adjudge_to_death: | |||
1745 | NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY); | 1599 | NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY); |
1746 | } | 1600 | } |
1747 | } | 1601 | } |
1748 | atomic_inc(&tcp_orphan_count); | 1602 | atomic_inc(sk->sk_prot->orphan_count); |
1749 | 1603 | ||
1750 | if (sk->sk_state == TCP_CLOSE) | 1604 | if (sk->sk_state == TCP_CLOSE) |
1751 | tcp_destroy_sock(sk); | 1605 | inet_csk_destroy_sock(sk); |
1752 | /* Otherwise, socket is reprieved until protocol close. */ | 1606 | /* Otherwise, socket is reprieved until protocol close. */ |
1753 | 1607 | ||
1754 | out: | 1608 | out: |
@@ -1769,6 +1623,7 @@ static inline int tcp_need_reset(int state) | |||
1769 | int tcp_disconnect(struct sock *sk, int flags) | 1623 | int tcp_disconnect(struct sock *sk, int flags) |
1770 | { | 1624 | { |
1771 | struct inet_sock *inet = inet_sk(sk); | 1625 | struct inet_sock *inet = inet_sk(sk); |
1626 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
1772 | struct tcp_sock *tp = tcp_sk(sk); | 1627 | struct tcp_sock *tp = tcp_sk(sk); |
1773 | int err = 0; | 1628 | int err = 0; |
1774 | int old_state = sk->sk_state; | 1629 | int old_state = sk->sk_state; |
@@ -1778,7 +1633,7 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
1778 | 1633 | ||
1779 | /* ABORT function of RFC793 */ | 1634 | /* ABORT function of RFC793 */ |
1780 | if (old_state == TCP_LISTEN) { | 1635 | if (old_state == TCP_LISTEN) { |
1781 | tcp_listen_stop(sk); | 1636 | inet_csk_listen_stop(sk); |
1782 | } else if (tcp_need_reset(old_state) || | 1637 | } else if (tcp_need_reset(old_state) || |
1783 | (tp->snd_nxt != tp->write_seq && | 1638 | (tp->snd_nxt != tp->write_seq && |
1784 | (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { | 1639 | (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { |
@@ -1805,125 +1660,34 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
1805 | tp->srtt = 0; | 1660 | tp->srtt = 0; |
1806 | if ((tp->write_seq += tp->max_window + 2) == 0) | 1661 | if ((tp->write_seq += tp->max_window + 2) == 0) |
1807 | tp->write_seq = 1; | 1662 | tp->write_seq = 1; |
1808 | tp->backoff = 0; | 1663 | icsk->icsk_backoff = 0; |
1809 | tp->snd_cwnd = 2; | 1664 | tp->snd_cwnd = 2; |
1810 | tp->probes_out = 0; | 1665 | icsk->icsk_probes_out = 0; |
1811 | tp->packets_out = 0; | 1666 | tp->packets_out = 0; |
1812 | tp->snd_ssthresh = 0x7fffffff; | 1667 | tp->snd_ssthresh = 0x7fffffff; |
1813 | tp->snd_cwnd_cnt = 0; | 1668 | tp->snd_cwnd_cnt = 0; |
1814 | tcp_set_ca_state(tp, TCP_CA_Open); | 1669 | tcp_set_ca_state(sk, TCP_CA_Open); |
1815 | tcp_clear_retrans(tp); | 1670 | tcp_clear_retrans(tp); |
1816 | tcp_delack_init(tp); | 1671 | inet_csk_delack_init(sk); |
1817 | sk->sk_send_head = NULL; | 1672 | sk->sk_send_head = NULL; |
1818 | tp->rx_opt.saw_tstamp = 0; | 1673 | tp->rx_opt.saw_tstamp = 0; |
1819 | tcp_sack_reset(&tp->rx_opt); | 1674 | tcp_sack_reset(&tp->rx_opt); |
1820 | __sk_dst_reset(sk); | 1675 | __sk_dst_reset(sk); |
1821 | 1676 | ||
1822 | BUG_TRAP(!inet->num || tp->bind_hash); | 1677 | BUG_TRAP(!inet->num || icsk->icsk_bind_hash); |
1823 | 1678 | ||
1824 | sk->sk_error_report(sk); | 1679 | sk->sk_error_report(sk); |
1825 | return err; | 1680 | return err; |
1826 | } | 1681 | } |
1827 | 1682 | ||
1828 | /* | 1683 | /* |
1829 | * Wait for an incoming connection, avoid race | ||
1830 | * conditions. This must be called with the socket locked. | ||
1831 | */ | ||
1832 | static int wait_for_connect(struct sock *sk, long timeo) | ||
1833 | { | ||
1834 | struct tcp_sock *tp = tcp_sk(sk); | ||
1835 | DEFINE_WAIT(wait); | ||
1836 | int err; | ||
1837 | |||
1838 | /* | ||
1839 | * True wake-one mechanism for incoming connections: only | ||
1840 | * one process gets woken up, not the 'whole herd'. | ||
1841 | * Since we do not 'race & poll' for established sockets | ||
1842 | * anymore, the common case will execute the loop only once. | ||
1843 | * | ||
1844 | * Subtle issue: "add_wait_queue_exclusive()" will be added | ||
1845 | * after any current non-exclusive waiters, and we know that | ||
1846 | * it will always _stay_ after any new non-exclusive waiters | ||
1847 | * because all non-exclusive waiters are added at the | ||
1848 | * beginning of the wait-queue. As such, it's ok to "drop" | ||
1849 | * our exclusiveness temporarily when we get woken up without | ||
1850 | * having to remove and re-insert us on the wait queue. | ||
1851 | */ | ||
1852 | for (;;) { | ||
1853 | prepare_to_wait_exclusive(sk->sk_sleep, &wait, | ||
1854 | TASK_INTERRUPTIBLE); | ||
1855 | release_sock(sk); | ||
1856 | if (reqsk_queue_empty(&tp->accept_queue)) | ||
1857 | timeo = schedule_timeout(timeo); | ||
1858 | lock_sock(sk); | ||
1859 | err = 0; | ||
1860 | if (!reqsk_queue_empty(&tp->accept_queue)) | ||
1861 | break; | ||
1862 | err = -EINVAL; | ||
1863 | if (sk->sk_state != TCP_LISTEN) | ||
1864 | break; | ||
1865 | err = sock_intr_errno(timeo); | ||
1866 | if (signal_pending(current)) | ||
1867 | break; | ||
1868 | err = -EAGAIN; | ||
1869 | if (!timeo) | ||
1870 | break; | ||
1871 | } | ||
1872 | finish_wait(sk->sk_sleep, &wait); | ||
1873 | return err; | ||
1874 | } | ||
1875 | |||
1876 | /* | ||
1877 | * This will accept the next outstanding connection. | ||
1878 | */ | ||
1879 | |||
1880 | struct sock *tcp_accept(struct sock *sk, int flags, int *err) | ||
1881 | { | ||
1882 | struct tcp_sock *tp = tcp_sk(sk); | ||
1883 | struct sock *newsk; | ||
1884 | int error; | ||
1885 | |||
1886 | lock_sock(sk); | ||
1887 | |||
1888 | /* We need to make sure that this socket is listening, | ||
1889 | * and that it has something pending. | ||
1890 | */ | ||
1891 | error = -EINVAL; | ||
1892 | if (sk->sk_state != TCP_LISTEN) | ||
1893 | goto out_err; | ||
1894 | |||
1895 | /* Find already established connection */ | ||
1896 | if (reqsk_queue_empty(&tp->accept_queue)) { | ||
1897 | long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); | ||
1898 | |||
1899 | /* If this is a non blocking socket don't sleep */ | ||
1900 | error = -EAGAIN; | ||
1901 | if (!timeo) | ||
1902 | goto out_err; | ||
1903 | |||
1904 | error = wait_for_connect(sk, timeo); | ||
1905 | if (error) | ||
1906 | goto out_err; | ||
1907 | } | ||
1908 | |||
1909 | newsk = reqsk_queue_get_child(&tp->accept_queue, sk); | ||
1910 | BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); | ||
1911 | out: | ||
1912 | release_sock(sk); | ||
1913 | return newsk; | ||
1914 | out_err: | ||
1915 | newsk = NULL; | ||
1916 | *err = error; | ||
1917 | goto out; | ||
1918 | } | ||
1919 | |||
1920 | /* | ||
1921 | * Socket option code for TCP. | 1684 | * Socket option code for TCP. |
1922 | */ | 1685 | */ |
1923 | int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, | 1686 | int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, |
1924 | int optlen) | 1687 | int optlen) |
1925 | { | 1688 | { |
1926 | struct tcp_sock *tp = tcp_sk(sk); | 1689 | struct tcp_sock *tp = tcp_sk(sk); |
1690 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
1927 | int val; | 1691 | int val; |
1928 | int err = 0; | 1692 | int err = 0; |
1929 | 1693 | ||
@@ -1945,7 +1709,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
1945 | name[val] = 0; | 1709 | name[val] = 0; |
1946 | 1710 | ||
1947 | lock_sock(sk); | 1711 | lock_sock(sk); |
1948 | err = tcp_set_congestion_control(tp, name); | 1712 | err = tcp_set_congestion_control(sk, name); |
1949 | release_sock(sk); | 1713 | release_sock(sk); |
1950 | return err; | 1714 | return err; |
1951 | } | 1715 | } |
@@ -2022,7 +1786,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
2022 | elapsed = tp->keepalive_time - elapsed; | 1786 | elapsed = tp->keepalive_time - elapsed; |
2023 | else | 1787 | else |
2024 | elapsed = 0; | 1788 | elapsed = 0; |
2025 | tcp_reset_keepalive_timer(sk, elapsed); | 1789 | inet_csk_reset_keepalive_timer(sk, elapsed); |
2026 | } | 1790 | } |
2027 | } | 1791 | } |
2028 | break; | 1792 | break; |
@@ -2042,7 +1806,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
2042 | if (val < 1 || val > MAX_TCP_SYNCNT) | 1806 | if (val < 1 || val > MAX_TCP_SYNCNT) |
2043 | err = -EINVAL; | 1807 | err = -EINVAL; |
2044 | else | 1808 | else |
2045 | tp->syn_retries = val; | 1809 | icsk->icsk_syn_retries = val; |
2046 | break; | 1810 | break; |
2047 | 1811 | ||
2048 | case TCP_LINGER2: | 1812 | case TCP_LINGER2: |
@@ -2055,15 +1819,15 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
2055 | break; | 1819 | break; |
2056 | 1820 | ||
2057 | case TCP_DEFER_ACCEPT: | 1821 | case TCP_DEFER_ACCEPT: |
2058 | tp->defer_accept = 0; | 1822 | icsk->icsk_accept_queue.rskq_defer_accept = 0; |
2059 | if (val > 0) { | 1823 | if (val > 0) { |
2060 | /* Translate value in seconds to number of | 1824 | /* Translate value in seconds to number of |
2061 | * retransmits */ | 1825 | * retransmits */ |
2062 | while (tp->defer_accept < 32 && | 1826 | while (icsk->icsk_accept_queue.rskq_defer_accept < 32 && |
2063 | val > ((TCP_TIMEOUT_INIT / HZ) << | 1827 | val > ((TCP_TIMEOUT_INIT / HZ) << |
2064 | tp->defer_accept)) | 1828 | icsk->icsk_accept_queue.rskq_defer_accept)) |
2065 | tp->defer_accept++; | 1829 | icsk->icsk_accept_queue.rskq_defer_accept++; |
2066 | tp->defer_accept++; | 1830 | icsk->icsk_accept_queue.rskq_defer_accept++; |
2067 | } | 1831 | } |
2068 | break; | 1832 | break; |
2069 | 1833 | ||
@@ -2081,16 +1845,16 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
2081 | 1845 | ||
2082 | case TCP_QUICKACK: | 1846 | case TCP_QUICKACK: |
2083 | if (!val) { | 1847 | if (!val) { |
2084 | tp->ack.pingpong = 1; | 1848 | icsk->icsk_ack.pingpong = 1; |
2085 | } else { | 1849 | } else { |
2086 | tp->ack.pingpong = 0; | 1850 | icsk->icsk_ack.pingpong = 0; |
2087 | if ((1 << sk->sk_state) & | 1851 | if ((1 << sk->sk_state) & |
2088 | (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && | 1852 | (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && |
2089 | tcp_ack_scheduled(tp)) { | 1853 | inet_csk_ack_scheduled(sk)) { |
2090 | tp->ack.pending |= TCP_ACK_PUSHED; | 1854 | icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; |
2091 | cleanup_rbuf(sk, 1); | 1855 | cleanup_rbuf(sk, 1); |
2092 | if (!(val & 1)) | 1856 | if (!(val & 1)) |
2093 | tp->ack.pingpong = 1; | 1857 | icsk->icsk_ack.pingpong = 1; |
2094 | } | 1858 | } |
2095 | } | 1859 | } |
2096 | break; | 1860 | break; |
@@ -2107,15 +1871,16 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
2107 | void tcp_get_info(struct sock *sk, struct tcp_info *info) | 1871 | void tcp_get_info(struct sock *sk, struct tcp_info *info) |
2108 | { | 1872 | { |
2109 | struct tcp_sock *tp = tcp_sk(sk); | 1873 | struct tcp_sock *tp = tcp_sk(sk); |
1874 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
2110 | u32 now = tcp_time_stamp; | 1875 | u32 now = tcp_time_stamp; |
2111 | 1876 | ||
2112 | memset(info, 0, sizeof(*info)); | 1877 | memset(info, 0, sizeof(*info)); |
2113 | 1878 | ||
2114 | info->tcpi_state = sk->sk_state; | 1879 | info->tcpi_state = sk->sk_state; |
2115 | info->tcpi_ca_state = tp->ca_state; | 1880 | info->tcpi_ca_state = icsk->icsk_ca_state; |
2116 | info->tcpi_retransmits = tp->retransmits; | 1881 | info->tcpi_retransmits = icsk->icsk_retransmits; |
2117 | info->tcpi_probes = tp->probes_out; | 1882 | info->tcpi_probes = icsk->icsk_probes_out; |
2118 | info->tcpi_backoff = tp->backoff; | 1883 | info->tcpi_backoff = icsk->icsk_backoff; |
2119 | 1884 | ||
2120 | if (tp->rx_opt.tstamp_ok) | 1885 | if (tp->rx_opt.tstamp_ok) |
2121 | info->tcpi_options |= TCPI_OPT_TIMESTAMPS; | 1886 | info->tcpi_options |= TCPI_OPT_TIMESTAMPS; |
@@ -2130,10 +1895,10 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) | |||
2130 | if (tp->ecn_flags&TCP_ECN_OK) | 1895 | if (tp->ecn_flags&TCP_ECN_OK) |
2131 | info->tcpi_options |= TCPI_OPT_ECN; | 1896 | info->tcpi_options |= TCPI_OPT_ECN; |
2132 | 1897 | ||
2133 | info->tcpi_rto = jiffies_to_usecs(tp->rto); | 1898 | info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); |
2134 | info->tcpi_ato = jiffies_to_usecs(tp->ack.ato); | 1899 | info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato); |
2135 | info->tcpi_snd_mss = tp->mss_cache; | 1900 | info->tcpi_snd_mss = tp->mss_cache; |
2136 | info->tcpi_rcv_mss = tp->ack.rcv_mss; | 1901 | info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; |
2137 | 1902 | ||
2138 | info->tcpi_unacked = tp->packets_out; | 1903 | info->tcpi_unacked = tp->packets_out; |
2139 | info->tcpi_sacked = tp->sacked_out; | 1904 | info->tcpi_sacked = tp->sacked_out; |
@@ -2142,7 +1907,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) | |||
2142 | info->tcpi_fackets = tp->fackets_out; | 1907 | info->tcpi_fackets = tp->fackets_out; |
2143 | 1908 | ||
2144 | info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime); | 1909 | info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime); |
2145 | info->tcpi_last_data_recv = jiffies_to_msecs(now - tp->ack.lrcvtime); | 1910 | info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime); |
2146 | info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp); | 1911 | info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp); |
2147 | 1912 | ||
2148 | info->tcpi_pmtu = tp->pmtu_cookie; | 1913 | info->tcpi_pmtu = tp->pmtu_cookie; |
@@ -2165,6 +1930,7 @@ EXPORT_SYMBOL_GPL(tcp_get_info); | |||
2165 | int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, | 1930 | int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, |
2166 | int __user *optlen) | 1931 | int __user *optlen) |
2167 | { | 1932 | { |
1933 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
2168 | struct tcp_sock *tp = tcp_sk(sk); | 1934 | struct tcp_sock *tp = tcp_sk(sk); |
2169 | int val, len; | 1935 | int val, len; |
2170 | 1936 | ||
@@ -2202,7 +1968,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
2202 | val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes; | 1968 | val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes; |
2203 | break; | 1969 | break; |
2204 | case TCP_SYNCNT: | 1970 | case TCP_SYNCNT: |
2205 | val = tp->syn_retries ? : sysctl_tcp_syn_retries; | 1971 | val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; |
2206 | break; | 1972 | break; |
2207 | case TCP_LINGER2: | 1973 | case TCP_LINGER2: |
2208 | val = tp->linger2; | 1974 | val = tp->linger2; |
@@ -2210,8 +1976,8 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
2210 | val = (val ? : sysctl_tcp_fin_timeout) / HZ; | 1976 | val = (val ? : sysctl_tcp_fin_timeout) / HZ; |
2211 | break; | 1977 | break; |
2212 | case TCP_DEFER_ACCEPT: | 1978 | case TCP_DEFER_ACCEPT: |
2213 | val = !tp->defer_accept ? 0 : ((TCP_TIMEOUT_INIT / HZ) << | 1979 | val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 : |
2214 | (tp->defer_accept - 1)); | 1980 | ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1)); |
2215 | break; | 1981 | break; |
2216 | case TCP_WINDOW_CLAMP: | 1982 | case TCP_WINDOW_CLAMP: |
2217 | val = tp->window_clamp; | 1983 | val = tp->window_clamp; |
@@ -2232,7 +1998,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
2232 | return 0; | 1998 | return 0; |
2233 | } | 1999 | } |
2234 | case TCP_QUICKACK: | 2000 | case TCP_QUICKACK: |
2235 | val = !tp->ack.pingpong; | 2001 | val = !icsk->icsk_ack.pingpong; |
2236 | break; | 2002 | break; |
2237 | 2003 | ||
2238 | case TCP_CONGESTION: | 2004 | case TCP_CONGESTION: |
@@ -2241,7 +2007,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
2241 | len = min_t(unsigned int, len, TCP_CA_NAME_MAX); | 2007 | len = min_t(unsigned int, len, TCP_CA_NAME_MAX); |
2242 | if (put_user(len, optlen)) | 2008 | if (put_user(len, optlen)) |
2243 | return -EFAULT; | 2009 | return -EFAULT; |
2244 | if (copy_to_user(optval, tp->ca_ops->name, len)) | 2010 | if (copy_to_user(optval, icsk->icsk_ca_ops->name, len)) |
2245 | return -EFAULT; | 2011 | return -EFAULT; |
2246 | return 0; | 2012 | return 0; |
2247 | default: | 2013 | default: |
@@ -2278,79 +2044,72 @@ void __init tcp_init(void) | |||
2278 | __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb), | 2044 | __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb), |
2279 | sizeof(skb->cb)); | 2045 | sizeof(skb->cb)); |
2280 | 2046 | ||
2281 | tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket", | 2047 | tcp_hashinfo.bind_bucket_cachep = |
2282 | sizeof(struct tcp_bind_bucket), | 2048 | kmem_cache_create("tcp_bind_bucket", |
2283 | 0, SLAB_HWCACHE_ALIGN, | 2049 | sizeof(struct inet_bind_bucket), 0, |
2284 | NULL, NULL); | 2050 | SLAB_HWCACHE_ALIGN, NULL, NULL); |
2285 | if (!tcp_bucket_cachep) | 2051 | if (!tcp_hashinfo.bind_bucket_cachep) |
2286 | panic("tcp_init: Cannot alloc tcp_bind_bucket cache."); | 2052 | panic("tcp_init: Cannot alloc tcp_bind_bucket cache."); |
2287 | 2053 | ||
2288 | tcp_timewait_cachep = kmem_cache_create("tcp_tw_bucket", | ||
2289 | sizeof(struct tcp_tw_bucket), | ||
2290 | 0, SLAB_HWCACHE_ALIGN, | ||
2291 | NULL, NULL); | ||
2292 | if (!tcp_timewait_cachep) | ||
2293 | panic("tcp_init: Cannot alloc tcp_tw_bucket cache."); | ||
2294 | |||
2295 | /* Size and allocate the main established and bind bucket | 2054 | /* Size and allocate the main established and bind bucket |
2296 | * hash tables. | 2055 | * hash tables. |
2297 | * | 2056 | * |
2298 | * The methodology is similar to that of the buffer cache. | 2057 | * The methodology is similar to that of the buffer cache. |
2299 | */ | 2058 | */ |
2300 | tcp_ehash = (struct tcp_ehash_bucket *) | 2059 | tcp_hashinfo.ehash = |
2301 | alloc_large_system_hash("TCP established", | 2060 | alloc_large_system_hash("TCP established", |
2302 | sizeof(struct tcp_ehash_bucket), | 2061 | sizeof(struct inet_ehash_bucket), |
2303 | thash_entries, | 2062 | thash_entries, |
2304 | (num_physpages >= 128 * 1024) ? | 2063 | (num_physpages >= 128 * 1024) ? |
2305 | (25 - PAGE_SHIFT) : | 2064 | (25 - PAGE_SHIFT) : |
2306 | (27 - PAGE_SHIFT), | 2065 | (27 - PAGE_SHIFT), |
2307 | HASH_HIGHMEM, | 2066 | HASH_HIGHMEM, |
2308 | &tcp_ehash_size, | 2067 | &tcp_hashinfo.ehash_size, |
2309 | NULL, | 2068 | NULL, |
2310 | 0); | 2069 | 0); |
2311 | tcp_ehash_size = (1 << tcp_ehash_size) >> 1; | 2070 | tcp_hashinfo.ehash_size = (1 << tcp_hashinfo.ehash_size) >> 1; |
2312 | for (i = 0; i < (tcp_ehash_size << 1); i++) { | 2071 | for (i = 0; i < (tcp_hashinfo.ehash_size << 1); i++) { |
2313 | rwlock_init(&tcp_ehash[i].lock); | 2072 | rwlock_init(&tcp_hashinfo.ehash[i].lock); |
2314 | INIT_HLIST_HEAD(&tcp_ehash[i].chain); | 2073 | INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain); |
2315 | } | 2074 | } |
2316 | 2075 | ||
2317 | tcp_bhash = (struct tcp_bind_hashbucket *) | 2076 | tcp_hashinfo.bhash = |
2318 | alloc_large_system_hash("TCP bind", | 2077 | alloc_large_system_hash("TCP bind", |
2319 | sizeof(struct tcp_bind_hashbucket), | 2078 | sizeof(struct inet_bind_hashbucket), |
2320 | tcp_ehash_size, | 2079 | tcp_hashinfo.ehash_size, |
2321 | (num_physpages >= 128 * 1024) ? | 2080 | (num_physpages >= 128 * 1024) ? |
2322 | (25 - PAGE_SHIFT) : | 2081 | (25 - PAGE_SHIFT) : |
2323 | (27 - PAGE_SHIFT), | 2082 | (27 - PAGE_SHIFT), |
2324 | HASH_HIGHMEM, | 2083 | HASH_HIGHMEM, |
2325 | &tcp_bhash_size, | 2084 | &tcp_hashinfo.bhash_size, |
2326 | NULL, | 2085 | NULL, |
2327 | 64 * 1024); | 2086 | 64 * 1024); |
2328 | tcp_bhash_size = 1 << tcp_bhash_size; | 2087 | tcp_hashinfo.bhash_size = 1 << tcp_hashinfo.bhash_size; |
2329 | for (i = 0; i < tcp_bhash_size; i++) { | 2088 | for (i = 0; i < tcp_hashinfo.bhash_size; i++) { |
2330 | spin_lock_init(&tcp_bhash[i].lock); | 2089 | spin_lock_init(&tcp_hashinfo.bhash[i].lock); |
2331 | INIT_HLIST_HEAD(&tcp_bhash[i].chain); | 2090 | INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); |
2332 | } | 2091 | } |
2333 | 2092 | ||
2334 | /* Try to be a bit smarter and adjust defaults depending | 2093 | /* Try to be a bit smarter and adjust defaults depending |
2335 | * on available memory. | 2094 | * on available memory. |
2336 | */ | 2095 | */ |
2337 | for (order = 0; ((1 << order) << PAGE_SHIFT) < | 2096 | for (order = 0; ((1 << order) << PAGE_SHIFT) < |
2338 | (tcp_bhash_size * sizeof(struct tcp_bind_hashbucket)); | 2097 | (tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket)); |
2339 | order++) | 2098 | order++) |
2340 | ; | 2099 | ; |
2341 | if (order >= 4) { | 2100 | if (order >= 4) { |
2342 | sysctl_local_port_range[0] = 32768; | 2101 | sysctl_local_port_range[0] = 32768; |
2343 | sysctl_local_port_range[1] = 61000; | 2102 | sysctl_local_port_range[1] = 61000; |
2344 | sysctl_tcp_max_tw_buckets = 180000; | 2103 | tcp_death_row.sysctl_max_tw_buckets = 180000; |
2345 | sysctl_tcp_max_orphans = 4096 << (order - 4); | 2104 | sysctl_tcp_max_orphans = 4096 << (order - 4); |
2346 | sysctl_max_syn_backlog = 1024; | 2105 | sysctl_max_syn_backlog = 1024; |
2347 | } else if (order < 3) { | 2106 | } else if (order < 3) { |
2348 | sysctl_local_port_range[0] = 1024 * (3 - order); | 2107 | sysctl_local_port_range[0] = 1024 * (3 - order); |
2349 | sysctl_tcp_max_tw_buckets >>= (3 - order); | 2108 | tcp_death_row.sysctl_max_tw_buckets >>= (3 - order); |
2350 | sysctl_tcp_max_orphans >>= (3 - order); | 2109 | sysctl_tcp_max_orphans >>= (3 - order); |
2351 | sysctl_max_syn_backlog = 128; | 2110 | sysctl_max_syn_backlog = 128; |
2352 | } | 2111 | } |
2353 | tcp_port_rover = sysctl_local_port_range[0] - 1; | 2112 | tcp_hashinfo.port_rover = sysctl_local_port_range[0] - 1; |
2354 | 2113 | ||
2355 | sysctl_tcp_mem[0] = 768 << order; | 2114 | sysctl_tcp_mem[0] = 768 << order; |
2356 | sysctl_tcp_mem[1] = 1024 << order; | 2115 | sysctl_tcp_mem[1] = 1024 << order; |
@@ -2365,14 +2124,12 @@ void __init tcp_init(void) | |||
2365 | 2124 | ||
2366 | printk(KERN_INFO "TCP: Hash tables configured " | 2125 | printk(KERN_INFO "TCP: Hash tables configured " |
2367 | "(established %d bind %d)\n", | 2126 | "(established %d bind %d)\n", |
2368 | tcp_ehash_size << 1, tcp_bhash_size); | 2127 | tcp_hashinfo.ehash_size << 1, tcp_hashinfo.bhash_size); |
2369 | 2128 | ||
2370 | tcp_register_congestion_control(&tcp_reno); | 2129 | tcp_register_congestion_control(&tcp_reno); |
2371 | } | 2130 | } |
2372 | 2131 | ||
2373 | EXPORT_SYMBOL(tcp_accept); | ||
2374 | EXPORT_SYMBOL(tcp_close); | 2132 | EXPORT_SYMBOL(tcp_close); |
2375 | EXPORT_SYMBOL(tcp_destroy_sock); | ||
2376 | EXPORT_SYMBOL(tcp_disconnect); | 2133 | EXPORT_SYMBOL(tcp_disconnect); |
2377 | EXPORT_SYMBOL(tcp_getsockopt); | 2134 | EXPORT_SYMBOL(tcp_getsockopt); |
2378 | EXPORT_SYMBOL(tcp_ioctl); | 2135 | EXPORT_SYMBOL(tcp_ioctl); |
@@ -2384,4 +2141,3 @@ EXPORT_SYMBOL(tcp_sendpage); | |||
2384 | EXPORT_SYMBOL(tcp_setsockopt); | 2141 | EXPORT_SYMBOL(tcp_setsockopt); |
2385 | EXPORT_SYMBOL(tcp_shutdown); | 2142 | EXPORT_SYMBOL(tcp_shutdown); |
2386 | EXPORT_SYMBOL(tcp_statistics); | 2143 | EXPORT_SYMBOL(tcp_statistics); |
2387 | EXPORT_SYMBOL(tcp_timewait_cachep); | ||
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index ec38d45d6649..b940346de4e7 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c | |||
@@ -86,11 +86,11 @@ static inline void bictcp_reset(struct bictcp *ca) | |||
86 | ca->delayed_ack = 2 << ACK_RATIO_SHIFT; | 86 | ca->delayed_ack = 2 << ACK_RATIO_SHIFT; |
87 | } | 87 | } |
88 | 88 | ||
89 | static void bictcp_init(struct tcp_sock *tp) | 89 | static void bictcp_init(struct sock *sk) |
90 | { | 90 | { |
91 | bictcp_reset(tcp_ca(tp)); | 91 | bictcp_reset(inet_csk_ca(sk)); |
92 | if (initial_ssthresh) | 92 | if (initial_ssthresh) |
93 | tp->snd_ssthresh = initial_ssthresh; | 93 | tcp_sk(sk)->snd_ssthresh = initial_ssthresh; |
94 | } | 94 | } |
95 | 95 | ||
96 | /* | 96 | /* |
@@ -156,9 +156,10 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | |||
156 | 156 | ||
157 | 157 | ||
158 | /* Detect low utilization in congestion avoidance */ | 158 | /* Detect low utilization in congestion avoidance */ |
159 | static inline void bictcp_low_utilization(struct tcp_sock *tp, int flag) | 159 | static inline void bictcp_low_utilization(struct sock *sk, int flag) |
160 | { | 160 | { |
161 | struct bictcp *ca = tcp_ca(tp); | 161 | const struct tcp_sock *tp = tcp_sk(sk); |
162 | struct bictcp *ca = inet_csk_ca(sk); | ||
162 | u32 dist, delay; | 163 | u32 dist, delay; |
163 | 164 | ||
164 | /* No time stamp */ | 165 | /* No time stamp */ |
@@ -208,12 +209,13 @@ static inline void bictcp_low_utilization(struct tcp_sock *tp, int flag) | |||
208 | 209 | ||
209 | } | 210 | } |
210 | 211 | ||
211 | static void bictcp_cong_avoid(struct tcp_sock *tp, u32 ack, | 212 | static void bictcp_cong_avoid(struct sock *sk, u32 ack, |
212 | u32 seq_rtt, u32 in_flight, int data_acked) | 213 | u32 seq_rtt, u32 in_flight, int data_acked) |
213 | { | 214 | { |
214 | struct bictcp *ca = tcp_ca(tp); | 215 | struct tcp_sock *tp = tcp_sk(sk); |
216 | struct bictcp *ca = inet_csk_ca(sk); | ||
215 | 217 | ||
216 | bictcp_low_utilization(tp, data_acked); | 218 | bictcp_low_utilization(sk, data_acked); |
217 | 219 | ||
218 | if (in_flight < tp->snd_cwnd) | 220 | if (in_flight < tp->snd_cwnd) |
219 | return; | 221 | return; |
@@ -242,9 +244,10 @@ static void bictcp_cong_avoid(struct tcp_sock *tp, u32 ack, | |||
242 | * behave like Reno until low_window is reached, | 244 | * behave like Reno until low_window is reached, |
243 | * then increase congestion window slowly | 245 | * then increase congestion window slowly |
244 | */ | 246 | */ |
245 | static u32 bictcp_recalc_ssthresh(struct tcp_sock *tp) | 247 | static u32 bictcp_recalc_ssthresh(struct sock *sk) |
246 | { | 248 | { |
247 | struct bictcp *ca = tcp_ca(tp); | 249 | const struct tcp_sock *tp = tcp_sk(sk); |
250 | struct bictcp *ca = inet_csk_ca(sk); | ||
248 | 251 | ||
249 | ca->epoch_start = 0; /* end of epoch */ | 252 | ca->epoch_start = 0; /* end of epoch */ |
250 | 253 | ||
@@ -269,31 +272,34 @@ static u32 bictcp_recalc_ssthresh(struct tcp_sock *tp) | |||
269 | return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); | 272 | return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); |
270 | } | 273 | } |
271 | 274 | ||
272 | static u32 bictcp_undo_cwnd(struct tcp_sock *tp) | 275 | static u32 bictcp_undo_cwnd(struct sock *sk) |
273 | { | 276 | { |
274 | struct bictcp *ca = tcp_ca(tp); | 277 | const struct tcp_sock *tp = tcp_sk(sk); |
275 | 278 | const struct bictcp *ca = inet_csk_ca(sk); | |
276 | return max(tp->snd_cwnd, ca->last_max_cwnd); | 279 | return max(tp->snd_cwnd, ca->last_max_cwnd); |
277 | } | 280 | } |
278 | 281 | ||
279 | static u32 bictcp_min_cwnd(struct tcp_sock *tp) | 282 | static u32 bictcp_min_cwnd(struct sock *sk) |
280 | { | 283 | { |
284 | const struct tcp_sock *tp = tcp_sk(sk); | ||
281 | return tp->snd_ssthresh; | 285 | return tp->snd_ssthresh; |
282 | } | 286 | } |
283 | 287 | ||
284 | static void bictcp_state(struct tcp_sock *tp, u8 new_state) | 288 | static void bictcp_state(struct sock *sk, u8 new_state) |
285 | { | 289 | { |
286 | if (new_state == TCP_CA_Loss) | 290 | if (new_state == TCP_CA_Loss) |
287 | bictcp_reset(tcp_ca(tp)); | 291 | bictcp_reset(inet_csk_ca(sk)); |
288 | } | 292 | } |
289 | 293 | ||
290 | /* Track delayed acknowledgement ratio using sliding window | 294 | /* Track delayed acknowledgement ratio using sliding window |
291 | * ratio = (15*ratio + sample) / 16 | 295 | * ratio = (15*ratio + sample) / 16 |
292 | */ | 296 | */ |
293 | static void bictcp_acked(struct tcp_sock *tp, u32 cnt) | 297 | static void bictcp_acked(struct sock *sk, u32 cnt) |
294 | { | 298 | { |
295 | if (cnt > 0 && tp->ca_state == TCP_CA_Open) { | 299 | const struct inet_connection_sock *icsk = inet_csk(sk); |
296 | struct bictcp *ca = tcp_ca(tp); | 300 | |
301 | if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) { | ||
302 | struct bictcp *ca = inet_csk_ca(sk); | ||
297 | cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; | 303 | cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; |
298 | ca->delayed_ack += cnt; | 304 | ca->delayed_ack += cnt; |
299 | } | 305 | } |
@@ -314,7 +320,7 @@ static struct tcp_congestion_ops bictcp = { | |||
314 | 320 | ||
315 | static int __init bictcp_register(void) | 321 | static int __init bictcp_register(void) |
316 | { | 322 | { |
317 | BUG_ON(sizeof(struct bictcp) > TCP_CA_PRIV_SIZE); | 323 | BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); |
318 | return tcp_register_congestion_control(&bictcp); | 324 | return tcp_register_congestion_control(&bictcp); |
319 | } | 325 | } |
320 | 326 | ||
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 4970d10a7785..bbf2d6624e89 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
@@ -73,33 +73,36 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) | |||
73 | EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); | 73 | EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); |
74 | 74 | ||
75 | /* Assign choice of congestion control. */ | 75 | /* Assign choice of congestion control. */ |
76 | void tcp_init_congestion_control(struct tcp_sock *tp) | 76 | void tcp_init_congestion_control(struct sock *sk) |
77 | { | 77 | { |
78 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
78 | struct tcp_congestion_ops *ca; | 79 | struct tcp_congestion_ops *ca; |
79 | 80 | ||
80 | if (tp->ca_ops != &tcp_init_congestion_ops) | 81 | if (icsk->icsk_ca_ops != &tcp_init_congestion_ops) |
81 | return; | 82 | return; |
82 | 83 | ||
83 | rcu_read_lock(); | 84 | rcu_read_lock(); |
84 | list_for_each_entry_rcu(ca, &tcp_cong_list, list) { | 85 | list_for_each_entry_rcu(ca, &tcp_cong_list, list) { |
85 | if (try_module_get(ca->owner)) { | 86 | if (try_module_get(ca->owner)) { |
86 | tp->ca_ops = ca; | 87 | icsk->icsk_ca_ops = ca; |
87 | break; | 88 | break; |
88 | } | 89 | } |
89 | 90 | ||
90 | } | 91 | } |
91 | rcu_read_unlock(); | 92 | rcu_read_unlock(); |
92 | 93 | ||
93 | if (tp->ca_ops->init) | 94 | if (icsk->icsk_ca_ops->init) |
94 | tp->ca_ops->init(tp); | 95 | icsk->icsk_ca_ops->init(sk); |
95 | } | 96 | } |
96 | 97 | ||
97 | /* Manage refcounts on socket close. */ | 98 | /* Manage refcounts on socket close. */ |
98 | void tcp_cleanup_congestion_control(struct tcp_sock *tp) | 99 | void tcp_cleanup_congestion_control(struct sock *sk) |
99 | { | 100 | { |
100 | if (tp->ca_ops->release) | 101 | struct inet_connection_sock *icsk = inet_csk(sk); |
101 | tp->ca_ops->release(tp); | 102 | |
102 | module_put(tp->ca_ops->owner); | 103 | if (icsk->icsk_ca_ops->release) |
104 | icsk->icsk_ca_ops->release(sk); | ||
105 | module_put(icsk->icsk_ca_ops->owner); | ||
103 | } | 106 | } |
104 | 107 | ||
105 | /* Used by sysctl to change default congestion control */ | 108 | /* Used by sysctl to change default congestion control */ |
@@ -143,14 +146,15 @@ void tcp_get_default_congestion_control(char *name) | |||
143 | } | 146 | } |
144 | 147 | ||
145 | /* Change congestion control for socket */ | 148 | /* Change congestion control for socket */ |
146 | int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) | 149 | int tcp_set_congestion_control(struct sock *sk, const char *name) |
147 | { | 150 | { |
151 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
148 | struct tcp_congestion_ops *ca; | 152 | struct tcp_congestion_ops *ca; |
149 | int err = 0; | 153 | int err = 0; |
150 | 154 | ||
151 | rcu_read_lock(); | 155 | rcu_read_lock(); |
152 | ca = tcp_ca_find(name); | 156 | ca = tcp_ca_find(name); |
153 | if (ca == tp->ca_ops) | 157 | if (ca == icsk->icsk_ca_ops) |
154 | goto out; | 158 | goto out; |
155 | 159 | ||
156 | if (!ca) | 160 | if (!ca) |
@@ -160,10 +164,10 @@ int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) | |||
160 | err = -EBUSY; | 164 | err = -EBUSY; |
161 | 165 | ||
162 | else { | 166 | else { |
163 | tcp_cleanup_congestion_control(tp); | 167 | tcp_cleanup_congestion_control(sk); |
164 | tp->ca_ops = ca; | 168 | icsk->icsk_ca_ops = ca; |
165 | if (tp->ca_ops->init) | 169 | if (icsk->icsk_ca_ops->init) |
166 | tp->ca_ops->init(tp); | 170 | icsk->icsk_ca_ops->init(sk); |
167 | } | 171 | } |
168 | out: | 172 | out: |
169 | rcu_read_unlock(); | 173 | rcu_read_unlock(); |
@@ -177,9 +181,11 @@ int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) | |||
177 | /* This is Jacobson's slow start and congestion avoidance. | 181 | /* This is Jacobson's slow start and congestion avoidance. |
178 | * SIGCOMM '88, p. 328. | 182 | * SIGCOMM '88, p. 328. |
179 | */ | 183 | */ |
180 | void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight, | 184 | void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, |
181 | int flag) | 185 | int flag) |
182 | { | 186 | { |
187 | struct tcp_sock *tp = tcp_sk(sk); | ||
188 | |||
183 | if (in_flight < tp->snd_cwnd) | 189 | if (in_flight < tp->snd_cwnd) |
184 | return; | 190 | return; |
185 | 191 | ||
@@ -202,15 +208,17 @@ void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight, | |||
202 | EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); | 208 | EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); |
203 | 209 | ||
204 | /* Slow start threshold is half the congestion window (min 2) */ | 210 | /* Slow start threshold is half the congestion window (min 2) */ |
205 | u32 tcp_reno_ssthresh(struct tcp_sock *tp) | 211 | u32 tcp_reno_ssthresh(struct sock *sk) |
206 | { | 212 | { |
213 | const struct tcp_sock *tp = tcp_sk(sk); | ||
207 | return max(tp->snd_cwnd >> 1U, 2U); | 214 | return max(tp->snd_cwnd >> 1U, 2U); |
208 | } | 215 | } |
209 | EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); | 216 | EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); |
210 | 217 | ||
211 | /* Lower bound on congestion window. */ | 218 | /* Lower bound on congestion window. */ |
212 | u32 tcp_reno_min_cwnd(struct tcp_sock *tp) | 219 | u32 tcp_reno_min_cwnd(struct sock *sk) |
213 | { | 220 | { |
221 | const struct tcp_sock *tp = tcp_sk(sk); | ||
214 | return tp->snd_ssthresh/2; | 222 | return tp->snd_ssthresh/2; |
215 | } | 223 | } |
216 | EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd); | 224 | EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd); |
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index f66945cb158f..c148c1081880 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * tcp_diag.c Module for monitoring TCP sockets. | 2 | * tcp_diag.c Module for monitoring TCP transport protocols sockets. |
3 | * | 3 | * |
4 | * Version: $Id: tcp_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $ | 4 | * Version: $Id: tcp_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $ |
5 | * | 5 | * |
@@ -12,779 +12,43 @@ | |||
12 | */ | 12 | */ |
13 | 13 | ||
14 | #include <linux/config.h> | 14 | #include <linux/config.h> |
15 | #include <linux/module.h> | ||
16 | #include <linux/types.h> | ||
17 | #include <linux/fcntl.h> | ||
18 | #include <linux/random.h> | ||
19 | #include <linux/cache.h> | ||
20 | #include <linux/init.h> | ||
21 | #include <linux/time.h> | ||
22 | |||
23 | #include <net/icmp.h> | ||
24 | #include <net/tcp.h> | ||
25 | #include <net/ipv6.h> | ||
26 | #include <net/inet_common.h> | ||
27 | |||
28 | #include <linux/inet.h> | ||
29 | #include <linux/stddef.h> | ||
30 | |||
31 | #include <linux/tcp_diag.h> | ||
32 | 15 | ||
33 | struct tcpdiag_entry | 16 | #include <linux/module.h> |
34 | { | 17 | #include <linux/inet_diag.h> |
35 | u32 *saddr; | ||
36 | u32 *daddr; | ||
37 | u16 sport; | ||
38 | u16 dport; | ||
39 | u16 family; | ||
40 | u16 userlocks; | ||
41 | }; | ||
42 | 18 | ||
43 | static struct sock *tcpnl; | 19 | #include <linux/tcp.h> |
44 | 20 | ||
45 | #define TCPDIAG_PUT(skb, attrtype, attrlen) \ | 21 | #include <net/tcp.h> |
46 | RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) | ||
47 | 22 | ||
48 | static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, | 23 | static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, |
49 | int ext, u32 pid, u32 seq, u16 nlmsg_flags) | 24 | void *_info) |
50 | { | 25 | { |
51 | struct inet_sock *inet = inet_sk(sk); | 26 | const struct tcp_sock *tp = tcp_sk(sk); |
52 | struct tcp_sock *tp = tcp_sk(sk); | 27 | struct tcp_info *info = _info; |
53 | struct tcpdiagmsg *r; | ||
54 | struct nlmsghdr *nlh; | ||
55 | struct tcp_info *info = NULL; | ||
56 | struct tcpdiag_meminfo *minfo = NULL; | ||
57 | unsigned char *b = skb->tail; | ||
58 | |||
59 | nlh = NLMSG_PUT(skb, pid, seq, TCPDIAG_GETSOCK, sizeof(*r)); | ||
60 | nlh->nlmsg_flags = nlmsg_flags; | ||
61 | r = NLMSG_DATA(nlh); | ||
62 | if (sk->sk_state != TCP_TIME_WAIT) { | ||
63 | if (ext & (1<<(TCPDIAG_MEMINFO-1))) | ||
64 | minfo = TCPDIAG_PUT(skb, TCPDIAG_MEMINFO, sizeof(*minfo)); | ||
65 | if (ext & (1<<(TCPDIAG_INFO-1))) | ||
66 | info = TCPDIAG_PUT(skb, TCPDIAG_INFO, sizeof(*info)); | ||
67 | |||
68 | if (ext & (1<<(TCPDIAG_CONG-1))) { | ||
69 | size_t len = strlen(tp->ca_ops->name); | ||
70 | strcpy(TCPDIAG_PUT(skb, TCPDIAG_CONG, len+1), | ||
71 | tp->ca_ops->name); | ||
72 | } | ||
73 | } | ||
74 | r->tcpdiag_family = sk->sk_family; | ||
75 | r->tcpdiag_state = sk->sk_state; | ||
76 | r->tcpdiag_timer = 0; | ||
77 | r->tcpdiag_retrans = 0; | ||
78 | |||
79 | r->id.tcpdiag_if = sk->sk_bound_dev_if; | ||
80 | r->id.tcpdiag_cookie[0] = (u32)(unsigned long)sk; | ||
81 | r->id.tcpdiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); | ||
82 | |||
83 | if (r->tcpdiag_state == TCP_TIME_WAIT) { | ||
84 | struct tcp_tw_bucket *tw = (struct tcp_tw_bucket*)sk; | ||
85 | long tmo = tw->tw_ttd - jiffies; | ||
86 | if (tmo < 0) | ||
87 | tmo = 0; | ||
88 | |||
89 | r->id.tcpdiag_sport = tw->tw_sport; | ||
90 | r->id.tcpdiag_dport = tw->tw_dport; | ||
91 | r->id.tcpdiag_src[0] = tw->tw_rcv_saddr; | ||
92 | r->id.tcpdiag_dst[0] = tw->tw_daddr; | ||
93 | r->tcpdiag_state = tw->tw_substate; | ||
94 | r->tcpdiag_timer = 3; | ||
95 | r->tcpdiag_expires = (tmo*1000+HZ-1)/HZ; | ||
96 | r->tcpdiag_rqueue = 0; | ||
97 | r->tcpdiag_wqueue = 0; | ||
98 | r->tcpdiag_uid = 0; | ||
99 | r->tcpdiag_inode = 0; | ||
100 | #ifdef CONFIG_IP_TCPDIAG_IPV6 | ||
101 | if (r->tcpdiag_family == AF_INET6) { | ||
102 | ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src, | ||
103 | &tw->tw_v6_rcv_saddr); | ||
104 | ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst, | ||
105 | &tw->tw_v6_daddr); | ||
106 | } | ||
107 | #endif | ||
108 | nlh->nlmsg_len = skb->tail - b; | ||
109 | return skb->len; | ||
110 | } | ||
111 | |||
112 | r->id.tcpdiag_sport = inet->sport; | ||
113 | r->id.tcpdiag_dport = inet->dport; | ||
114 | r->id.tcpdiag_src[0] = inet->rcv_saddr; | ||
115 | r->id.tcpdiag_dst[0] = inet->daddr; | ||
116 | |||
117 | #ifdef CONFIG_IP_TCPDIAG_IPV6 | ||
118 | if (r->tcpdiag_family == AF_INET6) { | ||
119 | struct ipv6_pinfo *np = inet6_sk(sk); | ||
120 | |||
121 | ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src, | ||
122 | &np->rcv_saddr); | ||
123 | ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst, | ||
124 | &np->daddr); | ||
125 | } | ||
126 | #endif | ||
127 | |||
128 | #define EXPIRES_IN_MS(tmo) ((tmo-jiffies)*1000+HZ-1)/HZ | ||
129 | |||
130 | if (tp->pending == TCP_TIME_RETRANS) { | ||
131 | r->tcpdiag_timer = 1; | ||
132 | r->tcpdiag_retrans = tp->retransmits; | ||
133 | r->tcpdiag_expires = EXPIRES_IN_MS(tp->timeout); | ||
134 | } else if (tp->pending == TCP_TIME_PROBE0) { | ||
135 | r->tcpdiag_timer = 4; | ||
136 | r->tcpdiag_retrans = tp->probes_out; | ||
137 | r->tcpdiag_expires = EXPIRES_IN_MS(tp->timeout); | ||
138 | } else if (timer_pending(&sk->sk_timer)) { | ||
139 | r->tcpdiag_timer = 2; | ||
140 | r->tcpdiag_retrans = tp->probes_out; | ||
141 | r->tcpdiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires); | ||
142 | } else { | ||
143 | r->tcpdiag_timer = 0; | ||
144 | r->tcpdiag_expires = 0; | ||
145 | } | ||
146 | #undef EXPIRES_IN_MS | ||
147 | 28 | ||
148 | r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq; | 29 | r->idiag_rqueue = tp->rcv_nxt - tp->copied_seq; |
149 | r->tcpdiag_wqueue = tp->write_seq - tp->snd_una; | 30 | r->idiag_wqueue = tp->write_seq - tp->snd_una; |
150 | r->tcpdiag_uid = sock_i_uid(sk); | 31 | if (info != NULL) |
151 | r->tcpdiag_inode = sock_i_ino(sk); | ||
152 | |||
153 | if (minfo) { | ||
154 | minfo->tcpdiag_rmem = atomic_read(&sk->sk_rmem_alloc); | ||
155 | minfo->tcpdiag_wmem = sk->sk_wmem_queued; | ||
156 | minfo->tcpdiag_fmem = sk->sk_forward_alloc; | ||
157 | minfo->tcpdiag_tmem = atomic_read(&sk->sk_wmem_alloc); | ||
158 | } | ||
159 | |||
160 | if (info) | ||
161 | tcp_get_info(sk, info); | 32 | tcp_get_info(sk, info); |
162 | |||
163 | if (sk->sk_state < TCP_TIME_WAIT && tp->ca_ops->get_info) | ||
164 | tp->ca_ops->get_info(tp, ext, skb); | ||
165 | |||
166 | nlh->nlmsg_len = skb->tail - b; | ||
167 | return skb->len; | ||
168 | |||
169 | rtattr_failure: | ||
170 | nlmsg_failure: | ||
171 | skb_trim(skb, b - skb->data); | ||
172 | return -1; | ||
173 | } | ||
174 | |||
175 | extern struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, | ||
176 | int dif); | ||
177 | #ifdef CONFIG_IP_TCPDIAG_IPV6 | ||
178 | extern struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport, | ||
179 | struct in6_addr *daddr, u16 dport, | ||
180 | int dif); | ||
181 | #else | ||
182 | static inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport, | ||
183 | struct in6_addr *daddr, u16 dport, | ||
184 | int dif) | ||
185 | { | ||
186 | return NULL; | ||
187 | } | ||
188 | #endif | ||
189 | |||
190 | static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) | ||
191 | { | ||
192 | int err; | ||
193 | struct sock *sk; | ||
194 | struct tcpdiagreq *req = NLMSG_DATA(nlh); | ||
195 | struct sk_buff *rep; | ||
196 | |||
197 | if (req->tcpdiag_family == AF_INET) { | ||
198 | sk = tcp_v4_lookup(req->id.tcpdiag_dst[0], req->id.tcpdiag_dport, | ||
199 | req->id.tcpdiag_src[0], req->id.tcpdiag_sport, | ||
200 | req->id.tcpdiag_if); | ||
201 | } | ||
202 | #ifdef CONFIG_IP_TCPDIAG_IPV6 | ||
203 | else if (req->tcpdiag_family == AF_INET6) { | ||
204 | sk = tcp_v6_lookup((struct in6_addr*)req->id.tcpdiag_dst, req->id.tcpdiag_dport, | ||
205 | (struct in6_addr*)req->id.tcpdiag_src, req->id.tcpdiag_sport, | ||
206 | req->id.tcpdiag_if); | ||
207 | } | ||
208 | #endif | ||
209 | else { | ||
210 | return -EINVAL; | ||
211 | } | ||
212 | |||
213 | if (sk == NULL) | ||
214 | return -ENOENT; | ||
215 | |||
216 | err = -ESTALE; | ||
217 | if ((req->id.tcpdiag_cookie[0] != TCPDIAG_NOCOOKIE || | ||
218 | req->id.tcpdiag_cookie[1] != TCPDIAG_NOCOOKIE) && | ||
219 | ((u32)(unsigned long)sk != req->id.tcpdiag_cookie[0] || | ||
220 | (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.tcpdiag_cookie[1])) | ||
221 | goto out; | ||
222 | |||
223 | err = -ENOMEM; | ||
224 | rep = alloc_skb(NLMSG_SPACE(sizeof(struct tcpdiagmsg)+ | ||
225 | sizeof(struct tcpdiag_meminfo)+ | ||
226 | sizeof(struct tcp_info)+64), GFP_KERNEL); | ||
227 | if (!rep) | ||
228 | goto out; | ||
229 | |||
230 | if (tcpdiag_fill(rep, sk, req->tcpdiag_ext, | ||
231 | NETLINK_CB(in_skb).pid, | ||
232 | nlh->nlmsg_seq, 0) <= 0) | ||
233 | BUG(); | ||
234 | |||
235 | err = netlink_unicast(tcpnl, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); | ||
236 | if (err > 0) | ||
237 | err = 0; | ||
238 | |||
239 | out: | ||
240 | if (sk) { | ||
241 | if (sk->sk_state == TCP_TIME_WAIT) | ||
242 | tcp_tw_put((struct tcp_tw_bucket*)sk); | ||
243 | else | ||
244 | sock_put(sk); | ||
245 | } | ||
246 | return err; | ||
247 | } | ||
248 | |||
249 | static int bitstring_match(const u32 *a1, const u32 *a2, int bits) | ||
250 | { | ||
251 | int words = bits >> 5; | ||
252 | |||
253 | bits &= 0x1f; | ||
254 | |||
255 | if (words) { | ||
256 | if (memcmp(a1, a2, words << 2)) | ||
257 | return 0; | ||
258 | } | ||
259 | if (bits) { | ||
260 | __u32 w1, w2; | ||
261 | __u32 mask; | ||
262 | |||
263 | w1 = a1[words]; | ||
264 | w2 = a2[words]; | ||
265 | |||
266 | mask = htonl((0xffffffff) << (32 - bits)); | ||
267 | |||
268 | if ((w1 ^ w2) & mask) | ||
269 | return 0; | ||
270 | } | ||
271 | |||
272 | return 1; | ||
273 | } | ||
274 | |||
275 | |||
276 | static int tcpdiag_bc_run(const void *bc, int len, | ||
277 | const struct tcpdiag_entry *entry) | ||
278 | { | ||
279 | while (len > 0) { | ||
280 | int yes = 1; | ||
281 | const struct tcpdiag_bc_op *op = bc; | ||
282 | |||
283 | switch (op->code) { | ||
284 | case TCPDIAG_BC_NOP: | ||
285 | break; | ||
286 | case TCPDIAG_BC_JMP: | ||
287 | yes = 0; | ||
288 | break; | ||
289 | case TCPDIAG_BC_S_GE: | ||
290 | yes = entry->sport >= op[1].no; | ||
291 | break; | ||
292 | case TCPDIAG_BC_S_LE: | ||
293 | yes = entry->dport <= op[1].no; | ||
294 | break; | ||
295 | case TCPDIAG_BC_D_GE: | ||
296 | yes = entry->dport >= op[1].no; | ||
297 | break; | ||
298 | case TCPDIAG_BC_D_LE: | ||
299 | yes = entry->dport <= op[1].no; | ||
300 | break; | ||
301 | case TCPDIAG_BC_AUTO: | ||
302 | yes = !(entry->userlocks & SOCK_BINDPORT_LOCK); | ||
303 | break; | ||
304 | case TCPDIAG_BC_S_COND: | ||
305 | case TCPDIAG_BC_D_COND: | ||
306 | { | ||
307 | struct tcpdiag_hostcond *cond = (struct tcpdiag_hostcond*)(op+1); | ||
308 | u32 *addr; | ||
309 | |||
310 | if (cond->port != -1 && | ||
311 | cond->port != (op->code == TCPDIAG_BC_S_COND ? | ||
312 | entry->sport : entry->dport)) { | ||
313 | yes = 0; | ||
314 | break; | ||
315 | } | ||
316 | |||
317 | if (cond->prefix_len == 0) | ||
318 | break; | ||
319 | |||
320 | if (op->code == TCPDIAG_BC_S_COND) | ||
321 | addr = entry->saddr; | ||
322 | else | ||
323 | addr = entry->daddr; | ||
324 | |||
325 | if (bitstring_match(addr, cond->addr, cond->prefix_len)) | ||
326 | break; | ||
327 | if (entry->family == AF_INET6 && | ||
328 | cond->family == AF_INET) { | ||
329 | if (addr[0] == 0 && addr[1] == 0 && | ||
330 | addr[2] == htonl(0xffff) && | ||
331 | bitstring_match(addr+3, cond->addr, cond->prefix_len)) | ||
332 | break; | ||
333 | } | ||
334 | yes = 0; | ||
335 | break; | ||
336 | } | ||
337 | } | ||
338 | |||
339 | if (yes) { | ||
340 | len -= op->yes; | ||
341 | bc += op->yes; | ||
342 | } else { | ||
343 | len -= op->no; | ||
344 | bc += op->no; | ||
345 | } | ||
346 | } | ||
347 | return (len == 0); | ||
348 | } | ||
349 | |||
350 | static int valid_cc(const void *bc, int len, int cc) | ||
351 | { | ||
352 | while (len >= 0) { | ||
353 | const struct tcpdiag_bc_op *op = bc; | ||
354 | |||
355 | if (cc > len) | ||
356 | return 0; | ||
357 | if (cc == len) | ||
358 | return 1; | ||
359 | if (op->yes < 4) | ||
360 | return 0; | ||
361 | len -= op->yes; | ||
362 | bc += op->yes; | ||
363 | } | ||
364 | return 0; | ||
365 | } | ||
366 | |||
367 | static int tcpdiag_bc_audit(const void *bytecode, int bytecode_len) | ||
368 | { | ||
369 | const unsigned char *bc = bytecode; | ||
370 | int len = bytecode_len; | ||
371 | |||
372 | while (len > 0) { | ||
373 | struct tcpdiag_bc_op *op = (struct tcpdiag_bc_op*)bc; | ||
374 | |||
375 | //printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len); | ||
376 | switch (op->code) { | ||
377 | case TCPDIAG_BC_AUTO: | ||
378 | case TCPDIAG_BC_S_COND: | ||
379 | case TCPDIAG_BC_D_COND: | ||
380 | case TCPDIAG_BC_S_GE: | ||
381 | case TCPDIAG_BC_S_LE: | ||
382 | case TCPDIAG_BC_D_GE: | ||
383 | case TCPDIAG_BC_D_LE: | ||
384 | if (op->yes < 4 || op->yes > len+4) | ||
385 | return -EINVAL; | ||
386 | case TCPDIAG_BC_JMP: | ||
387 | if (op->no < 4 || op->no > len+4) | ||
388 | return -EINVAL; | ||
389 | if (op->no < len && | ||
390 | !valid_cc(bytecode, bytecode_len, len-op->no)) | ||
391 | return -EINVAL; | ||
392 | break; | ||
393 | case TCPDIAG_BC_NOP: | ||
394 | if (op->yes < 4 || op->yes > len+4) | ||
395 | return -EINVAL; | ||
396 | break; | ||
397 | default: | ||
398 | return -EINVAL; | ||
399 | } | ||
400 | bc += op->yes; | ||
401 | len -= op->yes; | ||
402 | } | ||
403 | return len == 0 ? 0 : -EINVAL; | ||
404 | } | ||
405 | |||
406 | static int tcpdiag_dump_sock(struct sk_buff *skb, struct sock *sk, | ||
407 | struct netlink_callback *cb) | ||
408 | { | ||
409 | struct tcpdiagreq *r = NLMSG_DATA(cb->nlh); | ||
410 | |||
411 | if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { | ||
412 | struct tcpdiag_entry entry; | ||
413 | struct rtattr *bc = (struct rtattr *)(r + 1); | ||
414 | struct inet_sock *inet = inet_sk(sk); | ||
415 | |||
416 | entry.family = sk->sk_family; | ||
417 | #ifdef CONFIG_IP_TCPDIAG_IPV6 | ||
418 | if (entry.family == AF_INET6) { | ||
419 | struct ipv6_pinfo *np = inet6_sk(sk); | ||
420 | |||
421 | entry.saddr = np->rcv_saddr.s6_addr32; | ||
422 | entry.daddr = np->daddr.s6_addr32; | ||
423 | } else | ||
424 | #endif | ||
425 | { | ||
426 | entry.saddr = &inet->rcv_saddr; | ||
427 | entry.daddr = &inet->daddr; | ||
428 | } | ||
429 | entry.sport = inet->num; | ||
430 | entry.dport = ntohs(inet->dport); | ||
431 | entry.userlocks = sk->sk_userlocks; | ||
432 | |||
433 | if (!tcpdiag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) | ||
434 | return 0; | ||
435 | } | ||
436 | |||
437 | return tcpdiag_fill(skb, sk, r->tcpdiag_ext, NETLINK_CB(cb->skb).pid, | ||
438 | cb->nlh->nlmsg_seq, NLM_F_MULTI); | ||
439 | } | 33 | } |
440 | 34 | ||
441 | static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk, | 35 | static struct inet_diag_handler tcp_diag_handler = { |
442 | struct request_sock *req, | 36 | .idiag_hashinfo = &tcp_hashinfo, |
443 | u32 pid, u32 seq) | 37 | .idiag_get_info = tcp_diag_get_info, |
444 | { | 38 | .idiag_type = TCPDIAG_GETSOCK, |
445 | const struct inet_request_sock *ireq = inet_rsk(req); | 39 | .idiag_info_size = sizeof(struct tcp_info), |
446 | struct inet_sock *inet = inet_sk(sk); | 40 | }; |
447 | unsigned char *b = skb->tail; | ||
448 | struct tcpdiagmsg *r; | ||
449 | struct nlmsghdr *nlh; | ||
450 | long tmo; | ||
451 | |||
452 | nlh = NLMSG_PUT(skb, pid, seq, TCPDIAG_GETSOCK, sizeof(*r)); | ||
453 | nlh->nlmsg_flags = NLM_F_MULTI; | ||
454 | r = NLMSG_DATA(nlh); | ||
455 | |||
456 | r->tcpdiag_family = sk->sk_family; | ||
457 | r->tcpdiag_state = TCP_SYN_RECV; | ||
458 | r->tcpdiag_timer = 1; | ||
459 | r->tcpdiag_retrans = req->retrans; | ||
460 | |||
461 | r->id.tcpdiag_if = sk->sk_bound_dev_if; | ||
462 | r->id.tcpdiag_cookie[0] = (u32)(unsigned long)req; | ||
463 | r->id.tcpdiag_cookie[1] = (u32)(((unsigned long)req >> 31) >> 1); | ||
464 | |||
465 | tmo = req->expires - jiffies; | ||
466 | if (tmo < 0) | ||
467 | tmo = 0; | ||
468 | |||
469 | r->id.tcpdiag_sport = inet->sport; | ||
470 | r->id.tcpdiag_dport = ireq->rmt_port; | ||
471 | r->id.tcpdiag_src[0] = ireq->loc_addr; | ||
472 | r->id.tcpdiag_dst[0] = ireq->rmt_addr; | ||
473 | r->tcpdiag_expires = jiffies_to_msecs(tmo), | ||
474 | r->tcpdiag_rqueue = 0; | ||
475 | r->tcpdiag_wqueue = 0; | ||
476 | r->tcpdiag_uid = sock_i_uid(sk); | ||
477 | r->tcpdiag_inode = 0; | ||
478 | #ifdef CONFIG_IP_TCPDIAG_IPV6 | ||
479 | if (r->tcpdiag_family == AF_INET6) { | ||
480 | ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src, | ||
481 | &tcp6_rsk(req)->loc_addr); | ||
482 | ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst, | ||
483 | &tcp6_rsk(req)->rmt_addr); | ||
484 | } | ||
485 | #endif | ||
486 | nlh->nlmsg_len = skb->tail - b; | ||
487 | |||
488 | return skb->len; | ||
489 | |||
490 | nlmsg_failure: | ||
491 | skb_trim(skb, b - skb->data); | ||
492 | return -1; | ||
493 | } | ||
494 | |||
495 | static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, | ||
496 | struct netlink_callback *cb) | ||
497 | { | ||
498 | struct tcpdiag_entry entry; | ||
499 | struct tcpdiagreq *r = NLMSG_DATA(cb->nlh); | ||
500 | struct tcp_sock *tp = tcp_sk(sk); | ||
501 | struct listen_sock *lopt; | ||
502 | struct rtattr *bc = NULL; | ||
503 | struct inet_sock *inet = inet_sk(sk); | ||
504 | int j, s_j; | ||
505 | int reqnum, s_reqnum; | ||
506 | int err = 0; | ||
507 | |||
508 | s_j = cb->args[3]; | ||
509 | s_reqnum = cb->args[4]; | ||
510 | |||
511 | if (s_j > 0) | ||
512 | s_j--; | ||
513 | |||
514 | entry.family = sk->sk_family; | ||
515 | |||
516 | read_lock_bh(&tp->accept_queue.syn_wait_lock); | ||
517 | |||
518 | lopt = tp->accept_queue.listen_opt; | ||
519 | if (!lopt || !lopt->qlen) | ||
520 | goto out; | ||
521 | |||
522 | if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { | ||
523 | bc = (struct rtattr *)(r + 1); | ||
524 | entry.sport = inet->num; | ||
525 | entry.userlocks = sk->sk_userlocks; | ||
526 | } | ||
527 | |||
528 | for (j = s_j; j < TCP_SYNQ_HSIZE; j++) { | ||
529 | struct request_sock *req, *head = lopt->syn_table[j]; | ||
530 | |||
531 | reqnum = 0; | ||
532 | for (req = head; req; reqnum++, req = req->dl_next) { | ||
533 | struct inet_request_sock *ireq = inet_rsk(req); | ||
534 | |||
535 | if (reqnum < s_reqnum) | ||
536 | continue; | ||
537 | if (r->id.tcpdiag_dport != ireq->rmt_port && | ||
538 | r->id.tcpdiag_dport) | ||
539 | continue; | ||
540 | |||
541 | if (bc) { | ||
542 | entry.saddr = | ||
543 | #ifdef CONFIG_IP_TCPDIAG_IPV6 | ||
544 | (entry.family == AF_INET6) ? | ||
545 | tcp6_rsk(req)->loc_addr.s6_addr32 : | ||
546 | #endif | ||
547 | &ireq->loc_addr; | ||
548 | entry.daddr = | ||
549 | #ifdef CONFIG_IP_TCPDIAG_IPV6 | ||
550 | (entry.family == AF_INET6) ? | ||
551 | tcp6_rsk(req)->rmt_addr.s6_addr32 : | ||
552 | #endif | ||
553 | &ireq->rmt_addr; | ||
554 | entry.dport = ntohs(ireq->rmt_port); | ||
555 | |||
556 | if (!tcpdiag_bc_run(RTA_DATA(bc), | ||
557 | RTA_PAYLOAD(bc), &entry)) | ||
558 | continue; | ||
559 | } | ||
560 | |||
561 | err = tcpdiag_fill_req(skb, sk, req, | ||
562 | NETLINK_CB(cb->skb).pid, | ||
563 | cb->nlh->nlmsg_seq); | ||
564 | if (err < 0) { | ||
565 | cb->args[3] = j + 1; | ||
566 | cb->args[4] = reqnum; | ||
567 | goto out; | ||
568 | } | ||
569 | } | ||
570 | |||
571 | s_reqnum = 0; | ||
572 | } | ||
573 | |||
574 | out: | ||
575 | read_unlock_bh(&tp->accept_queue.syn_wait_lock); | ||
576 | |||
577 | return err; | ||
578 | } | ||
579 | |||
580 | static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) | ||
581 | { | ||
582 | int i, num; | ||
583 | int s_i, s_num; | ||
584 | struct tcpdiagreq *r = NLMSG_DATA(cb->nlh); | ||
585 | |||
586 | s_i = cb->args[1]; | ||
587 | s_num = num = cb->args[2]; | ||
588 | |||
589 | if (cb->args[0] == 0) { | ||
590 | if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV))) | ||
591 | goto skip_listen_ht; | ||
592 | tcp_listen_lock(); | ||
593 | for (i = s_i; i < TCP_LHTABLE_SIZE; i++) { | ||
594 | struct sock *sk; | ||
595 | struct hlist_node *node; | ||
596 | |||
597 | num = 0; | ||
598 | sk_for_each(sk, node, &tcp_listening_hash[i]) { | ||
599 | struct inet_sock *inet = inet_sk(sk); | ||
600 | |||
601 | if (num < s_num) { | ||
602 | num++; | ||
603 | continue; | ||
604 | } | ||
605 | |||
606 | if (r->id.tcpdiag_sport != inet->sport && | ||
607 | r->id.tcpdiag_sport) | ||
608 | goto next_listen; | ||
609 | |||
610 | if (!(r->tcpdiag_states&TCPF_LISTEN) || | ||
611 | r->id.tcpdiag_dport || | ||
612 | cb->args[3] > 0) | ||
613 | goto syn_recv; | ||
614 | |||
615 | if (tcpdiag_dump_sock(skb, sk, cb) < 0) { | ||
616 | tcp_listen_unlock(); | ||
617 | goto done; | ||
618 | } | ||
619 | |||
620 | syn_recv: | ||
621 | if (!(r->tcpdiag_states&TCPF_SYN_RECV)) | ||
622 | goto next_listen; | ||
623 | |||
624 | if (tcpdiag_dump_reqs(skb, sk, cb) < 0) { | ||
625 | tcp_listen_unlock(); | ||
626 | goto done; | ||
627 | } | ||
628 | |||
629 | next_listen: | ||
630 | cb->args[3] = 0; | ||
631 | cb->args[4] = 0; | ||
632 | ++num; | ||
633 | } | ||
634 | |||
635 | s_num = 0; | ||
636 | cb->args[3] = 0; | ||
637 | cb->args[4] = 0; | ||
638 | } | ||
639 | tcp_listen_unlock(); | ||
640 | skip_listen_ht: | ||
641 | cb->args[0] = 1; | ||
642 | s_i = num = s_num = 0; | ||
643 | } | ||
644 | |||
645 | if (!(r->tcpdiag_states&~(TCPF_LISTEN|TCPF_SYN_RECV))) | ||
646 | return skb->len; | ||
647 | |||
648 | for (i = s_i; i < tcp_ehash_size; i++) { | ||
649 | struct tcp_ehash_bucket *head = &tcp_ehash[i]; | ||
650 | struct sock *sk; | ||
651 | struct hlist_node *node; | ||
652 | |||
653 | if (i > s_i) | ||
654 | s_num = 0; | ||
655 | |||
656 | read_lock_bh(&head->lock); | ||
657 | |||
658 | num = 0; | ||
659 | sk_for_each(sk, node, &head->chain) { | ||
660 | struct inet_sock *inet = inet_sk(sk); | ||
661 | |||
662 | if (num < s_num) | ||
663 | goto next_normal; | ||
664 | if (!(r->tcpdiag_states & (1 << sk->sk_state))) | ||
665 | goto next_normal; | ||
666 | if (r->id.tcpdiag_sport != inet->sport && | ||
667 | r->id.tcpdiag_sport) | ||
668 | goto next_normal; | ||
669 | if (r->id.tcpdiag_dport != inet->dport && r->id.tcpdiag_dport) | ||
670 | goto next_normal; | ||
671 | if (tcpdiag_dump_sock(skb, sk, cb) < 0) { | ||
672 | read_unlock_bh(&head->lock); | ||
673 | goto done; | ||
674 | } | ||
675 | next_normal: | ||
676 | ++num; | ||
677 | } | ||
678 | |||
679 | if (r->tcpdiag_states&TCPF_TIME_WAIT) { | ||
680 | sk_for_each(sk, node, | ||
681 | &tcp_ehash[i + tcp_ehash_size].chain) { | ||
682 | struct inet_sock *inet = inet_sk(sk); | ||
683 | |||
684 | if (num < s_num) | ||
685 | goto next_dying; | ||
686 | if (r->id.tcpdiag_sport != inet->sport && | ||
687 | r->id.tcpdiag_sport) | ||
688 | goto next_dying; | ||
689 | if (r->id.tcpdiag_dport != inet->dport && | ||
690 | r->id.tcpdiag_dport) | ||
691 | goto next_dying; | ||
692 | if (tcpdiag_dump_sock(skb, sk, cb) < 0) { | ||
693 | read_unlock_bh(&head->lock); | ||
694 | goto done; | ||
695 | } | ||
696 | next_dying: | ||
697 | ++num; | ||
698 | } | ||
699 | } | ||
700 | read_unlock_bh(&head->lock); | ||
701 | } | ||
702 | |||
703 | done: | ||
704 | cb->args[1] = i; | ||
705 | cb->args[2] = num; | ||
706 | return skb->len; | ||
707 | } | ||
708 | |||
709 | static int tcpdiag_dump_done(struct netlink_callback *cb) | ||
710 | { | ||
711 | return 0; | ||
712 | } | ||
713 | |||
714 | |||
715 | static __inline__ int | ||
716 | tcpdiag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | ||
717 | { | ||
718 | if (!(nlh->nlmsg_flags&NLM_F_REQUEST)) | ||
719 | return 0; | ||
720 | |||
721 | if (nlh->nlmsg_type != TCPDIAG_GETSOCK) | ||
722 | goto err_inval; | ||
723 | |||
724 | if (NLMSG_LENGTH(sizeof(struct tcpdiagreq)) > skb->len) | ||
725 | goto err_inval; | ||
726 | |||
727 | if (nlh->nlmsg_flags&NLM_F_DUMP) { | ||
728 | if (nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(struct tcpdiagreq))) { | ||
729 | struct rtattr *rta = (struct rtattr*)(NLMSG_DATA(nlh) + sizeof(struct tcpdiagreq)); | ||
730 | if (rta->rta_type != TCPDIAG_REQ_BYTECODE || | ||
731 | rta->rta_len < 8 || | ||
732 | rta->rta_len > nlh->nlmsg_len - NLMSG_SPACE(sizeof(struct tcpdiagreq))) | ||
733 | goto err_inval; | ||
734 | if (tcpdiag_bc_audit(RTA_DATA(rta), RTA_PAYLOAD(rta))) | ||
735 | goto err_inval; | ||
736 | } | ||
737 | return netlink_dump_start(tcpnl, skb, nlh, | ||
738 | tcpdiag_dump, | ||
739 | tcpdiag_dump_done); | ||
740 | } else { | ||
741 | return tcpdiag_get_exact(skb, nlh); | ||
742 | } | ||
743 | |||
744 | err_inval: | ||
745 | return -EINVAL; | ||
746 | } | ||
747 | |||
748 | |||
749 | static inline void tcpdiag_rcv_skb(struct sk_buff *skb) | ||
750 | { | ||
751 | int err; | ||
752 | struct nlmsghdr * nlh; | ||
753 | |||
754 | if (skb->len >= NLMSG_SPACE(0)) { | ||
755 | nlh = (struct nlmsghdr *)skb->data; | ||
756 | if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) | ||
757 | return; | ||
758 | err = tcpdiag_rcv_msg(skb, nlh); | ||
759 | if (err || nlh->nlmsg_flags & NLM_F_ACK) | ||
760 | netlink_ack(skb, nlh, err); | ||
761 | } | ||
762 | } | ||
763 | |||
764 | static void tcpdiag_rcv(struct sock *sk, int len) | ||
765 | { | ||
766 | struct sk_buff *skb; | ||
767 | unsigned int qlen = skb_queue_len(&sk->sk_receive_queue); | ||
768 | |||
769 | while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) { | ||
770 | tcpdiag_rcv_skb(skb); | ||
771 | kfree_skb(skb); | ||
772 | } | ||
773 | } | ||
774 | 41 | ||
775 | static int __init tcpdiag_init(void) | 42 | static int __init tcp_diag_init(void) |
776 | { | 43 | { |
777 | tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv); | 44 | return inet_diag_register(&tcp_diag_handler); |
778 | if (tcpnl == NULL) | ||
779 | return -ENOMEM; | ||
780 | return 0; | ||
781 | } | 45 | } |
782 | 46 | ||
783 | static void __exit tcpdiag_exit(void) | 47 | static void __exit tcp_diag_exit(void) |
784 | { | 48 | { |
785 | sock_release(tcpnl->sk_socket); | 49 | inet_diag_unregister(&tcp_diag_handler); |
786 | } | 50 | } |
787 | 51 | ||
788 | module_init(tcpdiag_init); | 52 | module_init(tcp_diag_init); |
789 | module_exit(tcpdiag_exit); | 53 | module_exit(tcp_diag_exit); |
790 | MODULE_LICENSE("GPL"); | 54 | MODULE_LICENSE("GPL"); |
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 36c51f8136bf..6acc04bde080 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c | |||
@@ -98,9 +98,10 @@ struct hstcp { | |||
98 | u32 ai; | 98 | u32 ai; |
99 | }; | 99 | }; |
100 | 100 | ||
101 | static void hstcp_init(struct tcp_sock *tp) | 101 | static void hstcp_init(struct sock *sk) |
102 | { | 102 | { |
103 | struct hstcp *ca = tcp_ca(tp); | 103 | struct tcp_sock *tp = tcp_sk(sk); |
104 | struct hstcp *ca = inet_csk_ca(sk); | ||
104 | 105 | ||
105 | ca->ai = 0; | 106 | ca->ai = 0; |
106 | 107 | ||
@@ -109,10 +110,11 @@ static void hstcp_init(struct tcp_sock *tp) | |||
109 | tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); | 110 | tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); |
110 | } | 111 | } |
111 | 112 | ||
112 | static void hstcp_cong_avoid(struct tcp_sock *tp, u32 adk, u32 rtt, | 113 | static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt, |
113 | u32 in_flight, int good) | 114 | u32 in_flight, int good) |
114 | { | 115 | { |
115 | struct hstcp *ca = tcp_ca(tp); | 116 | struct tcp_sock *tp = tcp_sk(sk); |
117 | struct hstcp *ca = inet_csk_ca(sk); | ||
116 | 118 | ||
117 | if (in_flight < tp->snd_cwnd) | 119 | if (in_flight < tp->snd_cwnd) |
118 | return; | 120 | return; |
@@ -143,9 +145,10 @@ static void hstcp_cong_avoid(struct tcp_sock *tp, u32 adk, u32 rtt, | |||
143 | } | 145 | } |
144 | } | 146 | } |
145 | 147 | ||
146 | static u32 hstcp_ssthresh(struct tcp_sock *tp) | 148 | static u32 hstcp_ssthresh(struct sock *sk) |
147 | { | 149 | { |
148 | struct hstcp *ca = tcp_ca(tp); | 150 | const struct tcp_sock *tp = tcp_sk(sk); |
151 | const struct hstcp *ca = inet_csk_ca(sk); | ||
149 | 152 | ||
150 | /* Do multiplicative decrease */ | 153 | /* Do multiplicative decrease */ |
151 | return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U); | 154 | return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U); |
@@ -164,7 +167,7 @@ static struct tcp_congestion_ops tcp_highspeed = { | |||
164 | 167 | ||
165 | static int __init hstcp_register(void) | 168 | static int __init hstcp_register(void) |
166 | { | 169 | { |
167 | BUG_ON(sizeof(struct hstcp) > TCP_CA_PRIV_SIZE); | 170 | BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE); |
168 | return tcp_register_congestion_control(&tcp_highspeed); | 171 | return tcp_register_congestion_control(&tcp_highspeed); |
169 | } | 172 | } |
170 | 173 | ||
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 40168275acf9..e47b37984e95 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c | |||
@@ -55,18 +55,21 @@ static inline void htcp_reset(struct htcp *ca) | |||
55 | ca->snd_cwnd_cnt2 = 0; | 55 | ca->snd_cwnd_cnt2 = 0; |
56 | } | 56 | } |
57 | 57 | ||
58 | static u32 htcp_cwnd_undo(struct tcp_sock *tp) | 58 | static u32 htcp_cwnd_undo(struct sock *sk) |
59 | { | 59 | { |
60 | struct htcp *ca = tcp_ca(tp); | 60 | const struct tcp_sock *tp = tcp_sk(sk); |
61 | struct htcp *ca = inet_csk_ca(sk); | ||
61 | ca->ccount = ca->undo_ccount; | 62 | ca->ccount = ca->undo_ccount; |
62 | ca->maxRTT = ca->undo_maxRTT; | 63 | ca->maxRTT = ca->undo_maxRTT; |
63 | ca->old_maxB = ca->undo_old_maxB; | 64 | ca->old_maxB = ca->undo_old_maxB; |
64 | return max(tp->snd_cwnd, (tp->snd_ssthresh<<7)/ca->beta); | 65 | return max(tp->snd_cwnd, (tp->snd_ssthresh<<7)/ca->beta); |
65 | } | 66 | } |
66 | 67 | ||
67 | static inline void measure_rtt(struct tcp_sock *tp) | 68 | static inline void measure_rtt(struct sock *sk) |
68 | { | 69 | { |
69 | struct htcp *ca = tcp_ca(tp); | 70 | const struct inet_connection_sock *icsk = inet_csk(sk); |
71 | const struct tcp_sock *tp = tcp_sk(sk); | ||
72 | struct htcp *ca = inet_csk_ca(sk); | ||
70 | u32 srtt = tp->srtt>>3; | 73 | u32 srtt = tp->srtt>>3; |
71 | 74 | ||
72 | /* keep track of minimum RTT seen so far, minRTT is zero at first */ | 75 | /* keep track of minimum RTT seen so far, minRTT is zero at first */ |
@@ -74,7 +77,7 @@ static inline void measure_rtt(struct tcp_sock *tp) | |||
74 | ca->minRTT = srtt; | 77 | ca->minRTT = srtt; |
75 | 78 | ||
76 | /* max RTT */ | 79 | /* max RTT */ |
77 | if (tp->ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && ca->ccount > 3) { | 80 | if (icsk->icsk_ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && ca->ccount > 3) { |
78 | if (ca->maxRTT < ca->minRTT) | 81 | if (ca->maxRTT < ca->minRTT) |
79 | ca->maxRTT = ca->minRTT; | 82 | ca->maxRTT = ca->minRTT; |
80 | if (ca->maxRTT < srtt && srtt <= ca->maxRTT+HZ/50) | 83 | if (ca->maxRTT < srtt && srtt <= ca->maxRTT+HZ/50) |
@@ -82,13 +85,16 @@ static inline void measure_rtt(struct tcp_sock *tp) | |||
82 | } | 85 | } |
83 | } | 86 | } |
84 | 87 | ||
85 | static void measure_achieved_throughput(struct tcp_sock *tp, u32 pkts_acked) | 88 | static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked) |
86 | { | 89 | { |
87 | struct htcp *ca = tcp_ca(tp); | 90 | const struct inet_connection_sock *icsk = inet_csk(sk); |
91 | const struct tcp_sock *tp = tcp_sk(sk); | ||
92 | struct htcp *ca = inet_csk_ca(sk); | ||
88 | u32 now = tcp_time_stamp; | 93 | u32 now = tcp_time_stamp; |
89 | 94 | ||
90 | /* achieved throughput calculations */ | 95 | /* achieved throughput calculations */ |
91 | if (tp->ca_state != TCP_CA_Open && tp->ca_state != TCP_CA_Disorder) { | 96 | if (icsk->icsk_ca_state != TCP_CA_Open && |
97 | icsk->icsk_ca_state != TCP_CA_Disorder) { | ||
92 | ca->packetcount = 0; | 98 | ca->packetcount = 0; |
93 | ca->lasttime = now; | 99 | ca->lasttime = now; |
94 | return; | 100 | return; |
@@ -173,9 +179,9 @@ static inline void htcp_alpha_update(struct htcp *ca) | |||
173 | * that point do we really have a real sense of maxRTT (the queues en route | 179 | * that point do we really have a real sense of maxRTT (the queues en route |
174 | * were getting just too full now). | 180 | * were getting just too full now). |
175 | */ | 181 | */ |
176 | static void htcp_param_update(struct tcp_sock *tp) | 182 | static void htcp_param_update(struct sock *sk) |
177 | { | 183 | { |
178 | struct htcp *ca = tcp_ca(tp); | 184 | struct htcp *ca = inet_csk_ca(sk); |
179 | u32 minRTT = ca->minRTT; | 185 | u32 minRTT = ca->minRTT; |
180 | u32 maxRTT = ca->maxRTT; | 186 | u32 maxRTT = ca->maxRTT; |
181 | 187 | ||
@@ -187,17 +193,19 @@ static void htcp_param_update(struct tcp_sock *tp) | |||
187 | ca->maxRTT = minRTT + ((maxRTT-minRTT)*95)/100; | 193 | ca->maxRTT = minRTT + ((maxRTT-minRTT)*95)/100; |
188 | } | 194 | } |
189 | 195 | ||
190 | static u32 htcp_recalc_ssthresh(struct tcp_sock *tp) | 196 | static u32 htcp_recalc_ssthresh(struct sock *sk) |
191 | { | 197 | { |
192 | struct htcp *ca = tcp_ca(tp); | 198 | const struct tcp_sock *tp = tcp_sk(sk); |
193 | htcp_param_update(tp); | 199 | const struct htcp *ca = inet_csk_ca(sk); |
200 | htcp_param_update(sk); | ||
194 | return max((tp->snd_cwnd * ca->beta) >> 7, 2U); | 201 | return max((tp->snd_cwnd * ca->beta) >> 7, 2U); |
195 | } | 202 | } |
196 | 203 | ||
197 | static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, | 204 | static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, |
198 | u32 in_flight, int data_acked) | 205 | u32 in_flight, int data_acked) |
199 | { | 206 | { |
200 | struct htcp *ca = tcp_ca(tp); | 207 | struct tcp_sock *tp = tcp_sk(sk); |
208 | struct htcp *ca = inet_csk_ca(sk); | ||
201 | 209 | ||
202 | if (in_flight < tp->snd_cwnd) | 210 | if (in_flight < tp->snd_cwnd) |
203 | return; | 211 | return; |
@@ -207,7 +215,7 @@ static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, | |||
207 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | 215 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) |
208 | tp->snd_cwnd++; | 216 | tp->snd_cwnd++; |
209 | } else { | 217 | } else { |
210 | measure_rtt(tp); | 218 | measure_rtt(sk); |
211 | 219 | ||
212 | /* keep track of number of round-trip times since last backoff event */ | 220 | /* keep track of number of round-trip times since last backoff event */ |
213 | if (ca->snd_cwnd_cnt2++ > tp->snd_cwnd) { | 221 | if (ca->snd_cwnd_cnt2++ > tp->snd_cwnd) { |
@@ -229,28 +237,29 @@ static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, | |||
229 | } | 237 | } |
230 | 238 | ||
231 | /* Lower bound on congestion window. */ | 239 | /* Lower bound on congestion window. */ |
232 | static u32 htcp_min_cwnd(struct tcp_sock *tp) | 240 | static u32 htcp_min_cwnd(struct sock *sk) |
233 | { | 241 | { |
242 | const struct tcp_sock *tp = tcp_sk(sk); | ||
234 | return tp->snd_ssthresh; | 243 | return tp->snd_ssthresh; |
235 | } | 244 | } |
236 | 245 | ||
237 | 246 | ||
238 | static void htcp_init(struct tcp_sock *tp) | 247 | static void htcp_init(struct sock *sk) |
239 | { | 248 | { |
240 | struct htcp *ca = tcp_ca(tp); | 249 | struct htcp *ca = inet_csk_ca(sk); |
241 | 250 | ||
242 | memset(ca, 0, sizeof(struct htcp)); | 251 | memset(ca, 0, sizeof(struct htcp)); |
243 | ca->alpha = ALPHA_BASE; | 252 | ca->alpha = ALPHA_BASE; |
244 | ca->beta = BETA_MIN; | 253 | ca->beta = BETA_MIN; |
245 | } | 254 | } |
246 | 255 | ||
247 | static void htcp_state(struct tcp_sock *tp, u8 new_state) | 256 | static void htcp_state(struct sock *sk, u8 new_state) |
248 | { | 257 | { |
249 | switch (new_state) { | 258 | switch (new_state) { |
250 | case TCP_CA_CWR: | 259 | case TCP_CA_CWR: |
251 | case TCP_CA_Recovery: | 260 | case TCP_CA_Recovery: |
252 | case TCP_CA_Loss: | 261 | case TCP_CA_Loss: |
253 | htcp_reset(tcp_ca(tp)); | 262 | htcp_reset(inet_csk_ca(sk)); |
254 | break; | 263 | break; |
255 | } | 264 | } |
256 | } | 265 | } |
@@ -269,7 +278,7 @@ static struct tcp_congestion_ops htcp = { | |||
269 | 278 | ||
270 | static int __init htcp_register(void) | 279 | static int __init htcp_register(void) |
271 | { | 280 | { |
272 | BUG_ON(sizeof(struct htcp) > TCP_CA_PRIV_SIZE); | 281 | BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE); |
273 | BUILD_BUG_ON(BETA_MIN >= BETA_MAX); | 282 | BUILD_BUG_ON(BETA_MIN >= BETA_MAX); |
274 | if (!use_bandwidth_switch) | 283 | if (!use_bandwidth_switch) |
275 | htcp.pkts_acked = NULL; | 284 | htcp.pkts_acked = NULL; |
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 13a66342c304..77add63623df 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c | |||
@@ -33,19 +33,20 @@ MODULE_PARM_DESC(rtt0, "reference rout trip time (ms)"); | |||
33 | 33 | ||
34 | 34 | ||
35 | /* This is called to refresh values for hybla parameters */ | 35 | /* This is called to refresh values for hybla parameters */ |
36 | static inline void hybla_recalc_param (struct tcp_sock *tp) | 36 | static inline void hybla_recalc_param (struct sock *sk) |
37 | { | 37 | { |
38 | struct hybla *ca = tcp_ca(tp); | 38 | struct hybla *ca = inet_csk_ca(sk); |
39 | 39 | ||
40 | ca->rho_3ls = max_t(u32, tp->srtt / msecs_to_jiffies(rtt0), 8); | 40 | ca->rho_3ls = max_t(u32, tcp_sk(sk)->srtt / msecs_to_jiffies(rtt0), 8); |
41 | ca->rho = ca->rho_3ls >> 3; | 41 | ca->rho = ca->rho_3ls >> 3; |
42 | ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1; | 42 | ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1; |
43 | ca->rho2 = ca->rho2_7ls >>7; | 43 | ca->rho2 = ca->rho2_7ls >>7; |
44 | } | 44 | } |
45 | 45 | ||
46 | static void hybla_init(struct tcp_sock *tp) | 46 | static void hybla_init(struct sock *sk) |
47 | { | 47 | { |
48 | struct hybla *ca = tcp_ca(tp); | 48 | struct tcp_sock *tp = tcp_sk(sk); |
49 | struct hybla *ca = inet_csk_ca(sk); | ||
49 | 50 | ||
50 | ca->rho = 0; | 51 | ca->rho = 0; |
51 | ca->rho2 = 0; | 52 | ca->rho2 = 0; |
@@ -57,17 +58,16 @@ static void hybla_init(struct tcp_sock *tp) | |||
57 | tp->snd_cwnd_clamp = 65535; | 58 | tp->snd_cwnd_clamp = 65535; |
58 | 59 | ||
59 | /* 1st Rho measurement based on initial srtt */ | 60 | /* 1st Rho measurement based on initial srtt */ |
60 | hybla_recalc_param(tp); | 61 | hybla_recalc_param(sk); |
61 | 62 | ||
62 | /* set minimum rtt as this is the 1st ever seen */ | 63 | /* set minimum rtt as this is the 1st ever seen */ |
63 | ca->minrtt = tp->srtt; | 64 | ca->minrtt = tp->srtt; |
64 | tp->snd_cwnd = ca->rho; | 65 | tp->snd_cwnd = ca->rho; |
65 | } | 66 | } |
66 | 67 | ||
67 | static void hybla_state(struct tcp_sock *tp, u8 ca_state) | 68 | static void hybla_state(struct sock *sk, u8 ca_state) |
68 | { | 69 | { |
69 | struct hybla *ca = tcp_ca(tp); | 70 | struct hybla *ca = inet_csk_ca(sk); |
70 | |||
71 | ca->hybla_en = (ca_state == TCP_CA_Open); | 71 | ca->hybla_en = (ca_state == TCP_CA_Open); |
72 | } | 72 | } |
73 | 73 | ||
@@ -86,27 +86,28 @@ static inline u32 hybla_fraction(u32 odds) | |||
86 | * o Give cwnd a new value based on the model proposed | 86 | * o Give cwnd a new value based on the model proposed |
87 | * o remember increments <1 | 87 | * o remember increments <1 |
88 | */ | 88 | */ |
89 | static void hybla_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, | 89 | static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt, |
90 | u32 in_flight, int flag) | 90 | u32 in_flight, int flag) |
91 | { | 91 | { |
92 | struct hybla *ca = tcp_ca(tp); | 92 | struct tcp_sock *tp = tcp_sk(sk); |
93 | struct hybla *ca = inet_csk_ca(sk); | ||
93 | u32 increment, odd, rho_fractions; | 94 | u32 increment, odd, rho_fractions; |
94 | int is_slowstart = 0; | 95 | int is_slowstart = 0; |
95 | 96 | ||
96 | /* Recalculate rho only if this srtt is the lowest */ | 97 | /* Recalculate rho only if this srtt is the lowest */ |
97 | if (tp->srtt < ca->minrtt){ | 98 | if (tp->srtt < ca->minrtt){ |
98 | hybla_recalc_param(tp); | 99 | hybla_recalc_param(sk); |
99 | ca->minrtt = tp->srtt; | 100 | ca->minrtt = tp->srtt; |
100 | } | 101 | } |
101 | 102 | ||
102 | if (!ca->hybla_en) | 103 | if (!ca->hybla_en) |
103 | return tcp_reno_cong_avoid(tp, ack, rtt, in_flight, flag); | 104 | return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag); |
104 | 105 | ||
105 | if (in_flight < tp->snd_cwnd) | 106 | if (in_flight < tp->snd_cwnd) |
106 | return; | 107 | return; |
107 | 108 | ||
108 | if (ca->rho == 0) | 109 | if (ca->rho == 0) |
109 | hybla_recalc_param(tp); | 110 | hybla_recalc_param(sk); |
110 | 111 | ||
111 | rho_fractions = ca->rho_3ls - (ca->rho << 3); | 112 | rho_fractions = ca->rho_3ls - (ca->rho << 3); |
112 | 113 | ||
@@ -170,7 +171,7 @@ static struct tcp_congestion_ops tcp_hybla = { | |||
170 | 171 | ||
171 | static int __init hybla_register(void) | 172 | static int __init hybla_register(void) |
172 | { | 173 | { |
173 | BUG_ON(sizeof(struct hybla) > TCP_CA_PRIV_SIZE); | 174 | BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE); |
174 | return tcp_register_congestion_control(&tcp_hybla); | 175 | return tcp_register_congestion_control(&tcp_hybla); |
175 | } | 176 | } |
176 | 177 | ||
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 53a8a5399f1e..1afb080bdf0c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -114,20 +114,21 @@ int sysctl_tcp_moderate_rcvbuf = 1; | |||
114 | /* Adapt the MSS value used to make delayed ack decision to the | 114 | /* Adapt the MSS value used to make delayed ack decision to the |
115 | * real world. | 115 | * real world. |
116 | */ | 116 | */ |
117 | static inline void tcp_measure_rcv_mss(struct tcp_sock *tp, | 117 | static inline void tcp_measure_rcv_mss(struct sock *sk, |
118 | struct sk_buff *skb) | 118 | const struct sk_buff *skb) |
119 | { | 119 | { |
120 | unsigned int len, lss; | 120 | struct inet_connection_sock *icsk = inet_csk(sk); |
121 | const unsigned int lss = icsk->icsk_ack.last_seg_size; | ||
122 | unsigned int len; | ||
121 | 123 | ||
122 | lss = tp->ack.last_seg_size; | 124 | icsk->icsk_ack.last_seg_size = 0; |
123 | tp->ack.last_seg_size = 0; | ||
124 | 125 | ||
125 | /* skb->len may jitter because of SACKs, even if peer | 126 | /* skb->len may jitter because of SACKs, even if peer |
126 | * sends good full-sized frames. | 127 | * sends good full-sized frames. |
127 | */ | 128 | */ |
128 | len = skb->len; | 129 | len = skb->len; |
129 | if (len >= tp->ack.rcv_mss) { | 130 | if (len >= icsk->icsk_ack.rcv_mss) { |
130 | tp->ack.rcv_mss = len; | 131 | icsk->icsk_ack.rcv_mss = len; |
131 | } else { | 132 | } else { |
132 | /* Otherwise, we make more careful check taking into account, | 133 | /* Otherwise, we make more careful check taking into account, |
133 | * that SACKs block is variable. | 134 | * that SACKs block is variable. |
@@ -147,41 +148,44 @@ static inline void tcp_measure_rcv_mss(struct tcp_sock *tp, | |||
147 | * tcp header plus fixed timestamp option length. | 148 | * tcp header plus fixed timestamp option length. |
148 | * Resulting "len" is MSS free of SACK jitter. | 149 | * Resulting "len" is MSS free of SACK jitter. |
149 | */ | 150 | */ |
150 | len -= tp->tcp_header_len; | 151 | len -= tcp_sk(sk)->tcp_header_len; |
151 | tp->ack.last_seg_size = len; | 152 | icsk->icsk_ack.last_seg_size = len; |
152 | if (len == lss) { | 153 | if (len == lss) { |
153 | tp->ack.rcv_mss = len; | 154 | icsk->icsk_ack.rcv_mss = len; |
154 | return; | 155 | return; |
155 | } | 156 | } |
156 | } | 157 | } |
157 | tp->ack.pending |= TCP_ACK_PUSHED; | 158 | icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; |
158 | } | 159 | } |
159 | } | 160 | } |
160 | 161 | ||
161 | static void tcp_incr_quickack(struct tcp_sock *tp) | 162 | static void tcp_incr_quickack(struct sock *sk) |
162 | { | 163 | { |
163 | unsigned quickacks = tp->rcv_wnd/(2*tp->ack.rcv_mss); | 164 | struct inet_connection_sock *icsk = inet_csk(sk); |
165 | unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); | ||
164 | 166 | ||
165 | if (quickacks==0) | 167 | if (quickacks==0) |
166 | quickacks=2; | 168 | quickacks=2; |
167 | if (quickacks > tp->ack.quick) | 169 | if (quickacks > icsk->icsk_ack.quick) |
168 | tp->ack.quick = min(quickacks, TCP_MAX_QUICKACKS); | 170 | icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS); |
169 | } | 171 | } |
170 | 172 | ||
171 | void tcp_enter_quickack_mode(struct tcp_sock *tp) | 173 | void tcp_enter_quickack_mode(struct sock *sk) |
172 | { | 174 | { |
173 | tcp_incr_quickack(tp); | 175 | struct inet_connection_sock *icsk = inet_csk(sk); |
174 | tp->ack.pingpong = 0; | 176 | tcp_incr_quickack(sk); |
175 | tp->ack.ato = TCP_ATO_MIN; | 177 | icsk->icsk_ack.pingpong = 0; |
178 | icsk->icsk_ack.ato = TCP_ATO_MIN; | ||
176 | } | 179 | } |
177 | 180 | ||
178 | /* Send ACKs quickly, if "quick" count is not exhausted | 181 | /* Send ACKs quickly, if "quick" count is not exhausted |
179 | * and the session is not interactive. | 182 | * and the session is not interactive. |
180 | */ | 183 | */ |
181 | 184 | ||
182 | static __inline__ int tcp_in_quickack_mode(struct tcp_sock *tp) | 185 | static inline int tcp_in_quickack_mode(const struct sock *sk) |
183 | { | 186 | { |
184 | return (tp->ack.quick && !tp->ack.pingpong); | 187 | const struct inet_connection_sock *icsk = inet_csk(sk); |
188 | return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong; | ||
185 | } | 189 | } |
186 | 190 | ||
187 | /* Buffer size and advertised window tuning. | 191 | /* Buffer size and advertised window tuning. |
@@ -224,8 +228,8 @@ static void tcp_fixup_sndbuf(struct sock *sk) | |||
224 | */ | 228 | */ |
225 | 229 | ||
226 | /* Slow part of check#2. */ | 230 | /* Slow part of check#2. */ |
227 | static int __tcp_grow_window(struct sock *sk, struct tcp_sock *tp, | 231 | static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp, |
228 | struct sk_buff *skb) | 232 | const struct sk_buff *skb) |
229 | { | 233 | { |
230 | /* Optimize this! */ | 234 | /* Optimize this! */ |
231 | int truesize = tcp_win_from_space(skb->truesize)/2; | 235 | int truesize = tcp_win_from_space(skb->truesize)/2; |
@@ -233,7 +237,7 @@ static int __tcp_grow_window(struct sock *sk, struct tcp_sock *tp, | |||
233 | 237 | ||
234 | while (tp->rcv_ssthresh <= window) { | 238 | while (tp->rcv_ssthresh <= window) { |
235 | if (truesize <= skb->len) | 239 | if (truesize <= skb->len) |
236 | return 2*tp->ack.rcv_mss; | 240 | return 2 * inet_csk(sk)->icsk_ack.rcv_mss; |
237 | 241 | ||
238 | truesize >>= 1; | 242 | truesize >>= 1; |
239 | window >>= 1; | 243 | window >>= 1; |
@@ -260,7 +264,7 @@ static inline void tcp_grow_window(struct sock *sk, struct tcp_sock *tp, | |||
260 | 264 | ||
261 | if (incr) { | 265 | if (incr) { |
262 | tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp); | 266 | tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp); |
263 | tp->ack.quick |= 1; | 267 | inet_csk(sk)->icsk_ack.quick |= 1; |
264 | } | 268 | } |
265 | } | 269 | } |
266 | } | 270 | } |
@@ -321,11 +325,12 @@ static void tcp_init_buffer_space(struct sock *sk) | |||
321 | /* 5. Recalculate window clamp after socket hit its memory bounds. */ | 325 | /* 5. Recalculate window clamp after socket hit its memory bounds. */ |
322 | static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) | 326 | static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) |
323 | { | 327 | { |
328 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
324 | struct sk_buff *skb; | 329 | struct sk_buff *skb; |
325 | unsigned int app_win = tp->rcv_nxt - tp->copied_seq; | 330 | unsigned int app_win = tp->rcv_nxt - tp->copied_seq; |
326 | int ofo_win = 0; | 331 | int ofo_win = 0; |
327 | 332 | ||
328 | tp->ack.quick = 0; | 333 | icsk->icsk_ack.quick = 0; |
329 | 334 | ||
330 | skb_queue_walk(&tp->out_of_order_queue, skb) { | 335 | skb_queue_walk(&tp->out_of_order_queue, skb) { |
331 | ofo_win += skb->len; | 336 | ofo_win += skb->len; |
@@ -346,8 +351,8 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) | |||
346 | app_win += ofo_win; | 351 | app_win += ofo_win; |
347 | if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf) | 352 | if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf) |
348 | app_win >>= 1; | 353 | app_win >>= 1; |
349 | if (app_win > tp->ack.rcv_mss) | 354 | if (app_win > icsk->icsk_ack.rcv_mss) |
350 | app_win -= tp->ack.rcv_mss; | 355 | app_win -= icsk->icsk_ack.rcv_mss; |
351 | app_win = max(app_win, 2U*tp->advmss); | 356 | app_win = max(app_win, 2U*tp->advmss); |
352 | 357 | ||
353 | if (!ofo_win) | 358 | if (!ofo_win) |
@@ -415,11 +420,12 @@ new_measure: | |||
415 | tp->rcv_rtt_est.time = tcp_time_stamp; | 420 | tp->rcv_rtt_est.time = tcp_time_stamp; |
416 | } | 421 | } |
417 | 422 | ||
418 | static inline void tcp_rcv_rtt_measure_ts(struct tcp_sock *tp, struct sk_buff *skb) | 423 | static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, const struct sk_buff *skb) |
419 | { | 424 | { |
425 | struct tcp_sock *tp = tcp_sk(sk); | ||
420 | if (tp->rx_opt.rcv_tsecr && | 426 | if (tp->rx_opt.rcv_tsecr && |
421 | (TCP_SKB_CB(skb)->end_seq - | 427 | (TCP_SKB_CB(skb)->end_seq - |
422 | TCP_SKB_CB(skb)->seq >= tp->ack.rcv_mss)) | 428 | TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) |
423 | tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0); | 429 | tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0); |
424 | } | 430 | } |
425 | 431 | ||
@@ -492,41 +498,42 @@ new_measure: | |||
492 | */ | 498 | */ |
493 | static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) | 499 | static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) |
494 | { | 500 | { |
501 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
495 | u32 now; | 502 | u32 now; |
496 | 503 | ||
497 | tcp_schedule_ack(tp); | 504 | inet_csk_schedule_ack(sk); |
498 | 505 | ||
499 | tcp_measure_rcv_mss(tp, skb); | 506 | tcp_measure_rcv_mss(sk, skb); |
500 | 507 | ||
501 | tcp_rcv_rtt_measure(tp); | 508 | tcp_rcv_rtt_measure(tp); |
502 | 509 | ||
503 | now = tcp_time_stamp; | 510 | now = tcp_time_stamp; |
504 | 511 | ||
505 | if (!tp->ack.ato) { | 512 | if (!icsk->icsk_ack.ato) { |
506 | /* The _first_ data packet received, initialize | 513 | /* The _first_ data packet received, initialize |
507 | * delayed ACK engine. | 514 | * delayed ACK engine. |
508 | */ | 515 | */ |
509 | tcp_incr_quickack(tp); | 516 | tcp_incr_quickack(sk); |
510 | tp->ack.ato = TCP_ATO_MIN; | 517 | icsk->icsk_ack.ato = TCP_ATO_MIN; |
511 | } else { | 518 | } else { |
512 | int m = now - tp->ack.lrcvtime; | 519 | int m = now - icsk->icsk_ack.lrcvtime; |
513 | 520 | ||
514 | if (m <= TCP_ATO_MIN/2) { | 521 | if (m <= TCP_ATO_MIN/2) { |
515 | /* The fastest case is the first. */ | 522 | /* The fastest case is the first. */ |
516 | tp->ack.ato = (tp->ack.ato>>1) + TCP_ATO_MIN/2; | 523 | icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2; |
517 | } else if (m < tp->ack.ato) { | 524 | } else if (m < icsk->icsk_ack.ato) { |
518 | tp->ack.ato = (tp->ack.ato>>1) + m; | 525 | icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m; |
519 | if (tp->ack.ato > tp->rto) | 526 | if (icsk->icsk_ack.ato > icsk->icsk_rto) |
520 | tp->ack.ato = tp->rto; | 527 | icsk->icsk_ack.ato = icsk->icsk_rto; |
521 | } else if (m > tp->rto) { | 528 | } else if (m > icsk->icsk_rto) { |
522 | /* Too long gap. Apparently sender falled to | 529 | /* Too long gap. Apparently sender falled to |
523 | * restart window, so that we send ACKs quickly. | 530 | * restart window, so that we send ACKs quickly. |
524 | */ | 531 | */ |
525 | tcp_incr_quickack(tp); | 532 | tcp_incr_quickack(sk); |
526 | sk_stream_mem_reclaim(sk); | 533 | sk_stream_mem_reclaim(sk); |
527 | } | 534 | } |
528 | } | 535 | } |
529 | tp->ack.lrcvtime = now; | 536 | icsk->icsk_ack.lrcvtime = now; |
530 | 537 | ||
531 | TCP_ECN_check_ce(tp, skb); | 538 | TCP_ECN_check_ce(tp, skb); |
532 | 539 | ||
@@ -543,8 +550,10 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_ | |||
543 | * To save cycles in the RFC 1323 implementation it was better to break | 550 | * To save cycles in the RFC 1323 implementation it was better to break |
544 | * it up into three procedures. -- erics | 551 | * it up into three procedures. -- erics |
545 | */ | 552 | */ |
546 | static void tcp_rtt_estimator(struct tcp_sock *tp, __u32 mrtt, u32 *usrtt) | 553 | static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt) |
547 | { | 554 | { |
555 | struct tcp_sock *tp = tcp_sk(sk); | ||
556 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
548 | long m = mrtt; /* RTT */ | 557 | long m = mrtt; /* RTT */ |
549 | 558 | ||
550 | /* The following amusing code comes from Jacobson's | 559 | /* The following amusing code comes from Jacobson's |
@@ -604,15 +613,16 @@ static void tcp_rtt_estimator(struct tcp_sock *tp, __u32 mrtt, u32 *usrtt) | |||
604 | tp->rtt_seq = tp->snd_nxt; | 613 | tp->rtt_seq = tp->snd_nxt; |
605 | } | 614 | } |
606 | 615 | ||
607 | if (tp->ca_ops->rtt_sample) | 616 | if (icsk->icsk_ca_ops->rtt_sample) |
608 | tp->ca_ops->rtt_sample(tp, *usrtt); | 617 | icsk->icsk_ca_ops->rtt_sample(sk, *usrtt); |
609 | } | 618 | } |
610 | 619 | ||
611 | /* Calculate rto without backoff. This is the second half of Van Jacobson's | 620 | /* Calculate rto without backoff. This is the second half of Van Jacobson's |
612 | * routine referred to above. | 621 | * routine referred to above. |
613 | */ | 622 | */ |
614 | static inline void tcp_set_rto(struct tcp_sock *tp) | 623 | static inline void tcp_set_rto(struct sock *sk) |
615 | { | 624 | { |
625 | const struct tcp_sock *tp = tcp_sk(sk); | ||
616 | /* Old crap is replaced with new one. 8) | 626 | /* Old crap is replaced with new one. 8) |
617 | * | 627 | * |
618 | * More seriously: | 628 | * More seriously: |
@@ -623,7 +633,7 @@ static inline void tcp_set_rto(struct tcp_sock *tp) | |||
623 | * is invisible. Actually, Linux-2.4 also generates erratic | 633 | * is invisible. Actually, Linux-2.4 also generates erratic |
624 | * ACKs in some curcumstances. | 634 | * ACKs in some curcumstances. |
625 | */ | 635 | */ |
626 | tp->rto = (tp->srtt >> 3) + tp->rttvar; | 636 | inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar; |
627 | 637 | ||
628 | /* 2. Fixups made earlier cannot be right. | 638 | /* 2. Fixups made earlier cannot be right. |
629 | * If we do not estimate RTO correctly without them, | 639 | * If we do not estimate RTO correctly without them, |
@@ -635,10 +645,10 @@ static inline void tcp_set_rto(struct tcp_sock *tp) | |||
635 | /* NOTE: clamping at TCP_RTO_MIN is not required, current algo | 645 | /* NOTE: clamping at TCP_RTO_MIN is not required, current algo |
636 | * guarantees that rto is higher. | 646 | * guarantees that rto is higher. |
637 | */ | 647 | */ |
638 | static inline void tcp_bound_rto(struct tcp_sock *tp) | 648 | static inline void tcp_bound_rto(struct sock *sk) |
639 | { | 649 | { |
640 | if (tp->rto > TCP_RTO_MAX) | 650 | if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) |
641 | tp->rto = TCP_RTO_MAX; | 651 | inet_csk(sk)->icsk_rto = TCP_RTO_MAX; |
642 | } | 652 | } |
643 | 653 | ||
644 | /* Save metrics learned by this TCP session. | 654 | /* Save metrics learned by this TCP session. |
@@ -656,9 +666,10 @@ void tcp_update_metrics(struct sock *sk) | |||
656 | dst_confirm(dst); | 666 | dst_confirm(dst); |
657 | 667 | ||
658 | if (dst && (dst->flags&DST_HOST)) { | 668 | if (dst && (dst->flags&DST_HOST)) { |
669 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
659 | int m; | 670 | int m; |
660 | 671 | ||
661 | if (tp->backoff || !tp->srtt) { | 672 | if (icsk->icsk_backoff || !tp->srtt) { |
662 | /* This session failed to estimate rtt. Why? | 673 | /* This session failed to estimate rtt. Why? |
663 | * Probably, no packets returned in time. | 674 | * Probably, no packets returned in time. |
664 | * Reset our results. | 675 | * Reset our results. |
@@ -707,7 +718,7 @@ void tcp_update_metrics(struct sock *sk) | |||
707 | tp->snd_cwnd > dst_metric(dst, RTAX_CWND)) | 718 | tp->snd_cwnd > dst_metric(dst, RTAX_CWND)) |
708 | dst->metrics[RTAX_CWND-1] = tp->snd_cwnd; | 719 | dst->metrics[RTAX_CWND-1] = tp->snd_cwnd; |
709 | } else if (tp->snd_cwnd > tp->snd_ssthresh && | 720 | } else if (tp->snd_cwnd > tp->snd_ssthresh && |
710 | tp->ca_state == TCP_CA_Open) { | 721 | icsk->icsk_ca_state == TCP_CA_Open) { |
711 | /* Cong. avoidance phase, cwnd is reliable. */ | 722 | /* Cong. avoidance phase, cwnd is reliable. */ |
712 | if (!dst_metric_locked(dst, RTAX_SSTHRESH)) | 723 | if (!dst_metric_locked(dst, RTAX_SSTHRESH)) |
713 | dst->metrics[RTAX_SSTHRESH-1] = | 724 | dst->metrics[RTAX_SSTHRESH-1] = |
@@ -801,9 +812,9 @@ static void tcp_init_metrics(struct sock *sk) | |||
801 | tp->mdev = dst_metric(dst, RTAX_RTTVAR); | 812 | tp->mdev = dst_metric(dst, RTAX_RTTVAR); |
802 | tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN); | 813 | tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN); |
803 | } | 814 | } |
804 | tcp_set_rto(tp); | 815 | tcp_set_rto(sk); |
805 | tcp_bound_rto(tp); | 816 | tcp_bound_rto(sk); |
806 | if (tp->rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) | 817 | if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) |
807 | goto reset; | 818 | goto reset; |
808 | tp->snd_cwnd = tcp_init_cwnd(tp, dst); | 819 | tp->snd_cwnd = tcp_init_cwnd(tp, dst); |
809 | tp->snd_cwnd_stamp = tcp_time_stamp; | 820 | tp->snd_cwnd_stamp = tcp_time_stamp; |
@@ -817,12 +828,14 @@ reset: | |||
817 | if (!tp->rx_opt.saw_tstamp && tp->srtt) { | 828 | if (!tp->rx_opt.saw_tstamp && tp->srtt) { |
818 | tp->srtt = 0; | 829 | tp->srtt = 0; |
819 | tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; | 830 | tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; |
820 | tp->rto = TCP_TIMEOUT_INIT; | 831 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; |
821 | } | 832 | } |
822 | } | 833 | } |
823 | 834 | ||
824 | static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts) | 835 | static void tcp_update_reordering(struct sock *sk, const int metric, |
836 | const int ts) | ||
825 | { | 837 | { |
838 | struct tcp_sock *tp = tcp_sk(sk); | ||
826 | if (metric > tp->reordering) { | 839 | if (metric > tp->reordering) { |
827 | tp->reordering = min(TCP_MAX_REORDERING, metric); | 840 | tp->reordering = min(TCP_MAX_REORDERING, metric); |
828 | 841 | ||
@@ -837,7 +850,7 @@ static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts) | |||
837 | NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER); | 850 | NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER); |
838 | #if FASTRETRANS_DEBUG > 1 | 851 | #if FASTRETRANS_DEBUG > 1 |
839 | printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n", | 852 | printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n", |
840 | tp->rx_opt.sack_ok, tp->ca_state, | 853 | tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, |
841 | tp->reordering, | 854 | tp->reordering, |
842 | tp->fackets_out, | 855 | tp->fackets_out, |
843 | tp->sacked_out, | 856 | tp->sacked_out, |
@@ -899,6 +912,7 @@ static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts) | |||
899 | static int | 912 | static int |
900 | tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una) | 913 | tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una) |
901 | { | 914 | { |
915 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
902 | struct tcp_sock *tp = tcp_sk(sk); | 916 | struct tcp_sock *tp = tcp_sk(sk); |
903 | unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; | 917 | unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; |
904 | struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2); | 918 | struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2); |
@@ -1064,7 +1078,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1064 | * we have to account for reordering! Ugly, | 1078 | * we have to account for reordering! Ugly, |
1065 | * but should help. | 1079 | * but should help. |
1066 | */ | 1080 | */ |
1067 | if (lost_retrans && tp->ca_state == TCP_CA_Recovery) { | 1081 | if (lost_retrans && icsk->icsk_ca_state == TCP_CA_Recovery) { |
1068 | struct sk_buff *skb; | 1082 | struct sk_buff *skb; |
1069 | 1083 | ||
1070 | sk_stream_for_retrans_queue(skb, sk) { | 1084 | sk_stream_for_retrans_queue(skb, sk) { |
@@ -1093,8 +1107,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1093 | 1107 | ||
1094 | tp->left_out = tp->sacked_out + tp->lost_out; | 1108 | tp->left_out = tp->sacked_out + tp->lost_out; |
1095 | 1109 | ||
1096 | if ((reord < tp->fackets_out) && tp->ca_state != TCP_CA_Loss) | 1110 | if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss) |
1097 | tcp_update_reordering(tp, ((tp->fackets_out + 1) - reord), 0); | 1111 | tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0); |
1098 | 1112 | ||
1099 | #if FASTRETRANS_DEBUG > 0 | 1113 | #if FASTRETRANS_DEBUG > 0 |
1100 | BUG_TRAP((int)tp->sacked_out >= 0); | 1114 | BUG_TRAP((int)tp->sacked_out >= 0); |
@@ -1111,17 +1125,18 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1111 | */ | 1125 | */ |
1112 | void tcp_enter_frto(struct sock *sk) | 1126 | void tcp_enter_frto(struct sock *sk) |
1113 | { | 1127 | { |
1128 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
1114 | struct tcp_sock *tp = tcp_sk(sk); | 1129 | struct tcp_sock *tp = tcp_sk(sk); |
1115 | struct sk_buff *skb; | 1130 | struct sk_buff *skb; |
1116 | 1131 | ||
1117 | tp->frto_counter = 1; | 1132 | tp->frto_counter = 1; |
1118 | 1133 | ||
1119 | if (tp->ca_state <= TCP_CA_Disorder || | 1134 | if (icsk->icsk_ca_state <= TCP_CA_Disorder || |
1120 | tp->snd_una == tp->high_seq || | 1135 | tp->snd_una == tp->high_seq || |
1121 | (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) { | 1136 | (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { |
1122 | tp->prior_ssthresh = tcp_current_ssthresh(tp); | 1137 | tp->prior_ssthresh = tcp_current_ssthresh(sk); |
1123 | tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); | 1138 | tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); |
1124 | tcp_ca_event(tp, CA_EVENT_FRTO); | 1139 | tcp_ca_event(sk, CA_EVENT_FRTO); |
1125 | } | 1140 | } |
1126 | 1141 | ||
1127 | /* Have to clear retransmission markers here to keep the bookkeeping | 1142 | /* Have to clear retransmission markers here to keep the bookkeeping |
@@ -1138,7 +1153,7 @@ void tcp_enter_frto(struct sock *sk) | |||
1138 | } | 1153 | } |
1139 | tcp_sync_left_out(tp); | 1154 | tcp_sync_left_out(tp); |
1140 | 1155 | ||
1141 | tcp_set_ca_state(tp, TCP_CA_Open); | 1156 | tcp_set_ca_state(sk, TCP_CA_Open); |
1142 | tp->frto_highmark = tp->snd_nxt; | 1157 | tp->frto_highmark = tp->snd_nxt; |
1143 | } | 1158 | } |
1144 | 1159 | ||
@@ -1184,7 +1199,7 @@ static void tcp_enter_frto_loss(struct sock *sk) | |||
1184 | 1199 | ||
1185 | tp->reordering = min_t(unsigned int, tp->reordering, | 1200 | tp->reordering = min_t(unsigned int, tp->reordering, |
1186 | sysctl_tcp_reordering); | 1201 | sysctl_tcp_reordering); |
1187 | tcp_set_ca_state(tp, TCP_CA_Loss); | 1202 | tcp_set_ca_state(sk, TCP_CA_Loss); |
1188 | tp->high_seq = tp->frto_highmark; | 1203 | tp->high_seq = tp->frto_highmark; |
1189 | TCP_ECN_queue_cwr(tp); | 1204 | TCP_ECN_queue_cwr(tp); |
1190 | } | 1205 | } |
@@ -1208,16 +1223,17 @@ void tcp_clear_retrans(struct tcp_sock *tp) | |||
1208 | */ | 1223 | */ |
1209 | void tcp_enter_loss(struct sock *sk, int how) | 1224 | void tcp_enter_loss(struct sock *sk, int how) |
1210 | { | 1225 | { |
1226 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
1211 | struct tcp_sock *tp = tcp_sk(sk); | 1227 | struct tcp_sock *tp = tcp_sk(sk); |
1212 | struct sk_buff *skb; | 1228 | struct sk_buff *skb; |
1213 | int cnt = 0; | 1229 | int cnt = 0; |
1214 | 1230 | ||
1215 | /* Reduce ssthresh if it has not yet been made inside this window. */ | 1231 | /* Reduce ssthresh if it has not yet been made inside this window. */ |
1216 | if (tp->ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || | 1232 | if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || |
1217 | (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) { | 1233 | (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { |
1218 | tp->prior_ssthresh = tcp_current_ssthresh(tp); | 1234 | tp->prior_ssthresh = tcp_current_ssthresh(sk); |
1219 | tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); | 1235 | tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); |
1220 | tcp_ca_event(tp, CA_EVENT_LOSS); | 1236 | tcp_ca_event(sk, CA_EVENT_LOSS); |
1221 | } | 1237 | } |
1222 | tp->snd_cwnd = 1; | 1238 | tp->snd_cwnd = 1; |
1223 | tp->snd_cwnd_cnt = 0; | 1239 | tp->snd_cwnd_cnt = 0; |
@@ -1248,12 +1264,12 @@ void tcp_enter_loss(struct sock *sk, int how) | |||
1248 | 1264 | ||
1249 | tp->reordering = min_t(unsigned int, tp->reordering, | 1265 | tp->reordering = min_t(unsigned int, tp->reordering, |
1250 | sysctl_tcp_reordering); | 1266 | sysctl_tcp_reordering); |
1251 | tcp_set_ca_state(tp, TCP_CA_Loss); | 1267 | tcp_set_ca_state(sk, TCP_CA_Loss); |
1252 | tp->high_seq = tp->snd_nxt; | 1268 | tp->high_seq = tp->snd_nxt; |
1253 | TCP_ECN_queue_cwr(tp); | 1269 | TCP_ECN_queue_cwr(tp); |
1254 | } | 1270 | } |
1255 | 1271 | ||
1256 | static int tcp_check_sack_reneging(struct sock *sk, struct tcp_sock *tp) | 1272 | static int tcp_check_sack_reneging(struct sock *sk) |
1257 | { | 1273 | { |
1258 | struct sk_buff *skb; | 1274 | struct sk_buff *skb; |
1259 | 1275 | ||
@@ -1265,12 +1281,14 @@ static int tcp_check_sack_reneging(struct sock *sk, struct tcp_sock *tp) | |||
1265 | */ | 1281 | */ |
1266 | if ((skb = skb_peek(&sk->sk_write_queue)) != NULL && | 1282 | if ((skb = skb_peek(&sk->sk_write_queue)) != NULL && |
1267 | (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { | 1283 | (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { |
1284 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
1268 | NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING); | 1285 | NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING); |
1269 | 1286 | ||
1270 | tcp_enter_loss(sk, 1); | 1287 | tcp_enter_loss(sk, 1); |
1271 | tp->retransmits++; | 1288 | icsk->icsk_retransmits++; |
1272 | tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)); | 1289 | tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)); |
1273 | tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); | 1290 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
1291 | icsk->icsk_rto, TCP_RTO_MAX); | ||
1274 | return 1; | 1292 | return 1; |
1275 | } | 1293 | } |
1276 | return 0; | 1294 | return 0; |
@@ -1281,15 +1299,15 @@ static inline int tcp_fackets_out(struct tcp_sock *tp) | |||
1281 | return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out; | 1299 | return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out; |
1282 | } | 1300 | } |
1283 | 1301 | ||
1284 | static inline int tcp_skb_timedout(struct tcp_sock *tp, struct sk_buff *skb) | 1302 | static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb) |
1285 | { | 1303 | { |
1286 | return (tcp_time_stamp - TCP_SKB_CB(skb)->when > tp->rto); | 1304 | return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto); |
1287 | } | 1305 | } |
1288 | 1306 | ||
1289 | static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp) | 1307 | static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp) |
1290 | { | 1308 | { |
1291 | return tp->packets_out && | 1309 | return tp->packets_out && |
1292 | tcp_skb_timedout(tp, skb_peek(&sk->sk_write_queue)); | 1310 | tcp_skb_timedout(sk, skb_peek(&sk->sk_write_queue)); |
1293 | } | 1311 | } |
1294 | 1312 | ||
1295 | /* Linux NewReno/SACK/FACK/ECN state machine. | 1313 | /* Linux NewReno/SACK/FACK/ECN state machine. |
@@ -1423,8 +1441,9 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp) | |||
1423 | * in assumption of absent reordering, interpret this as reordering. | 1441 | * in assumption of absent reordering, interpret this as reordering. |
1424 | * The only another reason could be bug in receiver TCP. | 1442 | * The only another reason could be bug in receiver TCP. |
1425 | */ | 1443 | */ |
1426 | static void tcp_check_reno_reordering(struct tcp_sock *tp, int addend) | 1444 | static void tcp_check_reno_reordering(struct sock *sk, const int addend) |
1427 | { | 1445 | { |
1446 | struct tcp_sock *tp = tcp_sk(sk); | ||
1428 | u32 holes; | 1447 | u32 holes; |
1429 | 1448 | ||
1430 | holes = max(tp->lost_out, 1U); | 1449 | holes = max(tp->lost_out, 1U); |
@@ -1432,16 +1451,17 @@ static void tcp_check_reno_reordering(struct tcp_sock *tp, int addend) | |||
1432 | 1451 | ||
1433 | if ((tp->sacked_out + holes) > tp->packets_out) { | 1452 | if ((tp->sacked_out + holes) > tp->packets_out) { |
1434 | tp->sacked_out = tp->packets_out - holes; | 1453 | tp->sacked_out = tp->packets_out - holes; |
1435 | tcp_update_reordering(tp, tp->packets_out+addend, 0); | 1454 | tcp_update_reordering(sk, tp->packets_out + addend, 0); |
1436 | } | 1455 | } |
1437 | } | 1456 | } |
1438 | 1457 | ||
1439 | /* Emulate SACKs for SACKless connection: account for a new dupack. */ | 1458 | /* Emulate SACKs for SACKless connection: account for a new dupack. */ |
1440 | 1459 | ||
1441 | static void tcp_add_reno_sack(struct tcp_sock *tp) | 1460 | static void tcp_add_reno_sack(struct sock *sk) |
1442 | { | 1461 | { |
1462 | struct tcp_sock *tp = tcp_sk(sk); | ||
1443 | tp->sacked_out++; | 1463 | tp->sacked_out++; |
1444 | tcp_check_reno_reordering(tp, 0); | 1464 | tcp_check_reno_reordering(sk, 0); |
1445 | tcp_sync_left_out(tp); | 1465 | tcp_sync_left_out(tp); |
1446 | } | 1466 | } |
1447 | 1467 | ||
@@ -1456,7 +1476,7 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_sock *tp, int acke | |||
1456 | else | 1476 | else |
1457 | tp->sacked_out -= acked-1; | 1477 | tp->sacked_out -= acked-1; |
1458 | } | 1478 | } |
1459 | tcp_check_reno_reordering(tp, acked); | 1479 | tcp_check_reno_reordering(sk, acked); |
1460 | tcp_sync_left_out(tp); | 1480 | tcp_sync_left_out(tp); |
1461 | } | 1481 | } |
1462 | 1482 | ||
@@ -1509,7 +1529,7 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp) | |||
1509 | struct sk_buff *skb; | 1529 | struct sk_buff *skb; |
1510 | 1530 | ||
1511 | sk_stream_for_retrans_queue(skb, sk) { | 1531 | sk_stream_for_retrans_queue(skb, sk) { |
1512 | if (tcp_skb_timedout(tp, skb) && | 1532 | if (tcp_skb_timedout(sk, skb) && |
1513 | !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { | 1533 | !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { |
1514 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | 1534 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; |
1515 | tp->lost_out += tcp_skb_pcount(skb); | 1535 | tp->lost_out += tcp_skb_pcount(skb); |
@@ -1530,14 +1550,16 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp) | |||
1530 | } | 1550 | } |
1531 | 1551 | ||
1532 | /* Decrease cwnd each second ack. */ | 1552 | /* Decrease cwnd each second ack. */ |
1533 | static void tcp_cwnd_down(struct tcp_sock *tp) | 1553 | static void tcp_cwnd_down(struct sock *sk) |
1534 | { | 1554 | { |
1555 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
1556 | struct tcp_sock *tp = tcp_sk(sk); | ||
1535 | int decr = tp->snd_cwnd_cnt + 1; | 1557 | int decr = tp->snd_cwnd_cnt + 1; |
1536 | 1558 | ||
1537 | tp->snd_cwnd_cnt = decr&1; | 1559 | tp->snd_cwnd_cnt = decr&1; |
1538 | decr >>= 1; | 1560 | decr >>= 1; |
1539 | 1561 | ||
1540 | if (decr && tp->snd_cwnd > tp->ca_ops->min_cwnd(tp)) | 1562 | if (decr && tp->snd_cwnd > icsk->icsk_ca_ops->min_cwnd(sk)) |
1541 | tp->snd_cwnd -= decr; | 1563 | tp->snd_cwnd -= decr; |
1542 | 1564 | ||
1543 | tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1); | 1565 | tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1); |
@@ -1571,11 +1593,15 @@ static void DBGUNDO(struct sock *sk, struct tcp_sock *tp, const char *msg) | |||
1571 | #define DBGUNDO(x...) do { } while (0) | 1593 | #define DBGUNDO(x...) do { } while (0) |
1572 | #endif | 1594 | #endif |
1573 | 1595 | ||
1574 | static void tcp_undo_cwr(struct tcp_sock *tp, int undo) | 1596 | static void tcp_undo_cwr(struct sock *sk, const int undo) |
1575 | { | 1597 | { |
1598 | struct tcp_sock *tp = tcp_sk(sk); | ||
1599 | |||
1576 | if (tp->prior_ssthresh) { | 1600 | if (tp->prior_ssthresh) { |
1577 | if (tp->ca_ops->undo_cwnd) | 1601 | const struct inet_connection_sock *icsk = inet_csk(sk); |
1578 | tp->snd_cwnd = tp->ca_ops->undo_cwnd(tp); | 1602 | |
1603 | if (icsk->icsk_ca_ops->undo_cwnd) | ||
1604 | tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk); | ||
1579 | else | 1605 | else |
1580 | tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1); | 1606 | tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1); |
1581 | 1607 | ||
@@ -1603,9 +1629,9 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp) | |||
1603 | /* Happy end! We did not retransmit anything | 1629 | /* Happy end! We did not retransmit anything |
1604 | * or our original transmission succeeded. | 1630 | * or our original transmission succeeded. |
1605 | */ | 1631 | */ |
1606 | DBGUNDO(sk, tp, tp->ca_state == TCP_CA_Loss ? "loss" : "retrans"); | 1632 | DBGUNDO(sk, tp, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); |
1607 | tcp_undo_cwr(tp, 1); | 1633 | tcp_undo_cwr(sk, 1); |
1608 | if (tp->ca_state == TCP_CA_Loss) | 1634 | if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) |
1609 | NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); | 1635 | NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); |
1610 | else | 1636 | else |
1611 | NET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO); | 1637 | NET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO); |
@@ -1618,7 +1644,7 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp) | |||
1618 | tcp_moderate_cwnd(tp); | 1644 | tcp_moderate_cwnd(tp); |
1619 | return 1; | 1645 | return 1; |
1620 | } | 1646 | } |
1621 | tcp_set_ca_state(tp, TCP_CA_Open); | 1647 | tcp_set_ca_state(sk, TCP_CA_Open); |
1622 | return 0; | 1648 | return 0; |
1623 | } | 1649 | } |
1624 | 1650 | ||
@@ -1627,7 +1653,7 @@ static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp) | |||
1627 | { | 1653 | { |
1628 | if (tp->undo_marker && !tp->undo_retrans) { | 1654 | if (tp->undo_marker && !tp->undo_retrans) { |
1629 | DBGUNDO(sk, tp, "D-SACK"); | 1655 | DBGUNDO(sk, tp, "D-SACK"); |
1630 | tcp_undo_cwr(tp, 1); | 1656 | tcp_undo_cwr(sk, 1); |
1631 | tp->undo_marker = 0; | 1657 | tp->undo_marker = 0; |
1632 | NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO); | 1658 | NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO); |
1633 | } | 1659 | } |
@@ -1648,10 +1674,10 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp, | |||
1648 | if (tp->retrans_out == 0) | 1674 | if (tp->retrans_out == 0) |
1649 | tp->retrans_stamp = 0; | 1675 | tp->retrans_stamp = 0; |
1650 | 1676 | ||
1651 | tcp_update_reordering(tp, tcp_fackets_out(tp)+acked, 1); | 1677 | tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); |
1652 | 1678 | ||
1653 | DBGUNDO(sk, tp, "Hoe"); | 1679 | DBGUNDO(sk, tp, "Hoe"); |
1654 | tcp_undo_cwr(tp, 0); | 1680 | tcp_undo_cwr(sk, 0); |
1655 | NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO); | 1681 | NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO); |
1656 | 1682 | ||
1657 | /* So... Do not make Hoe's retransmit yet. | 1683 | /* So... Do not make Hoe's retransmit yet. |
@@ -1674,22 +1700,23 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp) | |||
1674 | DBGUNDO(sk, tp, "partial loss"); | 1700 | DBGUNDO(sk, tp, "partial loss"); |
1675 | tp->lost_out = 0; | 1701 | tp->lost_out = 0; |
1676 | tp->left_out = tp->sacked_out; | 1702 | tp->left_out = tp->sacked_out; |
1677 | tcp_undo_cwr(tp, 1); | 1703 | tcp_undo_cwr(sk, 1); |
1678 | NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); | 1704 | NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); |
1679 | tp->retransmits = 0; | 1705 | inet_csk(sk)->icsk_retransmits = 0; |
1680 | tp->undo_marker = 0; | 1706 | tp->undo_marker = 0; |
1681 | if (!IsReno(tp)) | 1707 | if (!IsReno(tp)) |
1682 | tcp_set_ca_state(tp, TCP_CA_Open); | 1708 | tcp_set_ca_state(sk, TCP_CA_Open); |
1683 | return 1; | 1709 | return 1; |
1684 | } | 1710 | } |
1685 | return 0; | 1711 | return 0; |
1686 | } | 1712 | } |
1687 | 1713 | ||
1688 | static inline void tcp_complete_cwr(struct tcp_sock *tp) | 1714 | static inline void tcp_complete_cwr(struct sock *sk) |
1689 | { | 1715 | { |
1716 | struct tcp_sock *tp = tcp_sk(sk); | ||
1690 | tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); | 1717 | tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); |
1691 | tp->snd_cwnd_stamp = tcp_time_stamp; | 1718 | tp->snd_cwnd_stamp = tcp_time_stamp; |
1692 | tcp_ca_event(tp, CA_EVENT_COMPLETE_CWR); | 1719 | tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); |
1693 | } | 1720 | } |
1694 | 1721 | ||
1695 | static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag) | 1722 | static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag) |
@@ -1700,21 +1727,21 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag) | |||
1700 | tp->retrans_stamp = 0; | 1727 | tp->retrans_stamp = 0; |
1701 | 1728 | ||
1702 | if (flag&FLAG_ECE) | 1729 | if (flag&FLAG_ECE) |
1703 | tcp_enter_cwr(tp); | 1730 | tcp_enter_cwr(sk); |
1704 | 1731 | ||
1705 | if (tp->ca_state != TCP_CA_CWR) { | 1732 | if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { |
1706 | int state = TCP_CA_Open; | 1733 | int state = TCP_CA_Open; |
1707 | 1734 | ||
1708 | if (tp->left_out || tp->retrans_out || tp->undo_marker) | 1735 | if (tp->left_out || tp->retrans_out || tp->undo_marker) |
1709 | state = TCP_CA_Disorder; | 1736 | state = TCP_CA_Disorder; |
1710 | 1737 | ||
1711 | if (tp->ca_state != state) { | 1738 | if (inet_csk(sk)->icsk_ca_state != state) { |
1712 | tcp_set_ca_state(tp, state); | 1739 | tcp_set_ca_state(sk, state); |
1713 | tp->high_seq = tp->snd_nxt; | 1740 | tp->high_seq = tp->snd_nxt; |
1714 | } | 1741 | } |
1715 | tcp_moderate_cwnd(tp); | 1742 | tcp_moderate_cwnd(tp); |
1716 | } else { | 1743 | } else { |
1717 | tcp_cwnd_down(tp); | 1744 | tcp_cwnd_down(sk); |
1718 | } | 1745 | } |
1719 | } | 1746 | } |
1720 | 1747 | ||
@@ -1733,6 +1760,7 @@ static void | |||
1733 | tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, | 1760 | tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, |
1734 | int prior_packets, int flag) | 1761 | int prior_packets, int flag) |
1735 | { | 1762 | { |
1763 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
1736 | struct tcp_sock *tp = tcp_sk(sk); | 1764 | struct tcp_sock *tp = tcp_sk(sk); |
1737 | int is_dupack = (tp->snd_una == prior_snd_una && !(flag&FLAG_NOT_DUP)); | 1765 | int is_dupack = (tp->snd_una == prior_snd_una && !(flag&FLAG_NOT_DUP)); |
1738 | 1766 | ||
@@ -1750,13 +1778,13 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, | |||
1750 | tp->prior_ssthresh = 0; | 1778 | tp->prior_ssthresh = 0; |
1751 | 1779 | ||
1752 | /* B. In all the states check for reneging SACKs. */ | 1780 | /* B. In all the states check for reneging SACKs. */ |
1753 | if (tp->sacked_out && tcp_check_sack_reneging(sk, tp)) | 1781 | if (tp->sacked_out && tcp_check_sack_reneging(sk)) |
1754 | return; | 1782 | return; |
1755 | 1783 | ||
1756 | /* C. Process data loss notification, provided it is valid. */ | 1784 | /* C. Process data loss notification, provided it is valid. */ |
1757 | if ((flag&FLAG_DATA_LOST) && | 1785 | if ((flag&FLAG_DATA_LOST) && |
1758 | before(tp->snd_una, tp->high_seq) && | 1786 | before(tp->snd_una, tp->high_seq) && |
1759 | tp->ca_state != TCP_CA_Open && | 1787 | icsk->icsk_ca_state != TCP_CA_Open && |
1760 | tp->fackets_out > tp->reordering) { | 1788 | tp->fackets_out > tp->reordering) { |
1761 | tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq); | 1789 | tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq); |
1762 | NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); | 1790 | NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); |
@@ -1767,14 +1795,14 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, | |||
1767 | 1795 | ||
1768 | /* E. Check state exit conditions. State can be terminated | 1796 | /* E. Check state exit conditions. State can be terminated |
1769 | * when high_seq is ACKed. */ | 1797 | * when high_seq is ACKed. */ |
1770 | if (tp->ca_state == TCP_CA_Open) { | 1798 | if (icsk->icsk_ca_state == TCP_CA_Open) { |
1771 | if (!sysctl_tcp_frto) | 1799 | if (!sysctl_tcp_frto) |
1772 | BUG_TRAP(tp->retrans_out == 0); | 1800 | BUG_TRAP(tp->retrans_out == 0); |
1773 | tp->retrans_stamp = 0; | 1801 | tp->retrans_stamp = 0; |
1774 | } else if (!before(tp->snd_una, tp->high_seq)) { | 1802 | } else if (!before(tp->snd_una, tp->high_seq)) { |
1775 | switch (tp->ca_state) { | 1803 | switch (icsk->icsk_ca_state) { |
1776 | case TCP_CA_Loss: | 1804 | case TCP_CA_Loss: |
1777 | tp->retransmits = 0; | 1805 | icsk->icsk_retransmits = 0; |
1778 | if (tcp_try_undo_recovery(sk, tp)) | 1806 | if (tcp_try_undo_recovery(sk, tp)) |
1779 | return; | 1807 | return; |
1780 | break; | 1808 | break; |
@@ -1783,8 +1811,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, | |||
1783 | /* CWR is to be held something *above* high_seq | 1811 | /* CWR is to be held something *above* high_seq |
1784 | * is ACKed for CWR bit to reach receiver. */ | 1812 | * is ACKed for CWR bit to reach receiver. */ |
1785 | if (tp->snd_una != tp->high_seq) { | 1813 | if (tp->snd_una != tp->high_seq) { |
1786 | tcp_complete_cwr(tp); | 1814 | tcp_complete_cwr(sk); |
1787 | tcp_set_ca_state(tp, TCP_CA_Open); | 1815 | tcp_set_ca_state(sk, TCP_CA_Open); |
1788 | } | 1816 | } |
1789 | break; | 1817 | break; |
1790 | 1818 | ||
@@ -1795,7 +1823,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, | |||
1795 | * catching for all duplicate ACKs. */ | 1823 | * catching for all duplicate ACKs. */ |
1796 | IsReno(tp) || tp->snd_una != tp->high_seq) { | 1824 | IsReno(tp) || tp->snd_una != tp->high_seq) { |
1797 | tp->undo_marker = 0; | 1825 | tp->undo_marker = 0; |
1798 | tcp_set_ca_state(tp, TCP_CA_Open); | 1826 | tcp_set_ca_state(sk, TCP_CA_Open); |
1799 | } | 1827 | } |
1800 | break; | 1828 | break; |
1801 | 1829 | ||
@@ -1804,17 +1832,17 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, | |||
1804 | tcp_reset_reno_sack(tp); | 1832 | tcp_reset_reno_sack(tp); |
1805 | if (tcp_try_undo_recovery(sk, tp)) | 1833 | if (tcp_try_undo_recovery(sk, tp)) |
1806 | return; | 1834 | return; |
1807 | tcp_complete_cwr(tp); | 1835 | tcp_complete_cwr(sk); |
1808 | break; | 1836 | break; |
1809 | } | 1837 | } |
1810 | } | 1838 | } |
1811 | 1839 | ||
1812 | /* F. Process state. */ | 1840 | /* F. Process state. */ |
1813 | switch (tp->ca_state) { | 1841 | switch (icsk->icsk_ca_state) { |
1814 | case TCP_CA_Recovery: | 1842 | case TCP_CA_Recovery: |
1815 | if (prior_snd_una == tp->snd_una) { | 1843 | if (prior_snd_una == tp->snd_una) { |
1816 | if (IsReno(tp) && is_dupack) | 1844 | if (IsReno(tp) && is_dupack) |
1817 | tcp_add_reno_sack(tp); | 1845 | tcp_add_reno_sack(sk); |
1818 | } else { | 1846 | } else { |
1819 | int acked = prior_packets - tp->packets_out; | 1847 | int acked = prior_packets - tp->packets_out; |
1820 | if (IsReno(tp)) | 1848 | if (IsReno(tp)) |
@@ -1824,13 +1852,13 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, | |||
1824 | break; | 1852 | break; |
1825 | case TCP_CA_Loss: | 1853 | case TCP_CA_Loss: |
1826 | if (flag&FLAG_DATA_ACKED) | 1854 | if (flag&FLAG_DATA_ACKED) |
1827 | tp->retransmits = 0; | 1855 | icsk->icsk_retransmits = 0; |
1828 | if (!tcp_try_undo_loss(sk, tp)) { | 1856 | if (!tcp_try_undo_loss(sk, tp)) { |
1829 | tcp_moderate_cwnd(tp); | 1857 | tcp_moderate_cwnd(tp); |
1830 | tcp_xmit_retransmit_queue(sk); | 1858 | tcp_xmit_retransmit_queue(sk); |
1831 | return; | 1859 | return; |
1832 | } | 1860 | } |
1833 | if (tp->ca_state != TCP_CA_Open) | 1861 | if (icsk->icsk_ca_state != TCP_CA_Open) |
1834 | return; | 1862 | return; |
1835 | /* Loss is undone; fall through to processing in Open state. */ | 1863 | /* Loss is undone; fall through to processing in Open state. */ |
1836 | default: | 1864 | default: |
@@ -1838,10 +1866,10 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, | |||
1838 | if (tp->snd_una != prior_snd_una) | 1866 | if (tp->snd_una != prior_snd_una) |
1839 | tcp_reset_reno_sack(tp); | 1867 | tcp_reset_reno_sack(tp); |
1840 | if (is_dupack) | 1868 | if (is_dupack) |
1841 | tcp_add_reno_sack(tp); | 1869 | tcp_add_reno_sack(sk); |
1842 | } | 1870 | } |
1843 | 1871 | ||
1844 | if (tp->ca_state == TCP_CA_Disorder) | 1872 | if (icsk->icsk_ca_state == TCP_CA_Disorder) |
1845 | tcp_try_undo_dsack(sk, tp); | 1873 | tcp_try_undo_dsack(sk, tp); |
1846 | 1874 | ||
1847 | if (!tcp_time_to_recover(sk, tp)) { | 1875 | if (!tcp_time_to_recover(sk, tp)) { |
@@ -1861,30 +1889,28 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, | |||
1861 | tp->undo_marker = tp->snd_una; | 1889 | tp->undo_marker = tp->snd_una; |
1862 | tp->undo_retrans = tp->retrans_out; | 1890 | tp->undo_retrans = tp->retrans_out; |
1863 | 1891 | ||
1864 | if (tp->ca_state < TCP_CA_CWR) { | 1892 | if (icsk->icsk_ca_state < TCP_CA_CWR) { |
1865 | if (!(flag&FLAG_ECE)) | 1893 | if (!(flag&FLAG_ECE)) |
1866 | tp->prior_ssthresh = tcp_current_ssthresh(tp); | 1894 | tp->prior_ssthresh = tcp_current_ssthresh(sk); |
1867 | tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); | 1895 | tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); |
1868 | TCP_ECN_queue_cwr(tp); | 1896 | TCP_ECN_queue_cwr(tp); |
1869 | } | 1897 | } |
1870 | 1898 | ||
1871 | tp->snd_cwnd_cnt = 0; | 1899 | tp->snd_cwnd_cnt = 0; |
1872 | tcp_set_ca_state(tp, TCP_CA_Recovery); | 1900 | tcp_set_ca_state(sk, TCP_CA_Recovery); |
1873 | } | 1901 | } |
1874 | 1902 | ||
1875 | if (is_dupack || tcp_head_timedout(sk, tp)) | 1903 | if (is_dupack || tcp_head_timedout(sk, tp)) |
1876 | tcp_update_scoreboard(sk, tp); | 1904 | tcp_update_scoreboard(sk, tp); |
1877 | tcp_cwnd_down(tp); | 1905 | tcp_cwnd_down(sk); |
1878 | tcp_xmit_retransmit_queue(sk); | 1906 | tcp_xmit_retransmit_queue(sk); |
1879 | } | 1907 | } |
1880 | 1908 | ||
1881 | /* Read draft-ietf-tcplw-high-performance before mucking | 1909 | /* Read draft-ietf-tcplw-high-performance before mucking |
1882 | * with this code. (Superceeds RFC1323) | 1910 | * with this code. (Superceeds RFC1323) |
1883 | */ | 1911 | */ |
1884 | static void tcp_ack_saw_tstamp(struct tcp_sock *tp, u32 *usrtt, int flag) | 1912 | static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag) |
1885 | { | 1913 | { |
1886 | __u32 seq_rtt; | ||
1887 | |||
1888 | /* RTTM Rule: A TSecr value received in a segment is used to | 1914 | /* RTTM Rule: A TSecr value received in a segment is used to |
1889 | * update the averaged RTT measurement only if the segment | 1915 | * update the averaged RTT measurement only if the segment |
1890 | * acknowledges some new data, i.e., only if it advances the | 1916 | * acknowledges some new data, i.e., only if it advances the |
@@ -1900,14 +1926,15 @@ static void tcp_ack_saw_tstamp(struct tcp_sock *tp, u32 *usrtt, int flag) | |||
1900 | * answer arrives rto becomes 120 seconds! If at least one of segments | 1926 | * answer arrives rto becomes 120 seconds! If at least one of segments |
1901 | * in window is lost... Voila. --ANK (010210) | 1927 | * in window is lost... Voila. --ANK (010210) |
1902 | */ | 1928 | */ |
1903 | seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; | 1929 | struct tcp_sock *tp = tcp_sk(sk); |
1904 | tcp_rtt_estimator(tp, seq_rtt, usrtt); | 1930 | const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; |
1905 | tcp_set_rto(tp); | 1931 | tcp_rtt_estimator(sk, seq_rtt, usrtt); |
1906 | tp->backoff = 0; | 1932 | tcp_set_rto(sk); |
1907 | tcp_bound_rto(tp); | 1933 | inet_csk(sk)->icsk_backoff = 0; |
1934 | tcp_bound_rto(sk); | ||
1908 | } | 1935 | } |
1909 | 1936 | ||
1910 | static void tcp_ack_no_tstamp(struct tcp_sock *tp, u32 seq_rtt, u32 *usrtt, int flag) | 1937 | static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag) |
1911 | { | 1938 | { |
1912 | /* We don't have a timestamp. Can only use | 1939 | /* We don't have a timestamp. Can only use |
1913 | * packets that are not retransmitted to determine | 1940 | * packets that are not retransmitted to determine |
@@ -1921,27 +1948,29 @@ static void tcp_ack_no_tstamp(struct tcp_sock *tp, u32 seq_rtt, u32 *usrtt, int | |||
1921 | if (flag & FLAG_RETRANS_DATA_ACKED) | 1948 | if (flag & FLAG_RETRANS_DATA_ACKED) |
1922 | return; | 1949 | return; |
1923 | 1950 | ||
1924 | tcp_rtt_estimator(tp, seq_rtt, usrtt); | 1951 | tcp_rtt_estimator(sk, seq_rtt, usrtt); |
1925 | tcp_set_rto(tp); | 1952 | tcp_set_rto(sk); |
1926 | tp->backoff = 0; | 1953 | inet_csk(sk)->icsk_backoff = 0; |
1927 | tcp_bound_rto(tp); | 1954 | tcp_bound_rto(sk); |
1928 | } | 1955 | } |
1929 | 1956 | ||
1930 | static inline void tcp_ack_update_rtt(struct tcp_sock *tp, | 1957 | static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, |
1931 | int flag, s32 seq_rtt, u32 *usrtt) | 1958 | const s32 seq_rtt, u32 *usrtt) |
1932 | { | 1959 | { |
1960 | const struct tcp_sock *tp = tcp_sk(sk); | ||
1933 | /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */ | 1961 | /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */ |
1934 | if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) | 1962 | if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) |
1935 | tcp_ack_saw_tstamp(tp, usrtt, flag); | 1963 | tcp_ack_saw_tstamp(sk, usrtt, flag); |
1936 | else if (seq_rtt >= 0) | 1964 | else if (seq_rtt >= 0) |
1937 | tcp_ack_no_tstamp(tp, seq_rtt, usrtt, flag); | 1965 | tcp_ack_no_tstamp(sk, seq_rtt, usrtt, flag); |
1938 | } | 1966 | } |
1939 | 1967 | ||
1940 | static inline void tcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, | 1968 | static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, |
1941 | u32 in_flight, int good) | 1969 | u32 in_flight, int good) |
1942 | { | 1970 | { |
1943 | tp->ca_ops->cong_avoid(tp, ack, rtt, in_flight, good); | 1971 | const struct inet_connection_sock *icsk = inet_csk(sk); |
1944 | tp->snd_cwnd_stamp = tcp_time_stamp; | 1972 | icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good); |
1973 | tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp; | ||
1945 | } | 1974 | } |
1946 | 1975 | ||
1947 | /* Restart timer after forward progress on connection. | 1976 | /* Restart timer after forward progress on connection. |
@@ -1951,9 +1980,9 @@ static inline void tcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, | |||
1951 | static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) | 1980 | static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) |
1952 | { | 1981 | { |
1953 | if (!tp->packets_out) { | 1982 | if (!tp->packets_out) { |
1954 | tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS); | 1983 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); |
1955 | } else { | 1984 | } else { |
1956 | tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); | 1985 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); |
1957 | } | 1986 | } |
1958 | } | 1987 | } |
1959 | 1988 | ||
@@ -2068,9 +2097,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt | |||
2068 | seq_rtt = -1; | 2097 | seq_rtt = -1; |
2069 | } else if (seq_rtt < 0) | 2098 | } else if (seq_rtt < 0) |
2070 | seq_rtt = now - scb->when; | 2099 | seq_rtt = now - scb->when; |
2071 | if (seq_usrtt) | 2100 | if (seq_usrtt) { |
2072 | *seq_usrtt = (usnow.tv_sec - skb->stamp.tv_sec) * 1000000 | 2101 | struct timeval tv; |
2073 | + (usnow.tv_usec - skb->stamp.tv_usec); | 2102 | |
2103 | skb_get_timestamp(skb, &tv); | ||
2104 | *seq_usrtt = (usnow.tv_sec - tv.tv_sec) * 1000000 | ||
2105 | + (usnow.tv_usec - tv.tv_usec); | ||
2106 | } | ||
2074 | 2107 | ||
2075 | if (sacked & TCPCB_SACKED_ACKED) | 2108 | if (sacked & TCPCB_SACKED_ACKED) |
2076 | tp->sacked_out -= tcp_skb_pcount(skb); | 2109 | tp->sacked_out -= tcp_skb_pcount(skb); |
@@ -2085,16 +2118,17 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt | |||
2085 | seq_rtt = now - scb->when; | 2118 | seq_rtt = now - scb->when; |
2086 | tcp_dec_pcount_approx(&tp->fackets_out, skb); | 2119 | tcp_dec_pcount_approx(&tp->fackets_out, skb); |
2087 | tcp_packets_out_dec(tp, skb); | 2120 | tcp_packets_out_dec(tp, skb); |
2088 | __skb_unlink(skb, skb->list); | 2121 | __skb_unlink(skb, &sk->sk_write_queue); |
2089 | sk_stream_free_skb(sk, skb); | 2122 | sk_stream_free_skb(sk, skb); |
2090 | } | 2123 | } |
2091 | 2124 | ||
2092 | if (acked&FLAG_ACKED) { | 2125 | if (acked&FLAG_ACKED) { |
2093 | tcp_ack_update_rtt(tp, acked, seq_rtt, seq_usrtt); | 2126 | const struct inet_connection_sock *icsk = inet_csk(sk); |
2127 | tcp_ack_update_rtt(sk, acked, seq_rtt, seq_usrtt); | ||
2094 | tcp_ack_packets_out(sk, tp); | 2128 | tcp_ack_packets_out(sk, tp); |
2095 | 2129 | ||
2096 | if (tp->ca_ops->pkts_acked) | 2130 | if (icsk->icsk_ca_ops->pkts_acked) |
2097 | tp->ca_ops->pkts_acked(tp, pkts_acked); | 2131 | icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked); |
2098 | } | 2132 | } |
2099 | 2133 | ||
2100 | #if FASTRETRANS_DEBUG > 0 | 2134 | #if FASTRETRANS_DEBUG > 0 |
@@ -2102,19 +2136,20 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt | |||
2102 | BUG_TRAP((int)tp->lost_out >= 0); | 2136 | BUG_TRAP((int)tp->lost_out >= 0); |
2103 | BUG_TRAP((int)tp->retrans_out >= 0); | 2137 | BUG_TRAP((int)tp->retrans_out >= 0); |
2104 | if (!tp->packets_out && tp->rx_opt.sack_ok) { | 2138 | if (!tp->packets_out && tp->rx_opt.sack_ok) { |
2139 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
2105 | if (tp->lost_out) { | 2140 | if (tp->lost_out) { |
2106 | printk(KERN_DEBUG "Leak l=%u %d\n", | 2141 | printk(KERN_DEBUG "Leak l=%u %d\n", |
2107 | tp->lost_out, tp->ca_state); | 2142 | tp->lost_out, icsk->icsk_ca_state); |
2108 | tp->lost_out = 0; | 2143 | tp->lost_out = 0; |
2109 | } | 2144 | } |
2110 | if (tp->sacked_out) { | 2145 | if (tp->sacked_out) { |
2111 | printk(KERN_DEBUG "Leak s=%u %d\n", | 2146 | printk(KERN_DEBUG "Leak s=%u %d\n", |
2112 | tp->sacked_out, tp->ca_state); | 2147 | tp->sacked_out, icsk->icsk_ca_state); |
2113 | tp->sacked_out = 0; | 2148 | tp->sacked_out = 0; |
2114 | } | 2149 | } |
2115 | if (tp->retrans_out) { | 2150 | if (tp->retrans_out) { |
2116 | printk(KERN_DEBUG "Leak r=%u %d\n", | 2151 | printk(KERN_DEBUG "Leak r=%u %d\n", |
2117 | tp->retrans_out, tp->ca_state); | 2152 | tp->retrans_out, icsk->icsk_ca_state); |
2118 | tp->retrans_out = 0; | 2153 | tp->retrans_out = 0; |
2119 | } | 2154 | } |
2120 | } | 2155 | } |
@@ -2125,40 +2160,43 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt | |||
2125 | 2160 | ||
2126 | static void tcp_ack_probe(struct sock *sk) | 2161 | static void tcp_ack_probe(struct sock *sk) |
2127 | { | 2162 | { |
2128 | struct tcp_sock *tp = tcp_sk(sk); | 2163 | const struct tcp_sock *tp = tcp_sk(sk); |
2164 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
2129 | 2165 | ||
2130 | /* Was it a usable window open? */ | 2166 | /* Was it a usable window open? */ |
2131 | 2167 | ||
2132 | if (!after(TCP_SKB_CB(sk->sk_send_head)->end_seq, | 2168 | if (!after(TCP_SKB_CB(sk->sk_send_head)->end_seq, |
2133 | tp->snd_una + tp->snd_wnd)) { | 2169 | tp->snd_una + tp->snd_wnd)) { |
2134 | tp->backoff = 0; | 2170 | icsk->icsk_backoff = 0; |
2135 | tcp_clear_xmit_timer(sk, TCP_TIME_PROBE0); | 2171 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0); |
2136 | /* Socket must be waked up by subsequent tcp_data_snd_check(). | 2172 | /* Socket must be waked up by subsequent tcp_data_snd_check(). |
2137 | * This function is not for random using! | 2173 | * This function is not for random using! |
2138 | */ | 2174 | */ |
2139 | } else { | 2175 | } else { |
2140 | tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, | 2176 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, |
2141 | min(tp->rto << tp->backoff, TCP_RTO_MAX)); | 2177 | min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX), |
2178 | TCP_RTO_MAX); | ||
2142 | } | 2179 | } |
2143 | } | 2180 | } |
2144 | 2181 | ||
2145 | static inline int tcp_ack_is_dubious(struct tcp_sock *tp, int flag) | 2182 | static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag) |
2146 | { | 2183 | { |
2147 | return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) || | 2184 | return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) || |
2148 | tp->ca_state != TCP_CA_Open); | 2185 | inet_csk(sk)->icsk_ca_state != TCP_CA_Open); |
2149 | } | 2186 | } |
2150 | 2187 | ||
2151 | static inline int tcp_may_raise_cwnd(struct tcp_sock *tp, int flag) | 2188 | static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag) |
2152 | { | 2189 | { |
2190 | const struct tcp_sock *tp = tcp_sk(sk); | ||
2153 | return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) && | 2191 | return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) && |
2154 | !((1<<tp->ca_state)&(TCPF_CA_Recovery|TCPF_CA_CWR)); | 2192 | !((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR)); |
2155 | } | 2193 | } |
2156 | 2194 | ||
2157 | /* Check that window update is acceptable. | 2195 | /* Check that window update is acceptable. |
2158 | * The function assumes that snd_una<=ack<=snd_next. | 2196 | * The function assumes that snd_una<=ack<=snd_next. |
2159 | */ | 2197 | */ |
2160 | static inline int tcp_may_update_window(struct tcp_sock *tp, u32 ack, | 2198 | static inline int tcp_may_update_window(const struct tcp_sock *tp, const u32 ack, |
2161 | u32 ack_seq, u32 nwin) | 2199 | const u32 ack_seq, const u32 nwin) |
2162 | { | 2200 | { |
2163 | return (after(ack, tp->snd_una) || | 2201 | return (after(ack, tp->snd_una) || |
2164 | after(ack_seq, tp->snd_wl1) || | 2202 | after(ack_seq, tp->snd_wl1) || |
@@ -2241,6 +2279,7 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una) | |||
2241 | /* This routine deals with incoming acks, but not outgoing ones. */ | 2279 | /* This routine deals with incoming acks, but not outgoing ones. */ |
2242 | static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | 2280 | static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) |
2243 | { | 2281 | { |
2282 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
2244 | struct tcp_sock *tp = tcp_sk(sk); | 2283 | struct tcp_sock *tp = tcp_sk(sk); |
2245 | u32 prior_snd_una = tp->snd_una; | 2284 | u32 prior_snd_una = tp->snd_una; |
2246 | u32 ack_seq = TCP_SKB_CB(skb)->seq; | 2285 | u32 ack_seq = TCP_SKB_CB(skb)->seq; |
@@ -2268,7 +2307,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
2268 | tp->snd_una = ack; | 2307 | tp->snd_una = ack; |
2269 | flag |= FLAG_WIN_UPDATE; | 2308 | flag |= FLAG_WIN_UPDATE; |
2270 | 2309 | ||
2271 | tcp_ca_event(tp, CA_EVENT_FAST_ACK); | 2310 | tcp_ca_event(sk, CA_EVENT_FAST_ACK); |
2272 | 2311 | ||
2273 | NET_INC_STATS_BH(LINUX_MIB_TCPHPACKS); | 2312 | NET_INC_STATS_BH(LINUX_MIB_TCPHPACKS); |
2274 | } else { | 2313 | } else { |
@@ -2285,7 +2324,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
2285 | if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th)) | 2324 | if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th)) |
2286 | flag |= FLAG_ECE; | 2325 | flag |= FLAG_ECE; |
2287 | 2326 | ||
2288 | tcp_ca_event(tp, CA_EVENT_SLOW_ACK); | 2327 | tcp_ca_event(sk, CA_EVENT_SLOW_ACK); |
2289 | } | 2328 | } |
2290 | 2329 | ||
2291 | /* We passed data and got it acked, remove any soft error | 2330 | /* We passed data and got it acked, remove any soft error |
@@ -2301,19 +2340,19 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
2301 | 2340 | ||
2302 | /* See if we can take anything off of the retransmit queue. */ | 2341 | /* See if we can take anything off of the retransmit queue. */ |
2303 | flag |= tcp_clean_rtx_queue(sk, &seq_rtt, | 2342 | flag |= tcp_clean_rtx_queue(sk, &seq_rtt, |
2304 | tp->ca_ops->rtt_sample ? &seq_usrtt : NULL); | 2343 | icsk->icsk_ca_ops->rtt_sample ? &seq_usrtt : NULL); |
2305 | 2344 | ||
2306 | if (tp->frto_counter) | 2345 | if (tp->frto_counter) |
2307 | tcp_process_frto(sk, prior_snd_una); | 2346 | tcp_process_frto(sk, prior_snd_una); |
2308 | 2347 | ||
2309 | if (tcp_ack_is_dubious(tp, flag)) { | 2348 | if (tcp_ack_is_dubious(sk, flag)) { |
2310 | /* Advanve CWND, if state allows this. */ | 2349 | /* Advanve CWND, if state allows this. */ |
2311 | if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(tp, flag)) | 2350 | if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) |
2312 | tcp_cong_avoid(tp, ack, seq_rtt, prior_in_flight, 0); | 2351 | tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0); |
2313 | tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); | 2352 | tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); |
2314 | } else { | 2353 | } else { |
2315 | if ((flag & FLAG_DATA_ACKED)) | 2354 | if ((flag & FLAG_DATA_ACKED)) |
2316 | tcp_cong_avoid(tp, ack, seq_rtt, prior_in_flight, 1); | 2355 | tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 1); |
2317 | } | 2356 | } |
2318 | 2357 | ||
2319 | if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP)) | 2358 | if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP)) |
@@ -2322,7 +2361,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
2322 | return 1; | 2361 | return 1; |
2323 | 2362 | ||
2324 | no_queue: | 2363 | no_queue: |
2325 | tp->probes_out = 0; | 2364 | icsk->icsk_probes_out = 0; |
2326 | 2365 | ||
2327 | /* If this ack opens up a zero window, clear backoff. It was | 2366 | /* If this ack opens up a zero window, clear backoff. It was |
2328 | * being used to time the probes, and is probably far higher than | 2367 | * being used to time the probes, and is probably far higher than |
@@ -2500,8 +2539,9 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) | |||
2500 | * up to bandwidth of 18Gigabit/sec. 8) ] | 2539 | * up to bandwidth of 18Gigabit/sec. 8) ] |
2501 | */ | 2540 | */ |
2502 | 2541 | ||
2503 | static int tcp_disordered_ack(struct tcp_sock *tp, struct sk_buff *skb) | 2542 | static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) |
2504 | { | 2543 | { |
2544 | struct tcp_sock *tp = tcp_sk(sk); | ||
2505 | struct tcphdr *th = skb->h.th; | 2545 | struct tcphdr *th = skb->h.th; |
2506 | u32 seq = TCP_SKB_CB(skb)->seq; | 2546 | u32 seq = TCP_SKB_CB(skb)->seq; |
2507 | u32 ack = TCP_SKB_CB(skb)->ack_seq; | 2547 | u32 ack = TCP_SKB_CB(skb)->ack_seq; |
@@ -2516,14 +2556,15 @@ static int tcp_disordered_ack(struct tcp_sock *tp, struct sk_buff *skb) | |||
2516 | !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) && | 2556 | !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) && |
2517 | 2557 | ||
2518 | /* 4. ... and sits in replay window. */ | 2558 | /* 4. ... and sits in replay window. */ |
2519 | (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (tp->rto*1024)/HZ); | 2559 | (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ); |
2520 | } | 2560 | } |
2521 | 2561 | ||
2522 | static inline int tcp_paws_discard(struct tcp_sock *tp, struct sk_buff *skb) | 2562 | static inline int tcp_paws_discard(const struct sock *sk, const struct sk_buff *skb) |
2523 | { | 2563 | { |
2564 | const struct tcp_sock *tp = tcp_sk(sk); | ||
2524 | return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW && | 2565 | return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW && |
2525 | xtime.tv_sec < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS && | 2566 | xtime.tv_sec < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS && |
2526 | !tcp_disordered_ack(tp, skb)); | 2567 | !tcp_disordered_ack(sk, skb)); |
2527 | } | 2568 | } |
2528 | 2569 | ||
2529 | /* Check segment sequence number for validity. | 2570 | /* Check segment sequence number for validity. |
@@ -2586,7 +2627,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) | |||
2586 | { | 2627 | { |
2587 | struct tcp_sock *tp = tcp_sk(sk); | 2628 | struct tcp_sock *tp = tcp_sk(sk); |
2588 | 2629 | ||
2589 | tcp_schedule_ack(tp); | 2630 | inet_csk_schedule_ack(sk); |
2590 | 2631 | ||
2591 | sk->sk_shutdown |= RCV_SHUTDOWN; | 2632 | sk->sk_shutdown |= RCV_SHUTDOWN; |
2592 | sock_set_flag(sk, SOCK_DONE); | 2633 | sock_set_flag(sk, SOCK_DONE); |
@@ -2596,7 +2637,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) | |||
2596 | case TCP_ESTABLISHED: | 2637 | case TCP_ESTABLISHED: |
2597 | /* Move to CLOSE_WAIT */ | 2638 | /* Move to CLOSE_WAIT */ |
2598 | tcp_set_state(sk, TCP_CLOSE_WAIT); | 2639 | tcp_set_state(sk, TCP_CLOSE_WAIT); |
2599 | tp->ack.pingpong = 1; | 2640 | inet_csk(sk)->icsk_ack.pingpong = 1; |
2600 | break; | 2641 | break; |
2601 | 2642 | ||
2602 | case TCP_CLOSE_WAIT: | 2643 | case TCP_CLOSE_WAIT: |
@@ -2694,7 +2735,7 @@ static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) | |||
2694 | if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && | 2735 | if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && |
2695 | before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { | 2736 | before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { |
2696 | NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST); | 2737 | NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST); |
2697 | tcp_enter_quickack_mode(tp); | 2738 | tcp_enter_quickack_mode(sk); |
2698 | 2739 | ||
2699 | if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) { | 2740 | if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) { |
2700 | u32 end_seq = TCP_SKB_CB(skb)->end_seq; | 2741 | u32 end_seq = TCP_SKB_CB(skb)->end_seq; |
@@ -2853,7 +2894,7 @@ static void tcp_ofo_queue(struct sock *sk) | |||
2853 | 2894 | ||
2854 | if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { | 2895 | if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { |
2855 | SOCK_DEBUG(sk, "ofo packet was already received \n"); | 2896 | SOCK_DEBUG(sk, "ofo packet was already received \n"); |
2856 | __skb_unlink(skb, skb->list); | 2897 | __skb_unlink(skb, &tp->out_of_order_queue); |
2857 | __kfree_skb(skb); | 2898 | __kfree_skb(skb); |
2858 | continue; | 2899 | continue; |
2859 | } | 2900 | } |
@@ -2861,7 +2902,7 @@ static void tcp_ofo_queue(struct sock *sk) | |||
2861 | tp->rcv_nxt, TCP_SKB_CB(skb)->seq, | 2902 | tp->rcv_nxt, TCP_SKB_CB(skb)->seq, |
2862 | TCP_SKB_CB(skb)->end_seq); | 2903 | TCP_SKB_CB(skb)->end_seq); |
2863 | 2904 | ||
2864 | __skb_unlink(skb, skb->list); | 2905 | __skb_unlink(skb, &tp->out_of_order_queue); |
2865 | __skb_queue_tail(&sk->sk_receive_queue, skb); | 2906 | __skb_queue_tail(&sk->sk_receive_queue, skb); |
2866 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | 2907 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; |
2867 | if(skb->h.th->fin) | 2908 | if(skb->h.th->fin) |
@@ -2942,7 +2983,7 @@ queue_and_out: | |||
2942 | * gap in queue is filled. | 2983 | * gap in queue is filled. |
2943 | */ | 2984 | */ |
2944 | if (skb_queue_empty(&tp->out_of_order_queue)) | 2985 | if (skb_queue_empty(&tp->out_of_order_queue)) |
2945 | tp->ack.pingpong = 0; | 2986 | inet_csk(sk)->icsk_ack.pingpong = 0; |
2946 | } | 2987 | } |
2947 | 2988 | ||
2948 | if (tp->rx_opt.num_sacks) | 2989 | if (tp->rx_opt.num_sacks) |
@@ -2963,8 +3004,8 @@ queue_and_out: | |||
2963 | tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); | 3004 | tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); |
2964 | 3005 | ||
2965 | out_of_window: | 3006 | out_of_window: |
2966 | tcp_enter_quickack_mode(tp); | 3007 | tcp_enter_quickack_mode(sk); |
2967 | tcp_schedule_ack(tp); | 3008 | inet_csk_schedule_ack(sk); |
2968 | drop: | 3009 | drop: |
2969 | __kfree_skb(skb); | 3010 | __kfree_skb(skb); |
2970 | return; | 3011 | return; |
@@ -2974,7 +3015,7 @@ drop: | |||
2974 | if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp))) | 3015 | if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp))) |
2975 | goto out_of_window; | 3016 | goto out_of_window; |
2976 | 3017 | ||
2977 | tcp_enter_quickack_mode(tp); | 3018 | tcp_enter_quickack_mode(sk); |
2978 | 3019 | ||
2979 | if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { | 3020 | if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { |
2980 | /* Partial packet, seq < rcv_next < end_seq */ | 3021 | /* Partial packet, seq < rcv_next < end_seq */ |
@@ -3003,7 +3044,7 @@ drop: | |||
3003 | 3044 | ||
3004 | /* Disable header prediction. */ | 3045 | /* Disable header prediction. */ |
3005 | tp->pred_flags = 0; | 3046 | tp->pred_flags = 0; |
3006 | tcp_schedule_ack(tp); | 3047 | inet_csk_schedule_ack(sk); |
3007 | 3048 | ||
3008 | SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", | 3049 | SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", |
3009 | tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); | 3050 | tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); |
@@ -3027,7 +3068,7 @@ drop: | |||
3027 | u32 end_seq = TCP_SKB_CB(skb)->end_seq; | 3068 | u32 end_seq = TCP_SKB_CB(skb)->end_seq; |
3028 | 3069 | ||
3029 | if (seq == TCP_SKB_CB(skb1)->end_seq) { | 3070 | if (seq == TCP_SKB_CB(skb1)->end_seq) { |
3030 | __skb_append(skb1, skb); | 3071 | __skb_append(skb1, skb, &tp->out_of_order_queue); |
3031 | 3072 | ||
3032 | if (!tp->rx_opt.num_sacks || | 3073 | if (!tp->rx_opt.num_sacks || |
3033 | tp->selective_acks[0].end_seq != seq) | 3074 | tp->selective_acks[0].end_seq != seq) |
@@ -3071,7 +3112,7 @@ drop: | |||
3071 | tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, end_seq); | 3112 | tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, end_seq); |
3072 | break; | 3113 | break; |
3073 | } | 3114 | } |
3074 | __skb_unlink(skb1, skb1->list); | 3115 | __skb_unlink(skb1, &tp->out_of_order_queue); |
3075 | tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); | 3116 | tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); |
3076 | __kfree_skb(skb1); | 3117 | __kfree_skb(skb1); |
3077 | } | 3118 | } |
@@ -3088,8 +3129,9 @@ add_sack: | |||
3088 | * simplifies code) | 3129 | * simplifies code) |
3089 | */ | 3130 | */ |
3090 | static void | 3131 | static void |
3091 | tcp_collapse(struct sock *sk, struct sk_buff *head, | 3132 | tcp_collapse(struct sock *sk, struct sk_buff_head *list, |
3092 | struct sk_buff *tail, u32 start, u32 end) | 3133 | struct sk_buff *head, struct sk_buff *tail, |
3134 | u32 start, u32 end) | ||
3093 | { | 3135 | { |
3094 | struct sk_buff *skb; | 3136 | struct sk_buff *skb; |
3095 | 3137 | ||
@@ -3099,7 +3141,7 @@ tcp_collapse(struct sock *sk, struct sk_buff *head, | |||
3099 | /* No new bits? It is possible on ofo queue. */ | 3141 | /* No new bits? It is possible on ofo queue. */ |
3100 | if (!before(start, TCP_SKB_CB(skb)->end_seq)) { | 3142 | if (!before(start, TCP_SKB_CB(skb)->end_seq)) { |
3101 | struct sk_buff *next = skb->next; | 3143 | struct sk_buff *next = skb->next; |
3102 | __skb_unlink(skb, skb->list); | 3144 | __skb_unlink(skb, list); |
3103 | __kfree_skb(skb); | 3145 | __kfree_skb(skb); |
3104 | NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED); | 3146 | NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED); |
3105 | skb = next; | 3147 | skb = next; |
@@ -3145,7 +3187,7 @@ tcp_collapse(struct sock *sk, struct sk_buff *head, | |||
3145 | nskb->mac.raw = nskb->head + (skb->mac.raw-skb->head); | 3187 | nskb->mac.raw = nskb->head + (skb->mac.raw-skb->head); |
3146 | memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); | 3188 | memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); |
3147 | TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; | 3189 | TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; |
3148 | __skb_insert(nskb, skb->prev, skb, skb->list); | 3190 | __skb_insert(nskb, skb->prev, skb, list); |
3149 | sk_stream_set_owner_r(nskb, sk); | 3191 | sk_stream_set_owner_r(nskb, sk); |
3150 | 3192 | ||
3151 | /* Copy data, releasing collapsed skbs. */ | 3193 | /* Copy data, releasing collapsed skbs. */ |
@@ -3164,7 +3206,7 @@ tcp_collapse(struct sock *sk, struct sk_buff *head, | |||
3164 | } | 3206 | } |
3165 | if (!before(start, TCP_SKB_CB(skb)->end_seq)) { | 3207 | if (!before(start, TCP_SKB_CB(skb)->end_seq)) { |
3166 | struct sk_buff *next = skb->next; | 3208 | struct sk_buff *next = skb->next; |
3167 | __skb_unlink(skb, skb->list); | 3209 | __skb_unlink(skb, list); |
3168 | __kfree_skb(skb); | 3210 | __kfree_skb(skb); |
3169 | NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED); | 3211 | NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED); |
3170 | skb = next; | 3212 | skb = next; |
@@ -3200,7 +3242,8 @@ static void tcp_collapse_ofo_queue(struct sock *sk) | |||
3200 | if (skb == (struct sk_buff *)&tp->out_of_order_queue || | 3242 | if (skb == (struct sk_buff *)&tp->out_of_order_queue || |
3201 | after(TCP_SKB_CB(skb)->seq, end) || | 3243 | after(TCP_SKB_CB(skb)->seq, end) || |
3202 | before(TCP_SKB_CB(skb)->end_seq, start)) { | 3244 | before(TCP_SKB_CB(skb)->end_seq, start)) { |
3203 | tcp_collapse(sk, head, skb, start, end); | 3245 | tcp_collapse(sk, &tp->out_of_order_queue, |
3246 | head, skb, start, end); | ||
3204 | head = skb; | 3247 | head = skb; |
3205 | if (skb == (struct sk_buff *)&tp->out_of_order_queue) | 3248 | if (skb == (struct sk_buff *)&tp->out_of_order_queue) |
3206 | break; | 3249 | break; |
@@ -3237,7 +3280,8 @@ static int tcp_prune_queue(struct sock *sk) | |||
3237 | tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); | 3280 | tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); |
3238 | 3281 | ||
3239 | tcp_collapse_ofo_queue(sk); | 3282 | tcp_collapse_ofo_queue(sk); |
3240 | tcp_collapse(sk, sk->sk_receive_queue.next, | 3283 | tcp_collapse(sk, &sk->sk_receive_queue, |
3284 | sk->sk_receive_queue.next, | ||
3241 | (struct sk_buff*)&sk->sk_receive_queue, | 3285 | (struct sk_buff*)&sk->sk_receive_queue, |
3242 | tp->copied_seq, tp->rcv_nxt); | 3286 | tp->copied_seq, tp->rcv_nxt); |
3243 | sk_stream_mem_reclaim(sk); | 3287 | sk_stream_mem_reclaim(sk); |
@@ -3286,12 +3330,12 @@ void tcp_cwnd_application_limited(struct sock *sk) | |||
3286 | { | 3330 | { |
3287 | struct tcp_sock *tp = tcp_sk(sk); | 3331 | struct tcp_sock *tp = tcp_sk(sk); |
3288 | 3332 | ||
3289 | if (tp->ca_state == TCP_CA_Open && | 3333 | if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open && |
3290 | sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { | 3334 | sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { |
3291 | /* Limited by application or receiver window. */ | 3335 | /* Limited by application or receiver window. */ |
3292 | u32 win_used = max(tp->snd_cwnd_used, 2U); | 3336 | u32 win_used = max(tp->snd_cwnd_used, 2U); |
3293 | if (win_used < tp->snd_cwnd) { | 3337 | if (win_used < tp->snd_cwnd) { |
3294 | tp->snd_ssthresh = tcp_current_ssthresh(tp); | 3338 | tp->snd_ssthresh = tcp_current_ssthresh(sk); |
3295 | tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; | 3339 | tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; |
3296 | } | 3340 | } |
3297 | tp->snd_cwnd_used = 0; | 3341 | tp->snd_cwnd_used = 0; |
@@ -3370,13 +3414,13 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) | |||
3370 | struct tcp_sock *tp = tcp_sk(sk); | 3414 | struct tcp_sock *tp = tcp_sk(sk); |
3371 | 3415 | ||
3372 | /* More than one full frame received... */ | 3416 | /* More than one full frame received... */ |
3373 | if (((tp->rcv_nxt - tp->rcv_wup) > tp->ack.rcv_mss | 3417 | if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss |
3374 | /* ... and right edge of window advances far enough. | 3418 | /* ... and right edge of window advances far enough. |
3375 | * (tcp_recvmsg() will send ACK otherwise). Or... | 3419 | * (tcp_recvmsg() will send ACK otherwise). Or... |
3376 | */ | 3420 | */ |
3377 | && __tcp_select_window(sk) >= tp->rcv_wnd) || | 3421 | && __tcp_select_window(sk) >= tp->rcv_wnd) || |
3378 | /* We ACK each frame or... */ | 3422 | /* We ACK each frame or... */ |
3379 | tcp_in_quickack_mode(tp) || | 3423 | tcp_in_quickack_mode(sk) || |
3380 | /* We have out of order data. */ | 3424 | /* We have out of order data. */ |
3381 | (ofo_possible && | 3425 | (ofo_possible && |
3382 | skb_peek(&tp->out_of_order_queue))) { | 3426 | skb_peek(&tp->out_of_order_queue))) { |
@@ -3390,8 +3434,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) | |||
3390 | 3434 | ||
3391 | static __inline__ void tcp_ack_snd_check(struct sock *sk) | 3435 | static __inline__ void tcp_ack_snd_check(struct sock *sk) |
3392 | { | 3436 | { |
3393 | struct tcp_sock *tp = tcp_sk(sk); | 3437 | if (!inet_csk_ack_scheduled(sk)) { |
3394 | if (!tcp_ack_scheduled(tp)) { | ||
3395 | /* We sent a data segment already. */ | 3438 | /* We sent a data segment already. */ |
3396 | return; | 3439 | return; |
3397 | } | 3440 | } |
@@ -3462,7 +3505,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th) | |||
3462 | struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); | 3505 | struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); |
3463 | tp->copied_seq++; | 3506 | tp->copied_seq++; |
3464 | if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) { | 3507 | if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) { |
3465 | __skb_unlink(skb, skb->list); | 3508 | __skb_unlink(skb, &sk->sk_receive_queue); |
3466 | __kfree_skb(skb); | 3509 | __kfree_skb(skb); |
3467 | } | 3510 | } |
3468 | } | 3511 | } |
@@ -3645,7 +3688,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
3645 | tp->rcv_nxt == tp->rcv_wup) | 3688 | tp->rcv_nxt == tp->rcv_wup) |
3646 | tcp_store_ts_recent(tp); | 3689 | tcp_store_ts_recent(tp); |
3647 | 3690 | ||
3648 | tcp_rcv_rtt_measure_ts(tp, skb); | 3691 | tcp_rcv_rtt_measure_ts(sk, skb); |
3649 | 3692 | ||
3650 | /* We know that such packets are checksummed | 3693 | /* We know that such packets are checksummed |
3651 | * on entry. | 3694 | * on entry. |
@@ -3678,7 +3721,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
3678 | tp->rcv_nxt == tp->rcv_wup) | 3721 | tp->rcv_nxt == tp->rcv_wup) |
3679 | tcp_store_ts_recent(tp); | 3722 | tcp_store_ts_recent(tp); |
3680 | 3723 | ||
3681 | tcp_rcv_rtt_measure_ts(tp, skb); | 3724 | tcp_rcv_rtt_measure_ts(sk, skb); |
3682 | 3725 | ||
3683 | __skb_pull(skb, tcp_header_len); | 3726 | __skb_pull(skb, tcp_header_len); |
3684 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | 3727 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; |
@@ -3699,7 +3742,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
3699 | tp->rcv_nxt == tp->rcv_wup) | 3742 | tp->rcv_nxt == tp->rcv_wup) |
3700 | tcp_store_ts_recent(tp); | 3743 | tcp_store_ts_recent(tp); |
3701 | 3744 | ||
3702 | tcp_rcv_rtt_measure_ts(tp, skb); | 3745 | tcp_rcv_rtt_measure_ts(sk, skb); |
3703 | 3746 | ||
3704 | if ((int)skb->truesize > sk->sk_forward_alloc) | 3747 | if ((int)skb->truesize > sk->sk_forward_alloc) |
3705 | goto step5; | 3748 | goto step5; |
@@ -3719,7 +3762,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
3719 | /* Well, only one small jumplet in fast path... */ | 3762 | /* Well, only one small jumplet in fast path... */ |
3720 | tcp_ack(sk, skb, FLAG_DATA); | 3763 | tcp_ack(sk, skb, FLAG_DATA); |
3721 | tcp_data_snd_check(sk, tp); | 3764 | tcp_data_snd_check(sk, tp); |
3722 | if (!tcp_ack_scheduled(tp)) | 3765 | if (!inet_csk_ack_scheduled(sk)) |
3723 | goto no_ack; | 3766 | goto no_ack; |
3724 | } | 3767 | } |
3725 | 3768 | ||
@@ -3741,7 +3784,7 @@ slow_path: | |||
3741 | * RFC1323: H1. Apply PAWS check first. | 3784 | * RFC1323: H1. Apply PAWS check first. |
3742 | */ | 3785 | */ |
3743 | if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && | 3786 | if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && |
3744 | tcp_paws_discard(tp, skb)) { | 3787 | tcp_paws_discard(sk, skb)) { |
3745 | if (!th->rst) { | 3788 | if (!th->rst) { |
3746 | NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); | 3789 | NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); |
3747 | tcp_send_dupack(sk, skb); | 3790 | tcp_send_dupack(sk, skb); |
@@ -3788,7 +3831,7 @@ step5: | |||
3788 | if(th->ack) | 3831 | if(th->ack) |
3789 | tcp_ack(sk, skb, FLAG_SLOWPATH); | 3832 | tcp_ack(sk, skb, FLAG_SLOWPATH); |
3790 | 3833 | ||
3791 | tcp_rcv_rtt_measure_ts(tp, skb); | 3834 | tcp_rcv_rtt_measure_ts(sk, skb); |
3792 | 3835 | ||
3793 | /* Process urgent data. */ | 3836 | /* Process urgent data. */ |
3794 | tcp_urg(sk, skb, th); | 3837 | tcp_urg(sk, skb, th); |
@@ -3817,6 +3860,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
3817 | tcp_parse_options(skb, &tp->rx_opt, 0); | 3860 | tcp_parse_options(skb, &tp->rx_opt, 0); |
3818 | 3861 | ||
3819 | if (th->ack) { | 3862 | if (th->ack) { |
3863 | struct inet_connection_sock *icsk; | ||
3820 | /* rfc793: | 3864 | /* rfc793: |
3821 | * "If the state is SYN-SENT then | 3865 | * "If the state is SYN-SENT then |
3822 | * first check the ACK bit | 3866 | * first check the ACK bit |
@@ -3920,7 +3964,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
3920 | 3964 | ||
3921 | tcp_init_metrics(sk); | 3965 | tcp_init_metrics(sk); |
3922 | 3966 | ||
3923 | tcp_init_congestion_control(tp); | 3967 | tcp_init_congestion_control(sk); |
3924 | 3968 | ||
3925 | /* Prevent spurious tcp_cwnd_restart() on first data | 3969 | /* Prevent spurious tcp_cwnd_restart() on first data |
3926 | * packet. | 3970 | * packet. |
@@ -3930,7 +3974,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
3930 | tcp_init_buffer_space(sk); | 3974 | tcp_init_buffer_space(sk); |
3931 | 3975 | ||
3932 | if (sock_flag(sk, SOCK_KEEPOPEN)) | 3976 | if (sock_flag(sk, SOCK_KEEPOPEN)) |
3933 | tcp_reset_keepalive_timer(sk, keepalive_time_when(tp)); | 3977 | inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); |
3934 | 3978 | ||
3935 | if (!tp->rx_opt.snd_wscale) | 3979 | if (!tp->rx_opt.snd_wscale) |
3936 | __tcp_fast_path_on(tp, tp->snd_wnd); | 3980 | __tcp_fast_path_on(tp, tp->snd_wnd); |
@@ -3942,7 +3986,11 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
3942 | sk_wake_async(sk, 0, POLL_OUT); | 3986 | sk_wake_async(sk, 0, POLL_OUT); |
3943 | } | 3987 | } |
3944 | 3988 | ||
3945 | if (sk->sk_write_pending || tp->defer_accept || tp->ack.pingpong) { | 3989 | icsk = inet_csk(sk); |
3990 | |||
3991 | if (sk->sk_write_pending || | ||
3992 | icsk->icsk_accept_queue.rskq_defer_accept || | ||
3993 | icsk->icsk_ack.pingpong) { | ||
3946 | /* Save one ACK. Data will be ready after | 3994 | /* Save one ACK. Data will be ready after |
3947 | * several ticks, if write_pending is set. | 3995 | * several ticks, if write_pending is set. |
3948 | * | 3996 | * |
@@ -3950,12 +3998,13 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
3950 | * look so _wonderfully_ clever, that I was not able | 3998 | * look so _wonderfully_ clever, that I was not able |
3951 | * to stand against the temptation 8) --ANK | 3999 | * to stand against the temptation 8) --ANK |
3952 | */ | 4000 | */ |
3953 | tcp_schedule_ack(tp); | 4001 | inet_csk_schedule_ack(sk); |
3954 | tp->ack.lrcvtime = tcp_time_stamp; | 4002 | icsk->icsk_ack.lrcvtime = tcp_time_stamp; |
3955 | tp->ack.ato = TCP_ATO_MIN; | 4003 | icsk->icsk_ack.ato = TCP_ATO_MIN; |
3956 | tcp_incr_quickack(tp); | 4004 | tcp_incr_quickack(sk); |
3957 | tcp_enter_quickack_mode(tp); | 4005 | tcp_enter_quickack_mode(sk); |
3958 | tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX); | 4006 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, |
4007 | TCP_DELACK_MAX, TCP_RTO_MAX); | ||
3959 | 4008 | ||
3960 | discard: | 4009 | discard: |
3961 | __kfree_skb(skb); | 4010 | __kfree_skb(skb); |
@@ -4111,7 +4160,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
4111 | } | 4160 | } |
4112 | 4161 | ||
4113 | if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && | 4162 | if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && |
4114 | tcp_paws_discard(tp, skb)) { | 4163 | tcp_paws_discard(sk, skb)) { |
4115 | if (!th->rst) { | 4164 | if (!th->rst) { |
4116 | NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); | 4165 | NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); |
4117 | tcp_send_dupack(sk, skb); | 4166 | tcp_send_dupack(sk, skb); |
@@ -4180,7 +4229,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
4180 | */ | 4229 | */ |
4181 | if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && | 4230 | if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && |
4182 | !tp->srtt) | 4231 | !tp->srtt) |
4183 | tcp_ack_saw_tstamp(tp, 0, 0); | 4232 | tcp_ack_saw_tstamp(sk, NULL, 0); |
4184 | 4233 | ||
4185 | if (tp->rx_opt.tstamp_ok) | 4234 | if (tp->rx_opt.tstamp_ok) |
4186 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; | 4235 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; |
@@ -4192,7 +4241,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
4192 | 4241 | ||
4193 | tcp_init_metrics(sk); | 4242 | tcp_init_metrics(sk); |
4194 | 4243 | ||
4195 | tcp_init_congestion_control(tp); | 4244 | tcp_init_congestion_control(sk); |
4196 | 4245 | ||
4197 | /* Prevent spurious tcp_cwnd_restart() on | 4246 | /* Prevent spurious tcp_cwnd_restart() on |
4198 | * first data packet. | 4247 | * first data packet. |
@@ -4227,9 +4276,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
4227 | return 1; | 4276 | return 1; |
4228 | } | 4277 | } |
4229 | 4278 | ||
4230 | tmo = tcp_fin_time(tp); | 4279 | tmo = tcp_fin_time(sk); |
4231 | if (tmo > TCP_TIMEWAIT_LEN) { | 4280 | if (tmo > TCP_TIMEWAIT_LEN) { |
4232 | tcp_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); | 4281 | inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); |
4233 | } else if (th->fin || sock_owned_by_user(sk)) { | 4282 | } else if (th->fin || sock_owned_by_user(sk)) { |
4234 | /* Bad case. We could lose such FIN otherwise. | 4283 | /* Bad case. We could lose such FIN otherwise. |
4235 | * It is not a big problem, but it looks confusing | 4284 | * It is not a big problem, but it looks confusing |
@@ -4237,7 +4286,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
4237 | * if it spins in bh_lock_sock(), but it is really | 4286 | * if it spins in bh_lock_sock(), but it is really |
4238 | * marginal case. | 4287 | * marginal case. |
4239 | */ | 4288 | */ |
4240 | tcp_reset_keepalive_timer(sk, tmo); | 4289 | inet_csk_reset_keepalive_timer(sk, tmo); |
4241 | } else { | 4290 | } else { |
4242 | tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); | 4291 | tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); |
4243 | goto discard; | 4292 | goto discard; |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 67c670886c1f..13dfb391cdf1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -64,7 +64,9 @@ | |||
64 | #include <linux/times.h> | 64 | #include <linux/times.h> |
65 | 65 | ||
66 | #include <net/icmp.h> | 66 | #include <net/icmp.h> |
67 | #include <net/inet_hashtables.h> | ||
67 | #include <net/tcp.h> | 68 | #include <net/tcp.h> |
69 | #include <net/transp_v6.h> | ||
68 | #include <net/ipv6.h> | 70 | #include <net/ipv6.h> |
69 | #include <net/inet_common.h> | 71 | #include <net/inet_common.h> |
70 | #include <net/xfrm.h> | 72 | #include <net/xfrm.h> |
@@ -75,7 +77,6 @@ | |||
75 | #include <linux/proc_fs.h> | 77 | #include <linux/proc_fs.h> |
76 | #include <linux/seq_file.h> | 78 | #include <linux/seq_file.h> |
77 | 79 | ||
78 | extern int sysctl_ip_dynaddr; | ||
79 | int sysctl_tcp_tw_reuse; | 80 | int sysctl_tcp_tw_reuse; |
80 | int sysctl_tcp_low_latency; | 81 | int sysctl_tcp_low_latency; |
81 | 82 | ||
@@ -88,463 +89,29 @@ static struct socket *tcp_socket; | |||
88 | void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, | 89 | void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, |
89 | struct sk_buff *skb); | 90 | struct sk_buff *skb); |
90 | 91 | ||
91 | struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = { | 92 | struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { |
92 | .__tcp_lhash_lock = RW_LOCK_UNLOCKED, | 93 | .lhash_lock = RW_LOCK_UNLOCKED, |
93 | .__tcp_lhash_users = ATOMIC_INIT(0), | 94 | .lhash_users = ATOMIC_INIT(0), |
94 | .__tcp_lhash_wait | 95 | .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), |
95 | = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.__tcp_lhash_wait), | 96 | .portalloc_lock = SPIN_LOCK_UNLOCKED, |
96 | .__tcp_portalloc_lock = SPIN_LOCK_UNLOCKED | 97 | .port_rover = 1024 - 1, |
97 | }; | 98 | }; |
98 | 99 | ||
99 | /* | ||
100 | * This array holds the first and last local port number. | ||
101 | * For high-usage systems, use sysctl to change this to | ||
102 | * 32768-61000 | ||
103 | */ | ||
104 | int sysctl_local_port_range[2] = { 1024, 4999 }; | ||
105 | int tcp_port_rover = 1024 - 1; | ||
106 | |||
107 | static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport, | ||
108 | __u32 faddr, __u16 fport) | ||
109 | { | ||
110 | int h = (laddr ^ lport) ^ (faddr ^ fport); | ||
111 | h ^= h >> 16; | ||
112 | h ^= h >> 8; | ||
113 | return h & (tcp_ehash_size - 1); | ||
114 | } | ||
115 | |||
116 | static __inline__ int tcp_sk_hashfn(struct sock *sk) | ||
117 | { | ||
118 | struct inet_sock *inet = inet_sk(sk); | ||
119 | __u32 laddr = inet->rcv_saddr; | ||
120 | __u16 lport = inet->num; | ||
121 | __u32 faddr = inet->daddr; | ||
122 | __u16 fport = inet->dport; | ||
123 | |||
124 | return tcp_hashfn(laddr, lport, faddr, fport); | ||
125 | } | ||
126 | |||
127 | /* Allocate and initialize a new TCP local port bind bucket. | ||
128 | * The bindhash mutex for snum's hash chain must be held here. | ||
129 | */ | ||
130 | struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, | ||
131 | unsigned short snum) | ||
132 | { | ||
133 | struct tcp_bind_bucket *tb = kmem_cache_alloc(tcp_bucket_cachep, | ||
134 | SLAB_ATOMIC); | ||
135 | if (tb) { | ||
136 | tb->port = snum; | ||
137 | tb->fastreuse = 0; | ||
138 | INIT_HLIST_HEAD(&tb->owners); | ||
139 | hlist_add_head(&tb->node, &head->chain); | ||
140 | } | ||
141 | return tb; | ||
142 | } | ||
143 | |||
144 | /* Caller must hold hashbucket lock for this tb with local BH disabled */ | ||
145 | void tcp_bucket_destroy(struct tcp_bind_bucket *tb) | ||
146 | { | ||
147 | if (hlist_empty(&tb->owners)) { | ||
148 | __hlist_del(&tb->node); | ||
149 | kmem_cache_free(tcp_bucket_cachep, tb); | ||
150 | } | ||
151 | } | ||
152 | |||
153 | /* Caller must disable local BH processing. */ | ||
154 | static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child) | ||
155 | { | ||
156 | struct tcp_bind_hashbucket *head = | ||
157 | &tcp_bhash[tcp_bhashfn(inet_sk(child)->num)]; | ||
158 | struct tcp_bind_bucket *tb; | ||
159 | |||
160 | spin_lock(&head->lock); | ||
161 | tb = tcp_sk(sk)->bind_hash; | ||
162 | sk_add_bind_node(child, &tb->owners); | ||
163 | tcp_sk(child)->bind_hash = tb; | ||
164 | spin_unlock(&head->lock); | ||
165 | } | ||
166 | |||
167 | inline void tcp_inherit_port(struct sock *sk, struct sock *child) | ||
168 | { | ||
169 | local_bh_disable(); | ||
170 | __tcp_inherit_port(sk, child); | ||
171 | local_bh_enable(); | ||
172 | } | ||
173 | |||
174 | void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, | ||
175 | unsigned short snum) | ||
176 | { | ||
177 | inet_sk(sk)->num = snum; | ||
178 | sk_add_bind_node(sk, &tb->owners); | ||
179 | tcp_sk(sk)->bind_hash = tb; | ||
180 | } | ||
181 | |||
182 | static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb) | ||
183 | { | ||
184 | const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk); | ||
185 | struct sock *sk2; | ||
186 | struct hlist_node *node; | ||
187 | int reuse = sk->sk_reuse; | ||
188 | |||
189 | sk_for_each_bound(sk2, node, &tb->owners) { | ||
190 | if (sk != sk2 && | ||
191 | !tcp_v6_ipv6only(sk2) && | ||
192 | (!sk->sk_bound_dev_if || | ||
193 | !sk2->sk_bound_dev_if || | ||
194 | sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { | ||
195 | if (!reuse || !sk2->sk_reuse || | ||
196 | sk2->sk_state == TCP_LISTEN) { | ||
197 | const u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2); | ||
198 | if (!sk2_rcv_saddr || !sk_rcv_saddr || | ||
199 | sk2_rcv_saddr == sk_rcv_saddr) | ||
200 | break; | ||
201 | } | ||
202 | } | ||
203 | } | ||
204 | return node != NULL; | ||
205 | } | ||
206 | |||
207 | /* Obtain a reference to a local port for the given sock, | ||
208 | * if snum is zero it means select any available local port. | ||
209 | */ | ||
210 | static int tcp_v4_get_port(struct sock *sk, unsigned short snum) | 100 | static int tcp_v4_get_port(struct sock *sk, unsigned short snum) |
211 | { | 101 | { |
212 | struct tcp_bind_hashbucket *head; | 102 | return inet_csk_get_port(&tcp_hashinfo, sk, snum); |
213 | struct hlist_node *node; | ||
214 | struct tcp_bind_bucket *tb; | ||
215 | int ret; | ||
216 | |||
217 | local_bh_disable(); | ||
218 | if (!snum) { | ||
219 | int low = sysctl_local_port_range[0]; | ||
220 | int high = sysctl_local_port_range[1]; | ||
221 | int remaining = (high - low) + 1; | ||
222 | int rover; | ||
223 | |||
224 | spin_lock(&tcp_portalloc_lock); | ||
225 | if (tcp_port_rover < low) | ||
226 | rover = low; | ||
227 | else | ||
228 | rover = tcp_port_rover; | ||
229 | do { | ||
230 | rover++; | ||
231 | if (rover > high) | ||
232 | rover = low; | ||
233 | head = &tcp_bhash[tcp_bhashfn(rover)]; | ||
234 | spin_lock(&head->lock); | ||
235 | tb_for_each(tb, node, &head->chain) | ||
236 | if (tb->port == rover) | ||
237 | goto next; | ||
238 | break; | ||
239 | next: | ||
240 | spin_unlock(&head->lock); | ||
241 | } while (--remaining > 0); | ||
242 | tcp_port_rover = rover; | ||
243 | spin_unlock(&tcp_portalloc_lock); | ||
244 | |||
245 | /* Exhausted local port range during search? It is not | ||
246 | * possible for us to be holding one of the bind hash | ||
247 | * locks if this test triggers, because if 'remaining' | ||
248 | * drops to zero, we broke out of the do/while loop at | ||
249 | * the top level, not from the 'break;' statement. | ||
250 | */ | ||
251 | ret = 1; | ||
252 | if (unlikely(remaining <= 0)) | ||
253 | goto fail; | ||
254 | |||
255 | /* OK, here is the one we will use. HEAD is | ||
256 | * non-NULL and we hold it's mutex. | ||
257 | */ | ||
258 | snum = rover; | ||
259 | } else { | ||
260 | head = &tcp_bhash[tcp_bhashfn(snum)]; | ||
261 | spin_lock(&head->lock); | ||
262 | tb_for_each(tb, node, &head->chain) | ||
263 | if (tb->port == snum) | ||
264 | goto tb_found; | ||
265 | } | ||
266 | tb = NULL; | ||
267 | goto tb_not_found; | ||
268 | tb_found: | ||
269 | if (!hlist_empty(&tb->owners)) { | ||
270 | if (sk->sk_reuse > 1) | ||
271 | goto success; | ||
272 | if (tb->fastreuse > 0 && | ||
273 | sk->sk_reuse && sk->sk_state != TCP_LISTEN) { | ||
274 | goto success; | ||
275 | } else { | ||
276 | ret = 1; | ||
277 | if (tcp_bind_conflict(sk, tb)) | ||
278 | goto fail_unlock; | ||
279 | } | ||
280 | } | ||
281 | tb_not_found: | ||
282 | ret = 1; | ||
283 | if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL) | ||
284 | goto fail_unlock; | ||
285 | if (hlist_empty(&tb->owners)) { | ||
286 | if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) | ||
287 | tb->fastreuse = 1; | ||
288 | else | ||
289 | tb->fastreuse = 0; | ||
290 | } else if (tb->fastreuse && | ||
291 | (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) | ||
292 | tb->fastreuse = 0; | ||
293 | success: | ||
294 | if (!tcp_sk(sk)->bind_hash) | ||
295 | tcp_bind_hash(sk, tb, snum); | ||
296 | BUG_TRAP(tcp_sk(sk)->bind_hash == tb); | ||
297 | ret = 0; | ||
298 | |||
299 | fail_unlock: | ||
300 | spin_unlock(&head->lock); | ||
301 | fail: | ||
302 | local_bh_enable(); | ||
303 | return ret; | ||
304 | } | ||
305 | |||
306 | /* Get rid of any references to a local port held by the | ||
307 | * given sock. | ||
308 | */ | ||
309 | static void __tcp_put_port(struct sock *sk) | ||
310 | { | ||
311 | struct inet_sock *inet = inet_sk(sk); | ||
312 | struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(inet->num)]; | ||
313 | struct tcp_bind_bucket *tb; | ||
314 | |||
315 | spin_lock(&head->lock); | ||
316 | tb = tcp_sk(sk)->bind_hash; | ||
317 | __sk_del_bind_node(sk); | ||
318 | tcp_sk(sk)->bind_hash = NULL; | ||
319 | inet->num = 0; | ||
320 | tcp_bucket_destroy(tb); | ||
321 | spin_unlock(&head->lock); | ||
322 | } | ||
323 | |||
324 | void tcp_put_port(struct sock *sk) | ||
325 | { | ||
326 | local_bh_disable(); | ||
327 | __tcp_put_port(sk); | ||
328 | local_bh_enable(); | ||
329 | } | ||
330 | |||
331 | /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. | ||
332 | * Look, when several writers sleep and reader wakes them up, all but one | ||
333 | * immediately hit write lock and grab all the cpus. Exclusive sleep solves | ||
334 | * this, _but_ remember, it adds useless work on UP machines (wake up each | ||
335 | * exclusive lock release). It should be ifdefed really. | ||
336 | */ | ||
337 | |||
338 | void tcp_listen_wlock(void) | ||
339 | { | ||
340 | write_lock(&tcp_lhash_lock); | ||
341 | |||
342 | if (atomic_read(&tcp_lhash_users)) { | ||
343 | DEFINE_WAIT(wait); | ||
344 | |||
345 | for (;;) { | ||
346 | prepare_to_wait_exclusive(&tcp_lhash_wait, | ||
347 | &wait, TASK_UNINTERRUPTIBLE); | ||
348 | if (!atomic_read(&tcp_lhash_users)) | ||
349 | break; | ||
350 | write_unlock_bh(&tcp_lhash_lock); | ||
351 | schedule(); | ||
352 | write_lock_bh(&tcp_lhash_lock); | ||
353 | } | ||
354 | |||
355 | finish_wait(&tcp_lhash_wait, &wait); | ||
356 | } | ||
357 | } | ||
358 | |||
359 | static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible) | ||
360 | { | ||
361 | struct hlist_head *list; | ||
362 | rwlock_t *lock; | ||
363 | |||
364 | BUG_TRAP(sk_unhashed(sk)); | ||
365 | if (listen_possible && sk->sk_state == TCP_LISTEN) { | ||
366 | list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; | ||
367 | lock = &tcp_lhash_lock; | ||
368 | tcp_listen_wlock(); | ||
369 | } else { | ||
370 | list = &tcp_ehash[(sk->sk_hashent = tcp_sk_hashfn(sk))].chain; | ||
371 | lock = &tcp_ehash[sk->sk_hashent].lock; | ||
372 | write_lock(lock); | ||
373 | } | ||
374 | __sk_add_node(sk, list); | ||
375 | sock_prot_inc_use(sk->sk_prot); | ||
376 | write_unlock(lock); | ||
377 | if (listen_possible && sk->sk_state == TCP_LISTEN) | ||
378 | wake_up(&tcp_lhash_wait); | ||
379 | } | 103 | } |
380 | 104 | ||
381 | static void tcp_v4_hash(struct sock *sk) | 105 | static void tcp_v4_hash(struct sock *sk) |
382 | { | 106 | { |
383 | if (sk->sk_state != TCP_CLOSE) { | 107 | inet_hash(&tcp_hashinfo, sk); |
384 | local_bh_disable(); | ||
385 | __tcp_v4_hash(sk, 1); | ||
386 | local_bh_enable(); | ||
387 | } | ||
388 | } | 108 | } |
389 | 109 | ||
390 | void tcp_unhash(struct sock *sk) | 110 | void tcp_unhash(struct sock *sk) |
391 | { | 111 | { |
392 | rwlock_t *lock; | 112 | inet_unhash(&tcp_hashinfo, sk); |
393 | |||
394 | if (sk_unhashed(sk)) | ||
395 | goto ende; | ||
396 | |||
397 | if (sk->sk_state == TCP_LISTEN) { | ||
398 | local_bh_disable(); | ||
399 | tcp_listen_wlock(); | ||
400 | lock = &tcp_lhash_lock; | ||
401 | } else { | ||
402 | struct tcp_ehash_bucket *head = &tcp_ehash[sk->sk_hashent]; | ||
403 | lock = &head->lock; | ||
404 | write_lock_bh(&head->lock); | ||
405 | } | ||
406 | |||
407 | if (__sk_del_node_init(sk)) | ||
408 | sock_prot_dec_use(sk->sk_prot); | ||
409 | write_unlock_bh(lock); | ||
410 | |||
411 | ende: | ||
412 | if (sk->sk_state == TCP_LISTEN) | ||
413 | wake_up(&tcp_lhash_wait); | ||
414 | } | ||
415 | |||
416 | /* Don't inline this cruft. Here are some nice properties to | ||
417 | * exploit here. The BSD API does not allow a listening TCP | ||
418 | * to specify the remote port nor the remote address for the | ||
419 | * connection. So always assume those are both wildcarded | ||
420 | * during the search since they can never be otherwise. | ||
421 | */ | ||
422 | static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr, | ||
423 | unsigned short hnum, int dif) | ||
424 | { | ||
425 | struct sock *result = NULL, *sk; | ||
426 | struct hlist_node *node; | ||
427 | int score, hiscore; | ||
428 | |||
429 | hiscore=-1; | ||
430 | sk_for_each(sk, node, head) { | ||
431 | struct inet_sock *inet = inet_sk(sk); | ||
432 | |||
433 | if (inet->num == hnum && !ipv6_only_sock(sk)) { | ||
434 | __u32 rcv_saddr = inet->rcv_saddr; | ||
435 | |||
436 | score = (sk->sk_family == PF_INET ? 1 : 0); | ||
437 | if (rcv_saddr) { | ||
438 | if (rcv_saddr != daddr) | ||
439 | continue; | ||
440 | score+=2; | ||
441 | } | ||
442 | if (sk->sk_bound_dev_if) { | ||
443 | if (sk->sk_bound_dev_if != dif) | ||
444 | continue; | ||
445 | score+=2; | ||
446 | } | ||
447 | if (score == 5) | ||
448 | return sk; | ||
449 | if (score > hiscore) { | ||
450 | hiscore = score; | ||
451 | result = sk; | ||
452 | } | ||
453 | } | ||
454 | } | ||
455 | return result; | ||
456 | } | ||
457 | |||
458 | /* Optimize the common listener case. */ | ||
459 | static inline struct sock *tcp_v4_lookup_listener(u32 daddr, | ||
460 | unsigned short hnum, int dif) | ||
461 | { | ||
462 | struct sock *sk = NULL; | ||
463 | struct hlist_head *head; | ||
464 | |||
465 | read_lock(&tcp_lhash_lock); | ||
466 | head = &tcp_listening_hash[tcp_lhashfn(hnum)]; | ||
467 | if (!hlist_empty(head)) { | ||
468 | struct inet_sock *inet = inet_sk((sk = __sk_head(head))); | ||
469 | |||
470 | if (inet->num == hnum && !sk->sk_node.next && | ||
471 | (!inet->rcv_saddr || inet->rcv_saddr == daddr) && | ||
472 | (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && | ||
473 | !sk->sk_bound_dev_if) | ||
474 | goto sherry_cache; | ||
475 | sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif); | ||
476 | } | ||
477 | if (sk) { | ||
478 | sherry_cache: | ||
479 | sock_hold(sk); | ||
480 | } | ||
481 | read_unlock(&tcp_lhash_lock); | ||
482 | return sk; | ||
483 | } | 113 | } |
484 | 114 | ||
485 | /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so | ||
486 | * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM | ||
487 | * | ||
488 | * Local BH must be disabled here. | ||
489 | */ | ||
490 | |||
491 | static inline struct sock *__tcp_v4_lookup_established(u32 saddr, u16 sport, | ||
492 | u32 daddr, u16 hnum, | ||
493 | int dif) | ||
494 | { | ||
495 | struct tcp_ehash_bucket *head; | ||
496 | TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) | ||
497 | __u32 ports = TCP_COMBINED_PORTS(sport, hnum); | ||
498 | struct sock *sk; | ||
499 | struct hlist_node *node; | ||
500 | /* Optimize here for direct hit, only listening connections can | ||
501 | * have wildcards anyways. | ||
502 | */ | ||
503 | int hash = tcp_hashfn(daddr, hnum, saddr, sport); | ||
504 | head = &tcp_ehash[hash]; | ||
505 | read_lock(&head->lock); | ||
506 | sk_for_each(sk, node, &head->chain) { | ||
507 | if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif)) | ||
508 | goto hit; /* You sunk my battleship! */ | ||
509 | } | ||
510 | |||
511 | /* Must check for a TIME_WAIT'er before going to listener hash. */ | ||
512 | sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) { | ||
513 | if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ports, dif)) | ||
514 | goto hit; | ||
515 | } | ||
516 | sk = NULL; | ||
517 | out: | ||
518 | read_unlock(&head->lock); | ||
519 | return sk; | ||
520 | hit: | ||
521 | sock_hold(sk); | ||
522 | goto out; | ||
523 | } | ||
524 | |||
525 | static inline struct sock *__tcp_v4_lookup(u32 saddr, u16 sport, | ||
526 | u32 daddr, u16 hnum, int dif) | ||
527 | { | ||
528 | struct sock *sk = __tcp_v4_lookup_established(saddr, sport, | ||
529 | daddr, hnum, dif); | ||
530 | |||
531 | return sk ? : tcp_v4_lookup_listener(daddr, hnum, dif); | ||
532 | } | ||
533 | |||
534 | inline struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, | ||
535 | u16 dport, int dif) | ||
536 | { | ||
537 | struct sock *sk; | ||
538 | |||
539 | local_bh_disable(); | ||
540 | sk = __tcp_v4_lookup(saddr, sport, daddr, ntohs(dport), dif); | ||
541 | local_bh_enable(); | ||
542 | |||
543 | return sk; | ||
544 | } | ||
545 | |||
546 | EXPORT_SYMBOL_GPL(tcp_v4_lookup); | ||
547 | |||
548 | static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) | 115 | static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) |
549 | { | 116 | { |
550 | return secure_tcp_sequence_number(skb->nh.iph->daddr, | 117 | return secure_tcp_sequence_number(skb->nh.iph->daddr, |
@@ -555,27 +122,28 @@ static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) | |||
555 | 122 | ||
556 | /* called with local bh disabled */ | 123 | /* called with local bh disabled */ |
557 | static int __tcp_v4_check_established(struct sock *sk, __u16 lport, | 124 | static int __tcp_v4_check_established(struct sock *sk, __u16 lport, |
558 | struct tcp_tw_bucket **twp) | 125 | struct inet_timewait_sock **twp) |
559 | { | 126 | { |
560 | struct inet_sock *inet = inet_sk(sk); | 127 | struct inet_sock *inet = inet_sk(sk); |
561 | u32 daddr = inet->rcv_saddr; | 128 | u32 daddr = inet->rcv_saddr; |
562 | u32 saddr = inet->daddr; | 129 | u32 saddr = inet->daddr; |
563 | int dif = sk->sk_bound_dev_if; | 130 | int dif = sk->sk_bound_dev_if; |
564 | TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) | 131 | INET_ADDR_COOKIE(acookie, saddr, daddr) |
565 | __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); | 132 | const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); |
566 | int hash = tcp_hashfn(daddr, lport, saddr, inet->dport); | 133 | const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_hashinfo.ehash_size); |
567 | struct tcp_ehash_bucket *head = &tcp_ehash[hash]; | 134 | struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; |
568 | struct sock *sk2; | 135 | struct sock *sk2; |
569 | struct hlist_node *node; | 136 | const struct hlist_node *node; |
570 | struct tcp_tw_bucket *tw; | 137 | struct inet_timewait_sock *tw; |
571 | 138 | ||
572 | write_lock(&head->lock); | 139 | write_lock(&head->lock); |
573 | 140 | ||
574 | /* Check TIME-WAIT sockets first. */ | 141 | /* Check TIME-WAIT sockets first. */ |
575 | sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) { | 142 | sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { |
576 | tw = (struct tcp_tw_bucket *)sk2; | 143 | tw = inet_twsk(sk2); |
577 | 144 | ||
578 | if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { | 145 | if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { |
146 | const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); | ||
579 | struct tcp_sock *tp = tcp_sk(sk); | 147 | struct tcp_sock *tp = tcp_sk(sk); |
580 | 148 | ||
581 | /* With PAWS, it is safe from the viewpoint | 149 | /* With PAWS, it is safe from the viewpoint |
@@ -592,15 +160,15 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, | |||
592 | fall back to VJ's scheme and use initial | 160 | fall back to VJ's scheme and use initial |
593 | timestamp retrieved from peer table. | 161 | timestamp retrieved from peer table. |
594 | */ | 162 | */ |
595 | if (tw->tw_ts_recent_stamp && | 163 | if (tcptw->tw_ts_recent_stamp && |
596 | (!twp || (sysctl_tcp_tw_reuse && | 164 | (!twp || (sysctl_tcp_tw_reuse && |
597 | xtime.tv_sec - | 165 | xtime.tv_sec - |
598 | tw->tw_ts_recent_stamp > 1))) { | 166 | tcptw->tw_ts_recent_stamp > 1))) { |
599 | if ((tp->write_seq = | 167 | tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; |
600 | tw->tw_snd_nxt + 65535 + 2) == 0) | 168 | if (tp->write_seq == 0) |
601 | tp->write_seq = 1; | 169 | tp->write_seq = 1; |
602 | tp->rx_opt.ts_recent = tw->tw_ts_recent; | 170 | tp->rx_opt.ts_recent = tcptw->tw_ts_recent; |
603 | tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp; | 171 | tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; |
604 | sock_hold(sk2); | 172 | sock_hold(sk2); |
605 | goto unique; | 173 | goto unique; |
606 | } else | 174 | } else |
@@ -611,7 +179,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, | |||
611 | 179 | ||
612 | /* And established part... */ | 180 | /* And established part... */ |
613 | sk_for_each(sk2, node, &head->chain) { | 181 | sk_for_each(sk2, node, &head->chain) { |
614 | if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif)) | 182 | if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif)) |
615 | goto not_unique; | 183 | goto not_unique; |
616 | } | 184 | } |
617 | 185 | ||
@@ -631,10 +199,10 @@ unique: | |||
631 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | 199 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); |
632 | } else if (tw) { | 200 | } else if (tw) { |
633 | /* Silly. Should hash-dance instead... */ | 201 | /* Silly. Should hash-dance instead... */ |
634 | tcp_tw_deschedule(tw); | 202 | inet_twsk_deschedule(tw, &tcp_death_row); |
635 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | 203 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); |
636 | 204 | ||
637 | tcp_tw_put(tw); | 205 | inet_twsk_put(tw); |
638 | } | 206 | } |
639 | 207 | ||
640 | return 0; | 208 | return 0; |
@@ -657,9 +225,9 @@ static inline u32 connect_port_offset(const struct sock *sk) | |||
657 | */ | 225 | */ |
658 | static inline int tcp_v4_hash_connect(struct sock *sk) | 226 | static inline int tcp_v4_hash_connect(struct sock *sk) |
659 | { | 227 | { |
660 | unsigned short snum = inet_sk(sk)->num; | 228 | const unsigned short snum = inet_sk(sk)->num; |
661 | struct tcp_bind_hashbucket *head; | 229 | struct inet_bind_hashbucket *head; |
662 | struct tcp_bind_bucket *tb; | 230 | struct inet_bind_bucket *tb; |
663 | int ret; | 231 | int ret; |
664 | 232 | ||
665 | if (!snum) { | 233 | if (!snum) { |
@@ -671,19 +239,19 @@ static inline int tcp_v4_hash_connect(struct sock *sk) | |||
671 | static u32 hint; | 239 | static u32 hint; |
672 | u32 offset = hint + connect_port_offset(sk); | 240 | u32 offset = hint + connect_port_offset(sk); |
673 | struct hlist_node *node; | 241 | struct hlist_node *node; |
674 | struct tcp_tw_bucket *tw = NULL; | 242 | struct inet_timewait_sock *tw = NULL; |
675 | 243 | ||
676 | local_bh_disable(); | 244 | local_bh_disable(); |
677 | for (i = 1; i <= range; i++) { | 245 | for (i = 1; i <= range; i++) { |
678 | port = low + (i + offset) % range; | 246 | port = low + (i + offset) % range; |
679 | head = &tcp_bhash[tcp_bhashfn(port)]; | 247 | head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)]; |
680 | spin_lock(&head->lock); | 248 | spin_lock(&head->lock); |
681 | 249 | ||
682 | /* Does not bother with rcv_saddr checks, | 250 | /* Does not bother with rcv_saddr checks, |
683 | * because the established check is already | 251 | * because the established check is already |
684 | * unique enough. | 252 | * unique enough. |
685 | */ | 253 | */ |
686 | tb_for_each(tb, node, &head->chain) { | 254 | inet_bind_bucket_for_each(tb, node, &head->chain) { |
687 | if (tb->port == port) { | 255 | if (tb->port == port) { |
688 | BUG_TRAP(!hlist_empty(&tb->owners)); | 256 | BUG_TRAP(!hlist_empty(&tb->owners)); |
689 | if (tb->fastreuse >= 0) | 257 | if (tb->fastreuse >= 0) |
@@ -696,7 +264,7 @@ static inline int tcp_v4_hash_connect(struct sock *sk) | |||
696 | } | 264 | } |
697 | } | 265 | } |
698 | 266 | ||
699 | tb = tcp_bucket_create(head, port); | 267 | tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port); |
700 | if (!tb) { | 268 | if (!tb) { |
701 | spin_unlock(&head->lock); | 269 | spin_unlock(&head->lock); |
702 | break; | 270 | break; |
@@ -715,27 +283,27 @@ ok: | |||
715 | hint += i; | 283 | hint += i; |
716 | 284 | ||
717 | /* Head lock still held and bh's disabled */ | 285 | /* Head lock still held and bh's disabled */ |
718 | tcp_bind_hash(sk, tb, port); | 286 | inet_bind_hash(sk, tb, port); |
719 | if (sk_unhashed(sk)) { | 287 | if (sk_unhashed(sk)) { |
720 | inet_sk(sk)->sport = htons(port); | 288 | inet_sk(sk)->sport = htons(port); |
721 | __tcp_v4_hash(sk, 0); | 289 | __inet_hash(&tcp_hashinfo, sk, 0); |
722 | } | 290 | } |
723 | spin_unlock(&head->lock); | 291 | spin_unlock(&head->lock); |
724 | 292 | ||
725 | if (tw) { | 293 | if (tw) { |
726 | tcp_tw_deschedule(tw); | 294 | inet_twsk_deschedule(tw, &tcp_death_row);; |
727 | tcp_tw_put(tw); | 295 | inet_twsk_put(tw); |
728 | } | 296 | } |
729 | 297 | ||
730 | ret = 0; | 298 | ret = 0; |
731 | goto out; | 299 | goto out; |
732 | } | 300 | } |
733 | 301 | ||
734 | head = &tcp_bhash[tcp_bhashfn(snum)]; | 302 | head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; |
735 | tb = tcp_sk(sk)->bind_hash; | 303 | tb = inet_csk(sk)->icsk_bind_hash; |
736 | spin_lock_bh(&head->lock); | 304 | spin_lock_bh(&head->lock); |
737 | if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { | 305 | if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { |
738 | __tcp_v4_hash(sk, 0); | 306 | __inet_hash(&tcp_hashinfo, sk, 0); |
739 | spin_unlock_bh(&head->lock); | 307 | spin_unlock_bh(&head->lock); |
740 | return 0; | 308 | return 0; |
741 | } else { | 309 | } else { |
@@ -798,7 +366,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
798 | tp->write_seq = 0; | 366 | tp->write_seq = 0; |
799 | } | 367 | } |
800 | 368 | ||
801 | if (sysctl_tcp_tw_recycle && | 369 | if (tcp_death_row.sysctl_tw_recycle && |
802 | !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { | 370 | !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { |
803 | struct inet_peer *peer = rt_get_peer(rt); | 371 | struct inet_peer *peer = rt_get_peer(rt); |
804 | 372 | ||
@@ -837,8 +405,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
837 | goto failure; | 405 | goto failure; |
838 | 406 | ||
839 | /* OK, now commit destination to socket. */ | 407 | /* OK, now commit destination to socket. */ |
840 | __sk_dst_set(sk, &rt->u.dst); | 408 | sk_setup_caps(sk, &rt->u.dst); |
841 | tcp_v4_setup_caps(sk, &rt->u.dst); | ||
842 | 409 | ||
843 | if (!tp->write_seq) | 410 | if (!tp->write_seq) |
844 | tp->write_seq = secure_tcp_sequence_number(inet->saddr, | 411 | tp->write_seq = secure_tcp_sequence_number(inet->saddr, |
@@ -864,53 +431,6 @@ failure: | |||
864 | return err; | 431 | return err; |
865 | } | 432 | } |
866 | 433 | ||
867 | static __inline__ int tcp_v4_iif(struct sk_buff *skb) | ||
868 | { | ||
869 | return ((struct rtable *)skb->dst)->rt_iif; | ||
870 | } | ||
871 | |||
872 | static __inline__ u32 tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd) | ||
873 | { | ||
874 | return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1)); | ||
875 | } | ||
876 | |||
877 | static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp, | ||
878 | struct request_sock ***prevp, | ||
879 | __u16 rport, | ||
880 | __u32 raddr, __u32 laddr) | ||
881 | { | ||
882 | struct listen_sock *lopt = tp->accept_queue.listen_opt; | ||
883 | struct request_sock *req, **prev; | ||
884 | |||
885 | for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)]; | ||
886 | (req = *prev) != NULL; | ||
887 | prev = &req->dl_next) { | ||
888 | const struct inet_request_sock *ireq = inet_rsk(req); | ||
889 | |||
890 | if (ireq->rmt_port == rport && | ||
891 | ireq->rmt_addr == raddr && | ||
892 | ireq->loc_addr == laddr && | ||
893 | TCP_INET_FAMILY(req->rsk_ops->family)) { | ||
894 | BUG_TRAP(!req->sk); | ||
895 | *prevp = prev; | ||
896 | break; | ||
897 | } | ||
898 | } | ||
899 | |||
900 | return req; | ||
901 | } | ||
902 | |||
903 | static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req) | ||
904 | { | ||
905 | struct tcp_sock *tp = tcp_sk(sk); | ||
906 | struct listen_sock *lopt = tp->accept_queue.listen_opt; | ||
907 | u32 h = tcp_v4_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); | ||
908 | |||
909 | reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT); | ||
910 | tcp_synq_added(sk); | ||
911 | } | ||
912 | |||
913 | |||
914 | /* | 434 | /* |
915 | * This routine does path mtu discovery as defined in RFC1191. | 435 | * This routine does path mtu discovery as defined in RFC1191. |
916 | */ | 436 | */ |
@@ -993,14 +513,14 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) | |||
993 | return; | 513 | return; |
994 | } | 514 | } |
995 | 515 | ||
996 | sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, | 516 | sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr, |
997 | th->source, tcp_v4_iif(skb)); | 517 | th->source, inet_iif(skb)); |
998 | if (!sk) { | 518 | if (!sk) { |
999 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); | 519 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); |
1000 | return; | 520 | return; |
1001 | } | 521 | } |
1002 | if (sk->sk_state == TCP_TIME_WAIT) { | 522 | if (sk->sk_state == TCP_TIME_WAIT) { |
1003 | tcp_tw_put((struct tcp_tw_bucket *)sk); | 523 | inet_twsk_put((struct inet_timewait_sock *)sk); |
1004 | return; | 524 | return; |
1005 | } | 525 | } |
1006 | 526 | ||
@@ -1054,8 +574,8 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) | |||
1054 | if (sock_owned_by_user(sk)) | 574 | if (sock_owned_by_user(sk)) |
1055 | goto out; | 575 | goto out; |
1056 | 576 | ||
1057 | req = tcp_v4_search_req(tp, &prev, th->dest, | 577 | req = inet_csk_search_req(sk, &prev, th->dest, |
1058 | iph->daddr, iph->saddr); | 578 | iph->daddr, iph->saddr); |
1059 | if (!req) | 579 | if (!req) |
1060 | goto out; | 580 | goto out; |
1061 | 581 | ||
@@ -1075,7 +595,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) | |||
1075 | * created socket, and POSIX does not want network | 595 | * created socket, and POSIX does not want network |
1076 | * errors returned from accept(). | 596 | * errors returned from accept(). |
1077 | */ | 597 | */ |
1078 | tcp_synq_drop(sk, req, prev); | 598 | inet_csk_reqsk_queue_drop(sk, req, prev); |
1079 | goto out; | 599 | goto out; |
1080 | 600 | ||
1081 | case TCP_SYN_SENT: | 601 | case TCP_SYN_SENT: |
@@ -1245,12 +765,13 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, | |||
1245 | 765 | ||
1246 | static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) | 766 | static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) |
1247 | { | 767 | { |
1248 | struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; | 768 | struct inet_timewait_sock *tw = inet_twsk(sk); |
769 | const struct tcp_timewait_sock *tcptw = tcp_twsk(sk); | ||
1249 | 770 | ||
1250 | tcp_v4_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt, | 771 | tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, |
1251 | tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent); | 772 | tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent); |
1252 | 773 | ||
1253 | tcp_tw_put(tw); | 774 | inet_twsk_put(tw); |
1254 | } | 775 | } |
1255 | 776 | ||
1256 | static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) | 777 | static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) |
@@ -1259,36 +780,6 @@ static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) | |||
1259 | req->ts_recent); | 780 | req->ts_recent); |
1260 | } | 781 | } |
1261 | 782 | ||
1262 | static struct dst_entry* tcp_v4_route_req(struct sock *sk, | ||
1263 | struct request_sock *req) | ||
1264 | { | ||
1265 | struct rtable *rt; | ||
1266 | const struct inet_request_sock *ireq = inet_rsk(req); | ||
1267 | struct ip_options *opt = inet_rsk(req)->opt; | ||
1268 | struct flowi fl = { .oif = sk->sk_bound_dev_if, | ||
1269 | .nl_u = { .ip4_u = | ||
1270 | { .daddr = ((opt && opt->srr) ? | ||
1271 | opt->faddr : | ||
1272 | ireq->rmt_addr), | ||
1273 | .saddr = ireq->loc_addr, | ||
1274 | .tos = RT_CONN_FLAGS(sk) } }, | ||
1275 | .proto = IPPROTO_TCP, | ||
1276 | .uli_u = { .ports = | ||
1277 | { .sport = inet_sk(sk)->sport, | ||
1278 | .dport = ireq->rmt_port } } }; | ||
1279 | |||
1280 | if (ip_route_output_flow(&rt, &fl, sk, 0)) { | ||
1281 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | ||
1282 | return NULL; | ||
1283 | } | ||
1284 | if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) { | ||
1285 | ip_rt_put(rt); | ||
1286 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | ||
1287 | return NULL; | ||
1288 | } | ||
1289 | return &rt->u.dst; | ||
1290 | } | ||
1291 | |||
1292 | /* | 783 | /* |
1293 | * Send a SYN-ACK after having received an ACK. | 784 | * Send a SYN-ACK after having received an ACK. |
1294 | * This still operates on a request_sock only, not on a big | 785 | * This still operates on a request_sock only, not on a big |
@@ -1302,7 +793,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, | |||
1302 | struct sk_buff * skb; | 793 | struct sk_buff * skb; |
1303 | 794 | ||
1304 | /* First, grab a route. */ | 795 | /* First, grab a route. */ |
1305 | if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL) | 796 | if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) |
1306 | goto out; | 797 | goto out; |
1307 | 798 | ||
1308 | skb = tcp_make_synack(sk, dst, req); | 799 | skb = tcp_make_synack(sk, dst, req); |
@@ -1404,7 +895,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1404 | * limitations, they conserve resources and peer is | 895 | * limitations, they conserve resources and peer is |
1405 | * evidently real one. | 896 | * evidently real one. |
1406 | */ | 897 | */ |
1407 | if (tcp_synq_is_full(sk) && !isn) { | 898 | if (inet_csk_reqsk_queue_is_full(sk) && !isn) { |
1408 | #ifdef CONFIG_SYN_COOKIES | 899 | #ifdef CONFIG_SYN_COOKIES |
1409 | if (sysctl_tcp_syncookies) { | 900 | if (sysctl_tcp_syncookies) { |
1410 | want_cookie = 1; | 901 | want_cookie = 1; |
@@ -1418,7 +909,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1418 | * clogging syn queue with openreqs with exponentially increasing | 909 | * clogging syn queue with openreqs with exponentially increasing |
1419 | * timeout. | 910 | * timeout. |
1420 | */ | 911 | */ |
1421 | if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1) | 912 | if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) |
1422 | goto drop; | 913 | goto drop; |
1423 | 914 | ||
1424 | req = reqsk_alloc(&tcp_request_sock_ops); | 915 | req = reqsk_alloc(&tcp_request_sock_ops); |
@@ -1474,8 +965,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1474 | * are made in the function processing timewait state. | 965 | * are made in the function processing timewait state. |
1475 | */ | 966 | */ |
1476 | if (tmp_opt.saw_tstamp && | 967 | if (tmp_opt.saw_tstamp && |
1477 | sysctl_tcp_tw_recycle && | 968 | tcp_death_row.sysctl_tw_recycle && |
1478 | (dst = tcp_v4_route_req(sk, req)) != NULL && | 969 | (dst = inet_csk_route_req(sk, req)) != NULL && |
1479 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && | 970 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && |
1480 | peer->v4daddr == saddr) { | 971 | peer->v4daddr == saddr) { |
1481 | if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL && | 972 | if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL && |
@@ -1488,7 +979,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1488 | } | 979 | } |
1489 | /* Kill the following clause, if you dislike this way. */ | 980 | /* Kill the following clause, if you dislike this way. */ |
1490 | else if (!sysctl_tcp_syncookies && | 981 | else if (!sysctl_tcp_syncookies && |
1491 | (sysctl_max_syn_backlog - tcp_synq_len(sk) < | 982 | (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < |
1492 | (sysctl_max_syn_backlog >> 2)) && | 983 | (sysctl_max_syn_backlog >> 2)) && |
1493 | (!peer || !peer->tcp_ts_stamp) && | 984 | (!peer || !peer->tcp_ts_stamp) && |
1494 | (!dst || !dst_metric(dst, RTAX_RTT))) { | 985 | (!dst || !dst_metric(dst, RTAX_RTT))) { |
@@ -1499,11 +990,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1499 | * to destinations, already remembered | 990 | * to destinations, already remembered |
1500 | * to the moment of synflood. | 991 | * to the moment of synflood. |
1501 | */ | 992 | */ |
1502 | LIMIT_NETDEBUG(printk(KERN_DEBUG "TCP: drop open " | 993 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open " |
1503 | "request from %u.%u." | 994 | "request from %u.%u.%u.%u/%u\n", |
1504 | "%u.%u/%u\n", | 995 | NIPQUAD(saddr), |
1505 | NIPQUAD(saddr), | 996 | ntohs(skb->h.th->source)); |
1506 | ntohs(skb->h.th->source))); | ||
1507 | dst_release(dst); | 997 | dst_release(dst); |
1508 | goto drop_and_free; | 998 | goto drop_and_free; |
1509 | } | 999 | } |
@@ -1518,7 +1008,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1518 | if (want_cookie) { | 1008 | if (want_cookie) { |
1519 | reqsk_free(req); | 1009 | reqsk_free(req); |
1520 | } else { | 1010 | } else { |
1521 | tcp_v4_synq_add(sk, req); | 1011 | inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); |
1522 | } | 1012 | } |
1523 | return 0; | 1013 | return 0; |
1524 | 1014 | ||
@@ -1546,15 +1036,14 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1546 | if (sk_acceptq_is_full(sk)) | 1036 | if (sk_acceptq_is_full(sk)) |
1547 | goto exit_overflow; | 1037 | goto exit_overflow; |
1548 | 1038 | ||
1549 | if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL) | 1039 | if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) |
1550 | goto exit; | 1040 | goto exit; |
1551 | 1041 | ||
1552 | newsk = tcp_create_openreq_child(sk, req, skb); | 1042 | newsk = tcp_create_openreq_child(sk, req, skb); |
1553 | if (!newsk) | 1043 | if (!newsk) |
1554 | goto exit; | 1044 | goto exit; |
1555 | 1045 | ||
1556 | newsk->sk_dst_cache = dst; | 1046 | sk_setup_caps(newsk, dst); |
1557 | tcp_v4_setup_caps(newsk, dst); | ||
1558 | 1047 | ||
1559 | newtp = tcp_sk(newsk); | 1048 | newtp = tcp_sk(newsk); |
1560 | newinet = inet_sk(newsk); | 1049 | newinet = inet_sk(newsk); |
@@ -1564,7 +1053,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1564 | newinet->saddr = ireq->loc_addr; | 1053 | newinet->saddr = ireq->loc_addr; |
1565 | newinet->opt = ireq->opt; | 1054 | newinet->opt = ireq->opt; |
1566 | ireq->opt = NULL; | 1055 | ireq->opt = NULL; |
1567 | newinet->mc_index = tcp_v4_iif(skb); | 1056 | newinet->mc_index = inet_iif(skb); |
1568 | newinet->mc_ttl = skb->nh.iph->ttl; | 1057 | newinet->mc_ttl = skb->nh.iph->ttl; |
1569 | newtp->ext_header_len = 0; | 1058 | newtp->ext_header_len = 0; |
1570 | if (newinet->opt) | 1059 | if (newinet->opt) |
@@ -1575,8 +1064,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1575 | newtp->advmss = dst_metric(dst, RTAX_ADVMSS); | 1064 | newtp->advmss = dst_metric(dst, RTAX_ADVMSS); |
1576 | tcp_initialize_rcv_mss(newsk); | 1065 | tcp_initialize_rcv_mss(newsk); |
1577 | 1066 | ||
1578 | __tcp_v4_hash(newsk, 0); | 1067 | __inet_hash(&tcp_hashinfo, newsk, 0); |
1579 | __tcp_inherit_port(sk, newsk); | 1068 | __inet_inherit_port(&tcp_hashinfo, sk, newsk); |
1580 | 1069 | ||
1581 | return newsk; | 1070 | return newsk; |
1582 | 1071 | ||
@@ -1592,27 +1081,24 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) | |||
1592 | { | 1081 | { |
1593 | struct tcphdr *th = skb->h.th; | 1082 | struct tcphdr *th = skb->h.th; |
1594 | struct iphdr *iph = skb->nh.iph; | 1083 | struct iphdr *iph = skb->nh.iph; |
1595 | struct tcp_sock *tp = tcp_sk(sk); | ||
1596 | struct sock *nsk; | 1084 | struct sock *nsk; |
1597 | struct request_sock **prev; | 1085 | struct request_sock **prev; |
1598 | /* Find possible connection requests. */ | 1086 | /* Find possible connection requests. */ |
1599 | struct request_sock *req = tcp_v4_search_req(tp, &prev, th->source, | 1087 | struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, |
1600 | iph->saddr, iph->daddr); | 1088 | iph->saddr, iph->daddr); |
1601 | if (req) | 1089 | if (req) |
1602 | return tcp_check_req(sk, skb, req, prev); | 1090 | return tcp_check_req(sk, skb, req, prev); |
1603 | 1091 | ||
1604 | nsk = __tcp_v4_lookup_established(skb->nh.iph->saddr, | 1092 | nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, |
1605 | th->source, | 1093 | th->source, skb->nh.iph->daddr, |
1606 | skb->nh.iph->daddr, | 1094 | ntohs(th->dest), inet_iif(skb)); |
1607 | ntohs(th->dest), | ||
1608 | tcp_v4_iif(skb)); | ||
1609 | 1095 | ||
1610 | if (nsk) { | 1096 | if (nsk) { |
1611 | if (nsk->sk_state != TCP_TIME_WAIT) { | 1097 | if (nsk->sk_state != TCP_TIME_WAIT) { |
1612 | bh_lock_sock(nsk); | 1098 | bh_lock_sock(nsk); |
1613 | return nsk; | 1099 | return nsk; |
1614 | } | 1100 | } |
1615 | tcp_tw_put((struct tcp_tw_bucket *)nsk); | 1101 | inet_twsk_put((struct inet_timewait_sock *)nsk); |
1616 | return NULL; | 1102 | return NULL; |
1617 | } | 1103 | } |
1618 | 1104 | ||
@@ -1631,7 +1117,7 @@ static int tcp_v4_checksum_init(struct sk_buff *skb) | |||
1631 | skb->nh.iph->daddr, skb->csum)) | 1117 | skb->nh.iph->daddr, skb->csum)) |
1632 | return 0; | 1118 | return 0; |
1633 | 1119 | ||
1634 | LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v4 csum failed\n")); | 1120 | LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v4 csum failed\n"); |
1635 | skb->ip_summed = CHECKSUM_NONE; | 1121 | skb->ip_summed = CHECKSUM_NONE; |
1636 | } | 1122 | } |
1637 | if (skb->len <= 76) { | 1123 | if (skb->len <= 76) { |
@@ -1747,9 +1233,9 @@ int tcp_v4_rcv(struct sk_buff *skb) | |||
1747 | TCP_SKB_CB(skb)->flags = skb->nh.iph->tos; | 1233 | TCP_SKB_CB(skb)->flags = skb->nh.iph->tos; |
1748 | TCP_SKB_CB(skb)->sacked = 0; | 1234 | TCP_SKB_CB(skb)->sacked = 0; |
1749 | 1235 | ||
1750 | sk = __tcp_v4_lookup(skb->nh.iph->saddr, th->source, | 1236 | sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source, |
1751 | skb->nh.iph->daddr, ntohs(th->dest), | 1237 | skb->nh.iph->daddr, ntohs(th->dest), |
1752 | tcp_v4_iif(skb)); | 1238 | inet_iif(skb)); |
1753 | 1239 | ||
1754 | if (!sk) | 1240 | if (!sk) |
1755 | goto no_tcp_socket; | 1241 | goto no_tcp_socket; |
@@ -1801,24 +1287,26 @@ discard_and_relse: | |||
1801 | 1287 | ||
1802 | do_time_wait: | 1288 | do_time_wait: |
1803 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { | 1289 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { |
1804 | tcp_tw_put((struct tcp_tw_bucket *) sk); | 1290 | inet_twsk_put((struct inet_timewait_sock *) sk); |
1805 | goto discard_it; | 1291 | goto discard_it; |
1806 | } | 1292 | } |
1807 | 1293 | ||
1808 | if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { | 1294 | if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { |
1809 | TCP_INC_STATS_BH(TCP_MIB_INERRS); | 1295 | TCP_INC_STATS_BH(TCP_MIB_INERRS); |
1810 | tcp_tw_put((struct tcp_tw_bucket *) sk); | 1296 | inet_twsk_put((struct inet_timewait_sock *) sk); |
1811 | goto discard_it; | 1297 | goto discard_it; |
1812 | } | 1298 | } |
1813 | switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk, | 1299 | switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk, |
1814 | skb, th, skb->len)) { | 1300 | skb, th)) { |
1815 | case TCP_TW_SYN: { | 1301 | case TCP_TW_SYN: { |
1816 | struct sock *sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr, | 1302 | struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, |
1817 | ntohs(th->dest), | 1303 | skb->nh.iph->daddr, |
1818 | tcp_v4_iif(skb)); | 1304 | ntohs(th->dest), |
1305 | inet_iif(skb)); | ||
1819 | if (sk2) { | 1306 | if (sk2) { |
1820 | tcp_tw_deschedule((struct tcp_tw_bucket *)sk); | 1307 | inet_twsk_deschedule((struct inet_timewait_sock *)sk, |
1821 | tcp_tw_put((struct tcp_tw_bucket *)sk); | 1308 | &tcp_death_row); |
1309 | inet_twsk_put((struct inet_timewait_sock *)sk); | ||
1822 | sk = sk2; | 1310 | sk = sk2; |
1823 | goto process; | 1311 | goto process; |
1824 | } | 1312 | } |
@@ -1834,112 +1322,6 @@ do_time_wait: | |||
1834 | goto discard_it; | 1322 | goto discard_it; |
1835 | } | 1323 | } |
1836 | 1324 | ||
1837 | /* With per-bucket locks this operation is not-atomic, so that | ||
1838 | * this version is not worse. | ||
1839 | */ | ||
1840 | static void __tcp_v4_rehash(struct sock *sk) | ||
1841 | { | ||
1842 | sk->sk_prot->unhash(sk); | ||
1843 | sk->sk_prot->hash(sk); | ||
1844 | } | ||
1845 | |||
1846 | static int tcp_v4_reselect_saddr(struct sock *sk) | ||
1847 | { | ||
1848 | struct inet_sock *inet = inet_sk(sk); | ||
1849 | int err; | ||
1850 | struct rtable *rt; | ||
1851 | __u32 old_saddr = inet->saddr; | ||
1852 | __u32 new_saddr; | ||
1853 | __u32 daddr = inet->daddr; | ||
1854 | |||
1855 | if (inet->opt && inet->opt->srr) | ||
1856 | daddr = inet->opt->faddr; | ||
1857 | |||
1858 | /* Query new route. */ | ||
1859 | err = ip_route_connect(&rt, daddr, 0, | ||
1860 | RT_CONN_FLAGS(sk), | ||
1861 | sk->sk_bound_dev_if, | ||
1862 | IPPROTO_TCP, | ||
1863 | inet->sport, inet->dport, sk); | ||
1864 | if (err) | ||
1865 | return err; | ||
1866 | |||
1867 | __sk_dst_set(sk, &rt->u.dst); | ||
1868 | tcp_v4_setup_caps(sk, &rt->u.dst); | ||
1869 | |||
1870 | new_saddr = rt->rt_src; | ||
1871 | |||
1872 | if (new_saddr == old_saddr) | ||
1873 | return 0; | ||
1874 | |||
1875 | if (sysctl_ip_dynaddr > 1) { | ||
1876 | printk(KERN_INFO "tcp_v4_rebuild_header(): shifting inet->" | ||
1877 | "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n", | ||
1878 | NIPQUAD(old_saddr), | ||
1879 | NIPQUAD(new_saddr)); | ||
1880 | } | ||
1881 | |||
1882 | inet->saddr = new_saddr; | ||
1883 | inet->rcv_saddr = new_saddr; | ||
1884 | |||
1885 | /* XXX The only one ugly spot where we need to | ||
1886 | * XXX really change the sockets identity after | ||
1887 | * XXX it has entered the hashes. -DaveM | ||
1888 | * | ||
1889 | * Besides that, it does not check for connection | ||
1890 | * uniqueness. Wait for troubles. | ||
1891 | */ | ||
1892 | __tcp_v4_rehash(sk); | ||
1893 | return 0; | ||
1894 | } | ||
1895 | |||
1896 | int tcp_v4_rebuild_header(struct sock *sk) | ||
1897 | { | ||
1898 | struct inet_sock *inet = inet_sk(sk); | ||
1899 | struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); | ||
1900 | u32 daddr; | ||
1901 | int err; | ||
1902 | |||
1903 | /* Route is OK, nothing to do. */ | ||
1904 | if (rt) | ||
1905 | return 0; | ||
1906 | |||
1907 | /* Reroute. */ | ||
1908 | daddr = inet->daddr; | ||
1909 | if (inet->opt && inet->opt->srr) | ||
1910 | daddr = inet->opt->faddr; | ||
1911 | |||
1912 | { | ||
1913 | struct flowi fl = { .oif = sk->sk_bound_dev_if, | ||
1914 | .nl_u = { .ip4_u = | ||
1915 | { .daddr = daddr, | ||
1916 | .saddr = inet->saddr, | ||
1917 | .tos = RT_CONN_FLAGS(sk) } }, | ||
1918 | .proto = IPPROTO_TCP, | ||
1919 | .uli_u = { .ports = | ||
1920 | { .sport = inet->sport, | ||
1921 | .dport = inet->dport } } }; | ||
1922 | |||
1923 | err = ip_route_output_flow(&rt, &fl, sk, 0); | ||
1924 | } | ||
1925 | if (!err) { | ||
1926 | __sk_dst_set(sk, &rt->u.dst); | ||
1927 | tcp_v4_setup_caps(sk, &rt->u.dst); | ||
1928 | return 0; | ||
1929 | } | ||
1930 | |||
1931 | /* Routing failed... */ | ||
1932 | sk->sk_route_caps = 0; | ||
1933 | |||
1934 | if (!sysctl_ip_dynaddr || | ||
1935 | sk->sk_state != TCP_SYN_SENT || | ||
1936 | (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || | ||
1937 | (err = tcp_v4_reselect_saddr(sk)) != 0) | ||
1938 | sk->sk_err_soft = -err; | ||
1939 | |||
1940 | return err; | ||
1941 | } | ||
1942 | |||
1943 | static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) | 1325 | static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) |
1944 | { | 1326 | { |
1945 | struct sockaddr_in *sin = (struct sockaddr_in *) uaddr; | 1327 | struct sockaddr_in *sin = (struct sockaddr_in *) uaddr; |
@@ -1988,18 +1370,18 @@ int tcp_v4_remember_stamp(struct sock *sk) | |||
1988 | return 0; | 1370 | return 0; |
1989 | } | 1371 | } |
1990 | 1372 | ||
1991 | int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw) | 1373 | int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) |
1992 | { | 1374 | { |
1993 | struct inet_peer *peer = NULL; | 1375 | struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1); |
1994 | |||
1995 | peer = inet_getpeer(tw->tw_daddr, 1); | ||
1996 | 1376 | ||
1997 | if (peer) { | 1377 | if (peer) { |
1998 | if ((s32)(peer->tcp_ts - tw->tw_ts_recent) <= 0 || | 1378 | const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); |
1379 | |||
1380 | if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || | ||
1999 | (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec && | 1381 | (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec && |
2000 | peer->tcp_ts_stamp <= tw->tw_ts_recent_stamp)) { | 1382 | peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) { |
2001 | peer->tcp_ts_stamp = tw->tw_ts_recent_stamp; | 1383 | peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp; |
2002 | peer->tcp_ts = tw->tw_ts_recent; | 1384 | peer->tcp_ts = tcptw->tw_ts_recent; |
2003 | } | 1385 | } |
2004 | inet_putpeer(peer); | 1386 | inet_putpeer(peer); |
2005 | return 1; | 1387 | return 1; |
@@ -2011,7 +1393,7 @@ int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw) | |||
2011 | struct tcp_func ipv4_specific = { | 1393 | struct tcp_func ipv4_specific = { |
2012 | .queue_xmit = ip_queue_xmit, | 1394 | .queue_xmit = ip_queue_xmit, |
2013 | .send_check = tcp_v4_send_check, | 1395 | .send_check = tcp_v4_send_check, |
2014 | .rebuild_header = tcp_v4_rebuild_header, | 1396 | .rebuild_header = inet_sk_rebuild_header, |
2015 | .conn_request = tcp_v4_conn_request, | 1397 | .conn_request = tcp_v4_conn_request, |
2016 | .syn_recv_sock = tcp_v4_syn_recv_sock, | 1398 | .syn_recv_sock = tcp_v4_syn_recv_sock, |
2017 | .remember_stamp = tcp_v4_remember_stamp, | 1399 | .remember_stamp = tcp_v4_remember_stamp, |
@@ -2027,13 +1409,14 @@ struct tcp_func ipv4_specific = { | |||
2027 | */ | 1409 | */ |
2028 | static int tcp_v4_init_sock(struct sock *sk) | 1410 | static int tcp_v4_init_sock(struct sock *sk) |
2029 | { | 1411 | { |
1412 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
2030 | struct tcp_sock *tp = tcp_sk(sk); | 1413 | struct tcp_sock *tp = tcp_sk(sk); |
2031 | 1414 | ||
2032 | skb_queue_head_init(&tp->out_of_order_queue); | 1415 | skb_queue_head_init(&tp->out_of_order_queue); |
2033 | tcp_init_xmit_timers(sk); | 1416 | tcp_init_xmit_timers(sk); |
2034 | tcp_prequeue_init(tp); | 1417 | tcp_prequeue_init(tp); |
2035 | 1418 | ||
2036 | tp->rto = TCP_TIMEOUT_INIT; | 1419 | icsk->icsk_rto = TCP_TIMEOUT_INIT; |
2037 | tp->mdev = TCP_TIMEOUT_INIT; | 1420 | tp->mdev = TCP_TIMEOUT_INIT; |
2038 | 1421 | ||
2039 | /* So many TCP implementations out there (incorrectly) count the | 1422 | /* So many TCP implementations out there (incorrectly) count the |
@@ -2051,7 +1434,7 @@ static int tcp_v4_init_sock(struct sock *sk) | |||
2051 | tp->mss_cache = 536; | 1434 | tp->mss_cache = 536; |
2052 | 1435 | ||
2053 | tp->reordering = sysctl_tcp_reordering; | 1436 | tp->reordering = sysctl_tcp_reordering; |
2054 | tp->ca_ops = &tcp_init_congestion_ops; | 1437 | icsk->icsk_ca_ops = &tcp_init_congestion_ops; |
2055 | 1438 | ||
2056 | sk->sk_state = TCP_CLOSE; | 1439 | sk->sk_state = TCP_CLOSE; |
2057 | 1440 | ||
@@ -2074,7 +1457,7 @@ int tcp_v4_destroy_sock(struct sock *sk) | |||
2074 | 1457 | ||
2075 | tcp_clear_xmit_timers(sk); | 1458 | tcp_clear_xmit_timers(sk); |
2076 | 1459 | ||
2077 | tcp_cleanup_congestion_control(tp); | 1460 | tcp_cleanup_congestion_control(sk); |
2078 | 1461 | ||
2079 | /* Cleanup up the write buffer. */ | 1462 | /* Cleanup up the write buffer. */ |
2080 | sk_stream_writequeue_purge(sk); | 1463 | sk_stream_writequeue_purge(sk); |
@@ -2086,8 +1469,8 @@ int tcp_v4_destroy_sock(struct sock *sk) | |||
2086 | __skb_queue_purge(&tp->ucopy.prequeue); | 1469 | __skb_queue_purge(&tp->ucopy.prequeue); |
2087 | 1470 | ||
2088 | /* Clean up a referenced TCP bind bucket. */ | 1471 | /* Clean up a referenced TCP bind bucket. */ |
2089 | if (tp->bind_hash) | 1472 | if (inet_csk(sk)->icsk_bind_hash) |
2090 | tcp_put_port(sk); | 1473 | inet_put_port(&tcp_hashinfo, sk); |
2091 | 1474 | ||
2092 | /* | 1475 | /* |
2093 | * If sendmsg cached page exists, toss it. | 1476 | * If sendmsg cached page exists, toss it. |
@@ -2107,13 +1490,13 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock); | |||
2107 | #ifdef CONFIG_PROC_FS | 1490 | #ifdef CONFIG_PROC_FS |
2108 | /* Proc filesystem TCP sock list dumping. */ | 1491 | /* Proc filesystem TCP sock list dumping. */ |
2109 | 1492 | ||
2110 | static inline struct tcp_tw_bucket *tw_head(struct hlist_head *head) | 1493 | static inline struct inet_timewait_sock *tw_head(struct hlist_head *head) |
2111 | { | 1494 | { |
2112 | return hlist_empty(head) ? NULL : | 1495 | return hlist_empty(head) ? NULL : |
2113 | list_entry(head->first, struct tcp_tw_bucket, tw_node); | 1496 | list_entry(head->first, struct inet_timewait_sock, tw_node); |
2114 | } | 1497 | } |
2115 | 1498 | ||
2116 | static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw) | 1499 | static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) |
2117 | { | 1500 | { |
2118 | return tw->tw_node.next ? | 1501 | return tw->tw_node.next ? |
2119 | hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; | 1502 | hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; |
@@ -2121,14 +1504,14 @@ static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw) | |||
2121 | 1504 | ||
2122 | static void *listening_get_next(struct seq_file *seq, void *cur) | 1505 | static void *listening_get_next(struct seq_file *seq, void *cur) |
2123 | { | 1506 | { |
2124 | struct tcp_sock *tp; | 1507 | struct inet_connection_sock *icsk; |
2125 | struct hlist_node *node; | 1508 | struct hlist_node *node; |
2126 | struct sock *sk = cur; | 1509 | struct sock *sk = cur; |
2127 | struct tcp_iter_state* st = seq->private; | 1510 | struct tcp_iter_state* st = seq->private; |
2128 | 1511 | ||
2129 | if (!sk) { | 1512 | if (!sk) { |
2130 | st->bucket = 0; | 1513 | st->bucket = 0; |
2131 | sk = sk_head(&tcp_listening_hash[0]); | 1514 | sk = sk_head(&tcp_hashinfo.listening_hash[0]); |
2132 | goto get_sk; | 1515 | goto get_sk; |
2133 | } | 1516 | } |
2134 | 1517 | ||
@@ -2137,7 +1520,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) | |||
2137 | if (st->state == TCP_SEQ_STATE_OPENREQ) { | 1520 | if (st->state == TCP_SEQ_STATE_OPENREQ) { |
2138 | struct request_sock *req = cur; | 1521 | struct request_sock *req = cur; |
2139 | 1522 | ||
2140 | tp = tcp_sk(st->syn_wait_sk); | 1523 | icsk = inet_csk(st->syn_wait_sk); |
2141 | req = req->dl_next; | 1524 | req = req->dl_next; |
2142 | while (1) { | 1525 | while (1) { |
2143 | while (req) { | 1526 | while (req) { |
@@ -2150,17 +1533,17 @@ static void *listening_get_next(struct seq_file *seq, void *cur) | |||
2150 | if (++st->sbucket >= TCP_SYNQ_HSIZE) | 1533 | if (++st->sbucket >= TCP_SYNQ_HSIZE) |
2151 | break; | 1534 | break; |
2152 | get_req: | 1535 | get_req: |
2153 | req = tp->accept_queue.listen_opt->syn_table[st->sbucket]; | 1536 | req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; |
2154 | } | 1537 | } |
2155 | sk = sk_next(st->syn_wait_sk); | 1538 | sk = sk_next(st->syn_wait_sk); |
2156 | st->state = TCP_SEQ_STATE_LISTENING; | 1539 | st->state = TCP_SEQ_STATE_LISTENING; |
2157 | read_unlock_bh(&tp->accept_queue.syn_wait_lock); | 1540 | read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); |
2158 | } else { | 1541 | } else { |
2159 | tp = tcp_sk(sk); | 1542 | icsk = inet_csk(sk); |
2160 | read_lock_bh(&tp->accept_queue.syn_wait_lock); | 1543 | read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); |
2161 | if (reqsk_queue_len(&tp->accept_queue)) | 1544 | if (reqsk_queue_len(&icsk->icsk_accept_queue)) |
2162 | goto start_req; | 1545 | goto start_req; |
2163 | read_unlock_bh(&tp->accept_queue.syn_wait_lock); | 1546 | read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); |
2164 | sk = sk_next(sk); | 1547 | sk = sk_next(sk); |
2165 | } | 1548 | } |
2166 | get_sk: | 1549 | get_sk: |
@@ -2169,9 +1552,9 @@ get_sk: | |||
2169 | cur = sk; | 1552 | cur = sk; |
2170 | goto out; | 1553 | goto out; |
2171 | } | 1554 | } |
2172 | tp = tcp_sk(sk); | 1555 | icsk = inet_csk(sk); |
2173 | read_lock_bh(&tp->accept_queue.syn_wait_lock); | 1556 | read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); |
2174 | if (reqsk_queue_len(&tp->accept_queue)) { | 1557 | if (reqsk_queue_len(&icsk->icsk_accept_queue)) { |
2175 | start_req: | 1558 | start_req: |
2176 | st->uid = sock_i_uid(sk); | 1559 | st->uid = sock_i_uid(sk); |
2177 | st->syn_wait_sk = sk; | 1560 | st->syn_wait_sk = sk; |
@@ -2179,10 +1562,10 @@ start_req: | |||
2179 | st->sbucket = 0; | 1562 | st->sbucket = 0; |
2180 | goto get_req; | 1563 | goto get_req; |
2181 | } | 1564 | } |
2182 | read_unlock_bh(&tp->accept_queue.syn_wait_lock); | 1565 | read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); |
2183 | } | 1566 | } |
2184 | if (++st->bucket < TCP_LHTABLE_SIZE) { | 1567 | if (++st->bucket < INET_LHTABLE_SIZE) { |
2185 | sk = sk_head(&tcp_listening_hash[st->bucket]); | 1568 | sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]); |
2186 | goto get_sk; | 1569 | goto get_sk; |
2187 | } | 1570 | } |
2188 | cur = NULL; | 1571 | cur = NULL; |
@@ -2206,16 +1589,16 @@ static void *established_get_first(struct seq_file *seq) | |||
2206 | struct tcp_iter_state* st = seq->private; | 1589 | struct tcp_iter_state* st = seq->private; |
2207 | void *rc = NULL; | 1590 | void *rc = NULL; |
2208 | 1591 | ||
2209 | for (st->bucket = 0; st->bucket < tcp_ehash_size; ++st->bucket) { | 1592 | for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { |
2210 | struct sock *sk; | 1593 | struct sock *sk; |
2211 | struct hlist_node *node; | 1594 | struct hlist_node *node; |
2212 | struct tcp_tw_bucket *tw; | 1595 | struct inet_timewait_sock *tw; |
2213 | 1596 | ||
2214 | /* We can reschedule _before_ having picked the target: */ | 1597 | /* We can reschedule _before_ having picked the target: */ |
2215 | cond_resched_softirq(); | 1598 | cond_resched_softirq(); |
2216 | 1599 | ||
2217 | read_lock(&tcp_ehash[st->bucket].lock); | 1600 | read_lock(&tcp_hashinfo.ehash[st->bucket].lock); |
2218 | sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) { | 1601 | sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { |
2219 | if (sk->sk_family != st->family) { | 1602 | if (sk->sk_family != st->family) { |
2220 | continue; | 1603 | continue; |
2221 | } | 1604 | } |
@@ -2223,15 +1606,15 @@ static void *established_get_first(struct seq_file *seq) | |||
2223 | goto out; | 1606 | goto out; |
2224 | } | 1607 | } |
2225 | st->state = TCP_SEQ_STATE_TIME_WAIT; | 1608 | st->state = TCP_SEQ_STATE_TIME_WAIT; |
2226 | tw_for_each(tw, node, | 1609 | inet_twsk_for_each(tw, node, |
2227 | &tcp_ehash[st->bucket + tcp_ehash_size].chain) { | 1610 | &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) { |
2228 | if (tw->tw_family != st->family) { | 1611 | if (tw->tw_family != st->family) { |
2229 | continue; | 1612 | continue; |
2230 | } | 1613 | } |
2231 | rc = tw; | 1614 | rc = tw; |
2232 | goto out; | 1615 | goto out; |
2233 | } | 1616 | } |
2234 | read_unlock(&tcp_ehash[st->bucket].lock); | 1617 | read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); |
2235 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 1618 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
2236 | } | 1619 | } |
2237 | out: | 1620 | out: |
@@ -2241,7 +1624,7 @@ out: | |||
2241 | static void *established_get_next(struct seq_file *seq, void *cur) | 1624 | static void *established_get_next(struct seq_file *seq, void *cur) |
2242 | { | 1625 | { |
2243 | struct sock *sk = cur; | 1626 | struct sock *sk = cur; |
2244 | struct tcp_tw_bucket *tw; | 1627 | struct inet_timewait_sock *tw; |
2245 | struct hlist_node *node; | 1628 | struct hlist_node *node; |
2246 | struct tcp_iter_state* st = seq->private; | 1629 | struct tcp_iter_state* st = seq->private; |
2247 | 1630 | ||
@@ -2258,15 +1641,15 @@ get_tw: | |||
2258 | cur = tw; | 1641 | cur = tw; |
2259 | goto out; | 1642 | goto out; |
2260 | } | 1643 | } |
2261 | read_unlock(&tcp_ehash[st->bucket].lock); | 1644 | read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); |
2262 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 1645 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
2263 | 1646 | ||
2264 | /* We can reschedule between buckets: */ | 1647 | /* We can reschedule between buckets: */ |
2265 | cond_resched_softirq(); | 1648 | cond_resched_softirq(); |
2266 | 1649 | ||
2267 | if (++st->bucket < tcp_ehash_size) { | 1650 | if (++st->bucket < tcp_hashinfo.ehash_size) { |
2268 | read_lock(&tcp_ehash[st->bucket].lock); | 1651 | read_lock(&tcp_hashinfo.ehash[st->bucket].lock); |
2269 | sk = sk_head(&tcp_ehash[st->bucket].chain); | 1652 | sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); |
2270 | } else { | 1653 | } else { |
2271 | cur = NULL; | 1654 | cur = NULL; |
2272 | goto out; | 1655 | goto out; |
@@ -2280,7 +1663,7 @@ get_tw: | |||
2280 | } | 1663 | } |
2281 | 1664 | ||
2282 | st->state = TCP_SEQ_STATE_TIME_WAIT; | 1665 | st->state = TCP_SEQ_STATE_TIME_WAIT; |
2283 | tw = tw_head(&tcp_ehash[st->bucket + tcp_ehash_size].chain); | 1666 | tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain); |
2284 | goto get_tw; | 1667 | goto get_tw; |
2285 | found: | 1668 | found: |
2286 | cur = sk; | 1669 | cur = sk; |
@@ -2304,12 +1687,12 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) | |||
2304 | void *rc; | 1687 | void *rc; |
2305 | struct tcp_iter_state* st = seq->private; | 1688 | struct tcp_iter_state* st = seq->private; |
2306 | 1689 | ||
2307 | tcp_listen_lock(); | 1690 | inet_listen_lock(&tcp_hashinfo); |
2308 | st->state = TCP_SEQ_STATE_LISTENING; | 1691 | st->state = TCP_SEQ_STATE_LISTENING; |
2309 | rc = listening_get_idx(seq, &pos); | 1692 | rc = listening_get_idx(seq, &pos); |
2310 | 1693 | ||
2311 | if (!rc) { | 1694 | if (!rc) { |
2312 | tcp_listen_unlock(); | 1695 | inet_listen_unlock(&tcp_hashinfo); |
2313 | local_bh_disable(); | 1696 | local_bh_disable(); |
2314 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 1697 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
2315 | rc = established_get_idx(seq, pos); | 1698 | rc = established_get_idx(seq, pos); |
@@ -2342,7 +1725,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
2342 | case TCP_SEQ_STATE_LISTENING: | 1725 | case TCP_SEQ_STATE_LISTENING: |
2343 | rc = listening_get_next(seq, v); | 1726 | rc = listening_get_next(seq, v); |
2344 | if (!rc) { | 1727 | if (!rc) { |
2345 | tcp_listen_unlock(); | 1728 | inet_listen_unlock(&tcp_hashinfo); |
2346 | local_bh_disable(); | 1729 | local_bh_disable(); |
2347 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 1730 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
2348 | rc = established_get_first(seq); | 1731 | rc = established_get_first(seq); |
@@ -2365,17 +1748,17 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) | |||
2365 | switch (st->state) { | 1748 | switch (st->state) { |
2366 | case TCP_SEQ_STATE_OPENREQ: | 1749 | case TCP_SEQ_STATE_OPENREQ: |
2367 | if (v) { | 1750 | if (v) { |
2368 | struct tcp_sock *tp = tcp_sk(st->syn_wait_sk); | 1751 | struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk); |
2369 | read_unlock_bh(&tp->accept_queue.syn_wait_lock); | 1752 | read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); |
2370 | } | 1753 | } |
2371 | case TCP_SEQ_STATE_LISTENING: | 1754 | case TCP_SEQ_STATE_LISTENING: |
2372 | if (v != SEQ_START_TOKEN) | 1755 | if (v != SEQ_START_TOKEN) |
2373 | tcp_listen_unlock(); | 1756 | inet_listen_unlock(&tcp_hashinfo); |
2374 | break; | 1757 | break; |
2375 | case TCP_SEQ_STATE_TIME_WAIT: | 1758 | case TCP_SEQ_STATE_TIME_WAIT: |
2376 | case TCP_SEQ_STATE_ESTABLISHED: | 1759 | case TCP_SEQ_STATE_ESTABLISHED: |
2377 | if (v) | 1760 | if (v) |
2378 | read_unlock(&tcp_ehash[st->bucket].lock); | 1761 | read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); |
2379 | local_bh_enable(); | 1762 | local_bh_enable(); |
2380 | break; | 1763 | break; |
2381 | } | 1764 | } |
@@ -2472,18 +1855,19 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) | |||
2472 | int timer_active; | 1855 | int timer_active; |
2473 | unsigned long timer_expires; | 1856 | unsigned long timer_expires; |
2474 | struct tcp_sock *tp = tcp_sk(sp); | 1857 | struct tcp_sock *tp = tcp_sk(sp); |
1858 | const struct inet_connection_sock *icsk = inet_csk(sp); | ||
2475 | struct inet_sock *inet = inet_sk(sp); | 1859 | struct inet_sock *inet = inet_sk(sp); |
2476 | unsigned int dest = inet->daddr; | 1860 | unsigned int dest = inet->daddr; |
2477 | unsigned int src = inet->rcv_saddr; | 1861 | unsigned int src = inet->rcv_saddr; |
2478 | __u16 destp = ntohs(inet->dport); | 1862 | __u16 destp = ntohs(inet->dport); |
2479 | __u16 srcp = ntohs(inet->sport); | 1863 | __u16 srcp = ntohs(inet->sport); |
2480 | 1864 | ||
2481 | if (tp->pending == TCP_TIME_RETRANS) { | 1865 | if (icsk->icsk_pending == ICSK_TIME_RETRANS) { |
2482 | timer_active = 1; | 1866 | timer_active = 1; |
2483 | timer_expires = tp->timeout; | 1867 | timer_expires = icsk->icsk_timeout; |
2484 | } else if (tp->pending == TCP_TIME_PROBE0) { | 1868 | } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { |
2485 | timer_active = 4; | 1869 | timer_active = 4; |
2486 | timer_expires = tp->timeout; | 1870 | timer_expires = icsk->icsk_timeout; |
2487 | } else if (timer_pending(&sp->sk_timer)) { | 1871 | } else if (timer_pending(&sp->sk_timer)) { |
2488 | timer_active = 2; | 1872 | timer_active = 2; |
2489 | timer_expires = sp->sk_timer.expires; | 1873 | timer_expires = sp->sk_timer.expires; |
@@ -2498,17 +1882,19 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) | |||
2498 | tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq, | 1882 | tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq, |
2499 | timer_active, | 1883 | timer_active, |
2500 | jiffies_to_clock_t(timer_expires - jiffies), | 1884 | jiffies_to_clock_t(timer_expires - jiffies), |
2501 | tp->retransmits, | 1885 | icsk->icsk_retransmits, |
2502 | sock_i_uid(sp), | 1886 | sock_i_uid(sp), |
2503 | tp->probes_out, | 1887 | icsk->icsk_probes_out, |
2504 | sock_i_ino(sp), | 1888 | sock_i_ino(sp), |
2505 | atomic_read(&sp->sk_refcnt), sp, | 1889 | atomic_read(&sp->sk_refcnt), sp, |
2506 | tp->rto, tp->ack.ato, (tp->ack.quick << 1) | tp->ack.pingpong, | 1890 | icsk->icsk_rto, |
1891 | icsk->icsk_ack.ato, | ||
1892 | (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, | ||
2507 | tp->snd_cwnd, | 1893 | tp->snd_cwnd, |
2508 | tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh); | 1894 | tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh); |
2509 | } | 1895 | } |
2510 | 1896 | ||
2511 | static void get_timewait4_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i) | 1897 | static void get_timewait4_sock(struct inet_timewait_sock *tw, char *tmpbuf, int i) |
2512 | { | 1898 | { |
2513 | unsigned int dest, src; | 1899 | unsigned int dest, src; |
2514 | __u16 destp, srcp; | 1900 | __u16 destp, srcp; |
@@ -2588,7 +1974,7 @@ struct proto tcp_prot = { | |||
2588 | .close = tcp_close, | 1974 | .close = tcp_close, |
2589 | .connect = tcp_v4_connect, | 1975 | .connect = tcp_v4_connect, |
2590 | .disconnect = tcp_disconnect, | 1976 | .disconnect = tcp_disconnect, |
2591 | .accept = tcp_accept, | 1977 | .accept = inet_csk_accept, |
2592 | .ioctl = tcp_ioctl, | 1978 | .ioctl = tcp_ioctl, |
2593 | .init = tcp_v4_init_sock, | 1979 | .init = tcp_v4_init_sock, |
2594 | .destroy = tcp_v4_destroy_sock, | 1980 | .destroy = tcp_v4_destroy_sock, |
@@ -2603,6 +1989,7 @@ struct proto tcp_prot = { | |||
2603 | .get_port = tcp_v4_get_port, | 1989 | .get_port = tcp_v4_get_port, |
2604 | .enter_memory_pressure = tcp_enter_memory_pressure, | 1990 | .enter_memory_pressure = tcp_enter_memory_pressure, |
2605 | .sockets_allocated = &tcp_sockets_allocated, | 1991 | .sockets_allocated = &tcp_sockets_allocated, |
1992 | .orphan_count = &tcp_orphan_count, | ||
2606 | .memory_allocated = &tcp_memory_allocated, | 1993 | .memory_allocated = &tcp_memory_allocated, |
2607 | .memory_pressure = &tcp_memory_pressure, | 1994 | .memory_pressure = &tcp_memory_pressure, |
2608 | .sysctl_mem = sysctl_tcp_mem, | 1995 | .sysctl_mem = sysctl_tcp_mem, |
@@ -2610,6 +1997,7 @@ struct proto tcp_prot = { | |||
2610 | .sysctl_rmem = sysctl_tcp_rmem, | 1997 | .sysctl_rmem = sysctl_tcp_rmem, |
2611 | .max_header = MAX_TCP_HEADER, | 1998 | .max_header = MAX_TCP_HEADER, |
2612 | .obj_size = sizeof(struct tcp_sock), | 1999 | .obj_size = sizeof(struct tcp_sock), |
2000 | .twsk_obj_size = sizeof(struct tcp_timewait_sock), | ||
2613 | .rsk_prot = &tcp_request_sock_ops, | 2001 | .rsk_prot = &tcp_request_sock_ops, |
2614 | }; | 2002 | }; |
2615 | 2003 | ||
@@ -2631,19 +2019,13 @@ void __init tcp_v4_init(struct net_proto_family *ops) | |||
2631 | } | 2019 | } |
2632 | 2020 | ||
2633 | EXPORT_SYMBOL(ipv4_specific); | 2021 | EXPORT_SYMBOL(ipv4_specific); |
2634 | EXPORT_SYMBOL(tcp_bind_hash); | 2022 | EXPORT_SYMBOL(inet_bind_bucket_create); |
2635 | EXPORT_SYMBOL(tcp_bucket_create); | ||
2636 | EXPORT_SYMBOL(tcp_hashinfo); | 2023 | EXPORT_SYMBOL(tcp_hashinfo); |
2637 | EXPORT_SYMBOL(tcp_inherit_port); | ||
2638 | EXPORT_SYMBOL(tcp_listen_wlock); | ||
2639 | EXPORT_SYMBOL(tcp_port_rover); | ||
2640 | EXPORT_SYMBOL(tcp_prot); | 2024 | EXPORT_SYMBOL(tcp_prot); |
2641 | EXPORT_SYMBOL(tcp_put_port); | ||
2642 | EXPORT_SYMBOL(tcp_unhash); | 2025 | EXPORT_SYMBOL(tcp_unhash); |
2643 | EXPORT_SYMBOL(tcp_v4_conn_request); | 2026 | EXPORT_SYMBOL(tcp_v4_conn_request); |
2644 | EXPORT_SYMBOL(tcp_v4_connect); | 2027 | EXPORT_SYMBOL(tcp_v4_connect); |
2645 | EXPORT_SYMBOL(tcp_v4_do_rcv); | 2028 | EXPORT_SYMBOL(tcp_v4_do_rcv); |
2646 | EXPORT_SYMBOL(tcp_v4_rebuild_header); | ||
2647 | EXPORT_SYMBOL(tcp_v4_remember_stamp); | 2029 | EXPORT_SYMBOL(tcp_v4_remember_stamp); |
2648 | EXPORT_SYMBOL(tcp_v4_send_check); | 2030 | EXPORT_SYMBOL(tcp_v4_send_check); |
2649 | EXPORT_SYMBOL(tcp_v4_syn_recv_sock); | 2031 | EXPORT_SYMBOL(tcp_v4_syn_recv_sock); |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f42a284164b7..a88db28b0af7 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -35,13 +35,27 @@ | |||
35 | #define SYNC_INIT 1 | 35 | #define SYNC_INIT 1 |
36 | #endif | 36 | #endif |
37 | 37 | ||
38 | int sysctl_tcp_tw_recycle; | ||
39 | int sysctl_tcp_max_tw_buckets = NR_FILE*2; | ||
40 | |||
41 | int sysctl_tcp_syncookies = SYNC_INIT; | 38 | int sysctl_tcp_syncookies = SYNC_INIT; |
42 | int sysctl_tcp_abort_on_overflow; | 39 | int sysctl_tcp_abort_on_overflow; |
43 | 40 | ||
44 | static void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo); | 41 | struct inet_timewait_death_row tcp_death_row = { |
42 | .sysctl_max_tw_buckets = NR_FILE * 2, | ||
43 | .period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS, | ||
44 | .death_lock = SPIN_LOCK_UNLOCKED, | ||
45 | .hashinfo = &tcp_hashinfo, | ||
46 | .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, | ||
47 | (unsigned long)&tcp_death_row), | ||
48 | .twkill_work = __WORK_INITIALIZER(tcp_death_row.twkill_work, | ||
49 | inet_twdr_twkill_work, | ||
50 | &tcp_death_row), | ||
51 | /* Short-time timewait calendar */ | ||
52 | |||
53 | .twcal_hand = -1, | ||
54 | .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, | ||
55 | (unsigned long)&tcp_death_row), | ||
56 | }; | ||
57 | |||
58 | EXPORT_SYMBOL_GPL(tcp_death_row); | ||
45 | 59 | ||
46 | static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) | 60 | static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) |
47 | { | 61 | { |
@@ -52,47 +66,6 @@ static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) | |||
52 | return (seq == e_win && seq == end_seq); | 66 | return (seq == e_win && seq == end_seq); |
53 | } | 67 | } |
54 | 68 | ||
55 | /* New-style handling of TIME_WAIT sockets. */ | ||
56 | |||
57 | int tcp_tw_count; | ||
58 | |||
59 | |||
60 | /* Must be called with locally disabled BHs. */ | ||
61 | static void tcp_timewait_kill(struct tcp_tw_bucket *tw) | ||
62 | { | ||
63 | struct tcp_ehash_bucket *ehead; | ||
64 | struct tcp_bind_hashbucket *bhead; | ||
65 | struct tcp_bind_bucket *tb; | ||
66 | |||
67 | /* Unlink from established hashes. */ | ||
68 | ehead = &tcp_ehash[tw->tw_hashent]; | ||
69 | write_lock(&ehead->lock); | ||
70 | if (hlist_unhashed(&tw->tw_node)) { | ||
71 | write_unlock(&ehead->lock); | ||
72 | return; | ||
73 | } | ||
74 | __hlist_del(&tw->tw_node); | ||
75 | sk_node_init(&tw->tw_node); | ||
76 | write_unlock(&ehead->lock); | ||
77 | |||
78 | /* Disassociate with bind bucket. */ | ||
79 | bhead = &tcp_bhash[tcp_bhashfn(tw->tw_num)]; | ||
80 | spin_lock(&bhead->lock); | ||
81 | tb = tw->tw_tb; | ||
82 | __hlist_del(&tw->tw_bind_node); | ||
83 | tw->tw_tb = NULL; | ||
84 | tcp_bucket_destroy(tb); | ||
85 | spin_unlock(&bhead->lock); | ||
86 | |||
87 | #ifdef INET_REFCNT_DEBUG | ||
88 | if (atomic_read(&tw->tw_refcnt) != 1) { | ||
89 | printk(KERN_DEBUG "tw_bucket %p refcnt=%d\n", tw, | ||
90 | atomic_read(&tw->tw_refcnt)); | ||
91 | } | ||
92 | #endif | ||
93 | tcp_tw_put(tw); | ||
94 | } | ||
95 | |||
96 | /* | 69 | /* |
97 | * * Main purpose of TIME-WAIT state is to close connection gracefully, | 70 | * * Main purpose of TIME-WAIT state is to close connection gracefully, |
98 | * when one of ends sits in LAST-ACK or CLOSING retransmitting FIN | 71 | * when one of ends sits in LAST-ACK or CLOSING retransmitting FIN |
@@ -122,19 +95,20 @@ static void tcp_timewait_kill(struct tcp_tw_bucket *tw) | |||
122 | * to avoid misread sequence numbers, states etc. --ANK | 95 | * to avoid misread sequence numbers, states etc. --ANK |
123 | */ | 96 | */ |
124 | enum tcp_tw_status | 97 | enum tcp_tw_status |
125 | tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb, | 98 | tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, |
126 | struct tcphdr *th, unsigned len) | 99 | const struct tcphdr *th) |
127 | { | 100 | { |
101 | struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); | ||
128 | struct tcp_options_received tmp_opt; | 102 | struct tcp_options_received tmp_opt; |
129 | int paws_reject = 0; | 103 | int paws_reject = 0; |
130 | 104 | ||
131 | tmp_opt.saw_tstamp = 0; | 105 | tmp_opt.saw_tstamp = 0; |
132 | if (th->doff > (sizeof(struct tcphdr) >> 2) && tw->tw_ts_recent_stamp) { | 106 | if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { |
133 | tcp_parse_options(skb, &tmp_opt, 0); | 107 | tcp_parse_options(skb, &tmp_opt, 0); |
134 | 108 | ||
135 | if (tmp_opt.saw_tstamp) { | 109 | if (tmp_opt.saw_tstamp) { |
136 | tmp_opt.ts_recent = tw->tw_ts_recent; | 110 | tmp_opt.ts_recent = tcptw->tw_ts_recent; |
137 | tmp_opt.ts_recent_stamp = tw->tw_ts_recent_stamp; | 111 | tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; |
138 | paws_reject = tcp_paws_check(&tmp_opt, th->rst); | 112 | paws_reject = tcp_paws_check(&tmp_opt, th->rst); |
139 | } | 113 | } |
140 | } | 114 | } |
@@ -145,20 +119,20 @@ tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb, | |||
145 | /* Out of window, send ACK */ | 119 | /* Out of window, send ACK */ |
146 | if (paws_reject || | 120 | if (paws_reject || |
147 | !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, | 121 | !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, |
148 | tw->tw_rcv_nxt, | 122 | tcptw->tw_rcv_nxt, |
149 | tw->tw_rcv_nxt + tw->tw_rcv_wnd)) | 123 | tcptw->tw_rcv_nxt + tcptw->tw_rcv_wnd)) |
150 | return TCP_TW_ACK; | 124 | return TCP_TW_ACK; |
151 | 125 | ||
152 | if (th->rst) | 126 | if (th->rst) |
153 | goto kill; | 127 | goto kill; |
154 | 128 | ||
155 | if (th->syn && !before(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt)) | 129 | if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt)) |
156 | goto kill_with_rst; | 130 | goto kill_with_rst; |
157 | 131 | ||
158 | /* Dup ACK? */ | 132 | /* Dup ACK? */ |
159 | if (!after(TCP_SKB_CB(skb)->end_seq, tw->tw_rcv_nxt) || | 133 | if (!after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) || |
160 | TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) { | 134 | TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) { |
161 | tcp_tw_put(tw); | 135 | inet_twsk_put(tw); |
162 | return TCP_TW_SUCCESS; | 136 | return TCP_TW_SUCCESS; |
163 | } | 137 | } |
164 | 138 | ||
@@ -166,19 +140,19 @@ tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb, | |||
166 | * reset. | 140 | * reset. |
167 | */ | 141 | */ |
168 | if (!th->fin || | 142 | if (!th->fin || |
169 | TCP_SKB_CB(skb)->end_seq != tw->tw_rcv_nxt + 1) { | 143 | TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) { |
170 | kill_with_rst: | 144 | kill_with_rst: |
171 | tcp_tw_deschedule(tw); | 145 | inet_twsk_deschedule(tw, &tcp_death_row); |
172 | tcp_tw_put(tw); | 146 | inet_twsk_put(tw); |
173 | return TCP_TW_RST; | 147 | return TCP_TW_RST; |
174 | } | 148 | } |
175 | 149 | ||
176 | /* FIN arrived, enter true time-wait state. */ | 150 | /* FIN arrived, enter true time-wait state. */ |
177 | tw->tw_substate = TCP_TIME_WAIT; | 151 | tw->tw_substate = TCP_TIME_WAIT; |
178 | tw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq; | 152 | tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq; |
179 | if (tmp_opt.saw_tstamp) { | 153 | if (tmp_opt.saw_tstamp) { |
180 | tw->tw_ts_recent_stamp = xtime.tv_sec; | 154 | tcptw->tw_ts_recent_stamp = xtime.tv_sec; |
181 | tw->tw_ts_recent = tmp_opt.rcv_tsval; | 155 | tcptw->tw_ts_recent = tmp_opt.rcv_tsval; |
182 | } | 156 | } |
183 | 157 | ||
184 | /* I am shamed, but failed to make it more elegant. | 158 | /* I am shamed, but failed to make it more elegant. |
@@ -187,11 +161,13 @@ kill_with_rst: | |||
187 | * do not undertsnad recycling in any case, it not | 161 | * do not undertsnad recycling in any case, it not |
188 | * a big problem in practice. --ANK */ | 162 | * a big problem in practice. --ANK */ |
189 | if (tw->tw_family == AF_INET && | 163 | if (tw->tw_family == AF_INET && |
190 | sysctl_tcp_tw_recycle && tw->tw_ts_recent_stamp && | 164 | tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp && |
191 | tcp_v4_tw_remember_stamp(tw)) | 165 | tcp_v4_tw_remember_stamp(tw)) |
192 | tcp_tw_schedule(tw, tw->tw_timeout); | 166 | inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout, |
167 | TCP_TIMEWAIT_LEN); | ||
193 | else | 168 | else |
194 | tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); | 169 | inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, |
170 | TCP_TIMEWAIT_LEN); | ||
195 | return TCP_TW_ACK; | 171 | return TCP_TW_ACK; |
196 | } | 172 | } |
197 | 173 | ||
@@ -213,7 +189,7 @@ kill_with_rst: | |||
213 | */ | 189 | */ |
214 | 190 | ||
215 | if (!paws_reject && | 191 | if (!paws_reject && |
216 | (TCP_SKB_CB(skb)->seq == tw->tw_rcv_nxt && | 192 | (TCP_SKB_CB(skb)->seq == tcptw->tw_rcv_nxt && |
217 | (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq || th->rst))) { | 193 | (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq || th->rst))) { |
218 | /* In window segment, it may be only reset or bare ack. */ | 194 | /* In window segment, it may be only reset or bare ack. */ |
219 | 195 | ||
@@ -224,19 +200,20 @@ kill_with_rst: | |||
224 | */ | 200 | */ |
225 | if (sysctl_tcp_rfc1337 == 0) { | 201 | if (sysctl_tcp_rfc1337 == 0) { |
226 | kill: | 202 | kill: |
227 | tcp_tw_deschedule(tw); | 203 | inet_twsk_deschedule(tw, &tcp_death_row); |
228 | tcp_tw_put(tw); | 204 | inet_twsk_put(tw); |
229 | return TCP_TW_SUCCESS; | 205 | return TCP_TW_SUCCESS; |
230 | } | 206 | } |
231 | } | 207 | } |
232 | tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); | 208 | inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, |
209 | TCP_TIMEWAIT_LEN); | ||
233 | 210 | ||
234 | if (tmp_opt.saw_tstamp) { | 211 | if (tmp_opt.saw_tstamp) { |
235 | tw->tw_ts_recent = tmp_opt.rcv_tsval; | 212 | tcptw->tw_ts_recent = tmp_opt.rcv_tsval; |
236 | tw->tw_ts_recent_stamp = xtime.tv_sec; | 213 | tcptw->tw_ts_recent_stamp = xtime.tv_sec; |
237 | } | 214 | } |
238 | 215 | ||
239 | tcp_tw_put(tw); | 216 | inet_twsk_put(tw); |
240 | return TCP_TW_SUCCESS; | 217 | return TCP_TW_SUCCESS; |
241 | } | 218 | } |
242 | 219 | ||
@@ -258,9 +235,10 @@ kill: | |||
258 | */ | 235 | */ |
259 | 236 | ||
260 | if (th->syn && !th->rst && !th->ack && !paws_reject && | 237 | if (th->syn && !th->rst && !th->ack && !paws_reject && |
261 | (after(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt) || | 238 | (after(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt) || |
262 | (tmp_opt.saw_tstamp && (s32)(tw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) { | 239 | (tmp_opt.saw_tstamp && |
263 | u32 isn = tw->tw_snd_nxt + 65535 + 2; | 240 | (s32)(tcptw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) { |
241 | u32 isn = tcptw->tw_snd_nxt + 65535 + 2; | ||
264 | if (isn == 0) | 242 | if (isn == 0) |
265 | isn++; | 243 | isn++; |
266 | TCP_SKB_CB(skb)->when = isn; | 244 | TCP_SKB_CB(skb)->when = isn; |
@@ -278,107 +256,57 @@ kill: | |||
278 | * Do not reschedule in the last case. | 256 | * Do not reschedule in the last case. |
279 | */ | 257 | */ |
280 | if (paws_reject || th->ack) | 258 | if (paws_reject || th->ack) |
281 | tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); | 259 | inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, |
260 | TCP_TIMEWAIT_LEN); | ||
282 | 261 | ||
283 | /* Send ACK. Note, we do not put the bucket, | 262 | /* Send ACK. Note, we do not put the bucket, |
284 | * it will be released by caller. | 263 | * it will be released by caller. |
285 | */ | 264 | */ |
286 | return TCP_TW_ACK; | 265 | return TCP_TW_ACK; |
287 | } | 266 | } |
288 | tcp_tw_put(tw); | 267 | inet_twsk_put(tw); |
289 | return TCP_TW_SUCCESS; | 268 | return TCP_TW_SUCCESS; |
290 | } | 269 | } |
291 | 270 | ||
292 | /* Enter the time wait state. This is called with locally disabled BH. | ||
293 | * Essentially we whip up a timewait bucket, copy the | ||
294 | * relevant info into it from the SK, and mess with hash chains | ||
295 | * and list linkage. | ||
296 | */ | ||
297 | static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) | ||
298 | { | ||
299 | struct tcp_ehash_bucket *ehead = &tcp_ehash[sk->sk_hashent]; | ||
300 | struct tcp_bind_hashbucket *bhead; | ||
301 | |||
302 | /* Step 1: Put TW into bind hash. Original socket stays there too. | ||
303 | Note, that any socket with inet_sk(sk)->num != 0 MUST be bound in | ||
304 | binding cache, even if it is closed. | ||
305 | */ | ||
306 | bhead = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)]; | ||
307 | spin_lock(&bhead->lock); | ||
308 | tw->tw_tb = tcp_sk(sk)->bind_hash; | ||
309 | BUG_TRAP(tcp_sk(sk)->bind_hash); | ||
310 | tw_add_bind_node(tw, &tw->tw_tb->owners); | ||
311 | spin_unlock(&bhead->lock); | ||
312 | |||
313 | write_lock(&ehead->lock); | ||
314 | |||
315 | /* Step 2: Remove SK from established hash. */ | ||
316 | if (__sk_del_node_init(sk)) | ||
317 | sock_prot_dec_use(sk->sk_prot); | ||
318 | |||
319 | /* Step 3: Hash TW into TIMEWAIT half of established hash table. */ | ||
320 | tw_add_node(tw, &(ehead + tcp_ehash_size)->chain); | ||
321 | atomic_inc(&tw->tw_refcnt); | ||
322 | |||
323 | write_unlock(&ehead->lock); | ||
324 | } | ||
325 | |||
326 | /* | 271 | /* |
327 | * Move a socket to time-wait or dead fin-wait-2 state. | 272 | * Move a socket to time-wait or dead fin-wait-2 state. |
328 | */ | 273 | */ |
329 | void tcp_time_wait(struct sock *sk, int state, int timeo) | 274 | void tcp_time_wait(struct sock *sk, int state, int timeo) |
330 | { | 275 | { |
331 | struct tcp_tw_bucket *tw = NULL; | 276 | struct inet_timewait_sock *tw = NULL; |
332 | struct tcp_sock *tp = tcp_sk(sk); | 277 | const struct tcp_sock *tp = tcp_sk(sk); |
333 | int recycle_ok = 0; | 278 | int recycle_ok = 0; |
334 | 279 | ||
335 | if (sysctl_tcp_tw_recycle && tp->rx_opt.ts_recent_stamp) | 280 | if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) |
336 | recycle_ok = tp->af_specific->remember_stamp(sk); | 281 | recycle_ok = tp->af_specific->remember_stamp(sk); |
337 | 282 | ||
338 | if (tcp_tw_count < sysctl_tcp_max_tw_buckets) | 283 | if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) |
339 | tw = kmem_cache_alloc(tcp_timewait_cachep, SLAB_ATOMIC); | 284 | tw = inet_twsk_alloc(sk, state); |
340 | |||
341 | if(tw != NULL) { | ||
342 | struct inet_sock *inet = inet_sk(sk); | ||
343 | int rto = (tp->rto<<2) - (tp->rto>>1); | ||
344 | |||
345 | /* Give us an identity. */ | ||
346 | tw->tw_daddr = inet->daddr; | ||
347 | tw->tw_rcv_saddr = inet->rcv_saddr; | ||
348 | tw->tw_bound_dev_if = sk->sk_bound_dev_if; | ||
349 | tw->tw_num = inet->num; | ||
350 | tw->tw_state = TCP_TIME_WAIT; | ||
351 | tw->tw_substate = state; | ||
352 | tw->tw_sport = inet->sport; | ||
353 | tw->tw_dport = inet->dport; | ||
354 | tw->tw_family = sk->sk_family; | ||
355 | tw->tw_reuse = sk->sk_reuse; | ||
356 | tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; | ||
357 | atomic_set(&tw->tw_refcnt, 1); | ||
358 | 285 | ||
359 | tw->tw_hashent = sk->sk_hashent; | 286 | if (tw != NULL) { |
360 | tw->tw_rcv_nxt = tp->rcv_nxt; | 287 | struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); |
361 | tw->tw_snd_nxt = tp->snd_nxt; | 288 | const struct inet_connection_sock *icsk = inet_csk(sk); |
362 | tw->tw_rcv_wnd = tcp_receive_window(tp); | 289 | const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); |
363 | tw->tw_ts_recent = tp->rx_opt.ts_recent; | 290 | |
364 | tw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; | 291 | tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; |
365 | tw_dead_node_init(tw); | 292 | tcptw->tw_rcv_nxt = tp->rcv_nxt; |
293 | tcptw->tw_snd_nxt = tp->snd_nxt; | ||
294 | tcptw->tw_rcv_wnd = tcp_receive_window(tp); | ||
295 | tcptw->tw_ts_recent = tp->rx_opt.ts_recent; | ||
296 | tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; | ||
366 | 297 | ||
367 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 298 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
368 | if (tw->tw_family == PF_INET6) { | 299 | if (tw->tw_family == PF_INET6) { |
369 | struct ipv6_pinfo *np = inet6_sk(sk); | 300 | struct ipv6_pinfo *np = inet6_sk(sk); |
301 | struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw); | ||
370 | 302 | ||
371 | ipv6_addr_copy(&tw->tw_v6_daddr, &np->daddr); | 303 | ipv6_addr_copy(&tcp6tw->tw_v6_daddr, &np->daddr); |
372 | ipv6_addr_copy(&tw->tw_v6_rcv_saddr, &np->rcv_saddr); | 304 | ipv6_addr_copy(&tcp6tw->tw_v6_rcv_saddr, &np->rcv_saddr); |
373 | tw->tw_v6_ipv6only = np->ipv6only; | 305 | tw->tw_ipv6only = np->ipv6only; |
374 | } else { | ||
375 | memset(&tw->tw_v6_daddr, 0, sizeof(tw->tw_v6_daddr)); | ||
376 | memset(&tw->tw_v6_rcv_saddr, 0, sizeof(tw->tw_v6_rcv_saddr)); | ||
377 | tw->tw_v6_ipv6only = 0; | ||
378 | } | 306 | } |
379 | #endif | 307 | #endif |
380 | /* Linkage updates. */ | 308 | /* Linkage updates. */ |
381 | __tcp_tw_hashdance(sk, tw); | 309 | __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); |
382 | 310 | ||
383 | /* Get the TIME_WAIT timeout firing. */ | 311 | /* Get the TIME_WAIT timeout firing. */ |
384 | if (timeo < rto) | 312 | if (timeo < rto) |
@@ -392,8 +320,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
392 | timeo = TCP_TIMEWAIT_LEN; | 320 | timeo = TCP_TIMEWAIT_LEN; |
393 | } | 321 | } |
394 | 322 | ||
395 | tcp_tw_schedule(tw, timeo); | 323 | inet_twsk_schedule(tw, &tcp_death_row, timeo, |
396 | tcp_tw_put(tw); | 324 | TCP_TIMEWAIT_LEN); |
325 | inet_twsk_put(tw); | ||
397 | } else { | 326 | } else { |
398 | /* Sorry, if we're out of memory, just CLOSE this | 327 | /* Sorry, if we're out of memory, just CLOSE this |
399 | * socket up. We've got bigger problems than | 328 | * socket up. We've got bigger problems than |
@@ -407,277 +336,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
407 | tcp_done(sk); | 336 | tcp_done(sk); |
408 | } | 337 | } |
409 | 338 | ||
410 | /* Kill off TIME_WAIT sockets once their lifetime has expired. */ | ||
411 | static int tcp_tw_death_row_slot; | ||
412 | |||
413 | static void tcp_twkill(unsigned long); | ||
414 | |||
415 | /* TIME_WAIT reaping mechanism. */ | ||
416 | #define TCP_TWKILL_SLOTS 8 /* Please keep this a power of 2. */ | ||
417 | #define TCP_TWKILL_PERIOD (TCP_TIMEWAIT_LEN/TCP_TWKILL_SLOTS) | ||
418 | |||
419 | #define TCP_TWKILL_QUOTA 100 | ||
420 | |||
421 | static struct hlist_head tcp_tw_death_row[TCP_TWKILL_SLOTS]; | ||
422 | static DEFINE_SPINLOCK(tw_death_lock); | ||
423 | static struct timer_list tcp_tw_timer = TIMER_INITIALIZER(tcp_twkill, 0, 0); | ||
424 | static void twkill_work(void *); | ||
425 | static DECLARE_WORK(tcp_twkill_work, twkill_work, NULL); | ||
426 | static u32 twkill_thread_slots; | ||
427 | |||
428 | /* Returns non-zero if quota exceeded. */ | ||
429 | static int tcp_do_twkill_work(int slot, unsigned int quota) | ||
430 | { | ||
431 | struct tcp_tw_bucket *tw; | ||
432 | struct hlist_node *node; | ||
433 | unsigned int killed; | ||
434 | int ret; | ||
435 | |||
436 | /* NOTE: compare this to previous version where lock | ||
437 | * was released after detaching chain. It was racy, | ||
438 | * because tw buckets are scheduled in not serialized context | ||
439 | * in 2.3 (with netfilter), and with softnet it is common, because | ||
440 | * soft irqs are not sequenced. | ||
441 | */ | ||
442 | killed = 0; | ||
443 | ret = 0; | ||
444 | rescan: | ||
445 | tw_for_each_inmate(tw, node, &tcp_tw_death_row[slot]) { | ||
446 | __tw_del_dead_node(tw); | ||
447 | spin_unlock(&tw_death_lock); | ||
448 | tcp_timewait_kill(tw); | ||
449 | tcp_tw_put(tw); | ||
450 | killed++; | ||
451 | spin_lock(&tw_death_lock); | ||
452 | if (killed > quota) { | ||
453 | ret = 1; | ||
454 | break; | ||
455 | } | ||
456 | |||
457 | /* While we dropped tw_death_lock, another cpu may have | ||
458 | * killed off the next TW bucket in the list, therefore | ||
459 | * do a fresh re-read of the hlist head node with the | ||
460 | * lock reacquired. We still use the hlist traversal | ||
461 | * macro in order to get the prefetches. | ||
462 | */ | ||
463 | goto rescan; | ||
464 | } | ||
465 | |||
466 | tcp_tw_count -= killed; | ||
467 | NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed); | ||
468 | |||
469 | return ret; | ||
470 | } | ||
471 | |||
472 | static void tcp_twkill(unsigned long dummy) | ||
473 | { | ||
474 | int need_timer, ret; | ||
475 | |||
476 | spin_lock(&tw_death_lock); | ||
477 | |||
478 | if (tcp_tw_count == 0) | ||
479 | goto out; | ||
480 | |||
481 | need_timer = 0; | ||
482 | ret = tcp_do_twkill_work(tcp_tw_death_row_slot, TCP_TWKILL_QUOTA); | ||
483 | if (ret) { | ||
484 | twkill_thread_slots |= (1 << tcp_tw_death_row_slot); | ||
485 | mb(); | ||
486 | schedule_work(&tcp_twkill_work); | ||
487 | need_timer = 1; | ||
488 | } else { | ||
489 | /* We purged the entire slot, anything left? */ | ||
490 | if (tcp_tw_count) | ||
491 | need_timer = 1; | ||
492 | } | ||
493 | tcp_tw_death_row_slot = | ||
494 | ((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1)); | ||
495 | if (need_timer) | ||
496 | mod_timer(&tcp_tw_timer, jiffies + TCP_TWKILL_PERIOD); | ||
497 | out: | ||
498 | spin_unlock(&tw_death_lock); | ||
499 | } | ||
500 | |||
501 | extern void twkill_slots_invalid(void); | ||
502 | |||
503 | static void twkill_work(void *dummy) | ||
504 | { | ||
505 | int i; | ||
506 | |||
507 | if ((TCP_TWKILL_SLOTS - 1) > (sizeof(twkill_thread_slots) * 8)) | ||
508 | twkill_slots_invalid(); | ||
509 | |||
510 | while (twkill_thread_slots) { | ||
511 | spin_lock_bh(&tw_death_lock); | ||
512 | for (i = 0; i < TCP_TWKILL_SLOTS; i++) { | ||
513 | if (!(twkill_thread_slots & (1 << i))) | ||
514 | continue; | ||
515 | |||
516 | while (tcp_do_twkill_work(i, TCP_TWKILL_QUOTA) != 0) { | ||
517 | if (need_resched()) { | ||
518 | spin_unlock_bh(&tw_death_lock); | ||
519 | schedule(); | ||
520 | spin_lock_bh(&tw_death_lock); | ||
521 | } | ||
522 | } | ||
523 | |||
524 | twkill_thread_slots &= ~(1 << i); | ||
525 | } | ||
526 | spin_unlock_bh(&tw_death_lock); | ||
527 | } | ||
528 | } | ||
529 | |||
530 | /* These are always called from BH context. See callers in | ||
531 | * tcp_input.c to verify this. | ||
532 | */ | ||
533 | |||
534 | /* This is for handling early-kills of TIME_WAIT sockets. */ | ||
535 | void tcp_tw_deschedule(struct tcp_tw_bucket *tw) | ||
536 | { | ||
537 | spin_lock(&tw_death_lock); | ||
538 | if (tw_del_dead_node(tw)) { | ||
539 | tcp_tw_put(tw); | ||
540 | if (--tcp_tw_count == 0) | ||
541 | del_timer(&tcp_tw_timer); | ||
542 | } | ||
543 | spin_unlock(&tw_death_lock); | ||
544 | tcp_timewait_kill(tw); | ||
545 | } | ||
546 | |||
547 | /* Short-time timewait calendar */ | ||
548 | |||
549 | static int tcp_twcal_hand = -1; | ||
550 | static int tcp_twcal_jiffie; | ||
551 | static void tcp_twcal_tick(unsigned long); | ||
552 | static struct timer_list tcp_twcal_timer = | ||
553 | TIMER_INITIALIZER(tcp_twcal_tick, 0, 0); | ||
554 | static struct hlist_head tcp_twcal_row[TCP_TW_RECYCLE_SLOTS]; | ||
555 | |||
556 | static void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo) | ||
557 | { | ||
558 | struct hlist_head *list; | ||
559 | int slot; | ||
560 | |||
561 | /* timeout := RTO * 3.5 | ||
562 | * | ||
563 | * 3.5 = 1+2+0.5 to wait for two retransmits. | ||
564 | * | ||
565 | * RATIONALE: if FIN arrived and we entered TIME-WAIT state, | ||
566 | * our ACK acking that FIN can be lost. If N subsequent retransmitted | ||
567 | * FINs (or previous seqments) are lost (probability of such event | ||
568 | * is p^(N+1), where p is probability to lose single packet and | ||
569 | * time to detect the loss is about RTO*(2^N - 1) with exponential | ||
570 | * backoff). Normal timewait length is calculated so, that we | ||
571 | * waited at least for one retransmitted FIN (maximal RTO is 120sec). | ||
572 | * [ BTW Linux. following BSD, violates this requirement waiting | ||
573 | * only for 60sec, we should wait at least for 240 secs. | ||
574 | * Well, 240 consumes too much of resources 8) | ||
575 | * ] | ||
576 | * This interval is not reduced to catch old duplicate and | ||
577 | * responces to our wandering segments living for two MSLs. | ||
578 | * However, if we use PAWS to detect | ||
579 | * old duplicates, we can reduce the interval to bounds required | ||
580 | * by RTO, rather than MSL. So, if peer understands PAWS, we | ||
581 | * kill tw bucket after 3.5*RTO (it is important that this number | ||
582 | * is greater than TS tick!) and detect old duplicates with help | ||
583 | * of PAWS. | ||
584 | */ | ||
585 | slot = (timeo + (1<<TCP_TW_RECYCLE_TICK) - 1) >> TCP_TW_RECYCLE_TICK; | ||
586 | |||
587 | spin_lock(&tw_death_lock); | ||
588 | |||
589 | /* Unlink it, if it was scheduled */ | ||
590 | if (tw_del_dead_node(tw)) | ||
591 | tcp_tw_count--; | ||
592 | else | ||
593 | atomic_inc(&tw->tw_refcnt); | ||
594 | |||
595 | if (slot >= TCP_TW_RECYCLE_SLOTS) { | ||
596 | /* Schedule to slow timer */ | ||
597 | if (timeo >= TCP_TIMEWAIT_LEN) { | ||
598 | slot = TCP_TWKILL_SLOTS-1; | ||
599 | } else { | ||
600 | slot = (timeo + TCP_TWKILL_PERIOD-1) / TCP_TWKILL_PERIOD; | ||
601 | if (slot >= TCP_TWKILL_SLOTS) | ||
602 | slot = TCP_TWKILL_SLOTS-1; | ||
603 | } | ||
604 | tw->tw_ttd = jiffies + timeo; | ||
605 | slot = (tcp_tw_death_row_slot + slot) & (TCP_TWKILL_SLOTS - 1); | ||
606 | list = &tcp_tw_death_row[slot]; | ||
607 | } else { | ||
608 | tw->tw_ttd = jiffies + (slot << TCP_TW_RECYCLE_TICK); | ||
609 | |||
610 | if (tcp_twcal_hand < 0) { | ||
611 | tcp_twcal_hand = 0; | ||
612 | tcp_twcal_jiffie = jiffies; | ||
613 | tcp_twcal_timer.expires = tcp_twcal_jiffie + (slot<<TCP_TW_RECYCLE_TICK); | ||
614 | add_timer(&tcp_twcal_timer); | ||
615 | } else { | ||
616 | if (time_after(tcp_twcal_timer.expires, jiffies + (slot<<TCP_TW_RECYCLE_TICK))) | ||
617 | mod_timer(&tcp_twcal_timer, jiffies + (slot<<TCP_TW_RECYCLE_TICK)); | ||
618 | slot = (tcp_twcal_hand + slot)&(TCP_TW_RECYCLE_SLOTS-1); | ||
619 | } | ||
620 | list = &tcp_twcal_row[slot]; | ||
621 | } | ||
622 | |||
623 | hlist_add_head(&tw->tw_death_node, list); | ||
624 | |||
625 | if (tcp_tw_count++ == 0) | ||
626 | mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD); | ||
627 | spin_unlock(&tw_death_lock); | ||
628 | } | ||
629 | |||
630 | void tcp_twcal_tick(unsigned long dummy) | ||
631 | { | ||
632 | int n, slot; | ||
633 | unsigned long j; | ||
634 | unsigned long now = jiffies; | ||
635 | int killed = 0; | ||
636 | int adv = 0; | ||
637 | |||
638 | spin_lock(&tw_death_lock); | ||
639 | if (tcp_twcal_hand < 0) | ||
640 | goto out; | ||
641 | |||
642 | slot = tcp_twcal_hand; | ||
643 | j = tcp_twcal_jiffie; | ||
644 | |||
645 | for (n=0; n<TCP_TW_RECYCLE_SLOTS; n++) { | ||
646 | if (time_before_eq(j, now)) { | ||
647 | struct hlist_node *node, *safe; | ||
648 | struct tcp_tw_bucket *tw; | ||
649 | |||
650 | tw_for_each_inmate_safe(tw, node, safe, | ||
651 | &tcp_twcal_row[slot]) { | ||
652 | __tw_del_dead_node(tw); | ||
653 | tcp_timewait_kill(tw); | ||
654 | tcp_tw_put(tw); | ||
655 | killed++; | ||
656 | } | ||
657 | } else { | ||
658 | if (!adv) { | ||
659 | adv = 1; | ||
660 | tcp_twcal_jiffie = j; | ||
661 | tcp_twcal_hand = slot; | ||
662 | } | ||
663 | |||
664 | if (!hlist_empty(&tcp_twcal_row[slot])) { | ||
665 | mod_timer(&tcp_twcal_timer, j); | ||
666 | goto out; | ||
667 | } | ||
668 | } | ||
669 | j += (1<<TCP_TW_RECYCLE_TICK); | ||
670 | slot = (slot+1)&(TCP_TW_RECYCLE_SLOTS-1); | ||
671 | } | ||
672 | tcp_twcal_hand = -1; | ||
673 | |||
674 | out: | ||
675 | if ((tcp_tw_count -= killed) == 0) | ||
676 | del_timer(&tcp_tw_timer); | ||
677 | NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed); | ||
678 | spin_unlock(&tw_death_lock); | ||
679 | } | ||
680 | |||
681 | /* This is not only more efficient than what we used to do, it eliminates | 339 | /* This is not only more efficient than what we used to do, it eliminates |
682 | * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM | 340 | * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM |
683 | * | 341 | * |
@@ -686,75 +344,27 @@ out: | |||
686 | */ | 344 | */ |
687 | struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb) | 345 | struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb) |
688 | { | 346 | { |
689 | /* allocate the newsk from the same slab of the master sock, | 347 | struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC); |
690 | * if not, at sk_free time we'll try to free it from the wrong | ||
691 | * slabcache (i.e. is it TCPv4 or v6?), this is handled thru sk->sk_prot -acme */ | ||
692 | struct sock *newsk = sk_alloc(PF_INET, GFP_ATOMIC, sk->sk_prot, 0); | ||
693 | 348 | ||
694 | if(newsk != NULL) { | 349 | if (newsk != NULL) { |
695 | struct inet_request_sock *ireq = inet_rsk(req); | 350 | const struct inet_request_sock *ireq = inet_rsk(req); |
696 | struct tcp_request_sock *treq = tcp_rsk(req); | 351 | struct tcp_request_sock *treq = tcp_rsk(req); |
352 | struct inet_connection_sock *newicsk = inet_csk(sk); | ||
697 | struct tcp_sock *newtp; | 353 | struct tcp_sock *newtp; |
698 | struct sk_filter *filter; | ||
699 | |||
700 | memcpy(newsk, sk, sizeof(struct tcp_sock)); | ||
701 | newsk->sk_state = TCP_SYN_RECV; | ||
702 | |||
703 | /* SANITY */ | ||
704 | sk_node_init(&newsk->sk_node); | ||
705 | tcp_sk(newsk)->bind_hash = NULL; | ||
706 | |||
707 | /* Clone the TCP header template */ | ||
708 | inet_sk(newsk)->dport = ireq->rmt_port; | ||
709 | |||
710 | sock_lock_init(newsk); | ||
711 | bh_lock_sock(newsk); | ||
712 | |||
713 | rwlock_init(&newsk->sk_dst_lock); | ||
714 | atomic_set(&newsk->sk_rmem_alloc, 0); | ||
715 | skb_queue_head_init(&newsk->sk_receive_queue); | ||
716 | atomic_set(&newsk->sk_wmem_alloc, 0); | ||
717 | skb_queue_head_init(&newsk->sk_write_queue); | ||
718 | atomic_set(&newsk->sk_omem_alloc, 0); | ||
719 | newsk->sk_wmem_queued = 0; | ||
720 | newsk->sk_forward_alloc = 0; | ||
721 | |||
722 | sock_reset_flag(newsk, SOCK_DONE); | ||
723 | newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; | ||
724 | newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; | ||
725 | newsk->sk_send_head = NULL; | ||
726 | rwlock_init(&newsk->sk_callback_lock); | ||
727 | skb_queue_head_init(&newsk->sk_error_queue); | ||
728 | newsk->sk_write_space = sk_stream_write_space; | ||
729 | |||
730 | if ((filter = newsk->sk_filter) != NULL) | ||
731 | sk_filter_charge(newsk, filter); | ||
732 | |||
733 | if (unlikely(xfrm_sk_clone_policy(newsk))) { | ||
734 | /* It is still raw copy of parent, so invalidate | ||
735 | * destructor and make plain sk_free() */ | ||
736 | newsk->sk_destruct = NULL; | ||
737 | sk_free(newsk); | ||
738 | return NULL; | ||
739 | } | ||
740 | 354 | ||
741 | /* Now setup tcp_sock */ | 355 | /* Now setup tcp_sock */ |
742 | newtp = tcp_sk(newsk); | 356 | newtp = tcp_sk(newsk); |
743 | newtp->pred_flags = 0; | 357 | newtp->pred_flags = 0; |
744 | newtp->rcv_nxt = treq->rcv_isn + 1; | 358 | newtp->rcv_nxt = treq->rcv_isn + 1; |
745 | newtp->snd_nxt = treq->snt_isn + 1; | 359 | newtp->snd_nxt = newtp->snd_una = newtp->snd_sml = treq->snt_isn + 1; |
746 | newtp->snd_una = treq->snt_isn + 1; | ||
747 | newtp->snd_sml = treq->snt_isn + 1; | ||
748 | 360 | ||
749 | tcp_prequeue_init(newtp); | 361 | tcp_prequeue_init(newtp); |
750 | 362 | ||
751 | tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn); | 363 | tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn); |
752 | 364 | ||
753 | newtp->retransmits = 0; | ||
754 | newtp->backoff = 0; | ||
755 | newtp->srtt = 0; | 365 | newtp->srtt = 0; |
756 | newtp->mdev = TCP_TIMEOUT_INIT; | 366 | newtp->mdev = TCP_TIMEOUT_INIT; |
757 | newtp->rto = TCP_TIMEOUT_INIT; | 367 | newicsk->icsk_rto = TCP_TIMEOUT_INIT; |
758 | 368 | ||
759 | newtp->packets_out = 0; | 369 | newtp->packets_out = 0; |
760 | newtp->left_out = 0; | 370 | newtp->left_out = 0; |
@@ -774,9 +384,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
774 | newtp->frto_counter = 0; | 384 | newtp->frto_counter = 0; |
775 | newtp->frto_highmark = 0; | 385 | newtp->frto_highmark = 0; |
776 | 386 | ||
777 | newtp->ca_ops = &tcp_reno; | 387 | newicsk->icsk_ca_ops = &tcp_reno; |
778 | 388 | ||
779 | tcp_set_ca_state(newtp, TCP_CA_Open); | 389 | tcp_set_ca_state(newsk, TCP_CA_Open); |
780 | tcp_init_xmit_timers(newsk); | 390 | tcp_init_xmit_timers(newsk); |
781 | skb_queue_head_init(&newtp->out_of_order_queue); | 391 | skb_queue_head_init(&newtp->out_of_order_queue); |
782 | newtp->rcv_wup = treq->rcv_isn + 1; | 392 | newtp->rcv_wup = treq->rcv_isn + 1; |
@@ -789,26 +399,12 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
789 | newtp->rx_opt.dsack = 0; | 399 | newtp->rx_opt.dsack = 0; |
790 | newtp->rx_opt.eff_sacks = 0; | 400 | newtp->rx_opt.eff_sacks = 0; |
791 | 401 | ||
792 | newtp->probes_out = 0; | ||
793 | newtp->rx_opt.num_sacks = 0; | 402 | newtp->rx_opt.num_sacks = 0; |
794 | newtp->urg_data = 0; | 403 | newtp->urg_data = 0; |
795 | /* Deinitialize accept_queue to trap illegal accesses. */ | ||
796 | memset(&newtp->accept_queue, 0, sizeof(newtp->accept_queue)); | ||
797 | |||
798 | /* Back to base struct sock members. */ | ||
799 | newsk->sk_err = 0; | ||
800 | newsk->sk_priority = 0; | ||
801 | atomic_set(&newsk->sk_refcnt, 2); | ||
802 | #ifdef INET_REFCNT_DEBUG | ||
803 | atomic_inc(&inet_sock_nr); | ||
804 | #endif | ||
805 | atomic_inc(&tcp_sockets_allocated); | ||
806 | 404 | ||
807 | if (sock_flag(newsk, SOCK_KEEPOPEN)) | 405 | if (sock_flag(newsk, SOCK_KEEPOPEN)) |
808 | tcp_reset_keepalive_timer(newsk, | 406 | inet_csk_reset_keepalive_timer(newsk, |
809 | keepalive_time_when(newtp)); | 407 | keepalive_time_when(newtp)); |
810 | newsk->sk_socket = NULL; | ||
811 | newsk->sk_sleep = NULL; | ||
812 | 408 | ||
813 | newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; | 409 | newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; |
814 | if((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) { | 410 | if((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) { |
@@ -838,7 +434,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
838 | newtp->tcp_header_len = sizeof(struct tcphdr); | 434 | newtp->tcp_header_len = sizeof(struct tcphdr); |
839 | } | 435 | } |
840 | if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len) | 436 | if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len) |
841 | newtp->ack.last_seg_size = skb->len-newtp->tcp_header_len; | 437 | newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; |
842 | newtp->rx_opt.mss_clamp = req->mss; | 438 | newtp->rx_opt.mss_clamp = req->mss; |
843 | TCP_ECN_openreq_child(newtp, req); | 439 | TCP_ECN_openreq_child(newtp, req); |
844 | if (newtp->ecn_flags&TCP_ECN_OK) | 440 | if (newtp->ecn_flags&TCP_ECN_OK) |
@@ -934,9 +530,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, | |||
934 | does sequence test, SYN is truncated, and thus we consider | 530 | does sequence test, SYN is truncated, and thus we consider |
935 | it a bare ACK. | 531 | it a bare ACK. |
936 | 532 | ||
937 | If tp->defer_accept, we silently drop this bare ACK. Otherwise, | 533 | If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this |
938 | we create an established connection. Both ends (listening sockets) | 534 | bare ACK. Otherwise, we create an established connection. Both |
939 | accept the new incoming connection and try to talk to each other. 8-) | 535 | ends (listening sockets) accept the new incoming connection and try |
536 | to talk to each other. 8-) | ||
940 | 537 | ||
941 | Note: This case is both harmless, and rare. Possibility is about the | 538 | Note: This case is both harmless, and rare. Possibility is about the |
942 | same as us discovering intelligent life on another plant tomorrow. | 539 | same as us discovering intelligent life on another plant tomorrow. |
@@ -1003,7 +600,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, | |||
1003 | return NULL; | 600 | return NULL; |
1004 | 601 | ||
1005 | /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ | 602 | /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ |
1006 | if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { | 603 | if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && |
604 | TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { | ||
1007 | inet_rsk(req)->acked = 1; | 605 | inet_rsk(req)->acked = 1; |
1008 | return NULL; | 606 | return NULL; |
1009 | } | 607 | } |
@@ -1018,10 +616,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, | |||
1018 | if (child == NULL) | 616 | if (child == NULL) |
1019 | goto listen_overflow; | 617 | goto listen_overflow; |
1020 | 618 | ||
1021 | tcp_synq_unlink(tp, req, prev); | 619 | inet_csk_reqsk_queue_unlink(sk, req, prev); |
1022 | tcp_synq_removed(sk, req); | 620 | inet_csk_reqsk_queue_removed(sk, req); |
1023 | 621 | ||
1024 | tcp_acceptq_queue(sk, req, child); | 622 | inet_csk_reqsk_queue_add(sk, req, child); |
1025 | return child; | 623 | return child; |
1026 | 624 | ||
1027 | listen_overflow: | 625 | listen_overflow: |
@@ -1035,7 +633,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, | |||
1035 | if (!(flg & TCP_FLAG_RST)) | 633 | if (!(flg & TCP_FLAG_RST)) |
1036 | req->rsk_ops->send_reset(skb); | 634 | req->rsk_ops->send_reset(skb); |
1037 | 635 | ||
1038 | tcp_synq_drop(sk, req, prev); | 636 | inet_csk_reqsk_queue_drop(sk, req, prev); |
1039 | return NULL; | 637 | return NULL; |
1040 | } | 638 | } |
1041 | 639 | ||
@@ -1074,4 +672,3 @@ EXPORT_SYMBOL(tcp_check_req); | |||
1074 | EXPORT_SYMBOL(tcp_child_process); | 672 | EXPORT_SYMBOL(tcp_child_process); |
1075 | EXPORT_SYMBOL(tcp_create_openreq_child); | 673 | EXPORT_SYMBOL(tcp_create_openreq_child); |
1076 | EXPORT_SYMBOL(tcp_timewait_state_process); | 674 | EXPORT_SYMBOL(tcp_timewait_state_process); |
1077 | EXPORT_SYMBOL(tcp_tw_deschedule); | ||
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index dd30dd137b74..75b68116682a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -105,18 +105,19 @@ static __u16 tcp_advertise_mss(struct sock *sk) | |||
105 | 105 | ||
106 | /* RFC2861. Reset CWND after idle period longer RTO to "restart window". | 106 | /* RFC2861. Reset CWND after idle period longer RTO to "restart window". |
107 | * This is the first part of cwnd validation mechanism. */ | 107 | * This is the first part of cwnd validation mechanism. */ |
108 | static void tcp_cwnd_restart(struct tcp_sock *tp, struct dst_entry *dst) | 108 | static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst) |
109 | { | 109 | { |
110 | struct tcp_sock *tp = tcp_sk(sk); | ||
110 | s32 delta = tcp_time_stamp - tp->lsndtime; | 111 | s32 delta = tcp_time_stamp - tp->lsndtime; |
111 | u32 restart_cwnd = tcp_init_cwnd(tp, dst); | 112 | u32 restart_cwnd = tcp_init_cwnd(tp, dst); |
112 | u32 cwnd = tp->snd_cwnd; | 113 | u32 cwnd = tp->snd_cwnd; |
113 | 114 | ||
114 | tcp_ca_event(tp, CA_EVENT_CWND_RESTART); | 115 | tcp_ca_event(sk, CA_EVENT_CWND_RESTART); |
115 | 116 | ||
116 | tp->snd_ssthresh = tcp_current_ssthresh(tp); | 117 | tp->snd_ssthresh = tcp_current_ssthresh(sk); |
117 | restart_cwnd = min(restart_cwnd, cwnd); | 118 | restart_cwnd = min(restart_cwnd, cwnd); |
118 | 119 | ||
119 | while ((delta -= tp->rto) > 0 && cwnd > restart_cwnd) | 120 | while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd) |
120 | cwnd >>= 1; | 121 | cwnd >>= 1; |
121 | tp->snd_cwnd = max(cwnd, restart_cwnd); | 122 | tp->snd_cwnd = max(cwnd, restart_cwnd); |
122 | tp->snd_cwnd_stamp = tcp_time_stamp; | 123 | tp->snd_cwnd_stamp = tcp_time_stamp; |
@@ -126,26 +127,25 @@ static void tcp_cwnd_restart(struct tcp_sock *tp, struct dst_entry *dst) | |||
126 | static inline void tcp_event_data_sent(struct tcp_sock *tp, | 127 | static inline void tcp_event_data_sent(struct tcp_sock *tp, |
127 | struct sk_buff *skb, struct sock *sk) | 128 | struct sk_buff *skb, struct sock *sk) |
128 | { | 129 | { |
129 | u32 now = tcp_time_stamp; | 130 | struct inet_connection_sock *icsk = inet_csk(sk); |
131 | const u32 now = tcp_time_stamp; | ||
130 | 132 | ||
131 | if (!tp->packets_out && (s32)(now - tp->lsndtime) > tp->rto) | 133 | if (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto) |
132 | tcp_cwnd_restart(tp, __sk_dst_get(sk)); | 134 | tcp_cwnd_restart(sk, __sk_dst_get(sk)); |
133 | 135 | ||
134 | tp->lsndtime = now; | 136 | tp->lsndtime = now; |
135 | 137 | ||
136 | /* If it is a reply for ato after last received | 138 | /* If it is a reply for ato after last received |
137 | * packet, enter pingpong mode. | 139 | * packet, enter pingpong mode. |
138 | */ | 140 | */ |
139 | if ((u32)(now - tp->ack.lrcvtime) < tp->ack.ato) | 141 | if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) |
140 | tp->ack.pingpong = 1; | 142 | icsk->icsk_ack.pingpong = 1; |
141 | } | 143 | } |
142 | 144 | ||
143 | static __inline__ void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) | 145 | static __inline__ void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) |
144 | { | 146 | { |
145 | struct tcp_sock *tp = tcp_sk(sk); | 147 | tcp_dec_quickack_mode(sk, pkts); |
146 | 148 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); | |
147 | tcp_dec_quickack_mode(tp, pkts); | ||
148 | tcp_clear_xmit_timer(sk, TCP_TIME_DACK); | ||
149 | } | 149 | } |
150 | 150 | ||
151 | /* Determine a window scaling and initial window to offer. | 151 | /* Determine a window scaling and initial window to offer. |
@@ -265,6 +265,7 @@ static __inline__ u16 tcp_select_window(struct sock *sk) | |||
265 | static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) | 265 | static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) |
266 | { | 266 | { |
267 | if (skb != NULL) { | 267 | if (skb != NULL) { |
268 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
268 | struct inet_sock *inet = inet_sk(sk); | 269 | struct inet_sock *inet = inet_sk(sk); |
269 | struct tcp_sock *tp = tcp_sk(sk); | 270 | struct tcp_sock *tp = tcp_sk(sk); |
270 | struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); | 271 | struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); |
@@ -280,8 +281,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) | |||
280 | #define SYSCTL_FLAG_SACK 0x4 | 281 | #define SYSCTL_FLAG_SACK 0x4 |
281 | 282 | ||
282 | /* If congestion control is doing timestamping */ | 283 | /* If congestion control is doing timestamping */ |
283 | if (tp->ca_ops->rtt_sample) | 284 | if (icsk->icsk_ca_ops->rtt_sample) |
284 | do_gettimeofday(&skb->stamp); | 285 | __net_timestamp(skb); |
285 | 286 | ||
286 | sysctl_flags = 0; | 287 | sysctl_flags = 0; |
287 | if (tcb->flags & TCPCB_FLAG_SYN) { | 288 | if (tcb->flags & TCPCB_FLAG_SYN) { |
@@ -308,7 +309,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) | |||
308 | } | 309 | } |
309 | 310 | ||
310 | if (tcp_packets_in_flight(tp) == 0) | 311 | if (tcp_packets_in_flight(tp) == 0) |
311 | tcp_ca_event(tp, CA_EVENT_TX_START); | 312 | tcp_ca_event(sk, CA_EVENT_TX_START); |
312 | 313 | ||
313 | th = (struct tcphdr *) skb_push(skb, tcp_header_size); | 314 | th = (struct tcphdr *) skb_push(skb, tcp_header_size); |
314 | skb->h.th = th; | 315 | skb->h.th = th; |
@@ -366,7 +367,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) | |||
366 | if (err <= 0) | 367 | if (err <= 0) |
367 | return err; | 368 | return err; |
368 | 369 | ||
369 | tcp_enter_cwr(tp); | 370 | tcp_enter_cwr(sk); |
370 | 371 | ||
371 | /* NET_XMIT_CN is special. It does not guarantee, | 372 | /* NET_XMIT_CN is special. It does not guarantee, |
372 | * that this packet is lost. It tells that device | 373 | * that this packet is lost. It tells that device |
@@ -482,7 +483,7 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned | |||
482 | * skbs, which it never sent before. --ANK | 483 | * skbs, which it never sent before. --ANK |
483 | */ | 484 | */ |
484 | TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when; | 485 | TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when; |
485 | buff->stamp = skb->stamp; | 486 | buff->tstamp = skb->tstamp; |
486 | 487 | ||
487 | if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { | 488 | if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { |
488 | tp->lost_out -= tcp_skb_pcount(skb); | 489 | tp->lost_out -= tcp_skb_pcount(skb); |
@@ -505,7 +506,7 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned | |||
505 | 506 | ||
506 | /* Link BUFF into the send queue. */ | 507 | /* Link BUFF into the send queue. */ |
507 | skb_header_release(buff); | 508 | skb_header_release(buff); |
508 | __skb_append(skb, buff); | 509 | __skb_append(skb, buff, &sk->sk_write_queue); |
509 | 510 | ||
510 | return 0; | 511 | return 0; |
511 | } | 512 | } |
@@ -696,7 +697,7 @@ static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) | |||
696 | if (tp->packets_out > tp->snd_cwnd_used) | 697 | if (tp->packets_out > tp->snd_cwnd_used) |
697 | tp->snd_cwnd_used = tp->packets_out; | 698 | tp->snd_cwnd_used = tp->packets_out; |
698 | 699 | ||
699 | if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto) | 700 | if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto) |
700 | tcp_cwnd_application_limited(sk); | 701 | tcp_cwnd_application_limited(sk); |
701 | } | 702 | } |
702 | } | 703 | } |
@@ -893,7 +894,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, | |||
893 | 894 | ||
894 | /* Link BUFF into the send queue. */ | 895 | /* Link BUFF into the send queue. */ |
895 | skb_header_release(buff); | 896 | skb_header_release(buff); |
896 | __skb_append(skb, buff); | 897 | __skb_append(skb, buff, &sk->sk_write_queue); |
897 | 898 | ||
898 | return 0; | 899 | return 0; |
899 | } | 900 | } |
@@ -905,12 +906,13 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, | |||
905 | */ | 906 | */ |
906 | static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) | 907 | static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) |
907 | { | 908 | { |
909 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
908 | u32 send_win, cong_win, limit, in_flight; | 910 | u32 send_win, cong_win, limit, in_flight; |
909 | 911 | ||
910 | if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) | 912 | if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) |
911 | return 0; | 913 | return 0; |
912 | 914 | ||
913 | if (tp->ca_state != TCP_CA_Open) | 915 | if (icsk->icsk_ca_state != TCP_CA_Open) |
914 | return 0; | 916 | return 0; |
915 | 917 | ||
916 | in_flight = tcp_packets_in_flight(tp); | 918 | in_flight = tcp_packets_in_flight(tp); |
@@ -1147,6 +1149,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) | |||
1147 | */ | 1149 | */ |
1148 | u32 __tcp_select_window(struct sock *sk) | 1150 | u32 __tcp_select_window(struct sock *sk) |
1149 | { | 1151 | { |
1152 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
1150 | struct tcp_sock *tp = tcp_sk(sk); | 1153 | struct tcp_sock *tp = tcp_sk(sk); |
1151 | /* MSS for the peer's data. Previous verions used mss_clamp | 1154 | /* MSS for the peer's data. Previous verions used mss_clamp |
1152 | * here. I don't know if the value based on our guesses | 1155 | * here. I don't know if the value based on our guesses |
@@ -1154,7 +1157,7 @@ u32 __tcp_select_window(struct sock *sk) | |||
1154 | * but may be worse for the performance because of rcv_mss | 1157 | * but may be worse for the performance because of rcv_mss |
1155 | * fluctuations. --SAW 1998/11/1 | 1158 | * fluctuations. --SAW 1998/11/1 |
1156 | */ | 1159 | */ |
1157 | int mss = tp->ack.rcv_mss; | 1160 | int mss = icsk->icsk_ack.rcv_mss; |
1158 | int free_space = tcp_space(sk); | 1161 | int free_space = tcp_space(sk); |
1159 | int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk)); | 1162 | int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk)); |
1160 | int window; | 1163 | int window; |
@@ -1163,7 +1166,7 @@ u32 __tcp_select_window(struct sock *sk) | |||
1163 | mss = full_space; | 1166 | mss = full_space; |
1164 | 1167 | ||
1165 | if (free_space < full_space/2) { | 1168 | if (free_space < full_space/2) { |
1166 | tp->ack.quick = 0; | 1169 | icsk->icsk_ack.quick = 0; |
1167 | 1170 | ||
1168 | if (tcp_memory_pressure) | 1171 | if (tcp_memory_pressure) |
1169 | tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss); | 1172 | tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss); |
@@ -1238,7 +1241,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m | |||
1238 | tcp_skb_pcount(next_skb) != 1); | 1241 | tcp_skb_pcount(next_skb) != 1); |
1239 | 1242 | ||
1240 | /* Ok. We will be able to collapse the packet. */ | 1243 | /* Ok. We will be able to collapse the packet. */ |
1241 | __skb_unlink(next_skb, next_skb->list); | 1244 | __skb_unlink(next_skb, &sk->sk_write_queue); |
1242 | 1245 | ||
1243 | memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); | 1246 | memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); |
1244 | 1247 | ||
@@ -1286,6 +1289,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m | |||
1286 | */ | 1289 | */ |
1287 | void tcp_simple_retransmit(struct sock *sk) | 1290 | void tcp_simple_retransmit(struct sock *sk) |
1288 | { | 1291 | { |
1292 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
1289 | struct tcp_sock *tp = tcp_sk(sk); | 1293 | struct tcp_sock *tp = tcp_sk(sk); |
1290 | struct sk_buff *skb; | 1294 | struct sk_buff *skb; |
1291 | unsigned int mss = tcp_current_mss(sk, 0); | 1295 | unsigned int mss = tcp_current_mss(sk, 0); |
@@ -1316,12 +1320,12 @@ void tcp_simple_retransmit(struct sock *sk) | |||
1316 | * in network, but units changed and effective | 1320 | * in network, but units changed and effective |
1317 | * cwnd/ssthresh really reduced now. | 1321 | * cwnd/ssthresh really reduced now. |
1318 | */ | 1322 | */ |
1319 | if (tp->ca_state != TCP_CA_Loss) { | 1323 | if (icsk->icsk_ca_state != TCP_CA_Loss) { |
1320 | tp->high_seq = tp->snd_nxt; | 1324 | tp->high_seq = tp->snd_nxt; |
1321 | tp->snd_ssthresh = tcp_current_ssthresh(tp); | 1325 | tp->snd_ssthresh = tcp_current_ssthresh(sk); |
1322 | tp->prior_ssthresh = 0; | 1326 | tp->prior_ssthresh = 0; |
1323 | tp->undo_marker = 0; | 1327 | tp->undo_marker = 0; |
1324 | tcp_set_ca_state(tp, TCP_CA_Loss); | 1328 | tcp_set_ca_state(sk, TCP_CA_Loss); |
1325 | } | 1329 | } |
1326 | tcp_xmit_retransmit_queue(sk); | 1330 | tcp_xmit_retransmit_queue(sk); |
1327 | } | 1331 | } |
@@ -1461,6 +1465,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
1461 | */ | 1465 | */ |
1462 | void tcp_xmit_retransmit_queue(struct sock *sk) | 1466 | void tcp_xmit_retransmit_queue(struct sock *sk) |
1463 | { | 1467 | { |
1468 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
1464 | struct tcp_sock *tp = tcp_sk(sk); | 1469 | struct tcp_sock *tp = tcp_sk(sk); |
1465 | struct sk_buff *skb; | 1470 | struct sk_buff *skb; |
1466 | int packet_cnt = tp->lost_out; | 1471 | int packet_cnt = tp->lost_out; |
@@ -1484,14 +1489,16 @@ void tcp_xmit_retransmit_queue(struct sock *sk) | |||
1484 | if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { | 1489 | if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { |
1485 | if (tcp_retransmit_skb(sk, skb)) | 1490 | if (tcp_retransmit_skb(sk, skb)) |
1486 | return; | 1491 | return; |
1487 | if (tp->ca_state != TCP_CA_Loss) | 1492 | if (icsk->icsk_ca_state != TCP_CA_Loss) |
1488 | NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS); | 1493 | NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS); |
1489 | else | 1494 | else |
1490 | NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS); | 1495 | NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS); |
1491 | 1496 | ||
1492 | if (skb == | 1497 | if (skb == |
1493 | skb_peek(&sk->sk_write_queue)) | 1498 | skb_peek(&sk->sk_write_queue)) |
1494 | tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); | 1499 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
1500 | inet_csk(sk)->icsk_rto, | ||
1501 | TCP_RTO_MAX); | ||
1495 | } | 1502 | } |
1496 | 1503 | ||
1497 | packet_cnt -= tcp_skb_pcount(skb); | 1504 | packet_cnt -= tcp_skb_pcount(skb); |
@@ -1504,7 +1511,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) | |||
1504 | /* OK, demanded retransmission is finished. */ | 1511 | /* OK, demanded retransmission is finished. */ |
1505 | 1512 | ||
1506 | /* Forward retransmissions are possible only during Recovery. */ | 1513 | /* Forward retransmissions are possible only during Recovery. */ |
1507 | if (tp->ca_state != TCP_CA_Recovery) | 1514 | if (icsk->icsk_ca_state != TCP_CA_Recovery) |
1508 | return; | 1515 | return; |
1509 | 1516 | ||
1510 | /* No forward retransmissions in Reno are possible. */ | 1517 | /* No forward retransmissions in Reno are possible. */ |
@@ -1544,7 +1551,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk) | |||
1544 | break; | 1551 | break; |
1545 | 1552 | ||
1546 | if (skb == skb_peek(&sk->sk_write_queue)) | 1553 | if (skb == skb_peek(&sk->sk_write_queue)) |
1547 | tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); | 1554 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
1555 | inet_csk(sk)->icsk_rto, | ||
1556 | TCP_RTO_MAX); | ||
1548 | 1557 | ||
1549 | NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS); | 1558 | NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS); |
1550 | } | 1559 | } |
@@ -1573,7 +1582,7 @@ void tcp_send_fin(struct sock *sk) | |||
1573 | } else { | 1582 | } else { |
1574 | /* Socket is locked, keep trying until memory is available. */ | 1583 | /* Socket is locked, keep trying until memory is available. */ |
1575 | for (;;) { | 1584 | for (;;) { |
1576 | skb = alloc_skb(MAX_TCP_HEADER, GFP_KERNEL); | 1585 | skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL); |
1577 | if (skb) | 1586 | if (skb) |
1578 | break; | 1587 | break; |
1579 | yield(); | 1588 | yield(); |
@@ -1780,8 +1789,8 @@ static inline void tcp_connect_init(struct sock *sk) | |||
1780 | tp->rcv_wup = 0; | 1789 | tp->rcv_wup = 0; |
1781 | tp->copied_seq = 0; | 1790 | tp->copied_seq = 0; |
1782 | 1791 | ||
1783 | tp->rto = TCP_TIMEOUT_INIT; | 1792 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; |
1784 | tp->retransmits = 0; | 1793 | inet_csk(sk)->icsk_retransmits = 0; |
1785 | tcp_clear_retrans(tp); | 1794 | tcp_clear_retrans(tp); |
1786 | } | 1795 | } |
1787 | 1796 | ||
@@ -1795,7 +1804,7 @@ int tcp_connect(struct sock *sk) | |||
1795 | 1804 | ||
1796 | tcp_connect_init(sk); | 1805 | tcp_connect_init(sk); |
1797 | 1806 | ||
1798 | buff = alloc_skb(MAX_TCP_HEADER + 15, sk->sk_allocation); | 1807 | buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation); |
1799 | if (unlikely(buff == NULL)) | 1808 | if (unlikely(buff == NULL)) |
1800 | return -ENOBUFS; | 1809 | return -ENOBUFS; |
1801 | 1810 | ||
@@ -1824,7 +1833,8 @@ int tcp_connect(struct sock *sk) | |||
1824 | TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); | 1833 | TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); |
1825 | 1834 | ||
1826 | /* Timer for repeating the SYN until an answer. */ | 1835 | /* Timer for repeating the SYN until an answer. */ |
1827 | tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); | 1836 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
1837 | inet_csk(sk)->icsk_rto, TCP_RTO_MAX); | ||
1828 | return 0; | 1838 | return 0; |
1829 | } | 1839 | } |
1830 | 1840 | ||
@@ -1834,20 +1844,21 @@ int tcp_connect(struct sock *sk) | |||
1834 | */ | 1844 | */ |
1835 | void tcp_send_delayed_ack(struct sock *sk) | 1845 | void tcp_send_delayed_ack(struct sock *sk) |
1836 | { | 1846 | { |
1837 | struct tcp_sock *tp = tcp_sk(sk); | 1847 | struct inet_connection_sock *icsk = inet_csk(sk); |
1838 | int ato = tp->ack.ato; | 1848 | int ato = icsk->icsk_ack.ato; |
1839 | unsigned long timeout; | 1849 | unsigned long timeout; |
1840 | 1850 | ||
1841 | if (ato > TCP_DELACK_MIN) { | 1851 | if (ato > TCP_DELACK_MIN) { |
1852 | const struct tcp_sock *tp = tcp_sk(sk); | ||
1842 | int max_ato = HZ/2; | 1853 | int max_ato = HZ/2; |
1843 | 1854 | ||
1844 | if (tp->ack.pingpong || (tp->ack.pending&TCP_ACK_PUSHED)) | 1855 | if (icsk->icsk_ack.pingpong || (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)) |
1845 | max_ato = TCP_DELACK_MAX; | 1856 | max_ato = TCP_DELACK_MAX; |
1846 | 1857 | ||
1847 | /* Slow path, intersegment interval is "high". */ | 1858 | /* Slow path, intersegment interval is "high". */ |
1848 | 1859 | ||
1849 | /* If some rtt estimate is known, use it to bound delayed ack. | 1860 | /* If some rtt estimate is known, use it to bound delayed ack. |
1850 | * Do not use tp->rto here, use results of rtt measurements | 1861 | * Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements |
1851 | * directly. | 1862 | * directly. |
1852 | */ | 1863 | */ |
1853 | if (tp->srtt) { | 1864 | if (tp->srtt) { |
@@ -1864,21 +1875,22 @@ void tcp_send_delayed_ack(struct sock *sk) | |||
1864 | timeout = jiffies + ato; | 1875 | timeout = jiffies + ato; |
1865 | 1876 | ||
1866 | /* Use new timeout only if there wasn't a older one earlier. */ | 1877 | /* Use new timeout only if there wasn't a older one earlier. */ |
1867 | if (tp->ack.pending&TCP_ACK_TIMER) { | 1878 | if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) { |
1868 | /* If delack timer was blocked or is about to expire, | 1879 | /* If delack timer was blocked or is about to expire, |
1869 | * send ACK now. | 1880 | * send ACK now. |
1870 | */ | 1881 | */ |
1871 | if (tp->ack.blocked || time_before_eq(tp->ack.timeout, jiffies+(ato>>2))) { | 1882 | if (icsk->icsk_ack.blocked || |
1883 | time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) { | ||
1872 | tcp_send_ack(sk); | 1884 | tcp_send_ack(sk); |
1873 | return; | 1885 | return; |
1874 | } | 1886 | } |
1875 | 1887 | ||
1876 | if (!time_before(timeout, tp->ack.timeout)) | 1888 | if (!time_before(timeout, icsk->icsk_ack.timeout)) |
1877 | timeout = tp->ack.timeout; | 1889 | timeout = icsk->icsk_ack.timeout; |
1878 | } | 1890 | } |
1879 | tp->ack.pending |= TCP_ACK_SCHED|TCP_ACK_TIMER; | 1891 | icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER; |
1880 | tp->ack.timeout = timeout; | 1892 | icsk->icsk_ack.timeout = timeout; |
1881 | sk_reset_timer(sk, &tp->delack_timer, timeout); | 1893 | sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); |
1882 | } | 1894 | } |
1883 | 1895 | ||
1884 | /* This routine sends an ack and also updates the window. */ | 1896 | /* This routine sends an ack and also updates the window. */ |
@@ -1895,9 +1907,10 @@ void tcp_send_ack(struct sock *sk) | |||
1895 | */ | 1907 | */ |
1896 | buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); | 1908 | buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); |
1897 | if (buff == NULL) { | 1909 | if (buff == NULL) { |
1898 | tcp_schedule_ack(tp); | 1910 | inet_csk_schedule_ack(sk); |
1899 | tp->ack.ato = TCP_ATO_MIN; | 1911 | inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; |
1900 | tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX); | 1912 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, |
1913 | TCP_DELACK_MAX, TCP_RTO_MAX); | ||
1901 | return; | 1914 | return; |
1902 | } | 1915 | } |
1903 | 1916 | ||
@@ -2011,6 +2024,7 @@ int tcp_write_wakeup(struct sock *sk) | |||
2011 | */ | 2024 | */ |
2012 | void tcp_send_probe0(struct sock *sk) | 2025 | void tcp_send_probe0(struct sock *sk) |
2013 | { | 2026 | { |
2027 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
2014 | struct tcp_sock *tp = tcp_sk(sk); | 2028 | struct tcp_sock *tp = tcp_sk(sk); |
2015 | int err; | 2029 | int err; |
2016 | 2030 | ||
@@ -2018,28 +2032,31 @@ void tcp_send_probe0(struct sock *sk) | |||
2018 | 2032 | ||
2019 | if (tp->packets_out || !sk->sk_send_head) { | 2033 | if (tp->packets_out || !sk->sk_send_head) { |
2020 | /* Cancel probe timer, if it is not required. */ | 2034 | /* Cancel probe timer, if it is not required. */ |
2021 | tp->probes_out = 0; | 2035 | icsk->icsk_probes_out = 0; |
2022 | tp->backoff = 0; | 2036 | icsk->icsk_backoff = 0; |
2023 | return; | 2037 | return; |
2024 | } | 2038 | } |
2025 | 2039 | ||
2026 | if (err <= 0) { | 2040 | if (err <= 0) { |
2027 | if (tp->backoff < sysctl_tcp_retries2) | 2041 | if (icsk->icsk_backoff < sysctl_tcp_retries2) |
2028 | tp->backoff++; | 2042 | icsk->icsk_backoff++; |
2029 | tp->probes_out++; | 2043 | icsk->icsk_probes_out++; |
2030 | tcp_reset_xmit_timer (sk, TCP_TIME_PROBE0, | 2044 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, |
2031 | min(tp->rto << tp->backoff, TCP_RTO_MAX)); | 2045 | min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX), |
2046 | TCP_RTO_MAX); | ||
2032 | } else { | 2047 | } else { |
2033 | /* If packet was not sent due to local congestion, | 2048 | /* If packet was not sent due to local congestion, |
2034 | * do not backoff and do not remember probes_out. | 2049 | * do not backoff and do not remember icsk_probes_out. |
2035 | * Let local senders to fight for local resources. | 2050 | * Let local senders to fight for local resources. |
2036 | * | 2051 | * |
2037 | * Use accumulated backoff yet. | 2052 | * Use accumulated backoff yet. |
2038 | */ | 2053 | */ |
2039 | if (!tp->probes_out) | 2054 | if (!icsk->icsk_probes_out) |
2040 | tp->probes_out=1; | 2055 | icsk->icsk_probes_out = 1; |
2041 | tcp_reset_xmit_timer (sk, TCP_TIME_PROBE0, | 2056 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, |
2042 | min(tp->rto << tp->backoff, TCP_RESOURCE_PROBE_INTERVAL)); | 2057 | min(icsk->icsk_rto << icsk->icsk_backoff, |
2058 | TCP_RESOURCE_PROBE_INTERVAL), | ||
2059 | TCP_RTO_MAX); | ||
2043 | } | 2060 | } |
2044 | } | 2061 | } |
2045 | 2062 | ||
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 70e108e15c71..327770bf5522 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c | |||
@@ -16,9 +16,10 @@ | |||
16 | #define TCP_SCALABLE_AI_CNT 50U | 16 | #define TCP_SCALABLE_AI_CNT 50U |
17 | #define TCP_SCALABLE_MD_SCALE 3 | 17 | #define TCP_SCALABLE_MD_SCALE 3 |
18 | 18 | ||
19 | static void tcp_scalable_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, | 19 | static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt, |
20 | u32 in_flight, int flag) | 20 | u32 in_flight, int flag) |
21 | { | 21 | { |
22 | struct tcp_sock *tp = tcp_sk(sk); | ||
22 | if (in_flight < tp->snd_cwnd) | 23 | if (in_flight < tp->snd_cwnd) |
23 | return; | 24 | return; |
24 | 25 | ||
@@ -35,8 +36,9 @@ static void tcp_scalable_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, | |||
35 | tp->snd_cwnd_stamp = tcp_time_stamp; | 36 | tp->snd_cwnd_stamp = tcp_time_stamp; |
36 | } | 37 | } |
37 | 38 | ||
38 | static u32 tcp_scalable_ssthresh(struct tcp_sock *tp) | 39 | static u32 tcp_scalable_ssthresh(struct sock *sk) |
39 | { | 40 | { |
41 | const struct tcp_sock *tp = tcp_sk(sk); | ||
40 | return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U); | 42 | return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U); |
41 | } | 43 | } |
42 | 44 | ||
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 0084227438c2..415ee47ac1c5 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -36,49 +36,13 @@ static void tcp_write_timer(unsigned long); | |||
36 | static void tcp_delack_timer(unsigned long); | 36 | static void tcp_delack_timer(unsigned long); |
37 | static void tcp_keepalive_timer (unsigned long data); | 37 | static void tcp_keepalive_timer (unsigned long data); |
38 | 38 | ||
39 | #ifdef TCP_DEBUG | ||
40 | const char tcp_timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n"; | ||
41 | EXPORT_SYMBOL(tcp_timer_bug_msg); | ||
42 | #endif | ||
43 | |||
44 | /* | ||
45 | * Using different timers for retransmit, delayed acks and probes | ||
46 | * We may wish use just one timer maintaining a list of expire jiffies | ||
47 | * to optimize. | ||
48 | */ | ||
49 | |||
50 | void tcp_init_xmit_timers(struct sock *sk) | 39 | void tcp_init_xmit_timers(struct sock *sk) |
51 | { | 40 | { |
52 | struct tcp_sock *tp = tcp_sk(sk); | 41 | inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, |
53 | 42 | &tcp_keepalive_timer); | |
54 | init_timer(&tp->retransmit_timer); | ||
55 | tp->retransmit_timer.function=&tcp_write_timer; | ||
56 | tp->retransmit_timer.data = (unsigned long) sk; | ||
57 | tp->pending = 0; | ||
58 | |||
59 | init_timer(&tp->delack_timer); | ||
60 | tp->delack_timer.function=&tcp_delack_timer; | ||
61 | tp->delack_timer.data = (unsigned long) sk; | ||
62 | tp->ack.pending = 0; | ||
63 | |||
64 | init_timer(&sk->sk_timer); | ||
65 | sk->sk_timer.function = &tcp_keepalive_timer; | ||
66 | sk->sk_timer.data = (unsigned long)sk; | ||
67 | } | 43 | } |
68 | 44 | ||
69 | void tcp_clear_xmit_timers(struct sock *sk) | 45 | EXPORT_SYMBOL(tcp_init_xmit_timers); |
70 | { | ||
71 | struct tcp_sock *tp = tcp_sk(sk); | ||
72 | |||
73 | tp->pending = 0; | ||
74 | sk_stop_timer(sk, &tp->retransmit_timer); | ||
75 | |||
76 | tp->ack.pending = 0; | ||
77 | tp->ack.blocked = 0; | ||
78 | sk_stop_timer(sk, &tp->delack_timer); | ||
79 | |||
80 | sk_stop_timer(sk, &sk->sk_timer); | ||
81 | } | ||
82 | 46 | ||
83 | static void tcp_write_err(struct sock *sk) | 47 | static void tcp_write_err(struct sock *sk) |
84 | { | 48 | { |
@@ -155,15 +119,15 @@ static int tcp_orphan_retries(struct sock *sk, int alive) | |||
155 | /* A write timeout has occurred. Process the after effects. */ | 119 | /* A write timeout has occurred. Process the after effects. */ |
156 | static int tcp_write_timeout(struct sock *sk) | 120 | static int tcp_write_timeout(struct sock *sk) |
157 | { | 121 | { |
158 | struct tcp_sock *tp = tcp_sk(sk); | 122 | const struct inet_connection_sock *icsk = inet_csk(sk); |
159 | int retry_until; | 123 | int retry_until; |
160 | 124 | ||
161 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { | 125 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { |
162 | if (tp->retransmits) | 126 | if (icsk->icsk_retransmits) |
163 | dst_negative_advice(&sk->sk_dst_cache); | 127 | dst_negative_advice(&sk->sk_dst_cache); |
164 | retry_until = tp->syn_retries ? : sysctl_tcp_syn_retries; | 128 | retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; |
165 | } else { | 129 | } else { |
166 | if (tp->retransmits >= sysctl_tcp_retries1) { | 130 | if (icsk->icsk_retransmits >= sysctl_tcp_retries1) { |
167 | /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black | 131 | /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black |
168 | hole detection. :-( | 132 | hole detection. :-( |
169 | 133 | ||
@@ -189,16 +153,16 @@ static int tcp_write_timeout(struct sock *sk) | |||
189 | 153 | ||
190 | retry_until = sysctl_tcp_retries2; | 154 | retry_until = sysctl_tcp_retries2; |
191 | if (sock_flag(sk, SOCK_DEAD)) { | 155 | if (sock_flag(sk, SOCK_DEAD)) { |
192 | int alive = (tp->rto < TCP_RTO_MAX); | 156 | const int alive = (icsk->icsk_rto < TCP_RTO_MAX); |
193 | 157 | ||
194 | retry_until = tcp_orphan_retries(sk, alive); | 158 | retry_until = tcp_orphan_retries(sk, alive); |
195 | 159 | ||
196 | if (tcp_out_of_resources(sk, alive || tp->retransmits < retry_until)) | 160 | if (tcp_out_of_resources(sk, alive || icsk->icsk_retransmits < retry_until)) |
197 | return 1; | 161 | return 1; |
198 | } | 162 | } |
199 | } | 163 | } |
200 | 164 | ||
201 | if (tp->retransmits >= retry_until) { | 165 | if (icsk->icsk_retransmits >= retry_until) { |
202 | /* Has it gone just too far? */ | 166 | /* Has it gone just too far? */ |
203 | tcp_write_err(sk); | 167 | tcp_write_err(sk); |
204 | return 1; | 168 | return 1; |
@@ -210,26 +174,27 @@ static void tcp_delack_timer(unsigned long data) | |||
210 | { | 174 | { |
211 | struct sock *sk = (struct sock*)data; | 175 | struct sock *sk = (struct sock*)data; |
212 | struct tcp_sock *tp = tcp_sk(sk); | 176 | struct tcp_sock *tp = tcp_sk(sk); |
177 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
213 | 178 | ||
214 | bh_lock_sock(sk); | 179 | bh_lock_sock(sk); |
215 | if (sock_owned_by_user(sk)) { | 180 | if (sock_owned_by_user(sk)) { |
216 | /* Try again later. */ | 181 | /* Try again later. */ |
217 | tp->ack.blocked = 1; | 182 | icsk->icsk_ack.blocked = 1; |
218 | NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED); | 183 | NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED); |
219 | sk_reset_timer(sk, &tp->delack_timer, jiffies + TCP_DELACK_MIN); | 184 | sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN); |
220 | goto out_unlock; | 185 | goto out_unlock; |
221 | } | 186 | } |
222 | 187 | ||
223 | sk_stream_mem_reclaim(sk); | 188 | sk_stream_mem_reclaim(sk); |
224 | 189 | ||
225 | if (sk->sk_state == TCP_CLOSE || !(tp->ack.pending & TCP_ACK_TIMER)) | 190 | if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) |
226 | goto out; | 191 | goto out; |
227 | 192 | ||
228 | if (time_after(tp->ack.timeout, jiffies)) { | 193 | if (time_after(icsk->icsk_ack.timeout, jiffies)) { |
229 | sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout); | 194 | sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); |
230 | goto out; | 195 | goto out; |
231 | } | 196 | } |
232 | tp->ack.pending &= ~TCP_ACK_TIMER; | 197 | icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; |
233 | 198 | ||
234 | if (!skb_queue_empty(&tp->ucopy.prequeue)) { | 199 | if (!skb_queue_empty(&tp->ucopy.prequeue)) { |
235 | struct sk_buff *skb; | 200 | struct sk_buff *skb; |
@@ -242,16 +207,16 @@ static void tcp_delack_timer(unsigned long data) | |||
242 | tp->ucopy.memory = 0; | 207 | tp->ucopy.memory = 0; |
243 | } | 208 | } |
244 | 209 | ||
245 | if (tcp_ack_scheduled(tp)) { | 210 | if (inet_csk_ack_scheduled(sk)) { |
246 | if (!tp->ack.pingpong) { | 211 | if (!icsk->icsk_ack.pingpong) { |
247 | /* Delayed ACK missed: inflate ATO. */ | 212 | /* Delayed ACK missed: inflate ATO. */ |
248 | tp->ack.ato = min(tp->ack.ato << 1, tp->rto); | 213 | icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto); |
249 | } else { | 214 | } else { |
250 | /* Delayed ACK missed: leave pingpong mode and | 215 | /* Delayed ACK missed: leave pingpong mode and |
251 | * deflate ATO. | 216 | * deflate ATO. |
252 | */ | 217 | */ |
253 | tp->ack.pingpong = 0; | 218 | icsk->icsk_ack.pingpong = 0; |
254 | tp->ack.ato = TCP_ATO_MIN; | 219 | icsk->icsk_ack.ato = TCP_ATO_MIN; |
255 | } | 220 | } |
256 | tcp_send_ack(sk); | 221 | tcp_send_ack(sk); |
257 | NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS); | 222 | NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS); |
@@ -268,11 +233,12 @@ out_unlock: | |||
268 | 233 | ||
269 | static void tcp_probe_timer(struct sock *sk) | 234 | static void tcp_probe_timer(struct sock *sk) |
270 | { | 235 | { |
236 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
271 | struct tcp_sock *tp = tcp_sk(sk); | 237 | struct tcp_sock *tp = tcp_sk(sk); |
272 | int max_probes; | 238 | int max_probes; |
273 | 239 | ||
274 | if (tp->packets_out || !sk->sk_send_head) { | 240 | if (tp->packets_out || !sk->sk_send_head) { |
275 | tp->probes_out = 0; | 241 | icsk->icsk_probes_out = 0; |
276 | return; | 242 | return; |
277 | } | 243 | } |
278 | 244 | ||
@@ -283,7 +249,7 @@ static void tcp_probe_timer(struct sock *sk) | |||
283 | * FIXME: We ought not to do it, Solaris 2.5 actually has fixing | 249 | * FIXME: We ought not to do it, Solaris 2.5 actually has fixing |
284 | * this behaviour in Solaris down as a bug fix. [AC] | 250 | * this behaviour in Solaris down as a bug fix. [AC] |
285 | * | 251 | * |
286 | * Let me to explain. probes_out is zeroed by incoming ACKs | 252 | * Let me to explain. icsk_probes_out is zeroed by incoming ACKs |
287 | * even if they advertise zero window. Hence, connection is killed only | 253 | * even if they advertise zero window. Hence, connection is killed only |
288 | * if we received no ACKs for normal connection timeout. It is not killed | 254 | * if we received no ACKs for normal connection timeout. It is not killed |
289 | * only because window stays zero for some time, window may be zero | 255 | * only because window stays zero for some time, window may be zero |
@@ -294,15 +260,15 @@ static void tcp_probe_timer(struct sock *sk) | |||
294 | max_probes = sysctl_tcp_retries2; | 260 | max_probes = sysctl_tcp_retries2; |
295 | 261 | ||
296 | if (sock_flag(sk, SOCK_DEAD)) { | 262 | if (sock_flag(sk, SOCK_DEAD)) { |
297 | int alive = ((tp->rto<<tp->backoff) < TCP_RTO_MAX); | 263 | const int alive = ((icsk->icsk_rto << icsk->icsk_backoff) < TCP_RTO_MAX); |
298 | 264 | ||
299 | max_probes = tcp_orphan_retries(sk, alive); | 265 | max_probes = tcp_orphan_retries(sk, alive); |
300 | 266 | ||
301 | if (tcp_out_of_resources(sk, alive || tp->probes_out <= max_probes)) | 267 | if (tcp_out_of_resources(sk, alive || icsk->icsk_probes_out <= max_probes)) |
302 | return; | 268 | return; |
303 | } | 269 | } |
304 | 270 | ||
305 | if (tp->probes_out > max_probes) { | 271 | if (icsk->icsk_probes_out > max_probes) { |
306 | tcp_write_err(sk); | 272 | tcp_write_err(sk); |
307 | } else { | 273 | } else { |
308 | /* Only send another probe if we didn't close things up. */ | 274 | /* Only send another probe if we didn't close things up. */ |
@@ -317,6 +283,7 @@ static void tcp_probe_timer(struct sock *sk) | |||
317 | static void tcp_retransmit_timer(struct sock *sk) | 283 | static void tcp_retransmit_timer(struct sock *sk) |
318 | { | 284 | { |
319 | struct tcp_sock *tp = tcp_sk(sk); | 285 | struct tcp_sock *tp = tcp_sk(sk); |
286 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
320 | 287 | ||
321 | if (!tp->packets_out) | 288 | if (!tp->packets_out) |
322 | goto out; | 289 | goto out; |
@@ -351,20 +318,21 @@ static void tcp_retransmit_timer(struct sock *sk) | |||
351 | if (tcp_write_timeout(sk)) | 318 | if (tcp_write_timeout(sk)) |
352 | goto out; | 319 | goto out; |
353 | 320 | ||
354 | if (tp->retransmits == 0) { | 321 | if (icsk->icsk_retransmits == 0) { |
355 | if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) { | 322 | if (icsk->icsk_ca_state == TCP_CA_Disorder || |
323 | icsk->icsk_ca_state == TCP_CA_Recovery) { | ||
356 | if (tp->rx_opt.sack_ok) { | 324 | if (tp->rx_opt.sack_ok) { |
357 | if (tp->ca_state == TCP_CA_Recovery) | 325 | if (icsk->icsk_ca_state == TCP_CA_Recovery) |
358 | NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL); | 326 | NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL); |
359 | else | 327 | else |
360 | NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES); | 328 | NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES); |
361 | } else { | 329 | } else { |
362 | if (tp->ca_state == TCP_CA_Recovery) | 330 | if (icsk->icsk_ca_state == TCP_CA_Recovery) |
363 | NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL); | 331 | NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL); |
364 | else | 332 | else |
365 | NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES); | 333 | NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES); |
366 | } | 334 | } |
367 | } else if (tp->ca_state == TCP_CA_Loss) { | 335 | } else if (icsk->icsk_ca_state == TCP_CA_Loss) { |
368 | NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES); | 336 | NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES); |
369 | } else { | 337 | } else { |
370 | NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS); | 338 | NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS); |
@@ -381,10 +349,11 @@ static void tcp_retransmit_timer(struct sock *sk) | |||
381 | /* Retransmission failed because of local congestion, | 349 | /* Retransmission failed because of local congestion, |
382 | * do not backoff. | 350 | * do not backoff. |
383 | */ | 351 | */ |
384 | if (!tp->retransmits) | 352 | if (!icsk->icsk_retransmits) |
385 | tp->retransmits=1; | 353 | icsk->icsk_retransmits = 1; |
386 | tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, | 354 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
387 | min(tp->rto, TCP_RESOURCE_PROBE_INTERVAL)); | 355 | min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL), |
356 | TCP_RTO_MAX); | ||
388 | goto out; | 357 | goto out; |
389 | } | 358 | } |
390 | 359 | ||
@@ -403,13 +372,13 @@ static void tcp_retransmit_timer(struct sock *sk) | |||
403 | * implemented ftp to mars will work nicely. We will have to fix | 372 | * implemented ftp to mars will work nicely. We will have to fix |
404 | * the 120 second clamps though! | 373 | * the 120 second clamps though! |
405 | */ | 374 | */ |
406 | tp->backoff++; | 375 | icsk->icsk_backoff++; |
407 | tp->retransmits++; | 376 | icsk->icsk_retransmits++; |
408 | 377 | ||
409 | out_reset_timer: | 378 | out_reset_timer: |
410 | tp->rto = min(tp->rto << 1, TCP_RTO_MAX); | 379 | icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); |
411 | tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); | 380 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); |
412 | if (tp->retransmits > sysctl_tcp_retries1) | 381 | if (icsk->icsk_retransmits > sysctl_tcp_retries1) |
413 | __sk_dst_reset(sk); | 382 | __sk_dst_reset(sk); |
414 | 383 | ||
415 | out:; | 384 | out:; |
@@ -418,32 +387,32 @@ out:; | |||
418 | static void tcp_write_timer(unsigned long data) | 387 | static void tcp_write_timer(unsigned long data) |
419 | { | 388 | { |
420 | struct sock *sk = (struct sock*)data; | 389 | struct sock *sk = (struct sock*)data; |
421 | struct tcp_sock *tp = tcp_sk(sk); | 390 | struct inet_connection_sock *icsk = inet_csk(sk); |
422 | int event; | 391 | int event; |
423 | 392 | ||
424 | bh_lock_sock(sk); | 393 | bh_lock_sock(sk); |
425 | if (sock_owned_by_user(sk)) { | 394 | if (sock_owned_by_user(sk)) { |
426 | /* Try again later */ | 395 | /* Try again later */ |
427 | sk_reset_timer(sk, &tp->retransmit_timer, jiffies + (HZ / 20)); | 396 | sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20)); |
428 | goto out_unlock; | 397 | goto out_unlock; |
429 | } | 398 | } |
430 | 399 | ||
431 | if (sk->sk_state == TCP_CLOSE || !tp->pending) | 400 | if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending) |
432 | goto out; | 401 | goto out; |
433 | 402 | ||
434 | if (time_after(tp->timeout, jiffies)) { | 403 | if (time_after(icsk->icsk_timeout, jiffies)) { |
435 | sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout); | 404 | sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); |
436 | goto out; | 405 | goto out; |
437 | } | 406 | } |
438 | 407 | ||
439 | event = tp->pending; | 408 | event = icsk->icsk_pending; |
440 | tp->pending = 0; | 409 | icsk->icsk_pending = 0; |
441 | 410 | ||
442 | switch (event) { | 411 | switch (event) { |
443 | case TCP_TIME_RETRANS: | 412 | case ICSK_TIME_RETRANS: |
444 | tcp_retransmit_timer(sk); | 413 | tcp_retransmit_timer(sk); |
445 | break; | 414 | break; |
446 | case TCP_TIME_PROBE0: | 415 | case ICSK_TIME_PROBE0: |
447 | tcp_probe_timer(sk); | 416 | tcp_probe_timer(sk); |
448 | break; | 417 | break; |
449 | } | 418 | } |
@@ -462,96 +431,8 @@ out_unlock: | |||
462 | 431 | ||
463 | static void tcp_synack_timer(struct sock *sk) | 432 | static void tcp_synack_timer(struct sock *sk) |
464 | { | 433 | { |
465 | struct tcp_sock *tp = tcp_sk(sk); | 434 | inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, |
466 | struct listen_sock *lopt = tp->accept_queue.listen_opt; | 435 | TCP_TIMEOUT_INIT, TCP_RTO_MAX); |
467 | int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries; | ||
468 | int thresh = max_retries; | ||
469 | unsigned long now = jiffies; | ||
470 | struct request_sock **reqp, *req; | ||
471 | int i, budget; | ||
472 | |||
473 | if (lopt == NULL || lopt->qlen == 0) | ||
474 | return; | ||
475 | |||
476 | /* Normally all the openreqs are young and become mature | ||
477 | * (i.e. converted to established socket) for first timeout. | ||
478 | * If synack was not acknowledged for 3 seconds, it means | ||
479 | * one of the following things: synack was lost, ack was lost, | ||
480 | * rtt is high or nobody planned to ack (i.e. synflood). | ||
481 | * When server is a bit loaded, queue is populated with old | ||
482 | * open requests, reducing effective size of queue. | ||
483 | * When server is well loaded, queue size reduces to zero | ||
484 | * after several minutes of work. It is not synflood, | ||
485 | * it is normal operation. The solution is pruning | ||
486 | * too old entries overriding normal timeout, when | ||
487 | * situation becomes dangerous. | ||
488 | * | ||
489 | * Essentially, we reserve half of room for young | ||
490 | * embrions; and abort old ones without pity, if old | ||
491 | * ones are about to clog our table. | ||
492 | */ | ||
493 | if (lopt->qlen>>(lopt->max_qlen_log-1)) { | ||
494 | int young = (lopt->qlen_young<<1); | ||
495 | |||
496 | while (thresh > 2) { | ||
497 | if (lopt->qlen < young) | ||
498 | break; | ||
499 | thresh--; | ||
500 | young <<= 1; | ||
501 | } | ||
502 | } | ||
503 | |||
504 | if (tp->defer_accept) | ||
505 | max_retries = tp->defer_accept; | ||
506 | |||
507 | budget = 2*(TCP_SYNQ_HSIZE/(TCP_TIMEOUT_INIT/TCP_SYNQ_INTERVAL)); | ||
508 | i = lopt->clock_hand; | ||
509 | |||
510 | do { | ||
511 | reqp=&lopt->syn_table[i]; | ||
512 | while ((req = *reqp) != NULL) { | ||
513 | if (time_after_eq(now, req->expires)) { | ||
514 | if ((req->retrans < thresh || | ||
515 | (inet_rsk(req)->acked && req->retrans < max_retries)) | ||
516 | && !req->rsk_ops->rtx_syn_ack(sk, req, NULL)) { | ||
517 | unsigned long timeo; | ||
518 | |||
519 | if (req->retrans++ == 0) | ||
520 | lopt->qlen_young--; | ||
521 | timeo = min((TCP_TIMEOUT_INIT << req->retrans), | ||
522 | TCP_RTO_MAX); | ||
523 | req->expires = now + timeo; | ||
524 | reqp = &req->dl_next; | ||
525 | continue; | ||
526 | } | ||
527 | |||
528 | /* Drop this request */ | ||
529 | tcp_synq_unlink(tp, req, reqp); | ||
530 | reqsk_queue_removed(&tp->accept_queue, req); | ||
531 | reqsk_free(req); | ||
532 | continue; | ||
533 | } | ||
534 | reqp = &req->dl_next; | ||
535 | } | ||
536 | |||
537 | i = (i+1)&(TCP_SYNQ_HSIZE-1); | ||
538 | |||
539 | } while (--budget > 0); | ||
540 | |||
541 | lopt->clock_hand = i; | ||
542 | |||
543 | if (lopt->qlen) | ||
544 | tcp_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL); | ||
545 | } | ||
546 | |||
547 | void tcp_delete_keepalive_timer (struct sock *sk) | ||
548 | { | ||
549 | sk_stop_timer(sk, &sk->sk_timer); | ||
550 | } | ||
551 | |||
552 | void tcp_reset_keepalive_timer (struct sock *sk, unsigned long len) | ||
553 | { | ||
554 | sk_reset_timer(sk, &sk->sk_timer, jiffies + len); | ||
555 | } | 436 | } |
556 | 437 | ||
557 | void tcp_set_keepalive(struct sock *sk, int val) | 438 | void tcp_set_keepalive(struct sock *sk, int val) |
@@ -560,15 +441,16 @@ void tcp_set_keepalive(struct sock *sk, int val) | |||
560 | return; | 441 | return; |
561 | 442 | ||
562 | if (val && !sock_flag(sk, SOCK_KEEPOPEN)) | 443 | if (val && !sock_flag(sk, SOCK_KEEPOPEN)) |
563 | tcp_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk))); | 444 | inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk))); |
564 | else if (!val) | 445 | else if (!val) |
565 | tcp_delete_keepalive_timer(sk); | 446 | inet_csk_delete_keepalive_timer(sk); |
566 | } | 447 | } |
567 | 448 | ||
568 | 449 | ||
569 | static void tcp_keepalive_timer (unsigned long data) | 450 | static void tcp_keepalive_timer (unsigned long data) |
570 | { | 451 | { |
571 | struct sock *sk = (struct sock *) data; | 452 | struct sock *sk = (struct sock *) data; |
453 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
572 | struct tcp_sock *tp = tcp_sk(sk); | 454 | struct tcp_sock *tp = tcp_sk(sk); |
573 | __u32 elapsed; | 455 | __u32 elapsed; |
574 | 456 | ||
@@ -576,7 +458,7 @@ static void tcp_keepalive_timer (unsigned long data) | |||
576 | bh_lock_sock(sk); | 458 | bh_lock_sock(sk); |
577 | if (sock_owned_by_user(sk)) { | 459 | if (sock_owned_by_user(sk)) { |
578 | /* Try again later. */ | 460 | /* Try again later. */ |
579 | tcp_reset_keepalive_timer (sk, HZ/20); | 461 | inet_csk_reset_keepalive_timer (sk, HZ/20); |
580 | goto out; | 462 | goto out; |
581 | } | 463 | } |
582 | 464 | ||
@@ -587,7 +469,7 @@ static void tcp_keepalive_timer (unsigned long data) | |||
587 | 469 | ||
588 | if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) { | 470 | if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) { |
589 | if (tp->linger2 >= 0) { | 471 | if (tp->linger2 >= 0) { |
590 | int tmo = tcp_fin_time(tp) - TCP_TIMEWAIT_LEN; | 472 | const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN; |
591 | 473 | ||
592 | if (tmo > 0) { | 474 | if (tmo > 0) { |
593 | tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); | 475 | tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); |
@@ -610,14 +492,14 @@ static void tcp_keepalive_timer (unsigned long data) | |||
610 | elapsed = tcp_time_stamp - tp->rcv_tstamp; | 492 | elapsed = tcp_time_stamp - tp->rcv_tstamp; |
611 | 493 | ||
612 | if (elapsed >= keepalive_time_when(tp)) { | 494 | if (elapsed >= keepalive_time_when(tp)) { |
613 | if ((!tp->keepalive_probes && tp->probes_out >= sysctl_tcp_keepalive_probes) || | 495 | if ((!tp->keepalive_probes && icsk->icsk_probes_out >= sysctl_tcp_keepalive_probes) || |
614 | (tp->keepalive_probes && tp->probes_out >= tp->keepalive_probes)) { | 496 | (tp->keepalive_probes && icsk->icsk_probes_out >= tp->keepalive_probes)) { |
615 | tcp_send_active_reset(sk, GFP_ATOMIC); | 497 | tcp_send_active_reset(sk, GFP_ATOMIC); |
616 | tcp_write_err(sk); | 498 | tcp_write_err(sk); |
617 | goto out; | 499 | goto out; |
618 | } | 500 | } |
619 | if (tcp_write_wakeup(sk) <= 0) { | 501 | if (tcp_write_wakeup(sk) <= 0) { |
620 | tp->probes_out++; | 502 | icsk->icsk_probes_out++; |
621 | elapsed = keepalive_intvl_when(tp); | 503 | elapsed = keepalive_intvl_when(tp); |
622 | } else { | 504 | } else { |
623 | /* If keepalive was lost due to local congestion, | 505 | /* If keepalive was lost due to local congestion, |
@@ -634,7 +516,7 @@ static void tcp_keepalive_timer (unsigned long data) | |||
634 | sk_stream_mem_reclaim(sk); | 516 | sk_stream_mem_reclaim(sk); |
635 | 517 | ||
636 | resched: | 518 | resched: |
637 | tcp_reset_keepalive_timer (sk, elapsed); | 519 | inet_csk_reset_keepalive_timer (sk, elapsed); |
638 | goto out; | 520 | goto out; |
639 | 521 | ||
640 | death: | 522 | death: |
@@ -644,8 +526,3 @@ out: | |||
644 | bh_unlock_sock(sk); | 526 | bh_unlock_sock(sk); |
645 | sock_put(sk); | 527 | sock_put(sk); |
646 | } | 528 | } |
647 | |||
648 | EXPORT_SYMBOL(tcp_clear_xmit_timers); | ||
649 | EXPORT_SYMBOL(tcp_delete_keepalive_timer); | ||
650 | EXPORT_SYMBOL(tcp_init_xmit_timers); | ||
651 | EXPORT_SYMBOL(tcp_reset_keepalive_timer); | ||
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 9bd443db5193..93c5f92070f9 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c | |||
@@ -35,7 +35,7 @@ | |||
35 | #include <linux/mm.h> | 35 | #include <linux/mm.h> |
36 | #include <linux/module.h> | 36 | #include <linux/module.h> |
37 | #include <linux/skbuff.h> | 37 | #include <linux/skbuff.h> |
38 | #include <linux/tcp_diag.h> | 38 | #include <linux/inet_diag.h> |
39 | 39 | ||
40 | #include <net/tcp.h> | 40 | #include <net/tcp.h> |
41 | 41 | ||
@@ -82,9 +82,10 @@ struct vegas { | |||
82 | * Instead we must wait until the completion of an RTT during | 82 | * Instead we must wait until the completion of an RTT during |
83 | * which we actually receive ACKs. | 83 | * which we actually receive ACKs. |
84 | */ | 84 | */ |
85 | static inline void vegas_enable(struct tcp_sock *tp) | 85 | static inline void vegas_enable(struct sock *sk) |
86 | { | 86 | { |
87 | struct vegas *vegas = tcp_ca(tp); | 87 | const struct tcp_sock *tp = tcp_sk(sk); |
88 | struct vegas *vegas = inet_csk_ca(sk); | ||
88 | 89 | ||
89 | /* Begin taking Vegas samples next time we send something. */ | 90 | /* Begin taking Vegas samples next time we send something. */ |
90 | vegas->doing_vegas_now = 1; | 91 | vegas->doing_vegas_now = 1; |
@@ -97,19 +98,19 @@ static inline void vegas_enable(struct tcp_sock *tp) | |||
97 | } | 98 | } |
98 | 99 | ||
99 | /* Stop taking Vegas samples for now. */ | 100 | /* Stop taking Vegas samples for now. */ |
100 | static inline void vegas_disable(struct tcp_sock *tp) | 101 | static inline void vegas_disable(struct sock *sk) |
101 | { | 102 | { |
102 | struct vegas *vegas = tcp_ca(tp); | 103 | struct vegas *vegas = inet_csk_ca(sk); |
103 | 104 | ||
104 | vegas->doing_vegas_now = 0; | 105 | vegas->doing_vegas_now = 0; |
105 | } | 106 | } |
106 | 107 | ||
107 | static void tcp_vegas_init(struct tcp_sock *tp) | 108 | static void tcp_vegas_init(struct sock *sk) |
108 | { | 109 | { |
109 | struct vegas *vegas = tcp_ca(tp); | 110 | struct vegas *vegas = inet_csk_ca(sk); |
110 | 111 | ||
111 | vegas->baseRTT = 0x7fffffff; | 112 | vegas->baseRTT = 0x7fffffff; |
112 | vegas_enable(tp); | 113 | vegas_enable(sk); |
113 | } | 114 | } |
114 | 115 | ||
115 | /* Do RTT sampling needed for Vegas. | 116 | /* Do RTT sampling needed for Vegas. |
@@ -120,9 +121,9 @@ static void tcp_vegas_init(struct tcp_sock *tp) | |||
120 | * o min-filter RTT samples from a much longer window (forever for now) | 121 | * o min-filter RTT samples from a much longer window (forever for now) |
121 | * to find the propagation delay (baseRTT) | 122 | * to find the propagation delay (baseRTT) |
122 | */ | 123 | */ |
123 | static void tcp_vegas_rtt_calc(struct tcp_sock *tp, u32 usrtt) | 124 | static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt) |
124 | { | 125 | { |
125 | struct vegas *vegas = tcp_ca(tp); | 126 | struct vegas *vegas = inet_csk_ca(sk); |
126 | u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */ | 127 | u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */ |
127 | 128 | ||
128 | /* Filter to find propagation delay: */ | 129 | /* Filter to find propagation delay: */ |
@@ -136,13 +137,13 @@ static void tcp_vegas_rtt_calc(struct tcp_sock *tp, u32 usrtt) | |||
136 | vegas->cntRTT++; | 137 | vegas->cntRTT++; |
137 | } | 138 | } |
138 | 139 | ||
139 | static void tcp_vegas_state(struct tcp_sock *tp, u8 ca_state) | 140 | static void tcp_vegas_state(struct sock *sk, u8 ca_state) |
140 | { | 141 | { |
141 | 142 | ||
142 | if (ca_state == TCP_CA_Open) | 143 | if (ca_state == TCP_CA_Open) |
143 | vegas_enable(tp); | 144 | vegas_enable(sk); |
144 | else | 145 | else |
145 | vegas_disable(tp); | 146 | vegas_disable(sk); |
146 | } | 147 | } |
147 | 148 | ||
148 | /* | 149 | /* |
@@ -154,20 +155,21 @@ static void tcp_vegas_state(struct tcp_sock *tp, u8 ca_state) | |||
154 | * packets, _then_ we can make Vegas calculations | 155 | * packets, _then_ we can make Vegas calculations |
155 | * again. | 156 | * again. |
156 | */ | 157 | */ |
157 | static void tcp_vegas_cwnd_event(struct tcp_sock *tp, enum tcp_ca_event event) | 158 | static void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) |
158 | { | 159 | { |
159 | if (event == CA_EVENT_CWND_RESTART || | 160 | if (event == CA_EVENT_CWND_RESTART || |
160 | event == CA_EVENT_TX_START) | 161 | event == CA_EVENT_TX_START) |
161 | tcp_vegas_init(tp); | 162 | tcp_vegas_init(sk); |
162 | } | 163 | } |
163 | 164 | ||
164 | static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack, | 165 | static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, |
165 | u32 seq_rtt, u32 in_flight, int flag) | 166 | u32 seq_rtt, u32 in_flight, int flag) |
166 | { | 167 | { |
167 | struct vegas *vegas = tcp_ca(tp); | 168 | struct tcp_sock *tp = tcp_sk(sk); |
169 | struct vegas *vegas = inet_csk_ca(sk); | ||
168 | 170 | ||
169 | if (!vegas->doing_vegas_now) | 171 | if (!vegas->doing_vegas_now) |
170 | return tcp_reno_cong_avoid(tp, ack, seq_rtt, in_flight, flag); | 172 | return tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag); |
171 | 173 | ||
172 | /* The key players are v_beg_snd_una and v_beg_snd_nxt. | 174 | /* The key players are v_beg_snd_una and v_beg_snd_nxt. |
173 | * | 175 | * |
@@ -219,7 +221,7 @@ static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack, | |||
219 | * but that's not too awful, since we're taking the min, | 221 | * but that's not too awful, since we're taking the min, |
220 | * rather than averaging. | 222 | * rather than averaging. |
221 | */ | 223 | */ |
222 | tcp_vegas_rtt_calc(tp, seq_rtt*1000); | 224 | tcp_vegas_rtt_calc(sk, seq_rtt * 1000); |
223 | 225 | ||
224 | /* We do the Vegas calculations only if we got enough RTT | 226 | /* We do the Vegas calculations only if we got enough RTT |
225 | * samples that we can be reasonably sure that we got | 227 | * samples that we can be reasonably sure that we got |
@@ -359,14 +361,14 @@ static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack, | |||
359 | } | 361 | } |
360 | 362 | ||
361 | /* Extract info for Tcp socket info provided via netlink. */ | 363 | /* Extract info for Tcp socket info provided via netlink. */ |
362 | static void tcp_vegas_get_info(struct tcp_sock *tp, u32 ext, | 364 | static void tcp_vegas_get_info(struct sock *sk, u32 ext, |
363 | struct sk_buff *skb) | 365 | struct sk_buff *skb) |
364 | { | 366 | { |
365 | const struct vegas *ca = tcp_ca(tp); | 367 | const struct vegas *ca = inet_csk_ca(sk); |
366 | if (ext & (1<<(TCPDIAG_VEGASINFO-1))) { | 368 | if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { |
367 | struct tcpvegas_info *info; | 369 | struct tcpvegas_info *info; |
368 | 370 | ||
369 | info = RTA_DATA(__RTA_PUT(skb, TCPDIAG_VEGASINFO, | 371 | info = RTA_DATA(__RTA_PUT(skb, INET_DIAG_VEGASINFO, |
370 | sizeof(*info))); | 372 | sizeof(*info))); |
371 | 373 | ||
372 | info->tcpv_enabled = ca->doing_vegas_now; | 374 | info->tcpv_enabled = ca->doing_vegas_now; |
@@ -393,7 +395,7 @@ static struct tcp_congestion_ops tcp_vegas = { | |||
393 | 395 | ||
394 | static int __init tcp_vegas_register(void) | 396 | static int __init tcp_vegas_register(void) |
395 | { | 397 | { |
396 | BUG_ON(sizeof(struct vegas) > TCP_CA_PRIV_SIZE); | 398 | BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE); |
397 | tcp_register_congestion_control(&tcp_vegas); | 399 | tcp_register_congestion_control(&tcp_vegas); |
398 | return 0; | 400 | return 0; |
399 | } | 401 | } |
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index ef827242c940..0c340c3756c2 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c | |||
@@ -8,7 +8,7 @@ | |||
8 | #include <linux/mm.h> | 8 | #include <linux/mm.h> |
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <linux/skbuff.h> | 10 | #include <linux/skbuff.h> |
11 | #include <linux/tcp_diag.h> | 11 | #include <linux/inet_diag.h> |
12 | #include <net/tcp.h> | 12 | #include <net/tcp.h> |
13 | 13 | ||
14 | /* TCP Westwood structure */ | 14 | /* TCP Westwood structure */ |
@@ -40,9 +40,9 @@ struct westwood { | |||
40 | * way as soon as possible. It will reasonably happen within the first | 40 | * way as soon as possible. It will reasonably happen within the first |
41 | * RTT period of the connection lifetime. | 41 | * RTT period of the connection lifetime. |
42 | */ | 42 | */ |
43 | static void tcp_westwood_init(struct tcp_sock *tp) | 43 | static void tcp_westwood_init(struct sock *sk) |
44 | { | 44 | { |
45 | struct westwood *w = tcp_ca(tp); | 45 | struct westwood *w = inet_csk_ca(sk); |
46 | 46 | ||
47 | w->bk = 0; | 47 | w->bk = 0; |
48 | w->bw_ns_est = 0; | 48 | w->bw_ns_est = 0; |
@@ -51,7 +51,7 @@ static void tcp_westwood_init(struct tcp_sock *tp) | |||
51 | w->cumul_ack = 0; | 51 | w->cumul_ack = 0; |
52 | w->rtt_min = w->rtt = TCP_WESTWOOD_INIT_RTT; | 52 | w->rtt_min = w->rtt = TCP_WESTWOOD_INIT_RTT; |
53 | w->rtt_win_sx = tcp_time_stamp; | 53 | w->rtt_win_sx = tcp_time_stamp; |
54 | w->snd_una = tp->snd_una; | 54 | w->snd_una = tcp_sk(sk)->snd_una; |
55 | } | 55 | } |
56 | 56 | ||
57 | /* | 57 | /* |
@@ -74,11 +74,11 @@ static inline void westwood_filter(struct westwood *w, u32 delta) | |||
74 | * Called after processing group of packets. | 74 | * Called after processing group of packets. |
75 | * but all westwood needs is the last sample of srtt. | 75 | * but all westwood needs is the last sample of srtt. |
76 | */ | 76 | */ |
77 | static void tcp_westwood_pkts_acked(struct tcp_sock *tp, u32 cnt) | 77 | static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt) |
78 | { | 78 | { |
79 | struct westwood *w = tcp_ca(tp); | 79 | struct westwood *w = inet_csk_ca(sk); |
80 | if (cnt > 0) | 80 | if (cnt > 0) |
81 | w->rtt = tp->srtt >> 3; | 81 | w->rtt = tcp_sk(sk)->srtt >> 3; |
82 | } | 82 | } |
83 | 83 | ||
84 | /* | 84 | /* |
@@ -86,9 +86,9 @@ static void tcp_westwood_pkts_acked(struct tcp_sock *tp, u32 cnt) | |||
86 | * It updates RTT evaluation window if it is the right moment to do | 86 | * It updates RTT evaluation window if it is the right moment to do |
87 | * it. If so it calls filter for evaluating bandwidth. | 87 | * it. If so it calls filter for evaluating bandwidth. |
88 | */ | 88 | */ |
89 | static void westwood_update_window(struct tcp_sock *tp) | 89 | static void westwood_update_window(struct sock *sk) |
90 | { | 90 | { |
91 | struct westwood *w = tcp_ca(tp); | 91 | struct westwood *w = inet_csk_ca(sk); |
92 | s32 delta = tcp_time_stamp - w->rtt_win_sx; | 92 | s32 delta = tcp_time_stamp - w->rtt_win_sx; |
93 | 93 | ||
94 | /* | 94 | /* |
@@ -114,11 +114,12 @@ static void westwood_update_window(struct tcp_sock *tp) | |||
114 | * header prediction is successful. In such case in fact update is | 114 | * header prediction is successful. In such case in fact update is |
115 | * straight forward and doesn't need any particular care. | 115 | * straight forward and doesn't need any particular care. |
116 | */ | 116 | */ |
117 | static inline void westwood_fast_bw(struct tcp_sock *tp) | 117 | static inline void westwood_fast_bw(struct sock *sk) |
118 | { | 118 | { |
119 | struct westwood *w = tcp_ca(tp); | 119 | const struct tcp_sock *tp = tcp_sk(sk); |
120 | struct westwood *w = inet_csk_ca(sk); | ||
120 | 121 | ||
121 | westwood_update_window(tp); | 122 | westwood_update_window(sk); |
122 | 123 | ||
123 | w->bk += tp->snd_una - w->snd_una; | 124 | w->bk += tp->snd_una - w->snd_una; |
124 | w->snd_una = tp->snd_una; | 125 | w->snd_una = tp->snd_una; |
@@ -130,9 +131,10 @@ static inline void westwood_fast_bw(struct tcp_sock *tp) | |||
130 | * This function evaluates cumul_ack for evaluating bk in case of | 131 | * This function evaluates cumul_ack for evaluating bk in case of |
131 | * delayed or partial acks. | 132 | * delayed or partial acks. |
132 | */ | 133 | */ |
133 | static inline u32 westwood_acked_count(struct tcp_sock *tp) | 134 | static inline u32 westwood_acked_count(struct sock *sk) |
134 | { | 135 | { |
135 | struct westwood *w = tcp_ca(tp); | 136 | const struct tcp_sock *tp = tcp_sk(sk); |
137 | struct westwood *w = inet_csk_ca(sk); | ||
136 | 138 | ||
137 | w->cumul_ack = tp->snd_una - w->snd_una; | 139 | w->cumul_ack = tp->snd_una - w->snd_una; |
138 | 140 | ||
@@ -160,9 +162,10 @@ static inline u32 westwood_acked_count(struct tcp_sock *tp) | |||
160 | return w->cumul_ack; | 162 | return w->cumul_ack; |
161 | } | 163 | } |
162 | 164 | ||
163 | static inline u32 westwood_bw_rttmin(const struct tcp_sock *tp) | 165 | static inline u32 westwood_bw_rttmin(const struct sock *sk) |
164 | { | 166 | { |
165 | struct westwood *w = tcp_ca(tp); | 167 | const struct tcp_sock *tp = tcp_sk(sk); |
168 | const struct westwood *w = inet_csk_ca(sk); | ||
166 | return max_t(u32, (w->bw_est * w->rtt_min) / tp->mss_cache, 2); | 169 | return max_t(u32, (w->bw_est * w->rtt_min) / tp->mss_cache, 2); |
167 | } | 170 | } |
168 | 171 | ||
@@ -172,31 +175,32 @@ static inline u32 westwood_bw_rttmin(const struct tcp_sock *tp) | |||
172 | * in packets we use mss_cache). Rttmin is guaranteed to be >= 2 | 175 | * in packets we use mss_cache). Rttmin is guaranteed to be >= 2 |
173 | * so avoids ever returning 0. | 176 | * so avoids ever returning 0. |
174 | */ | 177 | */ |
175 | static u32 tcp_westwood_cwnd_min(struct tcp_sock *tp) | 178 | static u32 tcp_westwood_cwnd_min(struct sock *sk) |
176 | { | 179 | { |
177 | return westwood_bw_rttmin(tp); | 180 | return westwood_bw_rttmin(sk); |
178 | } | 181 | } |
179 | 182 | ||
180 | static void tcp_westwood_event(struct tcp_sock *tp, enum tcp_ca_event event) | 183 | static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) |
181 | { | 184 | { |
182 | struct westwood *w = tcp_ca(tp); | 185 | struct tcp_sock *tp = tcp_sk(sk); |
186 | struct westwood *w = inet_csk_ca(sk); | ||
183 | 187 | ||
184 | switch(event) { | 188 | switch(event) { |
185 | case CA_EVENT_FAST_ACK: | 189 | case CA_EVENT_FAST_ACK: |
186 | westwood_fast_bw(tp); | 190 | westwood_fast_bw(sk); |
187 | break; | 191 | break; |
188 | 192 | ||
189 | case CA_EVENT_COMPLETE_CWR: | 193 | case CA_EVENT_COMPLETE_CWR: |
190 | tp->snd_cwnd = tp->snd_ssthresh = westwood_bw_rttmin(tp); | 194 | tp->snd_cwnd = tp->snd_ssthresh = westwood_bw_rttmin(sk); |
191 | break; | 195 | break; |
192 | 196 | ||
193 | case CA_EVENT_FRTO: | 197 | case CA_EVENT_FRTO: |
194 | tp->snd_ssthresh = westwood_bw_rttmin(tp); | 198 | tp->snd_ssthresh = westwood_bw_rttmin(sk); |
195 | break; | 199 | break; |
196 | 200 | ||
197 | case CA_EVENT_SLOW_ACK: | 201 | case CA_EVENT_SLOW_ACK: |
198 | westwood_update_window(tp); | 202 | westwood_update_window(sk); |
199 | w->bk += westwood_acked_count(tp); | 203 | w->bk += westwood_acked_count(sk); |
200 | w->rtt_min = min(w->rtt, w->rtt_min); | 204 | w->rtt_min = min(w->rtt, w->rtt_min); |
201 | break; | 205 | break; |
202 | 206 | ||
@@ -208,15 +212,15 @@ static void tcp_westwood_event(struct tcp_sock *tp, enum tcp_ca_event event) | |||
208 | 212 | ||
209 | 213 | ||
210 | /* Extract info for Tcp socket info provided via netlink. */ | 214 | /* Extract info for Tcp socket info provided via netlink. */ |
211 | static void tcp_westwood_info(struct tcp_sock *tp, u32 ext, | 215 | static void tcp_westwood_info(struct sock *sk, u32 ext, |
212 | struct sk_buff *skb) | 216 | struct sk_buff *skb) |
213 | { | 217 | { |
214 | const struct westwood *ca = tcp_ca(tp); | 218 | const struct westwood *ca = inet_csk_ca(sk); |
215 | if (ext & (1<<(TCPDIAG_VEGASINFO-1))) { | 219 | if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { |
216 | struct rtattr *rta; | 220 | struct rtattr *rta; |
217 | struct tcpvegas_info *info; | 221 | struct tcpvegas_info *info; |
218 | 222 | ||
219 | rta = __RTA_PUT(skb, TCPDIAG_VEGASINFO, sizeof(*info)); | 223 | rta = __RTA_PUT(skb, INET_DIAG_VEGASINFO, sizeof(*info)); |
220 | info = RTA_DATA(rta); | 224 | info = RTA_DATA(rta); |
221 | info->tcpv_enabled = 1; | 225 | info->tcpv_enabled = 1; |
222 | info->tcpv_rttcnt = 0; | 226 | info->tcpv_rttcnt = 0; |
@@ -242,7 +246,7 @@ static struct tcp_congestion_ops tcp_westwood = { | |||
242 | 246 | ||
243 | static int __init tcp_westwood_register(void) | 247 | static int __init tcp_westwood_register(void) |
244 | { | 248 | { |
245 | BUG_ON(sizeof(struct westwood) > TCP_CA_PRIV_SIZE); | 249 | BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE); |
246 | return tcp_register_congestion_control(&tcp_westwood); | 250 | return tcp_register_congestion_control(&tcp_westwood); |
247 | } | 251 | } |
248 | 252 | ||
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index dc4d07357e3a..e5beca7de86c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -95,7 +95,8 @@ | |||
95 | #include <linux/ipv6.h> | 95 | #include <linux/ipv6.h> |
96 | #include <linux/netdevice.h> | 96 | #include <linux/netdevice.h> |
97 | #include <net/snmp.h> | 97 | #include <net/snmp.h> |
98 | #include <net/tcp.h> | 98 | #include <net/ip.h> |
99 | #include <net/tcp_states.h> | ||
99 | #include <net/protocol.h> | 100 | #include <net/protocol.h> |
100 | #include <linux/skbuff.h> | 101 | #include <linux/skbuff.h> |
101 | #include <linux/proc_fs.h> | 102 | #include <linux/proc_fs.h> |
@@ -112,7 +113,7 @@ | |||
112 | * Snmp MIB for the UDP layer | 113 | * Snmp MIB for the UDP layer |
113 | */ | 114 | */ |
114 | 115 | ||
115 | DEFINE_SNMP_STAT(struct udp_mib, udp_statistics); | 116 | DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly; |
116 | 117 | ||
117 | struct hlist_head udp_hash[UDP_HTABLE_SIZE]; | 118 | struct hlist_head udp_hash[UDP_HTABLE_SIZE]; |
118 | DEFINE_RWLOCK(udp_hash_lock); | 119 | DEFINE_RWLOCK(udp_hash_lock); |
@@ -628,7 +629,7 @@ back_from_confirm: | |||
628 | /* ... which is an evident application bug. --ANK */ | 629 | /* ... which is an evident application bug. --ANK */ |
629 | release_sock(sk); | 630 | release_sock(sk); |
630 | 631 | ||
631 | LIMIT_NETDEBUG(printk(KERN_DEBUG "udp cork app bug 2\n")); | 632 | LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); |
632 | err = -EINVAL; | 633 | err = -EINVAL; |
633 | goto out; | 634 | goto out; |
634 | } | 635 | } |
@@ -693,7 +694,7 @@ static int udp_sendpage(struct sock *sk, struct page *page, int offset, | |||
693 | if (unlikely(!up->pending)) { | 694 | if (unlikely(!up->pending)) { |
694 | release_sock(sk); | 695 | release_sock(sk); |
695 | 696 | ||
696 | LIMIT_NETDEBUG(printk(KERN_DEBUG "udp cork app bug 3\n")); | 697 | LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n"); |
697 | return -EINVAL; | 698 | return -EINVAL; |
698 | } | 699 | } |
699 | 700 | ||
@@ -1102,7 +1103,7 @@ static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, | |||
1102 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1103 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1103 | if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) | 1104 | if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) |
1104 | return 0; | 1105 | return 0; |
1105 | LIMIT_NETDEBUG(printk(KERN_DEBUG "udp v4 hw csum failure.\n")); | 1106 | LIMIT_NETDEBUG(KERN_DEBUG "udp v4 hw csum failure.\n"); |
1106 | skb->ip_summed = CHECKSUM_NONE; | 1107 | skb->ip_summed = CHECKSUM_NONE; |
1107 | } | 1108 | } |
1108 | if (skb->ip_summed != CHECKSUM_UNNECESSARY) | 1109 | if (skb->ip_summed != CHECKSUM_UNNECESSARY) |
@@ -1181,13 +1182,13 @@ int udp_rcv(struct sk_buff *skb) | |||
1181 | return(0); | 1182 | return(0); |
1182 | 1183 | ||
1183 | short_packet: | 1184 | short_packet: |
1184 | LIMIT_NETDEBUG(printk(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", | 1185 | LIMIT_NETDEBUG(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", |
1185 | NIPQUAD(saddr), | 1186 | NIPQUAD(saddr), |
1186 | ntohs(uh->source), | 1187 | ntohs(uh->source), |
1187 | ulen, | 1188 | ulen, |
1188 | len, | 1189 | len, |
1189 | NIPQUAD(daddr), | 1190 | NIPQUAD(daddr), |
1190 | ntohs(uh->dest))); | 1191 | ntohs(uh->dest)); |
1191 | no_header: | 1192 | no_header: |
1192 | UDP_INC_STATS_BH(UDP_MIB_INERRORS); | 1193 | UDP_INC_STATS_BH(UDP_MIB_INERRORS); |
1193 | kfree_skb(skb); | 1194 | kfree_skb(skb); |
@@ -1198,12 +1199,12 @@ csum_error: | |||
1198 | * RFC1122: OK. Discards the bad packet silently (as far as | 1199 | * RFC1122: OK. Discards the bad packet silently (as far as |
1199 | * the network is concerned, anyway) as per 4.1.3.4 (MUST). | 1200 | * the network is concerned, anyway) as per 4.1.3.4 (MUST). |
1200 | */ | 1201 | */ |
1201 | LIMIT_NETDEBUG(printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", | 1202 | LIMIT_NETDEBUG(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", |
1202 | NIPQUAD(saddr), | 1203 | NIPQUAD(saddr), |
1203 | ntohs(uh->source), | 1204 | ntohs(uh->source), |
1204 | NIPQUAD(daddr), | 1205 | NIPQUAD(daddr), |
1205 | ntohs(uh->dest), | 1206 | ntohs(uh->dest), |
1206 | ulen)); | 1207 | ulen); |
1207 | drop: | 1208 | drop: |
1208 | UDP_INC_STATS_BH(UDP_MIB_INERRORS); | 1209 | UDP_INC_STATS_BH(UDP_MIB_INERRORS); |
1209 | kfree_skb(skb); | 1210 | kfree_skb(skb); |
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 050611d7a967..d23e07fc81fa 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c | |||
@@ -128,8 +128,10 @@ void __init xfrm4_state_init(void) | |||
128 | xfrm_state_register_afinfo(&xfrm4_state_afinfo); | 128 | xfrm_state_register_afinfo(&xfrm4_state_afinfo); |
129 | } | 129 | } |
130 | 130 | ||
131 | #if 0 | ||
131 | void __exit xfrm4_state_fini(void) | 132 | void __exit xfrm4_state_fini(void) |
132 | { | 133 | { |
133 | xfrm_state_unregister_afinfo(&xfrm4_state_afinfo); | 134 | xfrm_state_unregister_afinfo(&xfrm4_state_afinfo); |
134 | } | 135 | } |
136 | #endif /* 0 */ | ||
135 | 137 | ||
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index b39e04940590..6460eec834b7 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile | |||
@@ -8,7 +8,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \ | |||
8 | route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \ | 8 | route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \ |
9 | protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ | 9 | protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ |
10 | exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \ | 10 | exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \ |
11 | ip6_flowlabel.o ipv6_syms.o | 11 | ip6_flowlabel.o ipv6_syms.o netfilter.o |
12 | 12 | ||
13 | ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ | 13 | ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ |
14 | xfrm6_output.o | 14 | xfrm6_output.o |
@@ -23,3 +23,5 @@ obj-$(CONFIG_NETFILTER) += netfilter/ | |||
23 | obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o | 23 | obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o |
24 | 24 | ||
25 | obj-y += exthdrs_core.o | 25 | obj-y += exthdrs_core.o |
26 | |||
27 | obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o | ||
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 77004b9456c0..937ad32db77c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c | |||
@@ -1041,9 +1041,9 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) | |||
1041 | const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; | 1041 | const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; |
1042 | const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2); | 1042 | const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2); |
1043 | u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; | 1043 | u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; |
1044 | u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2); | 1044 | u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); |
1045 | int sk_ipv6only = ipv6_only_sock(sk); | 1045 | int sk_ipv6only = ipv6_only_sock(sk); |
1046 | int sk2_ipv6only = tcp_v6_ipv6only(sk2); | 1046 | int sk2_ipv6only = inet_v6_ipv6only(sk2); |
1047 | int addr_type = ipv6_addr_type(sk_rcv_saddr6); | 1047 | int addr_type = ipv6_addr_type(sk_rcv_saddr6); |
1048 | int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; | 1048 | int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; |
1049 | 1049 | ||
@@ -1126,7 +1126,7 @@ void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr) | |||
1126 | __ipv6_dev_mc_dec(idev, &maddr); | 1126 | __ipv6_dev_mc_dec(idev, &maddr); |
1127 | } | 1127 | } |
1128 | 1128 | ||
1129 | void addrconf_join_anycast(struct inet6_ifaddr *ifp) | 1129 | static void addrconf_join_anycast(struct inet6_ifaddr *ifp) |
1130 | { | 1130 | { |
1131 | struct in6_addr addr; | 1131 | struct in6_addr addr; |
1132 | ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); | 1132 | ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); |
@@ -1135,7 +1135,7 @@ void addrconf_join_anycast(struct inet6_ifaddr *ifp) | |||
1135 | ipv6_dev_ac_inc(ifp->idev->dev, &addr); | 1135 | ipv6_dev_ac_inc(ifp->idev->dev, &addr); |
1136 | } | 1136 | } |
1137 | 1137 | ||
1138 | void addrconf_leave_anycast(struct inet6_ifaddr *ifp) | 1138 | static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) |
1139 | { | 1139 | { |
1140 | struct in6_addr addr; | 1140 | struct in6_addr addr; |
1141 | ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); | 1141 | ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); |
@@ -2858,16 +2858,16 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) | |||
2858 | 2858 | ||
2859 | skb = alloc_skb(size, GFP_ATOMIC); | 2859 | skb = alloc_skb(size, GFP_ATOMIC); |
2860 | if (!skb) { | 2860 | if (!skb) { |
2861 | netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, ENOBUFS); | 2861 | netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, ENOBUFS); |
2862 | return; | 2862 | return; |
2863 | } | 2863 | } |
2864 | if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) { | 2864 | if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) { |
2865 | kfree_skb(skb); | 2865 | kfree_skb(skb); |
2866 | netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, EINVAL); | 2866 | netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, EINVAL); |
2867 | return; | 2867 | return; |
2868 | } | 2868 | } |
2869 | NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_IFADDR; | 2869 | NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFADDR; |
2870 | netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_IFADDR, GFP_ATOMIC); | 2870 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFADDR, GFP_ATOMIC); |
2871 | } | 2871 | } |
2872 | 2872 | ||
2873 | static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, | 2873 | static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, |
@@ -2994,16 +2994,16 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev) | |||
2994 | 2994 | ||
2995 | skb = alloc_skb(size, GFP_ATOMIC); | 2995 | skb = alloc_skb(size, GFP_ATOMIC); |
2996 | if (!skb) { | 2996 | if (!skb) { |
2997 | netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, ENOBUFS); | 2997 | netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, ENOBUFS); |
2998 | return; | 2998 | return; |
2999 | } | 2999 | } |
3000 | if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) { | 3000 | if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) { |
3001 | kfree_skb(skb); | 3001 | kfree_skb(skb); |
3002 | netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, EINVAL); | 3002 | netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, EINVAL); |
3003 | return; | 3003 | return; |
3004 | } | 3004 | } |
3005 | NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_IFINFO; | 3005 | NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFINFO; |
3006 | netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_IFINFO, GFP_ATOMIC); | 3006 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFINFO, GFP_ATOMIC); |
3007 | } | 3007 | } |
3008 | 3008 | ||
3009 | static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, | 3009 | static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, |
@@ -3054,16 +3054,16 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, | |||
3054 | 3054 | ||
3055 | skb = alloc_skb(size, GFP_ATOMIC); | 3055 | skb = alloc_skb(size, GFP_ATOMIC); |
3056 | if (!skb) { | 3056 | if (!skb) { |
3057 | netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, ENOBUFS); | 3057 | netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, ENOBUFS); |
3058 | return; | 3058 | return; |
3059 | } | 3059 | } |
3060 | if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) { | 3060 | if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) { |
3061 | kfree_skb(skb); | 3061 | kfree_skb(skb); |
3062 | netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, EINVAL); | 3062 | netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, EINVAL); |
3063 | return; | 3063 | return; |
3064 | } | 3064 | } |
3065 | NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_PREFIX; | 3065 | NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_PREFIX; |
3066 | netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_PREFIX, GFP_ATOMIC); | 3066 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_PREFIX, GFP_ATOMIC); |
3067 | } | 3067 | } |
3068 | 3068 | ||
3069 | static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { | 3069 | static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { |
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 28d9bcab0970..4f8795af2edb 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c | |||
@@ -44,6 +44,7 @@ | |||
44 | #include <linux/netdevice.h> | 44 | #include <linux/netdevice.h> |
45 | #include <linux/icmpv6.h> | 45 | #include <linux/icmpv6.h> |
46 | #include <linux/smp_lock.h> | 46 | #include <linux/smp_lock.h> |
47 | #include <linux/netfilter_ipv6.h> | ||
47 | 48 | ||
48 | #include <net/ip.h> | 49 | #include <net/ip.h> |
49 | #include <net/ipv6.h> | 50 | #include <net/ipv6.h> |
@@ -66,45 +67,14 @@ MODULE_AUTHOR("Cast of dozens"); | |||
66 | MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); | 67 | MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); |
67 | MODULE_LICENSE("GPL"); | 68 | MODULE_LICENSE("GPL"); |
68 | 69 | ||
69 | /* IPv6 procfs goodies... */ | ||
70 | |||
71 | #ifdef CONFIG_PROC_FS | ||
72 | extern int raw6_proc_init(void); | ||
73 | extern void raw6_proc_exit(void); | ||
74 | extern int tcp6_proc_init(void); | ||
75 | extern void tcp6_proc_exit(void); | ||
76 | extern int udp6_proc_init(void); | ||
77 | extern void udp6_proc_exit(void); | ||
78 | extern int ipv6_misc_proc_init(void); | ||
79 | extern void ipv6_misc_proc_exit(void); | ||
80 | extern int ac6_proc_init(void); | ||
81 | extern void ac6_proc_exit(void); | ||
82 | extern int if6_proc_init(void); | ||
83 | extern void if6_proc_exit(void); | ||
84 | #endif | ||
85 | |||
86 | int sysctl_ipv6_bindv6only; | 70 | int sysctl_ipv6_bindv6only; |
87 | 71 | ||
88 | #ifdef INET_REFCNT_DEBUG | ||
89 | atomic_t inet6_sock_nr; | ||
90 | EXPORT_SYMBOL(inet6_sock_nr); | ||
91 | #endif | ||
92 | |||
93 | /* The inetsw table contains everything that inet_create needs to | 72 | /* The inetsw table contains everything that inet_create needs to |
94 | * build a new socket. | 73 | * build a new socket. |
95 | */ | 74 | */ |
96 | static struct list_head inetsw6[SOCK_MAX]; | 75 | static struct list_head inetsw6[SOCK_MAX]; |
97 | static DEFINE_SPINLOCK(inetsw6_lock); | 76 | static DEFINE_SPINLOCK(inetsw6_lock); |
98 | 77 | ||
99 | static void inet6_sock_destruct(struct sock *sk) | ||
100 | { | ||
101 | inet_sock_destruct(sk); | ||
102 | |||
103 | #ifdef INET_REFCNT_DEBUG | ||
104 | atomic_dec(&inet6_sock_nr); | ||
105 | #endif | ||
106 | } | ||
107 | |||
108 | static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) | 78 | static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) |
109 | { | 79 | { |
110 | const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo); | 80 | const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo); |
@@ -185,7 +155,7 @@ static int inet6_create(struct socket *sock, int protocol) | |||
185 | inet->hdrincl = 1; | 155 | inet->hdrincl = 1; |
186 | } | 156 | } |
187 | 157 | ||
188 | sk->sk_destruct = inet6_sock_destruct; | 158 | sk->sk_destruct = inet_sock_destruct; |
189 | sk->sk_family = PF_INET6; | 159 | sk->sk_family = PF_INET6; |
190 | sk->sk_protocol = protocol; | 160 | sk->sk_protocol = protocol; |
191 | 161 | ||
@@ -212,12 +182,17 @@ static int inet6_create(struct socket *sock, int protocol) | |||
212 | inet->pmtudisc = IP_PMTUDISC_DONT; | 182 | inet->pmtudisc = IP_PMTUDISC_DONT; |
213 | else | 183 | else |
214 | inet->pmtudisc = IP_PMTUDISC_WANT; | 184 | inet->pmtudisc = IP_PMTUDISC_WANT; |
185 | /* | ||
186 | * Increment only the relevant sk_prot->socks debug field, this changes | ||
187 | * the previous behaviour of incrementing both the equivalent to | ||
188 | * answer->prot->socks (inet6_sock_nr) and inet_sock_nr. | ||
189 | * | ||
190 | * This allows better debug granularity as we'll know exactly how many | ||
191 | * UDPv6, TCPv6, etc socks were allocated, not the sum of all IPv6 | ||
192 | * transport protocol socks. -acme | ||
193 | */ | ||
194 | sk_refcnt_debug_inc(sk); | ||
215 | 195 | ||
216 | |||
217 | #ifdef INET_REFCNT_DEBUG | ||
218 | atomic_inc(&inet6_sock_nr); | ||
219 | atomic_inc(&inet_sock_nr); | ||
220 | #endif | ||
221 | if (inet->num) { | 196 | if (inet->num) { |
222 | /* It assumes that any protocol which allows | 197 | /* It assumes that any protocol which allows |
223 | * the user to assign a number at socket | 198 | * the user to assign a number at socket |
@@ -513,11 +488,6 @@ static struct net_proto_family inet6_family_ops = { | |||
513 | .owner = THIS_MODULE, | 488 | .owner = THIS_MODULE, |
514 | }; | 489 | }; |
515 | 490 | ||
516 | #ifdef CONFIG_SYSCTL | ||
517 | extern void ipv6_sysctl_register(void); | ||
518 | extern void ipv6_sysctl_unregister(void); | ||
519 | #endif | ||
520 | |||
521 | /* Same as inet6_dgram_ops, sans udp_poll. */ | 491 | /* Same as inet6_dgram_ops, sans udp_poll. */ |
522 | static struct proto_ops inet6_sockraw_ops = { | 492 | static struct proto_ops inet6_sockraw_ops = { |
523 | .family = PF_INET6, | 493 | .family = PF_INET6, |
@@ -684,8 +654,6 @@ static void cleanup_ipv6_mibs(void) | |||
684 | snmp6_mib_free((void **)udp_stats_in6); | 654 | snmp6_mib_free((void **)udp_stats_in6); |
685 | } | 655 | } |
686 | 656 | ||
687 | extern int ipv6_misc_proc_init(void); | ||
688 | |||
689 | static int __init inet6_init(void) | 657 | static int __init inet6_init(void) |
690 | { | 658 | { |
691 | struct sk_buff *dummy_skb; | 659 | struct sk_buff *dummy_skb; |
@@ -757,6 +725,9 @@ static int __init inet6_init(void) | |||
757 | err = igmp6_init(&inet6_family_ops); | 725 | err = igmp6_init(&inet6_family_ops); |
758 | if (err) | 726 | if (err) |
759 | goto igmp_fail; | 727 | goto igmp_fail; |
728 | err = ipv6_netfilter_init(); | ||
729 | if (err) | ||
730 | goto netfilter_fail; | ||
760 | /* Create /proc/foo6 entries. */ | 731 | /* Create /proc/foo6 entries. */ |
761 | #ifdef CONFIG_PROC_FS | 732 | #ifdef CONFIG_PROC_FS |
762 | err = -ENOMEM; | 733 | err = -ENOMEM; |
@@ -813,6 +784,8 @@ proc_tcp6_fail: | |||
813 | raw6_proc_exit(); | 784 | raw6_proc_exit(); |
814 | proc_raw6_fail: | 785 | proc_raw6_fail: |
815 | #endif | 786 | #endif |
787 | ipv6_netfilter_fini(); | ||
788 | netfilter_fail: | ||
816 | igmp6_cleanup(); | 789 | igmp6_cleanup(); |
817 | igmp_fail: | 790 | igmp_fail: |
818 | ndisc_cleanup(); | 791 | ndisc_cleanup(); |
@@ -852,6 +825,7 @@ static void __exit inet6_exit(void) | |||
852 | ip6_route_cleanup(); | 825 | ip6_route_cleanup(); |
853 | ipv6_packet_cleanup(); | 826 | ipv6_packet_cleanup(); |
854 | igmp6_cleanup(); | 827 | igmp6_cleanup(); |
828 | ipv6_netfilter_fini(); | ||
855 | ndisc_cleanup(); | 829 | ndisc_cleanup(); |
856 | icmpv6_cleanup(); | 830 | icmpv6_cleanup(); |
857 | #ifdef CONFIG_SYSCTL | 831 | #ifdef CONFIG_SYSCTL |
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 986fdfdccbcd..0ebfad907a03 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c | |||
@@ -131,10 +131,10 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len) | |||
131 | case NEXTHDR_HOP: | 131 | case NEXTHDR_HOP: |
132 | case NEXTHDR_DEST: | 132 | case NEXTHDR_DEST: |
133 | if (!zero_out_mutable_opts(exthdr.opth)) { | 133 | if (!zero_out_mutable_opts(exthdr.opth)) { |
134 | LIMIT_NETDEBUG(printk( | 134 | LIMIT_NETDEBUG( |
135 | KERN_WARNING "overrun %sopts\n", | 135 | KERN_WARNING "overrun %sopts\n", |
136 | nexthdr == NEXTHDR_HOP ? | 136 | nexthdr == NEXTHDR_HOP ? |
137 | "hop" : "dest")); | 137 | "hop" : "dest"); |
138 | return -EINVAL; | 138 | return -EINVAL; |
139 | } | 139 | } |
140 | break; | 140 | break; |
@@ -293,8 +293,7 @@ static int ah6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struc | |||
293 | skb_push(skb, skb->data - skb->nh.raw); | 293 | skb_push(skb, skb->data - skb->nh.raw); |
294 | ahp->icv(ahp, skb, ah->auth_data); | 294 | ahp->icv(ahp, skb, ah->auth_data); |
295 | if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) { | 295 | if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) { |
296 | LIMIT_NETDEBUG( | 296 | LIMIT_NETDEBUG(KERN_WARNING "ipsec ah authentication error\n"); |
297 | printk(KERN_WARNING "ipsec ah authentication error\n")); | ||
298 | x->stats.integrity_failed++; | 297 | x->stats.integrity_failed++; |
299 | goto free_out; | 298 | goto free_out; |
300 | } | 299 | } |
@@ -332,9 +331,9 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | |||
332 | if (!x) | 331 | if (!x) |
333 | return; | 332 | return; |
334 | 333 | ||
335 | NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/" | 334 | NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/" |
336 | "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", | 335 | "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", |
337 | ntohl(ah->spi), NIP6(iph->daddr))); | 336 | ntohl(ah->spi), NIP6(iph->daddr)); |
338 | 337 | ||
339 | xfrm_state_put(x); | 338 | xfrm_state_put(x); |
340 | } | 339 | } |
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 5229365cd8b4..01468fab3d3d 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <net/addrconf.h> | 29 | #include <net/addrconf.h> |
30 | #include <net/transp_v6.h> | 30 | #include <net/transp_v6.h> |
31 | #include <net/ip6_route.h> | 31 | #include <net/ip6_route.h> |
32 | #include <net/tcp_states.h> | ||
32 | 33 | ||
33 | #include <linux/errqueue.h> | 34 | #include <linux/errqueue.h> |
34 | #include <asm/uaccess.h> | 35 | #include <asm/uaccess.h> |
@@ -588,8 +589,8 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, | |||
588 | break; | 589 | break; |
589 | 590 | ||
590 | default: | 591 | default: |
591 | LIMIT_NETDEBUG( | 592 | LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n", |
592 | printk(KERN_DEBUG "invalid cmsg type: %d\n", cmsg->cmsg_type)); | 593 | cmsg->cmsg_type); |
593 | err = -EINVAL; | 594 | err = -EINVAL; |
594 | break; | 595 | break; |
595 | }; | 596 | }; |
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 324db62515a2..e8bff9d3d96c 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c | |||
@@ -212,8 +212,7 @@ static int esp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, stru | |||
212 | 212 | ||
213 | padlen = nexthdr[0]; | 213 | padlen = nexthdr[0]; |
214 | if (padlen+2 >= elen) { | 214 | if (padlen+2 >= elen) { |
215 | LIMIT_NETDEBUG( | 215 | LIMIT_NETDEBUG(KERN_WARNING "ipsec esp packet is garbage padlen=%d, elen=%d\n", padlen+2, elen); |
216 | printk(KERN_WARNING "ipsec esp packet is garbage padlen=%d, elen=%d\n", padlen+2, elen)); | ||
217 | ret = -EINVAL; | 216 | ret = -EINVAL; |
218 | goto out; | 217 | goto out; |
219 | } | 218 | } |
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index e0839eafc3a9..5be6da2584ee 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c | |||
@@ -424,8 +424,8 @@ static int ipv6_hop_ra(struct sk_buff *skb, int optoff) | |||
424 | IP6CB(skb)->ra = optoff; | 424 | IP6CB(skb)->ra = optoff; |
425 | return 1; | 425 | return 1; |
426 | } | 426 | } |
427 | LIMIT_NETDEBUG( | 427 | LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", |
428 | printk(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", skb->nh.raw[optoff+1])); | 428 | skb->nh.raw[optoff+1]); |
429 | kfree_skb(skb); | 429 | kfree_skb(skb); |
430 | return 0; | 430 | return 0; |
431 | } | 431 | } |
@@ -437,8 +437,8 @@ static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) | |||
437 | u32 pkt_len; | 437 | u32 pkt_len; |
438 | 438 | ||
439 | if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) { | 439 | if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) { |
440 | LIMIT_NETDEBUG( | 440 | LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", |
441 | printk(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", skb->nh.raw[optoff+1])); | 441 | skb->nh.raw[optoff+1]); |
442 | IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); | 442 | IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); |
443 | goto drop; | 443 | goto drop; |
444 | } | 444 | } |
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index ff3ec9822e36..5176fc655ea9 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c | |||
@@ -67,7 +67,7 @@ | |||
67 | #include <asm/uaccess.h> | 67 | #include <asm/uaccess.h> |
68 | #include <asm/system.h> | 68 | #include <asm/system.h> |
69 | 69 | ||
70 | DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); | 70 | DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly; |
71 | 71 | ||
72 | /* | 72 | /* |
73 | * The ICMP socket(s). This is the most convenient way to flow control | 73 | * The ICMP socket(s). This is the most convenient way to flow control |
@@ -332,8 +332,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, | |||
332 | * for now we don't know that. | 332 | * for now we don't know that. |
333 | */ | 333 | */ |
334 | if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { | 334 | if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { |
335 | LIMIT_NETDEBUG( | 335 | LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n"); |
336 | printk(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n")); | ||
337 | return; | 336 | return; |
338 | } | 337 | } |
339 | 338 | ||
@@ -341,8 +340,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, | |||
341 | * Never answer to a ICMP packet. | 340 | * Never answer to a ICMP packet. |
342 | */ | 341 | */ |
343 | if (is_ineligible(skb)) { | 342 | if (is_ineligible(skb)) { |
344 | LIMIT_NETDEBUG( | 343 | LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n"); |
345 | printk(KERN_DEBUG "icmpv6_send: no reply to icmp error\n")); | ||
346 | return; | 344 | return; |
347 | } | 345 | } |
348 | 346 | ||
@@ -393,8 +391,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, | |||
393 | len = skb->len - msg.offset; | 391 | len = skb->len - msg.offset; |
394 | len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr)); | 392 | len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr)); |
395 | if (len < 0) { | 393 | if (len < 0) { |
396 | LIMIT_NETDEBUG( | 394 | LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n"); |
397 | printk(KERN_DEBUG "icmp: len problem\n")); | ||
398 | goto out_dst_release; | 395 | goto out_dst_release; |
399 | } | 396 | } |
400 | 397 | ||
@@ -551,7 +548,8 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info) | |||
551 | 548 | ||
552 | read_lock(&raw_v6_lock); | 549 | read_lock(&raw_v6_lock); |
553 | if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) { | 550 | if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) { |
554 | while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr))) { | 551 | while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, |
552 | skb->dev->ifindex))) { | ||
555 | rawv6_err(sk, skb, NULL, type, code, inner_offset, info); | 553 | rawv6_err(sk, skb, NULL, type, code, inner_offset, info); |
556 | sk = sk_next(sk); | 554 | sk = sk_next(sk); |
557 | } | 555 | } |
@@ -583,17 +581,15 @@ static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) | |||
583 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 581 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
584 | if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, | 582 | if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, |
585 | skb->csum)) { | 583 | skb->csum)) { |
586 | LIMIT_NETDEBUG( | 584 | LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 hw checksum failed\n"); |
587 | printk(KERN_DEBUG "ICMPv6 hw checksum failed\n")); | ||
588 | skb->ip_summed = CHECKSUM_NONE; | 585 | skb->ip_summed = CHECKSUM_NONE; |
589 | } | 586 | } |
590 | } | 587 | } |
591 | if (skb->ip_summed == CHECKSUM_NONE) { | 588 | if (skb->ip_summed == CHECKSUM_NONE) { |
592 | if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, | 589 | if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, |
593 | skb_checksum(skb, 0, skb->len, 0))) { | 590 | skb_checksum(skb, 0, skb->len, 0))) { |
594 | LIMIT_NETDEBUG( | 591 | LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n", |
595 | printk(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n", | 592 | NIP6(*saddr), NIP6(*daddr)); |
596 | NIP6(*saddr), NIP6(*daddr))); | ||
597 | goto discard_it; | 593 | goto discard_it; |
598 | } | 594 | } |
599 | } | 595 | } |
@@ -669,8 +665,7 @@ static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) | |||
669 | break; | 665 | break; |
670 | 666 | ||
671 | default: | 667 | default: |
672 | LIMIT_NETDEBUG( | 668 | LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n"); |
673 | printk(KERN_DEBUG "icmpv6: msg of unknown type\n")); | ||
674 | 669 | ||
675 | /* informational */ | 670 | /* informational */ |
676 | if (type & ICMPV6_INFOMSG_MASK) | 671 | if (type & ICMPV6_INFOMSG_MASK) |
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c new file mode 100644 index 000000000000..01d5f46d4e40 --- /dev/null +++ b/net/ipv6/inet6_hashtables.c | |||
@@ -0,0 +1,81 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * Generic INET6 transport hashtables | ||
7 | * | ||
8 | * Authors: Lotsa people, from code originally in tcp | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License | ||
12 | * as published by the Free Software Foundation; either version | ||
13 | * 2 of the License, or (at your option) any later version. | ||
14 | */ | ||
15 | |||
16 | #include <linux/config.h> | ||
17 | |||
18 | #include <linux/module.h> | ||
19 | |||
20 | #include <net/inet_connection_sock.h> | ||
21 | #include <net/inet_hashtables.h> | ||
22 | #include <net/inet6_hashtables.h> | ||
23 | |||
24 | struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, | ||
25 | const struct in6_addr *daddr, | ||
26 | const unsigned short hnum, const int dif) | ||
27 | { | ||
28 | struct sock *sk; | ||
29 | const struct hlist_node *node; | ||
30 | struct sock *result = NULL; | ||
31 | int score, hiscore = 0; | ||
32 | |||
33 | read_lock(&hashinfo->lhash_lock); | ||
34 | sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) { | ||
35 | if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { | ||
36 | const struct ipv6_pinfo *np = inet6_sk(sk); | ||
37 | |||
38 | score = 1; | ||
39 | if (!ipv6_addr_any(&np->rcv_saddr)) { | ||
40 | if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) | ||
41 | continue; | ||
42 | score++; | ||
43 | } | ||
44 | if (sk->sk_bound_dev_if) { | ||
45 | if (sk->sk_bound_dev_if != dif) | ||
46 | continue; | ||
47 | score++; | ||
48 | } | ||
49 | if (score == 3) { | ||
50 | result = sk; | ||
51 | break; | ||
52 | } | ||
53 | if (score > hiscore) { | ||
54 | hiscore = score; | ||
55 | result = sk; | ||
56 | } | ||
57 | } | ||
58 | } | ||
59 | if (result) | ||
60 | sock_hold(result); | ||
61 | read_unlock(&hashinfo->lhash_lock); | ||
62 | return result; | ||
63 | } | ||
64 | |||
65 | EXPORT_SYMBOL_GPL(inet6_lookup_listener); | ||
66 | |||
67 | struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, | ||
68 | const struct in6_addr *saddr, const u16 sport, | ||
69 | const struct in6_addr *daddr, const u16 dport, | ||
70 | const int dif) | ||
71 | { | ||
72 | struct sock *sk; | ||
73 | |||
74 | local_bh_disable(); | ||
75 | sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); | ||
76 | local_bh_enable(); | ||
77 | |||
78 | return sk; | ||
79 | } | ||
80 | |||
81 | EXPORT_SYMBOL_GPL(inet6_lookup); | ||
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1b354aa97934..16af874c9e8f 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c | |||
@@ -49,7 +49,7 @@ | |||
49 | 49 | ||
50 | struct rt6_statistics rt6_stats; | 50 | struct rt6_statistics rt6_stats; |
51 | 51 | ||
52 | static kmem_cache_t * fib6_node_kmem; | 52 | static kmem_cache_t * fib6_node_kmem __read_mostly; |
53 | 53 | ||
54 | enum fib_walk_state_t | 54 | enum fib_walk_state_t |
55 | { | 55 | { |
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 10fbb50daea4..6e3480426939 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c | |||
@@ -56,7 +56,7 @@ static inline int ip6_rcv_finish( struct sk_buff *skb) | |||
56 | return dst_input(skb); | 56 | return dst_input(skb); |
57 | } | 57 | } |
58 | 58 | ||
59 | int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) | 59 | int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) |
60 | { | 60 | { |
61 | struct ipv6hdr *hdr; | 61 | struct ipv6hdr *hdr; |
62 | u32 pkt_len; | 62 | u32 pkt_len; |
@@ -166,8 +166,8 @@ resubmit: | |||
166 | nexthdr = skb->nh.raw[nhoff]; | 166 | nexthdr = skb->nh.raw[nhoff]; |
167 | 167 | ||
168 | raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]); | 168 | raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]); |
169 | if (raw_sk) | 169 | if (raw_sk && !ipv6_raw_deliver(skb, nexthdr)) |
170 | ipv6_raw_deliver(skb, nexthdr); | 170 | raw_sk = NULL; |
171 | 171 | ||
172 | hash = nexthdr & (MAX_INET_PROTOS - 1); | 172 | hash = nexthdr & (MAX_INET_PROTOS - 1); |
173 | if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) { | 173 | if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) { |
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ae652ca14bc9..01ef94f7c7f1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c | |||
@@ -153,51 +153,6 @@ int ip6_output(struct sk_buff *skb) | |||
153 | return ip6_output2(skb); | 153 | return ip6_output2(skb); |
154 | } | 154 | } |
155 | 155 | ||
156 | #ifdef CONFIG_NETFILTER | ||
157 | int ip6_route_me_harder(struct sk_buff *skb) | ||
158 | { | ||
159 | struct ipv6hdr *iph = skb->nh.ipv6h; | ||
160 | struct dst_entry *dst; | ||
161 | struct flowi fl = { | ||
162 | .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, | ||
163 | .nl_u = | ||
164 | { .ip6_u = | ||
165 | { .daddr = iph->daddr, | ||
166 | .saddr = iph->saddr, } }, | ||
167 | .proto = iph->nexthdr, | ||
168 | }; | ||
169 | |||
170 | dst = ip6_route_output(skb->sk, &fl); | ||
171 | |||
172 | if (dst->error) { | ||
173 | IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); | ||
174 | LIMIT_NETDEBUG( | ||
175 | printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n")); | ||
176 | dst_release(dst); | ||
177 | return -EINVAL; | ||
178 | } | ||
179 | |||
180 | /* Drop old route. */ | ||
181 | dst_release(skb->dst); | ||
182 | |||
183 | skb->dst = dst; | ||
184 | return 0; | ||
185 | } | ||
186 | #endif | ||
187 | |||
188 | static inline int ip6_maybe_reroute(struct sk_buff *skb) | ||
189 | { | ||
190 | #ifdef CONFIG_NETFILTER | ||
191 | if (skb->nfcache & NFC_ALTERED){ | ||
192 | if (ip6_route_me_harder(skb) != 0){ | ||
193 | kfree_skb(skb); | ||
194 | return -EINVAL; | ||
195 | } | ||
196 | } | ||
197 | #endif /* CONFIG_NETFILTER */ | ||
198 | return dst_output(skb); | ||
199 | } | ||
200 | |||
201 | /* | 156 | /* |
202 | * xmit an sk_buff (used by TCP) | 157 | * xmit an sk_buff (used by TCP) |
203 | */ | 158 | */ |
@@ -266,7 +221,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, | |||
266 | mtu = dst_mtu(dst); | 221 | mtu = dst_mtu(dst); |
267 | if ((skb->len <= mtu) || ipfragok) { | 222 | if ((skb->len <= mtu) || ipfragok) { |
268 | IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); | 223 | IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); |
269 | return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute); | 224 | return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, |
225 | dst_output); | ||
270 | } | 226 | } |
271 | 227 | ||
272 | if (net_ratelimit()) | 228 | if (net_ratelimit()) |
@@ -321,7 +277,9 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel) | |||
321 | read_lock(&ip6_ra_lock); | 277 | read_lock(&ip6_ra_lock); |
322 | for (ra = ip6_ra_chain; ra; ra = ra->next) { | 278 | for (ra = ip6_ra_chain; ra; ra = ra->next) { |
323 | struct sock *sk = ra->sk; | 279 | struct sock *sk = ra->sk; |
324 | if (sk && ra->sel == sel) { | 280 | if (sk && ra->sel == sel && |
281 | (!sk->sk_bound_dev_if || | ||
282 | sk->sk_bound_dev_if == skb->dev->ifindex)) { | ||
325 | if (last) { | 283 | if (last) { |
326 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); | 284 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); |
327 | if (skb2) | 285 | if (skb2) |
@@ -667,7 +625,7 @@ slow_path: | |||
667 | */ | 625 | */ |
668 | 626 | ||
669 | if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { | 627 | if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { |
670 | NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n")); | 628 | NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); |
671 | IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); | 629 | IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); |
672 | err = -ENOMEM; | 630 | err = -ENOMEM; |
673 | goto fail; | 631 | goto fail; |
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 3bc144a79fa5..76466af8331e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c | |||
@@ -55,7 +55,7 @@ | |||
55 | 55 | ||
56 | #include <asm/uaccess.h> | 56 | #include <asm/uaccess.h> |
57 | 57 | ||
58 | DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); | 58 | DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly; |
59 | 59 | ||
60 | static struct packet_type ipv6_packet_type = { | 60 | static struct packet_type ipv6_packet_type = { |
61 | .type = __constant_htons(ETH_P_IPV6), | 61 | .type = __constant_htons(ETH_P_IPV6), |
@@ -109,13 +109,6 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)) | |||
109 | return 0; | 109 | return 0; |
110 | } | 110 | } |
111 | 111 | ||
112 | extern int ip6_mc_source(int add, int omode, struct sock *sk, | ||
113 | struct group_source_req *pgsr); | ||
114 | extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf); | ||
115 | extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, | ||
116 | struct group_filter __user *optval, int __user *optlen); | ||
117 | |||
118 | |||
119 | int ipv6_setsockopt(struct sock *sk, int level, int optname, | 112 | int ipv6_setsockopt(struct sock *sk, int level, int optname, |
120 | char __user *optval, int optlen) | 113 | char __user *optval, int optlen) |
121 | { | 114 | { |
@@ -163,6 +156,13 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, | |||
163 | fl6_free_socklist(sk); | 156 | fl6_free_socklist(sk); |
164 | ipv6_sock_mc_close(sk); | 157 | ipv6_sock_mc_close(sk); |
165 | 158 | ||
159 | /* | ||
160 | * Sock is moving from IPv6 to IPv4 (sk_prot), so | ||
161 | * remove it from the refcnt debug socks count in the | ||
162 | * original family... | ||
163 | */ | ||
164 | sk_refcnt_debug_dec(sk); | ||
165 | |||
166 | if (sk->sk_protocol == IPPROTO_TCP) { | 166 | if (sk->sk_protocol == IPPROTO_TCP) { |
167 | struct tcp_sock *tp = tcp_sk(sk); | 167 | struct tcp_sock *tp = tcp_sk(sk); |
168 | 168 | ||
@@ -192,9 +192,11 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, | |||
192 | kfree_skb(pktopt); | 192 | kfree_skb(pktopt); |
193 | 193 | ||
194 | sk->sk_destruct = inet_sock_destruct; | 194 | sk->sk_destruct = inet_sock_destruct; |
195 | #ifdef INET_REFCNT_DEBUG | 195 | /* |
196 | atomic_dec(&inet6_sock_nr); | 196 | * ... and add it to the refcnt debug socks count |
197 | #endif | 197 | * in the new family. -acme |
198 | */ | ||
199 | sk_refcnt_debug_inc(sk); | ||
198 | module_put(THIS_MODULE); | 200 | module_put(THIS_MODULE); |
199 | retv = 0; | 201 | retv = 0; |
200 | break; | 202 | break; |
@@ -437,7 +439,6 @@ done: | |||
437 | } | 439 | } |
438 | case MCAST_MSFILTER: | 440 | case MCAST_MSFILTER: |
439 | { | 441 | { |
440 | extern int sysctl_optmem_max; | ||
441 | extern int sysctl_mld_max_msf; | 442 | extern int sysctl_mld_max_msf; |
442 | struct group_filter *gsf; | 443 | struct group_filter *gsf; |
443 | 444 | ||
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c index 5ade5a5d1990..37a4a99c9fe9 100644 --- a/net/ipv6/ipv6_syms.c +++ b/net/ipv6/ipv6_syms.c | |||
@@ -15,9 +15,6 @@ EXPORT_SYMBOL(ndisc_mc_map); | |||
15 | EXPORT_SYMBOL(register_inet6addr_notifier); | 15 | EXPORT_SYMBOL(register_inet6addr_notifier); |
16 | EXPORT_SYMBOL(unregister_inet6addr_notifier); | 16 | EXPORT_SYMBOL(unregister_inet6addr_notifier); |
17 | EXPORT_SYMBOL(ip6_route_output); | 17 | EXPORT_SYMBOL(ip6_route_output); |
18 | #ifdef CONFIG_NETFILTER | ||
19 | EXPORT_SYMBOL(ip6_route_me_harder); | ||
20 | #endif | ||
21 | EXPORT_SYMBOL(addrconf_lock); | 18 | EXPORT_SYMBOL(addrconf_lock); |
22 | EXPORT_SYMBOL(ipv6_setsockopt); | 19 | EXPORT_SYMBOL(ipv6_setsockopt); |
23 | EXPORT_SYMBOL(ipv6_getsockopt); | 20 | EXPORT_SYMBOL(ipv6_getsockopt); |
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 7ae72d4c9bd2..a7eae30f4554 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c | |||
@@ -812,7 +812,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) | |||
812 | if (ipv6_chk_acast_addr(dev, &msg->target) || | 812 | if (ipv6_chk_acast_addr(dev, &msg->target) || |
813 | (idev->cnf.forwarding && | 813 | (idev->cnf.forwarding && |
814 | pneigh_lookup(&nd_tbl, &msg->target, dev, 0))) { | 814 | pneigh_lookup(&nd_tbl, &msg->target, dev, 0))) { |
815 | if (skb->stamp.tv_sec != LOCALLY_ENQUEUED && | 815 | if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && |
816 | skb->pkt_type != PACKET_HOST && | 816 | skb->pkt_type != PACKET_HOST && |
817 | inc != 0 && | 817 | inc != 0 && |
818 | idev->nd_parms->proxy_delay != 0) { | 818 | idev->nd_parms->proxy_delay != 0) { |
@@ -1487,6 +1487,8 @@ int ndisc_rcv(struct sk_buff *skb) | |||
1487 | return 0; | 1487 | return 0; |
1488 | } | 1488 | } |
1489 | 1489 | ||
1490 | memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); | ||
1491 | |||
1490 | switch (msg->icmph.icmp6_type) { | 1492 | switch (msg->icmph.icmp6_type) { |
1491 | case NDISC_NEIGHBOUR_SOLICITATION: | 1493 | case NDISC_NEIGHBOUR_SOLICITATION: |
1492 | ndisc_recv_ns(skb); | 1494 | ndisc_recv_ns(skb); |
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c new file mode 100644 index 000000000000..f8626ebf90fd --- /dev/null +++ b/net/ipv6/netfilter.c | |||
@@ -0,0 +1,104 @@ | |||
1 | #include <linux/config.h> | ||
2 | #include <linux/init.h> | ||
3 | |||
4 | #ifdef CONFIG_NETFILTER | ||
5 | |||
6 | #include <linux/kernel.h> | ||
7 | #include <linux/ipv6.h> | ||
8 | #include <linux/netfilter.h> | ||
9 | #include <linux/netfilter_ipv6.h> | ||
10 | #include <net/dst.h> | ||
11 | #include <net/ipv6.h> | ||
12 | #include <net/ip6_route.h> | ||
13 | |||
14 | int ip6_route_me_harder(struct sk_buff *skb) | ||
15 | { | ||
16 | struct ipv6hdr *iph = skb->nh.ipv6h; | ||
17 | struct dst_entry *dst; | ||
18 | struct flowi fl = { | ||
19 | .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, | ||
20 | .nl_u = | ||
21 | { .ip6_u = | ||
22 | { .daddr = iph->daddr, | ||
23 | .saddr = iph->saddr, } }, | ||
24 | .proto = iph->nexthdr, | ||
25 | }; | ||
26 | |||
27 | dst = ip6_route_output(skb->sk, &fl); | ||
28 | |||
29 | if (dst->error) { | ||
30 | IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); | ||
31 | LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); | ||
32 | dst_release(dst); | ||
33 | return -EINVAL; | ||
34 | } | ||
35 | |||
36 | /* Drop old route. */ | ||
37 | dst_release(skb->dst); | ||
38 | |||
39 | skb->dst = dst; | ||
40 | return 0; | ||
41 | } | ||
42 | EXPORT_SYMBOL(ip6_route_me_harder); | ||
43 | |||
44 | /* | ||
45 | * Extra routing may needed on local out, as the QUEUE target never | ||
46 | * returns control to the table. | ||
47 | */ | ||
48 | |||
49 | struct ip6_rt_info { | ||
50 | struct in6_addr daddr; | ||
51 | struct in6_addr saddr; | ||
52 | }; | ||
53 | |||
54 | static void save(const struct sk_buff *skb, struct nf_info *info) | ||
55 | { | ||
56 | struct ip6_rt_info *rt_info = nf_info_reroute(info); | ||
57 | |||
58 | if (info->hook == NF_IP6_LOCAL_OUT) { | ||
59 | struct ipv6hdr *iph = skb->nh.ipv6h; | ||
60 | |||
61 | rt_info->daddr = iph->daddr; | ||
62 | rt_info->saddr = iph->saddr; | ||
63 | } | ||
64 | } | ||
65 | |||
66 | static int reroute(struct sk_buff **pskb, const struct nf_info *info) | ||
67 | { | ||
68 | struct ip6_rt_info *rt_info = nf_info_reroute(info); | ||
69 | |||
70 | if (info->hook == NF_IP6_LOCAL_OUT) { | ||
71 | struct ipv6hdr *iph = (*pskb)->nh.ipv6h; | ||
72 | if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || | ||
73 | !ipv6_addr_equal(&iph->saddr, &rt_info->saddr)) | ||
74 | return ip6_route_me_harder(*pskb); | ||
75 | } | ||
76 | return 0; | ||
77 | } | ||
78 | |||
79 | static struct nf_queue_rerouter ip6_reroute = { | ||
80 | .rer_size = sizeof(struct ip6_rt_info), | ||
81 | .save = &save, | ||
82 | .reroute = &reroute, | ||
83 | }; | ||
84 | |||
85 | int __init ipv6_netfilter_init(void) | ||
86 | { | ||
87 | return nf_register_queue_rerouter(PF_INET6, &ip6_reroute); | ||
88 | } | ||
89 | |||
90 | void ipv6_netfilter_fini(void) | ||
91 | { | ||
92 | nf_unregister_queue_rerouter(PF_INET6); | ||
93 | } | ||
94 | |||
95 | #else /* CONFIG_NETFILTER */ | ||
96 | int __init ipv6_netfilter_init(void) | ||
97 | { | ||
98 | return 0; | ||
99 | } | ||
100 | |||
101 | void ipv6_netfilter_fini(void) | ||
102 | { | ||
103 | } | ||
104 | #endif /* CONFIG_NETFILTER */ | ||
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 77ec704c9ee3..216fbe1ac65c 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig | |||
@@ -10,13 +10,16 @@ menu "IPv6: Netfilter Configuration (EXPERIMENTAL)" | |||
10 | # dep_tristate ' FTP protocol support' CONFIG_IP6_NF_FTP $CONFIG_IP6_NF_CONNTRACK | 10 | # dep_tristate ' FTP protocol support' CONFIG_IP6_NF_FTP $CONFIG_IP6_NF_CONNTRACK |
11 | #fi | 11 | #fi |
12 | config IP6_NF_QUEUE | 12 | config IP6_NF_QUEUE |
13 | tristate "Userspace queueing via NETLINK" | 13 | tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)" |
14 | ---help--- | 14 | ---help--- |
15 | 15 | ||
16 | This option adds a queue handler to the kernel for IPv6 | 16 | This option adds a queue handler to the kernel for IPv6 |
17 | packets which lets us to receive the filtered packets | 17 | packets which enables users to receive the filtered packets |
18 | with QUEUE target using libiptc as we can do with | 18 | with QUEUE target using libipq. |
19 | the IPv4 now. | 19 | |
20 | THis option enables the old IPv6-only "ip6_queue" implementation | ||
21 | which has been obsoleted by the new "nfnetlink_queue" code (see | ||
22 | CONFIG_NETFILTER_NETLINK_QUEUE). | ||
20 | 23 | ||
21 | (C) Fernando Anton 2001 | 24 | (C) Fernando Anton 2001 |
22 | IPv64 Project - Work based in IPv64 draft by Arturo Azcorra. | 25 | IPv64 Project - Work based in IPv64 draft by Arturo Azcorra. |
@@ -196,6 +199,16 @@ config IP6_NF_TARGET_LOG | |||
196 | 199 | ||
197 | To compile it as a module, choose M here. If unsure, say N. | 200 | To compile it as a module, choose M here. If unsure, say N. |
198 | 201 | ||
202 | config IP6_NF_TARGET_REJECT | ||
203 | tristate "REJECT target support" | ||
204 | depends on IP6_NF_FILTER | ||
205 | help | ||
206 | The REJECT target allows a filtering rule to specify that an ICMPv6 | ||
207 | error should be issued in response to an incoming packet, rather | ||
208 | than silently being dropped. | ||
209 | |||
210 | To compile it as a module, choose M here. If unsure, say N. | ||
211 | |||
199 | # if [ "$CONFIG_IP6_NF_FILTER" != "n" ]; then | 212 | # if [ "$CONFIG_IP6_NF_FILTER" != "n" ]; then |
200 | # dep_tristate ' REJECT target support' CONFIG_IP6_NF_TARGET_REJECT $CONFIG_IP6_NF_FILTER | 213 | # dep_tristate ' REJECT target support' CONFIG_IP6_NF_TARGET_REJECT $CONFIG_IP6_NF_FILTER |
201 | # if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then | 214 | # if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then |
@@ -226,6 +239,22 @@ config IP6_NF_TARGET_MARK | |||
226 | 239 | ||
227 | To compile it as a module, choose M here. If unsure, say N. | 240 | To compile it as a module, choose M here. If unsure, say N. |
228 | 241 | ||
242 | config IP6_NF_TARGET_HL | ||
243 | tristate 'HL (hoplimit) target support' | ||
244 | depends on IP6_NF_MANGLE | ||
245 | help | ||
246 | This option adds a `HL' target, which enables the user to decrement | ||
247 | the hoplimit value of the IPv6 header or set it to a given (lower) | ||
248 | value. | ||
249 | |||
250 | While it is safe to decrement the hoplimit value, this option also | ||
251 | enables functionality to increment and set the hoplimit value of the | ||
252 | IPv6 header to arbitrary values. This is EXTREMELY DANGEROUS since | ||
253 | you can easily create immortal packets that loop forever on the | ||
254 | network. | ||
255 | |||
256 | To compile it as a module, choose M here. If unsure, say N. | ||
257 | |||
229 | #dep_tristate ' LOG target support' CONFIG_IP6_NF_TARGET_LOG $CONFIG_IP6_NF_IPTABLES | 258 | #dep_tristate ' LOG target support' CONFIG_IP6_NF_TARGET_LOG $CONFIG_IP6_NF_IPTABLES |
230 | config IP6_NF_RAW | 259 | config IP6_NF_RAW |
231 | tristate 'raw table support (required for TRACE)' | 260 | tristate 'raw table support (required for TRACE)' |
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 2e51714953b6..bd9a16a5cbba 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile | |||
@@ -20,7 +20,10 @@ obj-$(CONFIG_IP6_NF_MATCH_PHYSDEV) += ip6t_physdev.o | |||
20 | obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o | 20 | obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o |
21 | obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o | 21 | obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o |
22 | obj-$(CONFIG_IP6_NF_TARGET_MARK) += ip6t_MARK.o | 22 | obj-$(CONFIG_IP6_NF_TARGET_MARK) += ip6t_MARK.o |
23 | obj-$(CONFIG_IP6_NF_TARGET_HL) += ip6t_HL.o | ||
23 | obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o | 24 | obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o |
24 | obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o | 25 | obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o |
25 | obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o | 26 | obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o |
26 | obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o | 27 | obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o |
28 | obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o | ||
29 | obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ip6t_NFQUEUE.o | ||
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index a16df5b27c84..aa11cf366efa 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c | |||
@@ -47,16 +47,10 @@ | |||
47 | #define NET_IPQ_QMAX 2088 | 47 | #define NET_IPQ_QMAX 2088 |
48 | #define NET_IPQ_QMAX_NAME "ip6_queue_maxlen" | 48 | #define NET_IPQ_QMAX_NAME "ip6_queue_maxlen" |
49 | 49 | ||
50 | struct ipq_rt_info { | ||
51 | struct in6_addr daddr; | ||
52 | struct in6_addr saddr; | ||
53 | }; | ||
54 | |||
55 | struct ipq_queue_entry { | 50 | struct ipq_queue_entry { |
56 | struct list_head list; | 51 | struct list_head list; |
57 | struct nf_info *info; | 52 | struct nf_info *info; |
58 | struct sk_buff *skb; | 53 | struct sk_buff *skb; |
59 | struct ipq_rt_info rt_info; | ||
60 | }; | 54 | }; |
61 | 55 | ||
62 | typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); | 56 | typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); |
@@ -244,8 +238,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) | |||
244 | 238 | ||
245 | pmsg->packet_id = (unsigned long )entry; | 239 | pmsg->packet_id = (unsigned long )entry; |
246 | pmsg->data_len = data_len; | 240 | pmsg->data_len = data_len; |
247 | pmsg->timestamp_sec = entry->skb->stamp.tv_sec; | 241 | pmsg->timestamp_sec = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec; |
248 | pmsg->timestamp_usec = entry->skb->stamp.tv_usec; | 242 | pmsg->timestamp_usec = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec; |
249 | pmsg->mark = entry->skb->nfmark; | 243 | pmsg->mark = entry->skb->nfmark; |
250 | pmsg->hook = entry->info->hook; | 244 | pmsg->hook = entry->info->hook; |
251 | pmsg->hw_protocol = entry->skb->protocol; | 245 | pmsg->hw_protocol = entry->skb->protocol; |
@@ -284,7 +278,8 @@ nlmsg_failure: | |||
284 | } | 278 | } |
285 | 279 | ||
286 | static int | 280 | static int |
287 | ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) | 281 | ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, |
282 | unsigned int queuenum, void *data) | ||
288 | { | 283 | { |
289 | int status = -EINVAL; | 284 | int status = -EINVAL; |
290 | struct sk_buff *nskb; | 285 | struct sk_buff *nskb; |
@@ -302,13 +297,6 @@ ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) | |||
302 | entry->info = info; | 297 | entry->info = info; |
303 | entry->skb = skb; | 298 | entry->skb = skb; |
304 | 299 | ||
305 | if (entry->info->hook == NF_IP_LOCAL_OUT) { | ||
306 | struct ipv6hdr *iph = skb->nh.ipv6h; | ||
307 | |||
308 | entry->rt_info.daddr = iph->daddr; | ||
309 | entry->rt_info.saddr = iph->saddr; | ||
310 | } | ||
311 | |||
312 | nskb = ipq_build_packet_message(entry, &status); | 300 | nskb = ipq_build_packet_message(entry, &status); |
313 | if (nskb == NULL) | 301 | if (nskb == NULL) |
314 | goto err_out_free; | 302 | goto err_out_free; |
@@ -384,23 +372,11 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) | |||
384 | } | 372 | } |
385 | skb_put(e->skb, diff); | 373 | skb_put(e->skb, diff); |
386 | } | 374 | } |
387 | if (!skb_ip_make_writable(&e->skb, v->data_len)) | 375 | if (!skb_make_writable(&e->skb, v->data_len)) |
388 | return -ENOMEM; | 376 | return -ENOMEM; |
389 | memcpy(e->skb->data, v->payload, v->data_len); | 377 | memcpy(e->skb->data, v->payload, v->data_len); |
390 | e->skb->ip_summed = CHECKSUM_NONE; | 378 | e->skb->ip_summed = CHECKSUM_NONE; |
391 | e->skb->nfcache |= NFC_ALTERED; | 379 | |
392 | |||
393 | /* | ||
394 | * Extra routing may needed on local out, as the QUEUE target never | ||
395 | * returns control to the table. | ||
396 | * Not a nice way to cmp, but works | ||
397 | */ | ||
398 | if (e->info->hook == NF_IP_LOCAL_OUT) { | ||
399 | struct ipv6hdr *iph = e->skb->nh.ipv6h; | ||
400 | if (!ipv6_addr_equal(&iph->daddr, &e->rt_info.daddr) || | ||
401 | !ipv6_addr_equal(&iph->saddr, &e->rt_info.saddr)) | ||
402 | return ip6_route_me_harder(e->skb); | ||
403 | } | ||
404 | return 0; | 380 | return 0; |
405 | } | 381 | } |
406 | 382 | ||
@@ -676,6 +652,11 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length) | |||
676 | return len; | 652 | return len; |
677 | } | 653 | } |
678 | 654 | ||
655 | static struct nf_queue_handler nfqh = { | ||
656 | .name = "ip6_queue", | ||
657 | .outfn = &ipq_enqueue_packet, | ||
658 | }; | ||
659 | |||
679 | static int | 660 | static int |
680 | init_or_cleanup(int init) | 661 | init_or_cleanup(int init) |
681 | { | 662 | { |
@@ -686,7 +667,8 @@ init_or_cleanup(int init) | |||
686 | goto cleanup; | 667 | goto cleanup; |
687 | 668 | ||
688 | netlink_register_notifier(&ipq_nl_notifier); | 669 | netlink_register_notifier(&ipq_nl_notifier); |
689 | ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk); | 670 | ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk, |
671 | THIS_MODULE); | ||
690 | if (ipqnl == NULL) { | 672 | if (ipqnl == NULL) { |
691 | printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); | 673 | printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); |
692 | goto cleanup_netlink_notifier; | 674 | goto cleanup_netlink_notifier; |
@@ -703,7 +685,7 @@ init_or_cleanup(int init) | |||
703 | register_netdevice_notifier(&ipq_dev_notifier); | 685 | register_netdevice_notifier(&ipq_dev_notifier); |
704 | ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0); | 686 | ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0); |
705 | 687 | ||
706 | status = nf_register_queue_handler(PF_INET6, ipq_enqueue_packet, NULL); | 688 | status = nf_register_queue_handler(PF_INET6, &nfqh); |
707 | if (status < 0) { | 689 | if (status < 0) { |
708 | printk(KERN_ERR "ip6_queue: failed to register queue handler\n"); | 690 | printk(KERN_ERR "ip6_queue: failed to register queue handler\n"); |
709 | goto cleanup_sysctl; | 691 | goto cleanup_sysctl; |
@@ -711,7 +693,7 @@ init_or_cleanup(int init) | |||
711 | return status; | 693 | return status; |
712 | 694 | ||
713 | cleanup: | 695 | cleanup: |
714 | nf_unregister_queue_handler(PF_INET6); | 696 | nf_unregister_queue_handlers(&nfqh); |
715 | synchronize_net(); | 697 | synchronize_net(); |
716 | ipq_flush(NF_DROP); | 698 | ipq_flush(NF_DROP); |
717 | 699 | ||
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 73034511c8db..1cb8adb2787f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c | |||
@@ -401,7 +401,6 @@ ip6t_do_table(struct sk_buff **pskb, | |||
401 | do { | 401 | do { |
402 | IP_NF_ASSERT(e); | 402 | IP_NF_ASSERT(e); |
403 | IP_NF_ASSERT(back); | 403 | IP_NF_ASSERT(back); |
404 | (*pskb)->nfcache |= e->nfcache; | ||
405 | if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6, | 404 | if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6, |
406 | &protoff, &offset)) { | 405 | &protoff, &offset)) { |
407 | struct ip6t_entry_target *t; | 406 | struct ip6t_entry_target *t; |
@@ -434,8 +433,8 @@ ip6t_do_table(struct sk_buff **pskb, | |||
434 | back->comefrom); | 433 | back->comefrom); |
435 | continue; | 434 | continue; |
436 | } | 435 | } |
437 | if (table_base + v | 436 | if (table_base + v != (void *)e + e->next_offset |
438 | != (void *)e + e->next_offset) { | 437 | && !(e->ipv6.flags & IP6T_F_GOTO)) { |
439 | /* Save old back ptr in next entry */ | 438 | /* Save old back ptr in next entry */ |
440 | struct ip6t_entry *next | 439 | struct ip6t_entry *next |
441 | = (void *)e + e->next_offset; | 440 | = (void *)e + e->next_offset; |
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c new file mode 100644 index 000000000000..8f5549b72720 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_HL.c | |||
@@ -0,0 +1,118 @@ | |||
1 | /* | ||
2 | * Hop Limit modification target for ip6tables | ||
3 | * Maciej Soltysiak <solt@dns.toxicfilms.tv> | ||
4 | * Based on HW's TTL module | ||
5 | * | ||
6 | * This software is distributed under the terms of GNU GPL | ||
7 | */ | ||
8 | |||
9 | #include <linux/module.h> | ||
10 | #include <linux/skbuff.h> | ||
11 | #include <linux/ip.h> | ||
12 | |||
13 | #include <linux/netfilter_ipv6/ip6_tables.h> | ||
14 | #include <linux/netfilter_ipv6/ip6t_HL.h> | ||
15 | |||
16 | MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>"); | ||
17 | MODULE_DESCRIPTION("IP tables Hop Limit modification module"); | ||
18 | MODULE_LICENSE("GPL"); | ||
19 | |||
20 | static unsigned int ip6t_hl_target(struct sk_buff **pskb, | ||
21 | const struct net_device *in, | ||
22 | const struct net_device *out, | ||
23 | unsigned int hooknum, | ||
24 | const void *targinfo, void *userinfo) | ||
25 | { | ||
26 | struct ipv6hdr *ip6h; | ||
27 | const struct ip6t_HL_info *info = targinfo; | ||
28 | u_int16_t diffs[2]; | ||
29 | int new_hl; | ||
30 | |||
31 | if (!skb_make_writable(pskb, (*pskb)->len)) | ||
32 | return NF_DROP; | ||
33 | |||
34 | ip6h = (*pskb)->nh.ipv6h; | ||
35 | |||
36 | switch (info->mode) { | ||
37 | case IP6T_HL_SET: | ||
38 | new_hl = info->hop_limit; | ||
39 | break; | ||
40 | case IP6T_HL_INC: | ||
41 | new_hl = ip6h->hop_limit + info->hop_limit; | ||
42 | if (new_hl > 255) | ||
43 | new_hl = 255; | ||
44 | break; | ||
45 | case IP6T_HL_DEC: | ||
46 | new_hl = ip6h->hop_limit - info->hop_limit; | ||
47 | if (new_hl < 0) | ||
48 | new_hl = 0; | ||
49 | break; | ||
50 | default: | ||
51 | new_hl = ip6h->hop_limit; | ||
52 | break; | ||
53 | } | ||
54 | |||
55 | if (new_hl != ip6h->hop_limit) { | ||
56 | diffs[0] = htons(((unsigned)ip6h->hop_limit) << 8) ^ 0xFFFF; | ||
57 | ip6h->hop_limit = new_hl; | ||
58 | diffs[1] = htons(((unsigned)ip6h->hop_limit) << 8); | ||
59 | } | ||
60 | |||
61 | return IP6T_CONTINUE; | ||
62 | } | ||
63 | |||
64 | static int ip6t_hl_checkentry(const char *tablename, | ||
65 | const struct ip6t_entry *e, | ||
66 | void *targinfo, | ||
67 | unsigned int targinfosize, | ||
68 | unsigned int hook_mask) | ||
69 | { | ||
70 | struct ip6t_HL_info *info = targinfo; | ||
71 | |||
72 | if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_HL_info))) { | ||
73 | printk(KERN_WARNING "ip6t_HL: targinfosize %u != %Zu\n", | ||
74 | targinfosize, | ||
75 | IP6T_ALIGN(sizeof(struct ip6t_HL_info))); | ||
76 | return 0; | ||
77 | } | ||
78 | |||
79 | if (strcmp(tablename, "mangle")) { | ||
80 | printk(KERN_WARNING "ip6t_HL: can only be called from " | ||
81 | "\"mangle\" table, not \"%s\"\n", tablename); | ||
82 | return 0; | ||
83 | } | ||
84 | |||
85 | if (info->mode > IP6T_HL_MAXMODE) { | ||
86 | printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n", | ||
87 | info->mode); | ||
88 | return 0; | ||
89 | } | ||
90 | |||
91 | if ((info->mode != IP6T_HL_SET) && (info->hop_limit == 0)) { | ||
92 | printk(KERN_WARNING "ip6t_HL: increment/decrement doesn't " | ||
93 | "make sense with value 0\n"); | ||
94 | return 0; | ||
95 | } | ||
96 | |||
97 | return 1; | ||
98 | } | ||
99 | |||
100 | static struct ip6t_target ip6t_HL = { | ||
101 | .name = "HL", | ||
102 | .target = ip6t_hl_target, | ||
103 | .checkentry = ip6t_hl_checkentry, | ||
104 | .me = THIS_MODULE | ||
105 | }; | ||
106 | |||
107 | static int __init init(void) | ||
108 | { | ||
109 | return ip6t_register_target(&ip6t_HL); | ||
110 | } | ||
111 | |||
112 | static void __exit fini(void) | ||
113 | { | ||
114 | ip6t_unregister_target(&ip6t_HL); | ||
115 | } | ||
116 | |||
117 | module_init(init); | ||
118 | module_exit(fini); | ||
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index a692e26a4fa3..0cd1d1bd9033 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c | |||
@@ -26,10 +26,6 @@ MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>"); | |||
26 | MODULE_DESCRIPTION("IP6 tables LOG target module"); | 26 | MODULE_DESCRIPTION("IP6 tables LOG target module"); |
27 | MODULE_LICENSE("GPL"); | 27 | MODULE_LICENSE("GPL"); |
28 | 28 | ||
29 | static unsigned int nflog = 1; | ||
30 | module_param(nflog, int, 0400); | ||
31 | MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); | ||
32 | |||
33 | struct in_device; | 29 | struct in_device; |
34 | #include <net/route.h> | 30 | #include <net/route.h> |
35 | #include <linux/netfilter_ipv6/ip6t_LOG.h> | 31 | #include <linux/netfilter_ipv6/ip6t_LOG.h> |
@@ -44,7 +40,7 @@ struct in_device; | |||
44 | static DEFINE_SPINLOCK(log_lock); | 40 | static DEFINE_SPINLOCK(log_lock); |
45 | 41 | ||
46 | /* One level of recursion won't kill us */ | 42 | /* One level of recursion won't kill us */ |
47 | static void dump_packet(const struct ip6t_log_info *info, | 43 | static void dump_packet(const struct nf_loginfo *info, |
48 | const struct sk_buff *skb, unsigned int ip6hoff, | 44 | const struct sk_buff *skb, unsigned int ip6hoff, |
49 | int recurse) | 45 | int recurse) |
50 | { | 46 | { |
@@ -53,6 +49,12 @@ static void dump_packet(const struct ip6t_log_info *info, | |||
53 | struct ipv6hdr _ip6h, *ih; | 49 | struct ipv6hdr _ip6h, *ih; |
54 | unsigned int ptr; | 50 | unsigned int ptr; |
55 | unsigned int hdrlen = 0; | 51 | unsigned int hdrlen = 0; |
52 | unsigned int logflags; | ||
53 | |||
54 | if (info->type == NF_LOG_TYPE_LOG) | ||
55 | logflags = info->u.log.logflags; | ||
56 | else | ||
57 | logflags = NF_LOG_MASK; | ||
56 | 58 | ||
57 | ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); | 59 | ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); |
58 | if (ih == NULL) { | 60 | if (ih == NULL) { |
@@ -84,7 +86,7 @@ static void dump_packet(const struct ip6t_log_info *info, | |||
84 | } | 86 | } |
85 | 87 | ||
86 | /* Max length: 48 "OPT (...) " */ | 88 | /* Max length: 48 "OPT (...) " */ |
87 | if (info->logflags & IP6T_LOG_IPOPT) | 89 | if (logflags & IP6T_LOG_IPOPT) |
88 | printk("OPT ( "); | 90 | printk("OPT ( "); |
89 | 91 | ||
90 | switch (currenthdr) { | 92 | switch (currenthdr) { |
@@ -119,7 +121,7 @@ static void dump_packet(const struct ip6t_log_info *info, | |||
119 | case IPPROTO_ROUTING: | 121 | case IPPROTO_ROUTING: |
120 | case IPPROTO_HOPOPTS: | 122 | case IPPROTO_HOPOPTS: |
121 | if (fragment) { | 123 | if (fragment) { |
122 | if (info->logflags & IP6T_LOG_IPOPT) | 124 | if (logflags & IP6T_LOG_IPOPT) |
123 | printk(")"); | 125 | printk(")"); |
124 | return; | 126 | return; |
125 | } | 127 | } |
@@ -127,7 +129,7 @@ static void dump_packet(const struct ip6t_log_info *info, | |||
127 | break; | 129 | break; |
128 | /* Max Length */ | 130 | /* Max Length */ |
129 | case IPPROTO_AH: | 131 | case IPPROTO_AH: |
130 | if (info->logflags & IP6T_LOG_IPOPT) { | 132 | if (logflags & IP6T_LOG_IPOPT) { |
131 | struct ip_auth_hdr _ahdr, *ah; | 133 | struct ip_auth_hdr _ahdr, *ah; |
132 | 134 | ||
133 | /* Max length: 3 "AH " */ | 135 | /* Max length: 3 "AH " */ |
@@ -158,7 +160,7 @@ static void dump_packet(const struct ip6t_log_info *info, | |||
158 | hdrlen = (hp->hdrlen+2)<<2; | 160 | hdrlen = (hp->hdrlen+2)<<2; |
159 | break; | 161 | break; |
160 | case IPPROTO_ESP: | 162 | case IPPROTO_ESP: |
161 | if (info->logflags & IP6T_LOG_IPOPT) { | 163 | if (logflags & IP6T_LOG_IPOPT) { |
162 | struct ip_esp_hdr _esph, *eh; | 164 | struct ip_esp_hdr _esph, *eh; |
163 | 165 | ||
164 | /* Max length: 4 "ESP " */ | 166 | /* Max length: 4 "ESP " */ |
@@ -190,7 +192,7 @@ static void dump_packet(const struct ip6t_log_info *info, | |||
190 | printk("Unknown Ext Hdr %u", currenthdr); | 192 | printk("Unknown Ext Hdr %u", currenthdr); |
191 | return; | 193 | return; |
192 | } | 194 | } |
193 | if (info->logflags & IP6T_LOG_IPOPT) | 195 | if (logflags & IP6T_LOG_IPOPT) |
194 | printk(") "); | 196 | printk(") "); |
195 | 197 | ||
196 | currenthdr = hp->nexthdr; | 198 | currenthdr = hp->nexthdr; |
@@ -218,7 +220,7 @@ static void dump_packet(const struct ip6t_log_info *info, | |||
218 | printk("SPT=%u DPT=%u ", | 220 | printk("SPT=%u DPT=%u ", |
219 | ntohs(th->source), ntohs(th->dest)); | 221 | ntohs(th->source), ntohs(th->dest)); |
220 | /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ | 222 | /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ |
221 | if (info->logflags & IP6T_LOG_TCPSEQ) | 223 | if (logflags & IP6T_LOG_TCPSEQ) |
222 | printk("SEQ=%u ACK=%u ", | 224 | printk("SEQ=%u ACK=%u ", |
223 | ntohl(th->seq), ntohl(th->ack_seq)); | 225 | ntohl(th->seq), ntohl(th->ack_seq)); |
224 | /* Max length: 13 "WINDOW=65535 " */ | 226 | /* Max length: 13 "WINDOW=65535 " */ |
@@ -245,7 +247,7 @@ static void dump_packet(const struct ip6t_log_info *info, | |||
245 | /* Max length: 11 "URGP=65535 " */ | 247 | /* Max length: 11 "URGP=65535 " */ |
246 | printk("URGP=%u ", ntohs(th->urg_ptr)); | 248 | printk("URGP=%u ", ntohs(th->urg_ptr)); |
247 | 249 | ||
248 | if ((info->logflags & IP6T_LOG_TCPOPT) | 250 | if ((logflags & IP6T_LOG_TCPOPT) |
249 | && th->doff * 4 > sizeof(struct tcphdr)) { | 251 | && th->doff * 4 > sizeof(struct tcphdr)) { |
250 | u_int8_t _opt[60 - sizeof(struct tcphdr)], *op; | 252 | u_int8_t _opt[60 - sizeof(struct tcphdr)], *op; |
251 | unsigned int i; | 253 | unsigned int i; |
@@ -349,7 +351,7 @@ static void dump_packet(const struct ip6t_log_info *info, | |||
349 | } | 351 | } |
350 | 352 | ||
351 | /* Max length: 15 "UID=4294967295 " */ | 353 | /* Max length: 15 "UID=4294967295 " */ |
352 | if ((info->logflags & IP6T_LOG_UID) && recurse && skb->sk) { | 354 | if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) { |
353 | read_lock_bh(&skb->sk->sk_callback_lock); | 355 | read_lock_bh(&skb->sk->sk_callback_lock); |
354 | if (skb->sk->sk_socket && skb->sk->sk_socket->file) | 356 | if (skb->sk->sk_socket && skb->sk->sk_socket->file) |
355 | printk("UID=%u ", skb->sk->sk_socket->file->f_uid); | 357 | printk("UID=%u ", skb->sk->sk_socket->file->f_uid); |
@@ -357,19 +359,31 @@ static void dump_packet(const struct ip6t_log_info *info, | |||
357 | } | 359 | } |
358 | } | 360 | } |
359 | 361 | ||
362 | static struct nf_loginfo default_loginfo = { | ||
363 | .type = NF_LOG_TYPE_LOG, | ||
364 | .u = { | ||
365 | .log = { | ||
366 | .level = 0, | ||
367 | .logflags = NF_LOG_MASK, | ||
368 | }, | ||
369 | }, | ||
370 | }; | ||
371 | |||
360 | static void | 372 | static void |
361 | ip6t_log_packet(unsigned int hooknum, | 373 | ip6t_log_packet(unsigned int pf, |
374 | unsigned int hooknum, | ||
362 | const struct sk_buff *skb, | 375 | const struct sk_buff *skb, |
363 | const struct net_device *in, | 376 | const struct net_device *in, |
364 | const struct net_device *out, | 377 | const struct net_device *out, |
365 | const struct ip6t_log_info *loginfo, | 378 | const struct nf_loginfo *loginfo, |
366 | const char *level_string, | ||
367 | const char *prefix) | 379 | const char *prefix) |
368 | { | 380 | { |
381 | if (!loginfo) | ||
382 | loginfo = &default_loginfo; | ||
383 | |||
369 | spin_lock_bh(&log_lock); | 384 | spin_lock_bh(&log_lock); |
370 | printk(level_string); | 385 | printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, |
371 | printk("%sIN=%s OUT=%s ", | 386 | prefix, |
372 | prefix == NULL ? loginfo->prefix : prefix, | ||
373 | in ? in->name : "", | 387 | in ? in->name : "", |
374 | out ? out->name : ""); | 388 | out ? out->name : ""); |
375 | if (in && !out) { | 389 | if (in && !out) { |
@@ -416,29 +430,17 @@ ip6t_log_target(struct sk_buff **pskb, | |||
416 | void *userinfo) | 430 | void *userinfo) |
417 | { | 431 | { |
418 | const struct ip6t_log_info *loginfo = targinfo; | 432 | const struct ip6t_log_info *loginfo = targinfo; |
419 | char level_string[4] = "< >"; | 433 | struct nf_loginfo li; |
434 | |||
435 | li.type = NF_LOG_TYPE_LOG; | ||
436 | li.u.log.level = loginfo->level; | ||
437 | li.u.log.logflags = loginfo->logflags; | ||
420 | 438 | ||
421 | level_string[1] = '0' + (loginfo->level % 8); | 439 | nf_log_packet(PF_INET6, hooknum, *pskb, in, out, &li, loginfo->prefix); |
422 | ip6t_log_packet(hooknum, *pskb, in, out, loginfo, level_string, NULL); | ||
423 | 440 | ||
424 | return IP6T_CONTINUE; | 441 | return IP6T_CONTINUE; |
425 | } | 442 | } |
426 | 443 | ||
427 | static void | ||
428 | ip6t_logfn(unsigned int hooknum, | ||
429 | const struct sk_buff *skb, | ||
430 | const struct net_device *in, | ||
431 | const struct net_device *out, | ||
432 | const char *prefix) | ||
433 | { | ||
434 | struct ip6t_log_info loginfo = { | ||
435 | .level = 0, | ||
436 | .logflags = IP6T_LOG_MASK, | ||
437 | .prefix = "" | ||
438 | }; | ||
439 | |||
440 | ip6t_log_packet(hooknum, skb, in, out, &loginfo, KERN_WARNING, prefix); | ||
441 | } | ||
442 | 444 | ||
443 | static int ip6t_log_checkentry(const char *tablename, | 445 | static int ip6t_log_checkentry(const char *tablename, |
444 | const struct ip6t_entry *e, | 446 | const struct ip6t_entry *e, |
@@ -475,20 +477,29 @@ static struct ip6t_target ip6t_log_reg = { | |||
475 | .me = THIS_MODULE, | 477 | .me = THIS_MODULE, |
476 | }; | 478 | }; |
477 | 479 | ||
480 | static struct nf_logger ip6t_logger = { | ||
481 | .name = "ip6t_LOG", | ||
482 | .logfn = &ip6t_log_packet, | ||
483 | .me = THIS_MODULE, | ||
484 | }; | ||
485 | |||
478 | static int __init init(void) | 486 | static int __init init(void) |
479 | { | 487 | { |
480 | if (ip6t_register_target(&ip6t_log_reg)) | 488 | if (ip6t_register_target(&ip6t_log_reg)) |
481 | return -EINVAL; | 489 | return -EINVAL; |
482 | if (nflog) | 490 | if (nf_log_register(PF_INET6, &ip6t_logger) < 0) { |
483 | nf_log_register(PF_INET6, &ip6t_logfn); | 491 | printk(KERN_WARNING "ip6t_LOG: not logging via system console " |
492 | "since somebody else already registered for PF_INET6\n"); | ||
493 | /* we cannot make module load fail here, since otherwise | ||
494 | * ip6tables userspace would abort */ | ||
495 | } | ||
484 | 496 | ||
485 | return 0; | 497 | return 0; |
486 | } | 498 | } |
487 | 499 | ||
488 | static void __exit fini(void) | 500 | static void __exit fini(void) |
489 | { | 501 | { |
490 | if (nflog) | 502 | nf_log_unregister_logger(&ip6t_logger); |
491 | nf_log_unregister(PF_INET6, &ip6t_logfn); | ||
492 | ip6t_unregister_target(&ip6t_log_reg); | 503 | ip6t_unregister_target(&ip6t_log_reg); |
493 | } | 504 | } |
494 | 505 | ||
diff --git a/net/ipv6/netfilter/ip6t_MARK.c b/net/ipv6/netfilter/ip6t_MARK.c index d09ceb05013a..81924fcc5857 100644 --- a/net/ipv6/netfilter/ip6t_MARK.c +++ b/net/ipv6/netfilter/ip6t_MARK.c | |||
@@ -28,10 +28,9 @@ target(struct sk_buff **pskb, | |||
28 | { | 28 | { |
29 | const struct ip6t_mark_target_info *markinfo = targinfo; | 29 | const struct ip6t_mark_target_info *markinfo = targinfo; |
30 | 30 | ||
31 | if((*pskb)->nfmark != markinfo->mark) { | 31 | if((*pskb)->nfmark != markinfo->mark) |
32 | (*pskb)->nfmark = markinfo->mark; | 32 | (*pskb)->nfmark = markinfo->mark; |
33 | (*pskb)->nfcache |= NFC_ALTERED; | 33 | |
34 | } | ||
35 | return IP6T_CONTINUE; | 34 | return IP6T_CONTINUE; |
36 | } | 35 | } |
37 | 36 | ||
diff --git a/net/ipv6/netfilter/ip6t_NFQUEUE.c b/net/ipv6/netfilter/ip6t_NFQUEUE.c new file mode 100644 index 000000000000..c6e3730e7409 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_NFQUEUE.c | |||
@@ -0,0 +1,70 @@ | |||
1 | /* ip6tables module for using new netfilter netlink queue | ||
2 | * | ||
3 | * (C) 2005 by Harald Welte <laforge@netfilter.org> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | */ | ||
10 | |||
11 | #include <linux/module.h> | ||
12 | #include <linux/skbuff.h> | ||
13 | |||
14 | #include <linux/netfilter.h> | ||
15 | #include <linux/netfilter_ipv6/ip6_tables.h> | ||
16 | #include <linux/netfilter_ipv4/ipt_NFQUEUE.h> | ||
17 | |||
18 | MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); | ||
19 | MODULE_DESCRIPTION("ip6tables NFQUEUE target"); | ||
20 | MODULE_LICENSE("GPL"); | ||
21 | |||
22 | static unsigned int | ||
23 | target(struct sk_buff **pskb, | ||
24 | const struct net_device *in, | ||
25 | const struct net_device *out, | ||
26 | unsigned int hooknum, | ||
27 | const void *targinfo, | ||
28 | void *userinfo) | ||
29 | { | ||
30 | const struct ipt_NFQ_info *tinfo = targinfo; | ||
31 | |||
32 | return NF_QUEUE_NR(tinfo->queuenum); | ||
33 | } | ||
34 | |||
35 | static int | ||
36 | checkentry(const char *tablename, | ||
37 | const struct ip6t_entry *e, | ||
38 | void *targinfo, | ||
39 | unsigned int targinfosize, | ||
40 | unsigned int hook_mask) | ||
41 | { | ||
42 | if (targinfosize != IP6T_ALIGN(sizeof(struct ipt_NFQ_info))) { | ||
43 | printk(KERN_WARNING "NFQUEUE: targinfosize %u != %Zu\n", | ||
44 | targinfosize, | ||
45 | IP6T_ALIGN(sizeof(struct ipt_NFQ_info))); | ||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | return 1; | ||
50 | } | ||
51 | |||
52 | static struct ip6t_target ipt_NFQ_reg = { | ||
53 | .name = "NFQUEUE", | ||
54 | .target = target, | ||
55 | .checkentry = checkentry, | ||
56 | .me = THIS_MODULE, | ||
57 | }; | ||
58 | |||
59 | static int __init init(void) | ||
60 | { | ||
61 | return ip6t_register_target(&ipt_NFQ_reg); | ||
62 | } | ||
63 | |||
64 | static void __exit fini(void) | ||
65 | { | ||
66 | ip6t_unregister_target(&ipt_NFQ_reg); | ||
67 | } | ||
68 | |||
69 | module_init(init); | ||
70 | module_exit(fini); | ||
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c new file mode 100644 index 000000000000..14316c3ebde4 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_REJECT.c | |||
@@ -0,0 +1,284 @@ | |||
1 | /* | ||
2 | * IP6 tables REJECT target module | ||
3 | * Linux INET6 implementation | ||
4 | * | ||
5 | * Copyright (C)2003 USAGI/WIDE Project | ||
6 | * | ||
7 | * Authors: | ||
8 | * Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp> | ||
9 | * | ||
10 | * Based on net/ipv4/netfilter/ipt_REJECT.c | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or | ||
13 | * modify it under the terms of the GNU General Public License | ||
14 | * as published by the Free Software Foundation; either version | ||
15 | * 2 of the License, or (at your option) any later version. | ||
16 | */ | ||
17 | |||
18 | #include <linux/config.h> | ||
19 | #include <linux/module.h> | ||
20 | #include <linux/skbuff.h> | ||
21 | #include <linux/icmpv6.h> | ||
22 | #include <linux/netdevice.h> | ||
23 | #include <net/ipv6.h> | ||
24 | #include <net/tcp.h> | ||
25 | #include <net/icmp.h> | ||
26 | #include <net/ip6_checksum.h> | ||
27 | #include <net/ip6_fib.h> | ||
28 | #include <net/ip6_route.h> | ||
29 | #include <net/flow.h> | ||
30 | #include <linux/netfilter_ipv6/ip6_tables.h> | ||
31 | #include <linux/netfilter_ipv6/ip6t_REJECT.h> | ||
32 | |||
33 | MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>"); | ||
34 | MODULE_DESCRIPTION("IP6 tables REJECT target module"); | ||
35 | MODULE_LICENSE("GPL"); | ||
36 | |||
37 | #if 0 | ||
38 | #define DEBUGP printk | ||
39 | #else | ||
40 | #define DEBUGP(format, args...) | ||
41 | #endif | ||
42 | |||
43 | /* Send RST reply */ | ||
44 | static void send_reset(struct sk_buff *oldskb) | ||
45 | { | ||
46 | struct sk_buff *nskb; | ||
47 | struct tcphdr otcph, *tcph; | ||
48 | unsigned int otcplen, hh_len; | ||
49 | int tcphoff, needs_ack; | ||
50 | struct ipv6hdr *oip6h = oldskb->nh.ipv6h, *ip6h; | ||
51 | struct dst_entry *dst = NULL; | ||
52 | u8 proto; | ||
53 | struct flowi fl; | ||
54 | |||
55 | if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || | ||
56 | (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { | ||
57 | DEBUGP("ip6t_REJECT: addr is not unicast.\n"); | ||
58 | return; | ||
59 | } | ||
60 | |||
61 | proto = oip6h->nexthdr; | ||
62 | tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto); | ||
63 | |||
64 | if ((tcphoff < 0) || (tcphoff > oldskb->len)) { | ||
65 | DEBUGP("ip6t_REJECT: Can't get TCP header.\n"); | ||
66 | return; | ||
67 | } | ||
68 | |||
69 | otcplen = oldskb->len - tcphoff; | ||
70 | |||
71 | /* IP header checks: fragment, too short. */ | ||
72 | if ((proto != IPPROTO_TCP) || (otcplen < sizeof(struct tcphdr))) { | ||
73 | DEBUGP("ip6t_REJECT: proto(%d) != IPPROTO_TCP, or too short. otcplen = %d\n", | ||
74 | proto, otcplen); | ||
75 | return; | ||
76 | } | ||
77 | |||
78 | if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr))) | ||
79 | BUG(); | ||
80 | |||
81 | /* No RST for RST. */ | ||
82 | if (otcph.rst) { | ||
83 | DEBUGP("ip6t_REJECT: RST is set\n"); | ||
84 | return; | ||
85 | } | ||
86 | |||
87 | /* Check checksum. */ | ||
88 | if (csum_ipv6_magic(&oip6h->saddr, &oip6h->daddr, otcplen, IPPROTO_TCP, | ||
89 | skb_checksum(oldskb, tcphoff, otcplen, 0))) { | ||
90 | DEBUGP("ip6t_REJECT: TCP checksum is invalid\n"); | ||
91 | return; | ||
92 | } | ||
93 | |||
94 | memset(&fl, 0, sizeof(fl)); | ||
95 | fl.proto = IPPROTO_TCP; | ||
96 | ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr); | ||
97 | ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); | ||
98 | fl.fl_ip_sport = otcph.dest; | ||
99 | fl.fl_ip_dport = otcph.source; | ||
100 | dst = ip6_route_output(NULL, &fl); | ||
101 | if (dst == NULL) | ||
102 | return; | ||
103 | if (dst->error || | ||
104 | xfrm_lookup(&dst, &fl, NULL, 0)) { | ||
105 | dst_release(dst); | ||
106 | return; | ||
107 | } | ||
108 | |||
109 | hh_len = (dst->dev->hard_header_len + 15)&~15; | ||
110 | nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) | ||
111 | + sizeof(struct tcphdr) + dst->trailer_len, | ||
112 | GFP_ATOMIC); | ||
113 | |||
114 | if (!nskb) { | ||
115 | if (net_ratelimit()) | ||
116 | printk("ip6t_REJECT: Can't alloc skb\n"); | ||
117 | dst_release(dst); | ||
118 | return; | ||
119 | } | ||
120 | |||
121 | nskb->dst = dst; | ||
122 | |||
123 | skb_reserve(nskb, hh_len + dst->header_len); | ||
124 | |||
125 | ip6h = nskb->nh.ipv6h = (struct ipv6hdr *) | ||
126 | skb_put(nskb, sizeof(struct ipv6hdr)); | ||
127 | ip6h->version = 6; | ||
128 | ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT); | ||
129 | ip6h->nexthdr = IPPROTO_TCP; | ||
130 | ip6h->payload_len = htons(sizeof(struct tcphdr)); | ||
131 | ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr); | ||
132 | ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr); | ||
133 | |||
134 | tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); | ||
135 | /* Truncate to length (no data) */ | ||
136 | tcph->doff = sizeof(struct tcphdr)/4; | ||
137 | tcph->source = otcph.dest; | ||
138 | tcph->dest = otcph.source; | ||
139 | |||
140 | if (otcph.ack) { | ||
141 | needs_ack = 0; | ||
142 | tcph->seq = otcph.ack_seq; | ||
143 | tcph->ack_seq = 0; | ||
144 | } else { | ||
145 | needs_ack = 1; | ||
146 | tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin | ||
147 | + otcplen - (otcph.doff<<2)); | ||
148 | tcph->seq = 0; | ||
149 | } | ||
150 | |||
151 | /* Reset flags */ | ||
152 | ((u_int8_t *)tcph)[13] = 0; | ||
153 | tcph->rst = 1; | ||
154 | tcph->ack = needs_ack; | ||
155 | tcph->window = 0; | ||
156 | tcph->urg_ptr = 0; | ||
157 | tcph->check = 0; | ||
158 | |||
159 | /* Adjust TCP checksum */ | ||
160 | tcph->check = csum_ipv6_magic(&nskb->nh.ipv6h->saddr, | ||
161 | &nskb->nh.ipv6h->daddr, | ||
162 | sizeof(struct tcphdr), IPPROTO_TCP, | ||
163 | csum_partial((char *)tcph, | ||
164 | sizeof(struct tcphdr), 0)); | ||
165 | |||
166 | NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, nskb, NULL, nskb->dst->dev, | ||
167 | dst_output); | ||
168 | } | ||
169 | |||
170 | static inline void | ||
171 | send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum) | ||
172 | { | ||
173 | if (hooknum == NF_IP6_LOCAL_OUT && skb_in->dev == NULL) | ||
174 | skb_in->dev = &loopback_dev; | ||
175 | |||
176 | icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL); | ||
177 | } | ||
178 | |||
179 | static unsigned int reject6_target(struct sk_buff **pskb, | ||
180 | const struct net_device *in, | ||
181 | const struct net_device *out, | ||
182 | unsigned int hooknum, | ||
183 | const void *targinfo, | ||
184 | void *userinfo) | ||
185 | { | ||
186 | const struct ip6t_reject_info *reject = targinfo; | ||
187 | |||
188 | DEBUGP(KERN_DEBUG "%s: medium point\n", __FUNCTION__); | ||
189 | /* WARNING: This code causes reentry within ip6tables. | ||
190 | This means that the ip6tables jump stack is now crap. We | ||
191 | must return an absolute verdict. --RR */ | ||
192 | switch (reject->with) { | ||
193 | case IP6T_ICMP6_NO_ROUTE: | ||
194 | send_unreach(*pskb, ICMPV6_NOROUTE, hooknum); | ||
195 | break; | ||
196 | case IP6T_ICMP6_ADM_PROHIBITED: | ||
197 | send_unreach(*pskb, ICMPV6_ADM_PROHIBITED, hooknum); | ||
198 | break; | ||
199 | case IP6T_ICMP6_NOT_NEIGHBOUR: | ||
200 | send_unreach(*pskb, ICMPV6_NOT_NEIGHBOUR, hooknum); | ||
201 | break; | ||
202 | case IP6T_ICMP6_ADDR_UNREACH: | ||
203 | send_unreach(*pskb, ICMPV6_ADDR_UNREACH, hooknum); | ||
204 | break; | ||
205 | case IP6T_ICMP6_PORT_UNREACH: | ||
206 | send_unreach(*pskb, ICMPV6_PORT_UNREACH, hooknum); | ||
207 | break; | ||
208 | case IP6T_ICMP6_ECHOREPLY: | ||
209 | /* Do nothing */ | ||
210 | break; | ||
211 | case IP6T_TCP_RESET: | ||
212 | send_reset(*pskb); | ||
213 | break; | ||
214 | default: | ||
215 | if (net_ratelimit()) | ||
216 | printk(KERN_WARNING "ip6t_REJECT: case %u not handled yet\n", reject->with); | ||
217 | break; | ||
218 | } | ||
219 | |||
220 | return NF_DROP; | ||
221 | } | ||
222 | |||
223 | static int check(const char *tablename, | ||
224 | const struct ip6t_entry *e, | ||
225 | void *targinfo, | ||
226 | unsigned int targinfosize, | ||
227 | unsigned int hook_mask) | ||
228 | { | ||
229 | const struct ip6t_reject_info *rejinfo = targinfo; | ||
230 | |||
231 | if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_reject_info))) { | ||
232 | DEBUGP("ip6t_REJECT: targinfosize %u != 0\n", targinfosize); | ||
233 | return 0; | ||
234 | } | ||
235 | |||
236 | /* Only allow these for packet filtering. */ | ||
237 | if (strcmp(tablename, "filter") != 0) { | ||
238 | DEBUGP("ip6t_REJECT: bad table `%s'.\n", tablename); | ||
239 | return 0; | ||
240 | } | ||
241 | |||
242 | if ((hook_mask & ~((1 << NF_IP6_LOCAL_IN) | ||
243 | | (1 << NF_IP6_FORWARD) | ||
244 | | (1 << NF_IP6_LOCAL_OUT))) != 0) { | ||
245 | DEBUGP("ip6t_REJECT: bad hook mask %X\n", hook_mask); | ||
246 | return 0; | ||
247 | } | ||
248 | |||
249 | if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) { | ||
250 | printk("ip6t_REJECT: ECHOREPLY is not supported.\n"); | ||
251 | return 0; | ||
252 | } else if (rejinfo->with == IP6T_TCP_RESET) { | ||
253 | /* Must specify that it's a TCP packet */ | ||
254 | if (e->ipv6.proto != IPPROTO_TCP | ||
255 | || (e->ipv6.invflags & IP6T_INV_PROTO)) { | ||
256 | DEBUGP("ip6t_REJECT: TCP_RESET illegal for non-tcp\n"); | ||
257 | return 0; | ||
258 | } | ||
259 | } | ||
260 | |||
261 | return 1; | ||
262 | } | ||
263 | |||
264 | static struct ip6t_target ip6t_reject_reg = { | ||
265 | .name = "REJECT", | ||
266 | .target = reject6_target, | ||
267 | .checkentry = check, | ||
268 | .me = THIS_MODULE | ||
269 | }; | ||
270 | |||
271 | static int __init init(void) | ||
272 | { | ||
273 | if (ip6t_register_target(&ip6t_reject_reg)) | ||
274 | return -EINVAL; | ||
275 | return 0; | ||
276 | } | ||
277 | |||
278 | static void __exit fini(void) | ||
279 | { | ||
280 | ip6t_unregister_target(&ip6t_reject_reg); | ||
281 | } | ||
282 | |||
283 | module_init(init); | ||
284 | module_exit(fini); | ||
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index ab0e32d3de46..9b91decbfddb 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ b/net/ipv6/netfilter/ip6t_owner.c | |||
@@ -20,71 +20,6 @@ MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); | |||
20 | MODULE_DESCRIPTION("IP6 tables owner matching module"); | 20 | MODULE_DESCRIPTION("IP6 tables owner matching module"); |
21 | MODULE_LICENSE("GPL"); | 21 | MODULE_LICENSE("GPL"); |
22 | 22 | ||
23 | static int | ||
24 | match_pid(const struct sk_buff *skb, pid_t pid) | ||
25 | { | ||
26 | struct task_struct *p; | ||
27 | struct files_struct *files; | ||
28 | int i; | ||
29 | |||
30 | read_lock(&tasklist_lock); | ||
31 | p = find_task_by_pid(pid); | ||
32 | if (!p) | ||
33 | goto out; | ||
34 | task_lock(p); | ||
35 | files = p->files; | ||
36 | if(files) { | ||
37 | spin_lock(&files->file_lock); | ||
38 | for (i=0; i < files->max_fds; i++) { | ||
39 | if (fcheck_files(files, i) == skb->sk->sk_socket->file) { | ||
40 | spin_unlock(&files->file_lock); | ||
41 | task_unlock(p); | ||
42 | read_unlock(&tasklist_lock); | ||
43 | return 1; | ||
44 | } | ||
45 | } | ||
46 | spin_unlock(&files->file_lock); | ||
47 | } | ||
48 | task_unlock(p); | ||
49 | out: | ||
50 | read_unlock(&tasklist_lock); | ||
51 | return 0; | ||
52 | } | ||
53 | |||
54 | static int | ||
55 | match_sid(const struct sk_buff *skb, pid_t sid) | ||
56 | { | ||
57 | struct task_struct *g, *p; | ||
58 | struct file *file = skb->sk->sk_socket->file; | ||
59 | int i, found=0; | ||
60 | |||
61 | read_lock(&tasklist_lock); | ||
62 | do_each_thread(g, p) { | ||
63 | struct files_struct *files; | ||
64 | if (p->signal->session != sid) | ||
65 | continue; | ||
66 | |||
67 | task_lock(p); | ||
68 | files = p->files; | ||
69 | if (files) { | ||
70 | spin_lock(&files->file_lock); | ||
71 | for (i=0; i < files->max_fds; i++) { | ||
72 | if (fcheck_files(files, i) == file) { | ||
73 | found = 1; | ||
74 | break; | ||
75 | } | ||
76 | } | ||
77 | spin_unlock(&files->file_lock); | ||
78 | } | ||
79 | task_unlock(p); | ||
80 | if (found) | ||
81 | goto out; | ||
82 | } while_each_thread(g, p); | ||
83 | out: | ||
84 | read_unlock(&tasklist_lock); | ||
85 | |||
86 | return found; | ||
87 | } | ||
88 | 23 | ||
89 | static int | 24 | static int |
90 | match(const struct sk_buff *skb, | 25 | match(const struct sk_buff *skb, |
@@ -112,18 +47,6 @@ match(const struct sk_buff *skb, | |||
112 | return 0; | 47 | return 0; |
113 | } | 48 | } |
114 | 49 | ||
115 | if(info->match & IP6T_OWNER_PID) { | ||
116 | if (!match_pid(skb, info->pid) ^ | ||
117 | !!(info->invert & IP6T_OWNER_PID)) | ||
118 | return 0; | ||
119 | } | ||
120 | |||
121 | if(info->match & IP6T_OWNER_SID) { | ||
122 | if (!match_sid(skb, info->sid) ^ | ||
123 | !!(info->invert & IP6T_OWNER_SID)) | ||
124 | return 0; | ||
125 | } | ||
126 | |||
127 | return 1; | 50 | return 1; |
128 | } | 51 | } |
129 | 52 | ||
@@ -134,6 +57,8 @@ checkentry(const char *tablename, | |||
134 | unsigned int matchsize, | 57 | unsigned int matchsize, |
135 | unsigned int hook_mask) | 58 | unsigned int hook_mask) |
136 | { | 59 | { |
60 | const struct ip6t_owner_info *info = matchinfo; | ||
61 | |||
137 | if (hook_mask | 62 | if (hook_mask |
138 | & ~((1 << NF_IP6_LOCAL_OUT) | (1 << NF_IP6_POST_ROUTING))) { | 63 | & ~((1 << NF_IP6_LOCAL_OUT) | (1 << NF_IP6_POST_ROUTING))) { |
139 | printk("ip6t_owner: only valid for LOCAL_OUT or POST_ROUTING.\n"); | 64 | printk("ip6t_owner: only valid for LOCAL_OUT or POST_ROUTING.\n"); |
@@ -142,14 +67,13 @@ checkentry(const char *tablename, | |||
142 | 67 | ||
143 | if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_owner_info))) | 68 | if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_owner_info))) |
144 | return 0; | 69 | return 0; |
145 | #ifdef CONFIG_SMP | 70 | |
146 | /* files->file_lock can not be used in a BH */ | 71 | if (info->match & (IP6T_OWNER_PID|IP6T_OWNER_SID)) { |
147 | if (((struct ip6t_owner_info *)matchinfo)->match | 72 | printk("ipt_owner: pid and sid matching " |
148 | & (IP6T_OWNER_PID|IP6T_OWNER_SID)) { | 73 | "not supported anymore\n"); |
149 | printk("ip6t_owner: pid and sid matching is broken on SMP.\n"); | ||
150 | return 0; | 74 | return 0; |
151 | } | 75 | } |
152 | #endif | 76 | |
153 | return 1; | 77 | return 1; |
154 | } | 78 | } |
155 | 79 | ||
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 1d4d75b34d32..7a5863298f3f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c | |||
@@ -49,6 +49,7 @@ | |||
49 | #include <net/transp_v6.h> | 49 | #include <net/transp_v6.h> |
50 | #include <net/udp.h> | 50 | #include <net/udp.h> |
51 | #include <net/inet_common.h> | 51 | #include <net/inet_common.h> |
52 | #include <net/tcp_states.h> | ||
52 | 53 | ||
53 | #include <net/rawv6.h> | 54 | #include <net/rawv6.h> |
54 | #include <net/xfrm.h> | 55 | #include <net/xfrm.h> |
@@ -81,7 +82,8 @@ static void raw_v6_unhash(struct sock *sk) | |||
81 | 82 | ||
82 | /* Grumble... icmp and ip_input want to get at this... */ | 83 | /* Grumble... icmp and ip_input want to get at this... */ |
83 | struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, | 84 | struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, |
84 | struct in6_addr *loc_addr, struct in6_addr *rmt_addr) | 85 | struct in6_addr *loc_addr, struct in6_addr *rmt_addr, |
86 | int dif) | ||
85 | { | 87 | { |
86 | struct hlist_node *node; | 88 | struct hlist_node *node; |
87 | int is_multicast = ipv6_addr_is_multicast(loc_addr); | 89 | int is_multicast = ipv6_addr_is_multicast(loc_addr); |
@@ -94,6 +96,9 @@ struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, | |||
94 | !ipv6_addr_equal(&np->daddr, rmt_addr)) | 96 | !ipv6_addr_equal(&np->daddr, rmt_addr)) |
95 | continue; | 97 | continue; |
96 | 98 | ||
99 | if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) | ||
100 | continue; | ||
101 | |||
97 | if (!ipv6_addr_any(&np->rcv_saddr)) { | 102 | if (!ipv6_addr_any(&np->rcv_saddr)) { |
98 | if (ipv6_addr_equal(&np->rcv_saddr, loc_addr)) | 103 | if (ipv6_addr_equal(&np->rcv_saddr, loc_addr)) |
99 | goto found; | 104 | goto found; |
@@ -137,11 +142,12 @@ static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) | |||
137 | * | 142 | * |
138 | * Caller owns SKB so we must make clones. | 143 | * Caller owns SKB so we must make clones. |
139 | */ | 144 | */ |
140 | void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) | 145 | int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) |
141 | { | 146 | { |
142 | struct in6_addr *saddr; | 147 | struct in6_addr *saddr; |
143 | struct in6_addr *daddr; | 148 | struct in6_addr *daddr; |
144 | struct sock *sk; | 149 | struct sock *sk; |
150 | int delivered = 0; | ||
145 | __u8 hash; | 151 | __u8 hash; |
146 | 152 | ||
147 | saddr = &skb->nh.ipv6h->saddr; | 153 | saddr = &skb->nh.ipv6h->saddr; |
@@ -160,9 +166,10 @@ void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) | |||
160 | if (sk == NULL) | 166 | if (sk == NULL) |
161 | goto out; | 167 | goto out; |
162 | 168 | ||
163 | sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr); | 169 | sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, skb->dev->ifindex); |
164 | 170 | ||
165 | while (sk) { | 171 | while (sk) { |
172 | delivered = 1; | ||
166 | if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) { | 173 | if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) { |
167 | struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); | 174 | struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); |
168 | 175 | ||
@@ -170,10 +177,12 @@ void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) | |||
170 | if (clone) | 177 | if (clone) |
171 | rawv6_rcv(sk, clone); | 178 | rawv6_rcv(sk, clone); |
172 | } | 179 | } |
173 | sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr); | 180 | sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr, |
181 | skb->dev->ifindex); | ||
174 | } | 182 | } |
175 | out: | 183 | out: |
176 | read_unlock(&raw_v6_lock); | 184 | read_unlock(&raw_v6_lock); |
185 | return delivered; | ||
177 | } | 186 | } |
178 | 187 | ||
179 | /* This cleans up af_inet6 a bit. -DaveM */ | 188 | /* This cleans up af_inet6 a bit. -DaveM */ |
@@ -334,8 +343,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) | |||
334 | if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, | 343 | if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, |
335 | &skb->nh.ipv6h->daddr, | 344 | &skb->nh.ipv6h->daddr, |
336 | skb->len, inet->num, skb->csum)) { | 345 | skb->len, inet->num, skb->csum)) { |
337 | LIMIT_NETDEBUG( | 346 | LIMIT_NETDEBUG(KERN_DEBUG "raw v6 hw csum failure.\n"); |
338 | printk(KERN_DEBUG "raw v6 hw csum failure.\n")); | ||
339 | skb->ip_summed = CHECKSUM_NONE; | 347 | skb->ip_summed = CHECKSUM_NONE; |
340 | } | 348 | } |
341 | } | 349 | } |
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 59e7c6317872..9d9e04344c77 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c | |||
@@ -562,7 +562,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, | |||
562 | if (skb->dev) | 562 | if (skb->dev) |
563 | fq->iif = skb->dev->ifindex; | 563 | fq->iif = skb->dev->ifindex; |
564 | skb->dev = NULL; | 564 | skb->dev = NULL; |
565 | fq->stamp = skb->stamp; | 565 | skb_get_timestamp(skb, &fq->stamp); |
566 | fq->meat += skb->len; | 566 | fq->meat += skb->len; |
567 | atomic_add(skb->truesize, &ip6_frag_mem); | 567 | atomic_add(skb->truesize, &ip6_frag_mem); |
568 | 568 | ||
@@ -664,7 +664,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, | |||
664 | 664 | ||
665 | head->next = NULL; | 665 | head->next = NULL; |
666 | head->dev = dev; | 666 | head->dev = dev; |
667 | head->stamp = fq->stamp; | 667 | skb_set_timestamp(head, &fq->stamp); |
668 | head->nh.ipv6h->payload_len = htons(payload_len); | 668 | head->nh.ipv6h->payload_len = htons(payload_len); |
669 | 669 | ||
670 | *skb_in = head; | 670 | *skb_in = head; |
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 878789b3122d..5d5bbb49ec78 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c | |||
@@ -1372,7 +1372,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg) | |||
1372 | * Drop the packet on the floor | 1372 | * Drop the packet on the floor |
1373 | */ | 1373 | */ |
1374 | 1374 | ||
1375 | int ip6_pkt_discard(struct sk_buff *skb) | 1375 | static int ip6_pkt_discard(struct sk_buff *skb) |
1376 | { | 1376 | { |
1377 | IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); | 1377 | IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); |
1378 | icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev); | 1378 | icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev); |
@@ -1380,7 +1380,7 @@ int ip6_pkt_discard(struct sk_buff *skb) | |||
1380 | return 0; | 1380 | return 0; |
1381 | } | 1381 | } |
1382 | 1382 | ||
1383 | int ip6_pkt_discard_out(struct sk_buff *skb) | 1383 | static int ip6_pkt_discard_out(struct sk_buff *skb) |
1384 | { | 1384 | { |
1385 | skb->dev = skb->dst->dev; | 1385 | skb->dev = skb->dst->dev; |
1386 | return ip6_pkt_discard(skb); | 1386 | return ip6_pkt_discard(skb); |
@@ -1850,16 +1850,16 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, | |||
1850 | 1850 | ||
1851 | skb = alloc_skb(size, gfp_any()); | 1851 | skb = alloc_skb(size, gfp_any()); |
1852 | if (!skb) { | 1852 | if (!skb) { |
1853 | netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS); | 1853 | netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS); |
1854 | return; | 1854 | return; |
1855 | } | 1855 | } |
1856 | if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) { | 1856 | if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) { |
1857 | kfree_skb(skb); | 1857 | kfree_skb(skb); |
1858 | netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL); | 1858 | netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL); |
1859 | return; | 1859 | return; |
1860 | } | 1860 | } |
1861 | NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE; | 1861 | NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE; |
1862 | netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any()); | 1862 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any()); |
1863 | } | 1863 | } |
1864 | 1864 | ||
1865 | /* | 1865 | /* |
@@ -1960,8 +1960,6 @@ static int rt6_proc_info(char *buffer, char **start, off_t offset, int length) | |||
1960 | return arg.len; | 1960 | return arg.len; |
1961 | } | 1961 | } |
1962 | 1962 | ||
1963 | extern struct rt6_statistics rt6_stats; | ||
1964 | |||
1965 | static int rt6_stats_seq_show(struct seq_file *seq, void *v) | 1963 | static int rt6_stats_seq_show(struct seq_file *seq, void *v) |
1966 | { | 1964 | { |
1967 | seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", | 1965 | seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", |
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index e553e5b80d6e..c3123c9e1a8e 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c | |||
@@ -770,7 +770,7 @@ static int ipip6_tunnel_init(struct net_device *dev) | |||
770 | return 0; | 770 | return 0; |
771 | } | 771 | } |
772 | 772 | ||
773 | int __init ipip6_fb_tunnel_init(struct net_device *dev) | 773 | static int __init ipip6_fb_tunnel_init(struct net_device *dev) |
774 | { | 774 | { |
775 | struct ip_tunnel *tunnel = dev->priv; | 775 | struct ip_tunnel *tunnel = dev->priv; |
776 | struct iphdr *iph = &tunnel->parms.iph; | 776 | struct iphdr *iph = &tunnel->parms.iph; |
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 3a18e0e6ffed..8eff9fa1e983 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c | |||
@@ -14,9 +14,6 @@ | |||
14 | #include <net/ipv6.h> | 14 | #include <net/ipv6.h> |
15 | #include <net/addrconf.h> | 15 | #include <net/addrconf.h> |
16 | 16 | ||
17 | extern ctl_table ipv6_route_table[]; | ||
18 | extern ctl_table ipv6_icmp_table[]; | ||
19 | |||
20 | #ifdef CONFIG_SYSCTL | 17 | #ifdef CONFIG_SYSCTL |
21 | 18 | ||
22 | static ctl_table ipv6_table[] = { | 19 | static ctl_table ipv6_table[] = { |
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ef29cfd936d3..794734f1d230 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -47,6 +47,7 @@ | |||
47 | 47 | ||
48 | #include <net/tcp.h> | 48 | #include <net/tcp.h> |
49 | #include <net/ndisc.h> | 49 | #include <net/ndisc.h> |
50 | #include <net/inet6_hashtables.h> | ||
50 | #include <net/ipv6.h> | 51 | #include <net/ipv6.h> |
51 | #include <net/transp_v6.h> | 52 | #include <net/transp_v6.h> |
52 | #include <net/addrconf.h> | 53 | #include <net/addrconf.h> |
@@ -75,34 +76,11 @@ static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok); | |||
75 | static struct tcp_func ipv6_mapped; | 76 | static struct tcp_func ipv6_mapped; |
76 | static struct tcp_func ipv6_specific; | 77 | static struct tcp_func ipv6_specific; |
77 | 78 | ||
78 | /* I have no idea if this is a good hash for v6 or not. -DaveM */ | 79 | static inline int tcp_v6_bind_conflict(const struct sock *sk, |
79 | static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport, | 80 | const struct inet_bind_bucket *tb) |
80 | struct in6_addr *faddr, u16 fport) | ||
81 | { | 81 | { |
82 | int hashent = (lport ^ fport); | 82 | const struct sock *sk2; |
83 | 83 | const struct hlist_node *node; | |
84 | hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]); | ||
85 | hashent ^= hashent>>16; | ||
86 | hashent ^= hashent>>8; | ||
87 | return (hashent & (tcp_ehash_size - 1)); | ||
88 | } | ||
89 | |||
90 | static __inline__ int tcp_v6_sk_hashfn(struct sock *sk) | ||
91 | { | ||
92 | struct inet_sock *inet = inet_sk(sk); | ||
93 | struct ipv6_pinfo *np = inet6_sk(sk); | ||
94 | struct in6_addr *laddr = &np->rcv_saddr; | ||
95 | struct in6_addr *faddr = &np->daddr; | ||
96 | __u16 lport = inet->num; | ||
97 | __u16 fport = inet->dport; | ||
98 | return tcp_v6_hashfn(laddr, lport, faddr, fport); | ||
99 | } | ||
100 | |||
101 | static inline int tcp_v6_bind_conflict(struct sock *sk, | ||
102 | struct tcp_bind_bucket *tb) | ||
103 | { | ||
104 | struct sock *sk2; | ||
105 | struct hlist_node *node; | ||
106 | 84 | ||
107 | /* We must walk the whole port owner list in this case. -DaveM */ | 85 | /* We must walk the whole port owner list in this case. -DaveM */ |
108 | sk_for_each_bound(sk2, node, &tb->owners) { | 86 | sk_for_each_bound(sk2, node, &tb->owners) { |
@@ -126,8 +104,8 @@ static inline int tcp_v6_bind_conflict(struct sock *sk, | |||
126 | */ | 104 | */ |
127 | static int tcp_v6_get_port(struct sock *sk, unsigned short snum) | 105 | static int tcp_v6_get_port(struct sock *sk, unsigned short snum) |
128 | { | 106 | { |
129 | struct tcp_bind_hashbucket *head; | 107 | struct inet_bind_hashbucket *head; |
130 | struct tcp_bind_bucket *tb; | 108 | struct inet_bind_bucket *tb; |
131 | struct hlist_node *node; | 109 | struct hlist_node *node; |
132 | int ret; | 110 | int ret; |
133 | 111 | ||
@@ -138,25 +116,25 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) | |||
138 | int remaining = (high - low) + 1; | 116 | int remaining = (high - low) + 1; |
139 | int rover; | 117 | int rover; |
140 | 118 | ||
141 | spin_lock(&tcp_portalloc_lock); | 119 | spin_lock(&tcp_hashinfo.portalloc_lock); |
142 | if (tcp_port_rover < low) | 120 | if (tcp_hashinfo.port_rover < low) |
143 | rover = low; | 121 | rover = low; |
144 | else | 122 | else |
145 | rover = tcp_port_rover; | 123 | rover = tcp_hashinfo.port_rover; |
146 | do { rover++; | 124 | do { rover++; |
147 | if (rover > high) | 125 | if (rover > high) |
148 | rover = low; | 126 | rover = low; |
149 | head = &tcp_bhash[tcp_bhashfn(rover)]; | 127 | head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)]; |
150 | spin_lock(&head->lock); | 128 | spin_lock(&head->lock); |
151 | tb_for_each(tb, node, &head->chain) | 129 | inet_bind_bucket_for_each(tb, node, &head->chain) |
152 | if (tb->port == rover) | 130 | if (tb->port == rover) |
153 | goto next; | 131 | goto next; |
154 | break; | 132 | break; |
155 | next: | 133 | next: |
156 | spin_unlock(&head->lock); | 134 | spin_unlock(&head->lock); |
157 | } while (--remaining > 0); | 135 | } while (--remaining > 0); |
158 | tcp_port_rover = rover; | 136 | tcp_hashinfo.port_rover = rover; |
159 | spin_unlock(&tcp_portalloc_lock); | 137 | spin_unlock(&tcp_hashinfo.portalloc_lock); |
160 | 138 | ||
161 | /* Exhausted local port range during search? It is not | 139 | /* Exhausted local port range during search? It is not |
162 | * possible for us to be holding one of the bind hash | 140 | * possible for us to be holding one of the bind hash |
@@ -171,9 +149,9 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) | |||
171 | /* OK, here is the one we will use. */ | 149 | /* OK, here is the one we will use. */ |
172 | snum = rover; | 150 | snum = rover; |
173 | } else { | 151 | } else { |
174 | head = &tcp_bhash[tcp_bhashfn(snum)]; | 152 | head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; |
175 | spin_lock(&head->lock); | 153 | spin_lock(&head->lock); |
176 | tb_for_each(tb, node, &head->chain) | 154 | inet_bind_bucket_for_each(tb, node, &head->chain) |
177 | if (tb->port == snum) | 155 | if (tb->port == snum) |
178 | goto tb_found; | 156 | goto tb_found; |
179 | } | 157 | } |
@@ -192,8 +170,11 @@ tb_found: | |||
192 | } | 170 | } |
193 | tb_not_found: | 171 | tb_not_found: |
194 | ret = 1; | 172 | ret = 1; |
195 | if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL) | 173 | if (tb == NULL) { |
196 | goto fail_unlock; | 174 | tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum); |
175 | if (tb == NULL) | ||
176 | goto fail_unlock; | ||
177 | } | ||
197 | if (hlist_empty(&tb->owners)) { | 178 | if (hlist_empty(&tb->owners)) { |
198 | if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) | 179 | if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) |
199 | tb->fastreuse = 1; | 180 | tb->fastreuse = 1; |
@@ -204,9 +185,9 @@ tb_not_found: | |||
204 | tb->fastreuse = 0; | 185 | tb->fastreuse = 0; |
205 | 186 | ||
206 | success: | 187 | success: |
207 | if (!tcp_sk(sk)->bind_hash) | 188 | if (!inet_csk(sk)->icsk_bind_hash) |
208 | tcp_bind_hash(sk, tb, snum); | 189 | inet_bind_hash(sk, tb, snum); |
209 | BUG_TRAP(tcp_sk(sk)->bind_hash == tb); | 190 | BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); |
210 | ret = 0; | 191 | ret = 0; |
211 | 192 | ||
212 | fail_unlock: | 193 | fail_unlock: |
@@ -224,13 +205,13 @@ static __inline__ void __tcp_v6_hash(struct sock *sk) | |||
224 | BUG_TRAP(sk_unhashed(sk)); | 205 | BUG_TRAP(sk_unhashed(sk)); |
225 | 206 | ||
226 | if (sk->sk_state == TCP_LISTEN) { | 207 | if (sk->sk_state == TCP_LISTEN) { |
227 | list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; | 208 | list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)]; |
228 | lock = &tcp_lhash_lock; | 209 | lock = &tcp_hashinfo.lhash_lock; |
229 | tcp_listen_wlock(); | 210 | inet_listen_wlock(&tcp_hashinfo); |
230 | } else { | 211 | } else { |
231 | sk->sk_hashent = tcp_v6_sk_hashfn(sk); | 212 | sk->sk_hashent = inet6_sk_ehashfn(sk, tcp_hashinfo.ehash_size); |
232 | list = &tcp_ehash[sk->sk_hashent].chain; | 213 | list = &tcp_hashinfo.ehash[sk->sk_hashent].chain; |
233 | lock = &tcp_ehash[sk->sk_hashent].lock; | 214 | lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock; |
234 | write_lock(lock); | 215 | write_lock(lock); |
235 | } | 216 | } |
236 | 217 | ||
@@ -255,131 +236,11 @@ static void tcp_v6_hash(struct sock *sk) | |||
255 | } | 236 | } |
256 | } | 237 | } |
257 | 238 | ||
258 | static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif) | ||
259 | { | ||
260 | struct sock *sk; | ||
261 | struct hlist_node *node; | ||
262 | struct sock *result = NULL; | ||
263 | int score, hiscore; | ||
264 | |||
265 | hiscore=0; | ||
266 | read_lock(&tcp_lhash_lock); | ||
267 | sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) { | ||
268 | if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { | ||
269 | struct ipv6_pinfo *np = inet6_sk(sk); | ||
270 | |||
271 | score = 1; | ||
272 | if (!ipv6_addr_any(&np->rcv_saddr)) { | ||
273 | if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) | ||
274 | continue; | ||
275 | score++; | ||
276 | } | ||
277 | if (sk->sk_bound_dev_if) { | ||
278 | if (sk->sk_bound_dev_if != dif) | ||
279 | continue; | ||
280 | score++; | ||
281 | } | ||
282 | if (score == 3) { | ||
283 | result = sk; | ||
284 | break; | ||
285 | } | ||
286 | if (score > hiscore) { | ||
287 | hiscore = score; | ||
288 | result = sk; | ||
289 | } | ||
290 | } | ||
291 | } | ||
292 | if (result) | ||
293 | sock_hold(result); | ||
294 | read_unlock(&tcp_lhash_lock); | ||
295 | return result; | ||
296 | } | ||
297 | |||
298 | /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so | ||
299 | * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM | ||
300 | * | ||
301 | * The sockhash lock must be held as a reader here. | ||
302 | */ | ||
303 | |||
304 | static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport, | ||
305 | struct in6_addr *daddr, u16 hnum, | ||
306 | int dif) | ||
307 | { | ||
308 | struct tcp_ehash_bucket *head; | ||
309 | struct sock *sk; | ||
310 | struct hlist_node *node; | ||
311 | __u32 ports = TCP_COMBINED_PORTS(sport, hnum); | ||
312 | int hash; | ||
313 | |||
314 | /* Optimize here for direct hit, only listening connections can | ||
315 | * have wildcards anyways. | ||
316 | */ | ||
317 | hash = tcp_v6_hashfn(daddr, hnum, saddr, sport); | ||
318 | head = &tcp_ehash[hash]; | ||
319 | read_lock(&head->lock); | ||
320 | sk_for_each(sk, node, &head->chain) { | ||
321 | /* For IPV6 do the cheaper port and family tests first. */ | ||
322 | if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif)) | ||
323 | goto hit; /* You sunk my battleship! */ | ||
324 | } | ||
325 | /* Must check for a TIME_WAIT'er before going to listener hash. */ | ||
326 | sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) { | ||
327 | /* FIXME: acme: check this... */ | ||
328 | struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; | ||
329 | |||
330 | if(*((__u32 *)&(tw->tw_dport)) == ports && | ||
331 | sk->sk_family == PF_INET6) { | ||
332 | if(ipv6_addr_equal(&tw->tw_v6_daddr, saddr) && | ||
333 | ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) && | ||
334 | (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) | ||
335 | goto hit; | ||
336 | } | ||
337 | } | ||
338 | read_unlock(&head->lock); | ||
339 | return NULL; | ||
340 | |||
341 | hit: | ||
342 | sock_hold(sk); | ||
343 | read_unlock(&head->lock); | ||
344 | return sk; | ||
345 | } | ||
346 | |||
347 | |||
348 | static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport, | ||
349 | struct in6_addr *daddr, u16 hnum, | ||
350 | int dif) | ||
351 | { | ||
352 | struct sock *sk; | ||
353 | |||
354 | sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif); | ||
355 | |||
356 | if (sk) | ||
357 | return sk; | ||
358 | |||
359 | return tcp_v6_lookup_listener(daddr, hnum, dif); | ||
360 | } | ||
361 | |||
362 | inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport, | ||
363 | struct in6_addr *daddr, u16 dport, | ||
364 | int dif) | ||
365 | { | ||
366 | struct sock *sk; | ||
367 | |||
368 | local_bh_disable(); | ||
369 | sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif); | ||
370 | local_bh_enable(); | ||
371 | |||
372 | return sk; | ||
373 | } | ||
374 | |||
375 | EXPORT_SYMBOL_GPL(tcp_v6_lookup); | ||
376 | |||
377 | |||
378 | /* | 239 | /* |
379 | * Open request hash tables. | 240 | * Open request hash tables. |
380 | */ | 241 | */ |
381 | 242 | ||
382 | static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd) | 243 | static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd) |
383 | { | 244 | { |
384 | u32 a, b, c; | 245 | u32 a, b, c; |
385 | 246 | ||
@@ -399,14 +260,15 @@ static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd) | |||
399 | return c & (TCP_SYNQ_HSIZE - 1); | 260 | return c & (TCP_SYNQ_HSIZE - 1); |
400 | } | 261 | } |
401 | 262 | ||
402 | static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp, | 263 | static struct request_sock *tcp_v6_search_req(const struct sock *sk, |
403 | struct request_sock ***prevp, | 264 | struct request_sock ***prevp, |
404 | __u16 rport, | 265 | __u16 rport, |
405 | struct in6_addr *raddr, | 266 | struct in6_addr *raddr, |
406 | struct in6_addr *laddr, | 267 | struct in6_addr *laddr, |
407 | int iif) | 268 | int iif) |
408 | { | 269 | { |
409 | struct listen_sock *lopt = tp->accept_queue.listen_opt; | 270 | const struct inet_connection_sock *icsk = inet_csk(sk); |
271 | struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; | ||
410 | struct request_sock *req, **prev; | 272 | struct request_sock *req, **prev; |
411 | 273 | ||
412 | for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)]; | 274 | for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)]; |
@@ -451,44 +313,48 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) | |||
451 | } | 313 | } |
452 | } | 314 | } |
453 | 315 | ||
454 | static int __tcp_v6_check_established(struct sock *sk, __u16 lport, | 316 | static int __tcp_v6_check_established(struct sock *sk, const __u16 lport, |
455 | struct tcp_tw_bucket **twp) | 317 | struct inet_timewait_sock **twp) |
456 | { | 318 | { |
457 | struct inet_sock *inet = inet_sk(sk); | 319 | struct inet_sock *inet = inet_sk(sk); |
458 | struct ipv6_pinfo *np = inet6_sk(sk); | 320 | const struct ipv6_pinfo *np = inet6_sk(sk); |
459 | struct in6_addr *daddr = &np->rcv_saddr; | 321 | const struct in6_addr *daddr = &np->rcv_saddr; |
460 | struct in6_addr *saddr = &np->daddr; | 322 | const struct in6_addr *saddr = &np->daddr; |
461 | int dif = sk->sk_bound_dev_if; | 323 | const int dif = sk->sk_bound_dev_if; |
462 | u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); | 324 | const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); |
463 | int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport); | 325 | const int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport, |
464 | struct tcp_ehash_bucket *head = &tcp_ehash[hash]; | 326 | tcp_hashinfo.ehash_size); |
327 | struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; | ||
465 | struct sock *sk2; | 328 | struct sock *sk2; |
466 | struct hlist_node *node; | 329 | const struct hlist_node *node; |
467 | struct tcp_tw_bucket *tw; | 330 | struct inet_timewait_sock *tw; |
468 | 331 | ||
469 | write_lock(&head->lock); | 332 | write_lock(&head->lock); |
470 | 333 | ||
471 | /* Check TIME-WAIT sockets first. */ | 334 | /* Check TIME-WAIT sockets first. */ |
472 | sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) { | 335 | sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { |
473 | tw = (struct tcp_tw_bucket*)sk2; | 336 | const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2); |
337 | |||
338 | tw = inet_twsk(sk2); | ||
474 | 339 | ||
475 | if(*((__u32 *)&(tw->tw_dport)) == ports && | 340 | if(*((__u32 *)&(tw->tw_dport)) == ports && |
476 | sk2->sk_family == PF_INET6 && | 341 | sk2->sk_family == PF_INET6 && |
477 | ipv6_addr_equal(&tw->tw_v6_daddr, saddr) && | 342 | ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && |
478 | ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) && | 343 | ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && |
479 | sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { | 344 | sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { |
345 | const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); | ||
480 | struct tcp_sock *tp = tcp_sk(sk); | 346 | struct tcp_sock *tp = tcp_sk(sk); |
481 | 347 | ||
482 | if (tw->tw_ts_recent_stamp && | 348 | if (tcptw->tw_ts_recent_stamp && |
483 | (!twp || (sysctl_tcp_tw_reuse && | 349 | (!twp || |
484 | xtime.tv_sec - | 350 | (sysctl_tcp_tw_reuse && |
485 | tw->tw_ts_recent_stamp > 1))) { | 351 | xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) { |
486 | /* See comment in tcp_ipv4.c */ | 352 | /* See comment in tcp_ipv4.c */ |
487 | tp->write_seq = tw->tw_snd_nxt + 65535 + 2; | 353 | tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; |
488 | if (!tp->write_seq) | 354 | if (!tp->write_seq) |
489 | tp->write_seq = 1; | 355 | tp->write_seq = 1; |
490 | tp->rx_opt.ts_recent = tw->tw_ts_recent; | 356 | tp->rx_opt.ts_recent = tcptw->tw_ts_recent; |
491 | tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp; | 357 | tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; |
492 | sock_hold(sk2); | 358 | sock_hold(sk2); |
493 | goto unique; | 359 | goto unique; |
494 | } else | 360 | } else |
@@ -499,7 +365,7 @@ static int __tcp_v6_check_established(struct sock *sk, __u16 lport, | |||
499 | 365 | ||
500 | /* And established part... */ | 366 | /* And established part... */ |
501 | sk_for_each(sk2, node, &head->chain) { | 367 | sk_for_each(sk2, node, &head->chain) { |
502 | if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif)) | 368 | if (INET6_MATCH(sk2, saddr, daddr, ports, dif)) |
503 | goto not_unique; | 369 | goto not_unique; |
504 | } | 370 | } |
505 | 371 | ||
@@ -515,10 +381,10 @@ unique: | |||
515 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | 381 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); |
516 | } else if (tw) { | 382 | } else if (tw) { |
517 | /* Silly. Should hash-dance instead... */ | 383 | /* Silly. Should hash-dance instead... */ |
518 | tcp_tw_deschedule(tw); | 384 | inet_twsk_deschedule(tw, &tcp_death_row); |
519 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | 385 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); |
520 | 386 | ||
521 | tcp_tw_put(tw); | 387 | inet_twsk_put(tw); |
522 | } | 388 | } |
523 | return 0; | 389 | return 0; |
524 | 390 | ||
@@ -540,8 +406,8 @@ static inline u32 tcpv6_port_offset(const struct sock *sk) | |||
540 | static int tcp_v6_hash_connect(struct sock *sk) | 406 | static int tcp_v6_hash_connect(struct sock *sk) |
541 | { | 407 | { |
542 | unsigned short snum = inet_sk(sk)->num; | 408 | unsigned short snum = inet_sk(sk)->num; |
543 | struct tcp_bind_hashbucket *head; | 409 | struct inet_bind_hashbucket *head; |
544 | struct tcp_bind_bucket *tb; | 410 | struct inet_bind_bucket *tb; |
545 | int ret; | 411 | int ret; |
546 | 412 | ||
547 | if (!snum) { | 413 | if (!snum) { |
@@ -553,19 +419,19 @@ static int tcp_v6_hash_connect(struct sock *sk) | |||
553 | static u32 hint; | 419 | static u32 hint; |
554 | u32 offset = hint + tcpv6_port_offset(sk); | 420 | u32 offset = hint + tcpv6_port_offset(sk); |
555 | struct hlist_node *node; | 421 | struct hlist_node *node; |
556 | struct tcp_tw_bucket *tw = NULL; | 422 | struct inet_timewait_sock *tw = NULL; |
557 | 423 | ||
558 | local_bh_disable(); | 424 | local_bh_disable(); |
559 | for (i = 1; i <= range; i++) { | 425 | for (i = 1; i <= range; i++) { |
560 | port = low + (i + offset) % range; | 426 | port = low + (i + offset) % range; |
561 | head = &tcp_bhash[tcp_bhashfn(port)]; | 427 | head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)]; |
562 | spin_lock(&head->lock); | 428 | spin_lock(&head->lock); |
563 | 429 | ||
564 | /* Does not bother with rcv_saddr checks, | 430 | /* Does not bother with rcv_saddr checks, |
565 | * because the established check is already | 431 | * because the established check is already |
566 | * unique enough. | 432 | * unique enough. |
567 | */ | 433 | */ |
568 | tb_for_each(tb, node, &head->chain) { | 434 | inet_bind_bucket_for_each(tb, node, &head->chain) { |
569 | if (tb->port == port) { | 435 | if (tb->port == port) { |
570 | BUG_TRAP(!hlist_empty(&tb->owners)); | 436 | BUG_TRAP(!hlist_empty(&tb->owners)); |
571 | if (tb->fastreuse >= 0) | 437 | if (tb->fastreuse >= 0) |
@@ -578,7 +444,7 @@ static int tcp_v6_hash_connect(struct sock *sk) | |||
578 | } | 444 | } |
579 | } | 445 | } |
580 | 446 | ||
581 | tb = tcp_bucket_create(head, port); | 447 | tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port); |
582 | if (!tb) { | 448 | if (!tb) { |
583 | spin_unlock(&head->lock); | 449 | spin_unlock(&head->lock); |
584 | break; | 450 | break; |
@@ -597,7 +463,7 @@ ok: | |||
597 | hint += i; | 463 | hint += i; |
598 | 464 | ||
599 | /* Head lock still held and bh's disabled */ | 465 | /* Head lock still held and bh's disabled */ |
600 | tcp_bind_hash(sk, tb, port); | 466 | inet_bind_hash(sk, tb, port); |
601 | if (sk_unhashed(sk)) { | 467 | if (sk_unhashed(sk)) { |
602 | inet_sk(sk)->sport = htons(port); | 468 | inet_sk(sk)->sport = htons(port); |
603 | __tcp_v6_hash(sk); | 469 | __tcp_v6_hash(sk); |
@@ -605,16 +471,16 @@ ok: | |||
605 | spin_unlock(&head->lock); | 471 | spin_unlock(&head->lock); |
606 | 472 | ||
607 | if (tw) { | 473 | if (tw) { |
608 | tcp_tw_deschedule(tw); | 474 | inet_twsk_deschedule(tw, &tcp_death_row); |
609 | tcp_tw_put(tw); | 475 | inet_twsk_put(tw); |
610 | } | 476 | } |
611 | 477 | ||
612 | ret = 0; | 478 | ret = 0; |
613 | goto out; | 479 | goto out; |
614 | } | 480 | } |
615 | 481 | ||
616 | head = &tcp_bhash[tcp_bhashfn(snum)]; | 482 | head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; |
617 | tb = tcp_sk(sk)->bind_hash; | 483 | tb = inet_csk(sk)->icsk_bind_hash; |
618 | spin_lock_bh(&head->lock); | 484 | spin_lock_bh(&head->lock); |
619 | 485 | ||
620 | if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { | 486 | if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { |
@@ -631,11 +497,6 @@ out: | |||
631 | } | 497 | } |
632 | } | 498 | } |
633 | 499 | ||
634 | static __inline__ int tcp_v6_iif(struct sk_buff *skb) | ||
635 | { | ||
636 | return IP6CB(skb)->iif; | ||
637 | } | ||
638 | |||
639 | static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | 500 | static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, |
640 | int addr_len) | 501 | int addr_len) |
641 | { | 502 | { |
@@ -827,14 +688,15 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | |||
827 | int type, int code, int offset, __u32 info) | 688 | int type, int code, int offset, __u32 info) |
828 | { | 689 | { |
829 | struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; | 690 | struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; |
830 | struct tcphdr *th = (struct tcphdr *)(skb->data+offset); | 691 | const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); |
831 | struct ipv6_pinfo *np; | 692 | struct ipv6_pinfo *np; |
832 | struct sock *sk; | 693 | struct sock *sk; |
833 | int err; | 694 | int err; |
834 | struct tcp_sock *tp; | 695 | struct tcp_sock *tp; |
835 | __u32 seq; | 696 | __u32 seq; |
836 | 697 | ||
837 | sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex); | 698 | sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr, |
699 | th->source, skb->dev->ifindex); | ||
838 | 700 | ||
839 | if (sk == NULL) { | 701 | if (sk == NULL) { |
840 | ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); | 702 | ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); |
@@ -842,7 +704,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | |||
842 | } | 704 | } |
843 | 705 | ||
844 | if (sk->sk_state == TCP_TIME_WAIT) { | 706 | if (sk->sk_state == TCP_TIME_WAIT) { |
845 | tcp_tw_put((struct tcp_tw_bucket*)sk); | 707 | inet_twsk_put((struct inet_timewait_sock *)sk); |
846 | return; | 708 | return; |
847 | } | 709 | } |
848 | 710 | ||
@@ -920,8 +782,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | |||
920 | if (sock_owned_by_user(sk)) | 782 | if (sock_owned_by_user(sk)) |
921 | goto out; | 783 | goto out; |
922 | 784 | ||
923 | req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr, | 785 | req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr, |
924 | &hdr->saddr, tcp_v6_iif(skb)); | 786 | &hdr->saddr, inet6_iif(skb)); |
925 | if (!req) | 787 | if (!req) |
926 | goto out; | 788 | goto out; |
927 | 789 | ||
@@ -935,7 +797,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | |||
935 | goto out; | 797 | goto out; |
936 | } | 798 | } |
937 | 799 | ||
938 | tcp_synq_drop(sk, req, prev); | 800 | inet_csk_reqsk_queue_drop(sk, req, prev); |
939 | goto out; | 801 | goto out; |
940 | 802 | ||
941 | case TCP_SYN_SENT: | 803 | case TCP_SYN_SENT: |
@@ -1132,7 +994,7 @@ static void tcp_v6_send_reset(struct sk_buff *skb) | |||
1132 | buff->csum); | 994 | buff->csum); |
1133 | 995 | ||
1134 | fl.proto = IPPROTO_TCP; | 996 | fl.proto = IPPROTO_TCP; |
1135 | fl.oif = tcp_v6_iif(skb); | 997 | fl.oif = inet6_iif(skb); |
1136 | fl.fl_ip_dport = t1->dest; | 998 | fl.fl_ip_dport = t1->dest; |
1137 | fl.fl_ip_sport = t1->source; | 999 | fl.fl_ip_sport = t1->source; |
1138 | 1000 | ||
@@ -1201,7 +1063,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 | |||
1201 | buff->csum); | 1063 | buff->csum); |
1202 | 1064 | ||
1203 | fl.proto = IPPROTO_TCP; | 1065 | fl.proto = IPPROTO_TCP; |
1204 | fl.oif = tcp_v6_iif(skb); | 1066 | fl.oif = inet6_iif(skb); |
1205 | fl.fl_ip_dport = t1->dest; | 1067 | fl.fl_ip_dport = t1->dest; |
1206 | fl.fl_ip_sport = t1->source; | 1068 | fl.fl_ip_sport = t1->source; |
1207 | 1069 | ||
@@ -1220,12 +1082,14 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 | |||
1220 | 1082 | ||
1221 | static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) | 1083 | static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) |
1222 | { | 1084 | { |
1223 | struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; | 1085 | struct inet_timewait_sock *tw = inet_twsk(sk); |
1086 | const struct tcp_timewait_sock *tcptw = tcp_twsk(sk); | ||
1224 | 1087 | ||
1225 | tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt, | 1088 | tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, |
1226 | tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent); | 1089 | tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, |
1090 | tcptw->tw_ts_recent); | ||
1227 | 1091 | ||
1228 | tcp_tw_put(tw); | 1092 | inet_twsk_put(tw); |
1229 | } | 1093 | } |
1230 | 1094 | ||
1231 | static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) | 1095 | static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) |
@@ -1237,28 +1101,25 @@ static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) | |||
1237 | static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) | 1101 | static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) |
1238 | { | 1102 | { |
1239 | struct request_sock *req, **prev; | 1103 | struct request_sock *req, **prev; |
1240 | struct tcphdr *th = skb->h.th; | 1104 | const struct tcphdr *th = skb->h.th; |
1241 | struct tcp_sock *tp = tcp_sk(sk); | ||
1242 | struct sock *nsk; | 1105 | struct sock *nsk; |
1243 | 1106 | ||
1244 | /* Find possible connection requests. */ | 1107 | /* Find possible connection requests. */ |
1245 | req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr, | 1108 | req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr, |
1246 | &skb->nh.ipv6h->daddr, tcp_v6_iif(skb)); | 1109 | &skb->nh.ipv6h->daddr, inet6_iif(skb)); |
1247 | if (req) | 1110 | if (req) |
1248 | return tcp_check_req(sk, skb, req, prev); | 1111 | return tcp_check_req(sk, skb, req, prev); |
1249 | 1112 | ||
1250 | nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr, | 1113 | nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr, |
1251 | th->source, | 1114 | th->source, &skb->nh.ipv6h->daddr, |
1252 | &skb->nh.ipv6h->daddr, | 1115 | ntohs(th->dest), inet6_iif(skb)); |
1253 | ntohs(th->dest), | ||
1254 | tcp_v6_iif(skb)); | ||
1255 | 1116 | ||
1256 | if (nsk) { | 1117 | if (nsk) { |
1257 | if (nsk->sk_state != TCP_TIME_WAIT) { | 1118 | if (nsk->sk_state != TCP_TIME_WAIT) { |
1258 | bh_lock_sock(nsk); | 1119 | bh_lock_sock(nsk); |
1259 | return nsk; | 1120 | return nsk; |
1260 | } | 1121 | } |
1261 | tcp_tw_put((struct tcp_tw_bucket*)nsk); | 1122 | inet_twsk_put((struct inet_timewait_sock *)nsk); |
1262 | return NULL; | 1123 | return NULL; |
1263 | } | 1124 | } |
1264 | 1125 | ||
@@ -1271,12 +1132,12 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) | |||
1271 | 1132 | ||
1272 | static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req) | 1133 | static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req) |
1273 | { | 1134 | { |
1274 | struct tcp_sock *tp = tcp_sk(sk); | 1135 | struct inet_connection_sock *icsk = inet_csk(sk); |
1275 | struct listen_sock *lopt = tp->accept_queue.listen_opt; | 1136 | struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; |
1276 | u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); | 1137 | const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); |
1277 | 1138 | ||
1278 | reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT); | 1139 | reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT); |
1279 | tcp_synq_added(sk); | 1140 | inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT); |
1280 | } | 1141 | } |
1281 | 1142 | ||
1282 | 1143 | ||
@@ -1301,13 +1162,13 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1301 | /* | 1162 | /* |
1302 | * There are no SYN attacks on IPv6, yet... | 1163 | * There are no SYN attacks on IPv6, yet... |
1303 | */ | 1164 | */ |
1304 | if (tcp_synq_is_full(sk) && !isn) { | 1165 | if (inet_csk_reqsk_queue_is_full(sk) && !isn) { |
1305 | if (net_ratelimit()) | 1166 | if (net_ratelimit()) |
1306 | printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n"); | 1167 | printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n"); |
1307 | goto drop; | 1168 | goto drop; |
1308 | } | 1169 | } |
1309 | 1170 | ||
1310 | if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1) | 1171 | if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) |
1311 | goto drop; | 1172 | goto drop; |
1312 | 1173 | ||
1313 | req = reqsk_alloc(&tcp6_request_sock_ops); | 1174 | req = reqsk_alloc(&tcp6_request_sock_ops); |
@@ -1339,7 +1200,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1339 | /* So that link locals have meaning */ | 1200 | /* So that link locals have meaning */ |
1340 | if (!sk->sk_bound_dev_if && | 1201 | if (!sk->sk_bound_dev_if && |
1341 | ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) | 1202 | ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) |
1342 | treq->iif = tcp_v6_iif(skb); | 1203 | treq->iif = inet6_iif(skb); |
1343 | 1204 | ||
1344 | if (isn == 0) | 1205 | if (isn == 0) |
1345 | isn = tcp_v6_init_sequence(sk,skb); | 1206 | isn = tcp_v6_init_sequence(sk,skb); |
@@ -1404,15 +1265,14 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1404 | newsk->sk_backlog_rcv = tcp_v4_do_rcv; | 1265 | newsk->sk_backlog_rcv = tcp_v4_do_rcv; |
1405 | newnp->pktoptions = NULL; | 1266 | newnp->pktoptions = NULL; |
1406 | newnp->opt = NULL; | 1267 | newnp->opt = NULL; |
1407 | newnp->mcast_oif = tcp_v6_iif(skb); | 1268 | newnp->mcast_oif = inet6_iif(skb); |
1408 | newnp->mcast_hops = skb->nh.ipv6h->hop_limit; | 1269 | newnp->mcast_hops = skb->nh.ipv6h->hop_limit; |
1409 | 1270 | ||
1410 | /* Charge newly allocated IPv6 socket. Though it is mapped, | 1271 | /* |
1411 | * it is IPv6 yet. | 1272 | * No need to charge this sock to the relevant IPv6 refcnt debug socks count |
1273 | * here, tcp_create_openreq_child now does this for us, see the comment in | ||
1274 | * that function for the gory details. -acme | ||
1412 | */ | 1275 | */ |
1413 | #ifdef INET_REFCNT_DEBUG | ||
1414 | atomic_inc(&inet6_sock_nr); | ||
1415 | #endif | ||
1416 | 1276 | ||
1417 | /* It is tricky place. Until this moment IPv4 tcp | 1277 | /* It is tricky place. Until this moment IPv4 tcp |
1418 | worked with IPv6 af_tcp.af_specific. | 1278 | worked with IPv6 af_tcp.af_specific. |
@@ -1467,10 +1327,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1467 | if (newsk == NULL) | 1327 | if (newsk == NULL) |
1468 | goto out; | 1328 | goto out; |
1469 | 1329 | ||
1470 | /* Charge newly allocated IPv6 socket */ | 1330 | /* |
1471 | #ifdef INET_REFCNT_DEBUG | 1331 | * No need to charge this sock to the relevant IPv6 refcnt debug socks |
1472 | atomic_inc(&inet6_sock_nr); | 1332 | * count here, tcp_create_openreq_child now does this for us, see the |
1473 | #endif | 1333 | * comment in that function for the gory details. -acme |
1334 | */ | ||
1474 | 1335 | ||
1475 | ip6_dst_store(newsk, dst, NULL); | 1336 | ip6_dst_store(newsk, dst, NULL); |
1476 | newsk->sk_route_caps = dst->dev->features & | 1337 | newsk->sk_route_caps = dst->dev->features & |
@@ -1509,7 +1370,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1509 | skb_set_owner_r(newnp->pktoptions, newsk); | 1370 | skb_set_owner_r(newnp->pktoptions, newsk); |
1510 | } | 1371 | } |
1511 | newnp->opt = NULL; | 1372 | newnp->opt = NULL; |
1512 | newnp->mcast_oif = tcp_v6_iif(skb); | 1373 | newnp->mcast_oif = inet6_iif(skb); |
1513 | newnp->mcast_hops = skb->nh.ipv6h->hop_limit; | 1374 | newnp->mcast_hops = skb->nh.ipv6h->hop_limit; |
1514 | 1375 | ||
1515 | /* Clone native IPv6 options from listening socket (if any) | 1376 | /* Clone native IPv6 options from listening socket (if any) |
@@ -1536,7 +1397,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1536 | newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; | 1397 | newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; |
1537 | 1398 | ||
1538 | __tcp_v6_hash(newsk); | 1399 | __tcp_v6_hash(newsk); |
1539 | tcp_inherit_port(sk, newsk); | 1400 | inet_inherit_port(&tcp_hashinfo, sk, newsk); |
1540 | 1401 | ||
1541 | return newsk; | 1402 | return newsk; |
1542 | 1403 | ||
@@ -1557,7 +1418,7 @@ static int tcp_v6_checksum_init(struct sk_buff *skb) | |||
1557 | if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, | 1418 | if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, |
1558 | &skb->nh.ipv6h->daddr,skb->csum)) | 1419 | &skb->nh.ipv6h->daddr,skb->csum)) |
1559 | return 0; | 1420 | return 0; |
1560 | LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n")); | 1421 | LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n"); |
1561 | } | 1422 | } |
1562 | if (skb->len <= 76) { | 1423 | if (skb->len <= 76) { |
1563 | if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, | 1424 | if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, |
@@ -1684,7 +1545,7 @@ ipv6_pktoptions: | |||
1684 | if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && | 1545 | if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && |
1685 | !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { | 1546 | !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { |
1686 | if (np->rxopt.bits.rxinfo) | 1547 | if (np->rxopt.bits.rxinfo) |
1687 | np->mcast_oif = tcp_v6_iif(opt_skb); | 1548 | np->mcast_oif = inet6_iif(opt_skb); |
1688 | if (np->rxopt.bits.rxhlim) | 1549 | if (np->rxopt.bits.rxhlim) |
1689 | np->mcast_hops = opt_skb->nh.ipv6h->hop_limit; | 1550 | np->mcast_hops = opt_skb->nh.ipv6h->hop_limit; |
1690 | if (ipv6_opt_accepted(sk, opt_skb)) { | 1551 | if (ipv6_opt_accepted(sk, opt_skb)) { |
@@ -1739,8 +1600,9 @@ static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) | |||
1739 | TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h); | 1600 | TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h); |
1740 | TCP_SKB_CB(skb)->sacked = 0; | 1601 | TCP_SKB_CB(skb)->sacked = 0; |
1741 | 1602 | ||
1742 | sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source, | 1603 | sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source, |
1743 | &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb)); | 1604 | &skb->nh.ipv6h->daddr, ntohs(th->dest), |
1605 | inet6_iif(skb)); | ||
1744 | 1606 | ||
1745 | if (!sk) | 1607 | if (!sk) |
1746 | goto no_tcp_socket; | 1608 | goto no_tcp_socket; |
@@ -1795,26 +1657,29 @@ discard_and_relse: | |||
1795 | 1657 | ||
1796 | do_time_wait: | 1658 | do_time_wait: |
1797 | if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { | 1659 | if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { |
1798 | tcp_tw_put((struct tcp_tw_bucket *) sk); | 1660 | inet_twsk_put((struct inet_timewait_sock *)sk); |
1799 | goto discard_it; | 1661 | goto discard_it; |
1800 | } | 1662 | } |
1801 | 1663 | ||
1802 | if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { | 1664 | if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { |
1803 | TCP_INC_STATS_BH(TCP_MIB_INERRS); | 1665 | TCP_INC_STATS_BH(TCP_MIB_INERRS); |
1804 | tcp_tw_put((struct tcp_tw_bucket *) sk); | 1666 | inet_twsk_put((struct inet_timewait_sock *)sk); |
1805 | goto discard_it; | 1667 | goto discard_it; |
1806 | } | 1668 | } |
1807 | 1669 | ||
1808 | switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk, | 1670 | switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk, |
1809 | skb, th, skb->len)) { | 1671 | skb, th)) { |
1810 | case TCP_TW_SYN: | 1672 | case TCP_TW_SYN: |
1811 | { | 1673 | { |
1812 | struct sock *sk2; | 1674 | struct sock *sk2; |
1813 | 1675 | ||
1814 | sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb)); | 1676 | sk2 = inet6_lookup_listener(&tcp_hashinfo, |
1677 | &skb->nh.ipv6h->daddr, | ||
1678 | ntohs(th->dest), inet6_iif(skb)); | ||
1815 | if (sk2 != NULL) { | 1679 | if (sk2 != NULL) { |
1816 | tcp_tw_deschedule((struct tcp_tw_bucket *)sk); | 1680 | struct inet_timewait_sock *tw = inet_twsk(sk); |
1817 | tcp_tw_put((struct tcp_tw_bucket *)sk); | 1681 | inet_twsk_deschedule(tw, &tcp_death_row); |
1682 | inet_twsk_put(tw); | ||
1818 | sk = sk2; | 1683 | sk = sk2; |
1819 | goto process; | 1684 | goto process; |
1820 | } | 1685 | } |
@@ -1983,7 +1848,7 @@ static struct tcp_func ipv6_specific = { | |||
1983 | static struct tcp_func ipv6_mapped = { | 1848 | static struct tcp_func ipv6_mapped = { |
1984 | .queue_xmit = ip_queue_xmit, | 1849 | .queue_xmit = ip_queue_xmit, |
1985 | .send_check = tcp_v4_send_check, | 1850 | .send_check = tcp_v4_send_check, |
1986 | .rebuild_header = tcp_v4_rebuild_header, | 1851 | .rebuild_header = inet_sk_rebuild_header, |
1987 | .conn_request = tcp_v6_conn_request, | 1852 | .conn_request = tcp_v6_conn_request, |
1988 | .syn_recv_sock = tcp_v6_syn_recv_sock, | 1853 | .syn_recv_sock = tcp_v6_syn_recv_sock, |
1989 | .remember_stamp = tcp_v4_remember_stamp, | 1854 | .remember_stamp = tcp_v4_remember_stamp, |
@@ -2002,13 +1867,14 @@ static struct tcp_func ipv6_mapped = { | |||
2002 | */ | 1867 | */ |
2003 | static int tcp_v6_init_sock(struct sock *sk) | 1868 | static int tcp_v6_init_sock(struct sock *sk) |
2004 | { | 1869 | { |
1870 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
2005 | struct tcp_sock *tp = tcp_sk(sk); | 1871 | struct tcp_sock *tp = tcp_sk(sk); |
2006 | 1872 | ||
2007 | skb_queue_head_init(&tp->out_of_order_queue); | 1873 | skb_queue_head_init(&tp->out_of_order_queue); |
2008 | tcp_init_xmit_timers(sk); | 1874 | tcp_init_xmit_timers(sk); |
2009 | tcp_prequeue_init(tp); | 1875 | tcp_prequeue_init(tp); |
2010 | 1876 | ||
2011 | tp->rto = TCP_TIMEOUT_INIT; | 1877 | icsk->icsk_rto = TCP_TIMEOUT_INIT; |
2012 | tp->mdev = TCP_TIMEOUT_INIT; | 1878 | tp->mdev = TCP_TIMEOUT_INIT; |
2013 | 1879 | ||
2014 | /* So many TCP implementations out there (incorrectly) count the | 1880 | /* So many TCP implementations out there (incorrectly) count the |
@@ -2030,7 +1896,7 @@ static int tcp_v6_init_sock(struct sock *sk) | |||
2030 | sk->sk_state = TCP_CLOSE; | 1896 | sk->sk_state = TCP_CLOSE; |
2031 | 1897 | ||
2032 | tp->af_specific = &ipv6_specific; | 1898 | tp->af_specific = &ipv6_specific; |
2033 | tp->ca_ops = &tcp_init_congestion_ops; | 1899 | icsk->icsk_ca_ops = &tcp_init_congestion_ops; |
2034 | sk->sk_write_space = sk_stream_write_space; | 1900 | sk->sk_write_space = sk_stream_write_space; |
2035 | sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); | 1901 | sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); |
2036 | 1902 | ||
@@ -2044,8 +1910,6 @@ static int tcp_v6_init_sock(struct sock *sk) | |||
2044 | 1910 | ||
2045 | static int tcp_v6_destroy_sock(struct sock *sk) | 1911 | static int tcp_v6_destroy_sock(struct sock *sk) |
2046 | { | 1912 | { |
2047 | extern int tcp_v4_destroy_sock(struct sock *sk); | ||
2048 | |||
2049 | tcp_v4_destroy_sock(sk); | 1913 | tcp_v4_destroy_sock(sk); |
2050 | return inet6_destroy_sock(sk); | 1914 | return inet6_destroy_sock(sk); |
2051 | } | 1915 | } |
@@ -2091,18 +1955,20 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) | |||
2091 | unsigned long timer_expires; | 1955 | unsigned long timer_expires; |
2092 | struct inet_sock *inet = inet_sk(sp); | 1956 | struct inet_sock *inet = inet_sk(sp); |
2093 | struct tcp_sock *tp = tcp_sk(sp); | 1957 | struct tcp_sock *tp = tcp_sk(sp); |
1958 | const struct inet_connection_sock *icsk = inet_csk(sp); | ||
2094 | struct ipv6_pinfo *np = inet6_sk(sp); | 1959 | struct ipv6_pinfo *np = inet6_sk(sp); |
2095 | 1960 | ||
2096 | dest = &np->daddr; | 1961 | dest = &np->daddr; |
2097 | src = &np->rcv_saddr; | 1962 | src = &np->rcv_saddr; |
2098 | destp = ntohs(inet->dport); | 1963 | destp = ntohs(inet->dport); |
2099 | srcp = ntohs(inet->sport); | 1964 | srcp = ntohs(inet->sport); |
2100 | if (tp->pending == TCP_TIME_RETRANS) { | 1965 | |
1966 | if (icsk->icsk_pending == ICSK_TIME_RETRANS) { | ||
2101 | timer_active = 1; | 1967 | timer_active = 1; |
2102 | timer_expires = tp->timeout; | 1968 | timer_expires = icsk->icsk_timeout; |
2103 | } else if (tp->pending == TCP_TIME_PROBE0) { | 1969 | } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { |
2104 | timer_active = 4; | 1970 | timer_active = 4; |
2105 | timer_expires = tp->timeout; | 1971 | timer_expires = icsk->icsk_timeout; |
2106 | } else if (timer_pending(&sp->sk_timer)) { | 1972 | } else if (timer_pending(&sp->sk_timer)) { |
2107 | timer_active = 2; | 1973 | timer_active = 2; |
2108 | timer_expires = sp->sk_timer.expires; | 1974 | timer_expires = sp->sk_timer.expires; |
@@ -2123,28 +1989,31 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) | |||
2123 | tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq, | 1989 | tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq, |
2124 | timer_active, | 1990 | timer_active, |
2125 | jiffies_to_clock_t(timer_expires - jiffies), | 1991 | jiffies_to_clock_t(timer_expires - jiffies), |
2126 | tp->retransmits, | 1992 | icsk->icsk_retransmits, |
2127 | sock_i_uid(sp), | 1993 | sock_i_uid(sp), |
2128 | tp->probes_out, | 1994 | icsk->icsk_probes_out, |
2129 | sock_i_ino(sp), | 1995 | sock_i_ino(sp), |
2130 | atomic_read(&sp->sk_refcnt), sp, | 1996 | atomic_read(&sp->sk_refcnt), sp, |
2131 | tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong, | 1997 | icsk->icsk_rto, |
1998 | icsk->icsk_ack.ato, | ||
1999 | (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong, | ||
2132 | tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh | 2000 | tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh |
2133 | ); | 2001 | ); |
2134 | } | 2002 | } |
2135 | 2003 | ||
2136 | static void get_timewait6_sock(struct seq_file *seq, | 2004 | static void get_timewait6_sock(struct seq_file *seq, |
2137 | struct tcp_tw_bucket *tw, int i) | 2005 | struct inet_timewait_sock *tw, int i) |
2138 | { | 2006 | { |
2139 | struct in6_addr *dest, *src; | 2007 | struct in6_addr *dest, *src; |
2140 | __u16 destp, srcp; | 2008 | __u16 destp, srcp; |
2009 | struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw); | ||
2141 | int ttd = tw->tw_ttd - jiffies; | 2010 | int ttd = tw->tw_ttd - jiffies; |
2142 | 2011 | ||
2143 | if (ttd < 0) | 2012 | if (ttd < 0) |
2144 | ttd = 0; | 2013 | ttd = 0; |
2145 | 2014 | ||
2146 | dest = &tw->tw_v6_daddr; | 2015 | dest = &tcp6tw->tw_v6_daddr; |
2147 | src = &tw->tw_v6_rcv_saddr; | 2016 | src = &tcp6tw->tw_v6_rcv_saddr; |
2148 | destp = ntohs(tw->tw_dport); | 2017 | destp = ntohs(tw->tw_dport); |
2149 | srcp = ntohs(tw->tw_sport); | 2018 | srcp = ntohs(tw->tw_sport); |
2150 | 2019 | ||
@@ -2219,7 +2088,7 @@ struct proto tcpv6_prot = { | |||
2219 | .close = tcp_close, | 2088 | .close = tcp_close, |
2220 | .connect = tcp_v6_connect, | 2089 | .connect = tcp_v6_connect, |
2221 | .disconnect = tcp_disconnect, | 2090 | .disconnect = tcp_disconnect, |
2222 | .accept = tcp_accept, | 2091 | .accept = inet_csk_accept, |
2223 | .ioctl = tcp_ioctl, | 2092 | .ioctl = tcp_ioctl, |
2224 | .init = tcp_v6_init_sock, | 2093 | .init = tcp_v6_init_sock, |
2225 | .destroy = tcp_v6_destroy_sock, | 2094 | .destroy = tcp_v6_destroy_sock, |
@@ -2236,11 +2105,13 @@ struct proto tcpv6_prot = { | |||
2236 | .sockets_allocated = &tcp_sockets_allocated, | 2105 | .sockets_allocated = &tcp_sockets_allocated, |
2237 | .memory_allocated = &tcp_memory_allocated, | 2106 | .memory_allocated = &tcp_memory_allocated, |
2238 | .memory_pressure = &tcp_memory_pressure, | 2107 | .memory_pressure = &tcp_memory_pressure, |
2108 | .orphan_count = &tcp_orphan_count, | ||
2239 | .sysctl_mem = sysctl_tcp_mem, | 2109 | .sysctl_mem = sysctl_tcp_mem, |
2240 | .sysctl_wmem = sysctl_tcp_wmem, | 2110 | .sysctl_wmem = sysctl_tcp_wmem, |
2241 | .sysctl_rmem = sysctl_tcp_rmem, | 2111 | .sysctl_rmem = sysctl_tcp_rmem, |
2242 | .max_header = MAX_TCP_HEADER, | 2112 | .max_header = MAX_TCP_HEADER, |
2243 | .obj_size = sizeof(struct tcp6_sock), | 2113 | .obj_size = sizeof(struct tcp6_sock), |
2114 | .twsk_obj_size = sizeof(struct tcp6_timewait_sock), | ||
2244 | .rsk_prot = &tcp6_request_sock_ops, | 2115 | .rsk_prot = &tcp6_request_sock_ops, |
2245 | }; | 2116 | }; |
2246 | 2117 | ||
@@ -2250,8 +2121,6 @@ static struct inet6_protocol tcpv6_protocol = { | |||
2250 | .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, | 2121 | .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, |
2251 | }; | 2122 | }; |
2252 | 2123 | ||
2253 | extern struct proto_ops inet6_stream_ops; | ||
2254 | |||
2255 | static struct inet_protosw tcpv6_protosw = { | 2124 | static struct inet_protosw tcpv6_protosw = { |
2256 | .type = SOCK_STREAM, | 2125 | .type = SOCK_STREAM, |
2257 | .protocol = IPPROTO_TCP, | 2126 | .protocol = IPPROTO_TCP, |
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index eff050ac7049..390d750449ce 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c | |||
@@ -51,6 +51,7 @@ | |||
51 | #include <net/udp.h> | 51 | #include <net/udp.h> |
52 | #include <net/raw.h> | 52 | #include <net/raw.h> |
53 | #include <net/inet_common.h> | 53 | #include <net/inet_common.h> |
54 | #include <net/tcp_states.h> | ||
54 | 55 | ||
55 | #include <net/ip6_checksum.h> | 56 | #include <net/ip6_checksum.h> |
56 | #include <net/xfrm.h> | 57 | #include <net/xfrm.h> |
@@ -58,7 +59,7 @@ | |||
58 | #include <linux/proc_fs.h> | 59 | #include <linux/proc_fs.h> |
59 | #include <linux/seq_file.h> | 60 | #include <linux/seq_file.h> |
60 | 61 | ||
61 | DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6); | 62 | DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; |
62 | 63 | ||
63 | /* Grrr, addr_type already calculated by caller, but I don't want | 64 | /* Grrr, addr_type already calculated by caller, but I don't want |
64 | * to add some silly "cookie" argument to this method just for that. | 65 | * to add some silly "cookie" argument to this method just for that. |
@@ -477,8 +478,7 @@ static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) | |||
477 | /* RFC 2460 section 8.1 says that we SHOULD log | 478 | /* RFC 2460 section 8.1 says that we SHOULD log |
478 | this error. Well, it is reasonable. | 479 | this error. Well, it is reasonable. |
479 | */ | 480 | */ |
480 | LIMIT_NETDEBUG( | 481 | LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); |
481 | printk(KERN_INFO "IPv6: udp checksum is 0\n")); | ||
482 | goto discard; | 482 | goto discard; |
483 | } | 483 | } |
484 | 484 | ||
@@ -493,7 +493,7 @@ static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) | |||
493 | if (skb->ip_summed==CHECKSUM_HW) { | 493 | if (skb->ip_summed==CHECKSUM_HW) { |
494 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 494 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
495 | if (csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) { | 495 | if (csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) { |
496 | LIMIT_NETDEBUG(printk(KERN_DEBUG "udp v6 hw csum failure.\n")); | 496 | LIMIT_NETDEBUG(KERN_DEBUG "udp v6 hw csum failure.\n"); |
497 | skb->ip_summed = CHECKSUM_NONE; | 497 | skb->ip_summed = CHECKSUM_NONE; |
498 | } | 498 | } |
499 | } | 499 | } |
@@ -825,7 +825,7 @@ back_from_confirm: | |||
825 | /* ... which is an evident application bug. --ANK */ | 825 | /* ... which is an evident application bug. --ANK */ |
826 | release_sock(sk); | 826 | release_sock(sk); |
827 | 827 | ||
828 | LIMIT_NETDEBUG(printk(KERN_DEBUG "udp cork app bug 2\n")); | 828 | LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); |
829 | err = -EINVAL; | 829 | err = -EINVAL; |
830 | goto out; | 830 | goto out; |
831 | } | 831 | } |
@@ -1054,8 +1054,6 @@ struct proto udpv6_prot = { | |||
1054 | .obj_size = sizeof(struct udp6_sock), | 1054 | .obj_size = sizeof(struct udp6_sock), |
1055 | }; | 1055 | }; |
1056 | 1056 | ||
1057 | extern struct proto_ops inet6_dgram_ops; | ||
1058 | |||
1059 | static struct inet_protosw udpv6_protosw = { | 1057 | static struct inet_protosw udpv6_protosw = { |
1060 | .type = SOCK_DGRAM, | 1058 | .type = SOCK_DGRAM, |
1061 | .protocol = IPPROTO_UDP, | 1059 | .protocol = IPPROTO_UDP, |
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 60c26c87277e..fbef7826a74f 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c | |||
@@ -79,7 +79,7 @@ static u32 xfrm6_tunnel_spi; | |||
79 | #define XFRM6_TUNNEL_SPI_MIN 1 | 79 | #define XFRM6_TUNNEL_SPI_MIN 1 |
80 | #define XFRM6_TUNNEL_SPI_MAX 0xffffffff | 80 | #define XFRM6_TUNNEL_SPI_MAX 0xffffffff |
81 | 81 | ||
82 | static kmem_cache_t *xfrm6_tunnel_spi_kmem; | 82 | static kmem_cache_t *xfrm6_tunnel_spi_kmem __read_mostly; |
83 | 83 | ||
84 | #define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256 | 84 | #define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256 |
85 | #define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256 | 85 | #define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256 |
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 5a27e5df5886..34b3bb868409 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c | |||
@@ -44,7 +44,6 @@ | |||
44 | #include <linux/socket.h> | 44 | #include <linux/socket.h> |
45 | #include <linux/sockios.h> | 45 | #include <linux/sockios.h> |
46 | #include <linux/string.h> | 46 | #include <linux/string.h> |
47 | #include <linux/tcp.h> | ||
48 | #include <linux/types.h> | 47 | #include <linux/types.h> |
49 | #include <linux/termios.h> | 48 | #include <linux/termios.h> |
50 | 49 | ||
@@ -52,6 +51,7 @@ | |||
52 | #include <net/p8022.h> | 51 | #include <net/p8022.h> |
53 | #include <net/psnap.h> | 52 | #include <net/psnap.h> |
54 | #include <net/sock.h> | 53 | #include <net/sock.h> |
54 | #include <net/tcp_states.h> | ||
55 | 55 | ||
56 | #include <asm/uaccess.h> | 56 | #include <asm/uaccess.h> |
57 | 57 | ||
@@ -1627,7 +1627,7 @@ out: | |||
1627 | return rc; | 1627 | return rc; |
1628 | } | 1628 | } |
1629 | 1629 | ||
1630 | static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) | 1630 | static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) |
1631 | { | 1631 | { |
1632 | /* NULL here for pt means the packet was looped back */ | 1632 | /* NULL here for pt means the packet was looped back */ |
1633 | struct ipx_interface *intrfc; | 1633 | struct ipx_interface *intrfc; |
@@ -1796,8 +1796,8 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
1796 | copied); | 1796 | copied); |
1797 | if (rc) | 1797 | if (rc) |
1798 | goto out_free; | 1798 | goto out_free; |
1799 | if (skb->stamp.tv_sec) | 1799 | if (skb->tstamp.off_sec) |
1800 | sk->sk_stamp = skb->stamp; | 1800 | skb_get_timestamp(skb, &sk->sk_stamp); |
1801 | 1801 | ||
1802 | msg->msg_namelen = sizeof(*sipx); | 1802 | msg->msg_namelen = sizeof(*sipx); |
1803 | 1803 | ||
@@ -1940,9 +1940,7 @@ static struct notifier_block ipx_dev_notifier = { | |||
1940 | }; | 1940 | }; |
1941 | 1941 | ||
1942 | extern struct datalink_proto *make_EII_client(void); | 1942 | extern struct datalink_proto *make_EII_client(void); |
1943 | extern struct datalink_proto *make_8023_client(void); | ||
1944 | extern void destroy_EII_client(struct datalink_proto *); | 1943 | extern void destroy_EII_client(struct datalink_proto *); |
1945 | extern void destroy_8023_client(struct datalink_proto *); | ||
1946 | 1944 | ||
1947 | static unsigned char ipx_8022_type = 0xE0; | 1945 | static unsigned char ipx_8022_type = 0xE0; |
1948 | static unsigned char ipx_snap_id[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 }; | 1946 | static unsigned char ipx_snap_id[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 }; |
diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index b6761913445a..1f73d9ea434d 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c | |||
@@ -10,7 +10,7 @@ | |||
10 | #include <linux/proc_fs.h> | 10 | #include <linux/proc_fs.h> |
11 | #include <linux/spinlock.h> | 11 | #include <linux/spinlock.h> |
12 | #include <linux/seq_file.h> | 12 | #include <linux/seq_file.h> |
13 | #include <linux/tcp.h> | 13 | #include <net/tcp_states.h> |
14 | #include <net/ipx.h> | 14 | #include <net/ipx.h> |
15 | 15 | ||
16 | static __inline__ struct ipx_interface *ipx_get_interface_idx(loff_t pos) | 16 | static __inline__ struct ipx_interface *ipx_get_interface_idx(loff_t pos) |
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 92c6e8d4e731..6f92f9c62990 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c | |||
@@ -56,7 +56,7 @@ | |||
56 | #include <asm/uaccess.h> | 56 | #include <asm/uaccess.h> |
57 | 57 | ||
58 | #include <net/sock.h> | 58 | #include <net/sock.h> |
59 | #include <net/tcp.h> | 59 | #include <net/tcp_states.h> |
60 | 60 | ||
61 | #include <net/irda/af_irda.h> | 61 | #include <net/irda/af_irda.h> |
62 | 62 | ||
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index 6dafbb43b529..3e9a06abbdd0 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c | |||
@@ -988,9 +988,6 @@ void irlap_resend_rejected_frames(struct irlap_cb *self, int command) | |||
988 | IRDA_DEBUG(0, "%s(), unable to copy\n", __FUNCTION__); | 988 | IRDA_DEBUG(0, "%s(), unable to copy\n", __FUNCTION__); |
989 | return; | 989 | return; |
990 | } | 990 | } |
991 | /* Unlink tx_skb from list */ | ||
992 | tx_skb->next = tx_skb->prev = NULL; | ||
993 | tx_skb->list = NULL; | ||
994 | 991 | ||
995 | /* Clear old Nr field + poll bit */ | 992 | /* Clear old Nr field + poll bit */ |
996 | tx_skb->data[1] &= 0x0f; | 993 | tx_skb->data[1] &= 0x0f; |
@@ -1063,9 +1060,6 @@ void irlap_resend_rejected_frame(struct irlap_cb *self, int command) | |||
1063 | IRDA_DEBUG(0, "%s(), unable to copy\n", __FUNCTION__); | 1060 | IRDA_DEBUG(0, "%s(), unable to copy\n", __FUNCTION__); |
1064 | return; | 1061 | return; |
1065 | } | 1062 | } |
1066 | /* Unlink tx_skb from list */ | ||
1067 | tx_skb->next = tx_skb->prev = NULL; | ||
1068 | tx_skb->list = NULL; | ||
1069 | 1063 | ||
1070 | /* Clear old Nr field + poll bit */ | 1064 | /* Clear old Nr field + poll bit */ |
1071 | tx_skb->data[1] &= 0x0f; | 1065 | tx_skb->data[1] &= 0x0f; |
@@ -1309,7 +1303,7 @@ static void irlap_recv_test_frame(struct irlap_cb *self, struct sk_buff *skb, | |||
1309 | * Jean II | 1303 | * Jean II |
1310 | */ | 1304 | */ |
1311 | int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev, | 1305 | int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev, |
1312 | struct packet_type *ptype) | 1306 | struct packet_type *ptype, struct net_device *orig_dev) |
1313 | { | 1307 | { |
1314 | struct irlap_info info; | 1308 | struct irlap_info info; |
1315 | struct irlap_cb *self; | 1309 | struct irlap_cb *self; |
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 7a4a4d7fbe66..c19e9ce05a3a 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c | |||
@@ -53,7 +53,6 @@ struct irlmp_cb *irlmp = NULL; | |||
53 | /* These can be altered by the sysctl interface */ | 53 | /* These can be altered by the sysctl interface */ |
54 | int sysctl_discovery = 0; | 54 | int sysctl_discovery = 0; |
55 | int sysctl_discovery_timeout = 3; /* 3 seconds by default */ | 55 | int sysctl_discovery_timeout = 3; /* 3 seconds by default */ |
56 | EXPORT_SYMBOL(sysctl_discovery_timeout); | ||
57 | int sysctl_discovery_slots = 6; /* 6 slots by default */ | 56 | int sysctl_discovery_slots = 6; /* 6 slots by default */ |
58 | int sysctl_lap_keepalive_time = LM_IDLE_TIMEOUT * 1000 / HZ; | 57 | int sysctl_lap_keepalive_time = LM_IDLE_TIMEOUT * 1000 / HZ; |
59 | char sysctl_devname[65]; | 58 | char sysctl_devname[65]; |
@@ -67,7 +66,6 @@ const char *irlmp_reasons[] = { | |||
67 | "LM_INIT_DISCONNECT", | 66 | "LM_INIT_DISCONNECT", |
68 | "ERROR, NOT USED", | 67 | "ERROR, NOT USED", |
69 | }; | 68 | }; |
70 | EXPORT_SYMBOL(irlmp_reasons); | ||
71 | 69 | ||
72 | /* | 70 | /* |
73 | * Function irlmp_init (void) | 71 | * Function irlmp_init (void) |
@@ -675,7 +673,6 @@ struct lsap_cb *irlmp_dup(struct lsap_cb *orig, void *instance) | |||
675 | 673 | ||
676 | return new; | 674 | return new; |
677 | } | 675 | } |
678 | EXPORT_SYMBOL(irlmp_dup); | ||
679 | 676 | ||
680 | /* | 677 | /* |
681 | * Function irlmp_disconnect_request (handle, userdata) | 678 | * Function irlmp_disconnect_request (handle, userdata) |
diff --git a/net/irda/irmod.c b/net/irda/irmod.c index 6ffaed4544e9..634901dd156f 100644 --- a/net/irda/irmod.c +++ b/net/irda/irmod.c | |||
@@ -54,7 +54,7 @@ extern int irsock_init(void); | |||
54 | extern void irsock_cleanup(void); | 54 | extern void irsock_cleanup(void); |
55 | /* irlap_frame.c */ | 55 | /* irlap_frame.c */ |
56 | extern int irlap_driver_rcv(struct sk_buff *, struct net_device *, | 56 | extern int irlap_driver_rcv(struct sk_buff *, struct net_device *, |
57 | struct packet_type *); | 57 | struct packet_type *, struct net_device *); |
58 | 58 | ||
59 | /* | 59 | /* |
60 | * Module parameters | 60 | * Module parameters |
diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h index 9004f7349a76..b391cb3893d4 100644 --- a/net/irda/irnet/irnet.h +++ b/net/irda/irnet/irnet.h | |||
@@ -517,9 +517,6 @@ extern int | |||
517 | irda_irnet_init(void); /* Initialise IrDA part of IrNET */ | 517 | irda_irnet_init(void); /* Initialise IrDA part of IrNET */ |
518 | extern void | 518 | extern void |
519 | irda_irnet_cleanup(void); /* Teardown IrDA part of IrNET */ | 519 | irda_irnet_cleanup(void); /* Teardown IrDA part of IrNET */ |
520 | /* ---------------------------- MODULE ---------------------------- */ | ||
521 | extern int | ||
522 | irnet_init(void); /* Initialise IrNET module */ | ||
523 | 520 | ||
524 | /**************************** VARIABLES ****************************/ | 521 | /**************************** VARIABLES ****************************/ |
525 | 522 | ||
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index f8f984bb9922..e53bf9e0053e 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c | |||
@@ -1107,7 +1107,7 @@ ppp_irnet_cleanup(void) | |||
1107 | /* | 1107 | /* |
1108 | * Module main entry point | 1108 | * Module main entry point |
1109 | */ | 1109 | */ |
1110 | int __init | 1110 | static int __init |
1111 | irnet_init(void) | 1111 | irnet_init(void) |
1112 | { | 1112 | { |
1113 | int err; | 1113 | int err; |
diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c index b0dd3ea35999..1ba8c7106639 100644 --- a/net/irda/irqueue.c +++ b/net/irda/irqueue.c | |||
@@ -822,7 +822,6 @@ void* hashbin_find_next( hashbin_t* hashbin, long hashv, const char* name, | |||
822 | 822 | ||
823 | return entry; | 823 | return entry; |
824 | } | 824 | } |
825 | EXPORT_SYMBOL(hashbin_find_next); | ||
826 | 825 | ||
827 | /* | 826 | /* |
828 | * Function hashbin_get_first (hashbin) | 827 | * Function hashbin_get_first (hashbin) |
diff --git a/net/lapb/lapb_subr.c b/net/lapb/lapb_subr.c index 5de05a0bc0ff..8b5eefd70f03 100644 --- a/net/lapb/lapb_subr.c +++ b/net/lapb/lapb_subr.c | |||
@@ -78,7 +78,7 @@ void lapb_requeue_frames(struct lapb_cb *lapb) | |||
78 | if (!skb_prev) | 78 | if (!skb_prev) |
79 | skb_queue_head(&lapb->write_queue, skb); | 79 | skb_queue_head(&lapb->write_queue, skb); |
80 | else | 80 | else |
81 | skb_append(skb_prev, skb); | 81 | skb_append(skb_prev, skb, &lapb->write_queue); |
82 | skb_prev = skb; | 82 | skb_prev = skb; |
83 | } | 83 | } |
84 | } | 84 | } |
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 20b4cfebd74c..66f55e514b56 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c | |||
@@ -23,13 +23,13 @@ | |||
23 | #include <linux/config.h> | 23 | #include <linux/config.h> |
24 | #include <linux/kernel.h> | 24 | #include <linux/kernel.h> |
25 | #include <linux/module.h> | 25 | #include <linux/module.h> |
26 | #include <linux/tcp.h> | ||
27 | #include <linux/rtnetlink.h> | 26 | #include <linux/rtnetlink.h> |
28 | #include <linux/init.h> | 27 | #include <linux/init.h> |
29 | #include <net/llc.h> | 28 | #include <net/llc.h> |
30 | #include <net/llc_sap.h> | 29 | #include <net/llc_sap.h> |
31 | #include <net/llc_pdu.h> | 30 | #include <net/llc_pdu.h> |
32 | #include <net/llc_conn.h> | 31 | #include <net/llc_conn.h> |
32 | #include <net/tcp_states.h> | ||
33 | 33 | ||
34 | /* remember: uninitialized global data is zeroed because its in .bss */ | 34 | /* remember: uninitialized global data is zeroed because its in .bss */ |
35 | static u16 llc_ui_sap_last_autoport = LLC_SAP_DYN_START; | 35 | static u16 llc_ui_sap_last_autoport = LLC_SAP_DYN_START; |
@@ -714,7 +714,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
714 | if (uaddr) | 714 | if (uaddr) |
715 | memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr)); | 715 | memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr)); |
716 | msg->msg_namelen = sizeof(*uaddr); | 716 | msg->msg_namelen = sizeof(*uaddr); |
717 | if (!skb->list) { | 717 | if (!skb->next) { |
718 | dgram_free: | 718 | dgram_free: |
719 | kfree_skb(skb); | 719 | kfree_skb(skb); |
720 | } | 720 | } |
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index eba812a9c69c..4c644bc70eae 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c | |||
@@ -16,7 +16,7 @@ | |||
16 | #include <net/llc_sap.h> | 16 | #include <net/llc_sap.h> |
17 | #include <net/llc_conn.h> | 17 | #include <net/llc_conn.h> |
18 | #include <net/sock.h> | 18 | #include <net/sock.h> |
19 | #include <linux/tcp.h> | 19 | #include <net/tcp_states.h> |
20 | #include <net/llc_c_ev.h> | 20 | #include <net/llc_c_ev.h> |
21 | #include <net/llc_c_ac.h> | 21 | #include <net/llc_c_ac.h> |
22 | #include <net/llc_c_st.h> | 22 | #include <net/llc_c_st.h> |
@@ -71,7 +71,11 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) | |||
71 | 71 | ||
72 | if (!ev->ind_prim && !ev->cfm_prim) { | 72 | if (!ev->ind_prim && !ev->cfm_prim) { |
73 | /* indicate or confirm not required */ | 73 | /* indicate or confirm not required */ |
74 | if (!skb->list) | 74 | /* XXX this is not very pretty, perhaps we should store |
75 | * XXX indicate/confirm-needed state in the llc_conn_state_ev | ||
76 | * XXX control block of the SKB instead? -DaveM | ||
77 | */ | ||
78 | if (!skb->next) | ||
75 | goto out_kfree_skb; | 79 | goto out_kfree_skb; |
76 | goto out_skb_put; | 80 | goto out_skb_put; |
77 | } | 81 | } |
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 5ff02c080a0b..9727455bf0e7 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c | |||
@@ -103,7 +103,8 @@ out: | |||
103 | struct llc_sap *llc_sap_open(unsigned char lsap, | 103 | struct llc_sap *llc_sap_open(unsigned char lsap, |
104 | int (*func)(struct sk_buff *skb, | 104 | int (*func)(struct sk_buff *skb, |
105 | struct net_device *dev, | 105 | struct net_device *dev, |
106 | struct packet_type *pt)) | 106 | struct packet_type *pt, |
107 | struct net_device *orig_dev)) | ||
107 | { | 108 | { |
108 | struct llc_sap *sap = llc_sap_find(lsap); | 109 | struct llc_sap *sap = llc_sap_find(lsap); |
109 | 110 | ||
diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c index 0f9fc48aeaf9..0f84f66018e4 100644 --- a/net/llc/llc_if.c +++ b/net/llc/llc_if.c | |||
@@ -15,7 +15,6 @@ | |||
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/kernel.h> | 16 | #include <linux/kernel.h> |
17 | #include <linux/netdevice.h> | 17 | #include <linux/netdevice.h> |
18 | #include <linux/tcp.h> | ||
19 | #include <asm/errno.h> | 18 | #include <asm/errno.h> |
20 | #include <net/llc_if.h> | 19 | #include <net/llc_if.h> |
21 | #include <net/llc_sap.h> | 20 | #include <net/llc_sap.h> |
@@ -25,6 +24,7 @@ | |||
25 | #include <net/llc_c_ev.h> | 24 | #include <net/llc_c_ev.h> |
26 | #include <net/llc_c_ac.h> | 25 | #include <net/llc_c_ac.h> |
27 | #include <net/llc_c_st.h> | 26 | #include <net/llc_c_st.h> |
27 | #include <net/tcp_states.h> | ||
28 | 28 | ||
29 | u8 llc_mac_null_var[IFHWADDRLEN]; | 29 | u8 llc_mac_null_var[IFHWADDRLEN]; |
30 | 30 | ||
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 4da6976efc9c..13b46240b7a1 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c | |||
@@ -132,7 +132,7 @@ static inline int llc_fixup_skb(struct sk_buff *skb) | |||
132 | * data now), it queues this frame in the connection's backlog. | 132 | * data now), it queues this frame in the connection's backlog. |
133 | */ | 133 | */ |
134 | int llc_rcv(struct sk_buff *skb, struct net_device *dev, | 134 | int llc_rcv(struct sk_buff *skb, struct net_device *dev, |
135 | struct packet_type *pt) | 135 | struct packet_type *pt, struct net_device *orig_dev) |
136 | { | 136 | { |
137 | struct llc_sap *sap; | 137 | struct llc_sap *sap; |
138 | struct llc_pdu_sn *pdu; | 138 | struct llc_pdu_sn *pdu; |
@@ -165,7 +165,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, | |||
165 | * LLC functionality | 165 | * LLC functionality |
166 | */ | 166 | */ |
167 | if (sap->rcv_func) { | 167 | if (sap->rcv_func) { |
168 | sap->rcv_func(skb, dev, pt); | 168 | sap->rcv_func(skb, dev, pt, orig_dev); |
169 | goto out; | 169 | goto out; |
170 | } | 170 | } |
171 | dest = llc_pdu_type(skb); | 171 | dest = llc_pdu_type(skb); |
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 965c94eb4bbc..34228ef14985 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c | |||
@@ -21,7 +21,7 @@ | |||
21 | #include <net/llc_s_ev.h> | 21 | #include <net/llc_s_ev.h> |
22 | #include <net/llc_s_st.h> | 22 | #include <net/llc_s_st.h> |
23 | #include <net/sock.h> | 23 | #include <net/sock.h> |
24 | #include <linux/tcp.h> | 24 | #include <net/tcp_states.h> |
25 | #include <linux/llc.h> | 25 | #include <linux/llc.h> |
26 | 26 | ||
27 | /** | 27 | /** |
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig new file mode 100644 index 000000000000..8296b38bf270 --- /dev/null +++ b/net/netfilter/Kconfig | |||
@@ -0,0 +1,24 @@ | |||
1 | config NETFILTER_NETLINK | ||
2 | tristate "Netfilter netlink interface" | ||
3 | help | ||
4 | If this option is enabled, the kernel will include support | ||
5 | for the new netfilter netlink interface. | ||
6 | |||
7 | config NETFILTER_NETLINK_QUEUE | ||
8 | tristate "Netfilter NFQUEUE over NFNETLINK interface" | ||
9 | depends on NETFILTER_NETLINK | ||
10 | help | ||
11 | If this option isenabled, the kernel will include support | ||
12 | for queueing packets via NFNETLINK. | ||
13 | |||
14 | config NETFILTER_NETLINK_LOG | ||
15 | tristate "Netfilter LOG over NFNETLINK interface" | ||
16 | depends on NETFILTER_NETLINK | ||
17 | help | ||
18 | If this option is enabled, the kernel will include support | ||
19 | for logging packets via NFNETLINK. | ||
20 | |||
21 | This obsoletes the existing ipt_ULOG and ebg_ulog mechanisms, | ||
22 | and is also scheduled to replace the old syslog-based ipt_LOG | ||
23 | and ip6t_LOG modules. | ||
24 | |||
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile new file mode 100644 index 000000000000..b3b44f8b415a --- /dev/null +++ b/net/netfilter/Makefile | |||
@@ -0,0 +1,7 @@ | |||
1 | netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o | ||
2 | |||
3 | obj-$(CONFIG_NETFILTER) = netfilter.o | ||
4 | |||
5 | obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o | ||
6 | obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o | ||
7 | obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o | ||
diff --git a/net/netfilter/core.c b/net/netfilter/core.c new file mode 100644 index 000000000000..1ceb1a6c254b --- /dev/null +++ b/net/netfilter/core.c | |||
@@ -0,0 +1,216 @@ | |||
1 | /* netfilter.c: look after the filters for various protocols. | ||
2 | * Heavily influenced by the old firewall.c by David Bonn and Alan Cox. | ||
3 | * | ||
4 | * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any | ||
5 | * way. | ||
6 | * | ||
7 | * Rusty Russell (C)2000 -- This code is GPL. | ||
8 | * | ||
9 | * February 2000: Modified by James Morris to have 1 queue per protocol. | ||
10 | * 15-Mar-2000: Added NF_REPEAT --RR. | ||
11 | * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik. | ||
12 | */ | ||
13 | #include <linux/config.h> | ||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/netfilter.h> | ||
16 | #include <net/protocol.h> | ||
17 | #include <linux/init.h> | ||
18 | #include <linux/skbuff.h> | ||
19 | #include <linux/wait.h> | ||
20 | #include <linux/module.h> | ||
21 | #include <linux/interrupt.h> | ||
22 | #include <linux/if.h> | ||
23 | #include <linux/netdevice.h> | ||
24 | #include <linux/inetdevice.h> | ||
25 | #include <linux/proc_fs.h> | ||
26 | #include <net/sock.h> | ||
27 | |||
28 | #include "nf_internals.h" | ||
29 | |||
30 | /* In this code, we can be waiting indefinitely for userspace to | ||
31 | * service a packet if a hook returns NF_QUEUE. We could keep a count | ||
32 | * of skbuffs queued for userspace, and not deregister a hook unless | ||
33 | * this is zero, but that sucks. Now, we simply check when the | ||
34 | * packets come back: if the hook is gone, the packet is discarded. */ | ||
35 | struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; | ||
36 | EXPORT_SYMBOL(nf_hooks); | ||
37 | static DEFINE_SPINLOCK(nf_hook_lock); | ||
38 | |||
39 | int nf_register_hook(struct nf_hook_ops *reg) | ||
40 | { | ||
41 | struct list_head *i; | ||
42 | |||
43 | spin_lock_bh(&nf_hook_lock); | ||
44 | list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) { | ||
45 | if (reg->priority < ((struct nf_hook_ops *)i)->priority) | ||
46 | break; | ||
47 | } | ||
48 | list_add_rcu(®->list, i->prev); | ||
49 | spin_unlock_bh(&nf_hook_lock); | ||
50 | |||
51 | synchronize_net(); | ||
52 | return 0; | ||
53 | } | ||
54 | EXPORT_SYMBOL(nf_register_hook); | ||
55 | |||
56 | void nf_unregister_hook(struct nf_hook_ops *reg) | ||
57 | { | ||
58 | spin_lock_bh(&nf_hook_lock); | ||
59 | list_del_rcu(®->list); | ||
60 | spin_unlock_bh(&nf_hook_lock); | ||
61 | |||
62 | synchronize_net(); | ||
63 | } | ||
64 | EXPORT_SYMBOL(nf_unregister_hook); | ||
65 | |||
66 | unsigned int nf_iterate(struct list_head *head, | ||
67 | struct sk_buff **skb, | ||
68 | int hook, | ||
69 | const struct net_device *indev, | ||
70 | const struct net_device *outdev, | ||
71 | struct list_head **i, | ||
72 | int (*okfn)(struct sk_buff *), | ||
73 | int hook_thresh) | ||
74 | { | ||
75 | unsigned int verdict; | ||
76 | |||
77 | /* | ||
78 | * The caller must not block between calls to this | ||
79 | * function because of risk of continuing from deleted element. | ||
80 | */ | ||
81 | list_for_each_continue_rcu(*i, head) { | ||
82 | struct nf_hook_ops *elem = (struct nf_hook_ops *)*i; | ||
83 | |||
84 | if (hook_thresh > elem->priority) | ||
85 | continue; | ||
86 | |||
87 | /* Optimization: we don't need to hold module | ||
88 | reference here, since function can't sleep. --RR */ | ||
89 | verdict = elem->hook(hook, skb, indev, outdev, okfn); | ||
90 | if (verdict != NF_ACCEPT) { | ||
91 | #ifdef CONFIG_NETFILTER_DEBUG | ||
92 | if (unlikely((verdict & NF_VERDICT_MASK) | ||
93 | > NF_MAX_VERDICT)) { | ||
94 | NFDEBUG("Evil return from %p(%u).\n", | ||
95 | elem->hook, hook); | ||
96 | continue; | ||
97 | } | ||
98 | #endif | ||
99 | if (verdict != NF_REPEAT) | ||
100 | return verdict; | ||
101 | *i = (*i)->prev; | ||
102 | } | ||
103 | } | ||
104 | return NF_ACCEPT; | ||
105 | } | ||
106 | |||
107 | |||
108 | /* Returns 1 if okfn() needs to be executed by the caller, | ||
109 | * -EPERM for NF_DROP, 0 otherwise. */ | ||
110 | int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, | ||
111 | struct net_device *indev, | ||
112 | struct net_device *outdev, | ||
113 | int (*okfn)(struct sk_buff *), | ||
114 | int hook_thresh) | ||
115 | { | ||
116 | struct list_head *elem; | ||
117 | unsigned int verdict; | ||
118 | int ret = 0; | ||
119 | |||
120 | /* We may already have this, but read-locks nest anyway */ | ||
121 | rcu_read_lock(); | ||
122 | |||
123 | elem = &nf_hooks[pf][hook]; | ||
124 | next_hook: | ||
125 | verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev, | ||
126 | outdev, &elem, okfn, hook_thresh); | ||
127 | if (verdict == NF_ACCEPT || verdict == NF_STOP) { | ||
128 | ret = 1; | ||
129 | goto unlock; | ||
130 | } else if (verdict == NF_DROP) { | ||
131 | kfree_skb(*pskb); | ||
132 | ret = -EPERM; | ||
133 | } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { | ||
134 | NFDEBUG("nf_hook: Verdict = QUEUE.\n"); | ||
135 | if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn, | ||
136 | verdict >> NF_VERDICT_BITS)) | ||
137 | goto next_hook; | ||
138 | } | ||
139 | unlock: | ||
140 | rcu_read_unlock(); | ||
141 | return ret; | ||
142 | } | ||
143 | EXPORT_SYMBOL(nf_hook_slow); | ||
144 | |||
145 | |||
146 | int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len) | ||
147 | { | ||
148 | struct sk_buff *nskb; | ||
149 | |||
150 | if (writable_len > (*pskb)->len) | ||
151 | return 0; | ||
152 | |||
153 | /* Not exclusive use of packet? Must copy. */ | ||
154 | if (skb_shared(*pskb) || skb_cloned(*pskb)) | ||
155 | goto copy_skb; | ||
156 | |||
157 | return pskb_may_pull(*pskb, writable_len); | ||
158 | |||
159 | copy_skb: | ||
160 | nskb = skb_copy(*pskb, GFP_ATOMIC); | ||
161 | if (!nskb) | ||
162 | return 0; | ||
163 | BUG_ON(skb_is_nonlinear(nskb)); | ||
164 | |||
165 | /* Rest of kernel will get very unhappy if we pass it a | ||
166 | suddenly-orphaned skbuff */ | ||
167 | if ((*pskb)->sk) | ||
168 | skb_set_owner_w(nskb, (*pskb)->sk); | ||
169 | kfree_skb(*pskb); | ||
170 | *pskb = nskb; | ||
171 | return 1; | ||
172 | } | ||
173 | EXPORT_SYMBOL(skb_make_writable); | ||
174 | |||
175 | |||
176 | /* This does not belong here, but locally generated errors need it if connection | ||
177 | tracking in use: without this, connection may not be in hash table, and hence | ||
178 | manufactured ICMP or RST packets will not be associated with it. */ | ||
179 | void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *); | ||
180 | EXPORT_SYMBOL(ip_ct_attach); | ||
181 | |||
182 | void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) | ||
183 | { | ||
184 | void (*attach)(struct sk_buff *, struct sk_buff *); | ||
185 | |||
186 | if (skb->nfct && (attach = ip_ct_attach) != NULL) { | ||
187 | mb(); /* Just to be sure: must be read before executing this */ | ||
188 | attach(new, skb); | ||
189 | } | ||
190 | } | ||
191 | EXPORT_SYMBOL(nf_ct_attach); | ||
192 | |||
193 | #ifdef CONFIG_PROC_FS | ||
194 | struct proc_dir_entry *proc_net_netfilter; | ||
195 | EXPORT_SYMBOL(proc_net_netfilter); | ||
196 | #endif | ||
197 | |||
198 | void __init netfilter_init(void) | ||
199 | { | ||
200 | int i, h; | ||
201 | for (i = 0; i < NPROTO; i++) { | ||
202 | for (h = 0; h < NF_MAX_HOOKS; h++) | ||
203 | INIT_LIST_HEAD(&nf_hooks[i][h]); | ||
204 | } | ||
205 | |||
206 | #ifdef CONFIG_PROC_FS | ||
207 | proc_net_netfilter = proc_mkdir("netfilter", proc_net); | ||
208 | if (!proc_net_netfilter) | ||
209 | panic("cannot create netfilter proc entry"); | ||
210 | #endif | ||
211 | |||
212 | if (netfilter_queue_init() < 0) | ||
213 | panic("cannot initialize nf_queue"); | ||
214 | if (netfilter_log_init() < 0) | ||
215 | panic("cannot initialize nf_log"); | ||
216 | } | ||
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h new file mode 100644 index 000000000000..6bdee2910617 --- /dev/null +++ b/net/netfilter/nf_internals.h | |||
@@ -0,0 +1,39 @@ | |||
1 | #ifndef _NF_INTERNALS_H | ||
2 | #define _NF_INTERNALS_H | ||
3 | |||
4 | #include <linux/config.h> | ||
5 | #include <linux/list.h> | ||
6 | #include <linux/skbuff.h> | ||
7 | #include <linux/netdevice.h> | ||
8 | |||
9 | #ifdef CONFIG_NETFILTER_DEBUG | ||
10 | #define NFDEBUG(format, args...) printk(format , ## args) | ||
11 | #else | ||
12 | #define NFDEBUG(format, args...) | ||
13 | #endif | ||
14 | |||
15 | |||
16 | /* core.c */ | ||
17 | extern unsigned int nf_iterate(struct list_head *head, | ||
18 | struct sk_buff **skb, | ||
19 | int hook, | ||
20 | const struct net_device *indev, | ||
21 | const struct net_device *outdev, | ||
22 | struct list_head **i, | ||
23 | int (*okfn)(struct sk_buff *), | ||
24 | int hook_thresh); | ||
25 | |||
26 | /* nf_queue.c */ | ||
27 | extern int nf_queue(struct sk_buff **skb, | ||
28 | struct list_head *elem, | ||
29 | int pf, unsigned int hook, | ||
30 | struct net_device *indev, | ||
31 | struct net_device *outdev, | ||
32 | int (*okfn)(struct sk_buff *), | ||
33 | unsigned int queuenum); | ||
34 | extern int __init netfilter_queue_init(void); | ||
35 | |||
36 | /* nf_log.c */ | ||
37 | extern int __init netfilter_log_init(void); | ||
38 | |||
39 | #endif | ||
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c new file mode 100644 index 000000000000..3e76bd0824a2 --- /dev/null +++ b/net/netfilter/nf_log.c | |||
@@ -0,0 +1,178 @@ | |||
1 | #include <linux/config.h> | ||
2 | #include <linux/kernel.h> | ||
3 | #include <linux/init.h> | ||
4 | #include <linux/module.h> | ||
5 | #include <linux/proc_fs.h> | ||
6 | #include <linux/skbuff.h> | ||
7 | #include <linux/netfilter.h> | ||
8 | #include <linux/seq_file.h> | ||
9 | #include <net/protocol.h> | ||
10 | |||
11 | #include "nf_internals.h" | ||
12 | |||
13 | /* Internal logging interface, which relies on the real | ||
14 | LOG target modules */ | ||
15 | |||
16 | #define NF_LOG_PREFIXLEN 128 | ||
17 | |||
18 | static struct nf_logger *nf_logging[NPROTO]; /* = NULL */ | ||
19 | static DEFINE_SPINLOCK(nf_log_lock); | ||
20 | |||
21 | /* return EBUSY if somebody else is registered, EEXIST if the same logger | ||
22 | * is registred, 0 on success. */ | ||
23 | int nf_log_register(int pf, struct nf_logger *logger) | ||
24 | { | ||
25 | int ret = -EBUSY; | ||
26 | |||
27 | if (pf >= NPROTO) | ||
28 | return -EINVAL; | ||
29 | |||
30 | /* Any setup of logging members must be done before | ||
31 | * substituting pointer. */ | ||
32 | spin_lock(&nf_log_lock); | ||
33 | if (!nf_logging[pf]) { | ||
34 | rcu_assign_pointer(nf_logging[pf], logger); | ||
35 | ret = 0; | ||
36 | } else if (nf_logging[pf] == logger) | ||
37 | ret = -EEXIST; | ||
38 | |||
39 | spin_unlock(&nf_log_lock); | ||
40 | return ret; | ||
41 | } | ||
42 | EXPORT_SYMBOL(nf_log_register); | ||
43 | |||
44 | int nf_log_unregister_pf(int pf) | ||
45 | { | ||
46 | if (pf >= NPROTO) | ||
47 | return -EINVAL; | ||
48 | |||
49 | spin_lock(&nf_log_lock); | ||
50 | nf_logging[pf] = NULL; | ||
51 | spin_unlock(&nf_log_lock); | ||
52 | |||
53 | /* Give time to concurrent readers. */ | ||
54 | synchronize_net(); | ||
55 | |||
56 | return 0; | ||
57 | } | ||
58 | EXPORT_SYMBOL(nf_log_unregister_pf); | ||
59 | |||
60 | void nf_log_unregister_logger(struct nf_logger *logger) | ||
61 | { | ||
62 | int i; | ||
63 | |||
64 | spin_lock(&nf_log_lock); | ||
65 | for (i = 0; i < NPROTO; i++) { | ||
66 | if (nf_logging[i] == logger) | ||
67 | nf_logging[i] = NULL; | ||
68 | } | ||
69 | spin_unlock(&nf_log_lock); | ||
70 | |||
71 | synchronize_net(); | ||
72 | } | ||
73 | EXPORT_SYMBOL(nf_log_unregister_logger); | ||
74 | |||
75 | void nf_log_packet(int pf, | ||
76 | unsigned int hooknum, | ||
77 | const struct sk_buff *skb, | ||
78 | const struct net_device *in, | ||
79 | const struct net_device *out, | ||
80 | struct nf_loginfo *loginfo, | ||
81 | const char *fmt, ...) | ||
82 | { | ||
83 | va_list args; | ||
84 | char prefix[NF_LOG_PREFIXLEN]; | ||
85 | struct nf_logger *logger; | ||
86 | |||
87 | rcu_read_lock(); | ||
88 | logger = rcu_dereference(nf_logging[pf]); | ||
89 | if (logger) { | ||
90 | va_start(args, fmt); | ||
91 | vsnprintf(prefix, sizeof(prefix), fmt, args); | ||
92 | va_end(args); | ||
93 | /* We must read logging before nf_logfn[pf] */ | ||
94 | logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix); | ||
95 | } else if (net_ratelimit()) { | ||
96 | printk(KERN_WARNING "nf_log_packet: can\'t log since " | ||
97 | "no backend logging module loaded in! Please either " | ||
98 | "load one, or disable logging explicitly\n"); | ||
99 | } | ||
100 | rcu_read_unlock(); | ||
101 | } | ||
102 | EXPORT_SYMBOL(nf_log_packet); | ||
103 | |||
104 | #ifdef CONFIG_PROC_FS | ||
105 | static void *seq_start(struct seq_file *seq, loff_t *pos) | ||
106 | { | ||
107 | rcu_read_lock(); | ||
108 | |||
109 | if (*pos >= NPROTO) | ||
110 | return NULL; | ||
111 | |||
112 | return pos; | ||
113 | } | ||
114 | |||
115 | static void *seq_next(struct seq_file *s, void *v, loff_t *pos) | ||
116 | { | ||
117 | (*pos)++; | ||
118 | |||
119 | if (*pos >= NPROTO) | ||
120 | return NULL; | ||
121 | |||
122 | return pos; | ||
123 | } | ||
124 | |||
125 | static void seq_stop(struct seq_file *s, void *v) | ||
126 | { | ||
127 | rcu_read_unlock(); | ||
128 | } | ||
129 | |||
130 | static int seq_show(struct seq_file *s, void *v) | ||
131 | { | ||
132 | loff_t *pos = v; | ||
133 | const struct nf_logger *logger; | ||
134 | |||
135 | logger = rcu_dereference(nf_logging[*pos]); | ||
136 | |||
137 | if (!logger) | ||
138 | return seq_printf(s, "%2lld NONE\n", *pos); | ||
139 | |||
140 | return seq_printf(s, "%2lld %s\n", *pos, logger->name); | ||
141 | } | ||
142 | |||
143 | static struct seq_operations nflog_seq_ops = { | ||
144 | .start = seq_start, | ||
145 | .next = seq_next, | ||
146 | .stop = seq_stop, | ||
147 | .show = seq_show, | ||
148 | }; | ||
149 | |||
150 | static int nflog_open(struct inode *inode, struct file *file) | ||
151 | { | ||
152 | return seq_open(file, &nflog_seq_ops); | ||
153 | } | ||
154 | |||
155 | static struct file_operations nflog_file_ops = { | ||
156 | .owner = THIS_MODULE, | ||
157 | .open = nflog_open, | ||
158 | .read = seq_read, | ||
159 | .llseek = seq_lseek, | ||
160 | .release = seq_release, | ||
161 | }; | ||
162 | |||
163 | #endif /* PROC_FS */ | ||
164 | |||
165 | |||
166 | int __init netfilter_log_init(void) | ||
167 | { | ||
168 | #ifdef CONFIG_PROC_FS | ||
169 | struct proc_dir_entry *pde; | ||
170 | |||
171 | pde = create_proc_entry("nf_log", S_IRUGO, proc_net_netfilter); | ||
172 | if (!pde) | ||
173 | return -1; | ||
174 | |||
175 | pde->proc_fops = &nflog_file_ops; | ||
176 | #endif | ||
177 | return 0; | ||
178 | } | ||
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c new file mode 100644 index 000000000000..d10d552d9c40 --- /dev/null +++ b/net/netfilter/nf_queue.c | |||
@@ -0,0 +1,343 @@ | |||
1 | #include <linux/config.h> | ||
2 | #include <linux/kernel.h> | ||
3 | #include <linux/init.h> | ||
4 | #include <linux/module.h> | ||
5 | #include <linux/proc_fs.h> | ||
6 | #include <linux/skbuff.h> | ||
7 | #include <linux/netfilter.h> | ||
8 | #include <linux/seq_file.h> | ||
9 | #include <net/protocol.h> | ||
10 | |||
11 | #include "nf_internals.h" | ||
12 | |||
13 | /* | ||
14 | * A queue handler may be registered for each protocol. Each is protected by | ||
15 | * long term mutex. The handler must provide an an outfn() to accept packets | ||
16 | * for queueing and must reinject all packets it receives, no matter what. | ||
17 | */ | ||
18 | static struct nf_queue_handler *queue_handler[NPROTO]; | ||
19 | static struct nf_queue_rerouter *queue_rerouter; | ||
20 | |||
21 | static DEFINE_RWLOCK(queue_handler_lock); | ||
22 | |||
23 | /* return EBUSY when somebody else is registered, return EEXIST if the | ||
24 | * same handler is registered, return 0 in case of success. */ | ||
25 | int nf_register_queue_handler(int pf, struct nf_queue_handler *qh) | ||
26 | { | ||
27 | int ret; | ||
28 | |||
29 | if (pf >= NPROTO) | ||
30 | return -EINVAL; | ||
31 | |||
32 | write_lock_bh(&queue_handler_lock); | ||
33 | if (queue_handler[pf] == qh) | ||
34 | ret = -EEXIST; | ||
35 | else if (queue_handler[pf]) | ||
36 | ret = -EBUSY; | ||
37 | else { | ||
38 | queue_handler[pf] = qh; | ||
39 | ret = 0; | ||
40 | } | ||
41 | write_unlock_bh(&queue_handler_lock); | ||
42 | |||
43 | return ret; | ||
44 | } | ||
45 | EXPORT_SYMBOL(nf_register_queue_handler); | ||
46 | |||
47 | /* The caller must flush their queue before this */ | ||
48 | int nf_unregister_queue_handler(int pf) | ||
49 | { | ||
50 | if (pf >= NPROTO) | ||
51 | return -EINVAL; | ||
52 | |||
53 | write_lock_bh(&queue_handler_lock); | ||
54 | queue_handler[pf] = NULL; | ||
55 | write_unlock_bh(&queue_handler_lock); | ||
56 | |||
57 | return 0; | ||
58 | } | ||
59 | EXPORT_SYMBOL(nf_unregister_queue_handler); | ||
60 | |||
61 | int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer) | ||
62 | { | ||
63 | if (pf >= NPROTO) | ||
64 | return -EINVAL; | ||
65 | |||
66 | write_lock_bh(&queue_handler_lock); | ||
67 | memcpy(&queue_rerouter[pf], rer, sizeof(queue_rerouter[pf])); | ||
68 | write_unlock_bh(&queue_handler_lock); | ||
69 | |||
70 | return 0; | ||
71 | } | ||
72 | EXPORT_SYMBOL_GPL(nf_register_queue_rerouter); | ||
73 | |||
74 | int nf_unregister_queue_rerouter(int pf) | ||
75 | { | ||
76 | if (pf >= NPROTO) | ||
77 | return -EINVAL; | ||
78 | |||
79 | write_lock_bh(&queue_handler_lock); | ||
80 | memset(&queue_rerouter[pf], 0, sizeof(queue_rerouter[pf])); | ||
81 | write_unlock_bh(&queue_handler_lock); | ||
82 | return 0; | ||
83 | } | ||
84 | EXPORT_SYMBOL_GPL(nf_unregister_queue_rerouter); | ||
85 | |||
86 | void nf_unregister_queue_handlers(struct nf_queue_handler *qh) | ||
87 | { | ||
88 | int pf; | ||
89 | |||
90 | write_lock_bh(&queue_handler_lock); | ||
91 | for (pf = 0; pf < NPROTO; pf++) { | ||
92 | if (queue_handler[pf] == qh) | ||
93 | queue_handler[pf] = NULL; | ||
94 | } | ||
95 | write_unlock_bh(&queue_handler_lock); | ||
96 | } | ||
97 | EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers); | ||
98 | |||
99 | /* | ||
100 | * Any packet that leaves via this function must come back | ||
101 | * through nf_reinject(). | ||
102 | */ | ||
103 | int nf_queue(struct sk_buff **skb, | ||
104 | struct list_head *elem, | ||
105 | int pf, unsigned int hook, | ||
106 | struct net_device *indev, | ||
107 | struct net_device *outdev, | ||
108 | int (*okfn)(struct sk_buff *), | ||
109 | unsigned int queuenum) | ||
110 | { | ||
111 | int status; | ||
112 | struct nf_info *info; | ||
113 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
114 | struct net_device *physindev = NULL; | ||
115 | struct net_device *physoutdev = NULL; | ||
116 | #endif | ||
117 | |||
118 | /* QUEUE == DROP if noone is waiting, to be safe. */ | ||
119 | read_lock(&queue_handler_lock); | ||
120 | if (!queue_handler[pf]->outfn) { | ||
121 | read_unlock(&queue_handler_lock); | ||
122 | kfree_skb(*skb); | ||
123 | return 1; | ||
124 | } | ||
125 | |||
126 | info = kmalloc(sizeof(*info)+queue_rerouter[pf].rer_size, GFP_ATOMIC); | ||
127 | if (!info) { | ||
128 | if (net_ratelimit()) | ||
129 | printk(KERN_ERR "OOM queueing packet %p\n", | ||
130 | *skb); | ||
131 | read_unlock(&queue_handler_lock); | ||
132 | kfree_skb(*skb); | ||
133 | return 1; | ||
134 | } | ||
135 | |||
136 | *info = (struct nf_info) { | ||
137 | (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn }; | ||
138 | |||
139 | /* If it's going away, ignore hook. */ | ||
140 | if (!try_module_get(info->elem->owner)) { | ||
141 | read_unlock(&queue_handler_lock); | ||
142 | kfree(info); | ||
143 | return 0; | ||
144 | } | ||
145 | |||
146 | /* Bump dev refs so they don't vanish while packet is out */ | ||
147 | if (indev) dev_hold(indev); | ||
148 | if (outdev) dev_hold(outdev); | ||
149 | |||
150 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
151 | if ((*skb)->nf_bridge) { | ||
152 | physindev = (*skb)->nf_bridge->physindev; | ||
153 | if (physindev) dev_hold(physindev); | ||
154 | physoutdev = (*skb)->nf_bridge->physoutdev; | ||
155 | if (physoutdev) dev_hold(physoutdev); | ||
156 | } | ||
157 | #endif | ||
158 | if (queue_rerouter[pf].save) | ||
159 | queue_rerouter[pf].save(*skb, info); | ||
160 | |||
161 | status = queue_handler[pf]->outfn(*skb, info, queuenum, | ||
162 | queue_handler[pf]->data); | ||
163 | |||
164 | if (status >= 0 && queue_rerouter[pf].reroute) | ||
165 | status = queue_rerouter[pf].reroute(skb, info); | ||
166 | |||
167 | read_unlock(&queue_handler_lock); | ||
168 | |||
169 | if (status < 0) { | ||
170 | /* James M doesn't say fuck enough. */ | ||
171 | if (indev) dev_put(indev); | ||
172 | if (outdev) dev_put(outdev); | ||
173 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
174 | if (physindev) dev_put(physindev); | ||
175 | if (physoutdev) dev_put(physoutdev); | ||
176 | #endif | ||
177 | module_put(info->elem->owner); | ||
178 | kfree(info); | ||
179 | kfree_skb(*skb); | ||
180 | |||
181 | return 1; | ||
182 | } | ||
183 | |||
184 | return 1; | ||
185 | } | ||
186 | |||
187 | void nf_reinject(struct sk_buff *skb, struct nf_info *info, | ||
188 | unsigned int verdict) | ||
189 | { | ||
190 | struct list_head *elem = &info->elem->list; | ||
191 | struct list_head *i; | ||
192 | |||
193 | rcu_read_lock(); | ||
194 | |||
195 | /* Release those devices we held, or Alexey will kill me. */ | ||
196 | if (info->indev) dev_put(info->indev); | ||
197 | if (info->outdev) dev_put(info->outdev); | ||
198 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
199 | if (skb->nf_bridge) { | ||
200 | if (skb->nf_bridge->physindev) | ||
201 | dev_put(skb->nf_bridge->physindev); | ||
202 | if (skb->nf_bridge->physoutdev) | ||
203 | dev_put(skb->nf_bridge->physoutdev); | ||
204 | } | ||
205 | #endif | ||
206 | |||
207 | /* Drop reference to owner of hook which queued us. */ | ||
208 | module_put(info->elem->owner); | ||
209 | |||
210 | list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) { | ||
211 | if (i == elem) | ||
212 | break; | ||
213 | } | ||
214 | |||
215 | if (elem == &nf_hooks[info->pf][info->hook]) { | ||
216 | /* The module which sent it to userspace is gone. */ | ||
217 | NFDEBUG("%s: module disappeared, dropping packet.\n", | ||
218 | __FUNCTION__); | ||
219 | verdict = NF_DROP; | ||
220 | } | ||
221 | |||
222 | /* Continue traversal iff userspace said ok... */ | ||
223 | if (verdict == NF_REPEAT) { | ||
224 | elem = elem->prev; | ||
225 | verdict = NF_ACCEPT; | ||
226 | } | ||
227 | |||
228 | if (verdict == NF_ACCEPT) { | ||
229 | next_hook: | ||
230 | verdict = nf_iterate(&nf_hooks[info->pf][info->hook], | ||
231 | &skb, info->hook, | ||
232 | info->indev, info->outdev, &elem, | ||
233 | info->okfn, INT_MIN); | ||
234 | } | ||
235 | |||
236 | switch (verdict & NF_VERDICT_MASK) { | ||
237 | case NF_ACCEPT: | ||
238 | info->okfn(skb); | ||
239 | break; | ||
240 | |||
241 | case NF_QUEUE: | ||
242 | if (!nf_queue(&skb, elem, info->pf, info->hook, | ||
243 | info->indev, info->outdev, info->okfn, | ||
244 | verdict >> NF_VERDICT_BITS)) | ||
245 | goto next_hook; | ||
246 | break; | ||
247 | } | ||
248 | rcu_read_unlock(); | ||
249 | |||
250 | if (verdict == NF_DROP) | ||
251 | kfree_skb(skb); | ||
252 | |||
253 | kfree(info); | ||
254 | return; | ||
255 | } | ||
256 | EXPORT_SYMBOL(nf_reinject); | ||
257 | |||
258 | #ifdef CONFIG_PROC_FS | ||
259 | static void *seq_start(struct seq_file *seq, loff_t *pos) | ||
260 | { | ||
261 | if (*pos >= NPROTO) | ||
262 | return NULL; | ||
263 | |||
264 | return pos; | ||
265 | } | ||
266 | |||
267 | static void *seq_next(struct seq_file *s, void *v, loff_t *pos) | ||
268 | { | ||
269 | (*pos)++; | ||
270 | |||
271 | if (*pos >= NPROTO) | ||
272 | return NULL; | ||
273 | |||
274 | return pos; | ||
275 | } | ||
276 | |||
277 | static void seq_stop(struct seq_file *s, void *v) | ||
278 | { | ||
279 | |||
280 | } | ||
281 | |||
282 | static int seq_show(struct seq_file *s, void *v) | ||
283 | { | ||
284 | int ret; | ||
285 | loff_t *pos = v; | ||
286 | struct nf_queue_handler *qh; | ||
287 | |||
288 | read_lock_bh(&queue_handler_lock); | ||
289 | qh = queue_handler[*pos]; | ||
290 | if (!qh) | ||
291 | ret = seq_printf(s, "%2lld NONE\n", *pos); | ||
292 | else | ||
293 | ret = seq_printf(s, "%2lld %s\n", *pos, qh->name); | ||
294 | read_unlock_bh(&queue_handler_lock); | ||
295 | |||
296 | return ret; | ||
297 | } | ||
298 | |||
299 | static struct seq_operations nfqueue_seq_ops = { | ||
300 | .start = seq_start, | ||
301 | .next = seq_next, | ||
302 | .stop = seq_stop, | ||
303 | .show = seq_show, | ||
304 | }; | ||
305 | |||
306 | static int nfqueue_open(struct inode *inode, struct file *file) | ||
307 | { | ||
308 | return seq_open(file, &nfqueue_seq_ops); | ||
309 | } | ||
310 | |||
311 | static struct file_operations nfqueue_file_ops = { | ||
312 | .owner = THIS_MODULE, | ||
313 | .open = nfqueue_open, | ||
314 | .read = seq_read, | ||
315 | .llseek = seq_lseek, | ||
316 | .release = seq_release, | ||
317 | }; | ||
318 | #endif /* PROC_FS */ | ||
319 | |||
320 | |||
321 | int __init netfilter_queue_init(void) | ||
322 | { | ||
323 | #ifdef CONFIG_PROC_FS | ||
324 | struct proc_dir_entry *pde; | ||
325 | #endif | ||
326 | queue_rerouter = kmalloc(NPROTO * sizeof(struct nf_queue_rerouter), | ||
327 | GFP_KERNEL); | ||
328 | if (!queue_rerouter) | ||
329 | return -ENOMEM; | ||
330 | |||
331 | #ifdef CONFIG_PROC_FS | ||
332 | pde = create_proc_entry("nf_queue", S_IRUGO, proc_net_netfilter); | ||
333 | if (!pde) { | ||
334 | kfree(queue_rerouter); | ||
335 | return -1; | ||
336 | } | ||
337 | pde->proc_fops = &nfqueue_file_ops; | ||
338 | #endif | ||
339 | memset(queue_rerouter, 0, NPROTO * sizeof(struct nf_queue_rerouter)); | ||
340 | |||
341 | return 0; | ||
342 | } | ||
343 | |||
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c new file mode 100644 index 000000000000..61a833a9caa6 --- /dev/null +++ b/net/netfilter/nf_sockopt.c | |||
@@ -0,0 +1,132 @@ | |||
1 | #include <linux/config.h> | ||
2 | #include <linux/kernel.h> | ||
3 | #include <linux/init.h> | ||
4 | #include <linux/module.h> | ||
5 | #include <linux/skbuff.h> | ||
6 | #include <linux/netfilter.h> | ||
7 | #include <net/sock.h> | ||
8 | |||
9 | #include "nf_internals.h" | ||
10 | |||
11 | /* Sockopts only registered and called from user context, so | ||
12 | net locking would be overkill. Also, [gs]etsockopt calls may | ||
13 | sleep. */ | ||
14 | static DECLARE_MUTEX(nf_sockopt_mutex); | ||
15 | static LIST_HEAD(nf_sockopts); | ||
16 | |||
17 | /* Do exclusive ranges overlap? */ | ||
18 | static inline int overlap(int min1, int max1, int min2, int max2) | ||
19 | { | ||
20 | return max1 > min2 && min1 < max2; | ||
21 | } | ||
22 | |||
23 | /* Functions to register sockopt ranges (exclusive). */ | ||
24 | int nf_register_sockopt(struct nf_sockopt_ops *reg) | ||
25 | { | ||
26 | struct list_head *i; | ||
27 | int ret = 0; | ||
28 | |||
29 | if (down_interruptible(&nf_sockopt_mutex) != 0) | ||
30 | return -EINTR; | ||
31 | |||
32 | list_for_each(i, &nf_sockopts) { | ||
33 | struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i; | ||
34 | if (ops->pf == reg->pf | ||
35 | && (overlap(ops->set_optmin, ops->set_optmax, | ||
36 | reg->set_optmin, reg->set_optmax) | ||
37 | || overlap(ops->get_optmin, ops->get_optmax, | ||
38 | reg->get_optmin, reg->get_optmax))) { | ||
39 | NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n", | ||
40 | ops->set_optmin, ops->set_optmax, | ||
41 | ops->get_optmin, ops->get_optmax, | ||
42 | reg->set_optmin, reg->set_optmax, | ||
43 | reg->get_optmin, reg->get_optmax); | ||
44 | ret = -EBUSY; | ||
45 | goto out; | ||
46 | } | ||
47 | } | ||
48 | |||
49 | list_add(®->list, &nf_sockopts); | ||
50 | out: | ||
51 | up(&nf_sockopt_mutex); | ||
52 | return ret; | ||
53 | } | ||
54 | EXPORT_SYMBOL(nf_register_sockopt); | ||
55 | |||
56 | void nf_unregister_sockopt(struct nf_sockopt_ops *reg) | ||
57 | { | ||
58 | /* No point being interruptible: we're probably in cleanup_module() */ | ||
59 | restart: | ||
60 | down(&nf_sockopt_mutex); | ||
61 | if (reg->use != 0) { | ||
62 | /* To be woken by nf_sockopt call... */ | ||
63 | /* FIXME: Stuart Young's name appears gratuitously. */ | ||
64 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
65 | reg->cleanup_task = current; | ||
66 | up(&nf_sockopt_mutex); | ||
67 | schedule(); | ||
68 | goto restart; | ||
69 | } | ||
70 | list_del(®->list); | ||
71 | up(&nf_sockopt_mutex); | ||
72 | } | ||
73 | EXPORT_SYMBOL(nf_unregister_sockopt); | ||
74 | |||
75 | /* Call get/setsockopt() */ | ||
76 | static int nf_sockopt(struct sock *sk, int pf, int val, | ||
77 | char __user *opt, int *len, int get) | ||
78 | { | ||
79 | struct list_head *i; | ||
80 | struct nf_sockopt_ops *ops; | ||
81 | int ret; | ||
82 | |||
83 | if (down_interruptible(&nf_sockopt_mutex) != 0) | ||
84 | return -EINTR; | ||
85 | |||
86 | list_for_each(i, &nf_sockopts) { | ||
87 | ops = (struct nf_sockopt_ops *)i; | ||
88 | if (ops->pf == pf) { | ||
89 | if (get) { | ||
90 | if (val >= ops->get_optmin | ||
91 | && val < ops->get_optmax) { | ||
92 | ops->use++; | ||
93 | up(&nf_sockopt_mutex); | ||
94 | ret = ops->get(sk, val, opt, len); | ||
95 | goto out; | ||
96 | } | ||
97 | } else { | ||
98 | if (val >= ops->set_optmin | ||
99 | && val < ops->set_optmax) { | ||
100 | ops->use++; | ||
101 | up(&nf_sockopt_mutex); | ||
102 | ret = ops->set(sk, val, opt, *len); | ||
103 | goto out; | ||
104 | } | ||
105 | } | ||
106 | } | ||
107 | } | ||
108 | up(&nf_sockopt_mutex); | ||
109 | return -ENOPROTOOPT; | ||
110 | |||
111 | out: | ||
112 | down(&nf_sockopt_mutex); | ||
113 | ops->use--; | ||
114 | if (ops->cleanup_task) | ||
115 | wake_up_process(ops->cleanup_task); | ||
116 | up(&nf_sockopt_mutex); | ||
117 | return ret; | ||
118 | } | ||
119 | |||
120 | int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt, | ||
121 | int len) | ||
122 | { | ||
123 | return nf_sockopt(sk, pf, val, opt, &len, 0); | ||
124 | } | ||
125 | EXPORT_SYMBOL(nf_setsockopt); | ||
126 | |||
127 | int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len) | ||
128 | { | ||
129 | return nf_sockopt(sk, pf, val, opt, len, 1); | ||
130 | } | ||
131 | EXPORT_SYMBOL(nf_getsockopt); | ||
132 | |||
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c new file mode 100644 index 000000000000..e089f17bb803 --- /dev/null +++ b/net/netfilter/nfnetlink.c | |||
@@ -0,0 +1,376 @@ | |||
1 | /* Netfilter messages via netlink socket. Allows for user space | ||
2 | * protocol helpers and general trouble making from userspace. | ||
3 | * | ||
4 | * (C) 2001 by Jay Schulist <jschlst@samba.org>, | ||
5 | * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org> | ||
6 | * (C) 2005 by Pablo Neira Ayuso <pablo@eurodev.net> | ||
7 | * | ||
8 | * Initial netfilter messages via netlink development funded and | ||
9 | * generally made possible by Network Robots, Inc. (www.networkrobots.com) | ||
10 | * | ||
11 | * Further development of this code funded by Astaro AG (http://www.astaro.com) | ||
12 | * | ||
13 | * This software may be used and distributed according to the terms | ||
14 | * of the GNU General Public License, incorporated herein by reference. | ||
15 | */ | ||
16 | |||
17 | #include <linux/config.h> | ||
18 | #include <linux/module.h> | ||
19 | #include <linux/types.h> | ||
20 | #include <linux/socket.h> | ||
21 | #include <linux/kernel.h> | ||
22 | #include <linux/major.h> | ||
23 | #include <linux/sched.h> | ||
24 | #include <linux/timer.h> | ||
25 | #include <linux/string.h> | ||
26 | #include <linux/sockios.h> | ||
27 | #include <linux/net.h> | ||
28 | #include <linux/fcntl.h> | ||
29 | #include <linux/skbuff.h> | ||
30 | #include <asm/uaccess.h> | ||
31 | #include <asm/system.h> | ||
32 | #include <net/sock.h> | ||
33 | #include <linux/init.h> | ||
34 | #include <linux/spinlock.h> | ||
35 | |||
36 | #include <linux/netfilter.h> | ||
37 | #include <linux/netlink.h> | ||
38 | #include <linux/netfilter/nfnetlink.h> | ||
39 | |||
40 | MODULE_LICENSE("GPL"); | ||
41 | MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); | ||
42 | MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); | ||
43 | |||
44 | static char __initdata nfversion[] = "0.30"; | ||
45 | |||
46 | #if 0 | ||
47 | #define DEBUGP(format, args...) \ | ||
48 | printk(KERN_DEBUG "%s(%d):%s(): " format, __FILE__, \ | ||
49 | __LINE__, __FUNCTION__, ## args) | ||
50 | #else | ||
51 | #define DEBUGP(format, args...) | ||
52 | #endif | ||
53 | |||
54 | static struct sock *nfnl = NULL; | ||
55 | static struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT]; | ||
56 | DECLARE_MUTEX(nfnl_sem); | ||
57 | |||
/* Exported wrapper: take the nfnetlink mutex (shared-lock flavour). */
void nfnl_lock(void)
{
	nfnl_shlock();
}
62 | |||
/* Exported wrapper: release the nfnetlink mutex taken via nfnl_lock(). */
void nfnl_unlock(void)
{
	nfnl_shunlock();
}
67 | |||
68 | int nfnetlink_subsys_register(struct nfnetlink_subsystem *n) | ||
69 | { | ||
70 | DEBUGP("registering subsystem ID %u\n", n->subsys_id); | ||
71 | |||
72 | nfnl_lock(); | ||
73 | if (subsys_table[n->subsys_id]) { | ||
74 | nfnl_unlock(); | ||
75 | return -EBUSY; | ||
76 | } | ||
77 | subsys_table[n->subsys_id] = n; | ||
78 | nfnl_unlock(); | ||
79 | |||
80 | return 0; | ||
81 | } | ||
82 | |||
83 | int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n) | ||
84 | { | ||
85 | DEBUGP("unregistering subsystem ID %u\n", n->subsys_id); | ||
86 | |||
87 | nfnl_lock(); | ||
88 | subsys_table[n->subsys_id] = NULL; | ||
89 | nfnl_unlock(); | ||
90 | |||
91 | return 0; | ||
92 | } | ||
93 | |||
94 | static inline struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type) | ||
95 | { | ||
96 | u_int8_t subsys_id = NFNL_SUBSYS_ID(type); | ||
97 | |||
98 | if (subsys_id >= NFNL_SUBSYS_COUNT | ||
99 | || subsys_table[subsys_id] == NULL) | ||
100 | return NULL; | ||
101 | |||
102 | return subsys_table[subsys_id]; | ||
103 | } | ||
104 | |||
105 | static inline struct nfnl_callback * | ||
106 | nfnetlink_find_client(u_int16_t type, struct nfnetlink_subsystem *ss) | ||
107 | { | ||
108 | u_int8_t cb_id = NFNL_MSG_TYPE(type); | ||
109 | |||
110 | if (cb_id >= ss->cb_count) { | ||
111 | DEBUGP("msgtype %u >= %u, returning\n", type, ss->cb_count); | ||
112 | return NULL; | ||
113 | } | ||
114 | |||
115 | return &ss->cb[cb_id]; | ||
116 | } | ||
117 | |||
118 | void __nfa_fill(struct sk_buff *skb, int attrtype, int attrlen, | ||
119 | const void *data) | ||
120 | { | ||
121 | struct nfattr *nfa; | ||
122 | int size = NFA_LENGTH(attrlen); | ||
123 | |||
124 | nfa = (struct nfattr *)skb_put(skb, NFA_ALIGN(size)); | ||
125 | nfa->nfa_type = attrtype; | ||
126 | nfa->nfa_len = size; | ||
127 | memcpy(NFA_DATA(nfa), data, attrlen); | ||
128 | memset(NFA_DATA(nfa) + attrlen, 0, NFA_ALIGN(size) - size); | ||
129 | } | ||
130 | |||
131 | int nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len) | ||
132 | { | ||
133 | memset(tb, 0, sizeof(struct nfattr *) * maxattr); | ||
134 | |||
135 | while (NFA_OK(nfa, len)) { | ||
136 | unsigned flavor = nfa->nfa_type; | ||
137 | if (flavor && flavor <= maxattr) | ||
138 | tb[flavor-1] = nfa; | ||
139 | nfa = NFA_NEXT(nfa, len); | ||
140 | } | ||
141 | |||
142 | return 0; | ||
143 | } | ||
144 | |||
145 | /** | ||
146 | * nfnetlink_check_attributes - check and parse nfnetlink attributes | ||
147 | * | ||
148 | * subsys: nfnl subsystem for which this message is to be parsed | ||
149 | * nlmsghdr: netlink message to be checked/parsed | ||
150 | * cda: array of pointers, needs to be at least subsys->attr_count big | ||
151 | * | ||
152 | */ | ||
153 | static int | ||
154 | nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys, | ||
155 | struct nlmsghdr *nlh, struct nfattr *cda[]) | ||
156 | { | ||
157 | int min_len; | ||
158 | u_int16_t attr_count; | ||
159 | u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); | ||
160 | |||
161 | if (unlikely(cb_id >= subsys->cb_count)) { | ||
162 | DEBUGP("msgtype %u >= %u, returning\n", | ||
163 | cb_id, subsys->cb_count); | ||
164 | return -EINVAL; | ||
165 | } | ||
166 | |||
167 | min_len = NLMSG_ALIGN(sizeof(struct nfgenmsg)); | ||
168 | if (unlikely(nlh->nlmsg_len < min_len)) | ||
169 | return -EINVAL; | ||
170 | |||
171 | attr_count = subsys->cb[cb_id].attr_count; | ||
172 | memset(cda, 0, sizeof(struct nfattr *) * attr_count); | ||
173 | |||
174 | /* check attribute lengths. */ | ||
175 | if (likely(nlh->nlmsg_len > min_len)) { | ||
176 | struct nfattr *attr = NFM_NFA(NLMSG_DATA(nlh)); | ||
177 | int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); | ||
178 | |||
179 | while (NFA_OK(attr, attrlen)) { | ||
180 | unsigned flavor = attr->nfa_type; | ||
181 | if (flavor) { | ||
182 | if (flavor > attr_count) | ||
183 | return -EINVAL; | ||
184 | cda[flavor - 1] = attr; | ||
185 | } | ||
186 | attr = NFA_NEXT(attr, attrlen); | ||
187 | } | ||
188 | } | ||
189 | |||
190 | /* implicit: if nlmsg_len == min_len, we return 0, and an empty | ||
191 | * (zeroed) cda[] array. The message is valid, but empty. */ | ||
192 | |||
193 | return 0; | ||
194 | } | ||
195 | |||
196 | int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) | ||
197 | { | ||
198 | int allocation = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL; | ||
199 | int err = 0; | ||
200 | |||
201 | NETLINK_CB(skb).dst_group = group; | ||
202 | if (echo) | ||
203 | atomic_inc(&skb->users); | ||
204 | netlink_broadcast(nfnl, skb, pid, group, allocation); | ||
205 | if (echo) | ||
206 | err = netlink_unicast(nfnl, skb, pid, MSG_DONTWAIT); | ||
207 | |||
208 | return err; | ||
209 | } | ||
210 | |||
/* Send skb to the single netlink peer identified by pid over the
 * shared nfnetlink kernel socket. */
int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags)
{
	return netlink_unicast(nfnl, skb, pid, flags);
}
215 | |||
/* Process one complete nfnetlink message.
 *
 * Returns 0 when the message is silently ignored, the callback's
 * result when it was dispatched, or -1 with *errp set when the
 * caller should netlink_ack() an error. */
static inline int nfnetlink_rcv_msg(struct sk_buff *skb,
				    struct nlmsghdr *nlh, int *errp)
{
	struct nfnl_callback *nc;
	struct nfnetlink_subsystem *ss;
	int type, err = 0;

	DEBUGP("entered; subsys=%u, msgtype=%u\n",
		NFNL_SUBSYS_ID(nlh->nlmsg_type),
		NFNL_MSG_TYPE(nlh->nlmsg_type));

	/* Only requests are handled by kernel now. */
	if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) {
		DEBUGP("received non-request message\n");
		return 0;
	}

	/* All the messages must at least contain nfgenmsg */
	if (nlh->nlmsg_len <
	    NLMSG_LENGTH(NLMSG_ALIGN(sizeof(struct nfgenmsg)))) {
		DEBUGP("received message was too short\n");
		return 0;
	}

	type = nlh->nlmsg_type;
	ss = nfnetlink_get_subsys(type);
	if (!ss) {
#ifdef CONFIG_KMOD
		/* don't call nfnl_shunlock, since it would reenter
		 * with further packet processing */
		up(&nfnl_sem);
		request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type));
		nfnl_shlock();
		ss = nfnetlink_get_subsys(type);
		if (!ss)
#endif
			goto err_inval;
	}

	nc = nfnetlink_find_client(type, ss);
	if (!nc) {
		DEBUGP("unable to find client for type %d\n", type);
		goto err_inval;
	}

	/* per-callback capability check (e.g. CAP_NET_ADMIN) against
	 * the sender's effective capabilities */
	if (nc->cap_required &&
	    !cap_raised(NETLINK_CB(skb).eff_cap, nc->cap_required)) {
		DEBUGP("permission denied for type %d\n", type);
		*errp = -EPERM;
		return -1;
	}

	{
		/* attribute table lives on the stack, sized by the
		 * callback's declared attr_count (C99 VLA) */
		u_int16_t attr_count =
			ss->cb[NFNL_MSG_TYPE(nlh->nlmsg_type)].attr_count;
		struct nfattr *cda[attr_count];

		memset(cda, 0, sizeof(struct nfattr *) * attr_count);

		err = nfnetlink_check_attributes(ss, nlh, cda);
		if (err < 0)
			goto err_inval;

		DEBUGP("calling handler\n");
		err = nc->call(nfnl, skb, nlh, cda, errp);
		*errp = err;
		return err;
	}

err_inval:
	DEBUGP("returning -EINVAL\n");
	*errp = -EINVAL;
	return -1;
}
291 | |||
/* Process one packet of messages.
 *
 * Iterates all netlink messages in skb; returns -1 when a handler
 * asked to stop without an error (message is requeued by the caller),
 * 0 otherwise.  Malformed trailing data terminates the walk. */
static inline int nfnetlink_rcv_skb(struct sk_buff *skb)
{
	int err;
	struct nlmsghdr *nlh;

	while (skb->len >= NLMSG_SPACE(0)) {
		u32 rlen;

		nlh = (struct nlmsghdr *)skb->data;
		/* header must be sane and fully contained in the skb */
		if (nlh->nlmsg_len < sizeof(struct nlmsghdr)
		    || skb->len < nlh->nlmsg_len)
			return 0;
		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
		if (rlen > skb->len)
			rlen = skb->len;
		if (nfnetlink_rcv_msg(skb, nlh, &err)) {
			/* err == 0 means "stop, retry later": leave the
			 * remaining messages for the caller to requeue */
			if (!err)
				return -1;
			netlink_ack(skb, nlh, err);
		} else
			if (nlh->nlmsg_flags & NLM_F_ACK)
				netlink_ack(skb, nlh, 0);
		/* advance to the next aligned message */
		skb_pull(skb, rlen);
	}

	return 0;
}
320 | |||
/* Netlink input callback: drain the socket's receive queue while
 * holding the nfnetlink mutex; loops as long as new messages arrive. */
static void nfnetlink_rcv(struct sock *sk, int len)
{
	do {
		struct sk_buff *skb;

		/* trylock: if somebody else is already processing,
		 * they will pick up our messages in their loop */
		if (nfnl_shlock_nowait())
			return;

		while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
			if (nfnetlink_rcv_skb(skb)) {
				/* partial consumption: put the rest back
				 * for a later pass */
				if (skb->len)
					skb_queue_head(&sk->sk_receive_queue,
						       skb);
				else
					kfree_skb(skb);
				break;
			}
			kfree_skb(skb);
		}

		/* don't call nfnl_shunlock, since it would reenter
		 * with further packet processing */
		up(&nfnl_sem);
	} while(nfnl && nfnl->sk_receive_queue.qlen);
}
346 | |||
347 | void __exit nfnetlink_exit(void) | ||
348 | { | ||
349 | printk("Removing netfilter NETLINK layer.\n"); | ||
350 | sock_release(nfnl->sk_socket); | ||
351 | return; | ||
352 | } | ||
353 | |||
354 | int __init nfnetlink_init(void) | ||
355 | { | ||
356 | printk("Netfilter messages via NETLINK v%s.\n", nfversion); | ||
357 | |||
358 | nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX, | ||
359 | nfnetlink_rcv, THIS_MODULE); | ||
360 | if (!nfnl) { | ||
361 | printk(KERN_ERR "cannot initialize nfnetlink!\n"); | ||
362 | return -1; | ||
363 | } | ||
364 | |||
365 | return 0; | ||
366 | } | ||
367 | |||
368 | module_init(nfnetlink_init); | ||
369 | module_exit(nfnetlink_exit); | ||
370 | |||
371 | EXPORT_SYMBOL_GPL(nfnetlink_subsys_register); | ||
372 | EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister); | ||
373 | EXPORT_SYMBOL_GPL(nfnetlink_send); | ||
374 | EXPORT_SYMBOL_GPL(nfnetlink_unicast); | ||
375 | EXPORT_SYMBOL_GPL(nfattr_parse); | ||
376 | EXPORT_SYMBOL_GPL(__nfa_fill); | ||
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c new file mode 100644 index 000000000000..ff5601ceedcb --- /dev/null +++ b/net/netfilter/nfnetlink_log.c | |||
@@ -0,0 +1,1055 @@ | |||
1 | /* | ||
2 | * This is a module which is used for logging packets to userspace via | ||
 * nfnetlink.
4 | * | ||
5 | * (C) 2005 by Harald Welte <laforge@netfilter.org> | ||
6 | * | ||
7 | * Based on the old ipv4-only ipt_ULOG.c: | ||
8 | * (C) 2000-2004 by Harald Welte <laforge@netfilter.org> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License version 2 as | ||
12 | * published by the Free Software Foundation. | ||
13 | * | ||
14 | */ | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/skbuff.h> | ||
17 | #include <linux/init.h> | ||
18 | #include <linux/ip.h> | ||
19 | #include <linux/ipv6.h> | ||
20 | #include <linux/netdevice.h> | ||
21 | #include <linux/netfilter.h> | ||
22 | #include <linux/netlink.h> | ||
23 | #include <linux/netfilter/nfnetlink.h> | ||
24 | #include <linux/netfilter/nfnetlink_log.h> | ||
25 | #include <linux/spinlock.h> | ||
26 | #include <linux/sysctl.h> | ||
27 | #include <linux/proc_fs.h> | ||
28 | #include <linux/security.h> | ||
29 | #include <linux/list.h> | ||
30 | #include <linux/jhash.h> | ||
31 | #include <linux/random.h> | ||
32 | #include <net/sock.h> | ||
33 | |||
34 | #include <asm/atomic.h> | ||
35 | |||
36 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
37 | #include "../bridge/br_private.h" | ||
38 | #endif | ||
39 | |||
40 | #define NFULNL_NLBUFSIZ_DEFAULT 4096 | ||
41 | #define NFULNL_TIMEOUT_DEFAULT 100 /* every second */ | ||
42 | #define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */ | ||
43 | |||
44 | #define PRINTR(x, args...) do { if (net_ratelimit()) \ | ||
45 | printk(x, ## args); } while (0); | ||
46 | |||
47 | #if 0 | ||
48 | #define UDEBUG(x, args ...) printk(KERN_DEBUG "%s(%d):%s(): " x, \ | ||
49 | __FILE__, __LINE__, __FUNCTION__, \ | ||
50 | ## args) | ||
51 | #else | ||
52 | #define UDEBUG(x, ...) | ||
53 | #endif | ||
54 | |||
/* One logging instance: packets are batched into ->skb and flushed to
 * the ->peer_pid listener when the queue threshold or timer fires. */
struct nfulnl_instance {
	struct hlist_node hlist;	/* global list of instances */
	spinlock_t lock;		/* guards skb/qlen and config updates */
	atomic_t use;			/* use count */

	unsigned int qlen;		/* number of nlmsgs in skb */
	struct sk_buff *skb;		/* pre-allocated skb */
	struct nlmsghdr *lastnlh;	/* netlink header of last msg in skb */
	struct timer_list timer;	/* flush timer */
	int peer_pid;			/* PID of the peer process */

	/* configurable parameters */
	unsigned int flushtimeout;	/* timeout until queue flush */
	unsigned int nlbufsiz;		/* netlink buffer allocation size */
	unsigned int qthreshold;	/* threshold of the queue */
	u_int32_t copy_range;		/* max payload bytes to copy */
	u_int16_t group_num;		/* number of this queue */
	u_int8_t copy_mode;		/* NFULNL_COPY_* */
};
74 | |||
75 | static DEFINE_RWLOCK(instances_lock); | ||
76 | |||
77 | #define INSTANCE_BUCKETS 16 | ||
78 | static struct hlist_head instance_table[INSTANCE_BUCKETS]; | ||
79 | static unsigned int hash_init; | ||
80 | |||
81 | static inline u_int8_t instance_hashfn(u_int16_t group_num) | ||
82 | { | ||
83 | return ((group_num & 0xff) % INSTANCE_BUCKETS); | ||
84 | } | ||
85 | |||
/* Find the instance bound to group_num, or NULL.
 * Caller must hold instances_lock (read or write). */
static struct nfulnl_instance *
__instance_lookup(u_int16_t group_num)
{
	struct hlist_head *head;
	struct hlist_node *pos;
	struct nfulnl_instance *inst;

	UDEBUG("entering (group_num=%u)\n", group_num);

	head = &instance_table[instance_hashfn(group_num)];
	hlist_for_each_entry(inst, pos, head, hlist) {
		if (inst->group_num == group_num)
			return inst;
	}
	return NULL;
}
102 | |||
/* Take an additional reference on inst; paired with instance_put(). */
static inline void
instance_get(struct nfulnl_instance *inst)
{
	atomic_inc(&inst->use);
}
108 | |||
109 | static struct nfulnl_instance * | ||
110 | instance_lookup_get(u_int16_t group_num) | ||
111 | { | ||
112 | struct nfulnl_instance *inst; | ||
113 | |||
114 | read_lock_bh(&instances_lock); | ||
115 | inst = __instance_lookup(group_num); | ||
116 | if (inst) | ||
117 | instance_get(inst); | ||
118 | read_unlock_bh(&instances_lock); | ||
119 | |||
120 | return inst; | ||
121 | } | ||
122 | |||
123 | static void | ||
124 | instance_put(struct nfulnl_instance *inst) | ||
125 | { | ||
126 | if (inst && atomic_dec_and_test(&inst->use)) { | ||
127 | UDEBUG("kfree(inst=%p)\n", inst); | ||
128 | kfree(inst); | ||
129 | } | ||
130 | } | ||
131 | |||
132 | static void nfulnl_timer(unsigned long data); | ||
133 | |||
134 | static struct nfulnl_instance * | ||
135 | instance_create(u_int16_t group_num, int pid) | ||
136 | { | ||
137 | struct nfulnl_instance *inst; | ||
138 | |||
139 | UDEBUG("entering (group_num=%u, pid=%d)\n", group_num, | ||
140 | pid); | ||
141 | |||
142 | write_lock_bh(&instances_lock); | ||
143 | if (__instance_lookup(group_num)) { | ||
144 | inst = NULL; | ||
145 | UDEBUG("aborting, instance already exists\n"); | ||
146 | goto out_unlock; | ||
147 | } | ||
148 | |||
149 | inst = kmalloc(sizeof(*inst), GFP_ATOMIC); | ||
150 | if (!inst) | ||
151 | goto out_unlock; | ||
152 | |||
153 | memset(inst, 0, sizeof(*inst)); | ||
154 | INIT_HLIST_NODE(&inst->hlist); | ||
155 | inst->lock = SPIN_LOCK_UNLOCKED; | ||
156 | /* needs to be two, since we _put() after creation */ | ||
157 | atomic_set(&inst->use, 2); | ||
158 | |||
159 | init_timer(&inst->timer); | ||
160 | inst->timer.function = nfulnl_timer; | ||
161 | inst->timer.data = (unsigned long)inst; | ||
162 | /* don't start timer yet. (re)start it with every packet */ | ||
163 | |||
164 | inst->peer_pid = pid; | ||
165 | inst->group_num = group_num; | ||
166 | |||
167 | inst->qthreshold = NFULNL_QTHRESH_DEFAULT; | ||
168 | inst->flushtimeout = NFULNL_TIMEOUT_DEFAULT; | ||
169 | inst->nlbufsiz = NFULNL_NLBUFSIZ_DEFAULT; | ||
170 | inst->copy_mode = NFULNL_COPY_PACKET; | ||
171 | inst->copy_range = 0xffff; | ||
172 | |||
173 | if (!try_module_get(THIS_MODULE)) | ||
174 | goto out_free; | ||
175 | |||
176 | hlist_add_head(&inst->hlist, | ||
177 | &instance_table[instance_hashfn(group_num)]); | ||
178 | |||
179 | UDEBUG("newly added node: %p, next=%p\n", &inst->hlist, | ||
180 | inst->hlist.next); | ||
181 | |||
182 | write_unlock_bh(&instances_lock); | ||
183 | |||
184 | return inst; | ||
185 | |||
186 | out_free: | ||
187 | instance_put(inst); | ||
188 | out_unlock: | ||
189 | write_unlock_bh(&instances_lock); | ||
190 | return NULL; | ||
191 | } | ||
192 | |||
193 | static int __nfulnl_send(struct nfulnl_instance *inst); | ||
194 | |||
/* Unhash an instance, flush/free any batched messages and drop the
 * hash's reference plus the module reference.  @lock selects whether
 * instances_lock is taken here or is already held by the caller. */
static void
_instance_destroy2(struct nfulnl_instance *inst, int lock)
{
	/* first pull it out of the global list */
	if (lock)
		write_lock_bh(&instances_lock);

	UDEBUG("removing instance %p (queuenum=%u) from hash\n",
		inst, inst->group_num);

	hlist_del(&inst->hlist);

	if (lock)
		write_unlock_bh(&instances_lock);

	/* then flush all pending packets from skb */

	spin_lock_bh(&inst->lock);
	if (inst->skb) {
		/* send what is queued; anything left over is dropped */
		if (inst->qlen)
			__nfulnl_send(inst);
		if (inst->skb) {
			kfree_skb(inst->skb);
			inst->skb = NULL;
		}
	}
	spin_unlock_bh(&inst->lock);

	/* and finally put the refcount */
	instance_put(inst);

	module_put(THIS_MODULE);
}
228 | |||
/* Destroy variant for callers already holding instances_lock. */
static inline void
__instance_destroy(struct nfulnl_instance *inst)
{
	_instance_destroy2(inst, 0);
}
234 | |||
/* Destroy variant that takes instances_lock itself. */
static inline void
instance_destroy(struct nfulnl_instance *inst)
{
	_instance_destroy2(inst, 1);
}
240 | |||
241 | static int | ||
242 | nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode, | ||
243 | unsigned int range) | ||
244 | { | ||
245 | int status = 0; | ||
246 | |||
247 | spin_lock_bh(&inst->lock); | ||
248 | |||
249 | switch (mode) { | ||
250 | case NFULNL_COPY_NONE: | ||
251 | case NFULNL_COPY_META: | ||
252 | inst->copy_mode = mode; | ||
253 | inst->copy_range = 0; | ||
254 | break; | ||
255 | |||
256 | case NFULNL_COPY_PACKET: | ||
257 | inst->copy_mode = mode; | ||
258 | /* we're using struct nfattr which has 16bit nfa_len */ | ||
259 | if (range > 0xffff) | ||
260 | inst->copy_range = 0xffff; | ||
261 | else | ||
262 | inst->copy_range = range; | ||
263 | break; | ||
264 | |||
265 | default: | ||
266 | status = -EINVAL; | ||
267 | break; | ||
268 | } | ||
269 | |||
270 | spin_unlock_bh(&inst->lock); | ||
271 | |||
272 | return status; | ||
273 | } | ||
274 | |||
275 | static int | ||
276 | nfulnl_set_nlbufsiz(struct nfulnl_instance *inst, u_int32_t nlbufsiz) | ||
277 | { | ||
278 | int status; | ||
279 | |||
280 | spin_lock_bh(&inst->lock); | ||
281 | if (nlbufsiz < NFULNL_NLBUFSIZ_DEFAULT) | ||
282 | status = -ERANGE; | ||
283 | else if (nlbufsiz > 131072) | ||
284 | status = -ERANGE; | ||
285 | else { | ||
286 | inst->nlbufsiz = nlbufsiz; | ||
287 | status = 0; | ||
288 | } | ||
289 | spin_unlock_bh(&inst->lock); | ||
290 | |||
291 | return status; | ||
292 | } | ||
293 | |||
/* Set the flush timeout (apparently in 1/100ths of a second, judging
 * by NFULNL_TIMEOUT_DEFAULT — confirm).  Always succeeds. */
static int
nfulnl_set_timeout(struct nfulnl_instance *inst, u_int32_t timeout)
{
	spin_lock_bh(&inst->lock);
	inst->flushtimeout = timeout;
	spin_unlock_bh(&inst->lock);

	return 0;
}
303 | |||
/* Set the queue length (in messages) that forces a flush.
 * Always succeeds. */
static int
nfulnl_set_qthresh(struct nfulnl_instance *inst, u_int32_t qthresh)
{
	spin_lock_bh(&inst->lock);
	inst->qthreshold = qthresh;
	spin_unlock_bh(&inst->lock);

	return 0;
}
313 | |||
314 | static struct sk_buff *nfulnl_alloc_skb(unsigned int inst_size, | ||
315 | unsigned int pkt_size) | ||
316 | { | ||
317 | struct sk_buff *skb; | ||
318 | |||
319 | UDEBUG("entered (%u, %u)\n", inst_size, pkt_size); | ||
320 | |||
321 | /* alloc skb which should be big enough for a whole multipart | ||
322 | * message. WARNING: has to be <= 128k due to slab restrictions */ | ||
323 | |||
324 | skb = alloc_skb(inst_size, GFP_ATOMIC); | ||
325 | if (!skb) { | ||
326 | PRINTR("nfnetlink_log: can't alloc whole buffer (%u bytes)\n", | ||
327 | inst_size); | ||
328 | |||
329 | /* try to allocate only as much as we need for current | ||
330 | * packet */ | ||
331 | |||
332 | skb = alloc_skb(pkt_size, GFP_ATOMIC); | ||
333 | if (!skb) | ||
334 | PRINTR("nfnetlink_log: can't even alloc %u bytes\n", | ||
335 | pkt_size); | ||
336 | } | ||
337 | |||
338 | return skb; | ||
339 | } | ||
340 | |||
/* Push the batched skb to the userspace listener and reset the batch
 * state.  Caller must hold inst->lock.  Returns the unicast result
 * (negative on failure; the skb is gone either way). */
static int
__nfulnl_send(struct nfulnl_instance *inst)
{
	int status;

	/* a flush is happening now; no need for the timer to fire too */
	if (timer_pending(&inst->timer))
		del_timer(&inst->timer);

	/* multipart batch: mark the last message as the terminator */
	if (inst->qlen > 1)
		inst->lastnlh->nlmsg_type = NLMSG_DONE;

	status = nfnetlink_unicast(inst->skb, inst->peer_pid, MSG_DONTWAIT);
	if (status < 0) {
		UDEBUG("netlink_unicast() failed\n");
		/* FIXME: statistics */
	}

	inst->qlen = 0;
	inst->skb = NULL;
	inst->lastnlh = NULL;

	return status;
}
364 | |||
365 | static void nfulnl_timer(unsigned long data) | ||
366 | { | ||
367 | struct nfulnl_instance *inst = (struct nfulnl_instance *)data; | ||
368 | |||
369 | UDEBUG("timer function called, flushing buffer\n"); | ||
370 | |||
371 | spin_lock_bh(&inst->lock); | ||
372 | __nfulnl_send(inst); | ||
373 | instance_put(inst); | ||
374 | spin_unlock_bh(&inst->lock); | ||
375 | } | ||
376 | |||
/* Append one NFULNL_MSG_PACKET netlink message describing @skb (plus
 * up to @data_len bytes of its payload) to inst->skb.
 *
 * Returns 0 on success, -1 when the message does not fit (NLMSG_PUT /
 * NFA_PUT jump to the failure labels below on lack of tailroom).
 * NOTE(review): presumably called with inst->lock held — confirm at
 * the call sites. */
static inline int
__build_packet_message(struct nfulnl_instance *inst,
			const struct sk_buff *skb,
			unsigned int data_len,
			unsigned int pf,
			unsigned int hooknum,
			const struct net_device *indev,
			const struct net_device *outdev,
			const struct nf_loginfo *li,
			const char *prefix)
{
	unsigned char *old_tail;
	struct nfulnl_msg_packet_hdr pmsg;
	struct nlmsghdr *nlh;
	struct nfgenmsg *nfmsg;
	u_int32_t tmp_uint;

	UDEBUG("entered\n");

	old_tail = inst->skb->tail;
	nlh = NLMSG_PUT(inst->skb, 0, 0,
			NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET,
			sizeof(struct nfgenmsg));
	nfmsg = NLMSG_DATA(nlh);
	nfmsg->nfgen_family = pf;
	nfmsg->version = NFNETLINK_V0;
	nfmsg->res_id = htons(inst->group_num);

	pmsg.hw_protocol = htons(skb->protocol);
	pmsg.hook = hooknum;

	NFA_PUT(inst->skb, NFULA_PACKET_HDR, sizeof(pmsg), &pmsg);

	/* user-supplied log prefix, truncated to NFULNL_PREFIXLEN */
	if (prefix) {
		int slen = strlen(prefix);
		if (slen > NFULNL_PREFIXLEN)
			slen = NFULNL_PREFIXLEN;
		NFA_PUT(inst->skb, NFULA_PREFIX, slen, prefix);
	}

	if (indev) {
		tmp_uint = htonl(indev->ifindex);
#ifndef CONFIG_BRIDGE_NETFILTER
		NFA_PUT(inst->skb, NFULA_IFINDEX_INDEV, sizeof(tmp_uint),
			&tmp_uint);
#else
		if (pf == PF_BRIDGE) {
			/* Case 1: outdev is physical input device, we need to
			 * look for bridge group (when called from
			 * netfilter_bridge) */
			NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV,
				sizeof(tmp_uint), &tmp_uint);
			/* this is the bridge group "brX" */
			tmp_uint = htonl(indev->br_port->br->dev->ifindex);
			NFA_PUT(inst->skb, NFULA_IFINDEX_INDEV,
				sizeof(tmp_uint), &tmp_uint);
		} else {
			/* Case 2: indev is bridge group, we need to look for
			 * physical device (when called from ipv4) */
			NFA_PUT(inst->skb, NFULA_IFINDEX_INDEV,
				sizeof(tmp_uint), &tmp_uint);
			if (skb->nf_bridge && skb->nf_bridge->physindev) {
				tmp_uint =
				    htonl(skb->nf_bridge->physindev->ifindex);
				NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV,
					sizeof(tmp_uint), &tmp_uint);
			}
		}
#endif
	}

	if (outdev) {
		tmp_uint = htonl(outdev->ifindex);
#ifndef CONFIG_BRIDGE_NETFILTER
		NFA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, sizeof(tmp_uint),
			&tmp_uint);
#else
		if (pf == PF_BRIDGE) {
			/* Case 1: outdev is physical output device, we need to
			 * look for bridge group (when called from
			 * netfilter_bridge) */
			NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
				sizeof(tmp_uint), &tmp_uint);
			/* this is the bridge group "brX" */
			tmp_uint = htonl(outdev->br_port->br->dev->ifindex);
			NFA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV,
				sizeof(tmp_uint), &tmp_uint);
		} else {
			/* Case 2: indev is a bridge group, we need to look
			 * for physical device (when called from ipv4) */
			NFA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV,
				sizeof(tmp_uint), &tmp_uint);
			/* NOTE(review): unlike the indev branch above,
			 * physoutdev is dereferenced without a NULL
			 * check — verify it cannot be NULL here */
			if (skb->nf_bridge) {
				tmp_uint =
				    htonl(skb->nf_bridge->physoutdev->ifindex);
				NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
					sizeof(tmp_uint), &tmp_uint);
			}
		}
#endif
	}

	if (skb->nfmark) {
		tmp_uint = htonl(skb->nfmark);
		NFA_PUT(inst->skb, NFULA_MARK, sizeof(tmp_uint), &tmp_uint);
	}

	/* link-layer (hardware) address of the packet's origin */
	if (indev && skb->dev && skb->dev->hard_header_parse) {
		struct nfulnl_msg_packet_hw phw;

		phw.hw_addrlen =
			skb->dev->hard_header_parse((struct sk_buff *)skb,
						    phw.hw_addr);
		phw.hw_addrlen = htons(phw.hw_addrlen);
		NFA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw);
	}

	if (skb->tstamp.off_sec) {
		struct nfulnl_msg_packet_timestamp ts;

		ts.sec = cpu_to_be64(skb_tv_base.tv_sec + skb->tstamp.off_sec);
		ts.usec = cpu_to_be64(skb_tv_base.tv_usec + skb->tstamp.off_usec);

		NFA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts);
	}

	/* UID */
	if (skb->sk) {
		read_lock_bh(&skb->sk->sk_callback_lock);
		if (skb->sk->sk_socket && skb->sk->sk_socket->file) {
			u_int32_t uid = htonl(skb->sk->sk_socket->file->f_uid);
			/* need to unlock here since NFA_PUT may goto */
			read_unlock_bh(&skb->sk->sk_callback_lock);
			NFA_PUT(inst->skb, NFULA_UID, sizeof(uid), &uid);
		} else
			read_unlock_bh(&skb->sk->sk_callback_lock);
	}

	if (data_len) {
		struct nfattr *nfa;
		int size = NFA_LENGTH(data_len);

		if (skb_tailroom(inst->skb) < (int)NFA_SPACE(data_len)) {
			printk(KERN_WARNING "nfnetlink_log: no tailroom!\n");
			goto nlmsg_failure;
		}

		nfa = (struct nfattr *)skb_put(inst->skb, NFA_ALIGN(size));
		nfa->nfa_type = NFULA_PAYLOAD;
		nfa->nfa_len = size;

		if (skb_copy_bits(skb, 0, NFA_DATA(nfa), data_len))
			BUG();
	}

	/* patch the final on-wire length into the message header */
	nlh->nlmsg_len = inst->skb->tail - old_tail;
	return 0;

nlmsg_failure:
	UDEBUG("nlmsg_failure\n");
nfattr_failure:
	PRINTR(KERN_ERR "nfnetlink_log: error creating log nlmsg\n");
	return -1;
}
541 | |||
542 | #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) | ||
543 | |||
/* Fallback parameters used by nfulnl_log_packet() when the caller
 * passes no (or a non-ULOG) nf_loginfo: full payload copy, group 0,
 * flush after every packet. */
static struct nf_loginfo default_loginfo = {
	.type = NF_LOG_TYPE_ULOG,
	.u = {
		.ulog = {
			.copy_len = 0xffff,
			.group = 0,
			.qthreshold = 1,
		},
	},
};
554 | |||
/* log handler for internal netfilter logging api */
/*
 * Queue one packet's log data into the instance's batch skb, flushing
 * the previous batch to userspace first if the queue threshold is hit
 * or the skb lacks tailroom.  Arms the flush timer if it is not
 * already pending.  Called from packet processing context.
 */
static void
nfulnl_log_packet(unsigned int pf,
		  unsigned int hooknum,
		  const struct sk_buff *skb,
		  const struct net_device *in,
		  const struct net_device *out,
		  const struct nf_loginfo *li_user,
		  const char *prefix)
{
	unsigned int size, data_len;
	struct nfulnl_instance *inst;
	const struct nf_loginfo *li;
	unsigned int qthreshold;
	unsigned int nlbufsiz;

	/* fall back to module defaults if no (usable) loginfo given */
	if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
		li = li_user;
	else
		li = &default_loginfo;

	/* try the requested group first, then group 0 as catch-all */
	inst = instance_lookup_get(li->u.ulog.group);
	if (!inst)
		inst = instance_lookup_get(0);
	if (!inst) {
		PRINTR("nfnetlink_log: trying to log packet, "
		       "but no instance for group %u\n", li->u.ulog.group);
		return;
	}

	/* all macros expand to constant values at compile time */
	/* FIXME: do we want to make the size calculation conditional based on
	 * what is actually present? way more branches and checks, but more
	 * memory efficient... */
	size = NLMSG_SPACE(sizeof(struct nfgenmsg))
		+ NFA_SPACE(sizeof(struct nfulnl_msg_packet_hdr))
		+ NFA_SPACE(sizeof(u_int32_t))	/* ifindex */
		+ NFA_SPACE(sizeof(u_int32_t))	/* ifindex */
#ifdef CONFIG_BRIDGE_NETFILTER
		+ NFA_SPACE(sizeof(u_int32_t))	/* ifindex */
		+ NFA_SPACE(sizeof(u_int32_t))	/* ifindex */
#endif
		+ NFA_SPACE(sizeof(u_int32_t))	/* mark */
		+ NFA_SPACE(sizeof(u_int32_t))	/* uid */
		+ NFA_SPACE(NFULNL_PREFIXLEN)	/* prefix */
		+ NFA_SPACE(sizeof(struct nfulnl_msg_packet_hw))
		+ NFA_SPACE(sizeof(struct nfulnl_msg_packet_timestamp));

	UDEBUG("initial size=%u\n", size);

	spin_lock_bh(&inst->lock);

	qthreshold = inst->qthreshold;
	/* per-rule qthreshold overrides per-instance */
	/* NOTE(review): if the per-rule qthreshold is 0 (unset) this
	 * lowers the effective threshold to 0, i.e. flush on every
	 * packet — confirm whether 0 should mean "no override" */
	if (qthreshold > li->u.ulog.qthreshold)
		qthreshold = li->u.ulog.qthreshold;

	switch (inst->copy_mode) {
	case NFULNL_COPY_META:
	case NFULNL_COPY_NONE:
		data_len = 0;
		break;

	case NFULNL_COPY_PACKET:
		/* copy_range == 0 means "whole packet" */
		if (inst->copy_range == 0
		    || inst->copy_range > skb->len)
			data_len = skb->len;
		else
			data_len = inst->copy_range;

		size += NFA_SPACE(data_len);
		UDEBUG("copy_packet, therefore size now %u\n", size);
		break;

	default:
		spin_unlock_bh(&inst->lock);
		instance_put(inst);
		return;
	}

	/* an oversized message still has to fit into one skb */
	if (size > inst->nlbufsiz)
		nlbufsiz = size;
	else
		nlbufsiz = inst->nlbufsiz;

	if (!inst->skb) {
		if (!(inst->skb = nfulnl_alloc_skb(nlbufsiz, size))) {
			UDEBUG("error in nfulnl_alloc_skb(%u, %u)\n",
				inst->nlbufsiz, size);
			goto alloc_failure;
		}
	} else if (inst->qlen >= qthreshold ||
		   size > skb_tailroom(inst->skb)) {
		/* either the queue len is too high or we don't have
		 * enough room in the skb left. flush to userspace. */
		UDEBUG("flushing old skb\n");

		__nfulnl_send(inst);

		if (!(inst->skb = nfulnl_alloc_skb(nlbufsiz, size))) {
			UDEBUG("error in nfulnl_alloc_skb(%u, %u)\n",
				inst->nlbufsiz, size);
			goto alloc_failure;
		}
	}

	UDEBUG("qlen %d, qthreshold %d\n", inst->qlen, qthreshold);
	inst->qlen++;

	__build_packet_message(inst, skb, data_len, pf,
				hooknum, in, out, li, prefix);

	/* timer_pending always called within inst->lock, so there
	 * is no chance of a race here */
	if (!timer_pending(&inst->timer)) {
		/* extra ref for the timer; dropped when it fires */
		instance_get(inst);
		inst->timer.expires = jiffies + (inst->flushtimeout*HZ/100);
		add_timer(&inst->timer);
	}
	spin_unlock_bh(&inst->lock);

	/* NOTE(review): the reference taken by instance_lookup_get()
	 * does not appear to be dropped on this success path — looks
	 * like a refcount leak; confirm against later fixes */
	return;

alloc_failure:
	spin_unlock_bh(&inst->lock);
	instance_put(inst);
	UDEBUG("error allocating skb\n");
	/* FIXME: statistics */
}
684 | |||
/*
 * Netlink notifier callback: when a NETLINK_NETFILTER socket is
 * released (NETLINK_URELEASE), destroy every logging instance that was
 * bound by that peer pid, so dead listeners do not keep queueing.
 */
static int
nfulnl_rcv_nl_event(struct notifier_block *this,
		   unsigned long event, void *ptr)
{
	struct netlink_notify *n = ptr;

	if (event == NETLINK_URELEASE &&
	    n->protocol == NETLINK_NETFILTER && n->pid) {
		int i;

		/* destroy all instances for this pid */
		write_lock_bh(&instances_lock);
		for (i = 0; i < INSTANCE_BUCKETS; i++) {
			struct hlist_node *tmp, *t2;
			struct nfulnl_instance *inst;
			struct hlist_head *head = &instance_table[i];

			/* _safe variant: __instance_destroy unlinks the
			 * node we are standing on */
			hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
				UDEBUG("node = %p\n", inst);
				if (n->pid == inst->peer_pid)
					__instance_destroy(inst);
			}
		}
		write_unlock_bh(&instances_lock);
	}
	return NOTIFY_DONE;
}
712 | |||
/* registered with netlink_register_notifier() in init_or_cleanup() */
static struct notifier_block nfulnl_rtnl_notifier = {
	.notifier_call	= nfulnl_rcv_nl_event,
};
716 | |||
/* NFULNL_MSG_PACKET is a kernel->userspace message only; reject any
 * attempt by userspace to send one to us. */
static int
nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
		  struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
{
	return -ENOTSUPP;
}
723 | |||
/* hook into the generic nf_log infrastructure; registered per protocol
 * family via NFULNL_CFG_CMD_PF_BIND */
static struct nf_logger nfulnl_logger = {
	.name	= "nfnetlink_log",
	.logfn	= &nfulnl_log_packet,
	.me	= THIS_MODULE,
};
729 | |||
/* minimum payload sizes for NFULA_* packet attributes, checked with
 * nfattr_bad_size(); 0 means variable length */
static const int nfula_min[NFULA_MAX] = {
	[NFULA_PACKET_HDR-1]	= sizeof(struct nfulnl_msg_packet_hdr),
	[NFULA_MARK-1]		= sizeof(u_int32_t),
	[NFULA_TIMESTAMP-1]	= sizeof(struct nfulnl_msg_packet_timestamp),
	[NFULA_IFINDEX_INDEV-1]	= sizeof(u_int32_t),
	[NFULA_IFINDEX_OUTDEV-1]= sizeof(u_int32_t),
	[NFULA_HWADDR-1]	= sizeof(struct nfulnl_msg_packet_hw),
	[NFULA_PAYLOAD-1]	= 0,
	[NFULA_PREFIX-1]	= 0,
	[NFULA_UID-1]		= sizeof(u_int32_t),
};

/* minimum payload sizes for NFULA_CFG_* configuration attributes */
static const int nfula_cfg_min[NFULA_CFG_MAX] = {
	[NFULA_CFG_CMD-1]	= sizeof(struct nfulnl_msg_config_cmd),
	[NFULA_CFG_MODE-1]	= sizeof(struct nfulnl_msg_config_mode),
	[NFULA_CFG_TIMEOUT-1]	= sizeof(u_int32_t),
	[NFULA_CFG_QTHRESH-1]	= sizeof(u_int32_t),
	[NFULA_CFG_NLBUFSIZ-1]	= sizeof(u_int32_t),
};
749 | |||
750 | static int | ||
751 | nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, | ||
752 | struct nlmsghdr *nlh, struct nfattr *nfula[], int *errp) | ||
753 | { | ||
754 | struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); | ||
755 | u_int16_t group_num = ntohs(nfmsg->res_id); | ||
756 | struct nfulnl_instance *inst; | ||
757 | int ret = 0; | ||
758 | |||
759 | UDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type)); | ||
760 | |||
761 | if (nfattr_bad_size(nfula, NFULA_CFG_MAX, nfula_cfg_min)) { | ||
762 | UDEBUG("bad attribute size\n"); | ||
763 | return -EINVAL; | ||
764 | } | ||
765 | |||
766 | inst = instance_lookup_get(group_num); | ||
767 | if (nfula[NFULA_CFG_CMD-1]) { | ||
768 | u_int8_t pf = nfmsg->nfgen_family; | ||
769 | struct nfulnl_msg_config_cmd *cmd; | ||
770 | cmd = NFA_DATA(nfula[NFULA_CFG_CMD-1]); | ||
771 | UDEBUG("found CFG_CMD for\n"); | ||
772 | |||
773 | switch (cmd->command) { | ||
774 | case NFULNL_CFG_CMD_BIND: | ||
775 | if (inst) { | ||
776 | ret = -EBUSY; | ||
777 | goto out_put; | ||
778 | } | ||
779 | |||
780 | inst = instance_create(group_num, | ||
781 | NETLINK_CB(skb).pid); | ||
782 | if (!inst) { | ||
783 | ret = -EINVAL; | ||
784 | goto out_put; | ||
785 | } | ||
786 | break; | ||
787 | case NFULNL_CFG_CMD_UNBIND: | ||
788 | if (!inst) { | ||
789 | ret = -ENODEV; | ||
790 | goto out_put; | ||
791 | } | ||
792 | |||
793 | if (inst->peer_pid != NETLINK_CB(skb).pid) { | ||
794 | ret = -EPERM; | ||
795 | goto out_put; | ||
796 | } | ||
797 | |||
798 | instance_destroy(inst); | ||
799 | break; | ||
800 | case NFULNL_CFG_CMD_PF_BIND: | ||
801 | UDEBUG("registering log handler for pf=%u\n", pf); | ||
802 | ret = nf_log_register(pf, &nfulnl_logger); | ||
803 | break; | ||
804 | case NFULNL_CFG_CMD_PF_UNBIND: | ||
805 | UDEBUG("unregistering log handler for pf=%u\n", pf); | ||
806 | /* This is a bug and a feature. We cannot unregister | ||
807 | * other handlers, like nfnetlink_inst can */ | ||
808 | nf_log_unregister_pf(pf); | ||
809 | break; | ||
810 | default: | ||
811 | ret = -EINVAL; | ||
812 | break; | ||
813 | } | ||
814 | } else { | ||
815 | if (!inst) { | ||
816 | UDEBUG("no config command, and no instance for " | ||
817 | "group=%u pid=%u =>ENOENT\n", | ||
818 | group_num, NETLINK_CB(skb).pid); | ||
819 | ret = -ENOENT; | ||
820 | goto out_put; | ||
821 | } | ||
822 | |||
823 | if (inst->peer_pid != NETLINK_CB(skb).pid) { | ||
824 | UDEBUG("no config command, and wrong pid\n"); | ||
825 | ret = -EPERM; | ||
826 | goto out_put; | ||
827 | } | ||
828 | } | ||
829 | |||
830 | if (nfula[NFULA_CFG_MODE-1]) { | ||
831 | struct nfulnl_msg_config_mode *params; | ||
832 | params = NFA_DATA(nfula[NFULA_CFG_MODE-1]); | ||
833 | |||
834 | nfulnl_set_mode(inst, params->copy_mode, | ||
835 | ntohs(params->copy_range)); | ||
836 | } | ||
837 | |||
838 | if (nfula[NFULA_CFG_TIMEOUT-1]) { | ||
839 | u_int32_t timeout = | ||
840 | *(u_int32_t *)NFA_DATA(nfula[NFULA_CFG_TIMEOUT-1]); | ||
841 | |||
842 | nfulnl_set_timeout(inst, ntohl(timeout)); | ||
843 | } | ||
844 | |||
845 | if (nfula[NFULA_CFG_NLBUFSIZ-1]) { | ||
846 | u_int32_t nlbufsiz = | ||
847 | *(u_int32_t *)NFA_DATA(nfula[NFULA_CFG_NLBUFSIZ-1]); | ||
848 | |||
849 | nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz)); | ||
850 | } | ||
851 | |||
852 | if (nfula[NFULA_CFG_QTHRESH-1]) { | ||
853 | u_int32_t qthresh = | ||
854 | *(u_int16_t *)NFA_DATA(nfula[NFULA_CFG_QTHRESH-1]); | ||
855 | |||
856 | nfulnl_set_qthresh(inst, ntohl(qthresh)); | ||
857 | } | ||
858 | |||
859 | out_put: | ||
860 | instance_put(inst); | ||
861 | return ret; | ||
862 | } | ||
863 | |||
/* dispatch table for the two ULOG message types; both require
 * CAP_NET_ADMIN on the sending socket */
static struct nfnl_callback nfulnl_cb[NFULNL_MSG_MAX] = {
	[NFULNL_MSG_PACKET]	= { .call = nfulnl_recv_unsupp,
				    .attr_count = NFULA_MAX,
				    .cap_required = CAP_NET_ADMIN, },
	[NFULNL_MSG_CONFIG]	= { .call = nfulnl_recv_config,
				    .attr_count = NFULA_CFG_MAX,
				    .cap_required = CAP_NET_ADMIN },
};

/* registration record for the nfnetlink "log" subsystem */
static struct nfnetlink_subsystem nfulnl_subsys = {
	.name		= "log",
	.subsys_id	= NFNL_SUBSYS_ULOG,
	.cb_count	= NFULNL_MSG_MAX,
	.cb		= nfulnl_cb,
};
879 | |||
880 | #ifdef CONFIG_PROC_FS | ||
/* per-open seq_file cursor: index of the hash bucket being walked */
struct iter_state {
	unsigned int bucket;
};
884 | |||
885 | static struct hlist_node *get_first(struct seq_file *seq) | ||
886 | { | ||
887 | struct iter_state *st = seq->private; | ||
888 | |||
889 | if (!st) | ||
890 | return NULL; | ||
891 | |||
892 | for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { | ||
893 | if (!hlist_empty(&instance_table[st->bucket])) | ||
894 | return instance_table[st->bucket].first; | ||
895 | } | ||
896 | return NULL; | ||
897 | } | ||
898 | |||
/* Advance to the next instance, hopping to the next non-empty hash
 * bucket when the current chain runs out.  Caller holds
 * instances_lock (taken in seq_start()). */
static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
{
	struct iter_state *st = seq->private;

	h = h->next;
	while (!h) {
		if (++st->bucket >= INSTANCE_BUCKETS)
			return NULL;

		h = instance_table[st->bucket].first;
	}
	return h;
}
912 | |||
913 | static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos) | ||
914 | { | ||
915 | struct hlist_node *head; | ||
916 | head = get_first(seq); | ||
917 | |||
918 | if (head) | ||
919 | while (pos && (head = get_next(seq, head))) | ||
920 | pos--; | ||
921 | return pos ? NULL : head; | ||
922 | } | ||
923 | |||
/* seq_file start: take the table lock for the whole iteration and seek
 * to the requested position */
static void *seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock_bh(&instances_lock);
	return get_idx(seq, *pos);
}

static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
{
	(*pos)++;
	return get_next(s, v);
}

/* drop the lock taken in seq_start() */
static void seq_stop(struct seq_file *s, void *v)
{
	read_unlock_bh(&instances_lock);
}

/* one /proc line per instance: group, peer pid, queue length, copy
 * mode/range, flush timeout and current refcount */
static int seq_show(struct seq_file *s, void *v)
{
	const struct nfulnl_instance *inst = v;

	return seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n",
			  inst->group_num,
			  inst->peer_pid, inst->qlen,
			  inst->copy_mode, inst->copy_range,
			  inst->flushtimeout, atomic_read(&inst->use));
}

static struct seq_operations nful_seq_ops = {
	.start	= seq_start,
	.next	= seq_next,
	.stop	= seq_stop,
	.show	= seq_show,
};
958 | |||
959 | static int nful_open(struct inode *inode, struct file *file) | ||
960 | { | ||
961 | struct seq_file *seq; | ||
962 | struct iter_state *is; | ||
963 | int ret; | ||
964 | |||
965 | is = kmalloc(sizeof(*is), GFP_KERNEL); | ||
966 | if (!is) | ||
967 | return -ENOMEM; | ||
968 | memset(is, 0, sizeof(*is)); | ||
969 | ret = seq_open(file, &nful_seq_ops); | ||
970 | if (ret < 0) | ||
971 | goto out_free; | ||
972 | seq = file->private_data; | ||
973 | seq->private = is; | ||
974 | return ret; | ||
975 | out_free: | ||
976 | kfree(is); | ||
977 | return ret; | ||
978 | } | ||
979 | |||
/* file operations for the /proc entry; seq_release_private also frees
 * the iter_state allocated in nful_open() */
static struct file_operations nful_file_ops = {
	.owner	 = THIS_MODULE,
	.open	 = nful_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_private,
};
987 | |||
988 | #endif /* PROC_FS */ | ||
989 | |||
990 | static int | ||
991 | init_or_cleanup(int init) | ||
992 | { | ||
993 | int i, status = -ENOMEM; | ||
994 | #ifdef CONFIG_PROC_FS | ||
995 | struct proc_dir_entry *proc_nful; | ||
996 | #endif | ||
997 | |||
998 | if (!init) | ||
999 | goto cleanup; | ||
1000 | |||
1001 | for (i = 0; i < INSTANCE_BUCKETS; i++) | ||
1002 | INIT_HLIST_HEAD(&instance_table[i]); | ||
1003 | |||
1004 | /* it's not really all that important to have a random value, so | ||
1005 | * we can do this from the init function, even if there hasn't | ||
1006 | * been that much entropy yet */ | ||
1007 | get_random_bytes(&hash_init, sizeof(hash_init)); | ||
1008 | |||
1009 | netlink_register_notifier(&nfulnl_rtnl_notifier); | ||
1010 | status = nfnetlink_subsys_register(&nfulnl_subsys); | ||
1011 | if (status < 0) { | ||
1012 | printk(KERN_ERR "log: failed to create netlink socket\n"); | ||
1013 | goto cleanup_netlink_notifier; | ||
1014 | } | ||
1015 | |||
1016 | #ifdef CONFIG_PROC_FS | ||
1017 | proc_nful = create_proc_entry("nfnetlink_log", 0440, | ||
1018 | proc_net_netfilter); | ||
1019 | if (!proc_nful) | ||
1020 | goto cleanup_subsys; | ||
1021 | proc_nful->proc_fops = &nful_file_ops; | ||
1022 | #endif | ||
1023 | |||
1024 | return status; | ||
1025 | |||
1026 | cleanup: | ||
1027 | nf_log_unregister_logger(&nfulnl_logger); | ||
1028 | #ifdef CONFIG_PROC_FS | ||
1029 | remove_proc_entry("nfnetlink_log", proc_net_netfilter); | ||
1030 | cleanup_subsys: | ||
1031 | #endif | ||
1032 | nfnetlink_subsys_unregister(&nfulnl_subsys); | ||
1033 | cleanup_netlink_notifier: | ||
1034 | netlink_unregister_notifier(&nfulnl_rtnl_notifier); | ||
1035 | return status; | ||
1036 | } | ||
1037 | |||
/* module entry point: register everything via the shared helper */
static int __init init(void)
{

	return init_or_cleanup(1);
}

/* module exit: run the teardown half of init_or_cleanup() */
static void __exit fini(void)
{
	init_or_cleanup(0);
}

MODULE_DESCRIPTION("netfilter userspace logging");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ULOG);

module_init(init);
module_exit(fini);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c new file mode 100644 index 000000000000..e3a5285329af --- /dev/null +++ b/net/netfilter/nfnetlink_queue.c | |||
@@ -0,0 +1,1132 @@ | |||
1 | /* | ||
2 | * This is a module which is used for queueing packets and communicating with | ||
3 | * userspace via nfetlink. | ||
4 | * | ||
5 | * (C) 2005 by Harald Welte <laforge@netfilter.org> | ||
6 | * | ||
7 | * Based on the old ipv4-only ip_queue.c: | ||
8 | * (C) 2000-2002 James Morris <jmorris@intercode.com.au> | ||
9 | * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License version 2 as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | */ | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/skbuff.h> | ||
18 | #include <linux/init.h> | ||
19 | #include <linux/spinlock.h> | ||
20 | #include <linux/notifier.h> | ||
21 | #include <linux/netdevice.h> | ||
22 | #include <linux/netfilter.h> | ||
23 | #include <linux/proc_fs.h> | ||
24 | #include <linux/netfilter_ipv4.h> | ||
25 | #include <linux/netfilter_ipv6.h> | ||
26 | #include <linux/netfilter/nfnetlink.h> | ||
27 | #include <linux/netfilter/nfnetlink_queue.h> | ||
28 | #include <linux/list.h> | ||
29 | #include <net/sock.h> | ||
30 | |||
31 | #include <asm/atomic.h> | ||
32 | |||
33 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
34 | #include "../bridge/br_private.h" | ||
35 | #endif | ||
36 | |||
/* default cap on packets queued per instance before new ones are
 * dropped (accounted in queue_dropped) */
#define NFQNL_QMAX_DEFAULT 1024

/* compile-time debug logging; flip the "#if 0" to enable */
#if 0
#define QDEBUG(x, args ...)	printk(KERN_DEBUG "%s(%d):%s(): " x, \
					__FILE__, __LINE__, __FUNCTION__, \
					## args)
#else
#define QDEBUG(x, ...)
#endif
46 | |||
/* one packet awaiting a userspace verdict; lives on
 * nfqnl_instance.queue_list until reinjected or flushed */
struct nfqnl_queue_entry {
	struct list_head list;	/* anchor in instance's queue_list */
	struct nf_info *info;	/* reinjection info from nf_queue core */
	struct sk_buff *skb;	/* the queued packet itself */
	unsigned int id;	/* packet id echoed back in the verdict */
};
53 | |||
/* one userspace-bound queue; hashed by queue_num in instance_table,
 * reference-counted via 'use' (see instance_lookup_get/instance_put) */
struct nfqnl_instance {
	struct hlist_node hlist;		/* global list of queues */
	atomic_t use;				/* refcount */

	int peer_pid;				/* netlink pid of the bound listener */
	unsigned int queue_maxlen;		/* drop threshold for queue_total */
	unsigned int copy_range;		/* max payload bytes copied to userspace */
	unsigned int queue_total;		/* current number of queued entries */
	unsigned int queue_dropped;		/* dropped: queue full / alloc failure */
	unsigned int queue_user_dropped;	/* dropped: unicast to userspace failed */

	atomic_t id_sequence;			/* 'sequence' of pkt ids */

	u_int16_t queue_num;			/* number of this queue */
	u_int8_t copy_mode;			/* NFQNL_COPY_* */

	spinlock_t lock;			/* protects queue_list and counters */

	struct list_head queue_list;		/* packets in queue */
};
74 | |||
/* predicate for __find_entry(): non-zero means "this entry matches" */
typedef int (*nfqnl_cmpfn)(struct nfqnl_queue_entry *, unsigned long);

/* guards instance_table; per-instance state is under instance->lock */
static DEFINE_RWLOCK(instances_lock);
78 | |||
/* Convert a 64-bit value to network (big-endian) byte order by
 * reversing its byte sequence.
 *
 * Fix: the original wrote every input byte to the same (last) output
 * position -- "[sizeof(u_int64_t)-1]" was missing the "- i" -- so
 * bytes 0..6 of 'out' were never initialized and the result was
 * garbage.
 *
 * NOTE(review): like the original, this swaps unconditionally, which
 * is only correct on little-endian hosts; cpu_to_be64() would be the
 * endian-safe kernel helper.
 */
u_int64_t htonll(u_int64_t in)
{
	u_int64_t out;
	int i;

	for (i = 0; i < sizeof(u_int64_t); i++)
		((u_int8_t *)&out)[sizeof(u_int64_t)-1-i] = ((u_int8_t *)&in)[i];

	return out;
}
89 | |||
/* instance hash table: 16 buckets of hlist chains */
#define INSTANCE_BUCKETS 16
static struct hlist_head instance_table[INSTANCE_BUCKETS];

/* fold both bytes of the queue number into a bucket index */
static inline u_int8_t instance_hashfn(u_int16_t queue_num)
{
	return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS;
}
97 | |||
/* Find the instance bound to queue_num, or NULL.  Caller must hold
 * instances_lock (either read or write side); no reference is taken. */
static struct nfqnl_instance *
__instance_lookup(u_int16_t queue_num)
{
	struct hlist_head *head;
	struct hlist_node *pos;
	struct nfqnl_instance *inst;

	head = &instance_table[instance_hashfn(queue_num)];
	hlist_for_each_entry(inst, pos, head, hlist) {
		if (inst->queue_num == queue_num)
			return inst;
	}
	return NULL;
}
112 | |||
/* Locking wrapper around __instance_lookup() that also takes a
 * reference on the found instance; pair with instance_put(). */
static struct nfqnl_instance *
instance_lookup_get(u_int16_t queue_num)
{
	struct nfqnl_instance *inst;

	read_lock_bh(&instances_lock);
	inst = __instance_lookup(queue_num);
	if (inst)
		atomic_inc(&inst->use);
	read_unlock_bh(&instances_lock);

	return inst;
}
126 | |||
127 | static void | ||
128 | instance_put(struct nfqnl_instance *inst) | ||
129 | { | ||
130 | if (inst && atomic_dec_and_test(&inst->use)) { | ||
131 | QDEBUG("kfree(inst=%p)\n", inst); | ||
132 | kfree(inst); | ||
133 | } | ||
134 | } | ||
135 | |||
136 | static struct nfqnl_instance * | ||
137 | instance_create(u_int16_t queue_num, int pid) | ||
138 | { | ||
139 | struct nfqnl_instance *inst; | ||
140 | |||
141 | QDEBUG("entering for queue_num=%u, pid=%d\n", queue_num, pid); | ||
142 | |||
143 | write_lock_bh(&instances_lock); | ||
144 | if (__instance_lookup(queue_num)) { | ||
145 | inst = NULL; | ||
146 | QDEBUG("aborting, instance already exists\n"); | ||
147 | goto out_unlock; | ||
148 | } | ||
149 | |||
150 | inst = kmalloc(sizeof(*inst), GFP_ATOMIC); | ||
151 | if (!inst) | ||
152 | goto out_unlock; | ||
153 | |||
154 | memset(inst, 0, sizeof(*inst)); | ||
155 | inst->queue_num = queue_num; | ||
156 | inst->peer_pid = pid; | ||
157 | inst->queue_maxlen = NFQNL_QMAX_DEFAULT; | ||
158 | inst->copy_range = 0xfffff; | ||
159 | inst->copy_mode = NFQNL_COPY_NONE; | ||
160 | atomic_set(&inst->id_sequence, 0); | ||
161 | /* needs to be two, since we _put() after creation */ | ||
162 | atomic_set(&inst->use, 2); | ||
163 | inst->lock = SPIN_LOCK_UNLOCKED; | ||
164 | INIT_LIST_HEAD(&inst->queue_list); | ||
165 | |||
166 | if (!try_module_get(THIS_MODULE)) | ||
167 | goto out_free; | ||
168 | |||
169 | hlist_add_head(&inst->hlist, | ||
170 | &instance_table[instance_hashfn(queue_num)]); | ||
171 | |||
172 | write_unlock_bh(&instances_lock); | ||
173 | |||
174 | QDEBUG("successfully created new instance\n"); | ||
175 | |||
176 | return inst; | ||
177 | |||
178 | out_free: | ||
179 | kfree(inst); | ||
180 | out_unlock: | ||
181 | write_unlock_bh(&instances_lock); | ||
182 | return NULL; | ||
183 | } | ||
184 | |||
static void nfqnl_flush(struct nfqnl_instance *queue, int verdict);

/* Unhash an instance, drop (NF_DROP) everything still queued on it and
 * release the hash table's reference.  'lock' selects whether we take
 * instances_lock ourselves or the caller already holds it. */
static void
_instance_destroy2(struct nfqnl_instance *inst, int lock)
{
	/* first pull it out of the global list */
	if (lock)
		write_lock_bh(&instances_lock);

	QDEBUG("removing instance %p (queuenum=%u) from hash\n",
		inst, inst->queue_num);
	hlist_del(&inst->hlist);

	if (lock)
		write_unlock_bh(&instances_lock);

	/* then flush all pending skbs from the queue */
	nfqnl_flush(inst, NF_DROP);

	/* and finally put the refcount */
	instance_put(inst);

	module_put(THIS_MODULE);
}
209 | |||
/* destroy with instances_lock already held by the caller */
static inline void
__instance_destroy(struct nfqnl_instance *inst)
{
	_instance_destroy2(inst, 0);
}

/* destroy, taking instances_lock internally */
static inline void
instance_destroy(struct nfqnl_instance *inst)
{
	_instance_destroy2(inst, 1);
}
221 | |||
222 | |||
223 | |||
/* Hand the packet back to the netfilter core with the given verdict
 * and free the bookkeeping entry (the skb is consumed by
 * nf_reinject()). */
static void
issue_verdict(struct nfqnl_queue_entry *entry, int verdict)
{
	QDEBUG("entering for entry %p, verdict %u\n", entry, verdict);

	/* TCP input path (and probably other bits) assume to be called
	 * from softirq context, not from syscall, like issue_verdict is
	 * called.  TCP input path deadlocks with locks taken from timer
	 * softirq, e.g.  We therefore emulate this by local_bh_disable() */

	local_bh_disable();
	nf_reinject(entry->skb, entry->info, verdict);
	local_bh_enable();

	kfree(entry);
}
240 | |||
241 | static inline void | ||
242 | __enqueue_entry(struct nfqnl_instance *queue, | ||
243 | struct nfqnl_queue_entry *entry) | ||
244 | { | ||
245 | list_add(&entry->list, &queue->queue_list); | ||
246 | queue->queue_total++; | ||
247 | } | ||
248 | |||
249 | /* | ||
250 | * Find and return a queued entry matched by cmpfn, or return the last | ||
251 | * entry if cmpfn is NULL. | ||
252 | */ | ||
253 | static inline struct nfqnl_queue_entry * | ||
254 | __find_entry(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, | ||
255 | unsigned long data) | ||
256 | { | ||
257 | struct list_head *p; | ||
258 | |||
259 | list_for_each_prev(p, &queue->queue_list) { | ||
260 | struct nfqnl_queue_entry *entry = (struct nfqnl_queue_entry *)p; | ||
261 | |||
262 | if (!cmpfn || cmpfn(entry, data)) | ||
263 | return entry; | ||
264 | } | ||
265 | return NULL; | ||
266 | } | ||
267 | |||
268 | static inline void | ||
269 | __dequeue_entry(struct nfqnl_instance *q, struct nfqnl_queue_entry *entry) | ||
270 | { | ||
271 | list_del(&entry->list); | ||
272 | q->queue_total--; | ||
273 | } | ||
274 | |||
275 | static inline struct nfqnl_queue_entry * | ||
276 | __find_dequeue_entry(struct nfqnl_instance *queue, | ||
277 | nfqnl_cmpfn cmpfn, unsigned long data) | ||
278 | { | ||
279 | struct nfqnl_queue_entry *entry; | ||
280 | |||
281 | entry = __find_entry(queue, cmpfn, data); | ||
282 | if (entry == NULL) | ||
283 | return NULL; | ||
284 | |||
285 | __dequeue_entry(queue, entry); | ||
286 | return entry; | ||
287 | } | ||
288 | |||
289 | |||
290 | static inline void | ||
291 | __nfqnl_flush(struct nfqnl_instance *queue, int verdict) | ||
292 | { | ||
293 | struct nfqnl_queue_entry *entry; | ||
294 | |||
295 | while ((entry = __find_dequeue_entry(queue, NULL, 0))) | ||
296 | issue_verdict(entry, verdict); | ||
297 | } | ||
298 | |||
299 | static inline int | ||
300 | __nfqnl_set_mode(struct nfqnl_instance *queue, | ||
301 | unsigned char mode, unsigned int range) | ||
302 | { | ||
303 | int status = 0; | ||
304 | |||
305 | switch (mode) { | ||
306 | case NFQNL_COPY_NONE: | ||
307 | case NFQNL_COPY_META: | ||
308 | queue->copy_mode = mode; | ||
309 | queue->copy_range = 0; | ||
310 | break; | ||
311 | |||
312 | case NFQNL_COPY_PACKET: | ||
313 | queue->copy_mode = mode; | ||
314 | /* we're using struct nfattr which has 16bit nfa_len */ | ||
315 | if (range > 0xffff) | ||
316 | queue->copy_range = 0xffff; | ||
317 | else | ||
318 | queue->copy_range = range; | ||
319 | break; | ||
320 | |||
321 | default: | ||
322 | status = -EINVAL; | ||
323 | |||
324 | } | ||
325 | return status; | ||
326 | } | ||
327 | |||
328 | static struct nfqnl_queue_entry * | ||
329 | find_dequeue_entry(struct nfqnl_instance *queue, | ||
330 | nfqnl_cmpfn cmpfn, unsigned long data) | ||
331 | { | ||
332 | struct nfqnl_queue_entry *entry; | ||
333 | |||
334 | spin_lock_bh(&queue->lock); | ||
335 | entry = __find_dequeue_entry(queue, cmpfn, data); | ||
336 | spin_unlock_bh(&queue->lock); | ||
337 | |||
338 | return entry; | ||
339 | } | ||
340 | |||
/* Locked wrapper around __nfqnl_flush(): drop/accept everything still
 * queued on this instance. */
static void
nfqnl_flush(struct nfqnl_instance *queue, int verdict)
{
	spin_lock_bh(&queue->lock);
	__nfqnl_flush(queue, verdict);
	spin_unlock_bh(&queue->lock);
}
348 | |||
/*
 * Build the NFQNL_MSG_PACKET netlink message describing one queued
 * packet: packet header, in/out interface indices (physical and bridge
 * group under CONFIG_BRIDGE_NETFILTER), nfmark, hardware address,
 * timestamp and up to copy_range bytes of payload.
 *
 * Returns a freshly allocated skb ready for unicast to the peer, or
 * NULL with *errp set on failure.
 */
static struct sk_buff *
nfqnl_build_packet_message(struct nfqnl_instance *queue,
			   struct nfqnl_queue_entry *entry, int *errp)
{
	unsigned char *old_tail;
	size_t size;
	size_t data_len = 0;
	struct sk_buff *skb;
	struct nfqnl_msg_packet_hdr pmsg;
	struct nlmsghdr *nlh;
	struct nfgenmsg *nfmsg;
	unsigned int tmp_uint;

	QDEBUG("entered\n");

	/* all macros expand to constant values at compile time */
	size = NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_hdr))
		+ NLMSG_SPACE(sizeof(u_int32_t))	/* ifindex */
		+ NLMSG_SPACE(sizeof(u_int32_t))	/* ifindex */
#ifdef CONFIG_BRIDGE_NETFILTER
		+ NLMSG_SPACE(sizeof(u_int32_t))	/* ifindex */
		+ NLMSG_SPACE(sizeof(u_int32_t))	/* ifindex */
#endif
		+ NLMSG_SPACE(sizeof(u_int32_t))	/* mark */
		+ NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_hw))
		+ NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_timestamp));

	/* lock only guards the copy_mode/copy_range snapshot */
	spin_lock_bh(&queue->lock);

	switch (queue->copy_mode) {
	case NFQNL_COPY_META:
	case NFQNL_COPY_NONE:
		data_len = 0;
		break;

	case NFQNL_COPY_PACKET:
		/* copy_range == 0 means "whole packet" */
		if (queue->copy_range == 0
		    || queue->copy_range > entry->skb->len)
			data_len = entry->skb->len;
		else
			data_len = queue->copy_range;

		size += NLMSG_SPACE(data_len);
		break;

	default:
		*errp = -EINVAL;
		spin_unlock_bh(&queue->lock);
		return NULL;
	}

	spin_unlock_bh(&queue->lock);

	skb = alloc_skb(size, GFP_ATOMIC);
	if (!skb)
		goto nlmsg_failure;

	old_tail= skb->tail;
	nlh = NLMSG_PUT(skb, 0, 0,
			NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
			sizeof(struct nfgenmsg));
	nfmsg = NLMSG_DATA(nlh);
	nfmsg->nfgen_family = entry->info->pf;
	nfmsg->version = NFNETLINK_V0;
	nfmsg->res_id = htons(queue->queue_num);

	pmsg.packet_id 		= htonl(entry->id);
	/* NOTE(review): skb->protocol is already in network byte order;
	 * applying htons() here looks like a double swap on little-
	 * endian hosts — confirm against later fixes */
	pmsg.hw_protocol	= htons(entry->skb->protocol);
	pmsg.hook		= entry->info->hook;

	NFA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg);

	if (entry->info->indev) {
		tmp_uint = htonl(entry->info->indev->ifindex);
#ifndef CONFIG_BRIDGE_NETFILTER
		NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint);
#else
		if (entry->info->pf == PF_BRIDGE) {
			/* Case 1: indev is physical input device, we need to
			 * look for bridge group (when called from
			 * netfilter_bridge) */
			NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, sizeof(tmp_uint),
				&tmp_uint);
			/* this is the bridge group "brX" */
			tmp_uint = htonl(entry->info->indev->br_port->br->dev->ifindex);
			NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint),
				&tmp_uint);
		} else {
			/* Case 2: indev is bridge group, we need to look for
			 * physical device (when called from ipv4) */
			NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint),
				&tmp_uint);
			if (entry->skb->nf_bridge
			    && entry->skb->nf_bridge->physindev) {
				tmp_uint = htonl(entry->skb->nf_bridge->physindev->ifindex);
				NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV,
					sizeof(tmp_uint), &tmp_uint);
			}
		}
#endif
	}

	if (entry->info->outdev) {
		tmp_uint = htonl(entry->info->outdev->ifindex);
#ifndef CONFIG_BRIDGE_NETFILTER
		NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint);
#else
		if (entry->info->pf == PF_BRIDGE) {
			/* Case 1: outdev is physical output device, we need to
			 * look for bridge group (when called from
			 * netfilter_bridge) */
			NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint),
				&tmp_uint);
			/* this is the bridge group "brX" */
			tmp_uint = htonl(entry->info->outdev->br_port->br->dev->ifindex);
			NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint),
				&tmp_uint);
		} else {
			/* Case 2: outdev is bridge group, we need to look for
			 * physical output device (when called from ipv4) */
			NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint),
				&tmp_uint);
			if (entry->skb->nf_bridge
			    && entry->skb->nf_bridge->physoutdev) {
				tmp_uint = htonl(entry->skb->nf_bridge->physoutdev->ifindex);
				NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV,
					sizeof(tmp_uint), &tmp_uint);
			}
		}
#endif
	}

	if (entry->skb->nfmark) {
		tmp_uint = htonl(entry->skb->nfmark);
		NFA_PUT(skb, NFQA_MARK, sizeof(u_int32_t), &tmp_uint);
	}

	if (entry->info->indev && entry->skb->dev
	    && entry->skb->dev->hard_header_parse) {
		struct nfqnl_msg_packet_hw phw;

		phw.hw_addrlen =
			entry->skb->dev->hard_header_parse(entry->skb,
							   phw.hw_addr);
		phw.hw_addrlen = htons(phw.hw_addrlen);
		NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw);
	}

	if (entry->skb->tstamp.off_sec) {
		struct nfqnl_msg_packet_timestamp ts;

		ts.sec = htonll(skb_tv_base.tv_sec + entry->skb->tstamp.off_sec);
		ts.usec = htonll(skb_tv_base.tv_usec + entry->skb->tstamp.off_usec);

		NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts);
	}

	if (data_len) {
		struct nfattr *nfa;
		int size = NFA_LENGTH(data_len);

		/* hand-rolled NFA_PUT so the payload can be gathered
		 * with skb_copy_bits() instead of an extra memcpy */
		if (skb_tailroom(skb) < (int)NFA_SPACE(data_len)) {
			printk(KERN_WARNING "nf_queue: no tailroom!\n");
			goto nlmsg_failure;
		}

		nfa = (struct nfattr *)skb_put(skb, NFA_ALIGN(size));
		nfa->nfa_type = NFQA_PAYLOAD;
		nfa->nfa_len = size;

		if (skb_copy_bits(entry->skb, 0, NFA_DATA(nfa), data_len))
			BUG();
	}

	nlh->nlmsg_len = skb->tail - old_tail;
	return skb;

nlmsg_failure:
nfattr_failure:
	/* skb is NULL only if alloc_skb() itself failed */
	if (skb)
		kfree_skb(skb);
	*errp = -EINVAL;
	if (net_ratelimit())
		printk(KERN_ERR "nf_queue: error creating packet message\n");
	return NULL;
}
535 | |||
536 | static int | ||
537 | nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info, | ||
538 | unsigned int queuenum, void *data) | ||
539 | { | ||
540 | int status = -EINVAL; | ||
541 | struct sk_buff *nskb; | ||
542 | struct nfqnl_instance *queue; | ||
543 | struct nfqnl_queue_entry *entry; | ||
544 | |||
545 | QDEBUG("entered\n"); | ||
546 | |||
547 | queue = instance_lookup_get(queuenum); | ||
548 | if (!queue) { | ||
549 | QDEBUG("no queue instance matching\n"); | ||
550 | return -EINVAL; | ||
551 | } | ||
552 | |||
553 | if (queue->copy_mode == NFQNL_COPY_NONE) { | ||
554 | QDEBUG("mode COPY_NONE, aborting\n"); | ||
555 | status = -EAGAIN; | ||
556 | goto err_out_put; | ||
557 | } | ||
558 | |||
559 | entry = kmalloc(sizeof(*entry), GFP_ATOMIC); | ||
560 | if (entry == NULL) { | ||
561 | if (net_ratelimit()) | ||
562 | printk(KERN_ERR | ||
563 | "nf_queue: OOM in nfqnl_enqueue_packet()\n"); | ||
564 | status = -ENOMEM; | ||
565 | goto err_out_put; | ||
566 | } | ||
567 | |||
568 | entry->info = info; | ||
569 | entry->skb = skb; | ||
570 | entry->id = atomic_inc_return(&queue->id_sequence); | ||
571 | |||
572 | nskb = nfqnl_build_packet_message(queue, entry, &status); | ||
573 | if (nskb == NULL) | ||
574 | goto err_out_free; | ||
575 | |||
576 | spin_lock_bh(&queue->lock); | ||
577 | |||
578 | if (!queue->peer_pid) | ||
579 | goto err_out_free_nskb; | ||
580 | |||
581 | if (queue->queue_total >= queue->queue_maxlen) { | ||
582 | queue->queue_dropped++; | ||
583 | status = -ENOSPC; | ||
584 | if (net_ratelimit()) | ||
585 | printk(KERN_WARNING "ip_queue: full at %d entries, " | ||
586 | "dropping packets(s). Dropped: %d\n", | ||
587 | queue->queue_total, queue->queue_dropped); | ||
588 | goto err_out_free_nskb; | ||
589 | } | ||
590 | |||
591 | /* nfnetlink_unicast will either free the nskb or add it to a socket */ | ||
592 | status = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT); | ||
593 | if (status < 0) { | ||
594 | queue->queue_user_dropped++; | ||
595 | goto err_out_unlock; | ||
596 | } | ||
597 | |||
598 | __enqueue_entry(queue, entry); | ||
599 | |||
600 | spin_unlock_bh(&queue->lock); | ||
601 | instance_put(queue); | ||
602 | return status; | ||
603 | |||
604 | err_out_free_nskb: | ||
605 | kfree_skb(nskb); | ||
606 | |||
607 | err_out_unlock: | ||
608 | spin_unlock_bh(&queue->lock); | ||
609 | |||
610 | err_out_free: | ||
611 | kfree(entry); | ||
612 | err_out_put: | ||
613 | instance_put(queue); | ||
614 | return status; | ||
615 | } | ||
616 | |||
617 | static int | ||
618 | nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e) | ||
619 | { | ||
620 | int diff; | ||
621 | |||
622 | diff = data_len - e->skb->len; | ||
623 | if (diff < 0) | ||
624 | skb_trim(e->skb, data_len); | ||
625 | else if (diff > 0) { | ||
626 | if (data_len > 0xFFFF) | ||
627 | return -EINVAL; | ||
628 | if (diff > skb_tailroom(e->skb)) { | ||
629 | struct sk_buff *newskb; | ||
630 | |||
631 | newskb = skb_copy_expand(e->skb, | ||
632 | skb_headroom(e->skb), | ||
633 | diff, | ||
634 | GFP_ATOMIC); | ||
635 | if (newskb == NULL) { | ||
636 | printk(KERN_WARNING "ip_queue: OOM " | ||
637 | "in mangle, dropping packet\n"); | ||
638 | return -ENOMEM; | ||
639 | } | ||
640 | if (e->skb->sk) | ||
641 | skb_set_owner_w(newskb, e->skb->sk); | ||
642 | kfree_skb(e->skb); | ||
643 | e->skb = newskb; | ||
644 | } | ||
645 | skb_put(e->skb, diff); | ||
646 | } | ||
647 | if (!skb_make_writable(&e->skb, data_len)) | ||
648 | return -ENOMEM; | ||
649 | memcpy(e->skb->data, data, data_len); | ||
650 | |||
651 | return 0; | ||
652 | } | ||
653 | |||
654 | static inline int | ||
655 | id_cmp(struct nfqnl_queue_entry *e, unsigned long id) | ||
656 | { | ||
657 | return (id == e->id); | ||
658 | } | ||
659 | |||
660 | static int | ||
661 | nfqnl_set_mode(struct nfqnl_instance *queue, | ||
662 | unsigned char mode, unsigned int range) | ||
663 | { | ||
664 | int status; | ||
665 | |||
666 | spin_lock_bh(&queue->lock); | ||
667 | status = __nfqnl_set_mode(queue, mode, range); | ||
668 | spin_unlock_bh(&queue->lock); | ||
669 | |||
670 | return status; | ||
671 | } | ||
672 | |||
673 | static int | ||
674 | dev_cmp(struct nfqnl_queue_entry *entry, unsigned long ifindex) | ||
675 | { | ||
676 | if (entry->info->indev) | ||
677 | if (entry->info->indev->ifindex == ifindex) | ||
678 | return 1; | ||
679 | |||
680 | if (entry->info->outdev) | ||
681 | if (entry->info->outdev->ifindex == ifindex) | ||
682 | return 1; | ||
683 | |||
684 | return 0; | ||
685 | } | ||
686 | |||
687 | /* drop all packets with either indev or outdev == ifindex from all queue | ||
688 | * instances */ | ||
689 | static void | ||
690 | nfqnl_dev_drop(int ifindex) | ||
691 | { | ||
692 | int i; | ||
693 | |||
694 | QDEBUG("entering for ifindex %u\n", ifindex); | ||
695 | |||
696 | /* this only looks like we have to hold the readlock for a way too long | ||
697 | * time, issue_verdict(), nf_reinject(), ... - but we always only | ||
698 | * issue NF_DROP, which is processed directly in nf_reinject() */ | ||
699 | read_lock_bh(&instances_lock); | ||
700 | |||
701 | for (i = 0; i < INSTANCE_BUCKETS; i++) { | ||
702 | struct hlist_node *tmp; | ||
703 | struct nfqnl_instance *inst; | ||
704 | struct hlist_head *head = &instance_table[i]; | ||
705 | |||
706 | hlist_for_each_entry(inst, tmp, head, hlist) { | ||
707 | struct nfqnl_queue_entry *entry; | ||
708 | while ((entry = find_dequeue_entry(inst, dev_cmp, | ||
709 | ifindex)) != NULL) | ||
710 | issue_verdict(entry, NF_DROP); | ||
711 | } | ||
712 | } | ||
713 | |||
714 | read_unlock_bh(&instances_lock); | ||
715 | } | ||
716 | |||
/* ack the offending netlink message with 'err' and return from the handler;
 * relies on 'skb' and 'nlh' being in scope at the call site */
#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
718 | |||
719 | static int | ||
720 | nfqnl_rcv_dev_event(struct notifier_block *this, | ||
721 | unsigned long event, void *ptr) | ||
722 | { | ||
723 | struct net_device *dev = ptr; | ||
724 | |||
725 | /* Drop any packets associated with the downed device */ | ||
726 | if (event == NETDEV_DOWN) | ||
727 | nfqnl_dev_drop(dev->ifindex); | ||
728 | return NOTIFY_DONE; | ||
729 | } | ||
730 | |||
/* registered with the netdevice notifier chain in init_or_cleanup() */
static struct notifier_block nfqnl_dev_notifier = {
	.notifier_call	= nfqnl_rcv_dev_event,
};
734 | |||
/* netlink notifier: when a NETLINK_NETFILTER socket is released, destroy
 * every queue instance bound to that pid so no stale peer remains */
static int
nfqnl_rcv_nl_event(struct notifier_block *this,
		   unsigned long event, void *ptr)
{
	struct netlink_notify *n = ptr;

	if (event == NETLINK_URELEASE &&
	    n->protocol == NETLINK_NETFILTER && n->pid) {
		int i;

		/* destroy all instances for this pid */
		write_lock_bh(&instances_lock);
		for (i = 0; i < INSTANCE_BUCKETS; i++) {
			struct hlist_node *tmp, *t2;
			struct nfqnl_instance *inst;
			struct hlist_head *head = &instance_table[i];

			/* _safe variant: __instance_destroy unlinks inst */
			hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
				if (n->pid == inst->peer_pid)
					__instance_destroy(inst);
			}
		}
		write_unlock_bh(&instances_lock);
	}
	return NOTIFY_DONE;
}
761 | |||
/* registered with the netlink notifier chain in init_or_cleanup() */
static struct notifier_block nfqnl_rtnl_notifier = {
	.notifier_call	= nfqnl_rcv_nl_event,
};
765 | |||
/* minimum payload sizes for attributes of NFQNL_MSG_VERDICT messages;
 * checked by nfattr_bad_size() before any attribute is dereferenced */
static const int nfqa_verdict_min[NFQA_MAX] = {
	[NFQA_VERDICT_HDR-1]	= sizeof(struct nfqnl_msg_verdict_hdr),
	[NFQA_MARK-1]		= sizeof(u_int32_t),
	[NFQA_PAYLOAD-1]	= 0,
};
771 | |||
772 | static int | ||
773 | nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, | ||
774 | struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) | ||
775 | { | ||
776 | struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); | ||
777 | u_int16_t queue_num = ntohs(nfmsg->res_id); | ||
778 | |||
779 | struct nfqnl_msg_verdict_hdr *vhdr; | ||
780 | struct nfqnl_instance *queue; | ||
781 | unsigned int verdict; | ||
782 | struct nfqnl_queue_entry *entry; | ||
783 | int err; | ||
784 | |||
785 | if (nfattr_bad_size(nfqa, NFQA_MAX, nfqa_verdict_min)) { | ||
786 | QDEBUG("bad attribute size\n"); | ||
787 | return -EINVAL; | ||
788 | } | ||
789 | |||
790 | queue = instance_lookup_get(queue_num); | ||
791 | if (!queue) | ||
792 | return -ENODEV; | ||
793 | |||
794 | if (queue->peer_pid != NETLINK_CB(skb).pid) { | ||
795 | err = -EPERM; | ||
796 | goto err_out_put; | ||
797 | } | ||
798 | |||
799 | if (!nfqa[NFQA_VERDICT_HDR-1]) { | ||
800 | err = -EINVAL; | ||
801 | goto err_out_put; | ||
802 | } | ||
803 | |||
804 | vhdr = NFA_DATA(nfqa[NFQA_VERDICT_HDR-1]); | ||
805 | verdict = ntohl(vhdr->verdict); | ||
806 | |||
807 | if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) { | ||
808 | err = -EINVAL; | ||
809 | goto err_out_put; | ||
810 | } | ||
811 | |||
812 | entry = find_dequeue_entry(queue, id_cmp, ntohl(vhdr->id)); | ||
813 | if (entry == NULL) { | ||
814 | err = -ENOENT; | ||
815 | goto err_out_put; | ||
816 | } | ||
817 | |||
818 | if (nfqa[NFQA_PAYLOAD-1]) { | ||
819 | if (nfqnl_mangle(NFA_DATA(nfqa[NFQA_PAYLOAD-1]), | ||
820 | NFA_PAYLOAD(nfqa[NFQA_PAYLOAD-1]), entry) < 0) | ||
821 | verdict = NF_DROP; | ||
822 | } | ||
823 | |||
824 | if (nfqa[NFQA_MARK-1]) | ||
825 | skb->nfmark = ntohl(*(u_int32_t *)NFA_DATA(nfqa[NFQA_MARK-1])); | ||
826 | |||
827 | issue_verdict(entry, verdict); | ||
828 | instance_put(queue); | ||
829 | return 0; | ||
830 | |||
831 | err_out_put: | ||
832 | instance_put(queue); | ||
833 | return err; | ||
834 | } | ||
835 | |||
/* NFQNL_MSG_PACKET flows kernel-to-userspace only; receiving one here
 * is a protocol violation */
static int
nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
		  struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
{
	return -ENOTSUPP;
}
842 | |||
/* minimum payload sizes for attributes of NFQNL_MSG_CONFIG messages;
 * checked by nfattr_bad_size() in nfqnl_recv_config() */
static const int nfqa_cfg_min[NFQA_CFG_MAX] = {
	[NFQA_CFG_CMD-1]	= sizeof(struct nfqnl_msg_config_cmd),
	[NFQA_CFG_PARAMS-1]	= sizeof(struct nfqnl_msg_config_params),
};
847 | |||
/* our hook into the netfilter core's queue API; (un)registered per
 * protocol family via NFQNL_CFG_CMD_PF_BIND / _PF_UNBIND */
static struct nf_queue_handler nfqh = {
	.name 	= "nf_queue",
	.outfn	= &nfqnl_enqueue_packet,
};
852 | |||
/*
 * NFQNL_MSG_CONFIG handler: bind/unbind a queue instance to the sending
 * netlink socket, (un)register the queue handler for a protocol family,
 * and/or update the instance's copy mode and range.
 */
static int
nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
		  struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
{
	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
	u_int16_t queue_num = ntohs(nfmsg->res_id);
	struct nfqnl_instance *queue;
	int ret = 0;

	QDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type));

	if (nfattr_bad_size(nfqa, NFQA_CFG_MAX, nfqa_cfg_min)) {
		QDEBUG("bad attribute size\n");
		return -EINVAL;
	}

	/* NULL if no instance exists for queue_num (expected for BIND) */
	queue = instance_lookup_get(queue_num);
	if (nfqa[NFQA_CFG_CMD-1]) {
		struct nfqnl_msg_config_cmd *cmd;
		cmd = NFA_DATA(nfqa[NFQA_CFG_CMD-1]);
		QDEBUG("found CFG_CMD\n");

		switch (cmd->command) {
		case NFQNL_CFG_CMD_BIND:
			/* NOTE(review): returning here appears to leak the
			 * reference instance_lookup_get() took on 'queue' —
			 * confirm against instance_lookup_get()'s semantics */
			if (queue)
				return -EBUSY;

			queue = instance_create(queue_num, NETLINK_CB(skb).pid);
			if (!queue)
				return -EINVAL;
			break;
		case NFQNL_CFG_CMD_UNBIND:
			if (!queue)
				return -ENODEV;

			if (queue->peer_pid != NETLINK_CB(skb).pid) {
				ret = -EPERM;
				goto out_put;
			}

			/* removes the instance from the table; the reference
			 * we hold is dropped at out_put below */
			instance_destroy(queue);
			break;
		case NFQNL_CFG_CMD_PF_BIND:
			QDEBUG("registering queue handler for pf=%u\n",
				ntohs(cmd->pf));
			ret = nf_register_queue_handler(ntohs(cmd->pf), &nfqh);
			break;
		case NFQNL_CFG_CMD_PF_UNBIND:
			QDEBUG("unregistering queue handler for pf=%u\n",
				ntohs(cmd->pf));
			/* This is a bug and a feature.  We can unregister
			 * other handlers(!) */
			ret = nf_unregister_queue_handler(ntohs(cmd->pf));
			break;
		default:
			ret = -EINVAL;
			break;
		}
	} else {
		/* PARAMS-only message requires an existing, owned instance */
		if (!queue) {
			QDEBUG("no config command, and no instance ENOENT\n");
			ret = -ENOENT;
			goto out_put;
		}

		if (queue->peer_pid != NETLINK_CB(skb).pid) {
			QDEBUG("no config command, and wrong pid\n");
			ret = -EPERM;
			goto out_put;
		}
	}

	/* NOTE(review): also reached after UNBIND, i.e. on a destroyed
	 * instance — verify nfqnl_set_mode on it is safe */
	if (nfqa[NFQA_CFG_PARAMS-1]) {
		struct nfqnl_msg_config_params *params;
		params = NFA_DATA(nfqa[NFQA_CFG_PARAMS-1]);

		nfqnl_set_mode(queue, params->copy_mode,
				ntohl(params->copy_range));
	}

out_put:
	/* NOTE(review): 'queue' can be NULL on the ENOENT path — assumes
	 * instance_put(NULL) is a no-op; confirm */
	instance_put(queue);
	return ret;
}
937 | |||
/* dispatch table for NFNL_SUBSYS_QUEUE message types; every type
 * requires CAP_NET_ADMIN in the sender */
static struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = {
	[NFQNL_MSG_PACKET]	= { .call = nfqnl_recv_unsupp,
				    .attr_count = NFQA_MAX,
				    .cap_required = CAP_NET_ADMIN },
	[NFQNL_MSG_VERDICT]	= { .call = nfqnl_recv_verdict,
				    .attr_count = NFQA_MAX,
				    .cap_required = CAP_NET_ADMIN },
	[NFQNL_MSG_CONFIG]	= { .call = nfqnl_recv_config,
				    .attr_count = NFQA_CFG_MAX,
				    .cap_required = CAP_NET_ADMIN },
};
949 | |||
/* registration record handed to nfnetlink in init_or_cleanup() */
static struct nfnetlink_subsystem nfqnl_subsys = {
	.name		= "nf_queue",
	.subsys_id	= NFNL_SUBSYS_QUEUE,
	.cb_count	= NFQNL_MSG_MAX,
	.cb		= nfqnl_cb,
};
956 | |||
957 | #ifdef CONFIG_PROC_FS | ||
/* per-open /proc iterator state: the hash bucket currently being walked */
struct iter_state {
	unsigned int bucket;
};
961 | |||
962 | static struct hlist_node *get_first(struct seq_file *seq) | ||
963 | { | ||
964 | struct iter_state *st = seq->private; | ||
965 | |||
966 | if (!st) | ||
967 | return NULL; | ||
968 | |||
969 | for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { | ||
970 | if (!hlist_empty(&instance_table[st->bucket])) | ||
971 | return instance_table[st->bucket].first; | ||
972 | } | ||
973 | return NULL; | ||
974 | } | ||
975 | |||
976 | static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h) | ||
977 | { | ||
978 | struct iter_state *st = seq->private; | ||
979 | |||
980 | h = h->next; | ||
981 | while (!h) { | ||
982 | if (++st->bucket >= INSTANCE_BUCKETS) | ||
983 | return NULL; | ||
984 | |||
985 | h = instance_table[st->bucket].first; | ||
986 | } | ||
987 | return h; | ||
988 | } | ||
989 | |||
990 | static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos) | ||
991 | { | ||
992 | struct hlist_node *head; | ||
993 | head = get_first(seq); | ||
994 | |||
995 | if (head) | ||
996 | while (pos && (head = get_next(seq, head))) | ||
997 | pos--; | ||
998 | return pos ? NULL : head; | ||
999 | } | ||
1000 | |||
/* seq_file start: take instances_lock for the whole dump (released in
 * seq_stop) and position the iterator at *pos */
static void *seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock_bh(&instances_lock);
	return get_idx(seq, *pos);
}
1006 | |||
/* seq_file next: advance to the following instance, bumping *pos */
static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
{
	(*pos)++;
	return get_next(s, v);
}
1012 | |||
/* seq_file stop: drop the lock taken in seq_start */
static void seq_stop(struct seq_file *s, void *v)
{
	read_unlock_bh(&instances_lock);
}
1017 | |||
/* emit one line per instance for /proc/net/netfilter/nfnetlink_queue;
 * the column layout is visible to userspace parsers — do not reorder */
static int seq_show(struct seq_file *s, void *v)
{
	const struct nfqnl_instance *inst = v;

	return seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n",
			  inst->queue_num,
			  inst->peer_pid, inst->queue_total,
			  inst->copy_mode, inst->copy_range,
			  inst->queue_dropped, inst->queue_user_dropped,
			  atomic_read(&inst->id_sequence),
			  atomic_read(&inst->use));
}
1030 | |||
/* seq_file iterator over all queue instances (see get_first/get_next) */
static struct seq_operations nfqnl_seq_ops = {
	.start	= seq_start,
	.next	= seq_next,
	.stop	= seq_stop,
	.show	= seq_show,
};
1037 | |||
1038 | static int nfqnl_open(struct inode *inode, struct file *file) | ||
1039 | { | ||
1040 | struct seq_file *seq; | ||
1041 | struct iter_state *is; | ||
1042 | int ret; | ||
1043 | |||
1044 | is = kmalloc(sizeof(*is), GFP_KERNEL); | ||
1045 | if (!is) | ||
1046 | return -ENOMEM; | ||
1047 | memset(is, 0, sizeof(*is)); | ||
1048 | ret = seq_open(file, &nfqnl_seq_ops); | ||
1049 | if (ret < 0) | ||
1050 | goto out_free; | ||
1051 | seq = file->private_data; | ||
1052 | seq->private = is; | ||
1053 | return ret; | ||
1054 | out_free: | ||
1055 | kfree(is); | ||
1056 | return ret; | ||
1057 | } | ||
1058 | |||
/* file ops for /proc/net/netfilter/nfnetlink_queue; seq_release_private
 * frees the iter_state allocated in nfqnl_open */
static struct file_operations nfqnl_file_ops = {
	.owner	 = THIS_MODULE,
	.open	 = nfqnl_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_private,
};
1066 | |||
1067 | #endif /* PROC_FS */ | ||
1068 | |||
/*
 * Shared setup/teardown helper.  init=1 (module load): register in order,
 * jumping into the matching tail of the cleanup sequence on mid-way
 * failure.  init=0 (module unload): run the full cleanup from the top.
 * Returns 0 on successful init, negative errno otherwise.
 */
static int
init_or_cleanup(int init)
{
	int i, status = -ENOMEM;
#ifdef CONFIG_PROC_FS
	struct proc_dir_entry *proc_nfqueue;
#endif

	if (!init)
		goto cleanup;

	for (i = 0; i < INSTANCE_BUCKETS; i++)
		INIT_HLIST_HEAD(&instance_table[i]);

	netlink_register_notifier(&nfqnl_rtnl_notifier);
	status = nfnetlink_subsys_register(&nfqnl_subsys);
	if (status < 0) {
		printk(KERN_ERR "nf_queue: failed to create netlink socket\n");
		goto cleanup_netlink_notifier;
	}

#ifdef CONFIG_PROC_FS
	proc_nfqueue = create_proc_entry("nfnetlink_queue", 0440,
					 proc_net_netfilter);
	if (!proc_nfqueue)
		goto cleanup_subsys;
	proc_nfqueue->proc_fops = &nfqnl_file_ops;
#endif

	register_netdevice_notifier(&nfqnl_dev_notifier);

	/* success: status is 0 from nfnetlink_subsys_register() */
	return status;

cleanup:
	/* full teardown, reverse order of registration */
	nf_unregister_queue_handlers(&nfqh);
	unregister_netdevice_notifier(&nfqnl_dev_notifier);
#ifdef CONFIG_PROC_FS
	remove_proc_entry("nfnetlink_queue", proc_net_netfilter);
	/* label kept inside the #ifdef: the proc failure path above must
	 * skip only the proc removal */
cleanup_subsys:
#endif
	nfnetlink_subsys_unregister(&nfqnl_subsys);
cleanup_netlink_notifier:
	netlink_unregister_notifier(&nfqnl_rtnl_notifier);
	return status;
}
1114 | |||
1115 | static int __init init(void) | ||
1116 | { | ||
1117 | |||
1118 | return init_or_cleanup(1); | ||
1119 | } | ||
1120 | |||
/* module exit point: full teardown via the shared helper */
static void __exit fini(void)
{
	init_or_cleanup(0);
}
1125 | |||
MODULE_DESCRIPTION("netfilter packet queue handler");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_LICENSE("GPL");
/* allows userspace to trigger autoloading by nfnetlink subsystem id */
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_QUEUE);

module_init(init);
module_exit(fini);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index ff774a06c89d..62435ffc6184 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c | |||
@@ -13,7 +13,12 @@ | |||
13 | * added netlink_proto_exit | 13 | * added netlink_proto_exit |
14 | * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br> | 14 | * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br> |
15 | * use nlk_sk, as sk->protinfo is on a diet 8) | 15 | * use nlk_sk, as sk->protinfo is on a diet 8) |
16 | * | 16 | * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org> |
17 | * - inc module use count of module that owns | ||
18 | * the kernel socket in case userspace opens | ||
19 | * socket of same protocol | ||
20 | * - remove all module support, since netlink is | ||
21 | * mandatory if CONFIG_NET=y these days | ||
17 | */ | 22 | */ |
18 | 23 | ||
19 | #include <linux/config.h> | 24 | #include <linux/config.h> |
@@ -55,21 +60,29 @@ | |||
55 | #include <net/scm.h> | 60 | #include <net/scm.h> |
56 | 61 | ||
57 | #define Nprintk(a...) | 62 | #define Nprintk(a...) |
63 | #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) | ||
58 | 64 | ||
59 | struct netlink_sock { | 65 | struct netlink_sock { |
60 | /* struct sock has to be the first member of netlink_sock */ | 66 | /* struct sock has to be the first member of netlink_sock */ |
61 | struct sock sk; | 67 | struct sock sk; |
62 | u32 pid; | 68 | u32 pid; |
63 | unsigned int groups; | ||
64 | u32 dst_pid; | 69 | u32 dst_pid; |
65 | unsigned int dst_groups; | 70 | u32 dst_group; |
71 | u32 flags; | ||
72 | u32 subscriptions; | ||
73 | u32 ngroups; | ||
74 | unsigned long *groups; | ||
66 | unsigned long state; | 75 | unsigned long state; |
67 | wait_queue_head_t wait; | 76 | wait_queue_head_t wait; |
68 | struct netlink_callback *cb; | 77 | struct netlink_callback *cb; |
69 | spinlock_t cb_lock; | 78 | spinlock_t cb_lock; |
70 | void (*data_ready)(struct sock *sk, int bytes); | 79 | void (*data_ready)(struct sock *sk, int bytes); |
80 | struct module *module; | ||
71 | }; | 81 | }; |
72 | 82 | ||
83 | #define NETLINK_KERNEL_SOCKET 0x1 | ||
84 | #define NETLINK_RECV_PKTINFO 0x2 | ||
85 | |||
73 | static inline struct netlink_sock *nlk_sk(struct sock *sk) | 86 | static inline struct netlink_sock *nlk_sk(struct sock *sk) |
74 | { | 87 | { |
75 | return (struct netlink_sock *)sk; | 88 | return (struct netlink_sock *)sk; |
@@ -92,6 +105,9 @@ struct netlink_table { | |||
92 | struct nl_pid_hash hash; | 105 | struct nl_pid_hash hash; |
93 | struct hlist_head mc_list; | 106 | struct hlist_head mc_list; |
94 | unsigned int nl_nonroot; | 107 | unsigned int nl_nonroot; |
108 | unsigned int groups; | ||
109 | struct module *module; | ||
110 | int registered; | ||
95 | }; | 111 | }; |
96 | 112 | ||
97 | static struct netlink_table *nl_table; | 113 | static struct netlink_table *nl_table; |
@@ -106,6 +122,11 @@ static atomic_t nl_table_users = ATOMIC_INIT(0); | |||
106 | 122 | ||
107 | static struct notifier_block *netlink_chain; | 123 | static struct notifier_block *netlink_chain; |
108 | 124 | ||
125 | static u32 netlink_group_mask(u32 group) | ||
126 | { | ||
127 | return group ? 1 << (group - 1) : 0; | ||
128 | } | ||
129 | |||
109 | static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid) | 130 | static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid) |
110 | { | 131 | { |
111 | return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask]; | 132 | return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask]; |
@@ -122,6 +143,7 @@ static void netlink_sock_destruct(struct sock *sk) | |||
122 | BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); | 143 | BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); |
123 | BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); | 144 | BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); |
124 | BUG_TRAP(!nlk_sk(sk)->cb); | 145 | BUG_TRAP(!nlk_sk(sk)->cb); |
146 | BUG_TRAP(!nlk_sk(sk)->groups); | ||
125 | } | 147 | } |
126 | 148 | ||
127 | /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on SMP. | 149 | /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on SMP. |
@@ -317,7 +339,7 @@ static void netlink_remove(struct sock *sk) | |||
317 | netlink_table_grab(); | 339 | netlink_table_grab(); |
318 | if (sk_del_node_init(sk)) | 340 | if (sk_del_node_init(sk)) |
319 | nl_table[sk->sk_protocol].hash.entries--; | 341 | nl_table[sk->sk_protocol].hash.entries--; |
320 | if (nlk_sk(sk)->groups) | 342 | if (nlk_sk(sk)->subscriptions) |
321 | __sk_del_bind_node(sk); | 343 | __sk_del_bind_node(sk); |
322 | netlink_table_ungrab(); | 344 | netlink_table_ungrab(); |
323 | } | 345 | } |
@@ -328,19 +350,11 @@ static struct proto netlink_proto = { | |||
328 | .obj_size = sizeof(struct netlink_sock), | 350 | .obj_size = sizeof(struct netlink_sock), |
329 | }; | 351 | }; |
330 | 352 | ||
331 | static int netlink_create(struct socket *sock, int protocol) | 353 | static int __netlink_create(struct socket *sock, int protocol) |
332 | { | 354 | { |
333 | struct sock *sk; | 355 | struct sock *sk; |
334 | struct netlink_sock *nlk; | 356 | struct netlink_sock *nlk; |
335 | 357 | ||
336 | sock->state = SS_UNCONNECTED; | ||
337 | |||
338 | if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) | ||
339 | return -ESOCKTNOSUPPORT; | ||
340 | |||
341 | if (protocol<0 || protocol >= MAX_LINKS) | ||
342 | return -EPROTONOSUPPORT; | ||
343 | |||
344 | sock->ops = &netlink_ops; | 358 | sock->ops = &netlink_ops; |
345 | 359 | ||
346 | sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); | 360 | sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); |
@@ -350,15 +364,67 @@ static int netlink_create(struct socket *sock, int protocol) | |||
350 | sock_init_data(sock, sk); | 364 | sock_init_data(sock, sk); |
351 | 365 | ||
352 | nlk = nlk_sk(sk); | 366 | nlk = nlk_sk(sk); |
353 | |||
354 | spin_lock_init(&nlk->cb_lock); | 367 | spin_lock_init(&nlk->cb_lock); |
355 | init_waitqueue_head(&nlk->wait); | 368 | init_waitqueue_head(&nlk->wait); |
356 | sk->sk_destruct = netlink_sock_destruct; | ||
357 | 369 | ||
370 | sk->sk_destruct = netlink_sock_destruct; | ||
358 | sk->sk_protocol = protocol; | 371 | sk->sk_protocol = protocol; |
359 | return 0; | 372 | return 0; |
360 | } | 373 | } |
361 | 374 | ||
375 | static int netlink_create(struct socket *sock, int protocol) | ||
376 | { | ||
377 | struct module *module = NULL; | ||
378 | struct netlink_sock *nlk; | ||
379 | unsigned int groups; | ||
380 | int err = 0; | ||
381 | |||
382 | sock->state = SS_UNCONNECTED; | ||
383 | |||
384 | if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) | ||
385 | return -ESOCKTNOSUPPORT; | ||
386 | |||
387 | if (protocol<0 || protocol >= MAX_LINKS) | ||
388 | return -EPROTONOSUPPORT; | ||
389 | |||
390 | netlink_lock_table(); | ||
391 | #ifdef CONFIG_KMOD | ||
392 | if (!nl_table[protocol].registered) { | ||
393 | netlink_unlock_table(); | ||
394 | request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol); | ||
395 | netlink_lock_table(); | ||
396 | } | ||
397 | #endif | ||
398 | if (nl_table[protocol].registered && | ||
399 | try_module_get(nl_table[protocol].module)) | ||
400 | module = nl_table[protocol].module; | ||
401 | else | ||
402 | err = -EPROTONOSUPPORT; | ||
403 | groups = nl_table[protocol].groups; | ||
404 | netlink_unlock_table(); | ||
405 | |||
406 | if (err || (err = __netlink_create(sock, protocol) < 0)) | ||
407 | goto out_module; | ||
408 | |||
409 | nlk = nlk_sk(sock->sk); | ||
410 | |||
411 | nlk->groups = kmalloc(NLGRPSZ(groups), GFP_KERNEL); | ||
412 | if (nlk->groups == NULL) { | ||
413 | err = -ENOMEM; | ||
414 | goto out_module; | ||
415 | } | ||
416 | memset(nlk->groups, 0, NLGRPSZ(groups)); | ||
417 | nlk->ngroups = groups; | ||
418 | |||
419 | nlk->module = module; | ||
420 | out: | ||
421 | return err; | ||
422 | |||
423 | out_module: | ||
424 | module_put(module); | ||
425 | goto out; | ||
426 | } | ||
427 | |||
362 | static int netlink_release(struct socket *sock) | 428 | static int netlink_release(struct socket *sock) |
363 | { | 429 | { |
364 | struct sock *sk = sock->sk; | 430 | struct sock *sk = sock->sk; |
@@ -387,14 +453,27 @@ static int netlink_release(struct socket *sock) | |||
387 | 453 | ||
388 | skb_queue_purge(&sk->sk_write_queue); | 454 | skb_queue_purge(&sk->sk_write_queue); |
389 | 455 | ||
390 | if (nlk->pid && !nlk->groups) { | 456 | if (nlk->pid && !nlk->subscriptions) { |
391 | struct netlink_notify n = { | 457 | struct netlink_notify n = { |
392 | .protocol = sk->sk_protocol, | 458 | .protocol = sk->sk_protocol, |
393 | .pid = nlk->pid, | 459 | .pid = nlk->pid, |
394 | }; | 460 | }; |
395 | notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n); | 461 | notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n); |
396 | } | 462 | } |
397 | 463 | ||
464 | if (nlk->module) | ||
465 | module_put(nlk->module); | ||
466 | |||
467 | if (nlk->flags & NETLINK_KERNEL_SOCKET) { | ||
468 | netlink_table_grab(); | ||
469 | nl_table[sk->sk_protocol].module = NULL; | ||
470 | nl_table[sk->sk_protocol].registered = 0; | ||
471 | netlink_table_ungrab(); | ||
472 | } | ||
473 | |||
474 | kfree(nlk->groups); | ||
475 | nlk->groups = NULL; | ||
476 | |||
398 | sock_put(sk); | 477 | sock_put(sk); |
399 | return 0; | 478 | return 0; |
400 | } | 479 | } |
@@ -443,6 +522,18 @@ static inline int netlink_capable(struct socket *sock, unsigned int flag) | |||
443 | capable(CAP_NET_ADMIN); | 522 | capable(CAP_NET_ADMIN); |
444 | } | 523 | } |
445 | 524 | ||
525 | static void | ||
526 | netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions) | ||
527 | { | ||
528 | struct netlink_sock *nlk = nlk_sk(sk); | ||
529 | |||
530 | if (nlk->subscriptions && !subscriptions) | ||
531 | __sk_del_bind_node(sk); | ||
532 | else if (!nlk->subscriptions && subscriptions) | ||
533 | sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list); | ||
534 | nlk->subscriptions = subscriptions; | ||
535 | } | ||
536 | |||
446 | static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) | 537 | static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) |
447 | { | 538 | { |
448 | struct sock *sk = sock->sk; | 539 | struct sock *sk = sock->sk; |
@@ -468,15 +559,14 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len | |||
468 | return err; | 559 | return err; |
469 | } | 560 | } |
470 | 561 | ||
471 | if (!nladdr->nl_groups && !nlk->groups) | 562 | if (!nladdr->nl_groups && !(u32)nlk->groups[0]) |
472 | return 0; | 563 | return 0; |
473 | 564 | ||
474 | netlink_table_grab(); | 565 | netlink_table_grab(); |
475 | if (nlk->groups && !nladdr->nl_groups) | 566 | netlink_update_subscriptions(sk, nlk->subscriptions + |
476 | __sk_del_bind_node(sk); | 567 | hweight32(nladdr->nl_groups) - |
477 | else if (!nlk->groups && nladdr->nl_groups) | 568 | hweight32(nlk->groups[0])); |
478 | sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list); | 569 | nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups; |
479 | nlk->groups = nladdr->nl_groups; | ||
480 | netlink_table_ungrab(); | 570 | netlink_table_ungrab(); |
481 | 571 | ||
482 | return 0; | 572 | return 0; |
@@ -493,7 +583,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, | |||
493 | if (addr->sa_family == AF_UNSPEC) { | 583 | if (addr->sa_family == AF_UNSPEC) { |
494 | sk->sk_state = NETLINK_UNCONNECTED; | 584 | sk->sk_state = NETLINK_UNCONNECTED; |
495 | nlk->dst_pid = 0; | 585 | nlk->dst_pid = 0; |
496 | nlk->dst_groups = 0; | 586 | nlk->dst_group = 0; |
497 | return 0; | 587 | return 0; |
498 | } | 588 | } |
499 | if (addr->sa_family != AF_NETLINK) | 589 | if (addr->sa_family != AF_NETLINK) |
@@ -509,7 +599,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, | |||
509 | if (err == 0) { | 599 | if (err == 0) { |
510 | sk->sk_state = NETLINK_CONNECTED; | 600 | sk->sk_state = NETLINK_CONNECTED; |
511 | nlk->dst_pid = nladdr->nl_pid; | 601 | nlk->dst_pid = nladdr->nl_pid; |
512 | nlk->dst_groups = nladdr->nl_groups; | 602 | nlk->dst_group = ffs(nladdr->nl_groups); |
513 | } | 603 | } |
514 | 604 | ||
515 | return err; | 605 | return err; |
@@ -527,10 +617,10 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, int *addr | |||
527 | 617 | ||
528 | if (peer) { | 618 | if (peer) { |
529 | nladdr->nl_pid = nlk->dst_pid; | 619 | nladdr->nl_pid = nlk->dst_pid; |
530 | nladdr->nl_groups = nlk->dst_groups; | 620 | nladdr->nl_groups = netlink_group_mask(nlk->dst_group); |
531 | } else { | 621 | } else { |
532 | nladdr->nl_pid = nlk->pid; | 622 | nladdr->nl_pid = nlk->pid; |
533 | nladdr->nl_groups = nlk->groups; | 623 | nladdr->nl_groups = nlk->groups[0]; |
534 | } | 624 | } |
535 | return 0; | 625 | return 0; |
536 | } | 626 | } |
@@ -731,7 +821,8 @@ static inline int do_one_broadcast(struct sock *sk, | |||
731 | if (p->exclude_sk == sk) | 821 | if (p->exclude_sk == sk) |
732 | goto out; | 822 | goto out; |
733 | 823 | ||
734 | if (nlk->pid == p->pid || !(nlk->groups & p->group)) | 824 | if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || |
825 | !test_bit(p->group - 1, nlk->groups)) | ||
735 | goto out; | 826 | goto out; |
736 | 827 | ||
737 | if (p->failure) { | 828 | if (p->failure) { |
@@ -770,7 +861,7 @@ out: | |||
770 | } | 861 | } |
771 | 862 | ||
772 | int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, | 863 | int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, |
773 | u32 group, int allocation) | 864 | u32 group, unsigned int __nocast allocation) |
774 | { | 865 | { |
775 | struct netlink_broadcast_data info; | 866 | struct netlink_broadcast_data info; |
776 | struct hlist_node *node; | 867 | struct hlist_node *node; |
@@ -827,7 +918,8 @@ static inline int do_one_set_err(struct sock *sk, | |||
827 | if (sk == p->exclude_sk) | 918 | if (sk == p->exclude_sk) |
828 | goto out; | 919 | goto out; |
829 | 920 | ||
830 | if (nlk->pid == p->pid || !(nlk->groups & p->group)) | 921 | if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || |
922 | !test_bit(p->group - 1, nlk->groups)) | ||
831 | goto out; | 923 | goto out; |
832 | 924 | ||
833 | sk->sk_err = p->code; | 925 | sk->sk_err = p->code; |
@@ -855,6 +947,94 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) | |||
855 | read_unlock(&nl_table_lock); | 947 | read_unlock(&nl_table_lock); |
856 | } | 948 | } |
857 | 949 | ||
950 | static int netlink_setsockopt(struct socket *sock, int level, int optname, | ||
951 | char __user *optval, int optlen) | ||
952 | { | ||
953 | struct sock *sk = sock->sk; | ||
954 | struct netlink_sock *nlk = nlk_sk(sk); | ||
955 | int val = 0, err; | ||
956 | |||
957 | if (level != SOL_NETLINK) | ||
958 | return -ENOPROTOOPT; | ||
959 | |||
960 | if (optlen >= sizeof(int) && | ||
961 | get_user(val, (int __user *)optval)) | ||
962 | return -EFAULT; | ||
963 | |||
964 | switch (optname) { | ||
965 | case NETLINK_PKTINFO: | ||
966 | if (val) | ||
967 | nlk->flags |= NETLINK_RECV_PKTINFO; | ||
968 | else | ||
969 | nlk->flags &= ~NETLINK_RECV_PKTINFO; | ||
970 | err = 0; | ||
971 | break; | ||
972 | case NETLINK_ADD_MEMBERSHIP: | ||
973 | case NETLINK_DROP_MEMBERSHIP: { | ||
974 | unsigned int subscriptions; | ||
975 | int old, new = optname == NETLINK_ADD_MEMBERSHIP ? 1 : 0; | ||
976 | |||
977 | if (!netlink_capable(sock, NL_NONROOT_RECV)) | ||
978 | return -EPERM; | ||
979 | if (!val || val - 1 >= nlk->ngroups) | ||
980 | return -EINVAL; | ||
981 | netlink_table_grab(); | ||
982 | old = test_bit(val - 1, nlk->groups); | ||
983 | subscriptions = nlk->subscriptions - old + new; | ||
984 | if (new) | ||
985 | __set_bit(val - 1, nlk->groups); | ||
986 | else | ||
987 | __clear_bit(val - 1, nlk->groups); | ||
988 | netlink_update_subscriptions(sk, subscriptions); | ||
989 | netlink_table_ungrab(); | ||
990 | err = 0; | ||
991 | break; | ||
992 | } | ||
993 | default: | ||
994 | err = -ENOPROTOOPT; | ||
995 | } | ||
996 | return err; | ||
997 | } | ||
998 | |||
999 | static int netlink_getsockopt(struct socket *sock, int level, int optname, | ||
1000 | char __user *optval, int __user *optlen) | ||
1001 | { | ||
1002 | struct sock *sk = sock->sk; | ||
1003 | struct netlink_sock *nlk = nlk_sk(sk); | ||
1004 | int len, val, err; | ||
1005 | |||
1006 | if (level != SOL_NETLINK) | ||
1007 | return -ENOPROTOOPT; | ||
1008 | |||
1009 | if (get_user(len, optlen)) | ||
1010 | return -EFAULT; | ||
1011 | if (len < 0) | ||
1012 | return -EINVAL; | ||
1013 | |||
1014 | switch (optname) { | ||
1015 | case NETLINK_PKTINFO: | ||
1016 | if (len < sizeof(int)) | ||
1017 | return -EINVAL; | ||
1018 | len = sizeof(int); | ||
1019 | val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0; | ||
1020 | put_user(len, optlen); | ||
1021 | put_user(val, optval); | ||
1022 | err = 0; | ||
1023 | break; | ||
1024 | default: | ||
1025 | err = -ENOPROTOOPT; | ||
1026 | } | ||
1027 | return err; | ||
1028 | } | ||
1029 | |||
1030 | static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) | ||
1031 | { | ||
1032 | struct nl_pktinfo info; | ||
1033 | |||
1034 | info.group = NETLINK_CB(skb).dst_group; | ||
1035 | put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); | ||
1036 | } | ||
1037 | |||
858 | static inline void netlink_rcv_wake(struct sock *sk) | 1038 | static inline void netlink_rcv_wake(struct sock *sk) |
859 | { | 1039 | { |
860 | struct netlink_sock *nlk = nlk_sk(sk); | 1040 | struct netlink_sock *nlk = nlk_sk(sk); |
@@ -873,7 +1053,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, | |||
873 | struct netlink_sock *nlk = nlk_sk(sk); | 1053 | struct netlink_sock *nlk = nlk_sk(sk); |
874 | struct sockaddr_nl *addr=msg->msg_name; | 1054 | struct sockaddr_nl *addr=msg->msg_name; |
875 | u32 dst_pid; | 1055 | u32 dst_pid; |
876 | u32 dst_groups; | 1056 | u32 dst_group; |
877 | struct sk_buff *skb; | 1057 | struct sk_buff *skb; |
878 | int err; | 1058 | int err; |
879 | struct scm_cookie scm; | 1059 | struct scm_cookie scm; |
@@ -891,12 +1071,12 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, | |||
891 | if (addr->nl_family != AF_NETLINK) | 1071 | if (addr->nl_family != AF_NETLINK) |
892 | return -EINVAL; | 1072 | return -EINVAL; |
893 | dst_pid = addr->nl_pid; | 1073 | dst_pid = addr->nl_pid; |
894 | dst_groups = addr->nl_groups; | 1074 | dst_group = ffs(addr->nl_groups); |
895 | if (dst_groups && !netlink_capable(sock, NL_NONROOT_SEND)) | 1075 | if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND)) |
896 | return -EPERM; | 1076 | return -EPERM; |
897 | } else { | 1077 | } else { |
898 | dst_pid = nlk->dst_pid; | 1078 | dst_pid = nlk->dst_pid; |
899 | dst_groups = nlk->dst_groups; | 1079 | dst_group = nlk->dst_group; |
900 | } | 1080 | } |
901 | 1081 | ||
902 | if (!nlk->pid) { | 1082 | if (!nlk->pid) { |
@@ -914,9 +1094,8 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, | |||
914 | goto out; | 1094 | goto out; |
915 | 1095 | ||
916 | NETLINK_CB(skb).pid = nlk->pid; | 1096 | NETLINK_CB(skb).pid = nlk->pid; |
917 | NETLINK_CB(skb).groups = nlk->groups; | ||
918 | NETLINK_CB(skb).dst_pid = dst_pid; | 1097 | NETLINK_CB(skb).dst_pid = dst_pid; |
919 | NETLINK_CB(skb).dst_groups = dst_groups; | 1098 | NETLINK_CB(skb).dst_group = dst_group; |
920 | NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context); | 1099 | NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context); |
921 | memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); | 1100 | memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); |
922 | 1101 | ||
@@ -938,9 +1117,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, | |||
938 | goto out; | 1117 | goto out; |
939 | } | 1118 | } |
940 | 1119 | ||
941 | if (dst_groups) { | 1120 | if (dst_group) { |
942 | atomic_inc(&skb->users); | 1121 | atomic_inc(&skb->users); |
943 | netlink_broadcast(sk, skb, dst_pid, dst_groups, GFP_KERNEL); | 1122 | netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL); |
944 | } | 1123 | } |
945 | err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT); | 1124 | err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT); |
946 | 1125 | ||
@@ -986,7 +1165,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, | |||
986 | addr->nl_family = AF_NETLINK; | 1165 | addr->nl_family = AF_NETLINK; |
987 | addr->nl_pad = 0; | 1166 | addr->nl_pad = 0; |
988 | addr->nl_pid = NETLINK_CB(skb).pid; | 1167 | addr->nl_pid = NETLINK_CB(skb).pid; |
989 | addr->nl_groups = NETLINK_CB(skb).dst_groups; | 1168 | addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group); |
990 | msg->msg_namelen = sizeof(*addr); | 1169 | msg->msg_namelen = sizeof(*addr); |
991 | } | 1170 | } |
992 | 1171 | ||
@@ -1001,6 +1180,8 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, | |||
1001 | netlink_dump(sk); | 1180 | netlink_dump(sk); |
1002 | 1181 | ||
1003 | scm_recv(sock, msg, siocb->scm, flags); | 1182 | scm_recv(sock, msg, siocb->scm, flags); |
1183 | if (nlk->flags & NETLINK_RECV_PKTINFO) | ||
1184 | netlink_cmsg_recv_pktinfo(msg, skb); | ||
1004 | 1185 | ||
1005 | out: | 1186 | out: |
1006 | netlink_rcv_wake(sk); | 1187 | netlink_rcv_wake(sk); |
@@ -1023,10 +1204,13 @@ static void netlink_data_ready(struct sock *sk, int len) | |||
1023 | */ | 1204 | */ |
1024 | 1205 | ||
1025 | struct sock * | 1206 | struct sock * |
1026 | netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)) | 1207 | netlink_kernel_create(int unit, unsigned int groups, |
1208 | void (*input)(struct sock *sk, int len), | ||
1209 | struct module *module) | ||
1027 | { | 1210 | { |
1028 | struct socket *sock; | 1211 | struct socket *sock; |
1029 | struct sock *sk; | 1212 | struct sock *sk; |
1213 | struct netlink_sock *nlk; | ||
1030 | 1214 | ||
1031 | if (!nl_table) | 1215 | if (!nl_table) |
1032 | return NULL; | 1216 | return NULL; |
@@ -1037,20 +1221,31 @@ netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)) | |||
1037 | if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) | 1221 | if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) |
1038 | return NULL; | 1222 | return NULL; |
1039 | 1223 | ||
1040 | if (netlink_create(sock, unit) < 0) { | 1224 | if (__netlink_create(sock, unit) < 0) |
1041 | sock_release(sock); | 1225 | goto out_sock_release; |
1042 | return NULL; | 1226 | |
1043 | } | ||
1044 | sk = sock->sk; | 1227 | sk = sock->sk; |
1045 | sk->sk_data_ready = netlink_data_ready; | 1228 | sk->sk_data_ready = netlink_data_ready; |
1046 | if (input) | 1229 | if (input) |
1047 | nlk_sk(sk)->data_ready = input; | 1230 | nlk_sk(sk)->data_ready = input; |
1048 | 1231 | ||
1049 | if (netlink_insert(sk, 0)) { | 1232 | if (netlink_insert(sk, 0)) |
1050 | sock_release(sock); | 1233 | goto out_sock_release; |
1051 | return NULL; | 1234 | |
1052 | } | 1235 | nlk = nlk_sk(sk); |
1236 | nlk->flags |= NETLINK_KERNEL_SOCKET; | ||
1237 | |||
1238 | netlink_table_grab(); | ||
1239 | nl_table[unit].groups = groups < 32 ? 32 : groups; | ||
1240 | nl_table[unit].module = module; | ||
1241 | nl_table[unit].registered = 1; | ||
1242 | netlink_table_ungrab(); | ||
1243 | |||
1053 | return sk; | 1244 | return sk; |
1245 | |||
1246 | out_sock_release: | ||
1247 | sock_release(sock); | ||
1248 | return NULL; | ||
1054 | } | 1249 | } |
1055 | 1250 | ||
1056 | void netlink_set_nonroot(int protocol, unsigned int flags) | 1251 | void netlink_set_nonroot(int protocol, unsigned int flags) |
@@ -1288,7 +1483,8 @@ static int netlink_seq_show(struct seq_file *seq, void *v) | |||
1288 | s, | 1483 | s, |
1289 | s->sk_protocol, | 1484 | s->sk_protocol, |
1290 | nlk->pid, | 1485 | nlk->pid, |
1291 | nlk->groups, | 1486 | nlk->flags & NETLINK_KERNEL_SOCKET ? |
1487 | 0 : (unsigned int)nlk->groups[0], | ||
1292 | atomic_read(&s->sk_rmem_alloc), | 1488 | atomic_read(&s->sk_rmem_alloc), |
1293 | atomic_read(&s->sk_wmem_alloc), | 1489 | atomic_read(&s->sk_wmem_alloc), |
1294 | nlk->cb, | 1490 | nlk->cb, |
@@ -1362,8 +1558,8 @@ static struct proto_ops netlink_ops = { | |||
1362 | .ioctl = sock_no_ioctl, | 1558 | .ioctl = sock_no_ioctl, |
1363 | .listen = sock_no_listen, | 1559 | .listen = sock_no_listen, |
1364 | .shutdown = sock_no_shutdown, | 1560 | .shutdown = sock_no_shutdown, |
1365 | .setsockopt = sock_no_setsockopt, | 1561 | .setsockopt = netlink_setsockopt, |
1366 | .getsockopt = sock_no_getsockopt, | 1562 | .getsockopt = netlink_getsockopt, |
1367 | .sendmsg = netlink_sendmsg, | 1563 | .sendmsg = netlink_sendmsg, |
1368 | .recvmsg = netlink_recvmsg, | 1564 | .recvmsg = netlink_recvmsg, |
1369 | .mmap = sock_no_mmap, | 1565 | .mmap = sock_no_mmap, |
@@ -1438,21 +1634,7 @@ out: | |||
1438 | return err; | 1634 | return err; |
1439 | } | 1635 | } |
1440 | 1636 | ||
1441 | static void __exit netlink_proto_exit(void) | ||
1442 | { | ||
1443 | sock_unregister(PF_NETLINK); | ||
1444 | proc_net_remove("netlink"); | ||
1445 | kfree(nl_table); | ||
1446 | nl_table = NULL; | ||
1447 | proto_unregister(&netlink_proto); | ||
1448 | } | ||
1449 | |||
1450 | core_initcall(netlink_proto_init); | 1637 | core_initcall(netlink_proto_init); |
1451 | module_exit(netlink_proto_exit); | ||
1452 | |||
1453 | MODULE_LICENSE("GPL"); | ||
1454 | |||
1455 | MODULE_ALIAS_NETPROTO(PF_NETLINK); | ||
1456 | 1638 | ||
1457 | EXPORT_SYMBOL(netlink_ack); | 1639 | EXPORT_SYMBOL(netlink_ack); |
1458 | EXPORT_SYMBOL(netlink_broadcast); | 1640 | EXPORT_SYMBOL(netlink_broadcast); |
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 162a85fed150..4b53de982114 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c | |||
@@ -39,7 +39,7 @@ | |||
39 | #include <linux/proc_fs.h> | 39 | #include <linux/proc_fs.h> |
40 | #include <linux/seq_file.h> | 40 | #include <linux/seq_file.h> |
41 | #include <net/ip.h> | 41 | #include <net/ip.h> |
42 | #include <net/tcp.h> | 42 | #include <net/tcp_states.h> |
43 | #include <net/arp.h> | 43 | #include <net/arp.h> |
44 | #include <linux/init.h> | 44 | #include <linux/init.h> |
45 | 45 | ||
@@ -858,17 +858,16 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) | |||
858 | frametype = skb->data[19] & 0x0F; | 858 | frametype = skb->data[19] & 0x0F; |
859 | flags = skb->data[19] & 0xF0; | 859 | flags = skb->data[19] & 0xF0; |
860 | 860 | ||
861 | #ifdef CONFIG_INET | ||
862 | /* | 861 | /* |
863 | * Check for an incoming IP over NET/ROM frame. | 862 | * Check for an incoming IP over NET/ROM frame. |
864 | */ | 863 | */ |
865 | if (frametype == NR_PROTOEXT && circuit_index == NR_PROTO_IP && circuit_id == NR_PROTO_IP) { | 864 | if (frametype == NR_PROTOEXT && |
865 | circuit_index == NR_PROTO_IP && circuit_id == NR_PROTO_IP) { | ||
866 | skb_pull(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN); | 866 | skb_pull(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN); |
867 | skb->h.raw = skb->data; | 867 | skb->h.raw = skb->data; |
868 | 868 | ||
869 | return nr_rx_ip(skb, dev); | 869 | return nr_rx_ip(skb, dev); |
870 | } | 870 | } |
871 | #endif | ||
872 | 871 | ||
873 | /* | 872 | /* |
874 | * Find an existing socket connection, based on circuit ID, if it's | 873 | * Find an existing socket connection, based on circuit ID, if it's |
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 220bf7494f71..263da4c26494 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c | |||
@@ -38,8 +38,6 @@ | |||
38 | #include <net/ax25.h> | 38 | #include <net/ax25.h> |
39 | #include <net/netrom.h> | 39 | #include <net/netrom.h> |
40 | 40 | ||
41 | #ifdef CONFIG_INET | ||
42 | |||
43 | /* | 41 | /* |
44 | * Only allow IP over NET/ROM frames through if the netrom device is up. | 42 | * Only allow IP over NET/ROM frames through if the netrom device is up. |
45 | */ | 43 | */ |
@@ -64,11 +62,12 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev) | |||
64 | skb->nh.raw = skb->data; | 62 | skb->nh.raw = skb->data; |
65 | skb->pkt_type = PACKET_HOST; | 63 | skb->pkt_type = PACKET_HOST; |
66 | 64 | ||
67 | ip_rcv(skb, skb->dev, NULL); | 65 | netif_rx(skb); |
68 | 66 | ||
69 | return 1; | 67 | return 1; |
70 | } | 68 | } |
71 | 69 | ||
70 | #ifdef CONFIG_INET | ||
72 | 71 | ||
73 | static int nr_rebuild_header(struct sk_buff *skb) | 72 | static int nr_rebuild_header(struct sk_buff *skb) |
74 | { | 73 | { |
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c index 9c44b3794126..64b81a796907 100644 --- a/net/netrom/nr_in.c +++ b/net/netrom/nr_in.c | |||
@@ -22,8 +22,7 @@ | |||
22 | #include <linux/netdevice.h> | 22 | #include <linux/netdevice.h> |
23 | #include <linux/skbuff.h> | 23 | #include <linux/skbuff.h> |
24 | #include <net/sock.h> | 24 | #include <net/sock.h> |
25 | #include <net/tcp.h> | 25 | #include <net/tcp_states.h> |
26 | #include <net/ip.h> /* For ip_rcv */ | ||
27 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
28 | #include <asm/system.h> | 27 | #include <asm/system.h> |
29 | #include <linux/fcntl.h> | 28 | #include <linux/fcntl.h> |
diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c index 0627347b14b8..587bed2674bf 100644 --- a/net/netrom/nr_subr.c +++ b/net/netrom/nr_subr.c | |||
@@ -21,7 +21,7 @@ | |||
21 | #include <linux/netdevice.h> | 21 | #include <linux/netdevice.h> |
22 | #include <linux/skbuff.h> | 22 | #include <linux/skbuff.h> |
23 | #include <net/sock.h> | 23 | #include <net/sock.h> |
24 | #include <net/tcp.h> | 24 | #include <net/tcp_states.h> |
25 | #include <asm/uaccess.h> | 25 | #include <asm/uaccess.h> |
26 | #include <asm/system.h> | 26 | #include <asm/system.h> |
27 | #include <linux/fcntl.h> | 27 | #include <linux/fcntl.h> |
@@ -77,7 +77,7 @@ void nr_requeue_frames(struct sock *sk) | |||
77 | if (skb_prev == NULL) | 77 | if (skb_prev == NULL) |
78 | skb_queue_head(&sk->sk_write_queue, skb); | 78 | skb_queue_head(&sk->sk_write_queue, skb); |
79 | else | 79 | else |
80 | skb_append(skb_prev, skb); | 80 | skb_append(skb_prev, skb, &sk->sk_write_queue); |
81 | skb_prev = skb; | 81 | skb_prev = skb; |
82 | } | 82 | } |
83 | } | 83 | } |
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index faabda8088be..75b72d389ba9 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c | |||
@@ -22,7 +22,7 @@ | |||
22 | #include <linux/netdevice.h> | 22 | #include <linux/netdevice.h> |
23 | #include <linux/skbuff.h> | 23 | #include <linux/skbuff.h> |
24 | #include <net/sock.h> | 24 | #include <net/sock.h> |
25 | #include <net/tcp.h> | 25 | #include <net/tcp_states.h> |
26 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
27 | #include <asm/system.h> | 27 | #include <asm/system.h> |
28 | #include <linux/fcntl.h> | 28 | #include <linux/fcntl.h> |
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c9d5980aa4de..ba997095f08f 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
@@ -241,7 +241,7 @@ static struct proto_ops packet_ops; | |||
241 | #ifdef CONFIG_SOCK_PACKET | 241 | #ifdef CONFIG_SOCK_PACKET |
242 | static struct proto_ops packet_ops_spkt; | 242 | static struct proto_ops packet_ops_spkt; |
243 | 243 | ||
244 | static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) | 244 | static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) |
245 | { | 245 | { |
246 | struct sock *sk; | 246 | struct sock *sk; |
247 | struct sockaddr_pkt *spkt; | 247 | struct sockaddr_pkt *spkt; |
@@ -441,7 +441,7 @@ static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned | |||
441 | we will not harm anyone. | 441 | we will not harm anyone. |
442 | */ | 442 | */ |
443 | 443 | ||
444 | static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) | 444 | static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) |
445 | { | 445 | { |
446 | struct sock *sk; | 446 | struct sock *sk; |
447 | struct sockaddr_ll *sll; | 447 | struct sockaddr_ll *sll; |
@@ -546,7 +546,7 @@ drop: | |||
546 | } | 546 | } |
547 | 547 | ||
548 | #ifdef CONFIG_PACKET_MMAP | 548 | #ifdef CONFIG_PACKET_MMAP |
549 | static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) | 549 | static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) |
550 | { | 550 | { |
551 | struct sock *sk; | 551 | struct sock *sk; |
552 | struct packet_sock *po; | 552 | struct packet_sock *po; |
@@ -635,12 +635,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct pack | |||
635 | h->tp_snaplen = snaplen; | 635 | h->tp_snaplen = snaplen; |
636 | h->tp_mac = macoff; | 636 | h->tp_mac = macoff; |
637 | h->tp_net = netoff; | 637 | h->tp_net = netoff; |
638 | if (skb->stamp.tv_sec == 0) { | 638 | if (skb->tstamp.off_sec == 0) { |
639 | do_gettimeofday(&skb->stamp); | 639 | __net_timestamp(skb); |
640 | sock_enable_timestamp(sk); | 640 | sock_enable_timestamp(sk); |
641 | } | 641 | } |
642 | h->tp_sec = skb->stamp.tv_sec; | 642 | h->tp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec; |
643 | h->tp_usec = skb->stamp.tv_usec; | 643 | h->tp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec; |
644 | 644 | ||
645 | sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h))); | 645 | sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h))); |
646 | sll->sll_halen = 0; | 646 | sll->sll_halen = 0; |
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 5480caf8ccc2..c6e59f84c3ae 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c | |||
@@ -41,7 +41,7 @@ | |||
41 | #include <net/rose.h> | 41 | #include <net/rose.h> |
42 | #include <linux/proc_fs.h> | 42 | #include <linux/proc_fs.h> |
43 | #include <linux/seq_file.h> | 43 | #include <linux/seq_file.h> |
44 | #include <net/tcp.h> | 44 | #include <net/tcp_states.h> |
45 | #include <net/ip.h> | 45 | #include <net/ip.h> |
46 | #include <net/arp.h> | 46 | #include <net/arp.h> |
47 | 47 | ||
diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c index ef475a1bb1ba..8348d33f1efe 100644 --- a/net/rose/rose_in.c +++ b/net/rose/rose_in.c | |||
@@ -26,8 +26,7 @@ | |||
26 | #include <linux/netdevice.h> | 26 | #include <linux/netdevice.h> |
27 | #include <linux/skbuff.h> | 27 | #include <linux/skbuff.h> |
28 | #include <net/sock.h> | 28 | #include <net/sock.h> |
29 | #include <net/ip.h> /* For ip_rcv */ | 29 | #include <net/tcp_states.h> |
30 | #include <net/tcp.h> | ||
31 | #include <asm/system.h> | 30 | #include <asm/system.h> |
32 | #include <linux/fcntl.h> | 31 | #include <linux/fcntl.h> |
33 | #include <linux/mm.h> | 32 | #include <linux/mm.h> |
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 25da6f699fd0..4510cd7613ec 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c | |||
@@ -24,7 +24,7 @@ | |||
24 | #include <linux/if_arp.h> | 24 | #include <linux/if_arp.h> |
25 | #include <linux/skbuff.h> | 25 | #include <linux/skbuff.h> |
26 | #include <net/sock.h> | 26 | #include <net/sock.h> |
27 | #include <net/tcp.h> | 27 | #include <net/tcp_states.h> |
28 | #include <asm/system.h> | 28 | #include <asm/system.h> |
29 | #include <asm/uaccess.h> | 29 | #include <asm/uaccess.h> |
30 | #include <linux/fcntl.h> | 30 | #include <linux/fcntl.h> |
diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c index 7db7e1cedc3a..a29a3a960fd6 100644 --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c | |||
@@ -21,7 +21,7 @@ | |||
21 | #include <linux/netdevice.h> | 21 | #include <linux/netdevice.h> |
22 | #include <linux/skbuff.h> | 22 | #include <linux/skbuff.h> |
23 | #include <net/sock.h> | 23 | #include <net/sock.h> |
24 | #include <net/tcp.h> | 24 | #include <net/tcp_states.h> |
25 | #include <asm/system.h> | 25 | #include <asm/system.h> |
26 | #include <linux/fcntl.h> | 26 | #include <linux/fcntl.h> |
27 | #include <linux/mm.h> | 27 | #include <linux/mm.h> |
@@ -74,7 +74,7 @@ void rose_requeue_frames(struct sock *sk) | |||
74 | if (skb_prev == NULL) | 74 | if (skb_prev == NULL) |
75 | skb_queue_head(&sk->sk_write_queue, skb); | 75 | skb_queue_head(&sk->sk_write_queue, skb); |
76 | else | 76 | else |
77 | skb_append(skb_prev, skb); | 77 | skb_append(skb_prev, skb, &sk->sk_write_queue); |
78 | skb_prev = skb; | 78 | skb_prev = skb; |
79 | } | 79 | } |
80 | } | 80 | } |
diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c index 84dd4403f792..50ae0371dab8 100644 --- a/net/rose/rose_timer.c +++ b/net/rose/rose_timer.c | |||
@@ -22,7 +22,7 @@ | |||
22 | #include <linux/netdevice.h> | 22 | #include <linux/netdevice.h> |
23 | #include <linux/skbuff.h> | 23 | #include <linux/skbuff.h> |
24 | #include <net/sock.h> | 24 | #include <net/sock.h> |
25 | #include <net/tcp.h> | 25 | #include <net/tcp_states.h> |
26 | #include <asm/system.h> | 26 | #include <asm/system.h> |
27 | #include <linux/fcntl.h> | 27 | #include <linux/fcntl.h> |
28 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
diff --git a/net/rxrpc/transport.c b/net/rxrpc/transport.c index 9bce7794130a..122c086ee2db 100644 --- a/net/rxrpc/transport.c +++ b/net/rxrpc/transport.c | |||
@@ -330,7 +330,7 @@ static int rxrpc_incoming_msg(struct rxrpc_transport *trans, | |||
330 | 330 | ||
331 | msg->trans = trans; | 331 | msg->trans = trans; |
332 | msg->state = RXRPC_MSG_RECEIVED; | 332 | msg->state = RXRPC_MSG_RECEIVED; |
333 | msg->stamp = pkt->stamp; | 333 | skb_get_timestamp(pkt, &msg->stamp); |
334 | if (msg->stamp.tv_sec == 0) { | 334 | if (msg->stamp.tv_sec == 0) { |
335 | do_gettimeofday(&msg->stamp); | 335 | do_gettimeofday(&msg->stamp); |
336 | if (pkt->sk) | 336 | if (pkt->sk) |
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 59d3e71f8b85..45d3bc0812c8 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig | |||
@@ -491,6 +491,7 @@ config NET_EMATCH_TEXT | |||
491 | depends on NET_EMATCH | 491 | depends on NET_EMATCH |
492 | select TEXTSEARCH | 492 | select TEXTSEARCH |
493 | select TEXTSEARCH_KMP | 493 | select TEXTSEARCH_KMP |
494 | select TEXTSEARCH_BM | ||
494 | select TEXTSEARCH_FSM | 495 | select TEXTSEARCH_FSM |
495 | ---help--- | 496 | ---help--- |
496 | Say Y here if you want to be ablt to classify packets based on | 497 | Say Y here if you want to be ablt to classify packets based on |
diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 249c61936ea0..8aebe8f6d271 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c | |||
@@ -165,7 +165,7 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action *act, | |||
165 | while ((a = act) != NULL) { | 165 | while ((a = act) != NULL) { |
166 | repeat: | 166 | repeat: |
167 | if (a->ops && a->ops->act) { | 167 | if (a->ops && a->ops->act) { |
168 | ret = a->ops->act(&skb, a); | 168 | ret = a->ops->act(&skb, a, res); |
169 | if (TC_MUNGED & skb->tc_verd) { | 169 | if (TC_MUNGED & skb->tc_verd) { |
170 | /* copied already, allow trampling */ | 170 | /* copied already, allow trampling */ |
171 | skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); | 171 | skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); |
@@ -179,11 +179,6 @@ repeat: | |||
179 | act = a->next; | 179 | act = a->next; |
180 | } | 180 | } |
181 | exec_done: | 181 | exec_done: |
182 | if (skb->tc_classid > 0) { | ||
183 | res->classid = skb->tc_classid; | ||
184 | res->class = 0; | ||
185 | skb->tc_classid = 0; | ||
186 | } | ||
187 | return ret; | 182 | return ret; |
188 | } | 183 | } |
189 | 184 | ||
@@ -598,7 +593,7 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid) | |||
598 | nlh->nlmsg_flags |= NLM_F_ROOT; | 593 | nlh->nlmsg_flags |= NLM_F_ROOT; |
599 | module_put(a->ops->owner); | 594 | module_put(a->ops->owner); |
600 | kfree(a); | 595 | kfree(a); |
601 | err = rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO); | 596 | err = rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); |
602 | if (err > 0) | 597 | if (err > 0) |
603 | return 0; | 598 | return 0; |
604 | 599 | ||
@@ -661,7 +656,7 @@ tca_action_gd(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int event) | |||
661 | 656 | ||
662 | /* now do the delete */ | 657 | /* now do the delete */ |
663 | tcf_action_destroy(head, 0); | 658 | tcf_action_destroy(head, 0); |
664 | ret = rtnetlink_send(skb, pid, RTMGRP_TC, | 659 | ret = rtnetlink_send(skb, pid, RTNLGRP_TC, |
665 | n->nlmsg_flags&NLM_F_ECHO); | 660 | n->nlmsg_flags&NLM_F_ECHO); |
666 | if (ret > 0) | 661 | if (ret > 0) |
667 | return 0; | 662 | return 0; |
@@ -703,9 +698,9 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event, | |||
703 | x->rta_len = skb->tail - (u8*)x; | 698 | x->rta_len = skb->tail - (u8*)x; |
704 | 699 | ||
705 | nlh->nlmsg_len = skb->tail - b; | 700 | nlh->nlmsg_len = skb->tail - b; |
706 | NETLINK_CB(skb).dst_groups = RTMGRP_TC; | 701 | NETLINK_CB(skb).dst_group = RTNLGRP_TC; |
707 | 702 | ||
708 | err = rtnetlink_send(skb, pid, RTMGRP_TC, flags&NLM_F_ECHO); | 703 | err = rtnetlink_send(skb, pid, RTNLGRP_TC, flags&NLM_F_ECHO); |
709 | if (err > 0) | 704 | if (err > 0) |
710 | err = 0; | 705 | err = 0; |
711 | return err; | 706 | return err; |
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 3b5714ef4d1a..b4d89fbb3782 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c | |||
@@ -367,7 +367,7 @@ static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n, | |||
367 | return -EINVAL; | 367 | return -EINVAL; |
368 | } | 368 | } |
369 | 369 | ||
370 | return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO); | 370 | return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); |
371 | } | 371 | } |
372 | 372 | ||
373 | struct tcf_dump_args | 373 | struct tcf_dump_args |
diff --git a/net/sched/gact.c b/net/sched/gact.c index a811c89fef7f..d1c6d542912a 100644 --- a/net/sched/gact.c +++ b/net/sched/gact.c | |||
@@ -135,7 +135,7 @@ tcf_gact_cleanup(struct tc_action *a, int bind) | |||
135 | } | 135 | } |
136 | 136 | ||
137 | static int | 137 | static int |
138 | tcf_gact(struct sk_buff **pskb, struct tc_action *a) | 138 | tcf_gact(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) |
139 | { | 139 | { |
140 | struct tcf_gact *p = PRIV(a, gact); | 140 | struct tcf_gact *p = PRIV(a, gact); |
141 | struct sk_buff *skb = *pskb; | 141 | struct sk_buff *skb = *pskb; |
diff --git a/net/sched/ipt.c b/net/sched/ipt.c index b114d994d523..f50136eed211 100644 --- a/net/sched/ipt.c +++ b/net/sched/ipt.c | |||
@@ -201,7 +201,7 @@ tcf_ipt_cleanup(struct tc_action *a, int bind) | |||
201 | } | 201 | } |
202 | 202 | ||
203 | static int | 203 | static int |
204 | tcf_ipt(struct sk_buff **pskb, struct tc_action *a) | 204 | tcf_ipt(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) |
205 | { | 205 | { |
206 | int ret = 0, result = 0; | 206 | int ret = 0, result = 0; |
207 | struct tcf_ipt *p = PRIV(a, ipt); | 207 | struct tcf_ipt *p = PRIV(a, ipt); |
diff --git a/net/sched/mirred.c b/net/sched/mirred.c index f309ce336803..20d06916dc0b 100644 --- a/net/sched/mirred.c +++ b/net/sched/mirred.c | |||
@@ -158,7 +158,7 @@ tcf_mirred_cleanup(struct tc_action *a, int bind) | |||
158 | } | 158 | } |
159 | 159 | ||
160 | static int | 160 | static int |
161 | tcf_mirred(struct sk_buff **pskb, struct tc_action *a) | 161 | tcf_mirred(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) |
162 | { | 162 | { |
163 | struct tcf_mirred *p = PRIV(a, mirred); | 163 | struct tcf_mirred *p = PRIV(a, mirred); |
164 | struct net_device *dev; | 164 | struct net_device *dev; |
diff --git a/net/sched/pedit.c b/net/sched/pedit.c index 678be6a645fb..767d24f4610e 100644 --- a/net/sched/pedit.c +++ b/net/sched/pedit.c | |||
@@ -130,7 +130,7 @@ tcf_pedit_cleanup(struct tc_action *a, int bind) | |||
130 | } | 130 | } |
131 | 131 | ||
132 | static int | 132 | static int |
133 | tcf_pedit(struct sk_buff **pskb, struct tc_action *a) | 133 | tcf_pedit(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) |
134 | { | 134 | { |
135 | struct tcf_pedit *p = PRIV(a, pedit); | 135 | struct tcf_pedit *p = PRIV(a, pedit); |
136 | struct sk_buff *skb = *pskb; | 136 | struct sk_buff *skb = *pskb; |
diff --git a/net/sched/police.c b/net/sched/police.c index c03545faf523..eb39fb2f39b6 100644 --- a/net/sched/police.c +++ b/net/sched/police.c | |||
@@ -284,7 +284,8 @@ static int tcf_act_police_cleanup(struct tc_action *a, int bind) | |||
284 | return 0; | 284 | return 0; |
285 | } | 285 | } |
286 | 286 | ||
287 | static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a) | 287 | static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a, |
288 | struct tcf_result *res) | ||
288 | { | 289 | { |
289 | psched_time_t now; | 290 | psched_time_t now; |
290 | struct sk_buff *skb = *pskb; | 291 | struct sk_buff *skb = *pskb; |
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index b9a069af4a02..737681cb9a92 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c | |||
@@ -816,7 +816,7 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, | |||
816 | } | 816 | } |
817 | 817 | ||
818 | if (skb->len) | 818 | if (skb->len) |
819 | return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO); | 819 | return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); |
820 | 820 | ||
821 | err_out: | 821 | err_out: |
822 | kfree_skb(skb); | 822 | kfree_skb(skb); |
@@ -1040,7 +1040,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, | |||
1040 | return -EINVAL; | 1040 | return -EINVAL; |
1041 | } | 1041 | } |
1042 | 1042 | ||
1043 | return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO); | 1043 | return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); |
1044 | } | 1044 | } |
1045 | 1045 | ||
1046 | struct qdisc_dump_args | 1046 | struct qdisc_dump_args |
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 0d066c965342..99ceb91f0150 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c | |||
@@ -238,6 +238,20 @@ static void dev_watchdog_down(struct net_device *dev) | |||
238 | spin_unlock_bh(&dev->xmit_lock); | 238 | spin_unlock_bh(&dev->xmit_lock); |
239 | } | 239 | } |
240 | 240 | ||
241 | void netif_carrier_on(struct net_device *dev) | ||
242 | { | ||
243 | if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) | ||
244 | linkwatch_fire_event(dev); | ||
245 | if (netif_running(dev)) | ||
246 | __netdev_watchdog_up(dev); | ||
247 | } | ||
248 | |||
249 | void netif_carrier_off(struct net_device *dev) | ||
250 | { | ||
251 | if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) | ||
252 | linkwatch_fire_event(dev); | ||
253 | } | ||
254 | |||
241 | /* "NOOP" scheduler: the best scheduler, recommended for all interfaces | 255 | /* "NOOP" scheduler: the best scheduler, recommended for all interfaces |
242 | under all circumstances. It is difficult to invent anything faster or | 256 | under all circumstances. It is difficult to invent anything faster or |
243 | cheaper. | 257 | cheaper. |
@@ -600,6 +614,8 @@ void dev_shutdown(struct net_device *dev) | |||
600 | } | 614 | } |
601 | 615 | ||
602 | EXPORT_SYMBOL(__netdev_watchdog_up); | 616 | EXPORT_SYMBOL(__netdev_watchdog_up); |
617 | EXPORT_SYMBOL(netif_carrier_on); | ||
618 | EXPORT_SYMBOL(netif_carrier_off); | ||
603 | EXPORT_SYMBOL(noop_qdisc); | 619 | EXPORT_SYMBOL(noop_qdisc); |
604 | EXPORT_SYMBOL(noop_qdisc_ops); | 620 | EXPORT_SYMBOL(noop_qdisc_ops); |
605 | EXPORT_SYMBOL(qdisc_create_dflt); | 621 | EXPORT_SYMBOL(qdisc_create_dflt); |
diff --git a/net/sched/simple.c b/net/sched/simple.c index 3ab4c675ab5d..8a6ae4f491e8 100644 --- a/net/sched/simple.c +++ b/net/sched/simple.c | |||
@@ -44,7 +44,7 @@ static DEFINE_RWLOCK(simp_lock); | |||
44 | #include <net/pkt_act.h> | 44 | #include <net/pkt_act.h> |
45 | #include <net/act_generic.h> | 45 | #include <net/act_generic.h> |
46 | 46 | ||
47 | static int tcf_simp(struct sk_buff **pskb, struct tc_action *a) | 47 | static int tcf_simp(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) |
48 | { | 48 | { |
49 | struct sk_buff *skb = *pskb; | 49 | struct sk_buff *skb = *pskb; |
50 | struct tcf_defact *p = PRIV(a, defact); | 50 | struct tcf_defact *p = PRIV(a, defact); |
diff --git a/net/sctp/input.c b/net/sctp/input.c index 742be9171b7d..28f32243397f 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c | |||
@@ -236,8 +236,8 @@ int sctp_rcv(struct sk_buff *skb) | |||
236 | } | 236 | } |
237 | 237 | ||
238 | /* SCTP seems to always need a timestamp right now (FIXME) */ | 238 | /* SCTP seems to always need a timestamp right now (FIXME) */ |
239 | if (skb->stamp.tv_sec == 0) { | 239 | if (skb->tstamp.off_sec == 0) { |
240 | do_gettimeofday(&skb->stamp); | 240 | __net_timestamp(skb); |
241 | sock_enable_timestamp(sk); | 241 | sock_enable_timestamp(sk); |
242 | } | 242 | } |
243 | 243 | ||
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e9b2fd480d61..fa3be2b8fb5f 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c | |||
@@ -66,8 +66,8 @@ | |||
66 | #include <linux/seq_file.h> | 66 | #include <linux/seq_file.h> |
67 | 67 | ||
68 | #include <net/protocol.h> | 68 | #include <net/protocol.h> |
69 | #include <net/tcp.h> | ||
70 | #include <net/ndisc.h> | 69 | #include <net/ndisc.h> |
70 | #include <net/ip.h> | ||
71 | #include <net/ipv6.h> | 71 | #include <net/ipv6.h> |
72 | #include <net/transp_v6.h> | 72 | #include <net/transp_v6.h> |
73 | #include <net/addrconf.h> | 73 | #include <net/addrconf.h> |
@@ -641,10 +641,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, | |||
641 | else | 641 | else |
642 | newinet->pmtudisc = IP_PMTUDISC_WANT; | 642 | newinet->pmtudisc = IP_PMTUDISC_WANT; |
643 | 643 | ||
644 | #ifdef INET_REFCNT_DEBUG | 644 | sk_refcnt_debug_inc(newsk); |
645 | atomic_inc(&inet6_sock_nr); | ||
646 | atomic_inc(&inet_sock_nr); | ||
647 | #endif | ||
648 | 645 | ||
649 | if (newsk->sk_prot->init(newsk)) { | 646 | if (newsk->sk_prot->init(newsk)) { |
650 | sk_common_release(newsk); | 647 | sk_common_release(newsk); |
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index ce9245e71fca..e7025be77691 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c | |||
@@ -62,7 +62,7 @@ | |||
62 | /* Global data structures. */ | 62 | /* Global data structures. */ |
63 | struct sctp_globals sctp_globals; | 63 | struct sctp_globals sctp_globals; |
64 | struct proc_dir_entry *proc_net_sctp; | 64 | struct proc_dir_entry *proc_net_sctp; |
65 | DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics); | 65 | DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly; |
66 | 66 | ||
67 | struct idr sctp_assocs_id; | 67 | struct idr sctp_assocs_id; |
68 | DEFINE_SPINLOCK(sctp_assocs_id_lock); | 68 | DEFINE_SPINLOCK(sctp_assocs_id_lock); |
@@ -78,8 +78,8 @@ static struct sctp_pf *sctp_pf_inet_specific; | |||
78 | static struct sctp_af *sctp_af_v4_specific; | 78 | static struct sctp_af *sctp_af_v4_specific; |
79 | static struct sctp_af *sctp_af_v6_specific; | 79 | static struct sctp_af *sctp_af_v6_specific; |
80 | 80 | ||
81 | kmem_cache_t *sctp_chunk_cachep; | 81 | kmem_cache_t *sctp_chunk_cachep __read_mostly; |
82 | kmem_cache_t *sctp_bucket_cachep; | 82 | kmem_cache_t *sctp_bucket_cachep __read_mostly; |
83 | 83 | ||
84 | extern int sctp_snmp_proc_init(void); | 84 | extern int sctp_snmp_proc_init(void); |
85 | extern int sctp_snmp_proc_exit(void); | 85 | extern int sctp_snmp_proc_exit(void); |
@@ -593,9 +593,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, | |||
593 | newinet->mc_index = 0; | 593 | newinet->mc_index = 0; |
594 | newinet->mc_list = NULL; | 594 | newinet->mc_list = NULL; |
595 | 595 | ||
596 | #ifdef INET_REFCNT_DEBUG | 596 | sk_refcnt_debug_inc(newsk); |
597 | atomic_inc(&inet_sock_nr); | ||
598 | #endif | ||
599 | 597 | ||
600 | if (newsk->sk_prot->init(newsk)) { | 598 | if (newsk->sk_prot->init(newsk)) { |
601 | sk_common_release(newsk); | 599 | sk_common_release(newsk); |
@@ -1244,6 +1242,10 @@ SCTP_STATIC __exit void sctp_exit(void) | |||
1244 | module_init(sctp_init); | 1242 | module_init(sctp_init); |
1245 | module_exit(sctp_exit); | 1243 | module_exit(sctp_exit); |
1246 | 1244 | ||
1245 | /* | ||
1246 | * __stringify doesn't likes enums, so use IPPROTO_SCTP value (132) directly. | ||
1247 | */ | ||
1248 | MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-132"); | ||
1247 | MODULE_AUTHOR("Linux Kernel SCTP developers <lksctp-developers@lists.sourceforge.net>"); | 1249 | MODULE_AUTHOR("Linux Kernel SCTP developers <lksctp-developers@lists.sourceforge.net>"); |
1248 | MODULE_DESCRIPTION("Support for the SCTP protocol (RFC2960)"); | 1250 | MODULE_DESCRIPTION("Support for the SCTP protocol (RFC2960)"); |
1249 | MODULE_LICENSE("GPL"); | 1251 | MODULE_LICENSE("GPL"); |
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 00d32b7c8266..3868a8d70cc0 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c | |||
@@ -1362,6 +1362,7 @@ struct sctp_association *sctp_unpack_cookie( | |||
1362 | char *key; | 1362 | char *key; |
1363 | sctp_scope_t scope; | 1363 | sctp_scope_t scope; |
1364 | struct sk_buff *skb = chunk->skb; | 1364 | struct sk_buff *skb = chunk->skb; |
1365 | struct timeval tv; | ||
1365 | 1366 | ||
1366 | headersize = sizeof(sctp_chunkhdr_t) + SCTP_SECRET_SIZE; | 1367 | headersize = sizeof(sctp_chunkhdr_t) + SCTP_SECRET_SIZE; |
1367 | bodysize = ntohs(chunk->chunk_hdr->length) - headersize; | 1368 | bodysize = ntohs(chunk->chunk_hdr->length) - headersize; |
@@ -1434,7 +1435,8 @@ no_hmac: | |||
1434 | * an association, there is no need to check cookie's expiration | 1435 | * an association, there is no need to check cookie's expiration |
1435 | * for init collision case of lost COOKIE ACK. | 1436 | * for init collision case of lost COOKIE ACK. |
1436 | */ | 1437 | */ |
1437 | if (!asoc && tv_lt(bear_cookie->expiration, skb->stamp)) { | 1438 | skb_get_timestamp(skb, &tv); |
1439 | if (!asoc && tv_lt(bear_cookie->expiration, tv)) { | ||
1438 | __u16 len; | 1440 | __u16 len; |
1439 | /* | 1441 | /* |
1440 | * Section 3.3.10.3 Stale Cookie Error (3) | 1442 | * Section 3.3.10.3 Stale Cookie Error (3) |
@@ -1447,10 +1449,9 @@ no_hmac: | |||
1447 | len = ntohs(chunk->chunk_hdr->length); | 1449 | len = ntohs(chunk->chunk_hdr->length); |
1448 | *errp = sctp_make_op_error_space(asoc, chunk, len); | 1450 | *errp = sctp_make_op_error_space(asoc, chunk, len); |
1449 | if (*errp) { | 1451 | if (*errp) { |
1450 | suseconds_t usecs = (skb->stamp.tv_sec - | 1452 | suseconds_t usecs = (tv.tv_sec - |
1451 | bear_cookie->expiration.tv_sec) * 1000000L + | 1453 | bear_cookie->expiration.tv_sec) * 1000000L + |
1452 | skb->stamp.tv_usec - | 1454 | tv.tv_usec - bear_cookie->expiration.tv_usec; |
1453 | bear_cookie->expiration.tv_usec; | ||
1454 | 1455 | ||
1455 | usecs = htonl(usecs); | 1456 | usecs = htonl(usecs); |
1456 | sctp_init_cause(*errp, SCTP_ERROR_STALE_COOKIE, | 1457 | sctp_init_cause(*errp, SCTP_ERROR_STALE_COOKIE, |
diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 091a66f06a35..4454afe4727e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c | |||
@@ -4892,7 +4892,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, | |||
4892 | sctp_skb_for_each(skb, &oldsk->sk_receive_queue, tmp) { | 4892 | sctp_skb_for_each(skb, &oldsk->sk_receive_queue, tmp) { |
4893 | event = sctp_skb2event(skb); | 4893 | event = sctp_skb2event(skb); |
4894 | if (event->asoc == assoc) { | 4894 | if (event->asoc == assoc) { |
4895 | __skb_unlink(skb, skb->list); | 4895 | __skb_unlink(skb, &oldsk->sk_receive_queue); |
4896 | __skb_queue_tail(&newsk->sk_receive_queue, skb); | 4896 | __skb_queue_tail(&newsk->sk_receive_queue, skb); |
4897 | } | 4897 | } |
4898 | } | 4898 | } |
@@ -4921,7 +4921,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, | |||
4921 | sctp_skb_for_each(skb, &oldsp->pd_lobby, tmp) { | 4921 | sctp_skb_for_each(skb, &oldsp->pd_lobby, tmp) { |
4922 | event = sctp_skb2event(skb); | 4922 | event = sctp_skb2event(skb); |
4923 | if (event->asoc == assoc) { | 4923 | if (event->asoc == assoc) { |
4924 | __skb_unlink(skb, skb->list); | 4924 | __skb_unlink(skb, &oldsp->pd_lobby); |
4925 | __skb_queue_tail(queue, skb); | 4925 | __skb_queue_tail(queue, skb); |
4926 | } | 4926 | } |
4927 | } | 4927 | } |
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 8bbc279d6c99..ec2c857eae7f 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c | |||
@@ -50,9 +50,9 @@ | |||
50 | 50 | ||
51 | /* Forward declarations for internal helpers. */ | 51 | /* Forward declarations for internal helpers. */ |
52 | static struct sctp_ulpevent * sctp_ulpq_reasm(struct sctp_ulpq *ulpq, | 52 | static struct sctp_ulpevent * sctp_ulpq_reasm(struct sctp_ulpq *ulpq, |
53 | struct sctp_ulpevent *); | 53 | struct sctp_ulpevent *); |
54 | static struct sctp_ulpevent * sctp_ulpq_order(struct sctp_ulpq *, | 54 | static struct sctp_ulpevent * sctp_ulpq_order(struct sctp_ulpq *, |
55 | struct sctp_ulpevent *); | 55 | struct sctp_ulpevent *); |
56 | 56 | ||
57 | /* 1st Level Abstractions */ | 57 | /* 1st Level Abstractions */ |
58 | 58 | ||
@@ -125,7 +125,9 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, | |||
125 | event = sctp_ulpq_order(ulpq, event); | 125 | event = sctp_ulpq_order(ulpq, event); |
126 | } | 126 | } |
127 | 127 | ||
128 | /* Send event to the ULP. */ | 128 | /* Send event to the ULP. 'event' is the sctp_ulpevent for |
129 | * very first SKB on the 'temp' list. | ||
130 | */ | ||
129 | if (event) | 131 | if (event) |
130 | sctp_ulpq_tail_event(ulpq, event); | 132 | sctp_ulpq_tail_event(ulpq, event); |
131 | 133 | ||
@@ -158,14 +160,18 @@ static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq) | |||
158 | return sctp_clear_pd(ulpq->asoc->base.sk); | 160 | return sctp_clear_pd(ulpq->asoc->base.sk); |
159 | } | 161 | } |
160 | 162 | ||
161 | 163 | /* If the SKB of 'event' is on a list, it is the first such member | |
162 | 164 | * of that list. | |
165 | */ | ||
163 | int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) | 166 | int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) |
164 | { | 167 | { |
165 | struct sock *sk = ulpq->asoc->base.sk; | 168 | struct sock *sk = ulpq->asoc->base.sk; |
166 | struct sk_buff_head *queue; | 169 | struct sk_buff_head *queue, *skb_list; |
170 | struct sk_buff *skb = sctp_event2skb(event); | ||
167 | int clear_pd = 0; | 171 | int clear_pd = 0; |
168 | 172 | ||
173 | skb_list = (struct sk_buff_head *) skb->prev; | ||
174 | |||
169 | /* If the socket is just going to throw this away, do not | 175 | /* If the socket is just going to throw this away, do not |
170 | * even try to deliver it. | 176 | * even try to deliver it. |
171 | */ | 177 | */ |
@@ -197,10 +203,10 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) | |||
197 | /* If we are harvesting multiple skbs they will be | 203 | /* If we are harvesting multiple skbs they will be |
198 | * collected on a list. | 204 | * collected on a list. |
199 | */ | 205 | */ |
200 | if (sctp_event2skb(event)->list) | 206 | if (skb_list) |
201 | sctp_skb_list_tail(sctp_event2skb(event)->list, queue); | 207 | sctp_skb_list_tail(skb_list, queue); |
202 | else | 208 | else |
203 | __skb_queue_tail(queue, sctp_event2skb(event)); | 209 | __skb_queue_tail(queue, skb); |
204 | 210 | ||
205 | /* Did we just complete partial delivery and need to get | 211 | /* Did we just complete partial delivery and need to get |
206 | * rolling again? Move pending data to the receive | 212 | * rolling again? Move pending data to the receive |
@@ -214,10 +220,11 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) | |||
214 | return 1; | 220 | return 1; |
215 | 221 | ||
216 | out_free: | 222 | out_free: |
217 | if (sctp_event2skb(event)->list) | 223 | if (skb_list) |
218 | sctp_queue_purge_ulpevents(sctp_event2skb(event)->list); | 224 | sctp_queue_purge_ulpevents(skb_list); |
219 | else | 225 | else |
220 | sctp_ulpevent_free(event); | 226 | sctp_ulpevent_free(event); |
227 | |||
221 | return 0; | 228 | return 0; |
222 | } | 229 | } |
223 | 230 | ||
@@ -269,7 +276,7 @@ static inline void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq, | |||
269 | * payload was fragmented on the way and ip had to reassemble them. | 276 | * payload was fragmented on the way and ip had to reassemble them. |
270 | * We add the rest of skb's to the first skb's fraglist. | 277 | * We add the rest of skb's to the first skb's fraglist. |
271 | */ | 278 | */ |
272 | static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff *f_frag, struct sk_buff *l_frag) | 279 | static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *queue, struct sk_buff *f_frag, struct sk_buff *l_frag) |
273 | { | 280 | { |
274 | struct sk_buff *pos; | 281 | struct sk_buff *pos; |
275 | struct sctp_ulpevent *event; | 282 | struct sctp_ulpevent *event; |
@@ -294,7 +301,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff *f_frag, | |||
294 | skb_shinfo(f_frag)->frag_list = pos; | 301 | skb_shinfo(f_frag)->frag_list = pos; |
295 | 302 | ||
296 | /* Remove the first fragment from the reassembly queue. */ | 303 | /* Remove the first fragment from the reassembly queue. */ |
297 | __skb_unlink(f_frag, f_frag->list); | 304 | __skb_unlink(f_frag, queue); |
298 | while (pos) { | 305 | while (pos) { |
299 | 306 | ||
300 | pnext = pos->next; | 307 | pnext = pos->next; |
@@ -304,7 +311,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff *f_frag, | |||
304 | f_frag->data_len += pos->len; | 311 | f_frag->data_len += pos->len; |
305 | 312 | ||
306 | /* Remove the fragment from the reassembly queue. */ | 313 | /* Remove the fragment from the reassembly queue. */ |
307 | __skb_unlink(pos, pos->list); | 314 | __skb_unlink(pos, queue); |
308 | 315 | ||
309 | /* Break if we have reached the last fragment. */ | 316 | /* Break if we have reached the last fragment. */ |
310 | if (pos == l_frag) | 317 | if (pos == l_frag) |
@@ -375,7 +382,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u | |||
375 | done: | 382 | done: |
376 | return retval; | 383 | return retval; |
377 | found: | 384 | found: |
378 | retval = sctp_make_reassembled_event(first_frag, pos); | 385 | retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, pos); |
379 | if (retval) | 386 | if (retval) |
380 | retval->msg_flags |= MSG_EOR; | 387 | retval->msg_flags |= MSG_EOR; |
381 | goto done; | 388 | goto done; |
@@ -435,7 +442,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq | |||
435 | * further. | 442 | * further. |
436 | */ | 443 | */ |
437 | done: | 444 | done: |
438 | retval = sctp_make_reassembled_event(first_frag, last_frag); | 445 | retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag); |
439 | if (retval && is_last) | 446 | if (retval && is_last) |
440 | retval->msg_flags |= MSG_EOR; | 447 | retval->msg_flags |= MSG_EOR; |
441 | 448 | ||
@@ -527,7 +534,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *u | |||
527 | * further. | 534 | * further. |
528 | */ | 535 | */ |
529 | done: | 536 | done: |
530 | retval = sctp_make_reassembled_event(first_frag, last_frag); | 537 | retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag); |
531 | return retval; | 538 | return retval; |
532 | } | 539 | } |
533 | 540 | ||
@@ -537,6 +544,7 @@ done: | |||
537 | static inline void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq, | 544 | static inline void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq, |
538 | struct sctp_ulpevent *event) | 545 | struct sctp_ulpevent *event) |
539 | { | 546 | { |
547 | struct sk_buff_head *event_list; | ||
540 | struct sk_buff *pos, *tmp; | 548 | struct sk_buff *pos, *tmp; |
541 | struct sctp_ulpevent *cevent; | 549 | struct sctp_ulpevent *cevent; |
542 | struct sctp_stream *in; | 550 | struct sctp_stream *in; |
@@ -547,6 +555,8 @@ static inline void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq, | |||
547 | ssn = event->ssn; | 555 | ssn = event->ssn; |
548 | in = &ulpq->asoc->ssnmap->in; | 556 | in = &ulpq->asoc->ssnmap->in; |
549 | 557 | ||
558 | event_list = (struct sk_buff_head *) sctp_event2skb(event)->prev; | ||
559 | |||
550 | /* We are holding the chunks by stream, by SSN. */ | 560 | /* We are holding the chunks by stream, by SSN. */ |
551 | sctp_skb_for_each(pos, &ulpq->lobby, tmp) { | 561 | sctp_skb_for_each(pos, &ulpq->lobby, tmp) { |
552 | cevent = (struct sctp_ulpevent *) pos->cb; | 562 | cevent = (struct sctp_ulpevent *) pos->cb; |
@@ -567,10 +577,10 @@ static inline void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq, | |||
567 | /* Found it, so mark in the ssnmap. */ | 577 | /* Found it, so mark in the ssnmap. */ |
568 | sctp_ssn_next(in, sid); | 578 | sctp_ssn_next(in, sid); |
569 | 579 | ||
570 | __skb_unlink(pos, pos->list); | 580 | __skb_unlink(pos, &ulpq->lobby); |
571 | 581 | ||
572 | /* Attach all gathered skbs to the event. */ | 582 | /* Attach all gathered skbs to the event. */ |
573 | __skb_queue_tail(sctp_event2skb(event)->list, pos); | 583 | __skb_queue_tail(event_list, pos); |
574 | } | 584 | } |
575 | } | 585 | } |
576 | 586 | ||
@@ -626,7 +636,7 @@ static inline void sctp_ulpq_store_ordered(struct sctp_ulpq *ulpq, | |||
626 | } | 636 | } |
627 | 637 | ||
628 | static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq, | 638 | static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq, |
629 | struct sctp_ulpevent *event) | 639 | struct sctp_ulpevent *event) |
630 | { | 640 | { |
631 | __u16 sid, ssn; | 641 | __u16 sid, ssn; |
632 | struct sctp_stream *in; | 642 | struct sctp_stream *in; |
@@ -667,7 +677,7 @@ static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq) | |||
667 | { | 677 | { |
668 | struct sk_buff *pos, *tmp; | 678 | struct sk_buff *pos, *tmp; |
669 | struct sctp_ulpevent *cevent; | 679 | struct sctp_ulpevent *cevent; |
670 | struct sctp_ulpevent *event = NULL; | 680 | struct sctp_ulpevent *event; |
671 | struct sctp_stream *in; | 681 | struct sctp_stream *in; |
672 | struct sk_buff_head temp; | 682 | struct sk_buff_head temp; |
673 | __u16 csid, cssn; | 683 | __u16 csid, cssn; |
@@ -675,6 +685,8 @@ static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq) | |||
675 | in = &ulpq->asoc->ssnmap->in; | 685 | in = &ulpq->asoc->ssnmap->in; |
676 | 686 | ||
677 | /* We are holding the chunks by stream, by SSN. */ | 687 | /* We are holding the chunks by stream, by SSN. */ |
688 | skb_queue_head_init(&temp); | ||
689 | event = NULL; | ||
678 | sctp_skb_for_each(pos, &ulpq->lobby, tmp) { | 690 | sctp_skb_for_each(pos, &ulpq->lobby, tmp) { |
679 | cevent = (struct sctp_ulpevent *) pos->cb; | 691 | cevent = (struct sctp_ulpevent *) pos->cb; |
680 | csid = cevent->stream; | 692 | csid = cevent->stream; |
@@ -686,19 +698,20 @@ static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq) | |||
686 | /* Found it, so mark in the ssnmap. */ | 698 | /* Found it, so mark in the ssnmap. */ |
687 | sctp_ssn_next(in, csid); | 699 | sctp_ssn_next(in, csid); |
688 | 700 | ||
689 | __skb_unlink(pos, pos->list); | 701 | __skb_unlink(pos, &ulpq->lobby); |
690 | if (!event) { | 702 | if (!event) { |
691 | /* Create a temporary list to collect chunks on. */ | 703 | /* Create a temporary list to collect chunks on. */ |
692 | event = sctp_skb2event(pos); | 704 | event = sctp_skb2event(pos); |
693 | skb_queue_head_init(&temp); | ||
694 | __skb_queue_tail(&temp, sctp_event2skb(event)); | 705 | __skb_queue_tail(&temp, sctp_event2skb(event)); |
695 | } else { | 706 | } else { |
696 | /* Attach all gathered skbs to the event. */ | 707 | /* Attach all gathered skbs to the event. */ |
697 | __skb_queue_tail(sctp_event2skb(event)->list, pos); | 708 | __skb_queue_tail(&temp, pos); |
698 | } | 709 | } |
699 | } | 710 | } |
700 | 711 | ||
701 | /* Send event to the ULP. */ | 712 | /* Send event to the ULP. 'event' is the sctp_ulpevent for |
713 | * very first SKB on the 'temp' list. | ||
714 | */ | ||
702 | if (event) | 715 | if (event) |
703 | sctp_ulpq_tail_event(ulpq, event); | 716 | sctp_ulpq_tail_event(ulpq, event); |
704 | } | 717 | } |
diff --git a/net/socket.c b/net/socket.c index 6f2a17881972..94fe638b4d72 100644 --- a/net/socket.c +++ b/net/socket.c | |||
@@ -70,6 +70,8 @@ | |||
70 | #include <linux/seq_file.h> | 70 | #include <linux/seq_file.h> |
71 | #include <linux/wanrouter.h> | 71 | #include <linux/wanrouter.h> |
72 | #include <linux/if_bridge.h> | 72 | #include <linux/if_bridge.h> |
73 | #include <linux/if_frad.h> | ||
74 | #include <linux/if_vlan.h> | ||
73 | #include <linux/init.h> | 75 | #include <linux/init.h> |
74 | #include <linux/poll.h> | 76 | #include <linux/poll.h> |
75 | #include <linux/cache.h> | 77 | #include <linux/cache.h> |
@@ -272,7 +274,7 @@ int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ule | |||
272 | 274 | ||
273 | #define SOCKFS_MAGIC 0x534F434B | 275 | #define SOCKFS_MAGIC 0x534F434B |
274 | 276 | ||
275 | static kmem_cache_t * sock_inode_cachep; | 277 | static kmem_cache_t * sock_inode_cachep __read_mostly; |
276 | 278 | ||
277 | static struct inode *sock_alloc_inode(struct super_block *sb) | 279 | static struct inode *sock_alloc_inode(struct super_block *sb) |
278 | { | 280 | { |
@@ -331,7 +333,7 @@ static struct super_block *sockfs_get_sb(struct file_system_type *fs_type, | |||
331 | return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC); | 333 | return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC); |
332 | } | 334 | } |
333 | 335 | ||
334 | static struct vfsmount *sock_mnt; | 336 | static struct vfsmount *sock_mnt __read_mostly; |
335 | 337 | ||
336 | static struct file_system_type sock_fs_type = { | 338 | static struct file_system_type sock_fs_type = { |
337 | .name = "sockfs", | 339 | .name = "sockfs", |
@@ -404,6 +406,7 @@ int sock_map_fd(struct socket *sock) | |||
404 | file->f_mode = FMODE_READ | FMODE_WRITE; | 406 | file->f_mode = FMODE_READ | FMODE_WRITE; |
405 | file->f_flags = O_RDWR; | 407 | file->f_flags = O_RDWR; |
406 | file->f_pos = 0; | 408 | file->f_pos = 0; |
409 | file->private_data = sock; | ||
407 | fd_install(fd, file); | 410 | fd_install(fd, file); |
408 | } | 411 | } |
409 | 412 | ||
@@ -436,6 +439,9 @@ struct socket *sockfd_lookup(int fd, int *err) | |||
436 | return NULL; | 439 | return NULL; |
437 | } | 440 | } |
438 | 441 | ||
442 | if (file->f_op == &socket_file_ops) | ||
443 | return file->private_data; /* set in sock_map_fd */ | ||
444 | |||
439 | inode = file->f_dentry->d_inode; | 445 | inode = file->f_dentry->d_inode; |
440 | if (!S_ISSOCK(inode->i_mode)) { | 446 | if (!S_ISSOCK(inode->i_mode)) { |
441 | *err = -ENOTSOCK; | 447 | *err = -ENOTSOCK; |
@@ -720,8 +726,8 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, | |||
720 | return __sock_sendmsg(iocb, sock, &x->async_msg, size); | 726 | return __sock_sendmsg(iocb, sock, &x->async_msg, size); |
721 | } | 727 | } |
722 | 728 | ||
723 | ssize_t sock_sendpage(struct file *file, struct page *page, | 729 | static ssize_t sock_sendpage(struct file *file, struct page *page, |
724 | int offset, size_t size, loff_t *ppos, int more) | 730 | int offset, size_t size, loff_t *ppos, int more) |
725 | { | 731 | { |
726 | struct socket *sock; | 732 | struct socket *sock; |
727 | int flags; | 733 | int flags; |
@@ -944,7 +950,7 @@ static int sock_mmap(struct file * file, struct vm_area_struct * vma) | |||
944 | return sock->ops->mmap(file, sock, vma); | 950 | return sock->ops->mmap(file, sock, vma); |
945 | } | 951 | } |
946 | 952 | ||
947 | int sock_close(struct inode *inode, struct file *filp) | 953 | static int sock_close(struct inode *inode, struct file *filp) |
948 | { | 954 | { |
949 | /* | 955 | /* |
950 | * It was possible the inode is NULL we were | 956 | * It was possible the inode is NULL we were |
@@ -2023,9 +2029,6 @@ int sock_unregister(int family) | |||
2023 | return 0; | 2029 | return 0; |
2024 | } | 2030 | } |
2025 | 2031 | ||
2026 | |||
2027 | extern void sk_init(void); | ||
2028 | |||
2029 | void __init sock_init(void) | 2032 | void __init sock_init(void) |
2030 | { | 2033 | { |
2031 | /* | 2034 | /* |
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 554f224c0445..fe1a73ce6cff 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c | |||
@@ -28,13 +28,13 @@ | |||
28 | #include <linux/workqueue.h> | 28 | #include <linux/workqueue.h> |
29 | #include <linux/sunrpc/rpc_pipe_fs.h> | 29 | #include <linux/sunrpc/rpc_pipe_fs.h> |
30 | 30 | ||
31 | static struct vfsmount *rpc_mount; | 31 | static struct vfsmount *rpc_mount __read_mostly; |
32 | static int rpc_mount_count; | 32 | static int rpc_mount_count; |
33 | 33 | ||
34 | static struct file_system_type rpc_pipe_fs_type; | 34 | static struct file_system_type rpc_pipe_fs_type; |
35 | 35 | ||
36 | 36 | ||
37 | static kmem_cache_t *rpc_inode_cachep; | 37 | static kmem_cache_t *rpc_inode_cachep __read_mostly; |
38 | 38 | ||
39 | #define RPC_UPCALL_TIMEOUT (30*HZ) | 39 | #define RPC_UPCALL_TIMEOUT (30*HZ) |
40 | 40 | ||
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 2d9eb7fbd521..f3104035e35d 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c | |||
@@ -34,10 +34,10 @@ static int rpc_task_id; | |||
34 | #define RPC_BUFFER_MAXSIZE (2048) | 34 | #define RPC_BUFFER_MAXSIZE (2048) |
35 | #define RPC_BUFFER_POOLSIZE (8) | 35 | #define RPC_BUFFER_POOLSIZE (8) |
36 | #define RPC_TASK_POOLSIZE (8) | 36 | #define RPC_TASK_POOLSIZE (8) |
37 | static kmem_cache_t *rpc_task_slabp; | 37 | static kmem_cache_t *rpc_task_slabp __read_mostly; |
38 | static kmem_cache_t *rpc_buffer_slabp; | 38 | static kmem_cache_t *rpc_buffer_slabp __read_mostly; |
39 | static mempool_t *rpc_task_mempool; | 39 | static mempool_t *rpc_task_mempool __read_mostly; |
40 | static mempool_t *rpc_buffer_mempool; | 40 | static mempool_t *rpc_buffer_mempool __read_mostly; |
41 | 41 | ||
42 | static void __rpc_default_timer(struct rpc_task *task); | 42 | static void __rpc_default_timer(struct rpc_task *task); |
43 | static void rpciod_killall(void); | 43 | static void rpciod_killall(void); |
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index d0c3120d0233..05fe2e735538 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c | |||
@@ -34,7 +34,7 @@ | |||
34 | #include <net/sock.h> | 34 | #include <net/sock.h> |
35 | #include <net/checksum.h> | 35 | #include <net/checksum.h> |
36 | #include <net/ip.h> | 36 | #include <net/ip.h> |
37 | #include <net/tcp.h> | 37 | #include <net/tcp_states.h> |
38 | #include <asm/uaccess.h> | 38 | #include <asm/uaccess.h> |
39 | #include <asm/ioctls.h> | 39 | #include <asm/ioctls.h> |
40 | 40 | ||
@@ -584,13 +584,16 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) | |||
584 | /* possibly an icmp error */ | 584 | /* possibly an icmp error */ |
585 | dprintk("svc: recvfrom returned error %d\n", -err); | 585 | dprintk("svc: recvfrom returned error %d\n", -err); |
586 | } | 586 | } |
587 | if (skb->stamp.tv_sec == 0) { | 587 | if (skb->tstamp.off_sec == 0) { |
588 | skb->stamp.tv_sec = xtime.tv_sec; | 588 | struct timeval tv; |
589 | skb->stamp.tv_usec = xtime.tv_nsec / NSEC_PER_USEC; | 589 | |
590 | tv.tv_sec = xtime.tv_sec; | ||
591 | tv.tv_usec = xtime.tv_nsec * 1000; | ||
592 | skb_set_timestamp(skb, &tv); | ||
590 | /* Don't enable netstamp, sunrpc doesn't | 593 | /* Don't enable netstamp, sunrpc doesn't |
591 | need that much accuracy */ | 594 | need that much accuracy */ |
592 | } | 595 | } |
593 | svsk->sk_sk->sk_stamp = skb->stamp; | 596 | skb_get_timestamp(skb, &svsk->sk_sk->sk_stamp); |
594 | set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */ | 597 | set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */ |
595 | 598 | ||
596 | /* | 599 | /* |
diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 3f6e31069c54..c5241fcbb966 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c | |||
@@ -17,17 +17,15 @@ | |||
17 | #include <linux/sysctl.h> | 17 | #include <linux/sysctl.h> |
18 | 18 | ||
19 | #ifdef CONFIG_INET | 19 | #ifdef CONFIG_INET |
20 | extern struct ctl_table ipv4_table[]; | 20 | #include <net/ip.h> |
21 | #endif | 21 | #endif |
22 | 22 | ||
23 | extern struct ctl_table core_table[]; | ||
24 | |||
25 | #ifdef CONFIG_NET | 23 | #ifdef CONFIG_NET |
26 | extern struct ctl_table ether_table[]; | 24 | #include <linux/if_ether.h> |
27 | #endif | 25 | #endif |
28 | 26 | ||
29 | #ifdef CONFIG_TR | 27 | #ifdef CONFIG_TR |
30 | extern struct ctl_table tr_table[]; | 28 | #include <linux/if_tr.h> |
31 | #endif | 29 | #endif |
32 | 30 | ||
33 | struct ctl_table net_table[] = { | 31 | struct ctl_table net_table[] = { |
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d403e34088ad..41feca3bef86 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c | |||
@@ -105,7 +105,7 @@ | |||
105 | #include <linux/skbuff.h> | 105 | #include <linux/skbuff.h> |
106 | #include <linux/netdevice.h> | 106 | #include <linux/netdevice.h> |
107 | #include <net/sock.h> | 107 | #include <net/sock.h> |
108 | #include <linux/tcp.h> | 108 | #include <net/tcp_states.h> |
109 | #include <net/af_unix.h> | 109 | #include <net/af_unix.h> |
110 | #include <linux/proc_fs.h> | 110 | #include <linux/proc_fs.h> |
111 | #include <linux/seq_file.h> | 111 | #include <linux/seq_file.h> |
@@ -2026,14 +2026,6 @@ static struct net_proto_family unix_family_ops = { | |||
2026 | .owner = THIS_MODULE, | 2026 | .owner = THIS_MODULE, |
2027 | }; | 2027 | }; |
2028 | 2028 | ||
2029 | #ifdef CONFIG_SYSCTL | ||
2030 | extern void unix_sysctl_register(void); | ||
2031 | extern void unix_sysctl_unregister(void); | ||
2032 | #else | ||
2033 | static inline void unix_sysctl_register(void) {} | ||
2034 | static inline void unix_sysctl_unregister(void) {} | ||
2035 | #endif | ||
2036 | |||
2037 | static int __init af_unix_init(void) | 2029 | static int __init af_unix_init(void) |
2038 | { | 2030 | { |
2039 | int rc = -1; | 2031 | int rc = -1; |
diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 4bd95c8f5934..6ffc64e1712d 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c | |||
@@ -76,11 +76,11 @@ | |||
76 | #include <linux/netdevice.h> | 76 | #include <linux/netdevice.h> |
77 | #include <linux/file.h> | 77 | #include <linux/file.h> |
78 | #include <linux/proc_fs.h> | 78 | #include <linux/proc_fs.h> |
79 | #include <linux/tcp.h> | ||
80 | 79 | ||
81 | #include <net/sock.h> | 80 | #include <net/sock.h> |
82 | #include <net/af_unix.h> | 81 | #include <net/af_unix.h> |
83 | #include <net/scm.h> | 82 | #include <net/scm.h> |
83 | #include <net/tcp_states.h> | ||
84 | 84 | ||
85 | /* Internal data structures and random procedures: */ | 85 | /* Internal data structures and random procedures: */ |
86 | 86 | ||
@@ -286,16 +286,16 @@ void unix_gc(void) | |||
286 | skb = skb_peek(&s->sk_receive_queue); | 286 | skb = skb_peek(&s->sk_receive_queue); |
287 | while (skb && | 287 | while (skb && |
288 | skb != (struct sk_buff *)&s->sk_receive_queue) { | 288 | skb != (struct sk_buff *)&s->sk_receive_queue) { |
289 | nextsk=skb->next; | 289 | nextsk = skb->next; |
290 | /* | 290 | /* |
291 | * Do we have file descriptors ? | 291 | * Do we have file descriptors ? |
292 | */ | 292 | */ |
293 | if(UNIXCB(skb).fp) | 293 | if (UNIXCB(skb).fp) { |
294 | { | 294 | __skb_unlink(skb, |
295 | __skb_unlink(skb, skb->list); | 295 | &s->sk_receive_queue); |
296 | __skb_queue_tail(&hitlist,skb); | 296 | __skb_queue_tail(&hitlist, skb); |
297 | } | 297 | } |
298 | skb=nextsk; | 298 | skb = nextsk; |
299 | } | 299 | } |
300 | spin_unlock(&s->sk_receive_queue.lock); | 300 | spin_unlock(&s->sk_receive_queue.lock); |
301 | } | 301 | } |
diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c index c974dac4580a..690ffa5d5bfb 100644 --- a/net/unix/sysctl_net_unix.c +++ b/net/unix/sysctl_net_unix.c | |||
@@ -12,7 +12,7 @@ | |||
12 | #include <linux/mm.h> | 12 | #include <linux/mm.h> |
13 | #include <linux/sysctl.h> | 13 | #include <linux/sysctl.h> |
14 | 14 | ||
15 | extern int sysctl_unix_max_dgram_qlen; | 15 | #include <net/af_unix.h> |
16 | 16 | ||
17 | static ctl_table unix_table[] = { | 17 | static ctl_table unix_table[] = { |
18 | { | 18 | { |
diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c index d93b19faaab7..596cb96e5f47 100644 --- a/net/wanrouter/af_wanpipe.c +++ b/net/wanrouter/af_wanpipe.c | |||
@@ -57,7 +57,7 @@ | |||
57 | #include <linux/wanpipe.h> | 57 | #include <linux/wanpipe.h> |
58 | #include <linux/if_wanpipe.h> | 58 | #include <linux/if_wanpipe.h> |
59 | #include <linux/pkt_sched.h> | 59 | #include <linux/pkt_sched.h> |
60 | #include <linux/tcp.h> | 60 | #include <linux/tcp_states.h> |
61 | #include <linux/if_wanpipe_common.h> | 61 | #include <linux/if_wanpipe_common.h> |
62 | #include <linux/sdla_x25.h> | 62 | #include <linux/sdla_x25.h> |
63 | 63 | ||
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 04bec047fa9a..020d73cc8414 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c | |||
@@ -47,7 +47,7 @@ | |||
47 | #include <linux/if_arp.h> | 47 | #include <linux/if_arp.h> |
48 | #include <linux/skbuff.h> | 48 | #include <linux/skbuff.h> |
49 | #include <net/sock.h> | 49 | #include <net/sock.h> |
50 | #include <net/tcp.h> | 50 | #include <net/tcp_states.h> |
51 | #include <asm/uaccess.h> | 51 | #include <asm/uaccess.h> |
52 | #include <linux/fcntl.h> | 52 | #include <linux/fcntl.h> |
53 | #include <linux/termios.h> /* For TIOCINQ/OUTQ */ | 53 | #include <linux/termios.h> /* For TIOCINQ/OUTQ */ |
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 36fc3bf6d882..adfe7b8df355 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c | |||
@@ -81,7 +81,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) | |||
81 | } | 81 | } |
82 | 82 | ||
83 | int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, | 83 | int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, |
84 | struct packet_type *ptype) | 84 | struct packet_type *ptype, struct net_device *orig_dev) |
85 | { | 85 | { |
86 | struct sk_buff *nskb; | 86 | struct sk_buff *nskb; |
87 | struct x25_neigh *nb; | 87 | struct x25_neigh *nb; |
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index b0197c70a9fc..26146874b839 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c | |||
@@ -28,7 +28,7 @@ | |||
28 | #include <linux/string.h> | 28 | #include <linux/string.h> |
29 | #include <linux/skbuff.h> | 29 | #include <linux/skbuff.h> |
30 | #include <net/sock.h> | 30 | #include <net/sock.h> |
31 | #include <net/tcp.h> | 31 | #include <net/tcp_states.h> |
32 | #include <net/x25.h> | 32 | #include <net/x25.h> |
33 | 33 | ||
34 | static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more) | 34 | static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more) |
diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index 7fd872ad0c20..8be9b8fbc24d 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c | |||
@@ -27,7 +27,7 @@ | |||
27 | #include <linux/string.h> | 27 | #include <linux/string.h> |
28 | #include <linux/skbuff.h> | 28 | #include <linux/skbuff.h> |
29 | #include <net/sock.h> | 29 | #include <net/sock.h> |
30 | #include <net/tcp.h> | 30 | #include <net/tcp_states.h> |
31 | #include <net/x25.h> | 31 | #include <net/x25.h> |
32 | 32 | ||
33 | /* | 33 | /* |
@@ -80,7 +80,7 @@ void x25_requeue_frames(struct sock *sk) | |||
80 | if (!skb_prev) | 80 | if (!skb_prev) |
81 | skb_queue_head(&sk->sk_write_queue, skb); | 81 | skb_queue_head(&sk->sk_write_queue, skb); |
82 | else | 82 | else |
83 | skb_append(skb_prev, skb); | 83 | skb_append(skb_prev, skb, &sk->sk_write_queue); |
84 | skb_prev = skb; | 84 | skb_prev = skb; |
85 | } | 85 | } |
86 | } | 86 | } |
diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c index d6a21a3ad80e..0a92e1da3922 100644 --- a/net/x25/x25_timer.c +++ b/net/x25/x25_timer.c | |||
@@ -23,7 +23,7 @@ | |||
23 | #include <linux/jiffies.h> | 23 | #include <linux/jiffies.h> |
24 | #include <linux/timer.h> | 24 | #include <linux/timer.h> |
25 | #include <net/sock.h> | 25 | #include <net/sock.h> |
26 | #include <net/tcp.h> | 26 | #include <net/tcp_states.h> |
27 | #include <net/x25.h> | 27 | #include <net/x25.h> |
28 | 28 | ||
29 | static void x25_heartbeat_expiry(unsigned long); | 29 | static void x25_heartbeat_expiry(unsigned long); |
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index c58a6f05a0b6..2407a7072327 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c | |||
@@ -12,7 +12,7 @@ | |||
12 | #include <net/ip.h> | 12 | #include <net/ip.h> |
13 | #include <net/xfrm.h> | 13 | #include <net/xfrm.h> |
14 | 14 | ||
15 | static kmem_cache_t *secpath_cachep; | 15 | static kmem_cache_t *secpath_cachep __read_mostly; |
16 | 16 | ||
17 | void __secpath_destroy(struct sec_path *sp) | 17 | void __secpath_destroy(struct sec_path *sp) |
18 | { | 18 | { |
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d65ed8684fc1..83c8135e1764 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c | |||
@@ -37,7 +37,7 @@ EXPORT_SYMBOL(xfrm_policy_list); | |||
37 | static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); | 37 | static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); |
38 | static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; | 38 | static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; |
39 | 39 | ||
40 | static kmem_cache_t *xfrm_dst_cache; | 40 | static kmem_cache_t *xfrm_dst_cache __read_mostly; |
41 | 41 | ||
42 | static struct work_struct xfrm_policy_gc_work; | 42 | static struct work_struct xfrm_policy_gc_work; |
43 | static struct list_head xfrm_policy_gc_list = | 43 | static struct list_head xfrm_policy_gc_list = |
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8da3e25b2c4c..c35336a0f71b 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c | |||
@@ -1125,9 +1125,8 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c) | |||
1125 | if (build_expire(skb, x, c->data.hard) < 0) | 1125 | if (build_expire(skb, x, c->data.hard) < 0) |
1126 | BUG(); | 1126 | BUG(); |
1127 | 1127 | ||
1128 | NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE; | 1128 | NETLINK_CB(skb).dst_group = XFRMNLGRP_EXPIRE; |
1129 | 1129 | return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); | |
1130 | return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC); | ||
1131 | } | 1130 | } |
1132 | 1131 | ||
1133 | static int xfrm_notify_sa_flush(struct km_event *c) | 1132 | static int xfrm_notify_sa_flush(struct km_event *c) |
@@ -1152,7 +1151,8 @@ static int xfrm_notify_sa_flush(struct km_event *c) | |||
1152 | 1151 | ||
1153 | nlh->nlmsg_len = skb->tail - b; | 1152 | nlh->nlmsg_len = skb->tail - b; |
1154 | 1153 | ||
1155 | return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_SA, GFP_ATOMIC); | 1154 | NETLINK_CB(skb).dst_group = XFRMNLGRP_SA; |
1155 | return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); | ||
1156 | 1156 | ||
1157 | nlmsg_failure: | 1157 | nlmsg_failure: |
1158 | kfree_skb(skb); | 1158 | kfree_skb(skb); |
@@ -1226,7 +1226,8 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c) | |||
1226 | 1226 | ||
1227 | nlh->nlmsg_len = skb->tail - b; | 1227 | nlh->nlmsg_len = skb->tail - b; |
1228 | 1228 | ||
1229 | return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_SA, GFP_ATOMIC); | 1229 | NETLINK_CB(skb).dst_group = XFRMNLGRP_SA; |
1230 | return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); | ||
1230 | 1231 | ||
1231 | nlmsg_failure: | 1232 | nlmsg_failure: |
1232 | rtattr_failure: | 1233 | rtattr_failure: |
@@ -1304,9 +1305,8 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, | |||
1304 | if (build_acquire(skb, x, xt, xp, dir) < 0) | 1305 | if (build_acquire(skb, x, xt, xp, dir) < 0) |
1305 | BUG(); | 1306 | BUG(); |
1306 | 1307 | ||
1307 | NETLINK_CB(skb).dst_groups = XFRMGRP_ACQUIRE; | 1308 | NETLINK_CB(skb).dst_group = XFRMNLGRP_ACQUIRE; |
1308 | 1309 | return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC); | |
1309 | return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_ACQUIRE, GFP_ATOMIC); | ||
1310 | } | 1310 | } |
1311 | 1311 | ||
1312 | /* User gives us xfrm_user_policy_info followed by an array of 0 | 1312 | /* User gives us xfrm_user_policy_info followed by an array of 0 |
@@ -1405,9 +1405,8 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve | |||
1405 | if (build_polexpire(skb, xp, dir, c->data.hard) < 0) | 1405 | if (build_polexpire(skb, xp, dir, c->data.hard) < 0) |
1406 | BUG(); | 1406 | BUG(); |
1407 | 1407 | ||
1408 | NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE; | 1408 | NETLINK_CB(skb).dst_group = XFRMNLGRP_EXPIRE; |
1409 | 1409 | return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); | |
1410 | return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC); | ||
1411 | } | 1410 | } |
1412 | 1411 | ||
1413 | static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c) | 1412 | static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c) |
@@ -1455,7 +1454,8 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * | |||
1455 | 1454 | ||
1456 | nlh->nlmsg_len = skb->tail - b; | 1455 | nlh->nlmsg_len = skb->tail - b; |
1457 | 1456 | ||
1458 | return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_POLICY, GFP_ATOMIC); | 1457 | NETLINK_CB(skb).dst_group = XFRMNLGRP_POLICY; |
1458 | return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); | ||
1459 | 1459 | ||
1460 | nlmsg_failure: | 1460 | nlmsg_failure: |
1461 | rtattr_failure: | 1461 | rtattr_failure: |
@@ -1480,7 +1480,8 @@ static int xfrm_notify_policy_flush(struct km_event *c) | |||
1480 | 1480 | ||
1481 | nlh->nlmsg_len = skb->tail - b; | 1481 | nlh->nlmsg_len = skb->tail - b; |
1482 | 1482 | ||
1483 | return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_POLICY, GFP_ATOMIC); | 1483 | NETLINK_CB(skb).dst_group = XFRMNLGRP_POLICY; |
1484 | return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); | ||
1484 | 1485 | ||
1485 | nlmsg_failure: | 1486 | nlmsg_failure: |
1486 | kfree_skb(skb); | 1487 | kfree_skb(skb); |
@@ -1519,7 +1520,8 @@ static int __init xfrm_user_init(void) | |||
1519 | { | 1520 | { |
1520 | printk(KERN_INFO "Initializing IPsec netlink socket\n"); | 1521 | printk(KERN_INFO "Initializing IPsec netlink socket\n"); |
1521 | 1522 | ||
1522 | xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv); | 1523 | xfrm_nl = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX, |
1524 | xfrm_netlink_rcv, THIS_MODULE); | ||
1523 | if (xfrm_nl == NULL) | 1525 | if (xfrm_nl == NULL) |
1524 | return -ENOMEM; | 1526 | return -ENOMEM; |
1525 | 1527 | ||
@@ -1537,3 +1539,4 @@ static void __exit xfrm_user_exit(void) | |||
1537 | module_init(xfrm_user_init); | 1539 | module_init(xfrm_user_init); |
1538 | module_exit(xfrm_user_exit); | 1540 | module_exit(xfrm_user_exit); |
1539 | MODULE_LICENSE("GPL"); | 1541 | MODULE_LICENSE("GPL"); |
1542 | MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_XFRM); | ||
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 5180405c1a84..d8ee38aede26 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c | |||
@@ -341,6 +341,22 @@ static int do_of_entry (const char *filename, struct of_device_id *of, char *ali | |||
341 | return 1; | 341 | return 1; |
342 | } | 342 | } |
343 | 343 | ||
344 | static int do_vio_entry(const char *filename, struct vio_device_id *vio, | ||
345 | char *alias) | ||
346 | { | ||
347 | char *tmp; | ||
348 | |||
349 | sprintf(alias, "vio:T%sS%s", vio->type[0] ? vio->type : "*", | ||
350 | vio->compat[0] ? vio->compat : "*"); | ||
351 | |||
352 | /* Replace all whitespace with underscores */ | ||
353 | for (tmp = alias; tmp && *tmp; tmp++) | ||
354 | if (isspace (*tmp)) | ||
355 | *tmp = '_'; | ||
356 | |||
357 | return 1; | ||
358 | } | ||
359 | |||
344 | /* Ignore any prefix, eg. v850 prepends _ */ | 360 | /* Ignore any prefix, eg. v850 prepends _ */ |
345 | static inline int sym_is(const char *symbol, const char *name) | 361 | static inline int sym_is(const char *symbol, const char *name) |
346 | { | 362 | { |
@@ -422,6 +438,9 @@ void handle_moddevtable(struct module *mod, struct elf_info *info, | |||
422 | else if (sym_is(symname, "__mod_of_device_table")) | 438 | else if (sym_is(symname, "__mod_of_device_table")) |
423 | do_table(symval, sym->st_size, sizeof(struct of_device_id), | 439 | do_table(symval, sym->st_size, sizeof(struct of_device_id), |
424 | do_of_entry, mod); | 440 | do_of_entry, mod); |
441 | else if (sym_is(symname, "__mod_vio_device_table")) | ||
442 | do_table(symval, sym->st_size, sizeof(struct vio_device_id), | ||
443 | do_vio_entry, mod); | ||
425 | 444 | ||
426 | } | 445 | } |
427 | 446 | ||
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 2253f388234f..8641f8894b4c 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c | |||
@@ -659,7 +659,7 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc | |||
659 | return SECCLASS_NETLINK_ROUTE_SOCKET; | 659 | return SECCLASS_NETLINK_ROUTE_SOCKET; |
660 | case NETLINK_FIREWALL: | 660 | case NETLINK_FIREWALL: |
661 | return SECCLASS_NETLINK_FIREWALL_SOCKET; | 661 | return SECCLASS_NETLINK_FIREWALL_SOCKET; |
662 | case NETLINK_TCPDIAG: | 662 | case NETLINK_INET_DIAG: |
663 | return SECCLASS_NETLINK_TCPDIAG_SOCKET; | 663 | return SECCLASS_NETLINK_TCPDIAG_SOCKET; |
664 | case NETLINK_NFLOG: | 664 | case NETLINK_NFLOG: |
665 | return SECCLASS_NETLINK_NFLOG_SOCKET; | 665 | return SECCLASS_NETLINK_NFLOG_SOCKET; |
diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c index 18d08acafa78..e203883406dd 100644 --- a/security/selinux/netlink.c +++ b/security/selinux/netlink.c | |||
@@ -80,7 +80,8 @@ static void selnl_notify(int msgtype, void *data) | |||
80 | nlh = NLMSG_PUT(skb, 0, 0, msgtype, len); | 80 | nlh = NLMSG_PUT(skb, 0, 0, msgtype, len); |
81 | selnl_add_payload(nlh, len, msgtype, data); | 81 | selnl_add_payload(nlh, len, msgtype, data); |
82 | nlh->nlmsg_len = skb->tail - tmp; | 82 | nlh->nlmsg_len = skb->tail - tmp; |
83 | netlink_broadcast(selnl, skb, 0, SELNL_GRP_AVC, GFP_USER); | 83 | NETLINK_CB(skb).dst_group = SELNLGRP_AVC; |
84 | netlink_broadcast(selnl, skb, 0, SELNLGRP_AVC, GFP_USER); | ||
84 | out: | 85 | out: |
85 | return; | 86 | return; |
86 | 87 | ||
@@ -103,7 +104,8 @@ void selnl_notify_policyload(u32 seqno) | |||
103 | 104 | ||
104 | static int __init selnl_init(void) | 105 | static int __init selnl_init(void) |
105 | { | 106 | { |
106 | selnl = netlink_kernel_create(NETLINK_SELINUX, NULL); | 107 | selnl = netlink_kernel_create(NETLINK_SELINUX, SELNLGRP_MAX, NULL, |
108 | THIS_MODULE); | ||
107 | if (selnl == NULL) | 109 | if (selnl == NULL) |
108 | panic("SELinux: Cannot create netlink socket."); | 110 | panic("SELinux: Cannot create netlink socket."); |
109 | netlink_set_nonroot(NETLINK_SELINUX, NL_NONROOT_RECV); | 111 | netlink_set_nonroot(NETLINK_SELINUX, NL_NONROOT_RECV); |
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index 92b057becb4b..69b9329b2054 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c | |||
@@ -16,7 +16,7 @@ | |||
16 | #include <linux/rtnetlink.h> | 16 | #include <linux/rtnetlink.h> |
17 | #include <linux/if.h> | 17 | #include <linux/if.h> |
18 | #include <linux/netfilter_ipv4/ip_queue.h> | 18 | #include <linux/netfilter_ipv4/ip_queue.h> |
19 | #include <linux/tcp_diag.h> | 19 | #include <linux/inet_diag.h> |
20 | #include <linux/xfrm.h> | 20 | #include <linux/xfrm.h> |
21 | #include <linux/audit.h> | 21 | #include <linux/audit.h> |
22 | 22 | ||
@@ -76,6 +76,7 @@ static struct nlmsg_perm nlmsg_firewall_perms[] = | |||
76 | static struct nlmsg_perm nlmsg_tcpdiag_perms[] = | 76 | static struct nlmsg_perm nlmsg_tcpdiag_perms[] = |
77 | { | 77 | { |
78 | { TCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, | 78 | { TCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, |
79 | { DCCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, | ||
79 | }; | 80 | }; |
80 | 81 | ||
81 | static struct nlmsg_perm nlmsg_xfrm_perms[] = | 82 | static struct nlmsg_perm nlmsg_xfrm_perms[] = |