aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/edac.txt8
-rw-r--r--MAINTAINERS2
-rw-r--r--arch/arm/mach-s3c2410/h1940-bluetooth.c8
-rw-r--r--arch/arm/mach-s3c2416/irq.c5
-rw-r--r--arch/arm/mach-s3c2443/irq.c5
-rw-r--r--arch/arm/mach-s3c64xx/mach-mini6410.c2
-rw-r--r--arch/arm/mach-s3c64xx/mach-real6410.c2
-rw-r--r--arch/arm/mach-s5pv210/mach-smdkc110.c1
-rw-r--r--arch/arm/mach-s5pv210/mach-smdkv210.c1
-rw-r--r--arch/arm/mach-shmobile/board-ap4evb.c147
-rw-r--r--arch/arm/mach-shmobile/clock-sh7372.c33
-rw-r--r--arch/arm/plat-pxa/include/plat/sdhci.h3
-rw-r--r--arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c6
-rw-r--r--arch/arm/plat-s3c24xx/spi-bus1-gpd8_9_10.c6
-rw-r--r--arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c6
-rw-r--r--arch/s390/kernel/nmi.c10
-rw-r--r--arch/s390/lib/delay.c14
-rw-r--r--arch/tile/Kconfig12
-rw-r--r--arch/tile/include/asm/cacheflush.h52
-rw-r--r--arch/tile/include/asm/io.h15
-rw-r--r--arch/tile/include/asm/pci-bridge.h117
-rw-r--r--arch/tile/include/asm/pci.h107
-rw-r--r--arch/tile/include/asm/processor.h10
-rw-r--r--arch/tile/include/hv/drv_xgbe_impl.h300
-rw-r--r--arch/tile/include/hv/drv_xgbe_intf.h615
-rw-r--r--arch/tile/include/hv/netio_errors.h122
-rw-r--r--arch/tile/include/hv/netio_intf.h2975
-rw-r--r--arch/tile/kernel/Makefile1
-rw-r--r--arch/tile/kernel/pci.c621
-rw-r--r--arch/tile/kernel/setup.c2
-rw-r--r--arch/tile/lib/memchr_32.c35
-rw-r--r--arch/tile/lib/spinlock_32.c29
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/include/asm/msr-index.h2
-rw-r--r--arch/x86/include/asm/paravirt.h10
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h4
-rw-r--r--arch/x86/include/asm/uv/uv_mmrs.h19
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c7
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c25
-rw-r--r--arch/x86/kernel/cpu/perf_event.c20
-rw-r--r--arch/x86/kernel/entry_32.S2
-rw-r--r--arch/x86/kernel/entry_64.S2
-rw-r--r--arch/x86/kernel/hw_breakpoint.c4
-rw-r--r--arch/x86/kernel/mmconf-fam10h_64.c64
-rw-r--r--arch/x86/mm/tlb.c5
-rw-r--r--arch/x86/platform/uv/tlb_uv.c2
-rw-r--r--arch/x86/platform/uv/uv_time.c4
-rw-r--r--arch/x86/xen/enlighten.c2
-rw-r--r--arch/x86/xen/mmu.c69
-rw-r--r--arch/x86/xen/setup.c1
-rw-r--r--block/blk-throttle.c2
-rw-r--r--drivers/block/amiflop.c2
-rw-r--r--drivers/block/ataflop.c2
-rw-r--r--drivers/block/cciss.c3
-rw-r--r--drivers/block/xen-blkfront.c55
-rw-r--r--drivers/edac/Makefile8
-rw-r--r--drivers/edac/mce_amd_inj.c2
-rw-r--r--drivers/firewire/net.c160
-rw-r--r--drivers/hwmon/i5k_amb.c2
-rw-r--r--drivers/hwmon/lis3lv02d_i2c.c10
-rw-r--r--drivers/mmc/core/core.c2
-rw-r--r--drivers/mmc/core/mmc.c8
-rw-r--r--drivers/mmc/core/sdio.c51
-rw-r--r--drivers/mmc/core/sdio_bus.c33
-rw-r--r--drivers/mmc/host/omap_hsmmc.c2
-rw-r--r--drivers/mmc/host/sdhci-esdhc-imx.c12
-rw-r--r--drivers/mmc/host/sdhci-pci.c31
-rw-r--r--drivers/mmc/host/sdhci-pxa.c4
-rw-r--r--drivers/mmc/host/sdhci.c54
-rw-r--r--drivers/mmc/host/sdhci.h9
-rw-r--r--drivers/mmc/host/ushc.c30
-rw-r--r--drivers/mtd/ubi/scan.c16
-rw-r--r--drivers/net/Kconfig12
-rw-r--r--drivers/net/Makefile1
-rw-r--r--drivers/net/tile/Makefile10
-rw-r--r--drivers/net/tile/tilepro.c2406
-rw-r--r--drivers/pci/Makefile1
-rw-r--r--drivers/pci/quirks.c18
-rw-r--r--drivers/s390/cio/qdio_thinint.c2
-rw-r--r--fs/btrfs/compression.c15
-rw-r--r--fs/btrfs/ctree.h6
-rw-r--r--fs/btrfs/disk-io.c38
-rw-r--r--fs/btrfs/export.c76
-rw-r--r--fs/btrfs/extent-tree.c2
-rw-r--r--fs/btrfs/extent_io.c77
-rw-r--r--fs/btrfs/extent_io.h3
-rw-r--r--fs/btrfs/file.c7
-rw-r--r--fs/btrfs/inode.c294
-rw-r--r--fs/btrfs/ioctl.c31
-rw-r--r--fs/btrfs/ordered-data.c67
-rw-r--r--fs/btrfs/ordered-data.h3
-rw-r--r--fs/btrfs/super.c41
-rw-r--r--fs/btrfs/transaction.c5
-rw-r--r--fs/btrfs/tree-log.c21
-rw-r--r--fs/gfs2/quota.c15
-rw-r--r--fs/ioprio.c31
-rw-r--r--fs/nfs/dir.c62
-rw-r--r--fs/nfs/direct.c2
-rw-r--r--fs/nfs/internal.h9
-rw-r--r--fs/nfs/nfs2xdr.c4
-rw-r--r--fs/nfs/nfs3xdr.c4
-rw-r--r--fs/nfs/nfs4xdr.c6
-rw-r--r--fs/nilfs2/dat.c2
-rw-r--r--fs/nilfs2/ioctl.c4
-rw-r--r--fs/pipe.c14
-rw-r--r--fs/splice.c24
-rw-r--r--include/linux/dmar.h17
-rw-r--r--include/linux/hw_breakpoint.h4
-rw-r--r--include/linux/mmc/host.h1
-rw-r--r--include/linux/nfs_xdr.h1
-rw-r--r--include/linux/pci_ids.h1
-rw-r--r--include/linux/perf_event.h30
-rw-r--r--include/linux/pipe_fs_i.h1
-rw-r--r--include/sound/sh_fsi.h6
-rw-r--r--kernel/hw_breakpoint.c3
-rw-r--r--kernel/irq_work.c4
-rw-r--r--kernel/module.c12
-rw-r--r--kernel/perf_event.c93
-rw-r--r--kernel/posix-cpu-timers.c12
-rw-r--r--kernel/sched_fair.c8
-rw-r--r--kernel/trace/trace.c19
-rw-r--r--net/sunrpc/clnt.c24
-rw-r--r--sound/pci/hda/patch_realtek.c56
-rw-r--r--sound/soc/atmel/sam9g20_wm8731.c9
-rw-r--r--sound/soc/atmel/snd-soc-afeb9260.c1
-rw-r--r--sound/soc/codecs/max98088.c3
-rw-r--r--sound/soc/codecs/stac9766.c1
-rw-r--r--sound/soc/codecs/tlv320aic3x.c6
-rw-r--r--sound/soc/codecs/tpa6130a2.c4
-rw-r--r--sound/soc/codecs/wm8523.c1
-rw-r--r--sound/soc/codecs/wm8904.c2
-rw-r--r--sound/soc/codecs/wm8961.c4
-rw-r--r--sound/soc/codecs/wm8994.c4
-rw-r--r--sound/soc/davinci/davinci-vcif.c3
-rw-r--r--sound/soc/ep93xx/simone.c18
-rw-r--r--sound/soc/fsl/efika-audio-fabric.c1
-rw-r--r--sound/soc/fsl/mpc5200_dma.c1
-rw-r--r--sound/soc/fsl/pcm030-audio-fabric.c1
-rw-r--r--sound/soc/imx/imx-ssi.c15
-rw-r--r--sound/soc/imx/phycore-ac97.c28
-rw-r--r--sound/soc/nuc900/nuc900-ac97.c2
-rw-r--r--sound/soc/omap/omap3pandora.c1
-rw-r--r--sound/soc/omap/osk5912.c11
-rw-r--r--sound/soc/pxa/Kconfig1
-rw-r--r--sound/soc/s3c24xx/smdk_spdif.c4
-rw-r--r--sound/soc/sh/fsi.c25
-rw-r--r--sound/soc/sh/ssi.c2
-rw-r--r--tools/perf/builtin-record.c17
-rw-r--r--tools/perf/util/symbol.c4
149 files changed, 8907 insertions, 881 deletions
diff --git a/Documentation/edac.txt b/Documentation/edac.txt
index 0b875e8da969..9ee774de57cd 100644
--- a/Documentation/edac.txt
+++ b/Documentation/edac.txt
@@ -196,7 +196,7 @@ csrow3.
196The representation of the above is reflected in the directory tree 196The representation of the above is reflected in the directory tree
197in EDAC's sysfs interface. Starting in directory 197in EDAC's sysfs interface. Starting in directory
198/sys/devices/system/edac/mc each memory controller will be represented 198/sys/devices/system/edac/mc each memory controller will be represented
199by its own 'mcX' directory, where 'X" is the index of the MC. 199by its own 'mcX' directory, where 'X' is the index of the MC.
200 200
201 201
202 ..../edac/mc/ 202 ..../edac/mc/
@@ -207,7 +207,7 @@ by its own 'mcX' directory, where 'X" is the index of the MC.
207 .... 207 ....
208 208
209Under each 'mcX' directory each 'csrowX' is again represented by a 209Under each 'mcX' directory each 'csrowX' is again represented by a
210'csrowX', where 'X" is the csrow index: 210'csrowX', where 'X' is the csrow index:
211 211
212 212
213 .../mc/mc0/ 213 .../mc/mc0/
@@ -232,7 +232,7 @@ EDAC control and attribute files.
232 232
233 233
234In 'mcX' directories are EDAC control and attribute files for 234In 'mcX' directories are EDAC control and attribute files for
235this 'X" instance of the memory controllers: 235this 'X' instance of the memory controllers:
236 236
237 237
238Counter reset control file: 238Counter reset control file:
@@ -343,7 +343,7 @@ Sdram memory scrubbing rate:
343'csrowX' DIRECTORIES 343'csrowX' DIRECTORIES
344 344
345In the 'csrowX' directories are EDAC control and attribute files for 345In the 'csrowX' directories are EDAC control and attribute files for
346this 'X" instance of csrow: 346this 'X' instance of csrow:
347 347
348 348
349Total Uncorrectable Errors count attribute file: 349Total Uncorrectable Errors count attribute file:
diff --git a/MAINTAINERS b/MAINTAINERS
index 98fc0450d7e1..b3be8b3d0437 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5839,6 +5839,8 @@ M: Chris Metcalf <cmetcalf@tilera.com>
5839W: http://www.tilera.com/scm/ 5839W: http://www.tilera.com/scm/
5840S: Supported 5840S: Supported
5841F: arch/tile/ 5841F: arch/tile/
5842F: drivers/char/hvc_tile.c
5843F: drivers/net/tile/
5842 5844
5843TLAN NETWORK DRIVER 5845TLAN NETWORK DRIVER
5844M: Samuel Chessman <chessman@tux.org> 5846M: Samuel Chessman <chessman@tux.org>
diff --git a/arch/arm/mach-s3c2410/h1940-bluetooth.c b/arch/arm/mach-s3c2410/h1940-bluetooth.c
index 8aa2f1902a94..6b86a722a7db 100644
--- a/arch/arm/mach-s3c2410/h1940-bluetooth.c
+++ b/arch/arm/mach-s3c2410/h1940-bluetooth.c
@@ -77,13 +77,13 @@ static int __devinit h1940bt_probe(struct platform_device *pdev)
77 77
78 /* Configures BT serial port GPIOs */ 78 /* Configures BT serial port GPIOs */
79 s3c_gpio_cfgpin(S3C2410_GPH(0), S3C2410_GPH0_nCTS0); 79 s3c_gpio_cfgpin(S3C2410_GPH(0), S3C2410_GPH0_nCTS0);
80 s3c_gpio_cfgpull(S3C2410_GPH(0), S3C_GPIO_PULL_NONE); 80 s3c_gpio_setpull(S3C2410_GPH(0), S3C_GPIO_PULL_NONE);
81 s3c_gpio_cfgpin(S3C2410_GPH(1), S3C2410_GPIO_OUTPUT); 81 s3c_gpio_cfgpin(S3C2410_GPH(1), S3C2410_GPIO_OUTPUT);
82 s3c_gpio_cfgpull(S3C2410_GPH(1), S3C_GPIO_PULL_NONE); 82 s3c_gpio_setpull(S3C2410_GPH(1), S3C_GPIO_PULL_NONE);
83 s3c_gpio_cfgpin(S3C2410_GPH(2), S3C2410_GPH2_TXD0); 83 s3c_gpio_cfgpin(S3C2410_GPH(2), S3C2410_GPH2_TXD0);
84 s3c_gpio_cfgpull(S3C2410_GPH(2), S3C_GPIO_PULL_NONE); 84 s3c_gpio_setpull(S3C2410_GPH(2), S3C_GPIO_PULL_NONE);
85 s3c_gpio_cfgpin(S3C2410_GPH(3), S3C2410_GPH3_RXD0); 85 s3c_gpio_cfgpin(S3C2410_GPH(3), S3C2410_GPH3_RXD0);
86 s3c_gpio_cfgpull(S3C2410_GPH(3), S3C_GPIO_PULL_NONE); 86 s3c_gpio_setpull(S3C2410_GPH(3), S3C_GPIO_PULL_NONE);
87 87
88 88
89 rfk = rfkill_alloc(DRV_NAME, &pdev->dev, RFKILL_TYPE_BLUETOOTH, 89 rfk = rfkill_alloc(DRV_NAME, &pdev->dev, RFKILL_TYPE_BLUETOOTH,
diff --git a/arch/arm/mach-s3c2416/irq.c b/arch/arm/mach-s3c2416/irq.c
index 084d121f368c..00174daf1526 100644
--- a/arch/arm/mach-s3c2416/irq.c
+++ b/arch/arm/mach-s3c2416/irq.c
@@ -168,12 +168,11 @@ static struct irq_chip s3c2416_irq_dma = {
168 168
169static void s3c2416_irq_demux_uart3(unsigned int irq, struct irq_desc *desc) 169static void s3c2416_irq_demux_uart3(unsigned int irq, struct irq_desc *desc)
170{ 170{
171 s3c2416_irq_demux(IRQ_S3C2443_UART3, 3); 171 s3c2416_irq_demux(IRQ_S3C2443_RX3, 3);
172} 172}
173 173
174#define INTMSK_UART3 (1UL << (IRQ_S3C2443_UART3 - IRQ_EINT0)) 174#define INTMSK_UART3 (1UL << (IRQ_S3C2443_UART3 - IRQ_EINT0))
175#define SUBMSK_UART3 (0xf << (IRQ_S3C2443_RX3 - S3C2410_IRQSUB(0))) 175#define SUBMSK_UART3 (0x7 << (IRQ_S3C2443_RX3 - S3C2410_IRQSUB(0)))
176
177 176
178static void s3c2416_irq_uart3_mask(unsigned int irqno) 177static void s3c2416_irq_uart3_mask(unsigned int irqno)
179{ 178{
diff --git a/arch/arm/mach-s3c2443/irq.c b/arch/arm/mach-s3c2443/irq.c
index 0e0d693f3974..893424767ce1 100644
--- a/arch/arm/mach-s3c2443/irq.c
+++ b/arch/arm/mach-s3c2443/irq.c
@@ -166,12 +166,11 @@ static struct irq_chip s3c2443_irq_dma = {
166 166
167static void s3c2443_irq_demux_uart3(unsigned int irq, struct irq_desc *desc) 167static void s3c2443_irq_demux_uart3(unsigned int irq, struct irq_desc *desc)
168{ 168{
169 s3c2443_irq_demux(IRQ_S3C2443_UART3, 3); 169 s3c2443_irq_demux(IRQ_S3C2443_RX3, 3);
170} 170}
171 171
172#define INTMSK_UART3 (1UL << (IRQ_S3C2443_UART3 - IRQ_EINT0)) 172#define INTMSK_UART3 (1UL << (IRQ_S3C2443_UART3 - IRQ_EINT0))
173#define SUBMSK_UART3 (0xf << (IRQ_S3C2443_RX3 - S3C2410_IRQSUB(0))) 173#define SUBMSK_UART3 (0x7 << (IRQ_S3C2443_RX3 - S3C2410_IRQSUB(0)))
174
175 174
176static void s3c2443_irq_uart3_mask(unsigned int irqno) 175static void s3c2443_irq_uart3_mask(unsigned int irqno)
177{ 176{
diff --git a/arch/arm/mach-s3c64xx/mach-mini6410.c b/arch/arm/mach-s3c64xx/mach-mini6410.c
index 249c62956471..89f35e02e883 100644
--- a/arch/arm/mach-s3c64xx/mach-mini6410.c
+++ b/arch/arm/mach-s3c64xx/mach-mini6410.c
@@ -45,7 +45,7 @@
45 45
46#include <video/platform_lcd.h> 46#include <video/platform_lcd.h>
47 47
48#define UCON (S3C2410_UCON_DEFAULT | S3C2410_UCON_UCLK) 48#define UCON S3C2410_UCON_DEFAULT
49#define ULCON (S3C2410_LCON_CS8 | S3C2410_LCON_PNONE | S3C2410_LCON_STOPB) 49#define ULCON (S3C2410_LCON_CS8 | S3C2410_LCON_PNONE | S3C2410_LCON_STOPB)
50#define UFCON (S3C2410_UFCON_RXTRIG8 | S3C2410_UFCON_FIFOMODE) 50#define UFCON (S3C2410_UFCON_RXTRIG8 | S3C2410_UFCON_FIFOMODE)
51 51
diff --git a/arch/arm/mach-s3c64xx/mach-real6410.c b/arch/arm/mach-s3c64xx/mach-real6410.c
index f9ef9b5c5f5a..4957ab0a0d4a 100644
--- a/arch/arm/mach-s3c64xx/mach-real6410.c
+++ b/arch/arm/mach-s3c64xx/mach-real6410.c
@@ -46,7 +46,7 @@
46 46
47#include <video/platform_lcd.h> 47#include <video/platform_lcd.h>
48 48
49#define UCON (S3C2410_UCON_DEFAULT | S3C2410_UCON_UCLK) 49#define UCON S3C2410_UCON_DEFAULT
50#define ULCON (S3C2410_LCON_CS8 | S3C2410_LCON_PNONE | S3C2410_LCON_STOPB) 50#define ULCON (S3C2410_LCON_CS8 | S3C2410_LCON_PNONE | S3C2410_LCON_STOPB)
51#define UFCON (S3C2410_UFCON_RXTRIG8 | S3C2410_UFCON_FIFOMODE) 51#define UFCON (S3C2410_UFCON_RXTRIG8 | S3C2410_UFCON_FIFOMODE)
52 52
diff --git a/arch/arm/mach-s5pv210/mach-smdkc110.c b/arch/arm/mach-s5pv210/mach-smdkc110.c
index 0ad7924fe62e..5dd1681c069e 100644
--- a/arch/arm/mach-s5pv210/mach-smdkc110.c
+++ b/arch/arm/mach-s5pv210/mach-smdkc110.c
@@ -13,6 +13,7 @@
13#include <linux/init.h> 13#include <linux/init.h>
14#include <linux/serial_core.h> 14#include <linux/serial_core.h>
15#include <linux/i2c.h> 15#include <linux/i2c.h>
16#include <linux/sysdev.h>
16 17
17#include <asm/mach/arch.h> 18#include <asm/mach/arch.h>
18#include <asm/mach/map.h> 19#include <asm/mach/map.h>
diff --git a/arch/arm/mach-s5pv210/mach-smdkv210.c b/arch/arm/mach-s5pv210/mach-smdkv210.c
index bcd7a5d53401..1fbc45b2a432 100644
--- a/arch/arm/mach-s5pv210/mach-smdkv210.c
+++ b/arch/arm/mach-s5pv210/mach-smdkv210.c
@@ -13,6 +13,7 @@
13#include <linux/i2c.h> 13#include <linux/i2c.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/serial_core.h> 15#include <linux/serial_core.h>
16#include <linux/sysdev.h>
16 17
17#include <asm/mach/arch.h> 18#include <asm/mach/arch.h>
18#include <asm/mach/map.h> 19#include <asm/mach/map.h>
diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c
index d3260542b943..d440e5f456ad 100644
--- a/arch/arm/mach-shmobile/board-ap4evb.c
+++ b/arch/arm/mach-shmobile/board-ap4evb.c
@@ -567,38 +567,127 @@ static struct platform_device *qhd_devices[] __initdata = {
567 567
568/* FSI */ 568/* FSI */
569#define IRQ_FSI evt2irq(0x1840) 569#define IRQ_FSI evt2irq(0x1840)
570static int __fsi_set_rate(struct clk *clk, long rate, int enable)
571{
572 int ret = 0;
573
574 if (rate <= 0)
575 return ret;
576
577 if (enable) {
578 ret = clk_set_rate(clk, rate);
579 if (0 == ret)
580 ret = clk_enable(clk);
581 } else {
582 clk_disable(clk);
583 }
584
585 return ret;
586}
587
588static int __fsi_set_round_rate(struct clk *clk, long rate, int enable)
589{
590 return __fsi_set_rate(clk, clk_round_rate(clk, rate), enable);
591}
570 592
571static int fsi_set_rate(int is_porta, int rate) 593static int fsi_ak4642_set_rate(struct device *dev, int rate, int enable)
594{
595 struct clk *fsia_ick;
596 struct clk *fsiack;
597 int ret = -EIO;
598
599 fsia_ick = clk_get(dev, "icka");
600 if (IS_ERR(fsia_ick))
601 return PTR_ERR(fsia_ick);
602
603 /*
604 * FSIACK is connected to AK4642,
605 * and use external clock pin from it.
606 * it is parent of fsia_ick now.
607 */
608 fsiack = clk_get_parent(fsia_ick);
609 if (!fsiack)
610 goto fsia_ick_out;
611
612 /*
613 * we get 1/1 divided clock by setting same rate to fsiack and fsia_ick
614 *
615 ** FIXME **
616 * Because the freq_table of external clk (fsiack) are all 0,
617 * the return value of clk_round_rate became 0.
618 * So, it use __fsi_set_rate here.
619 */
620 ret = __fsi_set_rate(fsiack, rate, enable);
621 if (ret < 0)
622 goto fsiack_out;
623
624 ret = __fsi_set_round_rate(fsia_ick, rate, enable);
625 if ((ret < 0) && enable)
626 __fsi_set_round_rate(fsiack, rate, 0); /* disable FSI ACK */
627
628fsiack_out:
629 clk_put(fsiack);
630
631fsia_ick_out:
632 clk_put(fsia_ick);
633
634 return 0;
635}
636
637static int fsi_hdmi_set_rate(struct device *dev, int rate, int enable)
572{ 638{
573 struct clk *fsib_clk; 639 struct clk *fsib_clk;
574 struct clk *fdiv_clk = &sh7372_fsidivb_clk; 640 struct clk *fdiv_clk = &sh7372_fsidivb_clk;
641 long fsib_rate = 0;
642 long fdiv_rate = 0;
643 int ackmd_bpfmd;
575 int ret; 644 int ret;
576 645
577 /* set_rate is not needed if port A */
578 if (is_porta)
579 return 0;
580
581 fsib_clk = clk_get(NULL, "fsib_clk");
582 if (IS_ERR(fsib_clk))
583 return -EINVAL;
584
585 switch (rate) { 646 switch (rate) {
586 case 44100: 647 case 44100:
587 clk_set_rate(fsib_clk, clk_round_rate(fsib_clk, 11283000)); 648 fsib_rate = rate * 256;
588 ret = SH_FSI_ACKMD_256 | SH_FSI_BPFMD_64; 649 ackmd_bpfmd = SH_FSI_ACKMD_256 | SH_FSI_BPFMD_64;
589 break; 650 break;
590 case 48000: 651 case 48000:
591 clk_set_rate(fsib_clk, clk_round_rate(fsib_clk, 85428000)); 652 fsib_rate = 85428000; /* around 48kHz x 256 x 7 */
592 clk_set_rate(fdiv_clk, clk_round_rate(fdiv_clk, 12204000)); 653 fdiv_rate = rate * 256;
593 ret = SH_FSI_ACKMD_256 | SH_FSI_BPFMD_64; 654 ackmd_bpfmd = SH_FSI_ACKMD_256 | SH_FSI_BPFMD_64;
594 break; 655 break;
595 default: 656 default:
596 pr_err("unsupported rate in FSI2 port B\n"); 657 pr_err("unsupported rate in FSI2 port B\n");
597 ret = -EINVAL; 658 return -EINVAL;
598 break;
599 } 659 }
600 660
661 /* FSI B setting */
662 fsib_clk = clk_get(dev, "ickb");
663 if (IS_ERR(fsib_clk))
664 return -EIO;
665
666 ret = __fsi_set_round_rate(fsib_clk, fsib_rate, enable);
601 clk_put(fsib_clk); 667 clk_put(fsib_clk);
668 if (ret < 0)
669 return ret;
670
671 /* FSI DIV setting */
672 ret = __fsi_set_round_rate(fdiv_clk, fdiv_rate, enable);
673 if (ret < 0) {
674 /* disable FSI B */
675 if (enable)
676 __fsi_set_round_rate(fsib_clk, fsib_rate, 0);
677 return ret;
678 }
679
680 return ackmd_bpfmd;
681}
682
683static int fsi_set_rate(struct device *dev, int is_porta, int rate, int enable)
684{
685 int ret;
686
687 if (is_porta)
688 ret = fsi_ak4642_set_rate(dev, rate, enable);
689 else
690 ret = fsi_hdmi_set_rate(dev, rate, enable);
602 691
603 return ret; 692 return ret;
604} 693}
@@ -880,6 +969,11 @@ static int __init hdmi_init_pm_clock(void)
880 goto out; 969 goto out;
881 } 970 }
882 971
972 ret = clk_enable(&sh7372_pllc2_clk);
973 if (ret < 0) {
974 pr_err("Cannot enable pllc2 clock\n");
975 goto out;
976 }
883 pr_debug("PLLC2 set frequency %lu\n", rate); 977 pr_debug("PLLC2 set frequency %lu\n", rate);
884 978
885 ret = clk_set_parent(hdmi_ick, &sh7372_pllc2_clk); 979 ret = clk_set_parent(hdmi_ick, &sh7372_pllc2_clk);
@@ -896,23 +990,11 @@ out:
896 990
897device_initcall(hdmi_init_pm_clock); 991device_initcall(hdmi_init_pm_clock);
898 992
899#define FSIACK_DUMMY_RATE 48000
900static int __init fsi_init_pm_clock(void) 993static int __init fsi_init_pm_clock(void)
901{ 994{
902 struct clk *fsia_ick; 995 struct clk *fsia_ick;
903 int ret; 996 int ret;
904 997
905 /*
906 * FSIACK is connected to AK4642,
907 * and the rate is depend on playing sound rate.
908 * So, set dummy rate (= 48k) here
909 */
910 ret = clk_set_rate(&sh7372_fsiack_clk, FSIACK_DUMMY_RATE);
911 if (ret < 0) {
912 pr_err("Cannot set FSIACK dummy rate: %d\n", ret);
913 return ret;
914 }
915
916 fsia_ick = clk_get(&fsi_device.dev, "icka"); 998 fsia_ick = clk_get(&fsi_device.dev, "icka");
917 if (IS_ERR(fsia_ick)) { 999 if (IS_ERR(fsia_ick)) {
918 ret = PTR_ERR(fsia_ick); 1000 ret = PTR_ERR(fsia_ick);
@@ -921,16 +1003,9 @@ static int __init fsi_init_pm_clock(void)
921 } 1003 }
922 1004
923 ret = clk_set_parent(fsia_ick, &sh7372_fsiack_clk); 1005 ret = clk_set_parent(fsia_ick, &sh7372_fsiack_clk);
924 if (ret < 0) {
925 pr_err("Cannot set FSI-A parent: %d\n", ret);
926 goto out;
927 }
928
929 ret = clk_set_rate(fsia_ick, FSIACK_DUMMY_RATE);
930 if (ret < 0) 1006 if (ret < 0)
931 pr_err("Cannot set FSI-A rate: %d\n", ret); 1007 pr_err("Cannot set FSI-A parent: %d\n", ret);
932 1008
933out:
934 clk_put(fsia_ick); 1009 clk_put(fsia_ick);
935 1010
936 return ret; 1011 return ret;
diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c
index b25ce90a346e..3aa026069435 100644
--- a/arch/arm/mach-shmobile/clock-sh7372.c
+++ b/arch/arm/mach-shmobile/clock-sh7372.c
@@ -229,21 +229,13 @@ static int pllc2_set_rate(struct clk *clk, unsigned long rate)
229 if (idx < 0) 229 if (idx < 0)
230 return idx; 230 return idx;
231 231
232 if (rate == clk->parent->rate) { 232 if (rate == clk->parent->rate)
233 pllc2_disable(clk); 233 return -EINVAL;
234 return 0;
235 }
236 234
237 value = __raw_readl(PLLC2CR) & ~(0x3f << 24); 235 value = __raw_readl(PLLC2CR) & ~(0x3f << 24);
238 236
239 if (value & 0x80000000)
240 pllc2_disable(clk);
241
242 __raw_writel((value & ~0x80000000) | ((idx + 19) << 24), PLLC2CR); 237 __raw_writel((value & ~0x80000000) | ((idx + 19) << 24), PLLC2CR);
243 238
244 if (value & 0x80000000)
245 return pllc2_enable(clk);
246
247 return 0; 239 return 0;
248} 240}
249 241
@@ -452,10 +444,8 @@ static int fsidiv_enable(struct clk *clk)
452 unsigned long value; 444 unsigned long value;
453 445
454 value = __raw_readl(clk->mapping->base) >> 16; 446 value = __raw_readl(clk->mapping->base) >> 16;
455 if (value < 2) { 447 if (value < 2)
456 fsidiv_disable(clk); 448 return -EIO;
457 return -ENOENT;
458 }
459 449
460 __raw_writel((value << 16) | 0x3, clk->mapping->base); 450 __raw_writel((value << 16) | 0x3, clk->mapping->base);
461 451
@@ -466,17 +456,12 @@ static int fsidiv_set_rate(struct clk *clk, unsigned long rate)
466{ 456{
467 int idx; 457 int idx;
468 458
469 if (clk->parent->rate == rate) {
470 fsidiv_disable(clk);
471 return 0;
472 }
473
474 idx = (clk->parent->rate / rate) & 0xffff; 459 idx = (clk->parent->rate / rate) & 0xffff;
475 if (idx < 2) 460 if (idx < 2)
476 return -ENOENT; 461 return -EINVAL;
477 462
478 __raw_writel(idx << 16, clk->mapping->base); 463 __raw_writel(idx << 16, clk->mapping->base);
479 return fsidiv_enable(clk); 464 return 0;
480} 465}
481 466
482static struct clk_ops fsidiv_clk_ops = { 467static struct clk_ops fsidiv_clk_ops = {
@@ -607,8 +592,6 @@ static struct clk_lookup lookups[] = {
607 CLKDEV_CON_ID("vck3_clk", &div6_clks[DIV6_VCK3]), 592 CLKDEV_CON_ID("vck3_clk", &div6_clks[DIV6_VCK3]),
608 CLKDEV_CON_ID("fmsi_clk", &div6_clks[DIV6_FMSI]), 593 CLKDEV_CON_ID("fmsi_clk", &div6_clks[DIV6_FMSI]),
609 CLKDEV_CON_ID("fmso_clk", &div6_clks[DIV6_FMSO]), 594 CLKDEV_CON_ID("fmso_clk", &div6_clks[DIV6_FMSO]),
610 CLKDEV_CON_ID("fsia_clk", &div6_reparent_clks[DIV6_FSIA]),
611 CLKDEV_CON_ID("fsib_clk", &div6_reparent_clks[DIV6_FSIB]),
612 CLKDEV_CON_ID("sub_clk", &div6_clks[DIV6_SUB]), 595 CLKDEV_CON_ID("sub_clk", &div6_clks[DIV6_SUB]),
613 CLKDEV_CON_ID("spu_clk", &div6_clks[DIV6_SPU]), 596 CLKDEV_CON_ID("spu_clk", &div6_clks[DIV6_SPU]),
614 CLKDEV_CON_ID("vou_clk", &div6_clks[DIV6_VOU]), 597 CLKDEV_CON_ID("vou_clk", &div6_clks[DIV6_VOU]),
@@ -645,8 +628,8 @@ static struct clk_lookup lookups[] = {
645 CLKDEV_DEV_ID("sh_cmt.10", &mstp_clks[MSTP329]), /* CMT10 */ 628 CLKDEV_DEV_ID("sh_cmt.10", &mstp_clks[MSTP329]), /* CMT10 */
646 CLKDEV_DEV_ID("sh_fsi2", &mstp_clks[MSTP328]), /* FSI2 */ 629 CLKDEV_DEV_ID("sh_fsi2", &mstp_clks[MSTP328]), /* FSI2 */
647 CLKDEV_DEV_ID("i2c-sh_mobile.1", &mstp_clks[MSTP323]), /* IIC1 */ 630 CLKDEV_DEV_ID("i2c-sh_mobile.1", &mstp_clks[MSTP323]), /* IIC1 */
648 CLKDEV_DEV_ID("r8a66597_hcd.0", &mstp_clks[MSTP323]), /* USB0 */ 631 CLKDEV_DEV_ID("r8a66597_hcd.0", &mstp_clks[MSTP322]), /* USB0 */
649 CLKDEV_DEV_ID("r8a66597_udc.0", &mstp_clks[MSTP323]), /* USB0 */ 632 CLKDEV_DEV_ID("r8a66597_udc.0", &mstp_clks[MSTP322]), /* USB0 */
650 CLKDEV_DEV_ID("sh_mobile_sdhi.0", &mstp_clks[MSTP314]), /* SDHI0 */ 633 CLKDEV_DEV_ID("sh_mobile_sdhi.0", &mstp_clks[MSTP314]), /* SDHI0 */
651 CLKDEV_DEV_ID("sh_mobile_sdhi.1", &mstp_clks[MSTP313]), /* SDHI1 */ 634 CLKDEV_DEV_ID("sh_mobile_sdhi.1", &mstp_clks[MSTP313]), /* SDHI1 */
652 CLKDEV_DEV_ID("sh_mmcif.0", &mstp_clks[MSTP312]), /* MMC */ 635 CLKDEV_DEV_ID("sh_mmcif.0", &mstp_clks[MSTP312]), /* MMC */
diff --git a/arch/arm/plat-pxa/include/plat/sdhci.h b/arch/arm/plat-pxa/include/plat/sdhci.h
index e49c5b6fc4e2..1ab332e37d7d 100644
--- a/arch/arm/plat-pxa/include/plat/sdhci.h
+++ b/arch/arm/plat-pxa/include/plat/sdhci.h
@@ -17,6 +17,9 @@
17/* Require clock free running */ 17/* Require clock free running */
18#define PXA_FLAG_DISABLE_CLOCK_GATING (1<<0) 18#define PXA_FLAG_DISABLE_CLOCK_GATING (1<<0)
19 19
20/* Board design supports 8-bit data on SD/SDIO BUS */
21#define PXA_FLAG_SD_8_BIT_CAPABLE_SLOT (1<<2)
22
20/* 23/*
21 * struct pxa_sdhci_platdata() - Platform device data for PXA SDHCI 24 * struct pxa_sdhci_platdata() - Platform device data for PXA SDHCI
22 * @max_speed: the maximum speed supported 25 * @max_speed: the maximum speed supported
diff --git a/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c b/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c
index 9793544a6ace..704175b0573f 100644
--- a/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c
+++ b/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c
@@ -29,8 +29,8 @@ void s3c24xx_spi_gpiocfg_bus0_gpe11_12_13(struct s3c2410_spi_info *spi,
29 } else { 29 } else {
30 s3c_gpio_cfgpin(S3C2410_GPE(13), S3C2410_GPIO_INPUT); 30 s3c_gpio_cfgpin(S3C2410_GPE(13), S3C2410_GPIO_INPUT);
31 s3c_gpio_cfgpin(S3C2410_GPE(11), S3C2410_GPIO_INPUT); 31 s3c_gpio_cfgpin(S3C2410_GPE(11), S3C2410_GPIO_INPUT);
32 s3c_gpio_cfgpull(S3C2410_GPE(11), S3C_GPIO_PULL_NONE); 32 s3c_gpio_setpull(S3C2410_GPE(11), S3C_GPIO_PULL_NONE);
33 s3c_gpio_cfgpull(S3C2410_GPE(12), S3C_GPIO_PULL_NONE); 33 s3c_gpio_setpull(S3C2410_GPE(12), S3C_GPIO_PULL_NONE);
34 s3c_gpio_cfgpull(S3C2410_GPE(13), S3C_GPIO_PULL_NONE); 34 s3c_gpio_setpull(S3C2410_GPE(13), S3C_GPIO_PULL_NONE);
35 } 35 }
36} 36}
diff --git a/arch/arm/plat-s3c24xx/spi-bus1-gpd8_9_10.c b/arch/arm/plat-s3c24xx/spi-bus1-gpd8_9_10.c
index db9e9e477ec1..72457afd6255 100644
--- a/arch/arm/plat-s3c24xx/spi-bus1-gpd8_9_10.c
+++ b/arch/arm/plat-s3c24xx/spi-bus1-gpd8_9_10.c
@@ -31,8 +31,8 @@ void s3c24xx_spi_gpiocfg_bus1_gpd8_9_10(struct s3c2410_spi_info *spi,
31 } else { 31 } else {
32 s3c_gpio_cfgpin(S3C2410_GPD(8), S3C2410_GPIO_INPUT); 32 s3c_gpio_cfgpin(S3C2410_GPD(8), S3C2410_GPIO_INPUT);
33 s3c_gpio_cfgpin(S3C2410_GPD(9), S3C2410_GPIO_INPUT); 33 s3c_gpio_cfgpin(S3C2410_GPD(9), S3C2410_GPIO_INPUT);
34 s3c_gpio_cfgpull(S3C2410_GPD(10), S3C_GPIO_PULL_NONE); 34 s3c_gpio_setpull(S3C2410_GPD(10), S3C_GPIO_PULL_NONE);
35 s3c_gpio_cfgpull(S3C2410_GPD(9), S3C_GPIO_PULL_NONE); 35 s3c_gpio_setpull(S3C2410_GPD(9), S3C_GPIO_PULL_NONE);
36 s3c_gpio_cfgpull(S3C2410_GPD(8), S3C_GPIO_PULL_NONE); 36 s3c_gpio_setpull(S3C2410_GPD(8), S3C_GPIO_PULL_NONE);
37 } 37 }
38} 38}
diff --git a/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c b/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c
index 8ea663a438bb..c3972b645d13 100644
--- a/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c
+++ b/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c
@@ -29,8 +29,8 @@ void s3c24xx_spi_gpiocfg_bus1_gpg5_6_7(struct s3c2410_spi_info *spi,
29 } else { 29 } else {
30 s3c_gpio_cfgpin(S3C2410_GPG(7), S3C2410_GPIO_INPUT); 30 s3c_gpio_cfgpin(S3C2410_GPG(7), S3C2410_GPIO_INPUT);
31 s3c_gpio_cfgpin(S3C2410_GPG(5), S3C2410_GPIO_INPUT); 31 s3c_gpio_cfgpin(S3C2410_GPG(5), S3C2410_GPIO_INPUT);
32 s3c_gpio_cfgpull(S3C2410_GPG(5), S3C_GPIO_PULL_NONE); 32 s3c_gpio_setpull(S3C2410_GPG(5), S3C_GPIO_PULL_NONE);
33 s3c_gpio_cfgpull(S3C2410_GPG(6), S3C_GPIO_PULL_NONE); 33 s3c_gpio_setpull(S3C2410_GPG(6), S3C_GPIO_PULL_NONE);
34 s3c_gpio_cfgpull(S3C2410_GPG(7), S3C_GPIO_PULL_NONE); 34 s3c_gpio_setpull(S3C2410_GPG(7), S3C_GPIO_PULL_NONE);
35 } 35 }
36} 36}
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index ac151399ef34..1995c1712fc8 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -95,7 +95,6 @@ EXPORT_SYMBOL_GPL(s390_handle_mcck);
95static int notrace s390_revalidate_registers(struct mci *mci) 95static int notrace s390_revalidate_registers(struct mci *mci)
96{ 96{
97 int kill_task; 97 int kill_task;
98 u64 tmpclock;
99 u64 zero; 98 u64 zero;
100 void *fpt_save_area, *fpt_creg_save_area; 99 void *fpt_save_area, *fpt_creg_save_area;
101 100
@@ -214,11 +213,10 @@ static int notrace s390_revalidate_registers(struct mci *mci)
214 : "0", "cc"); 213 : "0", "cc");
215#endif 214#endif
216 /* Revalidate clock comparator register */ 215 /* Revalidate clock comparator register */
217 asm volatile( 216 if (S390_lowcore.clock_comparator == -1)
218 " stck 0(%1)\n" 217 set_clock_comparator(S390_lowcore.mcck_clock);
219 " sckc 0(%1)" 218 else
220 : "=m" (tmpclock) : "a" (&(tmpclock)) : "cc", "memory"); 219 set_clock_comparator(S390_lowcore.clock_comparator);
221
222 /* Check if old PSW is valid */ 220 /* Check if old PSW is valid */
223 if (!mci->wp) 221 if (!mci->wp)
224 /* 222 /*
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index 752b362bf651..7c37ec359ec2 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -29,17 +29,21 @@ static void __udelay_disabled(unsigned long long usecs)
29{ 29{
30 unsigned long mask, cr0, cr0_saved; 30 unsigned long mask, cr0, cr0_saved;
31 u64 clock_saved; 31 u64 clock_saved;
32 u64 end;
32 33
34 mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT;
35 end = get_clock() + (usecs << 12);
33 clock_saved = local_tick_disable(); 36 clock_saved = local_tick_disable();
34 set_clock_comparator(get_clock() + (usecs << 12));
35 __ctl_store(cr0_saved, 0, 0); 37 __ctl_store(cr0_saved, 0, 0);
36 cr0 = (cr0_saved & 0xffff00e0) | 0x00000800; 38 cr0 = (cr0_saved & 0xffff00e0) | 0x00000800;
37 __ctl_load(cr0 , 0, 0); 39 __ctl_load(cr0 , 0, 0);
38 mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT;
39 lockdep_off(); 40 lockdep_off();
40 trace_hardirqs_on(); 41 do {
41 __load_psw_mask(mask); 42 set_clock_comparator(end);
42 local_irq_disable(); 43 trace_hardirqs_on();
44 __load_psw_mask(mask);
45 local_irq_disable();
46 } while (get_clock() < end);
43 lockdep_on(); 47 lockdep_on();
44 __ctl_load(cr0_saved, 0, 0); 48 __ctl_load(cr0_saved, 0, 0);
45 local_tick_enable(clock_saved); 49 local_tick_enable(clock_saved);
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 07ec8a865c1d..e11b5fcb70eb 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -329,6 +329,18 @@ endmenu # Tilera-specific configuration
329 329
330menu "Bus options" 330menu "Bus options"
331 331
332config PCI
333 bool "PCI support"
334 default y
335 select PCI_DOMAINS
336 ---help---
337 Enable PCI root complex support, so PCIe endpoint devices can
338 be attached to the Tile chip. Many, but not all, PCI devices
339 are supported under Tilera's root complex driver.
340
341config PCI_DOMAINS
342 bool
343
332config NO_IOMEM 344config NO_IOMEM
333 def_bool !PCI 345 def_bool !PCI
334 346
diff --git a/arch/tile/include/asm/cacheflush.h b/arch/tile/include/asm/cacheflush.h
index c5741da4eeac..14a3f8556ace 100644
--- a/arch/tile/include/asm/cacheflush.h
+++ b/arch/tile/include/asm/cacheflush.h
@@ -137,4 +137,56 @@ static inline void finv_buffer(void *buffer, size_t size)
137 mb_incoherent(); 137 mb_incoherent();
138} 138}
139 139
140/*
141 * Flush & invalidate a VA range that is homed remotely on a single core,
142 * waiting until the memory controller holds the flushed values.
143 */
144static inline void finv_buffer_remote(void *buffer, size_t size)
145{
146 char *p;
147 int i;
148
149 /*
150 * Flush and invalidate the buffer out of the local L1/L2
151 * and request the home cache to flush and invalidate as well.
152 */
153 __finv_buffer(buffer, size);
154
155 /*
156 * Wait for the home cache to acknowledge that it has processed
157 * all the flush-and-invalidate requests. This does not mean
158 * that the flushed data has reached the memory controller yet,
159 * but it does mean the home cache is processing the flushes.
160 */
161 __insn_mf();
162
163 /*
164 * Issue a load to the last cache line, which can't complete
165 * until all the previously-issued flushes to the same memory
166 * controller have also completed. If we weren't striping
167 * memory, that one load would be sufficient, but since we may
168 * be, we also need to back up to the last load issued to
169 * another memory controller, which would be the point where
170 * we crossed an 8KB boundary (the granularity of striping
171 * across memory controllers). Keep backing up and doing this
172 * until we are before the beginning of the buffer, or have
173 * hit all the controllers.
174 */
175 for (i = 0, p = (char *)buffer + size - 1;
176 i < (1 << CHIP_LOG_NUM_MSHIMS()) && p >= (char *)buffer;
177 ++i) {
178 const unsigned long STRIPE_WIDTH = 8192;
179
180 /* Force a load instruction to issue. */
181 *(volatile char *)p;
182
183 /* Jump to end of previous stripe. */
184 p -= STRIPE_WIDTH;
185 p = (char *)((unsigned long)p | (STRIPE_WIDTH - 1));
186 }
187
188 /* Wait for the loads (and thus flushes) to have completed. */
189 __insn_mf();
190}
191
140#endif /* _ASM_TILE_CACHEFLUSH_H */ 192#endif /* _ASM_TILE_CACHEFLUSH_H */
diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h
index ee43328713ab..d3cbb9b14cbe 100644
--- a/arch/tile/include/asm/io.h
+++ b/arch/tile/include/asm/io.h
@@ -55,9 +55,6 @@ extern void iounmap(volatile void __iomem *addr);
55#define ioremap_writethrough(physaddr, size) ioremap(physaddr, size) 55#define ioremap_writethrough(physaddr, size) ioremap(physaddr, size)
56#define ioremap_fullcache(physaddr, size) ioremap(physaddr, size) 56#define ioremap_fullcache(physaddr, size) ioremap(physaddr, size)
57 57
58void __iomem *ioport_map(unsigned long port, unsigned int len);
59extern inline void ioport_unmap(void __iomem *addr) {}
60
61#define mmiowb() 58#define mmiowb()
62 59
63/* Conversion between virtual and physical mappings. */ 60/* Conversion between virtual and physical mappings. */
@@ -189,12 +186,22 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src,
189 * we never run, uses them unconditionally. 186 * we never run, uses them unconditionally.
190 */ 187 */
191 188
192static inline int ioport_panic(void) 189static inline long ioport_panic(void)
193{ 190{
194 panic("inb/outb and friends do not exist on tile"); 191 panic("inb/outb and friends do not exist on tile");
195 return 0; 192 return 0;
196} 193}
197 194
195static inline void __iomem *ioport_map(unsigned long port, unsigned int len)
196{
197 return (void __iomem *) ioport_panic();
198}
199
200static inline void ioport_unmap(void __iomem *addr)
201{
202 ioport_panic();
203}
204
198static inline u8 inb(unsigned long addr) 205static inline u8 inb(unsigned long addr)
199{ 206{
200 return ioport_panic(); 207 return ioport_panic();
diff --git a/arch/tile/include/asm/pci-bridge.h b/arch/tile/include/asm/pci-bridge.h
deleted file mode 100644
index e853b0e2793b..000000000000
--- a/arch/tile/include/asm/pci-bridge.h
+++ /dev/null
@@ -1,117 +0,0 @@
1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 */
14
15#ifndef _ASM_TILE_PCI_BRIDGE_H
16#define _ASM_TILE_PCI_BRIDGE_H
17
18#include <linux/ioport.h>
19#include <linux/pci.h>
20
21struct device_node;
22struct pci_controller;
23
24/*
25 * pci_io_base returns the memory address at which you can access
26 * the I/O space for PCI bus number `bus' (or NULL on error).
27 */
28extern void __iomem *pci_bus_io_base(unsigned int bus);
29extern unsigned long pci_bus_io_base_phys(unsigned int bus);
30extern unsigned long pci_bus_mem_base_phys(unsigned int bus);
31
32/* Allocate a new PCI host bridge structure */
33extern struct pci_controller *pcibios_alloc_controller(void);
34
35/* Helper function for setting up resources */
36extern void pci_init_resource(struct resource *res, unsigned long start,
37 unsigned long end, int flags, char *name);
38
39/* Get the PCI host controller for a bus */
40extern struct pci_controller *pci_bus_to_hose(int bus);
41
42/*
43 * Structure of a PCI controller (host bridge)
44 */
45struct pci_controller {
46 int index; /* PCI domain number */
47 struct pci_bus *root_bus;
48
49 int first_busno;
50 int last_busno;
51
52 int hv_cfg_fd[2]; /* config{0,1} fds for this PCIe controller */
53 int hv_mem_fd; /* fd to Hypervisor for MMIO operations */
54
55 struct pci_ops *ops;
56
57 int irq_base; /* Base IRQ from the Hypervisor */
58 int plx_gen1; /* flag for PLX Gen 1 configuration */
59
60 /* Address ranges that are routed to this controller/bridge. */
61 struct resource mem_resources[3];
62};
63
64static inline struct pci_controller *pci_bus_to_host(struct pci_bus *bus)
65{
66 return bus->sysdata;
67}
68
69extern void setup_indirect_pci_nomap(struct pci_controller *hose,
70 void __iomem *cfg_addr, void __iomem *cfg_data);
71extern void setup_indirect_pci(struct pci_controller *hose,
72 u32 cfg_addr, u32 cfg_data);
73extern void setup_grackle(struct pci_controller *hose);
74
75extern unsigned char common_swizzle(struct pci_dev *, unsigned char *);
76
77/*
78 * The following code swizzles for exactly one bridge. The routine
79 * common_swizzle below handles multiple bridges. But there are a
80 * some boards that don't follow the PCI spec's suggestion so we
81 * break this piece out separately.
82 */
83static inline unsigned char bridge_swizzle(unsigned char pin,
84 unsigned char idsel)
85{
86 return (((pin-1) + idsel) % 4) + 1;
87}
88
89/*
90 * The following macro is used to lookup irqs in a standard table
91 * format for those PPC systems that do not already have PCI
92 * interrupts properly routed.
93 */
94/* FIXME - double check this */
95#define PCI_IRQ_TABLE_LOOKUP ({ \
96 long _ctl_ = -1; \
97 if (idsel >= min_idsel && idsel <= max_idsel && pin <= irqs_per_slot) \
98 _ctl_ = pci_irq_table[idsel - min_idsel][pin-1]; \
99 _ctl_; \
100})
101
102/*
103 * Scan the buses below a given PCI host bridge and assign suitable
104 * resources to all devices found.
105 */
106extern int pciauto_bus_scan(struct pci_controller *, int);
107
108#ifdef CONFIG_PCI
109extern unsigned long pci_address_to_pio(phys_addr_t address);
110#else
111static inline unsigned long pci_address_to_pio(phys_addr_t address)
112{
113 return (unsigned long)-1;
114}
115#endif
116
117#endif /* _ASM_TILE_PCI_BRIDGE_H */
diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h
index b0c15da2d5d5..c3fc458a0d32 100644
--- a/arch/tile/include/asm/pci.h
+++ b/arch/tile/include/asm/pci.h
@@ -15,7 +15,29 @@
15#ifndef _ASM_TILE_PCI_H 15#ifndef _ASM_TILE_PCI_H
16#define _ASM_TILE_PCI_H 16#define _ASM_TILE_PCI_H
17 17
18#include <asm/pci-bridge.h> 18#include <linux/pci.h>
19
20/*
21 * Structure of a PCI controller (host bridge)
22 */
23struct pci_controller {
24 int index; /* PCI domain number */
25 struct pci_bus *root_bus;
26
27 int first_busno;
28 int last_busno;
29
30 int hv_cfg_fd[2]; /* config{0,1} fds for this PCIe controller */
31 int hv_mem_fd; /* fd to Hypervisor for MMIO operations */
32
33 struct pci_ops *ops;
34
35 int irq_base; /* Base IRQ from the Hypervisor */
36 int plx_gen1; /* flag for PLX Gen 1 configuration */
37
38 /* Address ranges that are routed to this controller/bridge. */
39 struct resource mem_resources[3];
40};
19 41
20/* 42/*
21 * The hypervisor maps the entirety of CPA-space as bus addresses, so 43 * The hypervisor maps the entirety of CPA-space as bus addresses, so
@@ -24,56 +46,12 @@
24 */ 46 */
25#define PCI_DMA_BUS_IS_PHYS 1 47#define PCI_DMA_BUS_IS_PHYS 1
26 48
27struct pci_controller *pci_bus_to_hose(int bus);
28unsigned char __init common_swizzle(struct pci_dev *dev, unsigned char *pinp);
29int __init tile_pci_init(void); 49int __init tile_pci_init(void);
30void pci_iounmap(struct pci_dev *dev, void __iomem *addr);
31void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
32void __devinit pcibios_fixup_bus(struct pci_bus *bus);
33 50
34int __devinit _tile_cfg_read(struct pci_controller *hose, 51void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
35 int bus, 52static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
36 int slot,
37 int function,
38 int offset,
39 int size,
40 u32 *val);
41int __devinit _tile_cfg_write(struct pci_controller *hose,
42 int bus,
43 int slot,
44 int function,
45 int offset,
46 int size,
47 u32 val);
48 53
49/* 54void __devinit pcibios_fixup_bus(struct pci_bus *bus);
50 * These are used to to config reads and writes in the early stages of
51 * setup before the driver infrastructure has been set up enough to be
52 * able to do config reads and writes.
53 */
54#define early_cfg_read(where, size, value) \
55 _tile_cfg_read(controller, \
56 current_bus, \
57 pci_slot, \
58 pci_fn, \
59 where, \
60 size, \
61 value)
62
63#define early_cfg_write(where, size, value) \
64 _tile_cfg_write(controller, \
65 current_bus, \
66 pci_slot, \
67 pci_fn, \
68 where, \
69 size, \
70 value)
71
72
73
74#define PCICFG_BYTE 1
75#define PCICFG_WORD 2
76#define PCICFG_DWORD 4
77 55
78#define TILE_NUM_PCIE 2 56#define TILE_NUM_PCIE 2
79 57
@@ -88,33 +66,33 @@ static inline int pci_proc_domain(struct pci_bus *bus)
88} 66}
89 67
90/* 68/*
91 * I/O space is currently not supported. 69 * pcibios_assign_all_busses() tells whether or not the bus numbers
70 * should be reassigned, in case the BIOS didn't do it correctly, or
71 * in case we don't have a BIOS and we want to let Linux do it.
92 */ 72 */
73static inline int pcibios_assign_all_busses(void)
74{
75 return 1;
76}
93 77
94#define TILE_PCIE_LOWER_IO 0x0 78/*
95#define TILE_PCIE_UPPER_IO 0x10000 79 * No special bus mastering setup handling.
96#define TILE_PCIE_PCIE_IO_SIZE 0x0000FFFF 80 */
97
98#define _PAGE_NO_CACHE 0
99#define _PAGE_GUARDED 0
100
101
102#define pcibios_assign_all_busses() pci_assign_all_buses
103extern int pci_assign_all_buses;
104
105static inline void pcibios_set_master(struct pci_dev *dev) 81static inline void pcibios_set_master(struct pci_dev *dev)
106{ 82{
107 /* No special bus mastering setup handling */
108} 83}
109 84
110#define PCIBIOS_MIN_MEM 0 85#define PCIBIOS_MIN_MEM 0
111#define PCIBIOS_MIN_IO TILE_PCIE_LOWER_IO 86#define PCIBIOS_MIN_IO 0
112 87
113/* 88/*
114 * This flag tells if the platform is TILEmpower that needs 89 * This flag tells if the platform is TILEmpower that needs
115 * special configuration for the PLX switch chip. 90 * special configuration for the PLX switch chip.
116 */ 91 */
117extern int blade_pci; 92extern int tile_plx_gen1;
93
94/* Use any cpu for PCI. */
95#define cpumask_of_pcibus(bus) cpu_online_mask
118 96
119/* implement the pci_ DMA API in terms of the generic device dma_ one */ 97/* implement the pci_ DMA API in terms of the generic device dma_ one */
120#include <asm-generic/pci-dma-compat.h> 98#include <asm-generic/pci-dma-compat.h>
@@ -122,7 +100,4 @@ extern int blade_pci;
122/* generic pci stuff */ 100/* generic pci stuff */
123#include <asm-generic/pci.h> 101#include <asm-generic/pci.h>
124 102
125/* Use any cpu for PCI. */
126#define cpumask_of_pcibus(bus) cpu_online_mask
127
128#endif /* _ASM_TILE_PCI_H */ 103#endif /* _ASM_TILE_PCI_H */
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index 1747ff3946b2..a9e7c8760334 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -292,8 +292,18 @@ extern int kstack_hash;
292/* Are we using huge pages in the TLB for kernel data? */ 292/* Are we using huge pages in the TLB for kernel data? */
293extern int kdata_huge; 293extern int kdata_huge;
294 294
295/* Support standard Linux prefetching. */
296#define ARCH_HAS_PREFETCH
297#define prefetch(x) __builtin_prefetch(x)
295#define PREFETCH_STRIDE CHIP_L2_LINE_SIZE() 298#define PREFETCH_STRIDE CHIP_L2_LINE_SIZE()
296 299
300/* Bring a value into the L1D, faulting the TLB if necessary. */
301#ifdef __tilegx__
302#define prefetch_L1(x) __insn_prefetch_l1_fault((void *)(x))
303#else
304#define prefetch_L1(x) __insn_prefetch_L1((void *)(x))
305#endif
306
297#else /* __ASSEMBLY__ */ 307#else /* __ASSEMBLY__ */
298 308
299/* Do some slow action (e.g. read a slow SPR). */ 309/* Do some slow action (e.g. read a slow SPR). */
diff --git a/arch/tile/include/hv/drv_xgbe_impl.h b/arch/tile/include/hv/drv_xgbe_impl.h
new file mode 100644
index 000000000000..3a73b2b44913
--- /dev/null
+++ b/arch/tile/include/hv/drv_xgbe_impl.h
@@ -0,0 +1,300 @@
1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 */
14
15/**
16 * @file drivers/xgbe/impl.h
17 * Implementation details for the NetIO library.
18 */
19
20#ifndef __DRV_XGBE_IMPL_H__
21#define __DRV_XGBE_IMPL_H__
22
23#include <hv/netio_errors.h>
24#include <hv/netio_intf.h>
25#include <hv/drv_xgbe_intf.h>
26
27
28/** How many groups we have (log2). */
29#define LOG2_NUM_GROUPS (12)
30/** How many groups we have. */
31#define NUM_GROUPS (1 << LOG2_NUM_GROUPS)
32
33/** Number of output requests we'll buffer per tile. */
34#define EPP_REQS_PER_TILE (32)
35
36/** Words used in an eDMA command without checksum acceleration. */
37#define EDMA_WDS_NO_CSUM 8
38/** Words used in an eDMA command with checksum acceleration. */
39#define EDMA_WDS_CSUM 10
40/** Total available words in the eDMA command FIFO. */
41#define EDMA_WDS_TOTAL 128
42
43
44/*
45 * FIXME: These definitions are internal and should have underscores!
46 * NOTE: The actual numeric values here are intentional and allow us to
47 * optimize the concept "if small ... else if large ... else ...", by
48 * checking for the low bit being set, and then for non-zero.
49 * These are used as array indices, so they must have the values (0, 1, 2)
50 * in some order.
51 */
52#define SIZE_SMALL (1) /**< Small packet queue. */
53#define SIZE_LARGE (2) /**< Large packet queue. */
54#define SIZE_JUMBO (0) /**< Jumbo packet queue. */
55
56/** The number of "SIZE_xxx" values. */
57#define NETIO_NUM_SIZES 3
58
59
60/*
61 * Default numbers of packets for IPP drivers. These values are chosen
62 * such that CIPP1 will not overflow its L2 cache.
63 */
64
65/** The default number of small packets. */
66#define NETIO_DEFAULT_SMALL_PACKETS 2750
67/** The default number of large packets. */
68#define NETIO_DEFAULT_LARGE_PACKETS 2500
69/** The default number of jumbo packets. */
70#define NETIO_DEFAULT_JUMBO_PACKETS 250
71
72
73/** Log2 of the size of a memory arena. */
74#define NETIO_ARENA_SHIFT 24 /* 16 MB */
75/** Size of a memory arena. */
76#define NETIO_ARENA_SIZE (1 << NETIO_ARENA_SHIFT)
77
78
79/** A queue of packets.
80 *
81 * This structure partially defines a queue of packets waiting to be
82 * processed. The queue as a whole is written to by an interrupt handler and
83 * read by non-interrupt code; this data structure is what's touched by the
84 * interrupt handler. The other part of the queue state, the read offset, is
85 * kept in user space, not in hypervisor space, so it is in a separate data
86 * structure.
87 *
88 * The read offset (__packet_receive_read in the user part of the queue
89 * structure) points to the next packet to be read. When the read offset is
90 * equal to the write offset, the queue is empty; therefore the queue must
91 * contain one more slot than the required maximum queue size.
92 *
93 * Here's an example of all 3 state variables and what they mean. All
94 * pointers move left to right.
95 *
96 * @code
97 * I I V V V V I I I I
98 * 0 1 2 3 4 5 6 7 8 9 10
99 * ^ ^ ^ ^
100 * | | |
101 * | | __last_packet_plus_one
102 * | __buffer_write
103 * __packet_receive_read
104 * @endcode
105 *
106 * This queue has 10 slots, and thus can hold 9 packets (_last_packet_plus_one
107 * = 10). The read pointer is at 2, and the write pointer is at 6; thus,
108 * there are valid, unread packets in slots 2, 3, 4, and 5. The remaining
109 * slots are invalid (do not contain a packet).
110 */
111typedef struct {
112 /** Byte offset of the next notify packet to be written: zero for the first
113 * packet on the queue, sizeof (netio_pkt_t) for the second packet on the
114 * queue, etc. */
115 volatile uint32_t __packet_write;
116
117 /** Offset of the packet after the last valid packet (i.e., when any
118 * pointer is incremented to this value, it wraps back to zero). */
119 uint32_t __last_packet_plus_one;
120}
121__netio_packet_queue_t;
122
123
124/** A queue of buffers.
125 *
126 * This structure partially defines a queue of empty buffers which have been
127 * obtained via requests to the IPP. (The elements of the queue are packet
128 * handles, which are transformed into a full netio_pkt_t when the buffer is
129 * retrieved.) The queue as a whole is written to by an interrupt handler and
130 * read by non-interrupt code; this data structure is what's touched by the
131 * interrupt handler. The other parts of the queue state, the read offset and
132 * requested write offset, are kept in user space, not in hypervisor space, so
133 * they are in a separate data structure.
134 *
135 * The read offset (__buffer_read in the user part of the queue structure)
136 * points to the next buffer to be read. When the read offset is equal to the
137 * write offset, the queue is empty; therefore the queue must contain one more
138 * slot than the required maximum queue size.
139 *
140 * The requested write offset (__buffer_requested_write in the user part of
141 * the queue structure) points to the slot which will hold the next buffer we
142 * request from the IPP, once we get around to sending such a request. When
143 * the requested write offset is equal to the write offset, no requests for
144 * new buffers are outstanding; when the requested write offset is one greater
145 * than the read offset, no more requests may be sent.
146 *
147 * Note that, unlike the packet_queue, the buffer_queue places incoming
148 * buffers at decreasing addresses. This makes the check for "is it time to
149 * wrap the buffer pointer" cheaper in the assembly code which receives new
150 * buffers, and means that the value which defines the queue size,
151 * __last_buffer, is different than in the packet queue. Also, the offset
152 * used in the packet_queue is already scaled by the size of a packet; here we
153 * use unscaled slot indices for the offsets. (These differences are
154 * historical, and in the future it's possible that the packet_queue will look
155 * more like this queue.)
156 *
157 * @code
158 * Here's an example of all 4 state variables and what they mean. Remember:
159 * all pointers move right to left.
160 *
161 * V V V I I R R V V V
162 * 0 1 2 3 4 5 6 7 8 9
163 * ^ ^ ^ ^
164 * | | | |
165 * | | | __last_buffer
166 * | | __buffer_write
167 * | __buffer_requested_write
168 * __buffer_read
169 * @endcode
170 *
171 * This queue has 10 slots, and thus can hold 9 buffers (_last_buffer = 9).
172 * The read pointer is at 2, and the write pointer is at 6; thus, there are
173 * valid, unread buffers in slots 2, 1, 0, 9, 8, and 7. The requested write
174 * pointer is at 4; thus, requests have been made to the IPP for buffers which
175 * will be placed in slots 6 and 5 when they arrive. Finally, the remaining
176 * slots are invalid (do not contain a buffer).
177 */
178typedef struct
179{
180 /** Ordinal number of the next buffer to be written: 0 for the first slot in
181 * the queue, 1 for the second slot in the queue, etc. */
182 volatile uint32_t __buffer_write;
183
184 /** Ordinal number of the last buffer (i.e., when any pointer is decremented
185 * below zero, it is reloaded with this value). */
186 uint32_t __last_buffer;
187}
188__netio_buffer_queue_t;
189
190
191/**
192 * An object for providing Ethernet packets to a process.
193 */
194typedef struct __netio_queue_impl_t
195{
196 /** The queue of packets waiting to be received. */
197 __netio_packet_queue_t __packet_receive_queue;
198 /** The intr bit mask that IDs this device. */
199 unsigned int __intr_id;
200 /** Offset to queues of empty buffers, one per size. */
201 uint32_t __buffer_queue[NETIO_NUM_SIZES];
202 /** The address of the first EPP tile, or -1 if no EPP. */
203 /* ISSUE: Actually this is always "0" or "~0". */
204 uint32_t __epp_location;
205 /** The queue ID that this queue represents. */
206 unsigned int __queue_id;
207 /** Number of acknowledgements received. */
208 volatile uint32_t __acks_received;
209 /** Last completion number received for packet_sendv. */
210 volatile uint32_t __last_completion_rcv;
211 /** Number of packets allowed to be outstanding. */
212 uint32_t __max_outstanding;
213 /** First VA available for packets. */
214 void* __va_0;
215 /** First VA in second range available for packets. */
216 void* __va_1;
217 /** Padding to align the "__packets" field to the size of a netio_pkt_t. */
218 uint32_t __padding[3];
219 /** The packets themselves. */
220 netio_pkt_t __packets[0];
221}
222netio_queue_impl_t;
223
224
225/**
226 * An object for managing the user end of a NetIO queue.
227 */
228typedef struct __netio_queue_user_impl_t
229{
230 /** The next incoming packet to be read. */
231 uint32_t __packet_receive_read;
232 /** The next empty buffers to be read, one index per size. */
233 uint8_t __buffer_read[NETIO_NUM_SIZES];
234 /** Where the empty buffer we next request from the IPP will go, one index
235 * per size. */
236 uint8_t __buffer_requested_write[NETIO_NUM_SIZES];
237 /** PCIe interface flag. */
238 uint8_t __pcie;
239 /** Number of packets left to be received before we send a credit update. */
240 uint32_t __receive_credit_remaining;
241 /** Value placed in __receive_credit_remaining when it reaches zero. */
242 uint32_t __receive_credit_interval;
243 /** First fast I/O routine index. */
244 uint32_t __fastio_index;
245 /** Number of acknowledgements expected. */
246 uint32_t __acks_outstanding;
247 /** Last completion number requested. */
248 uint32_t __last_completion_req;
249 /** File descriptor for driver. */
250 int __fd;
251}
252netio_queue_user_impl_t;
253
254
255#define NETIO_GROUP_CHUNK_SIZE 64 /**< Max # groups in one IPP request */
256#define NETIO_BUCKET_CHUNK_SIZE 64 /**< Max # buckets in one IPP request */
257
258
259/** Internal structure used to convey packet send information to the
260 * hypervisor. FIXME: Actually, it's not used for that anymore, but
261 * netio_packet_send() still uses it internally.
262 */
263typedef struct
264{
265 uint16_t flags; /**< Packet flags (__NETIO_SEND_FLG_xxx) */
266 uint16_t transfer_size; /**< Size of packet */
267 uint32_t va; /**< VA of start of packet */
268 __netio_pkt_handle_t handle; /**< Packet handle */
269 uint32_t csum0; /**< First checksum word */
270 uint32_t csum1; /**< Second checksum word */
271}
272__netio_send_cmd_t;
273
274
275/** Flags used in two contexts:
276 * - As the "flags" member in the __netio_send_cmd_t, above; used only
277 * for netio_pkt_send_{prepare,commit}.
278 * - As part of the flags passed to the various send packet fast I/O calls.
279 */
280
281/** Need acknowledgement on this packet. Note that some code in the
282 * normal send_pkt fast I/O handler assumes that this is equal to 1. */
283#define __NETIO_SEND_FLG_ACK 0x1
284
285/** Do checksum on this packet. (Only used with the __netio_send_cmd_t;
286 * normal packet sends use a special fast I/O index to denote checksumming,
287 * and multi-segment sends test the checksum descriptor.) */
288#define __NETIO_SEND_FLG_CSUM 0x2
289
290/** Get a completion on this packet. Only used with multi-segment sends. */
291#define __NETIO_SEND_FLG_COMPLETION 0x4
292
293/** Position of the number-of-extra-segments value in the flags word.
294 Only used with multi-segment sends. */
295#define __NETIO_SEND_FLG_XSEG_SHIFT 3
296
297/** Width of the number-of-extra-segments value in the flags word. */
298#define __NETIO_SEND_FLG_XSEG_WIDTH 2
299
300#endif /* __DRV_XGBE_IMPL_H__ */
diff --git a/arch/tile/include/hv/drv_xgbe_intf.h b/arch/tile/include/hv/drv_xgbe_intf.h
new file mode 100644
index 000000000000..146e47d5334b
--- /dev/null
+++ b/arch/tile/include/hv/drv_xgbe_intf.h
@@ -0,0 +1,615 @@
1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 */
14
15/**
16 * @file drv_xgbe_intf.h
17 * Interface to the hypervisor XGBE driver.
18 */
19
20#ifndef __DRV_XGBE_INTF_H__
21#define __DRV_XGBE_INTF_H__
22
23/**
24 * An object for forwarding VAs and PAs to the hypervisor.
25 * @ingroup types
26 *
27 * This allows the supervisor to specify a number of areas of memory to
28 * store packet buffers.
29 */
30typedef struct
31{
32 /** The physical address of the memory. */
33 HV_PhysAddr pa;
34 /** Page table entry for the memory. This is only used to derive the
35 * memory's caching mode; the PA bits are ignored. */
36 HV_PTE pte;
37 /** The virtual address of the memory. */
38 HV_VirtAddr va;
39 /** Size (in bytes) of the memory area. */
40 int size;
41
42}
43netio_ipp_address_t;
44
45/** The various pread/pwrite offsets into the hypervisor-level driver.
46 * @ingroup types
47 */
48typedef enum
49{
50 /** Inform the Linux driver of the address of the NetIO arena memory.
51 * This offset is actually only used to convey information from netio
52 * to the Linux driver; it never makes it from there to the hypervisor.
53 * Write-only; takes a uint32_t specifying the VA address. */
54 NETIO_FIXED_ADDR = 0x5000000000000000ULL,
55
56 /** Inform the Linux driver of the size of the NetIO arena memory.
57 * This offset is actually only used to convey information from netio
58 * to the Linux driver; it never makes it from there to the hypervisor.
59 * Write-only; takes a uint32_t specifying the VA size. */
60 NETIO_FIXED_SIZE = 0x5100000000000000ULL,
61
62 /** Register current tile with IPP. Write then read: write, takes a
63 * netio_input_config_t, read returns a pointer to a netio_queue_impl_t. */
64 NETIO_IPP_INPUT_REGISTER_OFF = 0x6000000000000000ULL,
65
66 /** Unregister current tile from IPP. Write-only, takes a dummy argument. */
67 NETIO_IPP_INPUT_UNREGISTER_OFF = 0x6100000000000000ULL,
68
69 /** Start packets flowing. Write-only, takes a dummy argument. */
70 NETIO_IPP_INPUT_INIT_OFF = 0x6200000000000000ULL,
71
72 /** Stop packets flowing. Write-only, takes a dummy argument. */
73 NETIO_IPP_INPUT_UNINIT_OFF = 0x6300000000000000ULL,
74
75 /** Configure group (typically we group on VLAN). Write-only: takes an
76 * array of netio_group_t's, low 24 bits of the offset is the base group
77 * number times the size of a netio_group_t. */
78 NETIO_IPP_INPUT_GROUP_CFG_OFF = 0x6400000000000000ULL,
79
80 /** Configure bucket. Write-only: takes an array of netio_bucket_t's, low
81 * 24 bits of the offset is the base bucket number times the size of a
82 * netio_bucket_t. */
83 NETIO_IPP_INPUT_BUCKET_CFG_OFF = 0x6500000000000000ULL,
84
85 /** Get/set a parameter. Read or write: read or write data is the parameter
86 * value, low 32 bits of the offset is a __netio_getset_offset_t. */
87 NETIO_IPP_PARAM_OFF = 0x6600000000000000ULL,
88
89 /** Get fast I/O index. Read-only; returns a 4-byte base index value. */
90 NETIO_IPP_GET_FASTIO_OFF = 0x6700000000000000ULL,
91
92 /** Configure hijack IP address. Packets with this IPv4 dest address
93 * go to bucket NETIO_NUM_BUCKETS - 1. Write-only: takes an IP address
94 * in some standard form. FIXME: Define the form! */
95 NETIO_IPP_INPUT_HIJACK_CFG_OFF = 0x6800000000000000ULL,
96
97 /**
98 * Offsets beyond this point are reserved for the supervisor (although that
99 * enforcement must be done by the supervisor driver itself).
100 */
101 NETIO_IPP_USER_MAX_OFF = 0x6FFFFFFFFFFFFFFFULL,
102
103 /** Register I/O memory. Write-only, takes a netio_ipp_address_t. */
104 NETIO_IPP_IOMEM_REGISTER_OFF = 0x7000000000000000ULL,
105
106 /** Unregister I/O memory. Write-only, takes a netio_ipp_address_t. */
107 NETIO_IPP_IOMEM_UNREGISTER_OFF = 0x7100000000000000ULL,
108
109 /* Offsets greater than 0x7FFFFFFF can't be used directly from Linux
110 * userspace code due to limitations in the pread/pwrite syscalls. */
111
112 /** Drain LIPP buffers. */
113 NETIO_IPP_DRAIN_OFF = 0xFA00000000000000ULL,
114
115 /** Supply a netio_ipp_address_t to be used as shared memory for the
116 * LEPP command queue. */
117 NETIO_EPP_SHM_OFF = 0xFB00000000000000ULL,
118
119 /* 0xFC... is currently unused. */
120
121 /** Stop IPP/EPP tiles. Write-only, takes a dummy argument. */
122 NETIO_IPP_STOP_SHIM_OFF = 0xFD00000000000000ULL,
123
124 /** Start IPP/EPP tiles. Write-only, takes a dummy argument. */
125 NETIO_IPP_START_SHIM_OFF = 0xFE00000000000000ULL,
126
127 /** Supply packet arena. Write-only, takes an array of
128 * netio_ipp_address_t values. */
129 NETIO_IPP_ADDRESS_OFF = 0xFF00000000000000ULL,
130} netio_hv_offset_t;
131
132/** Extract the base offset from an offset */
133#define NETIO_BASE_OFFSET(off) ((off) & 0xFF00000000000000ULL)
134/** Extract the local offset from an offset */
135#define NETIO_LOCAL_OFFSET(off) ((off) & 0x00FFFFFFFFFFFFFFULL)
136
137
138/**
139 * Get/set offset.
140 */
141typedef union
142{
143 struct
144 {
145 uint64_t addr:48; /**< Class-specific address */
146 unsigned int class:8; /**< Class (e.g., NETIO_PARAM) */
147 unsigned int opcode:8; /**< High 8 bits of NETIO_IPP_PARAM_OFF */
148 }
149 bits; /**< Bitfields */
150 uint64_t word; /**< Aggregated value to use as the offset */
151}
152__netio_getset_offset_t;
153
154/**
155 * Fast I/O index offsets (must be contiguous).
156 */
157typedef enum
158{
159 NETIO_FASTIO_ALLOCATE = 0, /**< Get empty packet buffer */
160 NETIO_FASTIO_FREE_BUFFER = 1, /**< Give buffer back to IPP */
161 NETIO_FASTIO_RETURN_CREDITS = 2, /**< Give credits to IPP */
162 NETIO_FASTIO_SEND_PKT_NOCK = 3, /**< Send a packet, no checksum */
163 NETIO_FASTIO_SEND_PKT_CK = 4, /**< Send a packet, with checksum */
164 NETIO_FASTIO_SEND_PKT_VEC = 5, /**< Send a vector of packets */
165 NETIO_FASTIO_SENDV_PKT = 6, /**< Sendv one packet */
166 NETIO_FASTIO_NUM_INDEX = 7, /**< Total number of fast I/O indices */
167} netio_fastio_index_t;
168
169/** 3-word return type for Fast I/O call. */
170typedef struct
171{
172 int err; /**< Error code. */
173 uint32_t val0; /**< Value. Meaning depends upon the specific call. */
174 uint32_t val1; /**< Value. Meaning depends upon the specific call. */
175} netio_fastio_rv3_t;
176
177/** 0-argument fast I/O call */
178int __netio_fastio0(uint32_t fastio_index);
179/** 1-argument fast I/O call */
180int __netio_fastio1(uint32_t fastio_index, uint32_t arg0);
181/** 3-argument fast I/O call, 2-word return value */
182netio_fastio_rv3_t __netio_fastio3_rv3(uint32_t fastio_index, uint32_t arg0,
183 uint32_t arg1, uint32_t arg2);
184/** 4-argument fast I/O call */
185int __netio_fastio4(uint32_t fastio_index, uint32_t arg0, uint32_t arg1,
186 uint32_t arg2, uint32_t arg3);
187/** 6-argument fast I/O call */
188int __netio_fastio6(uint32_t fastio_index, uint32_t arg0, uint32_t arg1,
189 uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5);
190/** 9-argument fast I/O call */
191int __netio_fastio9(uint32_t fastio_index, uint32_t arg0, uint32_t arg1,
192 uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5,
193 uint32_t arg6, uint32_t arg7, uint32_t arg8);
194
195/** Allocate an empty packet.
196 * @param fastio_index Fast I/O index.
197 * @param size Size of the packet to allocate.
198 */
199#define __netio_fastio_allocate(fastio_index, size) \
200 __netio_fastio1((fastio_index) + NETIO_FASTIO_ALLOCATE, size)
201
202/** Free a buffer.
203 * @param fastio_index Fast I/O index.
204 * @param handle Handle for the packet to free.
205 */
206#define __netio_fastio_free_buffer(fastio_index, handle) \
207 __netio_fastio1((fastio_index) + NETIO_FASTIO_FREE_BUFFER, handle)
208
209/** Increment our receive credits.
210 * @param fastio_index Fast I/O index.
211 * @param credits Number of credits to add.
212 */
213#define __netio_fastio_return_credits(fastio_index, credits) \
214 __netio_fastio1((fastio_index) + NETIO_FASTIO_RETURN_CREDITS, credits)
215
216/** Send packet, no checksum.
217 * @param fastio_index Fast I/O index.
218 * @param ackflag Nonzero if we want an ack.
219 * @param size Size of the packet.
220 * @param va Virtual address of start of packet.
221 * @param handle Packet handle.
222 */
223#define __netio_fastio_send_pkt_nock(fastio_index, ackflag, size, va, handle) \
224 __netio_fastio4((fastio_index) + NETIO_FASTIO_SEND_PKT_NOCK, ackflag, \
225 size, va, handle)
226
227/** Send packet, calculate checksum.
228 * @param fastio_index Fast I/O index.
229 * @param ackflag Nonzero if we want an ack.
230 * @param size Size of the packet.
231 * @param va Virtual address of start of packet.
232 * @param handle Packet handle.
233 * @param csum0 Shim checksum header.
234 * @param csum1 Checksum seed.
235 */
236#define __netio_fastio_send_pkt_ck(fastio_index, ackflag, size, va, handle, \
237 csum0, csum1) \
238 __netio_fastio6((fastio_index) + NETIO_FASTIO_SEND_PKT_CK, ackflag, \
239 size, va, handle, csum0, csum1)
240
241
242/** Format for the "csum0" argument to the __netio_fastio_send routines
243 * and LEPP. Note that this is currently exactly identical to the
244 * ShimProtocolOffloadHeader.
245 */
246typedef union
247{
248 struct
249 {
250 unsigned int start_byte:7; /**< The first byte to be checksummed */
251 unsigned int count:14; /**< Number of bytes to be checksummed. */
252 unsigned int destination_byte:7; /**< The byte to write the checksum to. */
253 unsigned int reserved:4; /**< Reserved. */
254 } bits; /**< Decomposed method of access. */
255 unsigned int word; /**< To send out the IDN. */
256} __netio_checksum_header_t;
257
258
259/** Sendv packet with 1 or 2 segments.
260 * @param fastio_index Fast I/O index.
261 * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus
262 * 1 in next 2 bits; expected checksum in high 16 bits.
263 * @param confno Confirmation number to request, if notify flag set.
264 * @param csum0 Checksum descriptor; if zero, no checksum.
265 * @param va_F Virtual address of first segment.
266 * @param va_L Virtual address of last segment, if 2 segments.
267 * @param len_F_L Length of first segment in low 16 bits; length of last
268 * segment, if 2 segments, in high 16 bits.
269 */
270#define __netio_fastio_sendv_pkt_1_2(fastio_index, flags, confno, csum0, \
271 va_F, va_L, len_F_L) \
272 __netio_fastio6((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \
273 csum0, va_F, va_L, len_F_L)
274
275/** Send packet on PCIe interface.
276 * @param fastio_index Fast I/O index.
277 * @param flags Ack/csum/notify flags in low 3 bits.
278 * @param confno Confirmation number to request, if notify flag set.
279 * @param csum0 Checksum descriptor; Hard wired 0, not needed for PCIe.
280 * @param va_F Virtual address of the packet buffer.
281 * @param va_L Virtual address of last segment, if 2 segments. Hard wired 0.
282 * @param len_F_L Length of the packet buffer in low 16 bits.
283 */
284#define __netio_fastio_send_pcie_pkt(fastio_index, flags, confno, csum0, \
285 va_F, va_L, len_F_L) \
286 __netio_fastio6((fastio_index) + PCIE_FASTIO_SENDV_PKT, flags, confno, \
287 csum0, va_F, va_L, len_F_L)
288
289/** Sendv packet with 3 or 4 segments.
290 * @param fastio_index Fast I/O index.
291 * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus
292 * 1 in next 2 bits; expected checksum in high 16 bits.
293 * @param confno Confirmation number to request, if notify flag set.
294 * @param csum0 Checksum descriptor; if zero, no checksum.
295 * @param va_F Virtual address of first segment.
296 * @param va_L Virtual address of last segment (third segment if 3 segments,
297 * fourth segment if 4 segments).
298 * @param len_F_L Length of first segment in low 16 bits; length of last
299 * segment in high 16 bits.
300 * @param va_M0 Virtual address of "middle 0" segment; this segment is sent
301 * second when there are three segments, and third if there are four.
302 * @param va_M1 Virtual address of "middle 1" segment; this segment is sent
303 * second when there are four segments.
304 * @param len_M0_M1 Length of middle 0 segment in low 16 bits; length of middle
305 * 1 segment, if 4 segments, in high 16 bits.
306 */
307#define __netio_fastio_sendv_pkt_3_4(fastio_index, flags, confno, csum0, va_F, \
308 va_L, len_F_L, va_M0, va_M1, len_M0_M1) \
309 __netio_fastio9((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \
310 csum0, va_F, va_L, len_F_L, va_M0, va_M1, len_M0_M1)
311
312/** Send vector of packets.
313 * @param fastio_index Fast I/O index.
314 * @param seqno Number of packets transmitted so far on this interface;
315 * used to decide which packets should be acknowledged.
316 * @param nentries Number of entries in vector.
317 * @param va Virtual address of start of vector entry array.
318 * @return 3-word netio_fastio_rv3_t structure. The structure's err member
319 * is an error code, or zero if no error. The val0 member is the
320 * updated value of seqno; it has been incremented by 1 for each
321 * packet sent. That increment may be less than nentries if an
322 * error occured, or if some of the entries in the vector contain
323 * handles equal to NETIO_PKT_HANDLE_NONE. The val1 member is the
324 * updated value of nentries; it has been decremented by 1 for each
325 * vector entry processed. Again, that decrement may be less than
326 * nentries (leaving the returned value positive) if an error
327 * occurred.
328 */
329#define __netio_fastio_send_pkt_vec(fastio_index, seqno, nentries, va) \
330 __netio_fastio3_rv3((fastio_index) + NETIO_FASTIO_SEND_PKT_VEC, seqno, \
331 nentries, va)
332
333
334/** An egress DMA command for LEPP. */
335typedef struct
336{
337 /** Is this a TSO transfer?
338 *
339 * NOTE: This field is always 0, to distinguish it from
340 * lepp_tso_cmd_t. It must come first!
341 */
342 uint8_t tso : 1;
343
344 /** Unused padding bits. */
345 uint8_t _unused : 3;
346
347 /** Should this packet be sent directly from caches instead of DRAM,
348 * using hash-for-home to locate the packet data?
349 */
350 uint8_t hash_for_home : 1;
351
352 /** Should we compute a checksum? */
353 uint8_t compute_checksum : 1;
354
355 /** Is this the final buffer for this packet?
356 *
357 * A single packet can be split over several input buffers (a "gather"
358 * operation). This flag indicates that this is the last buffer
359 * in a packet.
360 */
361 uint8_t end_of_packet : 1;
362
363 /** Should LEPP advance 'comp_busy' when this DMA is fully finished? */
364 uint8_t send_completion : 1;
365
366 /** High bits of Client Physical Address of the start of the buffer
367 * to be egressed.
368 *
369 * NOTE: Only 6 bits are actually needed here, as CPAs are
370 * currently 38 bits. So two bits could be scavenged from this.
371 */
372 uint8_t cpa_hi;
373
374 /** The number of bytes to be egressed. */
375 uint16_t length;
376
377 /** Low 32 bits of Client Physical Address of the start of the buffer
378 * to be egressed.
379 */
380 uint32_t cpa_lo;
381
382 /** Checksum information (only used if 'compute_checksum'). */
383 __netio_checksum_header_t checksum_data;
384
385} lepp_cmd_t;
386
387
388/** A chunk of physical memory for a TSO egress. */
389typedef struct
390{
391 /** The low bits of the CPA. */
392 uint32_t cpa_lo;
393 /** The high bits of the CPA. */
394 uint16_t cpa_hi : 15;
395 /** Should this packet be sent directly from caches instead of DRAM,
396 * using hash-for-home to locate the packet data?
397 */
398 uint16_t hash_for_home : 1;
399 /** The length in bytes. */
400 uint16_t length;
401} lepp_frag_t;
402
403
404/** An LEPP command that handles TSO. */
405typedef struct
406{
407 /** Is this a TSO transfer?
408 *
409 * NOTE: This field is always 1, to distinguish it from
410 * lepp_cmd_t. It must come first!
411 */
412 uint8_t tso : 1;
413
414 /** Unused padding bits. */
415 uint8_t _unused : 7;
416
417 /** Size of the header[] array in bytes. It must be in the range
418 * [40, 127], which are the smallest header for a TCP packet over
419 * Ethernet and the maximum possible prepend size supported by
420 * hardware, respectively. Note that the array storage must be
421 * padded out to a multiple of four bytes so that the following
422 * LEPP command is aligned properly.
423 */
424 uint8_t header_size;
425
426 /** Byte offset of the IP header in header[]. */
427 uint8_t ip_offset;
428
429 /** Byte offset of the TCP header in header[]. */
430 uint8_t tcp_offset;
431
432 /** The number of bytes to use for the payload of each packet,
433 * except of course the last one, which may not have enough bytes.
434 * This means that each Ethernet packet except the last will have a
435 * size of header_size + payload_size.
436 */
437 uint16_t payload_size;
438
439 /** The length of the 'frags' array that follows this struct. */
440 uint16_t num_frags;
441
442 /** The actual frags. */
443 lepp_frag_t frags[0 /* Variable-sized; num_frags entries. */];
444
445 /*
446 * The packet header template logically follows frags[],
447 * but you can't declare that in C.
448 *
449 * uint32_t header[header_size_in_words_rounded_up];
450 */
451
452} lepp_tso_cmd_t;
453
454
455/** An LEPP completion ring entry. */
456typedef void* lepp_comp_t;
457
458
459/** Maximum number of frags for one TSO command. This is adapted from
460 * linux's "MAX_SKB_FRAGS", and presumably over-estimates by one, for
461 * our page size of exactly 65536. We add one for a "body" fragment.
462 */
463#define LEPP_MAX_FRAGS (65536 / HV_PAGE_SIZE_SMALL + 2 + 1)
464
465/** Total number of bytes needed for an lepp_tso_cmd_t. */
466#define LEPP_TSO_CMD_SIZE(num_frags, header_size) \
467 (sizeof(lepp_tso_cmd_t) + \
468 (num_frags) * sizeof(lepp_frag_t) + \
469 (((header_size) + 3) & -4))
470
471/** The size of the lepp "cmd" queue. */
472#define LEPP_CMD_QUEUE_BYTES \
473 (((CHIP_L2_CACHE_SIZE() - 2 * CHIP_L2_LINE_SIZE()) / \
474 (sizeof(lepp_cmd_t) + sizeof(lepp_comp_t))) * sizeof(lepp_cmd_t))
475
476/** The largest possible command that can go in lepp_queue_t::cmds[]. */
477#define LEPP_MAX_CMD_SIZE LEPP_TSO_CMD_SIZE(LEPP_MAX_FRAGS, 128)
478
479/** The largest possible value of lepp_queue_t::cmd_{head, tail} (inclusive).
480 */
481#define LEPP_CMD_LIMIT \
482 (LEPP_CMD_QUEUE_BYTES - LEPP_MAX_CMD_SIZE)
483
484/** The maximum number of completions in an LEPP queue. */
485#define LEPP_COMP_QUEUE_SIZE \
486 ((LEPP_CMD_LIMIT + sizeof(lepp_cmd_t) - 1) / sizeof(lepp_cmd_t))
487
488/** Increment an index modulo the queue size. */
489#define LEPP_QINC(var) \
490 (var = __insn_mnz(var - (LEPP_COMP_QUEUE_SIZE - 1), var + 1))
491
492/** A queue used to convey egress commands from the client to LEPP. */
493typedef struct
494{
495 /** Index of first completion not yet processed by user code.
496 * If this is equal to comp_busy, there are no such completions.
497 *
498 * NOTE: This is only read/written by the user.
499 */
500 unsigned int comp_head;
501
502 /** Index of first completion record not yet completed.
503 * If this is equal to comp_tail, there are no such completions.
504 * This index gets advanced (modulo LEPP_QUEUE_SIZE) whenever
505 * a command with the 'completion' bit set is finished.
506 *
507 * NOTE: This is only written by LEPP, only read by the user.
508 */
509 volatile unsigned int comp_busy;
510
511 /** Index of the first empty slot in the completion ring.
512 * Entries from this up to but not including comp_head (in ring order)
513 * can be filled in with completion data.
514 *
515 * NOTE: This is only read/written by the user.
516 */
517 unsigned int comp_tail;
518
519 /** Byte index of first command enqueued for LEPP but not yet processed.
520 *
521 * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT.
522 *
523 * NOTE: LEPP advances this counter as soon as it no longer needs
524 * the cmds[] storage for this entry, but the transfer is not actually
525 * complete (i.e. the buffer pointed to by the command is no longer
526 * needed) until comp_busy advances.
527 *
528 * If this is equal to cmd_tail, the ring is empty.
529 *
530 * NOTE: This is only written by LEPP, only read by the user.
531 */
532 volatile unsigned int cmd_head;
533
534 /** Byte index of first empty slot in the command ring. This field can
535 * be incremented up to but not equal to cmd_head (because that would
536 * mean the ring is empty).
537 *
538 * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT.
539 *
540 * NOTE: This is read/written by the user, only read by LEPP.
541 */
542 volatile unsigned int cmd_tail;
543
544 /** A ring of variable-sized egress DMA commands.
545 *
546 * NOTE: Only written by the user, only read by LEPP.
547 */
548 char cmds[LEPP_CMD_QUEUE_BYTES]
549 __attribute__((aligned(CHIP_L2_LINE_SIZE())));
550
551 /** A ring of user completion data.
552 * NOTE: Only read/written by the user.
553 */
554 lepp_comp_t comps[LEPP_COMP_QUEUE_SIZE]
555 __attribute__((aligned(CHIP_L2_LINE_SIZE())));
556} lepp_queue_t;
557
558
559/** An internal helper function for determining the number of entries
560 * available in a ring buffer, given that there is one sentinel.
561 */
562static inline unsigned int
563_lepp_num_free_slots(unsigned int head, unsigned int tail)
564{
565 /*
566 * One entry is reserved for use as a sentinel, to distinguish
567 * "empty" from "full". So we compute
568 * (head - tail - 1) % LEPP_QUEUE_SIZE, but without using a slow % operation.
569 */
570 return (head - tail - 1) + ((head <= tail) ? LEPP_COMP_QUEUE_SIZE : 0);
571}
572
573
574/** Returns how many new comp entries can be enqueued. */
575static inline unsigned int
576lepp_num_free_comp_slots(const lepp_queue_t* q)
577{
578 return _lepp_num_free_slots(q->comp_head, q->comp_tail);
579}
580
581static inline int
582lepp_qsub(int v1, int v2)
583{
584 int delta = v1 - v2;
585 return delta + ((delta >> 31) & LEPP_COMP_QUEUE_SIZE);
586}
587
588
589/** FIXME: Check this from linux, via a new "pwrite()" call. */
590#define LIPP_VERSION 1
591
592
593/** We use exactly two bytes of alignment padding. */
594#define LIPP_PACKET_PADDING 2
595
596/** The minimum size of a "small" buffer (including the padding). */
597#define LIPP_SMALL_PACKET_SIZE 128
598
599/*
600 * NOTE: The following two values should total to less than around
601 * 13582, to keep the total size used for "lipp_state_t" below 64K.
602 */
603
604/** The maximum number of "small" buffers.
605 * This is enough for 53 network cpus with 128 credits. Note that
606 * if these are exhausted, we will fall back to using large buffers.
607 */
608#define LIPP_SMALL_BUFFERS 6785
609
610/** The maximum number of "large" buffers.
611 * This is enough for 53 network cpus with 128 credits.
612 */
613#define LIPP_LARGE_BUFFERS 6785
614
615#endif /* __DRV_XGBE_INTF_H__ */
diff --git a/arch/tile/include/hv/netio_errors.h b/arch/tile/include/hv/netio_errors.h
new file mode 100644
index 000000000000..e1591bff61b5
--- /dev/null
+++ b/arch/tile/include/hv/netio_errors.h
@@ -0,0 +1,122 @@
1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 */
14
15/**
16 * Error codes returned from NetIO routines.
17 */
18
19#ifndef __NETIO_ERRORS_H__
20#define __NETIO_ERRORS_H__
21
22/**
23 * @addtogroup error
24 *
25 * @brief The error codes returned by NetIO functions.
26 *
27 * NetIO functions return 0 (defined as ::NETIO_NO_ERROR) on success, and
28 * a negative value if an error occurs.
29 *
30 * In cases where a NetIO function failed due to a error reported by
31 * system libraries, the error code will be the negation of the
32 * system errno at the time of failure. The @ref netio_strerror()
33 * function will deliver error strings for both NetIO and system error
34 * codes.
35 *
36 * @{
37 */
38
39/** The set of all NetIO errors. */
40typedef enum
41{
42 /** Operation successfully completed. */
43 NETIO_NO_ERROR = 0,
44
45 /** A packet was successfully retrieved from an input queue. */
46 NETIO_PKT = 0,
47
48 /** Largest NetIO error number. */
49 NETIO_ERR_MAX = -701,
50
51 /** The tile is not registered with the IPP. */
52 NETIO_NOT_REGISTERED = -701,
53
54 /** No packet was available to retrieve from the input queue. */
55 NETIO_NOPKT = -702,
56
57 /** The requested function is not implemented. */
58 NETIO_NOT_IMPLEMENTED = -703,
59
60 /** On a registration operation, the target queue already has the maximum
61 * number of tiles registered for it, and no more may be added. On a
62 * packet send operation, the output queue is full and nothing more can
63 * be queued until some of the queued packets are actually transmitted. */
64 NETIO_QUEUE_FULL = -704,
65
66 /** The calling process or thread is not bound to exactly one CPU. */
67 NETIO_BAD_AFFINITY = -705,
68
69 /** Cannot allocate memory on requested controllers. */
70 NETIO_CANNOT_HOME = -706,
71
72 /** On a registration operation, the IPP specified is not configured
73 * to support the options requested; for instance, the application
74 * wants a specific type of tagged headers which the configured IPP
75 * doesn't support. Or, the supplied configuration information is
76 * not self-consistent, or is out of range; for instance, specifying
77 * both NETIO_RECV and NETIO_NO_RECV, or asking for more than
78 * NETIO_MAX_SEND_BUFFERS to be preallocated. On a VLAN or bucket
79 * configure operation, the number of items, or the base item, was
80 * out of range.
81 */
82 NETIO_BAD_CONFIG = -707,
83
84 /** Too many tiles have registered to transmit packets. */
85 NETIO_TOOMANY_XMIT = -708,
86
87 /** Packet transmission was attempted on a queue which was registered
88 with transmit disabled. */
89 NETIO_UNREG_XMIT = -709,
90
91 /** This tile is already registered with the IPP. */
92 NETIO_ALREADY_REGISTERED = -710,
93
94 /** The Ethernet link is down. The application should try again later. */
95 NETIO_LINK_DOWN = -711,
96
97 /** An invalid memory buffer has been specified. This may be an unmapped
98 * virtual address, or one which does not meet alignment requirements.
99 * For netio_input_register(), this error may be returned when multiple
100 * processes specify different memory regions to be used for NetIO
101 * buffers. That can happen if these processes specify explicit memory
102 * regions with the ::NETIO_FIXED_BUFFER_VA flag, or if tmc_cmem_init()
103 * has not been called by a common ancestor of the processes.
104 */
105 NETIO_FAULT = -712,
106
107 /** Cannot combine user-managed shared memory and cache coherence. */
108 NETIO_BAD_CACHE_CONFIG = -713,
109
110 /** Smallest NetIO error number. */
111 NETIO_ERR_MIN = -713,
112
113#ifndef __DOXYGEN__
114 /** Used internally to mean that no response is needed; never returned to
115 * an application. */
116 NETIO_NO_RESPONSE = 1
117#endif
118} netio_error_t;
119
120/** @} */
121
122#endif /* __NETIO_ERRORS_H__ */
diff --git a/arch/tile/include/hv/netio_intf.h b/arch/tile/include/hv/netio_intf.h
new file mode 100644
index 000000000000..8d20972aba2c
--- /dev/null
+++ b/arch/tile/include/hv/netio_intf.h
@@ -0,0 +1,2975 @@
1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 */
14
15/**
16 * NetIO interface structures and macros.
17 */
18
19#ifndef __NETIO_INTF_H__
20#define __NETIO_INTF_H__
21
22#include <hv/netio_errors.h>
23
24#ifdef __KERNEL__
25#include <linux/types.h>
26#else
27#include <stdint.h>
28#endif
29
30#if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__)
31#include <assert.h>
32#define netio_assert assert /**< Enable assertions from macros */
33#else
34#define netio_assert(...) ((void)(0)) /**< Disable assertions from macros */
35#endif
36
37/*
38 * If none of these symbols are defined, we're building libnetio in an
39 * environment where we have pthreads, so we'll enable locking.
40 */
41#if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__) && \
42 !defined(__NEWLIB__)
43#define _NETIO_PTHREAD /**< Include a mutex in netio_queue_t below */
44
45/*
46 * If NETIO_UNLOCKED is defined, we don't do use per-cpu locks on
47 * per-packet NetIO operations. We still do pthread locking on things
48 * like netio_input_register, though. This is used for building
49 * libnetio_unlocked.
50 */
51#ifndef NETIO_UNLOCKED
52
53/* Avoid PLT overhead by using our own inlined per-cpu lock. */
54#include <sched.h>
55typedef int _netio_percpu_mutex_t;
56
57static __inline int
58_netio_percpu_mutex_init(_netio_percpu_mutex_t* lock)
59{
60 *lock = 0;
61 return 0;
62}
63
64static __inline int
65_netio_percpu_mutex_lock(_netio_percpu_mutex_t* lock)
66{
67 while (__builtin_expect(__insn_tns(lock), 0))
68 sched_yield();
69 return 0;
70}
71
72static __inline int
73_netio_percpu_mutex_unlock(_netio_percpu_mutex_t* lock)
74{
75 *lock = 0;
76 return 0;
77}
78
79#else /* NETIO_UNLOCKED */
80
81/* Don't do any locking for per-packet NetIO operations. */
82typedef int _netio_percpu_mutex_t;
83#define _netio_percpu_mutex_init(L)
84#define _netio_percpu_mutex_lock(L)
85#define _netio_percpu_mutex_unlock(L)
86
87#endif /* NETIO_UNLOCKED */
88#endif /* !__HV__, !__BOGUX, !__KERNEL__, !__NEWLIB__ */
89
90/** How many tiles can register for a given queue.
91 * @ingroup setup */
92#define NETIO_MAX_TILES_PER_QUEUE 64
93
94
95/** Largest permissible queue identifier.
96 * @ingroup setup */
97#define NETIO_MAX_QUEUE_ID 255
98
99
100#ifndef __DOXYGEN__
101
102/* Metadata packet checksum/ethertype flags. */
103
104/** The L4 checksum has not been calculated. */
105#define _NETIO_PKT_NO_L4_CSUM_SHIFT 0
106#define _NETIO_PKT_NO_L4_CSUM_RMASK 1
107#define _NETIO_PKT_NO_L4_CSUM_MASK \
108 (_NETIO_PKT_NO_L4_CSUM_RMASK << _NETIO_PKT_NO_L4_CSUM_SHIFT)
109
110/** The L3 checksum has not been calculated. */
111#define _NETIO_PKT_NO_L3_CSUM_SHIFT 1
112#define _NETIO_PKT_NO_L3_CSUM_RMASK 1
113#define _NETIO_PKT_NO_L3_CSUM_MASK \
114 (_NETIO_PKT_NO_L3_CSUM_RMASK << _NETIO_PKT_NO_L3_CSUM_SHIFT)
115
116/** The L3 checksum is incorrect (or perhaps has not been calculated). */
117#define _NETIO_PKT_BAD_L3_CSUM_SHIFT 2
118#define _NETIO_PKT_BAD_L3_CSUM_RMASK 1
119#define _NETIO_PKT_BAD_L3_CSUM_MASK \
120 (_NETIO_PKT_BAD_L3_CSUM_RMASK << _NETIO_PKT_BAD_L3_CSUM_SHIFT)
121
122/** The Ethernet packet type is unrecognized. */
123#define _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT 3
124#define _NETIO_PKT_TYPE_UNRECOGNIZED_RMASK 1
125#define _NETIO_PKT_TYPE_UNRECOGNIZED_MASK \
126 (_NETIO_PKT_TYPE_UNRECOGNIZED_RMASK << \
127 _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT)
128
129/* Metadata packet type flags. */
130
131/** Where the packet type bits are; this field is the index into
132 * _netio_pkt_info. */
133#define _NETIO_PKT_TYPE_SHIFT 4
134#define _NETIO_PKT_TYPE_RMASK 0x3F
135
136/** How many VLAN tags the packet has, and, if we have two, which one we
137 * actually grouped on. A VLAN within a proprietary (Marvell or Broadcom)
138 * tag is counted here. */
139#define _NETIO_PKT_VLAN_SHIFT 4
140#define _NETIO_PKT_VLAN_RMASK 0x3
141#define _NETIO_PKT_VLAN_MASK \
142 (_NETIO_PKT_VLAN_RMASK << _NETIO_PKT_VLAN_SHIFT)
143#define _NETIO_PKT_VLAN_NONE 0 /* No VLAN tag. */
144#define _NETIO_PKT_VLAN_ONE 1 /* One VLAN tag. */
145#define _NETIO_PKT_VLAN_TWO_OUTER 2 /* Two VLAN tags, outer one used. */
146#define _NETIO_PKT_VLAN_TWO_INNER 3 /* Two VLAN tags, inner one used. */
147
148/** Which proprietary tags the packet has. */
149#define _NETIO_PKT_TAG_SHIFT 6
150#define _NETIO_PKT_TAG_RMASK 0x3
151#define _NETIO_PKT_TAG_MASK \
152 (_NETIO_PKT_TAG_RMASK << _NETIO_PKT_TAG_SHIFT)
153#define _NETIO_PKT_TAG_NONE 0 /* No proprietary tags. */
154#define _NETIO_PKT_TAG_MRVL 1 /* Marvell HyperG.Stack tags. */
155#define _NETIO_PKT_TAG_MRVL_EXT 2 /* HyperG.Stack extended tags. */
156#define _NETIO_PKT_TAG_BRCM 3 /* Broadcom HiGig tags. */
157
158/** Whether a packet has an LLC + SNAP header. */
159#define _NETIO_PKT_SNAP_SHIFT 8
160#define _NETIO_PKT_SNAP_RMASK 0x1
161#define _NETIO_PKT_SNAP_MASK \
162 (_NETIO_PKT_SNAP_RMASK << _NETIO_PKT_SNAP_SHIFT)
163
164/* NOTE: Bits 9 and 10 are unused. */
165
166/** Length of any custom data before the L2 header, in words. */
167#define _NETIO_PKT_CUSTOM_LEN_SHIFT 11
168#define _NETIO_PKT_CUSTOM_LEN_RMASK 0x1F
169#define _NETIO_PKT_CUSTOM_LEN_MASK \
170 (_NETIO_PKT_CUSTOM_LEN_RMASK << _NETIO_PKT_CUSTOM_LEN_SHIFT)
171
172/** The L4 checksum is incorrect (or perhaps has not been calculated). */
173#define _NETIO_PKT_BAD_L4_CSUM_SHIFT 16
174#define _NETIO_PKT_BAD_L4_CSUM_RMASK 0x1
175#define _NETIO_PKT_BAD_L4_CSUM_MASK \
176 (_NETIO_PKT_BAD_L4_CSUM_RMASK << _NETIO_PKT_BAD_L4_CSUM_SHIFT)
177
178/** Length of the L2 header, in words. */
179#define _NETIO_PKT_L2_LEN_SHIFT 17
180#define _NETIO_PKT_L2_LEN_RMASK 0x1F
181#define _NETIO_PKT_L2_LEN_MASK \
182 (_NETIO_PKT_L2_LEN_RMASK << _NETIO_PKT_L2_LEN_SHIFT)
183
184
185/* Flags in minimal packet metadata. */
186
187/** We need an eDMA checksum on this packet. */
188#define _NETIO_PKT_NEED_EDMA_CSUM_SHIFT 0
189#define _NETIO_PKT_NEED_EDMA_CSUM_RMASK 1
190#define _NETIO_PKT_NEED_EDMA_CSUM_MASK \
191 (_NETIO_PKT_NEED_EDMA_CSUM_RMASK << _NETIO_PKT_NEED_EDMA_CSUM_SHIFT)
192
193/* Data within the packet information table. */
194
195/* Note that, for efficiency, code which uses these fields assumes that none
196 * of the shift values below are zero. See uses below for an explanation. */
197
198/** Offset within the L2 header of the innermost ethertype (in halfwords). */
199#define _NETIO_PKT_INFO_ETYPE_SHIFT 6
200#define _NETIO_PKT_INFO_ETYPE_RMASK 0x1F
201
202/** Offset within the L2 header of the VLAN tag (in halfwords). */
203#define _NETIO_PKT_INFO_VLAN_SHIFT 11
204#define _NETIO_PKT_INFO_VLAN_RMASK 0x1F
205
206#endif
207
208
209/** The size of a memory buffer representing a small packet.
210 * @ingroup egress */
211#define SMALL_PACKET_SIZE 256
212
213/** The size of a memory buffer representing a large packet.
214 * @ingroup egress */
215#define LARGE_PACKET_SIZE 2048
216
217/** The size of a memory buffer representing a jumbo packet.
218 * @ingroup egress */
219#define JUMBO_PACKET_SIZE (12 * 1024)
220
221
222/* Common ethertypes.
223 * @ingroup ingress */
224/** @{ */
225/** The ethertype of IPv4. */
226#define ETHERTYPE_IPv4 (0x0800)
227/** The ethertype of ARP. */
228#define ETHERTYPE_ARP (0x0806)
229/** The ethertype of VLANs. */
230#define ETHERTYPE_VLAN (0x8100)
231/** The ethertype of a Q-in-Q header. */
232#define ETHERTYPE_Q_IN_Q (0x9100)
233/** The ethertype of IPv6. */
234#define ETHERTYPE_IPv6 (0x86DD)
235/** The ethertype of MPLS. */
236#define ETHERTYPE_MPLS (0x8847)
237/** @} */
238
239
240/** The possible return values of NETIO_PKT_STATUS.
241 * @ingroup ingress
242 */
243typedef enum
244{
245 /** No problems were detected with this packet. */
246 NETIO_PKT_STATUS_OK,
247 /** The packet is undersized; this is expected behavior if the packet's
248 * ethertype is unrecognized, but otherwise the packet is likely corrupt. */
249 NETIO_PKT_STATUS_UNDERSIZE,
250 /** The packet is oversized and some trailing bytes have been discarded.
251 This is expected behavior for short packets, since it's impossible to
252 precisely determine the amount of padding which may have been added to
253 them to make them meet the minimum Ethernet packet size. */
254 NETIO_PKT_STATUS_OVERSIZE,
255 /** The packet was judged to be corrupt by hardware (for instance, it had
256 a bad CRC, or part of it was discarded due to lack of buffer space in
257 the I/O shim) and should be discarded. */
258 NETIO_PKT_STATUS_BAD
259} netio_pkt_status_t;
260
261
262/** Log2 of how many buckets we have. */
263#define NETIO_LOG2_NUM_BUCKETS (10)
264
265/** How many buckets we have.
266 * @ingroup ingress */
267#define NETIO_NUM_BUCKETS (1 << NETIO_LOG2_NUM_BUCKETS)
268
269
270/**
271 * @brief A group-to-bucket identifier.
272 *
273 * @ingroup setup
274 *
275 * This tells us what to do with a given group.
276 */
277typedef union {
278 /** The header broken down into bits. */
279 struct {
280 /** Whether we should balance on L4, if available */
281 unsigned int __balance_on_l4:1;
282 /** Whether we should balance on L3, if available */
283 unsigned int __balance_on_l3:1;
284 /** Whether we should balance on L2, if available */
285 unsigned int __balance_on_l2:1;
286 /** Reserved for future use */
287 unsigned int __reserved:1;
288 /** The base bucket to use to send traffic */
289 unsigned int __bucket_base:NETIO_LOG2_NUM_BUCKETS;
290 /** The mask to apply to the balancing value. This must be one less
291 * than a power of two, e.g. 0x3 or 0xFF.
292 */
293 unsigned int __bucket_mask:NETIO_LOG2_NUM_BUCKETS;
294 /** Pad to 32 bits */
295 unsigned int __padding:(32 - 4 - 2 * NETIO_LOG2_NUM_BUCKETS);
296 } bits;
297 /** To send out the IDN. */
298 unsigned int word;
299}
300netio_group_t;
301
302
303/**
304 * @brief A VLAN-to-bucket identifier.
305 *
306 * @ingroup setup
307 *
308 * This tells us what to do with a given VLAN.
309 */
310typedef netio_group_t netio_vlan_t;
311
312
313/**
314 * A bucket-to-queue mapping.
315 * @ingroup setup
316 */
317typedef unsigned char netio_bucket_t;
318
319
320/**
321 * A packet size can always fit in a netio_size_t.
322 * @ingroup setup
323 */
324typedef unsigned int netio_size_t;
325
326
327/**
328 * @brief Ethernet standard (ingress) packet metadata.
329 *
330 * @ingroup ingress
331 *
332 * This is additional data associated with each packet.
333 * This structure is opaque and accessed through the @ref ingress.
334 *
335 * Also, the buffer population operation currently assumes that standard
336 * metadata is at least as large as minimal metadata, and will need to be
337 * modified if that is no longer the case.
338 */
339typedef struct
340{
341#ifdef __DOXYGEN__
342 /** This structure is opaque. */
343 unsigned char opaque[24];
344#else
345 /** The overall ordinal of the packet */
346 unsigned int __packet_ordinal;
347 /** The ordinal of the packet within the group */
348 unsigned int __group_ordinal;
349 /** The best flow hash IPP could compute. */
350 unsigned int __flow_hash;
351 /** Flags pertaining to checksum calculation, packet type, etc. */
352 unsigned int __flags;
353 /** The first word of "user data". */
354 unsigned int __user_data_0;
355 /** The second word of "user data". */
356 unsigned int __user_data_1;
357#endif
358}
359netio_pkt_metadata_t;
360
361
362/** To ensure that the L3 header is aligned mod 4, the L2 header should be
363 * aligned mod 4 plus 2, since every supported L2 header is 4n + 2 bytes
364 * long. The standard way to do this is to simply add 2 bytes of padding
365 * before the L2 header.
366 */
367#define NETIO_PACKET_PADDING 2
368
369
370
371/**
372 * @brief Ethernet minimal (egress) packet metadata.
373 *
374 * @ingroup egress
375 *
376 * This structure represents information about packets which have
377 * been processed by @ref netio_populate_buffer() or
378 * @ref netio_populate_prepend_buffer(). This structure is opaque
379 * and accessed through the @ref egress.
380 *
381 * @internal This structure is actually copied into the memory used by
382 * standard metadata, which is assumed to be large enough.
383 */
384typedef struct
385{
386#ifdef __DOXYGEN__
387 /** This structure is opaque. */
388 unsigned char opaque[14];
389#else
390 /** The offset of the L2 header from the start of the packet data. */
391 unsigned short l2_offset;
392 /** The offset of the L3 header from the start of the packet data. */
393 unsigned short l3_offset;
394 /** Where to write the checksum. */
395 unsigned char csum_location;
396 /** Where to start checksumming from. */
397 unsigned char csum_start;
398 /** Flags pertaining to checksum calculation etc. */
399 unsigned short flags;
400 /** The L2 length of the packet. */
401 unsigned short l2_length;
402 /** The checksum with which to seed the checksum generator. */
403 unsigned short csum_seed;
404 /** How much to checksum. */
405 unsigned short csum_length;
406#endif
407}
408netio_pkt_minimal_metadata_t;
409
410
411#ifndef __DOXYGEN__
412
413/**
414 * @brief An I/O notification header.
415 *
416 * This is the first word of data received from an I/O shim in a notification
417 * packet. It contains framing and status information.
418 */
419typedef union
420{
421 unsigned int word; /**< The whole word. */
422 /** The various fields. */
423 struct
424 {
425 unsigned int __channel:7; /**< Resource channel. */
426 unsigned int __type:4; /**< Type. */
427 unsigned int __ack:1; /**< Whether an acknowledgement is needed. */
428 unsigned int __reserved:1; /**< Reserved. */
429 unsigned int __protocol:1; /**< A protocol-specific word is added. */
430 unsigned int __status:2; /**< Status of the transfer. */
431 unsigned int __framing:2; /**< Framing of the transfer. */
432 unsigned int __transfer_size:14; /**< Transfer size in bytes (total). */
433 } bits;
434}
435__netio_pkt_notif_t;
436
437
438/**
439 * Returns the base address of the packet.
440 */
441#define _NETIO_PKT_HANDLE_BASE(p) \
442 ((unsigned char*)((p).word & 0xFFFFFFC0))
443
444/**
445 * Returns the base address of the packet.
446 */
447#define _NETIO_PKT_BASE(p) \
448 _NETIO_PKT_HANDLE_BASE(p->__packet)
449
450/**
451 * @brief An I/O notification packet (second word)
452 *
453 * This is the second word of data received from an I/O shim in a notification
454 * packet. This is the virtual address of the packet buffer, plus some flag
455 * bits. (The virtual address of the packet is always 256-byte aligned so we
456 * have room for 8 bits' worth of flags in the low 8 bits.)
457 *
458 * @internal
459 * NOTE: The low two bits must contain "__queue", so the "packet size"
460 * (SIZE_SMALL, SIZE_LARGE, or SIZE_JUMBO) can be determined quickly.
461 *
462 * If __addr or __offset are moved, _NETIO_PKT_BASE
463 * (defined right below this) must be changed.
464 */
465typedef union
466{
467 unsigned int word; /**< The whole word. */
468 /** The various fields. */
469 struct
470 {
471 /** Which queue the packet will be returned to once it is sent back to
472 the IPP. This is one of the SIZE_xxx values. */
473 unsigned int __queue:2;
474
475 /** The IPP handle of the sending IPP. */
476 unsigned int __ipp_handle:2;
477
478 /** Reserved for future use. */
479 unsigned int __reserved:1;
480
481 /** If 1, this packet has minimal (egress) metadata; otherwise, it
482 has standard (ingress) metadata. */
483 unsigned int __minimal:1;
484
485 /** Offset of the metadata within the packet. This value is multiplied
486 * by 64 and added to the base packet address to get the metadata
487 * address. Note that this field is aligned within the word such that
488 * you can easily extract the metadata address with a 26-bit mask. */
489 unsigned int __offset:2;
490
491 /** The top 24 bits of the packet's virtual address. */
492 unsigned int __addr:24;
493 } bits;
494}
495__netio_pkt_handle_t;
496
497#endif /* !__DOXYGEN__ */
498
499
500/**
501 * @brief A handle for an I/O packet's storage.
502 * @ingroup ingress
503 *
504 * netio_pkt_handle_t encodes the concept of a ::netio_pkt_t with its
505 * packet metadata removed. It is a much smaller type that exists to
506 * facilitate applications where the full ::netio_pkt_t type is too
507 * large, such as those that cache enormous numbers of packets or wish
508 * to transmit packet descriptors over the UDN.
509 *
510 * Because there is no metadata, most ::netio_pkt_t operations cannot be
511 * performed on a netio_pkt_handle_t. It supports only
512 * netio_free_handle() (to free the buffer) and
513 * NETIO_PKT_CUSTOM_DATA_H() (to access a pointer to its contents).
514 * The application must acquire any additional metadata it wants from the
515 * original ::netio_pkt_t and record it separately.
516 *
517 * A netio_pkt_handle_t can be extracted from a ::netio_pkt_t by calling
518 * NETIO_PKT_HANDLE(). An invalid handle (analogous to NULL) can be
519 * created by assigning the value ::NETIO_PKT_HANDLE_NONE. A handle can
520 * be tested for validity with NETIO_PKT_HANDLE_IS_VALID().
521 */
522typedef struct
523{
524 unsigned int word; /**< Opaque bits. */
525} netio_pkt_handle_t;
526
527/**
528 * @brief A packet descriptor.
529 *
530 * @ingroup ingress
531 * @ingroup egress
532 *
533 * This data structure represents a packet. The structure is manipulated
534 * through the @ref ingress and the @ref egress.
535 *
536 * While the contents of a netio_pkt_t are opaque, the structure itself is
537 * portable. This means that it may be shared between all tiles which have
538 * done a netio_input_register() call for the interface on which the pkt_t
539 * was initially received (via netio_get_packet()) or retrieved (via
540 * netio_get_buffer()). The contents of a netio_pkt_t can be transmitted to
541 * another tile via shared memory, or via a UDN message, or by other means.
542 * The destination tile may then use the pkt_t as if it had originally been
543 * received locally; it may read or write the packet's data, read its
544 * metadata, free the packet, send the packet, transfer the netio_pkt_t to
545 * yet another tile, and so forth.
546 *
547 * Once a netio_pkt_t has been transferred to a second tile, the first tile
548 * should not reference the original copy; in particular, if more than one
549 * tile frees or sends the same netio_pkt_t, the IPP's packet free lists will
550 * become corrupted. Note also that each tile which reads or modifies
551 * packet data must obey the memory coherency rules outlined in @ref input.
552 */
553typedef struct
554{
555#ifdef __DOXYGEN__
556 /** This structure is opaque. */
557 unsigned char opaque[32];
558#else
559 /** For an ingress packet (one with standard metadata), this is the
560 * notification header we got from the I/O shim. For an egress packet
561 * (one with minimal metadata), this word is zero if the packet has not
562 * been populated, and nonzero if it has. */
563 __netio_pkt_notif_t __notif_header;
564
565 /** Virtual address of the packet buffer, plus state flags. */
566 __netio_pkt_handle_t __packet;
567
568 /** Metadata associated with the packet. */
569 netio_pkt_metadata_t __metadata;
570#endif
571}
572netio_pkt_t;
573
574
575#ifndef __DOXYGEN__
576
577#define __NETIO_PKT_NOTIF_HEADER(pkt) ((pkt)->__notif_header)
578#define __NETIO_PKT_IPP_HANDLE(pkt) ((pkt)->__packet.bits.__ipp_handle)
579#define __NETIO_PKT_QUEUE(pkt) ((pkt)->__packet.bits.__queue)
580#define __NETIO_PKT_NOTIF_HEADER_M(mda, pkt) ((pkt)->__notif_header)
581#define __NETIO_PKT_IPP_HANDLE_M(mda, pkt) ((pkt)->__packet.bits.__ipp_handle)
582#define __NETIO_PKT_MINIMAL(pkt) ((pkt)->__packet.bits.__minimal)
583#define __NETIO_PKT_QUEUE_M(mda, pkt) ((pkt)->__packet.bits.__queue)
584#define __NETIO_PKT_FLAGS_M(mda, pkt) ((mda)->__flags)
585
586/* Packet information table, used by the attribute access functions below. */
587extern const uint16_t _netio_pkt_info[];
588
589#endif /* __DOXYGEN__ */
590
591
592#ifndef __DOXYGEN__
593/* These macros are deprecated and will disappear in a future MDE release. */
594#define NETIO_PKT_GOOD_CHECKSUM(pkt) \
595 NETIO_PKT_L4_CSUM_CORRECT(pkt)
596#define NETIO_PKT_GOOD_CHECKSUM_M(mda, pkt) \
597 NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt)
598#endif /* __DOXYGEN__ */
599
600
601/* Packet attribute access functions. */
602
603/** Return a pointer to the metadata for a packet.
604 * @ingroup ingress
605 *
606 * Calling this function once and passing the result to other retrieval
607 * functions with a "_M" suffix usually improves performance. This
608 * function must be called on an 'ingress' packet (i.e. one retrieved
609 * by @ref netio_get_packet(), on which @ref netio_populate_buffer() or
610 * @ref netio_populate_prepend_buffer have not been called). Use of this
611 * function on an 'egress' packet will cause an assertion failure.
612 *
613 * @param[in] pkt Packet on which to operate.
614 * @return A pointer to the packet's standard metadata.
615 */
616static __inline netio_pkt_metadata_t*
617NETIO_PKT_METADATA(netio_pkt_t* pkt)
618{
619 netio_assert(!pkt->__packet.bits.__minimal);
620 return &pkt->__metadata;
621}
622
623
624/** Return a pointer to the minimal metadata for a packet.
625 * @ingroup egress
626 *
627 * Calling this function once and passing the result to other retrieval
628 * functions with a "_MM" suffix usually improves performance. This
629 * function must be called on an 'egress' packet (i.e. one on which
630 * @ref netio_populate_buffer() or @ref netio_populate_prepend_buffer()
631 * have been called, or one retrieved by @ref netio_get_buffer()). Use of
632 * this function on an 'ingress' packet will cause an assertion failure.
633 *
634 * @param[in] pkt Packet on which to operate.
635 * @return A pointer to the packet's standard metadata.
636 */
637static __inline netio_pkt_minimal_metadata_t*
638NETIO_PKT_MINIMAL_METADATA(netio_pkt_t* pkt)
639{
640 netio_assert(pkt->__packet.bits.__minimal);
641 return (netio_pkt_minimal_metadata_t*) &pkt->__metadata;
642}
643
644
645/** Determine whether a packet has 'minimal' metadata.
646 * @ingroup pktfuncs
647 *
648 * This function will return nonzero if the packet is an 'egress'
649 * packet (i.e. one on which @ref netio_populate_buffer() or
650 * @ref netio_populate_prepend_buffer() have been called, or one
651 * retrieved by @ref netio_get_buffer()), and zero if the packet
652 * is an 'ingress' packet (i.e. one retrieved by @ref netio_get_packet(),
653 * which has not been converted into an 'egress' packet).
654 *
655 * @param[in] pkt Packet on which to operate.
656 * @return Nonzero if the packet has minimal metadata.
657 */
658static __inline unsigned int
659NETIO_PKT_IS_MINIMAL(netio_pkt_t* pkt)
660{
661 return pkt->__packet.bits.__minimal;
662}
663
664
665/** Return a handle for a packet's storage.
666 * @ingroup pktfuncs
667 *
668 * @param[in] pkt Packet on which to operate.
669 * @return A handle for the packet's storage.
670 */
671static __inline netio_pkt_handle_t
672NETIO_PKT_HANDLE(netio_pkt_t* pkt)
673{
674 netio_pkt_handle_t h;
675 h.word = pkt->__packet.word;
676 return h;
677}
678
679
680/** A special reserved value indicating the absence of a packet handle.
681 *
682 * @ingroup pktfuncs
683 */
684#define NETIO_PKT_HANDLE_NONE ((netio_pkt_handle_t) { 0 })
685
686
687/** Test whether a packet handle is valid.
688 *
689 * Applications may wish to use the reserved value NETIO_PKT_HANDLE_NONE
690 * to indicate no packet at all. This function tests to see if a packet
691 * handle is a real handle, not this special reserved value.
692 *
693 * @ingroup pktfuncs
694 *
695 * @param[in] handle Handle on which to operate.
696 * @return One if the packet handle is valid, else zero.
697 */
698static __inline unsigned int
699NETIO_PKT_HANDLE_IS_VALID(netio_pkt_handle_t handle)
700{
701 return handle.word != 0;
702}
703
704
705
706/** Return a pointer to the start of the packet's custom header.
707 * A custom header may or may not be present, depending upon the IPP; its
708 * contents and alignment are also IPP-dependent. Currently, none of the
709 * standard IPPs supplied by Tilera produce a custom header. If present,
710 * the custom header precedes the L2 header in the packet buffer.
711 * @ingroup ingress
712 *
713 * @param[in] handle Handle on which to operate.
714 * @return A pointer to start of the packet.
715 */
716static __inline unsigned char*
717NETIO_PKT_CUSTOM_DATA_H(netio_pkt_handle_t handle)
718{
719 return _NETIO_PKT_HANDLE_BASE(handle) + NETIO_PACKET_PADDING;
720}
721
722
723/** Return the length of the packet's custom header.
724 * A custom header may or may not be present, depending upon the IPP; its
725 * contents and alignment are also IPP-dependent. Currently, none of the
726 * standard IPPs supplied by Tilera produce a custom header. If present,
727 * the custom header precedes the L2 header in the packet buffer.
728 *
729 * @ingroup ingress
730 *
731 * @param[in] mda Pointer to packet's standard metadata.
732 * @param[in] pkt Packet on which to operate.
733 * @return The length of the packet's custom header, in bytes.
734 */
735static __inline netio_size_t
736NETIO_PKT_CUSTOM_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
737{
738 /*
739 * Note that we effectively need to extract a quantity from the flags word
740 * which is measured in words, and then turn it into bytes by shifting
741 * it left by 2. We do this all at once by just shifting right two less
742 * bits, and shifting the mask up two bits.
743 */
744 return ((mda->__flags >> (_NETIO_PKT_CUSTOM_LEN_SHIFT - 2)) &
745 (_NETIO_PKT_CUSTOM_LEN_RMASK << 2));
746}
747
748
749/** Return the length of the packet, starting with the custom header.
750 * A custom header may or may not be present, depending upon the IPP; its
751 * contents and alignment are also IPP-dependent. Currently, none of the
752 * standard IPPs supplied by Tilera produce a custom header. If present,
753 * the custom header precedes the L2 header in the packet buffer.
754 * @ingroup ingress
755 *
756 * @param[in] mda Pointer to packet's standard metadata.
757 * @param[in] pkt Packet on which to operate.
758 * @return The length of the packet, in bytes.
759 */
760static __inline netio_size_t
761NETIO_PKT_CUSTOM_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
762{
763 return (__NETIO_PKT_NOTIF_HEADER(pkt).bits.__transfer_size -
764 NETIO_PACKET_PADDING);
765}
766
767
768/** Return a pointer to the start of the packet's custom header.
769 * A custom header may or may not be present, depending upon the IPP; its
770 * contents and alignment are also IPP-dependent. Currently, none of the
771 * standard IPPs supplied by Tilera produce a custom header. If present,
772 * the custom header precedes the L2 header in the packet buffer.
773 * @ingroup ingress
774 *
775 * @param[in] mda Pointer to packet's standard metadata.
776 * @param[in] pkt Packet on which to operate.
777 * @return A pointer to start of the packet.
778 */
779static __inline unsigned char*
780NETIO_PKT_CUSTOM_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
781{
782 return NETIO_PKT_CUSTOM_DATA_H(NETIO_PKT_HANDLE(pkt));
783}
784
785
786/** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header.
787 * @ingroup ingress
788 *
789 * @param[in] mda Pointer to packet's standard metadata.
790 * @param[in] pkt Packet on which to operate.
791 * @return The length of the packet's L2 header, in bytes.
792 */
793static __inline netio_size_t
794NETIO_PKT_L2_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
795{
796 /*
797 * Note that we effectively need to extract a quantity from the flags word
798 * which is measured in words, and then turn it into bytes by shifting
799 * it left by 2. We do this all at once by just shifting right two less
800 * bits, and shifting the mask up two bits. We then add two bytes.
801 */
802 return ((mda->__flags >> (_NETIO_PKT_L2_LEN_SHIFT - 2)) &
803 (_NETIO_PKT_L2_LEN_RMASK << 2)) + 2;
804}
805
806
807/** Return the length of the packet, starting with the L2 (Ethernet) header.
808 * @ingroup ingress
809 *
810 * @param[in] mda Pointer to packet's standard metadata.
811 * @param[in] pkt Packet on which to operate.
812 * @return The length of the packet, in bytes.
813 */
814static __inline netio_size_t
815NETIO_PKT_L2_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
816{
817 return (NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt) -
818 NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda,pkt));
819}
820
821
822/** Return a pointer to the start of the packet's L2 (Ethernet) header.
823 * @ingroup ingress
824 *
825 * @param[in] mda Pointer to packet's standard metadata.
826 * @param[in] pkt Packet on which to operate.
827 * @return A pointer to start of the packet.
828 */
829static __inline unsigned char*
830NETIO_PKT_L2_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
831{
832 return (NETIO_PKT_CUSTOM_DATA_M(mda, pkt) +
833 NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt));
834}
835
836
837/** Retrieve the length of the packet, starting with the L3 (generally,
838 * the IP) header.
839 * @ingroup ingress
840 *
841 * @param[in] mda Pointer to packet's standard metadata.
842 * @param[in] pkt Packet on which to operate.
843 * @return Length of the packet's L3 header and data, in bytes.
844 */
845static __inline netio_size_t
846NETIO_PKT_L3_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
847{
848 return (NETIO_PKT_L2_LENGTH_M(mda, pkt) -
849 NETIO_PKT_L2_HEADER_LENGTH_M(mda,pkt));
850}
851
852
853/** Return a pointer to the packet's L3 (generally, the IP) header.
854 * @ingroup ingress
855 *
856 * Note that we guarantee word alignment of the L3 header.
857 *
858 * @param[in] mda Pointer to packet's standard metadata.
859 * @param[in] pkt Packet on which to operate.
860 * @return A pointer to the packet's L3 header.
861 */
862static __inline unsigned char*
863NETIO_PKT_L3_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
864{
865 return (NETIO_PKT_L2_DATA_M(mda, pkt) +
866 NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt));
867}
868
869
870/** Return the ordinal of the packet.
871 * @ingroup ingress
872 *
873 * Each packet is given an ordinal number when it is delivered by the IPP.
874 * In the medium term, the ordinal is unique and monotonically increasing,
875 * being incremented by 1 for each packet; the ordinal of the first packet
876 * delivered after the IPP starts is zero. (Since the ordinal is of finite
877 * size, given enough input packets, it will eventually wrap around to zero;
878 * in the long term, therefore, ordinals are not unique.) The ordinals
879 * handed out by different IPPs are not disjoint, so two packets from
880 * different IPPs may have identical ordinals. Packets dropped by the
881 * IPP or by the I/O shim are not assigned ordinals.
882 *
883 * @param[in] mda Pointer to packet's standard metadata.
884 * @param[in] pkt Packet on which to operate.
885 * @return The packet's per-IPP packet ordinal.
886 */
887static __inline unsigned int
888NETIO_PKT_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
889{
890 return mda->__packet_ordinal;
891}
892
893
894/** Return the per-group ordinal of the packet.
895 * @ingroup ingress
896 *
897 * Each packet is given a per-group ordinal number when it is
898 * delivered by the IPP. By default, the group is the packet's VLAN,
899 * although IPP can be recompiled to use different values. In
900 * the medium term, the ordinal is unique and monotonically
901 * increasing, being incremented by 1 for each packet; the ordinal of
902 * the first packet distributed to a particular group is zero.
903 * (Since the ordinal is of finite size, given enough input packets,
904 * it will eventually wrap around to zero; in the long term,
905 * therefore, ordinals are not unique.) The ordinals handed out by
906 * different IPPs are not disjoint, so two packets from different IPPs
907 * may have identical ordinals; similarly, packets distributed to
908 * different groups may have identical ordinals. Packets dropped by
909 * the IPP or by the I/O shim are not assigned ordinals.
910 *
911 * @param[in] mda Pointer to packet's standard metadata.
912 * @param[in] pkt Packet on which to operate.
913 * @return The packet's per-IPP, per-group ordinal.
914 */
915static __inline unsigned int
916NETIO_PKT_GROUP_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
917{
918 return mda->__group_ordinal;
919}
920
921
922/** Return the VLAN ID assigned to the packet.
923 * @ingroup ingress
924 *
925 * This value is usually contained within the packet header.
926 *
927 * This value will be zero if the packet does not have a VLAN tag, or if
928 * this value was not extracted from the packet.
929 *
930 * @param[in] mda Pointer to packet's standard metadata.
931 * @param[in] pkt Packet on which to operate.
932 * @return The packet's VLAN ID.
933 */
934static __inline unsigned short
935NETIO_PKT_VLAN_ID_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
936{
937 int vl = (mda->__flags >> _NETIO_PKT_VLAN_SHIFT) & _NETIO_PKT_VLAN_RMASK;
938 unsigned short* pkt_p;
939 int index;
940 unsigned short val;
941
942 if (vl == _NETIO_PKT_VLAN_NONE)
943 return 0;
944
945 pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt);
946 index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK;
947
948 val = pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_VLAN_SHIFT) &
949 _NETIO_PKT_INFO_VLAN_RMASK];
950
951#ifdef __TILECC__
952 return (__insn_bytex(val) >> 16) & 0xFFF;
953#else
954 return (__builtin_bswap32(val) >> 16) & 0xFFF;
955#endif
956}
957
958
959/** Return the ethertype of the packet.
960 * @ingroup ingress
961 *
962 * This value is usually contained within the packet header.
963 *
964 * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED_M()
965 * returns true, and otherwise, may not be well defined.
966 *
967 * @param[in] mda Pointer to packet's standard metadata.
968 * @param[in] pkt Packet on which to operate.
969 * @return The packet's ethertype.
970 */
971static __inline unsigned short
972NETIO_PKT_ETHERTYPE_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
973{
974 unsigned short* pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt);
975 int index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK;
976
977 unsigned short val =
978 pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_ETYPE_SHIFT) &
979 _NETIO_PKT_INFO_ETYPE_RMASK];
980
981 return __builtin_bswap32(val) >> 16;
982}
983
984
985/** Return the flow hash computed on the packet.
986 * @ingroup ingress
987 *
988 * For TCP and UDP packets, this hash is calculated by hashing together
989 * the "5-tuple" values, specifically the source IP address, destination
990 * IP address, protocol type, source port and destination port.
991 * The hash value is intended to be helpful for millions of distinct
992 * flows.
993 *
994 * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is
995 * derived by hashing together the source and destination IP addresses.
996 *
997 * For MPLS-encapsulated packets, the flow hash is derived by hashing
998 * the first MPLS label.
999 *
1000 * For all other packets the flow hash is computed from the source
1001 * and destination Ethernet addresses.
1002 *
1003 * The hash is symmetric, meaning it produces the same value if the
1004 * source and destination are swapped. The only exceptions are
1005 * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple
1006 * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32
1007 * (Encap Security Payload), which use only the destination address
1008 * since the source address is not meaningful.
1009 *
1010 * @param[in] mda Pointer to packet's standard metadata.
1011 * @param[in] pkt Packet on which to operate.
1012 * @return The packet's 32-bit flow hash.
1013 */
1014static __inline unsigned int
1015NETIO_PKT_FLOW_HASH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
1016{
1017 return mda->__flow_hash;
1018}
1019
1020
1021/** Return the first word of "user data" for the packet.
1022 *
1023 * The contents of the user data words depend on the IPP.
1024 *
1025 * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first
1026 * word of user data contains the least significant bits of the 64-bit
1027 * arrival cycle count (see @c get_cycle_count_low()).
1028 *
1029 * See the <em>System Programmer's Guide</em> for details.
1030 *
1031 * @ingroup ingress
1032 *
1033 * @param[in] mda Pointer to packet's standard metadata.
1034 * @param[in] pkt Packet on which to operate.
1035 * @return The packet's first word of "user data".
1036 */
1037static __inline unsigned int
1038NETIO_PKT_USER_DATA_0_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
1039{
1040 return mda->__user_data_0;
1041}
1042
1043
1044/** Return the second word of "user data" for the packet.
1045 *
1046 * The contents of the user data words depend on the IPP.
1047 *
1048 * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second
1049 * word of user data contains the most significant bits of the 64-bit
1050 * arrival cycle count (see @c get_cycle_count_high()).
1051 *
1052 * See the <em>System Programmer's Guide</em> for details.
1053 *
1054 * @ingroup ingress
1055 *
1056 * @param[in] mda Pointer to packet's standard metadata.
1057 * @param[in] pkt Packet on which to operate.
1058 * @return The packet's second word of "user data".
1059 */
1060static __inline unsigned int
1061NETIO_PKT_USER_DATA_1_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
1062{
1063 return mda->__user_data_1;
1064}
1065
1066
1067/** Determine whether the L4 (TCP/UDP) checksum was calculated.
1068 * @ingroup ingress
1069 *
1070 * @param[in] mda Pointer to packet's standard metadata.
1071 * @param[in] pkt Packet on which to operate.
1072 * @return Nonzero if the L4 checksum was calculated.
1073 */
1074static __inline unsigned int
1075NETIO_PKT_L4_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
1076{
1077 return !(mda->__flags & _NETIO_PKT_NO_L4_CSUM_MASK);
1078}
1079
1080
1081/** Determine whether the L4 (TCP/UDP) checksum was calculated and found to
1082 * be correct.
1083 * @ingroup ingress
1084 *
1085 * @param[in] mda Pointer to packet's standard metadata.
1086 * @param[in] pkt Packet on which to operate.
1087 * @return Nonzero if the checksum was calculated and is correct.
1088 */
1089static __inline unsigned int
1090NETIO_PKT_L4_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
1091{
1092 return !(mda->__flags &
1093 (_NETIO_PKT_BAD_L4_CSUM_MASK | _NETIO_PKT_NO_L4_CSUM_MASK));
1094}
1095
1096
1097/** Determine whether the L3 (IP) checksum was calculated.
1098 * @ingroup ingress
1099 *
1100 * @param[in] mda Pointer to packet's standard metadata.
1101 * @param[in] pkt Packet on which to operate.
1102 * @return Nonzero if the L3 (IP) checksum was calculated.
1103*/
1104static __inline unsigned int
1105NETIO_PKT_L3_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
1106{
1107 return !(mda->__flags & _NETIO_PKT_NO_L3_CSUM_MASK);
1108}
1109
1110
1111/** Determine whether the L3 (IP) checksum was calculated and found to be
1112 * correct.
1113 * @ingroup ingress
1114 *
1115 * @param[in] mda Pointer to packet's standard metadata.
1116 * @param[in] pkt Packet on which to operate.
1117 * @return Nonzero if the checksum was calculated and is correct.
1118 */
1119static __inline unsigned int
1120NETIO_PKT_L3_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
1121{
1122 return !(mda->__flags &
1123 (_NETIO_PKT_BAD_L3_CSUM_MASK | _NETIO_PKT_NO_L3_CSUM_MASK));
1124}
1125
1126
1127/** Determine whether the ethertype was recognized and L3 packet data was
1128 * processed.
1129 * @ingroup ingress
1130 *
1131 * @param[in] mda Pointer to packet's standard metadata.
1132 * @param[in] pkt Packet on which to operate.
1133 * @return Nonzero if the ethertype was recognized and L3 packet data was
1134 * processed.
1135 */
1136static __inline unsigned int
1137NETIO_PKT_ETHERTYPE_RECOGNIZED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
1138{
1139 return !(mda->__flags & _NETIO_PKT_TYPE_UNRECOGNIZED_MASK);
1140}
1141
1142
1143/** Retrieve the status of a packet and any errors that may have occurred
1144 * during ingress processing (length mismatches, CRC errors, etc.).
1145 * @ingroup ingress
1146 *
1147 * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED()
1148 * returns zero are always reported as underlength, as there is no a priori
1149 * means to determine their length. Normally, applications should use
1150 * @ref NETIO_PKT_BAD_M() instead of explicitly checking status with this
1151 * function.
1152 *
1153 * @param[in] mda Pointer to packet's standard metadata.
1154 * @param[in] pkt Packet on which to operate.
1155 * @return The packet's status.
1156 */
1157static __inline netio_pkt_status_t
1158NETIO_PKT_STATUS_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
1159{
1160 return (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status;
1161}
1162
1163
1164/** Report whether a packet is bad (i.e., was shorter than expected based on
1165 * its headers, or had a bad CRC).
1166 * @ingroup ingress
1167 *
1168 * Note that this function does not verify L3 or L4 checksums.
1169 *
1170 * @param[in] mda Pointer to packet's standard metadata.
1171 * @param[in] pkt Packet on which to operate.
1172 * @return Nonzero if the packet is bad and should be discarded.
1173 */
1174static __inline unsigned int
1175NETIO_PKT_BAD_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
1176{
1177 return ((NETIO_PKT_STATUS_M(mda, pkt) & 1) &&
1178 (NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt) ||
1179 NETIO_PKT_STATUS_M(mda, pkt) == NETIO_PKT_STATUS_BAD));
1180}
1181
1182
1183/** Return the length of the packet, starting with the L2 (Ethernet) header.
1184 * @ingroup egress
1185 *
1186 * @param[in] mmd Pointer to packet's minimal metadata.
1187 * @param[in] pkt Packet on which to operate.
1188 * @return The length of the packet, in bytes.
1189 */
1190static __inline netio_size_t
1191NETIO_PKT_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
1192{
1193 return mmd->l2_length;
1194}
1195
1196
1197/** Return the length of the L2 (Ethernet) header.
1198 * @ingroup egress
1199 *
1200 * @param[in] mmd Pointer to packet's minimal metadata.
1201 * @param[in] pkt Packet on which to operate.
1202 * @return The length of the packet's L2 header, in bytes.
1203 */
1204static __inline netio_size_t
1205NETIO_PKT_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd,
1206 netio_pkt_t* pkt)
1207{
1208 return mmd->l3_offset - mmd->l2_offset;
1209}
1210
1211
1212/** Return the length of the packet, starting with the L3 (IP) header.
1213 * @ingroup egress
1214 *
1215 * @param[in] mmd Pointer to packet's minimal metadata.
1216 * @param[in] pkt Packet on which to operate.
1217 * @return Length of the packet's L3 header and data, in bytes.
1218 */
1219static __inline netio_size_t
1220NETIO_PKT_L3_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
1221{
1222 return (NETIO_PKT_L2_LENGTH_MM(mmd, pkt) -
1223 NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt));
1224}
1225
1226
1227/** Return a pointer to the packet's L3 (generally, the IP) header.
1228 * @ingroup egress
1229 *
1230 * Note that we guarantee word alignment of the L3 header.
1231 *
1232 * @param[in] mmd Pointer to packet's minimal metadata.
1233 * @param[in] pkt Packet on which to operate.
1234 * @return A pointer to the packet's L3 header.
1235 */
1236static __inline unsigned char*
1237NETIO_PKT_L3_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
1238{
1239 return _NETIO_PKT_BASE(pkt) + mmd->l3_offset;
1240}
1241
1242
1243/** Return a pointer to the packet's L2 (Ethernet) header.
1244 * @ingroup egress
1245 *
1246 * @param[in] mmd Pointer to packet's minimal metadata.
1247 * @param[in] pkt Packet on which to operate.
1248 * @return A pointer to start of the packet.
1249 */
1250static __inline unsigned char*
1251NETIO_PKT_L2_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
1252{
1253 return _NETIO_PKT_BASE(pkt) + mmd->l2_offset;
1254}
1255
1256
1257/** Retrieve the status of a packet and any errors that may have occurred
1258 * during ingress processing (length mismatches, CRC errors, etc.).
1259 * @ingroup ingress
1260 *
1261 * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED()
1262 * returns zero are always reported as underlength, as there is no a priori
1263 * means to determine their length. Normally, applications should use
1264 * @ref NETIO_PKT_BAD() instead of explicitly checking status with this
1265 * function.
1266 *
1267 * @param[in] pkt Packet on which to operate.
1268 * @return The packet's status.
1269 */
1270static __inline netio_pkt_status_t
1271NETIO_PKT_STATUS(netio_pkt_t* pkt)
1272{
1273 netio_assert(!pkt->__packet.bits.__minimal);
1274
1275 return (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status;
1276}
1277
1278
1279/** Report whether a packet is bad (i.e., was shorter than expected based on
1280 * its headers, or had a bad CRC).
1281 * @ingroup ingress
1282 *
1283 * Note that this function does not verify L3 or L4 checksums.
1284 *
1285 * @param[in] pkt Packet on which to operate.
1286 * @return Nonzero if the packet is bad and should be discarded.
1287 */
1288static __inline unsigned int
1289NETIO_PKT_BAD(netio_pkt_t* pkt)
1290{
1291 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1292
1293 return NETIO_PKT_BAD_M(mda, pkt);
1294}
1295
1296
1297/** Return the length of the packet's custom header.
1298 * A custom header may or may not be present, depending upon the IPP; its
1299 * contents and alignment are also IPP-dependent. Currently, none of the
1300 * standard IPPs supplied by Tilera produce a custom header. If present,
1301 * the custom header precedes the L2 header in the packet buffer.
1302 * @ingroup pktfuncs
1303 *
1304 * @param[in] pkt Packet on which to operate.
1305 * @return The length of the packet's custom header, in bytes.
1306 */
1307static __inline netio_size_t
1308NETIO_PKT_CUSTOM_HEADER_LENGTH(netio_pkt_t* pkt)
1309{
1310 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1311
1312 return NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt);
1313}
1314
1315
1316/** Return the length of the packet, starting with the custom header.
1317 * A custom header may or may not be present, depending upon the IPP; its
1318 * contents and alignment are also IPP-dependent. Currently, none of the
1319 * standard IPPs supplied by Tilera produce a custom header. If present,
1320 * the custom header precedes the L2 header in the packet buffer.
1321 * @ingroup pktfuncs
1322 *
1323 * @param[in] pkt Packet on which to operate.
1324 * @return The length of the packet, in bytes.
1325 */
1326static __inline netio_size_t
1327NETIO_PKT_CUSTOM_LENGTH(netio_pkt_t* pkt)
1328{
1329 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1330
1331 return NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt);
1332}
1333
1334
1335/** Return a pointer to the packet's custom header.
1336 * A custom header may or may not be present, depending upon the IPP; its
1337 * contents and alignment are also IPP-dependent. Currently, none of the
1338 * standard IPPs supplied by Tilera produce a custom header. If present,
1339 * the custom header precedes the L2 header in the packet buffer.
1340 * @ingroup pktfuncs
1341 *
1342 * @param[in] pkt Packet on which to operate.
1343 * @return A pointer to start of the packet.
1344 */
1345static __inline unsigned char*
1346NETIO_PKT_CUSTOM_DATA(netio_pkt_t* pkt)
1347{
1348 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1349
1350 return NETIO_PKT_CUSTOM_DATA_M(mda, pkt);
1351}
1352
1353
1354/** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header.
1355 * @ingroup pktfuncs
1356 *
1357 * @param[in] pkt Packet on which to operate.
1358 * @return The length of the packet's L2 header, in bytes.
1359 */
1360static __inline netio_size_t
1361NETIO_PKT_L2_HEADER_LENGTH(netio_pkt_t* pkt)
1362{
1363 if (NETIO_PKT_IS_MINIMAL(pkt))
1364 {
1365 netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
1366
1367 return NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt);
1368 }
1369 else
1370 {
1371 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1372
1373 return NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt);
1374 }
1375}
1376
1377
1378/** Return the length of the packet, starting with the L2 (Ethernet) header.
1379 * @ingroup pktfuncs
1380 *
1381 * @param[in] pkt Packet on which to operate.
1382 * @return The length of the packet, in bytes.
1383 */
1384static __inline netio_size_t
1385NETIO_PKT_L2_LENGTH(netio_pkt_t* pkt)
1386{
1387 if (NETIO_PKT_IS_MINIMAL(pkt))
1388 {
1389 netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
1390
1391 return NETIO_PKT_L2_LENGTH_MM(mmd, pkt);
1392 }
1393 else
1394 {
1395 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1396
1397 return NETIO_PKT_L2_LENGTH_M(mda, pkt);
1398 }
1399}
1400
1401
1402/** Return a pointer to the packet's L2 (Ethernet) header.
1403 * @ingroup pktfuncs
1404 *
1405 * @param[in] pkt Packet on which to operate.
1406 * @return A pointer to start of the packet.
1407 */
1408static __inline unsigned char*
1409NETIO_PKT_L2_DATA(netio_pkt_t* pkt)
1410{
1411 if (NETIO_PKT_IS_MINIMAL(pkt))
1412 {
1413 netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
1414
1415 return NETIO_PKT_L2_DATA_MM(mmd, pkt);
1416 }
1417 else
1418 {
1419 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1420
1421 return NETIO_PKT_L2_DATA_M(mda, pkt);
1422 }
1423}
1424
1425
1426/** Retrieve the length of the packet, starting with the L3 (generally, the IP)
1427 * header.
1428 * @ingroup pktfuncs
1429 *
1430 * @param[in] pkt Packet on which to operate.
1431 * @return Length of the packet's L3 header and data, in bytes.
1432 */
1433static __inline netio_size_t
1434NETIO_PKT_L3_LENGTH(netio_pkt_t* pkt)
1435{
1436 if (NETIO_PKT_IS_MINIMAL(pkt))
1437 {
1438 netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
1439
1440 return NETIO_PKT_L3_LENGTH_MM(mmd, pkt);
1441 }
1442 else
1443 {
1444 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1445
1446 return NETIO_PKT_L3_LENGTH_M(mda, pkt);
1447 }
1448}
1449
1450
1451/** Return a pointer to the packet's L3 (generally, the IP) header.
1452 * @ingroup pktfuncs
1453 *
1454 * Note that we guarantee word alignment of the L3 header.
1455 *
1456 * @param[in] pkt Packet on which to operate.
1457 * @return A pointer to the packet's L3 header.
1458 */
1459static __inline unsigned char*
1460NETIO_PKT_L3_DATA(netio_pkt_t* pkt)
1461{
1462 if (NETIO_PKT_IS_MINIMAL(pkt))
1463 {
1464 netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
1465
1466 return NETIO_PKT_L3_DATA_MM(mmd, pkt);
1467 }
1468 else
1469 {
1470 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1471
1472 return NETIO_PKT_L3_DATA_M(mda, pkt);
1473 }
1474}
1475
1476
1477/** Return the ordinal of the packet.
1478 * @ingroup ingress
1479 *
1480 * Each packet is given an ordinal number when it is delivered by the IPP.
1481 * In the medium term, the ordinal is unique and monotonically increasing,
1482 * being incremented by 1 for each packet; the ordinal of the first packet
1483 * delivered after the IPP starts is zero. (Since the ordinal is of finite
1484 * size, given enough input packets, it will eventually wrap around to zero;
1485 * in the long term, therefore, ordinals are not unique.) The ordinals
1486 * handed out by different IPPs are not disjoint, so two packets from
1487 * different IPPs may have identical ordinals. Packets dropped by the
1488 * IPP or by the I/O shim are not assigned ordinals.
1489 *
1490 *
1491 * @param[in] pkt Packet on which to operate.
1492 * @return The packet's per-IPP packet ordinal.
1493 */
1494static __inline unsigned int
1495NETIO_PKT_ORDINAL(netio_pkt_t* pkt)
1496{
1497 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1498
1499 return NETIO_PKT_ORDINAL_M(mda, pkt);
1500}
1501
1502
1503/** Return the per-group ordinal of the packet.
1504 * @ingroup ingress
1505 *
1506 * Each packet is given a per-group ordinal number when it is
1507 * delivered by the IPP. By default, the group is the packet's VLAN,
1508 * although IPP can be recompiled to use different values. In
1509 * the medium term, the ordinal is unique and monotonically
1510 * increasing, being incremented by 1 for each packet; the ordinal of
1511 * the first packet distributed to a particular group is zero.
1512 * (Since the ordinal is of finite size, given enough input packets,
1513 * it will eventually wrap around to zero; in the long term,
1514 * therefore, ordinals are not unique.) The ordinals handed out by
1515 * different IPPs are not disjoint, so two packets from different IPPs
1516 * may have identical ordinals; similarly, packets distributed to
1517 * different groups may have identical ordinals. Packets dropped by
1518 * the IPP or by the I/O shim are not assigned ordinals.
1519 *
1520 * @param[in] pkt Packet on which to operate.
1521 * @return The packet's per-IPP, per-group ordinal.
1522 */
1523static __inline unsigned int
1524NETIO_PKT_GROUP_ORDINAL(netio_pkt_t* pkt)
1525{
1526 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1527
1528 return NETIO_PKT_GROUP_ORDINAL_M(mda, pkt);
1529}
1530
1531
1532/** Return the VLAN ID assigned to the packet.
1533 * @ingroup ingress
1534 *
1535 * This is usually also contained within the packet header. If the packet
1536 * does not have a VLAN tag, the VLAN ID returned by this function is zero.
1537 *
1538 * @param[in] pkt Packet on which to operate.
1539 * @return The packet's VLAN ID.
1540 */
1541static __inline unsigned short
1542NETIO_PKT_VLAN_ID(netio_pkt_t* pkt)
1543{
1544 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1545
1546 return NETIO_PKT_VLAN_ID_M(mda, pkt);
1547}
1548
1549
1550/** Return the ethertype of the packet.
1551 * @ingroup ingress
1552 *
1553 * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED()
1554 * returns true, and otherwise, may not be well defined.
1555 *
1556 * @param[in] pkt Packet on which to operate.
1557 * @return The packet's ethertype.
1558 */
1559static __inline unsigned short
1560NETIO_PKT_ETHERTYPE(netio_pkt_t* pkt)
1561{
1562 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1563
1564 return NETIO_PKT_ETHERTYPE_M(mda, pkt);
1565}
1566
1567
1568/** Return the flow hash computed on the packet.
1569 * @ingroup ingress
1570 *
1571 * For TCP and UDP packets, this hash is calculated by hashing together
1572 * the "5-tuple" values, specifically the source IP address, destination
1573 * IP address, protocol type, source port and destination port.
1574 * The hash value is intended to be helpful for millions of distinct
1575 * flows.
1576 *
1577 * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is
1578 * derived by hashing together the source and destination IP addresses.
1579 *
1580 * For MPLS-encapsulated packets, the flow hash is derived by hashing
1581 * the first MPLS label.
1582 *
1583 * For all other packets the flow hash is computed from the source
1584 * and destination Ethernet addresses.
1585 *
1586 * The hash is symmetric, meaning it produces the same value if the
1587 * source and destination are swapped. The only exceptions are
1588 * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple
1589 * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32
1590 * (Encap Security Payload), which use only the destination address
1591 * since the source address is not meaningful.
1592 *
1593 * @param[in] pkt Packet on which to operate.
1594 * @return The packet's 32-bit flow hash.
1595 */
1596static __inline unsigned int
1597NETIO_PKT_FLOW_HASH(netio_pkt_t* pkt)
1598{
1599 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1600
1601 return NETIO_PKT_FLOW_HASH_M(mda, pkt);
1602}
1603
1604
1605/** Return the first word of "user data" for the packet.
1606 *
1607 * The contents of the user data words depend on the IPP.
1608 *
1609 * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first
1610 * word of user data contains the least significant bits of the 64-bit
1611 * arrival cycle count (see @c get_cycle_count_low()).
1612 *
1613 * See the <em>System Programmer's Guide</em> for details.
1614 *
1615 * @ingroup ingress
1616 *
1617 * @param[in] pkt Packet on which to operate.
1618 * @return The packet's first word of "user data".
1619 */
1620static __inline unsigned int
1621NETIO_PKT_USER_DATA_0(netio_pkt_t* pkt)
1622{
1623 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1624
1625 return NETIO_PKT_USER_DATA_0_M(mda, pkt);
1626}
1627
1628
1629/** Return the second word of "user data" for the packet.
1630 *
1631 * The contents of the user data words depend on the IPP.
1632 *
1633 * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second
1634 * word of user data contains the most significant bits of the 64-bit
1635 * arrival cycle count (see @c get_cycle_count_high()).
1636 *
1637 * See the <em>System Programmer's Guide</em> for details.
1638 *
1639 * @ingroup ingress
1640 *
1641 * @param[in] pkt Packet on which to operate.
1642 * @return The packet's second word of "user data".
1643 */
1644static __inline unsigned int
1645NETIO_PKT_USER_DATA_1(netio_pkt_t* pkt)
1646{
1647 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1648
1649 return NETIO_PKT_USER_DATA_1_M(mda, pkt);
1650}
1651
1652
1653/** Determine whether the L4 (TCP/UDP) checksum was calculated.
1654 * @ingroup ingress
1655 *
1656 * @param[in] pkt Packet on which to operate.
1657 * @return Nonzero if the L4 checksum was calculated.
1658 */
1659static __inline unsigned int
1660NETIO_PKT_L4_CSUM_CALCULATED(netio_pkt_t* pkt)
1661{
1662 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1663
1664 return NETIO_PKT_L4_CSUM_CALCULATED_M(mda, pkt);
1665}
1666
1667
1668/** Determine whether the L4 (TCP/UDP) checksum was calculated and found to
1669 * be correct.
1670 * @ingroup ingress
1671 *
1672 * @param[in] pkt Packet on which to operate.
1673 * @return Nonzero if the checksum was calculated and is correct.
1674 */
1675static __inline unsigned int
1676NETIO_PKT_L4_CSUM_CORRECT(netio_pkt_t* pkt)
1677{
1678 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1679
1680 return NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt);
1681}
1682
1683
1684/** Determine whether the L3 (IP) checksum was calculated.
1685 * @ingroup ingress
1686 *
1687 * @param[in] pkt Packet on which to operate.
1688 * @return Nonzero if the L3 (IP) checksum was calculated.
1689*/
1690static __inline unsigned int
1691NETIO_PKT_L3_CSUM_CALCULATED(netio_pkt_t* pkt)
1692{
1693 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1694
1695 return NETIO_PKT_L3_CSUM_CALCULATED_M(mda, pkt);
1696}
1697
1698
1699/** Determine whether the L3 (IP) checksum was calculated and found to be
1700 * correct.
1701 * @ingroup ingress
1702 *
1703 * @param[in] pkt Packet on which to operate.
1704 * @return Nonzero if the checksum was calculated and is correct.
1705 */
1706static __inline unsigned int
1707NETIO_PKT_L3_CSUM_CORRECT(netio_pkt_t* pkt)
1708{
1709 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1710
1711 return NETIO_PKT_L3_CSUM_CORRECT_M(mda, pkt);
1712}
1713
1714
1715/** Determine whether the Ethertype was recognized and L3 packet data was
1716 * processed.
1717 * @ingroup ingress
1718 *
1719 * @param[in] pkt Packet on which to operate.
1720 * @return Nonzero if the Ethertype was recognized and L3 packet data was
1721 * processed.
1722 */
1723static __inline unsigned int
1724NETIO_PKT_ETHERTYPE_RECOGNIZED(netio_pkt_t* pkt)
1725{
1726 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1727
1728 return NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt);
1729}
1730
1731
1732/** Set an egress packet's L2 length, using a metadata pointer to speed the
1733 * computation.
1734 * @ingroup egress
1735 *
1736 * @param[in,out] mmd Pointer to packet's minimal metadata.
1737 * @param[in] pkt Packet on which to operate.
1738 * @param[in] len Packet L2 length, in bytes.
1739 */
1740static __inline void
1741NETIO_PKT_SET_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt,
1742 int len)
1743{
1744 mmd->l2_length = len;
1745}
1746
1747
1748/** Set an egress packet's L2 length.
1749 * @ingroup egress
1750 *
1751 * @param[in,out] pkt Packet on which to operate.
1752 * @param[in] len Packet L2 length, in bytes.
1753 */
1754static __inline void
1755NETIO_PKT_SET_L2_LENGTH(netio_pkt_t* pkt, int len)
1756{
1757 netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
1758
1759 NETIO_PKT_SET_L2_LENGTH_MM(mmd, pkt, len);
1760}
1761
1762
1763/** Set an egress packet's L2 header length, using a metadata pointer to
1764 * speed the computation.
1765 * @ingroup egress
1766 *
1767 * It is not normally necessary to call this routine; only the L2 length,
1768 * not the header length, is needed to transmit a packet. It may be useful if
1769 * the egress packet will later be processed by code which expects to use
1770 * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload.
1771 *
1772 * @param[in,out] mmd Pointer to packet's minimal metadata.
1773 * @param[in] pkt Packet on which to operate.
1774 * @param[in] len Packet L2 header length, in bytes.
1775 */
1776static __inline void
1777NETIO_PKT_SET_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd,
1778 netio_pkt_t* pkt, int len)
1779{
1780 mmd->l3_offset = mmd->l2_offset + len;
1781}
1782
1783
1784/** Set an egress packet's L2 header length.
1785 * @ingroup egress
1786 *
1787 * It is not normally necessary to call this routine; only the L2 length,
1788 * not the header length, is needed to transmit a packet. It may be useful if
1789 * the egress packet will later be processed by code which expects to use
1790 * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload.
1791 *
1792 * @param[in,out] pkt Packet on which to operate.
1793 * @param[in] len Packet L2 header length, in bytes.
1794 */
1795static __inline void
1796NETIO_PKT_SET_L2_HEADER_LENGTH(netio_pkt_t* pkt, int len)
1797{
1798 netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
1799
1800 NETIO_PKT_SET_L2_HEADER_LENGTH_MM(mmd, pkt, len);
1801}
1802
1803
1804/** Set up an egress packet for hardware checksum computation, using a
1805 * metadata pointer to speed the operation.
1806 * @ingroup egress
1807 *
1808 * NetIO provides the ability to automatically calculate a standard
1809 * 16-bit Internet checksum on transmitted packets. The application
1810 * may specify the point in the packet where the checksum starts, the
1811 * number of bytes to be checksummed, and the two bytes in the packet
1812 * which will be replaced with the completed checksum. (If the range
1813 * of bytes to be checksummed includes the bytes to be replaced, the
1814 * initial values of those bytes will be included in the checksum.)
1815 *
1816 * For some protocols, the packet checksum covers data which is not present
1817 * in the packet, or is at least not contiguous to the main data payload.
1818 * For instance, the TCP checksum includes a "pseudo-header" which includes
1819 * the source and destination IP addresses of the packet. To accommodate
1820 * this, the checksum engine may be "seeded" with an initial value, which
1821 * the application would need to compute based on the specific protocol's
1822 * requirements. Note that the seed is given in host byte order (little-
1823 * endian), not network byte order (big-endian); code written to compute a
1824 * pseudo-header checksum in network byte order will need to byte-swap it
1825 * before use as the seed.
1826 *
1827 * Note that the checksum is computed as part of the transmission process,
1828 * so it will not be present in the packet upon completion of this routine.
1829 *
1830 * @param[in,out] mmd Pointer to packet's minimal metadata.
1831 * @param[in] pkt Packet on which to operate.
1832 * @param[in] start Offset within L2 packet of the first byte to include in
1833 * the checksum.
1834 * @param[in] length Number of bytes to include in the checksum.
1835 * the checksum.
1836 * @param[in] location Offset within L2 packet of the first of the two bytes
1837 * to be replaced with the calculated checksum.
1838 * @param[in] seed Initial value of the running checksum before any of the
1839 * packet data is added.
1840 */
1841static __inline void
1842NETIO_PKT_DO_EGRESS_CSUM_MM(netio_pkt_minimal_metadata_t* mmd,
1843 netio_pkt_t* pkt, int start, int length,
1844 int location, uint16_t seed)
1845{
1846 mmd->csum_start = start;
1847 mmd->csum_length = length;
1848 mmd->csum_location = location;
1849 mmd->csum_seed = seed;
1850 mmd->flags |= _NETIO_PKT_NEED_EDMA_CSUM_MASK;
1851}
1852
1853
1854/** Set up an egress packet for hardware checksum computation.
1855 * @ingroup egress
1856 *
1857 * NetIO provides the ability to automatically calculate a standard
1858 * 16-bit Internet checksum on transmitted packets. The application
1859 * may specify the point in the packet where the checksum starts, the
1860 * number of bytes to be checksummed, and the two bytes in the packet
1861 * which will be replaced with the completed checksum. (If the range
1862 * of bytes to be checksummed includes the bytes to be replaced, the
1863 * initial values of those bytes will be included in the checksum.)
1864 *
1865 * For some protocols, the packet checksum covers data which is not present
1866 * in the packet, or is at least not contiguous to the main data payload.
1867 * For instance, the TCP checksum includes a "pseudo-header" which includes
1868 * the source and destination IP addresses of the packet. To accommodate
1869 * this, the checksum engine may be "seeded" with an initial value, which
1870 * the application would need to compute based on the specific protocol's
1871 * requirements. Note that the seed is given in host byte order (little-
1872 * endian), not network byte order (big-endian); code written to compute a
1873 * pseudo-header checksum in network byte order will need to byte-swap it
1874 * before use as the seed.
1875 *
1876 * Note that the checksum is computed as part of the transmission process,
1877 * so it will not be present in the packet upon completion of this routine.
1878 *
1879 * @param[in,out] pkt Packet on which to operate.
1880 * @param[in] start Offset within L2 packet of the first byte to include in
1881 * the checksum.
1882 * @param[in] length Number of bytes to include in the checksum.
1883 * the checksum.
1884 * @param[in] location Offset within L2 packet of the first of the two bytes
1885 * to be replaced with the calculated checksum.
1886 * @param[in] seed Initial value of the running checksum before any of the
1887 * packet data is added.
1888 */
1889static __inline void
1890NETIO_PKT_DO_EGRESS_CSUM(netio_pkt_t* pkt, int start, int length,
1891 int location, uint16_t seed)
1892{
1893 netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
1894
1895 NETIO_PKT_DO_EGRESS_CSUM_MM(mmd, pkt, start, length, location, seed);
1896}
1897
1898
1899/** Return the number of bytes which could be prepended to a packet, using a
1900 * metadata pointer to speed the operation.
1901 * See @ref netio_populate_prepend_buffer() to get a full description of
1902 * prepending.
1903 *
1904 * @param[in,out] mda Pointer to packet's standard metadata.
1905 * @param[in] pkt Packet on which to operate.
1906 */
1907static __inline int
1908NETIO_PKT_PREPEND_AVAIL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
1909{
1910 return (pkt->__packet.bits.__offset << 6) +
1911 NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt);
1912}
1913
1914
1915/** Return the number of bytes which could be prepended to a packet, using a
1916 * metadata pointer to speed the operation.
1917 * See @ref netio_populate_prepend_buffer() to get a full description of
1918 * prepending.
1919 * @ingroup egress
1920 *
1921 * @param[in,out] mmd Pointer to packet's minimal metadata.
1922 * @param[in] pkt Packet on which to operate.
1923 */
1924static __inline int
1925NETIO_PKT_PREPEND_AVAIL_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
1926{
1927 return (pkt->__packet.bits.__offset << 6) + mmd->l2_offset;
1928}
1929
1930
1931/** Return the number of bytes which could be prepended to a packet.
1932 * See @ref netio_populate_prepend_buffer() to get a full description of
1933 * prepending.
1934 * @ingroup egress
1935 *
1936 * @param[in] pkt Packet on which to operate.
1937 */
1938static __inline int
1939NETIO_PKT_PREPEND_AVAIL(netio_pkt_t* pkt)
1940{
1941 if (NETIO_PKT_IS_MINIMAL(pkt))
1942 {
1943 netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
1944
1945 return NETIO_PKT_PREPEND_AVAIL_MM(mmd, pkt);
1946 }
1947 else
1948 {
1949 netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1950
1951 return NETIO_PKT_PREPEND_AVAIL_M(mda, pkt);
1952 }
1953}
1954
1955
1956/** Flush a packet's minimal metadata from the cache, using a metadata pointer
1957 * to speed the operation.
1958 * @ingroup egress
1959 *
1960 * @param[in] mmd Pointer to packet's minimal metadata.
1961 * @param[in] pkt Packet on which to operate.
1962 */
1963static __inline void
1964NETIO_PKT_FLUSH_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
1965 netio_pkt_t* pkt)
1966{
1967}
1968
1969
1970/** Invalidate a packet's minimal metadata from the cache, using a metadata
1971 * pointer to speed the operation.
1972 * @ingroup egress
1973 *
1974 * @param[in] mmd Pointer to packet's minimal metadata.
1975 * @param[in] pkt Packet on which to operate.
1976 */
1977static __inline void
1978NETIO_PKT_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
1979 netio_pkt_t* pkt)
1980{
1981}
1982
1983
1984/** Flush and then invalidate a packet's minimal metadata from the cache,
1985 * using a metadata pointer to speed the operation.
1986 * @ingroup egress
1987 *
1988 * @param[in] mmd Pointer to packet's minimal metadata.
1989 * @param[in] pkt Packet on which to operate.
1990 */
1991static __inline void
1992NETIO_PKT_FLUSH_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
1993 netio_pkt_t* pkt)
1994{
1995}
1996
1997
1998/** Flush a packet's metadata from the cache, using a metadata pointer
1999 * to speed the operation.
2000 * @ingroup ingress
2001 *
2002 * @param[in] mda Pointer to packet's minimal metadata.
2003 * @param[in] pkt Packet on which to operate.
2004 */
2005static __inline void
2006NETIO_PKT_FLUSH_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
2007{
2008}
2009
2010
2011/** Invalidate a packet's metadata from the cache, using a metadata
2012 * pointer to speed the operation.
2013 * @ingroup ingress
2014 *
2015 * @param[in] mda Pointer to packet's metadata.
2016 * @param[in] pkt Packet on which to operate.
2017 */
2018static __inline void
2019NETIO_PKT_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
2020{
2021}
2022
2023
2024/** Flush and then invalidate a packet's metadata from the cache,
2025 * using a metadata pointer to speed the operation.
2026 * @ingroup ingress
2027 *
2028 * @param[in] mda Pointer to packet's metadata.
2029 * @param[in] pkt Packet on which to operate.
2030 */
2031static __inline void
2032NETIO_PKT_FLUSH_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
2033{
2034}
2035
2036
2037/** Flush a packet's minimal metadata from the cache.
2038 * @ingroup egress
2039 *
2040 * @param[in] pkt Packet on which to operate.
2041 */
2042static __inline void
2043NETIO_PKT_FLUSH_MINIMAL_METADATA(netio_pkt_t* pkt)
2044{
2045}
2046
2047
2048/** Invalidate a packet's minimal metadata from the cache.
2049 * @ingroup egress
2050 *
2051 * @param[in] pkt Packet on which to operate.
2052 */
2053static __inline void
2054NETIO_PKT_INV_MINIMAL_METADATA(netio_pkt_t* pkt)
2055{
2056}
2057
2058
2059/** Flush and then invalidate a packet's minimal metadata from the cache.
2060 * @ingroup egress
2061 *
2062 * @param[in] pkt Packet on which to operate.
2063 */
2064static __inline void
2065NETIO_PKT_FLUSH_INV_MINIMAL_METADATA(netio_pkt_t* pkt)
2066{
2067}
2068
2069
2070/** Flush a packet's metadata from the cache.
2071 * @ingroup ingress
2072 *
2073 * @param[in] pkt Packet on which to operate.
2074 */
2075static __inline void
2076NETIO_PKT_FLUSH_METADATA(netio_pkt_t* pkt)
2077{
2078}
2079
2080
2081/** Invalidate a packet's metadata from the cache.
2082 * @ingroup ingress
2083 *
2084 * @param[in] pkt Packet on which to operate.
2085 */
2086static __inline void
2087NETIO_PKT_INV_METADATA(netio_pkt_t* pkt)
2088{
2089}
2090
2091
2092/** Flush and then invalidate a packet's metadata from the cache.
2093 * @ingroup ingress
2094 *
2095 * @param[in] pkt Packet on which to operate.
2096 */
2097static __inline void
2098NETIO_PKT_FLUSH_INV_METADATA(netio_pkt_t* pkt)
2099{
2100}
2101
2102/** Number of NUMA nodes we can distribute buffers to.
2103 * @ingroup setup */
2104#define NETIO_NUM_NODE_WEIGHTS 16
2105
2106/**
2107 * @brief An object for specifying the characteristics of NetIO communication
2108 * endpoint.
2109 *
2110 * @ingroup setup
2111 *
2112 * The @ref netio_input_register() function uses this structure to define
2113 * how an application tile will communicate with an IPP.
2114 *
2115 *
2116 * Future updates to NetIO may add new members to this structure,
2117 * which can affect the success of the registration operation. Thus,
2118 * if dynamically initializing the structure, applications are urged to
2119 * zero it out first, for example:
2120 *
2121 * @code
2122 * netio_input_config_t config;
2123 * memset(&config, 0, sizeof (config));
2124 * config.flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE;
2125 * config.num_receive_packets = NETIO_MAX_RECEIVE_PKTS;
2126 * config.queue_id = 0;
2127 * .
2128 * .
2129 * .
2130 * @endcode
2131 *
2132 * since that guarantees that any unused structure members, including
2133 * members which did not exist when the application was first developed,
2134 * will not have unexpected values.
2135 *
2136 * If statically initializing the structure, we strongly recommend use of
2137 * C99-style named initializers, for example:
2138 *
2139 * @code
2140 * netio_input_config_t config = {
2141 * .flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE,
2142 * .num_receive_packets = NETIO_MAX_RECEIVE_PKTS,
2143 * .queue_id = 0,
2144 * },
2145 * @endcode
2146 *
2147 * instead of the old-style structure initialization:
2148 *
2149 * @code
2150 * // Bad example! Currently equivalent to the above, but don't do this.
2151 * netio_input_config_t config = {
2152 * NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE, NETIO_MAX_RECEIVE_PKTS, 0
2153 * },
2154 * @endcode
2155 *
2156 * since the C99 style requires no changes to the code if elements of the
2157 * config structure are rearranged. (It also makes the initialization much
2158 * easier to understand.)
2159 *
2160 * Except for items which address a particular tile's transmit or receive
2161 * characteristics, such as the ::NETIO_RECV flag, applications are advised
2162 * to specify the same set of configuration data on all registrations.
2163 * This prevents differing results if multiple tiles happen to do their
2164 * registration operations in a different order on different invocations of
2165 * the application. This is particularly important for things like link
2166 * management flags, and buffer size and homing specifications.
2167 *
2168 * Unless the ::NETIO_FIXED_BUFFER_VA flag is specified in flags, the NetIO
2169 * buffer pool is automatically created and mapped into the application's
2170 * virtual address space at an address chosen by the operating system,
2171 * using the common memory (cmem) facility in the Tilera Multicore
2172 * Components library. The cmem facility allows multiple processes to gain
2173 * access to shared memory which is mapped into each process at an
2174 * identical virtual address. In order for this to work, the processes
2175 * must have a common ancestor, which must create the common memory using
2176 * tmc_cmem_init().
2177 *
2178 * In programs using the iLib process creation API, or in programs which use
2179 * only one process (which include programs using the pthreads library),
2180 * tmc_cmem_init() is called automatically. All other applications
2181 * must call it explicitly, before any child processes which might call
2182 * netio_input_register() are created.
2183 */
2184typedef struct
2185{
2186 /** Registration characteristics.
2187
2188 This value determines several characteristics of the registration;
2189 flags for different types of behavior are ORed together to make the
2190 final flag value. Generally applications should specify exactly
2191 one flag from each of the following categories:
2192
2193 - Whether the application will be receiving packets on this queue
2194 (::NETIO_RECV or ::NETIO_NO_RECV).
2195
2196 - Whether the application will be transmitting packets on this queue,
2197 and if so, whether it will request egress checksum calculation
2198 (::NETIO_XMIT, ::NETIO_XMIT_CSUM, or ::NETIO_NO_XMIT). It is
2199 legal to call netio_get_buffer() without one of the XMIT flags,
2200 as long as ::NETIO_RECV is specified; in this case, the retrieved
2201 buffers must be passed to another tile for transmission.
2202
2203 - Whether the application expects any vendor-specific tags in
2204 its packets' L2 headers (::NETIO_TAG_NONE, ::NETIO_TAG_BRCM,
2205 or ::NETIO_TAG_MRVL). This must match the configuration of the
2206 target IPP.
2207
2208 To accommodate applications written to previous versions of the NetIO
2209 interface, none of the flags above are currently required; if omitted,
2210 NetIO behaves more or less as if ::NETIO_RECV | ::NETIO_XMIT_CSUM |
2211 ::NETIO_TAG_NONE were used. However, explicit specification of
2212 the relevant flags allows NetIO to do a better job of resource
2213 allocation, allows earlier detection of certain configuration errors,
2214 and may enable advanced features or higher performance in the future,
2215 so their use is strongly recommended.
2216
2217 Note that specifying ::NETIO_NO_RECV along with ::NETIO_NO_XMIT
2218 is a special case, intended primarily for use by programs which
2219 retrieve network statistics or do link management operations.
2220 When these flags are both specified, the resulting queue may not
2221 be used with NetIO routines other than netio_get(), netio_set(),
2222 and netio_input_unregister(). See @ref link for more information
2223 on link management.
2224
2225 Other flags are optional; their use is described below.
2226 */
2227 int flags;
2228
2229 /** Interface name. This is a string which identifies the specific
2230 Ethernet controller hardware to be used. The format of the string
2231 is a device type and a device index, separated by a slash; so,
2232 the first 10 Gigabit Ethernet controller is named "xgbe/0", while
2233 the second 10/100/1000 Megabit Ethernet controller is named "gbe/1".
2234 */
2235 const char* interface;
2236
2237 /** Receive packet queue size. This specifies the maximum number
2238 of ingress packets that can be received on this queue without
2239 being retrieved by @ref netio_get_packet(). If the IPP's distribution
2240 algorithm calls for a packet to be sent to this queue, and this
2241 number of packets are already pending there, the new packet
2242 will either be discarded, or sent to another tile registered
2243 for the same queue_id (see @ref drops). This value must
2244 be at least ::NETIO_MIN_RECEIVE_PKTS, can always be at least
2245 ::NETIO_MAX_RECEIVE_PKTS, and may be larger than that on certain
2246 interfaces.
2247 */
2248 int num_receive_packets;
2249
2250 /** The queue ID being requested. Legal values for this range from 0
2251 to ::NETIO_MAX_QUEUE_ID, inclusive. ::NETIO_MAX_QUEUE_ID is always
2252 greater than or equal to the number of tiles; this allows one queue
2253 for each tile, plus at least one additional queue. Some applications
2254 may wish to use the additional queue as a destination for unwanted
2255 packets, since packets delivered to queues for which no tiles have
2256 registered are discarded.
2257 */
2258 unsigned int queue_id;
2259
2260 /** Maximum number of small send buffers to be held in the local empty
2261 buffer cache. This specifies the size of the area which holds
2262 empty small egress buffers requested from the IPP but not yet
2263 retrieved via @ref netio_get_buffer(). This value must be greater
2264 than zero if the application will ever use @ref netio_get_buffer()
2265 to allocate empty small egress buffers; it may be no larger than
2266 ::NETIO_MAX_SEND_BUFFERS. See @ref epp for more details on empty
2267 buffer caching.
2268 */
2269 int num_send_buffers_small_total;
2270
2271 /** Number of small send buffers to be preallocated at registration.
2272 If this value is nonzero, the specified number of empty small egress
2273 buffers will be requested from the IPP during the netio_input_register
2274 operation; this may speed the execution of @ref netio_get_buffer().
2275 This may be no larger than @ref num_send_buffers_small_total. See @ref
2276 epp for more details on empty buffer caching.
2277 */
2278 int num_send_buffers_small_prealloc;
2279
2280 /** Maximum number of large send buffers to be held in the local empty
2281 buffer cache. This specifies the size of the area which holds empty
2282 large egress buffers requested from the IPP but not yet retrieved via
2283 @ref netio_get_buffer(). This value must be greater than zero if the
2284 application will ever use @ref netio_get_buffer() to allocate empty
2285 large egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS.
2286 See @ref epp for more details on empty buffer caching.
2287 */
2288 int num_send_buffers_large_total;
2289
2290 /** Number of large send buffers to be preallocated at registration.
2291 If this value is nonzero, the specified number of empty large egress
2292 buffers will be requested from the IPP during the netio_input_register
2293 operation; this may speed the execution of @ref netio_get_buffer().
2294 This may be no larger than @ref num_send_buffers_large_total. See @ref
2295 epp for more details on empty buffer caching.
2296 */
2297 int num_send_buffers_large_prealloc;
2298
2299 /** Maximum number of jumbo send buffers to be held in the local empty
2300 buffer cache. This specifies the size of the area which holds empty
2301 jumbo egress buffers requested from the IPP but not yet retrieved via
2302 @ref netio_get_buffer(). This value must be greater than zero if the
2303 application will ever use @ref netio_get_buffer() to allocate empty
2304 jumbo egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS.
2305 See @ref epp for more details on empty buffer caching.
2306 */
2307 int num_send_buffers_jumbo_total;
2308
2309 /** Number of jumbo send buffers to be preallocated at registration.
2310 If this value is nonzero, the specified number of empty jumbo egress
2311 buffers will be requested from the IPP during the netio_input_register
2312 operation; this may speed the execution of @ref netio_get_buffer().
2313 This may be no larger than @ref num_send_buffers_jumbo_total. See @ref
2314 epp for more details on empty buffer caching.
2315 */
2316 int num_send_buffers_jumbo_prealloc;
2317
2318 /** Total packet buffer size. This determines the total size, in bytes,
2319 of the NetIO buffer pool. Note that the maximum number of available
2320 buffers of each size is determined during hypervisor configuration
2321 (see the <em>System Programmer's Guide</em> for details); this just
2322 influences how much host memory is allocated for those buffers.
2323
2324 The buffer pool is allocated from common memory, which will be
2325 automatically initialized if needed. If your buffer pool is larger
2326 than 240 MB, you might need to explicitly call @c tmc_cmem_init(),
2327 as described in the Application Libraries Reference Manual (UG227).
2328
2329 Packet buffers are currently allocated in chunks of 16 MB; this
2330 value will be rounded up to the next larger multiple of 16 MB.
2331 If this value is zero, a default of 32 MB will be used; this was
2332 the value used by previous versions of NetIO. Note that taking this
2333 default also affects the placement of buffers on Linux NUMA nodes.
2334 See @ref buffer_node_weights for an explanation of buffer placement.
2335
2336 In order to successfully allocate packet buffers, Linux must have
2337 available huge pages on the relevant Linux NUMA nodes. See the
2338 <em>System Programmer's Guide</em> for information on configuring
2339 huge page support in Linux.
2340 */
2341 uint64_t total_buffer_size;
2342
2343 /** Buffer placement weighting factors.
2344
2345 This array specifies the relative amount of buffering to place
2346 on each of the available Linux NUMA nodes. This array is
2347 indexed by the NUMA node, and the values in the array are
2348 proportional to the amount of buffer space to allocate on that
2349 node.
2350
2351 If memory striping is enabled in the Hypervisor, then there is
2352 only one logical NUMA node (node 0). In that case, NetIO will by
2353 default ignore the suggested buffer node weights, and buffers
2354 will be striped across the physical memory controllers. See
2355 UG209 System Programmer's Guide for a description of the
2356 hypervisor option that controls memory striping.
2357
2358 If memory striping is disabled, then there are up to four NUMA
2359 nodes, corresponding to the four DDRAM controllers in the TILE
2360 processor architecture. See UG100 Tile Processor Architecture
2361 Overview for a diagram showing the location of each of the DDRAM
2362 controllers relative to the tile array.
2363
2364 For instance, if memory striping is disabled, the following
2365 configuration strucure:
2366
2367 @code
2368 netio_input_config_t config = {
2369 .
2370 .
2371 .
2372 .total_buffer_size = 4 * 16 * 1024 * 1024;
2373 .buffer_node_weights = { 1, 0, 1, 0 },
2374 },
2375 @endcode
2376
2377 would result in 32 MB of buffers being placed on controller 0, and
2378 32 MB on controller 2. (Since buffers are allocated in units of
2379 16 MB, some sets of weights will not be able to be matched exactly.)
2380
2381 For the weights to be effective, @ref total_buffer_size must be
2382 nonzero. If @ref total_buffer_size is zero, causing the default
2383 32 MB of buffer space to be used, then any specified weights will
2384 be ignored, and buffers will positioned as they were in previous
2385 versions of NetIO:
2386
2387 - For xgbe/0 and gbe/0, 16 MB of buffers will be placed on controller 1,
2388 and the other 16 MB will be placed on controller 2.
2389
2390 - For xgbe/1 and gbe/1, 16 MB of buffers will be placed on controller 2,
2391 and the other 16 MB will be placed on controller 3.
2392
2393 If @ref total_buffer_size is nonzero, but all weights are zero,
2394 then all buffer space will be allocated on Linux NUMA node zero.
2395
2396 By default, the specified buffer placement is treated as a hint;
2397 if sufficient free memory is not available on the specified
2398 controllers, the buffers will be allocated elsewhere. However,
2399 if the ::NETIO_STRICT_HOMING flag is specified in @ref flags, then a
2400 failure to allocate buffer space exactly as requested will cause the
2401 registration operation to fail with an error of ::NETIO_CANNOT_HOME.
2402
2403 Note that maximal network performance cannot be achieved with
2404 only one memory controller.
2405 */
2406 uint8_t buffer_node_weights[NETIO_NUM_NODE_WEIGHTS];
2407
2408 /** Fixed virtual address for packet buffers. Only valid when
2409 ::NETIO_FIXED_BUFFER_VA is specified in @ref flags; see the
2410 description of that flag for details.
2411 */
2412 void* fixed_buffer_va;
2413
2414 /**
2415 Maximum number of outstanding send packet requests. This value is
2416 only relevant when an EPP is in use; it determines the number of
2417 slots in the EPP's outgoing packet queue which this tile is allowed
2418 to consume, and thus the number of packets which may be sent before
2419 the sending tile must wait for an acknowledgment from the EPP.
2420 Modifying this value is generally only helpful when using @ref
2421 netio_send_packet_vector(), where it can help improve performance by
2422 allowing a single vector send operation to process more packets.
2423 Typically it is not specified, and the default, which divides the
2424 outgoing packet slots evenly between all tiles on the chip, is used.
2425
2426 If a registration asks for more outgoing packet queue slots than are
2427 available, ::NETIO_TOOMANY_XMIT will be returned. The total number
2428 of packet queue slots which are available for all tiles for each EPP
2429 is subject to change, but is currently ::NETIO_TOTAL_SENDS_OUTSTANDING.
2430
2431
2432 This value is ignored if ::NETIO_XMIT is not specified in flags.
2433 If you want to specify a large value here for a specific tile, you are
2434 advised to specify NETIO_NO_XMIT on other, non-transmitting tiles so
2435 that they do not consume a default number of packet slots. Any tile
2436 transmitting is required to have at least ::NETIO_MIN_SENDS_OUTSTANDING
2437 slots allocated to it; values less than that will be silently
2438 increased by the NetIO library.
2439 */
2440 int num_sends_outstanding;
2441}
2442netio_input_config_t;
2443
2444
2445/** Registration flags; used in the @ref netio_input_config_t structure.
2446 * @addtogroup setup
2447 */
2448/** @{ */
2449
2450/** Fail a registration request if we can't put packet buffers
2451 on the specified memory controllers. */
2452#define NETIO_STRICT_HOMING 0x00000002
2453
2454/** This application expects no tags on its L2 headers. */
2455#define NETIO_TAG_NONE 0x00000004
2456
2457/** This application expects Marvell extended tags on its L2 headers. */
2458#define NETIO_TAG_MRVL 0x00000008
2459
2460/** This application expects Broadcom tags on its L2 headers. */
2461#define NETIO_TAG_BRCM 0x00000010
2462
2463/** This registration may call routines which receive packets. */
2464#define NETIO_RECV 0x00000020
2465
2466/** This registration may not call routines which receive packets. */
2467#define NETIO_NO_RECV 0x00000040
2468
2469/** This registration may call routines which transmit packets. */
2470#define NETIO_XMIT 0x00000080
2471
2472/** This registration may call routines which transmit packets with
2473 checksum acceleration. */
2474#define NETIO_XMIT_CSUM 0x00000100
2475
2476/** This registration may not call routines which transmit packets. */
2477#define NETIO_NO_XMIT 0x00000200
2478
2479/** This registration wants NetIO buffers mapped at an application-specified
2480 virtual address.
2481
2482 NetIO buffers are by default created by the TMC common memory facility,
2483 which must be configured by a common ancestor of all processes sharing
2484 a network interface. When this flag is specified, NetIO buffers are
2485 instead mapped at an address chosen by the application (and specified
2486 in @ref netio_input_config_t::fixed_buffer_va). This allows multiple
2487 unrelated but cooperating processes to share a NetIO interface.
2488 All processes sharing the same interface must specify this flag,
2489 and all must specify the same fixed virtual address.
2490
2491 @ref netio_input_config_t::fixed_buffer_va must be a
2492 multiple of 16 MB, and the packet buffers will occupy @ref
2493 netio_input_config_t::total_buffer_size bytes of virtual address
2494 space, beginning at that address. If any of those virtual addresses
2495 are currently occupied by other memory objects, like application or
2496 shared library code or data, @ref netio_input_register() will return
2497 ::NETIO_FAULT. While it is impossible to provide a fixed_buffer_va
2498 which will work for all applications, a good first guess might be to
2499 use 0xb0000000 minus @ref netio_input_config_t::total_buffer_size.
2500 If that fails, it might be helpful to consult the running application's
2501 virtual address description file (/proc/<em>pid</em>/maps) to see
2502 which regions of virtual address space are available.
2503 */
2504#define NETIO_FIXED_BUFFER_VA 0x00000400
2505
2506/** This registration call will not complete unless the network link
2507 is up. The process will wait several seconds for this to happen (the
2508 precise interval is link-dependent), but if the link does not come up,
2509 ::NETIO_LINK_DOWN will be returned. This flag is the default if
2510 ::NETIO_NOREQUIRE_LINK_UP is not specified. Note that this flag by
2511 itself does not request that the link be brought up; that can be done
2512 with the ::NETIO_AUTO_LINK_UPDN or ::NETIO_AUTO_LINK_UP flags (the
2513 latter is the default if no NETIO_AUTO_LINK_xxx flags are specified),
2514 or by explicitly setting the link's desired state via netio_set().
2515 If the link is not brought up by one of those methods, and this flag
2516 is specified, the registration operation will return ::NETIO_LINK_DOWN.
2517 This flag is ignored if it is specified along with ::NETIO_NO_XMIT and
2518 ::NETIO_NO_RECV. See @ref link for more information on link
2519 management.
2520 */
2521#define NETIO_REQUIRE_LINK_UP 0x00000800
2522
2523/** This registration call will complete even if the network link is not up.
2524 Whenever the link is not up, packets will not be sent or received:
2525 netio_get_packet() will return ::NETIO_NOPKT once all queued packets
2526 have been drained, and netio_send_packet() and similar routines will
2527 return NETIO_QUEUE_FULL once the outgoing packet queue in the EPP
2528 or the I/O shim is full. See @ref link for more information on link
2529 management.
2530 */
2531#define NETIO_NOREQUIRE_LINK_UP 0x00001000
2532
2533#ifndef __DOXYGEN__
2534/*
2535 * These are part of the implementation of the NETIO_AUTO_LINK_xxx flags,
2536 * but should not be used directly by applications, and are thus not
2537 * documented.
2538 */
2539#define _NETIO_AUTO_UP 0x00002000
2540#define _NETIO_AUTO_DN 0x00004000
2541#define _NETIO_AUTO_PRESENT 0x00008000
2542#endif
2543
2544/** Set the desired state of the link to up, allowing any speeds which are
2545 supported by the link hardware, as part of this registration operation.
2546 Do not take down the link automatically. This is the default if
2547 no other NETIO_AUTO_LINK_xxx flags are specified. This flag is ignored
2548 if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV.
2549 See @ref link for more information on link management.
2550 */
2551#define NETIO_AUTO_LINK_UP (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP)
2552
2553/** Set the desired state of the link to up, allowing any speeds which are
2554 supported by the link hardware, as part of this registration operation.
2555 Set the desired state of the link to down the next time no tiles are
2556 registered for packet reception or transmission. This flag is ignored
2557 if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV.
2558 See @ref link for more information on link management.
2559 */
2560#define NETIO_AUTO_LINK_UPDN (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP | \
2561 _NETIO_AUTO_DN)
2562
2563/** Set the desired state of the link to down the next time no tiles are
2564 registered for packet reception or transmission. This flag is ignored
2565 if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV.
2566 See @ref link for more information on link management.
2567 */
2568#define NETIO_AUTO_LINK_DN (_NETIO_AUTO_PRESENT | _NETIO_AUTO_DN)
2569
2570/** Do not bring up the link automatically as part of this registration
2571 operation. Do not take down the link automatically. This flag
2572 is ignored if it is specified along with ::NETIO_NO_XMIT and
2573 ::NETIO_NO_RECV. See @ref link for more information on link management.
2574 */
2575#define NETIO_AUTO_LINK_NONE _NETIO_AUTO_PRESENT
2576
2577
2578/** Minimum number of receive packets. */
2579#define NETIO_MIN_RECEIVE_PKTS 16
2580
2581/** Lower bound on the maximum number of receive packets; may be higher
2582 than this on some interfaces. */
2583#define NETIO_MAX_RECEIVE_PKTS 128
2584
2585/** Maximum number of send buffers, per packet size. */
2586#define NETIO_MAX_SEND_BUFFERS 16
2587
2588/** Number of EPP queue slots, and thus outstanding sends, per EPP. */
2589#define NETIO_TOTAL_SENDS_OUTSTANDING 2015
2590
2591/** Minimum number of EPP queue slots, and thus outstanding sends, per
2592 * transmitting tile. */
2593#define NETIO_MIN_SENDS_OUTSTANDING 16
2594
2595
2596/**@}*/
2597
2598#ifndef __DOXYGEN__
2599
2600/**
2601 * An object for providing Ethernet packets to a process.
2602 */
2603struct __netio_queue_impl_t;
2604
2605/**
2606 * An object for managing the user end of a NetIO queue.
2607 */
2608struct __netio_queue_user_impl_t;
2609
2610#endif /* !__DOXYGEN__ */
2611
2612
2613/** A netio_queue_t describes a NetIO communications endpoint.
2614 * @ingroup setup
2615 */
2616typedef struct
2617{
2618#ifdef __DOXYGEN__
2619 uint8_t opaque[8]; /**< This is an opaque structure. */
2620#else
2621 struct __netio_queue_impl_t* __system_part; /**< The system part. */
2622 struct __netio_queue_user_impl_t* __user_part; /**< The user part. */
2623#ifdef _NETIO_PTHREAD
2624 _netio_percpu_mutex_t lock; /**< Queue lock. */
2625#endif
2626#endif
2627}
2628netio_queue_t;
2629
2630
2631/**
2632 * @brief Packet send context.
2633 *
2634 * @ingroup egress
2635 *
2636 * Packet send context for use with netio_send_packet_prepare and _commit.
2637 */
2638typedef struct
2639{
2640#ifdef __DOXYGEN__
2641 uint8_t opaque[44]; /**< This is an opaque structure. */
2642#else
2643 uint8_t flags; /**< Defined below */
2644 uint8_t datalen; /**< Number of valid words pointed to by data. */
2645 uint32_t request[9]; /**< Request to be sent to the EPP or shim. Note
2646 that this is smaller than the 11-word maximum
2647 request size, since some constant values are
2648 not saved in the context. */
2649 uint32_t *data; /**< Data to be sent to the EPP or shim via IDN. */
2650#endif
2651}
2652netio_send_pkt_context_t;
2653
2654
2655#ifndef __DOXYGEN__
2656#define SEND_PKT_CTX_USE_EPP 1 /**< We're sending to an EPP. */
2657#define SEND_PKT_CTX_SEND_CSUM 2 /**< Request includes a checksum. */
2658#endif
2659
2660/**
2661 * @brief Packet vector entry.
2662 *
2663 * @ingroup egress
2664 *
2665 * This data structure is used with netio_send_packet_vector() to send multiple
2666 * packets with one NetIO call. The structure should be initialized by
2667 * calling netio_pkt_vector_set(), rather than by setting the fields
2668 * directly.
2669 *
2670 * This structure is guaranteed to be a power of two in size, no
2671 * bigger than one L2 cache line, and to be aligned modulo its size.
2672 */
2673typedef struct
2674#ifndef __DOXYGEN__
2675__attribute__((aligned(8)))
2676#endif
2677{
2678 /** Reserved for use by the user application. When initialized with
2679 * the netio_set_pkt_vector_entry() function, this field is guaranteed
2680 * to be visible to readers only after all other fields are already
2681 * visible. This way it can be used as a valid flag or generation
2682 * counter. */
2683 uint8_t user_data;
2684
2685 /* Structure members below this point should not be accessed directly by
2686 * applications, as they may change in the future. */
2687
2688 /** Low 8 bits of the packet address to send. The high bits are
2689 * acquired from the 'handle' field. */
2690 uint8_t buffer_address_low;
2691
2692 /** Number of bytes to transmit. */
2693 uint16_t size;
2694
2695 /** The raw handle from a netio_pkt_t. If this is NETIO_PKT_HANDLE_NONE,
2696 * this vector entry will be skipped and no packet will be transmitted. */
2697 netio_pkt_handle_t handle;
2698}
2699netio_pkt_vector_entry_t;
2700
2701
2702/**
2703 * @brief Initialize fields in a packet vector entry.
2704 *
2705 * @ingroup egress
2706 *
2707 * @param[out] v Pointer to the vector entry to be initialized.
2708 * @param[in] pkt Packet to be transmitted when the vector entry is passed to
2709 * netio_send_packet_vector(). Note that the packet's attributes
2710 * (e.g., its L2 offset and length) are captured at the time this
2711 * routine is called; subsequent changes in those attributes will not
2712 * be reflected in the packet which is actually transmitted.
2713 * Changes in the packet's contents, however, will be so reflected.
2714 * If this is NULL, no packet will be transmitted.
2715 * @param[in] user_data User data to be set in the vector entry.
2716 * This function guarantees that the "user_data" field will become
2717 * visible to a reader only after all other fields have become visible.
2718 * This allows a structure in a ring buffer to be written and read
2719 * by a polling reader without any locks or other synchronization.
2720 */
2721static __inline void
2722netio_pkt_vector_set(volatile netio_pkt_vector_entry_t* v, netio_pkt_t* pkt,
2723 uint8_t user_data)
2724{
2725 if (pkt)
2726 {
2727 if (NETIO_PKT_IS_MINIMAL(pkt))
2728 {
2729 netio_pkt_minimal_metadata_t* mmd =
2730 (netio_pkt_minimal_metadata_t*) &pkt->__metadata;
2731 v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_MM(mmd, pkt) & 0xFF;
2732 v->size = NETIO_PKT_L2_LENGTH_MM(mmd, pkt);
2733 }
2734 else
2735 {
2736 netio_pkt_metadata_t* mda = &pkt->__metadata;
2737 v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_M(mda, pkt) & 0xFF;
2738 v->size = NETIO_PKT_L2_LENGTH_M(mda, pkt);
2739 }
2740 v->handle.word = pkt->__packet.word;
2741 }
2742 else
2743 {
2744 v->handle.word = 0; /* Set handle to NETIO_PKT_HANDLE_NONE. */
2745 }
2746
2747 __asm__("" : : : "memory");
2748
2749 v->user_data = user_data;
2750}
2751
2752
2753/**
2754 * Flags and structures for @ref netio_get() and @ref netio_set().
2755 * @ingroup config
2756 */
2757
2758/** @{ */
2759/** Parameter class; addr is a NETIO_PARAM_xxx value. */
2760#define NETIO_PARAM 0
2761/** Interface MAC address. This address is only valid with @ref netio_get().
2762 * The value is a 6-byte MAC address. Depending upon the overall system
2763 * design, a MAC address may or may not be available for each interface. */
2764#define NETIO_PARAM_MAC 0
2765
2766/** Determine whether to suspend output on the receipt of pause frames.
2767 * If the value is nonzero, the I/O shim will suspend output when a pause
2768 * frame is received. If the value is zero, pause frames will be ignored. */
2769#define NETIO_PARAM_PAUSE_IN 1
2770
2771/** Determine whether to send pause frames if the I/O shim packet FIFOs are
2772 * nearly full. If the value is zero, pause frames are not sent. If
2773 * the value is nonzero, it is the delay value which will be sent in any
2774 * pause frames which are output, in units of 512 bit times. */
2775#define NETIO_PARAM_PAUSE_OUT 2
2776
2777/** Jumbo frame support. The value is a 4-byte integer. If the value is
2778 * nonzero, the MAC will accept frames of up to 10240 bytes. If the value
2779 * is zero, the MAC will only accept frames of up to 1544 bytes. */
2780#define NETIO_PARAM_JUMBO 3
2781
2782/** I/O shim's overflow statistics register. The value is two 16-bit integers.
2783 * The first 16-bit value (or the low 16 bits, if the value is treated as a
2784 * 32-bit number) is the count of packets which were completely dropped and
2785 * not delivered by the shim. The second 16-bit value (or the high 16 bits,
2786 * if the value is treated as a 32-bit number) is the count of packets
2787 * which were truncated and thus only partially delivered by the shim. This
2788 * register is automatically reset to zero after it has been read.
2789 */
2790#define NETIO_PARAM_OVERFLOW 4
2791
2792/** IPP statistics. This address is only valid with @ref netio_get(). The
2793 * value is a netio_stat_t structure. Unlike the I/O shim statistics, the
2794 * IPP statistics are not all reset to zero on read; see the description
2795 * of the netio_stat_t for details. */
2796#define NETIO_PARAM_STAT 5
2797
2798/** Possible link state. The value is a combination of "NETIO_LINK_xxx"
2799 * flags. With @ref netio_get(), this will indicate which flags are
2800 * actually supported by the hardware.
2801 *
2802 * For historical reasons, specifying this value to netio_set() will have
2803 * the same behavior as using ::NETIO_PARAM_LINK_CONFIG, but this usage is
2804 * discouraged.
2805 */
2806#define NETIO_PARAM_LINK_POSSIBLE_STATE 6
2807
2808/** Link configuration. The value is a combination of "NETIO_LINK_xxx" flags.
2809 * With @ref netio_set(), this will attempt to immediately bring up the
2810 * link using whichever of the requested flags are supported by the
2811 * hardware, or take down the link if the flags are zero; if this is
2812 * not possible, an error will be returned. Many programs will want
2813 * to use ::NETIO_PARAM_LINK_DESIRED_STATE instead.
2814 *
2815 * For historical reasons, specifying this value to netio_get() will
2816 * have the same behavior as using ::NETIO_PARAM_LINK_POSSIBLE_STATE,
2817 * but this usage is discouraged.
2818 */
2819#define NETIO_PARAM_LINK_CONFIG NETIO_PARAM_LINK_POSSIBLE_STATE
2820
2821/** Current link state. This address is only valid with @ref netio_get().
2822 * The value is zero or more of the "NETIO_LINK_xxx" flags, ORed together.
2823 * If the link is down, the value ANDed with NETIO_LINK_SPEED will be
2824 * zero; if the link is up, the value ANDed with NETIO_LINK_SPEED will
2825 * result in exactly one of the NETIO_LINK_xxx values, indicating the
2826 * current speed. */
2827#define NETIO_PARAM_LINK_CURRENT_STATE 7
2828
2829/** Variant symbol for current state, retained for compatibility with
2830 * pre-MDE-2.1 programs. */
2831#define NETIO_PARAM_LINK_STATUS NETIO_PARAM_LINK_CURRENT_STATE
2832
2833/** Packet Coherence protocol. This address is only valid with @ref netio_get().
2834 * The value is nonzero if the interface is configured for cache-coherent DMA.
2835 */
2836#define NETIO_PARAM_COHERENT 8
2837
2838/** Desired link state. The value is a conbination of "NETIO_LINK_xxx"
2839 * flags, which specify the desired state for the link. With @ref
2840 * netio_set(), this will, in the background, attempt to bring up the link
2841 * using whichever of the requested flags are reasonable, or take down the
2842 * link if the flags are zero. The actual link up or down operation may
2843 * happen after this call completes. If the link state changes in the
2844 * future, the system will continue to try to get back to the desired link
2845 * state; for instance, if the link is brought up successfully, and then
2846 * the network cable is disconnected, the link will go down. However, the
2847 * desired state of the link is still up, so if the cable is reconnected,
2848 * the link will be brought up again.
2849 *
2850 * With @ref netio_get(), this will indicate the desired state for the
2851 * link, as set with a previous netio_set() call, or implicitly by a
2852 * netio_input_register() or netio_input_unregister() operation. This may
2853 * not reflect the current state of the link; to get that, use
2854 * ::NETIO_PARAM_LINK_CURRENT_STATE. */
2855#define NETIO_PARAM_LINK_DESIRED_STATE 9
2856
2857/** NetIO statistics structure. Retrieved using the ::NETIO_PARAM_STAT
2858 * address passed to @ref netio_get(). */
2859typedef struct
2860{
2861 /** Number of packets which have been received by the IPP and forwarded
2862 * to a tile's receive queue for processing. This value wraps at its
2863 * maximum, and is not cleared upon read. */
2864 uint32_t packets_received;
2865
2866 /** Number of packets which have been dropped by the IPP, because they could
2867 * not be received, or could not be forwarded to a tile. The former happens
2868 * when the IPP does not have a free packet buffer of suitable size for an
2869 * incoming frame. The latter happens when all potential destination tiles
2870 * for a packet, as defined by the group, bucket, and queue configuration,
2871 * have full receive queues. This value wraps at its maximum, and is not
2872 * cleared upon read. */
2873 uint32_t packets_dropped;
2874
2875 /*
2876 * Note: the #defines after each of the following four one-byte values
2877 * denote their location within the third word of the netio_stat_t. They
2878 * are intended for use only by the IPP implementation and are thus omitted
2879 * from the Doxygen output.
2880 */
2881
2882 /** Number of packets dropped because no worker was able to accept a new
2883 * packet. This value saturates at its maximum, and is cleared upon
2884 * read. */
2885 uint8_t drops_no_worker;
2886#ifndef __DOXYGEN__
2887#define NETIO_STAT_DROPS_NO_WORKER 0
2888#endif
2889
2890 /** Number of packets dropped because no small buffers were available.
2891 * This value saturates at its maximum, and is cleared upon read. */
2892 uint8_t drops_no_smallbuf;
2893#ifndef __DOXYGEN__
2894#define NETIO_STAT_DROPS_NO_SMALLBUF 1
2895#endif
2896
2897 /** Number of packets dropped because no large buffers were available.
2898 * This value saturates at its maximum, and is cleared upon read. */
2899 uint8_t drops_no_largebuf;
2900#ifndef __DOXYGEN__
2901#define NETIO_STAT_DROPS_NO_LARGEBUF 2
2902#endif
2903
2904 /** Number of packets dropped because no jumbo buffers were available.
2905 * This value saturates at its maximum, and is cleared upon read. */
2906 uint8_t drops_no_jumbobuf;
2907#ifndef __DOXYGEN__
2908#define NETIO_STAT_DROPS_NO_JUMBOBUF 3
2909#endif
2910}
2911netio_stat_t;
2912
2913
2914/** Link can run, should run, or is running at 10 Mbps. */
2915#define NETIO_LINK_10M 0x01
2916
2917/** Link can run, should run, or is running at 100 Mbps. */
2918#define NETIO_LINK_100M 0x02
2919
2920/** Link can run, should run, or is running at 1 Gbps. */
2921#define NETIO_LINK_1G 0x04
2922
2923/** Link can run, should run, or is running at 10 Gbps. */
2924#define NETIO_LINK_10G 0x08
2925
2926/** Link should run at the highest speed supported by the link and by
2927 * the device connected to the link. Only usable as a value for
2928 * the link's desired state; never returned as a value for the current
2929 * or possible states. */
2930#define NETIO_LINK_ANYSPEED 0x10
2931
2932/** All legal link speeds. */
2933#define NETIO_LINK_SPEED (NETIO_LINK_10M | \
2934 NETIO_LINK_100M | \
2935 NETIO_LINK_1G | \
2936 NETIO_LINK_10G | \
2937 NETIO_LINK_ANYSPEED)
2938
2939
2940/** MAC register class. Addr is a register offset within the MAC.
2941 * Registers within the XGbE and GbE MACs are documented in the Tile
2942 * Processor I/O Device Guide (UG104). MAC registers start at address
2943 * 0x4000, and do not include the MAC_INTERFACE registers. */
2944#define NETIO_MAC 1
2945
2946/** MDIO register class (IEEE 802.3 clause 22 format). Addr is the "addr"
2947 * member of a netio_mdio_addr_t structure. */
2948#define NETIO_MDIO 2
2949
2950/** MDIO register class (IEEE 802.3 clause 45 format). Addr is the "addr"
2951 * member of a netio_mdio_addr_t structure. */
2952#define NETIO_MDIO_CLAUSE45 3
2953
2954/** NetIO MDIO address type. Retrieved or provided using the ::NETIO_MDIO
2955 * address passed to @ref netio_get() or @ref netio_set(). */
2956typedef union
2957{
2958 struct
2959 {
2960 unsigned int reg:16; /**< MDIO register offset. For clause 22 access,
2961 must be less than 32. */
2962 unsigned int phy:5; /**< Which MDIO PHY to access. */
2963 unsigned int dev:5; /**< Which MDIO device to access within that PHY.
2964 Applicable for clause 45 access only; ignored
2965 for clause 22 access. */
2966 }
2967 bits; /**< Container for bitfields. */
2968 uint64_t addr; /**< Value to pass to @ref netio_get() or
2969 * @ref netio_set(). */
2970}
2971netio_mdio_addr_t;
2972
2973/** @} */
2974
2975#endif /* __NETIO_INTF_H__ */
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile
index 112b1e248f05..b4c8e8ec45dc 100644
--- a/arch/tile/kernel/Makefile
+++ b/arch/tile/kernel/Makefile
@@ -15,3 +15,4 @@ obj-$(CONFIG_SMP) += smpboot.o smp.o tlb.o
15obj-$(CONFIG_MODULES) += module.o 15obj-$(CONFIG_MODULES) += module.o
16obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 16obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
17obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o 17obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
18obj-$(CONFIG_PCI) += pci.o
diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c
new file mode 100644
index 000000000000..a1ee25be9ad9
--- /dev/null
+++ b/arch/tile/kernel/pci.c
@@ -0,0 +1,621 @@
1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 */
14
15#include <linux/kernel.h>
16#include <linux/pci.h>
17#include <linux/delay.h>
18#include <linux/string.h>
19#include <linux/init.h>
20#include <linux/capability.h>
21#include <linux/sched.h>
22#include <linux/errno.h>
23#include <linux/bootmem.h>
24#include <linux/irq.h>
25#include <linux/io.h>
26#include <linux/uaccess.h>
27
28#include <asm/processor.h>
29#include <asm/sections.h>
30#include <asm/byteorder.h>
31#include <asm/hv_driver.h>
32#include <hv/drv_pcie_rc_intf.h>
33
34
35/*
36 * Initialization flow and process
37 * -------------------------------
38 *
39 * This files containes the routines to search for PCI buses,
40 * enumerate the buses, and configure any attached devices.
41 *
42 * There are two entry points here:
43 * 1) tile_pci_init
44 * This sets up the pci_controller structs, and opens the
45 * FDs to the hypervisor. This is called from setup_arch() early
46 * in the boot process.
47 * 2) pcibios_init
48 * This probes the PCI bus(es) for any attached hardware. It's
49 * called by subsys_initcall. All of the real work is done by the
50 * generic Linux PCI layer.
51 *
52 */
53
54/*
55 * This flag tells if the platform is TILEmpower that needs
56 * special configuration for the PLX switch chip.
57 */
58int __write_once tile_plx_gen1;
59
60static struct pci_controller controllers[TILE_NUM_PCIE];
61static int num_controllers;
62
63static struct pci_ops tile_cfg_ops;
64
65
66/*
67 * We don't need to worry about the alignment of resources.
68 */
69resource_size_t pcibios_align_resource(void *data, const struct resource *res,
70 resource_size_t size, resource_size_t align)
71{
72 return res->start;
73}
74EXPORT_SYMBOL(pcibios_align_resource);
75
76/*
77 * Open a FD to the hypervisor PCI device.
78 *
79 * controller_id is the controller number, config type is 0 or 1 for
80 * config0 or config1 operations.
81 */
82static int __init tile_pcie_open(int controller_id, int config_type)
83{
84 char filename[32];
85 int fd;
86
87 sprintf(filename, "pcie/%d/config%d", controller_id, config_type);
88
89 fd = hv_dev_open((HV_VirtAddr)filename, 0);
90
91 return fd;
92}
93
94
95/*
96 * Get the IRQ numbers from the HV and set up the handlers for them.
97 */
98static int __init tile_init_irqs(int controller_id,
99 struct pci_controller *controller)
100{
101 char filename[32];
102 int fd;
103 int ret;
104 int x;
105 struct pcie_rc_config rc_config;
106
107 sprintf(filename, "pcie/%d/ctl", controller_id);
108 fd = hv_dev_open((HV_VirtAddr)filename, 0);
109 if (fd < 0) {
110 pr_err("PCI: hv_dev_open(%s) failed\n", filename);
111 return -1;
112 }
113 ret = hv_dev_pread(fd, 0, (HV_VirtAddr)(&rc_config),
114 sizeof(rc_config), PCIE_RC_CONFIG_MASK_OFF);
115 hv_dev_close(fd);
116 if (ret != sizeof(rc_config)) {
117 pr_err("PCI: wanted %zd bytes, got %d\n",
118 sizeof(rc_config), ret);
119 return -1;
120 }
121 /* Record irq_base so that we can map INTx to IRQ # later. */
122 controller->irq_base = rc_config.intr;
123
124 for (x = 0; x < 4; x++)
125 tile_irq_activate(rc_config.intr + x,
126 TILE_IRQ_HW_CLEAR);
127
128 if (rc_config.plx_gen1)
129 controller->plx_gen1 = 1;
130
131 return 0;
132}
133
134/*
135 * First initialization entry point, called from setup_arch().
136 *
137 * Find valid controllers and fill in pci_controller structs for each
138 * of them.
139 *
140 * Returns the number of controllers discovered.
141 */
142int __init tile_pci_init(void)
143{
144 int i;
145
146 pr_info("PCI: Searching for controllers...\n");
147
148 /* Do any configuration we need before using the PCIe */
149
150 for (i = 0; i < TILE_NUM_PCIE; i++) {
151 int hv_cfg_fd0 = -1;
152 int hv_cfg_fd1 = -1;
153 int hv_mem_fd = -1;
154 char name[32];
155 struct pci_controller *controller;
156
157 /*
158 * Open the fd to the HV. If it fails then this
159 * device doesn't exist.
160 */
161 hv_cfg_fd0 = tile_pcie_open(i, 0);
162 if (hv_cfg_fd0 < 0)
163 continue;
164 hv_cfg_fd1 = tile_pcie_open(i, 1);
165 if (hv_cfg_fd1 < 0) {
166 pr_err("PCI: Couldn't open config fd to HV "
167 "for controller %d\n", i);
168 goto err_cont;
169 }
170
171 sprintf(name, "pcie/%d/mem", i);
172 hv_mem_fd = hv_dev_open((HV_VirtAddr)name, 0);
173 if (hv_mem_fd < 0) {
174 pr_err("PCI: Could not open mem fd to HV!\n");
175 goto err_cont;
176 }
177
178 pr_info("PCI: Found PCI controller #%d\n", i);
179
180 controller = &controllers[num_controllers];
181
182 if (tile_init_irqs(i, controller)) {
183 pr_err("PCI: Could not initialize "
184 "IRQs, aborting.\n");
185 goto err_cont;
186 }
187
188 controller->index = num_controllers;
189 controller->hv_cfg_fd[0] = hv_cfg_fd0;
190 controller->hv_cfg_fd[1] = hv_cfg_fd1;
191 controller->hv_mem_fd = hv_mem_fd;
192 controller->first_busno = 0;
193 controller->last_busno = 0xff;
194 controller->ops = &tile_cfg_ops;
195
196 num_controllers++;
197 continue;
198
199err_cont:
200 if (hv_cfg_fd0 >= 0)
201 hv_dev_close(hv_cfg_fd0);
202 if (hv_cfg_fd1 >= 0)
203 hv_dev_close(hv_cfg_fd1);
204 if (hv_mem_fd >= 0)
205 hv_dev_close(hv_mem_fd);
206 continue;
207 }
208
209 /*
210 * Before using the PCIe, see if we need to do any platform-specific
211 * configuration, such as the PLX switch Gen 1 issue on TILEmpower.
212 */
213 for (i = 0; i < num_controllers; i++) {
214 struct pci_controller *controller = &controllers[i];
215
216 if (controller->plx_gen1)
217 tile_plx_gen1 = 1;
218 }
219
220 return num_controllers;
221}
222
223/*
224 * (pin - 1) converts from the PCI standard's [1:4] convention to
225 * a normal [0:3] range.
226 */
227static int tile_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
228{
229 struct pci_controller *controller =
230 (struct pci_controller *)dev->sysdata;
231 return (pin - 1) + controller->irq_base;
232}
233
234
235static void __init fixup_read_and_payload_sizes(void)
236{
237 struct pci_dev *dev = NULL;
238 int smallest_max_payload = 0x1; /* Tile maxes out at 256 bytes. */
239 int max_read_size = 0x2; /* Limit to 512 byte reads. */
240 u16 new_values;
241
242 /* Scan for the smallest maximum payload size. */
243 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
244 int pcie_caps_offset;
245 u32 devcap;
246 int max_payload;
247
248 pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP);
249 if (pcie_caps_offset == 0)
250 continue;
251
252 pci_read_config_dword(dev, pcie_caps_offset + PCI_EXP_DEVCAP,
253 &devcap);
254 max_payload = devcap & PCI_EXP_DEVCAP_PAYLOAD;
255 if (max_payload < smallest_max_payload)
256 smallest_max_payload = max_payload;
257 }
258
259 /* Now, set the max_payload_size for all devices to that value. */
260 new_values = (max_read_size << 12) | (smallest_max_payload << 5);
261 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
262 int pcie_caps_offset;
263 u16 devctl;
264
265 pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP);
266 if (pcie_caps_offset == 0)
267 continue;
268
269 pci_read_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL,
270 &devctl);
271 devctl &= ~(PCI_EXP_DEVCTL_PAYLOAD | PCI_EXP_DEVCTL_READRQ);
272 devctl |= new_values;
273 pci_write_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL,
274 devctl);
275 }
276}
277
278
279/*
280 * Second PCI initialization entry point, called by subsys_initcall.
281 *
282 * The controllers have been set up by the time we get here, by a call to
283 * tile_pci_init.
284 */
285static int __init pcibios_init(void)
286{
287 int i;
288
289 pr_info("PCI: Probing PCI hardware\n");
290
291 /*
292 * Delay a bit in case devices aren't ready. Some devices are
293 * known to require at least 20ms here, but we use a more
294 * conservative value.
295 */
296 mdelay(250);
297
298 /* Scan all of the recorded PCI controllers. */
299 for (i = 0; i < num_controllers; i++) {
300 struct pci_controller *controller = &controllers[i];
301 struct pci_bus *bus;
302
303 pr_info("PCI: initializing controller #%d\n", i);
304
305 /*
306 * This comes from the generic Linux PCI driver.
307 *
308 * It reads the PCI tree for this bus into the Linux
309 * data structures.
310 *
311 * This is inlined in linux/pci.h and calls into
312 * pci_scan_bus_parented() in probe.c.
313 */
314 bus = pci_scan_bus(0, controller->ops, controller);
315 controller->root_bus = bus;
316 controller->last_busno = bus->subordinate;
317
318 }
319
320 /* Do machine dependent PCI interrupt routing */
321 pci_fixup_irqs(pci_common_swizzle, tile_map_irq);
322
323 /*
324 * This comes from the generic Linux PCI driver.
325 *
326 * It allocates all of the resources (I/O memory, etc)
327 * associated with the devices read in above.
328 */
329
330 pci_assign_unassigned_resources();
331
332 /* Configure the max_read_size and max_payload_size values. */
333 fixup_read_and_payload_sizes();
334
335 /* Record the I/O resources in the PCI controller structure. */
336 for (i = 0; i < num_controllers; i++) {
337 struct pci_bus *root_bus = controllers[i].root_bus;
338 struct pci_bus *next_bus;
339 struct pci_dev *dev;
340
341 list_for_each_entry(dev, &root_bus->devices, bus_list) {
342 /* Find the PCI host controller, ie. the 1st bridge. */
343 if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI &&
344 (PCI_SLOT(dev->devfn) == 0)) {
345 next_bus = dev->subordinate;
346 controllers[i].mem_resources[0] =
347 *next_bus->resource[0];
348 controllers[i].mem_resources[1] =
349 *next_bus->resource[1];
350 controllers[i].mem_resources[2] =
351 *next_bus->resource[2];
352
353 break;
354 }
355 }
356
357 }
358
359 return 0;
360}
361subsys_initcall(pcibios_init);
362
363/*
364 * No bus fixups needed.
365 */
366void __devinit pcibios_fixup_bus(struct pci_bus *bus)
367{
368 /* Nothing needs to be done. */
369}
370
371/*
372 * This can be called from the generic PCI layer, but doesn't need to
373 * do anything.
374 */
375char __devinit *pcibios_setup(char *str)
376{
377 /* Nothing needs to be done. */
378 return str;
379}
380
381/*
382 * This is called from the generic Linux layer.
383 */
384void __init pcibios_update_irq(struct pci_dev *dev, int irq)
385{
386 pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
387}
388
389/*
390 * Enable memory and/or address decoding, as appropriate, for the
391 * device described by the 'dev' struct.
392 *
393 * This is called from the generic PCI layer, and can be called
394 * for bridges or endpoints.
395 */
396int pcibios_enable_device(struct pci_dev *dev, int mask)
397{
398 u16 cmd, old_cmd;
399 u8 header_type;
400 int i;
401 struct resource *r;
402
403 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
404
405 pci_read_config_word(dev, PCI_COMMAND, &cmd);
406 old_cmd = cmd;
407 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
408 /*
409 * For bridges, we enable both memory and I/O decoding
410 * in call cases.
411 */
412 cmd |= PCI_COMMAND_IO;
413 cmd |= PCI_COMMAND_MEMORY;
414 } else {
415 /*
416 * For endpoints, we enable memory and/or I/O decoding
417 * only if they have a memory resource of that type.
418 */
419 for (i = 0; i < 6; i++) {
420 r = &dev->resource[i];
421 if (r->flags & IORESOURCE_UNSET) {
422 pr_err("PCI: Device %s not available "
423 "because of resource collisions\n",
424 pci_name(dev));
425 return -EINVAL;
426 }
427 if (r->flags & IORESOURCE_IO)
428 cmd |= PCI_COMMAND_IO;
429 if (r->flags & IORESOURCE_MEM)
430 cmd |= PCI_COMMAND_MEMORY;
431 }
432 }
433
434 /*
435 * We only write the command if it changed.
436 */
437 if (cmd != old_cmd)
438 pci_write_config_word(dev, PCI_COMMAND, cmd);
439 return 0;
440}
441
442void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
443{
444 unsigned long start = pci_resource_start(dev, bar);
445 unsigned long len = pci_resource_len(dev, bar);
446 unsigned long flags = pci_resource_flags(dev, bar);
447
448 if (!len)
449 return NULL;
450 if (max && len > max)
451 len = max;
452
453 if (!(flags & IORESOURCE_MEM)) {
454 pr_info("PCI: Trying to map invalid resource %#lx\n", flags);
455 start = 0;
456 }
457
458 return (void __iomem *)start;
459}
460EXPORT_SYMBOL(pci_iomap);
461
462
463/****************************************************************
464 *
465 * Tile PCI config space read/write routines
466 *
467 ****************************************************************/
468
469/*
470 * These are the normal read and write ops
471 * These are expanded with macros from pci_bus_read_config_byte() etc.
472 *
473 * devfn is the combined PCI slot & function.
474 *
475 * offset is in bytes, from the start of config space for the
476 * specified bus & slot.
477 */
478
479static int __devinit tile_cfg_read(struct pci_bus *bus,
480 unsigned int devfn,
481 int offset,
482 int size,
483 u32 *val)
484{
485 struct pci_controller *controller = bus->sysdata;
486 int busnum = bus->number & 0xff;
487 int slot = (devfn >> 3) & 0x1f;
488 int function = devfn & 0x7;
489 u32 addr;
490 int config_mode = 1;
491
492 /*
493 * There is no bridge between the Tile and bus 0, so we
494 * use config0 to talk to bus 0.
495 *
496 * If we're talking to a bus other than zero then we
497 * must have found a bridge.
498 */
499 if (busnum == 0) {
500 /*
501 * We fake an empty slot for (busnum == 0) && (slot > 0),
502 * since there is only one slot on bus 0.
503 */
504 if (slot) {
505 *val = 0xFFFFFFFF;
506 return 0;
507 }
508 config_mode = 0;
509 }
510
511 addr = busnum << 20; /* Bus in 27:20 */
512 addr |= slot << 15; /* Slot (device) in 19:15 */
513 addr |= function << 12; /* Function is in 14:12 */
514 addr |= (offset & 0xFFF); /* byte address in 0:11 */
515
516 return hv_dev_pread(controller->hv_cfg_fd[config_mode], 0,
517 (HV_VirtAddr)(val), size, addr);
518}
519
520
521/*
522 * See tile_cfg_read() for relevent comments.
523 * Note that "val" is the value to write, not a pointer to that value.
524 */
525static int __devinit tile_cfg_write(struct pci_bus *bus,
526 unsigned int devfn,
527 int offset,
528 int size,
529 u32 val)
530{
531 struct pci_controller *controller = bus->sysdata;
532 int busnum = bus->number & 0xff;
533 int slot = (devfn >> 3) & 0x1f;
534 int function = devfn & 0x7;
535 u32 addr;
536 int config_mode = 1;
537 HV_VirtAddr valp = (HV_VirtAddr)&val;
538
539 /*
540 * For bus 0 slot 0 we use config 0 accesses.
541 */
542 if (busnum == 0) {
543 /*
544 * We fake an empty slot for (busnum == 0) && (slot > 0),
545 * since there is only one slot on bus 0.
546 */
547 if (slot)
548 return 0;
549 config_mode = 0;
550 }
551
552 addr = busnum << 20; /* Bus in 27:20 */
553 addr |= slot << 15; /* Slot (device) in 19:15 */
554 addr |= function << 12; /* Function is in 14:12 */
555 addr |= (offset & 0xFFF); /* byte address in 0:11 */
556
557#ifdef __BIG_ENDIAN
558 /* Point to the correct part of the 32-bit "val". */
559 valp += 4 - size;
560#endif
561
562 return hv_dev_pwrite(controller->hv_cfg_fd[config_mode], 0,
563 valp, size, addr);
564}
565
566
567static struct pci_ops tile_cfg_ops = {
568 .read = tile_cfg_read,
569 .write = tile_cfg_write,
570};
571
572
573/*
574 * In the following, each PCI controller's mem_resources[1]
575 * represents its (non-prefetchable) PCI memory resource.
576 * mem_resources[0] and mem_resources[2] refer to its PCI I/O and
577 * prefetchable PCI memory resources, respectively.
578 * For more details, see pci_setup_bridge() in setup-bus.c.
579 * By comparing the target PCI memory address against the
580 * end address of controller 0, we can determine the controller
581 * that should accept the PCI memory access.
582 */
583#define TILE_READ(size, type) \
584type _tile_read##size(unsigned long addr) \
585{ \
586 type val; \
587 int idx = 0; \
588 if (addr > controllers[0].mem_resources[1].end && \
589 addr > controllers[0].mem_resources[2].end) \
590 idx = 1; \
591 if (hv_dev_pread(controllers[idx].hv_mem_fd, 0, \
592 (HV_VirtAddr)(&val), sizeof(type), addr)) \
593 pr_err("PCI: read %zd bytes at 0x%lX failed\n", \
594 sizeof(type), addr); \
595 return val; \
596} \
597EXPORT_SYMBOL(_tile_read##size)
598
599TILE_READ(b, u8);
600TILE_READ(w, u16);
601TILE_READ(l, u32);
602TILE_READ(q, u64);
603
604#define TILE_WRITE(size, type) \
605void _tile_write##size(type val, unsigned long addr) \
606{ \
607 int idx = 0; \
608 if (addr > controllers[0].mem_resources[1].end && \
609 addr > controllers[0].mem_resources[2].end) \
610 idx = 1; \
611 if (hv_dev_pwrite(controllers[idx].hv_mem_fd, 0, \
612 (HV_VirtAddr)(&val), sizeof(type), addr)) \
613 pr_err("PCI: write %zd bytes at 0x%lX failed\n", \
614 sizeof(type), addr); \
615} \
616EXPORT_SYMBOL(_tile_write##size)
617
618TILE_WRITE(b, u8);
619TILE_WRITE(w, u16);
620TILE_WRITE(l, u32);
621TILE_WRITE(q, u64);
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index fb0b3cbeae14..f18573643ed1 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -840,7 +840,7 @@ static int __init topology_init(void)
840 for_each_online_node(i) 840 for_each_online_node(i)
841 register_one_node(i); 841 register_one_node(i);
842 842
843 for_each_present_cpu(i) 843 for (i = 0; i < smp_height * smp_width; ++i)
844 register_cpu(&cpu_devices[i], i); 844 register_cpu(&cpu_devices[i], i);
845 845
846 return 0; 846 return 0;
diff --git a/arch/tile/lib/memchr_32.c b/arch/tile/lib/memchr_32.c
index 6235283b4859..cc3d9badf030 100644
--- a/arch/tile/lib/memchr_32.c
+++ b/arch/tile/lib/memchr_32.c
@@ -18,12 +18,24 @@
18 18
19void *memchr(const void *s, int c, size_t n) 19void *memchr(const void *s, int c, size_t n)
20{ 20{
21 const uint32_t *last_word_ptr;
22 const uint32_t *p;
23 const char *last_byte_ptr;
24 uintptr_t s_int;
25 uint32_t goal, before_mask, v, bits;
26 char *ret;
27
28 if (__builtin_expect(n == 0, 0)) {
29 /* Don't dereference any memory if the array is empty. */
30 return NULL;
31 }
32
21 /* Get an aligned pointer. */ 33 /* Get an aligned pointer. */
22 const uintptr_t s_int = (uintptr_t) s; 34 s_int = (uintptr_t) s;
23 const uint32_t *p = (const uint32_t *)(s_int & -4); 35 p = (const uint32_t *)(s_int & -4);
24 36
25 /* Create four copies of the byte for which we are looking. */ 37 /* Create four copies of the byte for which we are looking. */
26 const uint32_t goal = 0x01010101 * (uint8_t) c; 38 goal = 0x01010101 * (uint8_t) c;
27 39
28 /* Read the first word, but munge it so that bytes before the array 40 /* Read the first word, but munge it so that bytes before the array
29 * will not match goal. 41 * will not match goal.
@@ -31,23 +43,14 @@ void *memchr(const void *s, int c, size_t n)
31 * Note that this shift count expression works because we know 43 * Note that this shift count expression works because we know
32 * shift counts are taken mod 32. 44 * shift counts are taken mod 32.
33 */ 45 */
34 const uint32_t before_mask = (1 << (s_int << 3)) - 1; 46 before_mask = (1 << (s_int << 3)) - 1;
35 uint32_t v = (*p | before_mask) ^ (goal & before_mask); 47 v = (*p | before_mask) ^ (goal & before_mask);
36 48
37 /* Compute the address of the last byte. */ 49 /* Compute the address of the last byte. */
38 const char *const last_byte_ptr = (const char *)s + n - 1; 50 last_byte_ptr = (const char *)s + n - 1;
39 51
40 /* Compute the address of the word containing the last byte. */ 52 /* Compute the address of the word containing the last byte. */
41 const uint32_t *const last_word_ptr = 53 last_word_ptr = (const uint32_t *)((uintptr_t) last_byte_ptr & -4);
42 (const uint32_t *)((uintptr_t) last_byte_ptr & -4);
43
44 uint32_t bits;
45 char *ret;
46
47 if (__builtin_expect(n == 0, 0)) {
48 /* Don't dereference any memory if the array is empty. */
49 return NULL;
50 }
51 54
52 while ((bits = __insn_seqb(v, goal)) == 0) { 55 while ((bits = __insn_seqb(v, goal)) == 0) {
53 if (__builtin_expect(p == last_word_ptr, 0)) { 56 if (__builtin_expect(p == last_word_ptr, 0)) {
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c
index 485e24d62c6b..5cd1c4004eca 100644
--- a/arch/tile/lib/spinlock_32.c
+++ b/arch/tile/lib/spinlock_32.c
@@ -167,23 +167,30 @@ void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val)
167 * when we compare them. 167 * when we compare them.
168 */ 168 */
169 u32 my_ticket_; 169 u32 my_ticket_;
170 u32 iterations = 0;
170 171
171 /* Take out the next ticket; this will also stop would-be readers. */ 172 /*
172 if (val & 1) 173 * Wait until there are no readers, then bump up the next
173 val = get_rwlock(rwlock); 174 * field and capture the ticket value.
174 rwlock->lock = __insn_addb(val, 1 << WR_NEXT_SHIFT); 175 */
176 for (;;) {
177 if (!(val & 1)) {
178 if ((val >> RD_COUNT_SHIFT) == 0)
179 break;
180 rwlock->lock = val;
181 }
182 delay_backoff(iterations++);
183 val = __insn_tns((int *)&rwlock->lock);
184 }
175 185
176 /* Extract my ticket value from the original word. */ 186 /* Take out the next ticket and extract my ticket value. */
187 rwlock->lock = __insn_addb(val, 1 << WR_NEXT_SHIFT);
177 my_ticket_ = val >> WR_NEXT_SHIFT; 188 my_ticket_ = val >> WR_NEXT_SHIFT;
178 189
179 /* 190 /* Wait until the "current" field matches our ticket. */
180 * Wait until the "current" field matches our ticket, and
181 * there are no remaining readers.
182 */
183 for (;;) { 191 for (;;) {
184 u32 curr_ = val >> WR_CURR_SHIFT; 192 u32 curr_ = val >> WR_CURR_SHIFT;
185 u32 readers = val >> RD_COUNT_SHIFT; 193 u32 delta = ((my_ticket_ - curr_) & WR_MASK);
186 u32 delta = ((my_ticket_ - curr_) & WR_MASK) + !!readers;
187 if (likely(delta == 0)) 194 if (likely(delta == 0))
188 break; 195 break;
189 196
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e8327686d3c5..e330da21b84f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -21,7 +21,7 @@ config X86
21 select HAVE_UNSTABLE_SCHED_CLOCK 21 select HAVE_UNSTABLE_SCHED_CLOCK
22 select HAVE_IDE 22 select HAVE_IDE
23 select HAVE_OPROFILE 23 select HAVE_OPROFILE
24 select HAVE_PERF_EVENTS if (!M386 && !M486) 24 select HAVE_PERF_EVENTS
25 select HAVE_IRQ_WORK 25 select HAVE_IRQ_WORK
26 select HAVE_IOREMAP_PROT 26 select HAVE_IOREMAP_PROT
27 select HAVE_KPROBES 27 select HAVE_KPROBES
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 3ea3dc487047..6b89f5e86021 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -128,7 +128,7 @@
128#define FAM10H_MMIO_CONF_ENABLE (1<<0) 128#define FAM10H_MMIO_CONF_ENABLE (1<<0)
129#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf 129#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf
130#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 130#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
131#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff 131#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
132#define FAM10H_MMIO_CONF_BASE_SHIFT 20 132#define FAM10H_MMIO_CONF_BASE_SHIFT 20
133#define MSR_FAM10H_NODE_ID 0xc001100c 133#define MSR_FAM10H_NODE_ID 0xc001100c
134 134
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 18e3b8a8709f..ef9975812c77 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -824,27 +824,27 @@ static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
824#define __PV_IS_CALLEE_SAVE(func) \ 824#define __PV_IS_CALLEE_SAVE(func) \
825 ((struct paravirt_callee_save) { func }) 825 ((struct paravirt_callee_save) { func })
826 826
827static inline unsigned long arch_local_save_flags(void) 827static inline notrace unsigned long arch_local_save_flags(void)
828{ 828{
829 return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl); 829 return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl);
830} 830}
831 831
832static inline void arch_local_irq_restore(unsigned long f) 832static inline notrace void arch_local_irq_restore(unsigned long f)
833{ 833{
834 PVOP_VCALLEE1(pv_irq_ops.restore_fl, f); 834 PVOP_VCALLEE1(pv_irq_ops.restore_fl, f);
835} 835}
836 836
837static inline void arch_local_irq_disable(void) 837static inline notrace void arch_local_irq_disable(void)
838{ 838{
839 PVOP_VCALLEE0(pv_irq_ops.irq_disable); 839 PVOP_VCALLEE0(pv_irq_ops.irq_disable);
840} 840}
841 841
842static inline void arch_local_irq_enable(void) 842static inline notrace void arch_local_irq_enable(void)
843{ 843{
844 PVOP_VCALLEE0(pv_irq_ops.irq_enable); 844 PVOP_VCALLEE0(pv_irq_ops.irq_enable);
845} 845}
846 846
847static inline unsigned long arch_local_irq_save(void) 847static inline notrace unsigned long arch_local_irq_save(void)
848{ 848{
849 unsigned long f; 849 unsigned long f;
850 850
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index e969f691cbfd..a501741c2335 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -199,6 +199,8 @@ union uvh_apicid {
199#define UVH_APICID 0x002D0E00L 199#define UVH_APICID 0x002D0E00L
200#define UV_APIC_PNODE_SHIFT 6 200#define UV_APIC_PNODE_SHIFT 6
201 201
202#define UV_APICID_HIBIT_MASK 0xffff0000
203
202/* Local Bus from cpu's perspective */ 204/* Local Bus from cpu's perspective */
203#define LOCAL_BUS_BASE 0x1c00000 205#define LOCAL_BUS_BASE 0x1c00000
204#define LOCAL_BUS_SIZE (4 * 1024 * 1024) 206#define LOCAL_BUS_SIZE (4 * 1024 * 1024)
@@ -491,8 +493,10 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
491 } 493 }
492} 494}
493 495
496extern unsigned int uv_apicid_hibits;
494static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode) 497static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode)
495{ 498{
499 apicid |= uv_apicid_hibits;
496 return (1UL << UVH_IPI_INT_SEND_SHFT) | 500 return (1UL << UVH_IPI_INT_SEND_SHFT) |
497 ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) | 501 ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) |
498 (mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) | 502 (mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 6d90adf4428a..20cafeac7455 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -5,7 +5,7 @@
5 * 5 *
6 * SGI UV MMR definitions 6 * SGI UV MMR definitions
7 * 7 *
8 * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. 8 * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
9 */ 9 */
10 10
11#ifndef _ASM_X86_UV_UV_MMRS_H 11#ifndef _ASM_X86_UV_UV_MMRS_H
@@ -754,6 +754,23 @@ union uvh_lb_bau_sb_descriptor_base_u {
754}; 754};
755 755
756/* ========================================================================= */ 756/* ========================================================================= */
757/* UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK */
758/* ========================================================================= */
759#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK 0x320130UL
760#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_32 0x009f0
761
762#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_SHFT 0
763#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_MASK 0x00000000ffffffffUL
764
765union uvh_lb_target_physical_apic_id_mask_u {
766 unsigned long v;
767 struct uvh_lb_target_physical_apic_id_mask_s {
768 unsigned long bit_enables : 32; /* RW */
769 unsigned long rsvd_32_63 : 32; /* */
770 } s;
771};
772
773/* ========================================================================= */
757/* UVH_NODE_ID */ 774/* UVH_NODE_ID */
758/* ========================================================================= */ 775/* ========================================================================= */
759#define UVH_NODE_ID 0x0UL 776#define UVH_NODE_ID 0x0UL
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index cefd6942f0e9..62f6e1e55b90 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -17,15 +17,16 @@
17#include <linux/nmi.h> 17#include <linux/nmi.h>
18#include <linux/module.h> 18#include <linux/module.h>
19 19
20/* For reliability, we're prepared to waste bits here. */
21static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
22
23u64 hw_nmi_get_sample_period(void) 20u64 hw_nmi_get_sample_period(void)
24{ 21{
25 return (u64)(cpu_khz) * 1000 * 60; 22 return (u64)(cpu_khz) * 1000 * 60;
26} 23}
27 24
28#ifdef ARCH_HAS_NMI_WATCHDOG 25#ifdef ARCH_HAS_NMI_WATCHDOG
26
27/* For reliability, we're prepared to waste bits here. */
28static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
29
29void arch_trigger_all_cpu_backtrace(void) 30void arch_trigger_all_cpu_backtrace(void)
30{ 31{
31 int i; 32 int i;
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 194539aea175..c1c52c341f40 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -44,6 +44,8 @@ static u64 gru_start_paddr, gru_end_paddr;
44static union uvh_apicid uvh_apicid; 44static union uvh_apicid uvh_apicid;
45int uv_min_hub_revision_id; 45int uv_min_hub_revision_id;
46EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); 46EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
47unsigned int uv_apicid_hibits;
48EXPORT_SYMBOL_GPL(uv_apicid_hibits);
47static DEFINE_SPINLOCK(uv_nmi_lock); 49static DEFINE_SPINLOCK(uv_nmi_lock);
48 50
49static inline bool is_GRU_range(u64 start, u64 end) 51static inline bool is_GRU_range(u64 start, u64 end)
@@ -85,6 +87,23 @@ static void __init early_get_apic_pnode_shift(void)
85 uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT; 87 uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT;
86} 88}
87 89
90/*
91 * Add an extra bit as dictated by bios to the destination apicid of
92 * interrupts potentially passing through the UV HUB. This prevents
93 * a deadlock between interrupts and IO port operations.
94 */
95static void __init uv_set_apicid_hibit(void)
96{
97 union uvh_lb_target_physical_apic_id_mask_u apicid_mask;
98 unsigned long *mmr;
99
100 mmr = early_ioremap(UV_LOCAL_MMR_BASE |
101 UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK, sizeof(*mmr));
102 apicid_mask.v = *mmr;
103 early_iounmap(mmr, sizeof(*mmr));
104 uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK;
105}
106
88static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 107static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
89{ 108{
90 int nodeid; 109 int nodeid;
@@ -102,6 +121,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
102 __get_cpu_var(x2apic_extra_bits) = 121 __get_cpu_var(x2apic_extra_bits) =
103 nodeid << (uvh_apicid.s.pnode_shift - 1); 122 nodeid << (uvh_apicid.s.pnode_shift - 1);
104 uv_system_type = UV_NON_UNIQUE_APIC; 123 uv_system_type = UV_NON_UNIQUE_APIC;
124 uv_set_apicid_hibit();
105 return 1; 125 return 1;
106 } 126 }
107 } 127 }
@@ -155,6 +175,7 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri
155 int pnode; 175 int pnode;
156 176
157 pnode = uv_apicid_to_pnode(phys_apicid); 177 pnode = uv_apicid_to_pnode(phys_apicid);
178 phys_apicid |= uv_apicid_hibits;
158 val = (1UL << UVH_IPI_INT_SEND_SHFT) | 179 val = (1UL << UVH_IPI_INT_SEND_SHFT) |
159 (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | 180 (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
160 ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | 181 ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
@@ -236,7 +257,7 @@ static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
236 int cpu = cpumask_first(cpumask); 257 int cpu = cpumask_first(cpumask);
237 258
238 if ((unsigned)cpu < nr_cpu_ids) 259 if ((unsigned)cpu < nr_cpu_ids)
239 return per_cpu(x86_cpu_to_apicid, cpu); 260 return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
240 else 261 else
241 return BAD_APICID; 262 return BAD_APICID;
242} 263}
@@ -255,7 +276,7 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
255 if (cpumask_test_cpu(cpu, cpu_online_mask)) 276 if (cpumask_test_cpu(cpu, cpu_online_mask))
256 break; 277 break;
257 } 278 }
258 return per_cpu(x86_cpu_to_apicid, cpu); 279 return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
259} 280}
260 281
261static unsigned int x2apic_get_apic_id(unsigned long x) 282static unsigned int x2apic_get_apic_id(unsigned long x)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ed6310183efb..6d75b9145b13 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -381,6 +381,20 @@ static void release_pmc_hardware(void) {}
381 381
382#endif 382#endif
383 383
384static bool check_hw_exists(void)
385{
386 u64 val, val_new = 0;
387 int ret = 0;
388
389 val = 0xabcdUL;
390 ret |= checking_wrmsrl(x86_pmu.perfctr, val);
391 ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
392 if (ret || val != val_new)
393 return false;
394
395 return true;
396}
397
384static void reserve_ds_buffers(void); 398static void reserve_ds_buffers(void);
385static void release_ds_buffers(void); 399static void release_ds_buffers(void);
386 400
@@ -1372,6 +1386,12 @@ void __init init_hw_perf_events(void)
1372 1386
1373 pmu_check_apic(); 1387 pmu_check_apic();
1374 1388
1389 /* sanity check that the hardware exists or is emulated */
1390 if (!check_hw_exists()) {
1391 pr_cont("Broken PMU hardware detected, software events only.\n");
1392 return;
1393 }
1394
1375 pr_cont("%s PMU driver.\n", x86_pmu.name); 1395 pr_cont("%s PMU driver.\n", x86_pmu.name);
1376 1396
1377 if (x86_pmu.quirks) 1397 if (x86_pmu.quirks)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 59e175e89599..591e60104278 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -395,7 +395,7 @@ sysenter_past_esp:
395 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words 395 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
396 * pushed above; +8 corresponds to copy_thread's esp0 setting. 396 * pushed above; +8 corresponds to copy_thread's esp0 setting.
397 */ 397 */
398 pushl_cfi (TI_sysenter_return-THREAD_SIZE_asm+8+4*4)(%esp) 398 pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp)
399 CFI_REL_OFFSET eip, 0 399 CFI_REL_OFFSET eip, 0
400 400
401 pushl_cfi %eax 401 pushl_cfi %eax
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index fe2690d71c0c..e3ba417e8697 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -295,6 +295,7 @@ ENDPROC(native_usergs_sysret64)
295 .endm 295 .endm
296 296
297/* save partial stack frame */ 297/* save partial stack frame */
298 .pushsection .kprobes.text, "ax"
298ENTRY(save_args) 299ENTRY(save_args)
299 XCPT_FRAME 300 XCPT_FRAME
300 cld 301 cld
@@ -334,6 +335,7 @@ ENTRY(save_args)
334 ret 335 ret
335 CFI_ENDPROC 336 CFI_ENDPROC
336END(save_args) 337END(save_args)
338 .popsection
337 339
338ENTRY(save_rest) 340ENTRY(save_rest)
339 PARTIAL_FRAME 1 REST_SKIP+8 341 PARTIAL_FRAME 1 REST_SKIP+8
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index ff15c9dcc25d..42c594254507 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -433,6 +433,10 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
433 dr6_p = (unsigned long *)ERR_PTR(args->err); 433 dr6_p = (unsigned long *)ERR_PTR(args->err);
434 dr6 = *dr6_p; 434 dr6 = *dr6_p;
435 435
436 /* If it's a single step, TRAP bits are random */
437 if (dr6 & DR_STEP)
438 return NOTIFY_DONE;
439
436 /* Do an early return if no trap bits are set in DR6 */ 440 /* Do an early return if no trap bits are set in DR6 */
437 if ((dr6 & DR_TRAP_BITS) == 0) 441 if ((dr6 & DR_TRAP_BITS) == 0)
438 return NOTIFY_DONE; 442 return NOTIFY_DONE;
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index 6da143c2a6b8..ac861b8348e2 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -25,7 +25,6 @@ struct pci_hostbridge_probe {
25}; 25};
26 26
27static u64 __cpuinitdata fam10h_pci_mmconf_base; 27static u64 __cpuinitdata fam10h_pci_mmconf_base;
28static int __cpuinitdata fam10h_pci_mmconf_base_status;
29 28
30static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = { 29static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = {
31 { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 }, 30 { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 },
@@ -44,10 +43,12 @@ static int __cpuinit cmp_range(const void *x1, const void *x2)
44 return start1 - start2; 43 return start1 - start2;
45} 44}
46 45
47/*[47:0] */ 46#define MMCONF_UNIT (1ULL << FAM10H_MMIO_CONF_BASE_SHIFT)
48/* need to avoid (0xfd<<32) and (0xfe<<32), ht used space */ 47#define MMCONF_MASK (~(MMCONF_UNIT - 1))
48#define MMCONF_SIZE (MMCONF_UNIT << 8)
49/* need to avoid (0xfd<<32), (0xfe<<32), and (0xff<<32), ht used space */
49#define FAM10H_PCI_MMCONF_BASE (0xfcULL<<32) 50#define FAM10H_PCI_MMCONF_BASE (0xfcULL<<32)
50#define BASE_VALID(b) ((b != (0xfdULL << 32)) && (b != (0xfeULL << 32))) 51#define BASE_VALID(b) ((b) + MMCONF_SIZE <= (0xfdULL<<32) || (b) >= (1ULL<<40))
51static void __cpuinit get_fam10h_pci_mmconf_base(void) 52static void __cpuinit get_fam10h_pci_mmconf_base(void)
52{ 53{
53 int i; 54 int i;
@@ -64,12 +65,11 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
64 struct range range[8]; 65 struct range range[8];
65 66
66 /* only try to get setting from BSP */ 67 /* only try to get setting from BSP */
67 /* -1 or 1 */ 68 if (fam10h_pci_mmconf_base)
68 if (fam10h_pci_mmconf_base_status)
69 return; 69 return;
70 70
71 if (!early_pci_allowed()) 71 if (!early_pci_allowed())
72 goto fail; 72 return;
73 73
74 found = 0; 74 found = 0;
75 for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { 75 for (i = 0; i < ARRAY_SIZE(pci_probes); i++) {
@@ -91,7 +91,7 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
91 } 91 }
92 92
93 if (!found) 93 if (!found)
94 goto fail; 94 return;
95 95
96 /* SYS_CFG */ 96 /* SYS_CFG */
97 address = MSR_K8_SYSCFG; 97 address = MSR_K8_SYSCFG;
@@ -99,16 +99,16 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
99 99
100 /* TOP_MEM2 is not enabled? */ 100 /* TOP_MEM2 is not enabled? */
101 if (!(val & (1<<21))) { 101 if (!(val & (1<<21))) {
102 tom2 = 0; 102 tom2 = 1ULL << 32;
103 } else { 103 } else {
104 /* TOP_MEM2 */ 104 /* TOP_MEM2 */
105 address = MSR_K8_TOP_MEM2; 105 address = MSR_K8_TOP_MEM2;
106 rdmsrl(address, val); 106 rdmsrl(address, val);
107 tom2 = val & (0xffffULL<<32); 107 tom2 = max(val & 0xffffff800000ULL, 1ULL << 32);
108 } 108 }
109 109
110 if (base <= tom2) 110 if (base <= tom2)
111 base = tom2 + (1ULL<<32); 111 base = (tom2 + 2 * MMCONF_UNIT - 1) & MMCONF_MASK;
112 112
113 /* 113 /*
114 * need to check if the range is in the high mmio range that is 114 * need to check if the range is in the high mmio range that is
@@ -123,11 +123,11 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
123 if (!(reg & 3)) 123 if (!(reg & 3))
124 continue; 124 continue;
125 125
126 start = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/ 126 start = (u64)(reg & 0xffffff00) << 8; /* 39:16 on 31:8*/
127 reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3)); 127 reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3));
128 end = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/ 128 end = ((u64)(reg & 0xffffff00) << 8) | 0xffff; /* 39:16 on 31:8*/
129 129
130 if (!end) 130 if (end < tom2)
131 continue; 131 continue;
132 132
133 range[hi_mmio_num].start = start; 133 range[hi_mmio_num].start = start;
@@ -143,32 +143,27 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
143 143
144 if (range[hi_mmio_num - 1].end < base) 144 if (range[hi_mmio_num - 1].end < base)
145 goto out; 145 goto out;
146 if (range[0].start > base) 146 if (range[0].start > base + MMCONF_SIZE)
147 goto out; 147 goto out;
148 148
149 /* need to find one window */ 149 /* need to find one window */
150 base = range[0].start - (1ULL << 32); 150 base = (range[0].start & MMCONF_MASK) - MMCONF_UNIT;
151 if ((base > tom2) && BASE_VALID(base)) 151 if ((base > tom2) && BASE_VALID(base))
152 goto out; 152 goto out;
153 base = range[hi_mmio_num - 1].end + (1ULL << 32); 153 base = (range[hi_mmio_num - 1].end + MMCONF_UNIT) & MMCONF_MASK;
154 if ((base > tom2) && BASE_VALID(base)) 154 if (BASE_VALID(base))
155 goto out; 155 goto out;
156 /* need to find window between ranges */ 156 /* need to find window between ranges */
157 if (hi_mmio_num > 1) 157 for (i = 1; i < hi_mmio_num; i++) {
158 for (i = 0; i < hi_mmio_num - 1; i++) { 158 base = (range[i - 1].end + MMCONF_UNIT) & MMCONF_MASK;
159 if (range[i + 1].start > (range[i].end + (1ULL << 32))) { 159 val = range[i].start & MMCONF_MASK;
160 base = range[i].end + (1ULL << 32); 160 if (val >= base + MMCONF_SIZE && BASE_VALID(base))
161 if ((base > tom2) && BASE_VALID(base)) 161 goto out;
162 goto out;
163 }
164 } 162 }
165
166fail:
167 fam10h_pci_mmconf_base_status = -1;
168 return; 163 return;
164
169out: 165out:
170 fam10h_pci_mmconf_base = base; 166 fam10h_pci_mmconf_base = base;
171 fam10h_pci_mmconf_base_status = 1;
172} 167}
173 168
174void __cpuinit fam10h_check_enable_mmcfg(void) 169void __cpuinit fam10h_check_enable_mmcfg(void)
@@ -190,11 +185,10 @@ void __cpuinit fam10h_check_enable_mmcfg(void)
190 185
191 /* only trust the one handle 256 buses, if acpi=off */ 186 /* only trust the one handle 256 buses, if acpi=off */
192 if (!acpi_pci_disabled || busnbits >= 8) { 187 if (!acpi_pci_disabled || busnbits >= 8) {
193 u64 base; 188 u64 base = val & MMCONF_MASK;
194 base = val & (0xffffULL << 32); 189
195 if (fam10h_pci_mmconf_base_status <= 0) { 190 if (!fam10h_pci_mmconf_base) {
196 fam10h_pci_mmconf_base = base; 191 fam10h_pci_mmconf_base = base;
197 fam10h_pci_mmconf_base_status = 1;
198 return; 192 return;
199 } else if (fam10h_pci_mmconf_base == base) 193 } else if (fam10h_pci_mmconf_base == base)
200 return; 194 return;
@@ -206,8 +200,10 @@ void __cpuinit fam10h_check_enable_mmcfg(void)
206 * with 256 buses 200 * with 256 buses
207 */ 201 */
208 get_fam10h_pci_mmconf_base(); 202 get_fam10h_pci_mmconf_base();
209 if (fam10h_pci_mmconf_base_status <= 0) 203 if (!fam10h_pci_mmconf_base) {
204 pci_probe &= ~PCI_CHECK_ENABLE_AMD_MMCONF;
210 return; 205 return;
206 }
211 207
212 printk(KERN_INFO "Enable MMCONFIG on AMD Family 10h\n"); 208 printk(KERN_INFO "Enable MMCONFIG on AMD Family 10h\n");
213 val &= ~((FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT) | 209 val &= ~((FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT) |
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 12cdbb17ad18..6acc724d5d8f 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -223,7 +223,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
223 223
224static void __cpuinit calculate_tlb_offset(void) 224static void __cpuinit calculate_tlb_offset(void)
225{ 225{
226 int cpu, node, nr_node_vecs; 226 int cpu, node, nr_node_vecs, idx = 0;
227 /* 227 /*
228 * we are changing tlb_vector_offset for each CPU in runtime, but this 228 * we are changing tlb_vector_offset for each CPU in runtime, but this
229 * will not cause inconsistency, as the write is atomic under X86. we 229 * will not cause inconsistency, as the write is atomic under X86. we
@@ -239,7 +239,7 @@ static void __cpuinit calculate_tlb_offset(void)
239 nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes; 239 nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes;
240 240
241 for_each_online_node(node) { 241 for_each_online_node(node) {
242 int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) * 242 int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) *
243 nr_node_vecs; 243 nr_node_vecs;
244 int cpu_offset = 0; 244 int cpu_offset = 0;
245 for_each_cpu(cpu, cpumask_of_node(node)) { 245 for_each_cpu(cpu, cpumask_of_node(node)) {
@@ -248,6 +248,7 @@ static void __cpuinit calculate_tlb_offset(void)
248 cpu_offset++; 248 cpu_offset++;
249 cpu_offset = cpu_offset % nr_node_vecs; 249 cpu_offset = cpu_offset % nr_node_vecs;
250 } 250 }
251 idx++;
251 } 252 }
252} 253}
253 254
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index a318194002b5..ba9caa808a9c 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1455,7 +1455,7 @@ static void __init uv_init_uvhub(int uvhub, int vector)
1455 * the below initialization can't be in firmware because the 1455 * the below initialization can't be in firmware because the
1456 * messaging IRQ will be determined by the OS 1456 * messaging IRQ will be determined by the OS
1457 */ 1457 */
1458 apicid = uvhub_to_first_apicid(uvhub); 1458 apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits;
1459 uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, 1459 uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
1460 ((apicid << 32) | vector)); 1460 ((apicid << 32) | vector));
1461} 1461}
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index 56e421bc379b..9daf5d1af9f1 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -89,6 +89,7 @@ static void uv_rtc_send_IPI(int cpu)
89 89
90 apicid = cpu_physical_id(cpu); 90 apicid = cpu_physical_id(cpu);
91 pnode = uv_apicid_to_pnode(apicid); 91 pnode = uv_apicid_to_pnode(apicid);
92 apicid |= uv_apicid_hibits;
92 val = (1UL << UVH_IPI_INT_SEND_SHFT) | 93 val = (1UL << UVH_IPI_INT_SEND_SHFT) |
93 (apicid << UVH_IPI_INT_APIC_ID_SHFT) | 94 (apicid << UVH_IPI_INT_APIC_ID_SHFT) |
94 (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT); 95 (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT);
@@ -107,6 +108,7 @@ static int uv_intr_pending(int pnode)
107static int uv_setup_intr(int cpu, u64 expires) 108static int uv_setup_intr(int cpu, u64 expires)
108{ 109{
109 u64 val; 110 u64 val;
111 unsigned long apicid = cpu_physical_id(cpu) | uv_apicid_hibits;
110 int pnode = uv_cpu_to_pnode(cpu); 112 int pnode = uv_cpu_to_pnode(cpu);
111 113
112 uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, 114 uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
@@ -117,7 +119,7 @@ static int uv_setup_intr(int cpu, u64 expires)
117 UVH_EVENT_OCCURRED0_RTC1_MASK); 119 UVH_EVENT_OCCURRED0_RTC1_MASK);
118 120
119 val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | 121 val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
120 ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); 122 ((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
121 123
122 /* Set configuration */ 124 /* Set configuration */
123 uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val); 125 uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 7250bef7f49e..02c710bebf7a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1200,8 +1200,6 @@ asmlinkage void __init xen_start_kernel(void)
1200 /* Allocate and initialize top and mid mfn levels for p2m structure */ 1200 /* Allocate and initialize top and mid mfn levels for p2m structure */
1201 xen_build_mfn_list_list(); 1201 xen_build_mfn_list_list();
1202 1202
1203 init_mm.pgd = pgd;
1204
1205 /* keep using Xen gdt for now; no urgent need to change it */ 1203 /* keep using Xen gdt for now; no urgent need to change it */
1206 1204
1207#ifdef CONFIG_X86_32 1205#ifdef CONFIG_X86_32
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 790af908284e..a1feff9e59b6 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2133,44 +2133,83 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
2133 return pgd; 2133 return pgd;
2134} 2134}
2135#else /* !CONFIG_X86_64 */ 2135#else /* !CONFIG_X86_64 */
2136static RESERVE_BRK_ARRAY(pmd_t, level2_kernel_pgt, PTRS_PER_PMD); 2136static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
2137static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
2138
2139static __init void xen_write_cr3_init(unsigned long cr3)
2140{
2141 unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
2142
2143 BUG_ON(read_cr3() != __pa(initial_page_table));
2144 BUG_ON(cr3 != __pa(swapper_pg_dir));
2145
2146 /*
2147 * We are switching to swapper_pg_dir for the first time (from
2148 * initial_page_table) and therefore need to mark that page
2149 * read-only and then pin it.
2150 *
2151 * Xen disallows sharing of kernel PMDs for PAE
2152 * guests. Therefore we must copy the kernel PMD from
2153 * initial_page_table into a new kernel PMD to be used in
2154 * swapper_pg_dir.
2155 */
2156 swapper_kernel_pmd =
2157 extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
2158 memcpy(swapper_kernel_pmd, initial_kernel_pmd,
2159 sizeof(pmd_t) * PTRS_PER_PMD);
2160 swapper_pg_dir[KERNEL_PGD_BOUNDARY] =
2161 __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
2162 set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO);
2163
2164 set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
2165 xen_write_cr3(cr3);
2166 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn);
2167
2168 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
2169 PFN_DOWN(__pa(initial_page_table)));
2170 set_page_prot(initial_page_table, PAGE_KERNEL);
2171 set_page_prot(initial_kernel_pmd, PAGE_KERNEL);
2172
2173 pv_mmu_ops.write_cr3 = &xen_write_cr3;
2174}
2137 2175
2138__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, 2176__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
2139 unsigned long max_pfn) 2177 unsigned long max_pfn)
2140{ 2178{
2141 pmd_t *kernel_pmd; 2179 pmd_t *kernel_pmd;
2142 2180
2143 level2_kernel_pgt = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); 2181 initial_kernel_pmd =
2182 extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
2144 2183
2145 max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + 2184 max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
2146 xen_start_info->nr_pt_frames * PAGE_SIZE + 2185 xen_start_info->nr_pt_frames * PAGE_SIZE +
2147 512*1024); 2186 512*1024);
2148 2187
2149 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); 2188 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
2150 memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); 2189 memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
2151 2190
2152 xen_map_identity_early(level2_kernel_pgt, max_pfn); 2191 xen_map_identity_early(initial_kernel_pmd, max_pfn);
2153 2192
2154 memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); 2193 memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
2155 set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], 2194 initial_page_table[KERNEL_PGD_BOUNDARY] =
2156 __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); 2195 __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
2157 2196
2158 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); 2197 set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO);
2159 set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); 2198 set_page_prot(initial_page_table, PAGE_KERNEL_RO);
2160 set_page_prot(empty_zero_page, PAGE_KERNEL_RO); 2199 set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
2161 2200
2162 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); 2201 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
2163 2202
2164 xen_write_cr3(__pa(swapper_pg_dir)); 2203 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
2165 2204 PFN_DOWN(__pa(initial_page_table)));
2166 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); 2205 xen_write_cr3(__pa(initial_page_table));
2167 2206
2168 memblock_x86_reserve_range(__pa(xen_start_info->pt_base), 2207 memblock_x86_reserve_range(__pa(xen_start_info->pt_base),
2169 __pa(xen_start_info->pt_base + 2208 __pa(xen_start_info->pt_base +
2170 xen_start_info->nr_pt_frames * PAGE_SIZE), 2209 xen_start_info->nr_pt_frames * PAGE_SIZE),
2171 "XEN PAGETABLES"); 2210 "XEN PAGETABLES");
2172 2211
2173 return swapper_pg_dir; 2212 return initial_page_table;
2174} 2213}
2175#endif /* CONFIG_X86_64 */ 2214#endif /* CONFIG_X86_64 */
2176 2215
@@ -2304,7 +2343,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
2304 .write_cr2 = xen_write_cr2, 2343 .write_cr2 = xen_write_cr2,
2305 2344
2306 .read_cr3 = xen_read_cr3, 2345 .read_cr3 = xen_read_cr3,
2346#ifdef CONFIG_X86_32
2347 .write_cr3 = xen_write_cr3_init,
2348#else
2307 .write_cr3 = xen_write_cr3, 2349 .write_cr3 = xen_write_cr3,
2350#endif
2308 2351
2309 .flush_tlb_user = xen_flush_tlb, 2352 .flush_tlb_user = xen_flush_tlb,
2310 .flush_tlb_kernel = xen_flush_tlb, 2353 .flush_tlb_kernel = xen_flush_tlb,
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 38fdffaa71d3..01afd8a94607 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -23,7 +23,6 @@
23#include <xen/interface/callback.h> 23#include <xen/interface/callback.h>
24#include <xen/interface/memory.h> 24#include <xen/interface/memory.h>
25#include <xen/interface/physdev.h> 25#include <xen/interface/physdev.h>
26#include <xen/interface/memory.h>
27#include <xen/features.h> 26#include <xen/features.h>
28 27
29#include "xen-ops.h" 28#include "xen-ops.h"
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 56ad4531b412..004be80fd894 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -645,7 +645,7 @@ static int throtl_dispatch_tg(struct throtl_data *td, struct throtl_grp *tg,
645{ 645{
646 unsigned int nr_reads = 0, nr_writes = 0; 646 unsigned int nr_reads = 0, nr_writes = 0;
647 unsigned int max_nr_reads = throtl_grp_quantum*3/4; 647 unsigned int max_nr_reads = throtl_grp_quantum*3/4;
648 unsigned int max_nr_writes = throtl_grp_quantum - nr_reads; 648 unsigned int max_nr_writes = throtl_grp_quantum - max_nr_reads;
649 struct bio *bio; 649 struct bio *bio;
650 650
651 /* Try to dispatch 75% READS and 25% WRITES */ 651 /* Try to dispatch 75% READS and 25% WRITES */
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index a1725e6488d3..7888501ad9ee 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1341,7 +1341,7 @@ static struct request *set_next_request(void)
1341{ 1341{
1342 struct request_queue *q; 1342 struct request_queue *q;
1343 int cnt = FD_MAX_UNITS; 1343 int cnt = FD_MAX_UNITS;
1344 struct request *rq; 1344 struct request *rq = NULL;
1345 1345
1346 /* Find next queue we can dispatch from */ 1346 /* Find next queue we can dispatch from */
1347 fdc_queue = fdc_queue + 1; 1347 fdc_queue = fdc_queue + 1;
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 4e4cc6c828cb..605a67e40bbf 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1399,7 +1399,7 @@ static struct request *set_next_request(void)
1399{ 1399{
1400 struct request_queue *q; 1400 struct request_queue *q;
1401 int old_pos = fdc_queue; 1401 int old_pos = fdc_queue;
1402 struct request *rq; 1402 struct request *rq = NULL;
1403 1403
1404 do { 1404 do {
1405 q = unit[fdc_queue].disk->queue; 1405 q = unit[fdc_queue].disk->queue;
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index a67d0a611a8a..f291587d753e 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -66,6 +66,7 @@ MODULE_VERSION("3.6.26");
66MODULE_LICENSE("GPL"); 66MODULE_LICENSE("GPL");
67 67
68static DEFINE_MUTEX(cciss_mutex); 68static DEFINE_MUTEX(cciss_mutex);
69static struct proc_dir_entry *proc_cciss;
69 70
70#include "cciss_cmd.h" 71#include "cciss_cmd.h"
71#include "cciss.h" 72#include "cciss.h"
@@ -363,8 +364,6 @@ static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG",
363#define ENG_GIG_FACTOR (ENG_GIG/512) 364#define ENG_GIG_FACTOR (ENG_GIG/512)
364#define ENGAGE_SCSI "engage scsi" 365#define ENGAGE_SCSI "engage scsi"
365 366
366static struct proc_dir_entry *proc_cciss;
367
368static void cciss_seq_show_header(struct seq_file *seq) 367static void cciss_seq_show_header(struct seq_file *seq)
369{ 368{
370 ctlr_info_t *h = seq->private; 369 ctlr_info_t *h = seq->private;
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 255035cfc88a..4f9e22f29138 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -65,7 +65,7 @@ enum blkif_state {
65 65
66struct blk_shadow { 66struct blk_shadow {
67 struct blkif_request req; 67 struct blkif_request req;
68 unsigned long request; 68 struct request *request;
69 unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 69 unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
70}; 70};
71 71
@@ -136,7 +136,7 @@ static void add_id_to_freelist(struct blkfront_info *info,
136 unsigned long id) 136 unsigned long id)
137{ 137{
138 info->shadow[id].req.id = info->shadow_free; 138 info->shadow[id].req.id = info->shadow_free;
139 info->shadow[id].request = 0; 139 info->shadow[id].request = NULL;
140 info->shadow_free = id; 140 info->shadow_free = id;
141} 141}
142 142
@@ -245,14 +245,11 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
245} 245}
246 246
247/* 247/*
248 * blkif_queue_request 248 * Generate a Xen blkfront IO request from a blk layer request. Reads
249 * and writes are handled as expected. Since we lack a loose flush
250 * request, we map flushes into a full ordered barrier.
249 * 251 *
250 * request block io 252 * @req: a request struct
251 *
252 * id: for guest use only.
253 * operation: BLKIF_OP_{READ,WRITE,PROBE}
254 * buffer: buffer to read/write into. this should be a
255 * virtual address in the guest os.
256 */ 253 */
257static int blkif_queue_request(struct request *req) 254static int blkif_queue_request(struct request *req)
258{ 255{
@@ -281,7 +278,7 @@ static int blkif_queue_request(struct request *req)
281 /* Fill out a communications ring structure. */ 278 /* Fill out a communications ring structure. */
282 ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); 279 ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
283 id = get_id_from_freelist(info); 280 id = get_id_from_freelist(info);
284 info->shadow[id].request = (unsigned long)req; 281 info->shadow[id].request = req;
285 282
286 ring_req->id = id; 283 ring_req->id = id;
287 ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req); 284 ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req);
@@ -290,6 +287,18 @@ static int blkif_queue_request(struct request *req)
290 ring_req->operation = rq_data_dir(req) ? 287 ring_req->operation = rq_data_dir(req) ?
291 BLKIF_OP_WRITE : BLKIF_OP_READ; 288 BLKIF_OP_WRITE : BLKIF_OP_READ;
292 289
290 if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
291 /*
292 * Ideally we could just do an unordered
293 * flush-to-disk, but all we have is a full write
294 * barrier at the moment. However, a barrier write is
295 * a superset of FUA, so we can implement it the same
296 * way. (It's also a FLUSH+FUA, since it is
297 * guaranteed ordered WRT previous writes.)
298 */
299 ring_req->operation = BLKIF_OP_WRITE_BARRIER;
300 }
301
293 ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); 302 ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
294 BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); 303 BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
295 304
@@ -634,7 +643,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
634 643
635 bret = RING_GET_RESPONSE(&info->ring, i); 644 bret = RING_GET_RESPONSE(&info->ring, i);
636 id = bret->id; 645 id = bret->id;
637 req = (struct request *)info->shadow[id].request; 646 req = info->shadow[id].request;
638 647
639 blkif_completion(&info->shadow[id]); 648 blkif_completion(&info->shadow[id]);
640 649
@@ -647,6 +656,16 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
647 printk(KERN_WARNING "blkfront: %s: write barrier op failed\n", 656 printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
648 info->gd->disk_name); 657 info->gd->disk_name);
649 error = -EOPNOTSUPP; 658 error = -EOPNOTSUPP;
659 }
660 if (unlikely(bret->status == BLKIF_RSP_ERROR &&
661 info->shadow[id].req.nr_segments == 0)) {
662 printk(KERN_WARNING "blkfront: %s: empty write barrier op failed\n",
663 info->gd->disk_name);
664 error = -EOPNOTSUPP;
665 }
666 if (unlikely(error)) {
667 if (error == -EOPNOTSUPP)
668 error = 0;
650 info->feature_flush = 0; 669 info->feature_flush = 0;
651 xlvbd_flush(info); 670 xlvbd_flush(info);
652 } 671 }
@@ -899,7 +918,7 @@ static int blkif_recover(struct blkfront_info *info)
899 /* Stage 3: Find pending requests and requeue them. */ 918 /* Stage 3: Find pending requests and requeue them. */
900 for (i = 0; i < BLK_RING_SIZE; i++) { 919 for (i = 0; i < BLK_RING_SIZE; i++) {
901 /* Not in use? */ 920 /* Not in use? */
902 if (copy[i].request == 0) 921 if (!copy[i].request)
903 continue; 922 continue;
904 923
905 /* Grab a request slot and copy shadow state into it. */ 924 /* Grab a request slot and copy shadow state into it. */
@@ -916,9 +935,7 @@ static int blkif_recover(struct blkfront_info *info)
916 req->seg[j].gref, 935 req->seg[j].gref,
917 info->xbdev->otherend_id, 936 info->xbdev->otherend_id,
918 pfn_to_mfn(info->shadow[req->id].frame[j]), 937 pfn_to_mfn(info->shadow[req->id].frame[j]),
919 rq_data_dir( 938 rq_data_dir(info->shadow[req->id].request));
920 (struct request *)
921 info->shadow[req->id].request));
922 info->shadow[req->id].req = *req; 939 info->shadow[req->id].req = *req;
923 940
924 info->ring.req_prod_pvt++; 941 info->ring.req_prod_pvt++;
@@ -1067,14 +1084,8 @@ static void blkfront_connect(struct blkfront_info *info)
1067 */ 1084 */
1068 info->feature_flush = 0; 1085 info->feature_flush = 0;
1069 1086
1070 /*
1071 * The driver doesn't properly handled empty flushes, so
1072 * lets disable barrier support for now.
1073 */
1074#if 0
1075 if (!err && barrier) 1087 if (!err && barrier)
1076 info->feature_flush = REQ_FLUSH; 1088 info->feature_flush = REQ_FLUSH | REQ_FUA;
1077#endif
1078 1089
1079 err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); 1090 err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
1080 if (err) { 1091 if (err) {
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index b3781399b38a..ba2898b3639b 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -10,16 +10,16 @@ obj-$(CONFIG_EDAC) := edac_stub.o
10obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o 10obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o
11obj-$(CONFIG_EDAC_MCE) += edac_mce.o 11obj-$(CONFIG_EDAC_MCE) += edac_mce.o
12 12
13edac_core-objs := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o 13edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o
14edac_core-objs += edac_module.o edac_device_sysfs.o 14edac_core-y += edac_module.o edac_device_sysfs.o
15 15
16ifdef CONFIG_PCI 16ifdef CONFIG_PCI
17edac_core-objs += edac_pci.o edac_pci_sysfs.o 17edac_core-y += edac_pci.o edac_pci_sysfs.o
18endif 18endif
19 19
20obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o 20obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o
21 21
22edac_mce_amd-objs := mce_amd.o 22edac_mce_amd-y := mce_amd.o
23obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o 23obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o
24 24
25obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o 25obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o
diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c
index 8d0688f36d4c..39faded3cadd 100644
--- a/drivers/edac/mce_amd_inj.c
+++ b/drivers/edac/mce_amd_inj.c
@@ -139,7 +139,7 @@ static int __init edac_init_mce_inject(void)
139 return 0; 139 return 0;
140 140
141err_sysfs_create: 141err_sysfs_create:
142 while (i-- >= 0) 142 while (--i >= 0)
143 sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr); 143 sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr);
144 144
145 kobject_del(mce_kobj); 145 kobject_del(mce_kobj);
diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c
index 18fdd9703b48..1a467a91fb0b 100644
--- a/drivers/firewire/net.c
+++ b/drivers/firewire/net.c
@@ -7,6 +7,7 @@
7 */ 7 */
8 8
9#include <linux/bug.h> 9#include <linux/bug.h>
10#include <linux/delay.h>
10#include <linux/device.h> 11#include <linux/device.h>
11#include <linux/firewire.h> 12#include <linux/firewire.h>
12#include <linux/firewire-constants.h> 13#include <linux/firewire-constants.h>
@@ -26,8 +27,14 @@
26#include <asm/unaligned.h> 27#include <asm/unaligned.h>
27#include <net/arp.h> 28#include <net/arp.h>
28 29
29#define FWNET_MAX_FRAGMENTS 25 /* arbitrary limit */ 30/* rx limits */
30#define FWNET_ISO_PAGE_COUNT (PAGE_SIZE < 16 * 1024 ? 4 : 2) 31#define FWNET_MAX_FRAGMENTS 30 /* arbitrary, > TX queue depth */
32#define FWNET_ISO_PAGE_COUNT (PAGE_SIZE < 16*1024 ? 4 : 2)
33
34/* tx limits */
35#define FWNET_MAX_QUEUED_DATAGRAMS 20 /* < 64 = number of tlabels */
36#define FWNET_MIN_QUEUED_DATAGRAMS 10 /* should keep AT DMA busy enough */
37#define FWNET_TX_QUEUE_LEN FWNET_MAX_QUEUED_DATAGRAMS /* ? */
31 38
32#define IEEE1394_BROADCAST_CHANNEL 31 39#define IEEE1394_BROADCAST_CHANNEL 31
33#define IEEE1394_ALL_NODES (0xffc0 | 0x003f) 40#define IEEE1394_ALL_NODES (0xffc0 | 0x003f)
@@ -169,15 +176,8 @@ struct fwnet_device {
169 struct fw_address_handler handler; 176 struct fw_address_handler handler;
170 u64 local_fifo; 177 u64 local_fifo;
171 178
172 /* List of packets to be sent */ 179 /* Number of tx datagrams that have been queued but not yet acked */
173 struct list_head packet_list; 180 int queued_datagrams;
174 /*
175 * List of packets that were broadcasted. When we get an ISO interrupt
176 * one of them has been sent
177 */
178 struct list_head broadcasted_list;
179 /* List of packets that have been sent but not yet acked */
180 struct list_head sent_list;
181 181
182 struct list_head peer_list; 182 struct list_head peer_list;
183 struct fw_card *card; 183 struct fw_card *card;
@@ -195,7 +195,7 @@ struct fwnet_peer {
195 unsigned pdg_size; /* pd_list size */ 195 unsigned pdg_size; /* pd_list size */
196 196
197 u16 datagram_label; /* outgoing datagram label */ 197 u16 datagram_label; /* outgoing datagram label */
198 unsigned max_payload; /* includes RFC2374_FRAG_HDR_SIZE overhead */ 198 u16 max_payload; /* includes RFC2374_FRAG_HDR_SIZE overhead */
199 int node_id; 199 int node_id;
200 int generation; 200 int generation;
201 unsigned speed; 201 unsigned speed;
@@ -203,22 +203,18 @@ struct fwnet_peer {
203 203
204/* This is our task struct. It's used for the packet complete callback. */ 204/* This is our task struct. It's used for the packet complete callback. */
205struct fwnet_packet_task { 205struct fwnet_packet_task {
206 /*
207 * ptask can actually be on dev->packet_list, dev->broadcasted_list,
208 * or dev->sent_list depending on its current state.
209 */
210 struct list_head pt_link;
211 struct fw_transaction transaction; 206 struct fw_transaction transaction;
212 struct rfc2734_header hdr; 207 struct rfc2734_header hdr;
213 struct sk_buff *skb; 208 struct sk_buff *skb;
214 struct fwnet_device *dev; 209 struct fwnet_device *dev;
215 210
216 int outstanding_pkts; 211 int outstanding_pkts;
217 unsigned max_payload;
218 u64 fifo_addr; 212 u64 fifo_addr;
219 u16 dest_node; 213 u16 dest_node;
214 u16 max_payload;
220 u8 generation; 215 u8 generation;
221 u8 speed; 216 u8 speed;
217 u8 enqueued;
222}; 218};
223 219
224/* 220/*
@@ -650,8 +646,6 @@ static int fwnet_finish_incoming_packet(struct net_device *net,
650 net->stats.rx_packets++; 646 net->stats.rx_packets++;
651 net->stats.rx_bytes += skb->len; 647 net->stats.rx_bytes += skb->len;
652 } 648 }
653 if (netif_queue_stopped(net))
654 netif_wake_queue(net);
655 649
656 return 0; 650 return 0;
657 651
@@ -660,8 +654,6 @@ static int fwnet_finish_incoming_packet(struct net_device *net,
660 net->stats.rx_dropped++; 654 net->stats.rx_dropped++;
661 655
662 dev_kfree_skb_any(skb); 656 dev_kfree_skb_any(skb);
663 if (netif_queue_stopped(net))
664 netif_wake_queue(net);
665 657
666 return -ENOENT; 658 return -ENOENT;
667} 659}
@@ -793,15 +785,10 @@ static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len,
793 * Datagram is not complete, we're done for the 785 * Datagram is not complete, we're done for the
794 * moment. 786 * moment.
795 */ 787 */
796 spin_unlock_irqrestore(&dev->lock, flags); 788 retval = 0;
797
798 return 0;
799 fail: 789 fail:
800 spin_unlock_irqrestore(&dev->lock, flags); 790 spin_unlock_irqrestore(&dev->lock, flags);
801 791
802 if (netif_queue_stopped(net))
803 netif_wake_queue(net);
804
805 return retval; 792 return retval;
806} 793}
807 794
@@ -901,11 +888,19 @@ static void fwnet_free_ptask(struct fwnet_packet_task *ptask)
901 kmem_cache_free(fwnet_packet_task_cache, ptask); 888 kmem_cache_free(fwnet_packet_task_cache, ptask);
902} 889}
903 890
891/* Caller must hold dev->lock. */
892static void dec_queued_datagrams(struct fwnet_device *dev)
893{
894 if (--dev->queued_datagrams == FWNET_MIN_QUEUED_DATAGRAMS)
895 netif_wake_queue(dev->netdev);
896}
897
904static int fwnet_send_packet(struct fwnet_packet_task *ptask); 898static int fwnet_send_packet(struct fwnet_packet_task *ptask);
905 899
906static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask) 900static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask)
907{ 901{
908 struct fwnet_device *dev = ptask->dev; 902 struct fwnet_device *dev = ptask->dev;
903 struct sk_buff *skb = ptask->skb;
909 unsigned long flags; 904 unsigned long flags;
910 bool free; 905 bool free;
911 906
@@ -914,10 +909,14 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask)
914 ptask->outstanding_pkts--; 909 ptask->outstanding_pkts--;
915 910
916 /* Check whether we or the networking TX soft-IRQ is last user. */ 911 /* Check whether we or the networking TX soft-IRQ is last user. */
917 free = (ptask->outstanding_pkts == 0 && !list_empty(&ptask->pt_link)); 912 free = (ptask->outstanding_pkts == 0 && ptask->enqueued);
913 if (free)
914 dec_queued_datagrams(dev);
918 915
919 if (ptask->outstanding_pkts == 0) 916 if (ptask->outstanding_pkts == 0) {
920 list_del(&ptask->pt_link); 917 dev->netdev->stats.tx_packets++;
918 dev->netdev->stats.tx_bytes += skb->len;
919 }
921 920
922 spin_unlock_irqrestore(&dev->lock, flags); 921 spin_unlock_irqrestore(&dev->lock, flags);
923 922
@@ -926,7 +925,6 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask)
926 u16 fg_off; 925 u16 fg_off;
927 u16 datagram_label; 926 u16 datagram_label;
928 u16 lf; 927 u16 lf;
929 struct sk_buff *skb;
930 928
931 /* Update the ptask to point to the next fragment and send it */ 929 /* Update the ptask to point to the next fragment and send it */
932 lf = fwnet_get_hdr_lf(&ptask->hdr); 930 lf = fwnet_get_hdr_lf(&ptask->hdr);
@@ -953,7 +951,7 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask)
953 datagram_label = fwnet_get_hdr_dgl(&ptask->hdr); 951 datagram_label = fwnet_get_hdr_dgl(&ptask->hdr);
954 break; 952 break;
955 } 953 }
956 skb = ptask->skb; 954
957 skb_pull(skb, ptask->max_payload); 955 skb_pull(skb, ptask->max_payload);
958 if (ptask->outstanding_pkts > 1) { 956 if (ptask->outstanding_pkts > 1) {
959 fwnet_make_sf_hdr(&ptask->hdr, RFC2374_HDR_INTFRAG, 957 fwnet_make_sf_hdr(&ptask->hdr, RFC2374_HDR_INTFRAG,
@@ -970,6 +968,31 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask)
970 fwnet_free_ptask(ptask); 968 fwnet_free_ptask(ptask);
971} 969}
972 970
971static void fwnet_transmit_packet_failed(struct fwnet_packet_task *ptask)
972{
973 struct fwnet_device *dev = ptask->dev;
974 unsigned long flags;
975 bool free;
976
977 spin_lock_irqsave(&dev->lock, flags);
978
979 /* One fragment failed; don't try to send remaining fragments. */
980 ptask->outstanding_pkts = 0;
981
982 /* Check whether we or the networking TX soft-IRQ is last user. */
983 free = ptask->enqueued;
984 if (free)
985 dec_queued_datagrams(dev);
986
987 dev->netdev->stats.tx_dropped++;
988 dev->netdev->stats.tx_errors++;
989
990 spin_unlock_irqrestore(&dev->lock, flags);
991
992 if (free)
993 fwnet_free_ptask(ptask);
994}
995
973static void fwnet_write_complete(struct fw_card *card, int rcode, 996static void fwnet_write_complete(struct fw_card *card, int rcode,
974 void *payload, size_t length, void *data) 997 void *payload, size_t length, void *data)
975{ 998{
@@ -977,11 +1000,12 @@ static void fwnet_write_complete(struct fw_card *card, int rcode,
977 1000
978 ptask = data; 1001 ptask = data;
979 1002
980 if (rcode == RCODE_COMPLETE) 1003 if (rcode == RCODE_COMPLETE) {
981 fwnet_transmit_packet_done(ptask); 1004 fwnet_transmit_packet_done(ptask);
982 else 1005 } else {
983 fw_error("fwnet_write_complete: failed: %x\n", rcode); 1006 fw_error("fwnet_write_complete: failed: %x\n", rcode);
984 /* ??? error recovery */ 1007 fwnet_transmit_packet_failed(ptask);
1008 }
985} 1009}
986 1010
987static int fwnet_send_packet(struct fwnet_packet_task *ptask) 1011static int fwnet_send_packet(struct fwnet_packet_task *ptask)
@@ -1039,9 +1063,11 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask)
1039 spin_lock_irqsave(&dev->lock, flags); 1063 spin_lock_irqsave(&dev->lock, flags);
1040 1064
1041 /* If the AT tasklet already ran, we may be last user. */ 1065 /* If the AT tasklet already ran, we may be last user. */
1042 free = (ptask->outstanding_pkts == 0 && list_empty(&ptask->pt_link)); 1066 free = (ptask->outstanding_pkts == 0 && !ptask->enqueued);
1043 if (!free) 1067 if (!free)
1044 list_add_tail(&ptask->pt_link, &dev->broadcasted_list); 1068 ptask->enqueued = true;
1069 else
1070 dec_queued_datagrams(dev);
1045 1071
1046 spin_unlock_irqrestore(&dev->lock, flags); 1072 spin_unlock_irqrestore(&dev->lock, flags);
1047 1073
@@ -1056,9 +1082,11 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask)
1056 spin_lock_irqsave(&dev->lock, flags); 1082 spin_lock_irqsave(&dev->lock, flags);
1057 1083
1058 /* If the AT tasklet already ran, we may be last user. */ 1084 /* If the AT tasklet already ran, we may be last user. */
1059 free = (ptask->outstanding_pkts == 0 && list_empty(&ptask->pt_link)); 1085 free = (ptask->outstanding_pkts == 0 && !ptask->enqueued);
1060 if (!free) 1086 if (!free)
1061 list_add_tail(&ptask->pt_link, &dev->sent_list); 1087 ptask->enqueued = true;
1088 else
1089 dec_queued_datagrams(dev);
1062 1090
1063 spin_unlock_irqrestore(&dev->lock, flags); 1091 spin_unlock_irqrestore(&dev->lock, flags);
1064 1092
@@ -1224,6 +1252,15 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net)
1224 struct fwnet_peer *peer; 1252 struct fwnet_peer *peer;
1225 unsigned long flags; 1253 unsigned long flags;
1226 1254
1255 spin_lock_irqsave(&dev->lock, flags);
1256
1257 /* Can this happen? */
1258 if (netif_queue_stopped(dev->netdev)) {
1259 spin_unlock_irqrestore(&dev->lock, flags);
1260
1261 return NETDEV_TX_BUSY;
1262 }
1263
1227 ptask = kmem_cache_alloc(fwnet_packet_task_cache, GFP_ATOMIC); 1264 ptask = kmem_cache_alloc(fwnet_packet_task_cache, GFP_ATOMIC);
1228 if (ptask == NULL) 1265 if (ptask == NULL)
1229 goto fail; 1266 goto fail;
@@ -1242,9 +1279,6 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net)
1242 proto = hdr_buf.h_proto; 1279 proto = hdr_buf.h_proto;
1243 dg_size = skb->len; 1280 dg_size = skb->len;
1244 1281
1245 /* serialize access to peer, including peer->datagram_label */
1246 spin_lock_irqsave(&dev->lock, flags);
1247
1248 /* 1282 /*
1249 * Set the transmission type for the packet. ARP packets and IP 1283 * Set the transmission type for the packet. ARP packets and IP
1250 * broadcast packets are sent via GASP. 1284 * broadcast packets are sent via GASP.
@@ -1266,7 +1300,7 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net)
1266 1300
1267 peer = fwnet_peer_find_by_guid(dev, be64_to_cpu(guid)); 1301 peer = fwnet_peer_find_by_guid(dev, be64_to_cpu(guid));
1268 if (!peer || peer->fifo == FWNET_NO_FIFO_ADDR) 1302 if (!peer || peer->fifo == FWNET_NO_FIFO_ADDR)
1269 goto fail_unlock; 1303 goto fail;
1270 1304
1271 generation = peer->generation; 1305 generation = peer->generation;
1272 dest_node = peer->node_id; 1306 dest_node = peer->node_id;
@@ -1320,18 +1354,21 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net)
1320 max_payload += RFC2374_FRAG_HDR_SIZE; 1354 max_payload += RFC2374_FRAG_HDR_SIZE;
1321 } 1355 }
1322 1356
1357 if (++dev->queued_datagrams == FWNET_MAX_QUEUED_DATAGRAMS)
1358 netif_stop_queue(dev->netdev);
1359
1323 spin_unlock_irqrestore(&dev->lock, flags); 1360 spin_unlock_irqrestore(&dev->lock, flags);
1324 1361
1325 ptask->max_payload = max_payload; 1362 ptask->max_payload = max_payload;
1326 INIT_LIST_HEAD(&ptask->pt_link); 1363 ptask->enqueued = 0;
1327 1364
1328 fwnet_send_packet(ptask); 1365 fwnet_send_packet(ptask);
1329 1366
1330 return NETDEV_TX_OK; 1367 return NETDEV_TX_OK;
1331 1368
1332 fail_unlock:
1333 spin_unlock_irqrestore(&dev->lock, flags);
1334 fail: 1369 fail:
1370 spin_unlock_irqrestore(&dev->lock, flags);
1371
1335 if (ptask) 1372 if (ptask)
1336 kmem_cache_free(fwnet_packet_task_cache, ptask); 1373 kmem_cache_free(fwnet_packet_task_cache, ptask);
1337 1374
@@ -1377,7 +1414,7 @@ static void fwnet_init_dev(struct net_device *net)
1377 net->addr_len = FWNET_ALEN; 1414 net->addr_len = FWNET_ALEN;
1378 net->hard_header_len = FWNET_HLEN; 1415 net->hard_header_len = FWNET_HLEN;
1379 net->type = ARPHRD_IEEE1394; 1416 net->type = ARPHRD_IEEE1394;
1380 net->tx_queue_len = 10; 1417 net->tx_queue_len = FWNET_TX_QUEUE_LEN;
1381} 1418}
1382 1419
1383/* caller must hold fwnet_device_mutex */ 1420/* caller must hold fwnet_device_mutex */
@@ -1457,14 +1494,9 @@ static int fwnet_probe(struct device *_dev)
1457 dev->broadcast_rcv_context = NULL; 1494 dev->broadcast_rcv_context = NULL;
1458 dev->broadcast_xmt_max_payload = 0; 1495 dev->broadcast_xmt_max_payload = 0;
1459 dev->broadcast_xmt_datagramlabel = 0; 1496 dev->broadcast_xmt_datagramlabel = 0;
1460
1461 dev->local_fifo = FWNET_NO_FIFO_ADDR; 1497 dev->local_fifo = FWNET_NO_FIFO_ADDR;
1462 1498 dev->queued_datagrams = 0;
1463 INIT_LIST_HEAD(&dev->packet_list);
1464 INIT_LIST_HEAD(&dev->broadcasted_list);
1465 INIT_LIST_HEAD(&dev->sent_list);
1466 INIT_LIST_HEAD(&dev->peer_list); 1499 INIT_LIST_HEAD(&dev->peer_list);
1467
1468 dev->card = card; 1500 dev->card = card;
1469 dev->netdev = net; 1501 dev->netdev = net;
1470 1502
@@ -1522,7 +1554,7 @@ static int fwnet_remove(struct device *_dev)
1522 struct fwnet_peer *peer = dev_get_drvdata(_dev); 1554 struct fwnet_peer *peer = dev_get_drvdata(_dev);
1523 struct fwnet_device *dev = peer->dev; 1555 struct fwnet_device *dev = peer->dev;
1524 struct net_device *net; 1556 struct net_device *net;
1525 struct fwnet_packet_task *ptask, *pt_next; 1557 int i;
1526 1558
1527 mutex_lock(&fwnet_device_mutex); 1559 mutex_lock(&fwnet_device_mutex);
1528 1560
@@ -1540,21 +1572,9 @@ static int fwnet_remove(struct device *_dev)
1540 dev->card); 1572 dev->card);
1541 fw_iso_context_destroy(dev->broadcast_rcv_context); 1573 fw_iso_context_destroy(dev->broadcast_rcv_context);
1542 } 1574 }
1543 list_for_each_entry_safe(ptask, pt_next, 1575 for (i = 0; dev->queued_datagrams && i < 5; i++)
1544 &dev->packet_list, pt_link) { 1576 ssleep(1);
1545 dev_kfree_skb_any(ptask->skb); 1577 WARN_ON(dev->queued_datagrams);
1546 kmem_cache_free(fwnet_packet_task_cache, ptask);
1547 }
1548 list_for_each_entry_safe(ptask, pt_next,
1549 &dev->broadcasted_list, pt_link) {
1550 dev_kfree_skb_any(ptask->skb);
1551 kmem_cache_free(fwnet_packet_task_cache, ptask);
1552 }
1553 list_for_each_entry_safe(ptask, pt_next,
1554 &dev->sent_list, pt_link) {
1555 dev_kfree_skb_any(ptask->skb);
1556 kmem_cache_free(fwnet_packet_task_cache, ptask);
1557 }
1558 list_del(&dev->dev_link); 1578 list_del(&dev->dev_link);
1559 1579
1560 free_netdev(net); 1580 free_netdev(net);
diff --git a/drivers/hwmon/i5k_amb.c b/drivers/hwmon/i5k_amb.c
index 937983407e2a..c4c40be0edbf 100644
--- a/drivers/hwmon/i5k_amb.c
+++ b/drivers/hwmon/i5k_amb.c
@@ -497,12 +497,14 @@ static unsigned long chipset_ids[] = {
497 0 497 0
498}; 498};
499 499
500#ifdef MODULE
500static struct pci_device_id i5k_amb_ids[] __devinitdata = { 501static struct pci_device_id i5k_amb_ids[] __devinitdata = {
501 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5000_ERR) }, 502 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5000_ERR) },
502 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_ERR) }, 503 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_ERR) },
503 { 0, } 504 { 0, }
504}; 505};
505MODULE_DEVICE_TABLE(pci, i5k_amb_ids); 506MODULE_DEVICE_TABLE(pci, i5k_amb_ids);
507#endif
506 508
507static int __devinit i5k_amb_probe(struct platform_device *pdev) 509static int __devinit i5k_amb_probe(struct platform_device *pdev)
508{ 510{
diff --git a/drivers/hwmon/lis3lv02d_i2c.c b/drivers/hwmon/lis3lv02d_i2c.c
index 9f4bae07f719..8853afce85ce 100644
--- a/drivers/hwmon/lis3lv02d_i2c.c
+++ b/drivers/hwmon/lis3lv02d_i2c.c
@@ -186,7 +186,7 @@ static int __devexit lis3lv02d_i2c_remove(struct i2c_client *client)
186 return 0; 186 return 0;
187} 187}
188 188
189#ifdef CONFIG_PM 189#ifdef CONFIG_PM_SLEEP
190static int lis3lv02d_i2c_suspend(struct device *dev) 190static int lis3lv02d_i2c_suspend(struct device *dev)
191{ 191{
192 struct i2c_client *client = container_of(dev, struct i2c_client, dev); 192 struct i2c_client *client = container_of(dev, struct i2c_client, dev);
@@ -213,12 +213,9 @@ static int lis3lv02d_i2c_resume(struct device *dev)
213 213
214 return 0; 214 return 0;
215} 215}
216#else 216#endif /* CONFIG_PM_SLEEP */
217#define lis3lv02d_i2c_suspend NULL
218#define lis3lv02d_i2c_resume NULL
219#define lis3lv02d_i2c_shutdown NULL
220#endif
221 217
218#ifdef CONFIG_PM_RUNTIME
222static int lis3_i2c_runtime_suspend(struct device *dev) 219static int lis3_i2c_runtime_suspend(struct device *dev)
223{ 220{
224 struct i2c_client *client = container_of(dev, struct i2c_client, dev); 221 struct i2c_client *client = container_of(dev, struct i2c_client, dev);
@@ -236,6 +233,7 @@ static int lis3_i2c_runtime_resume(struct device *dev)
236 lis3lv02d_poweron(lis3); 233 lis3lv02d_poweron(lis3);
237 return 0; 234 return 0;
238} 235}
236#endif /* CONFIG_PM_RUNTIME */
239 237
240static const struct i2c_device_id lis3lv02d_id[] = { 238static const struct i2c_device_id lis3lv02d_id[] = {
241 {"lis3lv02d", 0 }, 239 {"lis3lv02d", 0 },
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 8f86d702e46e..31ae07a36576 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1559,7 +1559,7 @@ void mmc_stop_host(struct mmc_host *host)
1559 1559
1560 if (host->caps & MMC_CAP_DISABLE) 1560 if (host->caps & MMC_CAP_DISABLE)
1561 cancel_delayed_work(&host->disable); 1561 cancel_delayed_work(&host->disable);
1562 cancel_delayed_work(&host->detect); 1562 cancel_delayed_work_sync(&host->detect);
1563 mmc_flush_scheduled_work(); 1563 mmc_flush_scheduled_work();
1564 1564
1565 /* clear pm flags now and let card drivers set them as needed */ 1565 /* clear pm flags now and let card drivers set them as needed */
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 995261f7fd70..77f93c3b8808 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -375,7 +375,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
375 struct mmc_card *oldcard) 375 struct mmc_card *oldcard)
376{ 376{
377 struct mmc_card *card; 377 struct mmc_card *card;
378 int err, ddr = MMC_SDR_MODE; 378 int err, ddr = 0;
379 u32 cid[4]; 379 u32 cid[4];
380 unsigned int max_dtr; 380 unsigned int max_dtr;
381 381
@@ -562,7 +562,11 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
562 1 << bus_width, ddr); 562 1 << bus_width, ddr);
563 err = 0; 563 err = 0;
564 } else { 564 } else {
565 mmc_card_set_ddr_mode(card); 565 if (ddr)
566 mmc_card_set_ddr_mode(card);
567 else
568 ddr = MMC_SDR_MODE;
569
566 mmc_set_bus_width_ddr(card->host, bus_width, ddr); 570 mmc_set_bus_width_ddr(card->host, bus_width, ddr);
567 } 571 }
568 } 572 }
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index c3ad1058cd31..efef5f94ac42 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -547,9 +547,11 @@ static void mmc_sdio_detect(struct mmc_host *host)
547 BUG_ON(!host->card); 547 BUG_ON(!host->card);
548 548
549 /* Make sure card is powered before detecting it */ 549 /* Make sure card is powered before detecting it */
550 err = pm_runtime_get_sync(&host->card->dev); 550 if (host->caps & MMC_CAP_POWER_OFF_CARD) {
551 if (err < 0) 551 err = pm_runtime_get_sync(&host->card->dev);
552 goto out; 552 if (err < 0)
553 goto out;
554 }
553 555
554 mmc_claim_host(host); 556 mmc_claim_host(host);
555 557
@@ -560,6 +562,20 @@ static void mmc_sdio_detect(struct mmc_host *host)
560 562
561 mmc_release_host(host); 563 mmc_release_host(host);
562 564
565 /*
566 * Tell PM core it's OK to power off the card now.
567 *
568 * The _sync variant is used in order to ensure that the card
569 * is left powered off in case an error occurred, and the card
570 * is going to be removed.
571 *
572 * Since there is no specific reason to believe a new user
573 * is about to show up at this point, the _sync variant is
574 * desirable anyway.
575 */
576 if (host->caps & MMC_CAP_POWER_OFF_CARD)
577 pm_runtime_put_sync(&host->card->dev);
578
563out: 579out:
564 if (err) { 580 if (err) {
565 mmc_sdio_remove(host); 581 mmc_sdio_remove(host);
@@ -568,9 +584,6 @@ out:
568 mmc_detach_bus(host); 584 mmc_detach_bus(host);
569 mmc_release_host(host); 585 mmc_release_host(host);
570 } 586 }
571
572 /* Tell PM core that we're done */
573 pm_runtime_put(&host->card->dev);
574} 587}
575 588
576/* 589/*
@@ -718,16 +731,21 @@ int mmc_attach_sdio(struct mmc_host *host, u32 ocr)
718 card = host->card; 731 card = host->card;
719 732
720 /* 733 /*
721 * Let runtime PM core know our card is active 734 * Enable runtime PM only if supported by host+card+board
722 */ 735 */
723 err = pm_runtime_set_active(&card->dev); 736 if (host->caps & MMC_CAP_POWER_OFF_CARD) {
724 if (err) 737 /*
725 goto remove; 738 * Let runtime PM core know our card is active
739 */
740 err = pm_runtime_set_active(&card->dev);
741 if (err)
742 goto remove;
726 743
727 /* 744 /*
728 * Enable runtime PM for this card 745 * Enable runtime PM for this card
729 */ 746 */
730 pm_runtime_enable(&card->dev); 747 pm_runtime_enable(&card->dev);
748 }
731 749
732 /* 750 /*
733 * The number of functions on the card is encoded inside 751 * The number of functions on the card is encoded inside
@@ -745,9 +763,10 @@ int mmc_attach_sdio(struct mmc_host *host, u32 ocr)
745 goto remove; 763 goto remove;
746 764
747 /* 765 /*
748 * Enable Runtime PM for this func 766 * Enable Runtime PM for this func (if supported)
749 */ 767 */
750 pm_runtime_enable(&card->sdio_func[i]->dev); 768 if (host->caps & MMC_CAP_POWER_OFF_CARD)
769 pm_runtime_enable(&card->sdio_func[i]->dev);
751 } 770 }
752 771
753 mmc_release_host(host); 772 mmc_release_host(host);
diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c
index 2716c7ab6bbf..203da443e339 100644
--- a/drivers/mmc/core/sdio_bus.c
+++ b/drivers/mmc/core/sdio_bus.c
@@ -17,6 +17,7 @@
17#include <linux/pm_runtime.h> 17#include <linux/pm_runtime.h>
18 18
19#include <linux/mmc/card.h> 19#include <linux/mmc/card.h>
20#include <linux/mmc/host.h>
20#include <linux/mmc/sdio_func.h> 21#include <linux/mmc/sdio_func.h>
21 22
22#include "sdio_cis.h" 23#include "sdio_cis.h"
@@ -132,9 +133,11 @@ static int sdio_bus_probe(struct device *dev)
132 * it should call pm_runtime_put_noidle() in its probe routine and 133 * it should call pm_runtime_put_noidle() in its probe routine and
133 * pm_runtime_get_noresume() in its remove routine. 134 * pm_runtime_get_noresume() in its remove routine.
134 */ 135 */
135 ret = pm_runtime_get_sync(dev); 136 if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) {
136 if (ret < 0) 137 ret = pm_runtime_get_sync(dev);
137 goto out; 138 if (ret < 0)
139 goto out;
140 }
138 141
139 /* Set the default block size so the driver is sure it's something 142 /* Set the default block size so the driver is sure it's something
140 * sensible. */ 143 * sensible. */
@@ -151,7 +154,8 @@ static int sdio_bus_probe(struct device *dev)
151 return 0; 154 return 0;
152 155
153disable_runtimepm: 156disable_runtimepm:
154 pm_runtime_put_noidle(dev); 157 if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD)
158 pm_runtime_put_noidle(dev);
155out: 159out:
156 return ret; 160 return ret;
157} 161}
@@ -160,12 +164,14 @@ static int sdio_bus_remove(struct device *dev)
160{ 164{
161 struct sdio_driver *drv = to_sdio_driver(dev->driver); 165 struct sdio_driver *drv = to_sdio_driver(dev->driver);
162 struct sdio_func *func = dev_to_sdio_func(dev); 166 struct sdio_func *func = dev_to_sdio_func(dev);
163 int ret; 167 int ret = 0;
164 168
165 /* Make sure card is powered before invoking ->remove() */ 169 /* Make sure card is powered before invoking ->remove() */
166 ret = pm_runtime_get_sync(dev); 170 if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) {
167 if (ret < 0) 171 ret = pm_runtime_get_sync(dev);
168 goto out; 172 if (ret < 0)
173 goto out;
174 }
169 175
170 drv->remove(func); 176 drv->remove(func);
171 177
@@ -178,10 +184,12 @@ static int sdio_bus_remove(struct device *dev)
178 } 184 }
179 185
180 /* First, undo the increment made directly above */ 186 /* First, undo the increment made directly above */
181 pm_runtime_put_noidle(dev); 187 if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD)
188 pm_runtime_put_noidle(dev);
182 189
183 /* Then undo the runtime PM settings in sdio_bus_probe() */ 190 /* Then undo the runtime PM settings in sdio_bus_probe() */
184 pm_runtime_put_noidle(dev); 191 if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD)
192 pm_runtime_put_noidle(dev);
185 193
186out: 194out:
187 return ret; 195 return ret;
@@ -191,6 +199,8 @@ out:
191 199
192static int sdio_bus_pm_prepare(struct device *dev) 200static int sdio_bus_pm_prepare(struct device *dev)
193{ 201{
202 struct sdio_func *func = dev_to_sdio_func(dev);
203
194 /* 204 /*
195 * Resume an SDIO device which was suspended at run time at this 205 * Resume an SDIO device which was suspended at run time at this
196 * point, in order to allow standard SDIO suspend/resume paths 206 * point, in order to allow standard SDIO suspend/resume paths
@@ -212,7 +222,8 @@ static int sdio_bus_pm_prepare(struct device *dev)
212 * since there is little point in failing system suspend if a 222 * since there is little point in failing system suspend if a
213 * device can't be resumed. 223 * device can't be resumed.
214 */ 224 */
215 pm_runtime_resume(dev); 225 if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD)
226 pm_runtime_resume(dev);
216 227
217 return 0; 228 return 0;
218} 229}
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 82a1079bbdc7..5d46021cbb57 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -1002,7 +1002,7 @@ static inline void omap_hsmmc_reset_controller_fsm(struct omap_hsmmc_host *host,
1002 * Monitor a 0->1 transition first 1002 * Monitor a 0->1 transition first
1003 */ 1003 */
1004 if (mmc_slot(host).features & HSMMC_HAS_UPDATED_RESET) { 1004 if (mmc_slot(host).features & HSMMC_HAS_UPDATED_RESET) {
1005 while ((!(OMAP_HSMMC_READ(host, SYSCTL) & bit)) 1005 while ((!(OMAP_HSMMC_READ(host->base, SYSCTL) & bit))
1006 && (i++ < limit)) 1006 && (i++ < limit))
1007 cpu_relax(); 1007 cpu_relax();
1008 } 1008 }
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index 2e9cca19c90b..9b82910b9dbb 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -17,6 +17,7 @@
17#include <linux/clk.h> 17#include <linux/clk.h>
18#include <linux/mmc/host.h> 18#include <linux/mmc/host.h>
19#include <linux/mmc/sdhci-pltfm.h> 19#include <linux/mmc/sdhci-pltfm.h>
20#include <mach/hardware.h>
20#include "sdhci.h" 21#include "sdhci.h"
21#include "sdhci-pltfm.h" 22#include "sdhci-pltfm.h"
22#include "sdhci-esdhc.h" 23#include "sdhci-esdhc.h"
@@ -112,6 +113,13 @@ static int esdhc_pltfm_init(struct sdhci_host *host, struct sdhci_pltfm_data *pd
112 clk_enable(clk); 113 clk_enable(clk);
113 pltfm_host->clk = clk; 114 pltfm_host->clk = clk;
114 115
116 if (cpu_is_mx35() || cpu_is_mx51())
117 host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL;
118
119 /* Fix errata ENGcm07207 which is present on i.MX25 and i.MX35 */
120 if (cpu_is_mx25() || cpu_is_mx35())
121 host->quirks |= SDHCI_QUIRK_NO_MULTIBLOCK;
122
115 return 0; 123 return 0;
116} 124}
117 125
@@ -133,10 +141,8 @@ static struct sdhci_ops sdhci_esdhc_ops = {
133}; 141};
134 142
135struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = { 143struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = {
136 .quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_NO_MULTIBLOCK 144 .quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_BROKEN_ADMA,
137 | SDHCI_QUIRK_BROKEN_ADMA,
138 /* ADMA has issues. Might be fixable */ 145 /* ADMA has issues. Might be fixable */
139 /* NO_MULTIBLOCK might be MX35 only (Errata: ENGcm07207) */
140 .ops = &sdhci_esdhc_ops, 146 .ops = &sdhci_esdhc_ops,
141 .init = esdhc_pltfm_init, 147 .init = esdhc_pltfm_init,
142 .exit = esdhc_pltfm_exit, 148 .exit = esdhc_pltfm_exit,
diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index 55746bac2f44..3d9c2460d437 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -149,11 +149,11 @@ static const struct sdhci_pci_fixes sdhci_cafe = {
149 * ADMA operation is disabled for Moorestown platform due to 149 * ADMA operation is disabled for Moorestown platform due to
150 * hardware bugs. 150 * hardware bugs.
151 */ 151 */
152static int mrst_hc1_probe(struct sdhci_pci_chip *chip) 152static int mrst_hc_probe(struct sdhci_pci_chip *chip)
153{ 153{
154 /* 154 /*
155 * slots number is fixed here for MRST as SDIO3 is never used and has 155 * slots number is fixed here for MRST as SDIO3/5 are never used and
156 * hardware bugs. 156 * have hardware bugs.
157 */ 157 */
158 chip->num_slots = 1; 158 chip->num_slots = 1;
159 return 0; 159 return 0;
@@ -163,9 +163,9 @@ static const struct sdhci_pci_fixes sdhci_intel_mrst_hc0 = {
163 .quirks = SDHCI_QUIRK_BROKEN_ADMA | SDHCI_QUIRK_NO_HISPD_BIT, 163 .quirks = SDHCI_QUIRK_BROKEN_ADMA | SDHCI_QUIRK_NO_HISPD_BIT,
164}; 164};
165 165
166static const struct sdhci_pci_fixes sdhci_intel_mrst_hc1 = { 166static const struct sdhci_pci_fixes sdhci_intel_mrst_hc1_hc2 = {
167 .quirks = SDHCI_QUIRK_BROKEN_ADMA | SDHCI_QUIRK_NO_HISPD_BIT, 167 .quirks = SDHCI_QUIRK_BROKEN_ADMA | SDHCI_QUIRK_NO_HISPD_BIT,
168 .probe = mrst_hc1_probe, 168 .probe = mrst_hc_probe,
169}; 169};
170 170
171static const struct sdhci_pci_fixes sdhci_intel_mfd_sd = { 171static const struct sdhci_pci_fixes sdhci_intel_mfd_sd = {
@@ -538,7 +538,15 @@ static const struct pci_device_id pci_ids[] __devinitdata = {
538 .device = PCI_DEVICE_ID_INTEL_MRST_SD1, 538 .device = PCI_DEVICE_ID_INTEL_MRST_SD1,
539 .subvendor = PCI_ANY_ID, 539 .subvendor = PCI_ANY_ID,
540 .subdevice = PCI_ANY_ID, 540 .subdevice = PCI_ANY_ID,
541 .driver_data = (kernel_ulong_t)&sdhci_intel_mrst_hc1, 541 .driver_data = (kernel_ulong_t)&sdhci_intel_mrst_hc1_hc2,
542 },
543
544 {
545 .vendor = PCI_VENDOR_ID_INTEL,
546 .device = PCI_DEVICE_ID_INTEL_MRST_SD2,
547 .subvendor = PCI_ANY_ID,
548 .subdevice = PCI_ANY_ID,
549 .driver_data = (kernel_ulong_t)&sdhci_intel_mrst_hc1_hc2,
542 }, 550 },
543 551
544 { 552 {
@@ -637,6 +645,7 @@ static int sdhci_pci_suspend (struct pci_dev *pdev, pm_message_t state)
637{ 645{
638 struct sdhci_pci_chip *chip; 646 struct sdhci_pci_chip *chip;
639 struct sdhci_pci_slot *slot; 647 struct sdhci_pci_slot *slot;
648 mmc_pm_flag_t slot_pm_flags;
640 mmc_pm_flag_t pm_flags = 0; 649 mmc_pm_flag_t pm_flags = 0;
641 int i, ret; 650 int i, ret;
642 651
@@ -657,7 +666,11 @@ static int sdhci_pci_suspend (struct pci_dev *pdev, pm_message_t state)
657 return ret; 666 return ret;
658 } 667 }
659 668
660 pm_flags |= slot->host->mmc->pm_flags; 669 slot_pm_flags = slot->host->mmc->pm_flags;
670 if (slot_pm_flags & MMC_PM_WAKE_SDIO_IRQ)
671 sdhci_enable_irq_wakeups(slot->host);
672
673 pm_flags |= slot_pm_flags;
661 } 674 }
662 675
663 if (chip->fixes && chip->fixes->suspend) { 676 if (chip->fixes && chip->fixes->suspend) {
@@ -671,8 +684,10 @@ static int sdhci_pci_suspend (struct pci_dev *pdev, pm_message_t state)
671 684
672 pci_save_state(pdev); 685 pci_save_state(pdev);
673 if (pm_flags & MMC_PM_KEEP_POWER) { 686 if (pm_flags & MMC_PM_KEEP_POWER) {
674 if (pm_flags & MMC_PM_WAKE_SDIO_IRQ) 687 if (pm_flags & MMC_PM_WAKE_SDIO_IRQ) {
688 pci_pme_active(pdev, true);
675 pci_enable_wake(pdev, PCI_D3hot, 1); 689 pci_enable_wake(pdev, PCI_D3hot, 1);
690 }
676 pci_set_power_state(pdev, PCI_D3hot); 691 pci_set_power_state(pdev, PCI_D3hot);
677 } else { 692 } else {
678 pci_enable_wake(pdev, pci_choose_state(pdev, state), 0); 693 pci_enable_wake(pdev, pci_choose_state(pdev, state), 0);
diff --git a/drivers/mmc/host/sdhci-pxa.c b/drivers/mmc/host/sdhci-pxa.c
index fc406ac5d193..5a61208cbc66 100644
--- a/drivers/mmc/host/sdhci-pxa.c
+++ b/drivers/mmc/host/sdhci-pxa.c
@@ -141,6 +141,10 @@ static int __devinit sdhci_pxa_probe(struct platform_device *pdev)
141 if (pdata->quirks) 141 if (pdata->quirks)
142 host->quirks |= pdata->quirks; 142 host->quirks |= pdata->quirks;
143 143
144 /* If slot design supports 8 bit data, indicate this to MMC. */
145 if (pdata->flags & PXA_FLAG_SD_8_BIT_CAPABLE_SLOT)
146 host->mmc->caps |= MMC_CAP_8_BIT_DATA;
147
144 ret = sdhci_add_host(host); 148 ret = sdhci_add_host(host);
145 if (ret) { 149 if (ret) {
146 dev_err(&pdev->dev, "failed to add host\n"); 150 dev_err(&pdev->dev, "failed to add host\n");
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 782c0ee3c925..a25db426c910 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1185,17 +1185,31 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
1185 if (host->ops->platform_send_init_74_clocks) 1185 if (host->ops->platform_send_init_74_clocks)
1186 host->ops->platform_send_init_74_clocks(host, ios->power_mode); 1186 host->ops->platform_send_init_74_clocks(host, ios->power_mode);
1187 1187
1188 ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); 1188 /*
1189 1189 * If your platform has 8-bit width support but is not a v3 controller,
1190 if (ios->bus_width == MMC_BUS_WIDTH_8) 1190 * or if it requires special setup code, you should implement that in
1191 ctrl |= SDHCI_CTRL_8BITBUS; 1191 * platform_8bit_width().
1192 else 1192 */
1193 ctrl &= ~SDHCI_CTRL_8BITBUS; 1193 if (host->ops->platform_8bit_width)
1194 host->ops->platform_8bit_width(host, ios->bus_width);
1195 else {
1196 ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
1197 if (ios->bus_width == MMC_BUS_WIDTH_8) {
1198 ctrl &= ~SDHCI_CTRL_4BITBUS;
1199 if (host->version >= SDHCI_SPEC_300)
1200 ctrl |= SDHCI_CTRL_8BITBUS;
1201 } else {
1202 if (host->version >= SDHCI_SPEC_300)
1203 ctrl &= ~SDHCI_CTRL_8BITBUS;
1204 if (ios->bus_width == MMC_BUS_WIDTH_4)
1205 ctrl |= SDHCI_CTRL_4BITBUS;
1206 else
1207 ctrl &= ~SDHCI_CTRL_4BITBUS;
1208 }
1209 sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
1210 }
1194 1211
1195 if (ios->bus_width == MMC_BUS_WIDTH_4) 1212 ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
1196 ctrl |= SDHCI_CTRL_4BITBUS;
1197 else
1198 ctrl &= ~SDHCI_CTRL_4BITBUS;
1199 1213
1200 if ((ios->timing == MMC_TIMING_SD_HS || 1214 if ((ios->timing == MMC_TIMING_SD_HS ||
1201 ios->timing == MMC_TIMING_MMC_HS) 1215 ios->timing == MMC_TIMING_MMC_HS)
@@ -1681,6 +1695,16 @@ int sdhci_resume_host(struct sdhci_host *host)
1681 1695
1682EXPORT_SYMBOL_GPL(sdhci_resume_host); 1696EXPORT_SYMBOL_GPL(sdhci_resume_host);
1683 1697
1698void sdhci_enable_irq_wakeups(struct sdhci_host *host)
1699{
1700 u8 val;
1701 val = sdhci_readb(host, SDHCI_WAKE_UP_CONTROL);
1702 val |= SDHCI_WAKE_ON_INT;
1703 sdhci_writeb(host, val, SDHCI_WAKE_UP_CONTROL);
1704}
1705
1706EXPORT_SYMBOL_GPL(sdhci_enable_irq_wakeups);
1707
1684#endif /* CONFIG_PM */ 1708#endif /* CONFIG_PM */
1685 1709
1686/*****************************************************************************\ 1710/*****************************************************************************\
@@ -1845,11 +1869,19 @@ int sdhci_add_host(struct sdhci_host *host)
1845 mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_300; 1869 mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_300;
1846 else 1870 else
1847 mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_200; 1871 mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_200;
1872
1848 mmc->f_max = host->max_clk; 1873 mmc->f_max = host->max_clk;
1849 mmc->caps |= MMC_CAP_SDIO_IRQ; 1874 mmc->caps |= MMC_CAP_SDIO_IRQ;
1850 1875
1876 /*
1877 * A controller may support 8-bit width, but the board itself
1878 * might not have the pins brought out. Boards that support
1879 * 8-bit width must set "mmc->caps |= MMC_CAP_8_BIT_DATA;" in
1880 * their platform code before calling sdhci_add_host(), and we
1881 * won't assume 8-bit width for hosts without that CAP.
1882 */
1851 if (!(host->quirks & SDHCI_QUIRK_FORCE_1_BIT_DATA)) 1883 if (!(host->quirks & SDHCI_QUIRK_FORCE_1_BIT_DATA))
1852 mmc->caps |= MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA; 1884 mmc->caps |= MMC_CAP_4_BIT_DATA;
1853 1885
1854 if (caps & SDHCI_CAN_DO_HISPD) 1886 if (caps & SDHCI_CAN_DO_HISPD)
1855 mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED; 1887 mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED;
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index b7b8a3b28b01..e42d7f00c060 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -76,7 +76,7 @@
76#define SDHCI_CTRL_ADMA1 0x08 76#define SDHCI_CTRL_ADMA1 0x08
77#define SDHCI_CTRL_ADMA32 0x10 77#define SDHCI_CTRL_ADMA32 0x10
78#define SDHCI_CTRL_ADMA64 0x18 78#define SDHCI_CTRL_ADMA64 0x18
79#define SDHCI_CTRL_8BITBUS 0x20 79#define SDHCI_CTRL_8BITBUS 0x20
80 80
81#define SDHCI_POWER_CONTROL 0x29 81#define SDHCI_POWER_CONTROL 0x29
82#define SDHCI_POWER_ON 0x01 82#define SDHCI_POWER_ON 0x01
@@ -87,6 +87,9 @@
87#define SDHCI_BLOCK_GAP_CONTROL 0x2A 87#define SDHCI_BLOCK_GAP_CONTROL 0x2A
88 88
89#define SDHCI_WAKE_UP_CONTROL 0x2B 89#define SDHCI_WAKE_UP_CONTROL 0x2B
90#define SDHCI_WAKE_ON_INT 0x01
91#define SDHCI_WAKE_ON_INSERT 0x02
92#define SDHCI_WAKE_ON_REMOVE 0x04
90 93
91#define SDHCI_CLOCK_CONTROL 0x2C 94#define SDHCI_CLOCK_CONTROL 0x2C
92#define SDHCI_DIVIDER_SHIFT 8 95#define SDHCI_DIVIDER_SHIFT 8
@@ -152,6 +155,7 @@
152#define SDHCI_CLOCK_BASE_SHIFT 8 155#define SDHCI_CLOCK_BASE_SHIFT 8
153#define SDHCI_MAX_BLOCK_MASK 0x00030000 156#define SDHCI_MAX_BLOCK_MASK 0x00030000
154#define SDHCI_MAX_BLOCK_SHIFT 16 157#define SDHCI_MAX_BLOCK_SHIFT 16
158#define SDHCI_CAN_DO_8BIT 0x00040000
155#define SDHCI_CAN_DO_ADMA2 0x00080000 159#define SDHCI_CAN_DO_ADMA2 0x00080000
156#define SDHCI_CAN_DO_ADMA1 0x00100000 160#define SDHCI_CAN_DO_ADMA1 0x00100000
157#define SDHCI_CAN_DO_HISPD 0x00200000 161#define SDHCI_CAN_DO_HISPD 0x00200000
@@ -212,6 +216,8 @@ struct sdhci_ops {
212 unsigned int (*get_max_clock)(struct sdhci_host *host); 216 unsigned int (*get_max_clock)(struct sdhci_host *host);
213 unsigned int (*get_min_clock)(struct sdhci_host *host); 217 unsigned int (*get_min_clock)(struct sdhci_host *host);
214 unsigned int (*get_timeout_clock)(struct sdhci_host *host); 218 unsigned int (*get_timeout_clock)(struct sdhci_host *host);
219 int (*platform_8bit_width)(struct sdhci_host *host,
220 int width);
215 void (*platform_send_init_74_clocks)(struct sdhci_host *host, 221 void (*platform_send_init_74_clocks)(struct sdhci_host *host,
216 u8 power_mode); 222 u8 power_mode);
217 unsigned int (*get_ro)(struct sdhci_host *host); 223 unsigned int (*get_ro)(struct sdhci_host *host);
@@ -317,6 +323,7 @@ extern void sdhci_remove_host(struct sdhci_host *host, int dead);
317#ifdef CONFIG_PM 323#ifdef CONFIG_PM
318extern int sdhci_suspend_host(struct sdhci_host *host, pm_message_t state); 324extern int sdhci_suspend_host(struct sdhci_host *host, pm_message_t state);
319extern int sdhci_resume_host(struct sdhci_host *host); 325extern int sdhci_resume_host(struct sdhci_host *host);
326extern void sdhci_enable_irq_wakeups(struct sdhci_host *host);
320#endif 327#endif
321 328
322#endif /* __SDHCI_HW_H */ 329#endif /* __SDHCI_HW_H */
diff --git a/drivers/mmc/host/ushc.c b/drivers/mmc/host/ushc.c
index b4ead4a13c98..f8f65df9b017 100644
--- a/drivers/mmc/host/ushc.c
+++ b/drivers/mmc/host/ushc.c
@@ -425,7 +425,7 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id
425 struct usb_device *usb_dev = interface_to_usbdev(intf); 425 struct usb_device *usb_dev = interface_to_usbdev(intf);
426 struct mmc_host *mmc; 426 struct mmc_host *mmc;
427 struct ushc_data *ushc; 427 struct ushc_data *ushc;
428 int ret = -ENOMEM; 428 int ret;
429 429
430 mmc = mmc_alloc_host(sizeof(struct ushc_data), &intf->dev); 430 mmc = mmc_alloc_host(sizeof(struct ushc_data), &intf->dev);
431 if (mmc == NULL) 431 if (mmc == NULL)
@@ -462,11 +462,15 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id
462 mmc->max_blk_count = 511; 462 mmc->max_blk_count = 511;
463 463
464 ushc->int_urb = usb_alloc_urb(0, GFP_KERNEL); 464 ushc->int_urb = usb_alloc_urb(0, GFP_KERNEL);
465 if (ushc->int_urb == NULL) 465 if (ushc->int_urb == NULL) {
466 ret = -ENOMEM;
466 goto err; 467 goto err;
468 }
467 ushc->int_data = kzalloc(sizeof(struct ushc_int_data), GFP_KERNEL); 469 ushc->int_data = kzalloc(sizeof(struct ushc_int_data), GFP_KERNEL);
468 if (ushc->int_data == NULL) 470 if (ushc->int_data == NULL) {
471 ret = -ENOMEM;
469 goto err; 472 goto err;
473 }
470 usb_fill_int_urb(ushc->int_urb, ushc->usb_dev, 474 usb_fill_int_urb(ushc->int_urb, ushc->usb_dev,
471 usb_rcvintpipe(usb_dev, 475 usb_rcvintpipe(usb_dev,
472 intf->cur_altsetting->endpoint[0].desc.bEndpointAddress), 476 intf->cur_altsetting->endpoint[0].desc.bEndpointAddress),
@@ -475,11 +479,15 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id
475 intf->cur_altsetting->endpoint[0].desc.bInterval); 479 intf->cur_altsetting->endpoint[0].desc.bInterval);
476 480
477 ushc->cbw_urb = usb_alloc_urb(0, GFP_KERNEL); 481 ushc->cbw_urb = usb_alloc_urb(0, GFP_KERNEL);
478 if (ushc->cbw_urb == NULL) 482 if (ushc->cbw_urb == NULL) {
483 ret = -ENOMEM;
479 goto err; 484 goto err;
485 }
480 ushc->cbw = kzalloc(sizeof(struct ushc_cbw), GFP_KERNEL); 486 ushc->cbw = kzalloc(sizeof(struct ushc_cbw), GFP_KERNEL);
481 if (ushc->cbw == NULL) 487 if (ushc->cbw == NULL) {
488 ret = -ENOMEM;
482 goto err; 489 goto err;
490 }
483 ushc->cbw->signature = USHC_CBW_SIGNATURE; 491 ushc->cbw->signature = USHC_CBW_SIGNATURE;
484 492
485 usb_fill_bulk_urb(ushc->cbw_urb, ushc->usb_dev, usb_sndbulkpipe(usb_dev, 2), 493 usb_fill_bulk_urb(ushc->cbw_urb, ushc->usb_dev, usb_sndbulkpipe(usb_dev, 2),
@@ -487,15 +495,21 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id
487 cbw_callback, ushc); 495 cbw_callback, ushc);
488 496
489 ushc->data_urb = usb_alloc_urb(0, GFP_KERNEL); 497 ushc->data_urb = usb_alloc_urb(0, GFP_KERNEL);
490 if (ushc->data_urb == NULL) 498 if (ushc->data_urb == NULL) {
499 ret = -ENOMEM;
491 goto err; 500 goto err;
501 }
492 502
493 ushc->csw_urb = usb_alloc_urb(0, GFP_KERNEL); 503 ushc->csw_urb = usb_alloc_urb(0, GFP_KERNEL);
494 if (ushc->csw_urb == NULL) 504 if (ushc->csw_urb == NULL) {
505 ret = -ENOMEM;
495 goto err; 506 goto err;
507 }
496 ushc->csw = kzalloc(sizeof(struct ushc_cbw), GFP_KERNEL); 508 ushc->csw = kzalloc(sizeof(struct ushc_cbw), GFP_KERNEL);
497 if (ushc->csw == NULL) 509 if (ushc->csw == NULL) {
510 ret = -ENOMEM;
498 goto err; 511 goto err;
512 }
499 usb_fill_bulk_urb(ushc->csw_urb, ushc->usb_dev, usb_rcvbulkpipe(usb_dev, 6), 513 usb_fill_bulk_urb(ushc->csw_urb, ushc->usb_dev, usb_rcvbulkpipe(usb_dev, 6),
500 ushc->csw, sizeof(struct ushc_csw), 514 ushc->csw, sizeof(struct ushc_csw),
501 csw_callback, ushc); 515 csw_callback, ushc);
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c
index 3c631863bf40..204345be8e62 100644
--- a/drivers/mtd/ubi/scan.c
+++ b/drivers/mtd/ubi/scan.c
@@ -787,16 +787,15 @@ static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr,
787 * erased, so it became unstable and corrupted, and should be 787 * erased, so it became unstable and corrupted, and should be
788 * erased. 788 * erased.
789 */ 789 */
790 return 0; 790 err = 0;
791 goto out_unlock;
791 } 792 }
792 793
793 if (err) 794 if (err)
794 return err; 795 goto out_unlock;
795 796
796 if (ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->leb_size)) { 797 if (ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->leb_size))
797 mutex_unlock(&ubi->buf_mutex); 798 goto out_unlock;
798 return 0;
799 }
800 799
801 ubi_err("PEB %d contains corrupted VID header, and the data does not " 800 ubi_err("PEB %d contains corrupted VID header, and the data does not "
802 "contain all 0xFF, this may be a non-UBI PEB or a severe VID " 801 "contain all 0xFF, this may be a non-UBI PEB or a severe VID "
@@ -806,8 +805,11 @@ static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr,
806 pnum, ubi->leb_start, ubi->leb_size); 805 pnum, ubi->leb_start, ubi->leb_size);
807 ubi_dbg_print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, 806 ubi_dbg_print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
808 ubi->peb_buf1, ubi->leb_size, 1); 807 ubi->peb_buf1, ubi->leb_size, 1);
808 err = 1;
809
810out_unlock:
809 mutex_unlock(&ubi->buf_mutex); 811 mutex_unlock(&ubi->buf_mutex);
810 return 1; 812 return err;
811} 813}
812 814
813/** 815/**
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index f6668cdaac85..43db398437b7 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2945,6 +2945,18 @@ source "drivers/s390/net/Kconfig"
2945 2945
2946source "drivers/net/caif/Kconfig" 2946source "drivers/net/caif/Kconfig"
2947 2947
2948config TILE_NET
2949 tristate "Tilera GBE/XGBE network driver support"
2950 depends on TILE
2951 default y
2952 select CRC32
2953 help
2954 This is a standard Linux network device driver for the
2955 on-chip Tilera Gigabit Ethernet and XAUI interfaces.
2956
2957 To compile this driver as a module, choose M here: the module
2958 will be called tile_net.
2959
2948config XEN_NETDEV_FRONTEND 2960config XEN_NETDEV_FRONTEND
2949 tristate "Xen network device frontend driver" 2961 tristate "Xen network device frontend driver"
2950 depends on XEN 2962 depends on XEN
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 652fc6b98039..b90738d13994 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -301,3 +301,4 @@ obj-$(CONFIG_CAIF) += caif/
301 301
302obj-$(CONFIG_OCTEON_MGMT_ETHERNET) += octeon/ 302obj-$(CONFIG_OCTEON_MGMT_ETHERNET) += octeon/
303obj-$(CONFIG_PCH_GBE) += pch_gbe/ 303obj-$(CONFIG_PCH_GBE) += pch_gbe/
304obj-$(CONFIG_TILE_NET) += tile/
diff --git a/drivers/net/tile/Makefile b/drivers/net/tile/Makefile
new file mode 100644
index 000000000000..f634f142cab4
--- /dev/null
+++ b/drivers/net/tile/Makefile
@@ -0,0 +1,10 @@
1#
2# Makefile for the TILE on-chip networking support.
3#
4
5obj-$(CONFIG_TILE_NET) += tile_net.o
6ifdef CONFIG_TILEGX
7tile_net-objs := tilegx.o mpipe.o iorpc_mpipe.o dma_queue.o
8else
9tile_net-objs := tilepro.o
10endif
diff --git a/drivers/net/tile/tilepro.c b/drivers/net/tile/tilepro.c
new file mode 100644
index 000000000000..0e6bac5ec65b
--- /dev/null
+++ b/drivers/net/tile/tilepro.c
@@ -0,0 +1,2406 @@
1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 */
14
15#include <linux/module.h>
16#include <linux/init.h>
17#include <linux/moduleparam.h>
18#include <linux/sched.h>
19#include <linux/kernel.h> /* printk() */
20#include <linux/slab.h> /* kmalloc() */
21#include <linux/errno.h> /* error codes */
22#include <linux/types.h> /* size_t */
23#include <linux/interrupt.h>
24#include <linux/in.h>
25#include <linux/netdevice.h> /* struct device, and other headers */
26#include <linux/etherdevice.h> /* eth_type_trans */
27#include <linux/skbuff.h>
28#include <linux/ioctl.h>
29#include <linux/cdev.h>
30#include <linux/hugetlb.h>
31#include <linux/in6.h>
32#include <linux/timer.h>
33#include <linux/io.h>
34#include <asm/checksum.h>
35#include <asm/homecache.h>
36
37#include <hv/drv_xgbe_intf.h>
38#include <hv/drv_xgbe_impl.h>
39#include <hv/hypervisor.h>
40#include <hv/netio_intf.h>
41
42/* For TSO */
43#include <linux/ip.h>
44#include <linux/tcp.h>
45
46
47/* There is no singlethread_cpu, so schedule work on the current cpu. */
48#define singlethread_cpu -1
49
50
51/*
52 * First, "tile_net_init_module()" initializes all four "devices" which
53 * can be used by linux.
54 *
55 * Then, "ifconfig DEVICE up" calls "tile_net_open()", which analyzes
56 * the network cpus, then uses "tile_net_open_aux()" to initialize
57 * LIPP/LEPP, and then uses "tile_net_open_inner()" to register all
58 * the tiles, provide buffers to LIPP, allow ingress to start, and
59 * turn on hypervisor interrupt handling (and NAPI) on all tiles.
60 *
61 * If registration fails due to the link being down, then "retry_work"
62 * is used to keep calling "tile_net_open_inner()" until it succeeds.
63 *
64 * If "ifconfig DEVICE down" is called, it uses "tile_net_stop()" to
65 * stop egress, drain the LIPP buffers, unregister all the tiles, stop
66 * LIPP/LEPP, and wipe the LEPP queue.
67 *
68 * We start out with the ingress interrupt enabled on each CPU. When
69 * this interrupt fires, we disable it, and call "napi_schedule()".
70 * This will cause "tile_net_poll()" to be called, which will pull
71 * packets from the netio queue, filtering them out, or passing them
72 * to "netif_receive_skb()". If our budget is exhausted, we will
73 * return, knowing we will be called again later. Otherwise, we
74 * reenable the ingress interrupt, and call "napi_complete()".
75 *
76 *
77 * NOTE: The use of "native_driver" ensures that EPP exists, and that
78 * "epp_sendv" is legal, and that "LIPP" is being used.
79 *
80 * NOTE: Failing to free completions for an arbitrarily long time
81 * (which is defined to be illegal) does in fact cause bizarre
82 * problems. The "egress_timer" helps prevent this from happening.
83 *
84 * NOTE: The egress code can be interrupted by the interrupt handler.
85 */
86
87
88/* HACK: Allow use of "jumbo" packets. */
89/* This should be 1500 if "jumbo" is not set in LIPP. */
90/* This should be at most 10226 (10240 - 14) if "jumbo" is set in LIPP. */
91/* ISSUE: This has not been thoroughly tested (except at 1500). */
92#define TILE_NET_MTU 1500
93
94/* HACK: Define to support GSO. */
95/* ISSUE: This may actually hurt performance of the TCP blaster. */
96/* #define TILE_NET_GSO */
97
98/* Define this to collapse "duplicate" acks. */
99/* #define IGNORE_DUP_ACKS */
100
101/* HACK: Define this to verify incoming packets. */
102/* #define TILE_NET_VERIFY_INGRESS */
103
104/* Use 3000 to enable the Linux Traffic Control (QoS) layer, else 0. */
105#define TILE_NET_TX_QUEUE_LEN 0
106
107/* Define to dump packets (prints out the whole packet on tx and rx). */
108/* #define TILE_NET_DUMP_PACKETS */
109
110/* Define to enable debug spew (all PDEBUG's are enabled). */
111/* #define TILE_NET_DEBUG */
112
113
114/* Define to activate paranoia checks. */
115/* #define TILE_NET_PARANOIA */
116
117/* Default transmit lockup timeout period, in jiffies. */
118#define TILE_NET_TIMEOUT (5 * HZ)
119
120/* Default retry interval for bringing up the NetIO interface, in jiffies. */
121#define TILE_NET_RETRY_INTERVAL (5 * HZ)
122
123/* Number of ports (xgbe0, xgbe1, gbe0, gbe1). */
124#define TILE_NET_DEVS 4
125
126
127
128/* Paranoia. */
129#if NET_IP_ALIGN != LIPP_PACKET_PADDING
130#error "NET_IP_ALIGN must match LIPP_PACKET_PADDING."
131#endif
132
133
134/* Debug print. */
135#ifdef TILE_NET_DEBUG
136#define PDEBUG(fmt, args...) net_printk(fmt, ## args)
137#else
138#define PDEBUG(fmt, args...)
139#endif
140
141
142MODULE_AUTHOR("Tilera");
143MODULE_LICENSE("GPL");
144
145
146#define IS_MULTICAST(mac_addr) \
147 (((u8 *)(mac_addr))[0] & 0x01)
148
149#define IS_BROADCAST(mac_addr) \
150 (((u16 *)(mac_addr))[0] == 0xffff)
151
152
153/*
154 * Queue of incoming packets for a specific cpu and device.
155 *
156 * Includes a pointer to the "system" data, and the actual "user" data.
157 */
158struct tile_netio_queue {
159 netio_queue_impl_t *__system_part;
160 netio_queue_user_impl_t __user_part;
161
162};
163
164
165/*
166 * Statistics counters for a specific cpu and device.
167 */
168struct tile_net_stats_t {
169 u32 rx_packets;
170 u32 rx_bytes;
171 u32 tx_packets;
172 u32 tx_bytes;
173};
174
175
176/*
177 * Info for a specific cpu and device.
178 *
179 * ISSUE: There is a "dev" pointer in "napi" as well.
180 */
181struct tile_net_cpu {
182 /* The NAPI struct. */
183 struct napi_struct napi;
184 /* Packet queue. */
185 struct tile_netio_queue queue;
186 /* Statistics. */
187 struct tile_net_stats_t stats;
188 /* ISSUE: Is this needed? */
189 bool napi_enabled;
190 /* True if this tile has succcessfully registered with the IPP. */
191 bool registered;
192 /* True if the link was down last time we tried to register. */
193 bool link_down;
194 /* True if "egress_timer" is scheduled. */
195 bool egress_timer_scheduled;
196 /* Number of small sk_buffs which must still be provided. */
197 unsigned int num_needed_small_buffers;
198 /* Number of large sk_buffs which must still be provided. */
199 unsigned int num_needed_large_buffers;
200 /* A timer for handling egress completions. */
201 struct timer_list egress_timer;
202};
203
204
205/*
206 * Info for a specific device.
207 */
208struct tile_net_priv {
209 /* Our network device. */
210 struct net_device *dev;
211 /* The actual egress queue. */
212 lepp_queue_t *epp_queue;
213 /* Protects "epp_queue->cmd_tail" and "epp_queue->comp_tail" */
214 spinlock_t cmd_lock;
215 /* Protects "epp_queue->comp_head". */
216 spinlock_t comp_lock;
217 /* The hypervisor handle for this interface. */
218 int hv_devhdl;
219 /* The intr bit mask that IDs this device. */
220 u32 intr_id;
221 /* True iff "tile_net_open_aux()" has succeeded. */
222 int partly_opened;
223 /* True iff "tile_net_open_inner()" has succeeded. */
224 int fully_opened;
225 /* Effective network cpus. */
226 struct cpumask network_cpus_map;
227 /* Number of network cpus. */
228 int network_cpus_count;
229 /* Credits per network cpu. */
230 int network_cpus_credits;
231 /* Network stats. */
232 struct net_device_stats stats;
233 /* For NetIO bringup retries. */
234 struct delayed_work retry_work;
235 /* Quick access to per cpu data. */
236 struct tile_net_cpu *cpu[NR_CPUS];
237};
238
239
240/*
241 * The actual devices (xgbe0, xgbe1, gbe0, gbe1).
242 */
243static struct net_device *tile_net_devs[TILE_NET_DEVS];
244
245/*
246 * The "tile_net_cpu" structures for each device.
247 */
248static DEFINE_PER_CPU(struct tile_net_cpu, hv_xgbe0);
249static DEFINE_PER_CPU(struct tile_net_cpu, hv_xgbe1);
250static DEFINE_PER_CPU(struct tile_net_cpu, hv_gbe0);
251static DEFINE_PER_CPU(struct tile_net_cpu, hv_gbe1);
252
253
254/*
255 * True if "network_cpus" was specified.
256 */
257static bool network_cpus_used;
258
259/*
260 * The actual cpus in "network_cpus".
261 */
262static struct cpumask network_cpus_map;
263
264
265
266#ifdef TILE_NET_DEBUG
267/*
268 * printk with extra stuff.
269 *
270 * We print the CPU we're running in brackets.
271 */
272static void net_printk(char *fmt, ...)
273{
274 int i;
275 int len;
276 va_list args;
277 static char buf[256];
278
279 len = sprintf(buf, "tile_net[%2.2d]: ", smp_processor_id());
280 va_start(args, fmt);
281 i = vscnprintf(buf + len, sizeof(buf) - len - 1, fmt, args);
282 va_end(args);
283 buf[255] = '\0';
284 pr_notice(buf);
285}
286#endif
287
288
289#ifdef TILE_NET_DUMP_PACKETS
290/*
291 * Dump a packet.
292 */
293static void dump_packet(unsigned char *data, unsigned long length, char *s)
294{
295 unsigned long i;
296 static unsigned int count;
297
298 pr_info("dump_packet(data %p, length 0x%lx s %s count 0x%x)\n",
299 data, length, s, count++);
300
301 pr_info("\n");
302
303 for (i = 0; i < length; i++) {
304 if ((i & 0xf) == 0)
305 sprintf(buf, "%8.8lx:", i);
306 sprintf(buf + strlen(buf), " %2.2x", data[i]);
307 if ((i & 0xf) == 0xf || i == length - 1)
308 pr_info("%s\n", buf);
309 }
310}
311#endif
312
313
314/*
315 * Provide support for the __netio_fastio1() swint
316 * (see <hv/drv_xgbe_intf.h> for how it is used).
317 *
318 * The fastio swint2 call may clobber all the caller-saved registers.
319 * It rarely clobbers memory, but we allow for the possibility in
320 * the signature just to be on the safe side.
321 *
322 * Also, gcc doesn't seem to allow an input operand to be
323 * clobbered, so we fake it with dummy outputs.
324 *
325 * This function can't be static because of the way it is declared
326 * in the netio header.
327 */
328inline int __netio_fastio1(u32 fastio_index, u32 arg0)
329{
330 long result, clobber_r1, clobber_r10;
331 asm volatile("swint2"
332 : "=R00" (result),
333 "=R01" (clobber_r1), "=R10" (clobber_r10)
334 : "R10" (fastio_index), "R01" (arg0)
335 : "memory", "r2", "r3", "r4",
336 "r5", "r6", "r7", "r8", "r9",
337 "r11", "r12", "r13", "r14",
338 "r15", "r16", "r17", "r18", "r19",
339 "r20", "r21", "r22", "r23", "r24",
340 "r25", "r26", "r27", "r28", "r29");
341 return result;
342}
343
344
345/*
346 * Provide a linux buffer to LIPP.
347 */
348static void tile_net_provide_linux_buffer(struct tile_net_cpu *info,
349 void *va, bool small)
350{
351 struct tile_netio_queue *queue = &info->queue;
352
353 /* Convert "va" and "small" to "linux_buffer_t". */
354 unsigned int buffer = ((unsigned int)(__pa(va) >> 7) << 1) + small;
355
356 __netio_fastio_free_buffer(queue->__user_part.__fastio_index, buffer);
357}
358
359
360/*
361 * Provide a linux buffer for LIPP.
362 */
363static bool tile_net_provide_needed_buffer(struct tile_net_cpu *info,
364 bool small)
365{
366 /* ISSUE: What should we use here? */
367 unsigned int large_size = NET_IP_ALIGN + TILE_NET_MTU + 100;
368
369 /* Round up to ensure to avoid "false sharing" with last cache line. */
370 unsigned int buffer_size =
371 (((small ? LIPP_SMALL_PACKET_SIZE : large_size) +
372 CHIP_L2_LINE_SIZE() - 1) & -CHIP_L2_LINE_SIZE());
373
374 /*
375 * ISSUE: Since CPAs are 38 bits, and we can only encode the
376 * high 31 bits in a "linux_buffer_t", the low 7 bits must be
377 * zero, and thus, we must align the actual "va" mod 128.
378 */
379 const unsigned long align = 128;
380
381 struct sk_buff *skb;
382 void *va;
383
384 struct sk_buff **skb_ptr;
385
386 /* Note that "dev_alloc_skb()" adds NET_SKB_PAD more bytes, */
387 /* and also "reserves" that many bytes. */
388 /* ISSUE: Can we "share" the NET_SKB_PAD bytes with "skb_ptr"? */
389 int len = sizeof(*skb_ptr) + align + buffer_size;
390
391 while (1) {
392
393 /* Allocate (or fail). */
394 skb = dev_alloc_skb(len);
395 if (skb == NULL)
396 return false;
397
398 /* Make room for a back-pointer to 'skb'. */
399 skb_reserve(skb, sizeof(*skb_ptr));
400
401 /* Make sure we are aligned. */
402 skb_reserve(skb, -(long)skb->data & (align - 1));
403
404 /* This address is given to IPP. */
405 va = skb->data;
406
407 if (small)
408 break;
409
410 /* ISSUE: This has never been observed! */
411 /* Large buffers must not span a huge page. */
412 if (((((long)va & ~HPAGE_MASK) + 1535) & HPAGE_MASK) == 0)
413 break;
414 pr_err("Leaking unaligned linux buffer at %p.\n", va);
415 }
416
417 /* Skip two bytes to satisfy LIPP assumptions. */
418 /* Note that this aligns IP on a 16 byte boundary. */
419 /* ISSUE: Do this when the packet arrives? */
420 skb_reserve(skb, NET_IP_ALIGN);
421
422 /* Save a back-pointer to 'skb'. */
423 skb_ptr = va - sizeof(*skb_ptr);
424 *skb_ptr = skb;
425
426 /* Invalidate the packet buffer. */
427 if (!hash_default)
428 __inv_buffer(skb->data, buffer_size);
429
430 /* Make sure "skb_ptr" has been flushed. */
431 __insn_mf();
432
433#ifdef TILE_NET_PARANOIA
434#if CHIP_HAS_CBOX_HOME_MAP()
435 if (hash_default) {
436 HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)va);
437 if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3)
438 panic("Non-coherent ingress buffer!");
439 }
440#endif
441#endif
442
443 /* Provide the new buffer. */
444 tile_net_provide_linux_buffer(info, va, small);
445
446 return true;
447}
448
449
450/*
451 * Provide linux buffers for LIPP.
452 */
453static void tile_net_provide_needed_buffers(struct tile_net_cpu *info)
454{
455 while (info->num_needed_small_buffers != 0) {
456 if (!tile_net_provide_needed_buffer(info, true))
457 goto oops;
458 info->num_needed_small_buffers--;
459 }
460
461 while (info->num_needed_large_buffers != 0) {
462 if (!tile_net_provide_needed_buffer(info, false))
463 goto oops;
464 info->num_needed_large_buffers--;
465 }
466
467 return;
468
469oops:
470
471 /* Add a description to the page allocation failure dump. */
472 pr_notice("Could not provide a linux buffer to LIPP.\n");
473}
474
475
476/*
477 * Grab some LEPP completions, and store them in "comps", of size
478 * "comps_size", and return the number of completions which were
479 * stored, so the caller can free them.
480 *
481 * If "pending" is not NULL, it will be set to true if there might
482 * still be some pending completions caused by this tile, else false.
483 */
484static unsigned int tile_net_lepp_grab_comps(struct net_device *dev,
485 struct sk_buff *comps[],
486 unsigned int comps_size,
487 bool *pending)
488{
489 struct tile_net_priv *priv = netdev_priv(dev);
490
491 lepp_queue_t *eq = priv->epp_queue;
492
493 unsigned int n = 0;
494
495 unsigned int comp_head;
496 unsigned int comp_busy;
497 unsigned int comp_tail;
498
499 spin_lock(&priv->comp_lock);
500
501 comp_head = eq->comp_head;
502 comp_busy = eq->comp_busy;
503 comp_tail = eq->comp_tail;
504
505 while (comp_head != comp_busy && n < comps_size) {
506 comps[n++] = eq->comps[comp_head];
507 LEPP_QINC(comp_head);
508 }
509
510 if (pending != NULL)
511 *pending = (comp_head != comp_tail);
512
513 eq->comp_head = comp_head;
514
515 spin_unlock(&priv->comp_lock);
516
517 return n;
518}
519
520
521/*
522 * Make sure the egress timer is scheduled.
523 *
524 * Note that we use "schedule if not scheduled" logic instead of the more
525 * obvious "reschedule" logic, because "reschedule" is fairly expensive.
526 */
527static void tile_net_schedule_egress_timer(struct tile_net_cpu *info)
528{
529 if (!info->egress_timer_scheduled) {
530 mod_timer_pinned(&info->egress_timer, jiffies + 1);
531 info->egress_timer_scheduled = true;
532 }
533}
534
535
536/*
537 * The "function" for "info->egress_timer".
538 *
539 * This timer will reschedule itself as long as there are any pending
540 * completions expected (on behalf of any tile).
541 *
542 * ISSUE: Realistically, will the timer ever stop scheduling itself?
543 *
544 * ISSUE: This timer is almost never actually needed, so just use a global
545 * timer that can run on any tile.
546 *
547 * ISSUE: Maybe instead track number of expected completions, and free
548 * only that many, resetting to zero if "pending" is ever false.
549 */
550static void tile_net_handle_egress_timer(unsigned long arg)
551{
552 struct tile_net_cpu *info = (struct tile_net_cpu *)arg;
553 struct net_device *dev = info->napi.dev;
554
555 struct sk_buff *olds[32];
556 unsigned int wanted = 32;
557 unsigned int i, nolds = 0;
558 bool pending;
559
560 /* The timer is no longer scheduled. */
561 info->egress_timer_scheduled = false;
562
563 nolds = tile_net_lepp_grab_comps(dev, olds, wanted, &pending);
564
565 for (i = 0; i < nolds; i++)
566 kfree_skb(olds[i]);
567
568 /* Reschedule timer if needed. */
569 if (pending)
570 tile_net_schedule_egress_timer(info);
571}
572
573
574#ifdef IGNORE_DUP_ACKS
575
576/*
577 * Help detect "duplicate" ACKs. These are sequential packets (for a
578 * given flow) which are exactly 66 bytes long, sharing everything but
579 * ID=2@0x12, Hsum=2@0x18, Ack=4@0x2a, WinSize=2@0x30, Csum=2@0x32,
580 * Tstamps=10@0x38. The ID's are +1, the Hsum's are -1, the Ack's are
581 * +N, and the Tstamps are usually identical.
582 *
583 * NOTE: Apparently truly duplicate acks (with identical "ack" values),
584 * should not be collapsed, as they are used for some kind of flow control.
585 */
586static bool is_dup_ack(char *s1, char *s2, unsigned int len)
587{
588 int i;
589
590 unsigned long long ignorable = 0;
591
592 /* Identification. */
593 ignorable |= (1ULL << 0x12);
594 ignorable |= (1ULL << 0x13);
595
596 /* Header checksum. */
597 ignorable |= (1ULL << 0x18);
598 ignorable |= (1ULL << 0x19);
599
600 /* ACK. */
601 ignorable |= (1ULL << 0x2a);
602 ignorable |= (1ULL << 0x2b);
603 ignorable |= (1ULL << 0x2c);
604 ignorable |= (1ULL << 0x2d);
605
606 /* WinSize. */
607 ignorable |= (1ULL << 0x30);
608 ignorable |= (1ULL << 0x31);
609
610 /* Checksum. */
611 ignorable |= (1ULL << 0x32);
612 ignorable |= (1ULL << 0x33);
613
614 for (i = 0; i < len; i++, ignorable >>= 1) {
615
616 if ((ignorable & 1) || (s1[i] == s2[i]))
617 continue;
618
619#ifdef TILE_NET_DEBUG
620 /* HACK: Mention non-timestamp diffs. */
621 if (i < 0x38 && i != 0x2f &&
622 net_ratelimit())
623 pr_info("Diff at 0x%x\n", i);
624#endif
625
626 return false;
627 }
628
629#ifdef TILE_NET_NO_SUPPRESS_DUP_ACKS
630 /* HACK: Do not suppress truly duplicate ACKs. */
631 /* ISSUE: Is this actually necessary or helpful? */
632 if (s1[0x2a] == s2[0x2a] &&
633 s1[0x2b] == s2[0x2b] &&
634 s1[0x2c] == s2[0x2c] &&
635 s1[0x2d] == s2[0x2d]) {
636 return false;
637 }
638#endif
639
640 return true;
641}
642
643#endif
644
645
646
647/*
648 * Like "tile_net_handle_packets()", but just discard packets.
649 */
650static void tile_net_discard_packets(struct net_device *dev)
651{
652 struct tile_net_priv *priv = netdev_priv(dev);
653 int my_cpu = smp_processor_id();
654 struct tile_net_cpu *info = priv->cpu[my_cpu];
655 struct tile_netio_queue *queue = &info->queue;
656 netio_queue_impl_t *qsp = queue->__system_part;
657 netio_queue_user_impl_t *qup = &queue->__user_part;
658
659 while (qup->__packet_receive_read !=
660 qsp->__packet_receive_queue.__packet_write) {
661
662 int index = qup->__packet_receive_read;
663
664 int index2_aux = index + sizeof(netio_pkt_t);
665 int index2 =
666 ((index2_aux ==
667 qsp->__packet_receive_queue.__last_packet_plus_one) ?
668 0 : index2_aux);
669
670 netio_pkt_t *pkt = (netio_pkt_t *)
671 ((unsigned long) &qsp[1] + index);
672
673 /* Extract the "linux_buffer_t". */
674 unsigned int buffer = pkt->__packet.word;
675
676 /* Convert "linux_buffer_t" to "va". */
677 void *va = __va((phys_addr_t)(buffer >> 1) << 7);
678
679 /* Acquire the associated "skb". */
680 struct sk_buff **skb_ptr = va - sizeof(*skb_ptr);
681 struct sk_buff *skb = *skb_ptr;
682
683 kfree_skb(skb);
684
685 /* Consume this packet. */
686 qup->__packet_receive_read = index2;
687 }
688}
689
690
691/*
692 * Handle the next packet. Return true if "processed", false if "filtered".
693 */
694static bool tile_net_poll_aux(struct tile_net_cpu *info, int index)
695{
696 struct net_device *dev = info->napi.dev;
697
698 struct tile_netio_queue *queue = &info->queue;
699 netio_queue_impl_t *qsp = queue->__system_part;
700 netio_queue_user_impl_t *qup = &queue->__user_part;
701 struct tile_net_stats_t *stats = &info->stats;
702
703 int filter;
704
705 int index2_aux = index + sizeof(netio_pkt_t);
706 int index2 =
707 ((index2_aux ==
708 qsp->__packet_receive_queue.__last_packet_plus_one) ?
709 0 : index2_aux);
710
711 netio_pkt_t *pkt = (netio_pkt_t *)((unsigned long) &qsp[1] + index);
712
713 netio_pkt_metadata_t *metadata = NETIO_PKT_METADATA(pkt);
714
715 /* Extract the packet size. */
716 unsigned long len =
717 (NETIO_PKT_CUSTOM_LENGTH(pkt) +
718 NET_IP_ALIGN - NETIO_PACKET_PADDING);
719
720 /* Extract the "linux_buffer_t". */
721 unsigned int buffer = pkt->__packet.word;
722
723 /* Extract "small" (vs "large"). */
724 bool small = ((buffer & 1) != 0);
725
726 /* Convert "linux_buffer_t" to "va". */
727 void *va = __va((phys_addr_t)(buffer >> 1) << 7);
728
729 /* Extract the packet data pointer. */
730 /* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */
731 unsigned char *buf = va + NET_IP_ALIGN;
732
733#ifdef IGNORE_DUP_ACKS
734
735 static int other;
736 static int final;
737 static int keep;
738 static int skip;
739
740#endif
741
742 /* Invalidate the packet buffer. */
743 if (!hash_default)
744 __inv_buffer(buf, len);
745
746 /* ISSUE: Is this needed? */
747 dev->last_rx = jiffies;
748
749#ifdef TILE_NET_DUMP_PACKETS
750 dump_packet(buf, len, "rx");
751#endif /* TILE_NET_DUMP_PACKETS */
752
753#ifdef TILE_NET_VERIFY_INGRESS
754 if (!NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt) &&
755 NETIO_PKT_L4_CSUM_CALCULATED_M(metadata, pkt)) {
756 /*
757 * FIXME: This complains about UDP packets
758 * with a "zero" checksum (bug 6624).
759 */
760#ifdef TILE_NET_PANIC_ON_BAD
761 dump_packet(buf, len, "rx");
762 panic("Bad L4 checksum.");
763#else
764 pr_warning("Bad L4 checksum on %d byte packet.\n", len);
765#endif
766 }
767 if (!NETIO_PKT_L3_CSUM_CORRECT_M(metadata, pkt) &&
768 NETIO_PKT_L3_CSUM_CALCULATED_M(metadata, pkt)) {
769 dump_packet(buf, len, "rx");
770 panic("Bad L3 checksum.");
771 }
772 switch (NETIO_PKT_STATUS_M(metadata, pkt)) {
773 case NETIO_PKT_STATUS_OVERSIZE:
774 if (len >= 64) {
775 dump_packet(buf, len, "rx");
776 panic("Unexpected OVERSIZE.");
777 }
778 break;
779 case NETIO_PKT_STATUS_BAD:
780#ifdef TILE_NET_PANIC_ON_BAD
781 dump_packet(buf, len, "rx");
782 panic("Unexpected BAD packet.");
783#else
784 pr_warning("Unexpected BAD %d byte packet.\n", len);
785#endif
786 }
787#endif
788
789 filter = 0;
790
791 if (!(dev->flags & IFF_UP)) {
792 /* Filter packets received before we're up. */
793 filter = 1;
794 } else if (!(dev->flags & IFF_PROMISC)) {
795 /*
796 * FIXME: Implement HW multicast filter.
797 */
798 if (!IS_MULTICAST(buf) && !IS_BROADCAST(buf)) {
799 /* Filter packets not for our address. */
800 const u8 *mine = dev->dev_addr;
801 filter = compare_ether_addr(mine, buf);
802 }
803 }
804
805#ifdef IGNORE_DUP_ACKS
806
807 if (len != 66) {
808 /* FIXME: Must check "is_tcp_ack(buf, len)" somehow. */
809
810 other++;
811
812 } else if (index2 ==
813 qsp->__packet_receive_queue.__packet_write) {
814
815 final++;
816
817 } else {
818
819 netio_pkt_t *pkt2 = (netio_pkt_t *)
820 ((unsigned long) &qsp[1] + index2);
821
822 netio_pkt_metadata_t *metadata2 =
823 NETIO_PKT_METADATA(pkt2);
824
825 /* Extract the packet size. */
826 unsigned long len2 =
827 (NETIO_PKT_CUSTOM_LENGTH(pkt2) +
828 NET_IP_ALIGN - NETIO_PACKET_PADDING);
829
830 if (len2 == 66 &&
831 NETIO_PKT_FLOW_HASH_M(metadata, pkt) ==
832 NETIO_PKT_FLOW_HASH_M(metadata2, pkt2)) {
833
834 /* Extract the "linux_buffer_t". */
835 unsigned int buffer2 = pkt2->__packet.word;
836
837 /* Convert "linux_buffer_t" to "va". */
838 void *va2 =
839 __va((phys_addr_t)(buffer2 >> 1) << 7);
840
841 /* Extract the packet data pointer. */
842 /* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */
843 unsigned char *buf2 = va2 + NET_IP_ALIGN;
844
845 /* Invalidate the packet buffer. */
846 if (!hash_default)
847 __inv_buffer(buf2, len2);
848
849 if (is_dup_ack(buf, buf2, len)) {
850 skip++;
851 filter = 1;
852 } else {
853 keep++;
854 }
855 }
856 }
857
858 if (net_ratelimit())
859 pr_info("Other %d Final %d Keep %d Skip %d.\n",
860 other, final, keep, skip);
861
862#endif
863
864 if (filter) {
865
866 /* ISSUE: Update "drop" statistics? */
867
868 tile_net_provide_linux_buffer(info, va, small);
869
870 } else {
871
872 /* Acquire the associated "skb". */
873 struct sk_buff **skb_ptr = va - sizeof(*skb_ptr);
874 struct sk_buff *skb = *skb_ptr;
875
876 /* Paranoia. */
877 if (skb->data != buf)
878 panic("Corrupt linux buffer from LIPP! "
879 "VA=%p, skb=%p, skb->data=%p\n",
880 va, skb, skb->data);
881
882 /* Encode the actual packet length. */
883 skb_put(skb, len);
884
885 /* NOTE: This call also sets "skb->dev = dev". */
886 skb->protocol = eth_type_trans(skb, dev);
887
888 /* ISSUE: Discard corrupt packets? */
889 /* ISSUE: Discard packets with bad checksums? */
890
891 /* Avoid recomputing TCP/UDP checksums. */
892 if (NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt))
893 skb->ip_summed = CHECKSUM_UNNECESSARY;
894
895 netif_receive_skb(skb);
896
897 stats->rx_packets++;
898 stats->rx_bytes += len;
899
900 if (small)
901 info->num_needed_small_buffers++;
902 else
903 info->num_needed_large_buffers++;
904 }
905
906 /* Return four credits after every fourth packet. */
907 if (--qup->__receive_credit_remaining == 0) {
908 u32 interval = qup->__receive_credit_interval;
909 qup->__receive_credit_remaining = interval;
910 __netio_fastio_return_credits(qup->__fastio_index, interval);
911 }
912
913 /* Consume this packet. */
914 qup->__packet_receive_read = index2;
915
916 return !filter;
917}
918
919
920/*
921 * Handle some packets for the given device on the current CPU.
922 *
923 * ISSUE: The "rotting packet" race condition occurs if a packet
924 * arrives after the queue appears to be empty, and before the
925 * hypervisor interrupt is re-enabled.
926 */
927static int tile_net_poll(struct napi_struct *napi, int budget)
928{
929 struct net_device *dev = napi->dev;
930 struct tile_net_priv *priv = netdev_priv(dev);
931 int my_cpu = smp_processor_id();
932 struct tile_net_cpu *info = priv->cpu[my_cpu];
933 struct tile_netio_queue *queue = &info->queue;
934 netio_queue_impl_t *qsp = queue->__system_part;
935 netio_queue_user_impl_t *qup = &queue->__user_part;
936
937 unsigned int work = 0;
938
939 while (1) {
940 int index = qup->__packet_receive_read;
941 if (index == qsp->__packet_receive_queue.__packet_write)
942 break;
943
944 if (tile_net_poll_aux(info, index)) {
945 if (++work >= budget)
946 goto done;
947 }
948 }
949
950 napi_complete(&info->napi);
951
952 /* Re-enable hypervisor interrupts. */
953 enable_percpu_irq(priv->intr_id);
954
955 /* HACK: Avoid the "rotting packet" problem. */
956 if (qup->__packet_receive_read !=
957 qsp->__packet_receive_queue.__packet_write)
958 napi_schedule(&info->napi);
959
960 /* ISSUE: Handle completions? */
961
962done:
963
964 tile_net_provide_needed_buffers(info);
965
966 return work;
967}
968
969
970/*
971 * Handle an ingress interrupt for the given device on the current cpu.
972 */
973static irqreturn_t tile_net_handle_ingress_interrupt(int irq, void *dev_ptr)
974{
975 struct net_device *dev = (struct net_device *)dev_ptr;
976 struct tile_net_priv *priv = netdev_priv(dev);
977 int my_cpu = smp_processor_id();
978 struct tile_net_cpu *info = priv->cpu[my_cpu];
979
980 /* Disable hypervisor interrupt. */
981 disable_percpu_irq(priv->intr_id);
982
983 napi_schedule(&info->napi);
984
985 return IRQ_HANDLED;
986}
987
988
989/*
990 * One time initialization per interface.
991 */
992static int tile_net_open_aux(struct net_device *dev)
993{
994 struct tile_net_priv *priv = netdev_priv(dev);
995
996 int ret;
997 int dummy;
998 unsigned int epp_lotar;
999
1000 /*
1001 * Find out where EPP memory should be homed.
1002 */
1003 ret = hv_dev_pread(priv->hv_devhdl, 0,
1004 (HV_VirtAddr)&epp_lotar, sizeof(epp_lotar),
1005 NETIO_EPP_SHM_OFF);
1006 if (ret < 0) {
1007 pr_err("could not read epp_shm_queue lotar.\n");
1008 return -EIO;
1009 }
1010
1011 /*
1012 * Home the page on the EPP.
1013 */
1014 {
1015 int epp_home = hv_lotar_to_cpu(epp_lotar);
1016 struct page *page = virt_to_page(priv->epp_queue);
1017 homecache_change_page_home(page, 0, epp_home);
1018 }
1019
1020 /*
1021 * Register the EPP shared memory queue.
1022 */
1023 {
1024 netio_ipp_address_t ea = {
1025 .va = 0,
1026 .pa = __pa(priv->epp_queue),
1027 .pte = hv_pte(0),
1028 .size = PAGE_SIZE,
1029 };
1030 ea.pte = hv_pte_set_lotar(ea.pte, epp_lotar);
1031 ea.pte = hv_pte_set_mode(ea.pte, HV_PTE_MODE_CACHE_TILE_L3);
1032 ret = hv_dev_pwrite(priv->hv_devhdl, 0,
1033 (HV_VirtAddr)&ea,
1034 sizeof(ea),
1035 NETIO_EPP_SHM_OFF);
1036 if (ret < 0)
1037 return -EIO;
1038 }
1039
1040 /*
1041 * Start LIPP/LEPP.
1042 */
1043 if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy,
1044 sizeof(dummy), NETIO_IPP_START_SHIM_OFF) < 0) {
1045 pr_warning("Failed to start LIPP/LEPP.\n");
1046 return -EIO;
1047 }
1048
1049 return 0;
1050}
1051
1052
1053/*
1054 * Register with hypervisor on each CPU.
1055 *
1056 * Strangely, this function does important things even if it "fails",
1057 * which is especially common if the link is not up yet. Hopefully
1058 * these things are all "harmless" if done twice!
1059 */
1060static void tile_net_register(void *dev_ptr)
1061{
1062 struct net_device *dev = (struct net_device *)dev_ptr;
1063 struct tile_net_priv *priv = netdev_priv(dev);
1064 int my_cpu = smp_processor_id();
1065 struct tile_net_cpu *info;
1066
1067 struct tile_netio_queue *queue;
1068
1069 /* Only network cpus can receive packets. */
1070 int queue_id =
1071 cpumask_test_cpu(my_cpu, &priv->network_cpus_map) ? 0 : 255;
1072
1073 netio_input_config_t config = {
1074 .flags = 0,
1075 .num_receive_packets = priv->network_cpus_credits,
1076 .queue_id = queue_id
1077 };
1078
1079 int ret = 0;
1080 netio_queue_impl_t *queuep;
1081
1082 PDEBUG("tile_net_register(queue_id %d)\n", queue_id);
1083
1084 if (!strcmp(dev->name, "xgbe0"))
1085 info = &__get_cpu_var(hv_xgbe0);
1086 else if (!strcmp(dev->name, "xgbe1"))
1087 info = &__get_cpu_var(hv_xgbe1);
1088 else if (!strcmp(dev->name, "gbe0"))
1089 info = &__get_cpu_var(hv_gbe0);
1090 else if (!strcmp(dev->name, "gbe1"))
1091 info = &__get_cpu_var(hv_gbe1);
1092 else
1093 BUG();
1094
1095 /* Initialize the egress timer. */
1096 init_timer(&info->egress_timer);
1097 info->egress_timer.data = (long)info;
1098 info->egress_timer.function = tile_net_handle_egress_timer;
1099
1100 priv->cpu[my_cpu] = info;
1101
1102 /*
1103 * Register ourselves with the IPP.
1104 */
1105 ret = hv_dev_pwrite(priv->hv_devhdl, 0,
1106 (HV_VirtAddr)&config,
1107 sizeof(netio_input_config_t),
1108 NETIO_IPP_INPUT_REGISTER_OFF);
1109 PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_REGISTER_OFF) returned %d\n",
1110 ret);
1111 if (ret < 0) {
1112 printk(KERN_DEBUG "hv_dev_pwrite NETIO_IPP_INPUT_REGISTER_OFF"
1113 " failure %d\n", ret);
1114 info->link_down = (ret == NETIO_LINK_DOWN);
1115 return;
1116 }
1117
1118 /*
1119 * Get the pointer to our queue's system part.
1120 */
1121
1122 ret = hv_dev_pread(priv->hv_devhdl, 0,
1123 (HV_VirtAddr)&queuep,
1124 sizeof(netio_queue_impl_t *),
1125 NETIO_IPP_INPUT_REGISTER_OFF);
1126 PDEBUG("hv_dev_pread(NETIO_IPP_INPUT_REGISTER_OFF) returned %d\n",
1127 ret);
1128 PDEBUG("queuep %p\n", queuep);
1129 if (ret <= 0) {
1130 /* ISSUE: Shouldn't this be a fatal error? */
1131 pr_err("hv_dev_pread NETIO_IPP_INPUT_REGISTER_OFF failure\n");
1132 return;
1133 }
1134
1135 queue = &info->queue;
1136
1137 queue->__system_part = queuep;
1138
1139 memset(&queue->__user_part, 0, sizeof(netio_queue_user_impl_t));
1140
1141 /* This is traditionally "config.num_receive_packets / 2". */
1142 queue->__user_part.__receive_credit_interval = 4;
1143 queue->__user_part.__receive_credit_remaining =
1144 queue->__user_part.__receive_credit_interval;
1145
1146 /*
1147 * Get a fastio index from the hypervisor.
1148 * ISSUE: Shouldn't this check the result?
1149 */
1150 ret = hv_dev_pread(priv->hv_devhdl, 0,
1151 (HV_VirtAddr)&queue->__user_part.__fastio_index,
1152 sizeof(queue->__user_part.__fastio_index),
1153 NETIO_IPP_GET_FASTIO_OFF);
1154 PDEBUG("hv_dev_pread(NETIO_IPP_GET_FASTIO_OFF) returned %d\n", ret);
1155
1156 netif_napi_add(dev, &info->napi, tile_net_poll, 64);
1157
1158 /* Now we are registered. */
1159 info->registered = true;
1160}
1161
1162
1163/*
1164 * Unregister with hypervisor on each CPU.
1165 */
1166static void tile_net_unregister(void *dev_ptr)
1167{
1168 struct net_device *dev = (struct net_device *)dev_ptr;
1169 struct tile_net_priv *priv = netdev_priv(dev);
1170 int my_cpu = smp_processor_id();
1171 struct tile_net_cpu *info = priv->cpu[my_cpu];
1172
1173 int ret = 0;
1174 int dummy = 0;
1175
1176 /* Do nothing if never registered. */
1177 if (info == NULL)
1178 return;
1179
1180 /* Do nothing if already unregistered. */
1181 if (!info->registered)
1182 return;
1183
1184 /*
1185 * Unregister ourselves with LIPP.
1186 */
1187 ret = hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy,
1188 sizeof(dummy), NETIO_IPP_INPUT_UNREGISTER_OFF);
1189 PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_UNREGISTER_OFF) returned %d\n",
1190 ret);
1191 if (ret < 0) {
1192 /* FIXME: Just panic? */
1193 pr_err("hv_dev_pwrite NETIO_IPP_INPUT_UNREGISTER_OFF"
1194 " failure %d\n", ret);
1195 }
1196
1197 /*
1198 * Discard all packets still in our NetIO queue. Hopefully,
1199 * once the unregister call is complete, there will be no
1200 * packets still in flight on the IDN.
1201 */
1202 tile_net_discard_packets(dev);
1203
1204 /* Reset state. */
1205 info->num_needed_small_buffers = 0;
1206 info->num_needed_large_buffers = 0;
1207
1208 /* Cancel egress timer. */
1209 del_timer(&info->egress_timer);
1210 info->egress_timer_scheduled = false;
1211
1212 netif_napi_del(&info->napi);
1213
1214 /* Now we are unregistered. */
1215 info->registered = false;
1216}
1217
1218
1219/*
1220 * Helper function for "tile_net_stop()".
1221 *
1222 * Also used to handle registration failure in "tile_net_open_inner()",
1223 * when "fully_opened" is known to be false, and the various extra
1224 * steps in "tile_net_stop()" are not necessary. ISSUE: It might be
1225 * simpler if we could just call "tile_net_stop()" anyway.
1226 */
1227static void tile_net_stop_aux(struct net_device *dev)
1228{
1229 struct tile_net_priv *priv = netdev_priv(dev);
1230
1231 int dummy = 0;
1232
1233 /* Unregister all tiles, so LIPP will stop delivering packets. */
1234 on_each_cpu(tile_net_unregister, (void *)dev, 1);
1235
1236 /* Stop LIPP/LEPP. */
1237 if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy,
1238 sizeof(dummy), NETIO_IPP_STOP_SHIM_OFF) < 0)
1239 panic("Failed to stop LIPP/LEPP!\n");
1240
1241 priv->partly_opened = 0;
1242}
1243
1244
1245/*
1246 * Disable ingress interrupts for the given device on the current cpu.
1247 */
1248static void tile_net_disable_intr(void *dev_ptr)
1249{
1250 struct net_device *dev = (struct net_device *)dev_ptr;
1251 struct tile_net_priv *priv = netdev_priv(dev);
1252 int my_cpu = smp_processor_id();
1253 struct tile_net_cpu *info = priv->cpu[my_cpu];
1254
1255 /* Disable hypervisor interrupt. */
1256 disable_percpu_irq(priv->intr_id);
1257
1258 /* Disable NAPI if needed. */
1259 if (info != NULL && info->napi_enabled) {
1260 napi_disable(&info->napi);
1261 info->napi_enabled = false;
1262 }
1263}
1264
1265
1266/*
1267 * Enable ingress interrupts for the given device on the current cpu.
1268 */
1269static void tile_net_enable_intr(void *dev_ptr)
1270{
1271 struct net_device *dev = (struct net_device *)dev_ptr;
1272 struct tile_net_priv *priv = netdev_priv(dev);
1273 int my_cpu = smp_processor_id();
1274 struct tile_net_cpu *info = priv->cpu[my_cpu];
1275
1276 /* Enable hypervisor interrupt. */
1277 enable_percpu_irq(priv->intr_id);
1278
1279 /* Enable NAPI. */
1280 napi_enable(&info->napi);
1281 info->napi_enabled = true;
1282}
1283
1284
1285/*
1286 * tile_net_open_inner does most of the work of bringing up the interface.
1287 * It's called from tile_net_open(), and also from tile_net_retry_open().
1288 * The return value is 0 if the interface was brought up, < 0 if
1289 * tile_net_open() should return the return value as an error, and > 0 if
1290 * tile_net_open() should return success and schedule a work item to
1291 * periodically retry the bringup.
1292 */
1293static int tile_net_open_inner(struct net_device *dev)
1294{
1295 struct tile_net_priv *priv = netdev_priv(dev);
1296 int my_cpu = smp_processor_id();
1297 struct tile_net_cpu *info;
1298 struct tile_netio_queue *queue;
1299 unsigned int irq;
1300 int i;
1301
1302 /*
1303 * First try to register just on the local CPU, and handle any
1304 * semi-expected "link down" failure specially. Note that we
1305 * do NOT call "tile_net_stop_aux()", unlike below.
1306 */
1307 tile_net_register(dev);
1308 info = priv->cpu[my_cpu];
1309 if (!info->registered) {
1310 if (info->link_down)
1311 return 1;
1312 return -EAGAIN;
1313 }
1314
1315 /*
1316 * Now register everywhere else. If any registration fails,
1317 * even for "link down" (which might not be possible), we
1318 * clean up using "tile_net_stop_aux()".
1319 */
1320 smp_call_function(tile_net_register, (void *)dev, 1);
1321 for_each_online_cpu(i) {
1322 if (!priv->cpu[i]->registered) {
1323 tile_net_stop_aux(dev);
1324 return -EAGAIN;
1325 }
1326 }
1327
1328 queue = &info->queue;
1329
1330 /*
1331 * Set the device intr bit mask.
1332 * The tile_net_register above sets per tile __intr_id.
1333 */
1334 priv->intr_id = queue->__system_part->__intr_id;
1335 BUG_ON(!priv->intr_id);
1336
1337 /*
1338 * Register the device interrupt handler.
1339 * The __ffs() function returns the index into the interrupt handler
1340 * table from the interrupt bit mask which should have one bit
1341 * and one bit only set.
1342 */
1343 irq = __ffs(priv->intr_id);
1344 tile_irq_activate(irq, TILE_IRQ_PERCPU);
1345 BUG_ON(request_irq(irq, tile_net_handle_ingress_interrupt,
1346 0, dev->name, (void *)dev) != 0);
1347
1348 /* ISSUE: How could "priv->fully_opened" ever be "true" here? */
1349
1350 if (!priv->fully_opened) {
1351
1352 int dummy = 0;
1353
1354 /* Allocate initial buffers. */
1355
1356 int max_buffers =
1357 priv->network_cpus_count * priv->network_cpus_credits;
1358
1359 info->num_needed_small_buffers =
1360 min(LIPP_SMALL_BUFFERS, max_buffers);
1361
1362 info->num_needed_large_buffers =
1363 min(LIPP_LARGE_BUFFERS, max_buffers);
1364
1365 tile_net_provide_needed_buffers(info);
1366
1367 if (info->num_needed_small_buffers != 0 ||
1368 info->num_needed_large_buffers != 0)
1369 panic("Insufficient memory for buffer stack!");
1370
1371 /* Start LIPP/LEPP and activate "ingress" at the shim. */
1372 if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy,
1373 sizeof(dummy), NETIO_IPP_INPUT_INIT_OFF) < 0)
1374 panic("Failed to activate the LIPP Shim!\n");
1375
1376 priv->fully_opened = 1;
1377 }
1378
1379 /* On each tile, enable the hypervisor to trigger interrupts. */
1380 /* ISSUE: Do this before starting LIPP/LEPP? */
1381 on_each_cpu(tile_net_enable_intr, (void *)dev, 1);
1382
1383 /* Start our transmit queue. */
1384 netif_start_queue(dev);
1385
1386 return 0;
1387}
1388
1389
1390/*
1391 * Called periodically to retry bringing up the NetIO interface,
1392 * if it doesn't come up cleanly during tile_net_open().
1393 */
1394static void tile_net_open_retry(struct work_struct *w)
1395{
1396 struct delayed_work *dw =
1397 container_of(w, struct delayed_work, work);
1398
1399 struct tile_net_priv *priv =
1400 container_of(dw, struct tile_net_priv, retry_work);
1401
1402 /*
1403 * Try to bring the NetIO interface up. If it fails, reschedule
1404 * ourselves to try again later; otherwise, tell Linux we now have
1405 * a working link. ISSUE: What if the return value is negative?
1406 */
1407 if (tile_net_open_inner(priv->dev))
1408 schedule_delayed_work_on(singlethread_cpu, &priv->retry_work,
1409 TILE_NET_RETRY_INTERVAL);
1410 else
1411 netif_carrier_on(priv->dev);
1412}
1413
1414
1415/*
1416 * Called when a network interface is made active.
1417 *
1418 * Returns 0 on success, negative value on failure.
1419 *
1420 * The open entry point is called when a network interface is made
1421 * active by the system (IFF_UP). At this point all resources needed
1422 * for transmit and receive operations are allocated, the interrupt
1423 * handler is registered with the OS, the watchdog timer is started,
1424 * and the stack is notified that the interface is ready.
1425 *
1426 * If the actual link is not available yet, then we tell Linux that
1427 * we have no carrier, and we keep checking until the link comes up.
1428 */
1429static int tile_net_open(struct net_device *dev)
1430{
1431 int ret = 0;
1432 struct tile_net_priv *priv = netdev_priv(dev);
1433
1434 /*
1435 * We rely on priv->partly_opened to tell us if this is the
1436 * first time this interface is being brought up. If it is
1437 * set, the IPP was already initialized and should not be
1438 * initialized again.
1439 */
1440 if (!priv->partly_opened) {
1441
1442 int count;
1443 int credits;
1444
1445 /* Initialize LIPP/LEPP, and start the Shim. */
1446 ret = tile_net_open_aux(dev);
1447 if (ret < 0) {
1448 pr_err("tile_net_open_aux failed: %d\n", ret);
1449 return ret;
1450 }
1451
1452 /* Analyze the network cpus. */
1453
1454 if (network_cpus_used)
1455 cpumask_copy(&priv->network_cpus_map,
1456 &network_cpus_map);
1457 else
1458 cpumask_copy(&priv->network_cpus_map, cpu_online_mask);
1459
1460
1461 count = cpumask_weight(&priv->network_cpus_map);
1462
1463 /* Limit credits to available buffers, and apply min. */
1464 credits = max(16, (LIPP_LARGE_BUFFERS / count) & ~1);
1465
1466 /* Apply "GBE" max limit. */
1467 /* ISSUE: Use higher limit for XGBE? */
1468 credits = min(NETIO_MAX_RECEIVE_PKTS, credits);
1469
1470 priv->network_cpus_count = count;
1471 priv->network_cpus_credits = credits;
1472
1473#ifdef TILE_NET_DEBUG
1474 pr_info("Using %d network cpus, with %d credits each\n",
1475 priv->network_cpus_count, priv->network_cpus_credits);
1476#endif
1477
1478 priv->partly_opened = 1;
1479 }
1480
1481 /*
1482 * Attempt to bring up the link.
1483 */
1484 ret = tile_net_open_inner(dev);
1485 if (ret <= 0) {
1486 if (ret == 0)
1487 netif_carrier_on(dev);
1488 return ret;
1489 }
1490
1491 /*
1492 * We were unable to bring up the NetIO interface, but we want to
1493 * try again in a little bit. Tell Linux that we have no carrier
1494 * so it doesn't try to use the interface before the link comes up
1495 * and then remember to try again later.
1496 */
1497 netif_carrier_off(dev);
1498 schedule_delayed_work_on(singlethread_cpu, &priv->retry_work,
1499 TILE_NET_RETRY_INTERVAL);
1500
1501 return 0;
1502}
1503
1504
1505/*
1506 * Disables a network interface.
1507 *
1508 * Returns 0, this is not allowed to fail.
1509 *
1510 * The close entry point is called when an interface is de-activated
1511 * by the OS. The hardware is still under the drivers control, but
1512 * needs to be disabled. A global MAC reset is issued to stop the
1513 * hardware, and all transmit and receive resources are freed.
1514 *
1515 * ISSUE: Can this can be called while "tile_net_poll()" is running?
1516 */
1517static int tile_net_stop(struct net_device *dev)
1518{
1519 struct tile_net_priv *priv = netdev_priv(dev);
1520
1521 bool pending = true;
1522
1523 PDEBUG("tile_net_stop()\n");
1524
1525 /* ISSUE: Only needed if not yet fully open. */
1526 cancel_delayed_work_sync(&priv->retry_work);
1527
1528 /* Can't transmit any more. */
1529 netif_stop_queue(dev);
1530
1531 /*
1532 * Disable hypervisor interrupts on each tile.
1533 */
1534 on_each_cpu(tile_net_disable_intr, (void *)dev, 1);
1535
1536 /*
1537 * Unregister the interrupt handler.
1538 * The __ffs() function returns the index into the interrupt handler
1539 * table from the interrupt bit mask which should have one bit
1540 * and one bit only set.
1541 */
1542 if (priv->intr_id)
1543 free_irq(__ffs(priv->intr_id), dev);
1544
1545 /*
1546 * Drain all the LIPP buffers.
1547 */
1548
1549 while (true) {
1550 int buffer;
1551
1552 /* NOTE: This should never fail. */
1553 if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&buffer,
1554 sizeof(buffer), NETIO_IPP_DRAIN_OFF) < 0)
1555 break;
1556
1557 /* Stop when done. */
1558 if (buffer == 0)
1559 break;
1560
1561 {
1562 /* Convert "linux_buffer_t" to "va". */
1563 void *va = __va((phys_addr_t)(buffer >> 1) << 7);
1564
1565 /* Acquire the associated "skb". */
1566 struct sk_buff **skb_ptr = va - sizeof(*skb_ptr);
1567 struct sk_buff *skb = *skb_ptr;
1568
1569 kfree_skb(skb);
1570 }
1571 }
1572
1573 /* Stop LIPP/LEPP. */
1574 tile_net_stop_aux(dev);
1575
1576
1577 priv->fully_opened = 0;
1578
1579
1580 /*
1581 * XXX: ISSUE: It appears that, in practice anyway, by the
1582 * time we get here, there are no pending completions.
1583 */
1584 while (pending) {
1585
1586 struct sk_buff *olds[32];
1587 unsigned int wanted = 32;
1588 unsigned int i, nolds = 0;
1589
1590 nolds = tile_net_lepp_grab_comps(dev, olds,
1591 wanted, &pending);
1592
1593 /* ISSUE: We have never actually seen this debug spew. */
1594 if (nolds != 0)
1595 pr_info("During tile_net_stop(), grabbed %d comps.\n",
1596 nolds);
1597
1598 for (i = 0; i < nolds; i++)
1599 kfree_skb(olds[i]);
1600 }
1601
1602
1603 /* Wipe the EPP queue. */
1604 memset(priv->epp_queue, 0, sizeof(lepp_queue_t));
1605
1606 /* Evict the EPP queue. */
1607 finv_buffer(priv->epp_queue, PAGE_SIZE);
1608
1609 return 0;
1610}
1611
1612
1613/*
1614 * Prepare the "frags" info for the resulting LEPP command.
1615 *
1616 * If needed, flush the memory used by the frags.
1617 */
1618static unsigned int tile_net_tx_frags(lepp_frag_t *frags,
1619 struct sk_buff *skb,
1620 void *b_data, unsigned int b_len)
1621{
1622 unsigned int i, n = 0;
1623
1624 struct skb_shared_info *sh = skb_shinfo(skb);
1625
1626 phys_addr_t cpa;
1627
1628 if (b_len != 0) {
1629
1630 if (!hash_default)
1631 finv_buffer_remote(b_data, b_len);
1632
1633 cpa = __pa(b_data);
1634 frags[n].cpa_lo = cpa;
1635 frags[n].cpa_hi = cpa >> 32;
1636 frags[n].length = b_len;
1637 frags[n].hash_for_home = hash_default;
1638 n++;
1639 }
1640
1641 for (i = 0; i < sh->nr_frags; i++) {
1642
1643 skb_frag_t *f = &sh->frags[i];
1644 unsigned long pfn = page_to_pfn(f->page);
1645
1646 /* FIXME: Compute "hash_for_home" properly. */
1647 /* ISSUE: The hypervisor checks CHIP_HAS_REV1_DMA_PACKETS(). */
1648 int hash_for_home = hash_default;
1649
1650 /* FIXME: Hmmm. */
1651 if (!hash_default) {
1652 void *va = pfn_to_kaddr(pfn) + f->page_offset;
1653 BUG_ON(PageHighMem(f->page));
1654 finv_buffer_remote(va, f->size);
1655 }
1656
1657 cpa = ((phys_addr_t)pfn << PAGE_SHIFT) + f->page_offset;
1658 frags[n].cpa_lo = cpa;
1659 frags[n].cpa_hi = cpa >> 32;
1660 frags[n].length = f->size;
1661 frags[n].hash_for_home = hash_for_home;
1662 n++;
1663 }
1664
1665 return n;
1666}
1667
1668
1669/*
1670 * This function takes "skb", consisting of a header template and a
1671 * payload, and hands it to LEPP, to emit as one or more segments,
1672 * each consisting of a possibly modified header, plus a piece of the
1673 * payload, via a process known as "tcp segmentation offload".
1674 *
1675 * Usually, "data" will contain the header template, of size "sh_len",
1676 * and "sh->frags" will contain "skb->data_len" bytes of payload, and
1677 * there will be "sh->gso_segs" segments.
1678 *
1679 * Sometimes, if "sendfile()" requires copying, we will be called with
1680 * "data" containing the header and payload, with "frags" being empty.
1681 *
1682 * In theory, "sh->nr_frags" could be 3, but in practice, it seems
1683 * that this will never actually happen.
1684 *
1685 * See "emulate_large_send_offload()" for some reference code, which
1686 * does not handle checksumming.
1687 *
1688 * ISSUE: How do we make sure that high memory DMA does not migrate?
1689 */
1690static int tile_net_tx_tso(struct sk_buff *skb, struct net_device *dev)
1691{
1692 struct tile_net_priv *priv = netdev_priv(dev);
1693 int my_cpu = smp_processor_id();
1694 struct tile_net_cpu *info = priv->cpu[my_cpu];
1695 struct tile_net_stats_t *stats = &info->stats;
1696
1697 struct skb_shared_info *sh = skb_shinfo(skb);
1698
1699 unsigned char *data = skb->data;
1700
1701 /* The ip header follows the ethernet header. */
1702 struct iphdr *ih = ip_hdr(skb);
1703 unsigned int ih_len = ih->ihl * 4;
1704
1705 /* Note that "nh == ih", by definition. */
1706 unsigned char *nh = skb_network_header(skb);
1707 unsigned int eh_len = nh - data;
1708
1709 /* The tcp header follows the ip header. */
1710 struct tcphdr *th = (struct tcphdr *)(nh + ih_len);
1711 unsigned int th_len = th->doff * 4;
1712
1713 /* The total number of header bytes. */
1714 /* NOTE: This may be less than skb_headlen(skb). */
1715 unsigned int sh_len = eh_len + ih_len + th_len;
1716
1717 /* The number of payload bytes at "skb->data + sh_len". */
1718 /* This is non-zero for sendfile() without HIGHDMA. */
1719 unsigned int b_len = skb_headlen(skb) - sh_len;
1720
1721 /* The total number of payload bytes. */
1722 unsigned int d_len = b_len + skb->data_len;
1723
1724 /* The maximum payload size. */
1725 unsigned int p_len = sh->gso_size;
1726
1727 /* The total number of segments. */
1728 unsigned int num_segs = sh->gso_segs;
1729
1730 /* The temporary copy of the command. */
1731 u32 cmd_body[(LEPP_MAX_CMD_SIZE + 3) / 4];
1732 lepp_tso_cmd_t *cmd = (lepp_tso_cmd_t *)cmd_body;
1733
1734 /* Analyze the "frags". */
1735 unsigned int num_frags =
1736 tile_net_tx_frags(cmd->frags, skb, data + sh_len, b_len);
1737
1738 /* The size of the command, including frags and header. */
1739 size_t cmd_size = LEPP_TSO_CMD_SIZE(num_frags, sh_len);
1740
1741 /* The command header. */
1742 lepp_tso_cmd_t cmd_init = {
1743 .tso = true,
1744 .header_size = sh_len,
1745 .ip_offset = eh_len,
1746 .tcp_offset = eh_len + ih_len,
1747 .payload_size = p_len,
1748 .num_frags = num_frags,
1749 };
1750
1751 unsigned long irqflags;
1752
1753 lepp_queue_t *eq = priv->epp_queue;
1754
1755 struct sk_buff *olds[4];
1756 unsigned int wanted = 4;
1757 unsigned int i, nolds = 0;
1758
1759 unsigned int cmd_head, cmd_tail, cmd_next;
1760 unsigned int comp_tail;
1761
1762 unsigned int free_slots;
1763
1764
1765 /* Paranoia. */
1766 BUG_ON(skb->protocol != htons(ETH_P_IP));
1767 BUG_ON(ih->protocol != IPPROTO_TCP);
1768 BUG_ON(skb->ip_summed != CHECKSUM_PARTIAL);
1769 BUG_ON(num_frags > LEPP_MAX_FRAGS);
1770 /*--BUG_ON(num_segs != (d_len + (p_len - 1)) / p_len); */
1771 BUG_ON(num_segs <= 1);
1772
1773
1774 /* Finish preparing the command. */
1775
1776 /* Copy the command header. */
1777 *cmd = cmd_init;
1778
1779 /* Copy the "header". */
1780 memcpy(&cmd->frags[num_frags], data, sh_len);
1781
1782
1783 /* Prefetch and wait, to minimize time spent holding the spinlock. */
1784 prefetch_L1(&eq->comp_tail);
1785 prefetch_L1(&eq->cmd_tail);
1786 mb();
1787
1788
1789 /* Enqueue the command. */
1790
1791 spin_lock_irqsave(&priv->cmd_lock, irqflags);
1792
1793 /*
1794 * Handle completions if needed to make room.
1795 * HACK: Spin until there is sufficient room.
1796 */
1797 free_slots = lepp_num_free_comp_slots(eq);
1798 if (free_slots < 1) {
1799spin:
1800 nolds += tile_net_lepp_grab_comps(dev, olds + nolds,
1801 wanted - nolds, NULL);
1802 if (lepp_num_free_comp_slots(eq) < 1)
1803 goto spin;
1804 }
1805
1806 cmd_head = eq->cmd_head;
1807 cmd_tail = eq->cmd_tail;
1808
1809 /* NOTE: The "gotos" below are untested. */
1810
1811 /* Prepare to advance, detecting full queue. */
1812 cmd_next = cmd_tail + cmd_size;
1813 if (cmd_tail < cmd_head && cmd_next >= cmd_head)
1814 goto spin;
1815 if (cmd_next > LEPP_CMD_LIMIT) {
1816 cmd_next = 0;
1817 if (cmd_next == cmd_head)
1818 goto spin;
1819 }
1820
1821 /* Copy the command. */
1822 memcpy(&eq->cmds[cmd_tail], cmd, cmd_size);
1823
1824 /* Advance. */
1825 cmd_tail = cmd_next;
1826
1827 /* Record "skb" for eventual freeing. */
1828 comp_tail = eq->comp_tail;
1829 eq->comps[comp_tail] = skb;
1830 LEPP_QINC(comp_tail);
1831 eq->comp_tail = comp_tail;
1832
1833 /* Flush before allowing LEPP to handle the command. */
1834 __insn_mf();
1835
1836 eq->cmd_tail = cmd_tail;
1837
1838 spin_unlock_irqrestore(&priv->cmd_lock, irqflags);
1839
1840 if (nolds == 0)
1841 nolds = tile_net_lepp_grab_comps(dev, olds, wanted, NULL);
1842
1843 /* Handle completions. */
1844 for (i = 0; i < nolds; i++)
1845 kfree_skb(olds[i]);
1846
1847 /* Update stats. */
1848 stats->tx_packets += num_segs;
1849 stats->tx_bytes += (num_segs * sh_len) + d_len;
1850
1851 /* Make sure the egress timer is scheduled. */
1852 tile_net_schedule_egress_timer(info);
1853
1854 return NETDEV_TX_OK;
1855}
1856
1857
1858/*
1859 * Transmit a packet (called by the kernel via "hard_start_xmit" hook).
1860 */
1861static int tile_net_tx(struct sk_buff *skb, struct net_device *dev)
1862{
1863 struct tile_net_priv *priv = netdev_priv(dev);
1864 int my_cpu = smp_processor_id();
1865 struct tile_net_cpu *info = priv->cpu[my_cpu];
1866 struct tile_net_stats_t *stats = &info->stats;
1867
1868 unsigned long irqflags;
1869
1870 struct skb_shared_info *sh = skb_shinfo(skb);
1871
1872 unsigned int len = skb->len;
1873 unsigned char *data = skb->data;
1874
1875 unsigned int csum_start = skb->csum_start - skb_headroom(skb);
1876
1877 lepp_frag_t frags[LEPP_MAX_FRAGS];
1878
1879 unsigned int num_frags;
1880
1881 lepp_queue_t *eq = priv->epp_queue;
1882
1883 struct sk_buff *olds[4];
1884 unsigned int wanted = 4;
1885 unsigned int i, nolds = 0;
1886
1887 unsigned int cmd_size = sizeof(lepp_cmd_t);
1888
1889 unsigned int cmd_head, cmd_tail, cmd_next;
1890 unsigned int comp_tail;
1891
1892 lepp_cmd_t cmds[LEPP_MAX_FRAGS];
1893
1894 unsigned int free_slots;
1895
1896
1897 /*
1898 * This is paranoia, since we think that if the link doesn't come
1899 * up, telling Linux we have no carrier will keep it from trying
1900 * to transmit. If it does, though, we can't execute this routine,
1901 * since data structures we depend on aren't set up yet.
1902 */
1903 if (!info->registered)
1904 return NETDEV_TX_BUSY;
1905
1906
1907 /* Save the timestamp. */
1908 dev->trans_start = jiffies;
1909
1910
1911#ifdef TILE_NET_PARANOIA
1912#if CHIP_HAS_CBOX_HOME_MAP()
1913 if (hash_default) {
1914 HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)data);
1915 if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3)
1916 panic("Non-coherent egress buffer!");
1917 }
1918#endif
1919#endif
1920
1921
1922#ifdef TILE_NET_DUMP_PACKETS
1923 /* ISSUE: Does not dump the "frags". */
1924 dump_packet(data, skb_headlen(skb), "tx");
1925#endif /* TILE_NET_DUMP_PACKETS */
1926
1927
1928 if (sh->gso_size != 0)
1929 return tile_net_tx_tso(skb, dev);
1930
1931
1932 /* Prepare the commands. */
1933
1934 num_frags = tile_net_tx_frags(frags, skb, data, skb_headlen(skb));
1935
1936 for (i = 0; i < num_frags; i++) {
1937
1938 bool final = (i == num_frags - 1);
1939
1940 lepp_cmd_t cmd = {
1941 .cpa_lo = frags[i].cpa_lo,
1942 .cpa_hi = frags[i].cpa_hi,
1943 .length = frags[i].length,
1944 .hash_for_home = frags[i].hash_for_home,
1945 .send_completion = final,
1946 .end_of_packet = final
1947 };
1948
1949 if (i == 0 && skb->ip_summed == CHECKSUM_PARTIAL) {
1950 cmd.compute_checksum = 1;
1951 cmd.checksum_data.bits.start_byte = csum_start;
1952 cmd.checksum_data.bits.count = len - csum_start;
1953 cmd.checksum_data.bits.destination_byte =
1954 csum_start + skb->csum_offset;
1955 }
1956
1957 cmds[i] = cmd;
1958 }
1959
1960
1961 /* Prefetch and wait, to minimize time spent holding the spinlock. */
1962 prefetch_L1(&eq->comp_tail);
1963 prefetch_L1(&eq->cmd_tail);
1964 mb();
1965
1966
1967 /* Enqueue the commands. */
1968
1969 spin_lock_irqsave(&priv->cmd_lock, irqflags);
1970
1971 /*
1972 * Handle completions if needed to make room.
1973 * HACK: Spin until there is sufficient room.
1974 */
1975 free_slots = lepp_num_free_comp_slots(eq);
1976 if (free_slots < 1) {
1977spin:
1978 nolds += tile_net_lepp_grab_comps(dev, olds + nolds,
1979 wanted - nolds, NULL);
1980 if (lepp_num_free_comp_slots(eq) < 1)
1981 goto spin;
1982 }
1983
1984 cmd_head = eq->cmd_head;
1985 cmd_tail = eq->cmd_tail;
1986
1987 /* NOTE: The "gotos" below are untested. */
1988
1989 /* Copy the commands, or fail. */
1990 for (i = 0; i < num_frags; i++) {
1991
1992 /* Prepare to advance, detecting full queue. */
1993 cmd_next = cmd_tail + cmd_size;
1994 if (cmd_tail < cmd_head && cmd_next >= cmd_head)
1995 goto spin;
1996 if (cmd_next > LEPP_CMD_LIMIT) {
1997 cmd_next = 0;
1998 if (cmd_next == cmd_head)
1999 goto spin;
2000 }
2001
2002 /* Copy the command. */
2003 *(lepp_cmd_t *)&eq->cmds[cmd_tail] = cmds[i];
2004
2005 /* Advance. */
2006 cmd_tail = cmd_next;
2007 }
2008
2009 /* Record "skb" for eventual freeing. */
2010 comp_tail = eq->comp_tail;
2011 eq->comps[comp_tail] = skb;
2012 LEPP_QINC(comp_tail);
2013 eq->comp_tail = comp_tail;
2014
2015 /* Flush before allowing LEPP to handle the command. */
2016 __insn_mf();
2017
2018 eq->cmd_tail = cmd_tail;
2019
2020 spin_unlock_irqrestore(&priv->cmd_lock, irqflags);
2021
2022 if (nolds == 0)
2023 nolds = tile_net_lepp_grab_comps(dev, olds, wanted, NULL);
2024
2025 /* Handle completions. */
2026 for (i = 0; i < nolds; i++)
2027 kfree_skb(olds[i]);
2028
2029 /* HACK: Track "expanded" size for short packets (e.g. 42 < 60). */
2030 stats->tx_packets++;
2031 stats->tx_bytes += ((len >= ETH_ZLEN) ? len : ETH_ZLEN);
2032
2033 /* Make sure the egress timer is scheduled. */
2034 tile_net_schedule_egress_timer(info);
2035
2036 return NETDEV_TX_OK;
2037}
2038
2039
2040/*
2041 * Deal with a transmit timeout.
2042 */
2043static void tile_net_tx_timeout(struct net_device *dev)
2044{
2045 PDEBUG("tile_net_tx_timeout()\n");
2046 PDEBUG("Transmit timeout at %ld, latency %ld\n", jiffies,
2047 jiffies - dev->trans_start);
2048
2049 /* XXX: ISSUE: This doesn't seem useful for us. */
2050 netif_wake_queue(dev);
2051}
2052
2053
2054/*
2055 * Ioctl commands.
2056 */
2057static int tile_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
2058{
2059 return -EOPNOTSUPP;
2060}
2061
2062
2063/*
2064 * Get System Network Statistics.
2065 *
2066 * Returns the address of the device statistics structure.
2067 */
2068static struct net_device_stats *tile_net_get_stats(struct net_device *dev)
2069{
2070 struct tile_net_priv *priv = netdev_priv(dev);
2071 u32 rx_packets = 0;
2072 u32 tx_packets = 0;
2073 u32 rx_bytes = 0;
2074 u32 tx_bytes = 0;
2075 int i;
2076
2077 for_each_online_cpu(i) {
2078 if (priv->cpu[i]) {
2079 rx_packets += priv->cpu[i]->stats.rx_packets;
2080 rx_bytes += priv->cpu[i]->stats.rx_bytes;
2081 tx_packets += priv->cpu[i]->stats.tx_packets;
2082 tx_bytes += priv->cpu[i]->stats.tx_bytes;
2083 }
2084 }
2085
2086 priv->stats.rx_packets = rx_packets;
2087 priv->stats.rx_bytes = rx_bytes;
2088 priv->stats.tx_packets = tx_packets;
2089 priv->stats.tx_bytes = tx_bytes;
2090
2091 return &priv->stats;
2092}
2093
2094
2095/*
2096 * Change the "mtu".
2097 *
2098 * The "change_mtu" method is usually not needed.
2099 * If you need it, it must be like this.
2100 */
2101static int tile_net_change_mtu(struct net_device *dev, int new_mtu)
2102{
2103 PDEBUG("tile_net_change_mtu()\n");
2104
2105 /* Check ranges. */
2106 if ((new_mtu < 68) || (new_mtu > 1500))
2107 return -EINVAL;
2108
2109 /* Accept the value. */
2110 dev->mtu = new_mtu;
2111
2112 return 0;
2113}
2114
2115
2116/*
2117 * Change the Ethernet Address of the NIC.
2118 *
2119 * The hypervisor driver does not support changing MAC address. However,
2120 * the IPP does not do anything with the MAC address, so the address which
2121 * gets used on outgoing packets, and which is accepted on incoming packets,
2122 * is completely up to the NetIO program or kernel driver which is actually
2123 * handling them.
2124 *
2125 * Returns 0 on success, negative on failure.
2126 */
2127static int tile_net_set_mac_address(struct net_device *dev, void *p)
2128{
2129 struct sockaddr *addr = p;
2130
2131 if (!is_valid_ether_addr(addr->sa_data))
2132 return -EINVAL;
2133
2134 /* ISSUE: Note that "dev_addr" is now a pointer. */
2135 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
2136
2137 return 0;
2138}
2139
2140
2141/*
2142 * Obtain the MAC address from the hypervisor.
2143 * This must be done before opening the device.
2144 */
2145static int tile_net_get_mac(struct net_device *dev)
2146{
2147 struct tile_net_priv *priv = netdev_priv(dev);
2148
2149 char hv_dev_name[32];
2150 int len;
2151
2152 __netio_getset_offset_t offset = { .word = NETIO_IPP_PARAM_OFF };
2153
2154 int ret;
2155
2156 /* For example, "xgbe0". */
2157 strcpy(hv_dev_name, dev->name);
2158 len = strlen(hv_dev_name);
2159
2160 /* For example, "xgbe/0". */
2161 hv_dev_name[len] = hv_dev_name[len - 1];
2162 hv_dev_name[len - 1] = '/';
2163 len++;
2164
2165 /* For example, "xgbe/0/native_hash". */
2166 strcpy(hv_dev_name + len, hash_default ? "/native_hash" : "/native");
2167
2168 /* Get the hypervisor handle for this device. */
2169 priv->hv_devhdl = hv_dev_open((HV_VirtAddr)hv_dev_name, 0);
2170 PDEBUG("hv_dev_open(%s) returned %d %p\n",
2171 hv_dev_name, priv->hv_devhdl, &priv->hv_devhdl);
2172 if (priv->hv_devhdl < 0) {
2173 if (priv->hv_devhdl == HV_ENODEV)
2174 printk(KERN_DEBUG "Ignoring unconfigured device %s\n",
2175 hv_dev_name);
2176 else
2177 printk(KERN_DEBUG "hv_dev_open(%s) returned %d\n",
2178 hv_dev_name, priv->hv_devhdl);
2179 return -1;
2180 }
2181
2182 /*
2183 * Read the hardware address from the hypervisor.
2184 * ISSUE: Note that "dev_addr" is now a pointer.
2185 */
2186 offset.bits.class = NETIO_PARAM;
2187 offset.bits.addr = NETIO_PARAM_MAC;
2188 ret = hv_dev_pread(priv->hv_devhdl, 0,
2189 (HV_VirtAddr)dev->dev_addr, dev->addr_len,
2190 offset.word);
2191 PDEBUG("hv_dev_pread(NETIO_PARAM_MAC) returned %d\n", ret);
2192 if (ret <= 0) {
2193 printk(KERN_DEBUG "hv_dev_pread(NETIO_PARAM_MAC) %s failed\n",
2194 dev->name);
2195 /*
2196 * Since the device is configured by the hypervisor but we
2197 * can't get its MAC address, we are most likely running
2198 * the simulator, so let's generate a random MAC address.
2199 */
2200 random_ether_addr(dev->dev_addr);
2201 }
2202
2203 return 0;
2204}
2205
2206
2207static struct net_device_ops tile_net_ops = {
2208 .ndo_open = tile_net_open,
2209 .ndo_stop = tile_net_stop,
2210 .ndo_start_xmit = tile_net_tx,
2211 .ndo_do_ioctl = tile_net_ioctl,
2212 .ndo_get_stats = tile_net_get_stats,
2213 .ndo_change_mtu = tile_net_change_mtu,
2214 .ndo_tx_timeout = tile_net_tx_timeout,
2215 .ndo_set_mac_address = tile_net_set_mac_address
2216};
2217
2218
2219/*
2220 * The setup function.
2221 *
2222 * This uses ether_setup() to assign various fields in dev, including
2223 * setting IFF_BROADCAST and IFF_MULTICAST, then sets some extra fields.
2224 */
2225static void tile_net_setup(struct net_device *dev)
2226{
2227 PDEBUG("tile_net_setup()\n");
2228
2229 ether_setup(dev);
2230
2231 dev->netdev_ops = &tile_net_ops;
2232
2233 dev->watchdog_timeo = TILE_NET_TIMEOUT;
2234
2235 /* We want lockless xmit. */
2236 dev->features |= NETIF_F_LLTX;
2237
2238 /* We support hardware tx checksums. */
2239 dev->features |= NETIF_F_HW_CSUM;
2240
2241 /* We support scatter/gather. */
2242 dev->features |= NETIF_F_SG;
2243
2244 /* We support TSO. */
2245 dev->features |= NETIF_F_TSO;
2246
2247#ifdef TILE_NET_GSO
2248 /* We support GSO. */
2249 dev->features |= NETIF_F_GSO;
2250#endif
2251
2252 if (hash_default)
2253 dev->features |= NETIF_F_HIGHDMA;
2254
2255 /* ISSUE: We should support NETIF_F_UFO. */
2256
2257 dev->tx_queue_len = TILE_NET_TX_QUEUE_LEN;
2258
2259 dev->mtu = TILE_NET_MTU;
2260}
2261
2262
2263/*
2264 * Allocate the device structure, register the device, and obtain the
2265 * MAC address from the hypervisor.
2266 */
2267static struct net_device *tile_net_dev_init(const char *name)
2268{
2269 int ret;
2270 struct net_device *dev;
2271 struct tile_net_priv *priv;
2272 struct page *page;
2273
2274 /*
2275 * Allocate the device structure. This allocates "priv", calls
2276 * tile_net_setup(), and saves "name". Normally, "name" is a
2277 * template, instantiated by register_netdev(), but not for us.
2278 */
2279 dev = alloc_netdev(sizeof(*priv), name, tile_net_setup);
2280 if (!dev) {
2281 pr_err("alloc_netdev(%s) failed\n", name);
2282 return NULL;
2283 }
2284
2285 priv = netdev_priv(dev);
2286
2287 /* Initialize "priv". */
2288
2289 memset(priv, 0, sizeof(*priv));
2290
2291 /* Save "dev" for "tile_net_open_retry()". */
2292 priv->dev = dev;
2293
2294 INIT_DELAYED_WORK(&priv->retry_work, tile_net_open_retry);
2295
2296 spin_lock_init(&priv->cmd_lock);
2297 spin_lock_init(&priv->comp_lock);
2298
2299 /* Allocate "epp_queue". */
2300 BUG_ON(get_order(sizeof(lepp_queue_t)) != 0);
2301 page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0);
2302 if (!page) {
2303 free_netdev(dev);
2304 return NULL;
2305 }
2306 priv->epp_queue = page_address(page);
2307
2308 /* Register the network device. */
2309 ret = register_netdev(dev);
2310 if (ret) {
2311 pr_err("register_netdev %s failed %d\n", dev->name, ret);
2312 free_page((unsigned long)priv->epp_queue);
2313 free_netdev(dev);
2314 return NULL;
2315 }
2316
2317 /* Get the MAC address. */
2318 ret = tile_net_get_mac(dev);
2319 if (ret < 0) {
2320 unregister_netdev(dev);
2321 free_page((unsigned long)priv->epp_queue);
2322 free_netdev(dev);
2323 return NULL;
2324 }
2325
2326 return dev;
2327}
2328
2329
2330/*
2331 * Module cleanup.
2332 */
2333static void tile_net_cleanup(void)
2334{
2335 int i;
2336
2337 for (i = 0; i < TILE_NET_DEVS; i++) {
2338 if (tile_net_devs[i]) {
2339 struct net_device *dev = tile_net_devs[i];
2340 struct tile_net_priv *priv = netdev_priv(dev);
2341 unregister_netdev(dev);
2342 finv_buffer(priv->epp_queue, PAGE_SIZE);
2343 free_page((unsigned long)priv->epp_queue);
2344 free_netdev(dev);
2345 }
2346 }
2347}
2348
2349
2350/*
2351 * Module initialization.
2352 */
2353static int tile_net_init_module(void)
2354{
2355 pr_info("Tilera IPP Net Driver\n");
2356
2357 tile_net_devs[0] = tile_net_dev_init("xgbe0");
2358 tile_net_devs[1] = tile_net_dev_init("xgbe1");
2359 tile_net_devs[2] = tile_net_dev_init("gbe0");
2360 tile_net_devs[3] = tile_net_dev_init("gbe1");
2361
2362 return 0;
2363}
2364
2365
2366#ifndef MODULE
2367/*
2368 * The "network_cpus" boot argument specifies the cpus that are dedicated
2369 * to handle ingress packets.
2370 *
2371 * The parameter should be in the form "network_cpus=m-n[,x-y]", where
2372 * m, n, x, y are integer numbers that represent the cpus that can be
2373 * neither a dedicated cpu nor a dataplane cpu.
2374 */
2375static int __init network_cpus_setup(char *str)
2376{
2377 int rc = cpulist_parse_crop(str, &network_cpus_map);
2378 if (rc != 0) {
2379 pr_warning("network_cpus=%s: malformed cpu list\n",
2380 str);
2381 } else {
2382
2383 /* Remove dedicated cpus. */
2384 cpumask_and(&network_cpus_map, &network_cpus_map,
2385 cpu_possible_mask);
2386
2387
2388 if (cpumask_empty(&network_cpus_map)) {
2389 pr_warning("Ignoring network_cpus='%s'.\n",
2390 str);
2391 } else {
2392 char buf[1024];
2393 cpulist_scnprintf(buf, sizeof(buf), &network_cpus_map);
2394 pr_info("Linux network CPUs: %s\n", buf);
2395 network_cpus_used = true;
2396 }
2397 }
2398
2399 return 0;
2400}
2401__setup("network_cpus=", network_cpus_setup);
2402#endif
2403
2404
2405module_init(tile_net_init_module);
2406module_exit(tile_net_cleanup);
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index f01e344cf4bd..98e6fdf34d30 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -49,6 +49,7 @@ obj-$(CONFIG_MIPS) += setup-bus.o setup-irq.o
49obj-$(CONFIG_X86_VISWS) += setup-irq.o 49obj-$(CONFIG_X86_VISWS) += setup-irq.o
50obj-$(CONFIG_MN10300) += setup-bus.o 50obj-$(CONFIG_MN10300) += setup-bus.o
51obj-$(CONFIG_MICROBLAZE) += setup-bus.o 51obj-$(CONFIG_MICROBLAZE) += setup-bus.o
52obj-$(CONFIG_TILE) += setup-bus.o setup-irq.o
52 53
53# 54#
54# ACPI Related PCI FW Functions 55# ACPI Related PCI FW Functions
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index f5c63fe9db5c..6f9350cabbd5 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2136,6 +2136,24 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82865_HB,
2136DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82875_HB, 2136DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82875_HB,
2137 quirk_unhide_mch_dev6); 2137 quirk_unhide_mch_dev6);
2138 2138
2139#ifdef CONFIG_TILE
2140/*
2141 * The Tilera TILEmpower platform needs to set the link speed
2142 * to 2.5GT(Giga-Transfers)/s (Gen 1). The default link speed
2143 * setting is 5GT/s (Gen 2). 0x98 is the Link Control2 PCIe
2144 * capability register of the PEX8624 PCIe switch. The switch
2145 * supports link speed auto negotiation, but falsely sets
2146 * the link speed to 5GT/s.
2147 */
2148static void __devinit quirk_tile_plx_gen1(struct pci_dev *dev)
2149{
2150 if (tile_plx_gen1) {
2151 pci_write_config_dword(dev, 0x98, 0x1);
2152 mdelay(50);
2153 }
2154}
2155DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PLX, 0x8624, quirk_tile_plx_gen1);
2156#endif /* CONFIG_TILE */
2139 2157
2140#ifdef CONFIG_PCI_MSI 2158#ifdef CONFIG_PCI_MSI
2141/* Some chipsets do not support MSI. We cannot easily rely on setting 2159/* Some chipsets do not support MSI. We cannot easily rely on setting
diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c
index 752dbee06af5..5d9c66627b6e 100644
--- a/drivers/s390/cio/qdio_thinint.c
+++ b/drivers/s390/cio/qdio_thinint.c
@@ -292,8 +292,8 @@ void qdio_shutdown_thinint(struct qdio_irq *irq_ptr)
292 return; 292 return;
293 293
294 /* reset adapter interrupt indicators */ 294 /* reset adapter interrupt indicators */
295 put_indicator(irq_ptr->dsci);
296 set_subchannel_ind(irq_ptr, 1); 295 set_subchannel_ind(irq_ptr, 1);
296 put_indicator(irq_ptr->dsci);
297} 297}
298 298
299void __exit tiqdio_unregister_thinints(void) 299void __exit tiqdio_unregister_thinints(void)
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 7845d1f7d1d9..b50bc4bd5c56 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -91,23 +91,10 @@ static inline int compressed_bio_size(struct btrfs_root *root,
91static struct bio *compressed_bio_alloc(struct block_device *bdev, 91static struct bio *compressed_bio_alloc(struct block_device *bdev,
92 u64 first_byte, gfp_t gfp_flags) 92 u64 first_byte, gfp_t gfp_flags)
93{ 93{
94 struct bio *bio;
95 int nr_vecs; 94 int nr_vecs;
96 95
97 nr_vecs = bio_get_nr_vecs(bdev); 96 nr_vecs = bio_get_nr_vecs(bdev);
98 bio = bio_alloc(gfp_flags, nr_vecs); 97 return btrfs_bio_alloc(bdev, first_byte >> 9, nr_vecs, gfp_flags);
99
100 if (bio == NULL && (current->flags & PF_MEMALLOC)) {
101 while (!bio && (nr_vecs /= 2))
102 bio = bio_alloc(gfp_flags, nr_vecs);
103 }
104
105 if (bio) {
106 bio->bi_size = 0;
107 bio->bi_bdev = bdev;
108 bio->bi_sector = first_byte >> 9;
109 }
110 return bio;
111} 98}
112 99
113static int check_compressed_csum(struct inode *inode, 100static int check_compressed_csum(struct inode *inode,
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 8db9234f6b41..af52f6d7a4d8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -808,9 +808,9 @@ struct btrfs_block_group_cache {
808 int extents_thresh; 808 int extents_thresh;
809 int free_extents; 809 int free_extents;
810 int total_bitmaps; 810 int total_bitmaps;
811 int ro:1; 811 unsigned int ro:1;
812 int dirty:1; 812 unsigned int dirty:1;
813 int iref:1; 813 unsigned int iref:1;
814 814
815 int disk_cache_state; 815 int disk_cache_state;
816 816
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index fb827d0d7181..c547cca26a26 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -28,6 +28,7 @@
28#include <linux/freezer.h> 28#include <linux/freezer.h>
29#include <linux/crc32c.h> 29#include <linux/crc32c.h>
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/migrate.h>
31#include "compat.h" 32#include "compat.h"
32#include "ctree.h" 33#include "ctree.h"
33#include "disk-io.h" 34#include "disk-io.h"
@@ -355,6 +356,8 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
355 ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, 356 ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
356 btrfs_header_generation(eb)); 357 btrfs_header_generation(eb));
357 BUG_ON(ret); 358 BUG_ON(ret);
359 WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN));
360
358 found_start = btrfs_header_bytenr(eb); 361 found_start = btrfs_header_bytenr(eb);
359 if (found_start != start) { 362 if (found_start != start) {
360 WARN_ON(1); 363 WARN_ON(1);
@@ -693,6 +696,29 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
693 __btree_submit_bio_done); 696 __btree_submit_bio_done);
694} 697}
695 698
699static int btree_migratepage(struct address_space *mapping,
700 struct page *newpage, struct page *page)
701{
702 /*
703 * we can't safely write a btree page from here,
704 * we haven't done the locking hook
705 */
706 if (PageDirty(page))
707 return -EAGAIN;
708 /*
709 * Buffers may be managed in a filesystem specific way.
710 * We must have no buffers or drop them.
711 */
712 if (page_has_private(page) &&
713 !try_to_release_page(page, GFP_KERNEL))
714 return -EAGAIN;
715#ifdef CONFIG_MIGRATION
716 return migrate_page(mapping, newpage, page);
717#else
718 return -ENOSYS;
719#endif
720}
721
696static int btree_writepage(struct page *page, struct writeback_control *wbc) 722static int btree_writepage(struct page *page, struct writeback_control *wbc)
697{ 723{
698 struct extent_io_tree *tree; 724 struct extent_io_tree *tree;
@@ -707,8 +733,7 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc)
707 } 733 }
708 734
709 redirty_page_for_writepage(wbc, page); 735 redirty_page_for_writepage(wbc, page);
710 eb = btrfs_find_tree_block(root, page_offset(page), 736 eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE);
711 PAGE_CACHE_SIZE);
712 WARN_ON(!eb); 737 WARN_ON(!eb);
713 738
714 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); 739 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
@@ -799,6 +824,9 @@ static const struct address_space_operations btree_aops = {
799 .releasepage = btree_releasepage, 824 .releasepage = btree_releasepage,
800 .invalidatepage = btree_invalidatepage, 825 .invalidatepage = btree_invalidatepage,
801 .sync_page = block_sync_page, 826 .sync_page = block_sync_page,
827#ifdef CONFIG_MIGRATION
828 .migratepage = btree_migratepage,
829#endif
802}; 830};
803 831
804int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, 832int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
@@ -1538,10 +1566,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1538 GFP_NOFS); 1566 GFP_NOFS);
1539 struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root), 1567 struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
1540 GFP_NOFS); 1568 GFP_NOFS);
1541 struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root), 1569 struct btrfs_root *tree_root = btrfs_sb(sb);
1542 GFP_NOFS); 1570 struct btrfs_fs_info *fs_info = tree_root->fs_info;
1543 struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info),
1544 GFP_NOFS);
1545 struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), 1571 struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root),
1546 GFP_NOFS); 1572 GFP_NOFS);
1547 struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), 1573 struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root),
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 951ef09b82f4..6f0444473594 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -232,9 +232,85 @@ fail:
232 return ERR_PTR(ret); 232 return ERR_PTR(ret);
233} 233}
234 234
235static int btrfs_get_name(struct dentry *parent, char *name,
236 struct dentry *child)
237{
238 struct inode *inode = child->d_inode;
239 struct inode *dir = parent->d_inode;
240 struct btrfs_path *path;
241 struct btrfs_root *root = BTRFS_I(dir)->root;
242 struct btrfs_inode_ref *iref;
243 struct btrfs_root_ref *rref;
244 struct extent_buffer *leaf;
245 unsigned long name_ptr;
246 struct btrfs_key key;
247 int name_len;
248 int ret;
249
250 if (!dir || !inode)
251 return -EINVAL;
252
253 if (!S_ISDIR(dir->i_mode))
254 return -EINVAL;
255
256 path = btrfs_alloc_path();
257 if (!path)
258 return -ENOMEM;
259 path->leave_spinning = 1;
260
261 if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
262 key.objectid = BTRFS_I(inode)->root->root_key.objectid;
263 key.type = BTRFS_ROOT_BACKREF_KEY;
264 key.offset = (u64)-1;
265 root = root->fs_info->tree_root;
266 } else {
267 key.objectid = inode->i_ino;
268 key.offset = dir->i_ino;
269 key.type = BTRFS_INODE_REF_KEY;
270 }
271
272 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
273 if (ret < 0) {
274 btrfs_free_path(path);
275 return ret;
276 } else if (ret > 0) {
277 if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
278 path->slots[0]--;
279 } else {
280 btrfs_free_path(path);
281 return -ENOENT;
282 }
283 }
284 leaf = path->nodes[0];
285
286 if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
287 rref = btrfs_item_ptr(leaf, path->slots[0],
288 struct btrfs_root_ref);
289 name_ptr = (unsigned long)(rref + 1);
290 name_len = btrfs_root_ref_name_len(leaf, rref);
291 } else {
292 iref = btrfs_item_ptr(leaf, path->slots[0],
293 struct btrfs_inode_ref);
294 name_ptr = (unsigned long)(iref + 1);
295 name_len = btrfs_inode_ref_name_len(leaf, iref);
296 }
297
298 read_extent_buffer(leaf, name, name_ptr, name_len);
299 btrfs_free_path(path);
300
301 /*
302 * have to add the null termination to make sure that reconnect_path
303 * gets the right len for strlen
304 */
305 name[name_len] = '\0';
306
307 return 0;
308}
309
235const struct export_operations btrfs_export_ops = { 310const struct export_operations btrfs_export_ops = {
236 .encode_fh = btrfs_encode_fh, 311 .encode_fh = btrfs_encode_fh,
237 .fh_to_dentry = btrfs_fh_to_dentry, 312 .fh_to_dentry = btrfs_fh_to_dentry,
238 .fh_to_parent = btrfs_fh_to_parent, 313 .fh_to_parent = btrfs_fh_to_parent,
239 .get_parent = btrfs_get_parent, 314 .get_parent = btrfs_get_parent,
315 .get_name = btrfs_get_name,
240}; 316};
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0c097f3aec41..bcd59c7dfb57 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3412,7 +3412,7 @@ again:
3412 * our reservation. 3412 * our reservation.
3413 */ 3413 */
3414 if (unused <= space_info->total_bytes) { 3414 if (unused <= space_info->total_bytes) {
3415 unused -= space_info->total_bytes; 3415 unused = space_info->total_bytes - unused;
3416 if (unused >= num_bytes) { 3416 if (unused >= num_bytes) {
3417 if (!reserved) 3417 if (!reserved)
3418 space_info->bytes_reserved += orig_bytes; 3418 space_info->bytes_reserved += orig_bytes;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index eac10e3260a9..3e86b9f36507 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1828,9 +1828,9 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
1828 bio_put(bio); 1828 bio_put(bio);
1829} 1829}
1830 1830
1831static struct bio * 1831struct bio *
1832extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, 1832btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
1833 gfp_t gfp_flags) 1833 gfp_t gfp_flags)
1834{ 1834{
1835 struct bio *bio; 1835 struct bio *bio;
1836 1836
@@ -1919,7 +1919,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
1919 else 1919 else
1920 nr = bio_get_nr_vecs(bdev); 1920 nr = bio_get_nr_vecs(bdev);
1921 1921
1922 bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); 1922 bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
1923 1923
1924 bio_add_page(bio, page, page_size, offset); 1924 bio_add_page(bio, page, page_size, offset);
1925 bio->bi_end_io = end_io_func; 1925 bio->bi_end_io = end_io_func;
@@ -2901,21 +2901,53 @@ out:
2901int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2901int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2902 __u64 start, __u64 len, get_extent_t *get_extent) 2902 __u64 start, __u64 len, get_extent_t *get_extent)
2903{ 2903{
2904 int ret; 2904 int ret = 0;
2905 u64 off = start; 2905 u64 off = start;
2906 u64 max = start + len; 2906 u64 max = start + len;
2907 u32 flags = 0; 2907 u32 flags = 0;
2908 u32 found_type;
2909 u64 last;
2908 u64 disko = 0; 2910 u64 disko = 0;
2911 struct btrfs_key found_key;
2909 struct extent_map *em = NULL; 2912 struct extent_map *em = NULL;
2910 struct extent_state *cached_state = NULL; 2913 struct extent_state *cached_state = NULL;
2914 struct btrfs_path *path;
2915 struct btrfs_file_extent_item *item;
2911 int end = 0; 2916 int end = 0;
2912 u64 em_start = 0, em_len = 0; 2917 u64 em_start = 0, em_len = 0;
2913 unsigned long emflags; 2918 unsigned long emflags;
2914 ret = 0; 2919 int hole = 0;
2915 2920
2916 if (len == 0) 2921 if (len == 0)
2917 return -EINVAL; 2922 return -EINVAL;
2918 2923
2924 path = btrfs_alloc_path();
2925 if (!path)
2926 return -ENOMEM;
2927 path->leave_spinning = 1;
2928
2929 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
2930 path, inode->i_ino, -1, 0);
2931 if (ret < 0) {
2932 btrfs_free_path(path);
2933 return ret;
2934 }
2935 WARN_ON(!ret);
2936 path->slots[0]--;
2937 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2938 struct btrfs_file_extent_item);
2939 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
2940 found_type = btrfs_key_type(&found_key);
2941
2942 /* No extents, just return */
2943 if (found_key.objectid != inode->i_ino ||
2944 found_type != BTRFS_EXTENT_DATA_KEY) {
2945 btrfs_free_path(path);
2946 return 0;
2947 }
2948 last = found_key.offset;
2949 btrfs_free_path(path);
2950
2919 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, 2951 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
2920 &cached_state, GFP_NOFS); 2952 &cached_state, GFP_NOFS);
2921 em = get_extent(inode, NULL, 0, off, max - off, 0); 2953 em = get_extent(inode, NULL, 0, off, max - off, 0);
@@ -2925,11 +2957,18 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2925 ret = PTR_ERR(em); 2957 ret = PTR_ERR(em);
2926 goto out; 2958 goto out;
2927 } 2959 }
2960
2928 while (!end) { 2961 while (!end) {
2962 hole = 0;
2929 off = em->start + em->len; 2963 off = em->start + em->len;
2930 if (off >= max) 2964 if (off >= max)
2931 end = 1; 2965 end = 1;
2932 2966
2967 if (em->block_start == EXTENT_MAP_HOLE) {
2968 hole = 1;
2969 goto next;
2970 }
2971
2933 em_start = em->start; 2972 em_start = em->start;
2934 em_len = em->len; 2973 em_len = em->len;
2935 2974
@@ -2939,8 +2978,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2939 if (em->block_start == EXTENT_MAP_LAST_BYTE) { 2978 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
2940 end = 1; 2979 end = 1;
2941 flags |= FIEMAP_EXTENT_LAST; 2980 flags |= FIEMAP_EXTENT_LAST;
2942 } else if (em->block_start == EXTENT_MAP_HOLE) {
2943 flags |= FIEMAP_EXTENT_UNWRITTEN;
2944 } else if (em->block_start == EXTENT_MAP_INLINE) { 2981 } else if (em->block_start == EXTENT_MAP_INLINE) {
2945 flags |= (FIEMAP_EXTENT_DATA_INLINE | 2982 flags |= (FIEMAP_EXTENT_DATA_INLINE |
2946 FIEMAP_EXTENT_NOT_ALIGNED); 2983 FIEMAP_EXTENT_NOT_ALIGNED);
@@ -2953,10 +2990,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2953 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 2990 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
2954 flags |= FIEMAP_EXTENT_ENCODED; 2991 flags |= FIEMAP_EXTENT_ENCODED;
2955 2992
2993next:
2956 emflags = em->flags; 2994 emflags = em->flags;
2957 free_extent_map(em); 2995 free_extent_map(em);
2958 em = NULL; 2996 em = NULL;
2959
2960 if (!end) { 2997 if (!end) {
2961 em = get_extent(inode, NULL, 0, off, max - off, 0); 2998 em = get_extent(inode, NULL, 0, off, max - off, 0);
2962 if (!em) 2999 if (!em)
@@ -2967,15 +3004,23 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2967 } 3004 }
2968 emflags = em->flags; 3005 emflags = em->flags;
2969 } 3006 }
3007
2970 if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) { 3008 if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
2971 flags |= FIEMAP_EXTENT_LAST; 3009 flags |= FIEMAP_EXTENT_LAST;
2972 end = 1; 3010 end = 1;
2973 } 3011 }
2974 3012
2975 ret = fiemap_fill_next_extent(fieinfo, em_start, disko, 3013 if (em_start == last) {
2976 em_len, flags); 3014 flags |= FIEMAP_EXTENT_LAST;
2977 if (ret) 3015 end = 1;
2978 goto out_free; 3016 }
3017
3018 if (!hole) {
3019 ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
3020 em_len, flags);
3021 if (ret)
3022 goto out_free;
3023 }
2979 } 3024 }
2980out_free: 3025out_free:
2981 free_extent_map(em); 3026 free_extent_map(em);
@@ -3836,8 +3881,10 @@ int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
3836 3881
3837 spin_lock(&tree->buffer_lock); 3882 spin_lock(&tree->buffer_lock);
3838 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); 3883 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3839 if (!eb) 3884 if (!eb) {
3840 goto out; 3885 spin_unlock(&tree->buffer_lock);
3886 return ret;
3887 }
3841 3888
3842 if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { 3889 if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
3843 ret = 0; 3890 ret = 0;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 1c6d4f342ef7..4183c8178f01 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -310,4 +310,7 @@ int extent_clear_unlock_delalloc(struct inode *inode,
310 struct extent_io_tree *tree, 310 struct extent_io_tree *tree,
311 u64 start, u64 end, struct page *locked_page, 311 u64 start, u64 end, struct page *locked_page,
312 unsigned long op); 312 unsigned long op);
313struct bio *
314btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
315 gfp_t gfp_flags);
313#endif 316#endif
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e354c33df082..c1faded5fca0 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1047,8 +1047,14 @@ out:
1047 1047
1048 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { 1048 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
1049 trans = btrfs_start_transaction(root, 0); 1049 trans = btrfs_start_transaction(root, 0);
1050 if (IS_ERR(trans)) {
1051 num_written = PTR_ERR(trans);
1052 goto done;
1053 }
1054 mutex_lock(&inode->i_mutex);
1050 ret = btrfs_log_dentry_safe(trans, root, 1055 ret = btrfs_log_dentry_safe(trans, root,
1051 file->f_dentry); 1056 file->f_dentry);
1057 mutex_unlock(&inode->i_mutex);
1052 if (ret == 0) { 1058 if (ret == 0) {
1053 ret = btrfs_sync_log(trans, root); 1059 ret = btrfs_sync_log(trans, root);
1054 if (ret == 0) 1060 if (ret == 0)
@@ -1067,6 +1073,7 @@ out:
1067 (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); 1073 (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
1068 } 1074 }
1069 } 1075 }
1076done:
1070 current->backing_dev_info = NULL; 1077 current->backing_dev_info = NULL;
1071 return num_written ? num_written : err; 1078 return num_written ? num_written : err;
1072} 1079}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 558cac2dfa54..8039390bd6a6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4501,6 +4501,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4501 BTRFS_I(inode)->index_cnt = 2; 4501 BTRFS_I(inode)->index_cnt = 2;
4502 BTRFS_I(inode)->root = root; 4502 BTRFS_I(inode)->root = root;
4503 BTRFS_I(inode)->generation = trans->transid; 4503 BTRFS_I(inode)->generation = trans->transid;
4504 inode->i_generation = BTRFS_I(inode)->generation;
4504 btrfs_set_inode_space_info(root, inode); 4505 btrfs_set_inode_space_info(root, inode);
4505 4506
4506 if (mode & S_IFDIR) 4507 if (mode & S_IFDIR)
@@ -4622,12 +4623,12 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
4622} 4623}
4623 4624
4624static int btrfs_add_nondir(struct btrfs_trans_handle *trans, 4625static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
4625 struct dentry *dentry, struct inode *inode, 4626 struct inode *dir, struct dentry *dentry,
4626 int backref, u64 index) 4627 struct inode *inode, int backref, u64 index)
4627{ 4628{
4628 int err = btrfs_add_link(trans, dentry->d_parent->d_inode, 4629 int err = btrfs_add_link(trans, dir, inode,
4629 inode, dentry->d_name.name, 4630 dentry->d_name.name, dentry->d_name.len,
4630 dentry->d_name.len, backref, index); 4631 backref, index);
4631 if (!err) { 4632 if (!err) {
4632 d_instantiate(dentry, inode); 4633 d_instantiate(dentry, inode);
4633 return 0; 4634 return 0;
@@ -4668,8 +4669,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4668 btrfs_set_trans_block_group(trans, dir); 4669 btrfs_set_trans_block_group(trans, dir);
4669 4670
4670 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4671 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4671 dentry->d_name.len, 4672 dentry->d_name.len, dir->i_ino, objectid,
4672 dentry->d_parent->d_inode->i_ino, objectid,
4673 BTRFS_I(dir)->block_group, mode, &index); 4673 BTRFS_I(dir)->block_group, mode, &index);
4674 err = PTR_ERR(inode); 4674 err = PTR_ERR(inode);
4675 if (IS_ERR(inode)) 4675 if (IS_ERR(inode))
@@ -4682,7 +4682,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4682 } 4682 }
4683 4683
4684 btrfs_set_trans_block_group(trans, inode); 4684 btrfs_set_trans_block_group(trans, inode);
4685 err = btrfs_add_nondir(trans, dentry, inode, 0, index); 4685 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
4686 if (err) 4686 if (err)
4687 drop_inode = 1; 4687 drop_inode = 1;
4688 else { 4688 else {
@@ -4730,10 +4730,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4730 btrfs_set_trans_block_group(trans, dir); 4730 btrfs_set_trans_block_group(trans, dir);
4731 4731
4732 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4732 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4733 dentry->d_name.len, 4733 dentry->d_name.len, dir->i_ino, objectid,
4734 dentry->d_parent->d_inode->i_ino, 4734 BTRFS_I(dir)->block_group, mode, &index);
4735 objectid, BTRFS_I(dir)->block_group, mode,
4736 &index);
4737 err = PTR_ERR(inode); 4735 err = PTR_ERR(inode);
4738 if (IS_ERR(inode)) 4736 if (IS_ERR(inode))
4739 goto out_unlock; 4737 goto out_unlock;
@@ -4745,7 +4743,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4745 } 4743 }
4746 4744
4747 btrfs_set_trans_block_group(trans, inode); 4745 btrfs_set_trans_block_group(trans, inode);
4748 err = btrfs_add_nondir(trans, dentry, inode, 0, index); 4746 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
4749 if (err) 4747 if (err)
4750 drop_inode = 1; 4748 drop_inode = 1;
4751 else { 4749 else {
@@ -4787,6 +4785,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4787 return -EPERM; 4785 return -EPERM;
4788 4786
4789 btrfs_inc_nlink(inode); 4787 btrfs_inc_nlink(inode);
4788 inode->i_ctime = CURRENT_TIME;
4790 4789
4791 err = btrfs_set_inode_index(dir, &index); 4790 err = btrfs_set_inode_index(dir, &index);
4792 if (err) 4791 if (err)
@@ -4805,15 +4804,17 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4805 btrfs_set_trans_block_group(trans, dir); 4804 btrfs_set_trans_block_group(trans, dir);
4806 ihold(inode); 4805 ihold(inode);
4807 4806
4808 err = btrfs_add_nondir(trans, dentry, inode, 1, index); 4807 err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
4809 4808
4810 if (err) { 4809 if (err) {
4811 drop_inode = 1; 4810 drop_inode = 1;
4812 } else { 4811 } else {
4812 struct dentry *parent = dget_parent(dentry);
4813 btrfs_update_inode_block_group(trans, dir); 4813 btrfs_update_inode_block_group(trans, dir);
4814 err = btrfs_update_inode(trans, root, inode); 4814 err = btrfs_update_inode(trans, root, inode);
4815 BUG_ON(err); 4815 BUG_ON(err);
4816 btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); 4816 btrfs_log_new_name(trans, inode, NULL, parent);
4817 dput(parent);
4817 } 4818 }
4818 4819
4819 nr = trans->blocks_used; 4820 nr = trans->blocks_used;
@@ -4853,8 +4854,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4853 btrfs_set_trans_block_group(trans, dir); 4854 btrfs_set_trans_block_group(trans, dir);
4854 4855
4855 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4856 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4856 dentry->d_name.len, 4857 dentry->d_name.len, dir->i_ino, objectid,
4857 dentry->d_parent->d_inode->i_ino, objectid,
4858 BTRFS_I(dir)->block_group, S_IFDIR | mode, 4858 BTRFS_I(dir)->block_group, S_IFDIR | mode,
4859 &index); 4859 &index);
4860 if (IS_ERR(inode)) { 4860 if (IS_ERR(inode)) {
@@ -4877,9 +4877,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4877 if (err) 4877 if (err)
4878 goto out_fail; 4878 goto out_fail;
4879 4879
4880 err = btrfs_add_link(trans, dentry->d_parent->d_inode, 4880 err = btrfs_add_link(trans, dir, inode, dentry->d_name.name,
4881 inode, dentry->d_name.name, 4881 dentry->d_name.len, 0, index);
4882 dentry->d_name.len, 0, index);
4883 if (err) 4882 if (err)
4884 goto out_fail; 4883 goto out_fail;
4885 4884
@@ -5535,13 +5534,21 @@ struct btrfs_dio_private {
5535 u64 bytes; 5534 u64 bytes;
5536 u32 *csums; 5535 u32 *csums;
5537 void *private; 5536 void *private;
5537
5538 /* number of bios pending for this dio */
5539 atomic_t pending_bios;
5540
5541 /* IO errors */
5542 int errors;
5543
5544 struct bio *orig_bio;
5538}; 5545};
5539 5546
5540static void btrfs_endio_direct_read(struct bio *bio, int err) 5547static void btrfs_endio_direct_read(struct bio *bio, int err)
5541{ 5548{
5549 struct btrfs_dio_private *dip = bio->bi_private;
5542 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; 5550 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
5543 struct bio_vec *bvec = bio->bi_io_vec; 5551 struct bio_vec *bvec = bio->bi_io_vec;
5544 struct btrfs_dio_private *dip = bio->bi_private;
5545 struct inode *inode = dip->inode; 5552 struct inode *inode = dip->inode;
5546 struct btrfs_root *root = BTRFS_I(inode)->root; 5553 struct btrfs_root *root = BTRFS_I(inode)->root;
5547 u64 start; 5554 u64 start;
@@ -5595,15 +5602,18 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
5595 struct btrfs_trans_handle *trans; 5602 struct btrfs_trans_handle *trans;
5596 struct btrfs_ordered_extent *ordered = NULL; 5603 struct btrfs_ordered_extent *ordered = NULL;
5597 struct extent_state *cached_state = NULL; 5604 struct extent_state *cached_state = NULL;
5605 u64 ordered_offset = dip->logical_offset;
5606 u64 ordered_bytes = dip->bytes;
5598 int ret; 5607 int ret;
5599 5608
5600 if (err) 5609 if (err)
5601 goto out_done; 5610 goto out_done;
5602 5611again:
5603 ret = btrfs_dec_test_ordered_pending(inode, &ordered, 5612 ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
5604 dip->logical_offset, dip->bytes); 5613 &ordered_offset,
5614 ordered_bytes);
5605 if (!ret) 5615 if (!ret)
5606 goto out_done; 5616 goto out_test;
5607 5617
5608 BUG_ON(!ordered); 5618 BUG_ON(!ordered);
5609 5619
@@ -5663,8 +5673,20 @@ out_unlock:
5663out: 5673out:
5664 btrfs_delalloc_release_metadata(inode, ordered->len); 5674 btrfs_delalloc_release_metadata(inode, ordered->len);
5665 btrfs_end_transaction(trans, root); 5675 btrfs_end_transaction(trans, root);
5676 ordered_offset = ordered->file_offset + ordered->len;
5666 btrfs_put_ordered_extent(ordered); 5677 btrfs_put_ordered_extent(ordered);
5667 btrfs_put_ordered_extent(ordered); 5678 btrfs_put_ordered_extent(ordered);
5679
5680out_test:
5681 /*
5682 * our bio might span multiple ordered extents. If we haven't
5683 * completed the accounting for the whole dio, go back and try again
5684 */
5685 if (ordered_offset < dip->logical_offset + dip->bytes) {
5686 ordered_bytes = dip->logical_offset + dip->bytes -
5687 ordered_offset;
5688 goto again;
5689 }
5668out_done: 5690out_done:
5669 bio->bi_private = dip->private; 5691 bio->bi_private = dip->private;
5670 5692
@@ -5684,6 +5706,176 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
5684 return 0; 5706 return 0;
5685} 5707}
5686 5708
5709static void btrfs_end_dio_bio(struct bio *bio, int err)
5710{
5711 struct btrfs_dio_private *dip = bio->bi_private;
5712
5713 if (err) {
5714 printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu "
5715 "disk_bytenr %lu len %u err no %d\n",
5716 dip->inode->i_ino, bio->bi_rw, bio->bi_sector,
5717 bio->bi_size, err);
5718 dip->errors = 1;
5719
5720 /*
5721 * before atomic variable goto zero, we must make sure
5722 * dip->errors is perceived to be set.
5723 */
5724 smp_mb__before_atomic_dec();
5725 }
5726
5727 /* if there are more bios still pending for this dio, just exit */
5728 if (!atomic_dec_and_test(&dip->pending_bios))
5729 goto out;
5730
5731 if (dip->errors)
5732 bio_io_error(dip->orig_bio);
5733 else {
5734 set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags);
5735 bio_endio(dip->orig_bio, 0);
5736 }
5737out:
5738 bio_put(bio);
5739}
5740
5741static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
5742 u64 first_sector, gfp_t gfp_flags)
5743{
5744 int nr_vecs = bio_get_nr_vecs(bdev);
5745 return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags);
5746}
5747
5748static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5749 int rw, u64 file_offset, int skip_sum,
5750 u32 *csums)
5751{
5752 int write = rw & REQ_WRITE;
5753 struct btrfs_root *root = BTRFS_I(inode)->root;
5754 int ret;
5755
5756 bio_get(bio);
5757 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
5758 if (ret)
5759 goto err;
5760
5761 if (write && !skip_sum) {
5762 ret = btrfs_wq_submit_bio(root->fs_info,
5763 inode, rw, bio, 0, 0,
5764 file_offset,
5765 __btrfs_submit_bio_start_direct_io,
5766 __btrfs_submit_bio_done);
5767 goto err;
5768 } else if (!skip_sum)
5769 btrfs_lookup_bio_sums_dio(root, inode, bio,
5770 file_offset, csums);
5771
5772 ret = btrfs_map_bio(root, rw, bio, 0, 1);
5773err:
5774 bio_put(bio);
5775 return ret;
5776}
5777
5778static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
5779 int skip_sum)
5780{
5781 struct inode *inode = dip->inode;
5782 struct btrfs_root *root = BTRFS_I(inode)->root;
5783 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
5784 struct bio *bio;
5785 struct bio *orig_bio = dip->orig_bio;
5786 struct bio_vec *bvec = orig_bio->bi_io_vec;
5787 u64 start_sector = orig_bio->bi_sector;
5788 u64 file_offset = dip->logical_offset;
5789 u64 submit_len = 0;
5790 u64 map_length;
5791 int nr_pages = 0;
5792 u32 *csums = dip->csums;
5793 int ret = 0;
5794
5795 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
5796 if (!bio)
5797 return -ENOMEM;
5798 bio->bi_private = dip;
5799 bio->bi_end_io = btrfs_end_dio_bio;
5800 atomic_inc(&dip->pending_bios);
5801
5802 map_length = orig_bio->bi_size;
5803 ret = btrfs_map_block(map_tree, READ, start_sector << 9,
5804 &map_length, NULL, 0);
5805 if (ret) {
5806 bio_put(bio);
5807 return -EIO;
5808 }
5809
5810 while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
5811 if (unlikely(map_length < submit_len + bvec->bv_len ||
5812 bio_add_page(bio, bvec->bv_page, bvec->bv_len,
5813 bvec->bv_offset) < bvec->bv_len)) {
5814 /*
5815 * inc the count before we submit the bio so
5816 * we know the end IO handler won't happen before
5817 * we inc the count. Otherwise, the dip might get freed
5818 * before we're done setting it up
5819 */
5820 atomic_inc(&dip->pending_bios);
5821 ret = __btrfs_submit_dio_bio(bio, inode, rw,
5822 file_offset, skip_sum,
5823 csums);
5824 if (ret) {
5825 bio_put(bio);
5826 atomic_dec(&dip->pending_bios);
5827 goto out_err;
5828 }
5829
5830 if (!skip_sum)
5831 csums = csums + nr_pages;
5832 start_sector += submit_len >> 9;
5833 file_offset += submit_len;
5834
5835 submit_len = 0;
5836 nr_pages = 0;
5837
5838 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,
5839 start_sector, GFP_NOFS);
5840 if (!bio)
5841 goto out_err;
5842 bio->bi_private = dip;
5843 bio->bi_end_io = btrfs_end_dio_bio;
5844
5845 map_length = orig_bio->bi_size;
5846 ret = btrfs_map_block(map_tree, READ, start_sector << 9,
5847 &map_length, NULL, 0);
5848 if (ret) {
5849 bio_put(bio);
5850 goto out_err;
5851 }
5852 } else {
5853 submit_len += bvec->bv_len;
5854 nr_pages ++;
5855 bvec++;
5856 }
5857 }
5858
5859 ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
5860 csums);
5861 if (!ret)
5862 return 0;
5863
5864 bio_put(bio);
5865out_err:
5866 dip->errors = 1;
5867 /*
5868 * before atomic variable goto zero, we must
5869 * make sure dip->errors is perceived to be set.
5870 */
5871 smp_mb__before_atomic_dec();
5872 if (atomic_dec_and_test(&dip->pending_bios))
5873 bio_io_error(dip->orig_bio);
5874
5875 /* bio_end_io() will handle error, so we needn't return it */
5876 return 0;
5877}
5878
5687static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, 5879static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
5688 loff_t file_offset) 5880 loff_t file_offset)
5689{ 5881{
@@ -5723,36 +5915,18 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
5723 5915
5724 dip->disk_bytenr = (u64)bio->bi_sector << 9; 5916 dip->disk_bytenr = (u64)bio->bi_sector << 9;
5725 bio->bi_private = dip; 5917 bio->bi_private = dip;
5918 dip->errors = 0;
5919 dip->orig_bio = bio;
5920 atomic_set(&dip->pending_bios, 0);
5726 5921
5727 if (write) 5922 if (write)
5728 bio->bi_end_io = btrfs_endio_direct_write; 5923 bio->bi_end_io = btrfs_endio_direct_write;
5729 else 5924 else
5730 bio->bi_end_io = btrfs_endio_direct_read; 5925 bio->bi_end_io = btrfs_endio_direct_read;
5731 5926
5732 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 5927 ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
5733 if (ret) 5928 if (!ret)
5734 goto out_err;
5735
5736 if (write && !skip_sum) {
5737 ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
5738 inode, rw, bio, 0, 0,
5739 dip->logical_offset,
5740 __btrfs_submit_bio_start_direct_io,
5741 __btrfs_submit_bio_done);
5742 if (ret)
5743 goto out_err;
5744 return; 5929 return;
5745 } else if (!skip_sum)
5746 btrfs_lookup_bio_sums_dio(root, inode, bio,
5747 dip->logical_offset, dip->csums);
5748
5749 ret = btrfs_map_bio(root, rw, bio, 0, 1);
5750 if (ret)
5751 goto out_err;
5752 return;
5753out_err:
5754 kfree(dip->csums);
5755 kfree(dip);
5756free_ordered: 5930free_ordered:
5757 /* 5931 /*
5758 * If this is a write, we need to clean up the reserved space and kill 5932 * If this is a write, we need to clean up the reserved space and kill
@@ -6607,8 +6781,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6607 BUG_ON(ret); 6781 BUG_ON(ret);
6608 6782
6609 if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { 6783 if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
6610 btrfs_log_new_name(trans, old_inode, old_dir, 6784 struct dentry *parent = dget_parent(new_dentry);
6611 new_dentry->d_parent); 6785 btrfs_log_new_name(trans, old_inode, old_dir, parent);
6786 dput(parent);
6612 btrfs_end_log_trans(root); 6787 btrfs_end_log_trans(root);
6613 } 6788 }
6614out_fail: 6789out_fail:
@@ -6758,8 +6933,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
6758 btrfs_set_trans_block_group(trans, dir); 6933 btrfs_set_trans_block_group(trans, dir);
6759 6934
6760 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 6935 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
6761 dentry->d_name.len, 6936 dentry->d_name.len, dir->i_ino, objectid,
6762 dentry->d_parent->d_inode->i_ino, objectid,
6763 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, 6937 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO,
6764 &index); 6938 &index);
6765 err = PTR_ERR(inode); 6939 err = PTR_ERR(inode);
@@ -6773,7 +6947,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
6773 } 6947 }
6774 6948
6775 btrfs_set_trans_block_group(trans, inode); 6949 btrfs_set_trans_block_group(trans, inode);
6776 err = btrfs_add_nondir(trans, dentry, inode, 0, index); 6950 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
6777 if (err) 6951 if (err)
6778 drop_inode = 1; 6952 drop_inode = 1;
6779 else { 6953 else {
@@ -6844,6 +7018,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
6844 struct btrfs_root *root = BTRFS_I(inode)->root; 7018 struct btrfs_root *root = BTRFS_I(inode)->root;
6845 struct btrfs_key ins; 7019 struct btrfs_key ins;
6846 u64 cur_offset = start; 7020 u64 cur_offset = start;
7021 u64 i_size;
6847 int ret = 0; 7022 int ret = 0;
6848 bool own_trans = true; 7023 bool own_trans = true;
6849 7024
@@ -6885,11 +7060,11 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
6885 (actual_len > inode->i_size) && 7060 (actual_len > inode->i_size) &&
6886 (cur_offset > inode->i_size)) { 7061 (cur_offset > inode->i_size)) {
6887 if (cur_offset > actual_len) 7062 if (cur_offset > actual_len)
6888 i_size_write(inode, actual_len); 7063 i_size = actual_len;
6889 else 7064 else
6890 i_size_write(inode, cur_offset); 7065 i_size = cur_offset;
6891 i_size_write(inode, cur_offset); 7066 i_size_write(inode, i_size);
6892 btrfs_ordered_update_i_size(inode, cur_offset, NULL); 7067 btrfs_ordered_update_i_size(inode, i_size, NULL);
6893 } 7068 }
6894 7069
6895 ret = btrfs_update_inode(trans, root, inode); 7070 ret = btrfs_update_inode(trans, root, inode);
@@ -6943,6 +7118,10 @@ static long btrfs_fallocate(struct inode *inode, int mode,
6943 btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); 7118 btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
6944 7119
6945 mutex_lock(&inode->i_mutex); 7120 mutex_lock(&inode->i_mutex);
7121 ret = inode_newsize_ok(inode, alloc_end);
7122 if (ret)
7123 goto out;
7124
6946 if (alloc_start > inode->i_size) { 7125 if (alloc_start > inode->i_size) {
6947 ret = btrfs_cont_expand(inode, alloc_start); 7126 ret = btrfs_cont_expand(inode, alloc_start);
6948 if (ret) 7127 if (ret)
@@ -7139,6 +7318,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
7139 .readlink = generic_readlink, 7318 .readlink = generic_readlink,
7140 .follow_link = page_follow_link_light, 7319 .follow_link = page_follow_link_light,
7141 .put_link = page_put_link, 7320 .put_link = page_put_link,
7321 .getattr = btrfs_getattr,
7142 .permission = btrfs_permission, 7322 .permission = btrfs_permission,
7143 .setxattr = btrfs_setxattr, 7323 .setxattr = btrfs_setxattr,
7144 .getxattr = btrfs_getxattr, 7324 .getxattr = btrfs_getxattr,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 463d91b4dd3a..f1c9bb4079ed 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -233,7 +233,8 @@ static noinline int create_subvol(struct btrfs_root *root,
233 struct btrfs_inode_item *inode_item; 233 struct btrfs_inode_item *inode_item;
234 struct extent_buffer *leaf; 234 struct extent_buffer *leaf;
235 struct btrfs_root *new_root; 235 struct btrfs_root *new_root;
236 struct inode *dir = dentry->d_parent->d_inode; 236 struct dentry *parent = dget_parent(dentry);
237 struct inode *dir;
237 int ret; 238 int ret;
238 int err; 239 int err;
239 u64 objectid; 240 u64 objectid;
@@ -242,8 +243,13 @@ static noinline int create_subvol(struct btrfs_root *root,
242 243
243 ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root, 244 ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root,
244 0, &objectid); 245 0, &objectid);
245 if (ret) 246 if (ret) {
247 dput(parent);
246 return ret; 248 return ret;
249 }
250
251 dir = parent->d_inode;
252
247 /* 253 /*
248 * 1 - inode item 254 * 1 - inode item
249 * 2 - refs 255 * 2 - refs
@@ -251,8 +257,10 @@ static noinline int create_subvol(struct btrfs_root *root,
251 * 2 - dir items 257 * 2 - dir items
252 */ 258 */
253 trans = btrfs_start_transaction(root, 6); 259 trans = btrfs_start_transaction(root, 6);
254 if (IS_ERR(trans)) 260 if (IS_ERR(trans)) {
261 dput(parent);
255 return PTR_ERR(trans); 262 return PTR_ERR(trans);
263 }
256 264
257 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 265 leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
258 0, objectid, NULL, 0, 0, 0); 266 0, objectid, NULL, 0, 0, 0);
@@ -339,6 +347,7 @@ static noinline int create_subvol(struct btrfs_root *root,
339 347
340 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); 348 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
341fail: 349fail:
350 dput(parent);
342 if (async_transid) { 351 if (async_transid) {
343 *async_transid = trans->transid; 352 *async_transid = trans->transid;
344 err = btrfs_commit_transaction_async(trans, root, 1); 353 err = btrfs_commit_transaction_async(trans, root, 1);
@@ -354,6 +363,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
354 char *name, int namelen, u64 *async_transid) 363 char *name, int namelen, u64 *async_transid)
355{ 364{
356 struct inode *inode; 365 struct inode *inode;
366 struct dentry *parent;
357 struct btrfs_pending_snapshot *pending_snapshot; 367 struct btrfs_pending_snapshot *pending_snapshot;
358 struct btrfs_trans_handle *trans; 368 struct btrfs_trans_handle *trans;
359 int ret; 369 int ret;
@@ -396,7 +406,9 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
396 406
397 btrfs_orphan_cleanup(pending_snapshot->snap); 407 btrfs_orphan_cleanup(pending_snapshot->snap);
398 408
399 inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); 409 parent = dget_parent(dentry);
410 inode = btrfs_lookup_dentry(parent->d_inode, dentry);
411 dput(parent);
400 if (IS_ERR(inode)) { 412 if (IS_ERR(inode)) {
401 ret = PTR_ERR(inode); 413 ret = PTR_ERR(inode);
402 goto fail; 414 goto fail;
@@ -1669,12 +1681,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1669 olen = len = src->i_size - off; 1681 olen = len = src->i_size - off;
1670 /* if we extend to eof, continue to block boundary */ 1682 /* if we extend to eof, continue to block boundary */
1671 if (off + len == src->i_size) 1683 if (off + len == src->i_size)
1672 len = ((src->i_size + bs-1) & ~(bs-1)) 1684 len = ALIGN(src->i_size, bs) - off;
1673 - off;
1674 1685
1675 /* verify the end result is block aligned */ 1686 /* verify the end result is block aligned */
1676 if ((off & (bs-1)) || 1687 if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
1677 ((off + len) & (bs-1))) 1688 !IS_ALIGNED(destoff, bs))
1678 goto out_unlock; 1689 goto out_unlock;
1679 1690
1680 /* do any pending delalloc/csum calc on src, one way or 1691 /* do any pending delalloc/csum calc on src, one way or
@@ -1874,8 +1885,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1874 * but shouldn't round up the file size 1885 * but shouldn't round up the file size
1875 */ 1886 */
1876 endoff = new_key.offset + datal; 1887 endoff = new_key.offset + datal;
1877 if (endoff > off+olen) 1888 if (endoff > destoff+olen)
1878 endoff = off+olen; 1889 endoff = destoff+olen;
1879 if (endoff > inode->i_size) 1890 if (endoff > inode->i_size)
1880 btrfs_i_size_write(inode, endoff); 1891 btrfs_i_size_write(inode, endoff);
1881 1892
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index f4621f6deca1..ae7737e352c9 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -250,6 +250,73 @@ int btrfs_add_ordered_sum(struct inode *inode,
250 250
251/* 251/*
252 * this is used to account for finished IO across a given range 252 * this is used to account for finished IO across a given range
253 * of the file. The IO may span ordered extents. If
254 * a given ordered_extent is completely done, 1 is returned, otherwise
255 * 0.
256 *
257 * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
258 * to make sure this function only returns 1 once for a given ordered extent.
259 *
260 * file_offset is updated to one byte past the range that is recorded as
261 * complete. This allows you to walk forward in the file.
262 */
263int btrfs_dec_test_first_ordered_pending(struct inode *inode,
264 struct btrfs_ordered_extent **cached,
265 u64 *file_offset, u64 io_size)
266{
267 struct btrfs_ordered_inode_tree *tree;
268 struct rb_node *node;
269 struct btrfs_ordered_extent *entry = NULL;
270 int ret;
271 u64 dec_end;
272 u64 dec_start;
273 u64 to_dec;
274
275 tree = &BTRFS_I(inode)->ordered_tree;
276 spin_lock(&tree->lock);
277 node = tree_search(tree, *file_offset);
278 if (!node) {
279 ret = 1;
280 goto out;
281 }
282
283 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
284 if (!offset_in_entry(entry, *file_offset)) {
285 ret = 1;
286 goto out;
287 }
288
289 dec_start = max(*file_offset, entry->file_offset);
290 dec_end = min(*file_offset + io_size, entry->file_offset +
291 entry->len);
292 *file_offset = dec_end;
293 if (dec_start > dec_end) {
294 printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n",
295 (unsigned long long)dec_start,
296 (unsigned long long)dec_end);
297 }
298 to_dec = dec_end - dec_start;
299 if (to_dec > entry->bytes_left) {
300 printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
301 (unsigned long long)entry->bytes_left,
302 (unsigned long long)to_dec);
303 }
304 entry->bytes_left -= to_dec;
305 if (entry->bytes_left == 0)
306 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
307 else
308 ret = 1;
309out:
310 if (!ret && cached && entry) {
311 *cached = entry;
312 atomic_inc(&entry->refs);
313 }
314 spin_unlock(&tree->lock);
315 return ret == 0;
316}
317
318/*
319 * this is used to account for finished IO across a given range
253 * of the file. The IO should not span ordered extents. If 320 * of the file. The IO should not span ordered extents. If
254 * a given ordered_extent is completely done, 1 is returned, otherwise 321 * a given ordered_extent is completely done, 1 is returned, otherwise
255 * 0. 322 * 0.
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 8ac365492a3f..61dca83119dd 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -141,6 +141,9 @@ int btrfs_remove_ordered_extent(struct inode *inode,
141int btrfs_dec_test_ordered_pending(struct inode *inode, 141int btrfs_dec_test_ordered_pending(struct inode *inode,
142 struct btrfs_ordered_extent **cached, 142 struct btrfs_ordered_extent **cached,
143 u64 file_offset, u64 io_size); 143 u64 file_offset, u64 io_size);
144int btrfs_dec_test_first_ordered_pending(struct inode *inode,
145 struct btrfs_ordered_extent **cached,
146 u64 *file_offset, u64 io_size);
144int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, 147int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
145 u64 start, u64 len, u64 disk_len, int type); 148 u64 start, u64 len, u64 disk_len, int type);
146int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, 149int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 8299a25ffc8f..dbb51ea7a13c 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -244,6 +244,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
244 case Opt_space_cache: 244 case Opt_space_cache:
245 printk(KERN_INFO "btrfs: enabling disk space caching\n"); 245 printk(KERN_INFO "btrfs: enabling disk space caching\n");
246 btrfs_set_opt(info->mount_opt, SPACE_CACHE); 246 btrfs_set_opt(info->mount_opt, SPACE_CACHE);
247 break;
247 case Opt_clear_cache: 248 case Opt_clear_cache:
248 printk(KERN_INFO "btrfs: force clearing of disk cache\n"); 249 printk(KERN_INFO "btrfs: force clearing of disk cache\n");
249 btrfs_set_opt(info->mount_opt, CLEAR_CACHE); 250 btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
@@ -562,12 +563,26 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
562 563
563static int btrfs_test_super(struct super_block *s, void *data) 564static int btrfs_test_super(struct super_block *s, void *data)
564{ 565{
565 struct btrfs_fs_devices *test_fs_devices = data; 566 struct btrfs_root *test_root = data;
566 struct btrfs_root *root = btrfs_sb(s); 567 struct btrfs_root *root = btrfs_sb(s);
567 568
568 return root->fs_info->fs_devices == test_fs_devices; 569 /*
570 * If this super block is going away, return false as it
571 * can't match as an existing super block.
572 */
573 if (!atomic_read(&s->s_active))
574 return 0;
575 return root->fs_info->fs_devices == test_root->fs_info->fs_devices;
576}
577
578static int btrfs_set_super(struct super_block *s, void *data)
579{
580 s->s_fs_info = data;
581
582 return set_anon_super(s, data);
569} 583}
570 584
585
571/* 586/*
572 * Find a superblock for the given device / mount point. 587 * Find a superblock for the given device / mount point.
573 * 588 *
@@ -581,6 +596,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
581 struct super_block *s; 596 struct super_block *s;
582 struct dentry *root; 597 struct dentry *root;
583 struct btrfs_fs_devices *fs_devices = NULL; 598 struct btrfs_fs_devices *fs_devices = NULL;
599 struct btrfs_root *tree_root = NULL;
600 struct btrfs_fs_info *fs_info = NULL;
584 fmode_t mode = FMODE_READ; 601 fmode_t mode = FMODE_READ;
585 char *subvol_name = NULL; 602 char *subvol_name = NULL;
586 u64 subvol_objectid = 0; 603 u64 subvol_objectid = 0;
@@ -608,8 +625,24 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
608 goto error_close_devices; 625 goto error_close_devices;
609 } 626 }
610 627
628 /*
629 * Setup a dummy root and fs_info for test/set super. This is because
630 * we don't actually fill this stuff out until open_ctree, but we need
631 * it for searching for existing supers, so this lets us do that and
632 * then open_ctree will properly initialize everything later.
633 */
634 fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS);
635 tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
636 if (!fs_info || !tree_root) {
637 error = -ENOMEM;
638 goto error_close_devices;
639 }
640 fs_info->tree_root = tree_root;
641 fs_info->fs_devices = fs_devices;
642 tree_root->fs_info = fs_info;
643
611 bdev = fs_devices->latest_bdev; 644 bdev = fs_devices->latest_bdev;
612 s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices); 645 s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root);
613 if (IS_ERR(s)) 646 if (IS_ERR(s))
614 goto error_s; 647 goto error_s;
615 648
@@ -675,6 +708,8 @@ error_s:
675 error = PTR_ERR(s); 708 error = PTR_ERR(s);
676error_close_devices: 709error_close_devices:
677 btrfs_close_devices(fs_devices); 710 btrfs_close_devices(fs_devices);
711 kfree(fs_info);
712 kfree(tree_root);
678error_free_subvol_name: 713error_free_subvol_name:
679 kfree(subvol_name); 714 kfree(subvol_name);
680 return ERR_PTR(error); 715 return ERR_PTR(error);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 1fffbc017bdf..f50e931fc217 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -902,6 +902,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
902 struct btrfs_root *root = pending->root; 902 struct btrfs_root *root = pending->root;
903 struct btrfs_root *parent_root; 903 struct btrfs_root *parent_root;
904 struct inode *parent_inode; 904 struct inode *parent_inode;
905 struct dentry *parent;
905 struct dentry *dentry; 906 struct dentry *dentry;
906 struct extent_buffer *tmp; 907 struct extent_buffer *tmp;
907 struct extent_buffer *old; 908 struct extent_buffer *old;
@@ -941,7 +942,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
941 trans->block_rsv = &pending->block_rsv; 942 trans->block_rsv = &pending->block_rsv;
942 943
943 dentry = pending->dentry; 944 dentry = pending->dentry;
944 parent_inode = dentry->d_parent->d_inode; 945 parent = dget_parent(dentry);
946 parent_inode = parent->d_inode;
945 parent_root = BTRFS_I(parent_inode)->root; 947 parent_root = BTRFS_I(parent_inode)->root;
946 record_root_in_trans(trans, parent_root); 948 record_root_in_trans(trans, parent_root);
947 949
@@ -989,6 +991,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
989 parent_inode->i_ino, index, 991 parent_inode->i_ino, index,
990 dentry->d_name.name, dentry->d_name.len); 992 dentry->d_name.name, dentry->d_name.len);
991 BUG_ON(ret); 993 BUG_ON(ret);
994 dput(parent);
992 995
993 key.offset = (u64)-1; 996 key.offset = (u64)-1;
994 pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); 997 pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index a29f19384a27..054744ac5719 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2869,6 +2869,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
2869{ 2869{
2870 int ret = 0; 2870 int ret = 0;
2871 struct btrfs_root *root; 2871 struct btrfs_root *root;
2872 struct dentry *old_parent = NULL;
2872 2873
2873 /* 2874 /*
2874 * for regular files, if its inode is already on disk, we don't 2875 * for regular files, if its inode is already on disk, we don't
@@ -2910,10 +2911,13 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
2910 if (IS_ROOT(parent)) 2911 if (IS_ROOT(parent))
2911 break; 2912 break;
2912 2913
2913 parent = parent->d_parent; 2914 parent = dget_parent(parent);
2915 dput(old_parent);
2916 old_parent = parent;
2914 inode = parent->d_inode; 2917 inode = parent->d_inode;
2915 2918
2916 } 2919 }
2920 dput(old_parent);
2917out: 2921out:
2918 return ret; 2922 return ret;
2919} 2923}
@@ -2945,6 +2949,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
2945{ 2949{
2946 int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; 2950 int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
2947 struct super_block *sb; 2951 struct super_block *sb;
2952 struct dentry *old_parent = NULL;
2948 int ret = 0; 2953 int ret = 0;
2949 u64 last_committed = root->fs_info->last_trans_committed; 2954 u64 last_committed = root->fs_info->last_trans_committed;
2950 2955
@@ -3016,10 +3021,13 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
3016 if (IS_ROOT(parent)) 3021 if (IS_ROOT(parent))
3017 break; 3022 break;
3018 3023
3019 parent = parent->d_parent; 3024 parent = dget_parent(parent);
3025 dput(old_parent);
3026 old_parent = parent;
3020 } 3027 }
3021 ret = 0; 3028 ret = 0;
3022end_trans: 3029end_trans:
3030 dput(old_parent);
3023 if (ret < 0) { 3031 if (ret < 0) {
3024 BUG_ON(ret != -ENOSPC); 3032 BUG_ON(ret != -ENOSPC);
3025 root->fs_info->last_trans_log_full_commit = trans->transid; 3033 root->fs_info->last_trans_log_full_commit = trans->transid;
@@ -3039,8 +3047,13 @@ end_no_trans:
3039int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, 3047int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
3040 struct btrfs_root *root, struct dentry *dentry) 3048 struct btrfs_root *root, struct dentry *dentry)
3041{ 3049{
3042 return btrfs_log_inode_parent(trans, root, dentry->d_inode, 3050 struct dentry *parent = dget_parent(dentry);
3043 dentry->d_parent, 0); 3051 int ret;
3052
3053 ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0);
3054 dput(parent);
3055
3056 return ret;
3044} 3057}
3045 3058
3046/* 3059/*
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 58a9b9998b42..f606baf9ba72 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -631,6 +631,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
631 struct fs_disk_quota *fdq) 631 struct fs_disk_quota *fdq)
632{ 632{
633 struct inode *inode = &ip->i_inode; 633 struct inode *inode = &ip->i_inode;
634 struct gfs2_sbd *sdp = GFS2_SB(inode);
634 struct address_space *mapping = inode->i_mapping; 635 struct address_space *mapping = inode->i_mapping;
635 unsigned long index = loc >> PAGE_CACHE_SHIFT; 636 unsigned long index = loc >> PAGE_CACHE_SHIFT;
636 unsigned offset = loc & (PAGE_CACHE_SIZE - 1); 637 unsigned offset = loc & (PAGE_CACHE_SIZE - 1);
@@ -658,11 +659,11 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
658 qd->qd_qb.qb_value = qp->qu_value; 659 qd->qd_qb.qb_value = qp->qu_value;
659 if (fdq) { 660 if (fdq) {
660 if (fdq->d_fieldmask & FS_DQ_BSOFT) { 661 if (fdq->d_fieldmask & FS_DQ_BSOFT) {
661 qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit); 662 qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift);
662 qd->qd_qb.qb_warn = qp->qu_warn; 663 qd->qd_qb.qb_warn = qp->qu_warn;
663 } 664 }
664 if (fdq->d_fieldmask & FS_DQ_BHARD) { 665 if (fdq->d_fieldmask & FS_DQ_BHARD) {
665 qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit); 666 qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift);
666 qd->qd_qb.qb_limit = qp->qu_limit; 667 qd->qd_qb.qb_limit = qp->qu_limit;
667 } 668 }
668 } 669 }
@@ -1497,9 +1498,9 @@ static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id,
1497 fdq->d_version = FS_DQUOT_VERSION; 1498 fdq->d_version = FS_DQUOT_VERSION;
1498 fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA; 1499 fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA;
1499 fdq->d_id = id; 1500 fdq->d_id = id;
1500 fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit); 1501 fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift;
1501 fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn); 1502 fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift;
1502 fdq->d_bcount = be64_to_cpu(qlvb->qb_value); 1503 fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift;
1503 1504
1504 gfs2_glock_dq_uninit(&q_gh); 1505 gfs2_glock_dq_uninit(&q_gh);
1505out: 1506out:
@@ -1566,10 +1567,10 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1566 1567
1567 /* If nothing has changed, this is a no-op */ 1568 /* If nothing has changed, this is a no-op */
1568 if ((fdq->d_fieldmask & FS_DQ_BSOFT) && 1569 if ((fdq->d_fieldmask & FS_DQ_BSOFT) &&
1569 (fdq->d_blk_softlimit == be64_to_cpu(qd->qd_qb.qb_warn))) 1570 ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn)))
1570 fdq->d_fieldmask ^= FS_DQ_BSOFT; 1571 fdq->d_fieldmask ^= FS_DQ_BSOFT;
1571 if ((fdq->d_fieldmask & FS_DQ_BHARD) && 1572 if ((fdq->d_fieldmask & FS_DQ_BHARD) &&
1572 (fdq->d_blk_hardlimit == be64_to_cpu(qd->qd_qb.qb_limit))) 1573 ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit)))
1573 fdq->d_fieldmask ^= FS_DQ_BHARD; 1574 fdq->d_fieldmask ^= FS_DQ_BHARD;
1574 if (fdq->d_fieldmask == 0) 1575 if (fdq->d_fieldmask == 0)
1575 goto out_i; 1576 goto out_i;
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 2f7d05c89922..7da2a06508e5 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -103,22 +103,15 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
103 } 103 }
104 104
105 ret = -ESRCH; 105 ret = -ESRCH;
106 /* 106 rcu_read_lock();
107 * We want IOPRIO_WHO_PGRP/IOPRIO_WHO_USER to be "atomic",
108 * so we can't use rcu_read_lock(). See re-copy of ->ioprio
109 * in copy_process().
110 */
111 read_lock(&tasklist_lock);
112 switch (which) { 107 switch (which) {
113 case IOPRIO_WHO_PROCESS: 108 case IOPRIO_WHO_PROCESS:
114 rcu_read_lock();
115 if (!who) 109 if (!who)
116 p = current; 110 p = current;
117 else 111 else
118 p = find_task_by_vpid(who); 112 p = find_task_by_vpid(who);
119 if (p) 113 if (p)
120 ret = set_task_ioprio(p, ioprio); 114 ret = set_task_ioprio(p, ioprio);
121 rcu_read_unlock();
122 break; 115 break;
123 case IOPRIO_WHO_PGRP: 116 case IOPRIO_WHO_PGRP:
124 if (!who) 117 if (!who)
@@ -141,12 +134,7 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
141 break; 134 break;
142 135
143 do_each_thread(g, p) { 136 do_each_thread(g, p) {
144 int match; 137 if (__task_cred(p)->uid != who)
145
146 rcu_read_lock();
147 match = __task_cred(p)->uid == who;
148 rcu_read_unlock();
149 if (!match)
150 continue; 138 continue;
151 ret = set_task_ioprio(p, ioprio); 139 ret = set_task_ioprio(p, ioprio);
152 if (ret) 140 if (ret)
@@ -160,7 +148,7 @@ free_uid:
160 ret = -EINVAL; 148 ret = -EINVAL;
161 } 149 }
162 150
163 read_unlock(&tasklist_lock); 151 rcu_read_unlock();
164 return ret; 152 return ret;
165} 153}
166 154
@@ -204,17 +192,15 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
204 int ret = -ESRCH; 192 int ret = -ESRCH;
205 int tmpio; 193 int tmpio;
206 194
207 read_lock(&tasklist_lock); 195 rcu_read_lock();
208 switch (which) { 196 switch (which) {
209 case IOPRIO_WHO_PROCESS: 197 case IOPRIO_WHO_PROCESS:
210 rcu_read_lock();
211 if (!who) 198 if (!who)
212 p = current; 199 p = current;
213 else 200 else
214 p = find_task_by_vpid(who); 201 p = find_task_by_vpid(who);
215 if (p) 202 if (p)
216 ret = get_task_ioprio(p); 203 ret = get_task_ioprio(p);
217 rcu_read_unlock();
218 break; 204 break;
219 case IOPRIO_WHO_PGRP: 205 case IOPRIO_WHO_PGRP:
220 if (!who) 206 if (!who)
@@ -241,12 +227,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
241 break; 227 break;
242 228
243 do_each_thread(g, p) { 229 do_each_thread(g, p) {
244 int match; 230 if (__task_cred(p)->uid != user->uid)
245
246 rcu_read_lock();
247 match = __task_cred(p)->uid == user->uid;
248 rcu_read_unlock();
249 if (!match)
250 continue; 231 continue;
251 tmpio = get_task_ioprio(p); 232 tmpio = get_task_ioprio(p);
252 if (tmpio < 0) 233 if (tmpio < 0)
@@ -264,6 +245,6 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
264 ret = -EINVAL; 245 ret = -EINVAL;
265 } 246 }
266 247
267 read_unlock(&tasklist_lock); 248 rcu_read_unlock();
268 return ret; 249 return ret;
269} 250}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 662df2a5fad5..8ea4a4180a87 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -162,6 +162,7 @@ struct nfs_cache_array_entry {
162 u64 cookie; 162 u64 cookie;
163 u64 ino; 163 u64 ino;
164 struct qstr string; 164 struct qstr string;
165 unsigned char d_type;
165}; 166};
166 167
167struct nfs_cache_array { 168struct nfs_cache_array {
@@ -171,8 +172,6 @@ struct nfs_cache_array {
171 struct nfs_cache_array_entry array[0]; 172 struct nfs_cache_array_entry array[0];
172}; 173};
173 174
174#define MAX_READDIR_ARRAY ((PAGE_SIZE - sizeof(struct nfs_cache_array)) / sizeof(struct nfs_cache_array_entry))
175
176typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int); 175typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
177typedef struct { 176typedef struct {
178 struct file *file; 177 struct file *file;
@@ -257,13 +256,17 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
257 256
258 if (IS_ERR(array)) 257 if (IS_ERR(array))
259 return PTR_ERR(array); 258 return PTR_ERR(array);
259
260 cache_entry = &array->array[array->size];
261
262 /* Check that this entry lies within the page bounds */
260 ret = -ENOSPC; 263 ret = -ENOSPC;
261 if (array->size >= MAX_READDIR_ARRAY) 264 if ((char *)&cache_entry[1] - (char *)page_address(page) > PAGE_SIZE)
262 goto out; 265 goto out;
263 266
264 cache_entry = &array->array[array->size];
265 cache_entry->cookie = entry->prev_cookie; 267 cache_entry->cookie = entry->prev_cookie;
266 cache_entry->ino = entry->ino; 268 cache_entry->ino = entry->ino;
269 cache_entry->d_type = entry->d_type;
267 ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len); 270 ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len);
268 if (ret) 271 if (ret)
269 goto out; 272 goto out;
@@ -466,8 +469,9 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
466 struct xdr_stream stream; 469 struct xdr_stream stream;
467 struct xdr_buf buf; 470 struct xdr_buf buf;
468 __be32 *ptr = xdr_page; 471 __be32 *ptr = xdr_page;
469 int status;
470 struct nfs_cache_array *array; 472 struct nfs_cache_array *array;
473 unsigned int count = 0;
474 int status;
471 475
472 buf.head->iov_base = xdr_page; 476 buf.head->iov_base = xdr_page;
473 buf.head->iov_len = buflen; 477 buf.head->iov_len = buflen;
@@ -488,6 +492,8 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
488 break; 492 break;
489 } 493 }
490 494
495 count++;
496
491 if (desc->plus == 1) 497 if (desc->plus == 1)
492 nfs_prime_dcache(desc->file->f_path.dentry, entry); 498 nfs_prime_dcache(desc->file->f_path.dentry, entry);
493 499
@@ -496,13 +502,14 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
496 break; 502 break;
497 } while (!entry->eof); 503 } while (!entry->eof);
498 504
499 if (status == -EBADCOOKIE && entry->eof) { 505 if (count == 0 || (status == -EBADCOOKIE && entry->eof == 1)) {
500 array = nfs_readdir_get_array(page); 506 array = nfs_readdir_get_array(page);
501 if (!IS_ERR(array)) { 507 if (!IS_ERR(array)) {
502 array->eof_index = array->size; 508 array->eof_index = array->size;
503 status = 0; 509 status = 0;
504 nfs_readdir_release_array(page); 510 nfs_readdir_release_array(page);
505 } 511 } else
512 status = PTR_ERR(array);
506 } 513 }
507 return status; 514 return status;
508} 515}
@@ -696,21 +703,23 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
696 int i = 0; 703 int i = 0;
697 int res = 0; 704 int res = 0;
698 struct nfs_cache_array *array = NULL; 705 struct nfs_cache_array *array = NULL;
699 unsigned int d_type = DT_UNKNOWN;
700 struct dentry *dentry = NULL;
701 706
702 array = nfs_readdir_get_array(desc->page); 707 array = nfs_readdir_get_array(desc->page);
703 if (IS_ERR(array)) 708 if (IS_ERR(array)) {
704 return PTR_ERR(array); 709 res = PTR_ERR(array);
710 goto out;
711 }
705 712
706 for (i = desc->cache_entry_index; i < array->size; i++) { 713 for (i = desc->cache_entry_index; i < array->size; i++) {
707 d_type = DT_UNKNOWN; 714 struct nfs_cache_array_entry *ent;
708 715
709 res = filldir(dirent, array->array[i].string.name, 716 ent = &array->array[i];
710 array->array[i].string.len, file->f_pos, 717 if (filldir(dirent, ent->string.name, ent->string.len,
711 nfs_compat_user_ino64(array->array[i].ino), d_type); 718 file->f_pos, nfs_compat_user_ino64(ent->ino),
712 if (res < 0) 719 ent->d_type) < 0) {
720 desc->eof = 1;
713 break; 721 break;
722 }
714 file->f_pos++; 723 file->f_pos++;
715 desc->cache_entry_index = i; 724 desc->cache_entry_index = i;
716 if (i < (array->size-1)) 725 if (i < (array->size-1))
@@ -722,9 +731,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
722 desc->eof = 1; 731 desc->eof = 1;
723 732
724 nfs_readdir_release_array(desc->page); 733 nfs_readdir_release_array(desc->page);
734out:
725 cache_page_release(desc); 735 cache_page_release(desc);
726 if (dentry != NULL)
727 dput(dentry);
728 dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", 736 dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
729 (unsigned long long)*desc->dir_cookie, res); 737 (unsigned long long)*desc->dir_cookie, res);
730 return res; 738 return res;
@@ -759,13 +767,13 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
759 goto out; 767 goto out;
760 } 768 }
761 769
762 if (nfs_readdir_xdr_to_array(desc, page, inode) == -1) {
763 status = -EIO;
764 goto out_release;
765 }
766
767 desc->page_index = 0; 770 desc->page_index = 0;
768 desc->page = page; 771 desc->page = page;
772
773 status = nfs_readdir_xdr_to_array(desc, page, inode);
774 if (status < 0)
775 goto out_release;
776
769 status = nfs_do_filldir(desc, dirent, filldir); 777 status = nfs_do_filldir(desc, dirent, filldir);
770 778
771 out: 779 out:
@@ -816,14 +824,14 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
816 res = readdir_search_pagecache(desc); 824 res = readdir_search_pagecache(desc);
817 825
818 if (res == -EBADCOOKIE) { 826 if (res == -EBADCOOKIE) {
827 res = 0;
819 /* This means either end of directory */ 828 /* This means either end of directory */
820 if (*desc->dir_cookie && desc->eof == 0) { 829 if (*desc->dir_cookie && desc->eof == 0) {
821 /* Or that the server has 'lost' a cookie */ 830 /* Or that the server has 'lost' a cookie */
822 res = uncached_readdir(desc, dirent, filldir); 831 res = uncached_readdir(desc, dirent, filldir);
823 if (res >= 0) 832 if (res == 0)
824 continue; 833 continue;
825 } 834 }
826 res = 0;
827 break; 835 break;
828 } 836 }
829 if (res == -ETOOSMALL && desc->plus) { 837 if (res == -ETOOSMALL && desc->plus) {
@@ -838,10 +846,8 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
838 break; 846 break;
839 847
840 res = nfs_do_filldir(desc, dirent, filldir); 848 res = nfs_do_filldir(desc, dirent, filldir);
841 if (res < 0) { 849 if (res < 0)
842 res = 0;
843 break; 850 break;
844 }
845 } 851 }
846out: 852out:
847 nfs_unblock_sillyrename(dentry); 853 nfs_unblock_sillyrename(dentry);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 84d3c8b90206..e6ace0d93c71 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -867,7 +867,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
867 goto out; 867 goto out;
868 nfs_alloc_commit_data(dreq); 868 nfs_alloc_commit_data(dreq);
869 869
870 if (dreq->commit_data == NULL || count < wsize) 870 if (dreq->commit_data == NULL || count <= wsize)
871 sync = NFS_FILE_SYNC; 871 sync = NFS_FILE_SYNC;
872 872
873 dreq->inode = inode; 873 dreq->inode = inode;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index db08ff3ff454..e6356b750b77 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -362,6 +362,15 @@ unsigned int nfs_page_length(struct page *page)
362} 362}
363 363
364/* 364/*
365 * Convert a umode to a dirent->d_type
366 */
367static inline
368unsigned char nfs_umode_to_dtype(umode_t mode)
369{
370 return (mode >> 12) & 15;
371}
372
373/*
365 * Determine the number of pages in an array of length 'len' and 374 * Determine the number of pages in an array of length 'len' and
366 * with a base offset of 'base' 375 * with a base offset of 'base'
367 */ 376 */
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 2563f765c9b4..5914a1911c95 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -485,6 +485,8 @@ nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_se
485 entry->prev_cookie = entry->cookie; 485 entry->prev_cookie = entry->cookie;
486 entry->cookie = ntohl(*p++); 486 entry->cookie = ntohl(*p++);
487 487
488 entry->d_type = DT_UNKNOWN;
489
488 p = xdr_inline_peek(xdr, 8); 490 p = xdr_inline_peek(xdr, 8);
489 if (p != NULL) 491 if (p != NULL)
490 entry->eof = !p[0] && p[1]; 492 entry->eof = !p[0] && p[1];
@@ -495,7 +497,7 @@ nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_se
495 497
496out_overflow: 498out_overflow:
497 print_overflow_msg(__func__, xdr); 499 print_overflow_msg(__func__, xdr);
498 return ERR_PTR(-EIO); 500 return ERR_PTR(-EAGAIN);
499} 501}
500 502
501/* 503/*
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 748dc91a4a14..f6cc60f06dac 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -622,11 +622,13 @@ nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_s
622 entry->prev_cookie = entry->cookie; 622 entry->prev_cookie = entry->cookie;
623 p = xdr_decode_hyper(p, &entry->cookie); 623 p = xdr_decode_hyper(p, &entry->cookie);
624 624
625 entry->d_type = DT_UNKNOWN;
625 if (plus) { 626 if (plus) {
626 entry->fattr->valid = 0; 627 entry->fattr->valid = 0;
627 p = xdr_decode_post_op_attr_stream(xdr, entry->fattr); 628 p = xdr_decode_post_op_attr_stream(xdr, entry->fattr);
628 if (IS_ERR(p)) 629 if (IS_ERR(p))
629 goto out_overflow_exit; 630 goto out_overflow_exit;
631 entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
630 /* In fact, a post_op_fh3: */ 632 /* In fact, a post_op_fh3: */
631 p = xdr_inline_decode(xdr, 4); 633 p = xdr_inline_decode(xdr, 4);
632 if (unlikely(!p)) 634 if (unlikely(!p))
@@ -656,7 +658,7 @@ nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_s
656out_overflow: 658out_overflow:
657 print_overflow_msg(__func__, xdr); 659 print_overflow_msg(__func__, xdr);
658out_overflow_exit: 660out_overflow_exit:
659 return ERR_PTR(-EIO); 661 return ERR_PTR(-EAGAIN);
660} 662}
661 663
662/* 664/*
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index b7a204ff6fe1..9f1826b012e6 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6208,6 +6208,10 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6208 if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID) 6208 if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID)
6209 entry->ino = entry->fattr->fileid; 6209 entry->ino = entry->fattr->fileid;
6210 6210
6211 entry->d_type = DT_UNKNOWN;
6212 if (entry->fattr->valid & NFS_ATTR_FATTR_TYPE)
6213 entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
6214
6211 if (verify_attr_len(xdr, p, len) < 0) 6215 if (verify_attr_len(xdr, p, len) < 0)
6212 goto out_overflow; 6216 goto out_overflow;
6213 6217
@@ -6221,7 +6225,7 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6221 6225
6222out_overflow: 6226out_overflow:
6223 print_overflow_msg(__func__, xdr); 6227 print_overflow_msg(__func__, xdr);
6224 return ERR_PTR(-EIO); 6228 return ERR_PTR(-EAGAIN);
6225} 6229}
6226 6230
6227/* 6231/*
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 49c844dab33a..59e5fe742f7b 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -335,7 +335,7 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
335 * the device at this point. 335 * the device at this point.
336 * 336 *
337 * To prevent nilfs_dat_translate() from returning the 337 * To prevent nilfs_dat_translate() from returning the
338 * uncommited block number, this makes a copy of the entry 338 * uncommitted block number, this makes a copy of the entry
339 * buffer and redirects nilfs_dat_translate() to the copy. 339 * buffer and redirects nilfs_dat_translate() to the copy.
340 */ 340 */
341 if (!buffer_nilfs_redirected(entry_bh)) { 341 if (!buffer_nilfs_redirected(entry_bh)) {
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 3e90f86d5bfe..e00d9457c256 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -349,8 +349,8 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb,
349 ino = vdesc->vd_ino; 349 ino = vdesc->vd_ino;
350 cno = vdesc->vd_cno; 350 cno = vdesc->vd_cno;
351 inode = nilfs_iget_for_gc(sb, ino, cno); 351 inode = nilfs_iget_for_gc(sb, ino, cno);
352 if (unlikely(inode == NULL)) { 352 if (IS_ERR(inode)) {
353 ret = -ENOMEM; 353 ret = PTR_ERR(inode);
354 goto failed; 354 goto failed;
355 } 355 }
356 do { 356 do {
diff --git a/fs/pipe.c b/fs/pipe.c
index a8012a955720..04629f36e397 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1199,12 +1199,24 @@ int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
1199 return ret; 1199 return ret;
1200} 1200}
1201 1201
1202/*
1203 * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
1204 * location, so checking ->i_pipe is not enough to verify that this is a
1205 * pipe.
1206 */
1207struct pipe_inode_info *get_pipe_info(struct file *file)
1208{
1209 struct inode *i = file->f_path.dentry->d_inode;
1210
1211 return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL;
1212}
1213
1202long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) 1214long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
1203{ 1215{
1204 struct pipe_inode_info *pipe; 1216 struct pipe_inode_info *pipe;
1205 long ret; 1217 long ret;
1206 1218
1207 pipe = file->f_path.dentry->d_inode->i_pipe; 1219 pipe = get_pipe_info(file);
1208 if (!pipe) 1220 if (!pipe)
1209 return -EBADF; 1221 return -EBADF;
1210 1222
diff --git a/fs/splice.c b/fs/splice.c
index 8f1dfaecc8f0..ce2f02579e35 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1311,18 +1311,6 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
1311static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, 1311static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
1312 struct pipe_inode_info *opipe, 1312 struct pipe_inode_info *opipe,
1313 size_t len, unsigned int flags); 1313 size_t len, unsigned int flags);
1314/*
1315 * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
1316 * location, so checking ->i_pipe is not enough to verify that this is a
1317 * pipe.
1318 */
1319static inline struct pipe_inode_info *pipe_info(struct inode *inode)
1320{
1321 if (S_ISFIFO(inode->i_mode))
1322 return inode->i_pipe;
1323
1324 return NULL;
1325}
1326 1314
1327/* 1315/*
1328 * Determine where to splice to/from. 1316 * Determine where to splice to/from.
@@ -1336,8 +1324,8 @@ static long do_splice(struct file *in, loff_t __user *off_in,
1336 loff_t offset, *off; 1324 loff_t offset, *off;
1337 long ret; 1325 long ret;
1338 1326
1339 ipipe = pipe_info(in->f_path.dentry->d_inode); 1327 ipipe = get_pipe_info(in);
1340 opipe = pipe_info(out->f_path.dentry->d_inode); 1328 opipe = get_pipe_info(out);
1341 1329
1342 if (ipipe && opipe) { 1330 if (ipipe && opipe) {
1343 if (off_in || off_out) 1331 if (off_in || off_out)
@@ -1555,7 +1543,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
1555 int error; 1543 int error;
1556 long ret; 1544 long ret;
1557 1545
1558 pipe = pipe_info(file->f_path.dentry->d_inode); 1546 pipe = get_pipe_info(file);
1559 if (!pipe) 1547 if (!pipe)
1560 return -EBADF; 1548 return -EBADF;
1561 1549
@@ -1642,7 +1630,7 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
1642 }; 1630 };
1643 long ret; 1631 long ret;
1644 1632
1645 pipe = pipe_info(file->f_path.dentry->d_inode); 1633 pipe = get_pipe_info(file);
1646 if (!pipe) 1634 if (!pipe)
1647 return -EBADF; 1635 return -EBADF;
1648 1636
@@ -2022,8 +2010,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,
2022static long do_tee(struct file *in, struct file *out, size_t len, 2010static long do_tee(struct file *in, struct file *out, size_t len,
2023 unsigned int flags) 2011 unsigned int flags)
2024{ 2012{
2025 struct pipe_inode_info *ipipe = pipe_info(in->f_path.dentry->d_inode); 2013 struct pipe_inode_info *ipipe = get_pipe_info(in);
2026 struct pipe_inode_info *opipe = pipe_info(out->f_path.dentry->d_inode); 2014 struct pipe_inode_info *opipe = get_pipe_info(out);
2027 int ret = -EINVAL; 2015 int ret = -EINVAL;
2028 2016
2029 /* 2017 /*
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index a7d9dc21391d..7b776d71d36d 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -175,10 +175,21 @@ static inline int set_msi_sid(struct irte *irte, struct pci_dev *dev)
175 return 0; 175 return 0;
176} 176}
177 177
178#define enable_intr_remapping(mode) (-1)
179#define disable_intr_remapping() (0)
180#define reenable_intr_remapping(mode) (0)
181#define intr_remapping_enabled (0) 178#define intr_remapping_enabled (0)
179
180static inline int enable_intr_remapping(int eim)
181{
182 return -1;
183}
184
185static inline void disable_intr_remapping(void)
186{
187}
188
189static inline int reenable_intr_remapping(int eim)
190{
191 return 0;
192}
182#endif 193#endif
183 194
184/* Can't use the common MSI interrupt functions 195/* Can't use the common MSI interrupt functions
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index a2d6ea49ec56..d1e55fed2c7d 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -33,6 +33,8 @@ enum bp_type_idx {
33 33
34#ifdef CONFIG_HAVE_HW_BREAKPOINT 34#ifdef CONFIG_HAVE_HW_BREAKPOINT
35 35
36extern int __init init_hw_breakpoint(void);
37
36static inline void hw_breakpoint_init(struct perf_event_attr *attr) 38static inline void hw_breakpoint_init(struct perf_event_attr *attr)
37{ 39{
38 memset(attr, 0, sizeof(*attr)); 40 memset(attr, 0, sizeof(*attr));
@@ -108,6 +110,8 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
108 110
109#else /* !CONFIG_HAVE_HW_BREAKPOINT */ 111#else /* !CONFIG_HAVE_HW_BREAKPOINT */
110 112
113static inline int __init init_hw_breakpoint(void) { return 0; }
114
111static inline struct perf_event * 115static inline struct perf_event *
112register_user_hw_breakpoint(struct perf_event_attr *attr, 116register_user_hw_breakpoint(struct perf_event_attr *attr,
113 perf_overflow_handler_t triggered, 117 perf_overflow_handler_t triggered,
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 6d87f68ce4b6..30f6fad99a58 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -168,6 +168,7 @@ struct mmc_host {
168 /* DDR mode at 1.8V */ 168 /* DDR mode at 1.8V */
169#define MMC_CAP_1_2V_DDR (1 << 12) /* can support */ 169#define MMC_CAP_1_2V_DDR (1 << 12) /* can support */
170 /* DDR mode at 1.2V */ 170 /* DDR mode at 1.2V */
171#define MMC_CAP_POWER_OFF_CARD (1 << 13) /* Can power off after boot */
171 172
172 mmc_pm_flag_t pm_caps; /* supported pm features */ 173 mmc_pm_flag_t pm_caps; /* supported pm features */
173 174
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index ba6cc8f223c9..80f07198a31a 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -483,6 +483,7 @@ struct nfs_entry {
483 int eof; 483 int eof;
484 struct nfs_fh * fh; 484 struct nfs_fh * fh;
485 struct nfs_fattr * fattr; 485 struct nfs_fattr * fattr;
486 unsigned char d_type;
486}; 487};
487 488
488/* 489/*
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index c6bcfe93b9ca..d369b533dc2a 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2441,6 +2441,7 @@
2441#define PCI_DEVICE_ID_INTEL_MFD_SDIO2 0x0822 2441#define PCI_DEVICE_ID_INTEL_MFD_SDIO2 0x0822
2442#define PCI_DEVICE_ID_INTEL_MFD_EMMC0 0x0823 2442#define PCI_DEVICE_ID_INTEL_MFD_EMMC0 0x0823
2443#define PCI_DEVICE_ID_INTEL_MFD_EMMC1 0x0824 2443#define PCI_DEVICE_ID_INTEL_MFD_EMMC1 0x0824
2444#define PCI_DEVICE_ID_INTEL_MRST_SD2 0x084F
2444#define PCI_DEVICE_ID_INTEL_I960 0x0960 2445#define PCI_DEVICE_ID_INTEL_I960 0x0960
2445#define PCI_DEVICE_ID_INTEL_I960RM 0x0962 2446#define PCI_DEVICE_ID_INTEL_I960RM 0x0962
2446#define PCI_DEVICE_ID_INTEL_8257X_SOL 0x1062 2447#define PCI_DEVICE_ID_INTEL_8257X_SOL 0x1062
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 40150f345982..de2c41758e29 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -850,6 +850,7 @@ struct perf_event_context {
850 int nr_active; 850 int nr_active;
851 int is_active; 851 int is_active;
852 int nr_stat; 852 int nr_stat;
853 int rotate_disable;
853 atomic_t refcount; 854 atomic_t refcount;
854 struct task_struct *task; 855 struct task_struct *task;
855 856
@@ -908,20 +909,6 @@ extern int perf_num_counters(void);
908extern const char *perf_pmu_name(void); 909extern const char *perf_pmu_name(void);
909extern void __perf_event_task_sched_in(struct task_struct *task); 910extern void __perf_event_task_sched_in(struct task_struct *task);
910extern void __perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); 911extern void __perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
911
912extern atomic_t perf_task_events;
913
914static inline void perf_event_task_sched_in(struct task_struct *task)
915{
916 COND_STMT(&perf_task_events, __perf_event_task_sched_in(task));
917}
918
919static inline
920void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
921{
922 COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next));
923}
924
925extern int perf_event_init_task(struct task_struct *child); 912extern int perf_event_init_task(struct task_struct *child);
926extern void perf_event_exit_task(struct task_struct *child); 913extern void perf_event_exit_task(struct task_struct *child);
927extern void perf_event_free_task(struct task_struct *task); 914extern void perf_event_free_task(struct task_struct *task);
@@ -1030,6 +1017,21 @@ have_event:
1030 __perf_sw_event(event_id, nr, nmi, regs, addr); 1017 __perf_sw_event(event_id, nr, nmi, regs, addr);
1031} 1018}
1032 1019
1020extern atomic_t perf_task_events;
1021
1022static inline void perf_event_task_sched_in(struct task_struct *task)
1023{
1024 COND_STMT(&perf_task_events, __perf_event_task_sched_in(task));
1025}
1026
1027static inline
1028void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
1029{
1030 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
1031
1032 COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next));
1033}
1034
1033extern void perf_event_mmap(struct vm_area_struct *vma); 1035extern void perf_event_mmap(struct vm_area_struct *vma);
1034extern struct perf_guest_info_callbacks *perf_guest_cbs; 1036extern struct perf_guest_info_callbacks *perf_guest_cbs;
1035extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); 1037extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 445796945ac9..bb27d7ec2fb9 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -160,5 +160,6 @@ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
160 160
161/* for F_SETPIPE_SZ and F_GETPIPE_SZ */ 161/* for F_SETPIPE_SZ and F_GETPIPE_SZ */
162long pipe_fcntl(struct file *, unsigned int, unsigned long arg); 162long pipe_fcntl(struct file *, unsigned int, unsigned long arg);
163struct pipe_inode_info *get_pipe_info(struct file *file);
163 164
164#endif 165#endif
diff --git a/include/sound/sh_fsi.h b/include/sound/sh_fsi.h
index fa60cbda90a4..d79894192ae3 100644
--- a/include/sound/sh_fsi.h
+++ b/include/sound/sh_fsi.h
@@ -85,7 +85,9 @@
85 * ACK_MD (FSI2) 85 * ACK_MD (FSI2)
86 * CKG1 (FSI) 86 * CKG1 (FSI)
87 * 87 *
88 * err: return value < 0 88 * err : return value < 0
89 * no change : return value == 0
90 * change xMD : return value > 0
89 * 91 *
90 * 0x-00000AB 92 * 0x-00000AB
91 * 93 *
@@ -111,7 +113,7 @@
111struct sh_fsi_platform_info { 113struct sh_fsi_platform_info {
112 unsigned long porta_flags; 114 unsigned long porta_flags;
113 unsigned long portb_flags; 115 unsigned long portb_flags;
114 int (*set_rate)(int is_porta, int rate); /* for master mode */ 116 int (*set_rate)(struct device *dev, int is_porta, int rate, int enable);
115}; 117};
116 118
117#endif /* __SOUND_FSI_H */ 119#endif /* __SOUND_FSI_H */
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 2c9120f0afca..e5325825aeb6 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -620,7 +620,7 @@ static struct pmu perf_breakpoint = {
620 .read = hw_breakpoint_pmu_read, 620 .read = hw_breakpoint_pmu_read,
621}; 621};
622 622
623static int __init init_hw_breakpoint(void) 623int __init init_hw_breakpoint(void)
624{ 624{
625 unsigned int **task_bp_pinned; 625 unsigned int **task_bp_pinned;
626 int cpu, err_cpu; 626 int cpu, err_cpu;
@@ -655,6 +655,5 @@ static int __init init_hw_breakpoint(void)
655 655
656 return -ENOMEM; 656 return -ENOMEM;
657} 657}
658core_initcall(init_hw_breakpoint);
659 658
660 659
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index f16763ff8481..90f881904bb1 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -145,7 +145,9 @@ void irq_work_run(void)
145 * Clear the BUSY bit and return to the free state if 145 * Clear the BUSY bit and return to the free state if
146 * no-one else claimed it meanwhile. 146 * no-one else claimed it meanwhile.
147 */ 147 */
148 cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL); 148 (void)cmpxchg(&entry->next,
149 next_flags(NULL, IRQ_WORK_BUSY),
150 NULL);
149 } 151 }
150} 152}
151EXPORT_SYMBOL_GPL(irq_work_run); 153EXPORT_SYMBOL_GPL(irq_work_run);
diff --git a/kernel/module.c b/kernel/module.c
index 437a74a7524a..d190664f25ff 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2326,6 +2326,18 @@ static void find_module_sections(struct module *mod, struct load_info *info)
2326 kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) * 2326 kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) *
2327 mod->num_trace_events, GFP_KERNEL); 2327 mod->num_trace_events, GFP_KERNEL);
2328#endif 2328#endif
2329#ifdef CONFIG_TRACING
2330 mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt",
2331 sizeof(*mod->trace_bprintk_fmt_start),
2332 &mod->num_trace_bprintk_fmt);
2333 /*
2334 * This section contains pointers to allocated objects in the trace
2335 * code and not scanning it leads to false positives.
2336 */
2337 kmemleak_scan_area(mod->trace_bprintk_fmt_start,
2338 sizeof(*mod->trace_bprintk_fmt_start) *
2339 mod->num_trace_bprintk_fmt, GFP_KERNEL);
2340#endif
2329#ifdef CONFIG_FTRACE_MCOUNT_RECORD 2341#ifdef CONFIG_FTRACE_MCOUNT_RECORD
2330 /* sechdrs[0].sh_size is always zero */ 2342 /* sechdrs[0].sh_size is always zero */
2331 mod->ftrace_callsites = section_objs(info, "__mcount_loc", 2343 mod->ftrace_callsites = section_objs(info, "__mcount_loc",
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index cb6c0d2af68f..eac7e3364335 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -31,6 +31,7 @@
31#include <linux/kernel_stat.h> 31#include <linux/kernel_stat.h>
32#include <linux/perf_event.h> 32#include <linux/perf_event.h>
33#include <linux/ftrace_event.h> 33#include <linux/ftrace_event.h>
34#include <linux/hw_breakpoint.h>
34 35
35#include <asm/irq_regs.h> 36#include <asm/irq_regs.h>
36 37
@@ -1286,8 +1287,6 @@ void __perf_event_task_sched_out(struct task_struct *task,
1286{ 1287{
1287 int ctxn; 1288 int ctxn;
1288 1289
1289 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
1290
1291 for_each_task_context_nr(ctxn) 1290 for_each_task_context_nr(ctxn)
1292 perf_event_context_sched_out(task, ctxn, next); 1291 perf_event_context_sched_out(task, ctxn, next);
1293} 1292}
@@ -1621,8 +1620,12 @@ static void rotate_ctx(struct perf_event_context *ctx)
1621{ 1620{
1622 raw_spin_lock(&ctx->lock); 1621 raw_spin_lock(&ctx->lock);
1623 1622
1624 /* Rotate the first entry last of non-pinned groups */ 1623 /*
1625 list_rotate_left(&ctx->flexible_groups); 1624 * Rotate the first entry last of non-pinned groups. Rotation might be
1625 * disabled by the inheritance code.
1626 */
1627 if (!ctx->rotate_disable)
1628 list_rotate_left(&ctx->flexible_groups);
1626 1629
1627 raw_spin_unlock(&ctx->lock); 1630 raw_spin_unlock(&ctx->lock);
1628} 1631}
@@ -2234,11 +2237,6 @@ int perf_event_release_kernel(struct perf_event *event)
2234 raw_spin_unlock_irq(&ctx->lock); 2237 raw_spin_unlock_irq(&ctx->lock);
2235 mutex_unlock(&ctx->mutex); 2238 mutex_unlock(&ctx->mutex);
2236 2239
2237 mutex_lock(&event->owner->perf_event_mutex);
2238 list_del_init(&event->owner_entry);
2239 mutex_unlock(&event->owner->perf_event_mutex);
2240 put_task_struct(event->owner);
2241
2242 free_event(event); 2240 free_event(event);
2243 2241
2244 return 0; 2242 return 0;
@@ -2251,9 +2249,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
2251static int perf_release(struct inode *inode, struct file *file) 2249static int perf_release(struct inode *inode, struct file *file)
2252{ 2250{
2253 struct perf_event *event = file->private_data; 2251 struct perf_event *event = file->private_data;
2252 struct task_struct *owner;
2254 2253
2255 file->private_data = NULL; 2254 file->private_data = NULL;
2256 2255
2256 rcu_read_lock();
2257 owner = ACCESS_ONCE(event->owner);
2258 /*
2259 * Matches the smp_wmb() in perf_event_exit_task(). If we observe
2260 * !owner it means the list deletion is complete and we can indeed
2261 * free this event, otherwise we need to serialize on
2262 * owner->perf_event_mutex.
2263 */
2264 smp_read_barrier_depends();
2265 if (owner) {
2266 /*
2267 * Since delayed_put_task_struct() also drops the last
2268 * task reference we can safely take a new reference
2269 * while holding the rcu_read_lock().
2270 */
2271 get_task_struct(owner);
2272 }
2273 rcu_read_unlock();
2274
2275 if (owner) {
2276 mutex_lock(&owner->perf_event_mutex);
2277 /*
2278 * We have to re-check the event->owner field, if it is cleared
2279 * we raced with perf_event_exit_task(), acquiring the mutex
2280 * ensured they're done, and we can proceed with freeing the
2281 * event.
2282 */
2283 if (event->owner)
2284 list_del_init(&event->owner_entry);
2285 mutex_unlock(&owner->perf_event_mutex);
2286 put_task_struct(owner);
2287 }
2288
2257 return perf_event_release_kernel(event); 2289 return perf_event_release_kernel(event);
2258} 2290}
2259 2291
@@ -5677,7 +5709,7 @@ SYSCALL_DEFINE5(perf_event_open,
5677 mutex_unlock(&ctx->mutex); 5709 mutex_unlock(&ctx->mutex);
5678 5710
5679 event->owner = current; 5711 event->owner = current;
5680 get_task_struct(current); 5712
5681 mutex_lock(&current->perf_event_mutex); 5713 mutex_lock(&current->perf_event_mutex);
5682 list_add_tail(&event->owner_entry, &current->perf_event_list); 5714 list_add_tail(&event->owner_entry, &current->perf_event_list);
5683 mutex_unlock(&current->perf_event_mutex); 5715 mutex_unlock(&current->perf_event_mutex);
@@ -5745,12 +5777,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
5745 ++ctx->generation; 5777 ++ctx->generation;
5746 mutex_unlock(&ctx->mutex); 5778 mutex_unlock(&ctx->mutex);
5747 5779
5748 event->owner = current;
5749 get_task_struct(current);
5750 mutex_lock(&current->perf_event_mutex);
5751 list_add_tail(&event->owner_entry, &current->perf_event_list);
5752 mutex_unlock(&current->perf_event_mutex);
5753
5754 return event; 5780 return event;
5755 5781
5756err_free: 5782err_free:
@@ -5901,8 +5927,24 @@ again:
5901 */ 5927 */
5902void perf_event_exit_task(struct task_struct *child) 5928void perf_event_exit_task(struct task_struct *child)
5903{ 5929{
5930 struct perf_event *event, *tmp;
5904 int ctxn; 5931 int ctxn;
5905 5932
5933 mutex_lock(&child->perf_event_mutex);
5934 list_for_each_entry_safe(event, tmp, &child->perf_event_list,
5935 owner_entry) {
5936 list_del_init(&event->owner_entry);
5937
5938 /*
5939 * Ensure the list deletion is visible before we clear
5940 * the owner, closes a race against perf_release() where
5941 * we need to serialize on the owner->perf_event_mutex.
5942 */
5943 smp_wmb();
5944 event->owner = NULL;
5945 }
5946 mutex_unlock(&child->perf_event_mutex);
5947
5906 for_each_task_context_nr(ctxn) 5948 for_each_task_context_nr(ctxn)
5907 perf_event_exit_task_context(child, ctxn); 5949 perf_event_exit_task_context(child, ctxn);
5908} 5950}
@@ -6122,6 +6164,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
6122 struct perf_event *event; 6164 struct perf_event *event;
6123 struct task_struct *parent = current; 6165 struct task_struct *parent = current;
6124 int inherited_all = 1; 6166 int inherited_all = 1;
6167 unsigned long flags;
6125 int ret = 0; 6168 int ret = 0;
6126 6169
6127 child->perf_event_ctxp[ctxn] = NULL; 6170 child->perf_event_ctxp[ctxn] = NULL;
@@ -6162,6 +6205,15 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
6162 break; 6205 break;
6163 } 6206 }
6164 6207
6208 /*
6209 * We can't hold ctx->lock when iterating the ->flexible_group list due
6210 * to allocations, but we need to prevent rotation because
6211 * rotate_ctx() will change the list from interrupt context.
6212 */
6213 raw_spin_lock_irqsave(&parent_ctx->lock, flags);
6214 parent_ctx->rotate_disable = 1;
6215 raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
6216
6165 list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { 6217 list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
6166 ret = inherit_task_group(event, parent, parent_ctx, 6218 ret = inherit_task_group(event, parent, parent_ctx,
6167 child, ctxn, &inherited_all); 6219 child, ctxn, &inherited_all);
@@ -6169,6 +6221,10 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
6169 break; 6221 break;
6170 } 6222 }
6171 6223
6224 raw_spin_lock_irqsave(&parent_ctx->lock, flags);
6225 parent_ctx->rotate_disable = 0;
6226 raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
6227
6172 child_ctx = child->perf_event_ctxp[ctxn]; 6228 child_ctx = child->perf_event_ctxp[ctxn];
6173 6229
6174 if (child_ctx && inherited_all) { 6230 if (child_ctx && inherited_all) {
@@ -6321,6 +6377,8 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
6321 6377
6322void __init perf_event_init(void) 6378void __init perf_event_init(void)
6323{ 6379{
6380 int ret;
6381
6324 perf_event_init_all_cpus(); 6382 perf_event_init_all_cpus();
6325 init_srcu_struct(&pmus_srcu); 6383 init_srcu_struct(&pmus_srcu);
6326 perf_pmu_register(&perf_swevent); 6384 perf_pmu_register(&perf_swevent);
@@ -6328,4 +6386,7 @@ void __init perf_event_init(void)
6328 perf_pmu_register(&perf_task_clock); 6386 perf_pmu_register(&perf_task_clock);
6329 perf_tp_register(); 6387 perf_tp_register();
6330 perf_cpu_notifier(perf_cpu_notify); 6388 perf_cpu_notifier(perf_cpu_notify);
6389
6390 ret = init_hw_breakpoint();
6391 WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
6331} 6392}
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 6842eeba5879..05bb7173850e 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -37,13 +37,13 @@ static int check_clock(const clockid_t which_clock)
37 if (pid == 0) 37 if (pid == 0)
38 return 0; 38 return 0;
39 39
40 read_lock(&tasklist_lock); 40 rcu_read_lock();
41 p = find_task_by_vpid(pid); 41 p = find_task_by_vpid(pid);
42 if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ? 42 if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ?
43 same_thread_group(p, current) : thread_group_leader(p))) { 43 same_thread_group(p, current) : has_group_leader_pid(p))) {
44 error = -EINVAL; 44 error = -EINVAL;
45 } 45 }
46 read_unlock(&tasklist_lock); 46 rcu_read_unlock();
47 47
48 return error; 48 return error;
49} 49}
@@ -390,7 +390,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
390 390
391 INIT_LIST_HEAD(&new_timer->it.cpu.entry); 391 INIT_LIST_HEAD(&new_timer->it.cpu.entry);
392 392
393 read_lock(&tasklist_lock); 393 rcu_read_lock();
394 if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) { 394 if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {
395 if (pid == 0) { 395 if (pid == 0) {
396 p = current; 396 p = current;
@@ -404,7 +404,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
404 p = current->group_leader; 404 p = current->group_leader;
405 } else { 405 } else {
406 p = find_task_by_vpid(pid); 406 p = find_task_by_vpid(pid);
407 if (p && !thread_group_leader(p)) 407 if (p && !has_group_leader_pid(p))
408 p = NULL; 408 p = NULL;
409 } 409 }
410 } 410 }
@@ -414,7 +414,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
414 } else { 414 } else {
415 ret = -EINVAL; 415 ret = -EINVAL;
416 } 416 }
417 read_unlock(&tasklist_lock); 417 rcu_read_unlock();
418 418
419 return ret; 419 return ret;
420} 420}
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 52ab113d8bb9..00ebd7686676 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1758,10 +1758,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p,
1758 set_task_cpu(p, this_cpu); 1758 set_task_cpu(p, this_cpu);
1759 activate_task(this_rq, p, 0); 1759 activate_task(this_rq, p, 0);
1760 check_preempt_curr(this_rq, p, 0); 1760 check_preempt_curr(this_rq, p, 0);
1761
1762 /* re-arm NEWIDLE balancing when moving tasks */
1763 src_rq->avg_idle = this_rq->avg_idle = 2*sysctl_sched_migration_cost;
1764 this_rq->idle_stamp = 0;
1765} 1761}
1766 1762
1767/* 1763/*
@@ -3219,8 +3215,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
3219 interval = msecs_to_jiffies(sd->balance_interval); 3215 interval = msecs_to_jiffies(sd->balance_interval);
3220 if (time_after(next_balance, sd->last_balance + interval)) 3216 if (time_after(next_balance, sd->last_balance + interval))
3221 next_balance = sd->last_balance + interval; 3217 next_balance = sd->last_balance + interval;
3222 if (pulled_task) 3218 if (pulled_task) {
3219 this_rq->idle_stamp = 0;
3223 break; 3220 break;
3221 }
3224 } 3222 }
3225 3223
3226 raw_spin_lock(&this_rq->lock); 3224 raw_spin_lock(&this_rq->lock);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 042084157980..c380612273bf 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1283,6 +1283,8 @@ void trace_dump_stack(void)
1283 __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); 1283 __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
1284} 1284}
1285 1285
1286static DEFINE_PER_CPU(int, user_stack_count);
1287
1286void 1288void
1287ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) 1289ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1288{ 1290{
@@ -1301,6 +1303,18 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1301 if (unlikely(in_nmi())) 1303 if (unlikely(in_nmi()))
1302 return; 1304 return;
1303 1305
1306 /*
1307 * prevent recursion, since the user stack tracing may
1308 * trigger other kernel events.
1309 */
1310 preempt_disable();
1311 if (__this_cpu_read(user_stack_count))
1312 goto out;
1313
1314 __this_cpu_inc(user_stack_count);
1315
1316
1317
1304 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, 1318 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1305 sizeof(*entry), flags, pc); 1319 sizeof(*entry), flags, pc);
1306 if (!event) 1320 if (!event)
@@ -1318,6 +1332,11 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1318 save_stack_trace_user(&trace); 1332 save_stack_trace_user(&trace);
1319 if (!filter_check_discard(call, entry, buffer, event)) 1333 if (!filter_check_discard(call, entry, buffer, event))
1320 ring_buffer_unlock_commit(buffer, event); 1334 ring_buffer_unlock_commit(buffer, event);
1335
1336 __this_cpu_dec(user_stack_count);
1337
1338 out:
1339 preempt_enable();
1321} 1340}
1322 1341
1323#ifdef UNUSED 1342#ifdef UNUSED
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 9dab9573be41..92ce94f5146b 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -989,20 +989,26 @@ call_refreshresult(struct rpc_task *task)
989 dprint_status(task); 989 dprint_status(task);
990 990
991 task->tk_status = 0; 991 task->tk_status = 0;
992 task->tk_action = call_allocate; 992 task->tk_action = call_refresh;
993 if (status >= 0 && rpcauth_uptodatecred(task))
994 return;
995 switch (status) { 993 switch (status) {
996 case -EACCES: 994 case 0:
997 rpc_exit(task, -EACCES); 995 if (rpcauth_uptodatecred(task))
998 return; 996 task->tk_action = call_allocate;
999 case -ENOMEM:
1000 rpc_exit(task, -ENOMEM);
1001 return; 997 return;
1002 case -ETIMEDOUT: 998 case -ETIMEDOUT:
1003 rpc_delay(task, 3*HZ); 999 rpc_delay(task, 3*HZ);
1000 case -EAGAIN:
1001 status = -EACCES;
1002 if (!task->tk_cred_retry)
1003 break;
1004 task->tk_cred_retry--;
1005 dprintk("RPC: %5u %s: retry refresh creds\n",
1006 task->tk_pid, __func__);
1007 return;
1004 } 1008 }
1005 task->tk_action = call_refresh; 1009 dprintk("RPC: %5u %s: refresh creds failed with error %d\n",
1010 task->tk_pid, __func__, status);
1011 rpc_exit(task, status);
1006} 1012}
1007 1013
1008/* 1014/*
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 0ac6aed0c889..886d7c72936e 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -1614,6 +1614,7 @@ do_sku:
1614 spec->init_amp = ALC_INIT_GPIO3; 1614 spec->init_amp = ALC_INIT_GPIO3;
1615 break; 1615 break;
1616 case 5: 1616 case 5:
1617 default:
1617 spec->init_amp = ALC_INIT_DEFAULT; 1618 spec->init_amp = ALC_INIT_DEFAULT;
1618 break; 1619 break;
1619 } 1620 }
@@ -2014,6 +2015,36 @@ static struct hda_verb alc888_acer_aspire_6530g_verbs[] = {
2014}; 2015};
2015 2016
2016/* 2017/*
2018 *ALC888 Acer Aspire 7730G model
2019 */
2020
2021static struct hda_verb alc888_acer_aspire_7730G_verbs[] = {
2022/* Bias voltage on for external mic port */
2023 {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN | PIN_VREF80},
2024/* Front Mic: set to PIN_IN (empty by default) */
2025 {0x12, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
2026/* Unselect Front Mic by default in input mixer 3 */
2027 {0x22, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0xb)},
2028/* Enable unsolicited event for HP jack */
2029 {0x15, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_HP_EVENT | AC_USRSP_EN},
2030/* Enable speaker output */
2031 {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
2032 {0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
2033 {0x14, AC_VERB_SET_EAPD_BTLENABLE, 2},
2034/* Enable headphone output */
2035 {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT | PIN_HP},
2036 {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
2037 {0x15, AC_VERB_SET_CONNECT_SEL, 0x00},
2038 {0x15, AC_VERB_SET_EAPD_BTLENABLE, 2},
2039/*Enable internal subwoofer */
2040 {0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
2041 {0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
2042 {0x17, AC_VERB_SET_CONNECT_SEL, 0x02},
2043 {0x17, AC_VERB_SET_EAPD_BTLENABLE, 2},
2044 { }
2045};
2046
2047/*
2017 * ALC889 Acer Aspire 8930G model 2048 * ALC889 Acer Aspire 8930G model
2018 */ 2049 */
2019 2050
@@ -2200,6 +2231,16 @@ static void alc888_acer_aspire_6530g_setup(struct hda_codec *codec)
2200 spec->autocfg.speaker_pins[2] = 0x17; 2231 spec->autocfg.speaker_pins[2] = 0x17;
2201} 2232}
2202 2233
2234static void alc888_acer_aspire_7730g_setup(struct hda_codec *codec)
2235{
2236 struct alc_spec *spec = codec->spec;
2237
2238 spec->autocfg.hp_pins[0] = 0x15;
2239 spec->autocfg.speaker_pins[0] = 0x14;
2240 spec->autocfg.speaker_pins[1] = 0x16;
2241 spec->autocfg.speaker_pins[2] = 0x17;
2242}
2243
2203static void alc889_acer_aspire_8930g_setup(struct hda_codec *codec) 2244static void alc889_acer_aspire_8930g_setup(struct hda_codec *codec)
2204{ 2245{
2205 struct alc_spec *spec = codec->spec; 2246 struct alc_spec *spec = codec->spec;
@@ -9524,13 +9565,6 @@ static struct hda_verb alc883_acer_eapd_verbs[] = {
9524 { } 9565 { }
9525}; 9566};
9526 9567
9527static struct hda_verb alc888_acer_aspire_7730G_verbs[] = {
9528 {0x15, AC_VERB_SET_CONNECT_SEL, 0x00},
9529 {0x17, AC_VERB_SET_CONNECT_SEL, 0x02},
9530 {0x15, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_HP_EVENT | AC_USRSP_EN},
9531 { } /* end */
9532};
9533
9534static void alc888_6st_dell_setup(struct hda_codec *codec) 9568static void alc888_6st_dell_setup(struct hda_codec *codec)
9535{ 9569{
9536 struct alc_spec *spec = codec->spec; 9570 struct alc_spec *spec = codec->spec;
@@ -10328,7 +10362,7 @@ static struct alc_config_preset alc882_presets[] = {
10328 .const_channel_count = 6, 10362 .const_channel_count = 6,
10329 .input_mux = &alc883_capture_source, 10363 .input_mux = &alc883_capture_source,
10330 .unsol_event = alc_automute_amp_unsol_event, 10364 .unsol_event = alc_automute_amp_unsol_event,
10331 .setup = alc888_acer_aspire_6530g_setup, 10365 .setup = alc888_acer_aspire_7730g_setup,
10332 .init_hook = alc_automute_amp, 10366 .init_hook = alc_automute_amp,
10333 }, 10367 },
10334 [ALC883_MEDION] = { 10368 [ALC883_MEDION] = {
@@ -16910,7 +16944,7 @@ static struct alc_config_preset alc861vd_presets[] = {
16910static int alc861vd_auto_create_input_ctls(struct hda_codec *codec, 16944static int alc861vd_auto_create_input_ctls(struct hda_codec *codec,
16911 const struct auto_pin_cfg *cfg) 16945 const struct auto_pin_cfg *cfg)
16912{ 16946{
16913 return alc_auto_create_input_ctls(codec, cfg, 0x15, 0x09, 0); 16947 return alc_auto_create_input_ctls(codec, cfg, 0x0b, 0x22, 0);
16914} 16948}
16915 16949
16916 16950
@@ -18964,6 +18998,8 @@ static inline hda_nid_t alc662_mix_to_dac(hda_nid_t nid)
18964 return 0x02; 18998 return 0x02;
18965 else if (nid >= 0x0c && nid <= 0x0e) 18999 else if (nid >= 0x0c && nid <= 0x0e)
18966 return nid - 0x0c + 0x02; 19000 return nid - 0x0c + 0x02;
19001 else if (nid == 0x26) /* ALC887-VD has this DAC too */
19002 return 0x25;
18967 else 19003 else
18968 return 0; 19004 return 0;
18969} 19005}
@@ -18972,7 +19008,7 @@ static inline hda_nid_t alc662_mix_to_dac(hda_nid_t nid)
18972static hda_nid_t alc662_dac_to_mix(struct hda_codec *codec, hda_nid_t pin, 19008static hda_nid_t alc662_dac_to_mix(struct hda_codec *codec, hda_nid_t pin,
18973 hda_nid_t dac) 19009 hda_nid_t dac)
18974{ 19010{
18975 hda_nid_t mix[4]; 19011 hda_nid_t mix[5];
18976 int i, num; 19012 int i, num;
18977 19013
18978 num = snd_hda_get_connections(codec, pin, mix, ARRAY_SIZE(mix)); 19014 num = snd_hda_get_connections(codec, pin, mix, ARRAY_SIZE(mix));
diff --git a/sound/soc/atmel/sam9g20_wm8731.c b/sound/soc/atmel/sam9g20_wm8731.c
index 293569dfd0ed..e521ada80542 100644
--- a/sound/soc/atmel/sam9g20_wm8731.c
+++ b/sound/soc/atmel/sam9g20_wm8731.c
@@ -222,9 +222,9 @@ static int __init at91sam9g20ek_init(void)
222 } 222 }
223 223
224 pllb = clk_get(NULL, "pllb"); 224 pllb = clk_get(NULL, "pllb");
225 if (IS_ERR(mclk)) { 225 if (IS_ERR(pllb)) {
226 printk(KERN_ERR "ASoC: Failed to get PLLB\n"); 226 printk(KERN_ERR "ASoC: Failed to get PLLB\n");
227 ret = PTR_ERR(mclk); 227 ret = PTR_ERR(pllb);
228 goto err_mclk; 228 goto err_mclk;
229 } 229 }
230 ret = clk_set_parent(mclk, pllb); 230 ret = clk_set_parent(mclk, pllb);
@@ -240,6 +240,7 @@ static int __init at91sam9g20ek_init(void)
240 if (!at91sam9g20ek_snd_device) { 240 if (!at91sam9g20ek_snd_device) {
241 printk(KERN_ERR "ASoC: Platform device allocation failed\n"); 241 printk(KERN_ERR "ASoC: Platform device allocation failed\n");
242 ret = -ENOMEM; 242 ret = -ENOMEM;
243 goto err_mclk;
243 } 244 }
244 245
245 platform_set_drvdata(at91sam9g20ek_snd_device, 246 platform_set_drvdata(at91sam9g20ek_snd_device,
@@ -248,11 +249,13 @@ static int __init at91sam9g20ek_init(void)
248 ret = platform_device_add(at91sam9g20ek_snd_device); 249 ret = platform_device_add(at91sam9g20ek_snd_device);
249 if (ret) { 250 if (ret) {
250 printk(KERN_ERR "ASoC: Platform device allocation failed\n"); 251 printk(KERN_ERR "ASoC: Platform device allocation failed\n");
251 platform_device_put(at91sam9g20ek_snd_device); 252 goto err_device_add;
252 } 253 }
253 254
254 return ret; 255 return ret;
255 256
257err_device_add:
258 platform_device_put(at91sam9g20ek_snd_device);
256err_mclk: 259err_mclk:
257 clk_put(mclk); 260 clk_put(mclk);
258 mclk = NULL; 261 mclk = NULL;
diff --git a/sound/soc/atmel/snd-soc-afeb9260.c b/sound/soc/atmel/snd-soc-afeb9260.c
index e3d283561c19..86e0f8586dc3 100644
--- a/sound/soc/atmel/snd-soc-afeb9260.c
+++ b/sound/soc/atmel/snd-soc-afeb9260.c
@@ -167,7 +167,6 @@ static int __init afeb9260_soc_init(void)
167 167
168 return 0; 168 return 0;
169err1: 169err1:
170 platform_device_del(afeb9260_snd_device);
171 platform_device_put(afeb9260_snd_device); 170 platform_device_put(afeb9260_snd_device);
172 return err; 171 return err;
173} 172}
diff --git a/sound/soc/codecs/max98088.c b/sound/soc/codecs/max98088.c
index 470cb93b1d1f..d63e28773eb1 100644
--- a/sound/soc/codecs/max98088.c
+++ b/sound/soc/codecs/max98088.c
@@ -2019,7 +2019,10 @@ err_access:
2019 2019
2020static int max98088_remove(struct snd_soc_codec *codec) 2020static int max98088_remove(struct snd_soc_codec *codec)
2021{ 2021{
2022 struct max98088_priv *max98088 = snd_soc_codec_get_drvdata(codec);
2023
2022 max98088_set_bias_level(codec, SND_SOC_BIAS_OFF); 2024 max98088_set_bias_level(codec, SND_SOC_BIAS_OFF);
2025 kfree(max98088->eq_texts);
2023 2026
2024 return 0; 2027 return 0;
2025} 2028}
diff --git a/sound/soc/codecs/stac9766.c b/sound/soc/codecs/stac9766.c
index 00d67cc8e206..061f9e5a497b 100644
--- a/sound/soc/codecs/stac9766.c
+++ b/sound/soc/codecs/stac9766.c
@@ -383,6 +383,7 @@ static struct snd_soc_codec_driver soc_codec_dev_stac9766 = {
383 .reg_cache_size = sizeof(stac9766_reg), 383 .reg_cache_size = sizeof(stac9766_reg),
384 .reg_word_size = sizeof(u16), 384 .reg_word_size = sizeof(u16),
385 .reg_cache_step = 2, 385 .reg_cache_step = 2,
386 .reg_cache_default = stac9766_reg,
386}; 387};
387 388
388static __devinit int stac9766_probe(struct platform_device *pdev) 389static __devinit int stac9766_probe(struct platform_device *pdev)
diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c
index fc687790188b..77b8f9ae29be 100644
--- a/sound/soc/codecs/tlv320aic3x.c
+++ b/sound/soc/codecs/tlv320aic3x.c
@@ -1176,7 +1176,7 @@ EXPORT_SYMBOL_GPL(aic3x_set_gpio);
1176int aic3x_get_gpio(struct snd_soc_codec *codec, int gpio) 1176int aic3x_get_gpio(struct snd_soc_codec *codec, int gpio)
1177{ 1177{
1178 u8 reg = gpio ? AIC3X_GPIO2_REG : AIC3X_GPIO1_REG; 1178 u8 reg = gpio ? AIC3X_GPIO2_REG : AIC3X_GPIO1_REG;
1179 u8 val, bit = gpio ? 2: 1; 1179 u8 val = 0, bit = gpio ? 2 : 1;
1180 1180
1181 aic3x_read(codec, reg, &val); 1181 aic3x_read(codec, reg, &val);
1182 return (val >> bit) & 1; 1182 return (val >> bit) & 1;
@@ -1204,7 +1204,7 @@ EXPORT_SYMBOL_GPL(aic3x_set_headset_detection);
1204 1204
1205int aic3x_headset_detected(struct snd_soc_codec *codec) 1205int aic3x_headset_detected(struct snd_soc_codec *codec)
1206{ 1206{
1207 u8 val; 1207 u8 val = 0;
1208 aic3x_read(codec, AIC3X_HEADSET_DETECT_CTRL_B, &val); 1208 aic3x_read(codec, AIC3X_HEADSET_DETECT_CTRL_B, &val);
1209 return (val >> 4) & 1; 1209 return (val >> 4) & 1;
1210} 1210}
@@ -1212,7 +1212,7 @@ EXPORT_SYMBOL_GPL(aic3x_headset_detected);
1212 1212
1213int aic3x_button_pressed(struct snd_soc_codec *codec) 1213int aic3x_button_pressed(struct snd_soc_codec *codec)
1214{ 1214{
1215 u8 val; 1215 u8 val = 0;
1216 aic3x_read(codec, AIC3X_HEADSET_DETECT_CTRL_B, &val); 1216 aic3x_read(codec, AIC3X_HEADSET_DETECT_CTRL_B, &val);
1217 return (val >> 5) & 1; 1217 return (val >> 5) & 1;
1218} 1218}
diff --git a/sound/soc/codecs/tpa6130a2.c b/sound/soc/codecs/tpa6130a2.c
index ee4fb201de60..d2c243095673 100644
--- a/sound/soc/codecs/tpa6130a2.c
+++ b/sound/soc/codecs/tpa6130a2.c
@@ -78,8 +78,10 @@ static int tpa6130a2_i2c_write(int reg, u8 value)
78 78
79 if (data->power_state) { 79 if (data->power_state) {
80 val = i2c_smbus_write_byte_data(tpa6130a2_client, reg, value); 80 val = i2c_smbus_write_byte_data(tpa6130a2_client, reg, value);
81 if (val < 0) 81 if (val < 0) {
82 dev_err(&tpa6130a2_client->dev, "Write failed\n"); 82 dev_err(&tpa6130a2_client->dev, "Write failed\n");
83 return val;
84 }
83 } 85 }
84 86
85 /* Either powered on or off, we save the context */ 87 /* Either powered on or off, we save the context */
diff --git a/sound/soc/codecs/wm8523.c b/sound/soc/codecs/wm8523.c
index 712ef7c76f90..9a433a5396cb 100644
--- a/sound/soc/codecs/wm8523.c
+++ b/sound/soc/codecs/wm8523.c
@@ -146,7 +146,6 @@ static int wm8523_startup(struct snd_pcm_substream *substream,
146 return -EINVAL; 146 return -EINVAL;
147 } 147 }
148 148
149 return 0;
150 snd_pcm_hw_constraint_list(substream->runtime, 0, 149 snd_pcm_hw_constraint_list(substream->runtime, 0,
151 SNDRV_PCM_HW_PARAM_RATE, 150 SNDRV_PCM_HW_PARAM_RATE,
152 &wm8523->rate_constraint); 151 &wm8523->rate_constraint);
diff --git a/sound/soc/codecs/wm8904.c b/sound/soc/codecs/wm8904.c
index 33be84e506ea..fca60a0b57b8 100644
--- a/sound/soc/codecs/wm8904.c
+++ b/sound/soc/codecs/wm8904.c
@@ -2498,6 +2498,8 @@ static int wm8904_remove(struct snd_soc_codec *codec)
2498 2498
2499 wm8904_set_bias_level(codec, SND_SOC_BIAS_OFF); 2499 wm8904_set_bias_level(codec, SND_SOC_BIAS_OFF);
2500 regulator_bulk_free(ARRAY_SIZE(wm8904->supplies), wm8904->supplies); 2500 regulator_bulk_free(ARRAY_SIZE(wm8904->supplies), wm8904->supplies);
2501 kfree(wm8904->retune_mobile_texts);
2502 kfree(wm8904->drc_texts);
2501 2503
2502 return 0; 2504 return 0;
2503} 2505}
diff --git a/sound/soc/codecs/wm8961.c b/sound/soc/codecs/wm8961.c
index 4f326f604104..8340485c9851 100644
--- a/sound/soc/codecs/wm8961.c
+++ b/sound/soc/codecs/wm8961.c
@@ -711,7 +711,7 @@ static int wm8961_hw_params(struct snd_pcm_substream *substream,
711 if (fs <= 24000) 711 if (fs <= 24000)
712 reg |= WM8961_DACSLOPE; 712 reg |= WM8961_DACSLOPE;
713 else 713 else
714 reg &= WM8961_DACSLOPE; 714 reg &= ~WM8961_DACSLOPE;
715 snd_soc_write(codec, WM8961_ADC_DAC_CONTROL_2, reg); 715 snd_soc_write(codec, WM8961_ADC_DAC_CONTROL_2, reg);
716 716
717 return 0; 717 return 0;
@@ -736,7 +736,7 @@ static int wm8961_set_sysclk(struct snd_soc_dai *dai, int clk_id,
736 freq /= 2; 736 freq /= 2;
737 } else { 737 } else {
738 dev_dbg(codec->dev, "Using MCLK/1 for %dHz MCLK\n", freq); 738 dev_dbg(codec->dev, "Using MCLK/1 for %dHz MCLK\n", freq);
739 reg &= WM8961_MCLKDIV; 739 reg &= ~WM8961_MCLKDIV;
740 } 740 }
741 741
742 snd_soc_write(codec, WM8961_CLOCKING1, reg); 742 snd_soc_write(codec, WM8961_CLOCKING1, reg);
diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c
index 830dfdd66c5f..4d3e6f1ac584 100644
--- a/sound/soc/codecs/wm8994.c
+++ b/sound/soc/codecs/wm8994.c
@@ -4061,6 +4061,8 @@ static int wm8994_codec_remove(struct snd_soc_codec *codec)
4061 wm8994_free_irq(codec->control_data, WM8994_IRQ_MIC2_DET, wm8994); 4061 wm8994_free_irq(codec->control_data, WM8994_IRQ_MIC2_DET, wm8994);
4062 wm8994_free_irq(codec->control_data, WM8994_IRQ_MIC1_SHRT, wm8994); 4062 wm8994_free_irq(codec->control_data, WM8994_IRQ_MIC1_SHRT, wm8994);
4063 wm8994_free_irq(codec->control_data, WM8994_IRQ_MIC1_DET, wm8994); 4063 wm8994_free_irq(codec->control_data, WM8994_IRQ_MIC1_DET, wm8994);
4064 kfree(wm8994->retune_mobile_texts);
4065 kfree(wm8994->drc_texts);
4064 kfree(wm8994); 4066 kfree(wm8994);
4065 4067
4066 return 0; 4068 return 0;
@@ -4073,6 +4075,8 @@ static struct snd_soc_codec_driver soc_codec_dev_wm8994 = {
4073 .resume = wm8994_resume, 4075 .resume = wm8994_resume,
4074 .read = wm8994_read, 4076 .read = wm8994_read,
4075 .write = wm8994_write, 4077 .write = wm8994_write,
4078 .readable_register = wm8994_readable,
4079 .volatile_register = wm8994_volatile,
4076 .set_bias_level = wm8994_set_bias_level, 4080 .set_bias_level = wm8994_set_bias_level,
4077}; 4081};
4078 4082
diff --git a/sound/soc/davinci/davinci-vcif.c b/sound/soc/davinci/davinci-vcif.c
index fb4cc1edf339..9d2afccc3a2d 100644
--- a/sound/soc/davinci/davinci-vcif.c
+++ b/sound/soc/davinci/davinci-vcif.c
@@ -247,7 +247,10 @@ fail:
247 247
248static int davinci_vcif_remove(struct platform_device *pdev) 248static int davinci_vcif_remove(struct platform_device *pdev)
249{ 249{
250 struct davinci_vcif_dev *davinci_vcif_dev = dev_get_drvdata(&pdev->dev);
251
250 snd_soc_unregister_dai(&pdev->dev); 252 snd_soc_unregister_dai(&pdev->dev);
253 kfree(davinci_vcif_dev);
251 254
252 return 0; 255 return 0;
253} 256}
diff --git a/sound/soc/ep93xx/simone.c b/sound/soc/ep93xx/simone.c
index 4b0d19913728..286817946c56 100644
--- a/sound/soc/ep93xx/simone.c
+++ b/sound/soc/ep93xx/simone.c
@@ -54,24 +54,26 @@ static int __init simone_init(void)
54 54
55 ret = platform_device_add(simone_snd_ac97_device); 55 ret = platform_device_add(simone_snd_ac97_device);
56 if (ret) 56 if (ret)
57 goto fail; 57 goto fail1;
58 58
59 simone_snd_device = platform_device_alloc("soc-audio", -1); 59 simone_snd_device = platform_device_alloc("soc-audio", -1);
60 if (!simone_snd_device) { 60 if (!simone_snd_device) {
61 ret = -ENOMEM; 61 ret = -ENOMEM;
62 goto fail; 62 goto fail2;
63 } 63 }
64 64
65 platform_set_drvdata(simone_snd_device, &snd_soc_simone); 65 platform_set_drvdata(simone_snd_device, &snd_soc_simone);
66 ret = platform_device_add(simone_snd_device); 66 ret = platform_device_add(simone_snd_device);
67 if (ret) { 67 if (ret)
68 platform_device_put(simone_snd_device); 68 goto fail3;
69 goto fail;
70 }
71 69
72 return ret; 70 return 0;
73 71
74fail: 72fail3:
73 platform_device_put(simone_snd_device);
74fail2:
75 platform_device_del(simone_snd_ac97_device);
76fail1:
75 platform_device_put(simone_snd_ac97_device); 77 platform_device_put(simone_snd_ac97_device);
76 return ret; 78 return ret;
77} 79}
diff --git a/sound/soc/fsl/efika-audio-fabric.c b/sound/soc/fsl/efika-audio-fabric.c
index 53251e6b5bd5..108b5d8bd0e9 100644
--- a/sound/soc/fsl/efika-audio-fabric.c
+++ b/sound/soc/fsl/efika-audio-fabric.c
@@ -76,6 +76,7 @@ static __init int efika_fabric_init(void)
76 rc = platform_device_add(pdev); 76 rc = platform_device_add(pdev);
77 if (rc) { 77 if (rc) {
78 pr_err("efika_fabric_init: platform_device_add() failed\n"); 78 pr_err("efika_fabric_init: platform_device_add() failed\n");
79 platform_device_put(pdev);
79 return -ENODEV; 80 return -ENODEV;
80 } 81 }
81 return 0; 82 return 0;
diff --git a/sound/soc/fsl/mpc5200_dma.c b/sound/soc/fsl/mpc5200_dma.c
index dce6b551cd78..f92dca07cd35 100644
--- a/sound/soc/fsl/mpc5200_dma.c
+++ b/sound/soc/fsl/mpc5200_dma.c
@@ -9,7 +9,6 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/of_device.h> 10#include <linux/of_device.h>
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/of_device.h>
13#include <linux/of_platform.h> 12#include <linux/of_platform.h>
14 13
15#include <sound/soc.h> 14#include <sound/soc.h>
diff --git a/sound/soc/fsl/pcm030-audio-fabric.c b/sound/soc/fsl/pcm030-audio-fabric.c
index 25f27ec1dd6e..ba4d85e317ed 100644
--- a/sound/soc/fsl/pcm030-audio-fabric.c
+++ b/sound/soc/fsl/pcm030-audio-fabric.c
@@ -76,6 +76,7 @@ static __init int pcm030_fabric_init(void)
76 rc = platform_device_add(pdev); 76 rc = platform_device_add(pdev);
77 if (rc) { 77 if (rc) {
78 pr_err("pcm030_fabric_init: platform_device_add() failed\n"); 78 pr_err("pcm030_fabric_init: platform_device_add() failed\n");
79 platform_device_put(pdev);
79 return -ENODEV; 80 return -ENODEV;
80 } 81 }
81 return 0; 82 return 0;
diff --git a/sound/soc/imx/imx-ssi.c b/sound/soc/imx/imx-ssi.c
index d2d98c75ee8a..390b6ffc2658 100644
--- a/sound/soc/imx/imx-ssi.c
+++ b/sound/soc/imx/imx-ssi.c
@@ -679,8 +679,11 @@ static int imx_ssi_probe(struct platform_device *pdev)
679 } 679 }
680 680
681 ssi->soc_platform_pdev_fiq = platform_device_alloc("imx-fiq-pcm-audio", pdev->id); 681 ssi->soc_platform_pdev_fiq = platform_device_alloc("imx-fiq-pcm-audio", pdev->id);
682 if (!ssi->soc_platform_pdev_fiq) 682 if (!ssi->soc_platform_pdev_fiq) {
683 ret = -ENOMEM;
683 goto failed_pdev_fiq_alloc; 684 goto failed_pdev_fiq_alloc;
685 }
686
684 platform_set_drvdata(ssi->soc_platform_pdev_fiq, ssi); 687 platform_set_drvdata(ssi->soc_platform_pdev_fiq, ssi);
685 ret = platform_device_add(ssi->soc_platform_pdev_fiq); 688 ret = platform_device_add(ssi->soc_platform_pdev_fiq);
686 if (ret) { 689 if (ret) {
@@ -689,8 +692,11 @@ static int imx_ssi_probe(struct platform_device *pdev)
689 } 692 }
690 693
691 ssi->soc_platform_pdev = platform_device_alloc("imx-pcm-audio", pdev->id); 694 ssi->soc_platform_pdev = platform_device_alloc("imx-pcm-audio", pdev->id);
692 if (!ssi->soc_platform_pdev) 695 if (!ssi->soc_platform_pdev) {
696 ret = -ENOMEM;
693 goto failed_pdev_alloc; 697 goto failed_pdev_alloc;
698 }
699
694 platform_set_drvdata(ssi->soc_platform_pdev, ssi); 700 platform_set_drvdata(ssi->soc_platform_pdev, ssi);
695 ret = platform_device_add(ssi->soc_platform_pdev); 701 ret = platform_device_add(ssi->soc_platform_pdev);
696 if (ret) { 702 if (ret) {
@@ -703,6 +709,7 @@ static int imx_ssi_probe(struct platform_device *pdev)
703failed_pdev_add: 709failed_pdev_add:
704 platform_device_put(ssi->soc_platform_pdev); 710 platform_device_put(ssi->soc_platform_pdev);
705failed_pdev_alloc: 711failed_pdev_alloc:
712 platform_device_del(ssi->soc_platform_pdev_fiq);
706failed_pdev_fiq_add: 713failed_pdev_fiq_add:
707 platform_device_put(ssi->soc_platform_pdev_fiq); 714 platform_device_put(ssi->soc_platform_pdev_fiq);
708failed_pdev_fiq_alloc: 715failed_pdev_fiq_alloc:
@@ -726,8 +733,8 @@ static int __devexit imx_ssi_remove(struct platform_device *pdev)
726 struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 733 struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
727 struct imx_ssi *ssi = platform_get_drvdata(pdev); 734 struct imx_ssi *ssi = platform_get_drvdata(pdev);
728 735
729 platform_device_del(ssi->soc_platform_pdev); 736 platform_device_unregister(ssi->soc_platform_pdev);
730 platform_device_put(ssi->soc_platform_pdev); 737 platform_device_unregister(ssi->soc_platform_pdev_fiq);
731 738
732 snd_soc_unregister_dai(&pdev->dev); 739 snd_soc_unregister_dai(&pdev->dev);
733 740
diff --git a/sound/soc/imx/phycore-ac97.c b/sound/soc/imx/phycore-ac97.c
index 39f23734781a..9eabc28667e6 100644
--- a/sound/soc/imx/phycore-ac97.c
+++ b/sound/soc/imx/phycore-ac97.c
@@ -43,6 +43,7 @@ static struct snd_soc_card imx_phycore = {
43 .num_links = ARRAY_SIZE(imx_phycore_dai_ac97), 43 .num_links = ARRAY_SIZE(imx_phycore_dai_ac97),
44}; 44};
45 45
46static struct platform_device *imx_phycore_snd_ac97_device;
46static struct platform_device *imx_phycore_snd_device; 47static struct platform_device *imx_phycore_snd_device;
47 48
48static int __init imx_phycore_init(void) 49static int __init imx_phycore_init(void)
@@ -53,29 +54,42 @@ static int __init imx_phycore_init(void)
53 /* return happy. We might run on a totally different machine */ 54 /* return happy. We might run on a totally different machine */
54 return 0; 55 return 0;
55 56
56 imx_phycore_snd_device = platform_device_alloc("soc-audio", -1); 57 imx_phycore_snd_ac97_device = platform_device_alloc("soc-audio", -1);
57 if (!imx_phycore_snd_device) 58 if (!imx_phycore_snd_ac97_device)
58 return -ENOMEM; 59 return -ENOMEM;
59 60
60 platform_set_drvdata(imx_phycore_snd_device, &imx_phycore); 61 platform_set_drvdata(imx_phycore_snd_ac97_device, &imx_phycore);
61 ret = platform_device_add(imx_phycore_snd_device); 62 ret = platform_device_add(imx_phycore_snd_ac97_device);
63 if (ret)
64 goto fail1;
62 65
63 imx_phycore_snd_device = platform_device_alloc("wm9712-codec", -1); 66 imx_phycore_snd_device = platform_device_alloc("wm9712-codec", -1);
64 if (!imx_phycore_snd_device) 67 if (!imx_phycore_snd_device) {
65 return -ENOMEM; 68 ret = -ENOMEM;
69 goto fail2;
70 }
66 ret = platform_device_add(imx_phycore_snd_device); 71 ret = platform_device_add(imx_phycore_snd_device);
67 72
68 if (ret) { 73 if (ret) {
69 printk(KERN_ERR "ASoC: Platform device allocation failed\n"); 74 printk(KERN_ERR "ASoC: Platform device allocation failed\n");
70 platform_device_put(imx_phycore_snd_device); 75 goto fail3;
71 } 76 }
72 77
78 return 0;
79
80fail3:
81 platform_device_put(imx_phycore_snd_device);
82fail2:
83 platform_device_del(imx_phycore_snd_ac97_device);
84fail1:
85 platform_device_put(imx_phycore_snd_ac97_device);
73 return ret; 86 return ret;
74} 87}
75 88
76static void __exit imx_phycore_exit(void) 89static void __exit imx_phycore_exit(void)
77{ 90{
78 platform_device_unregister(imx_phycore_snd_device); 91 platform_device_unregister(imx_phycore_snd_device);
92 platform_device_unregister(imx_phycore_snd_ac97_device);
79} 93}
80 94
81late_initcall(imx_phycore_init); 95late_initcall(imx_phycore_init);
diff --git a/sound/soc/nuc900/nuc900-ac97.c b/sound/soc/nuc900/nuc900-ac97.c
index 293dc748797c..e00e39dd6576 100644
--- a/sound/soc/nuc900/nuc900-ac97.c
+++ b/sound/soc/nuc900/nuc900-ac97.c
@@ -384,7 +384,6 @@ out0:
384 384
385static int __devexit nuc900_ac97_drvremove(struct platform_device *pdev) 385static int __devexit nuc900_ac97_drvremove(struct platform_device *pdev)
386{ 386{
387
388 snd_soc_unregister_dai(&pdev->dev); 387 snd_soc_unregister_dai(&pdev->dev);
389 388
390 clk_put(nuc900_ac97_data->clk); 389 clk_put(nuc900_ac97_data->clk);
@@ -392,6 +391,7 @@ static int __devexit nuc900_ac97_drvremove(struct platform_device *pdev)
392 release_mem_region(nuc900_ac97_data->res->start, 391 release_mem_region(nuc900_ac97_data->res->start,
393 resource_size(nuc900_ac97_data->res)); 392 resource_size(nuc900_ac97_data->res));
394 393
394 kfree(nuc900_ac97_data);
395 nuc900_ac97_data = NULL; 395 nuc900_ac97_data = NULL;
396 396
397 return 0; 397 return 0;
diff --git a/sound/soc/omap/omap3pandora.c b/sound/soc/omap/omap3pandora.c
index dbd9d96b5f92..4ee33ce2cb98 100644
--- a/sound/soc/omap/omap3pandora.c
+++ b/sound/soc/omap/omap3pandora.c
@@ -306,6 +306,7 @@ static int __init omap3pandora_soc_init(void)
306 pr_err(PREFIX "Failed to get DAC regulator from %s: %ld\n", 306 pr_err(PREFIX "Failed to get DAC regulator from %s: %ld\n",
307 dev_name(&omap3pandora_snd_device->dev), 307 dev_name(&omap3pandora_snd_device->dev),
308 PTR_ERR(omap3pandora_dac_reg)); 308 PTR_ERR(omap3pandora_dac_reg));
309 ret = PTR_ERR(omap3pandora_dac_reg);
309 goto fail3; 310 goto fail3;
310 } 311 }
311 312
diff --git a/sound/soc/omap/osk5912.c b/sound/soc/omap/osk5912.c
index f0e662556428..65ae00e976ef 100644
--- a/sound/soc/omap/osk5912.c
+++ b/sound/soc/omap/osk5912.c
@@ -177,7 +177,8 @@ static int __init osk_soc_init(void)
177 tlv320aic23_mclk = clk_get(dev, "mclk"); 177 tlv320aic23_mclk = clk_get(dev, "mclk");
178 if (IS_ERR(tlv320aic23_mclk)) { 178 if (IS_ERR(tlv320aic23_mclk)) {
179 printk(KERN_ERR "Could not get mclk clock\n"); 179 printk(KERN_ERR "Could not get mclk clock\n");
180 return -ENODEV; 180 err = PTR_ERR(tlv320aic23_mclk);
181 goto err2;
181 } 182 }
182 183
183 /* 184 /*
@@ -188,7 +189,7 @@ static int __init osk_soc_init(void)
188 if (clk_set_rate(tlv320aic23_mclk, CODEC_CLOCK)) { 189 if (clk_set_rate(tlv320aic23_mclk, CODEC_CLOCK)) {
189 printk(KERN_ERR "Cannot set MCLK for AIC23 CODEC\n"); 190 printk(KERN_ERR "Cannot set MCLK for AIC23 CODEC\n");
190 err = -ECANCELED; 191 err = -ECANCELED;
191 goto err1; 192 goto err3;
192 } 193 }
193 } 194 }
194 195
@@ -196,9 +197,12 @@ static int __init osk_soc_init(void)
196 (uint) clk_get_rate(tlv320aic23_mclk), CODEC_CLOCK); 197 (uint) clk_get_rate(tlv320aic23_mclk), CODEC_CLOCK);
197 198
198 return 0; 199 return 0;
199err1: 200
201err3:
200 clk_put(tlv320aic23_mclk); 202 clk_put(tlv320aic23_mclk);
203err2:
201 platform_device_del(osk_snd_device); 204 platform_device_del(osk_snd_device);
205err1:
202 platform_device_put(osk_snd_device); 206 platform_device_put(osk_snd_device);
203 207
204 return err; 208 return err;
@@ -207,6 +211,7 @@ err1:
207 211
208static void __exit osk_soc_exit(void) 212static void __exit osk_soc_exit(void)
209{ 213{
214 clk_put(tlv320aic23_mclk);
210 platform_device_unregister(osk_snd_device); 215 platform_device_unregister(osk_snd_device);
211} 216}
212 217
diff --git a/sound/soc/pxa/Kconfig b/sound/soc/pxa/Kconfig
index 37f191bbfdd9..580f48571303 100644
--- a/sound/soc/pxa/Kconfig
+++ b/sound/soc/pxa/Kconfig
@@ -1,6 +1,7 @@
1config SND_PXA2XX_SOC 1config SND_PXA2XX_SOC
2 tristate "SoC Audio for the Intel PXA2xx chip" 2 tristate "SoC Audio for the Intel PXA2xx chip"
3 depends on ARCH_PXA 3 depends on ARCH_PXA
4 select SND_ARM
4 select SND_PXA2XX_LIB 5 select SND_PXA2XX_LIB
5 help 6 help
6 Say Y or M if you want to add support for codecs attached to 7 Say Y or M if you want to add support for codecs attached to
diff --git a/sound/soc/s3c24xx/smdk_spdif.c b/sound/soc/s3c24xx/smdk_spdif.c
index f31d22ad7c88..c8bd90488a87 100644
--- a/sound/soc/s3c24xx/smdk_spdif.c
+++ b/sound/soc/s3c24xx/smdk_spdif.c
@@ -38,7 +38,7 @@ static int set_audio_clock_heirachy(struct platform_device *pdev)
38 } 38 }
39 39
40 mout_epll = clk_get(NULL, "mout_epll"); 40 mout_epll = clk_get(NULL, "mout_epll");
41 if (IS_ERR(fout_epll)) { 41 if (IS_ERR(mout_epll)) {
42 printk(KERN_WARNING "%s: Cannot find mout_epll.\n", 42 printk(KERN_WARNING "%s: Cannot find mout_epll.\n",
43 __func__); 43 __func__);
44 ret = -EINVAL; 44 ret = -EINVAL;
@@ -54,7 +54,7 @@ static int set_audio_clock_heirachy(struct platform_device *pdev)
54 } 54 }
55 55
56 sclk_spdif = clk_get(NULL, "sclk_spdif"); 56 sclk_spdif = clk_get(NULL, "sclk_spdif");
57 if (IS_ERR(fout_epll)) { 57 if (IS_ERR(sclk_spdif)) {
58 printk(KERN_WARNING "%s: Cannot find sclk_spdif.\n", 58 printk(KERN_WARNING "%s: Cannot find sclk_spdif.\n",
59 __func__); 59 __func__);
60 ret = -EINVAL; 60 ret = -EINVAL;
diff --git a/sound/soc/sh/fsi.c b/sound/soc/sh/fsi.c
index 507e709f2807..4c2404b1b862 100644
--- a/sound/soc/sh/fsi.c
+++ b/sound/soc/sh/fsi.c
@@ -132,6 +132,8 @@ struct fsi_priv {
132 struct fsi_stream playback; 132 struct fsi_stream playback;
133 struct fsi_stream capture; 133 struct fsi_stream capture;
134 134
135 long rate;
136
135 u32 mst_ctrl; 137 u32 mst_ctrl;
136}; 138};
137 139
@@ -854,10 +856,17 @@ static void fsi_dai_shutdown(struct snd_pcm_substream *substream,
854{ 856{
855 struct fsi_priv *fsi = fsi_get_priv(substream); 857 struct fsi_priv *fsi = fsi_get_priv(substream);
856 int is_play = fsi_is_play(substream); 858 int is_play = fsi_is_play(substream);
859 struct fsi_master *master = fsi_get_master(fsi);
860 int (*set_rate)(struct device *dev, int is_porta, int rate, int enable);
857 861
858 fsi_irq_disable(fsi, is_play); 862 fsi_irq_disable(fsi, is_play);
859 fsi_clk_ctrl(fsi, 0); 863 fsi_clk_ctrl(fsi, 0);
860 864
865 set_rate = master->info->set_rate;
866 if (set_rate && fsi->rate)
867 set_rate(dai->dev, fsi_is_port_a(fsi), fsi->rate, 0);
868 fsi->rate = 0;
869
861 pm_runtime_put_sync(dai->dev); 870 pm_runtime_put_sync(dai->dev);
862} 871}
863 872
@@ -891,20 +900,20 @@ static int fsi_dai_hw_params(struct snd_pcm_substream *substream,
891{ 900{
892 struct fsi_priv *fsi = fsi_get_priv(substream); 901 struct fsi_priv *fsi = fsi_get_priv(substream);
893 struct fsi_master *master = fsi_get_master(fsi); 902 struct fsi_master *master = fsi_get_master(fsi);
894 int (*set_rate)(int is_porta, int rate) = master->info->set_rate; 903 int (*set_rate)(struct device *dev, int is_porta, int rate, int enable);
895 int fsi_ver = master->core->ver; 904 int fsi_ver = master->core->ver;
896 int is_play = fsi_is_play(substream); 905 long rate = params_rate(params);
897 int ret; 906 int ret;
898 907
899 /* if slave mode, set_rate is not needed */ 908 set_rate = master->info->set_rate;
900 if (!fsi_is_master_mode(fsi, is_play)) 909 if (!set_rate)
901 return 0; 910 return 0;
902 911
903 /* it is error if no set_rate */ 912 ret = set_rate(dai->dev, fsi_is_port_a(fsi), rate, 1);
904 if (!set_rate) 913 if (ret < 0) /* error */
905 return -EIO; 914 return ret;
906 915
907 ret = set_rate(fsi_is_port_a(fsi), params_rate(params)); 916 fsi->rate = rate;
908 if (ret > 0) { 917 if (ret > 0) {
909 u32 data = 0; 918 u32 data = 0;
910 919
diff --git a/sound/soc/sh/ssi.c b/sound/soc/sh/ssi.c
index 40bbdf1591dc..05192d97b377 100644
--- a/sound/soc/sh/ssi.c
+++ b/sound/soc/sh/ssi.c
@@ -387,7 +387,7 @@ static int __devinit sh4_soc_dai_probe(struct platform_device *pdev)
387 387
388static int __devexit sh4_soc_dai_remove(struct platform_device *pdev) 388static int __devexit sh4_soc_dai_remove(struct platform_device *pdev)
389{ 389{
390 snd_soc_unregister_dai(&pdev->dev, ARRAY_SIZE(sh4_ssi_dai)); 390 snd_soc_unregister_dais(&pdev->dev, ARRAY_SIZE(sh4_ssi_dai));
391 return 0; 391 return 0;
392} 392}
393 393
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 93bd2ff001fb..e2c2de201eec 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -697,17 +697,18 @@ static int __cmd_record(int argc, const char **argv)
697 if (err < 0) 697 if (err < 0)
698 err = event__synthesize_kernel_mmap(process_synthesized_event, 698 err = event__synthesize_kernel_mmap(process_synthesized_event,
699 session, machine, "_stext"); 699 session, machine, "_stext");
700 if (err < 0) { 700 if (err < 0)
701 pr_err("Couldn't record kernel reference relocation symbol.\n"); 701 pr_err("Couldn't record kernel reference relocation symbol\n"
702 return err; 702 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
703 } 703 "Check /proc/kallsyms permission or run as root.\n");
704 704
705 err = event__synthesize_modules(process_synthesized_event, 705 err = event__synthesize_modules(process_synthesized_event,
706 session, machine); 706 session, machine);
707 if (err < 0) { 707 if (err < 0)
708 pr_err("Couldn't record kernel reference relocation symbol.\n"); 708 pr_err("Couldn't record kernel module information.\n"
709 return err; 709 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
710 } 710 "Check /proc/modules permission or run as root.\n");
711
711 if (perf_guest) 712 if (perf_guest)
712 perf_session__process_machines(session, event__synthesize_guest_os); 713 perf_session__process_machines(session, event__synthesize_guest_os);
713 714
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index b39f499e575a..0500895a45af 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -295,7 +295,9 @@ static void symbols__insert_by_name(struct rb_root *self, struct symbol *sym)
295{ 295{
296 struct rb_node **p = &self->rb_node; 296 struct rb_node **p = &self->rb_node;
297 struct rb_node *parent = NULL; 297 struct rb_node *parent = NULL;
298 struct symbol_name_rb_node *symn = ((void *)sym) - sizeof(*parent), *s; 298 struct symbol_name_rb_node *symn, *s;
299
300 symn = container_of(sym, struct symbol_name_rb_node, sym);
299 301
300 while (*p != NULL) { 302 while (*p != NULL) {
301 parent = *p; 303 parent = *p;